From cfbf115b2646a08c1eb2cb50c4ee8cd7602f3624 Mon Sep 17 00:00:00 2001 From: Ben Alpert Date: Fri, 18 Sep 2015 21:43:14 -0700 Subject: [PATCH] Add hardware-counter.cpp and deps from HHVM --- .../perf-counters/src/hardware-counter.cpp | 481 +++++++++++ scripts/perf-counters/src/hardware-counter.h | 118 +++ scripts/perf-counters/src/portability.h | 187 +++++ scripts/perf-counters/src/thread-local.cpp | 108 +++ scripts/perf-counters/src/thread-local.h | 765 ++++++++++++++++++ 5 files changed, 1659 insertions(+) create mode 100644 scripts/perf-counters/src/hardware-counter.cpp create mode 100644 scripts/perf-counters/src/hardware-counter.h create mode 100644 scripts/perf-counters/src/portability.h create mode 100644 scripts/perf-counters/src/thread-local.cpp create mode 100644 scripts/perf-counters/src/thread-local.h diff --git a/scripts/perf-counters/src/hardware-counter.cpp b/scripts/perf-counters/src/hardware-counter.cpp new file mode 100644 index 0000000000..26783d28f4 --- /dev/null +++ b/scripts/perf-counters/src/hardware-counter.cpp @@ -0,0 +1,481 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ +*/ + +#include "hphp/util/hardware-counter.h" + +#ifndef NO_HARDWARE_COUNTERS + +#include + +#include "hphp/util/logger.h" + +#define _GNU_SOURCE 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace HPHP { +/////////////////////////////////////////////////////////////////////////////// + +IMPLEMENT_THREAD_LOCAL_NO_CHECK(HardwareCounter, + HardwareCounter::s_counter); + +static bool s_recordSubprocessTimes = false; +static bool s_profileHWEnable; +static std::string s_profileHWEvents; + +static inline bool useCounters() { +#ifdef VALGRIND + return false; +#else + return s_profileHWEnable; +#endif +} + +class HardwareCounterImpl { +public: + HardwareCounterImpl(int type, unsigned long config, + const char* desc = nullptr) + : m_desc(desc ? desc : ""), m_err(0), m_fd(-1), inited(false) { + memset (&pe, 0, sizeof (struct perf_event_attr)); + pe.type = type; + pe.size = sizeof (struct perf_event_attr); + pe.config = config; + pe.inherit = s_recordSubprocessTimes; + pe.disabled = 1; + pe.pinned = 0; + pe.exclude_kernel = 0; + pe.exclude_hv = 1; + pe.read_format = + PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING; + } + + ~HardwareCounterImpl() { + close(); + } + + void init_if_not() { + /* + * perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid, + * int cpu, int group_fd, unsigned long flags) + */ + if (inited) return; + inited = true; + m_fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); + if (m_fd < 0) { + Logger::Verbose("perf_event_open failed with: %s", + folly::errnoStr(errno).c_str()); + m_err = -1; + return; + } + if (ioctl(m_fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { + Logger::Warning("perf_event failed to enable: %s", + folly::errnoStr(errno).c_str()); + close(); + m_err = -1; + return; + } + reset(); + } + + int64_t read() { + uint64_t values[3]; + if 
(readRaw(values)) { + if (!values[2]) return 0; + int64_t value = (double)values[0] * values[1] / values[2]; + return value + extra; + } + return 0; + } + + void incCount(int64_t amount) { + extra += amount; + } + + bool readRaw(uint64_t* values) { + if (m_err || !useCounters()) return false; + init_if_not(); + + if (m_fd > 0) { + /* + * read the count + scaling values + * + * It is not necessary to stop an event to read its value + */ + auto ret = ::read(m_fd, values, sizeof(*values) * 3); + if (ret == sizeof(*values) * 3) { + values[0] -= reset_values[0]; + values[1] -= reset_values[1]; + values[2] -= reset_values[2]; + return true; + } + } + return false; + } + + void reset() { + if (m_err || !useCounters()) return; + init_if_not(); + extra = 0; + if (m_fd > 0) { + if (ioctl (m_fd, PERF_EVENT_IOC_RESET, 0) < 0) { + Logger::Warning("perf_event failed to reset with: %s", + folly::errnoStr(errno).c_str()); + m_err = -1; + return; + } + auto ret = ::read(m_fd, reset_values, sizeof(reset_values)); + if (ret != sizeof(reset_values)) { + Logger::Warning("perf_event failed to reset with: %s", + folly::errnoStr(errno).c_str()); + m_err = -1; + return; + } + } + } + +public: + std::string m_desc; + int m_err; +private: + int m_fd; + struct perf_event_attr pe; + bool inited; + uint64_t reset_values[3]; + uint64_t extra{0}; + + void close() { + if (m_fd > 0) { + ::close(m_fd); + m_fd = -1; + } + } +}; + +class InstructionCounter : public HardwareCounterImpl { +public: + InstructionCounter() : + HardwareCounterImpl(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS) {} +}; + +class LoadCounter : public HardwareCounterImpl { +public: + LoadCounter() : + HardwareCounterImpl(PERF_TYPE_HW_CACHE, + (PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8))) {} +}; + +class StoreCounter : public HardwareCounterImpl { +public: + StoreCounter() : + HardwareCounterImpl(PERF_TYPE_HW_CACHE, + PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_WRITE) << 8)) {} +}; + 
+HardwareCounter::HardwareCounter() + : m_countersSet(false) { + m_instructionCounter.reset(new InstructionCounter()); + if (s_profileHWEvents.empty()) { + m_loadCounter.reset(new LoadCounter()); + m_storeCounter.reset(new StoreCounter()); + } else { + m_countersSet = true; + setPerfEvents(s_profileHWEvents); + } +} + +HardwareCounter::~HardwareCounter() { +} + +void HardwareCounter::Init(bool enable, const std::string& events, + bool subProc) { + s_profileHWEnable = enable; + s_profileHWEvents = events; + s_recordSubprocessTimes = subProc; +} + +void HardwareCounter::Reset() { + s_counter->reset(); +} + +void HardwareCounter::reset() { + m_instructionCounter->reset(); + if (!m_countersSet) { + m_storeCounter->reset(); + m_loadCounter->reset(); + } + for (unsigned i = 0; i < m_counters.size(); i++) { + m_counters[i]->reset(); + } +} + +int64_t HardwareCounter::GetInstructionCount() { + return s_counter->getInstructionCount(); +} + +int64_t HardwareCounter::getInstructionCount() { + return m_instructionCounter->read(); +} + +int64_t HardwareCounter::GetLoadCount() { + return s_counter->getLoadCount(); +} + +int64_t HardwareCounter::getLoadCount() { + return m_loadCounter ? m_loadCounter->read() : 0; /* m_loadCounter is null when custom perf events are in use (never created by the constructor's else-branch, dropped by addPerfEvent); report 0 instead of dereferencing null */ +} + +int64_t HardwareCounter::GetStoreCount() { + return s_counter->getStoreCount(); +} + +int64_t HardwareCounter::getStoreCount() { + return m_storeCounter ? m_storeCounter->read() : 0; /* same guard as getLoadCount(): m_storeCounter is null when custom perf events are in use */ +} + +void HardwareCounter::IncInstructionCount(int64_t amount) { + s_counter->m_instructionCounter->incCount(amount); +} + +void HardwareCounter::IncLoadCount(int64_t amount) { + if (!s_counter->m_countersSet) { + s_counter->m_loadCounter->incCount(amount); + } +} + +void HardwareCounter::IncStoreCount(int64_t amount) { + if (!s_counter->m_countersSet) { + s_counter->m_storeCounter->incCount(amount); + } +} + +struct PerfTable perfTable[] = { + /* PERF_TYPE_HARDWARE events */ +#define PC(n) PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## n + { "cpu-cycles", PC(CPU_CYCLES) }, + { "cycles", PC(CPU_CYCLES) }, + { "instructions", 
PC(INSTRUCTIONS) }, + { "cache-references", PC(CACHE_REFERENCES) }, + { "cache-misses", PC(CACHE_MISSES) }, + { "branch-instructions", PC(BRANCH_INSTRUCTIONS) }, + { "branches", PC(BRANCH_INSTRUCTIONS) }, + { "branch-misses", PC(BRANCH_MISSES) }, + { "bus-cycles", PC(BUS_CYCLES) }, + { "stalled-cycles-frontend", PC(STALLED_CYCLES_FRONTEND) }, + { "stalled-cycles-backend", PC(STALLED_CYCLES_BACKEND) }, + + /* PERF_TYPE_HW_CACHE hw_cache_id */ +#define PCC(n) PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_ ## n + { "L1-dcache-", PCC(L1D) }, + { "L1-icache-", PCC(L1I) }, + { "LLC-", PCC(LL) }, + { "dTLB-", PCC(DTLB) }, + { "iTLB-", PCC(ITLB) }, + { "branch-", PCC(BPU) }, + + /* PERF_TYPE_HW_CACHE hw_cache_op, hw_cache_result */ +#define PCCO(n, m) PERF_TYPE_HW_CACHE, \ + ((PERF_COUNT_HW_CACHE_OP_ ## n) << 8 | \ + (PERF_COUNT_HW_CACHE_RESULT_ ## m) << 16) + { "loads", PCCO(READ, ACCESS) }, + { "load-misses", PCCO(READ, MISS) }, + { "stores", PCCO(WRITE, ACCESS) }, + { "store-misses", PCCO(WRITE, MISS) }, + { "prefetches", PCCO(PREFETCH, ACCESS) }, + { "prefetch-misses", PCCO(PREFETCH, MISS) } +}; + +static int findEvent(const char *event, struct PerfTable *t, + int len, int *match_len) { + int i; + + for (i = 0; i < len; i++) { + if (!strncmp(event, t[i].name, strlen(t[i].name))) { + *match_len = strlen(t[i].name); + return i; + } + } + return -1; +} + +#define CPUID_STEPPING(x) ((x) & 0xf) +#define CPUID_MODEL(x) (((x) & 0xf0) >> 4) +#define CPUID_FAMILY(x) (((x) & 0xf00) >> 8) +#define CPUID_TYPE(x) (((x) & 0x3000) >> 12) + +// hack to get LLC counters on perflab frc machines +static bool isIntelE5_2670() { +#ifdef __x86_64__ + unsigned long x; + asm volatile ("cpuid" : "=a"(x): "a"(1) : "ebx", "ecx", "edx"); + return CPUID_STEPPING(x) == 6 && CPUID_MODEL(x) == 0xd + && CPUID_FAMILY(x) == 6 && CPUID_TYPE(x) == 0; +#else + return false; +#endif +} + +static void checkLLCHack(const char* event, uint32_t& type, uint64_t& config) { + if (!strncmp(event, "LLC-load", 8) && 
isIntelE5_2670()) { + type = PERF_TYPE_RAW; + if (!strncmp(&event[4], "loads", 5)) { + config = 0x534f2e; + } else if (!strncmp(&event[4], "load-misses", 11)) { + config = 0x53412e; + } + } +} + +bool HardwareCounter::addPerfEvent(const char* event) { + uint32_t type = 0; + uint64_t config = 0; + int i, match_len; + bool found = false; + const char* ev = event; + + while ((i = findEvent(ev, perfTable, + sizeof(perfTable)/sizeof(struct PerfTable), + &match_len)) + != -1) { + if (!found) { + found = true; + type = perfTable[i].type; + } else if (type != perfTable[i].type) { + Logger::Warning("failed to find perf event: %s", event); + return false; + } + config |= perfTable[i].config; + ev = &ev[match_len]; + } + + checkLLCHack(event, type, config); + + // Check if we have a raw spec. + if (!found && event[0] == 'r' && event[1] != 0) { + config = strtoull(event + 1, const_cast(&ev), 16); + if (*ev == 0) { + found = true; + type = PERF_TYPE_RAW; + } + } + + if (!found || *ev) { + Logger::Warning("failed to find perf event: %s", event); + return false; + } + auto hwc = folly::make_unique(type, config, event); + if (hwc->m_err) { + Logger::Warning("failed to set perf event: %s", event); + return false; + } + m_counters.emplace_back(std::move(hwc)); + if (!m_countersSet) { + // reset load and store counters. This is because + // perf does not seem to handle more than three counters + // very well. + m_loadCounter.reset(); + m_storeCounter.reset(); + m_countersSet = true; + } + return true; +} + +bool HardwareCounter::eventExists(const char *event) { + // hopefully m_counters set is small, so a linear scan does not hurt + for(unsigned i = 0; i < m_counters.size(); i++) { + if (!strcmp(event, m_counters[i]->m_desc.c_str())) { + return true; + } + } + return false; +} + +bool HardwareCounter::setPerfEvents(folly::StringPiece sevents) { + // Make a copy of the string for use with strtok. 
+ auto const sevents_buf = static_cast(malloc(sevents.size() + 1)); + SCOPE_EXIT { free(sevents_buf); }; + memcpy(sevents_buf, sevents.data(), sevents.size()); + sevents_buf[sevents.size()] = '\0'; + + char* strtok_buf = nullptr; + char* s = strtok_r(sevents_buf, ",", &strtok_buf); + while (s) { + if (!eventExists(s) && !addPerfEvent(s)) { + return false; + } + s = strtok_r(nullptr, ",", &strtok_buf); + } + return true; +} + +bool HardwareCounter::SetPerfEvents(folly::StringPiece events) { + return s_counter->setPerfEvents(events); +} + +void HardwareCounter::clearPerfEvents() { + m_counters.clear(); +} + +void HardwareCounter::ClearPerfEvents() { + s_counter->clearPerfEvents(); +} + +const std::string + s_instructions("instructions"), + s_loads("loads"), + s_stores("stores"); + +void HardwareCounter::getPerfEvents(PerfEventCallback f, void* data) { + f(s_instructions, getInstructionCount(), data); + if (!m_countersSet) { + f(s_loads, getLoadCount(), data); + f(s_stores, getStoreCount(), data); + } + for (unsigned i = 0; i < m_counters.size(); i++) { + f(m_counters[i]->m_desc, m_counters[i]->read(), data); + } +} + +void HardwareCounter::GetPerfEvents(PerfEventCallback f, void* data) { + s_counter->getPerfEvents(f, data); +} + +/////////////////////////////////////////////////////////////////////////////// +} + + +#else // NO_HARDWARE_COUNTERS + +namespace HPHP { +/////////////////////////////////////////////////////////////////////////////// + +HardwareCounter HardwareCounter::s_counter; + +/////////////////////////////////////////////////////////////////////////////// +} + +#endif // NO_HARDWARE_COUNTERS diff --git a/scripts/perf-counters/src/hardware-counter.h b/scripts/perf-counters/src/hardware-counter.h new file mode 100644 index 0000000000..7a92dcc5ae --- /dev/null +++ b/scripts/perf-counters/src/hardware-counter.h @@ -0,0 +1,118 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + 
+----------------------------------------------------------------------+ + | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#ifndef incl_HPHP_UTIL_HARDWARE_COUNTER_H_ +#define incl_HPHP_UTIL_HARDWARE_COUNTER_H_ + +#include "hphp/util/thread-local.h" + +#include + +#include +#include + +namespace HPHP { +/////////////////////////////////////////////////////////////////////////////// + +#ifndef NO_HARDWARE_COUNTERS + +class InstructionCounter; +class LoadCounter; +class StoreCounter; + +struct PerfTable { + const char* name; + uint32_t type; + uint64_t config; +}; + +class HardwareCounterImpl; + +class HardwareCounter { +public: + HardwareCounter(); + ~HardwareCounter(); + + static void Reset(); + static int64_t GetInstructionCount(); + static int64_t GetLoadCount(); + static int64_t GetStoreCount(); + static bool SetPerfEvents(folly::StringPiece events); + static void IncInstructionCount(int64_t amount); + static void IncLoadCount(int64_t amount); + static void IncStoreCount(int64_t amount); + + typedef void (*PerfEventCallback)(const std::string&, int64_t, void*); + static void GetPerfEvents(PerfEventCallback f, void* data); + static void ClearPerfEvents(); + static void Init(bool enable, const std::string& events, bool subProc); + static DECLARE_THREAD_LOCAL_NO_CHECK(HardwareCounter, s_counter); + bool m_countersSet{false}; +private: + void reset(); + 
int64_t getInstructionCount(); + int64_t getLoadCount(); + int64_t getStoreCount(); + bool eventExists(const char* event); + bool addPerfEvent(const char* event); + bool setPerfEvents(folly::StringPiece events); + void getPerfEvents(PerfEventCallback f, void* data); + void clearPerfEvents(); + + std::unique_ptr m_instructionCounter; + std::unique_ptr m_loadCounter; + std::unique_ptr m_storeCounter; + std::vector> m_counters; +}; + +#else // NO_HARDWARE_COUNTERS + +/* Stub implementation for platforms without hardware counters (non-linux) + * This mock class pretends to track performance events, but just returns + * static values, so it doesn't even need to worry about thread safety + * for the one static instance of itself. + */ +class HardwareCounter { +public: + HardwareCounter() : m_countersSet(false) { } + ~HardwareCounter() { } + + static void Reset() { } + static int64_t GetInstructionCount() { return 0; } + static int64_t GetLoadCount() { return 0; } + static int64_t GetStoreCount() { return 0; } + static bool SetPerfEvents(folly::StringPiece events) { return false; } + static void IncInstructionCount(int64_t amount) {} + static void IncLoadCount(int64_t amount) {} + static void IncStoreCount(int64_t amount) {} + typedef void (*PerfEventCallback)(const std::string&, int64_t, void*); + static void GetPerfEvents(PerfEventCallback f, void* data) { } + static void ClearPerfEvents() { } + static void Init(bool enable, const std::string& events, bool subProc) {} + + // Normally exposed by DECLARE_THREAD_LOCAL_NO_CHECK + void getCheck() { } + void destroy() { } + static HardwareCounter s_counter; + bool m_countersSet; +}; + +#endif // NO_HARDWARE_COUNTERS + +/////////////////////////////////////////////////////////////////////////////// +} + +#endif diff --git a/scripts/perf-counters/src/portability.h b/scripts/perf-counters/src/portability.h new file mode 100644 index 0000000000..26b4e1b1b0 --- /dev/null +++ b/scripts/perf-counters/src/portability.h @@ -0,0 +1,187 
@@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ +#ifndef incl_HPHP_PORTABILITY_H_ +#define incl_HPHP_PORTABILITY_H_ + +#include // defining LIKELY/UNLIKELY is part of this header +#include +#include // defining FOLLY_DISABLE_ADDRESS_SANITIZER + +////////////////////////////////////////////////////////////////////// + +/* + * Various macros to make certain things conditional on either + * compiler or architecture. + * + * Currently we don't *really* compile on anything other than gcc or + * sometimes clang, and there are some parts of the code using + * __attribute__ stuff directly, but some things go through these + * macros to make it maybe easier to change later. + */ + +////////////////////////////////////////////////////////////////////// + +// TODO: does clang define __GNUC__ ? 
+#ifndef __GNUC__ +# define __attribute__(x) +#endif + +////////////////////////////////////////////////////////////////////// + +#ifdef ATTRIBUTE_UNUSED +# undef ATTRIBUTE_UNUSED +#endif +#ifdef ATTRIBUTE_NORETURN +# undef ATTRIBUTE_NORETURN +#endif +#ifdef ATTRIBUTE_PRINTF +# undef ATTRIBUTE_PRINTF +#endif +#ifdef ATTRIBUTE_PRINTF_STRING +# undef ATTRIBUTE_PRINTF_STRING +#endif + +#define ATTRIBUTE_PRINTF_STRING FOLLY_PRINTF_FORMAT + +#ifdef _MSC_VER +#define ATTRIBUTE_NORETURN __declspec(noreturn) +#define ATTRIBUTE_PRINTF(a1, a2) +#ifndef __thread +# define __thread __declspec(thread) +#endif +#define ATTRIBUTE_UNUSED + +#define ALWAYS_INLINE __forceinline +#define EXTERNALLY_VISIBLE +#define FLATTEN +#define NEVER_INLINE __declspec(noinline) +#define UNUSED +#else +#define ATTRIBUTE_NORETURN __attribute__((__noreturn__)) +#define ATTRIBUTE_PRINTF(a1, a2) \ + __attribute__((__format__ (__printf__, a1, a2))) +#define ATTRIBUTE_UNUSED __attribute__((__unused__)) + +#define ALWAYS_INLINE inline __attribute__((__always_inline__)) +#define EXTERNALLY_VISIBLE __attribute__((__externally_visible__)) +#define FLATTEN __attribute__((__flatten__)) +#define NEVER_INLINE __attribute__((__noinline__)) +#define UNUSED __attribute__((__unused__)) +#endif + +#ifdef DEBUG +# define DEBUG_ONLY /* nop */ +#else +# define DEBUG_ONLY UNUSED +#endif + +/* + * We need to keep some unreferenced functions from being removed by + * the linker. There is no compile time mechanism for doing this, but + * by putting them in the same section as some other, referenced function + * in the same file, we can keep them around. + * + * So this macro should be used to mark at least one function that is + * referenced, and other functions that are not referenced in the same + * file. + * + * Note: this may not work properly with LTO. We'll revisit when/if we + * move to it. 
+ */ +#ifndef __APPLE__ +# define KEEP_SECTION \ + __attribute__((__section__(".text.keep"))) +#else +# define KEEP_SECTION \ + __attribute__((__section__(".text,.text.keep"))) +#endif + +#if defined(__APPLE__) +// OS X has a macro "isset" defined in this header. Force the include so we can +// make sure the macro gets undef'd. (I think this also applies to BSD, but we +// can cross that road when we come to it.) +# include +# ifdef isset +# undef isset +# endif +#endif + +////////////////////////////////////////////////////////////////////// + +#if defined(__x86_64__) + +# if defined(__clang__) +# define DECLARE_FRAME_POINTER(fp) \ + ActRec* fp; \ + asm volatile("mov %%rbp, %0" : "=r" (fp) ::) +# else +# define DECLARE_FRAME_POINTER(fp) register ActRec* fp asm("rbp"); +# endif + +#elif defined(_M_X64) + +// TODO: FIXME! Without this implemented properly, the JIT +// will fail "pretty spectacularly". +# define DECLARE_FRAME_POINTER(fp) \ + always_assert(false); \ + register ActRec* fp = nullptr; + +#elif defined(__AARCH64EL__) + +# if defined(__clang__) +# error Clang implementation not done for ARM +# endif +# define DECLARE_FRAME_POINTER(fp) register ActRec* fp asm("x29"); + +#elif defined(__powerpc64__) + +# if defined(__clang__) +# error Clang implementation not done for PPC64 +# endif +# define DECLARE_FRAME_POINTER(fp) register ActRec* fp = (ActRec*) __builtin_frame_address(0); + +#else + +# error What are the stack and frame pointers called on your architecture? + +#endif + +////////////////////////////////////////////////////////////////////// + +// We reserve the exit status 127 to signal a failure in the +// interpreter. 127 is a valid exit code on all reasonable +// architectures: POSIX requires at least 8 unsigned bits and +// Windows 32 signed bits. +#define HPHP_EXIT_FAILURE 127 + +////////////////////////////////////////////////////////////////////// + +#if FACEBOOK +// Linking in libbfd is a gigantic PITA. 
If you want this yourself in a non-FB +// build, feel free to define HAVE_LIBBFD and specify the right options to link +// in libbfd.a in the extra C++ options. +#define HAVE_LIBBFD 1 +#endif + +#ifndef PACKAGE +// The value doesn't matter, but it must be defined before you include +// bfd.h +#define PACKAGE "hhvm" +#endif + +////////////////////////////////////////////////////////////////////// + +#endif diff --git a/scripts/perf-counters/src/thread-local.cpp b/scripts/perf-counters/src/thread-local.cpp new file mode 100644 index 0000000000..5efac97231 --- /dev/null +++ b/scripts/perf-counters/src/thread-local.cpp @@ -0,0 +1,108 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ +*/ + +#include "hphp/util/thread-local.h" + +#ifdef __linux__ +#include +#include +#include +extern "C" { +extern int arch_prctl(int, unsigned long*); +} +#endif //__linux__ + +namespace HPHP { + +#ifdef USE_GCC_FAST_TLS + +void ThreadLocalManager::OnThreadExit(void* p) { + auto list = getList(p); + p = list->head; + delete list; + while (p != nullptr) { + auto* pNode = static_cast*>(p); + if (pNode->m_on_thread_exit_fn) { + pNode->m_on_thread_exit_fn(p); + } + p = pNode->m_next; + } +} + +void ThreadLocalManager::PushTop(void* nodePtr, size_t nodeSize) { + auto& node = *static_cast*>(nodePtr); + auto key = GetManager().m_key; + auto list = getList(pthread_getspecific(key)); + if (UNLIKELY(!list)) { + ThreadLocalSetValue(key, list = new ThreadLocalList); + } + node.m_next = list->head; + node.m_size = nodeSize; + list->head = nodePtr; /* link the new node in as the list head; assigning node.m_next here only re-stored the old head, so the list stayed empty and OnThreadExit never ran any m_on_thread_exit_fn */ +} + +ThreadLocalManager& ThreadLocalManager::GetManager() { + static ThreadLocalManager m; + return m; +} + +#ifdef __APPLE__ +ThreadLocalManager::ThreadLocalList::ThreadLocalList() { + pthread_t self = pthread_self(); + handler.__routine = ThreadLocalManager::OnThreadExit; + handler.__arg = this; + handler.__next = self->__cleanup_stack; + self->__cleanup_stack = &handler; +} +#endif + +#endif + +#ifdef __linux__ + +static int visit_phdr(dl_phdr_info* info, size_t, void*) { + for (size_t i = 0, n = info->dlpi_phnum; i < n; ++i) { + const auto& hdr = info->dlpi_phdr[i]; + auto addr = info->dlpi_addr + hdr.p_vaddr; + if (addr < 0x100000000LL && hdr.p_type == PT_TLS) { + // found the main thread-local section + assert(int(hdr.p_memsz) == hdr.p_memsz); // ensure no truncation + return hdr.p_memsz; + } + } + return 0; +} + +std::pair getCppTdata() { + uintptr_t addr; + if (!arch_prctl(ARCH_GET_FS, &addr)) { + // fs points to the end of the threadlocal area. 
+ size_t size = dl_iterate_phdr(&visit_phdr, nullptr); + return {(void*)(addr - size), size}; + } + return {nullptr, 0}; +} + +#else + +// how do you find the thread local section on your system? +std::pair getCppTdata() { + return {nullptr, 0}; +} + +#endif //__linux__ + +} diff --git a/scripts/perf-counters/src/thread-local.h b/scripts/perf-counters/src/thread-local.h new file mode 100644 index 0000000000..6f6fbce661 --- /dev/null +++ b/scripts/perf-counters/src/thread-local.h @@ -0,0 +1,765 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ +*/ + +#ifndef incl_HPHP_THREAD_LOCAL_H_ +#define incl_HPHP_THREAD_LOCAL_H_ + +#include +#include "hphp/util/exception.h" +#include +#include +#include + +namespace HPHP { + +// return the location of the current thread's tdata section +std::pair getCppTdata(); + +inline uintptr_t tlsBase() { + uintptr_t retval; +#if defined(__x86_64__) + asm ("movq %%fs:0, %0" : "=r" (retval)); +#elif defined(__AARCH64EL__) + // mrs == "move register <-- system" + // tpidr_el0 == "thread process id register for exception level 0" + asm ("mrs %0, tpidr_el0" : "=r" (retval)); +#elif defined (__powerpc64__) + asm ("xor %0,%0,%0\n\t" + "or %0,%0,13\n\t" + : "=r" (retval)); +#elif defined(_M_X64) + retval = (uintptr_t)_readfsbase_u64(); + retval = *(uintptr_t*)(retval + 88); +#else +# error How do you access thread-local storage on this machine? +#endif + return retval; +} + +/////////////////////////////////////////////////////////////////////////////// +// gcc >= 4.3.0 supports the '__thread' keyword for thread locals +// +// Clang seems to have added this feature, or at the very least it is ignoring +// __thread keyword and compiling anyway +// +// On OSX, gcc does emulate TLS but in a manner that invalidates assumptions +// we have made about __thread and makes accessing thread-local variables in a +// JIT-friendly fashion difficult (as the compiler is doing a lot of magic that +// is not contractual or documented that we would need to duplicate in emitted +// code) so for now we're not going to use it. One possibility if we really +// want to do this is to generate functions that access variables of interest +// in ThreadLocal* (all of them are NoCheck right now) and use the bytes of +// gcc's compiled functions to find the values we would need to pass to +// __emutls_get_address. 
+// +// icc 13.0.0 appears to support it as well but we end up with +// assembler warnings of unknown importance about incorrect section +// types +// +// __thread on cygwin and mingw uses pthreads emulation not native tls so +// the emulation for thread local must be used as well +// +// So we use __thread on gcc, icc and clang, unless we are on OSX. On OSX, we +// use our own emulation. Use the DECLARE_THREAD_LOCAL() and +// IMPLEMENT_THREAD_LOCAL() macros to access either __thread or the emulation +// as appropriate. + +#if !defined(NO_TLS) && \ + !defined(__CYGWIN__) && !defined(__MINGW__) && \ + ((__llvm__ && __clang__) || \ + __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || \ + __INTEL_COMPILER || defined(_MSC_VER)) +#define USE_GCC_FAST_TLS +#endif + +/////////////////////////////////////////////////////////////////////////////// +// helper + +inline void ThreadLocalCheckReturn(int ret, const char *funcName) { + if (ret != 0) { + // This is used from global constructors so the safest thing to do is just + // print to stderr and exit(). + fprintf(stderr, "%s returned %d: %s", funcName, ret, + folly::errnoStr(ret).c_str()); + exit(1); + } +} + +inline void ThreadLocalCreateKey(pthread_key_t *key, void (*del)(void*)) { + int ret = pthread_key_create(key, del); + ThreadLocalCheckReturn(ret, "pthread_key_create"); +} + +inline void ThreadLocalSetValue(pthread_key_t key, const void* value) { + int ret = pthread_setspecific(key, value); + ThreadLocalCheckReturn(ret, "pthread_setspecific"); +} + +#ifdef __APPLE__ +typedef struct __darwin_pthread_handler_rec darwin_pthread_handler; +#endif + +/////////////////////////////////////////////////////////////////////////////// + +/** + * A thread-local object is a "global" object within a thread. This is useful + * for writing apartment-threaded code, where nothing is actually shared + * between different threads (hence no locking) but those variables are not + * on stack in local scope. 
To use it, just do something like this, + * + * IMPLEMENT_THREAD_LOCAL(MyClass, static_object); + * static_object->data_ = ...; + * static_object->doSomething(); + * + * IMPLEMENT_THREAD_LOCAL(int, static_number); + * int value = *static_number; + * + * So, syntax-wise it's similar to pointers. The type parameter can be a + * primitive types. If it's a class, there has to be a default constructor. + */ + +/////////////////////////////////////////////////////////////////////////////// +#if defined(USE_GCC_FAST_TLS) + +/** + * We keep a linked list of destructors in ThreadLocalManager to be called on + * thread exit. ThreadLocalNode is a node in this list. + */ +template +struct ThreadLocalNode { + T * m_p; + void (*m_on_thread_exit_fn)(void * p); + void * m_next; + size_t m_size; +}; + +struct ThreadLocalManager { + template + static void PushTop(ThreadLocalNode& node) { + PushTop(&node, sizeof(T)); + } + template void scan(F& mark) const; + +private: + static void PushTop(void* node, size_t size); + struct ThreadLocalList { + void* head{nullptr}; +#ifdef __APPLE__ + ThreadLocalList(); + darwin_pthread_handler handler; +#endif + }; + static ThreadLocalList* getList(void* p) { + return static_cast(p); + } + ThreadLocalManager() : m_key(0) { +#ifdef __APPLE__ + ThreadLocalCreateKey(&m_key, nullptr); +#else + ThreadLocalCreateKey(&m_key, ThreadLocalManager::OnThreadExit); +#endif + }; + static void OnThreadExit(void *p); + pthread_key_t m_key; + + static ThreadLocalManager& GetManager(); +}; + +/////////////////////////////////////////////////////////////////////////////// +// ThreadLocal allocates by calling new without parameters and frees by calling +// delete + +template +void ThreadLocalOnThreadExit(void * p) { + ThreadLocalNode * pNode = (ThreadLocalNode*)p; + delete pNode->m_p; + pNode->m_p = nullptr; +} + +/** + * The USE_GCC_FAST_TLS implementation of ThreadLocal is just a lazy-initialized + * pointer wrapper. 
In this case, we have one ThreadLocal object per thread. + */ +template +struct ThreadLocal { + T *get() const { + if (m_node.m_p == nullptr) { + const_cast*>(this)->create(); + } + return m_node.m_p; + } + + NEVER_INLINE void create(); + + bool isNull() const { return m_node.m_p == nullptr; } + + void destroy() { + delete m_node.m_p; + m_node.m_p = nullptr; + } + + void nullOut() { + m_node.m_p = nullptr; + } + + T *operator->() const { + return get(); + } + + T &operator*() const { + return *get(); + } + + ThreadLocalNode m_node; +}; + +template +void ThreadLocal::create() { + if (m_node.m_on_thread_exit_fn == nullptr) { + m_node.m_on_thread_exit_fn = ThreadLocalOnThreadExit; + ThreadLocalManager::PushTop(m_node); + } + assert(m_node.m_p == nullptr); + m_node.m_p = new T(); +} + +/** + * ThreadLocalNoCheck is a pointer wrapper like ThreadLocal, except that it is + * explicitly initialized with getCheck(), rather than being initialized when + * it is first dereferenced. + */ +template +struct ThreadLocalNoCheck { + NEVER_INLINE T *getCheck() const; + T* getNoCheck() const { + assert(m_node.m_p); + return m_node.m_p; + } + + NEVER_INLINE void create(); + + bool isNull() const { return m_node.m_p == nullptr; } + + void destroy() { + delete m_node.m_p; + m_node.m_p = nullptr; + } + + T *operator->() const { + return getNoCheck(); + } + + T &operator*() const { + return *getNoCheck(); + } + + ThreadLocalNode m_node; +private: + void setNull() { m_node.m_p = nullptr; } +}; + +template +void ThreadLocalNoCheck::create() { + if (m_node.m_on_thread_exit_fn == nullptr) { + m_node.m_on_thread_exit_fn = ThreadLocalOnThreadExit; + ThreadLocalManager::PushTop(m_node); + } + assert(m_node.m_p == nullptr); + m_node.m_p = new T(); +} +template +T *ThreadLocalNoCheck::getCheck() const { + if (m_node.m_p == nullptr) { + const_cast*>(this)->create(); + } + return m_node.m_p; +} + + +/////////////////////////////////////////////////////////////////////////////// +// Singleton 
thread-local storage for T + +template +void ThreadLocalSingletonOnThreadExit(void *obj) { + T::OnThreadExit((T*)obj); +} + +// ThreadLocalSingleton has NoCheck property +template +class ThreadLocalSingleton { +public: + ThreadLocalSingleton() { s_inited = true; } + + NEVER_INLINE static T *getCheck(); + + static T* getNoCheck() { + assert(s_inited); + assert(s_singleton == (T*)&s_storage); + return (T*)&s_storage; + } + + static bool isNull() { return s_singleton == nullptr; } + + static void destroy() { + assert(!s_singleton || s_singleton == (T*)&s_storage); + T* p = s_singleton; + if (p) { + T::Delete(p); + s_singleton = nullptr; + } + } + + T *operator->() const { + return getNoCheck(); + } + + T &operator*() const { + return *getNoCheck(); + } + +private: + static __thread T *s_singleton; + typedef typename std::aligned_storage::type + StorageType; + static __thread StorageType s_storage; + static bool s_inited; // no-fast-TLS requires construction so be consistent +}; + +template +bool ThreadLocalSingleton::s_inited = false; + +template +T *ThreadLocalSingleton::getCheck() { + assert(s_inited); + if (!s_singleton) { + T* p = (T*) &s_storage; + T::Create(p); + s_singleton = p; + } + return s_singleton; +} + +template __thread T *ThreadLocalSingleton::s_singleton; +template __thread typename ThreadLocalSingleton::StorageType + ThreadLocalSingleton::s_storage; + + +/////////////////////////////////////////////////////////////////////////////// +// some classes don't need new/delete at all + +template +struct ThreadLocalProxy { + T *get() const { + if (m_p == nullptr && throwOnNull) { + throw Exception("ThreadLocalProxy::get() called before set()"); + } + return m_p; + } + + void set(T* obj) { + m_p = obj; + } + + bool isNull() const { return m_p == nullptr; } + + void destroy() { + m_p = nullptr; + } + + T *operator->() const { + return get(); + } + + T &operator*() const { + return *get(); + } + + T * m_p; +}; + +/* + * How to use the thread-local macros: + * 
+ * Use DECLARE_THREAD_LOCAL to declare a *static* class field as thread local: + * class SomeClass { + * static DECLARE_THREAD_LOCAL(SomeFieldType, f); + * } + * + * Use IMPLEMENT_THREAD_LOCAL in the cpp file to implement the field: + * IMPLEMENT_THREAD_LOCAL(SomeFieldType, SomeClass::f); + * + * Remember: *Never* write IMPLEMENT_THREAD_LOCAL in a header file. + */ + +#define DECLARE_THREAD_LOCAL(T, f) \ + __thread HPHP::ThreadLocal f +#define IMPLEMENT_THREAD_LOCAL(T, f) \ + __thread HPHP::ThreadLocal f + +#define DECLARE_THREAD_LOCAL_NO_CHECK(T, f) \ + __thread HPHP::ThreadLocalNoCheck f +#define IMPLEMENT_THREAD_LOCAL_NO_CHECK(T, f) \ + __thread HPHP::ThreadLocalNoCheck f + +#define DECLARE_THREAD_LOCAL_PROXY(T, N, f) \ + __thread HPHP::ThreadLocalProxy f +#define IMPLEMENT_THREAD_LOCAL_PROXY(T, N, f) \ + __thread HPHP::ThreadLocalProxy f + +#else /* USE_GCC_FAST_TLS */ + +/////////////////////////////////////////////////////////////////////////////// +// ThreadLocal allocates by calling new() without parameters + +template +void ThreadLocalOnThreadExit(void *p) { + delete (T*)p; +} + +#ifdef __APPLE__ +// The __thread variables in class T will be freed when pthread calls +// the destructor function on Mac. We can register a handler in +// pthread_t->__cleanup_stack similar to pthread_cleanup_push(). The handler +// will be called earlier so the __thread variables will still exist in the +// handler when the thread exits. +// +// See the details at: +// https://github.com/facebook/hhvm/issues/4444#issuecomment-92497582 +typedef struct __darwin_pthread_handler_rec darwin_pthread_handler; + +template +void ThreadLocalOnThreadCleanup(void *key) { + void *obj = pthread_getspecific((pthread_key_t)key); + if (obj) { + ThreadLocalOnThreadExit(obj); + } +} + +inline void ThreadLocalSetCleanupHandler(pthread_key_t cleanup_key, + pthread_key_t key, + void (*del)(void*)) { + // Prevent from adding the handler for multiple times. 
+ darwin_pthread_handler *handler = + (darwin_pthread_handler*)pthread_getspecific(cleanup_key); + if (handler) + return; + + pthread_t self = pthread_self(); + + handler = new darwin_pthread_handler(); + handler->__routine = del; + handler->__arg = (void*)key; + handler->__next = self->__cleanup_stack; + self->__cleanup_stack = handler; + + ThreadLocalSetValue(cleanup_key, handler); +} +#endif + +/** + * This is the emulation version of ThreadLocal. In this case, the ThreadLocal + * object is a true global, and the get() method returns a thread-dependent + * pointer from pthread's thread-specific data management. + */ +template +class ThreadLocal { +public: + /** + * Constructor that has to be called from a thread-neutral place. + */ + ThreadLocal() : m_key(0) { +#ifdef __APPLE__ + ThreadLocalCreateKey(&m_key, nullptr); + ThreadLocalCreateKey(&m_cleanup_key, + ThreadLocalOnThreadExit); +#else + ThreadLocalCreateKey(&m_key, ThreadLocalOnThreadExit); +#endif + } + + T *get() const { + T *obj = (T*)pthread_getspecific(m_key); + if (obj == nullptr) { + obj = new T(); + ThreadLocalSetValue(m_key, obj); +#ifdef __APPLE__ + ThreadLocalSetCleanupHandler(m_cleanup_key, m_key, + ThreadLocalOnThreadCleanup); +#endif + } + return obj; + } + + bool isNull() const { return pthread_getspecific(m_key) == nullptr; } + + void destroy() { + delete (T*)pthread_getspecific(m_key); + ThreadLocalSetValue(m_key, nullptr); + } + + void nullOut() { + ThreadLocalSetValue(m_key, nullptr); + } + + /** + * Access object's member or method through this operator overload. + */ + T *operator->() const { + return get(); + } + + T &operator*() const { + return *get(); + } + +private: + pthread_key_t m_key; + +#ifdef __APPLE__ + pthread_key_t m_cleanup_key; +#endif +}; + +template +class ThreadLocalNoCheck { +public: + /** + * Constructor that has to be called from a thread-neutral place. 
+ */ + ThreadLocalNoCheck() : m_key(0) { +#ifdef __APPLE__ + ThreadLocalCreateKey(&m_key, nullptr); + ThreadLocalCreateKey(&m_cleanup_key, + ThreadLocalOnThreadExit); +#else + ThreadLocalCreateKey(&m_key, ThreadLocalOnThreadExit); +#endif + } + + NEVER_INLINE T *getCheck() const; + + T* getNoCheck() const { + T *obj = (T*)pthread_getspecific(m_key); + assert(obj); + return obj; + } + + bool isNull() const { return pthread_getspecific(m_key) == nullptr; } + + void destroy() { + delete (T*)pthread_getspecific(m_key); + ThreadLocalSetValue(m_key, nullptr); + } + + /** + * Access object's member or method through this operator overload. + */ + T *operator->() const { + return getNoCheck(); + } + + T &operator*() const { + return *getNoCheck(); + } + +public: + void setNull() { ThreadLocalSetValue(m_key, nullptr); } + pthread_key_t m_key; + +#ifdef __APPLE__ + pthread_key_t m_cleanup_key; +#endif +}; + +template +T *ThreadLocalNoCheck::getCheck() const { + T *obj = (T*)pthread_getspecific(m_key); + if (obj == nullptr) { + obj = new T(); + ThreadLocalSetValue(m_key, obj); +#ifdef __APPLE__ + ThreadLocalSetCleanupHandler(m_cleanup_key, m_key, + ThreadLocalOnThreadCleanup); +#endif + } + return obj; +} + +/////////////////////////////////////////////////////////////////////////////// +// Singleton thread-local storage for T + +template +void ThreadLocalSingletonOnThreadExit(void *obj) { + T::OnThreadExit((T*)obj); + free(obj); +} + +#ifdef __APPLE__ +template +void ThreadLocalSingletonOnThreadCleanup(void *key) { + void *obj = pthread_getspecific((pthread_key_t)key); + if (obj) { + ThreadLocalSingletonOnThreadExit(obj); + } +} +#endif + +// ThreadLocalSingleton has NoCheck property +template +class ThreadLocalSingleton { +public: + ThreadLocalSingleton() { getKey(); } + + NEVER_INLINE static T *getCheck(); + static T* getNoCheck() { + assert(s_inited); + T *obj = (T*)pthread_getspecific(s_key); + assert(obj); + return obj; + } + + static bool isNull() { + return !s_inited 
|| pthread_getspecific(s_key) == nullptr; + } + + static void destroy() { + void* p = pthread_getspecific(s_key); + T::Delete((T*)p); + free(p); + ThreadLocalSetValue(s_key, nullptr); + } + + T *operator->() const { + return getNoCheck(); + } + + T &operator*() const { + return *getNoCheck(); + } + +private: + static pthread_key_t s_key; + static bool s_inited; // pthread_key_t has no portable valid sentinel + +#ifdef __APPLE__ + static pthread_key_t s_cleanup_key; +#endif + + static pthread_key_t getKey() { + if (!s_inited) { + s_inited = true; +#ifdef __APPLE__ + ThreadLocalCreateKey(&s_key, nullptr); + ThreadLocalCreateKey(&s_cleanup_key, + ThreadLocalOnThreadExit); +#else + ThreadLocalCreateKey(&s_key, ThreadLocalSingletonOnThreadExit); +#endif + } + return s_key; + } +}; + +template +T *ThreadLocalSingleton::getCheck() { + assert(s_inited); + T *obj = (T*)pthread_getspecific(s_key); + if (obj == nullptr) { + obj = (T*)malloc(sizeof(T)); + T::Create(obj); + ThreadLocalSetValue(s_key, obj); +#ifdef __APPLE__ + ThreadLocalSetCleanupHandler(s_cleanup_key, s_key, + ThreadLocalSingletonOnThreadCleanup); +#endif + } + return obj; +} + +template +pthread_key_t ThreadLocalSingleton::s_key; +template +bool ThreadLocalSingleton::s_inited = false; + +#ifdef __APPLE__ +template +pthread_key_t ThreadLocalSingleton::s_cleanup_key; +#endif + +/////////////////////////////////////////////////////////////////////////////// +// some classes don't need new/delete at all + +template +class ThreadLocalProxy { +public: + /** + * Constructor that has to be called from a thread-neutral place. 
+ */ + ThreadLocalProxy() : m_key(0) { + ThreadLocalCreateKey(&m_key, nullptr); + } + + T *get() const { + T *obj = (T*)pthread_getspecific(m_key); + if (obj == nullptr && throwOnNull) { + throw Exception("ThreadLocalProxy::get() called before set()"); + } + return obj; + } + + void set(T* obj) { + ThreadLocalSetValue(m_key, obj); + } + + bool isNull() const { return pthread_getspecific(m_key) == nullptr; } + + void destroy() { + ThreadLocalSetValue(m_key, nullptr); + } + + /** + * Access object's member or method through this operator overload. + */ + T *operator->() const { + return get(); + } + + T &operator*() const { + return *get(); + } + +public: + pthread_key_t m_key; +}; + +/** + * The emulation version of the thread-local macros + */ +#define DECLARE_THREAD_LOCAL(T, f) HPHP::ThreadLocal f +#define IMPLEMENT_THREAD_LOCAL(T, f) HPHP::ThreadLocal f + +#define DECLARE_THREAD_LOCAL_NO_CHECK(T, f) HPHP::ThreadLocalNoCheck f +#define IMPLEMENT_THREAD_LOCAL_NO_CHECK(T, f) HPHP::ThreadLocalNoCheck f + +#define DECLARE_THREAD_LOCAL_PROXY(T, N, f) HPHP::ThreadLocalProxy f +#define IMPLEMENT_THREAD_LOCAL_PROXY(T, N, f) HPHP::ThreadLocalProxy f + +#endif /* USE_GCC_FAST_TLS */ + +/////////////////////////////////////////////////////////////////////////////// +} + +#endif // incl_HPHP_THREAD_LOCAL_H_