diff --git a/c10/core/AllocatorConfig.cpp b/c10/core/AllocatorConfig.cpp new file mode 100644 index 00000000000..9ceb40ccf6d --- /dev/null +++ b/c10/core/AllocatorConfig.cpp @@ -0,0 +1,233 @@ +#include +#include +#include +#include + +namespace c10::CachingAllocator { + +namespace { +constexpr size_t kRoundUpPowerOfTwoIntervals = 16; +constexpr size_t kMB = 1024 * 1024ul; +constexpr size_t kRoundUpPowerOfTwoStart = 1 * kMB; // 1MB +constexpr size_t kRoundUpPowerOfTwoEnd = 64 * 1024ul * kMB; // 64GB +} // anonymous namespace + +AcceleratorAllocatorConfig& AcceleratorAllocatorConfig::instance() { + static AcceleratorAllocatorConfig instance; +#define C10_ALLOCATOR_CONFIG_PARSE_ENV(env, deprecated) \ + auto env##_name = c10::utils::get_env(#env); \ + if (env##_name.has_value()) { \ + if (deprecated) { \ + TORCH_WARN_ONCE(#env " is deprecated, use PYTORCH_ALLOC_CONF instead"); \ + } \ + instance.parseArgs(env##_name.value()); \ + return true; \ + } + static bool env_flag [[maybe_unused]] = []() { + C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_ALLOC_CONF, false) + // Keep this for backwards compatibility + C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_CUDA_ALLOC_CONF, /*deprecated=*/true) + C10_ALLOCATOR_CONFIG_PARSE_ENV(PYTORCH_HIP_ALLOC_CONF, /*deprecated=*/true) + return false; + }(); +#undef C10_ALLOCATOR_CONFIG_PARSE_ENV + return instance; +} + +AcceleratorAllocatorConfig::AcceleratorAllocatorConfig() { + roundup_power2_divisions_.assign(kRoundUpPowerOfTwoIntervals, 0); +} + +size_t AcceleratorAllocatorConfig::roundup_power2_divisions(size_t size) { + size_t log_size = (63 - llvm::countLeadingZeros(size)); + + // Our intervals start at 1MB and end at 64GB + const size_t interval_start = + 63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoStart); + const size_t interval_end = + 63 - llvm::countLeadingZeros(kRoundUpPowerOfTwoEnd); + TORCH_CHECK( + interval_end - interval_start == kRoundUpPowerOfTwoIntervals, + "kRoundUpPowerOfTwoIntervals mismatch"); + + size_t index = + (log_size > interval_start) ? (log_size - interval_start) : 0ul; + index = std::min(index, kRoundUpPowerOfTwoIntervals - 1); + return instance().roundup_power2_divisions_[index]; +} + +size_t AcceleratorAllocatorConfig::parseMaxSplitSize( + const ConfigTokenizer& tokenizer, + size_t i) { + tokenizer.checkToken(++i, ":"); + constexpr size_t min_allowed_split_size_mb = kLargeBuffer / kMB; + constexpr size_t max_allowed_split_size_mb = + std::numeric_limits::max() / kMB; + + size_t val_env = tokenizer.toSizeT(++i); + TORCH_CHECK( + val_env >= min_allowed_split_size_mb, + "CachingAllocator option max_split_size_mb too small, must be >= ", + min_allowed_split_size_mb); + val_env = std::min(val_env, max_allowed_split_size_mb); + max_split_size_ = val_env * kMB; + + return i; +} + +size_t AcceleratorAllocatorConfig::parseMaxNonSplitRoundingSize( + const ConfigTokenizer& tokenizer, + size_t i) { + tokenizer.checkToken(++i, ":"); + constexpr size_t min_allowed_split_size_mb = kLargeBuffer / kMB; + constexpr size_t max_allowed_split_size_mb = + std::numeric_limits::max() / kMB; + + size_t val_env = tokenizer.toSizeT(++i); + TORCH_CHECK( + val_env >= min_allowed_split_size_mb, + "CachingAllocator option max_non_split_rounding_mb too small, must be >= ", + min_allowed_split_size_mb); + val_env = std::min(val_env, max_allowed_split_size_mb); + max_non_split_rounding_size_ = val_env * kMB; + + return i; +} + +size_t AcceleratorAllocatorConfig::parseGarbageCollectionThreshold( + const ConfigTokenizer& tokenizer, + size_t i) { + tokenizer.checkToken(++i, ":"); + double val_env = tokenizer.toDouble(++i); + TORCH_CHECK( + val_env > 0 && val_env < 1.0, + "garbage_collect_threshold is invalid, set it in (0.0, 1.0)"); + garbage_collection_threshold_ = val_env; + + return i; +} + +size_t AcceleratorAllocatorConfig::parseRoundUpPower2Divisions( + const ConfigTokenizer& tokenizer, + size_t i) { + tokenizer.checkToken(++i, ":"); + bool first_value = true; + + if (tokenizer[++i] == "[") { + size_t last_index = 0; + // NOLINTNEXTLINE(bugprone-inc-dec-in-conditions) + while (++i < tokenizer.size() && tokenizer[i] != "]") { + size_t value_index = i; + tokenizer.checkToken(++i, ":"); + size_t value = tokenizer.toSizeT(++i); + TORCH_CHECK( + value == 0 || llvm::isPowerOf2_64(value), + "For roundups, the divisions has to be power of 2 or 0 to disable roundup "); + + if (tokenizer[value_index] == ">") { + std::fill( + std::next( + roundup_power2_divisions_.begin(), + static_cast::difference_type>( + last_index + 1)), + roundup_power2_divisions_.end(), + value); + } else { + size_t boundary = tokenizer.toSizeT(value_index); + TORCH_CHECK( + llvm::isPowerOf2_64(boundary), + "For roundups, the intervals have to be power of 2 "); + + size_t index = 63 - llvm::countLeadingZeros(boundary); + index = + std::clamp(index, size_t{0}, roundup_power2_divisions_.size() - 1); + + if (first_value) { + std::fill( + roundup_power2_divisions_.begin(), + std::next( + roundup_power2_divisions_.begin(), + static_cast::difference_type>(index)), + value); + first_value = false; + } + roundup_power2_divisions_[index] = value; + last_index = index; + } + + if (tokenizer[i + 1] != "]") { + tokenizer.checkToken(++i, ","); + } + } + TORCH_INTERNAL_ASSERT( + i < tokenizer.size(), + "Expected closing bracket ']' in ConfigTokenizer but reached end of config"); + } else { // Keep this for backwards compatibility + size_t value = tokenizer.toSizeT(i); + TORCH_CHECK( + llvm::isPowerOf2_64(value), + "For roundups, the divisions has to be power of 2 "); + std::fill( + roundup_power2_divisions_.begin(), + roundup_power2_divisions_.end(), + value); + } + return i; +} + +size_t AcceleratorAllocatorConfig::parseExpandableSegments( + const ConfigTokenizer& tokenizer, + size_t i) { + tokenizer.checkToken(++i, ":"); + use_expandable_segments_ = tokenizer.toBool(++i); + + return i; +} + +size_t AcceleratorAllocatorConfig::parsePinnedUseBackgroundThreads( + const ConfigTokenizer& tokenizer, + size_t i) { + tokenizer.checkToken(++i, ":"); + pinned_use_background_threads_ = tokenizer.toBool(++i); + + return i; +} + +void AcceleratorAllocatorConfig::parseArgs(const std::string& env) { + // The following option will be reset to its default value if not explicitly + // set each time. + max_split_size_ = std::numeric_limits::max(); + roundup_power2_divisions_.assign(kRoundUpPowerOfTwoIntervals, 0); + garbage_collection_threshold_ = 0; + + { + std::lock_guard lock(last_allocator_settings_mutex_); + last_allocator_settings_ = env; + } + + ConfigTokenizer tokenizer(env); + for (size_t i = 0; i < tokenizer.size(); i++) { + const auto& key = tokenizer[i]; + if (key == "max_split_size_mb") { + i = parseMaxSplitSize(tokenizer, i); + } else if (key == "max_non_split_rounding_mb") { + i = parseMaxNonSplitRoundingSize(tokenizer, i); + } else if (key == "garbage_collection_threshold") { + i = parseGarbageCollectionThreshold(tokenizer, i); + } else if (key == "roundup_power2_divisions") { + i = parseRoundUpPower2Divisions(tokenizer, i); + } else if (key == "expandable_segments") { + i = parseExpandableSegments(tokenizer, i); + } else if (key == "pinned_use_background_threads") { + i = parsePinnedUseBackgroundThreads(tokenizer, i); + } else { + i = tokenizer.skipKey(i); + } + + if (i + 1 < tokenizer.size()) { + tokenizer.checkToken(++i, ","); + } + } +} + +} // namespace c10::CachingAllocator diff --git a/c10/core/AllocatorConfig.h b/c10/core/AllocatorConfig.h new file mode 100644 index 00000000000..e19160ea597 --- /dev/null +++ b/c10/core/AllocatorConfig.h @@ -0,0 +1,337 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace c10::CachingAllocator { + +// "large" allocations may be packed in 20 MiB blocks +const size_t kLargeBuffer = 20971520; + +// A utility class for tokenizing allocator configuration strings into discrete +// parts. For example, the config string: +// "key1:val1,key2:[val2,val3]" +// is tokenized into: +// "key1", ":", "val1", ",", "key2", ":", "[", "val2", ",", "val3", "]", +// +// Tokens include keys, values, and special characters (':', ',', '[', ']'). +// Whitespace is ignored. +class ConfigTokenizer { + public: + explicit ConfigTokenizer(const std::string& env) { + std::string buffer; + for (char ch : env) { + if (ch == ',' || ch == ':' || ch == '[' || ch == ']') { + if (!buffer.empty()) { + config_.emplace_back(std::move(buffer)); + buffer.clear(); + } + config_.emplace_back(1, ch); + } else if (!std::isspace(static_cast(ch))) { + buffer += ch; + } + } + if (!buffer.empty()) { + config_.emplace_back(std::move(buffer)); + } + } + + const std::string& operator[](size_t i) const { + TORCH_INTERNAL_ASSERT( + i < config_.size(), "Index out of bounds in ConfigTokenizer"); + return config_[i]; + } + + size_t size() const { + return config_.size(); + } + + bool checkToken(size_t i, const std::string& token) const { + checkIndex(i); + return config_[i] == token; + } + + size_t toSizeT(size_t i) const { + checkIndex(i); + return std::stoull(config_[i]); + } + + double toDouble(size_t i) const { + checkIndex(i); + return std::stod(config_[i]); + } + + bool toBool(size_t i) const { + checkIndex(i); + const auto& token = config_[i]; + if (token == "True") { + return true; + } else if (token == "False") { + return false; + } else { + TORCH_CHECK( + false, + "Expected 'True' or 'False' at index ", + i, + " in ConfigTokenizer but got '", + token, + "'"); + } + } + + // Skips the current token group and returns the index of the value token. + // Assumes the current index `i` points to a key name in a key-value pair. + size_t skipKey(size_t i) const { + // Expect a colon after the key + checkToken(++i, ":"); + + ++i; // Move to the value + checkIndex(i); + if (config_[i] != "[") { + // Value is a single token (not a list) -> return its index + return i; + } + + // Skip tokens inside the list until matching ']' + // NOLINTNEXTLINE(bugprone-inc-dec-in-conditions) + while (++i < config_.size() && config_[i] != "]") { + } + + TORCH_INTERNAL_ASSERT( + i < config_.size(), + "Expected closing bracket ']' in ConfigTokenizer but reached end of config"); + + return i; // Return the index of the closing ']' + } + + private: + void checkIndex(size_t i) const { + TORCH_INTERNAL_ASSERT( + i < config_.size(), "Index out of bounds in ConfigTokenizer"); + } + + std::vector config_; +}; + +/** + * Note [AcceleratorAllocatorConfig design] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This class configures memory allocation for both device and host memory. A + * single `AcceleratorAllocatorConfig` instance is shared across all accelerator + * backends, such as CUDA and XPU, under the assumption that relevant + * environment variables apply uniformly to all accelerators. Device-specific + * configuration extensions are supported via hooks (see + * `registerDeviceConfigParserHook`). + * + * Recommended design: + * - Place common configurations in `AcceleratorAllocatorConfig`. + * - Extend backend-specific configurations in corresponding device-specific + * classes, such as `CUDAAllocatorConfig`, etc. + * + * Scope: + * - Configuration options must be environment-variable driven. + * + * Naming Convention: + * - Public API names in `AcceleratorAllocatorConfig` should be device-generic. + * - Members prefixed with `pinned_` are specific to the host/pinned allocator. + * - Environment variable names should be generic across backends. + * - Comma-separated key-value pairs in the format: `key:value`. Use square + * brackets `[]` for list values Example: `key1:123, key2:[val1,val2]` + * + * Environment Variables: + * - The primary environment variable for configuration is `PYTORCH_ALLOC_CONF`. + * - For backward compatibility, `PYTORCH_CUDA_ALLOC_CONF` is also supported + * with lower priority. + */ + +class C10_API AcceleratorAllocatorConfig { + public: + static AcceleratorAllocatorConfig& instance(); + + C10_DISABLE_COPY_AND_ASSIGN(AcceleratorAllocatorConfig); + AcceleratorAllocatorConfig(AcceleratorAllocatorConfig&&) = delete; + AcceleratorAllocatorConfig& operator=(AcceleratorAllocatorConfig&&) = delete; + ~AcceleratorAllocatorConfig() = default; + + /* Device allocator settings */ + + // Returns the maximum block size (in MB) that is allowed to be split. The + // default is unlimited (all blocks can be split). + static size_t max_split_size() { + return instance().max_split_size_; + } + + // Returns the maximum block size (in MB) that is allowed to be rounded up + // without requiring splitting when searching for a free block. The default is + // 20 MiB. + static size_t max_non_split_rounding_size() { + return instance().max_non_split_rounding_size_; + } + + // Return the number of divisions used when rounding up allocation sizes (in + // MB) to the nearest power-of-2 boundary. + static size_t roundup_power2_divisions(size_t size); + + // Returns the vector of division factors used for rounding up allocation + // sizes. These divisions apply to size intervals between 1MB and 64GB. + static std::vector roundup_power2_divisions() { + return instance().roundup_power2_divisions_; + } + + // Returns the threshold that triggers garbage collection when the ratio of + // used memory to maximum allowed memory exceeds this value. The default is 0, + // meaning no garbage collection is triggered. The value should be in the + // range (0.0, 1.0). + static double garbage_collection_threshold() { + return instance().garbage_collection_threshold_; + } + + // Returns whether the expandable segment feature is enabled. This allows the + // allocator to start with one segment that grows as needed, rather than + // creating a new segment for each allocation. Default is false (expandable + // segments disabled). + static bool use_expandable_segments() { + return instance().use_expandable_segments_; + } + + /* Host allocator settings */ + + // Returns whether the pinned host allocator uses background threads for + // processing events. This is useful for improving performance in scenarios + // where many small allocations are made. Default is false (background threads + // disabled). + static bool pinned_use_background_threads() { + return instance().pinned_use_background_threads_; + } + + /* Settings for both device and host allocator */ + + // Returns the current allocator settings as a string. This string is useful + // to expand device-specific allocator configurations + static std::string last_allocator_settings() { + std::lock_guard lock(instance().last_allocator_settings_mutex_); + return instance().last_allocator_settings_; + } + + // Parses the environment variable `env` to update the allocator settings. + // If the environment variable is not set, it does nothing. + // The configuration string should be a comma-separated list of key-value + // pairs, where each key is a configuration option and the value is the + // corresponding setting. For example: + // "max_split_size_mb:100,max_non_split_rounding_mb:20,garbage_collection_threshold:0.5,roundup_power2_divisions:[64:8,256:4,1024:4,>:1],expandable_segments:true,pinned_use_background_threads:true" + void parseArgs(const std::string& env); + + // Registers a device-specific configuration parser hook. This allows + // backends to parse additional device-specific configuration options from the + // environment variable. The hook should be a function that takes a string + // (the environment variable value) and parses it to set device-specific + // configuration options. + // The hook will be called when the environment variable is parsed. + // If a hook is already registered, it will be replaced with the new one. + void registerDeviceConfigParserHook( + std::function hook) { + device_config_parser_hook_ = std::move(hook); + } + + // Calls the registered device-specific configuration parser hook with the + // provided environment string. This allows backends to parse additional + // device-specific configuration options from the environment variable. + // If no hook is registered, this function does nothing. + void callDeviceConfigParserHook(const std::string& env) const { + if (device_config_parser_hook_) { + device_config_parser_hook_(env); + } + } + + private: + AcceleratorAllocatorConfig(); + + /* Internal functions for device allocator */ + + // Parse `max_split_size_mb` from environment variable. + size_t parseMaxSplitSize(const ConfigTokenizer& tokenizer, size_t i); + // Parse `max_non_split_rounding_mb` from environment variable. + size_t parseMaxNonSplitRoundingSize( + const ConfigTokenizer& tokenizer, + size_t i); + // Parse `garbage_collection_threshold` from environment variable. + size_t parseGarbageCollectionThreshold( + const ConfigTokenizer& tokenizer, + size_t i); + // Parse `roundup_power2_divisions` from environment variable. + size_t parseRoundUpPower2Divisions( + const ConfigTokenizer& tokenizer, + size_t i); + // Parse `expandable_segments` from environment variable. + size_t parseExpandableSegments(const ConfigTokenizer& tokenizer, size_t i); + + /* Internal functions for host allocator */ + + // Parse `pinned_use_background_threads` from environment variable. + size_t parsePinnedUseBackgroundThreads( + const ConfigTokenizer& tokenizer, + size_t i); + + /* The following members are specifically used for the device allocator. */ + + // The maximum block size that is allowed to be split. + std::atomic max_split_size_{std::numeric_limits::max()}; + // The maximum allowable extra size of a memory block without requiring + // splitting when searching for a free block. + std::atomic max_non_split_rounding_size_{kLargeBuffer}; + // Used to store how memory allocations of different sizes should be rounded + // up to the nearest power of 2 divisions. + std::vector roundup_power2_divisions_; + // The threshold that triggers garbage collection when the ratio of used + // memory to maximum allowed memory exceeds this value. + std::atomic garbage_collection_threshold_{0}; + // A flag to enable expandable segments feature. + std::atomic use_expandable_segments_{false}; + + /* The following members are specifically used for the host allocator. */ + + // A flag to enable background thread for processing events. + std::atomic pinned_use_background_threads_{false}; + + /* The following members are used for both device and host allocator. */ + + // Record the last allocator config environment setting. + std::mutex last_allocator_settings_mutex_; + std::string last_allocator_settings_; + + // Optional hook for parsing additional device-specific allocator settings. + // This allows backends (e.g., CUDA, XPU) to register a custom parser for + // their own environment configuration extensions. + std::function device_config_parser_hook_{nullptr}; +}; + +C10_API inline void setAllocatorSettings(const std::string& env) { + AcceleratorAllocatorConfig::instance().parseArgs(env); + AcceleratorAllocatorConfig::instance().callDeviceConfigParserHook(env); +} + +C10_API inline std::string getAllocatorSettings() { + return AcceleratorAllocatorConfig::instance().last_allocator_settings(); +} + +struct DeviceConfigParserHookRegistry { + explicit DeviceConfigParserHookRegistry( + std::function hook) { + AcceleratorAllocatorConfig::instance().registerDeviceConfigParserHook( + std::move(hook)); + } +}; + +#define REGISTER_ALLOCATOR_CONFIG_PARSE_HOOK(hook) \ + namespace { \ + static at::CachingAllocator::DeviceConfigParserHookRegistry \ + g_device_config_parse_hook_registry_instance(hook); \ + } + +} // namespace c10::CachingAllocator diff --git a/c10/test/core/AllocatorConfig_test.cpp b/c10/test/core/AllocatorConfig_test.cpp new file mode 100644 index 00000000000..c051cf4cd4a --- /dev/null +++ b/c10/test/core/AllocatorConfig_test.cpp @@ -0,0 +1,123 @@ +#include + +#include + +using namespace c10::CachingAllocator; +constexpr size_t kMB = 1024 * 1024ul; + +struct ExtendedAllocatorConfig { + static ExtendedAllocatorConfig& instance() { + static ExtendedAllocatorConfig instance; + return instance; + } + + // Returns the device-specific option value in bytes. + static size_t device_specific_option() { + return instance().device_specific_option_; + } + + void parseArgs(const std::string& env) { + // Parse device-specific options from the environment variable + ConfigTokenizer tokenizer(env); + for (size_t i = 0; i < tokenizer.size(); i++) { + const auto& key = tokenizer[i]; + if (key == "device_specific_option_mb") { + tokenizer.checkToken(++i, ":"); + device_specific_option_ = tokenizer.toSizeT(++i) * kMB; + } else { + i = tokenizer.skipKey(i); + } + + if (i + 1 < tokenizer.size()) { + tokenizer.checkToken(++i, ","); + } + } + } + + private: + // Device-specific option, e.g., memory limit for a specific device. + std::atomic device_specific_option_{0}; +}; + +REGISTER_ALLOCATOR_CONFIG_PARSE_HOOK([](const std::string& env) { + ExtendedAllocatorConfig::instance().parseArgs(env); +}) + +TEST(AllocatorConfigTest, allocator_config_test) { + std::string env = + "max_split_size_mb:40," + "max_non_split_rounding_mb:30," + "garbage_collection_threshold:0.5," + "roundup_power2_divisions:[64:8,128:2,256:4,512:2,1024:4,>:1]," + "expandable_segments:True," + "pinned_use_background_threads:True," + "device_specific_option_mb:64"; + c10::CachingAllocator::setAllocatorSettings(env); + EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env); + EXPECT_EQ(AcceleratorAllocatorConfig::max_split_size(), 40 * kMB); + EXPECT_EQ( + AcceleratorAllocatorConfig::max_non_split_rounding_size(), 30 * kMB); + EXPECT_EQ(AcceleratorAllocatorConfig::garbage_collection_threshold(), 0.5); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(32 * kMB), 8); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(64 * kMB), 8); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(128 * kMB), 2); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(256 * kMB), 4); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 2); + EXPECT_EQ( + AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 4); + EXPECT_EQ( + AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 1); + EXPECT_EQ( + AcceleratorAllocatorConfig::roundup_power2_divisions(4096 * kMB), 1); + EXPECT_EQ( + AcceleratorAllocatorConfig::roundup_power2_divisions(8192 * kMB), 1); + EXPECT_EQ(AcceleratorAllocatorConfig::use_expandable_segments(), true); + EXPECT_EQ(AcceleratorAllocatorConfig::pinned_use_background_threads(), true); + EXPECT_EQ(ExtendedAllocatorConfig::device_specific_option(), 64 * kMB); + + env = + "max_split_size_mb:20," + "max_non_split_rounding_mb:40," + "garbage_collection_threshold:0.8"; + c10::CachingAllocator::setAllocatorSettings(env); + EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env); + EXPECT_EQ(AcceleratorAllocatorConfig::max_split_size(), 20 * kMB); + EXPECT_EQ( + AcceleratorAllocatorConfig::max_non_split_rounding_size(), 40 * kMB); + EXPECT_EQ(AcceleratorAllocatorConfig::garbage_collection_threshold(), 0.8); + + // roundup_power2_divisions knob array syntax + env = "roundup_power2_divisions:[128:8,256:16,512:1,2048:8,>:2]"; + c10::CachingAllocator::setAllocatorSettings(env); + EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(64 * kMB), 8); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(128 * kMB), 8); + EXPECT_EQ( + AcceleratorAllocatorConfig::roundup_power2_divisions(256 * kMB), 16); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(512 * kMB), 1); + EXPECT_EQ( + AcceleratorAllocatorConfig::roundup_power2_divisions(1024 * kMB), 0); + EXPECT_EQ( + AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 8); + EXPECT_EQ( + AcceleratorAllocatorConfig::roundup_power2_divisions(4096 * kMB), 2); + + // roundup_power2_divisions single value syntax for backward compatibility + env = "roundup_power2_divisions:4"; + c10::CachingAllocator::setAllocatorSettings(env); + EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(64 * kMB), 4); + EXPECT_EQ(AcceleratorAllocatorConfig::roundup_power2_divisions(256 * kMB), 4); + EXPECT_EQ( + AcceleratorAllocatorConfig::roundup_power2_divisions(2048 * kMB), 4); + + env = "expandable_segments:False,"; + c10::CachingAllocator::setAllocatorSettings(env); + EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env); + EXPECT_EQ(AcceleratorAllocatorConfig::use_expandable_segments(), false); + + env = "pinned_use_background_threads:False"; + c10::CachingAllocator::setAllocatorSettings(env); + EXPECT_EQ(c10::CachingAllocator::getAllocatorSettings(), env); + EXPECT_EQ(AcceleratorAllocatorConfig::pinned_use_background_threads(), false); +}