Fix clang-tidy warnings on c10/xpu files (#169231)

This PR fixes clang-tidy warnings on c10/xpu files.
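Most of the changes below fall into a handful of recurring patterns: targeted NOLINTNEXTLINE suppressions where the flagged construct is intentional, in-class default member initializers instead of repeating constants in constructors, const-reference parameters for read-only arguments, and explicit casts for mixed float/integer arithmetic. A minimal standalone sketch of these patterns (illustrative only, not code from this PR; all names here are made up):

#include <cstddef>

struct Params {
  // Instead of writing block(nullptr) in every constructor, give the member
  // an in-class default (compare the AllocParams change below).
  int* block{nullptr};
};

// Read-only, potentially expensive-to-copy arguments go by const reference
// (compare the alloc_found_block change below).
std::size_t use(const Params& p) {
  return p.block == nullptr ? 0 : 1;
}

struct Pool {
  // A deliberately const data member: the check is suppressed for the next
  // line only rather than disabled globally (compare the BlockPool change).
  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
  const bool is_small{false};
};

int main() {
  Pool pool;
  return static_cast<int>(use(Params{})) + static_cast<int>(pool.is_small);
}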

Pull Request resolved: https://github.com/pytorch/pytorch/pull/169231
Approved by: https://github.com/guangyey, https://github.com/EikanWang, https://github.com/albanD
Author: Yuanyuan Chen
Date: 2025-12-11 09:48:53 +00:00
Committed by: PyTorch MergeBot
Parent: b870068d3d
Commit: 28fe2d3dbd

5 changed files with 44 additions and 39 deletions

View File

@@ -34,6 +34,7 @@ struct BlockPool {
   std::set<Block*, Comparison> blocks;
   std::set<Block*, Comparison> unmapped;
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
   const bool is_small;
   PrivatePool* owner_PrivatePool;
 };
@@ -63,7 +64,6 @@ struct Block {
       void* ptr)
       : device(device),
         queue(queue),
-        stream_uses(),
         size(size),
         requested_size(0),
         pool(pool),
@@ -71,11 +71,7 @@ struct Block {
   // constructor for search key
   Block(DeviceIndex device, sycl::queue* queue, size_t size)
-      : device(device),
-        queue(queue),
-        stream_uses(),
-        size(size),
-        requested_size(0) {}
+      : device(device), queue(queue), size(size), requested_size(0) {}
 
   bool is_split() const {
     return (prev != nullptr) || (next != nullptr);
@@ -142,7 +138,8 @@ struct ExpandableSegment {
     // The extra 1/8 allows flexibility for remapping or moving pages within the
     // segment when unmapping earlier regions.
     constexpr float kVirtualMemOversubscriptFactor = 1.125f; // 1 + 1/8
-    max_handles_ = numSegments(device_total * kVirtualMemOversubscriptFactor);
+    max_handles_ = numSegments(static_cast<size_t>(
+        static_cast<float>(device_total) * kVirtualMemOversubscriptFactor));
     ptr_ = sycl::ext::oneapi::experimental::reserve_virtual_mem(
         segment_size_ * max_handles_, xpu::get_device_context());
   }
@@ -168,15 +165,16 @@ struct ExpandableSegment {
     // Allocate and map physical memory for each segment.
     for (const auto i : c10::irange(begin, end)) {
       TORCH_INTERNAL_ASSERT(!handles_.at(i));
+      auto& handle = handles_.at(i);
       try {
         // Allocate physical memory for each segment. Construct the physical_mem
         // in-place to avoid copies.
-        handles_.at(i).emplace(
+        auto& mem = handle.emplace(
             xpu::get_raw_device(device_),
             xpu::get_device_context(),
             segment_size_);
         // Map the allocated physical memory into the virtual address space.
-        handles_.at(i).value().map(
+        mem.map(
             ptr_ + i * segment_size_,
             segment_size_,
             sycl::ext::oneapi::experimental::address_access_mode::read_write);
@@ -187,13 +185,14 @@ struct ExpandableSegment {
         // Note: constructing physical_mem may over-subscribe device memory but
         // not immediately trigger OOM. The actual OOM can occur during map().
         // Roll back all segments allocated or mapped in this operation.
-        handles_.at(i) = std::nullopt;
+        handle.reset();
         for (const auto j : c10::irange(begin, i)) {
           sycl::ext::oneapi::experimental::unmap(
+              // NOLINTNEXTLINE(performance-no-int-to-ptr)
              reinterpret_cast<void*>(ptr_ + segment_size_ * j),
              segment_size_,
              xpu::get_device_context());
-          handles_.at(j) = std::nullopt;
+          handles_.at(j).reset();
         }
         trimHandles();
         return rangeFromHandles(begin, begin);
@@ -245,6 +244,7 @@ struct ExpandableSegment {
      // ranges. Users must explicitly call unmap on all ranges before
      // destroying the physical_mem object.
      sycl::ext::oneapi::experimental::unmap(
+          // NOLINTNEXTLINE(performance-no-int-to-ptr)
          reinterpret_cast<void*>(ptr_ + segment_size_ * i),
          segment_size_,
          xpu::get_device_context());
@@ -318,9 +318,9 @@ struct ExpandableSegment {
   size_t max_handles_{0};
   // Physical memory handles for the segments.
   std::vector<std::optional<sycl::ext::oneapi::experimental::physical_mem>>
-      handles_{};
+      handles_;
   // Peer devices on which this memory could be accessible, reserved.
-  std::vector<c10::DeviceIndex> peers_{};
+  std::vector<c10::DeviceIndex> peers_;
 };
 
 struct AllocParams {
@@ -330,10 +330,7 @@ struct AllocParams {
       sycl::queue* queue,
       BlockPool* pool,
       size_t alloc_size)
-      : search_key(device, queue, size),
-        pool(pool),
-        alloc_size(alloc_size),
-        block(nullptr) {}
+      : search_key(device, queue, size), pool(pool), alloc_size(alloc_size) {}
 
   DeviceIndex device() const {
     return search_key.device;
@@ -350,7 +347,7 @@ struct AllocParams {
   Block search_key;
   BlockPool* pool;
   size_t alloc_size;
-  Block* block;
+  Block* block{nullptr};
   StatTypes stat_types = {};
 };
@@ -987,7 +984,7 @@
   }
 
   Block* alloc_found_block(
-      AllocParams params,
+      const AllocParams& params,
       size_t orig_size,
       bool split_remainder) {
     auto size = params.size();
@@ -1151,7 +1148,7 @@
         " Please use `empty_cache` to release all unoccupied cached memory.");
     }
     bool split_remainder = should_split(params.block, params.size());
-    return alloc_found_block(std::move(params), orig_size, split_remainder);
+    return alloc_found_block(params, orig_size, split_remainder);
   }
 
   void free(Block* block) {
@@ -1254,7 +1251,8 @@
     const auto device_total =
         xpu::get_raw_device(device_index)
             .get_info<sycl::info::device::global_mem_size>();
-    allowed_memory_maximum = static_cast<size_t>(fraction * device_total);
+    allowed_memory_maximum =
+        static_cast<size_t>(fraction * static_cast<double>(device_total));
     set_fraction = true;
   }
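The two static_cast changes in this file silence implicit-conversion warnings by spelling out each conversion in mixed float/integer arithmetic. A small self-contained sketch of the memory-fraction computation with assumed example values (16 GiB device, fraction 0.5; assumes a 64-bit size_t):

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const std::uint64_t device_total = 16ULL * 1024 * 1024 * 1024; // 16 GiB
  const double fraction = 0.5;

  // fraction * device_total would implicitly convert the integer to double
  // and the product back to size_t; clang-tidy wants both conversions
  // written out so the possible precision change is visible at the call site.
  const auto allowed =
      static_cast<std::size_t>(fraction * static_cast<double>(device_total));

  std::printf("allowed bytes: %zu\n", allowed); // 8589934592 (8 GiB)
  return 0;
}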

View File

@@ -5,18 +5,19 @@
 
 namespace c10::xpu {
 
-static inline sycl::async_handler asyncHandler = [](sycl::exception_list el) {
-  if (el.size() == 0) {
-    return;
-  }
-  for (const auto& e : el) {
-    try {
-      std::rethrow_exception(e);
-    } catch (sycl::exception& e) {
-      TORCH_WARN("SYCL Exception: ", e.what());
-    }
-  }
-  throw;
-};
+static inline sycl::async_handler asyncHandler =
+    [](const sycl::exception_list& el) {
+      if (el.size() == 0) {
+        return;
+      }
+      for (const auto& e : el) {
+        try {
+          std::rethrow_exception(e);
+        } catch (sycl::exception& e) {
+          TORCH_WARN("SYCL Exception: ", e.what());
+        }
+      }
+      throw;
+    };
 
 } // namespace c10::xpu
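The only functional change here is the lambda's signature: taking sycl::exception_list by value copies the list on every invocation, which performance-unnecessary-value-param flags for read-only parameters. A minimal sketch of the same fix, using std::vector<std::exception_ptr> as a stand-in for sycl::exception_list (likewise a container of std::exception_ptr) so the snippet builds without a SYCL toolchain:

#include <exception>
#include <iostream>
#include <stdexcept>
#include <vector>

// Stand-in for sycl::exception_list.
using exception_list = std::vector<std::exception_ptr>;

// Before: [](exception_list el) { ... }  -- copies the list on each call.
const auto handler = [](const exception_list& el) { // no copy
  for (const auto& e : el) {
    try {
      std::rethrow_exception(e);
    } catch (const std::exception& ex) {
      std::cerr << "async exception: " << ex.what() << '\n';
    }
  }
};

int main() {
  exception_list el;
  el.push_back(std::make_exception_ptr(std::runtime_error("boom")));
  handler(el);
  return 0;
}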

View File

@@ -5,7 +5,6 @@
 #include <atomic>
 #include <deque>
 #include <mutex>
-#include <vector>
 
 namespace c10::xpu {
@@ -30,6 +29,7 @@ std::deque<
     std::array<std::atomic<uint32_t>, max_compile_time_stream_priorities>>
     priority_counters;
 
+// NOLINTNEXTLINE(*c-arrays)
 thread_local std::unique_ptr<StreamId[]> current_streams = nullptr;
 
 /*
@@ -174,6 +174,7 @@ void initXPUStreamsOnce() {
   // Inits current streams (thread local) to the last queue in the "normal
   // priority" queue pool. Note: the queue pool have not been initialized yet.
   // It will be initialized in initDeviceStreamState for the specified device.
+  // NOLINTNEXTLINE(*c-arrays)
   current_streams = std::make_unique<StreamId[]>(num_gpus);
   for (const auto i : c10::irange(num_gpus)) {
     // Assigning the current stream to the last one in the pool can be
@@ -238,9 +239,11 @@ sycl::queue& XPUStream::queue() const {
   switch (st) {
     case StreamIdType::NORMAL:
     case StreamIdType::HIGH:
+      // NOLINTNEXTLINE(performance-no-int-to-ptr)
       return *streams[device_index][static_cast<uint8_t>(st)][si];
     // See Note [External XPU Stream]
     case StreamIdType::EXT:
+      // NOLINTNEXTLINE(performance-no-int-to-ptr)
       return *(reinterpret_cast<sycl::queue*>(stream_id));
     default:
       TORCH_CHECK(
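In the EXT case, an externally created sycl::queue* was packed into the integer StreamId (see Note [External XPU Stream]) and has to be recovered by casting back; clang-tidy's performance-no-int-to-ptr cannot see that the integer originated as a pointer, hence the targeted NOLINTs. A minimal sketch of such a round-trip (a simplified stand-in, not PyTorch's exact StreamId encoding):

#include <cassert>
#include <cstdint>

struct Queue {
  int id;
};
using StreamId = std::int64_t; // assumed wide enough to hold a pointer

StreamId pack(Queue* q) {
  return reinterpret_cast<StreamId>(q);
}

Queue* unpack(StreamId sid) {
  // The integer is known to have started life as a valid Queue*.
  // NOLINTNEXTLINE(performance-no-int-to-ptr)
  return reinterpret_cast<Queue*>(sid);
}

int main() {
  Queue q{42};
  assert(unpack(pack(&q)) == &q);
  return 0;
}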

View File

@@ -44,7 +44,7 @@ class C10_XPU_API XPUStream {
   }
 
   /// Construct a XPUStream from a Stream with no error checking.
-  explicit XPUStream(Unchecked, Stream stream) : stream_(stream) {}
+  explicit XPUStream(Unchecked /*unused*/, Stream stream) : stream_(stream) {}
 
   bool operator==(const XPUStream& other) const noexcept {
     return unwrap() == other.unwrap();
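Unchecked is a tag parameter: it only selects this overload and is never read, so it gets a /*unused*/ comment instead of a name, which satisfies clang-tidy checks that want every parameter identified (readability-named-parameter suggests exactly this style). A minimal sketch of the tag-dispatch pattern with a hypothetical Stream class:

#include <iostream>
#include <stdexcept>

struct Unchecked {};

class Stream {
 public:
  explicit Stream(int id) : id_(id) {
    if (id < 0) {
      throw std::invalid_argument("bad id");
    }
  }
  // Tag-dispatched overload: the caller vouches for validity, and the tag
  // itself is never read, so it stays unnamed.
  Stream(Unchecked /*unused*/, int id) : id_(id) {}
  int id() const {
    return id_;
  }

 private:
  int id_;
};

int main() {
  Stream checked(3);
  Stream unchecked(Unchecked{}, -1); // skips the validity check
  std::cout << checked.id() + unchecked.id() << '\n';
  return 0;
}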

View File

@@ -20,7 +20,7 @@ TEST(XPUStreamTest, CopyAndMoveTest) {
     return;
   }
-  int32_t device = -1;
+  c10::DeviceIndex device = -1;
   sycl::queue queue;
   c10::xpu::XPUStream copyStream = c10::xpu::getStreamFromPool();
   {
@@ -119,8 +119,10 @@ TEST(XPUStreamTest, MultithreadStreamBehavior) {
   c10::xpu::XPUStream cur_stream = c10::xpu::getCurrentXPUStream();
-  EXPECT_NE(cur_stream, *s0);
-  EXPECT_NE(cur_stream, *s1);
+  EXPECT_TRUE(s0);
+  EXPECT_TRUE(s1);
+  EXPECT_NE(cur_stream, s0);
+  EXPECT_NE(cur_stream, s1);
   EXPECT_NE(s0, s1);
 }
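Here s0 and s1 appear to be std::optional-wrapped streams populated by worker threads; dereferencing them with *s0 and no prior engagement check is what bugprone-unchecked-optional-access flags. The fix asserts that both optionals are engaged and then compares without dereferencing, relying on std::optional's mixed value/optional comparison operators. A minimal sketch:

#include <cassert>
#include <optional>

int main() {
  std::optional<int> s0 = 1;
  std::optional<int> s1 = 2;
  const int cur = 3;

  assert(s0.has_value()); // check engagement before any access
  assert(s1.has_value());
  assert(cur != s0); // value vs. optional comparison, no dereference
  assert(cur != s1);
  assert(s0 != s1); // optional vs. optional
  return 0;
}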
@@ -167,6 +169,7 @@ TEST(XPUStreamTest, StreamFunction) {
   }
 
   constexpr int numel = 1024;
+  // NOLINTNEXTLINE(*-c-arrays)
   int hostData[numel];
   initHostData(hostData, numel);
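This last suppression keeps a deliberate C array in the test: hostData is a fixed-size stack buffer handed to helpers that take a raw int*, and modernize-avoid-c-arrays (matched by the *-c-arrays glob) would otherwise ask for std::array. A sketch of both choices (initHostData here is hypothetical, standing in for the test helper):

#include <array>

void initHostData(int* data, int n) {
  for (int i = 0; i < n; ++i) {
    data[i] = i;
  }
}

int main() {
  constexpr int numel = 1024;

  // As in the test: a plain C array, with the check suppressed.
  // NOLINTNEXTLINE(*-c-arrays)
  int hostData[numel];
  initHostData(hostData, numel);

  // The alternative the check would suggest:
  std::array<int, numel> hostData2{};
  initHostData(hostData2.data(), numel);

  return hostData[1] + hostData2[1];
}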