[CUDA][CUDA Graphs] Use count when allocating storage for edgeData (#169576)

`cudaGraphNodeGetDependencies` expects `edgeData` to point to enough storage for as many entries as `deps`, so allocate `*count` elements instead of passing the address of a single `cudaGraphEdgeData`.

For #169390

Pull Request resolved: https://github.com/pytorch/pytorch/pull/169576
Approved by: https://github.com/eee4017, https://github.com/ngimel
This commit is contained in:
eqy
2025-12-05 16:25:41 +00:00
committed by PyTorch MergeBot
parent a934a421dd
commit c44798d730

View File

@@ -8,6 +8,7 @@
#include <c10/util/Gauge.h>
#include <c10/util/Logging.h>
#include <c10/util/ScopeExit.h>
#include <c10/util/SmallVector.h>
#include <c10/util/UniqueVoidPtr.h>
#include <c10/util/env.h>
#include <c10/util/error.h>
@@ -1799,8 +1800,10 @@ class DeviceCachingAllocator {
if (deps == nullptr) {
C10_CUDA_CHECK(cudaGraphNodeGetDependencies(n, deps, nullptr, count));
} else {
cudaGraphEdgeData edgeData;
C10_CUDA_CHECK(cudaGraphNodeGetDependencies(n, deps, &edgeData, count));
SmallVector<cudaGraphEdgeData> edgeData;
edgeData.resize(*count);
C10_CUDA_CHECK(
cudaGraphNodeGetDependencies(n, deps, edgeData.data(), count));
}
#else
C10_CUDA_CHECK(cudaGraphNodeGetDependencies(n, deps, count));