mirror of
https://github.com/zebrajr/pytorch.git
synced 2026-01-15 12:15:51 +00:00
[CUDA][CUDA Graphs] Use count when allocating storage for edgeData (#169576)
`cudaGraphNodeGetDependencies` expects the pointer to `edgeData` to have enough storage for as many entries as `deps`.
For #169390.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/169576
Approved by: https://github.com/eee4017, https://github.com/ngimel
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
#include <c10/util/Gauge.h>
|
||||
#include <c10/util/Logging.h>
|
||||
#include <c10/util/ScopeExit.h>
|
||||
#include <c10/util/SmallVector.h>
|
||||
#include <c10/util/UniqueVoidPtr.h>
|
||||
#include <c10/util/env.h>
|
||||
#include <c10/util/error.h>
|
||||
@@ -1799,8 +1800,10 @@ class DeviceCachingAllocator {
|
||||
if (deps == nullptr) {
|
||||
C10_CUDA_CHECK(cudaGraphNodeGetDependencies(n, deps, nullptr, count));
|
||||
} else {
|
||||
cudaGraphEdgeData edgeData;
|
||||
C10_CUDA_CHECK(cudaGraphNodeGetDependencies(n, deps, &edgeData, count));
|
||||
SmallVector<cudaGraphEdgeData> edgeData;
|
||||
edgeData.resize(*count);
|
||||
C10_CUDA_CHECK(
|
||||
cudaGraphNodeGetDependencies(n, deps, edgeData.data(), count));
|
||||
}
|
||||
#else
|
||||
C10_CUDA_CHECK(cudaGraphNodeGetDependencies(n, deps, count));
|
||||
|
||||
Reference in New Issue
Block a user