mirror of
https://github.com/zebrajr/pytorch.git
synced 2026-01-15 12:15:51 +00:00
Revert "c10d/logging: add C10D_LOCK_GUARD (#134131)"
This reverts commit f33bcbe5fd.
Reverted https://github.com/pytorch/pytorch/pull/134131 on behalf of https://github.com/kit1980; for the reason, see D61985186 ([comment](https://github.com/pytorch/pytorch/pull/134131#issuecomment-2327556381))
This commit is contained in:
@@ -180,7 +180,7 @@ class ProcessGroupNCCLNoHeartbeatCaught
|
||||
: ProcessGroupNCCLTimedOutErrors(store, rank, size, opts),
|
||||
hasMonitorThreadCaughtError_(false) {}
|
||||
|
||||
std::timed_mutex& getWatchdogMutex() {
|
||||
std::mutex& getWatchdogMutex() {
|
||||
return workMetaListMutex_;
|
||||
}
|
||||
|
||||
@@ -413,7 +413,7 @@ TEST_F(ProcessGroupNCCLErrorsTest, testNCCLErrorsNoHeartbeat) {
|
||||
work = pg.allreduce(tensors_);
|
||||
{
|
||||
// Now run all reduce with errors.
|
||||
std::lock_guard<std::timed_mutex> lock(pg.getWatchdogMutex());
|
||||
std::lock_guard<std::mutex> lock(pg.getWatchdogMutex());
|
||||
LOG(INFO) << "Lock watchdog thread.";
|
||||
// Wait long enough before monitor thread throws exceptions.
|
||||
std::this_thread::sleep_for(
|
||||
|
||||
Reference in New Issue
Block a user