Revert "c10d/logging: add C10D_LOCK_GUARD (#134131)"

This reverts commit f33bcbe5fd.

Reverted https://github.com/pytorch/pytorch/pull/134131 on behalf of https://github.com/kit1980 due to: see D61985186 ([comment](https://github.com/pytorch/pytorch/pull/134131#issuecomment-2327556381))
This commit is contained in:
PyTorch MergeBot
2024-09-03 22:35:14 +00:00
parent 2fd36086bc
commit c044deb9ce
14 changed files with 59 additions and 183 deletions

View File

@@ -180,7 +180,7 @@ class ProcessGroupNCCLNoHeartbeatCaught
: ProcessGroupNCCLTimedOutErrors(store, rank, size, opts),
hasMonitorThreadCaughtError_(false) {}
std::timed_mutex& getWatchdogMutex() {
std::mutex& getWatchdogMutex() {
return workMetaListMutex_;
}
@@ -413,7 +413,7 @@ TEST_F(ProcessGroupNCCLErrorsTest, testNCCLErrorsNoHeartbeat) {
work = pg.allreduce(tensors_);
{
// Now run all reduce with errors.
std::lock_guard<std::timed_mutex> lock(pg.getWatchdogMutex());
std::lock_guard<std::mutex> lock(pg.getWatchdogMutex());
LOG(INFO) << "Lock watchdog thread.";
// Wait long enough before monitor thread throws exceptions.
std::this_thread::sleep_for(