Improve benchmarks/dynamo:check_perf_csv output and failure summary (#161728)
Resolves https://github.com/pytorch/pytorch/issues/161290

## Summary

Expands `dynamo/check_perf_csv.py` output capabilities with latency, compile time and memory information:

- Displays the measured speedup and the % deviation from the target threshold
- Adds a clear message when all model tests pass and no regression is found
- Adds error handling for a missing CSV file

### Example (Failing Check)

```bash
python benchmarks/dynamo/check_perf_csv.py -f reports-dir/inductor_training_smoketest.csv -t 1.40
```

**Example Output:**

```
Checking inductor_training_smoketest.csv (speedup threshold >= 1.40x)

hf_Bert                            speedup=1.005x, latency=390.8 ms/iter, compile=1.526s, mem_ratio=1.02x (eager=360.6 GB, dynamo=369.3 GB)

Error 1 model(s) performance regressed
    hf_Bert

 - hf_Bert: 1.005x (< 1.40x; -28.2% from target)
```

### Example (Passing Check)

```bash
python benchmarks/dynamo/check_perf_csv.py -f reports-dir/inductor_training_smoketest.csv -t 1.00
```

**Example Output:**

```
Checking inductor_training_smoketest.csv (speedup threshold >= 1.00x)

hf_Bert                            speedup=1.005x, latency=390.8 ms/iter, compile=1.526s, mem_ratio=1.02x (eager=360.6 GB, dynamo=369.3 GB)

All 1 model(s) passed threshold check (>= 1.00x)
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161728
Approved by: https://github.com/isuruf
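The derived figures in the example output follow directly from the CSV columns with the same arithmetic the diff below adds. The short sketch here just recomputes them from the example numbers; it is illustrative and not part of the change.

```python
# Sanity-check the derived figures shown in the example output above.
# These formulas mirror the ones added in the diff below; the inputs are
# the example values, not results from an actual benchmark run.

speedup = 1.005
effective_threshold = 1.40   # -t 1.40 with the default -s 1.0
eager_peak_mem = 360.6       # GB
dynamo_peak_mem = 369.3      # GB

# Percent deviation from the target speedup: (speedup / target - 1) * 100
pct_from_target = (speedup / effective_threshold - 1.0) * 100.0
print(f"{pct_from_target:.1f}% from target")  # -> -28.2% from target

# mem_ratio is printed as 1 / compression_ratio; with the example numbers
# that matches dynamo peak memory divided by eager peak memory.
mem_ratio = dynamo_peak_mem / eager_peak_mem
print(f"mem_ratio={mem_ratio:.2f}x")          # -> mem_ratio=1.02x
```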
Committed by: PyTorch MergeBot
Parent: 567dcdba75
Commit: 2f3bb7482c
@@ -9,28 +9,61 @@ def check_perf_csv(filename, threshold, threshold_scale):
     """
     Basic performance checking.
     """
-    df = pd.read_csv(filename)
+    try:
+        df = pd.read_csv(filename)
+    except FileNotFoundError:
+        print(f"Error: File {filename} not found")
+        sys.exit(1)
+
+    effective_threshold = threshold * threshold_scale
+    print(f"Checking {filename} (speedup threshold >= {effective_threshold:.2f}x)\n")
 
     failed = []
     for _, row in df.iterrows():
         model_name = row["name"]
-        speedup = row["speedup"]
-        if speedup < threshold * threshold_scale:
-            failed.append(model_name)
-
-        print(f"{model_name:34} {speedup}")
+        speedup = float(row["speedup"])
+        abs_latency = float(row["abs_latency"])
+        compilation_latency = float(row["compilation_latency"])
+        compression_ratio = float(row["compression_ratio"])
+        eager_peak_mem = float(row["eager_peak_mem"])
+        dynamo_peak_mem = float(row["dynamo_peak_mem"])
+
+        perf_summary = f"{model_name:34} speedup={speedup:.3f}x"
+        if pd.notna(abs_latency):
+            perf_summary += f", latency={abs_latency:.1f} ms/iter"
+        if pd.notna(compilation_latency):
+            perf_summary += f", compile={compilation_latency:.3f}s"
+        if pd.notna(compression_ratio):
+            perf_summary += f", mem_ratio={1 / compression_ratio:.2f}x"
+        if pd.notna(eager_peak_mem) and pd.notna(dynamo_peak_mem):
+            perf_summary += (
+                f" (eager={eager_peak_mem:.1f} GB, dynamo={dynamo_peak_mem:.1f} GB)"
+            )
+
+        if speedup < effective_threshold:
+            failed.append((model_name, speedup))
+
+        print(perf_summary)
 
     if failed:
         print(
             textwrap.dedent(
                 f"""
-                Error {len(failed)} models performance regressed
-                    {" ".join(failed)}
+                Error {len(failed)} model(s) performance regressed
+                    {" ".join([name for name, _ in failed])}
                 """
             )
         )
+        for name, sp in sorted(failed, key=lambda x: x[1]):
+            pct_from_target = (sp / effective_threshold - 1.0) * 100.0
+            print(
+                f" - {name}: {sp:.3f}x (< {effective_threshold:.2f}x; {pct_from_target:.1f}% from target)"
+            )
         sys.exit(1)
+    else:
+        print(
+            f"\nAll {len(df)} model(s) passed threshold check (>= {effective_threshold:.2f}x)"
+        )
 
 
 if __name__ == "__main__":
@@ -44,7 +77,7 @@ if __name__ == "__main__":
         "-s",
         type=float,
         default=1.0,
-        help="multiple threshold by this value to relax the check",
+        help="multiply threshold by this value to relax the check",
     )
     args = parser.parse_args()
     check_perf_csv(args.file, args.threshold, args.threshold_scale)
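For a hands-on check of the new behavior, a minimal smoke test along the following lines could be used. It assumes it is run from the repository root of a checkout containing this change; the CSV column names come from the diff above, while `smoketest.csv` and its values are made up for illustration.

```python
# Hypothetical smoke test for the updated check_perf_csv.py.
# The column names match those read in the diff above; the values mirror
# the example output in the PR description but are not from a real run.
import subprocess
import textwrap

csv_text = textwrap.dedent(
    """\
    name,speedup,abs_latency,compilation_latency,compression_ratio,eager_peak_mem,dynamo_peak_mem
    hf_Bert,1.005,390.8,1.526,0.976,360.6,369.3
    """
)
with open("smoketest.csv", "w") as f:
    f.write(csv_text)

# Threshold 1.40 should report a regression and exit non-zero;
# threshold 1.00 should print the "all passed" summary and exit 0.
for threshold in ("1.40", "1.00"):
    result = subprocess.run(
        [
            "python",
            "benchmarks/dynamo/check_perf_csv.py",
            "-f",
            "smoketest.csv",
            "-t",
            threshold,
        ]
    )
    print(f"threshold {threshold}: exit code {result.returncode}")
```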