Update slow tests (#169805)

This PR is auto-generated weekly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/weekly.yml).
Update the list of slow tests.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/169805
Approved by: https://github.com/pytorchbot
This commit is contained in:
PyTorch UpdateBot
2025-12-15 12:10:07 +00:00
committed by PyTorch MergeBot
parent 118b0d9037
commit 3b3fb3dd0b

View File

@@ -1,278 +1,251 @@
{
"EndToEndLSTM (__main__.RNNTest)": 195.11499938964843,
"MultiheadAttention (__main__.ModulesTest)": 142.00380249023436,
"test_AllenaiLongformerBase_repro_cpu_halide (__main__.HalideCpuTests)": 214.6786651611328,
"test_RNN_cpu_vs_cudnn_no_dropout (__main__.TestNN)": 72.39199912548065,
"test_RNN_cpu_vs_cudnn_with_dropout (__main__.TestNN)": 73.05633429686229,
"test_StridedShard_to_shard_order (__main__.Test_StridedShard_with_shard_order)": 253.58512496948242,
"test__adaptive_avg_pool2d (__main__.CPUReproTests)": 106.16550159454346,
"test_adaptive_max_pool2d1_cpu_halide (__main__.HalideCpuTests)": 116.58166758219402,
"test_addmm_relu_tunableop_rocm_cuda_float32 (__main__.TestLinalgCUDA)": 62.60266876220703,
"test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 62.53962421417236,
"test_alexnet_prefix_cpu_halide (__main__.HalideCpuTests)": 177.9409942626953,
"test_aot_autograd_disable_functionalization_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 60.34580052693685,
"test_aot_autograd_disable_functionalization_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 67.86476732889811,
"test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 142.54474639892578,
"test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 195.94950103759766,
"test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 120.17424774169922,
"test_aot_autograd_disable_functionalization_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 65.93349933624268,
"test_aot_autograd_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 66.56851626980689,
"test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 70.99724960327148,
"test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 149.67525100708008,
"test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 180.85475158691406,
"test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 104.83274841308594,
"test_aot_autograd_symbolic_exhaustive_nn_functional_unfold_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 60.97112907901887,
"test_aot_autograd_symbolic_exhaustive_ormqr_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 67.97749996185303,
"test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 73.70349884033203,
"test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 119.76774978637695,
"test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_False_cpu (__main__.AssociativeScanTests)": 93.69075012207031,
"test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_True_cpu (__main__.AssociativeScanTests)": 109.89175033569336,
"test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 801.439599609375,
"test_avg_pool3d_backward2_cpu (__main__.CpuTritonTests)": 270.46433512369794,
"test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 211.92539825439454,
"test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 526.4229965209961,
"test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 540.007625579834,
"test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 73.37349891662598,
"test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 146.07825088500977,
"test_avg_pool3d_backward_cpu_halide (__main__.HalideCpuTests)": 61.05500030517578,
"test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 77.6555004119873,
"test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 63.8514986038208,
"test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 264.5168743133545,
"test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 165.7322540283203,
"test_checkpointing_without_reentrant_input_requires_grad_False (__main__.TestAutogradWithCompiledAutograd)": 330.8664970397949,
"test_checkpointing_without_reentrant_input_requires_grad_True (__main__.TestAutogradWithCompiledAutograd)": 423.7527503967285,
"test_collect_callgrind (__main__.TestBenchmarkUtils)": 313.5642509460449,
"test_comprehensive_cholesky_inverse_cuda_float32 (__main__.TestDecompCUDA)": 70.66033256053925,
"test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 98.57474899291992,
"test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 103.05299949645996,
"test_comprehensive_diff_cuda_float32 (__main__.TestDecompCUDA)": 67.24449920654297,
"test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 68.60375022888184,
"test_comprehensive_grid_sampler_2d_cpu_bfloat16 (__main__.TestDecompCPU)": 105.27174758911133,
"test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestDecompCPU)": 97.67850112915039,
"test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 458.8267517089844,
"test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 451.6082458496094,
"test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 298.8152503967285,
"test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 255.6614990234375,
"test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1176.4095153808594,
"test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 72.6922492980957,
"test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 1098.8550109863281,
"test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 77.52225112915039,
"test_comprehensive_linalg_lu_factor_cuda_complex128 (__main__.TestDecompCUDA)": 64.52633285522461,
"test_comprehensive_linalg_lu_solve_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 72.95650100708008,
"test_comprehensive_linalg_lu_solve_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 71.89800071716309,
"test_comprehensive_linalg_solve_triangular_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 72.7504997253418,
"test_comprehensive_linalg_solve_triangular_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 74.69425201416016,
"test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 62.47725009918213,
"test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 66.51850032806396,
"test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 115.14674758911133,
"test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 111.31599998474121,
"test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 108.47875022888184,
"test_comprehensive_nn_functional_conv_transpose3d_cuda_complex128 (__main__.TestDecompCUDA)": 63.36350059509277,
"test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 (__main__.TestDecompCUDA)": 64.12074947357178,
"test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDecompCPU)": 63.71774959564209,
"test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float64 (__main__.TestDecompCPU)": 66.63899975731259,
"test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 114.73800086975098,
"test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 110.1662483215332,
"test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 115.3847484588623,
"test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 109.4905014038086,
"test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 306.85575103759766,
"test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 228.0407485961914,
"test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestDecompCUDA)": 78.3700008392334,
"test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 84.47775268554688,
"test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestDecompCUDA)": 78.47249984741211,
"test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 86.97974967956543,
"test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 124.5634994506836,
"test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 122.19799995422363,
"test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 1262.0645141601562,
"test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 1255.4177551269531,
"test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 1257.4462585449219,
"test_comprehensive_nn_functional_max_pool3d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 605.8682556152344,
"test_comprehensive_nn_functional_max_pool3d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 615.4145050048828,
"test_comprehensive_nn_functional_max_unpool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 66.37674903869629,
"test_comprehensive_nn_functional_max_unpool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 65.44024848937988,
"test_comprehensive_nn_functional_max_unpool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 66.0570011138916,
"test_comprehensive_nn_functional_pad_reflect_cuda_complex64 (__main__.TestDecompCUDA)": 62.90416653951009,
"test_comprehensive_ormqr_cpu_complex64 (__main__.TestDecompCPU)": 61.29275035858154,
"test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 113.26900100708008,
"test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 112.6924991607666,
"test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 73.96350288391113,
"test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 80.25400161743164,
"test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 70.42575073242188,
"test_comprehensive_pca_lowrank_cuda_complex64 (__main__.TestDecompCUDA)": 99.28966617584229,
"test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 69.16975021362305,
"test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 75.30550003051758,
"test_comprehensive_svd_lowrank_cuda_complex128 (__main__.TestDecompCUDA)": 120.88183275858562,
"test_comprehensive_svd_lowrank_cuda_complex64 (__main__.TestDecompCUDA)": 119.77483590443929,
"test_comprehensive_svd_lowrank_cuda_float32 (__main__.TestDecompCUDA)": 120.83816623687744,
"test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 86.3487491607666,
"test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 78.20924949645996,
"test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 89.26825046539307,
"test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 220.15350151062012,
"test_conv2d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 77.47299766540527,
"test_conv3d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 156.85225296020508,
"test_conv_bn_fuse_cpu (__main__.CpuTests)": 68.80920028686523,
"test_conv_bn_fuse_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 68.10125064849854,
"test_conv_large_batch_1_cuda (__main__.TestConvolutionNNDeviceTypeCUDA)": 121.31333414713542,
"test_conv_unary_fusion_nnc (__main__.TestMkldnnFusion)": 80.68750095367432,
"test_correctness_AdamW_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 82.94275093078613,
"test_correctness_Adam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 80.35500144958496,
"test_count_nonzero_all (__main__.TestBool)": 650.8682556152344,
"test_cross_entropy_large_tensor_reduction_sum_cuda (__main__.TestNNDeviceTypeCUDA)": 323.86448669433594,
"test_ddp_uneven_inputs (__main__.TestDistBackendWithSpawn)": 450.4883321126302,
"test_diff_hyperparams_sharding_strategy_str_no_shard (__main__.TestFSDPUseOrigParamsMultipleParamGroups)": 60.20799891153971,
"test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 87.0319995880127,
"test_dtensor_op_db_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestLocalDTensorOpsCPU)": 1517.4078125,
"test_dtensor_op_db_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestMultiThreadedDTensorOpsCPU)": 90.65559997558594,
"test_error_detection_and_propagation (__main__.NcclErrorHandlingTest)": 67.08999888102214,
"test_fail_arithmetic_ops.py (__main__.TestTyping)": 72.2988748550415,
"test_fail_creation_ops.py (__main__.TestTyping)": 102.47843830759932,
"test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 80.67500114440918,
"test_fn_grad_add_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 75.49025793998472,
"test_fn_grad_constant_pad_nd_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 177.47612947033298,
"test_fn_grad_constant_pad_nd_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 124.01433499654134,
"test_fn_grad_diagonal_scatter_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 387.570063929404,
"test_fn_grad_diagonal_scatter_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 155.9375,
"test_fn_grad_flip_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 61.3171936158211,
"test_fn_grad_rsub_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 83.22996791716545,
"test_fn_grad_rsub_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 62.90033372243246,
"test_fn_grad_sub_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 77.49893539182601,
"test_fn_grad_sub_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 61.0314998626709,
"test_fn_grad_where_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 99.6264519230012,
"test_fn_grad_where_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 72.60183270772298,
"test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 84.32150268554688,
"test_fuse_large_params_cpu (__main__.CpuTests)": 97.63633219401042,
"test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 167.9266242980957,
"test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 167.08250045776367,
"test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 148.94650268554688,
"test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 118.18500137329102,
"test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 81.35249900817871,
"test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 196.03149795532227,
"test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 111.10725021362305,
"test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 134.25675010681152,
"test_gradgrad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 614.353271484375,
"test_graph_make_graphed_callables_same_pool (__main__.TestCuda)": 102.73666604359944,
"test_graph_partition_refcount_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 385.05516481399536,
"test_graph_partition_refcount_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 395.0171728134155,
"test_grid_sampler_2d_cpu_halide (__main__.HalideCpuTests)": 195.79066467285156,
"test_indirect_device_assert (__main__.TritonCodeGenTests)": 312.15050506591797,
"test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 71.82537364959717,
"test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 85.09174919128418,
"test_inputs_overlapping_with_mutation_stress_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 129.14387321472168,
"test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 120.64374923706055,
"test_linear_binary_cpp_wrapper (__main__.TestCppWrapper)": 130.71199989318848,
"test_linear_binary_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 126.44325256347656,
"test_list_clearing_cuda (__main__.GPUTests)": 61.48289999961853,
"test_longformer_chunk_dynamic_shapes (__main__.DynamicShapesReproTests)": 104.84637451171875,
"test_lstm_cpu (__main__.TestMkldnnCPU)": 102.5270004272461,
"test_many_overlapping_inputs_does_not_explode_guards_dynamic_shapes (__main__.DynamicShapesReproTests)": 136.7854986190796,
"test_max_pool2d2_cpu_halide (__main__.HalideCpuTests)": 426.58765665690106,
"test_max_pool2d3_cpu_halide (__main__.HalideCpuTests)": 133.9463348388672,
"test_max_pool2d5_cpu_halide (__main__.HalideCpuTests)": 359.5349934895833,
"test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 68.19662570953369,
"test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 64.87825012207031,
"test_nll_loss_large_tensor_reduction_sum_cuda (__main__.TestNNDeviceTypeCUDA)": 340.27033456166583,
"test_ordered_distribute_all_combination (__main__.DistributeWithDeviceOrderTest)": 135.83149814605713,
"test_ordered_distribute_all_combination (__main__.DistributeWithDeviceOrderTestWithLocalTensor)": 67.58062505722046,
"test_ordered_redistribute_with_partial (__main__.DistributeWithDeviceOrderTest)": 198.98699951171875,
"test_ordered_redistribute_with_partial (__main__.DistributeWithDeviceOrderTestWithLocalTensor)": 500.39749908447266,
"test_pool3d_large_size_int64_cuda (__main__.TestPoolingNNDeviceTypeCUDA)": 65.12433274586995,
"test_proper_exit (__main__.TestDataLoader)": 203.98437309265137,
"test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 196.37637424468994,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 63.505500078201294,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 111.68949890136719,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 107.62675094604492,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 93.89300155639648,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 117.77149963378906,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 110.85300254821777,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 88.89249992370605,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 100.24625015258789,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 111.7132511138916,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 84.21674919128418,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 105.77849960327148,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 109.34375,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 92.73649978637695,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 105.92499923706055,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 108.25849914550781,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 64.16908399264018,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 111.11400032043457,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 114.92299842834473,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 61.62425025304159,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 105.86524963378906,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 105.85474967956543,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 66.22370831171672,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 113.55375099182129,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 107.45649909973145,
"test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 573.1502685546875,
"test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 1091.4237670898438,
"test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 781.7357482910156,
"test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1477.8807678222656,
"test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 91.73400115966797,
"test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 274.52249908447266,
"test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 142.28099822998047,
"test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 227.64300155639648,
"test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 78.95800018310547,
"test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 139.07250213623047,
"test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 70.76949882507324,
"test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 100.25174903869629,
"test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 170.1675033569336,
"test_quick_core_backward_std_cpu_float64 (__main__.TestDecompCPU)": 79.20649909973145,
"test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 147.0157470703125,
"test_register_spills_cuda (__main__.BenchmarkFusionGpuTest)": 85.56925010681152,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_inference_precision_amp (__main__.DeterministicTest)": 62.117165883382164,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_inference_precision_bfloat16 (__main__.DeterministicTest)": 80.71633275349934,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 162.40999857584634,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 112.27533340454102,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 147.1988321940104,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 104.0053342183431,
"test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 61.973000844319664,
"test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 61.754499435424805,
"test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 60.08883412679037,
"test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 86.1146666208903,
"test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 104.33766746520996,
"test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 114.78433227539062,
"test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 86.49966684977214,
"test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 71.44516626993816,
"test_runtime_checks_large_cpu (__main__.AOTInductorTestABICompatibleCpu)": 66.8162488937378,
"test_runtime_checks_large_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 72.04562425613403,
"test_runtime_checks_large_cuda (__main__.AOTInductorTestABICompatibleGpu)": 184.2334976196289,
"test_scaled_gemm_offline_tunableop_cuda_float8_e4m3fnuz (__main__.TestLinalgCUDA)": 84.8563323020935,
"test_sdpa_kernel_ctx_manager2_dynamic_shapes (__main__.DynamicShapesCtxManagerTests)": 107.34962558746338,
"test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 119.15850162506104,
"test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 140.9133758544922,
"test_sort_dynamic_shape_with_check_cuda (__main__.TestInductorDynamicCUDA)": 106.76350021362305,
"test_sort_stable_cpu (__main__.CpuTritonTests)": 1319.0793050130208,
"test_sort_stable_cuda (__main__.GPUTests)": 96.01039962768554,
"test_split_cumsum_cpu (__main__.CpuTritonTests)": 90.8499984741211,
"test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 304.4350051879883,
"test_tensor_split (__main__.TestVmapOperators)": 105.89132479213826,
"test_terminate_handler_on_crash (__main__.TestTorch)": 167.24449968338013,
"test_terminate_signal (__main__.ForkTest)": 199.22387313842773,
"test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 199.12587642669678,
"test_terminate_signal (__main__.SpawnTest)": 200.71112155914307,
"test_train_parity_multi_group_unshard_async_op (__main__.TestFullyShard1DTrainingCore)": 65.3956667582194,
"test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 88.69500064849854,
"test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 212.44074630737305,
"test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 209.64949798583984,
"test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 144.97124862670898,
"test_upsample_bicubic2d_cpu_halide (__main__.HalideCpuTests)": 97.45366668701172,
"test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 93.24074745178223,
"test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 76.62825012207031,
"test_vec_compare_op_cpu_only (__main__.CPUReproTests)": 60.935458501180015,
"test_vmapjvpvjp_linalg_lstsq_grad_oriented_cpu_float32 (__main__.TestOperatorsCPU)": 96.73649978637695,
"test_vmapjvpvjp_linalg_lu_solve_cpu_float32 (__main__.TestOperatorsCPU)": 73.72424983978271,
"test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 67.43249893188477,
"test_vmapjvpvjp_linalg_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 62.6795015335083,
"test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 75.2802505493164,
"test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 77.50925064086914,
"test_vmapjvpvjp_nn_functional_conv2d_cpu_float32 (__main__.TestOperatorsCPU)": 66.33838690480879,
"test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 67.38049983978271,
"test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 75.26774978637695,
"test_vmapjvpvjp_svd_cpu_float32 (__main__.TestOperatorsCPU)": 61.21835474814138,
"test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 65.22375106811523,
"test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 79.81699752807617,
"test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 85.13375091552734,
"test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 83.11999893188477,
"test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 111.10899925231934,
"test_warp_softmax_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 154.79667123158774,
"test_warp_softmax_64bit_indexing_cuda_float32 (__main__.TestNNDeviceTypeCUDA)": 137.61766529083252
"EndToEndLSTM (__main__.RNNTest)": 156.4280014038086,
"MultiheadAttention (__main__.ModulesTest)": 142.05475234985352,
"test__adaptive_avg_pool2d (__main__.CPUReproTests)": 94.29028483799526,
"test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 61.061214447021484,
"test_aot_autograd_disable_functionalization_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 64.10949897766113,
"test_aot_autograd_disable_functionalization_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 63.37899875640869,
"test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 133.07775115966797,
"test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 182.88374710083008,
"test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 107.19750022888184,
"test_aot_autograd_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 65.34900093078613,
"test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 65.77025127410889,
"test_aot_autograd_symbolic_exhaustive_masked_norm_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 67.55673129741962,
"test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 139.04249954223633,
"test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 207.48150253295898,
"test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 111.83749961853027,
"test_aot_autograd_symbolic_exhaustive_nn_functional_max_unpool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 61.02223124870887,
"test_aot_autograd_symbolic_exhaustive_nn_functional_unfold_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 60.61625099182129,
"test_aot_autograd_symbolic_exhaustive_ormqr_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 72.0044994354248,
"test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 78.97699928283691,
"test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 120.6155014038086,
"test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_False_cpu (__main__.AssociativeScanTests)": 90.98520050048828,
"test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_True_cpu (__main__.AssociativeScanTests)": 107.81600036621094,
"test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 832.6403930664062,
"test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 209.78656339645386,
"test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 511.25464303152904,
"test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 502.13043212890625,
"test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 69.21749954223633,
"test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 141.99519958496094,
"test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 74.15374851226807,
"test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 68.91049995422364,
"test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 217.71485846383231,
"test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 151.827001953125,
"test_cat_2k_args (__main__.TestTEFuserDynamic)": 60.52952872465054,
"test_checkpointing_without_reentrant_input_requires_grad_False (__main__.TestAutogradWithCompiledAutograd)": 313.9864283970424,
"test_checkpointing_without_reentrant_input_requires_grad_True (__main__.TestAutogradWithCompiledAutograd)": 403.9378574916295,
"test_collect_callgrind (__main__.TestBenchmarkUtils)": 295.59543064662387,
"test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 101.16829986572266,
"test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 99.17959976196289,
"test_comprehensive_diff_cuda_float32 (__main__.TestDecompCUDA)": 67.93409957885743,
"test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 68.0083999633789,
"test_comprehensive_grid_sampler_2d_cpu_bfloat16 (__main__.TestDecompCPU)": 121.40875053405762,
"test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestDecompCPU)": 107.16825103759766,
"test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 476.1532516479492,
"test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 436.6274948120117,
"test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 271.2396987915039,
"test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 268.1447006225586,
"test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1175.651300048828,
"test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 70.56930084228516,
"test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 1088.7627990722656,
"test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 74.085400390625,
"test_comprehensive_linalg_lu_solve_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 73.21680068969727,
"test_comprehensive_linalg_lu_solve_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 69.35940017700196,
"test_comprehensive_linalg_solve_triangular_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 74.40799865722656,
"test_comprehensive_linalg_solve_triangular_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 73.20870056152344,
"test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 65.00710029602051,
"test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 64.47350044250489,
"test_comprehensive_linalg_vector_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 91.43851551865086,
"test_comprehensive_linalg_vector_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 94.38921136567087,
"test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 112.68219909667968,
"test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 107.13470001220703,
"test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 106.79499893188476,
"test_comprehensive_nn_functional_conv_transpose3d_cuda_complex128 (__main__.TestDecompCUDA)": 61.82360000610352,
"test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 (__main__.TestDecompCUDA)": 61.832200622558595,
"test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDecompCPU)": 69.2859992980957,
"test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float64 (__main__.TestDecompCPU)": 61.617000579833984,
"test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 109.80070037841797,
"test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 105.05930023193359,
"test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 110.41274833679199,
"test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 117.82600212097168,
"test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 267.2045959472656,
"test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 226.3138000488281,
"test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestDecompCUDA)": 78.2098014831543,
"test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 83.08559951782226,
"test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestDecompCUDA)": 79.31420059204102,
"test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 84.31810150146484,
"test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 124.66989974975586,
"test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 122.46029968261719,
"test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 1227.5965942382813,
"test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 1224.6120971679688,
"test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 1215.3135009765624,
"test_comprehensive_nn_functional_max_pool3d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 574.5224914550781,
"test_comprehensive_nn_functional_max_pool3d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 576.6116943359375,
"test_comprehensive_nn_functional_max_unpool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 66.11190147399903,
"test_comprehensive_nn_functional_max_unpool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 64.53090057373046,
"test_comprehensive_nn_functional_max_unpool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 64.63370094299316,
"test_comprehensive_ormqr_cpu_complex64 (__main__.TestDecompCPU)": 63.500749588012695,
"test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 118.29800109863281,
"test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 115.92799911499023,
"test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 71.202099609375,
"test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 76.42730102539062,
"test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 74.30220031738281,
"test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 73.72639999389648,
"test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 71.33249969482422,
"test_comprehensive_svd_lowrank_cuda_complex128 (__main__.TestDecompCUDA)": 64.70350074768066,
"test_comprehensive_svd_lowrank_cuda_complex64 (__main__.TestDecompCUDA)": 63.244300079345706,
"test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 97.68460006713867,
"test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 90.53349990844727,
"test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 112.51621355329242,
"test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 242.19535500662667,
"test_conv2d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 68.08974933624268,
"test_conv3d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 135.26825141906738,
"test_conv3d_binary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 67.82638888888889,
"test_conv3d_cuda (__main__.AOTInductorTestABICompatibleGpu)": 120.85684497539813,
"test_conv3d_unary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 60.322389390733505,
"test_conv_bn_fuse_cpu (__main__.CpuTests)": 67.01239929199218,
"test_conv_transpose_with_output_size_and_no_batch_dim_ConvTranspose3d_cuda (__main__.TestConvolutionNNDeviceTypeCUDA)": 145.28714145337497,
"test_conv_unary_fusion_nnc (__main__.TestMkldnnFusion)": 81.58721351623535,
"test_correctness_AdamW_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 85.3761001586914,
"test_correctness_Adam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 82.11160125732422,
"test_count_nonzero_all (__main__.TestBool)": 647.7996433803013,
"test_cp_flex_attention_document_mask (__main__.CPFlexAttentionTest)": 71.75066630045573,
"test_cross_entropy_large_tensor_reduction_mean_cuda (__main__.TestNNDeviceTypeCUDA)": 112.6964959303538,
"test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 86.26990051269532,
"test_dtensor_op_db_nn_functional_poisson_nll_loss_cpu_float32 (__main__.TestLocalDTensorOpsCPU)": 115.99650001525879,
"test_eager_sequence_nr_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 229.80603500892377,
"test_eig_check_magma_cuda_float32 (__main__.TestLinalgCUDA)": 129.7280044555664,
"test_fail_arithmetic_ops.py (__main__.TestTyping)": 68.54400089808873,
"test_fail_random.py (__main__.TestTyping)": 178.38840666271392,
"test_fd_sharing (__main__.TestMultiprocessing)": 61.969195964289646,
"test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 98.24910049438476,
"test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 82.74409904479981,
"test_fs_sharing (__main__.TestMultiprocessing)": 64.23455582724677,
"test_fuse_large_params_cpu (__main__.CpuTests)": 101.42200215657552,
"test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 161.51942879813058,
"test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 162.31028856549943,
"test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 149.26629943847655,
"test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 118.3827995300293,
"test_grad_nn_Transformer_cpu_float64 (__main__.TestModuleCPU)": 68.17949033250996,
"test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 84.50290069580078,
"test_gradgrad_nn_LSTM_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 65.09507634089543,
"test_gradgrad_nn_LSTM_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 65.47933355967204,
"test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 183.63109741210937,
"test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 114.0967010498047,
"test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 133.20650100708008,
"test_gradgrad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 513.7934997558593,
"test_index (__main__.DistTensorOpsTest)": 63.37289934158325,
"test_indirect_device_assert (__main__.TritonCodeGenTests)": 308.16075134277344,
"test_inductor_dynamic_shapes_broadcasting_dynamic_shapes (__main__.DynamicShapesReproTests)": 123.50510314415241,
"test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 70.94635799952916,
"test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 84.69389953613282,
"test_inputs_overlapping_with_mutation_stress_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 141.22007206508093,
"test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 78.74900007247925,
"test_linalg_solve_triangular_large_cuda_complex128 (__main__.TestLinalgCUDA)": 120.89850234985352,
"test_linalg_solve_triangular_large_cuda_complex64 (__main__.TestLinalgCUDA)": 91.9739990234375,
"test_linear_binary_cpp_wrapper (__main__.TestCppWrapper)": 120.82975006103516,
"test_linear_binary_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 119.11174774169922,
"test_longformer_chunk_dynamic_shapes (__main__.DynamicShapesReproTests)": 104.80221448625836,
"test_low_memory_max_pool_dilation_1_dim_3_cpu_halide (__main__.HalideCpuTests)": 585.4649919782366,
"test_low_memory_max_pool_dilation_2_dim_3_cpu_halide (__main__.HalideCpuTests)": 497.8724278041295,
"test_lstm_cpu (__main__.TestMkldnnCPU)": 93.07950210571289,
"test_many_overlapping_inputs_does_not_explode_guards_dynamic_shapes (__main__.DynamicShapesReproTests)": 124.84971400669643,
"test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 61.881356375558035,
"test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 63.254143306187224,
"test_nll_loss_large_tensor_reduction_mean_cuda (__main__.TestNNDeviceTypeCUDA)": 329.2579941749573,
"test_pattern_matcher_multi_user_cpu (__main__.CpuTritonTests)": 141.80814688546317,
"test_proper_exit (__main__.TestDataLoader)": 218.38585935320174,
"test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 227.89378683907645,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 102.8917465209961,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 105.06525039672852,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 101.56850051879883,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 78.47400093078613,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 108.95525169372559,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 104.72825050354004,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 78.20949745178223,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 96.36775207519531,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 99.7805004119873,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 80.42650032043457,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 102.69049835205078,
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 102.31074905395508,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 109.89425086975098,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 100.21624946594238,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 102.30649948120117,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 77.84124946594238,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 103.11899757385254,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 103.89025115966797,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 71.87825012207031,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 97.63850021362305,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 101.92625045776367,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 77.63574981689453,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 109.77174949645996,
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 103.81300163269043,
"test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 536.6219940185547,
"test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 1043.1840942382812,
"test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 794.3912506103516,
"test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1367.2698120117188,
"test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 77.04675102233887,
"test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 280.63609924316404,
"test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 113.44674873352051,
"test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 229.65490112304687,
"test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 75.17299842834473,
"test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 138.64920043945312,
"test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 68.40219993591309,
"test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 86.44400024414062,
"test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 167.45450286865236,
"test_quick_core_backward_std_cpu_float64 (__main__.TestDecompCPU)": 74.3385009765625,
"test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 136.91199951171876,
"test_register_spills_cuda (__main__.BenchmarkFusionGpuTest)": 113.36530075073242,
"test_replicatepad_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 70.2165002822876,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_inference_precision_amp (__main__.DeterministicTest)": 77.06460037231446,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 149.1957015991211,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 150.3646011352539,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 162.8407989501953,
"test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 140.64679794311525,
"test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 80.052099609375,
"test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 80.33979949951171,
"test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 80.46890029907226,
"test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 77.31679916381836,
"test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 94.19669952392579,
"test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 93.968798828125,
"test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 95.43030014038087,
"test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 94.34460067749023,
"test_runtime_checks_large_cpu (__main__.AOTInductorTestABICompatibleCpu)": 69.63399925231934,
"test_runtime_checks_large_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 66.46521404811314,
"test_runtime_checks_large_cuda (__main__.AOTInductorTestABICompatibleGpu)": 189.52279815673828,
"test_save_load_large_string_attribute (__main__.TestSaveLoad)": 95.83349895477295,
"test_sdpa_kernel_ctx_manager2_dynamic_shapes (__main__.DynamicShapesCtxManagerTests)": 107.00114277430943,
"test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 117.35257175990513,
"test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 120.11764253888812,
"test_sort_bool_cpu (__main__.CpuTritonTests)": 350.2287118094308,
"test_sort_dynamic_shape_with_check_cuda (__main__.TestInductorDynamicCUDA)": 371.5840934753418,
"test_sort_stable_cuda (__main__.GPUTests)": 87.17187404632568,
"test_sort_transpose_cpu (__main__.CpuTritonTests)": 376.56386021205356,
"test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 296.1006278991699,
"test_terminate_handler_on_crash (__main__.TestTorch)": 176.9331459743636,
"test_terminate_signal (__main__.ForkTest)": 197.2466388238328,
"test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 197.36571486986108,
"test_terminate_signal (__main__.SpawnTest)": 201.76207041740417,
"test_train_parity_multi_group (__main__.TestFullyShard1DTrainingCore)": 163.89737558364868,
"test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 65.74759979248047,
"test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 404.46739349365237,
"test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 321.34139862060545,
"test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 240.05119934082032,
"test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 92.86999893188477,
"test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 77.62930107116699,
"test_vec_compare_op_cpu_only (__main__.CPUReproTests)": 61.40728569030762,
"test_view_ops (__main__.TestViewOpsWithLocalTensor)": 85.29030755849985,
"test_vmapjvpvjp_linalg_lstsq_grad_oriented_cpu_float32 (__main__.TestOperatorsCPU)": 83.75,
"test_vmapjvpvjp_linalg_lstsq_grad_oriented_cuda_float32 (__main__.TestOperatorsCUDA)": 222.90328543526786,
"test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 70.33380088806152,
"test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 63.39599895477295,
"test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 75.14130058288575,
"test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 64.2445011138916,
"test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 78.53580017089844,
"test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 62.246999740600586,
"test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 84.20339965820312,
"test_vmapvjpvjp_linalg_lstsq_cuda_float32 (__main__.TestOperatorsCUDA)": 110.56262493133545,
"test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 79.86129951477051,
"test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 78.55820007324219,
"test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 123.33470077514649
}