diff --git a/test/slow_tests.json b/test/slow_tests.json index 5f4a4934fd0..0b2c07ff726 100644 --- a/test/slow_tests.json +++ b/test/slow_tests.json @@ -1,278 +1,251 @@ { - "EndToEndLSTM (__main__.RNNTest)": 195.11499938964843, - "MultiheadAttention (__main__.ModulesTest)": 142.00380249023436, - "test_AllenaiLongformerBase_repro_cpu_halide (__main__.HalideCpuTests)": 214.6786651611328, - "test_RNN_cpu_vs_cudnn_no_dropout (__main__.TestNN)": 72.39199912548065, - "test_RNN_cpu_vs_cudnn_with_dropout (__main__.TestNN)": 73.05633429686229, - "test_StridedShard_to_shard_order (__main__.Test_StridedShard_with_shard_order)": 253.58512496948242, - "test__adaptive_avg_pool2d (__main__.CPUReproTests)": 106.16550159454346, - "test_adaptive_max_pool2d1_cpu_halide (__main__.HalideCpuTests)": 116.58166758219402, - "test_addmm_relu_tunableop_rocm_cuda_float32 (__main__.TestLinalgCUDA)": 62.60266876220703, - "test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 62.53962421417236, - "test_alexnet_prefix_cpu_halide (__main__.HalideCpuTests)": 177.9409942626953, - "test_aot_autograd_disable_functionalization_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 60.34580052693685, - "test_aot_autograd_disable_functionalization_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 67.86476732889811, - "test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 142.54474639892578, - "test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 195.94950103759766, - "test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 120.17424774169922, - "test_aot_autograd_disable_functionalization_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 65.93349933624268, - "test_aot_autograd_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 66.56851626980689, - "test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 70.99724960327148, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 149.67525100708008, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 180.85475158691406, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 104.83274841308594, - "test_aot_autograd_symbolic_exhaustive_nn_functional_unfold_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 60.97112907901887, - "test_aot_autograd_symbolic_exhaustive_ormqr_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 67.97749996185303, - "test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 73.70349884033203, - "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 119.76774978637695, - "test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_False_cpu (__main__.AssociativeScanTests)": 93.69075012207031, - "test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_True_cpu (__main__.AssociativeScanTests)": 109.89175033569336, - "test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 801.439599609375, - "test_avg_pool3d_backward2_cpu (__main__.CpuTritonTests)": 270.46433512369794, - "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 211.92539825439454, - "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 526.4229965209961, - "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 540.007625579834, - "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 73.37349891662598, - "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 146.07825088500977, - "test_avg_pool3d_backward_cpu_halide (__main__.HalideCpuTests)": 61.05500030517578, - "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 77.6555004119873, - "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 63.8514986038208, - "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 264.5168743133545, - "test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 165.7322540283203, - "test_checkpointing_without_reentrant_input_requires_grad_False (__main__.TestAutogradWithCompiledAutograd)": 330.8664970397949, - "test_checkpointing_without_reentrant_input_requires_grad_True (__main__.TestAutogradWithCompiledAutograd)": 423.7527503967285, - "test_collect_callgrind (__main__.TestBenchmarkUtils)": 313.5642509460449, - "test_comprehensive_cholesky_inverse_cuda_float32 (__main__.TestDecompCUDA)": 70.66033256053925, - "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 98.57474899291992, - "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 103.05299949645996, - "test_comprehensive_diff_cuda_float32 (__main__.TestDecompCUDA)": 67.24449920654297, - "test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 68.60375022888184, - "test_comprehensive_grid_sampler_2d_cpu_bfloat16 (__main__.TestDecompCPU)": 105.27174758911133, - "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestDecompCPU)": 97.67850112915039, - "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 458.8267517089844, - "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 451.6082458496094, - "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 298.8152503967285, - "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 255.6614990234375, - "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1176.4095153808594, - "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 72.6922492980957, - "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 1098.8550109863281, - "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 77.52225112915039, - "test_comprehensive_linalg_lu_factor_cuda_complex128 (__main__.TestDecompCUDA)": 64.52633285522461, - "test_comprehensive_linalg_lu_solve_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 72.95650100708008, - "test_comprehensive_linalg_lu_solve_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 71.89800071716309, - "test_comprehensive_linalg_solve_triangular_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 72.7504997253418, - "test_comprehensive_linalg_solve_triangular_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 74.69425201416016, - "test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 62.47725009918213, - "test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 66.51850032806396, - "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 115.14674758911133, - "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 111.31599998474121, - "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 108.47875022888184, - "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex128 (__main__.TestDecompCUDA)": 63.36350059509277, - "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 (__main__.TestDecompCUDA)": 64.12074947357178, - "test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDecompCPU)": 63.71774959564209, - "test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float64 (__main__.TestDecompCPU)": 66.63899975731259, - "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 114.73800086975098, - "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 110.1662483215332, - "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 115.3847484588623, - "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 109.4905014038086, - "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 306.85575103759766, - "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 228.0407485961914, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestDecompCUDA)": 78.3700008392334, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 84.47775268554688, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestDecompCUDA)": 78.47249984741211, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 86.97974967956543, - "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 124.5634994506836, - "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 122.19799995422363, - "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 1262.0645141601562, - "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 1255.4177551269531, - "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 1257.4462585449219, - "test_comprehensive_nn_functional_max_pool3d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 605.8682556152344, - "test_comprehensive_nn_functional_max_pool3d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 615.4145050048828, - "test_comprehensive_nn_functional_max_unpool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 66.37674903869629, - "test_comprehensive_nn_functional_max_unpool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 65.44024848937988, - "test_comprehensive_nn_functional_max_unpool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 66.0570011138916, - "test_comprehensive_nn_functional_pad_reflect_cuda_complex64 (__main__.TestDecompCUDA)": 62.90416653951009, - "test_comprehensive_ormqr_cpu_complex64 (__main__.TestDecompCPU)": 61.29275035858154, - "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 113.26900100708008, - "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 112.6924991607666, - "test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 73.96350288391113, - "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 80.25400161743164, - "test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 70.42575073242188, - "test_comprehensive_pca_lowrank_cuda_complex64 (__main__.TestDecompCUDA)": 99.28966617584229, - "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 69.16975021362305, - "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 75.30550003051758, - "test_comprehensive_svd_lowrank_cuda_complex128 (__main__.TestDecompCUDA)": 120.88183275858562, - "test_comprehensive_svd_lowrank_cuda_complex64 (__main__.TestDecompCUDA)": 119.77483590443929, - "test_comprehensive_svd_lowrank_cuda_float32 (__main__.TestDecompCUDA)": 120.83816623687744, - "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 86.3487491607666, - "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 78.20924949645996, - "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 89.26825046539307, - "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 220.15350151062012, - "test_conv2d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 77.47299766540527, - "test_conv3d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 156.85225296020508, - "test_conv_bn_fuse_cpu (__main__.CpuTests)": 68.80920028686523, - "test_conv_bn_fuse_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 68.10125064849854, - "test_conv_large_batch_1_cuda (__main__.TestConvolutionNNDeviceTypeCUDA)": 121.31333414713542, - "test_conv_unary_fusion_nnc (__main__.TestMkldnnFusion)": 80.68750095367432, - "test_correctness_AdamW_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 82.94275093078613, - "test_correctness_Adam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 80.35500144958496, - "test_count_nonzero_all (__main__.TestBool)": 650.8682556152344, - "test_cross_entropy_large_tensor_reduction_sum_cuda (__main__.TestNNDeviceTypeCUDA)": 323.86448669433594, - "test_ddp_uneven_inputs (__main__.TestDistBackendWithSpawn)": 450.4883321126302, - "test_diff_hyperparams_sharding_strategy_str_no_shard (__main__.TestFSDPUseOrigParamsMultipleParamGroups)": 60.20799891153971, - "test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 87.0319995880127, - "test_dtensor_op_db_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestLocalDTensorOpsCPU)": 1517.4078125, - "test_dtensor_op_db_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestMultiThreadedDTensorOpsCPU)": 90.65559997558594, - "test_error_detection_and_propagation (__main__.NcclErrorHandlingTest)": 67.08999888102214, - "test_fail_arithmetic_ops.py (__main__.TestTyping)": 72.2988748550415, - "test_fail_creation_ops.py (__main__.TestTyping)": 102.47843830759932, - "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 80.67500114440918, - "test_fn_grad_add_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 75.49025793998472, - "test_fn_grad_constant_pad_nd_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 177.47612947033298, - "test_fn_grad_constant_pad_nd_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 124.01433499654134, - "test_fn_grad_diagonal_scatter_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 387.570063929404, - "test_fn_grad_diagonal_scatter_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 155.9375, - "test_fn_grad_flip_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 61.3171936158211, - "test_fn_grad_rsub_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 83.22996791716545, - "test_fn_grad_rsub_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 62.90033372243246, - "test_fn_grad_sub_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 77.49893539182601, - "test_fn_grad_sub_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 61.0314998626709, - "test_fn_grad_where_cpu_complex128 (__main__.TestComplexBwdGradientsCPU)": 99.6264519230012, - "test_fn_grad_where_cuda_complex128 (__main__.TestComplexBwdGradientsCUDA)": 72.60183270772298, - "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 84.32150268554688, - "test_fuse_large_params_cpu (__main__.CpuTests)": 97.63633219401042, - "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 167.9266242980957, - "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 167.08250045776367, - "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 148.94650268554688, - "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 118.18500137329102, - "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 81.35249900817871, - "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 196.03149795532227, - "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 111.10725021362305, - "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 134.25675010681152, - "test_gradgrad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 614.353271484375, - "test_graph_make_graphed_callables_same_pool (__main__.TestCuda)": 102.73666604359944, - "test_graph_partition_refcount_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 385.05516481399536, - "test_graph_partition_refcount_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 395.0171728134155, - "test_grid_sampler_2d_cpu_halide (__main__.HalideCpuTests)": 195.79066467285156, - "test_indirect_device_assert (__main__.TritonCodeGenTests)": 312.15050506591797, - "test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 71.82537364959717, - "test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 85.09174919128418, - "test_inputs_overlapping_with_mutation_stress_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 129.14387321472168, - "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 120.64374923706055, - "test_linear_binary_cpp_wrapper (__main__.TestCppWrapper)": 130.71199989318848, - "test_linear_binary_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 126.44325256347656, - "test_list_clearing_cuda (__main__.GPUTests)": 61.48289999961853, - "test_longformer_chunk_dynamic_shapes (__main__.DynamicShapesReproTests)": 104.84637451171875, - "test_lstm_cpu (__main__.TestMkldnnCPU)": 102.5270004272461, - "test_many_overlapping_inputs_does_not_explode_guards_dynamic_shapes (__main__.DynamicShapesReproTests)": 136.7854986190796, - "test_max_pool2d2_cpu_halide (__main__.HalideCpuTests)": 426.58765665690106, - "test_max_pool2d3_cpu_halide (__main__.HalideCpuTests)": 133.9463348388672, - "test_max_pool2d5_cpu_halide (__main__.HalideCpuTests)": 359.5349934895833, - "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 68.19662570953369, - "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 64.87825012207031, - "test_nll_loss_large_tensor_reduction_sum_cuda (__main__.TestNNDeviceTypeCUDA)": 340.27033456166583, - "test_ordered_distribute_all_combination (__main__.DistributeWithDeviceOrderTest)": 135.83149814605713, - "test_ordered_distribute_all_combination (__main__.DistributeWithDeviceOrderTestWithLocalTensor)": 67.58062505722046, - "test_ordered_redistribute_with_partial (__main__.DistributeWithDeviceOrderTest)": 198.98699951171875, - "test_ordered_redistribute_with_partial (__main__.DistributeWithDeviceOrderTestWithLocalTensor)": 500.39749908447266, - "test_pool3d_large_size_int64_cuda (__main__.TestPoolingNNDeviceTypeCUDA)": 65.12433274586995, - "test_proper_exit (__main__.TestDataLoader)": 203.98437309265137, - "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 196.37637424468994, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 63.505500078201294, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 111.68949890136719, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 107.62675094604492, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 93.89300155639648, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 117.77149963378906, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 110.85300254821777, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 88.89249992370605, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 100.24625015258789, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 111.7132511138916, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 84.21674919128418, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 105.77849960327148, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 109.34375, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 92.73649978637695, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 105.92499923706055, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 108.25849914550781, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 64.16908399264018, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 111.11400032043457, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 114.92299842834473, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 61.62425025304159, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 105.86524963378906, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 105.85474967956543, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 66.22370831171672, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 113.55375099182129, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 107.45649909973145, - "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 573.1502685546875, - "test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 1091.4237670898438, - "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 781.7357482910156, - "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1477.8807678222656, - "test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 91.73400115966797, - "test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 274.52249908447266, - "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 142.28099822998047, - "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 227.64300155639648, - "test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 78.95800018310547, - "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 139.07250213623047, - "test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 70.76949882507324, - "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 100.25174903869629, - "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 170.1675033569336, - "test_quick_core_backward_std_cpu_float64 (__main__.TestDecompCPU)": 79.20649909973145, - "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 147.0157470703125, - "test_register_spills_cuda (__main__.BenchmarkFusionGpuTest)": 85.56925010681152, - "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_inference_precision_amp (__main__.DeterministicTest)": 62.117165883382164, - "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_inference_precision_bfloat16 (__main__.DeterministicTest)": 80.71633275349934, - "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 162.40999857584634, - "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 112.27533340454102, - "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 147.1988321940104, - "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 104.0053342183431, - "test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 61.973000844319664, - "test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 61.754499435424805, - "test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 60.08883412679037, - "test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 86.1146666208903, - "test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 104.33766746520996, - "test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 114.78433227539062, - "test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 86.49966684977214, - "test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 71.44516626993816, - "test_runtime_checks_large_cpu (__main__.AOTInductorTestABICompatibleCpu)": 66.8162488937378, - "test_runtime_checks_large_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 72.04562425613403, - "test_runtime_checks_large_cuda (__main__.AOTInductorTestABICompatibleGpu)": 184.2334976196289, - "test_scaled_gemm_offline_tunableop_cuda_float8_e4m3fnuz (__main__.TestLinalgCUDA)": 84.8563323020935, - "test_sdpa_kernel_ctx_manager2_dynamic_shapes (__main__.DynamicShapesCtxManagerTests)": 107.34962558746338, - "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 119.15850162506104, - "test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 140.9133758544922, - "test_sort_dynamic_shape_with_check_cuda (__main__.TestInductorDynamicCUDA)": 106.76350021362305, - "test_sort_stable_cpu (__main__.CpuTritonTests)": 1319.0793050130208, - "test_sort_stable_cuda (__main__.GPUTests)": 96.01039962768554, - "test_split_cumsum_cpu (__main__.CpuTritonTests)": 90.8499984741211, - "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 304.4350051879883, - "test_tensor_split (__main__.TestVmapOperators)": 105.89132479213826, - "test_terminate_handler_on_crash (__main__.TestTorch)": 167.24449968338013, - "test_terminate_signal (__main__.ForkTest)": 199.22387313842773, - "test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 199.12587642669678, - "test_terminate_signal (__main__.SpawnTest)": 200.71112155914307, - "test_train_parity_multi_group_unshard_async_op (__main__.TestFullyShard1DTrainingCore)": 65.3956667582194, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 88.69500064849854, - "test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 212.44074630737305, - "test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 209.64949798583984, - "test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 144.97124862670898, - "test_upsample_bicubic2d_cpu_halide (__main__.HalideCpuTests)": 97.45366668701172, - "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 93.24074745178223, - "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 76.62825012207031, - "test_vec_compare_op_cpu_only (__main__.CPUReproTests)": 60.935458501180015, - "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cpu_float32 (__main__.TestOperatorsCPU)": 96.73649978637695, - "test_vmapjvpvjp_linalg_lu_solve_cpu_float32 (__main__.TestOperatorsCPU)": 73.72424983978271, - "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 67.43249893188477, - "test_vmapjvpvjp_linalg_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 62.6795015335083, - "test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 75.2802505493164, - "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 77.50925064086914, - "test_vmapjvpvjp_nn_functional_conv2d_cpu_float32 (__main__.TestOperatorsCPU)": 66.33838690480879, - "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 67.38049983978271, - "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 75.26774978637695, - "test_vmapjvpvjp_svd_cpu_float32 (__main__.TestOperatorsCPU)": 61.21835474814138, - "test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 65.22375106811523, - "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 79.81699752807617, - "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 85.13375091552734, - "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 83.11999893188477, - "test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 111.10899925231934, - "test_warp_softmax_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 154.79667123158774, - "test_warp_softmax_64bit_indexing_cuda_float32 (__main__.TestNNDeviceTypeCUDA)": 137.61766529083252 + "EndToEndLSTM (__main__.RNNTest)": 156.4280014038086, + "MultiheadAttention (__main__.ModulesTest)": 142.05475234985352, + "test__adaptive_avg_pool2d (__main__.CPUReproTests)": 94.29028483799526, + "test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 61.061214447021484, + "test_aot_autograd_disable_functionalization_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 64.10949897766113, + "test_aot_autograd_disable_functionalization_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 63.37899875640869, + "test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 133.07775115966797, + "test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 182.88374710083008, + "test_aot_autograd_disable_functionalization_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 107.19750022888184, + "test_aot_autograd_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 65.34900093078613, + "test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 65.77025127410889, + "test_aot_autograd_symbolic_exhaustive_masked_norm_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 67.55673129741962, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 139.04249954223633, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 207.48150253295898, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 111.83749961853027, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_unpool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 61.02223124870887, + "test_aot_autograd_symbolic_exhaustive_nn_functional_unfold_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 60.61625099182129, + "test_aot_autograd_symbolic_exhaustive_ormqr_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 72.0044994354248, + "test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 78.97699928283691, + "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 120.6155014038086, + "test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_False_cpu (__main__.AssociativeScanTests)": 90.98520050048828, + "test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_True_cpu (__main__.AssociativeScanTests)": 107.81600036621094, + "test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 832.6403930664062, + "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 209.78656339645386, + "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 511.25464303152904, + "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 502.13043212890625, + "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 69.21749954223633, + "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 141.99519958496094, + "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 74.15374851226807, + "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 68.91049995422364, + "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 217.71485846383231, + "test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 151.827001953125, + "test_cat_2k_args (__main__.TestTEFuserDynamic)": 60.52952872465054, + "test_checkpointing_without_reentrant_input_requires_grad_False (__main__.TestAutogradWithCompiledAutograd)": 313.9864283970424, + "test_checkpointing_without_reentrant_input_requires_grad_True (__main__.TestAutogradWithCompiledAutograd)": 403.9378574916295, + "test_collect_callgrind (__main__.TestBenchmarkUtils)": 295.59543064662387, + "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 101.16829986572266, + "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 99.17959976196289, + "test_comprehensive_diff_cuda_float32 (__main__.TestDecompCUDA)": 67.93409957885743, + "test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 68.0083999633789, + "test_comprehensive_grid_sampler_2d_cpu_bfloat16 (__main__.TestDecompCPU)": 121.40875053405762, + "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestDecompCPU)": 107.16825103759766, + "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 476.1532516479492, + "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 436.6274948120117, + "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 271.2396987915039, + "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 268.1447006225586, + "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1175.651300048828, + "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 70.56930084228516, + "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 1088.7627990722656, + "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 74.085400390625, + "test_comprehensive_linalg_lu_solve_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 73.21680068969727, + "test_comprehensive_linalg_lu_solve_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 69.35940017700196, + "test_comprehensive_linalg_solve_triangular_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 74.40799865722656, + "test_comprehensive_linalg_solve_triangular_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 73.20870056152344, + "test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 65.00710029602051, + "test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 64.47350044250489, + "test_comprehensive_linalg_vector_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 91.43851551865086, + "test_comprehensive_linalg_vector_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 94.38921136567087, + "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 112.68219909667968, + "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 107.13470001220703, + "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 106.79499893188476, + "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex128 (__main__.TestDecompCUDA)": 61.82360000610352, + "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 (__main__.TestDecompCUDA)": 61.832200622558595, + "test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDecompCPU)": 69.2859992980957, + "test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float64 (__main__.TestDecompCPU)": 61.617000579833984, + "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 109.80070037841797, + "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 105.05930023193359, + "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 110.41274833679199, + "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 117.82600212097168, + "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 267.2045959472656, + "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 226.3138000488281, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestDecompCUDA)": 78.2098014831543, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 83.08559951782226, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestDecompCUDA)": 79.31420059204102, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 84.31810150146484, + "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 124.66989974975586, + "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 122.46029968261719, + "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 1227.5965942382813, + "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 1224.6120971679688, + "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 1215.3135009765624, + "test_comprehensive_nn_functional_max_pool3d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 574.5224914550781, + "test_comprehensive_nn_functional_max_pool3d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 576.6116943359375, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 66.11190147399903, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 64.53090057373046, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 64.63370094299316, + "test_comprehensive_ormqr_cpu_complex64 (__main__.TestDecompCPU)": 63.500749588012695, + "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 118.29800109863281, + "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 115.92799911499023, + "test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 71.202099609375, + "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 76.42730102539062, + "test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 74.30220031738281, + "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 73.72639999389648, + "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 71.33249969482422, + "test_comprehensive_svd_lowrank_cuda_complex128 (__main__.TestDecompCUDA)": 64.70350074768066, + "test_comprehensive_svd_lowrank_cuda_complex64 (__main__.TestDecompCUDA)": 63.244300079345706, + "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 97.68460006713867, + "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 90.53349990844727, + "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 112.51621355329242, + "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 242.19535500662667, + "test_conv2d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 68.08974933624268, + "test_conv3d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 135.26825141906738, + "test_conv3d_binary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 67.82638888888889, + "test_conv3d_cuda (__main__.AOTInductorTestABICompatibleGpu)": 120.85684497539813, + "test_conv3d_unary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 60.322389390733505, + "test_conv_bn_fuse_cpu (__main__.CpuTests)": 67.01239929199218, + "test_conv_transpose_with_output_size_and_no_batch_dim_ConvTranspose3d_cuda (__main__.TestConvolutionNNDeviceTypeCUDA)": 145.28714145337497, + "test_conv_unary_fusion_nnc (__main__.TestMkldnnFusion)": 81.58721351623535, + "test_correctness_AdamW_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 85.3761001586914, + "test_correctness_Adam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 82.11160125732422, + "test_count_nonzero_all (__main__.TestBool)": 647.7996433803013, + "test_cp_flex_attention_document_mask (__main__.CPFlexAttentionTest)": 71.75066630045573, + "test_cross_entropy_large_tensor_reduction_mean_cuda (__main__.TestNNDeviceTypeCUDA)": 112.6964959303538, + "test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 86.26990051269532, + "test_dtensor_op_db_nn_functional_poisson_nll_loss_cpu_float32 (__main__.TestLocalDTensorOpsCPU)": 115.99650001525879, + "test_eager_sequence_nr_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 229.80603500892377, + "test_eig_check_magma_cuda_float32 (__main__.TestLinalgCUDA)": 129.7280044555664, + "test_fail_arithmetic_ops.py (__main__.TestTyping)": 68.54400089808873, + "test_fail_random.py (__main__.TestTyping)": 178.38840666271392, + "test_fd_sharing (__main__.TestMultiprocessing)": 61.969195964289646, + "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 98.24910049438476, + "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 82.74409904479981, + "test_fs_sharing (__main__.TestMultiprocessing)": 64.23455582724677, + "test_fuse_large_params_cpu (__main__.CpuTests)": 101.42200215657552, + "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 161.51942879813058, + "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 162.31028856549943, + "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 149.26629943847655, + "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 118.3827995300293, + "test_grad_nn_Transformer_cpu_float64 (__main__.TestModuleCPU)": 68.17949033250996, + "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 84.50290069580078, + "test_gradgrad_nn_LSTM_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 65.09507634089543, + "test_gradgrad_nn_LSTM_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 65.47933355967204, + "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 183.63109741210937, + "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 114.0967010498047, + "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 133.20650100708008, + "test_gradgrad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 513.7934997558593, + "test_index (__main__.DistTensorOpsTest)": 63.37289934158325, + "test_indirect_device_assert (__main__.TritonCodeGenTests)": 308.16075134277344, + "test_inductor_dynamic_shapes_broadcasting_dynamic_shapes (__main__.DynamicShapesReproTests)": 123.50510314415241, + "test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 70.94635799952916, + "test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 84.69389953613282, + "test_inputs_overlapping_with_mutation_stress_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 141.22007206508093, + "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 78.74900007247925, + "test_linalg_solve_triangular_large_cuda_complex128 (__main__.TestLinalgCUDA)": 120.89850234985352, + "test_linalg_solve_triangular_large_cuda_complex64 (__main__.TestLinalgCUDA)": 91.9739990234375, + "test_linear_binary_cpp_wrapper (__main__.TestCppWrapper)": 120.82975006103516, + "test_linear_binary_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 119.11174774169922, + "test_longformer_chunk_dynamic_shapes (__main__.DynamicShapesReproTests)": 104.80221448625836, + "test_low_memory_max_pool_dilation_1_dim_3_cpu_halide (__main__.HalideCpuTests)": 585.4649919782366, + "test_low_memory_max_pool_dilation_2_dim_3_cpu_halide (__main__.HalideCpuTests)": 497.8724278041295, + "test_lstm_cpu (__main__.TestMkldnnCPU)": 93.07950210571289, + "test_many_overlapping_inputs_does_not_explode_guards_dynamic_shapes (__main__.DynamicShapesReproTests)": 124.84971400669643, + "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 61.881356375558035, + "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 63.254143306187224, + "test_nll_loss_large_tensor_reduction_mean_cuda (__main__.TestNNDeviceTypeCUDA)": 329.2579941749573, + "test_pattern_matcher_multi_user_cpu (__main__.CpuTritonTests)": 141.80814688546317, + "test_proper_exit (__main__.TestDataLoader)": 218.38585935320174, + "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 227.89378683907645, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 102.8917465209961, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 105.06525039672852, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 101.56850051879883, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 78.47400093078613, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 108.95525169372559, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 104.72825050354004, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 78.20949745178223, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 96.36775207519531, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 99.7805004119873, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 80.42650032043457, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 102.69049835205078, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 102.31074905395508, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 109.89425086975098, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 100.21624946594238, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 102.30649948120117, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 77.84124946594238, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 103.11899757385254, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 103.89025115966797, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 71.87825012207031, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 97.63850021362305, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 101.92625045776367, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 77.63574981689453, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 109.77174949645996, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 103.81300163269043, + "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 536.6219940185547, + "test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 1043.1840942382812, + "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 794.3912506103516, + "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1367.2698120117188, + "test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 77.04675102233887, + "test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 280.63609924316404, + "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 113.44674873352051, + "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 229.65490112304687, + "test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 75.17299842834473, + "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 138.64920043945312, + "test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 68.40219993591309, + "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 86.44400024414062, + "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 167.45450286865236, + "test_quick_core_backward_std_cpu_float64 (__main__.TestDecompCPU)": 74.3385009765625, + "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 136.91199951171876, + "test_register_spills_cuda (__main__.BenchmarkFusionGpuTest)": 113.36530075073242, + "test_replicatepad_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 70.2165002822876, + "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_inference_precision_amp (__main__.DeterministicTest)": 77.06460037231446, + "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 149.1957015991211, + "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 150.3646011352539, + "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 162.8407989501953, + "test_run2run_determinism_model_name_BertForMaskedLM_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 140.64679794311525, + "test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 80.052099609375, + "test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 80.33979949951171, + "test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 80.46890029907226, + "test_run2run_determinism_model_name_DistillGPT2_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 77.31679916381836, + "test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_amp (__main__.DeterministicTest)": 94.19669952392579, + "test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_bfloat16 (__main__.DeterministicTest)": 93.968798828125, + "test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_float16 (__main__.DeterministicTest)": 95.43030014038087, + "test_run2run_determinism_model_name_GoogleFnet_training_or_inference_training_precision_float32 (__main__.DeterministicTest)": 94.34460067749023, + "test_runtime_checks_large_cpu (__main__.AOTInductorTestABICompatibleCpu)": 69.63399925231934, + "test_runtime_checks_large_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 66.46521404811314, + "test_runtime_checks_large_cuda (__main__.AOTInductorTestABICompatibleGpu)": 189.52279815673828, + "test_save_load_large_string_attribute (__main__.TestSaveLoad)": 95.83349895477295, + "test_sdpa_kernel_ctx_manager2_dynamic_shapes (__main__.DynamicShapesCtxManagerTests)": 107.00114277430943, + "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 117.35257175990513, + "test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 120.11764253888812, + "test_sort_bool_cpu (__main__.CpuTritonTests)": 350.2287118094308, + "test_sort_dynamic_shape_with_check_cuda (__main__.TestInductorDynamicCUDA)": 371.5840934753418, + "test_sort_stable_cuda (__main__.GPUTests)": 87.17187404632568, + "test_sort_transpose_cpu (__main__.CpuTritonTests)": 376.56386021205356, + "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 296.1006278991699, + "test_terminate_handler_on_crash (__main__.TestTorch)": 176.9331459743636, + "test_terminate_signal (__main__.ForkTest)": 197.2466388238328, + "test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 197.36571486986108, + "test_terminate_signal (__main__.SpawnTest)": 201.76207041740417, + "test_train_parity_multi_group (__main__.TestFullyShard1DTrainingCore)": 163.89737558364868, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 65.74759979248047, + "test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 404.46739349365237, + "test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 321.34139862060545, + "test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 240.05119934082032, + "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 92.86999893188477, + "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 77.62930107116699, + "test_vec_compare_op_cpu_only (__main__.CPUReproTests)": 61.40728569030762, + "test_view_ops (__main__.TestViewOpsWithLocalTensor)": 85.29030755849985, + "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cpu_float32 (__main__.TestOperatorsCPU)": 83.75, + "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cuda_float32 (__main__.TestOperatorsCUDA)": 222.90328543526786, + "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 70.33380088806152, + "test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 63.39599895477295, + "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 75.14130058288575, + "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 64.2445011138916, + "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 78.53580017089844, + "test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 62.246999740600586, + "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 84.20339965820312, + "test_vmapvjpvjp_linalg_lstsq_cuda_float32 (__main__.TestOperatorsCUDA)": 110.56262493133545, + "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 79.86129951477051, + "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 78.55820007324219, + "test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 123.33470077514649 } \ No newline at end of file