From 0ec946a0522748332f42675a4d690ff32d773d42 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 24 Sep 2025 23:02:08 +0000 Subject: [PATCH] [ROCm] Increase binary build timeout to 5 hours (300 minutes) (#163776) Despite narrowing down the [FBGEMM_GENAI build to gfx942](https://github.com/pytorch/pytorch/pull/162648), the nightly builds still timed out because they [didn't get enough time to finish the post-PyTorch-build steps](https://github.com/pytorch/pytorch/actions/runs/17969771026/job/51109432897). This PR increases the timeout for ROCm builds for both [libtorch](https://github.com/pytorch/pytorch/actions/runs/17969771026) and [manywheel](https://github.com/pytorch/pytorch/actions/runs/17969771041), because both of those are close to the 4-hour mark currently. This PR is a more ROCm-targeted version of https://github.com/pytorch/pytorch/pull/162880 (which is for the release/2.9 branch). Pull Request resolved: https://github.com/pytorch/pytorch/pull/163776 Approved by: https://github.com/jeffdaily Co-authored-by: Jeff Daily --- .../templates/linux_binary_build_workflow.yml.j2 | 3 +++ .../generated-linux-binary-libtorch-nightly.yml | 2 ++ .../generated-linux-binary-manywheel-nightly.yml | 14 ++++++++++++++ .../generated-linux-binary-manywheel-rocm-main.yml | 1 + 4 files changed, 20 insertions(+) diff --git a/.github/templates/linux_binary_build_workflow.yml.j2 b/.github/templates/linux_binary_build_workflow.yml.j2 index a0f8befddf3..32e931e42f5 100644 --- a/.github/templates/linux_binary_build_workflow.yml.j2 +++ b/.github/templates/linux_binary_build_workflow.yml.j2 @@ -77,6 +77,9 @@ jobs: runs_on: linux.s390x ALPINE_IMAGE: "docker.io/s390x/alpine" timeout-minutes: 420 + {%- elif config["gpu_arch_type"] == "rocm" %} + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 {%- elif "conda" in build_environment and config["gpu_arch_type"] == "cuda" %} runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runs_on: 
linux.24xlarge.ephemeral diff --git a/.github/workflows/generated-linux-binary-libtorch-nightly.yml b/.github/workflows/generated-linux-binary-libtorch-nightly.yml index 03835a9f5f3..e63527e39ca 100644 --- a/.github/workflows/generated-linux-binary-libtorch-nightly.yml +++ b/.github/workflows/generated-linux-binary-libtorch-nightly.yml @@ -333,6 +333,7 @@ jobs: LIBTORCH_CONFIG: release LIBTORCH_VARIANT: shared-with-deps runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: libtorch-rocm6_3-shared-with-deps-release build_environment: linux-binary-libtorch secrets: @@ -447,6 +448,7 @@ jobs: LIBTORCH_CONFIG: release LIBTORCH_VARIANT: shared-with-deps runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: libtorch-rocm6_4-shared-with-deps-release build_environment: linux-binary-libtorch secrets: diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index 0f87f97df69..0b097abdc10 100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -323,6 +323,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.3 DESIRED_PYTHON: "3.10" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_10-rocm6_3 build_environment: linux-binary-manywheel secrets: @@ -434,6 +435,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.4 DESIRED_PYTHON: "3.10" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_10-rocm6_4 build_environment: linux-binary-manywheel secrets: @@ -915,6 +917,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.3 DESIRED_PYTHON: "3.11" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_11-rocm6_3 build_environment: linux-binary-manywheel secrets: @@ -1026,6 +1029,7 
@@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.4 DESIRED_PYTHON: "3.11" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_11-rocm6_4 build_environment: linux-binary-manywheel secrets: @@ -1507,6 +1511,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.3 DESIRED_PYTHON: "3.12" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_12-rocm6_3 build_environment: linux-binary-manywheel secrets: @@ -1618,6 +1623,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.4 DESIRED_PYTHON: "3.12" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_12-rocm6_4 build_environment: linux-binary-manywheel secrets: @@ -2099,6 +2105,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.3 DESIRED_PYTHON: "3.13" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_13-rocm6_3 build_environment: linux-binary-manywheel secrets: @@ -2210,6 +2217,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.4 DESIRED_PYTHON: "3.13" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_13-rocm6_4 build_environment: linux-binary-manywheel secrets: @@ -2691,6 +2699,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.3 DESIRED_PYTHON: "3.13t" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_13t-rocm6_3 build_environment: linux-binary-manywheel secrets: @@ -2802,6 +2811,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.4 DESIRED_PYTHON: "3.13t" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_13t-rocm6_4 build_environment: linux-binary-manywheel secrets: @@ -3283,6 +3293,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.3 DESIRED_PYTHON: "3.14" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: 
manywheel-py3_14-rocm6_3 build_environment: linux-binary-manywheel secrets: @@ -3394,6 +3405,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.4 DESIRED_PYTHON: "3.14" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_14-rocm6_4 build_environment: linux-binary-manywheel secrets: @@ -3875,6 +3887,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.3 DESIRED_PYTHON: "3.14t" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_14t-rocm6_3 build_environment: linux-binary-manywheel secrets: @@ -3986,6 +3999,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.4 DESIRED_PYTHON: "3.14t" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_14t-rocm6_4 build_environment: linux-binary-manywheel secrets: diff --git a/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml b/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml index 18706347026..00eb4ebbc49 100644 --- a/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml +++ b/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml @@ -60,6 +60,7 @@ jobs: DOCKER_IMAGE_TAG_PREFIX: rocm6.4 DESIRED_PYTHON: "3.10" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 300 build_name: manywheel-py3_10-rocm6_4 build_environment: linux-binary-manywheel-rocm secrets: