From 2bdd6bbb127e972f3b8faf3229fdac925fb13e8d Mon Sep 17 00:00:00 2001 From: Ivan Zaitsev Date: Mon, 15 Dec 2025 22:41:59 +0000 Subject: [PATCH] Instrument trunk and pull wfs (linux) with job and test filtering (#168201) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary Adds job and test filtering to trunk and pull workflows (Linux jobs) to enable granular autorevert restarts. ### Rationale Autorevert needs to restart specific failing jobs/tests without re-running the entire workflow. This enables targeted bisection and faster CI TTS. ### Approach Added workflow_dispatch inputs to trunk/pull workflows: - `jobs-to-include`: Space-separated job display names to run - `tests-to-include`: Space-separated test modules to run A new `job-filter.yml` reusable workflow computes a space-padded filter string (job-name) for contains() matching. Each job's if: condition checks: `if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-gcc11 ') }}` > [!IMPORTANT] > the new convention is to check against JOB DISPLAY NAME (not id!) as id is not available in clickhouse (for autorevert) ### Test Runner Instrumentation `run_test.py` reads `TESTS_TO_INCLUDE` env var, parses test modules (strips .py suffix and ::method), and overrides `--include` (if include is also present the result is intersection of both). Target determination is disabled when filter is active. 
### Filter Formats Jobs: Space-separated job display names (each must exactly match the job's `name:` field in the workflow file) `linux-jammy-py3.10-gcc11 linux-docs` Tests: Space-separated test paths (module format, consistent with existing ROCm test filtering) `distributed/elastic/multiprocessing/api_test test_torch` ### Testing See [this dispatch](https://github.com/pytorch/pytorch/actions/runs/19880436041): * ran on the latest branch `instrument-trunk-pull-linux-with-job-test-filters` with filters: * jobs: linux-jammy-py3.10-gcc11 * tests: distributed/elastic/multiprocessing/api_test Results: - Only 1 job reported test results: linux-jammy-py3.10-gcc11 / test (distributed, 1, 2, lf.linux.2xlarge) - 26 tests executed - all from distributed/elastic/multiprocessing/api_test.py
tests run ``` ⏺ clickhouse - run_clickhouse_query (MCP)(query: "SELECT \n file,\n classname,\n name\nFROM tests.all_test_runs \nWHERE workflow_id = 19880436041\nORDER BY classname, name", inline_result_limit_bytes: 8192) ⎿  { "time": 1.049098014831543, "result_rows": [ … +139 lines (ctrl+o to expand) ⏺ All 26 Tests Run in distributed/elastic/multiprocessing/api_test.py | Class | Test Method | |--------------------------------|--------------------------------------------------------------------------------------| | RunProcResultsTest | test_get_failures | | | test_is_failed | | StartProcessesAsBinaryTest | test_binary_exit | | | test_binary_incorrect_entrypoint | | | test_binary_raises | | | test_subprocess_context_close | | | test_validate_full_rank | | StartProcessesAsFuncTest | test_args_env_len_mismatch | | | test_function_large_ret_val | | | test_function_raise | | | test_function_with_tensor | | | test_invalid_log_dir | | | test_multiprocess_context_close | | | test_multiprocessing_context_poll_raises_exception ← (the one you originally wanted) | | | test_pcontext_wait | | | test_pcontext_wait_on_a_child_thread | | | test_to_map | | | test_void_function | | | test_wait_for_all_child_procs_to_exit | | StartProcessesListAsBinaryTest | test_binary | | | test_binary_duplicate_log_filters | | | test_binary_redirect_and_tee | | StartProcessesListAsFuncTest | test_function | | StdTest | test_from_str_bad_input | | | test_from_value | | | test_from_value_map | ```
---- latest test: https://github.com/pytorch/pytorch/actions/runs/20044971553 Pull Request resolved: https://github.com/pytorch/pytorch/pull/168201 Approved by: https://github.com/jeanschmidt, https://github.com/huydhn --- .github/workflows/_linux-test.yml | 7 + .github/workflows/job-filter.yml | 50 +++++++ .github/workflows/pull.yml | 125 +++++++++++++++--- .github/workflows/trunk.yml | 114 +++++++++++++--- test/run_test.py | 14 ++ .../adapters/workflow_consistency_linter.py | 16 +++ 6 files changed, 290 insertions(+), 36 deletions(-) create mode 100644 .github/workflows/job-filter.yml diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml index 745e1c046ab..bc7d05e0110 100644 --- a/.github/workflows/_linux-test.yml +++ b/.github/workflows/_linux-test.yml @@ -28,6 +28,11 @@ on: default: 240 description: | Set the maximum (in minutes) how long the workflow should take to finish + tests-to-include: + required: false + type: string + default: "" + description: Space-separated tests to include (empty string implies default list) use-gha: required: false type: string @@ -327,6 +332,7 @@ jobs: XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }} PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }} + TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }} DASHBOARD_TAG: ${{ inputs.dashboard-tag }} VLLM_TEST_HUGGING_FACE_TOKEN: ${{ secrets.VLLM_TEST_HUGGING_FACE_TOKEN }} HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} @@ -405,6 +411,7 @@ jobs: -e XLA_CLANG_CACHE_S3_BUCKET_NAME \ -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \ -e PYTORCH_TEST_RERUN_DISABLED_TESTS \ + -e TESTS_TO_INCLUDE \ -e SKIP_SCCACHE_INITIALIZATION=1 \ -e HUGGING_FACE_HUB_TOKEN \ -e VLLM_TEST_HUGGING_FACE_TOKEN \ diff --git a/.github/workflows/job-filter.yml b/.github/workflows/job-filter.yml new file mode 100644 index 00000000000..29b5afd485b --- 
/dev/null +++ b/.github/workflows/job-filter.yml @@ -0,0 +1,50 @@ +name: job-filter + +# Job Filter Rules (for trunk.yml, pull.yml) +# ========================================== +# 1. DEFAULT PATTERN - use job's display name (from `name:` field) with space padding: +# if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' DISPLAY-NAME ') }} +# needs: [job-filter, ...] +# +# 2. SHARED BUILD DEPENDENCY - when multiple jobs depend on the same build, +# the build job must include all dependent display names: +# # build job (used by both linux-jammy-py3.10-gcc11 test AND linux-docs) +# if: ${{ needs.job-filter.outputs.jobs == '' || contains(..., ' linux-jammy-py3.10-gcc11 ') || contains(..., ' linux-docs ') }} +# +# 3. PRE-EXISTING CONDITION - when a job has a condition that existed before filtering +# (e.g., only run on certain events/tags), use OR logic to allow filter override: +# if: ${{ PRE_EXISTING_CONDITION || (needs.job-filter.outputs.jobs != '' && contains(needs.job-filter.outputs.jobs, ' DISPLAY-NAME ')) }} +# Examples of pre-existing conditions: +# - startsWith(github.event.ref, 'refs/tags/ciflow/trunk') +# - github.event_name == 'pull_request' +# +# 4. 
SECURITY CONDITIONS - conditions like `github.repository_owner == 'pytorch'` +# must ALWAYS be enforced, keep using AND: +# if: ${{ github.repository_owner == 'pytorch' && (needs.job-filter.outputs.jobs == '' || contains(...)) }} + +on: + workflow_call: + inputs: + jobs-to-include: + required: false + type: string + default: "" + outputs: + jobs: + description: "Space-padded job filter string" + value: ${{ jobs.compute.outputs.jobs }} + +jobs: + compute: + runs-on: ubuntu-latest + outputs: + jobs: ${{ steps.set.outputs.jobs }} + steps: + - id: set + run: | + jobs="${{ inputs.jobs-to-include }}" + if [ -n "$jobs" ]; then + echo "jobs= ${jobs} " >> "$GITHUB_OUTPUT" + else + echo "jobs=" >> "$GITHUB_OUTPUT" + fi diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index be98711f485..79be6f268de 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -12,6 +12,17 @@ on: tags: - ciflow/pull/* workflow_dispatch: + inputs: + jobs-to-include: + description: "Space-separated list of job display names to run (empty = all)" + required: false + default: "" + type: string + tests-to-include: + description: "Space-separated tests to include (passed to test runner; empty = default)" + required: false + default: "" + type: string schedule: - cron: 29 8 * * * # about 1:29am PDT @@ -24,6 +35,14 @@ permissions: contents: read jobs: + # See job-filter.yml for rules on adding job filter conditions + job-filter: + if: github.repository_owner == 'pytorch' + name: job-filter + uses: ./.github/workflows/job-filter.yml + with: + jobs-to-include: ${{ github.event.inputs.jobs-to-include || '' }} + llm-td: if: github.repository_owner == 'pytorch' name: before-test @@ -50,9 +69,12 @@ jobs: curr_branch: ${{ github.head_ref || github.ref_name }} linux-jammy-py3_10-gcc11-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-gcc11 ') || contains(needs.job-filter.outputs.jobs, ' linux-docs ') }} name: 
linux-jammy-py3.10-gcc11 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.10-gcc11 @@ -76,30 +98,39 @@ jobs: secrets: inherit linux-jammy-py3_10-gcc11-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-gcc11 ') }} name: linux-jammy-py3.10-gcc11 uses: ./.github/workflows/_linux-test.yml needs: - linux-jammy-py3_10-gcc11-build - target-determination + - job-filter with: build-environment: ${{ needs.linux-jammy-py3_10-gcc11-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-py3_10-gcc11-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-py3_10-gcc11-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-docs: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-docs ') }} name: linux-docs uses: ./.github/workflows/_docs.yml - needs: linux-jammy-py3_10-gcc11-build + needs: + - linux-jammy-py3_10-gcc11-build + - job-filter with: build-environment: ${{ needs.linux-jammy-py3_10-gcc11-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-py3_10-gcc11-build.outputs.docker-image }} secrets: inherit linux-jammy-py3_10-gcc11-no-ops: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-gcc11-no-ops ') }} name: linux-jammy-py3.10-gcc11-no-ops uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.10-gcc11-no-ops @@ -111,9 +142,12 @@ jobs: secrets: inherit linux-jammy-py3_10-gcc11-pch: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' 
linux-jammy-py3.10-gcc11-pch ') }} name: linux-jammy-py3.10-gcc11-pch uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.10-gcc11-pch @@ -125,9 +159,12 @@ jobs: secrets: inherit linux-jammy-py3_10-clang18-asan-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-clang18-asan ') }} name: linux-jammy-py3.10-clang18-asan uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner: linux.2xlarge.memory runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -148,22 +185,28 @@ jobs: secrets: inherit linux-jammy-py3_10-clang18-asan-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-clang18-asan ') }} name: linux-jammy-py3.10-clang18-asan uses: ./.github/workflows/_linux-test.yml needs: - linux-jammy-py3_10-clang18-asan-build - target-determination + - job-filter with: build-environment: ${{ needs.linux-jammy-py3_10-clang18-asan-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-py3_10-clang18-asan-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-py3_10-clang18-asan-build.outputs.test-matrix }} sync-tag: asan-test + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-jammy-py3_10-clang12-onnx-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-clang12-onnx ') }} name: linux-jammy-py3.10-clang12-onnx uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.10-clang12-onnx @@ -176,21 +219,27 @@ jobs: secrets: inherit 
linux-jammy-py3_10-clang12-onnx-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-clang12-onnx ') }} name: linux-jammy-py3.10-clang12-onnx uses: ./.github/workflows/_linux-test.yml needs: - linux-jammy-py3_10-clang12-onnx-build - target-determination + - job-filter with: build-environment: ${{ needs.linux-jammy-py3_10-clang12-onnx-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-py3_10-clang12-onnx-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-py3_10-clang12-onnx-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-jammy-py3_10-clang12-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-clang12 ') }} name: linux-jammy-py3.10-clang12 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.10-clang12 @@ -213,21 +262,27 @@ jobs: secrets: inherit linux-jammy-py3_10-clang12-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-clang12 ') }} name: linux-jammy-py3.10-clang12 uses: ./.github/workflows/_linux-test.yml needs: - linux-jammy-py3_10-clang12-build - target-determination + - job-filter with: build-environment: ${{ needs.linux-jammy-py3_10-clang12-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-py3_10-clang12-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-py3_10-clang12-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-jammy-py3_14-clang12-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.14-clang12 ') }} name: linux-jammy-py3.14-clang12 
uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.14-clang12 @@ -250,19 +305,26 @@ jobs: secrets: inherit linux-jammy-py3_14-clang12-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.14-clang12 ') }} name: linux-jammy-py3.14-clang12 uses: ./.github/workflows/_linux-test.yml - needs: linux-jammy-py3_14-clang12-build + needs: + - linux-jammy-py3_14-clang12-build + - job-filter with: build-environment: ${{ needs.linux-jammy-py3_14-clang12-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-py3_14-clang12-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-py3_14-clang12-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-jammy-cuda12_8-cudnn9-py3_10-clang12-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-cuda12.8-cudnn9-py3.10-clang12 ') }} name: linux-jammy-cuda12.8-cudnn9-py3.10-clang12 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-cuda12.8-cudnn9-py3.10-clang12 @@ -274,9 +336,12 @@ jobs: secrets: inherit linux-jammy-cpu-py3_10-gcc11-bazel-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-cpu-py3.10-gcc11-bazel-test ') }} name: linux-jammy-cpu-py3.10-gcc11-bazel-test uses: ./.github/workflows/_bazel-build-test.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner: "${{ needs.get-label-type.outputs.label-type }}linux.large" build-environment: linux-jammy-cuda12.8-py3.10-gcc11-bazel-test @@ -289,9 +354,12 @@ jobs: secrets: inherit 
linux-jammy-py3_10-gcc11-mobile-lightweight-dispatch-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-gcc11-mobile-lightweight-dispatch-build ') }} name: linux-jammy-py3.10-gcc11-mobile-lightweight-dispatch-build uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.10-gcc11-mobile-lightweight-dispatch-build @@ -304,11 +372,13 @@ jobs: secrets: inherit linux-jammy-rocm-py3_10-build: + if: github.event_name == 'pull_request' || (needs.job-filter.outputs.jobs != '' && contains(needs.job-filter.outputs.jobs, ' linux-jammy-rocm-py3.10 ')) # don't run build twice on main - if: github.event_name == 'pull_request' name: linux-jammy-rocm-py3.10 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-rocm-py3.10 @@ -323,9 +393,12 @@ jobs: secrets: inherit linux-jammy-cuda12_8-py3_10-gcc11-inductor-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' cuda12.8-py3.10-gcc11-sm75 ') }} name: cuda12.8-py3.10-gcc11-sm75 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm75 @@ -338,19 +411,26 @@ jobs: secrets: inherit linux-jammy-cuda12_8-py3_10-gcc11-inductor-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' cuda12.8-py3.10-gcc11-sm75 ') }} name: cuda12.8-py3.10-gcc11-sm75 uses: ./.github/workflows/_linux-test.yml - needs: linux-jammy-cuda12_8-py3_10-gcc11-inductor-build + needs: + - linux-jammy-cuda12_8-py3_10-gcc11-inductor-build 
+ - job-filter with: build-environment: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-inductor-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-inductor-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-inductor-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-jammy-cuda13_0-py3_10-gcc11-inductor-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' cuda13.0-py3.10-gcc11-sm75 ') }} name: cuda13.0-py3.10-gcc11-sm75 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-cuda13.0-py3.10-gcc11-sm75 @@ -363,19 +443,26 @@ jobs: secrets: inherit linux-jammy-cuda13_0-py3_10-gcc11-inductor-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' cuda13.0-py3.10-gcc11-sm75 ') }} name: cuda13.0-py3.10-gcc11-sm75 uses: ./.github/workflows/_linux-test.yml - needs: linux-jammy-cuda13_0-py3_10-gcc11-inductor-build + needs: + - linux-jammy-cuda13_0-py3_10-gcc11-inductor-build + - job-filter with: build-environment: linux-jammy-cuda13.0-py3.10-gcc11-sm75 docker-image: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-inductor-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-inductor-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-jammy-xpu-n-py3_10-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-xpu-n-py3.10 ') }} name: linux-jammy-xpu-n-py3.10 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: # This should sync with the build in xpu.yml but xpu uses a larger runner # 
sync-tag: linux-xpu-n-build diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 26d370f5702..e9c7bfb91f9 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -9,6 +9,17 @@ on: tags: - ciflow/trunk/* workflow_dispatch: + inputs: + jobs-to-include: + description: "Space-separated list of job display names to run (empty = all)" + required: false + default: "" + type: string + tests-to-include: + description: "Space-separated tests to include (passed to test runner; empty = default)" + required: false + default: "" + type: string schedule: - cron: 29 8 * * * # about 1:29am PDT @@ -21,6 +32,14 @@ permissions: contents: read jobs: + # See job-filter.yml for rules on adding job filter conditions + job-filter: + if: github.repository_owner == 'pytorch' + name: job-filter + uses: ./.github/workflows/job-filter.yml + with: + jobs-to-include: ${{ github.event.inputs.jobs-to-include || '' }} + llm-td: if: github.repository_owner == 'pytorch' name: before-test @@ -48,9 +67,12 @@ jobs: curr_ref_type: ${{ github.ref_type }} libtorch-linux-jammy-cuda12_8-py3_10-gcc11-debug-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' libtorch-linux-jammy-cuda12.8-py3.10-gcc11-debug ') }} name: libtorch-linux-jammy-cuda12.8-py3.10-gcc11-debug uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: build-environment: libtorch-linux-jammy-cuda12.8-py3.10-gcc11 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11 @@ -64,9 +86,12 @@ jobs: secrets: inherit linux-jammy-cuda12_8-py3_10-gcc11-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-cuda12.8-py3.10-gcc11 ') }} name: linux-jammy-cuda12.8-py3.10-gcc11 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ 
needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-cuda12.8-py3.10-gcc11 @@ -88,22 +113,28 @@ jobs: secrets: inherit linux-jammy-cuda12_8-py3_10-gcc11-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-cuda12.8-py3.10-gcc11 ') }} name: linux-jammy-cuda12.8-py3.10-gcc11 uses: ./.github/workflows/_linux-test.yml needs: - linux-jammy-cuda12_8-py3_10-gcc11-build - target-determination + - job-filter with: timeout-minutes: 360 build-environment: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-jammy-cuda13_0-py3_10-gcc11-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-cuda13.0-py3.10-gcc11 ') }} name: linux-jammy-cuda13.0-py3.10-gcc11 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-cuda13.0-py3.10-gcc11 @@ -124,23 +155,29 @@ jobs: secrets: inherit linux-jammy-cuda13_0-py3_10-gcc11-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-cuda13.0-py3.10-gcc11 ') }} name: linux-jammy-cuda13.0-py3.10-gcc11 uses: ./.github/workflows/_linux-test.yml needs: - linux-jammy-cuda13_0-py3_10-gcc11-build - target-determination + - job-filter with: timeout-minutes: 360 build-environment: linux-jammy-cuda13.0-py3.10-gcc11 docker-image: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-build.outputs.test-matrix }} + tests-to-include: ${{ 
github.event.inputs.tests-to-include || '' }} secrets: inherit # no-ops builds test USE_PER_OPERATOR_HEADERS=0 where ATen/ops is not generated linux-jammy-cuda12_8-py3_10-gcc11-no-ops-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-cuda12.8-py3.10-gcc11-no-ops ') }} name: linux-jammy-cuda12.8-py3.10-gcc11-no-ops uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-cuda12.8-py3.10-gcc11-no-ops @@ -152,9 +189,12 @@ jobs: secrets: inherit linux-jammy-cuda13_0-py3_10-gcc11-no-ops-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-cuda13.0-py3.10-gcc11-no-ops ') }} name: linux-jammy-cuda13.0-py3.10-gcc11-no-ops uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-cuda13.0-py3.10-gcc11-no-ops @@ -167,9 +207,10 @@ jobs: secrets: inherit macos-py3-arm64-build: - if: github.repository_owner == 'pytorch' + if: ${{ github.repository_owner == 'pytorch' && (needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' macos-py3-arm64 ')) }} name: macos-py3-arm64 uses: ./.github/workflows/_mac-build.yml + needs: job-filter with: sync-tag: macos-py3-arm64-build build-environment: macos-py3-arm64 @@ -189,11 +230,13 @@ jobs: secrets: inherit macos-py3-arm64-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' macos-py3-arm64 ') }} name: macos-py3-arm64 uses: ./.github/workflows/_mac-test.yml needs: - macos-py3-arm64-build - target-determination + - job-filter with: build-environment: ${{ needs.macos-py3-arm64-build.outputs.build-environment }} # Same as the build job @@ -203,9 +246,12 @@ 
jobs: secrets: inherit win-vs2022-cpu-py3-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' win-vs2022-cpu-py3 ') }} name: win-vs2022-cpu-py3 uses: ./.github/workflows/_win-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: build-environment: win-vs2022-cpu-py3 cuda-version: cpu @@ -221,11 +267,13 @@ jobs: secrets: inherit win-vs2022-cpu-py3-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' win-vs2022-cpu-py3 ') }} name: win-vs2022-cpu-py3 uses: ./.github/workflows/_win-test.yml needs: - win-vs2022-cpu-py3-build - target-determination + - job-filter with: build-environment: ${{ needs.win-vs2022-cpu-py3-build.outputs.build-environment }} cuda-version: cpu @@ -234,9 +282,12 @@ jobs: secrets: inherit win-vs2022-cuda12_8-py3-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' win-vs2022-cuda12.8-py3 ') }} name: win-vs2022-cuda12.8-py3 uses: ./.github/workflows/_win-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: build-environment: win-vs2022-cuda12.8-py3 cuda-version: "12.8" @@ -244,10 +295,12 @@ jobs: secrets: inherit linux-jammy-rocm-py3_10-build: - if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') }} + if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') || (needs.job-filter.outputs.jobs != '' && contains(needs.job-filter.outputs.jobs, ' linux-jammy-rocm-py3.10 ')) }} name: linux-jammy-rocm-py3.10 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-rocm-py3.10 @@ -268,7 +321,7 @@ jobs: secrets: inherit linux-jammy-rocm-py3_10-test: - if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') }} + if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') || 
(needs.job-filter.outputs.jobs != '' && contains(needs.job-filter.outputs.jobs, ' linux-jammy-rocm-py3.10 ')) }} permissions: id-token: write contents: read @@ -277,16 +330,21 @@ jobs: needs: - linux-jammy-rocm-py3_10-build - target-determination + - job-filter with: build-environment: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit inductor-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' inductor-build ') }} name: inductor-build uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: build-environment: linux-jammy-cuda12.8-py3.12-gcc11-sm80 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks @@ -294,9 +352,12 @@ jobs: secrets: inherit inductor-build-cuda13: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' inductor-build-cuda13 ') }} name: inductor-build-cuda13 uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: build-environment: linux-jammy-cuda13.0-py3.12-gcc11-sm80 docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11-inductor-benchmarks @@ -305,12 +366,14 @@ jobs: # Test cross-compiled models with Windows libs extracted from wheel cross-compile-linux-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' cross-compile-linux-test ') }} name: cross-compile-linux-test uses: ./.github/workflows/_linux-test.yml needs: - linux-jammy-cuda12_8-py3_10-gcc11-build - get-label-type - win-vs2022-cuda12_8-py3-build + - job-filter with: build-environment: ${{ 
needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.docker-image }} @@ -318,12 +381,16 @@ jobs: { include: [ { config: "aoti_cross_compile_for_windows", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", win_torch_wheel_artifact: "win-vs2022-cuda12.8-py3" }, ]} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit verify-cachebench-cpu-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' verify-cachebench-cpu-build ') || contains(needs.job-filter.outputs.jobs, ' verify-cachebench-cpu-test ') }} name: verify-cachebench-cpu-build uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.10-gcc11 @@ -335,21 +402,27 @@ jobs: secrets: inherit verify-cachebench-cpu-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' verify-cachebench-cpu-test ') }} name: verify-cachebench-cpu-test uses: ./.github/workflows/_linux-test.yml needs: - verify-cachebench-cpu-build - target-determination + - job-filter with: build-environment: ${{ needs.verify-cachebench-cpu-build.outputs.build-environment }} docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-jammy-py3-clang12-executorch-build: +# if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3-clang12-executorch ') }} name: linux-jammy-py3-clang12-executorch uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - 
job-filter if: false # Has been broken for a while with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -362,19 +435,26 @@ jobs: secrets: inherit linux-jammy-py3-clang12-executorch-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3-clang12-executorch ') }} name: linux-jammy-py3-clang12-executorch uses: ./.github/workflows/_linux-test.yml - needs: linux-jammy-py3-clang12-executorch-build + needs: + - linux-jammy-py3-clang12-executorch-build + - job-filter with: build-environment: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.build-environment }} docker-image: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.docker-image }} test-matrix: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} secrets: inherit linux-jammy-py3_10-gcc11-full-debug-build-only: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-py3.10-gcc11-full-debug-build-only ') }} name: linux-jammy-py3.10-gcc11-full-debug-build-only uses: ./.github/workflows/_linux-build.yml - needs: get-label-type + needs: + - get-label-type + - job-filter with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner: linux.2xlarge.memory diff --git a/test/run_test.py b/test/run_test.py index ac36d5db27e..e024f3ae1a5 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -2069,6 +2069,20 @@ def main(): check_pip_packages() options = parse_args() + tests_to_include_env = os.environ.get("TESTS_TO_INCLUDE", "").strip() + if tests_to_include_env: + # Parse env var tests to module names (strips .py suffix and ::method) + env_tests = {parse_test_module(t) for t in tests_to_include_env.split()} + + if options.include != TESTS: + # --include was explicitly provided, intersect with env var + cli_tests = {parse_test_module(t) for t in options.include} + options.include = 
list(env_tests & cli_tests) + else: + # No explicit --include, use env var tests + options.include = list(env_tests) + + options.enable_td = False # Include sharding info in all metrics which_shard, num_shards = get_sharding_opts(options) diff --git a/tools/linter/adapters/workflow_consistency_linter.py b/tools/linter/adapters/workflow_consistency_linter.py index 64a2ce15dc2..d1aec25a813 100644 --- a/tools/linter/adapters/workflow_consistency_linter.py +++ b/tools/linter/adapters/workflow_consistency_linter.py @@ -107,6 +107,22 @@ def get_jobs_with_sync_tag( # same is true for ['with']['test-matrix'] if "test-matrix" in job.get("with", {}): del job["with"]["test-matrix"] + # and ['with']['tests-to-include'], since dispatch filters differ + if "tests-to-include" in job.get("with", {}): + del job["with"]["tests-to-include"] + + # normalize needs: remove helper job-filter so comparisons ignore it + needs = job.get("needs") + if needs: + needs_list = [needs] if isinstance(needs, str) else list(needs) + needs_list = [n for n in needs_list if n != "job-filter"] + if not needs_list: + job.pop("needs", None) + elif len(needs_list) == 1: + job["needs"] = needs_list[0] + else: + job["needs"] = needs_list + return (sync_tag, job_id, job)