diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index 4890c1c26a7d1..3732500c00b9d 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -227,30 +227,14 @@ jobs: toolchain_decompress_command: ${{ needs.build_nightly.outputs.toolchain_decompress_command }} # END nightly benchmarking path - # Benchmark framework builds and runs on PRs path: - build_pr: - name: '[PR] Build SYCL' - if: github.event_name == 'pull_request' - uses: ./.github/workflows/sycl-linux-build.yml - with: - build_ref: ${{ github.sha }} - build_cache_root: "/__w/" - build_cache_suffix: "default" - # Docker image has last nightly pre-installed and added to the PATH - build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" - cc: clang - cxx: clang++ - changes: '[]' - toolchain_artifact: sycl_linux_default - + # BEGIN benchmark framework builds and runs on PRs path # TODO: When we have stable BMG runner(s), consider moving this job to that runner. test_benchmark_framework: name: '[PR] Benchmark suite testing' - needs: [build_pr] permissions: contents: write packages: read - if: always() && !cancelled() && needs.build_pr.outputs.build_conclusion == 'success' + if: github.event_name == 'pull_request' uses: ./.github/workflows/sycl-linux-run-tests.yml with: name: 'Framework test: PVC_PERF, L0, Minimal preset' diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 7321dc5aa932c..a131d334afc45 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -113,10 +113,10 @@ runs: # modified output the entire sycl build dir as an artifact, in which the # intermediate files required can be stitched together from the build files. # However, this is not exactly "clean" or "fun to maintain"... 
- - name: Build Unified Runtime + - name: Build LLVM shell: bash run: | - # Build Unified Runtime + echo "::group::checkout_llvm" # Sparse-checkout UR at build ref: git clone --depth 1 --no-checkout https://github.com/intel/llvm ur cd ur @@ -124,6 +124,8 @@ runs: git sparse-checkout set unified-runtime git fetch origin ${{ inputs.build_ref }} git checkout FETCH_HEAD + echo "::endgroup::" + echo "::group::configure_llvm" # Configure UR mkdir build install @@ -135,32 +137,41 @@ runs: -DUR_BUILD_ADAPTER_L0=ON \ -DUR_BUILD_ADAPTER_L0_V2=ON - # Build and install UR + echo "::endgroup::" + echo "::group::build_and_install_llvm" + cmake --build build -j "$(nproc)" cmake --install build cd - + + echo "::endgroup::" # Install level zero v1.25.2 # This is to have the latest level zero required by Compute Benchmarks # Remove this w/a once the sycl nightly images are updated to have level zero v1.25.2 - name: Install level zero v1.25.2 shell: bash run: | - # Install level zero v1.25.2 + echo "::group::checkout_level_zero" # Checkout Level Zero at build ref: wget https://github.com/oneapi-src/level-zero/archive/refs/tags/v1.25.2.tar.gz -O level-zero-v1.25.2.tar.gz tar -xvf level-zero-v1.25.2.tar.gz cd level-zero-1.25.2 - # Configure Level Zero + echo "::endgroup::" + echo "::group::configure_level_zero" + cmake -DCMAKE_BUILD_TYPE=Release \ -Bbuild - # Build and install Level Zero + echo "::endgroup::" + echo "::group::build_and_install_level_zero" + cmake --build build -j "$(nproc)" sudo cmake --install build cd - + echo "::endgroup::" - name: Set env var for results branch shell: bash run: | @@ -181,10 +192,10 @@ runs: SAVE_PREFIX: ${{ inputs.save_name }} shell: bash run: | - # Build and run benchmarks # TODO generate summary + display helpful message here export CMPLR_ROOT=./toolchain - echo "-----" + echo "::group::install_python_deps" + echo "Installing python dependencies..." 
# Using --break-system-packages because: # - venv is not installed # - unable to install anything via pip, as python packages in the docker # - apt is unable to install anything due to unresolved dpkg dependencies, # as a result of how the sycl nightly images are created pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - echo "-----" + echo "::endgroup::" + echo "::group::sycl_ls" # By default, the benchmark scripts forceload level_zero FORCELOAD_ADAPTER="${ONEAPI_DEVICE_SELECTOR%%:*}" @@ -228,7 +240,8 @@ runs: export COMPUTE_RUNTIME_TAG_CACHE="$(cat ./devops/dependencies.json | jq -r .linux.compute_runtime.github_tag)" sycl-ls - echo "-----" + echo "::endgroup::" + echo "::group::run_benchmarks" WORKDIR="$(realpath ./llvm_test_workdir)" if [ -n "$WORKDIR" ] && [ -d "$WORKDIR" ] && [[ "$WORKDIR" == *llvm_test_workdir* ]]; then rm -rf "$WORKDIR" ; fi @@ -247,7 +260,8 @@ ${{ inputs.exit_on_failure == 'true' && '--exit-on-failure --iterations 1' || '' }} # TODO: add back: "--flamegraph inclusive" once works properly - echo "-----" + echo "::endgroup::" + echo "::group::compare_results" python3 ./devops/scripts/benchmarks/compare.py to_hist \ --avg-type EWMA \ --cutoff "$(date -u -d '7 days ago' +'%Y%m%d_%H%M%S')" \ @@ -260,7 +274,9 @@ --produce-github-summary \ ${{ inputs.dry_run == 'true' && '--dry-run' || '' }} \ - echo "-----" + echo "::endgroup::" + + LLVM_BENCHMARKS_UNIT_TESTING=1 COMPUTE_BENCHMARKS_BUILD_PATH=$WORKDIR/compute-benchmarks-build python3 ./devops/scripts/benchmarks/tests/test_integration.py - name: Cache changes and upload github summary if: always() diff --git a/devops/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md index 5429f75788015..d312da59b127a 100644 --- a/devops/scripts/benchmarks/README.md +++ b/devops/scripts/benchmarks/README.md @@ -52,6 +52,21 @@ $ cmake --build ~/ur_build -j $(nproc) $ cmake --install ~/ur_build ``` +## Testing + +There is an integration test that runs the benchmarking code and checks some of +its internal data structures. To use it, you must: +- prepare the environment yourself (Level Zero, oneAPI, or another source of a SYCL +`clang++` compiler) +- set `CMPLR_ROOT` to a toolchain directory that provides `clang++` +- set `COMPUTE_BENCHMARKS_BUILD_PATH` to an existing compute-benchmarks build directory +- set `LLVM_BENCHMARKS_UNIT_TESTING=1` + +Then run the tests with: +``` +python3 ./devops/scripts/benchmarks/tests/test_integration.py +``` + ## Results By default, the benchmark results are not stored.
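The Testing section added to the README above lists three environment prerequisites but nothing enforces them. The sketch below is a minimal, hypothetical preflight check (not part of this patch) that mirrors those bullets; only the variable names come from the README and the CI action, everything else is illustrative:

```python
# Hypothetical preflight helper, not part of the patch: checks the
# prerequisites documented in the README's Testing section before the
# integration test is launched from the repository root.
import os
import sys

problems = []
if not os.environ.get("CMPLR_ROOT"):
    problems.append("CMPLR_ROOT is unset; it must point at a toolchain providing clang++")
if not os.path.isdir(os.environ.get("COMPUTE_BENCHMARKS_BUILD_PATH", "")):
    problems.append("COMPUTE_BENCHMARKS_BUILD_PATH is not an existing compute-benchmarks build directory")
if os.environ.get("LLVM_BENCHMARKS_UNIT_TESTING") != "1":
    problems.append("LLVM_BENCHMARKS_UNIT_TESTING must be 1 so git_project.py skips git operations")

if problems:
    sys.exit("Integration test prerequisites not met:\n  " + "\n  ".join(problems))
print("Environment looks ready: python3 ./devops/scripts/benchmarks/tests/test_integration.py")
```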
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 96dfc61e56dd6..a4e09198c226f 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -270,6 +270,52 @@ def benchmarks(self) -> list[Benchmark]: ) ) + record_and_replay_params = product([0, 1], [0, 1]) + for emulate, instantiate in record_and_replay_params: + + def createRrBench(variant_name: str, **kwargs): + return RecordAndReplay( + self, + RUNTIMES.LEVEL_ZERO, + variant_name, + PROFILERS.TIMER, + mRec=1, + mInst=instantiate, + mDest=0, + emulate=emulate, + **kwargs, + ) + + benches += [ + createRrBench( + "large", + nForksInLvl=2, + nLvls=4, + nCmdSetsInLvl=10, + nInstantiations=10, + nAppendKern=10, + nAppendCopy=1, + ), + createRrBench( + "medium", + nForksInLvl=1, + nLvls=1, + nCmdSetsInLvl=10, + nInstantiations=10, + nAppendKern=10, + nAppendCopy=10, + ), + createRrBench( + "short", + nForksInLvl=1, + nLvls=4, + nCmdSetsInLvl=1, + nInstantiations=0, + nAppendKern=1, + nAppendCopy=0, + ), + ] + # Add UR-specific benchmarks benches += [ # TODO: multithread_benchmark_ur fails with segfault @@ -648,6 +694,49 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: ] +class RecordAndReplay(ComputeBenchmark): + def __init__( + self, bench, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs + ): + self.variant_name = variant_name + self.rr_params = kwargs + self.iterations_regular = 1000 + self.iterations_trace = 10 + super().__init__( + bench, + f"record_and_replay_benchmark_{runtime.value}", + "RecordGraph", + runtime, + profiler_type, + ) + + def explicit_group(self): + return f"{self.test} {self.variant_name}" + + def display_name(self) -> str: + return f"{self.explicit_group()}_{self.runtime.value}" + + def name(self): + ret = [] + for k, v in self.rr_params.items(): + if k[0] == "n": # numeric parameter + ret.append(f"{k[1:]} {v}") + elif k[0] == "m": + if v != 0: # measure parameter + ret.append(f"{k[1:]}") + else: # boolean parameter + if v != 0: + ret.append(k) + ret.sort() + return self.bench_name + " " + ", ".join(ret) + + def get_tags(self): + return ["L0"] + + def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + return [f"--{k}={v}" for k, v in self.rr_params.items()] + + class QueueInOrderMemcpy(ComputeBenchmark): def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type): self.isCopyOnly = isCopyOnly diff --git a/devops/scripts/benchmarks/git_project.py b/devops/scripts/benchmarks/git_project.py index 7d2ffd6706b44..1761b0049224b 100644 --- a/devops/scripts/benchmarks/git_project.py +++ b/devops/scripts/benchmarks/git_project.py @@ -3,6 +3,7 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import os from pathlib import Path import shutil @@ -10,7 +11,6 @@ from utils.utils import run from options import options - class GitProject: def __init__( self, @@ -167,6 +167,11 @@ def _setup_repo(self) -> bool: Returns: bool: True if the repository was cloned or updated, False if it was already up-to-date. """ + if os.environ.get("LLVM_BENCHMARKS_UNIT_TESTING") == "1": + log.debug( + f"Skipping git operations during unit testing of {self._name} (LLVM_BENCHMARKS_UNIT_TESTING=1)." 
+ ) + return False if not self.src_dir.exists(): self._git_clone() return True diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index de372d7279cc2..a63eca053a598 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -137,10 +137,13 @@ def process_results( stddev_threshold_override if stddev_threshold_override is not None else options.stddev_threshold - ) * mean_value + ) + threshold_scaled = threshold * mean_value - if stddev > threshold: - log.warning(f"stddev {stddev} above the threshold {threshold} for {label}") + if stddev > threshold_scaled: + log.warning( + f"stddev {stddev} above the threshold {threshold_scaled} ({threshold} times {mean_value}) for {label}" + ) valid_results = False rlist.sort(key=lambda res: res.value) @@ -228,6 +231,10 @@ def main(directory, additional_env_vars, compare_names, filter): benchmark for benchmark in s.benchmarks() if benchmark.enabled() ] if filter: + # log.info(f"all benchmarks:\n" + "\n".join([b.name() for b in suite_benchmarks])) + log.debug( + f"Filtering {len(suite_benchmarks)} benchmarks in {s.name()} suite for {filter.pattern}" + ) suite_benchmarks = [ benchmark for benchmark in suite_benchmarks @@ -713,6 +720,7 @@ def validate_and_parse_env_args(env_args): options.dry_run = args.dry_run options.umf = args.umf options.iterations_stddev = args.iterations_stddev + options.stddev_threshold = args.stddev_threshold options.build_igc = args.build_igc options.current_run_name = args.relative_perf options.cudnn_directory = args.cudnn_directory diff --git a/devops/scripts/benchmarks/tests/test_integration.py b/devops/scripts/benchmarks/tests/test_integration.py new file mode 100644 index 0000000000000..f4be229404850 --- /dev/null +++ b/devops/scripts/benchmarks/tests/test_integration.py @@ -0,0 +1,188 @@ +# Copyright (C) 2025 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import logging +import os +import shutil +import unittest +import tempfile +import subprocess +import json +from collections import namedtuple + +DataJson = namedtuple("DataJson", ["runs", "metadata", "tags", "names"]) +DataJsonRun = namedtuple("DataJsonRun", ["name", "results"]) +DataJsonResult = namedtuple( + "DataJsonResult", ["name", "label", "suite", "value", "unit"] +) +DataJsonMetatdata = namedtuple( + "DataJsonMetatdata", + [ + "type", + "unstable", + "tags", + "range_min", + "range_max", + "display_name", + "explicit_group", + ], +) + + +class App: + def __init__(self): + self.OUTPUT_DIR = None + self.RESULTS_DIR = None + self.WORKDIR_DIR = None + + def prepare_dirs(self): + self.OUTPUT_DIR = tempfile.mkdtemp() + self.RESULTS_DIR = tempfile.mkdtemp() + self.WORKDIR_DIR = tempfile.mkdtemp() + + # when UT does not want to build compute-benchmarks from scratch, it can provide prebuilt path + cb_targetpath = os.environ.get("COMPUTE_BENCHMARKS_BUILD_PATH") + if cb_targetpath and os.path.isdir(cb_targetpath): + cb_build_dir = os.path.join(self.WORKDIR_DIR, "compute-benchmarks-build") + os.symlink(cb_targetpath, cb_build_dir) + with open( + os.path.join(self.WORKDIR_DIR, "BENCH_WORKDIR_VERSION"), "w" + ) as f: + f.write("2.0") # TODO: take from main.INTERNAL_WORKDIR_VERSION + + def remove_dirs(self): + for d in [self.RESULTS_DIR, self.OUTPUT_DIR, self.WORKDIR_DIR]: + if d is not None: + shutil.rmtree(d, ignore_errors=True) + + def run_main(self, *args): + + # TODO: not yet tested: "--detect-version", "sycl,compute_runtime" + + procesResult = subprocess.run( + [ + "./devops/scripts/benchmarks/main.py", + self.WORKDIR_DIR, + "--sycl", + os.environ.get("CMPLR_ROOT"), + "--save", + "testfile", + "--output-html", + "remote", + "--results-dir", + self.RESULTS_DIR, + "--output-dir", + self.OUTPUT_DIR, + "--preset", + "Minimal", + "--timestamp-override", + "20240102_030405", + "--stddev-threshold", + "999999999.9", + "--exit-on-failure", + *args, + ], + capture_output=True, + ) + print("MAIN_PY_STDOUT:\n" + procesResult.stdout.decode()) + print("MAIN_PY_STDERR:\n" + procesResult.stderr.decode()) + return procesResult.returncode + + def get_output(self): + with open(os.path.join(self.OUTPUT_DIR, "data.json")) as f: + out = json.load(f) + return DataJson( + runs=[ + DataJsonRun( + name=run["name"], + results=[ + DataJsonResult( + name=r["name"], + label=r["label"], + suite=r["suite"], + value=r["value"], + unit=r["unit"], + ) + for r in run["results"] + ], + ) + for run in out["benchmarkRuns"] + ], + metadata=dict( + [ + ( + k, + DataJsonMetatdata( + type=v["type"], + unstable=v.get("unstable", False), + tags=v.get("tags", []), + range_min=v.get("range_min"), + range_max=v.get("range_max"), + display_name=v.get("display_name"), + explicit_group=v.get("explicit_group"), + ), + ) + for k, v in out["benchmarkMetadata"].items() + ] + ), + tags=out["benchmarkTags"], + names=out["defaultCompareNames"], + ) + + +# add "--verbose" for debug logs + + +class TestE2E(unittest.TestCase): + def setUp(self): + # Load test data + print(f"::group::{self._testMethodName}") + self.app = App() + self.app.remove_dirs() + self.app.prepare_dirs() + + def tearDown(self): + self.app.remove_dirs() + print(f"::endgroup::") + + def _checkGroup( + self, expectedGroupName: str, benchMetadata: DataJsonMetatdata, out: DataJson + ): + self.assertEqual(benchMetadata.type, "benchmark") + benchmarkGroupName = benchMetadata.explicit_group + 
self.assertEqual(benchmarkGroupName, expectedGroupName) + groupMetadata = out.metadata[benchmarkGroupName] + self.assertEqual(groupMetadata.type, "group") + + def _checkResultsExist(self, caseName: str, out: DataJson): + self.assertIn(caseName, [r.name for r in out.runs[0].results]) + + def _checkCase(self, caseName: str, groupName: str, tags: set[str]): + run_result = self.app.run_main("--filter", caseName + "$") + self.assertEqual(run_result, 0, "Subprocess did not exit cleanly") + + out = self.app.get_output() + self._checkResultsExist(caseName, out) + + metadata = out.metadata[caseName] + self.assertEqual(set(metadata.tags), tags) + self._checkGroup(groupName, metadata, out) + + def test_record_and_replay(self): + self._checkCase( + "record_and_replay_benchmark_l0 AppendCopy 1, AppendKern 10, CmdSetsInLvl 10, ForksInLvl 2, Instantiations 10, Lvls 4, Rec", + "RecordGraph large", + {"L0"}, + ) + + def test_submit_kernel(self): + self._checkCase( + "api_overhead_benchmark_l0 SubmitKernel out of order with measure completion KernelExecTime=20", + "SubmitKernel out of order with completion using events long kernel", + {"L0", "latency", "micro", "submit"}, + ) + + +if __name__ == "__main__": + unittest.main()
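The case name asserted in test_record_and_replay is produced by RecordAndReplay.name() in benches/compute.py: keyword arguments starting with "n" are numeric parameters rendered as "Name value", arguments starting with "m" are measurement flags rendered only when non-zero, and anything else is treated as a boolean flag. The standalone sketch below reproduces that convention outside the framework so the expected string can be derived by hand; the helper rr_name is invented for illustration and is not framework code:

```python
# Standalone sketch of the naming convention used by RecordAndReplay.name()
# in benches/compute.py; rr_name is an illustrative helper, not framework code.
def rr_name(bench_name: str, **rr_params) -> str:
    parts = []
    for k, v in rr_params.items():
        if k[0] == "n":      # numeric parameter: nLvls=4 -> "Lvls 4"
            parts.append(f"{k[1:]} {v}")
        elif k[0] == "m":    # measurement flag: mRec=1 -> "Rec", mDest=0 -> dropped
            if v != 0:
                parts.append(k[1:])
        elif v != 0:         # plain boolean flag: emulate=1 -> "emulate"
            parts.append(k)
    return bench_name + " " + ", ".join(sorted(parts))


# The "large" variant with emulate=0 and mInst=0 yields exactly the case name
# asserted in TestE2E.test_record_and_replay above.
print(rr_name(
    "record_and_replay_benchmark_l0",
    mRec=1, mInst=0, mDest=0, emulate=0,
    nForksInLvl=2, nLvls=4, nCmdSetsInLvl=10,
    nInstantiations=10, nAppendKern=10, nAppendCopy=1,
))
# -> record_and_replay_benchmark_l0 AppendCopy 1, AppendKern 10, CmdSetsInLvl 10, ForksInLvl 2, Instantiations 10, Lvls 4, Rec
```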