Skip to content

Commit

Permalink
MLPerf default config version with bert & 3d-unet benchmarks (#392)
Browse files: browse the repository at this point in the history
* Using CUDA 12.4 & MLPerf Inference results 4.1.

* MLPerf metrics parser using metadata.json from inference results v4.1.

* Passing unit test, adding 3d-unet benchmark.

* Passing functional test.

* Passing CUDAAndNVIDIAGPUDriverInstallationTest test cases.

* Passing NvidiaContainerToolKitInstallationTests cases.

* Documentation.

* String interpolation for the parser.

* Documentation updates.

* Changing expected latency for 3d-unet single stream.

* Review changes.

* Documentation on how to add custom config information.

* Documentation fix.

* Increment VERSION.

* Updating parser to have simpler names for metrics.

* Fixing a typo.

* Updating documentation with new metric names.

---------

Co-authored-by: saibulusu <[email protected]>
  • Loading branch information
saibulusu and saibulusu authored Nov 11, 2024
1 parent 93c4114 commit 112f990
Show file tree
Hide file tree
Showing 40 changed files with 3,256 additions and 6,794 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
{
"A100-PCIe-80GBx4_TRT-custom_k_99_9_MaxP-Server": { "bert": "Accuracy = 91.873, Threshold = 90.783. Accuracy test PASSED." },
"A100-PCIe-80GBx4_TRT-custom_k_99_9_MaxP-SingleStream": { "bert": "Accuracy = 91.568, Threshold = 90.783. Accuracy test PASSED." },
"A100-PCIe-80GBx4_TRT-custom_k_99_9_MaxP-Offline": { "bert": "Accuracy = 91.832, Threshold = 90.783. Accuracy test FAILED." }
"accuracy": [
{
"name": "F1",
"pass": true,
"threshold": 89.96526,
"value": 90.2147015680108
}
],
"accuracy_pass": true,
"benchmark_full": "bert-99",
"benchmark_short": "bert",
"config_name": "DGX-A100_A100-SXM4-40GBx8_TRT-custom_k_99_MaxP-Offline",
"detected_system": "SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name=\"AMD EPYC 7V12 64-Core Processor\", architecture=CPUArchitecture.x86_64, core_count=48, threads_per_core=1): 2}), host_mem_conf=MemoryConfiguration(host_memory_capacity=Memory(quantity=928.7656999999999, byte_suffix=ByteSuffix.GB), comparison_tolerance=0.05), accelerator_conf=AcceleratorConfiguration(layout={GPU(name=\"NVIDIA A100-SXM4-40GB\", accelerator_type=AcceleratorType.Discrete, vram=Memory(quantity=40.0, byte_suffix=ByteSuffix.GiB), max_power_limit=400.0, pci_id=\"0x20B010DE\", compute_sm=80): 8}), numa_conf=NUMAConfiguration(numa_nodes={}, num_numa_nodes=4), system_id=\"DGX-A100_A100-SXM4-40GBx8\")",
"effective_min_duration_ms": 600000,
"effective_samples_per_query": 19800000,
"satisfies_query_constraint": true,
"scenario": "Offline",
"scenario_key": "result_samples_per_second",
"summary_string": "[PASSED] F1: 90.215 (Threshold=89.965)",
"system_name": "DGX-A100_A100-SXM4-40GBx8_TRT",
"tensorrt_version": "10.2.0",
"test_mode": "AccuracyOnly"
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
{
"A100-PCIe-80GBx4_TRT_Triton-triton_k_99_9_MaxP-Server": { "bert": "result_scheduled_samples_per_sec: 4751.78, Result is INVALID" },
"A100-PCIe-80GBx4_TRT_Triton-triton_k_99_9_MaxP-SingleStream": { "bert": "result_90.00_percentile_latency_ns: 2202969, Result is VALID" }
"benchmark_full": "bert-99",
"benchmark_short": "bert",
"config_name": "DGX-A100_A100-SXM4-40GBx8_TRT-custom_k_99_MaxP-Server",
"detected_system": "SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name=\"AMD EPYC 7V12 64-Core Processor\", architecture=CPUArchitecture.x86_64, core_count=48, threads_per_core=1): 2}), host_mem_conf=MemoryConfiguration(host_memory_capacity=Memory(quantity=928.7656999999999, byte_suffix=ByteSuffix.GB), comparison_tolerance=0.05), accelerator_conf=AcceleratorConfiguration(layout={GPU(name=\"NVIDIA A100-SXM4-40GB\", accelerator_type=AcceleratorType.Discrete, vram=Memory(quantity=40.0, byte_suffix=ByteSuffix.GiB), max_power_limit=400.0, pci_id=\"0x20B010DE\", compute_sm=80): 8}), numa_conf=NUMAConfiguration(numa_nodes={}, num_numa_nodes=4), system_id=\"DGX-A100_A100-SXM4-40GBx8\")",
"early_stopping_met": true,
"effective_min_duration_ms": 600000,
"effective_min_query_count": 100,
"result_completed_samples_per_sec": 25405.6,
"result_validity": "VALID",
"satisfies_query_constraint": false,
"scenario": "Server",
"scenario_key": "result_completed_samples_per_sec",
"summary_string": "result_completed_samples_per_sec: 25405.6, Result is VALID, 10-min runtime requirement met: True",
"system_name": "DGX-A100_A100-SXM4-40GBx8_TRT",
"tensorrt_version": "10.2.0",
"test_mode": "PerformanceOnly"
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ public async Task MLPerfWorkloadProfileExecutesTheExpectedDependenciesAndReboot(
{
$"sudo apt update",
$"sudo apt install build-essential -yq",
$"sudo wget https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run",
$"sudo sh cuda_12.0.0_525.60.13_linux.run --silent",
$"sudo wget https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run",
$"sudo sh cuda_12.4.0_550.54.14_linux.run --silent",
$"sudo bash -c \"echo 'export PATH=/usr/local/cuda-11.6/bin${{PATH:+:${{PATH}}}}' | sudo tee -a /home/[a-z]+/.bashrc\"",
$"bash -c \"echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.6/lib64${{LD_LIBRARY_PATH:+:${{LD_LIBRARY_PATH}}}}' " +
"| sudo tee -a /home/[a-z]+/.bashrc\""
Expand Down Expand Up @@ -80,7 +80,6 @@ public async Task MLPerfWorkloadProfileExecutesTheExpectedRemainingDependenciesA

this.mockFixture.Setup(PlatformID.Unix);
this.mockFixture.SetupDisks(withRemoteDisks: true);
this.mockFixture.SetupWorkloadPackage("mlperf", expectedFiles: @"closed/NVIDIA/Makefile");

string expectedStateId = nameof(CudaAndNvidiaGPUDriverInstallation);
await this.mockFixture.StateManager.SaveStateAsync(expectedStateId, JObject.Parse("{ \"any\": \"state\" }"), CancellationToken.None)
Expand Down Expand Up @@ -125,32 +124,14 @@ private IEnumerable<string> GetExpectedCommands()
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_data BENCHMARKS=bert""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_model BENCHMARKS=bert""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make preprocess_data BENCHMARKS=bert""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_data BENCHMARKS=rnnt""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_model BENCHMARKS=rnnt""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make preprocess_data BENCHMARKS=rnnt""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_data BENCHMARKS=ssd-mobilenet""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_model BENCHMARKS=ssd-mobilenet""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make preprocess_data BENCHMARKS=ssd-mobilenet""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_data BENCHMARKS=ssd-resnet34""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_model BENCHMARKS=ssd-resnet34""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make preprocess_data BENCHMARKS=ssd-resnet34""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_data BENCHMARKS=3d-unet""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make download_model BENCHMARKS=3d-unet""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make preprocess_data BENCHMARKS=3d-unet""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make build""",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=bert --scenarios=Offline,Server,SingleStream --config_ver=default --test_mode=PerformanceOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=bert --scenarios=Offline,Server,SingleStream --config_ver=default --test_mode=AccuracyOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=bert --scenarios=Offline,Server,SingleStream --config_ver=high_accuracy --test_mode=PerformanceOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=bert --scenarios=Offline,Server,SingleStream --config_ver=high_accuracy --test_mode=AccuracyOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=bert --scenarios=Offline,Server,SingleStream --config_ver=triton --test_mode=PerformanceOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=bert --scenarios=Offline,Server,SingleStream --config_ver=triton --test_mode=AccuracyOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=bert --scenarios=Offline,Server,SingleStream --config_ver=high_accuracy_triton --test_mode=PerformanceOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=bert --scenarios=Offline,Server,SingleStream --config_ver=high_accuracy_triton --test_mode=AccuracyOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=ssd-mobilenet --scenarios=Offline,MultiStream,SingleStream --config_ver=default --test_mode=PerformanceOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=ssd-mobilenet --scenarios=Offline,MultiStream,SingleStream --config_ver=default --test_mode=AccuracyOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=ssd-mobilenet --scenarios=Offline,MultiStream,SingleStream --config_ver=triton --test_mode=PerformanceOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=ssd-mobilenet --scenarios=Offline,MultiStream,SingleStream --config_ver=triton --test_mode=AccuracyOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=ssd-resnet34 --scenarios=Offline,Server,SingleStream,MultiStream --config_ver=default --test_mode=PerformanceOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=ssd-resnet34 --scenarios=Offline,Server,SingleStream,MultiStream --config_ver=default --test_mode=AccuracyOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=ssd-resnet34 --scenarios=Offline,Server,SingleStream,MultiStream --config_ver=triton --test_mode=PerformanceOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=ssd-resnet34 --scenarios=Offline,Server,SingleStream,MultiStream --config_ver=triton --test_mode=AccuracyOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=3d-unet --scenarios=Offline,SingleStream --config_ver=default --test_mode=PerformanceOnly --fast'",
@"sudo docker exec -u [a-z]+ mlperf-inference-[a-z]+-x86_64 sudo bash -c ""export MLPERF_SCRATCH_PATH=(.*)/scratch && make run RUN_ARGS='--benchmarks=3d-unet --scenarios=Offline,SingleStream --config_ver=default --test_mode=AccuracyOnly --fast'"
};
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"accuracy": [
{
"name": "F1",
"pass": true,
"threshold": 89.96526,
"value": 90.2147015680108
}
],
"accuracy_pass": true,
"benchmark_full": "bert-99",
"benchmark_short": "bert",
"config_name": "DGX-A100_A100-SXM4-40GBx8_TRT-custom_k_99_MaxP-Offline",
"detected_system": "SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name=\"AMD EPYC 7V12 64-Core Processor\", architecture=CPUArchitecture.x86_64, core_count=48, threads_per_core=1): 2}), host_mem_conf=MemoryConfiguration(host_memory_capacity=Memory(quantity=928.7656999999999, byte_suffix=ByteSuffix.GB), comparison_tolerance=0.05), accelerator_conf=AcceleratorConfiguration(layout={GPU(name=\"NVIDIA A100-SXM4-40GB\", accelerator_type=AcceleratorType.Discrete, vram=Memory(quantity=40.0, byte_suffix=ByteSuffix.GiB), max_power_limit=400.0, pci_id=\"0x20B010DE\", compute_sm=80): 8}), numa_conf=NUMAConfiguration(numa_nodes={}, num_numa_nodes=4), system_id=\"DGX-A100_A100-SXM4-40GBx8\")",
"effective_min_duration_ms": 600000,
"effective_samples_per_query": 19800000,
"satisfies_query_constraint": true,
"scenario": "Offline",
"scenario_key": "result_samples_per_second",
"summary_string": "[PASSED] F1: 90.215 (Threshold=89.965)",
"system_name": "DGX-A100_A100-SXM4-40GBx8_TRT",
"tensorrt_version": "10.2.0",
"test_mode": "AccuracyOnly"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"accuracy": [
{
"name": "F1",
"pass": false,
"threshold": 1.0,
"value": 1.5
}
],
"accuracy_pass": true,
"benchmark_full": "bert-99",
"benchmark_short": "bert",
"config_name": "DGX-A100_A100-SXM4-40GBx8_TRT-custom_k_99_MaxP-Offline",
"detected_system": "SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name=\"AMD EPYC 7V12 64-Core Processor\", architecture=CPUArchitecture.x86_64, core_count=48, threads_per_core=1): 2}), host_mem_conf=MemoryConfiguration(host_memory_capacity=Memory(quantity=928.7656999999999, byte_suffix=ByteSuffix.GB), comparison_tolerance=0.05), accelerator_conf=AcceleratorConfiguration(layout={GPU(name=\"NVIDIA A100-SXM4-40GB\", accelerator_type=AcceleratorType.Discrete, vram=Memory(quantity=40.0, byte_suffix=ByteSuffix.GiB), max_power_limit=400.0, pci_id=\"0x20B010DE\", compute_sm=80): 8}), numa_conf=NUMAConfiguration(numa_nodes={}, num_numa_nodes=4), system_id=\"DGX-A100_A100-SXM4-40GBx8\")",
"effective_min_duration_ms": 600000,
"effective_samples_per_query": 19800000,
"satisfies_query_constraint": true,
"scenario": "Offline",
"scenario_key": "result_samples_per_second",
"summary_string": "[FAILED] F1: 90.215 (Threshold=89.965)",
"system_name": "DGX-A100_A100-SXM4-40GBx8_TRT",
"tensorrt_version": "10.2.0",
"test_mode": "AccuracyOnly"
}

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"benchmark_full": "bert-99",
"benchmark_short": "bert",
"config_name": "DGX-A100_A100-SXM4-40GBx8_TRT-custom_k_99_MaxP-Server",
"detected_system": "SystemConfiguration(host_cpu_conf=CPUConfiguration(layout={CPU(name=\"AMD EPYC 7V12 64-Core Processor\", architecture=CPUArchitecture.x86_64, core_count=48, threads_per_core=1): 2}), host_mem_conf=MemoryConfiguration(host_memory_capacity=Memory(quantity=928.7656999999999, byte_suffix=ByteSuffix.GB), comparison_tolerance=0.05), accelerator_conf=AcceleratorConfiguration(layout={GPU(name=\"NVIDIA A100-SXM4-40GB\", accelerator_type=AcceleratorType.Discrete, vram=Memory(quantity=40.0, byte_suffix=ByteSuffix.GiB), max_power_limit=400.0, pci_id=\"0x20B010DE\", compute_sm=80): 8}), numa_conf=NUMAConfiguration(numa_nodes={}, num_numa_nodes=4), system_id=\"DGX-A100_A100-SXM4-40GBx8\")",
"early_stopping_met": true,
"effective_min_duration_ms": 600000,
"effective_min_query_count": 100,
"result_completed_samples_per_sec": 25405.6,
"result_validity": "VALID",
"satisfies_query_constraint": false,
"scenario": "Server",
"scenario_key": "result_completed_samples_per_sec",
"summary_string": "result_completed_samples_per_sec: 25405.6, Result is VALID, 10-min runtime requirement met: True",
"system_name": "DGX-A100_A100-SXM4-40GBx8_TRT",
"tensorrt_version": "10.2.0",
"test_mode": "PerformanceOnly"
}
Loading

0 comments on commit 112f990

Please sign in to comment.