From 45eb2f8fef2f43381046b99c32d990dbfb937734 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Tue, 11 Feb 2025 11:31:20 -0500 Subject: [PATCH 1/2] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR removes all non-documentation occurrences of gfx940/gfx941 from the llvm directory, and the remaining occurrences in clang. Documentation changes will follow. For SWDEV-512631 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 4 +- .../CodeGenCXX/dynamic-cast-address-space.cpp | 4 +- clang/test/CodeGenOpenCL/amdgpu-features.cl | 6 +- .../builtins-amdgcn-fp-atomics-gfx90a-err.cl | 2 +- .../Misc/target-invalid-cpu-note/amdgcn.c | 2 - llvm/docs/AMDGPUUsage.rst | 4 +- llvm/include/llvm/BinaryFormat/ELF.h | 4 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 56 +++---- llvm/include/llvm/TargetParser/TargetParser.h | 2 - llvm/lib/Object/ELFObjectFile.cpp | 4 - llvm/lib/ObjectYAML/ELFYAML.cpp | 2 - llvm/lib/Target/AMDGPU/AMDGPU.td | 68 +++----- .../AMDGPU/AMDGPUInstructionSelector.cpp | 2 +- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 28 ++-- llvm/lib/Target/AMDGPU/BUFInstructions.td | 22 +-- llvm/lib/Target/AMDGPU/DSInstructions.td | 2 +- .../Disassembler/AMDGPUDisassembler.cpp | 6 +- llvm/lib/Target/AMDGPU/FLATInstructions.td | 96 +++++------ .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 99 +++++------ llvm/lib/Target/AMDGPU/GCNProcessors.td | 14 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 41 ++--- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 21 +-- .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 4 - llvm/lib/Target/AMDGPU/SIDefines.h | 4 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 20 +-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 +- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 45 ++--- llvm/lib/Target/AMDGPU/SISchedule.td | 6 +- .../Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 12 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 8 +- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 6 +- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 8 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 +- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 154 +++++++++--------- llvm/lib/TargetParser/TargetParser.cpp | 10 +- llvm/tools/llvm-readobj/ELFDumper.cpp | 2 - 38 files changed, 352 insertions(+), 436 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 39e295aced96b..e7e5ed77f432b 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -248,13 +248,13 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts") -TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts") +TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx942-insts") TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts") 
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx942-insts") //===----------------------------------------------------------------------===// // Deep learning builtins. diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index 0460352cf7ffc..f07dbd9a29b98 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -112,9 +112,9 @@ const B& f(A *a) { // CHECK: attributes #[[ATTR3]] = { nounwind } // CHECK: attributes #[[ATTR4]] = { noreturn } //. -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) } -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. 
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index d12dcead6fadf..2c9f3c78b1df2 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -83,9 +83,9 @@ // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" -// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" +// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX950: 
"target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl index f651ce349e206..86d84005133bc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl @@ -9,7 +9,7 @@ typedef short __attribute__((ext_vector_type(2))) short2; void test_atomic_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2, __global short2 *addrs2, __local short2 *addrs2l, short2 xs2, __global float *addrf, float xf) { - __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx940-insts}} + __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx942-insts}} __builtin_amdgcn_flat_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2f16' needs target feature atomic-flat-pk-add-16-insts}} __builtin_amdgcn_flat_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2bf16' needs target feature atomic-flat-pk-add-16-insts}} __builtin_amdgcn_global_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2bf16' needs target feature atomic-global-pk-add-bf16-inst}} diff --git a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c index 642d2df211c21..9ef44b2bb403e 100644 --- a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c +++ b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c @@ -45,8 +45,6 @@ // CHECK-SAME: {{^}}, gfx909 // CHECK-SAME: {{^}}, gfx90a // CHECK-SAME: {{^}}, gfx90c -// CHECK-SAME: {{^}}, gfx940 -// CHECK-SAME: {{^}}, gfx941 // CHECK-SAME: {{^}}, gfx942 // CHECK-SAME: {{^}}, gfx950 // CHECK-SAME: {{^}}, gfx1010 diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 899b2cf3b4901..d4742bb1eaf09 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -2221,7 +2221,7 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d 
``gfx1035`` ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034`` ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a`` - ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940`` + *reserved* 0x040 Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100`` ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013`` ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150`` @@ -2232,7 +2232,7 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200`` *reserved* 0x049 Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151`` - ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941`` + *reserved* 0x04b Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942`` *reserved* 0x04d Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201`` diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 4b826bbf58f17..e0415725d9e86 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -814,7 +814,7 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d, EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e, EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f, - EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040, EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041, EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042, EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043, @@ -825,7 +825,7 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049, EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a, - EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4B = 0x04b, EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d, EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e, diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index d5d185ebc12f6..1fc94baf15841 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1074,7 +1074,7 @@ class AMDGPUImageDimIntrinsic.DmaskArgIndex>>]), @@ -1308,7 +1308,7 @@ def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // Note: volatile bit is **not** permitted here. 
@@ -1338,7 +1338,7 @@ class AMDGPURawBufferLoad : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1368,7 +1368,7 @@ class AMDGPURawPtrBufferLoad : DefaultAttrsIntri llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1400,7 +1400,7 @@ class AMDGPUStructBufferLoad : DefaultAttrsIntri llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1418,7 +1418,7 @@ class AMDGPUStructAtomicBufferLoad : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1435,7 +1435,7 @@ class AMDGPUStructPtrBufferLoad : DefaultAttrsIn llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1454,7 +1454,7 @@ class AMDGPUStructPtrAtomicBufferLoad : Intrinsi llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1472,7 +1472,7 @@ class AMDGPURawBufferStore : DefaultAttrsIntrins llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1490,7 +1490,7 @@ class AMDGPURawPtrBufferStore : DefaultAttrsIntr llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: 
volatile op (bit 31, stripped at lowering) @@ -1510,7 +1510,7 @@ class AMDGPUStructBufferStore : DefaultAttrsIntr llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1529,7 +1529,7 @@ class AMDGPUStructPtrBufferStore : DefaultAttrsI llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1615,7 +1615,7 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic< // gfx908 intrinsic def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic; -// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx950, gfx12+. +// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx942, gfx950, gfx12+. def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic; class AMDGPUStructBufferAtomic : Intrinsic < @@ -1714,7 +1714,7 @@ def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz [IntrReadMem, @@ -1730,7 +1730,7 @@ def int_amdgcn_raw_ptr_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1748,7 +1748,7 @@ def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1766,7 +1766,7 @@ def int_amdgcn_raw_ptr_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1784,7 +1784,7 @@ def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 
4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1802,7 +1802,7 @@ def int_amdgcn_struct_ptr_tbuffer_load : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1821,7 +1821,7 @@ def int_amdgcn_struct_ptr_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1840,7 +1840,7 @@ def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1859,7 +1859,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1878,7 +1878,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1901,7 +1901,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -1921,7 +1921,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic < llvm_i32_ty], // auxiliary/cachepolicy(imm): // bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11), // bit 3 = swz, bit 4 = scc (gfx90a) - // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 + // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1 // gfx12+: bits [0-2] = th, bits [3-4] = scope, // bit 6 = swz // all: volatile op (bit 31, stripped at lowering) @@ -2994,7 +2994,7 @@ def int_amdgcn_fdot2_f32_bf16 : // f32 %r = llvm.amdgcn.fdot2c.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + c // TODO: This actually is similar to llvm.amdgcn.fdot2 intrinsics which 
produces -// v_dot2c_f32_f16 on gfx940. Maybe we can consolidate these. +// v_dot2c_f32_f16 on gfx942. Maybe we can consolidate these. def int_amdgcn_fdot2c_f32_bf16 : ClangBuiltin<"__builtin_amdgcn_fdot2c_f32_bf16">, @@ -3237,7 +3237,7 @@ def int_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUMfmaIntrinsic; def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic; -// Note: in gfx940 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA. +// Note: in gfx942 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA. // Three bits corresponding to the neg modifier applied to the respective // source operand. def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic; @@ -3245,7 +3245,7 @@ def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic : diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index 55e7b417428c4..f776b41f3d7ca 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -83,8 +83,6 @@ enum GPUKind : uint32_t { GK_GFX909 = 65, GK_GFX90A = 66, GK_GFX90C = 67, - GK_GFX940 = 68, - GK_GFX941 = 69, GK_GFX942 = 70, GK_GFX950 = 71, diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 2d3d70db50c39..ac25d76709726 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -545,10 +545,6 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { return "gfx90a"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: return "gfx90c"; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: - return "gfx940"; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: - return "gfx941"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: return "gfx942"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 539834fc8d4db..28df9fd565df2 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -609,8 +609,6 @@ void ScalarBitSetTraits::bitset(IO &IO, BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH); - BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH); - BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6439149d801f6..e0da312c51a82 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -372,10 +372,10 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts", // [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO >; -def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts", - "GFX940Insts", +def FeatureGFX942Insts : SubtargetFeature<"gfx942-insts", + "GFX942Insts", "true", - "Additional instructions for GFX940+" + "Additional instructions for GFX942+" >; def FeaturePermlane16Swap : SubtargetFeature<"permlane16-swap", @@ -1040,12 +1040,6 @@ def FeatureVALUTransUseHazard : SubtargetFeature<"valu-trans-use-hazard", "Hazard when TRANS instructions are closely followed by a use of the result" >; -def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1", - "HasForceStoreSC0SC1", - "true", - "Has SC0 and SC1 on stores" ->; - def FeatureSALUFloatInsts : SubtargetFeature<"salu-float", "HasSALUFloatInsts", "true", @@ -1564,7 +1558,7 
@@ def FeatureISAVersion9_0_C : FeatureSet< def FeatureISAVersion9_4_Common : FeatureSet< [FeatureGFX9, FeatureGFX90AInsts, - FeatureGFX940Insts, + FeatureGFX942Insts, FeatureFmaMixInsts, FeatureLDSBankCount32, FeatureDLInsts, @@ -1619,28 +1613,6 @@ def FeatureISAVersion9_5_Common : FeatureSet< FeatureAtomicBufferPkAddBF16Inst ])>; -def FeatureISAVersion9_4_0 : FeatureSet< - !listconcat(FeatureISAVersion9_4_Common.Features, - [ - FeatureAddressableLocalMemorySize65536, - FeatureForceStoreSC0SC1, - FeatureFP8Insts, - FeatureFP8ConversionInsts, - FeatureCvtFP8VOP1Bug, - FeatureXF32Insts - ])>; - -def FeatureISAVersion9_4_1 : FeatureSet< - !listconcat(FeatureISAVersion9_4_Common.Features, - [ - FeatureAddressableLocalMemorySize65536, - FeatureForceStoreSC0SC1, - FeatureFP8Insts, - FeatureFP8ConversionInsts, - FeatureCvtFP8VOP1Bug, - FeatureXF32Insts - ])>; - def FeatureISAVersion9_4_2 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, [ @@ -2075,20 +2047,20 @@ def isGFX8GFX9NotGFX90A : AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; def isGFX90AOnly : - Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">, - AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>; + Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX942Insts()">, + AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX942Insts))>; def isGFX908orGFX90A : - Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">, - AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>; + Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX942Insts()">, + AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX942Insts))>; -def isGFX940Plus : - Predicate<"Subtarget->hasGFX940Insts()">, - AssemblerPredicate<(all_of FeatureGFX940Insts)>; +def isGFX942Plus : + Predicate<"Subtarget->hasGFX942Insts()">, + AssemblerPredicate<(all_of FeatureGFX942Insts)>; -def isNotGFX940Plus : - Predicate<"!Subtarget->hasGFX940Insts()">, - AssemblerPredicate<(all_of (not FeatureGFX940Insts))>; +def isNotGFX942Plus : + Predicate<"!Subtarget->hasGFX942Insts()">, + AssemblerPredicate<(all_of (not FeatureGFX942Insts))>; def HasGFX950Insts : Predicate<"Subtarget->hasGFX950Insts()">, @@ -2102,11 +2074,11 @@ def HasPermlane32Swap : Predicate<"Subtarget->hasPermlane32Swap()">, AssemblerPredicate<(all_of FeaturePermlane32Swap)>; -def isGFX8GFX9NotGFX940 : - Predicate<"!Subtarget->hasGFX940Insts() &&" +def isGFX8GFX9NotGFX942 : + Predicate<"!Subtarget->hasGFX942Insts() &&" "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, - AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>; + AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX942Insts))>; def isGFX8GFX9 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" @@ -2213,9 +2185,9 @@ def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">, AssemblerPredicate<(all_of FeatureGFX9Insts)>; def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">, - AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>; + AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX942Insts)>; def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">, - AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; + AssemblerPredicate<(any_of FeatureGFX942Insts, 
FeatureGFX11Insts)>; def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">, AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>; @@ -2323,7 +2295,7 @@ def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">, def HasFmaakFmamkF32Insts : Predicate<"Subtarget->hasFmaakFmamkF32Insts()">, - AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>; + AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX942Insts)>; def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">, AssemblerPredicate<(all_of FeatureImageInsts)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 3bbbbcf71d8ae..cf3843869808b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4295,7 +4295,7 @@ AMDGPUInstructionSelector::selectVOP3PModsImpl( // TODO: Handle G_FSUB 0 as fneg // TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector. - (void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard() + (void)IsDOT; // DOTs do not use OPSEL on gfx942+, check ST.hasDOTOpSelHazard() // Packed instructions do not have abs modifiers. Mods |= SISrcMods::OP_SEL_1; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 54ed3789326cb..13ace855caee4 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1492,14 +1492,12 @@ class AMDGPUAsmParser : public MCTargetAsmParser { return AMDGPU::isGFX9(getSTI()); } - // TODO: isGFX90A is also true for GFX940. We need to clean it. + // TODO: isGFX90A is also true for GFX942. We need to clean it. bool isGFX90A() const { return AMDGPU::isGFX90A(getSTI()); } - bool isGFX940() const { - return AMDGPU::isGFX940(getSTI()); - } + bool isGFX942() const { return AMDGPU::isGFX942(getSTI()); } bool isGFX9Plus() const { return AMDGPU::isGFX9Plus(getSTI()); @@ -4633,7 +4631,7 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { uint64_t TSFlags = MII.get(Opc).TSFlags; - if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { + if (isGFX942() && (TSFlags & SIInstrFlags::IsDOT)) { int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); if (OpSelIdx != -1) { if (Inst.getOperand(OpSelIdx).getImm() != 0) @@ -4942,12 +4940,12 @@ bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); auto FB = getFeatureBits(); bool UsesNeg = false; - if (FB[AMDGPU::FeatureGFX940Insts]) { + if (FB[AMDGPU::FeatureGFX942Insts]) { switch (Opc) { - case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: - case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: - case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: - case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_acd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_vcd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_acd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_vcd: UsesNeg = true; } } @@ -5062,7 +5060,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, } } - if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { + if (isGFX90A() && !isGFX942() && (CPol & CPol::SCC)) { const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | SIInstrFlags::FLAT; @@ -5081,7 +5079,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, if (TSFlags & SIInstrFlags::IsAtomicRet) { if (!(TSFlags 
& SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { - Error(IDLoc, isGFX940() ? "instruction must use sc0" + Error(IDLoc, isGFX942() ? "instruction must use sc0" : "instruction must use glc"); return false; } @@ -5090,8 +5088,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); S = SMLoc::getFromPointer( - &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); - Error(S, isGFX940() ? "instruction must not use sc0" + &CStr.data()[CStr.find(isGFX942() ? "sc0" : "glc")]); + Error(S, isGFX942() ? "instruction must not use sc0" : "instruction must not use glc"); return false; } @@ -6657,7 +6655,7 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const { Disabling = Id.consume_front("no"); - if (isGFX940() && !Mnemo.starts_with("s_")) { + if (isGFX942() && !Mnemo.starts_with("s_")) { return StringSwitch(Id) .Case("nt", AMDGPU::CPol::NT) .Case("sc0", AMDGPU::CPol::SC0) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index f2686bdf56b41..a48115fbfb272 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1146,7 +1146,7 @@ let OtherPredicates = [HasGFX10_BEncoding] in { >; } -let SubtargetPredicate = isGFX8GFX9NotGFX940 in { +let SubtargetPredicate = isGFX8GFX9NotGFX942 in { def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">; } @@ -1228,7 +1228,7 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores < } // End HasD16LoadStore -let SubtargetPredicate = isNotGFX940Plus in +let SubtargetPredicate = isNotGFX942Plus in def BUFFER_WBINVL1 : MUBUF_Invalidate < "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1 >; @@ -1311,7 +1311,7 @@ let SubtargetPredicate = isGFX7Plus in { // Instruction definitions for CI and newer. 
//===----------------------------------------------------------------------===// -let SubtargetPredicate = isNotGFX940Plus in +let SubtargetPredicate = isNotGFX942Plus in def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol>; @@ -1341,7 +1341,7 @@ let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { } def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { - let SubtargetPredicate = isGFX940Plus; + let SubtargetPredicate = isGFX942Plus; let has_glc = 1; let has_sccb = 1; let InOperandList = (ins CPol_0:$cpol); @@ -3095,9 +3095,9 @@ multiclass MUBUF_Real_gfx90a op, } } -class MUBUF_Real_gfx940 op, MUBUF_Pseudo ps> : - MUBUF_Real_Base_vi { - let AssemblerPredicate = isGFX940Plus; +class MUBUF_Real_gfx942 op, MUBUF_Pseudo ps> : + MUBUF_Real_Base_vi { + let AssemblerPredicate = isGFX942Plus; let DecoderNamespace = "GFX9"; let AsmString = ps.Mnemonic # ps.AsmOperands; @@ -3116,7 +3116,7 @@ multiclass MUBUF_Real_vi_gfx90a op, bit isTFE = 0> : MUBUF_Real_vi { let AssemblerPredicate = isGFX90AOnly in defm NAME : MUBUF_Real_gfx90a; - def _gfx940 : MUBUF_Real_gfx940; + def _gfx942 : MUBUF_Real_gfx942; } } @@ -3314,9 +3314,9 @@ let AsmString = BUFFER_WBL2.Mnemonic, // drop flags defm BUFFER_WBL2 : MUBUF_Real_gfx90a<0x28>; defm BUFFER_INVL2 : MUBUF_Real_gfx90a<0x29>; -let SubtargetPredicate = isGFX940Plus in { -def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>; -def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>; +let SubtargetPredicate = isGFX942Plus in { +def BUFFER_WBL2_gfx942 : MUBUF_Real_gfx942<0x28, BUFFER_WBL2>; +def BUFFER_INV_gfx942 : MUBUF_Real_gfx942<0x29, BUFFER_INV>; } class MTBUF_Real_Base_vi op, MTBUF_Pseudo ps, int Enc> : diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 9ca853befba73..d3487daee364f 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1773,7 +1773,7 @@ def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>; def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>; def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>; -// GFX940+. +// GFX942+. 
def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>; def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>; def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 308ab8e3b82c4..6413dd0d6288a 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -551,7 +551,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } else if (Bytes.size() >= 16 && STI.hasFeature(AMDGPU::FeatureGFX950Insts)) { DecoderUInt128 DecW = eat16Bytes(Bytes); - if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS)) + if (tryDecodeInst(DecoderTableGFX942128, MI, DecW, Address, CS)) break; // Reinitialize Bytes @@ -580,8 +580,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS)) break; - if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) && - tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS)) + if (STI.hasFeature(AMDGPU::FeatureGFX942Insts) && + tryDecodeInst(DecoderTableGFX94264, MI, QW, Address, CS)) break; if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) && diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 8fa708b74dde3..b6a83d0fcbc25 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -802,7 +802,7 @@ defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", } // End SubtargetPredicate = isGFX7GFX10GFX11 -// GFX940-, GFX11-only flat instructions. +// GFX942-, GFX11-only flat instructions. let SubtargetPredicate = HasFlatAtomicFaddF32Inst in { defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; } // End SubtargetPredicate = HasFlatAtomicFaddF32Inst @@ -1836,10 +1836,10 @@ multiclass FLAT_Real_AllAddr_vi op, def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb>; } -class FLAT_Real_gfx940 op, FLAT_Pseudo ps> : +class FLAT_Real_gfx942 op, FLAT_Pseudo ps> : FLAT_Real , - SIMCInstr { - let AssemblerPredicate = isGFX940Plus; + SIMCInstr { + let AssemblerPredicate = isGFX942Plus; let DecoderNamespace = "GFX9"; let Inst{13} = ps.sve; let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); @@ -1847,43 +1847,43 @@ class FLAT_Real_gfx940 op, FLAT_Pseudo ps> : multiclass FLAT_Real_AllAddr_SVE_vi op> { def _vi : FLAT_Real_vi(NAME)> { - let AssemblerPredicate = isGFX8GFX9NotGFX940; - let OtherPredicates = [isGFX8GFX9NotGFX940]; + let AssemblerPredicate = isGFX8GFX9NotGFX942; + let OtherPredicates = [isGFX8GFX9NotGFX942]; } def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR")> { let DecoderNamespace = "GFX9"; } - let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in { - def _VE_gfx940 : FLAT_Real_gfx940(NAME)>; - def _SVS_gfx940 : FLAT_Real_gfx940(NAME#"_SVS")>; - def _ST_gfx940 : FLAT_Real_gfx940(NAME#"_ST")>; + let AssemblerPredicate = isGFX942Plus, SubtargetPredicate = isGFX942Plus in { + def _VE_gfx942 : FLAT_Real_gfx942(NAME)>; + def _SVS_gfx942 : FLAT_Real_gfx942(NAME#"_SVS")>; + def _ST_gfx942 : FLAT_Real_gfx942(NAME#"_ST")>; } } -multiclass FLAT_Real_AllAddr_LDS op, bits<7> pre_gfx940_op, - string pre_gfx940_name = !subst("_lds", "", !cast(NAME).Mnemonic), +multiclass FLAT_Real_AllAddr_LDS op, bits<7> pre_gfx942_op, + string pre_gfx942_name = !subst("_lds", "", !cast(NAME).Mnemonic), bit has_sccb = 
!cast(NAME).has_sccb> { - let OtherPredicates = [isGFX8GFX9NotGFX940] in { - def _vi : FLAT_Real_vi(NAME), has_sccb> { - let AsmString = pre_gfx940_name # !cast(NAME).AsmOperands # " lds"; + let OtherPredicates = [isGFX8GFX9NotGFX942] in { + def _vi : FLAT_Real_vi(NAME), has_sccb> { + let AsmString = pre_gfx942_name # !cast(NAME).AsmOperands # " lds"; } - def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb> { - let AsmString = pre_gfx940_name # !cast(NAME#"_SADDR").AsmOperands # " lds"; + def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb> { + let AsmString = pre_gfx942_name # !cast(NAME#"_SADDR").AsmOperands # " lds"; } } - let SubtargetPredicate = isGFX940Plus in { - def _gfx940 : FLAT_Real_gfx940(NAME)>; - def _SADDR_gfx940 : FLAT_Real_gfx940(NAME#"_SADDR")>; + let SubtargetPredicate = isGFX942Plus in { + def _gfx942 : FLAT_Real_gfx942(NAME)>; + def _SADDR_gfx942 : FLAT_Real_gfx942(NAME#"_SADDR")>; } } -multiclass FLAT_Real_AllAddr_SVE_LDS op, bits<7> pre_gfx940_op> { - defm "" : FLAT_Real_AllAddr_LDS; - let SubtargetPredicate = isGFX940Plus in { - def _SVS_gfx940 : FLAT_Real_gfx940(NAME#"_SVS")>; - def _ST_gfx940 : FLAT_Real_gfx940(NAME#"_ST")>; +multiclass FLAT_Real_AllAddr_SVE_LDS op, bits<7> pre_gfx942_op> { + defm "" : FLAT_Real_AllAddr_LDS; + let SubtargetPredicate = isGFX942Plus in { + def _SVS_gfx942 : FLAT_Real_gfx942(NAME#"_SVS")>; + def _ST_gfx942 : FLAT_Real_gfx942(NAME#"_ST")>; } } @@ -2045,8 +2045,8 @@ defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>; defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>; defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>; -let SubtargetPredicate = isGFX8GFX9NotGFX940 in { - // These instructions are encoded differently on gfx90* and gfx940. +let SubtargetPredicate = isGFX8GFX9NotGFX942 in { + // These instructions are encoded differently on gfx90* and gfx942. defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; } @@ -2060,39 +2060,39 @@ let SubtargetPredicate = isGFX90AOnly in { defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>; } // End SubtargetPredicate = isGFX90AOnly -multiclass FLAT_Real_AllAddr_gfx940 op> { - def _gfx940 : FLAT_Real_gfx940(NAME)>; - def _SADDR_gfx940 : FLAT_Real_gfx940(NAME#"_SADDR")>; +multiclass FLAT_Real_AllAddr_gfx942 op> { + def _gfx942 : FLAT_Real_gfx942(NAME)>; + def _SADDR_gfx942 : FLAT_Real_gfx942(NAME#"_SADDR")>; } -multiclass FLAT_Real_Atomics_gfx940 op> { +multiclass FLAT_Real_Atomics_gfx942 op> { defvar ps = !cast(NAME); - def _gfx940 : FLAT_Real_gfx940(ps.PseudoInstr)>; - def _RTN_gfx940 : FLAT_Real_gfx940(ps.PseudoInstr # "_RTN")>; + def _gfx942 : FLAT_Real_gfx942(ps.PseudoInstr)>; + def _RTN_gfx942 : FLAT_Real_gfx942(ps.PseudoInstr # "_RTN")>; } -multiclass FLAT_Global_Real_Atomics_gfx940 op> : - FLAT_Real_AllAddr_gfx940 { - def _RTN_gfx940 : FLAT_Real_gfx940 (NAME#"_RTN")>; - def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 (NAME#"_SADDR_RTN")>; +multiclass FLAT_Global_Real_Atomics_gfx942 op> : + FLAT_Real_AllAddr_gfx942 { + def _RTN_gfx942 : FLAT_Real_gfx942 (NAME#"_RTN")>; + def _SADDR_RTN_gfx942 : FLAT_Real_gfx942 (NAME#"_SADDR_RTN")>; } -let SubtargetPredicate = isGFX940Plus in { - // These instructions are encoded differently on gfx90* and gfx940. 
- defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>; - defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>; +let SubtargetPredicate = isGFX942Plus in { + // These instructions are encoded differently on gfx90* and gfx942. + defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx942 <0x04d>; + defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx942 <0x04e>; - defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f>; - defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50>; - defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51>; - defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; - defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; - defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; + defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx942<0x4f>; + defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx942<0x50>; + defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx942<0x51>; + defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx942<0x4f>; + defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx942<0x50>; + defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx942<0x51>; defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>; defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>; defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>; defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; -} // End SubtargetPredicate = isGFX940Plus +} // End SubtargetPredicate = isGFX942Plus //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 827598078af53..0f76b0ac5331d 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -124,10 +124,10 @@ static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) { Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64) return false; - if (!ST.hasGFX940Insts()) + if (!ST.hasGFX942Insts()) return true; - return AMDGPU::getMAIIsGFX940XDL(Opcode); + return AMDGPU::getMAIIsGFX942XDL(Opcode); } static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, @@ -870,7 +870,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, // 8 bytes can have there store data over written by the next instruction. const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const int VALUWaitStates = ST.hasGFX940Insts() ? 2 : 1; + const int VALUWaitStates = ST.hasGFX942Insts() ? 
2 : 1; int WaitStatesNeeded = 0; if (!TRI->isVectorRegister(MRI, Def.getReg())) @@ -2251,9 +2251,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { } static int -GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses, +GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx940 | gfx950 + // xdl def cycles | gfx942 | gfx950 // 2 pass | 3 4 // 4 pass | 5 6 // 8 pass | 9 10 @@ -2262,9 +2262,9 @@ GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses, } static int -GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses, +GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx940 | gfx950 + // xdl def cycles | gfx942 | gfx950 // 2 pass | 3 3 // 4 pass | 5 6 // 8 pass | 9 10 @@ -2273,7 +2273,7 @@ GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses, } static int -GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { +GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { // 2 pass -> 2 // 4 pass -> 4 // 8 pass -> 8 @@ -2282,7 +2282,7 @@ GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { } static int -GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { +GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { // 2 pass -> 4 // 4 pass -> 6 // 8 pass -> 10 @@ -2290,9 +2290,9 @@ GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { return NumPasses + 2; } -static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses, +static int GFX942_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx940 | gfx950 + // xdl def cycles | gfx942 | gfx950 // 2 pass | 5 5 // 4 pass | 7 8 // 8 pass | 11 12 @@ -2343,7 +2343,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19; const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; - const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2; + const int GFX942_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2; const int MaxWaitStates = 19; if (!Use.isReg()) @@ -2375,7 +2375,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { unsigned Opc1 = MI1->getOpcode(); int NeedWaitStates = 0; if (OpNo == SrcCIdx) { - if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) { + if (!isDGEMM(Opc) && (!ST.hasGFX942Insts() && isDGEMM(Opc1))) { NeedWaitStates = 0; } else if (FullReg) { if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || @@ -2383,9 +2383,9 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64)) NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates; - else if (ST.hasGFX940Insts() && + else if (ST.hasGFX942Insts() && TSchedModel.computeInstrLatency(MI1) == 2) - NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates; + NeedWaitStates = GFX942_SMFMA4x4WritesVGPRFullSrcCWaitStates; } else { switch (Opc1) { case AMDGPU::V_MFMA_F64_16X16X4F64_e64: @@ -2405,18 +2405,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { break; default: int NumPasses = TSchedModel.computeInstrLatency(MI1); - if (ST.hasGFX940Insts()) { + if (ST.hasGFX942Insts()) { if (isXDL(ST, *MI) && !isXDL(ST, *MI1)) 
break; NeedWaitStates = isXDL(ST, *MI1) ? (isXDL(ST, *MI) - ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates( + ? GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates( NumPasses, ST.hasGFX950Insts()) - : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates( + : GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates( NumPasses, ST.hasGFX950Insts())) - : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( + : GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( NumPasses); break; } @@ -2462,12 +2462,12 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { default: int NumPasses = TSchedModel.computeInstrLatency(MI1); - if (ST.hasGFX940Insts()) { + if (ST.hasGFX942Insts()) { NeedWaitStates = isXDL(ST, *MI1) - ? GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates( + ? GFX942_XDL_N_PassWritesVGPROverlappedSrcABWaitStates( NumPasses, ST.hasGFX950Insts()) - : GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates( + : GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates( NumPasses); break; } @@ -2590,7 +2590,7 @@ int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) { return WaitStatesNeeded; } -static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { +static int GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { // 2 pass -> 4 // 4 pass -> 6 // 8 pass -> 10 @@ -2598,9 +2598,9 @@ static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { return NumPasses + 2; } -static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses, +static int GFX942_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx940 | gfx950 + // xdl def cycles | gfx942 | gfx950 // 2 pass | 5 5 // 4 pass | 7 8 // 8 pass | 11 12 @@ -2608,9 +2608,9 @@ static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses, return NumPasses + 3 + (NumPasses != 2 && IsGFX950); } -static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses, +static int GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx940 | gfx950 + // xdl def cycles | gfx942 | gfx950 // 2 pass | 5 5 // 4 pass | 7 8 // 8 pass | 11 12 @@ -2618,7 +2618,7 @@ static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses, return NumPasses + 3 + (NumPasses != 2 && IsGFX950); } -static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { +static int GFX942_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { // 2 pass -> 4 // 4 pass -> 6 // 8 pass -> 10 @@ -2723,7 +2723,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { // is a DGEMM instruction in-between a VALU and a VMEM instruction it // causes the SQ to incorrectly not insert two wait states between the two // instructions needed to avoid data hazard. - if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX940Insts()) { + if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX942Insts()) { DGEMMAfterVALUWrite = false; if (TRI.isVectorRegister(MRI, Reg)) { int WaitStatesNeededForUse = @@ -2763,12 +2763,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { default: llvm_unreachable("unexpected dgemm"); } - } else if (ST.hasGFX940Insts()) { + } else if (ST.hasGFX942Insts()) { NeedWaitStates = isXDL(ST, *MFMA) - ? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates( + ? 
GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates( NumPasses, ST.hasGFX950Insts()) - : GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates( + : GFX942_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates( NumPasses); } else { switch (HazardDefLatency) { @@ -2813,7 +2813,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int SMFMA16x16WriteVgprVALUWawWaitStates = 11; const int SMFMA32x32WriteVgprVALUWawWaitStates = 19; const int SMFMA4x4ReadVgprVALUWarWaitStates = 1; - const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3; + const int GFX942_XDL4PassReadVgprVALUWarWaitStates = 3; const int SMFMA16x16ReadVgprVALUWarWaitStates = 7; const int SMFMA32x32ReadVgprVALUWarWaitStates = 15; const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6; @@ -2850,12 +2850,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { default: llvm_unreachable("unexpected number of cycles for dgemm"); } - } else if (ST.hasGFX940Insts()) { + } else if (ST.hasGFX942Insts()) { NeedWaitStates = isXDL(ST, *MFMA) - ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates( + ? GFX942_XDL_N_PassWriteVgprVALUWawWaitStates( NumPasses, ST.hasGFX950Insts()) - : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses); + : GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses); } else { switch (NumPasses) { case 2: @@ -2884,7 +2884,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { !MI.readsRegister(Reg, &TRI)) return false; - if (ST.hasGFX940Insts() && !isXDL(ST, MI)) + if (ST.hasGFX942Insts() && !isXDL(ST, MI)) return false; const MachineOperand *SrcC = @@ -2906,16 +2906,21 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA); int NeedWaitStates = MaxWaitStates; switch (HazardDefLatency) { - case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates; - break; - case 4: assert(ST.hasGFX940Insts()); - NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates; - break; - case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates; - break; - case 16: [[fallthrough]]; - default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates; - break; + case 2: + NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates; + break; + case 4: + assert(ST.hasGFX942Insts()); + NeedWaitStates = GFX942_XDL4PassReadVgprVALUWarWaitStates; + break; + case 8: + NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates; + break; + case 16: + [[fallthrough]]; + default: + NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates; + break; } int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse; diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index a86c76bb6075e..0b372e29efe67 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -192,15 +192,7 @@ def : ProcessorModel<"gfx90c", SIQuarterSpeedModel, FeatureISAVersion9_0_C.Features >; -def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel, - FeatureISAVersion9_4_0.Features ->; - -def : ProcessorModel<"gfx941", SIDPGFX940FullSpeedModel, - FeatureISAVersion9_4_1.Features ->; - -def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel, +def : ProcessorModel<"gfx942", SIDPGFX942FullSpeedModel, FeatureISAVersion9_4_2.Features >; @@ -213,8 +205,8 @@ def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel, FeatureISAVersion9_Generic.Features >; -// [gfx940, gfx941, gfx942] -def : ProcessorModel<"gfx9-4-generic", SIDPGFX940FullSpeedModel, +// [gfx942] +def : ProcessorModel<"gfx9-4-generic", 
SIDPGFX942FullSpeedModel, FeatureISAVersion9_4_Generic.Features >; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 342b211199dca..72f3d1abb82fe 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -107,7 +107,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool GFX8Insts = false; bool GFX9Insts = false; bool GFX90AInsts = false; - bool GFX940Insts = false; + bool GFX942Insts = false; bool GFX950Insts = false; bool GFX10Insts = false; bool GFX11Insts = false; @@ -246,7 +246,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasMADIntraFwdBug = false; bool HasVOPDInsts = false; bool HasVALUTransUseHazard = false; - bool HasForceStoreSC0SC1 = false; bool HasRequiredExportPriority = false; bool HasVmemWriteVgprInOrder = false; bool HasAshrPkInsts = false; @@ -654,10 +653,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // The ST addressing mode means no registers are used, either VGPR or SGPR, // but only immediate offset is swizzled and added to the FLAT scratch base. bool hasFlatScratchSTMode() const { - return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts()); + return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX942Insts()); } - bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; } + bool hasFlatScratchSVSMode() const { return GFX942Insts || GFX11Insts; } bool hasScalarFlatScratchInsts() const { return ScalarFlatScratchInsts; @@ -676,9 +675,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return GFX10_BEncoding; } - bool hasExportInsts() const { - return !hasGFX940Insts(); - } + bool hasExportInsts() const { return !hasGFX942Insts(); } bool hasVINTERPEncoding() const { return GFX11Insts; @@ -1073,7 +1070,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, } bool hasFmaakFmamkF32Insts() const { - return getGeneration() >= GFX10 || hasGFX940Insts(); + return getGeneration() >= GFX10 || hasGFX942Insts(); } bool hasImageInsts() const { @@ -1130,9 +1127,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasMadF16() const; - bool hasMovB64() const { return GFX940Insts; } + bool hasMovB64() const { return GFX942Insts; } - bool hasLshlAddB64() const { return GFX940Insts; } + bool hasLshlAddB64() const { return GFX942Insts; } bool enableSIScheduler() const { return EnableSIScheduler; @@ -1216,25 +1213,21 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // Shift amount of a 64 bit shift cannot be a highest allocated register // if also at the end of the allocation block. - bool hasShift64HighRegBug() const { - return GFX90AInsts && !GFX940Insts; - } + bool hasShift64HighRegBug() const { return GFX90AInsts && !GFX942Insts; } // Has one cycle hazard on transcendental instruction feeding a // non transcendental VALU. - bool hasTransForwardingHazard() const { return GFX940Insts; } + bool hasTransForwardingHazard() const { return GFX942Insts; } // Has one cycle hazard on a VALU instruction partially writing dst with // a shift of result bits feeding another VALU instruction. - bool hasDstSelForwardingHazard() const { return GFX940Insts; } + bool hasDstSelForwardingHazard() const { return GFX942Insts; } // Cannot use op_sel with v_dot instructions. - bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; } + bool hasDOTOpSelHazard() const { return GFX942Insts || GFX11Insts; } // Does not have HW interlocs for VALU writing and then reading SGPRs. 
- bool hasVDecCoExecHazard() const { - return GFX940Insts; - } + bool hasVDecCoExecHazard() const { return GFX942Insts; } bool hasNSAtoVMEMBug() const { return HasNSAtoVMEMBug; @@ -1264,8 +1257,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasCvtScaleForwardingHazard() const { return GFX950Insts; } - bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; } - bool requiresCodeObjectV6() const { return RequiresCOV6; } bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; } @@ -1297,12 +1288,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasPackedTID() const { return HasPackedTID; } - // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that + // GFX942 is a derivation to GFX90A. hasGFX942Insts() being true implies that // hasGFX90AInsts is also true. - bool hasGFX940Insts() const { return GFX940Insts; } + bool hasGFX942Insts() const { return GFX942Insts; } - // GFX950 is a derivation to GFX940. hasGFX950Insts() implies that - // hasGFX940Insts and hasGFX90AInsts are also true. + // GFX950 is a derivation to GFX942. hasGFX950Insts() implies that + // hasGFX942Insts and hasGFX90AInsts are also true. bool hasGFX950Insts() const { return GFX950Insts; } /// Returns true if the target supports diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 381841f142855..5a72543c8eef1 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -151,15 +151,16 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, } if (Imm & CPol::GLC) - O << ((AMDGPU::isGFX940(STI) && - !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0" - : " glc"); + O << ((AMDGPU::isGFX942(STI) && + !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) + ? " sc0" + : " glc"); if (Imm & CPol::SLC) - O << (AMDGPU::isGFX940(STI) ? " nt" : " slc"); + O << (AMDGPU::isGFX942(STI) ? " nt" : " slc"); if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI)) O << " dlc"; if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI)) - O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc"); + O << (AMDGPU::isGFX942(STI) ? 
" sc1" : " scc"); if (Imm & ~CPol::ALL_pregfx12) O << " /* unexpected cache policy bit */"; } @@ -629,12 +630,12 @@ void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo, if (!Imm) return; - if (AMDGPU::isGFX940(STI)) { + if (AMDGPU::isGFX942(STI)) { switch (MI->getOpcode()) { - case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: - case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: - case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: - case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_acd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_vcd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_acd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_vcd: O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ',' << ((Imm >> 2) & 1) << ']'; return; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 059bab5838526..4a4ad712e304d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -93,8 +93,6 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break; - case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; @@ -180,8 +178,6 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A; case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C; - case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940; - case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941; case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942; case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950; case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index f812ae652b63d..3d5976183c10f 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -42,7 +42,7 @@ enum { GFX10 = 6, SDWA10 = 7, GFX90A = 8, - GFX940 = 9, + GFX942 = 9, GFX11 = 10, GFX12 = 11, }; @@ -542,7 +542,7 @@ enum Id { // HwRegCode, (6) [5:0] ID_EXCP_FLAG_USER = 18, ID_TRAP_CTRL = 19, - // GFX940 specific registers + // GFX942 specific registers ID_XCC_ID = 20, ID_SQ_PERF_SNAPSHOT_DATA = 21, ID_SQ_PERF_SNAPSHOT_DATA1 = 22, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 28debbcfc1ede..8e524ed61bb12 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16825,39 +16825,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { // safe. The message phrasing also should be better. 
if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) { if (AS == AMDGPUAS::FLAT_ADDRESS) { - // gfx940, gfx12 + // gfx942, gfx12 if (Subtarget->hasAtomicFlatPkAdd16Insts() && isV2F16OrV2BF16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); } else if (AMDGPU::isExtendedGlobalAddrSpace(AS)) { - // gfx90a, gfx940, gfx12 + // gfx90a, gfx942, gfx12 if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); - // gfx940, gfx12 + // gfx942, gfx12 if (Subtarget->hasAtomicGlobalPkAddBF16Inst() && isV2BF16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); } else if (AS == AMDGPUAS::BUFFER_FAT_POINTER) { - // gfx90a, gfx940, gfx12 + // gfx90a, gfx942, gfx12 if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); - // While gfx90a/gfx940 supports v2bf16 for global/flat, it does not for + // While gfx90a/gfx942 supports v2bf16 for global/flat, it does not for // buffer. gfx12 does have the buffer version. if (Subtarget->hasAtomicBufferPkAddBF16Inst() && isV2BF16(Ty)) return ReportUnsafeHWInst(AtomicExpansionKind::None); } - // global and flat atomic fadd f64: gfx90a, gfx940. + // global and flat atomic fadd f64: gfx90a, gfx942. if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy()) return ReportUnsafeHWInst(AtomicExpansionKind::None); if (AS != AMDGPUAS::FLAT_ADDRESS) { if (Ty->isFloatTy()) { - // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, + // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx942, // gfx11+. if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts()) return ReportUnsafeHWInst(AtomicExpansionKind::None); - // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+. + // global/buffer atomic fadd f32 rtn: gfx90a, gfx942, gfx11+. if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts()) return ReportUnsafeHWInst(AtomicExpansionKind::None); } else { @@ -16869,7 +16869,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { } } - // flat atomic fadd f32: gfx940, gfx11+. + // flat atomic fadd f32: gfx942, gfx11+. if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) { if (Subtarget->hasFlatAtomicFaddF32Inst()) return ReportUnsafeHWInst(AtomicExpansionKind::None); @@ -16908,7 +16908,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { // float, double restored in gfx10. // double removed again in gfx11, so only f32 for gfx11/gfx12. // - // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but + // For gfx9, gfx90a and gfx942 support f64 for global (same as fadd), but // no f32. 
if (AS == AMDGPUAS::FLAT_ADDRESS) { if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy()) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index baacb5d3d5455..4ef1c4dae0b35 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -9462,8 +9462,8 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { if (ST.hasGFX90AInsts()) { uint16_t NMCOp = (uint16_t)-1; - if (ST.hasGFX940Insts()) - NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX940); + if (ST.hasGFX942Insts()) + NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX942); if (NMCOp == (uint16_t)-1) NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A); if (NMCOp == (uint16_t)-1) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index bb78e77a9dc1a..958fe01c52805 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -28,7 +28,7 @@ def SIEncodingFamily { int GFX10 = 6; int SDWA10 = 7; int GFX90A = 8; - int GFX940 = 9; + int GFX942 = 9; int GFX11 = 10; int GFX12 = 11; } @@ -3106,7 +3106,7 @@ def getMCOpcodeGen : InstrMapping { [!cast(SIEncodingFamily.GFX10)], [!cast(SIEncodingFamily.SDWA10)], [!cast(SIEncodingFamily.GFX90A)], - [!cast(SIEncodingFamily.GFX940)], + [!cast(SIEncodingFamily.GFX942)], [!cast(SIEncodingFamily.GFX11)], [!cast(SIEncodingFamily.GFX12)]]; } diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index be6cff873532b..9a41afcd56f44 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -359,11 +359,6 @@ class SICacheControl { /// Virtual destructor to allow derivations to be deleted. virtual ~SICacheControl() = default; - - virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI, - MachineBasicBlock::iterator &MI) const { - return false; - } }; class SIGfx6CacheControl : public SICacheControl { @@ -470,7 +465,7 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl { Position Pos) const override; }; -class SIGfx940CacheControl : public SIGfx90ACacheControl { +class SIGfx942CacheControl : public SIGfx90ACacheControl { protected: /// Sets SC0 bit to "true" if present in \p MI. 
Returns true if \p MI @@ -492,8 +487,7 @@ class SIGfx940CacheControl : public SIGfx90ACacheControl { } public: - - SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}; + SIGfx942CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}; bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -518,20 +512,6 @@ class SIGfx940CacheControl : public SIGfx90ACacheControl { bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering, Position Pos) const override; - - bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI, - MachineBasicBlock::iterator &MI) const override { - bool Changed = false; - if (ST.hasForceStoreSC0SC1() && - (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH | - SIAtomicAddrSpace::GLOBAL | - SIAtomicAddrSpace::OTHER)) != - SIAtomicAddrSpace::NONE) { - Changed |= enableSC0Bit(MI); - Changed |= enableSC1Bit(MI); - } - return Changed; - } }; class SIGfx10CacheControl : public SIGfx7CacheControl { @@ -958,8 +938,8 @@ bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI, /* static */ std::unique_ptr SICacheControl::create(const GCNSubtarget &ST) { GCNSubtarget::Generation Generation = ST.getGeneration(); - if (ST.hasGFX940Insts()) - return std::make_unique(ST); + if (ST.hasGFX942Insts()) + return std::make_unique(ST); if (ST.hasGFX90AInsts()) return std::make_unique(ST); if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS) @@ -1577,7 +1557,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI, return Changed; } -bool SIGfx940CacheControl::enableLoadCacheBypass( +bool SIGfx942CacheControl::enableLoadCacheBypass( const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const { assert(MI->mayLoad() && !MI->mayStore()); @@ -1621,9 +1601,9 @@ bool SIGfx940CacheControl::enableLoadCacheBypass( return Changed; } -bool SIGfx940CacheControl::enableStoreCacheBypass( - const MachineBasicBlock::iterator &MI, - SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const { +bool SIGfx942CacheControl::enableStoreCacheBypass( + const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace) const { assert(!MI->mayLoad() && MI->mayStore()); bool Changed = false; @@ -1661,7 +1641,7 @@ bool SIGfx940CacheControl::enableStoreCacheBypass( return Changed; } -bool SIGfx940CacheControl::enableRMWCacheBypass( +bool SIGfx942CacheControl::enableRMWCacheBypass( const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const { assert(MI->mayLoad() && MI->mayStore()); @@ -1690,7 +1670,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass( return Changed; } -bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal( +bool SIGfx942CacheControl::enableVolatileAndOrNonTemporal( MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const { // Only handle load and store, not atomic read-modify-write insructions. 
The @@ -1730,7 +1710,7 @@ bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal( return Changed; } -bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, +bool SIGfx942CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, Position Pos) const { @@ -1816,7 +1796,7 @@ bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, return Changed; } -bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI, +bool SIGfx942CacheControl::insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering, @@ -2821,7 +2801,6 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) { Changed |= expandLoad(*MOI, MI); else if (const auto &MOI = MOA.getStoreInfo(MI)) { Changed |= expandStore(*MOI, MI); - Changed |= CC->tryForceStoreSC0SC1(*MOI, MI); } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) Changed |= expandAtomicFence(*MOI, MI); else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 117add324db56..2a374b360b04a 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -94,7 +94,7 @@ class SISchedMachineModel : SchedMachineModel { def SIFullSpeedModel : SISchedMachineModel; def SIQuarterSpeedModel : SISchedMachineModel; def SIDPFullSpeedModel : SISchedMachineModel; -def SIDPGFX940FullSpeedModel : SISchedMachineModel; +def SIDPGFX942FullSpeedModel : SISchedMachineModel; def SIDPGFX950FullSpeedModel : SISchedMachineModel; def GFX10SpeedModel : SISchedMachineModel; def GFX11SpeedModel : SISchedMachineModel; @@ -276,7 +276,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>; } // End SchedModel = SIDPFullSpeedModel -let SchedModel = SIDPGFX940FullSpeedModel in { +let SchedModel = SIDPGFX942FullSpeedModel in { defm : SICommonWriteRes; @@ -308,7 +308,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>; def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>; def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>; -} // End SchedModel = SIDPGFX940FullSpeedModel +} // End SchedModel = SIDPGFX942FullSpeedModel let SchedModel = SIDPGFX950FullSpeedModel in { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index a8e4ce133ffbc..373c6be33e8a8 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -216,12 +216,12 @@ static constexpr CustomOperand Operands[] = { {{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus}, {{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus}, - // GFX940 specific registers - {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940}, - {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940}, - {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940}, - {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940}, - {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940}, + // GFX942 specific registers + {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX942}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX942}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX942}, + {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX942}, + 
{{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX942}, // Aliases {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10}, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 59afcbed35294..97d4210767a11 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -542,9 +542,9 @@ bool getMAIIsDGEMM(unsigned Opc) { return Info ? Info->is_dgemm : false; } -bool getMAIIsGFX940XDL(unsigned Opc) { +bool getMAIIsGFX942XDL(unsigned Opc) { const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); - return Info ? Info->is_gfx940_xdl : false; + return Info ? Info->is_gfx942_xdl : false; } uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) { @@ -2283,8 +2283,8 @@ bool isGFX90A(const MCSubtargetInfo &STI) { return STI.hasFeature(AMDGPU::FeatureGFX90AInsts); } -bool isGFX940(const MCSubtargetInfo &STI) { - return STI.hasFeature(AMDGPU::FeatureGFX940Insts); +bool isGFX942(const MCSubtargetInfo &STI) { + return STI.hasFeature(AMDGPU::FeatureGFX942Insts); } bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index e458b6b9604b6..8acb802ffe311 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -99,7 +99,7 @@ struct GcnBufferFormatInfo { struct MAIInstInfo { uint16_t Opcode; bool is_dgemm; - bool is_gfx940_xdl; + bool is_gfx942_xdl; }; struct MFMA_F8F6F4_Info { @@ -584,7 +584,7 @@ LLVM_READONLY bool getMAIIsDGEMM(unsigned Opc); LLVM_READONLY -bool getMAIIsGFX940XDL(unsigned Opc); +bool getMAIIsGFX942XDL(unsigned Opc); struct CanBeVOPD { bool X; @@ -1363,7 +1363,7 @@ bool isGFX10_BEncoding(const MCSubtargetInfo &STI); bool hasGFX10_3Insts(const MCSubtargetInfo &STI); bool isGFX10_3_GFX11(const MCSubtargetInfo &STI); bool isGFX90A(const MCSubtargetInfo &STI); -bool isGFX940(const MCSubtargetInfo &STI); +bool isGFX942(const MCSubtargetInfo &STI); bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); bool hasMAIInsts(const MCSubtargetInfo &STI); bool hasVOPD(const MCSubtargetInfo &STI); diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index a407ae797a48b..1a3a7ec52c3b2 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -238,7 +238,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> { let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>; -let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in +let SubtargetPredicate = isGFX942Plus, SchedRW = [Write64Bit] in defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>; } // End isMoveImm = 1 @@ -1558,7 +1558,7 @@ multiclass VOP1_OpSel_Real_e32e64_gfx9 op> { defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; -let AssemblerPredicate = isGFX940Plus in +let AssemblerPredicate = isGFX942Plus in defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 900c91731aa1b..95ff1165a4ce3 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -2367,8 +2367,8 @@ multiclass VOP2_Real_MADK_vi op> { VOP2_MADKe(NAME).Pfl>; } -multiclass VOP2_Real_MADK_gfx940 op> { - def _gfx940 : 
VOP2_Real(NAME), SIEncodingFamily.GFX940>, +multiclass VOP2_Real_MADK_gfx942 op> { + def _gfx942 : VOP2_Real(NAME), SIEncodingFamily.GFX942>, VOP2_MADKe(NAME).Pfl> { let DecoderNamespace = "GFX9"; } @@ -2668,8 +2668,8 @@ let IsSingle = 1 in { } let SubtargetPredicate = HasFmaakFmamkF32Insts in { -defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>; -defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>; +defm V_FMAMK_F32 : VOP2_Real_MADK_gfx942 <0x17>; +defm V_FMAAK_F32 : VOP2_Real_MADK_gfx942 <0x18>; } multiclass VOP2_Real_DOT_ACC_gfx9 op> : Base_VOP2_Real_e32e64_vi { diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index afafc2ecccfaf..3824383aeace0 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -687,7 +687,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile>; let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0, @@ -705,7 +705,7 @@ let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0, // These instructions have non-standard use of op_sel. In particular they are // using op_sel bits 2 and 3 while only having two sources. Therefore dummy // src2 is used to hold the op_sel value. - let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX940Plus in { + let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX942Plus in { defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>; defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>; } @@ -734,7 +734,7 @@ foreach Index = [0, -1] in { def : Cvt_PK_F8_F32_Pat; } -let SubtargetPredicate = isGFX940Plus in { +let SubtargetPredicate = isGFX942Plus in { foreach Index = [0, 1, 2, 3] in { def : Cvt_SR_F8_F32_Pat; def : Cvt_SR_F8_F32_Pat; @@ -766,7 +766,7 @@ def : GCNPat< (DivergentBinFrag i32:$src0, IsPow2Plus1:$src1), (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>; -let SubtargetPredicate = isGFX940Plus in +let SubtargetPredicate = isGFX942Plus in def : GCNPat< (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2), (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2) diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 5e825e7259a95..b36002bc3d44c 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -883,7 +883,7 @@ class MAIInst(NAME); bit is_dgemm = 0; - bit is_gfx940_xdl = 0; + bit is_gfx942_xdl = 0; let PseudoInstr = NAME; // FIXME: Why is this not the default } @@ -1008,7 +1008,7 @@ defm V_MFMA_F32_16X16X4F32 : MAIInst<"v_mfma_f32_16x16x4f32", "F32_F32_X4", defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>; defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>; -let is_gfx940_xdl = 1 in { +let is_gfx942_xdl = 1 in { defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>; defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>; defm V_MFMA_F32_16X16X4F16 : MAIInst<"v_mfma_f32_16x16x4f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>; @@ -1029,7 +1029,7 @@ defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32", defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", 
int_amdgcn_mfma_f32_32x32x4bf16>; } -let SubtargetPredicate = HasGFX950Insts, is_gfx940_xdl = 1 in { +let SubtargetPredicate = HasGFX950Insts, is_gfx942_xdl = 1 in { defm V_MFMA_F32_16X16X32_F16 : MAIInst<"v_mfma_f32_16x16x32f16", "F32_V8F16_X32", int_amdgcn_mfma_f32_16x16x32_f16>; defm V_MFMA_F32_32X32X16_F16 : MAIInst<"v_mfma_f32_32x32x16f16", "F32_V8F16_X16", int_amdgcn_mfma_f32_32x32x16_f16>; defm V_MFMA_F32_16X16X32_BF16 : MAIInst<"v_mfma_f32_16x16x32bf16", "F32_V8BF16_X4", int_amdgcn_mfma_f32_16x16x32_bf16>; @@ -1057,7 +1057,7 @@ defm V_MFMA_LD_SCALE_B32 : VOP3PInst<"v_mfma_ld_scale_b32", VOP_MFMA_LD_SCALE>; } let SubtargetPredicate = isGFX90APlus in { - let is_gfx940_xdl = 1 in { + let is_gfx942_xdl = 1 in { defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>; defm V_MFMA_F32_16X16X4BF16_1K : MAIInst<"v_mfma_f32_16x16x4bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>; defm V_MFMA_F32_4X4X4BF16_1K : MAIInst<"v_mfma_f32_4x4x4bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_4x4x4bf16_1k>; @@ -1071,17 +1071,17 @@ let SubtargetPredicate = isGFX90APlus in { } } // End SubtargetPredicate = isGFX90APlus -let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in { +let SubtargetPredicate = isGFX942Plus, is_gfx942_xdl = 1 in { defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>; defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>; -} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 +} // End SubtargetPredicate = isGFX942Plus, is_gfx942_xdl = 1 -let SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 in { +let SubtargetPredicate = HasXF32Insts, is_gfx942_xdl = 1 in { defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>; defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>; -} // End SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 +} // End SubtargetPredicate = HasXF32Insts, is_gfx942_xdl = 1 -let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in { +let SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 in { defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>; defm V_MFMA_F32_16X16X32_BF8_FP8 : MAIInst<"v_mfma_f32_16x16x32_bf8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_fp8>; defm V_MFMA_F32_16X16X32_FP8_BF8 : MAIInst<"v_mfma_f32_16x16x32_fp8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_bf8>; @@ -1090,16 +1090,16 @@ let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in { defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_fp8>; defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_bf8>; defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_fp8>; -} // End SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 +} // End SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 multiclass SMFMACInst { let Constraints = "$vdst = $src2", DisableEncoding = "$src2", - isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx940_xdl = 1 in { + isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx942_xdl = 1 in { def _e64 : 
MAIInst("VOPProfileSMFMAC_" # P), node>; } } -let SubtargetPredicate = isGFX940Plus in { +let SubtargetPredicate = isGFX942Plus in { defm V_SMFMAC_F32_16X16X32_F16 : SMFMACInst<"v_smfmac_f32_16x16x32_f16", "F32_16X16X32_F16", int_amdgcn_smfmac_f32_16x16x32_f16>; defm V_SMFMAC_F32_32X32X16_F16 : SMFMACInst<"v_smfmac_f32_32x32x16_f16", "F32_32X32X16_F16", int_amdgcn_smfmac_f32_32x32x16_f16>; defm V_SMFMAC_F32_16X16X32_BF16 : SMFMACInst<"v_smfmac_f32_16x16x32_bf16", "F32_16X16X32_I16", int_amdgcn_smfmac_f32_16x16x32_bf16>; @@ -1108,7 +1108,7 @@ defm V_SMFMAC_I32_16X16X64_I8 : SMFMACInst<"v_smfmac_i32_16x16x64_i8", defm V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>; } -let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in { +let SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 in { defm V_SMFMAC_F32_16X16X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>; defm V_SMFMAC_F32_16X16X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>; defm V_SMFMAC_F32_16X16X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>; @@ -1117,7 +1117,7 @@ defm V_SMFMAC_F32_32X32X32_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_bf8", defm V_SMFMAC_F32_32X32X32_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>; defm V_SMFMAC_F32_32X32X32_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>; defm V_SMFMAC_F32_32X32X32_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>; -} // End SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 +} // End SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 let SubtargetPredicate = HasGFX950Insts in { defm V_SMFMAC_F32_16X16X64_F16 : SMFMACInst<"v_smfmac_f32_16x16x64_f16", "F32_16X16X64_F16", int_amdgcn_smfmac_f32_16x16x64_f16>; @@ -1140,7 +1140,7 @@ def MAIInstInfoTable : GenericTable { let FilterClass = "MAIInst"; let CppTypeName = "MAIInstInfo"; let Fields = [ - "Opcode", "is_dgemm", "is_gfx940_xdl" + "Opcode", "is_dgemm", "is_gfx942_xdl" ]; let PrimaryKey = ["Opcode"]; @@ -1950,7 +1950,7 @@ multiclass VOP3P_Real_MFMA_gfx90a op> { } } -multiclass VOP3P_Real_MFMA_gfx940_aliases(Op # "_e64"), VOP3_Pseudo PS_VCD = !cast(Op # "_vgprcd" # "_e64"), VOPProfile Pfl_ACD = PS_ACD.Pfl, @@ -1959,59 +1959,59 @@ multiclass VOP3P_Real_MFMA_gfx940_aliases(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst, + (!cast(Op # "_gfx942_acd") Pfl_ACD.DstRC:$vdst, Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2, CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>, PredicateControl; def : InstAlias (Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst, + (!cast(Op # "_gfx942_vcd") Pfl_VCD.DstRC:$vdst, Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2, CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>, PredicateControl; } } } -multiclass VOP3P_Real_MFMA_gfx940 op, string Name = !cast(NAME#"_e64").Mnemonic, +multiclass VOP3P_Real_MFMA_gfx942 op, string Name = !cast(NAME#"_e64").Mnemonic, VOP3_Pseudo PS_ACD = !cast(NAME # "_e64"), VOP3_Pseudo PS_VCD = !cast(NAME # "_vgprcd" # "_e64")> { - let AssemblerPredicate = isGFX940Plus, - DecoderNamespace = "GFX940", + let AssemblerPredicate = isGFX942Plus, + DecoderNamespace = "GFX942", AsmString = Name # PS_ACD.AsmOperands, 
Constraints = "" in { - def _gfx940_acd : VOP3P_Real, + def _gfx942_acd : VOP3P_Real, VOP3Pe_MAI ; - def _gfx940_vcd : VOP3P_Real, + def _gfx942_vcd : VOP3P_Real, VOP3Pe_MAI ; - } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940" + } // End AssemblerPredicate = isGFX942Plus, DecoderNamespace = "GFX942" let SubtargetPredicate = PS_ACD.SubtargetPredicate, OtherPredicates = PS_ACD.OtherPredicates, - AssemblerPredicate = isGFX940Plus + AssemblerPredicate = isGFX942Plus in { - defm : VOP3P_Real_MFMA_gfx940_aliases; + defm : VOP3P_Real_MFMA_gfx942_aliases; if !ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic) then - defm : VOP3P_Real_MFMA_gfx940_aliases; + defm : VOP3P_Real_MFMA_gfx942_aliases; } } -multiclass VOP3P_Real_MFMA_F8F6F4_gfx940 op, string Name = !cast(NAME#"_e64").Mnemonic, +multiclass VOP3P_Real_MFMA_F8F6F4_gfx942 op, string Name = !cast(NAME#"_e64").Mnemonic, VOP3_Pseudo PS_ACD = !cast(NAME # "_e64"), VOP3_Pseudo PS_VCD = !cast(NAME # "_vgprcd" # "_e64")> { defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8"; - let AssemblerPredicate = isGFX940Plus, - DecoderNamespace = "GFX940", + let AssemblerPredicate = isGFX942Plus, + DecoderNamespace = "GFX942", AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in { - def _gfx940_acd : VOP3P_Real, + def _gfx942_acd : VOP3P_Real, VOP3Pe_MAI , - MFMA_F8F6F4_WithSizeTable_Helper; + MFMA_F8F6F4_WithSizeTable_Helper; - def _gfx940_vcd : VOP3P_Real, + def _gfx942_vcd : VOP3P_Real, VOP3Pe_MAI , - MFMA_F8F6F4_WithSizeTable_Helper; - } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940" + MFMA_F8F6F4_WithSizeTable_Helper; + } // End AssemblerPredicate = isGFX942Plus, DecoderNamespace = "GFX942" } multiclass VOP3P_Real_MFMA_gfx950 op, string Name = !cast(NAME#"_e64").Mnemonic, @@ -2019,23 +2019,23 @@ multiclass VOP3P_Real_MFMA_gfx950 op, string Name = !cast(N VOP3_Pseudo PS_VCD = !cast(NAME # "_vgprcd" # "_e64")> { let SubtargetPredicate = HasGFX950Insts, AssemblerPredicate = HasGFX950Insts in { - defm "" : VOP3P_Real_MFMA_gfx940; + defm "" : VOP3P_Real_MFMA_gfx942; } } multiclass VOP3P_Real_MFMA_F8F6F4_gfx950_mc op, string Name> { - defm _f8_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940; + defm _f8_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942; let isAsmParserOnly = true in { // Disable ambiguous disassembly. 
- defm _f8_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940; - defm _f6_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940; - defm _f8_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940; - defm _f4_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940; - defm _f6_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940; - defm _f6_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940; - defm _f4_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940; - defm _f4_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940; + defm _f8_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942; + defm _f6_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942; + defm _f8_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942; + defm _f4_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942; + defm _f6_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942; + defm _f6_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942; + defm _f4_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942; + defm _f4_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942; } } @@ -2046,15 +2046,15 @@ multiclass VOP3PX_Real_ScaledMFMA op> { defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8"; let SubtargetPredicate = HasGFX950Insts, - DecoderNamespace = "GFX940", + DecoderNamespace = "GFX942", AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in { - def _gfx940_acd : VOP3P_Real, + def _gfx942_acd : VOP3P_Real, VOP3PXe , - MFMA_F8F6F4_WithSizeTable_Helper; + MFMA_F8F6F4_WithSizeTable_Helper; - def _gfx940_vcd : VOP3P_Real, + def _gfx942_vcd : VOP3P_Real, VOP3PXe , - MFMA_F8F6F4_WithSizeTable_Helper; + MFMA_F8F6F4_WithSizeTable_Helper; } } @@ -2087,18 +2087,18 @@ multiclass VOP3P_Real_MFMA_vi_gfx90a op> : VOP3P_Real_MFMA_gfx90a , VOP3P_Real_MFMA_vi ; -multiclass VOP3P_Real_MFMA op, string GFX940Name = !cast(NAME#"_e64").Mnemonic> : +multiclass VOP3P_Real_MFMA op, string GFX942Name = !cast(NAME#"_e64").Mnemonic> : VOP3P_Real_MFMA_vi_gfx90a , - VOP3P_Real_MFMA_gfx940 ; + VOP3P_Real_MFMA_gfx942 ; multiclass VOP3P_Real_SMFMAC op, string alias> { - def _gfx940 : VOP3P_Real(NAME#"_e64"), SIEncodingFamily.VI>, + def _gfx942 : VOP3P_Real(NAME#"_e64"), SIEncodingFamily.VI>, VOP3Pe_SMFMAC { - let AssemblerPredicate = isGFX940Plus; + let AssemblerPredicate = isGFX942Plus; let DecoderNamespace = "GFX8"; } def : AMDGPUMnemonicAlias(NAME#"_e64").Mnemonic> { - let AssemblerPredicate = isGFX940Plus; + let AssemblerPredicate = isGFX942Plus; } } @@ -2196,28 +2196,28 @@ defm V_MFMA_F32_32X32X64_F8F6F4 : VOP3P_Real_MFMA_F8F6F4_gfx950_mc <0x2e, "v_mf defm V_MFMA_SCALE_F32_32X32X64_F8F6F4 : VOP3PX_Real_ScaledMFMA_F8F6F4_mc <0x2e>; defm V_DOT2_F32_BF16 : VOP3P_Real_vi<0x1a>; -defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">; -defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">; -defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">; -defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">; - -defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>; -defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>; -defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x72>; -defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x73>; -defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x74>; -defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x75>; -defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x76>; -defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x77>; - -defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">; -defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">; -defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5f, 
"v_mfma_f32_4x4x4_16b_bf16">; -defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx940 <0x60, "v_mfma_f32_32x32x8_bf16">; -defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx940 <0x61, "v_mfma_f32_16x16x16_bf16">; - -defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx940 <0x6e, "v_mfma_f64_16x16x4_f64">; -defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx940 <0x6f, "v_mfma_f64_4x4x4_4b_f64">; +defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx942 <0x56, "v_mfma_i32_32x32x16_i8">; +defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx942 <0x57, "v_mfma_i32_16x16x32_i8">; +defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx942 <0x3e, "v_mfma_f32_16x16x8_xf32">; +defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx942 <0x3f, "v_mfma_f32_32x32x4_xf32">; + +defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx942 <0x70>; +defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx942 <0x71>; +defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx942 <0x72>; +defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx942 <0x73>; +defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx942 <0x74>; +defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx942 <0x75>; +defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx942 <0x76>; +defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx942 <0x77>; + +defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">; +defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">; +defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">; +defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx942 <0x60, "v_mfma_f32_32x32x8_bf16">; +defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx942 <0x61, "v_mfma_f32_16x16x16_bf16">; + +defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx942 <0x6e, "v_mfma_f64_16x16x4_f64">; +defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx942 <0x6f, "v_mfma_f64_4x4x4_4b_f64">; defm V_SMFMAC_F32_16X16X32_F16 : VOP3P_Real_SMFMAC <0x62, "v_smfmac_f32_16x16x32f16">; defm V_SMFMAC_F32_32X32X16_F16 : VOP3P_Real_SMFMAC <0x64, "v_smfmac_f32_32x32x16f16">; diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 0a605dfd017cb..13cb0d6461eb8 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -104,8 +104,6 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, {{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, - {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, - {{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, @@ -260,8 +258,6 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { case GK_GFX909: return {9, 0, 9}; case GK_GFX90A: return {9, 0, 10}; case GK_GFX90C: return {9, 0, 12}; - case GK_GFX940: return {9, 4, 0}; - case GK_GFX941: return {9, 4, 1}; case 
GK_GFX942: return {9, 4, 2}; case GK_GFX950: return {9, 5, 0}; case GK_GFX1010: return {10, 1, 0}; @@ -362,7 +358,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["gfx8-insts"] = true; Features["gfx9-insts"] = true; Features["gfx90a-insts"] = true; - Features["gfx940-insts"] = true; + Features["gfx942-insts"] = true; Features["gfx950-insts"] = true; Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; @@ -506,15 +502,13 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["gfx950-insts"] = true; [[fallthrough]]; case GK_GFX942: - case GK_GFX941: - case GK_GFX940: Features["fp8-insts"] = true; Features["fp8-conversion-insts"] = true; if (Kind != GK_GFX950) Features["xf32-insts"] = true; [[fallthrough]]; case GK_GFX9_4_GENERIC: - Features["gfx940-insts"] = true; + Features["gfx942-insts"] = true; Features["atomic-ds-pk-add-16-insts"] = true; Features["atomic-flat-pk-add-16-insts"] = true; Features["atomic-global-pk-add-bf16-inst"] = true; diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 2128a95510f22..07001daec25a4 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1616,8 +1616,6 @@ const EnumEntry ElfHeaderMipsFlags[] = { ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \ From 55eb9b355497800cec0754e20798ad4123aba265 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Wed, 12 Feb 2025 03:44:08 -0500 Subject: [PATCH 2/2] Revert part of the initial PR to leave the subtarget feature alone. Also revert the FeatureForceStoreSC0SC1 removal so that it can be handled in a separate PR. 
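
Illustrative note (not part of the upstream change): a minimal, standalone C++ sketch of what "leave the subtarget feature alone" amounts to after this series. All names here (ToyGCNSubtarget, toySubtargetFor) are hypothetical stand-ins rather than LLVM API; the only point is that the feature bit keeps its gfx940-insts spelling while gfx942, gfx950, and gfx9-4-generic remain the targets that enable it.

  // Hypothetical toy model, not LLVM's GCNSubtarget: the feature field keeps
  // the GFX940Insts name that this commit restores.
  #include <cassert>
  #include <string>

  struct ToyGCNSubtarget {
    bool GFX940Insts = false; // "gfx940-insts" spelling is kept
    bool hasGFX940Insts() const { return GFX940Insts; }
  };

  static ToyGCNSubtarget toySubtargetFor(const std::string &CPU) {
    ToyGCNSubtarget ST;
    // After PATCH 1/2, gfx940 and gfx941 no longer exist as processors, so
    // only the remaining gfx9.4-family targets and their derivatives would
    // set the bit.
    ST.GFX940Insts =
        CPU == "gfx942" || CPU == "gfx950" || CPU == "gfx9-4-generic";
    return ST;
  }

  int main() {
    assert(toySubtargetFor("gfx942").hasGFX940Insts());
    assert(toySubtargetFor("gfx950").hasGFX940Insts());
    assert(!toySubtargetFor("gfx90a").hasGFX940Insts());
    return 0;
  }

The authoritative behavior is defined by the AMDGPU.td and TargetParser.cpp hunks later in this patch; the sketch only makes the intent of the revert concrete.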
--- clang/include/clang/Basic/BuiltinsAMDGPU.def | 4 +- .../CodeGenCXX/dynamic-cast-address-space.cpp | 4 +- clang/test/CodeGenOpenCL/amdgpu-features.cl | 6 +- .../builtins-amdgcn-fp-atomics-gfx90a-err.cl | 2 +- llvm/lib/Target/AMDGPU/AMDGPU.td | 46 +++--- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 28 ++-- llvm/lib/Target/AMDGPU/BUFInstructions.td | 22 +-- .../Disassembler/AMDGPUDisassembler.cpp | 6 +- llvm/lib/Target/AMDGPU/FLATInstructions.td | 94 +++++------ .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 93 +++++------ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 41 +++-- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 21 ++- llvm/lib/Target/AMDGPU/SIDefines.h | 4 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 +- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 44 +++-- .../Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 10 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 8 +- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 6 +- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 8 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 +- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 154 +++++++++--------- llvm/lib/TargetParser/TargetParser.cpp | 4 +- 24 files changed, 328 insertions(+), 297 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index e7e5ed77f432b..39e295aced96b 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -248,13 +248,13 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts") -TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx942-insts") +TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts") TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx942-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940-insts") //===----------------------------------------------------------------------===// // Deep learning builtins. diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index f07dbd9a29b98..0460352cf7ffc 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -112,9 +112,9 @@ const B& f(A *a) { // CHECK: attributes #[[ATTR3]] = { nounwind } // CHECK: attributes #[[ATTR4]] = { noreturn } //. 
-// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) } -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 2c9f3c78b1df2..d12dcead6fadf 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -83,9 +83,9 @@ // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" -// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX942: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" +// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl index 86d84005133bc..f651ce349e206 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl @@ -9,7 +9,7 @@ typedef short __attribute__((ext_vector_type(2))) short2; void test_atomic_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2, __global short2 *addrs2, __local short2 *addrs2l, short2 xs2, __global float *addrf, float xf) { - __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx940-insts}} + __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx942-insts}} __builtin_amdgcn_flat_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2f16' needs target feature atomic-flat-pk-add-16-insts}} __builtin_amdgcn_flat_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2bf16' needs target feature atomic-flat-pk-add-16-insts}} __builtin_amdgcn_global_atomic_fadd_v2bf16(addrs2, xs2); // 
expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2bf16' needs target feature atomic-global-pk-add-bf16-inst}} diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index e0da312c51a82..3aabca49b249e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -372,10 +372,10 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts", // [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO >; -def FeatureGFX942Insts : SubtargetFeature<"gfx942-insts", - "GFX942Insts", +def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts", + "GFX940Insts", "true", - "Additional instructions for GFX942+" + "Additional instructions for GFX940+" >; def FeaturePermlane16Swap : SubtargetFeature<"permlane16-swap", @@ -1040,6 +1040,12 @@ def FeatureVALUTransUseHazard : SubtargetFeature<"valu-trans-use-hazard", "Hazard when TRANS instructions are closely followed by a use of the result" >; +def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1", + "HasForceStoreSC0SC1", + "true", + "Has SC0 and SC1 on stores" +>; + def FeatureSALUFloatInsts : SubtargetFeature<"salu-float", "HasSALUFloatInsts", "true", @@ -1558,7 +1564,7 @@ def FeatureISAVersion9_0_C : FeatureSet< def FeatureISAVersion9_4_Common : FeatureSet< [FeatureGFX9, FeatureGFX90AInsts, - FeatureGFX942Insts, + FeatureGFX940Insts, FeatureFmaMixInsts, FeatureLDSBankCount32, FeatureDLInsts, @@ -2047,20 +2053,20 @@ def isGFX8GFX9NotGFX90A : AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; def isGFX90AOnly : - Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX942Insts()">, - AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX942Insts))>; + Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">, + AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>; def isGFX908orGFX90A : - Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX942Insts()">, - AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX942Insts))>; + Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">, + AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>; -def isGFX942Plus : - Predicate<"Subtarget->hasGFX942Insts()">, - AssemblerPredicate<(all_of FeatureGFX942Insts)>; +def isGFX940Plus : + Predicate<"Subtarget->hasGFX940Insts()">, + AssemblerPredicate<(all_of FeatureGFX940Insts)>; -def isNotGFX942Plus : - Predicate<"!Subtarget->hasGFX942Insts()">, - AssemblerPredicate<(all_of (not FeatureGFX942Insts))>; +def isNotGFX940Plus : + Predicate<"!Subtarget->hasGFX940Insts()">, + AssemblerPredicate<(all_of (not FeatureGFX940Insts))>; def HasGFX950Insts : Predicate<"Subtarget->hasGFX950Insts()">, @@ -2074,11 +2080,11 @@ def HasPermlane32Swap : Predicate<"Subtarget->hasPermlane32Swap()">, AssemblerPredicate<(all_of FeaturePermlane32Swap)>; -def isGFX8GFX9NotGFX942 : - Predicate<"!Subtarget->hasGFX942Insts() &&" +def isGFX8GFX9NotGFX940 : + Predicate<"!Subtarget->hasGFX940Insts() &&" "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, - AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX942Insts))>; + AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>; def isGFX8GFX9 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" @@ -2185,9 +2191,9 @@ def HasD16LoadStore : 
Predicate<"Subtarget->hasD16LoadStore()">, AssemblerPredicate<(all_of FeatureGFX9Insts)>; def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">, - AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX942Insts)>; + AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>; def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">, - AssemblerPredicate<(any_of FeatureGFX942Insts, FeatureGFX11Insts)>; + AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">, AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>; @@ -2295,7 +2301,7 @@ def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">, def HasFmaakFmamkF32Insts : Predicate<"Subtarget->hasFmaakFmamkF32Insts()">, - AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX942Insts)>; + AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>; def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">, AssemblerPredicate<(all_of FeatureImageInsts)>; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 13ace855caee4..54ed3789326cb 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1492,12 +1492,14 @@ class AMDGPUAsmParser : public MCTargetAsmParser { return AMDGPU::isGFX9(getSTI()); } - // TODO: isGFX90A is also true for GFX942. We need to clean it. + // TODO: isGFX90A is also true for GFX940. We need to clean it. bool isGFX90A() const { return AMDGPU::isGFX90A(getSTI()); } - bool isGFX942() const { return AMDGPU::isGFX942(getSTI()); } + bool isGFX940() const { + return AMDGPU::isGFX940(getSTI()); + } bool isGFX9Plus() const { return AMDGPU::isGFX9Plus(getSTI()); @@ -4631,7 +4633,7 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { uint64_t TSFlags = MII.get(Opc).TSFlags; - if (isGFX942() && (TSFlags & SIInstrFlags::IsDOT)) { + if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); if (OpSelIdx != -1) { if (Inst.getOperand(OpSelIdx).getImm() != 0) @@ -4940,12 +4942,12 @@ bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); auto FB = getFeatureBits(); bool UsesNeg = false; - if (FB[AMDGPU::FeatureGFX942Insts]) { + if (FB[AMDGPU::FeatureGFX940Insts]) { switch (Opc) { - case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_acd: - case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_vcd: - case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_acd: - case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_vcd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: UsesNeg = true; } } @@ -5060,7 +5062,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, } } - if (isGFX90A() && !isGFX942() && (CPol & CPol::SCC)) { + if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | SIInstrFlags::FLAT; @@ -5079,7 +5081,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, if (TSFlags & SIInstrFlags::IsAtomicRet) { if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { - Error(IDLoc, isGFX942() ? "instruction must use sc0" + Error(IDLoc, isGFX940() ? 
"instruction must use sc0" : "instruction must use glc"); return false; } @@ -5088,8 +5090,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); StringRef CStr(S.getPointer()); S = SMLoc::getFromPointer( - &CStr.data()[CStr.find(isGFX942() ? "sc0" : "glc")]); - Error(S, isGFX942() ? "instruction must not use sc0" + &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); + Error(S, isGFX940() ? "instruction must not use sc0" : "instruction must not use glc"); return false; } @@ -6655,7 +6657,7 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const { Disabling = Id.consume_front("no"); - if (isGFX942() && !Mnemo.starts_with("s_")) { + if (isGFX940() && !Mnemo.starts_with("s_")) { return StringSwitch(Id) .Case("nt", AMDGPU::CPol::NT) .Case("sc0", AMDGPU::CPol::SC0) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index a48115fbfb272..f2686bdf56b41 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1146,7 +1146,7 @@ let OtherPredicates = [HasGFX10_BEncoding] in { >; } -let SubtargetPredicate = isGFX8GFX9NotGFX942 in { +let SubtargetPredicate = isGFX8GFX9NotGFX940 in { def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">; } @@ -1228,7 +1228,7 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores < } // End HasD16LoadStore -let SubtargetPredicate = isNotGFX942Plus in +let SubtargetPredicate = isNotGFX940Plus in def BUFFER_WBINVL1 : MUBUF_Invalidate < "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1 >; @@ -1311,7 +1311,7 @@ let SubtargetPredicate = isGFX7Plus in { // Instruction definitions for CI and newer. //===----------------------------------------------------------------------===// -let SubtargetPredicate = isNotGFX942Plus in +let SubtargetPredicate = isNotGFX940Plus in def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol>; @@ -1341,7 +1341,7 @@ let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { } def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { - let SubtargetPredicate = isGFX942Plus; + let SubtargetPredicate = isGFX940Plus; let has_glc = 1; let has_sccb = 1; let InOperandList = (ins CPol_0:$cpol); @@ -3095,9 +3095,9 @@ multiclass MUBUF_Real_gfx90a op, } } -class MUBUF_Real_gfx942 op, MUBUF_Pseudo ps> : - MUBUF_Real_Base_vi { - let AssemblerPredicate = isGFX942Plus; +class MUBUF_Real_gfx940 op, MUBUF_Pseudo ps> : + MUBUF_Real_Base_vi { + let AssemblerPredicate = isGFX940Plus; let DecoderNamespace = "GFX9"; let AsmString = ps.Mnemonic # ps.AsmOperands; @@ -3116,7 +3116,7 @@ multiclass MUBUF_Real_vi_gfx90a op, bit isTFE = 0> : MUBUF_Real_vi { let AssemblerPredicate = isGFX90AOnly in defm NAME : MUBUF_Real_gfx90a; - def _gfx942 : MUBUF_Real_gfx942; + def _gfx940 : MUBUF_Real_gfx940; } } @@ -3314,9 +3314,9 @@ let AsmString = BUFFER_WBL2.Mnemonic, // drop flags defm BUFFER_WBL2 : MUBUF_Real_gfx90a<0x28>; defm BUFFER_INVL2 : MUBUF_Real_gfx90a<0x29>; -let SubtargetPredicate = isGFX942Plus in { -def BUFFER_WBL2_gfx942 : MUBUF_Real_gfx942<0x28, BUFFER_WBL2>; -def BUFFER_INV_gfx942 : MUBUF_Real_gfx942<0x29, BUFFER_INV>; +let SubtargetPredicate = isGFX940Plus in { +def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>; +def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>; } class MTBUF_Real_Base_vi op, MTBUF_Pseudo ps, int Enc> : diff --git 
a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 6413dd0d6288a..308ab8e3b82c4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -551,7 +551,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } else if (Bytes.size() >= 16 && STI.hasFeature(AMDGPU::FeatureGFX950Insts)) { DecoderUInt128 DecW = eat16Bytes(Bytes); - if (tryDecodeInst(DecoderTableGFX942128, MI, DecW, Address, CS)) + if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS)) break; // Reinitialize Bytes @@ -580,8 +580,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS)) break; - if (STI.hasFeature(AMDGPU::FeatureGFX942Insts) && - tryDecodeInst(DecoderTableGFX94264, MI, QW, Address, CS)) + if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) && + tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS)) break; if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) && diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index b6a83d0fcbc25..aa0bd7dba8100 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1836,10 +1836,10 @@ multiclass FLAT_Real_AllAddr_vi op, def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb>; } -class FLAT_Real_gfx942 op, FLAT_Pseudo ps> : +class FLAT_Real_gfx940 op, FLAT_Pseudo ps> : FLAT_Real , - SIMCInstr { - let AssemblerPredicate = isGFX942Plus; + SIMCInstr { + let AssemblerPredicate = isGFX940Plus; let DecoderNamespace = "GFX9"; let Inst{13} = ps.sve; let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); @@ -1847,43 +1847,43 @@ class FLAT_Real_gfx942 op, FLAT_Pseudo ps> : multiclass FLAT_Real_AllAddr_SVE_vi op> { def _vi : FLAT_Real_vi(NAME)> { - let AssemblerPredicate = isGFX8GFX9NotGFX942; - let OtherPredicates = [isGFX8GFX9NotGFX942]; + let AssemblerPredicate = isGFX8GFX9NotGFX940; + let OtherPredicates = [isGFX8GFX9NotGFX940]; } def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR")> { let DecoderNamespace = "GFX9"; } - let AssemblerPredicate = isGFX942Plus, SubtargetPredicate = isGFX942Plus in { - def _VE_gfx942 : FLAT_Real_gfx942(NAME)>; - def _SVS_gfx942 : FLAT_Real_gfx942(NAME#"_SVS")>; - def _ST_gfx942 : FLAT_Real_gfx942(NAME#"_ST")>; + let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in { + def _VE_gfx940 : FLAT_Real_gfx940(NAME)>; + def _SVS_gfx940 : FLAT_Real_gfx940(NAME#"_SVS")>; + def _ST_gfx940 : FLAT_Real_gfx940(NAME#"_ST")>; } } -multiclass FLAT_Real_AllAddr_LDS op, bits<7> pre_gfx942_op, - string pre_gfx942_name = !subst("_lds", "", !cast(NAME).Mnemonic), +multiclass FLAT_Real_AllAddr_LDS op, bits<7> pre_gfx940_op, + string pre_gfx940_name = !subst("_lds", "", !cast(NAME).Mnemonic), bit has_sccb = !cast(NAME).has_sccb> { - let OtherPredicates = [isGFX8GFX9NotGFX942] in { - def _vi : FLAT_Real_vi(NAME), has_sccb> { - let AsmString = pre_gfx942_name # !cast(NAME).AsmOperands # " lds"; + let OtherPredicates = [isGFX8GFX9NotGFX940] in { + def _vi : FLAT_Real_vi(NAME), has_sccb> { + let AsmString = pre_gfx940_name # !cast(NAME).AsmOperands # " lds"; } - def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb> { - let AsmString = pre_gfx942_name # !cast(NAME#"_SADDR").AsmOperands # " lds"; + def _SADDR_vi : FLAT_Real_vi(NAME#"_SADDR"), has_sccb> { + let AsmString = pre_gfx940_name # 
!cast(NAME#"_SADDR").AsmOperands # " lds"; } } - let SubtargetPredicate = isGFX942Plus in { - def _gfx942 : FLAT_Real_gfx942(NAME)>; - def _SADDR_gfx942 : FLAT_Real_gfx942(NAME#"_SADDR")>; + let SubtargetPredicate = isGFX940Plus in { + def _gfx940 : FLAT_Real_gfx940(NAME)>; + def _SADDR_gfx940 : FLAT_Real_gfx940(NAME#"_SADDR")>; } } -multiclass FLAT_Real_AllAddr_SVE_LDS op, bits<7> pre_gfx942_op> { - defm "" : FLAT_Real_AllAddr_LDS; - let SubtargetPredicate = isGFX942Plus in { - def _SVS_gfx942 : FLAT_Real_gfx942(NAME#"_SVS")>; - def _ST_gfx942 : FLAT_Real_gfx942(NAME#"_ST")>; +multiclass FLAT_Real_AllAddr_SVE_LDS op, bits<7> pre_gfx940_op> { + defm "" : FLAT_Real_AllAddr_LDS; + let SubtargetPredicate = isGFX940Plus in { + def _SVS_gfx940 : FLAT_Real_gfx940(NAME#"_SVS")>; + def _ST_gfx940 : FLAT_Real_gfx940(NAME#"_ST")>; } } @@ -2045,8 +2045,8 @@ defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>; defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>; defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>; -let SubtargetPredicate = isGFX8GFX9NotGFX942 in { - // These instructions are encoded differently on gfx90* and gfx942. +let SubtargetPredicate = isGFX8GFX9NotGFX940 in { + // These instructions are encoded differently on gfx90* and gfx94*. defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; } @@ -2060,39 +2060,39 @@ let SubtargetPredicate = isGFX90AOnly in { defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>; } // End SubtargetPredicate = isGFX90AOnly -multiclass FLAT_Real_AllAddr_gfx942 op> { - def _gfx942 : FLAT_Real_gfx942(NAME)>; - def _SADDR_gfx942 : FLAT_Real_gfx942(NAME#"_SADDR")>; +multiclass FLAT_Real_AllAddr_gfx940 op> { + def _gfx940 : FLAT_Real_gfx940(NAME)>; + def _SADDR_gfx940 : FLAT_Real_gfx940(NAME#"_SADDR")>; } -multiclass FLAT_Real_Atomics_gfx942 op> { +multiclass FLAT_Real_Atomics_gfx940 op> { defvar ps = !cast(NAME); - def _gfx942 : FLAT_Real_gfx942(ps.PseudoInstr)>; - def _RTN_gfx942 : FLAT_Real_gfx942(ps.PseudoInstr # "_RTN")>; + def _gfx940 : FLAT_Real_gfx940(ps.PseudoInstr)>; + def _RTN_gfx940 : FLAT_Real_gfx940(ps.PseudoInstr # "_RTN")>; } -multiclass FLAT_Global_Real_Atomics_gfx942 op> : - FLAT_Real_AllAddr_gfx942 { - def _RTN_gfx942 : FLAT_Real_gfx942 (NAME#"_RTN")>; - def _SADDR_RTN_gfx942 : FLAT_Real_gfx942 (NAME#"_SADDR_RTN")>; +multiclass FLAT_Global_Real_Atomics_gfx940 op> : + FLAT_Real_AllAddr_gfx940 { + def _RTN_gfx940 : FLAT_Real_gfx940 (NAME#"_RTN")>; + def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 (NAME#"_SADDR_RTN")>; } -let SubtargetPredicate = isGFX942Plus in { - // These instructions are encoded differently on gfx90* and gfx942. - defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx942 <0x04d>; - defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx942 <0x04e>; +let SubtargetPredicate = isGFX940Plus in { + // These instructions are encoded differently on gfx90* and gfx940. 
+ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>; + defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>; - defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx942<0x4f>; - defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx942<0x50>; - defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx942<0x51>; - defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx942<0x4f>; - defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx942<0x50>; - defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx942<0x51>; + defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f>; + defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50>; + defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51>; + defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; + defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; + defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>; defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>; defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>; defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; -} // End SubtargetPredicate = isGFX942Plus +} // End SubtargetPredicate = isGFX940Plus //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 0f76b0ac5331d..1ff75095b220a 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -124,10 +124,10 @@ static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) { Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64) return false; - if (!ST.hasGFX942Insts()) + if (!ST.hasGFX940Insts()) return true; - return AMDGPU::getMAIIsGFX942XDL(Opcode); + return AMDGPU::getMAIIsGFX940XDL(Opcode); } static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, @@ -870,7 +870,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, // 8 bytes can have there store data over written by the next instruction. const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const int VALUWaitStates = ST.hasGFX942Insts() ? 2 : 1; + const int VALUWaitStates = ST.hasGFX940Insts() ? 
2 : 1; int WaitStatesNeeded = 0; if (!TRI->isVectorRegister(MRI, Def.getReg())) @@ -2251,9 +2251,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { } static int -GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses, +GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx942 | gfx950 + // xdl def cycles | gfx940 | gfx950 // 2 pass | 3 4 // 4 pass | 5 6 // 8 pass | 9 10 @@ -2262,9 +2262,9 @@ GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses, } static int -GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses, +GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses, bool IsGFX950) { - // xdl def cycles | gfx942 | gfx950 + // xdl def cycles | gfx940 | gfx950 // 2 pass | 3 3 // 4 pass | 5 6 // 8 pass | 9 10 @@ -2273,7 +2273,7 @@ GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses, } static int -GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { +GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { // 2 pass -> 2 // 4 pass -> 4 // 8 pass -> 8 @@ -2282,7 +2282,7 @@ GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { } static int -GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { +GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { // 2 pass -> 4 // 4 pass -> 6 // 8 pass -> 10 @@ -2290,7 +2290,7 @@ GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) { return NumPasses + 2; } -static int GFX942_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses, +static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses, bool IsGFX950) { // xdl def cycles | gfx942 | gfx950 // 2 pass | 5 5 @@ -2343,7 +2343,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19; const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; - const int GFX942_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2; + const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2; const int MaxWaitStates = 19; if (!Use.isReg()) @@ -2375,7 +2375,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { unsigned Opc1 = MI1->getOpcode(); int NeedWaitStates = 0; if (OpNo == SrcCIdx) { - if (!isDGEMM(Opc) && (!ST.hasGFX942Insts() && isDGEMM(Opc1))) { + if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) { NeedWaitStates = 0; } else if (FullReg) { if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || @@ -2383,9 +2383,9 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64)) NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates; - else if (ST.hasGFX942Insts() && + else if (ST.hasGFX940Insts() && TSchedModel.computeInstrLatency(MI1) == 2) - NeedWaitStates = GFX942_SMFMA4x4WritesVGPRFullSrcCWaitStates; + NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates; } else { switch (Opc1) { case AMDGPU::V_MFMA_F64_16X16X4F64_e64: @@ -2405,18 +2405,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { break; default: int NumPasses = TSchedModel.computeInstrLatency(MI1); - if (ST.hasGFX942Insts()) { + if (ST.hasGFX940Insts()) { if (isXDL(ST, *MI) && !isXDL(ST, *MI1)) break; NeedWaitStates = isXDL(ST, *MI1) ? (isXDL(ST, *MI) - ? 
GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates( + ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates( NumPasses, ST.hasGFX950Insts()) - : GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates( + : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates( NumPasses, ST.hasGFX950Insts())) - : GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( + : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( NumPasses); break; } @@ -2462,12 +2462,12 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { default: int NumPasses = TSchedModel.computeInstrLatency(MI1); - if (ST.hasGFX942Insts()) { + if (ST.hasGFX940Insts()) { NeedWaitStates = isXDL(ST, *MI1) - ? GFX942_XDL_N_PassWritesVGPROverlappedSrcABWaitStates( + ? GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates( NumPasses, ST.hasGFX950Insts()) - : GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates( + : GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates( NumPasses); break; } @@ -2590,7 +2590,7 @@ int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) { return WaitStatesNeeded; } -static int GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { +static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { // 2 pass -> 4 // 4 pass -> 6 // 8 pass -> 10 @@ -2598,7 +2598,7 @@ static int GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { return NumPasses + 2; } -static int GFX942_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses, +static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses, bool IsGFX950) { // xdl def cycles | gfx942 | gfx950 // 2 pass | 5 5 @@ -2608,7 +2608,7 @@ static int GFX942_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses, return NumPasses + 3 + (NumPasses != 2 && IsGFX950); } -static int GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses, +static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses, bool IsGFX950) { // xdl def cycles | gfx942 | gfx950 // 2 pass | 5 5 @@ -2618,7 +2618,7 @@ static int GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses, return NumPasses + 3 + (NumPasses != 2 && IsGFX950); } -static int GFX942_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { +static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { // 2 pass -> 4 // 4 pass -> 6 // 8 pass -> 10 @@ -2723,7 +2723,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { // is a DGEMM instruction in-between a VALU and a VMEM instruction it // causes the SQ to incorrectly not insert two wait states between the two // instructions needed to avoid data hazard. - if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX942Insts()) { + if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX940Insts()) { DGEMMAfterVALUWrite = false; if (TRI.isVectorRegister(MRI, Reg)) { int WaitStatesNeededForUse = @@ -2763,12 +2763,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { default: llvm_unreachable("unexpected dgemm"); } - } else if (ST.hasGFX942Insts()) { + } else if (ST.hasGFX940Insts()) { NeedWaitStates = isXDL(ST, *MFMA) - ? GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates( + ? 
GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates( NumPasses, ST.hasGFX950Insts()) - : GFX942_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates( + : GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates( NumPasses); } else { switch (HazardDefLatency) { @@ -2813,7 +2813,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int SMFMA16x16WriteVgprVALUWawWaitStates = 11; const int SMFMA32x32WriteVgprVALUWawWaitStates = 19; const int SMFMA4x4ReadVgprVALUWarWaitStates = 1; - const int GFX942_XDL4PassReadVgprVALUWarWaitStates = 3; + const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3; const int SMFMA16x16ReadVgprVALUWarWaitStates = 7; const int SMFMA32x32ReadVgprVALUWarWaitStates = 15; const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6; @@ -2850,12 +2850,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { default: llvm_unreachable("unexpected number of cycles for dgemm"); } - } else if (ST.hasGFX942Insts()) { + } else if (ST.hasGFX940Insts()) { NeedWaitStates = isXDL(ST, *MFMA) - ? GFX942_XDL_N_PassWriteVgprVALUWawWaitStates( + ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates( NumPasses, ST.hasGFX950Insts()) - : GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses); + : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses); } else { switch (NumPasses) { case 2: @@ -2884,7 +2884,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { !MI.readsRegister(Reg, &TRI)) return false; - if (ST.hasGFX942Insts() && !isXDL(ST, MI)) + if (ST.hasGFX940Insts() && !isXDL(ST, MI)) return false; const MachineOperand *SrcC = @@ -2906,21 +2906,16 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA); int NeedWaitStates = MaxWaitStates; switch (HazardDefLatency) { - case 2: - NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates; - break; - case 4: - assert(ST.hasGFX942Insts()); - NeedWaitStates = GFX942_XDL4PassReadVgprVALUWarWaitStates; - break; - case 8: - NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates; - break; - case 16: - [[fallthrough]]; - default: - NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates; - break; + case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates; + break; + case 4: assert(ST.hasGFX940Insts()); + NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates; + break; + case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates; + break; + case 16: [[fallthrough]]; + default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates; + break; } int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 72f3d1abb82fe..f7c5c472c93a5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -107,7 +107,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool GFX8Insts = false; bool GFX9Insts = false; bool GFX90AInsts = false; - bool GFX942Insts = false; + bool GFX940Insts = false; bool GFX950Insts = false; bool GFX10Insts = false; bool GFX11Insts = false; @@ -246,6 +246,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasMADIntraFwdBug = false; bool HasVOPDInsts = false; bool HasVALUTransUseHazard = false; + bool HasForceStoreSC0SC1 = false; bool HasRequiredExportPriority = false; bool HasVmemWriteVgprInOrder = false; bool HasAshrPkInsts = false; @@ -653,10 +654,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // The ST addressing mode means no registers are used, either 
VGPR or SGPR, // but only immediate offset is swizzled and added to the FLAT scratch base. bool hasFlatScratchSTMode() const { - return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX942Insts()); + return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts()); } - bool hasFlatScratchSVSMode() const { return GFX942Insts || GFX11Insts; } + bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; } bool hasScalarFlatScratchInsts() const { return ScalarFlatScratchInsts; @@ -675,7 +676,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return GFX10_BEncoding; } - bool hasExportInsts() const { return !hasGFX942Insts(); } + bool hasExportInsts() const { + return !hasGFX940Insts(); + } bool hasVINTERPEncoding() const { return GFX11Insts; @@ -1070,7 +1073,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, } bool hasFmaakFmamkF32Insts() const { - return getGeneration() >= GFX10 || hasGFX942Insts(); + return getGeneration() >= GFX10 || hasGFX940Insts(); } bool hasImageInsts() const { @@ -1127,9 +1130,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasMadF16() const; - bool hasMovB64() const { return GFX942Insts; } + bool hasMovB64() const { return GFX940Insts; } - bool hasLshlAddB64() const { return GFX942Insts; } + bool hasLshlAddB64() const { return GFX940Insts; } bool enableSIScheduler() const { return EnableSIScheduler; @@ -1213,21 +1216,25 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // Shift amount of a 64 bit shift cannot be a highest allocated register // if also at the end of the allocation block. - bool hasShift64HighRegBug() const { return GFX90AInsts && !GFX942Insts; } + bool hasShift64HighRegBug() const { + return GFX90AInsts && !GFX940Insts; + } // Has one cycle hazard on transcendental instruction feeding a // non transcendental VALU. - bool hasTransForwardingHazard() const { return GFX942Insts; } + bool hasTransForwardingHazard() const { return GFX940Insts; } // Has one cycle hazard on a VALU instruction partially writing dst with // a shift of result bits feeding another VALU instruction. - bool hasDstSelForwardingHazard() const { return GFX942Insts; } + bool hasDstSelForwardingHazard() const { return GFX940Insts; } // Cannot use op_sel with v_dot instructions. - bool hasDOTOpSelHazard() const { return GFX942Insts || GFX11Insts; } + bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; } // Does not have HW interlocs for VALU writing and then reading SGPRs. - bool hasVDecCoExecHazard() const { return GFX942Insts; } + bool hasVDecCoExecHazard() const { + return GFX940Insts; + } bool hasNSAtoVMEMBug() const { return HasNSAtoVMEMBug; @@ -1257,6 +1264,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasCvtScaleForwardingHazard() const { return GFX950Insts; } + bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; } + bool requiresCodeObjectV6() const { return RequiresCOV6; } bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; } @@ -1288,12 +1297,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasPackedTID() const { return HasPackedTID; } - // GFX942 is a derivation to GFX90A. hasGFX942Insts() being true implies that + // GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that // hasGFX90AInsts is also true. - bool hasGFX942Insts() const { return GFX942Insts; } + bool hasGFX940Insts() const { return GFX940Insts; } - // GFX950 is a derivation to GFX942. 
hasGFX950Insts() implies that - // hasGFX942Insts and hasGFX90AInsts are also true. + // GFX950 is a derivation to GFX94*. hasGFX950Insts() implies that + // hasGFX940Insts and hasGFX90AInsts are also true. bool hasGFX950Insts() const { return GFX950Insts; } /// Returns true if the target supports diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 5a72543c8eef1..381841f142855 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -151,16 +151,15 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, } if (Imm & CPol::GLC) - O << ((AMDGPU::isGFX942(STI) && - !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) - ? " sc0" - : " glc"); + O << ((AMDGPU::isGFX940(STI) && + !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0" + : " glc"); if (Imm & CPol::SLC) - O << (AMDGPU::isGFX942(STI) ? " nt" : " slc"); + O << (AMDGPU::isGFX940(STI) ? " nt" : " slc"); if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI)) O << " dlc"; if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI)) - O << (AMDGPU::isGFX942(STI) ? " sc1" : " scc"); + O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc"); if (Imm & ~CPol::ALL_pregfx12) O << " /* unexpected cache policy bit */"; } @@ -630,12 +629,12 @@ void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo, if (!Imm) return; - if (AMDGPU::isGFX942(STI)) { + if (AMDGPU::isGFX940(STI)) { switch (MI->getOpcode()) { - case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_acd: - case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_vcd: - case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_acd: - case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_vcd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: + case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ',' << ((Imm >> 2) & 1) << ']'; return; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 3d5976183c10f..721601efcc804 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -42,7 +42,7 @@ enum { GFX10 = 6, SDWA10 = 7, GFX90A = 8, - GFX942 = 9, + GFX940 = 9, GFX11 = 10, GFX12 = 11, }; @@ -542,7 +542,7 @@ enum Id { // HwRegCode, (6) [5:0] ID_EXCP_FLAG_USER = 18, ID_TRAP_CTRL = 19, - // GFX942 specific registers + // GFX94* specific registers ID_XCC_ID = 20, ID_SQ_PERF_SNAPSHOT_DATA = 21, ID_SQ_PERF_SNAPSHOT_DATA1 = 22, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4ef1c4dae0b35..baacb5d3d5455 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -9462,8 +9462,8 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { if (ST.hasGFX90AInsts()) { uint16_t NMCOp = (uint16_t)-1; - if (ST.hasGFX942Insts()) - NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX942); + if (ST.hasGFX940Insts()) + NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX940); if (NMCOp == (uint16_t)-1) NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A); if (NMCOp == (uint16_t)-1) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 958fe01c52805..bb78e77a9dc1a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -28,7 +28,7 @@ def SIEncodingFamily { int GFX10 = 6; int SDWA10 = 7; int 
GFX90A = 8; - int GFX942 = 9; + int GFX940 = 9; int GFX11 = 10; int GFX12 = 11; } @@ -3106,7 +3106,7 @@ def getMCOpcodeGen : InstrMapping { [!cast(SIEncodingFamily.GFX10)], [!cast(SIEncodingFamily.SDWA10)], [!cast(SIEncodingFamily.GFX90A)], - [!cast(SIEncodingFamily.GFX942)], + [!cast(SIEncodingFamily.GFX940)], [!cast(SIEncodingFamily.GFX11)], [!cast(SIEncodingFamily.GFX12)]]; } diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 9a41afcd56f44..79fb36acc0ea7 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -359,6 +359,11 @@ class SICacheControl { /// Virtual destructor to allow derivations to be deleted. virtual ~SICacheControl() = default; + + virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI, + MachineBasicBlock::iterator &MI) const { + return false; + } }; class SIGfx6CacheControl : public SICacheControl { @@ -465,7 +470,7 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl { Position Pos) const override; }; -class SIGfx942CacheControl : public SIGfx90ACacheControl { +class SIGfx940CacheControl : public SIGfx90ACacheControl { protected: /// Sets SC0 bit to "true" if present in \p MI. Returns true if \p MI @@ -487,7 +492,7 @@ class SIGfx942CacheControl : public SIGfx90ACacheControl { } public: - SIGfx942CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}; + SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}; bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -512,6 +517,20 @@ class SIGfx942CacheControl : public SIGfx90ACacheControl { bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering, Position Pos) const override; + + bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI, + MachineBasicBlock::iterator &MI) const override { + bool Changed = false; + if (ST.hasForceStoreSC0SC1() && + (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH | + SIAtomicAddrSpace::GLOBAL | + SIAtomicAddrSpace::OTHER)) != + SIAtomicAddrSpace::NONE) { + Changed |= enableSC0Bit(MI); + Changed |= enableSC1Bit(MI); + } + return Changed; + } }; class SIGfx10CacheControl : public SIGfx7CacheControl { @@ -938,8 +957,8 @@ bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI, /* static */ std::unique_ptr SICacheControl::create(const GCNSubtarget &ST) { GCNSubtarget::Generation Generation = ST.getGeneration(); - if (ST.hasGFX942Insts()) - return std::make_unique(ST); + if (ST.hasGFX940Insts()) + return std::make_unique(ST); if (ST.hasGFX90AInsts()) return std::make_unique(ST); if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS) @@ -1557,7 +1576,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI, return Changed; } -bool SIGfx942CacheControl::enableLoadCacheBypass( +bool SIGfx940CacheControl::enableLoadCacheBypass( const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const { assert(MI->mayLoad() && !MI->mayStore()); @@ -1601,9 +1620,9 @@ bool SIGfx942CacheControl::enableLoadCacheBypass( return Changed; } -bool SIGfx942CacheControl::enableStoreCacheBypass( - const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, - SIAtomicAddrSpace AddrSpace) const { +bool SIGfx940CacheControl::enableStoreCacheBypass( + const MachineBasicBlock::iterator &MI, + SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const { assert(!MI->mayLoad() && MI->mayStore()); bool Changed 
= false; @@ -1641,7 +1660,7 @@ bool SIGfx942CacheControl::enableStoreCacheBypass( return Changed; } -bool SIGfx942CacheControl::enableRMWCacheBypass( +bool SIGfx940CacheControl::enableRMWCacheBypass( const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const { assert(MI->mayLoad() && MI->mayStore()); @@ -1670,7 +1689,7 @@ bool SIGfx942CacheControl::enableRMWCacheBypass( return Changed; } -bool SIGfx942CacheControl::enableVolatileAndOrNonTemporal( +bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal( MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const { // Only handle load and store, not atomic read-modify-write insructions. The @@ -1710,7 +1729,7 @@ bool SIGfx942CacheControl::enableVolatileAndOrNonTemporal( return Changed; } -bool SIGfx942CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, +bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, Position Pos) const { @@ -1796,7 +1815,7 @@ bool SIGfx942CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, return Changed; } -bool SIGfx942CacheControl::insertRelease(MachineBasicBlock::iterator &MI, +bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering, @@ -2801,6 +2820,7 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) { Changed |= expandLoad(*MOI, MI); else if (const auto &MOI = MOA.getStoreInfo(MI)) { Changed |= expandStore(*MOI, MI); + Changed |= CC->tryForceStoreSC0SC1(*MOI, MI); } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) Changed |= expandAtomicFence(*MOI, MI); else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index 373c6be33e8a8..e433b85489e6e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -217,11 +217,11 @@ static constexpr CustomOperand Operands[] = { {{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus}, // GFX942 specific registers - {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX942}, - {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX942}, - {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX942}, - {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX942}, - {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX942}, + {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940}, // Aliases {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10}, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 97d4210767a11..59afcbed35294 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -542,9 +542,9 @@ bool getMAIIsDGEMM(unsigned Opc) { return Info ? Info->is_dgemm : false; } -bool getMAIIsGFX942XDL(unsigned Opc) { +bool getMAIIsGFX940XDL(unsigned Opc) { const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); - return Info ? 
Info->is_gfx942_xdl : false; + return Info ? Info->is_gfx940_xdl : false; } uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) { @@ -2283,8 +2283,8 @@ bool isGFX90A(const MCSubtargetInfo &STI) { return STI.hasFeature(AMDGPU::FeatureGFX90AInsts); } -bool isGFX942(const MCSubtargetInfo &STI) { - return STI.hasFeature(AMDGPU::FeatureGFX942Insts); +bool isGFX940(const MCSubtargetInfo &STI) { + return STI.hasFeature(AMDGPU::FeatureGFX940Insts); } bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 8acb802ffe311..e458b6b9604b6 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -99,7 +99,7 @@ struct GcnBufferFormatInfo { struct MAIInstInfo { uint16_t Opcode; bool is_dgemm; - bool is_gfx942_xdl; + bool is_gfx940_xdl; }; struct MFMA_F8F6F4_Info { @@ -584,7 +584,7 @@ LLVM_READONLY bool getMAIIsDGEMM(unsigned Opc); LLVM_READONLY -bool getMAIIsGFX942XDL(unsigned Opc); +bool getMAIIsGFX940XDL(unsigned Opc); struct CanBeVOPD { bool X; @@ -1363,7 +1363,7 @@ bool isGFX10_BEncoding(const MCSubtargetInfo &STI); bool hasGFX10_3Insts(const MCSubtargetInfo &STI); bool isGFX10_3_GFX11(const MCSubtargetInfo &STI); bool isGFX90A(const MCSubtargetInfo &STI); -bool isGFX942(const MCSubtargetInfo &STI); +bool isGFX940(const MCSubtargetInfo &STI); bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); bool hasMAIInsts(const MCSubtargetInfo &STI); bool hasVOPD(const MCSubtargetInfo &STI); diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 1a3a7ec52c3b2..a407ae797a48b 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -238,7 +238,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> { let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>; -let SubtargetPredicate = isGFX942Plus, SchedRW = [Write64Bit] in +let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>; } // End isMoveImm = 1 @@ -1558,7 +1558,7 @@ multiclass VOP1_OpSel_Real_e32e64_gfx9 op> { defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; -let AssemblerPredicate = isGFX942Plus in +let AssemblerPredicate = isGFX940Plus in defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 95ff1165a4ce3..900c91731aa1b 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -2367,8 +2367,8 @@ multiclass VOP2_Real_MADK_vi op> { VOP2_MADKe(NAME).Pfl>; } -multiclass VOP2_Real_MADK_gfx942 op> { - def _gfx942 : VOP2_Real(NAME), SIEncodingFamily.GFX942>, +multiclass VOP2_Real_MADK_gfx940 op> { + def _gfx940 : VOP2_Real(NAME), SIEncodingFamily.GFX940>, VOP2_MADKe(NAME).Pfl> { let DecoderNamespace = "GFX9"; } @@ -2668,8 +2668,8 @@ let IsSingle = 1 in { } let SubtargetPredicate = HasFmaakFmamkF32Insts in { -defm V_FMAMK_F32 : VOP2_Real_MADK_gfx942 <0x17>; -defm V_FMAAK_F32 : VOP2_Real_MADK_gfx942 <0x18>; +defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>; +defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>; } multiclass VOP2_Real_DOT_ACC_gfx9 op> : Base_VOP2_Real_e32e64_vi { diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 3824383aeace0..afafc2ecccfaf 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -687,7 +687,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile>;
 
 let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
@@ -705,7 +705,7 @@ let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
   // These instructions have non-standard use of op_sel. In particular they are
   // using op_sel bits 2 and 3 while only having two sources. Therefore dummy
   // src2 is used to hold the op_sel value.
-  let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX940Plus in {
+  let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX942Plus in {
     defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>;
     defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>;
   }
@@ -734,7 +734,7 @@ foreach Index = [0, -1] in {
   def : Cvt_PK_F8_F32_Pat;
 }
 
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = isGFX942Plus in {
 foreach Index = [0, 1, 2, 3] in {
   def : Cvt_SR_F8_F32_Pat;
   def : Cvt_SR_F8_F32_Pat;
@@ -766,7 +766,7 @@ def : GCNPat<
   (DivergentBinFrag i32:$src0, IsPow2Plus1:$src1),
   (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
 
-let SubtargetPredicate = isGFX940Plus in
+let SubtargetPredicate = isGFX942Plus in
 def : GCNPat<
   (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2),
   (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index b36002bc3d44c..5e825e7259a95 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -883,7 +883,7 @@ class MAIInst
   Instruction Opcode = !cast(NAME);
   bit is_dgemm = 0;
-  bit is_gfx940_xdl = 0;
+  bit is_gfx942_xdl = 0;
 
   let PseudoInstr = NAME; // FIXME: Why is this not the default
 }
@@ -1008,7 +1008,7 @@ defm V_MFMA_F32_16X16X4F32 : MAIInst<"v_mfma_f32_16x16x4f32", "F32_F32_X4",
 defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>;
 defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>;
 
-let is_gfx940_xdl = 1 in {
+let is_gfx942_xdl = 1 in {
 defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>;
 defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>;
 defm V_MFMA_F32_16X16X4F16 : MAIInst<"v_mfma_f32_16x16x4f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>;
@@ -1029,7 +1029,7 @@ defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32",
 defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_32x32x4bf16>;
 }
 
-let SubtargetPredicate = HasGFX950Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasGFX950Insts, is_gfx942_xdl = 1 in {
 defm V_MFMA_F32_16X16X32_F16 : MAIInst<"v_mfma_f32_16x16x32f16", "F32_V8F16_X32", int_amdgcn_mfma_f32_16x16x32_f16>;
 defm V_MFMA_F32_32X32X16_F16 : MAIInst<"v_mfma_f32_32x32x16f16", "F32_V8F16_X16", int_amdgcn_mfma_f32_32x32x16_f16>;
 defm V_MFMA_F32_16X16X32_BF16 : MAIInst<"v_mfma_f32_16x16x32bf16", "F32_V8BF16_X4", int_amdgcn_mfma_f32_16x16x32_bf16>;
@@ -1057,7 +1057,7 @@ defm V_MFMA_LD_SCALE_B32 : VOP3PInst<"v_mfma_ld_scale_b32", VOP_MFMA_LD_SCALE>;
 }
 
 let SubtargetPredicate = isGFX90APlus in {
-  let is_gfx940_xdl = 1 in {
+  let is_gfx942_xdl = 1 in {
   defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
   defm V_MFMA_F32_16X16X4BF16_1K : MAIInst<"v_mfma_f32_16x16x4bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>;
   defm V_MFMA_F32_4X4X4BF16_1K : MAIInst<"v_mfma_f32_4x4x4bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_4x4x4bf16_1k>;
@@ -1071,17 +1071,17 @@ let SubtargetPredicate = isGFX90APlus in {
   }
 } // End SubtargetPredicate = isGFX90APlus
 
-let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = isGFX942Plus, is_gfx942_xdl = 1 in {
 defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>;
 defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>;
-} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1
+} // End SubtargetPredicate = isGFX942Plus, is_gfx942_xdl = 1
 
-let SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasXF32Insts, is_gfx942_xdl = 1 in {
 defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
 defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
-} // End SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasXF32Insts, is_gfx942_xdl = 1
 
-let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 in {
 defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>;
 defm V_MFMA_F32_16X16X32_BF8_FP8 : MAIInst<"v_mfma_f32_16x16x32_bf8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_fp8>;
 defm V_MFMA_F32_16X16X32_FP8_BF8 : MAIInst<"v_mfma_f32_16x16x32_fp8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_bf8>;
@@ -1090,16 +1090,16 @@ let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
 defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_fp8>;
 defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_bf8>;
 defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_fp8>;
-} // End SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1
 
 multiclass SMFMACInst {
   let Constraints = "$vdst = $src2", DisableEncoding = "$src2",
-      isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx940_xdl = 1 in {
+      isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx942_xdl = 1 in {
     def _e64 : MAIInst("VOPProfileSMFMAC_" # P), node>;
   }
 }
 
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = isGFX942Plus in {
 defm V_SMFMAC_F32_16X16X32_F16 : SMFMACInst<"v_smfmac_f32_16x16x32_f16", "F32_16X16X32_F16", int_amdgcn_smfmac_f32_16x16x32_f16>;
 defm V_SMFMAC_F32_32X32X16_F16 : SMFMACInst<"v_smfmac_f32_32x32x16_f16", "F32_32X32X16_F16", int_amdgcn_smfmac_f32_32x32x16_f16>;
 defm V_SMFMAC_F32_16X16X32_BF16 : SMFMACInst<"v_smfmac_f32_16x16x32_bf16", "F32_16X16X32_I16", int_amdgcn_smfmac_f32_16x16x32_bf16>;
@@ -1108,7 +1108,7 @@ defm V_SMFMAC_I32_16X16X64_I8 : SMFMACInst<"v_smfmac_i32_16x16x64_i8",
 defm V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>;
 }
 
-let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 in {
 defm V_SMFMAC_F32_16X16X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>;
 defm V_SMFMAC_F32_16X16X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>;
 defm V_SMFMAC_F32_16X16X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>;
@@ -1117,7 +1117,7 @@ defm V_SMFMAC_F32_32X32X32_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_bf8",
 defm V_SMFMAC_F32_32X32X32_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>;
 defm V_SMFMAC_F32_32X32X32_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>;
 defm V_SMFMAC_F32_32X32X32_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>;
-} // End SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1
 
 let SubtargetPredicate = HasGFX950Insts in {
 defm V_SMFMAC_F32_16X16X64_F16 : SMFMACInst<"v_smfmac_f32_16x16x64_f16", "F32_16X16X64_F16", int_amdgcn_smfmac_f32_16x16x64_f16>;
@@ -1140,7 +1140,7 @@ def MAIInstInfoTable : GenericTable {
   let FilterClass = "MAIInst";
   let CppTypeName = "MAIInstInfo";
   let Fields = [
-    "Opcode", "is_dgemm", "is_gfx940_xdl"
+    "Opcode", "is_dgemm", "is_gfx942_xdl"
   ];
 
   let PrimaryKey = ["Opcode"];
@@ -1950,7 +1950,7 @@ multiclass VOP3P_Real_MFMA_gfx90a op> {
   }
 }
 
-multiclass VOP3P_Real_MFMA_gfx940_aliases
+multiclass VOP3P_Real_MFMA_gfx942_aliases
                                           VOP3_Pseudo PS_ACD = !cast(Op # "_e64"),
                                           VOP3_Pseudo PS_VCD = !cast(Op # "_vgprcd" # "_e64"),
                                           VOPProfile Pfl_ACD = PS_ACD.Pfl,
@@ -1959,59 +1959,59 @@ multiclass VOP3P_Real_MFMA_gfx940_aliases
-                  (!cast(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst,
+                  (!cast(Op # "_gfx942_acd") Pfl_ACD.DstRC:$vdst,
                       Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
                       CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>,
            PredicateControl;
 
   def : InstAlias
-                  (!cast(Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst,
+                  (!cast(Op # "_gfx942_vcd") Pfl_VCD.DstRC:$vdst,
                       Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2,
                       CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>,
            PredicateControl;
   }
 }
 }
 
-multiclass VOP3P_Real_MFMA_gfx940 op, string Name = !cast(NAME#"_e64").Mnemonic,
+multiclass VOP3P_Real_MFMA_gfx942 op, string Name = !cast(NAME#"_e64").Mnemonic,
                                   VOP3_Pseudo PS_ACD = !cast(NAME # "_e64"),
                                   VOP3_Pseudo PS_VCD = !cast(NAME # "_vgprcd" # "_e64")> {
-  let AssemblerPredicate = isGFX940Plus,
-      DecoderNamespace = "GFX940",
+  let AssemblerPredicate = isGFX942Plus,
+      DecoderNamespace = "GFX942",
       AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
-    def _gfx940_acd : VOP3P_Real,
+    def _gfx942_acd : VOP3P_Real,
                       VOP3Pe_MAI ;
-    def _gfx940_vcd : VOP3P_Real,
+    def _gfx942_vcd : VOP3P_Real,
                       VOP3Pe_MAI ;
-  } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940"
+  } // End AssemblerPredicate = isGFX942Plus, DecoderNamespace = "GFX942"
 
   let SubtargetPredicate = PS_ACD.SubtargetPredicate,
       OtherPredicates = PS_ACD.OtherPredicates,
-      AssemblerPredicate = isGFX940Plus
+      AssemblerPredicate = isGFX942Plus
       in {
-    defm : VOP3P_Real_MFMA_gfx940_aliases;
+    defm : VOP3P_Real_MFMA_gfx942_aliases;
     if !ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic) then
-      defm : VOP3P_Real_MFMA_gfx940_aliases;
+      defm : VOP3P_Real_MFMA_gfx942_aliases;
   }
 }
 
-multiclass VOP3P_Real_MFMA_F8F6F4_gfx940 op, string Name = !cast(NAME#"_e64").Mnemonic,
+multiclass VOP3P_Real_MFMA_F8F6F4_gfx942 op, string Name = !cast(NAME#"_e64").Mnemonic,
                                          VOP3_Pseudo PS_ACD = !cast(NAME # "_e64"),
                                          VOP3_Pseudo PS_VCD = !cast(NAME # "_vgprcd" # "_e64")> {
   defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
 
-  let AssemblerPredicate = isGFX940Plus,
-      DecoderNamespace = "GFX940",
+  let AssemblerPredicate = isGFX942Plus,
+      DecoderNamespace = "GFX942",
      AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
-    def _gfx940_acd : VOP3P_Real,
+    def _gfx942_acd : VOP3P_Real,
                      VOP3Pe_MAI ,
-                     MFMA_F8F6F4_WithSizeTable_Helper;
+                     MFMA_F8F6F4_WithSizeTable_Helper;
-    def _gfx940_vcd : VOP3P_Real,
+    def _gfx942_vcd : VOP3P_Real,
                      VOP3Pe_MAI ,
-                     MFMA_F8F6F4_WithSizeTable_Helper;
-  } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940"
+                     MFMA_F8F6F4_WithSizeTable_Helper;
+  } // End AssemblerPredicate = isGFX942Plus, DecoderNamespace = "GFX942"
 }
@@ -2019,23 +2019,23 @@ multiclass VOP3P_Real_MFMA_gfx950 op, string Name = !cast(N
                                   VOP3_Pseudo PS_VCD = !cast(NAME # "_vgprcd" # "_e64")> {
   let SubtargetPredicate = HasGFX950Insts,
       AssemblerPredicate = HasGFX950Insts in {
-    defm "" : VOP3P_Real_MFMA_gfx940;
+    defm "" : VOP3P_Real_MFMA_gfx942;
   }
 }
 
 multiclass VOP3P_Real_MFMA_F8F6F4_gfx950_mc op, string Name> {
-  defm _f8_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940;
+  defm _f8_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942;
 
   let isAsmParserOnly = true in { // Disable ambiguous disassembly.
-    defm _f8_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940;
-    defm _f6_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940;
-    defm _f8_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940;
-    defm _f4_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940;
-    defm _f6_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940;
-    defm _f6_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940;
-    defm _f4_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940;
-    defm _f4_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940;
+    defm _f8_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942;
+    defm _f6_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942;
+    defm _f8_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942;
+    defm _f4_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942;
+    defm _f6_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942;
+    defm _f6_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942;
+    defm _f4_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942;
+    defm _f4_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942;
   }
 }
@@ -2046,15 +2046,15 @@ multiclass VOP3PX_Real_ScaledMFMA op> {
   defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
 
   let SubtargetPredicate = HasGFX950Insts,
-      DecoderNamespace = "GFX940",
+      DecoderNamespace = "GFX942",
      AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
-    def _gfx940_acd : VOP3P_Real,
+    def _gfx942_acd : VOP3P_Real,
                      VOP3PXe ,
-                     MFMA_F8F6F4_WithSizeTable_Helper;
+                     MFMA_F8F6F4_WithSizeTable_Helper;
-    def _gfx940_vcd : VOP3P_Real,
+    def _gfx942_vcd : VOP3P_Real,
                      VOP3PXe ,
-                     MFMA_F8F6F4_WithSizeTable_Helper;
+                     MFMA_F8F6F4_WithSizeTable_Helper;
   }
 }
@@ -2087,18 +2087,18 @@ multiclass VOP3P_Real_MFMA_vi_gfx90a op> :
   VOP3P_Real_MFMA_gfx90a ,
   VOP3P_Real_MFMA_vi ;
 
-multiclass VOP3P_Real_MFMA op, string GFX940Name = !cast(NAME#"_e64").Mnemonic> :
+multiclass VOP3P_Real_MFMA op, string GFX942Name = !cast(NAME#"_e64").Mnemonic> :
   VOP3P_Real_MFMA_vi_gfx90a ,
-  VOP3P_Real_MFMA_gfx940 ;
+  VOP3P_Real_MFMA_gfx942 ;
 
 multiclass VOP3P_Real_SMFMAC op, string alias> {
-  def _gfx940 : VOP3P_Real(NAME#"_e64"), SIEncodingFamily.VI>,
+  def _gfx942 : VOP3P_Real(NAME#"_e64"), SIEncodingFamily.VI>,
                 VOP3Pe_SMFMAC {
-    let AssemblerPredicate = isGFX940Plus;
+    let AssemblerPredicate = isGFX942Plus;
     let DecoderNamespace = "GFX8";
   }
   def : AMDGPUMnemonicAlias(NAME#"_e64").Mnemonic> {
-    let AssemblerPredicate = isGFX940Plus;
+    let AssemblerPredicate = isGFX942Plus;
   }
 }
@@ -2196,28 +2196,28 @@ defm V_MFMA_F32_32X32X64_F8F6F4 : VOP3P_Real_MFMA_F8F6F4_gfx950_mc <0x2e, "v_mf
 defm V_MFMA_SCALE_F32_32X32X64_F8F6F4 : VOP3PX_Real_ScaledMFMA_F8F6F4_mc <0x2e>;
 
 defm V_DOT2_F32_BF16 : VOP3P_Real_vi<0x1a>;
-defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
-defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
-defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
-defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">;
-
-defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>;
-defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>;
-defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x72>;
-defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x73>;
-defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x74>;
-defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x75>;
-defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x76>;
-defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x77>;
-
-defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
-defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
-defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
-defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx940 <0x60, "v_mfma_f32_32x32x8_bf16">;
-defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx940 <0x61, "v_mfma_f32_16x16x16_bf16">;
-
-defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx940 <0x6e, "v_mfma_f64_16x16x4_f64">;
-defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx940 <0x6f, "v_mfma_f64_4x4x4_4b_f64">;
+defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx942 <0x56, "v_mfma_i32_32x32x16_i8">;
+defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx942 <0x57, "v_mfma_i32_16x16x32_i8">;
+defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx942 <0x3e, "v_mfma_f32_16x16x8_xf32">;
+defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx942 <0x3f, "v_mfma_f32_32x32x4_xf32">;
+
+defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx942 <0x70>;
+defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx942 <0x71>;
+defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx942 <0x72>;
+defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx942 <0x73>;
+defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx942 <0x74>;
+defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx942 <0x75>;
+defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx942 <0x76>;
+defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx942 <0x77>;
+
+defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
+defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
+defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
+defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx942 <0x60, "v_mfma_f32_32x32x8_bf16">;
+defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx942 <0x61, "v_mfma_f32_16x16x16_bf16">;
+
+defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx942 <0x6e, "v_mfma_f64_16x16x4_f64">;
+defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx942 <0x6f, "v_mfma_f64_4x4x4_4b_f64">;
 
 defm V_SMFMAC_F32_16X16X32_F16 : VOP3P_Real_SMFMAC <0x62, "v_smfmac_f32_16x16x32f16">;
 defm V_SMFMAC_F32_32X32X16_F16 : VOP3P_Real_SMFMAC <0x64, "v_smfmac_f32_32x32x16f16">;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 13cb0d6461eb8..8731a16b88a5c 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -358,7 +358,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
     Features["gfx8-insts"] = true;
     Features["gfx9-insts"] = true;
     Features["gfx90a-insts"] = true;
-    Features["gfx940-insts"] = true;
+    Features["gfx942-insts"] = true;
     Features["gfx950-insts"] = true;
     Features["gfx10-insts"] = true;
    Features["gfx10-3-insts"] = true;
@@ -508,7 +508,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
     Features["xf32-insts"] = true;
     [[fallthrough]];
   case GK_GFX9_4_GENERIC:
-    Features["gfx940-insts"] = true;
+    Features["gfx942-insts"] = true;
     Features["atomic-ds-pk-add-16-insts"] = true;
     Features["atomic-flat-pk-add-16-insts"] = true;
     Features["atomic-global-pk-add-bf16-inst"] = true;