Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm #126763

Open
wants to merge 2 commits into
base: users/ritter-x2a/rm-gfx940-gfx941-clang
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions clang/test/Misc/target-invalid-cpu-note/amdgcn.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@
// CHECK-SAME: {{^}}, gfx909
// CHECK-SAME: {{^}}, gfx90a
// CHECK-SAME: {{^}}, gfx90c
// CHECK-SAME: {{^}}, gfx940
// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
// CHECK-SAME: {{^}}, gfx950
// CHECK-SAME: {{^}}, gfx1010
Expand Down
4 changes: 2 additions & 2 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2221,7 +2221,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could keep this documented

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kzhuravl asked me to do it this way, consistent with the other reserved entries

*reserved* 0x040 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
Expand All @@ -2232,7 +2232,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
*reserved* 0x049 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
*reserved* 0x04b Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
*reserved* 0x04d Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
Expand Down
4 changes: 2 additions & 2 deletions llvm/include/llvm/BinaryFormat/ELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040,
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
Expand All @@ -825,7 +825,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4B = 0x04b,
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
Expand Down
56 changes: 28 additions & 28 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions llvm/include/llvm/TargetParser/TargetParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@ enum GPUKind : uint32_t {
GK_GFX909 = 65,
GK_GFX90A = 66,
GK_GFX90C = 67,
GK_GFX940 = 68,
GK_GFX941 = 69,
GK_GFX942 = 70,
GK_GFX950 = 71,

Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Object/ELFObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,10 +545,6 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx90a";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:
return "gfx90c";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:
return "gfx940";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941:
return "gfx941";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
return "gfx942";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/ObjectYAML/ELFYAML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -609,8 +609,6 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
Expand Down
22 changes: 0 additions & 22 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1619,28 +1619,6 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureAtomicBufferPkAddBF16Inst
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[
FeatureAddressableLocalMemorySize65536,
FeatureForceStoreSC0SC1,
ritter-x2a marked this conversation as resolved.
Show resolved Hide resolved
FeatureFP8Insts,
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureXF32Insts
])>;

def FeatureISAVersion9_4_1 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[
FeatureAddressableLocalMemorySize65536,
FeatureForceStoreSC0SC1,
FeatureFP8Insts,
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureXF32Insts
])>;

def FeatureISAVersion9_4_2 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4295,7 +4295,7 @@ AMDGPUInstructionSelector::selectVOP3PModsImpl(
// TODO: Handle G_FSUB 0 as fneg

// TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
(void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard()
(void)IsDOT; // DOTs do not use OPSEL on gfx942+, check ST.hasDOTOpSelHazard()

// Packed instructions do not have abs modifiers.
Mods |= SISrcMods::OP_SEL_1;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/DSInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1773,7 +1773,7 @@ def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>;
def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>;
def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;

// GFX940+.
// GFX942+.
def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>;
def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,7 @@ defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",

} // End SubtargetPredicate = isGFX7GFX10GFX11

// GFX940-, GFX11-only flat instructions.
// GFX942-, GFX11-only flat instructions.
let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst
Expand Down Expand Up @@ -2046,7 +2046,7 @@ defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>;

let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
// These instructions are encoded differently on gfx90* and gfx940.
// These instructions are encoded differently on gfx90* and gfx94*.
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
}
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2292,7 +2292,7 @@ GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {

static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses,
bool IsGFX950) {
// xdl def cycles | gfx940 | gfx950
// xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
// 4 pass | 7 8
// 8 pass | 11 12
Expand Down Expand Up @@ -2600,7 +2600,7 @@ static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {

static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
bool IsGFX950) {
// xdl def cycles | gfx940 | gfx950
// xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
// 4 pass | 7 8
// 8 pass | 11 12
Expand All @@ -2610,7 +2610,7 @@ static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,

static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
bool IsGFX950) {
// xdl def cycles | gfx940 | gfx950
// xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
// 4 pass | 7 8
// 8 pass | 11 12
Expand Down
14 changes: 3 additions & 11 deletions llvm/lib/Target/AMDGPU/GCNProcessors.td
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,7 @@ def : ProcessorModel<"gfx90c", SIQuarterSpeedModel,
FeatureISAVersion9_0_C.Features
>;

def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_0.Features
>;

def : ProcessorModel<"gfx941", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_1.Features
>;

def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
def : ProcessorModel<"gfx942", SIDPGFX942FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;

Expand All @@ -213,8 +205,8 @@ def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
FeatureISAVersion9_Generic.Features
>;

// [gfx940, gfx941, gfx942]
def : ProcessorModel<"gfx9-4-generic", SIDPGFX940FullSpeedModel,
// [gfx942]
def : ProcessorModel<"gfx9-4-generic", SIDPGFX942FullSpeedModel,
FeatureISAVersion9_4_Generic.Features
>;

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1297,11 +1297,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasPackedTID() const { return HasPackedTID; }

// GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
// GFX94* is a derivative of GFX90A. hasGFX940Insts() being true implies that
// hasGFX90AInsts is also true.
bool hasGFX940Insts() const { return GFX940Insts; }

// GFX950 is a derivation to GFX940. hasGFX950Insts() implies that
// GFX950 is a derivative of GFX94*. hasGFX950Insts() implies that
// hasGFX940Insts and hasGFX90AInsts are also true.
bool hasGFX950Insts() const { return GFX950Insts; }

Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,6 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
Expand Down Expand Up @@ -180,8 +178,6 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ enum Id { // HwRegCode, (6) [5:0]
ID_EXCP_FLAG_USER = 18,
ID_TRAP_CTRL = 19,

// GFX940 specific registers
// GFX94* specific registers
ID_XCC_ID = 20,
ID_SQ_PERF_SNAPSHOT_DATA = 21,
ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
Expand Down
20 changes: 10 additions & 10 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16825,39 +16825,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// safe. The message phrasing also should be better.
if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
if (AS == AMDGPUAS::FLAT_ADDRESS) {
// gfx940, gfx12
// gfx942, gfx12
if (Subtarget->hasAtomicFlatPkAdd16Insts() && isV2F16OrV2BF16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
} else if (AMDGPU::isExtendedGlobalAddrSpace(AS)) {
// gfx90a, gfx940, gfx12
// gfx90a, gfx942, gfx12
if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);

// gfx940, gfx12
// gfx942, gfx12
if (Subtarget->hasAtomicGlobalPkAddBF16Inst() && isV2BF16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
} else if (AS == AMDGPUAS::BUFFER_FAT_POINTER) {
// gfx90a, gfx940, gfx12
// gfx90a, gfx942, gfx12
if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);

// While gfx90a/gfx940 supports v2bf16 for global/flat, it does not for
// While gfx90a/gfx942 supports v2bf16 for global/flat, it does not for
// buffer. gfx12 does have the buffer version.
if (Subtarget->hasAtomicBufferPkAddBF16Inst() && isV2BF16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
}

// global and flat atomic fadd f64: gfx90a, gfx940.
// global and flat atomic fadd f64: gfx90a, gfx942.
if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
return ReportUnsafeHWInst(AtomicExpansionKind::None);

if (AS != AMDGPUAS::FLAT_ADDRESS) {
if (Ty->isFloatTy()) {
// global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940,
// global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx942,
// gfx11+.
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
// global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
// global/buffer atomic fadd f32 rtn: gfx90a, gfx942, gfx11+.
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
} else {
Expand All @@ -16869,7 +16869,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
}
}

// flat atomic fadd f32: gfx940, gfx11+.
// flat atomic fadd f32: gfx942, gfx11+.
if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
if (Subtarget->hasFlatAtomicFaddF32Inst())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
Expand Down Expand Up @@ -16908,7 +16908,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// float, double restored in gfx10.
// double removed again in gfx11, so only f32 for gfx11/gfx12.
//
// For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
// For gfx9, gfx90a and gfx942 support f64 for global (same as fadd), but
// no f32.
if (AS == AMDGPUAS::FLAT_ADDRESS) {
if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,6 @@ class SIGfx940CacheControl : public SIGfx90ACacheControl {
}

public:

SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {};

bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/SISchedule.td
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class SISchedMachineModel : SchedMachineModel {
def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def SIDPFullSpeedModel : SISchedMachineModel;
def SIDPGFX940FullSpeedModel : SISchedMachineModel;
def SIDPGFX942FullSpeedModel : SISchedMachineModel;
def SIDPGFX950FullSpeedModel : SISchedMachineModel;
def GFX10SpeedModel : SISchedMachineModel;
def GFX11SpeedModel : SISchedMachineModel;
Expand Down Expand Up @@ -276,7 +276,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;

} // End SchedModel = SIDPFullSpeedModel

let SchedModel = SIDPGFX940FullSpeedModel in {
let SchedModel = SIDPGFX942FullSpeedModel in {

defm : SICommonWriteRes;

Expand Down Expand Up @@ -308,7 +308,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;
def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>;
def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>;

} // End SchedModel = SIDPGFX940FullSpeedModel
} // End SchedModel = SIDPGFX942FullSpeedModel


let SchedModel = SIDPGFX950FullSpeedModel in {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ static constexpr CustomOperand Operands[] = {
{{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
{{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},

// GFX940 specific registers
// GFX94* specific registers
{{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
{{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
{{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
Expand Down
6 changes: 0 additions & 6 deletions llvm/lib/TargetParser/TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
Expand Down Expand Up @@ -260,8 +258,6 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX909: return {9, 0, 9};
case GK_GFX90A: return {9, 0, 10};
case GK_GFX90C: return {9, 0, 12};
case GK_GFX940: return {9, 4, 0};
case GK_GFX941: return {9, 4, 1};
case GK_GFX942: return {9, 4, 2};
case GK_GFX950: return {9, 5, 0};
case GK_GFX1010: return {10, 1, 0};
Expand Down Expand Up @@ -506,8 +502,6 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gfx950-insts"] = true;
[[fallthrough]];
case GK_GFX942:
case GK_GFX941:
case GK_GFX940:
Features["fp8-insts"] = true;
Features["fp8-conversion-insts"] = true;
if (Kind != GK_GFX950)
Expand Down
2 changes: 0 additions & 2 deletions llvm/tools/llvm-readobj/ELFDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1616,8 +1616,6 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
Expand Down
Loading