AMX-COMPLEX support

-- TCMMIMFP16PS, TCMMRLFP16PS instructions
-- AMX.asm fix: as with GATHER instructions, a 3-operand AMX instruction must not use the same register for more than one operand, so the test now passes three distinct tile registers
Checked with XED version: [v2025.06.08]
This commit is contained in:
InstLatx64 2025-10-06 15:01:12 +02:00 committed by Maciej Wieczor-Retman
parent 8f47ba7cc7
commit 62f5f6990f
3 changed files with 20 additions and 10 deletions

View File

@ -1,7 +1,9 @@
bits 64
%macro amx 1
%macro amx 3
%define treg tmm %+ %1
%define treg2 tmm %+ %2
%define treg3 tmm %+ %3
ldtilecfg [rsi]
sttilecfg [rdi]
@ -16,11 +18,14 @@
tileloaddt1 treg, [rax,rdx]
tileloaddt1 treg, [rax,rdx*2]
tdpbf16ps treg, treg, treg
tdpbssd treg, treg, treg
tdpbusd treg, treg, treg
tdpbsud treg, treg, treg
tdpbuud treg, treg, treg
tdpbf16ps treg, treg2, treg3
tdpbssd treg, treg2, treg3
tdpbusd treg, treg2, treg3
tdpbsud treg, treg2, treg3
tdpbuud treg, treg2, treg3
tdpfp16ps treg, treg2, treg3
tcmmimfp16ps treg, treg2, treg3
tcmmrlfp16ps treg, treg2, treg3
tilestored [rax], treg
tilestored [rax,rdx], treg
@ -30,7 +35,11 @@
%endmacro
%assign n 0
%assign m 1
%assign l 2
%rep 8
amx n
%assign n n+1
amx n, m, l
%assign n ((n+1) % 8)
%assign m ((m+1) % 8)
%assign l ((l+1) % 8)
%endrep

View File

@ -157,6 +157,7 @@ if_("AMXTILE", "AMX tile configuration instructions");
if_("AMXBF16", "AMX bfloat16 multiplication");
if_("AMXFP16", "AMX FP16 multiplication");
if_("AMXINT8", "AMX 8-bit integer multiplication");
if_("AMXCOMPLEX", "AMX float16 complex multiplication");
if_("FRED", "Flexible Return and Exception Delivery (FRED)");
if_("RAOINT", "Remote atomic operations (RAO-INT)");
if_("UINTR", "User interrupts");

View File

@ -5420,6 +5420,8 @@ LDTILECFG mem512 [m: vex+.128.np.0f38.w0 49 /0] AMXTILE,SZ,LONG
STTILECFG mem512 [m: vex+.128.66.0f38.w0 49 /0] AMXTILE,SZ,LONG
TDPBF16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 5c /r] AMXBF16,LONG
TDPFP16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f2.0f38.w0 5c /r] AMXFP16,LONG
TCMMIMFP16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.66.0f38.w0 6c /r] AMXCOMPLEX,LONG
TCMMRLFP16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.np.0f38.w0 6c /r] AMXCOMPLEX,LONG
TDPBSSD tmmreg,tmmreg,tmmreg [rmv: vex.128.f2.0f38.w0 5e /r] AMXINT8,LONG
TDPBSUD tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 5e /r] AMXINT8,LONG
TDPBUSD tmmreg,tmmreg,tmmreg [rmv: vex.128.66.0f38.w0 5e /r] AMXINT8,LONG
@ -5442,8 +5444,6 @@ T2RPNTLVWZ0RST1 tmmreg,mem [rm: vex.128.np.map5.w0 f9 /r] FUTURE,SIB
T2RPNTLVWZ1RS tmmreg,mem [rm: vex.128.66.map5.w0 f8 /r] FUTURE,SIB
T2RPNTLVWZ1RST1 tmmreg,mem [rm: vex.128.66.map5.w0 f9 /r] FUTURE,SIB
TCMMIMFP16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.66.0f38.w0 6c /r] FUTURE
TCMMRLFP16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.np.0f38.w0 6c /r] FUTURE
TCONJTCMMIMFP16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.np.0f38.w0 6b /r] FUTURE
TCONJTFP16 tmmreg,tmmreg [rm: vex.128.66.0f38.w0 6b /r] FUTURE
TCVTROWD2PS zmmreg,tmmreg,reg32 [rmv: evex.512.f3.0f38.w0 4a /r] FUTURE