ldtilecfg(ptr[rax + rcx * 4 + 64]); ldtilecfg(ptr [r30+r29*4+0x12]); ldtilecfg(ptr [rax]); sttilecfg(ptr[rsp + rax * 8 + 128]); sttilecfg(ptr [r30+r29*4+0x12]); sttilecfg(ptr [r30]); tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]); tileloadd(tmm2, ptr [r30+r29*4+0x12]); tileloaddt1(tmm4, ptr[r8 + r9 + 32]); tileloaddt1(tmm7, ptr [r30+r29*4+0x12]); tilerelease(); tilestored(ptr[r10 + r11 * 2 + 32], tmm2); tilestored(ptr [r30+r29*4+0x12], tmm1); tilezero(tmm7); tdpbssd(tmm1, tmm2, tmm3); tdpbsud(tmm2, tmm3, tmm4); tdpbusd(tmm3, tmm4, tmm5); tdpbuud(tmm4, tmm5, tmm6); tdpfp16ps(tmm5, tmm6, tmm7); tdpbf16ps(tmm5, tmm6, tmm7); tileloadd(tmm1, ptr[r8+r8]); tileloadd(tmm1, ptr[rax+rcx*4]); tileloadd(tmm1, ptr[r8+r9*1+0x40]); tileloadd(tmm1, ptr[r30+r29*1+0x80]); tileloaddrs(tmm3, ptr[rdi + rdx * 2 + 8]); tileloaddrs(tmm7, ptr[r31 + rdx * 2 + 8]); tileloaddrst1(tmm4, ptr[r8 + r9 + 32]); tileloaddrst1(tmm4, ptr[r25 + r9 + 32]); tdpbf8ps(tmm1, tmm2, tmm3); tdpbhf8ps(tmm1, tmm2, tmm3); tdphbf8ps(tmm1, tmm2, tmm3); tdphf8ps(tmm1, tmm2, tmm3); tmmultf32ps(tmm1, tmm2, tmm3); t2rpntlvwz0(tmm1, ptr[rax+r8*2+0x80]); t2rpntlvwz0(tmm7, ptr[r30+r8*2+0x80]); t2rpntlvwz0t1(tmm1, ptr[rax+r8*2+0x80]); t2rpntlvwz0t1(tmm7, ptr[r30+r8*2+0x80]); t2rpntlvwz1(tmm1, ptr[rax+r8*2+0x80]); t2rpntlvwz1(tmm7, ptr[r30+r8*2+0x80]); t2rpntlvwz1t1(tmm1, ptr[rax+r8*2+0x80]); t2rpntlvwz1t1(tmm7, ptr[r30+r8*2+0x80]); t2rpntlvwz0rs(tmm1, ptr[rax+r8*2+0x80]); t2rpntlvwz0rs(tmm7, ptr[r30+r8*2+0x80]); t2rpntlvwz0rst1(tmm1, ptr[rax+r8*2+0x80]); t2rpntlvwz0rst1(tmm7, ptr[r30+r8*2+0x80]); t2rpntlvwz1rs(tmm1, ptr[rax+r8*2+0x80]); t2rpntlvwz1rs(tmm7, ptr[r30+r8*2+0x80]); t2rpntlvwz1rst1(tmm1, ptr[rax+r8*2+0x80]); t2rpntlvwz1rst1(tmm7, ptr[r30+r8*2+0x80]); tcmmimfp16ps(tmm1, tmm2, tmm3); tcmmrlfp16ps(tmm1, tmm2, tmm3); tconjtcmmimfp16ps(tmm1, tmm2, tmm3); tconjtfp16(tmm1, tmm2); tcvtrowps2bf16h(zmm1, tmm2, r30d); tcvtrowps2bf16h(zmm29, tmm2, 0x12); tcvtrowps2bf16l(zmm1, tmm2, r30d); tcvtrowps2bf16l(zmm29, tmm2, 0x12); tcvtrowps2phh(zmm1, tmm2, r30d); tcvtrowps2phh(zmm29, tmm2, 0x12); tcvtrowps2phl(zmm1, tmm2, r30d); tcvtrowps2phl(zmm29, tmm2, 0x12); tilemovrow(zmm1, tmm2, r30d); tilemovrow(zmm29, tmm2, 0x12); ttcmmimfp16ps(tmm1, tmm2, tmm3); ttcmmrlfp16ps(tmm1, tmm2, tmm3); ttdpbf16ps(tmm1, tmm2, tmm3); ttdpfp16ps(tmm1, tmm2, tmm3); ttmmultf32ps(tmm1, tmm2, tmm3); ttransposed(tmm1, tmm2); tcvtrowd2ps(zmm20, tmm1, r30d); tcvtrowd2ps(zmm20, tmm1, 0x12);