Fuck git
This commit is contained in:
Vendored
+94
@@ -0,0 +1,94 @@
|
||||
ldtilecfg(ptr[rax + rcx * 4 + 64]);
|
||||
ldtilecfg(ptr [r30+r29*4+0x12]);
|
||||
ldtilecfg(ptr [rax]);
|
||||
sttilecfg(ptr[rsp + rax * 8 + 128]);
|
||||
sttilecfg(ptr [r30+r29*4+0x12]);
|
||||
sttilecfg(ptr [r30]);
|
||||
tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
|
||||
tileloadd(tmm2, ptr [r30+r29*4+0x12]);
|
||||
tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
|
||||
tileloaddt1(tmm7, ptr [r30+r29*4+0x12]);
|
||||
tilerelease();
|
||||
tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
|
||||
tilestored(ptr [r30+r29*4+0x12], tmm1);
|
||||
tilezero(tmm7);
|
||||
tdpbssd(tmm1, tmm2, tmm3);
|
||||
tdpbsud(tmm2, tmm3, tmm4);
|
||||
tdpbusd(tmm3, tmm4, tmm5);
|
||||
tdpbuud(tmm4, tmm5, tmm6);
|
||||
tdpfp16ps(tmm5, tmm6, tmm7);
|
||||
tdpbf16ps(tmm5, tmm6, tmm7);
|
||||
tileloadd(tmm1, ptr[r8+r8]);
|
||||
tileloadd(tmm1, ptr[rax+rcx*4]);
|
||||
tileloadd(tmm1, ptr[r8+r9*1+0x40]);
|
||||
tileloadd(tmm1, ptr[r30+r29*1+0x80]);
|
||||
tileloaddrs(tmm3, ptr[rdi + rdx * 2 + 8]);
|
||||
tileloaddrs(tmm7, ptr[r31 + rdx * 2 + 8]);
|
||||
tileloaddrst1(tmm4, ptr[r8 + r9 + 32]);
|
||||
tileloaddrst1(tmm4, ptr[r25 + r9 + 32]);
|
||||
|
||||
tdpbf8ps(tmm1, tmm2, tmm3);
|
||||
tdpbhf8ps(tmm1, tmm2, tmm3);
|
||||
tdphbf8ps(tmm1, tmm2, tmm3);
|
||||
tdphf8ps(tmm1, tmm2, tmm3);
|
||||
|
||||
tmmultf32ps(tmm1, tmm2, tmm3);
|
||||
|
||||
t2rpntlvwz0(tmm1, ptr[rax+r8*2+0x80]);
|
||||
t2rpntlvwz0(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
t2rpntlvwz0t1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
t2rpntlvwz0t1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
t2rpntlvwz1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
t2rpntlvwz1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
t2rpntlvwz1t1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
t2rpntlvwz1t1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
t2rpntlvwz0rs(tmm1, ptr[rax+r8*2+0x80]);
|
||||
t2rpntlvwz0rs(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
t2rpntlvwz0rst1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
t2rpntlvwz0rst1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
t2rpntlvwz1rs(tmm1, ptr[rax+r8*2+0x80]);
|
||||
t2rpntlvwz1rs(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
t2rpntlvwz1rst1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
t2rpntlvwz1rst1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
tcmmimfp16ps(tmm1, tmm2, tmm3);
|
||||
tcmmrlfp16ps(tmm1, tmm2, tmm3);
|
||||
|
||||
tconjtcmmimfp16ps(tmm1, tmm2, tmm3);
|
||||
|
||||
tconjtfp16(tmm1, tmm2);
|
||||
|
||||
tcvtrowps2bf16h(zmm1, tmm2, r30d);
|
||||
tcvtrowps2bf16h(zmm29, tmm2, 0x12);
|
||||
|
||||
tcvtrowps2bf16l(zmm1, tmm2, r30d);
|
||||
tcvtrowps2bf16l(zmm29, tmm2, 0x12);
|
||||
|
||||
tcvtrowps2phh(zmm1, tmm2, r30d);
|
||||
tcvtrowps2phh(zmm29, tmm2, 0x12);
|
||||
|
||||
tcvtrowps2phl(zmm1, tmm2, r30d);
|
||||
tcvtrowps2phl(zmm29, tmm2, 0x12);
|
||||
|
||||
tilemovrow(zmm1, tmm2, r30d);
|
||||
tilemovrow(zmm29, tmm2, 0x12);
|
||||
|
||||
ttcmmimfp16ps(tmm1, tmm2, tmm3);
|
||||
ttcmmrlfp16ps(tmm1, tmm2, tmm3);
|
||||
|
||||
ttdpbf16ps(tmm1, tmm2, tmm3);
|
||||
ttdpfp16ps(tmm1, tmm2, tmm3);
|
||||
|
||||
ttmmultf32ps(tmm1, tmm2, tmm3);
|
||||
|
||||
ttransposed(tmm1, tmm2);
|
||||
|
||||
tcvtrowd2ps(zmm20, tmm1, r30d);
|
||||
tcvtrowd2ps(zmm20, tmm1, 0x12);
|
||||
Vendored
+21
@@ -0,0 +1,21 @@
|
||||
// https://github.com/herumi/xbyak/pull/202
|
||||
sal(rax, r8, 1);
|
||||
sar(rax, r9, 4);
|
||||
shl(rax, rdi, 8);
|
||||
shr(rax, rsi, 12);
|
||||
rcl(rax, r10, 16);
|
||||
rcr(rax, r11, 20);
|
||||
rol(rax, r14, 24);
|
||||
ror(rax, r15, 28);
|
||||
// Memory-source forms: the same eight shift/rotate mnemonics, in the same order, as the register-source forms.
sal(rcx, qword[r8], 32);
|
||||
sar(rcx, qword[r9], 36);
|
||||
shl(rcx, qword[rdi], 40);
|
||||
shr(rcx, qword[rsi], 44);
|
||||
rcl(rcx, qword[r10], 48);
|
||||
rcr(rcx, qword[r11], 52);
|
||||
rol(rcx, qword[r14], 56);
|
||||
ror(rcx, qword[r15], 60);
|
||||
|
||||
imul(rax, rdx, r10);
|
||||
imul(rcx, r15, qword[rdi]);
|
||||
|
||||
Vendored
+210
@@ -0,0 +1,210 @@
|
||||
vaddbf16(xm1, xm2, xm3);
|
||||
vaddbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vaddbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vaddbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vdivbf16(xm1, xm2, xm3);
|
||||
vdivbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vdivbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vdivbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vmaxbf16(xm1, xm2, xm3);
|
||||
vmaxbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vmaxbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vmaxbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vminbf16(xm1, xm2, xm3);
|
||||
vminbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vminbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vminbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vmulbf16(xm1, xm2, xm3);
|
||||
vmulbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vmulbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vmulbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vscalefbf16(xm1, xm2, xm3);
|
||||
vscalefbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vscalefbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vscalefbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vsubbf16(xm1, xm2, xm3);
|
||||
vsubbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vsubbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vsubbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
// madd
|
||||
vfmadd132bf16(xm1, xm2, xm3);
|
||||
vfmadd132bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmadd132bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmadd132bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfmadd213bf16(xm1, xm2, xm3);
|
||||
vfmadd213bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmadd213bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmadd213bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfmadd231bf16(xm1, xm2, xm3);
|
||||
vfmadd231bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmadd231bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmadd231bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
// nmadd
|
||||
vfnmadd132bf16(xm1, xm2, xm3);
|
||||
vfnmadd132bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmadd132bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmadd132bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfnmadd213bf16(xm1, xm2, xm3);
|
||||
vfnmadd213bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmadd213bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmadd213bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfnmadd231bf16(xm1, xm2, xm3);
|
||||
vfnmadd231bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmadd231bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmadd231bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
// msub
|
||||
vfmsub132bf16(xm1, xm2, xm3);
|
||||
vfmsub132bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmsub132bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmsub132bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfmsub213bf16(xm1, xm2, xm3);
|
||||
vfmsub213bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmsub213bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmsub213bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfmsub231bf16(xm1, xm2, xm3);
|
||||
vfmsub231bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmsub231bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmsub231bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
// nmsub
|
||||
vfnmsub132bf16(xm1, xm2, xm3);
|
||||
vfnmsub132bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmsub132bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmsub132bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfnmsub213bf16(xm1, xm2, xm3);
|
||||
vfnmsub213bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmsub213bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmsub213bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfnmsub231bf16(xm1, xm2, xm3);
|
||||
vfnmsub231bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmsub231bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmsub231bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vcmpbf16(k1, xm5, xm4, 5);
|
||||
vcmpbf16(k2, ym5, ym4, 6);
|
||||
vcmpbf16(k3, ym15, ptr_b[rax+128], 7);
|
||||
vcmpbf16(k4, zm30, zm20, 8);
|
||||
vcmpbf16(k5, zm1, ptr[rax+128], 9);
|
||||
vcmpbf16(k6, zm10, ptr_b[rax+128], 10);
|
||||
|
||||
vfpclassbf16(k1, xm4, 5);
|
||||
vfpclassbf16(k2|k5, ym4, 6);
|
||||
vfpclassbf16(k3|k5, zm20, 7);
|
||||
vfpclassbf16(k3|k5, xword[rax+128], 8);
|
||||
vfpclassbf16(k3, xword_b[rax+128], 9);
|
||||
vfpclassbf16(k5|k5, yword[rax+128], 10);
|
||||
vfpclassbf16(k6|k5, yword_b[rax+128], 11);
|
||||
vfpclassbf16(k7|k5, zword[rax+128], 12);
|
||||
vfpclassbf16(k7|k5, zword_b[rax+128], 13);
|
||||
|
||||
vcomisbf16(xm2, xm3);
|
||||
vcomisbf16(xm2, ptr[rax+128]);
|
||||
|
||||
vgetexpbf16(xm1|k3, xmm2);
|
||||
vgetexpbf16(xm1|k3, ptr[rax+128]);
|
||||
vgetexpbf16(xm1|k3, ptr_b[rax+128]);
|
||||
|
||||
vgetexpbf16(ym1|k3, ymm2);
|
||||
vgetexpbf16(ym1|k3, ptr[rax+128]);
|
||||
vgetexpbf16(ym1|k3, ptr_b[rax+128]);
|
||||
|
||||
vgetexpbf16(zm1|k3, zmm2);
|
||||
vgetexpbf16(zm1|k3, ptr[rax+128]);
|
||||
vgetexpbf16(zm1|k3, ptr_b[rax+128]);
|
||||
|
||||
vgetmantbf16(xm1|k3, xmm2, 3);
|
||||
vgetmantbf16(xm1|k3, ptr[rax+128], 5);
|
||||
vgetmantbf16(xm1|k3, ptr_b[rax+128], 9);
|
||||
|
||||
vgetmantbf16(ym1|k3, ymm2, 3);
|
||||
vgetmantbf16(ym1|k3, ptr[rax+128], 5);
|
||||
vgetmantbf16(ym1|k3, ptr_b[rax+128], 9);
|
||||
|
||||
vgetmantbf16(zm1|k3, zmm2, 3);
|
||||
vgetmantbf16(zm1|k3, ptr[rax+128], 5);
|
||||
vgetmantbf16(zm1|k3, ptr_b[rax+128], 9);
|
||||
|
||||
vrcpbf16(xm1|k5, xm2);
|
||||
vrcpbf16(xm1|k5, ptr[rcx+128]);
|
||||
vrcpbf16(xm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrcpbf16(ym1|k5, ym2);
|
||||
vrcpbf16(ym1|k5, ptr[rcx+128]);
|
||||
vrcpbf16(ym1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrcpbf16(zm1|k5, zm2);
|
||||
vrcpbf16(zm1|k5, ptr[rcx+128]);
|
||||
vrcpbf16(zm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vreducebf16(xm1|k4, xm2, 1);
|
||||
vreducebf16(xm1|k4, ptr[rax+128], 1);
|
||||
vreducebf16(xm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vreducebf16(ym1|k4, ym2, 1);
|
||||
vreducebf16(ym1|k4, ptr[rax+128], 1);
|
||||
vreducebf16(ym1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vreducebf16(zm1|k4, zm2, 1);
|
||||
vreducebf16(zm1|k4, ptr[rax+128], 1);
|
||||
vreducebf16(zm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrndscalebf16(xm1|k4, xm2, 1);
|
||||
vrndscalebf16(xm1|k4, ptr[rax+128], 1);
|
||||
vrndscalebf16(xm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrndscalebf16(ym1|k4, ym2, 1);
|
||||
vrndscalebf16(ym1|k4, ptr[rax+128], 1);
|
||||
vrndscalebf16(ym1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrndscalebf16(zm1|k4, zm2, 1);
|
||||
vrndscalebf16(zm1|k4, ptr[rax+128], 1);
|
||||
vrndscalebf16(zm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrsqrtbf16(xm1|k5, xm2);
|
||||
vrsqrtbf16(xm1|k5, ptr[rcx+128]);
|
||||
vrsqrtbf16(xm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrsqrtbf16(ym1|k5, ym2);
|
||||
vrsqrtbf16(ym1|k5, ptr[rcx+128]);
|
||||
vrsqrtbf16(ym1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrsqrtbf16(zm1|k5, zm2);
|
||||
vrsqrtbf16(zm1|k5, ptr[rcx+128]);
|
||||
vrsqrtbf16(zm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vscalefbf16(xm1|k5, xm5, xm2);
|
||||
vscalefbf16(xm1|k5, xm5, ptr[rcx+128]);
|
||||
vscalefbf16(xm1|k5, xm5, ptr_b[rcx+128]);
|
||||
|
||||
vscalefbf16(ym1|k5, ym9, ym2);
|
||||
vscalefbf16(ym1|k5, ym9, ptr[rcx+128]);
|
||||
vscalefbf16(ym1|k5, ym9, ptr_b[rcx+128]);
|
||||
|
||||
vscalefbf16(zm1|k5, zm30, zm2);
|
||||
vscalefbf16(zm1|k5, zm30, ptr[rcx+128]);
|
||||
vscalefbf16(zm1|k5, zm30, ptr_b[rcx+128]);
|
||||
|
||||
vsqrtbf16(xm5|k3, xmm4);
|
||||
vsqrtbf16(xm5|k3, ptr[rax+128]);
|
||||
vsqrtbf16(xm5|k3, ptr_b[rax+128]);
|
||||
|
||||
vsqrtbf16(ym5|k3, ymm4);
|
||||
vsqrtbf16(ym5|k3, ptr[rax+128]);
|
||||
vsqrtbf16(ym5|k3, ptr_b[rax+128]);
|
||||
|
||||
vsqrtbf16(zm5|k3, zmm4);
|
||||
vsqrtbf16(zm5|k3, ptr[rax+128]);
|
||||
vsqrtbf16(zm5|k3, ptr_b[rax+128]);
|
||||
Vendored
+17
@@ -0,0 +1,17 @@
|
||||
vcomxsd(xm1, xm2|T_sae);
|
||||
vcomxsd(xm1, ptr[rax+128]);
|
||||
|
||||
vcomxsh(xm1, xm2|T_sae);
|
||||
vcomxsh(xm1, ptr[rax+128]);
|
||||
|
||||
vcomxss(xm1, xm2|T_sae);
|
||||
vcomxss(xm1, ptr[rax+128]);
|
||||
|
||||
vucomxsd(xm1, xm2|T_sae);
|
||||
vucomxsd(xm1, ptr[rax+128]);
|
||||
|
||||
vucomxsh(xm1, xm2|T_sae);
|
||||
vucomxsh(xm1, ptr[rax+128]);
|
||||
|
||||
vucomxss(xm1, xm2|T_sae);
|
||||
vucomxss(xm1, ptr[rax+128]);
|
||||
+200
@@ -0,0 +1,200 @@
|
||||
vcvt2ps2phx(xm1|k5, xm2, xm3);
|
||||
vcvt2ps2phx(xm1|k5, xm2, ptr[rax+128]);
|
||||
vcvt2ps2phx(xm1|k5, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ps2phx(ym1|k5, ym2, ym3);
|
||||
vcvt2ps2phx(ym1|k5, ym2, ptr[rax+128]);
|
||||
vcvt2ps2phx(ym1|k5, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ps2phx(zm1|k5, zm2, zm3);
|
||||
vcvt2ps2phx(zm1|k5, zm2, ptr[rax+128]);
|
||||
vcvt2ps2phx(zm1|k5, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvtbiasph2bf8
|
||||
vcvtbiasph2bf8(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2bf8(xm1|k2, xm3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8(xm1|k2, xm3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2bf8(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2bf8(xm1|k2, ym3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8(xm1|k2, ym3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2bf8(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2bf8(ym1|k2, zm3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8(ym1|k2, zm3, ptr_b[rax+128]);
|
||||
|
||||
// vcvtbiasph2bf8s
|
||||
vcvtbiasph2bf8s(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2bf8s(xm1|k2, xm3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8s(xm1|k2, xm3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2bf8s(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2bf8s(xm1|k2, ym3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8s(xm1|k2, ym3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2bf8s(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2bf8s(ym1|k2, zm3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8s(ym1|k2, zm3, ptr_b[rax+128]);
|
||||
|
||||
// vcvtbiasph2hf8
|
||||
vcvtbiasph2hf8(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2hf8(xm1|k2, xm3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8(xm1|k2, xm3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2hf8(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2hf8(xm1|k2, ym3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8(xm1|k2, ym3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2hf8(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2hf8(ym1|k2, zm3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8(ym1|k2, zm3, ptr_b[rax+128]);
|
||||
|
||||
// vcvtbiasph2hf8s
|
||||
vcvtbiasph2hf8s(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2hf8s(xm1|k2, xm3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8s(xm1|k2, xm3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2hf8s(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2hf8s(xm1|k2, ym3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8s(xm1|k2, ym3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2hf8s(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2hf8s(ym1|k2, zm3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8s(ym1|k2, zm3, ptr_b[rax+128]);
|
||||
|
||||
vcvthf82ph(xm1|k5|T_z, xm2);
|
||||
vcvthf82ph(xm1|k5|T_z, ptr[rax+128]);
|
||||
|
||||
vcvthf82ph(ym1|k5|T_z, xm2);
|
||||
vcvthf82ph(ym1|k5|T_z, ptr[rax+128]);
|
||||
|
||||
vcvthf82ph(zm1|k5|T_z, ym2);
|
||||
vcvthf82ph(zm1|k5|T_z, ptr[rax+128]);
|
||||
|
||||
// vcvt2ph2bf8
|
||||
vcvt2ph2bf8(xm1|k4|T_z, xm2, xm3);
|
||||
vcvt2ph2bf8(xm1|k4, xm2, ptr[rax+128]);
|
||||
vcvt2ph2bf8(xm1|T_z, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ph2bf8(ym1|k4|T_z, ym2, ym3);
|
||||
vcvt2ph2bf8(ym1|k4, ym2, ptr[rax+128]);
|
||||
vcvt2ph2bf8(ym1|T_z, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ph2bf8(zm1|k4|T_z, zm2, zm3);
|
||||
vcvt2ph2bf8(zm1|k4, zm2, ptr[rax+128]);
|
||||
vcvt2ph2bf8(zm1|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvt2ph2bf8s
|
||||
vcvt2ph2bf8s(xm1|k4|T_z, xm2, xm3);
|
||||
vcvt2ph2bf8s(xm1|k4, xm2, ptr[rax+128]);
|
||||
vcvt2ph2bf8s(xm1|T_z, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ph2bf8s(ym1|k4|T_z, ym2, ym3);
|
||||
vcvt2ph2bf8s(ym1|k4, ym2, ptr[rax+128]);
|
||||
vcvt2ph2bf8s(ym1|T_z, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ph2bf8s(zm1|k4|T_z, zm2, zm3);
|
||||
vcvt2ph2bf8s(zm1|k4, zm2, ptr[rax+128]);
|
||||
vcvt2ph2bf8s(zm1|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvt2ph2hf8
|
||||
vcvt2ph2hf8(xm1|k4|T_z, xm2, xm3);
|
||||
vcvt2ph2hf8(xm1|k4, xm2, ptr[rax+128]);
|
||||
vcvt2ph2hf8(xm1|T_z, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ph2hf8(ym1|k4|T_z, ym2, ym3);
|
||||
vcvt2ph2hf8(ym1|k4, ym2, ptr[rax+128]);
|
||||
vcvt2ph2hf8(ym1|T_z, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ph2hf8(zm1|k4|T_z, zm2, zm3);
|
||||
vcvt2ph2hf8(zm1|k4, zm2, ptr[rax+128]);
|
||||
vcvt2ph2hf8(zm1|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvt2ph2hf8s
|
||||
vcvt2ph2hf8s(xm1|k4|T_z, xm2, xm3);
|
||||
vcvt2ph2hf8s(xm1|k4, xm2, ptr[rax+128]);
|
||||
vcvt2ph2hf8s(xm1|T_z, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ph2hf8s(ym1|k4|T_z, ym2, ym3);
|
||||
vcvt2ph2hf8s(ym1|k4, ym2, ptr[rax+128]);
|
||||
vcvt2ph2hf8s(ym1|T_z, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ph2hf8s(zm1|k4|T_z, zm2, zm3);
|
||||
vcvt2ph2hf8s(zm1|k4, zm2, ptr[rax+128]);
|
||||
vcvt2ph2hf8s(zm1|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvtph2bf8
|
||||
vcvtph2bf8(xmm1|k2|T_z, xmm2);
|
||||
vcvtph2bf8(xmm1|k2|T_z, xword [rax+128]);
|
||||
vcvtph2bf8(xmm1|k2|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvtph2bf8(xmm1|k2|T_z, ymm2);
|
||||
vcvtph2bf8(xmm1|k2|T_z, yword[rax+128]);
|
||||
vcvtph2bf8(xmm1|k2|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvtph2bf8(ymm1|k2|T_z, zmm2);
|
||||
vcvtph2bf8(ymm1|k2|T_z, zword[rax+128]);
|
||||
vcvtph2bf8(ymm1|k2|T_z, zword_b[rax+128]);
|
||||
|
||||
// vcvtph2bf8s
|
||||
vcvtph2bf8s(xmm1|k2|T_z, xmm2);
|
||||
vcvtph2bf8s(xmm1|k2|T_z, xword [rax+128]);
|
||||
vcvtph2bf8s(xmm1|k2|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvtph2bf8s(xmm1|k2|T_z, ymm2);
|
||||
vcvtph2bf8s(xmm1|k2|T_z, yword[rax+128]);
|
||||
vcvtph2bf8s(xmm1|k2|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvtph2bf8s(ymm1|k2|T_z, zmm2);
|
||||
vcvtph2bf8s(ymm1|k2|T_z, zword[rax+128]);
|
||||
vcvtph2bf8s(ymm1|k2|T_z, zword_b[rax+128]);
|
||||
|
||||
// vcvtph2hf8
|
||||
vcvtph2hf8(xmm1|k2|T_z, xmm2);
|
||||
vcvtph2hf8(xmm1|k2|T_z, xword [rax+128]);
|
||||
vcvtph2hf8(xmm1|k2|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvtph2hf8(xmm1|k2|T_z, ymm2);
|
||||
vcvtph2hf8(xmm1|k2|T_z, yword[rax+128]);
|
||||
vcvtph2hf8(xmm1|k2|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvtph2hf8(ymm1|k2|T_z, zmm2);
|
||||
vcvtph2hf8(ymm1|k2|T_z, zword[rax+128]);
|
||||
vcvtph2hf8(ymm1|k2|T_z, zword_b[rax+128]);
|
||||
|
||||
// vcvtph2hf8s
|
||||
vcvtph2hf8s(xmm1|k2|T_z, xmm2);
|
||||
vcvtph2hf8s(xmm1|k2|T_z, xword [rax+128]);
|
||||
vcvtph2hf8s(xmm1|k2|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvtph2hf8s(xmm1|k2|T_z, ymm2);
|
||||
vcvtph2hf8s(xmm1|k2|T_z, yword[rax+128]);
|
||||
vcvtph2hf8s(xmm1|k2|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvtph2hf8s(ymm1|k2|T_z, zmm2);
|
||||
vcvtph2hf8s(ymm1|k2|T_z, zword[rax+128]);
|
||||
vcvtph2hf8s(ymm1|k2|T_z, zword_b[rax+128]);
|
||||
|
||||
// AVX-NE-CONVERT
// Broadcast forms: each mnemonic is exercised once with an xmm and once with a ymm destination.
|
||||
vbcstnebf162ps(xmm15, ptr[rax+128]);
|
||||
vbcstnebf162ps(ymm15, ptr[rax+128]);
|
||||
|
||||
vbcstnesh2ps(xmm15, ptr[rax+128]);
|
||||
vbcstnesh2ps(ymm15, ptr[rax+128]);
|
||||
|
||||
vcvtneebf162ps(xmm15, ptr[rax+128]);
|
||||
vcvtneebf162ps(ymm15, ptr[rax+128]);
|
||||
|
||||
vcvtneeph2ps(xmm15, ptr[rax+128]);
|
||||
vcvtneeph2ps(ymm15, ptr[rax+128]);
|
||||
|
||||
vcvtneobf162ps(xmm15, ptr[rax+128]);
|
||||
vcvtneobf162ps(ymm15, ptr[rax+128]);
|
||||
|
||||
vcvtneoph2ps(xmm15, ptr[rax+128]);
|
||||
vcvtneoph2ps(ymm15, ptr[rax+128]);
|
||||
|
||||
// VEX-encoded vcvtneps2bf16: register and memory sources for both source widths.
// NOTE(review): memory sources are sized explicitly (xword/yword) so the two memory cases differ;
// presumably an unsized ptr[] cannot select the 256-bit source form - confirm against the assembler.
vcvtneps2bf16(xmm15, xmm3, VexEncoding);
|
||||
vcvtneps2bf16(xmm15, xword[rax+128], VexEncoding);
|
||||
vcvtneps2bf16(xmm15, ymm3, VexEncoding);
|
||||
vcvtneps2bf16(xmm15, yword[rax+128], VexEncoding);
|
||||
+63
@@ -0,0 +1,63 @@
|
||||
vminmaxbf16(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxbf16(xm1|k3|T_z, xm2, ptr[rax+128], 5);
|
||||
vminmaxbf16(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
|
||||
|
||||
vminmaxbf16(ym1|k3|T_z, ym2, ym3, 5);
|
||||
vminmaxbf16(ym1|k3|T_z, ym2, ptr[rax+128], 5);
|
||||
vminmaxbf16(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
|
||||
|
||||
vminmaxbf16(zm1|k3|T_z, zm2, zm3, 5);
|
||||
vminmaxbf16(zm1|k3|T_z, zm2, ptr[rax+128], 5);
|
||||
vminmaxbf16(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
|
||||
//
|
||||
vminmaxpd(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxpd(xm1|k3|T_z, xm2, ptr[rax+128], 5);
|
||||
vminmaxpd(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
|
||||
|
||||
vminmaxpd(ym1|k3|T_z, ym2, ym3, 5);
|
||||
vminmaxpd(ym1|k3|T_z, ym2, ptr[rax+128], 5);
|
||||
vminmaxpd(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
|
||||
|
||||
vminmaxpd(zm1|k3|T_z, zm2, zm3, 5);
|
||||
vminmaxpd(zm1|k3|T_z, zm2, zm3|T_sae, 5);
|
||||
vminmaxpd(zm1|k3|T_z, zm2, ptr[rax+128], 5);
|
||||
vminmaxpd(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
|
||||
//
|
||||
vminmaxph(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxph(xm1|k3|T_z, xm2, ptr[rax+128], 5);
|
||||
vminmaxph(xm1|k3|T_z, xm2, ptr[rax+128], 5);
|
||||
vminmaxph(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
|
||||
|
||||
vminmaxph(ym1|k3|T_z, ym2, ym3, 5);
|
||||
vminmaxph(ym1|k3|T_z, ym2, ptr[rax+128], 5);
|
||||
vminmaxph(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
|
||||
|
||||
vminmaxph(zm1|k3|T_z, zm2, zm3, 5);
|
||||
vminmaxph(zm1|k3|T_z, zm2, zm3|T_sae, 5);
|
||||
vminmaxph(zm1|k3|T_z, zm2, ptr[rax+128], 5);
|
||||
vminmaxph(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
|
||||
//
|
||||
vminmaxps(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxps(xm1|k3|T_z, xm2, ptr[rax+128], 5);
|
||||
vminmaxps(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
|
||||
|
||||
vminmaxps(ym1|k3|T_z, ym2, ym3, 5);
|
||||
vminmaxps(ym1|k3|T_z, ym2, ptr[rax+128], 5);
|
||||
vminmaxps(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
|
||||
|
||||
vminmaxps(zm1|k3|T_z, zm2, zm3, 5);
|
||||
vminmaxps(zm1|k3|T_z, zm2, zm3|T_sae, 5);
|
||||
vminmaxps(zm1|k3|T_z, zm2, ptr[rax+128], 5);
|
||||
vminmaxps(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
|
||||
//
|
||||
vminmaxsd(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxsd(xm1|k3|T_z, xm2, xm3|T_sae, 5);
|
||||
vminmaxsd(xm1|k3|T_z, xm2, ptr[rax+128], 5);
|
||||
//
|
||||
vminmaxsh(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxsh(xm1|k3|T_z, xm2, xm3|T_sae, 5);
|
||||
vminmaxsh(xm1|k3|T_z, xm2, ptr[rax+128], 5);
|
||||
//
|
||||
vminmaxss(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxss(xm1|k3|T_z, xm2, xm3|T_sae, 5);
|
||||
vminmaxss(xm1|k3|T_z, xm2, ptr[rax+128], 5);
|
||||
Vendored
+265
@@ -0,0 +1,265 @@
|
||||
// AVX10 integer and FP16 VNNI, media and zero-extending
|
||||
vdpphps(xm1, xm2, xm3);
|
||||
vdpphps(xm1, xm2, ptr[rax+128]);
|
||||
vdpphps(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vdpphps(ym1, ym2, ym3);
|
||||
vdpphps(ym1, ym2, ptr[rax+128]);
|
||||
vdpphps(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vdpphps(zm1, zm2, zm3);
|
||||
vdpphps(zm1, zm2, ptr[rax+128]);
|
||||
vdpphps(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vmpsadbw(xm1, xm3, xm15, 3);
|
||||
vmpsadbw(xm1|T_z, xm4, ptr[rax+128], 5);
|
||||
|
||||
vmpsadbw(ym1|k4, ym3, ym15, 3);
|
||||
vmpsadbw(ym1, ym4, ptr[rax+128], 5);
|
||||
|
||||
vmpsadbw(zm1|k4, zm3, zm15, 3);
|
||||
vmpsadbw(zm1, zm4, ptr[rax+128], 5);
|
||||
//
|
||||
vpdpbssd(xm1, xm2, xm3);
|
||||
vpdpbssd(xm1, xm2, ptr[rax+128]);
|
||||
vpdpbssd(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbssd(ym1, ym2, ym3);
|
||||
vpdpbssd(ym1, ym2, ptr[rax+128]);
|
||||
vpdpbssd(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbssd(zm1, zm2, zm3);
|
||||
vpdpbssd(zm1, zm2, ptr[rax+128]);
|
||||
vpdpbssd(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpbssds(xm1, xm2, xm3);
|
||||
vpdpbssds(xm1, xm2, ptr[rax+128]);
|
||||
vpdpbssds(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbssds(ym1, ym2, ym3);
|
||||
vpdpbssds(ym1, ym2, ptr[rax+128]);
|
||||
vpdpbssds(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbssds(zm1, zm2, zm3);
|
||||
vpdpbssds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpbssds(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpbsud(xm1, xm2, xm3);
|
||||
vpdpbsud(xm1, xm2, ptr[rax+128]);
|
||||
vpdpbsud(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbsud(ym1, ym2, ym3);
|
||||
vpdpbsud(ym1, ym2, ptr[rax+128]);
|
||||
vpdpbsud(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbsud(zm1, zm2, zm3);
|
||||
vpdpbsud(zm1, zm2, ptr[rax+128]);
|
||||
vpdpbsud(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpbsuds(xm1, xm2, xm3);
|
||||
vpdpbsuds(xm1, xm2, ptr[rax+128]);
|
||||
vpdpbsuds(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbsuds(ym1, ym2, ym3);
|
||||
vpdpbsuds(ym1, ym2, ptr[rax+128]);
|
||||
vpdpbsuds(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbsuds(zm1, zm2, zm3);
|
||||
vpdpbsuds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpbsuds(zm1, zm2, ptr_b[rax+128]);
|
||||
|
||||
//
|
||||
vpdpbuud(xm1, xm2, xm3);
|
||||
vpdpbuud(xm1, xm2, ptr[rax+128]);
|
||||
vpdpbuud(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbuud(ym1, ym2, ym3);
|
||||
vpdpbuud(ym1, ym2, ptr[rax+128]);
|
||||
vpdpbuud(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbuud(zm1, zm2, zm3);
|
||||
vpdpbuud(zm1, zm2, ptr[rax+128]);
|
||||
vpdpbuud(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpbuuds(xm1, xm2, xm3);
|
||||
vpdpbuuds(xm1, xm2, ptr[rax+128]);
|
||||
vpdpbuuds(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbuuds(ym1, ym2, ym3);
|
||||
vpdpbuuds(ym1, ym2, ptr[rax+128]);
|
||||
vpdpbuuds(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpbuuds(zm1, zm2, zm3);
|
||||
vpdpbuuds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpbuuds(zm1, zm2, ptr_b[rax+128]);
|
||||
|
||||
//
|
||||
vpdpwsud(xm1, xm2, xm3);
|
||||
vpdpwsud(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwsud(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwsud(ym1, ym2, ym3);
|
||||
vpdpwsud(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwsud(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwsud(zm1, zm2, zm3);
|
||||
vpdpwsud(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwsud(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpwsuds(xm1, xm2, xm3);
|
||||
vpdpwsuds(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwsuds(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwsuds(ym1, ym2, ym3);
|
||||
vpdpwsuds(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwsuds(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwsuds(zm1, zm2, zm3);
|
||||
vpdpwsuds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwsuds(zm1, zm2, ptr_b[rax+128]);
|
||||
// vpdpwusd (unsigned x signed word dot product; the signed x unsigned vpdpwsud group is listed separately)
|
||||
vpdpwusd(xm1, xm2, xm3);
|
||||
vpdpwusd(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwusd(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwusd(ym1, ym2, ym3);
|
||||
vpdpwusd(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwusd(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwusd(zm1, zm2, zm3);
|
||||
vpdpwusd(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwusd(zm1, zm2, ptr_b[rax+128]);
|
||||
// vpdpwusds (saturating variant)
|
||||
vpdpwusds(xm1, xm2, xm3);
|
||||
vpdpwusds(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwusds(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwusds(ym1, ym2, ym3);
|
||||
vpdpwusds(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwusds(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwusds(zm1, zm2, zm3);
|
||||
vpdpwusds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwusds(zm1, zm2, ptr_b[rax+128]);
|
||||
|
||||
//
|
||||
vpdpwuud(xm1, xm2, xm3);
|
||||
vpdpwuud(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwuud(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwuud(ym1, ym2, ym3);
|
||||
vpdpwuud(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwuud(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwuud(zm1, zm2, zm3);
|
||||
vpdpwuud(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwuud(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpwuuds(xm1, xm2, xm3);
|
||||
vpdpwuuds(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwuuds(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwuuds(ym1, ym2, ym3);
|
||||
vpdpwuuds(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwuuds(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwuuds(zm1, zm2, zm3);
|
||||
vpdpwuuds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwuuds(zm1, zm2, ptr_b[rax+128]);
|
||||
|
||||
//
|
||||
vmovd(xm10, xm20);
|
||||
vmovd(xm1, xm2);
|
||||
vmovd(xm10, ptr[rax+128]);
|
||||
vmovd(ptr[rax+128], xm30);
|
||||
//
|
||||
vmovw(xm1, xm20);
|
||||
vmovw(xm1, xm2);
|
||||
vmovw(xm3, ptr [rax+0x40]);
|
||||
vmovw(ptr [rax+0x40], xm7);
|
||||
//
|
||||
push(rax);
|
||||
push(rcx);
|
||||
push(rdx);
|
||||
push(rbx);
|
||||
push(rsp);
|
||||
push(rbp);
|
||||
push(rsi);
|
||||
push(rdi);
|
||||
push(r8);
|
||||
push(r9);
|
||||
push(r10);
|
||||
push(r11);
|
||||
push(r12);
|
||||
push(r13);
|
||||
push(r14);
|
||||
push(r15);
|
||||
push(r16);
|
||||
push(r17);
|
||||
push(r18);
|
||||
push(r19);
|
||||
push(r20);
|
||||
push(r21);
|
||||
push(r22);
|
||||
push(r23);
|
||||
push(r24);
|
||||
push(r25);
|
||||
push(r26);
|
||||
push(r27);
|
||||
push(r28);
|
||||
push(r29);
|
||||
push(r30);
|
||||
push(r31);
|
||||
pop(rax);
|
||||
pop(rcx);
|
||||
pop(rdx);
|
||||
pop(rbx);
|
||||
pop(rsp);
|
||||
pop(rbp);
|
||||
pop(rsi);
|
||||
pop(rdi);
|
||||
pop(r8);
|
||||
pop(r9);
|
||||
pop(r10);
|
||||
pop(r11);
|
||||
pop(r12);
|
||||
pop(r13);
|
||||
pop(r14);
|
||||
pop(r15);
|
||||
pop(r16);
|
||||
pop(r17);
|
||||
pop(r18);
|
||||
pop(r19);
|
||||
pop(r20);
|
||||
pop(r21);
|
||||
pop(r22);
|
||||
pop(r23);
|
||||
pop(r24);
|
||||
pop(r25);
|
||||
pop(r26);
|
||||
pop(r27);
|
||||
pop(r28);
|
||||
pop(r29);
|
||||
pop(r30);
|
||||
pop(r31);
|
||||
|
||||
movrs(rcx, ptr[rax]);
|
||||
movrs(ecx, ptr[rax]);
|
||||
movrs(cx, ptr[rax]);
|
||||
movrs(cl, ptr[rax+rdx*4]);
|
||||
|
||||
vmovrsb(xm1|k1|T_z, ptr[rax+128]);
|
||||
vmovrsb(ym1|k1|T_z, ptr[rax+128]);
|
||||
vmovrsb(zm1|k1|T_z, ptr[rax+128]);
|
||||
|
||||
vmovrsd(xm1|k1|T_z, ptr[rax+128]);
|
||||
vmovrsd(ym1|k1|T_z, ptr[rax+128]);
|
||||
vmovrsd(zm1|k1|T_z, ptr[rax+128]);
|
||||
|
||||
vmovrsq(xm1|k1|T_z, ptr[rax+128]);
|
||||
vmovrsq(ym1|k1|T_z, ptr[rax+128]);
|
||||
vmovrsq(zm1|k1|T_z, ptr[rax+128]);
|
||||
|
||||
vmovrsw(xm1|k1|T_z, ptr[rax+128]);
|
||||
vmovrsw(ym1|k1|T_z, ptr[rax+128]);
|
||||
vmovrsw(zm1|k1|T_z, ptr[rax+128]);
|
||||
Vendored
+638
@@ -0,0 +1,638 @@
|
||||
// v4fmadd* / v4fnmadd* / vp4dpwssd*: three-operand forms whose last operand is always memory;
// covers unmasked, masked (k4/k5) and masked+zeroing (k7|T_z) destinations.
v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
|
||||
v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
|
||||
v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
|
||||
v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
|
||||
vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
|
||||
vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
|
||||
// vaesdec / vaesdeclast / vaesenc / vaesenclast / vpclmulqdq: xmm, ymm and zmm widths with a
// memory source. Cases mix low-numbered registers (1, 2, 3) with registers 20 and 30.
vaesdec(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesdec(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesdec(zmm1, zmm2, ptr [rcx + 64]);
|
||||
vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
vaesenc(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesenc(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesenc(zmm1, zmm2, ptr [rcx + 64]);
|
||||
vaesenclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesenclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesenclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
// vpclmulqdq: same immediate (3) throughout; destination register 2 vs 20 at each width.
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
|
||||
// vpcompressb / vpcompressw: for each width, one store-to-memory form and one
// register-destination form with an opmask (k5 merging for xmm; k3/k2 with T_z for ymm/zmm).
vpcompressb(ptr[rax + 64], xmm1);
|
||||
vpcompressb(xmm30 | k5, xmm1);
|
||||
vpcompressb(ptr[rax + 64], ymm1);
|
||||
vpcompressb(ymm30 | k3 |T_z, ymm1);
|
||||
vpcompressb(ptr[rax + 64], zmm1);
|
||||
vpcompressb(zmm30 | k2 |T_z, zmm1);
|
||||
vpcompressw(ptr[rax + 64], xmm1);
|
||||
vpcompressw(xmm30 | k5, xmm1);
|
||||
vpcompressw(ptr[rax + 64], ymm1);
|
||||
vpcompressw(ymm30 | k3 |T_z, ymm1);
|
||||
vpcompressw(ptr[rax + 64], zmm1);
|
||||
vpcompressw(zmm30 | k2 |T_z, zmm1);
|
||||
// vpshld{w,d,q} (immediate count 5) and vpshldv{w,d,q} (no immediate):
// xmm/ymm/zmm destinations, all with k3|T_z and a full-width memory source.
vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
// vpshrd{w,d,q} (immediate count 5) and vpshrdv{w,d,q}: xmm/ymm/zmm with k3|T_z and a
// full-width memory source (ptr).
vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
// Same d/q variants again with a broadcast memory source (ptr_b); the w variants are not repeated here.
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
// vpopcnt{b,w,d,q}: xmm/ymm/zmm destinations with k3|T_z and a memory source.
// Only the d and q variants are additionally tested with a broadcast source (ptr_b).
vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
// vpdpbusd / vpdpbusds / vpdpwssd / vpdpwssds: for each mnemonic, xmm/ymm/zmm destinations
// (k3|T_z, second source register 20) with a full-width (ptr) and then a broadcast (ptr_b) memory operand.
vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
// vpexpandb / vpexpandw: xmm/ymm/zmm destinations (k3|T_z), first from a register
// source (register 30), then from memory.
vpexpandb(xmm5|k3|T_z, xmm30);
|
||||
vpexpandb(ymm5|k3|T_z, ymm30);
|
||||
vpexpandb(zmm5|k3|T_z, zmm30);
|
||||
vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(xmm5|k3|T_z, xmm30);
|
||||
vpexpandw(ymm5|k3|T_z, ymm30);
|
||||
vpexpandw(zmm5|k3|T_z, zmm30);
|
||||
vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
// vpshufbitqmb: opmask destination written as k1|k2 (destination k1 combined with mask k2),
// with xmm/ymm/zmm first sources and a memory second source.
vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
|
||||
// gf2p8affineinvqb: two-operand forms (register and memory source) first, then the v-prefixed
// three-operand forms: low registers at xmm/ymm, registers 30/31 at xmm/ymm/zmm, and
// masked (k1|T_z) forms with full-width (ptr) and broadcast (ptr_b) memory operands.
gf2p8affineinvqb(xmm1, xmm2, 3);
|
||||
gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
// gf2p8affineqb: same operand matrix as the gf2p8affineinvqb cases — two-operand forms,
// v-prefixed low-register forms, registers 30/31 at all widths, and masked ptr / ptr_b forms.
gf2p8affineqb(xmm1, xmm2, 3);
|
||||
gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
// gf2p8mulb: two-operand forms, then v-prefixed three-operand forms with low registers,
// registers 30/31 at all widths, and masked (k1|T_z) memory forms. No ptr_b case is listed.
gf2p8mulb(xmm1, xmm2);
|
||||
gf2p8mulb(xmm1, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(xmm1, xmm5, xmm2);
|
||||
vgf2p8mulb(ymm1, ymm5, ymm2);
|
||||
vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(xmm30, xmm31, xmm4);
|
||||
vgf2p8mulb(ymm30, ymm31, ymm4);
|
||||
vgf2p8mulb(zmm30, zmm31, zmm4);
|
||||
vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
|
||||
// vcvtne2ps2bf16 / vcvtneps2bf16 / vdpbf16ps with memory sources.
// For vcvtneps2bf16 the source width is spelled explicitly (xword/yword/zword) in three cases;
// the last case uses plain ptr with a ymm destination.
vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]);
|
||||
vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]);
|
||||
vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]);
|
||||
vcvtneps2bf16(xmm0, xword [rax + 64]);
|
||||
vcvtneps2bf16(xmm0 | k1, yword [rax + 64]);
|
||||
vcvtneps2bf16(ymm0 | k1, zword [rax + 64]);
|
||||
vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]);
|
||||
vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]);
|
||||
vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]);
|
||||
vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]);
|
||||
// vaddph: zmm/ymm/xmm with full-width (ptr) then broadcast (ptr_b) memory sources.
// vaddsh: one memory form and one register form with mask, zeroing and T_rd_sae.
vaddph(zmm0, zmm1, ptr[rax+64]);
|
||||
vaddph(ymm0, ymm1, ptr[rax+64]);
|
||||
vaddph(xmm0, xmm1, ptr[rax+64]);
|
||||
vaddph(zmm0, zmm1, ptr_b[rax+64]);
|
||||
vaddph(ymm0, ymm1, ptr_b[rax+64]);
|
||||
vaddph(xmm0, xmm1, ptr_b[rax+64]);
|
||||
vaddsh(xmm0, xmm15, ptr[rax+64]);
|
||||
vaddsh(xmm0|k5|T_z|T_rd_sae, xmm15, xmm3);
|
||||
// vcmpph / vcmpsh: opmask destinations; vcmpph covers xm/ym/zm with ptr and ptr_b sources and
// predicates 1..3, vcmpsh covers a memory form and a masked register form with T_sae.
// vcomish / vucomish: one memory form and one register form with T_sae each.
vcmpph(k1, xm15, ptr[rax+64], 1);
|
||||
vcmpph(k2, ym15, ptr[rax+64], 2);
|
||||
vcmpph(k3, zm15, ptr[rax+64], 3);
|
||||
vcmpph(k1, xm15, ptr_b[rax+64], 1);
|
||||
vcmpph(k2, ym15, ptr_b[rax+64], 2);
|
||||
vcmpph(k3, zm15, ptr_b[rax+64], 3);
|
||||
vcmpsh(k1, xm15, ptr[rax+64], 1);
|
||||
vcmpsh(k3|k5, xmm1, xmm25|T_sae, 4);
|
||||
vcomish(xmm1, ptr[rax+64]);
|
||||
vcomish(xmm1|T_sae, xmm15);
|
||||
vucomish(xmm1, ptr [rax+0x40]);
|
||||
vucomish(xmm1|T_sae, xmm15);
|
||||
// Packed half-precision FMA family (vfmaddsub213ph, vfmsubadd132ph, vfmadd132ph, vfmsub231ph,
// vfnmsub231ph): xmm/ymm/zmm with ptr and ptr_b memory sources, plus a zmm register form
// carrying an embedded rounding attribute (T_ru_sae or T_rd_sae).
// vfmaddsub213ph additionally has masked (k3) register forms at xmm and ymm.
vfmaddsub213ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmaddsub213ph(xmm1, xmm2, ptr_b [rax+0x40]);
|
||||
vfmaddsub213ph(xmm1|k3, xmm2, xmm5);
|
||||
vfmaddsub213ph(ymm1, ymm2, ptr [rax+0x40]);
|
||||
vfmaddsub213ph(ymm1, ymm2, ptr_b[rax+0x40]);
|
||||
vfmaddsub213ph(ymm1|k3, ymm2, ymm5);
|
||||
vfmaddsub213ph(zmm1, zmm2, ptr [rax+0x40]);
|
||||
vfmaddsub213ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmaddsub213ph(zmm1|T_ru_sae, zmm2, zmm5);
|
||||
vfmsubadd132ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmsubadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
|
||||
vfmsubadd132ph(ymm1, ymm2, ptr [rax+0x40]);
|
||||
vfmsubadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
|
||||
vfmsubadd132ph(zmm1, zmm2, ptr [rax+0x40]);
|
||||
vfmsubadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmsubadd132ph(zmm1|T_ru_sae, zmm2, zmm5);
|
||||
vfmadd132ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
|
||||
vfmadd132ph(ymm1, ymm2, ptr [rax+0x40]);
|
||||
vfmadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
|
||||
vfmadd132ph(zmm1, zmm2, ptr [rax+0x40]);
|
||||
vfmadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmadd132ph(zmm1|T_rd_sae, zmm2, zmm5);
|
||||
vfmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmsub231ph(xmm1, xmm2, ptr_b [rax+0x40]);
|
||||
vfmsub231ph(ymm1, ymm2, ptr [rax+0x40]);
|
||||
vfmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
|
||||
vfmsub231ph(zmm1, zmm2, ptr [rax+0x40]);
|
||||
vfmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
|
||||
// vfnmsub231ph is covered by a reduced set: xmm ptr, ymm/zmm ptr_b, and zmm with T_rd_sae.
vfnmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfnmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
|
||||
vfnmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfnmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
|
||||
// Scalar half-precision FMA (vfmadd132sh, vfnmadd132sh, vfmsub132sh, vfnmsub132sh):
// each has a register form with k1|T_z|T_rd_sae and a plain memory form.
vfmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vfmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfnmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vfnmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vfmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfnmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vfnmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
// vfcmaddcph / vfmaddcph / vfcmulcph / vfmulcph: xmm/ymm/zmm with ptr and ptr_b memory sources.
// vfcmaddcph has the fullest matrix (masked forms at every width plus a zmm T_rd_sae register form);
// the other three each have one xmm ptr case, one masked ymm ptr_b case and one zmm ptr_b case.
vfcmaddcph(xmm1|k1|T_z, xmm2, ptr [rax+0x40]);
|
||||
vfcmaddcph(ymm1|k1|T_z, ymm2, ptr [rax+0x40]);
|
||||
vfcmaddcph(zmm1|k1, zmm2, ptr [rax+0x40]);
|
||||
vfcmaddcph(zmm1|k1|T_rd_sae, zmm2, zmm5);
|
||||
vfcmaddcph(xmm1|k1|T_z, xmm2, ptr_b [rax+0x40]);
|
||||
vfcmaddcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
|
||||
vfcmaddcph(zmm1|k1|T_z, zmm2, ptr_b [rax+0x40]);
|
||||
vfmaddcph(xm1, xm2, ptr[rax+0x40]);
|
||||
vfmaddcph(ym1|k1|T_z, ym2, ptr_b[rax+0x40]);
|
||||
vfmaddcph(zm1, zm2, ptr_b[rax+0x40]);
|
||||
vfcmulcph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfcmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
|
||||
vfcmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmulcph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
|
||||
vfmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
// vrcpph/vrcpsh, vrsqrtph/vrsqrtsh, vsqrtph/vsqrtsh, vscalefph/vscalefsh:
// packed (ph) variants are tested at xmm/ymm/zmm with ptr and ptr_b sources; scalar (sh)
// variants use xmm only. Register forms with T_rd_sae appear for vsqrtsh, vscalefph and vscalefsh.
vrcpph(xmm1, ptr [rax+0x40]);
|
||||
vrcpph(xmm1, ptr_b [rax+0x40]);
|
||||
vrcpph(ymm1, ptr [rax+0x40]);
|
||||
vrcpph(ymm1, ptr_b [rax+0x40]);
|
||||
vrcpph(zmm1, ptr [rax+0x40]);
|
||||
vrcpph(zmm1, ptr_b [rax+0x40]);
|
||||
vrcpsh(xmm1, xmm3, ptr [rax+0x40]);
|
||||
vrsqrtph(xmm1, ptr [rax+0x40]);
|
||||
vrsqrtph(xmm1, ptr_b [rax+0x40]);
|
||||
vrsqrtph(ymm2, ptr [rax+0x40]);
|
||||
vrsqrtph(ymm2, ptr_b [rax+0x40]);
|
||||
vrsqrtph(zmm2, ptr [rax+0x40]);
|
||||
vrsqrtph(zmm2, ptr_b [rax+0x40]);
|
||||
vrsqrtsh(xmm1|k5|T_z, xmm7, ptr [rax+0x40]);
|
||||
vsqrtph(xmm1|k4|T_z, ptr [rax+0x40]);
|
||||
vsqrtph(xmm1|k4|T_z, ptr_b [rax+0x40]);
|
||||
vsqrtph(ymm1|k4|T_z, ptr_b [rax+0x40]);
|
||||
vsqrtph(zmm1|k4|T_z, ptr [rax+0x40]);
|
||||
vsqrtph(zmm1|k4|T_z, ptr_b [rax+0x40]);
|
||||
vsqrtsh(xmm1|k4|T_z, xmm5, ptr [rax+0x40]);
|
||||
vsqrtsh(xmm1|k4|T_z|T_rd_sae, xmm5, xmm7);
|
||||
vscalefph(xmm1, xmm5, ptr [rax+0x40]);
|
||||
vscalefph(xmm1, xmm5, ptr_b [rax+0x40]);
|
||||
vscalefph(ymm1, ymm5, ptr [rax+0x40]);
|
||||
vscalefph(ymm1, ymm5, ptr_b [rax+0x40]);
|
||||
vscalefph(zmm1, zmm5, ptr [rax+0x40]);
|
||||
vscalefph(zmm1, zmm5, ptr_b [rax+0x40]);
|
||||
vscalefph(zmm1|k1|T_z|T_rd_sae, zmm5, zmm7);
|
||||
vscalefsh(xmm1, xmm5, ptr [rax+0x40]);
|
||||
vscalefsh(xmm1|k1|T_z|T_rd_sae, xmm5, xmm7);
|
||||
// vreduceph/vreducesh and vrndscaleph/vrndscalesh: packed forms at xmm/ymm/zmm with ptr and
// ptr_b sources and a distinct immediate (0x1..0x7) per case; the register cases carry k1|T_z|T_sae.
vreduceph(xmm1, ptr [rax+0x40], 0x1);
|
||||
vreduceph(xmm1, ptr_b [rax+0x40], 0x2);
|
||||
vreduceph(ymm1, ptr [rax+0x40], 0x3);
|
||||
vreduceph(ymm1, ptr_b [rax+0x40], 0x4);
|
||||
vreduceph(zmm1, ptr [rax+0x40], 0x5);
|
||||
vreduceph(zmm1, ptr_b [rax+0x40], 0x6);
|
||||
vreduceph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
|
||||
vreducesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
|
||||
vreducesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
|
||||
vrndscaleph(xmm1, ptr [rax+0x40], 0x1);
|
||||
vrndscaleph(xmm1, ptr_b [rax+0x40], 0x2);
|
||||
vrndscaleph(ymm1, ptr [rax+0x40], 0x3);
|
||||
vrndscaleph(ymm1, ptr_b [rax+0x40], 0x4);
|
||||
vrndscaleph(zmm1, ptr [rax+0x40], 0x5);
|
||||
vrndscaleph(zmm1, ptr_b [rax+0x40], 0x6);
|
||||
vrndscaleph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
|
||||
vrndscalesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
|
||||
vrndscalesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
|
||||
// vfpclassph: opmask destination; the memory operand width is spelled explicitly
// (xword/yword/zword and their _b broadcast variants) since there is no vector register
// operand to convey it. vfpclasssh: register and memory source with a k1|k2 destination.
vfpclassph(k1, xword [rax+0x40], 0x1);
|
||||
vfpclassph(k1, xword_b[rax+0x40], 0x2);
|
||||
vfpclassph(k1, yword [rax+0x40], 0x3);
|
||||
vfpclassph(k1, yword_b[rax+0x40], 0x4);
|
||||
vfpclassph(k1, zword [rax+0x40], 0x5);
|
||||
vfpclassph(k1, zword_b[rax+0x40], 0x6);
|
||||
vfpclasssh(k1|k2, xmm3, 0x5);
|
||||
vfpclasssh(k1|k2, ptr [rax+0x40], 0x5);
|
||||
// vgetexpph/vgetexpsh and vgetmantph/vgetmantsh: memory forms (ptr at xmm/zmm, ptr_b at ymm)
// plus register forms with k1|T_z|T_sae; the getmant cases use immediates 0x1..0x6.
vgetexpph(xmm1, ptr [rax+0x40]);
|
||||
vgetexpph(ymm1, ptr_b [rax+0x40]);
|
||||
vgetexpph(zmm1, ptr [rax+0x40]);
|
||||
vgetexpph(zmm1|k1|T_z|T_sae, zmm5);
|
||||
vgetexpsh(xmm1, xmm5, ptr [rax+0x40]);
|
||||
vgetexpsh(xmm1|k1|T_z|T_sae, xmm3, xmm5);
|
||||
vgetmantph(xmm1, ptr [rax+0x40], 0x1);
|
||||
vgetmantph(ymm1, ptr_b [rax+0x40], 0x2);
|
||||
vgetmantph(zmm1, ptr [rax+0x40], 0x3);
|
||||
vgetmantph(zmm1|k1|T_z|T_sae, zmm5, 0x4);
|
||||
vgetmantsh(xmm1, xmm5, ptr [rax+0x40], 0x5);
|
||||
vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6);
|
||||
// vmovsh: masked load, masked store (mask attached to the memory operand), and the
// three-operand register form.
vmovsh(xmm1|k1|T_z, ptr [rax+0x40]);
|
||||
vmovsh(ptr [rax+0x40]|k1, xmm1);
|
||||
vmovsh(xmm1|k2|T_z, xmm3, xmm5);
|
||||
// Scalar conversions involving half precision: vcvtsd2sh, vcvtsh2sd, vcvtsh2ss, vcvtss2sh
// (register form with mask + T_rd_sae or T_sae, then memory form), and vcvtsh2si with
// 32-bit (edx) and 64-bit (rdx, r8) integer destinations.
vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3);
|
||||
vcvtsh2sd(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vcvtsh2ss(xmm1|k1|T_z|T_sae, xmm2, xmm3);
|
||||
vcvtsh2ss(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vcvtss2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vcvtss2sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vcvtsh2si(edx|T_rd_sae, xmm1);
|
||||
vcvtsh2si(edx, ptr [rax+0x40]);
|
||||
vcvtsh2si(rdx|T_rd_sae, xmm1);
|
||||
vcvtsh2si(r8, ptr [rax+0x40]);
|
||||
// vcvtph2dq, vcvtph2psx, vcvtph2udq, vcvttph2dq, vcvttph2udq: nine cases per mnemonic —
// xmm (reg, ptr, ptr_b), ymm (masked xmm source, ptr, ptr_b), zmm (masked ymm source with a
// rounding/SAE attribute, masked ptr, masked ptr_b). The source register is narrower than the
// destination in the ymm and zmm register cases.
vcvtph2dq(xmm1, xmm5);
|
||||
vcvtph2dq(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2dq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2dq(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2dq(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2dq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2dq(zmm1|k5|T_z|T_rd_sae, ymm3);
|
||||
vcvtph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtph2psx(xmm1, xmm5);
|
||||
vcvtph2psx(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2psx(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2psx(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2psx(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2psx(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2psx(zmm1|k5|T_z|T_sae, ymm3);
|
||||
vcvtph2psx(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2psx(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtph2udq(xmm1, xmm5);
|
||||
vcvtph2udq(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2udq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2udq(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2udq(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2udq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2udq(zmm1|k5|T_z|T_rd_sae, ymm3);
|
||||
vcvtph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvttph2dq(xmm1, xmm5);
|
||||
vcvttph2dq(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2dq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2dq(ymm1|k2|T_z, xmm5);
|
||||
vcvttph2dq(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2dq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2dq(zmm1|k5|T_z|T_sae, ymm3);
|
||||
vcvttph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvttph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvttph2udq(xmm1, xmm5);
|
||||
vcvttph2udq(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2udq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2udq(ymm1|k2|T_z, xmm5);
|
||||
vcvttph2udq(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2udq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2udq(zmm1|k5|T_z|T_sae, ymm3);
|
||||
vcvttph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvttph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
// vcvtph2pd, vcvtph2qq, vcvtph2uqq, vcvttph2uqq: same nine-case matrix as the ph->dq group,
// except that the zmm register case takes an xmm source (xmm3) rather than ymm.
vcvtph2pd(xmm1, xmm5);
|
||||
vcvtph2pd(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2pd(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2pd(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2pd(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2pd(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2pd(zmm1|k5|T_z|T_sae, xmm3);
|
||||
vcvtph2pd(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2pd(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtph2qq(xmm1, xmm5);
|
||||
vcvtph2qq(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2qq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2qq(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2qq(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2qq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2qq(zmm1|k5|T_z|T_rd_sae, xmm3);
|
||||
vcvtph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtph2uqq(xmm1, xmm5);
|
||||
vcvtph2uqq(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2uqq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2uqq(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2uqq(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2uqq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2uqq(zmm1|k5|T_z|T_rd_sae, xmm3);
|
||||
vcvtph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvttph2uqq(xmm1, xmm5);
|
||||
vcvttph2uqq(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2uqq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uqq(ymm1|k2|T_z, xmm5);
|
||||
vcvttph2uqq(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2uqq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uqq(zmm1|k5|T_z|T_sae, xmm3);
|
||||
vcvttph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvttph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
// vcvtdq2ph, vcvtps2phx, vcvtudq2ph: the destination is narrower than the source, so for an
// xmm destination the memory source width is spelled explicitly (xword/yword and _b variants).
// The ymm destination cases use a zmm register source with T_rd_sae, then plain ptr / ptr_b.
vcvtdq2ph(xmm1, xmm5);
|
||||
vcvtdq2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtdq2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtdq2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtdq2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtdq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtdq2ph(ymm1, ptr [rax+0x40]);
|
||||
vcvtdq2ph(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtps2phx(xmm1, xmm5);
|
||||
vcvtps2phx(xmm1, xword [rax+0x40]);
|
||||
vcvtps2phx(xmm1, xword_b [rax+0x40]);
|
||||
vcvtps2phx(xmm1, yword [rax+0x40]);
|
||||
vcvtps2phx(xmm1, yword_b [rax+0x40]);
|
||||
vcvtps2phx(ymm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtps2phx(ymm1, ptr [rax+0x40]);
|
||||
vcvtps2phx(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtudq2ph(xmm1, xmm5);
|
||||
vcvtudq2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtudq2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtudq2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtudq2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtudq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtudq2ph(ymm1, ptr [rax+0x40]);
|
||||
vcvtudq2ph(ymm1, ptr_b [rax+0x40]);
|
||||
// vcvtpd2ph, vcvtqq2ph, vcvtuqq2ph: the destination is always xmm1; the source is an
// xmm/ymm/zmm register (the zmm case with k2|T_z|T_rd_sae) or memory whose width is spelled
// explicitly as xword/yword/zword, each also in its _b broadcast variant.
vcvtpd2ph(xmm1, xmm5);
|
||||
vcvtpd2ph(xmm1, ymm5);
|
||||
vcvtpd2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtpd2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, zword [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, zword_b [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, xmm5);
|
||||
vcvtqq2ph(xmm1, ymm5);
|
||||
vcvtqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtqq2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, zword [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, zword_b [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, xmm5);
|
||||
vcvtuqq2ph(xmm1, ymm5);
|
||||
vcvtuqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtuqq2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, zword [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, zword_b [rax+0x40]);
|
||||
// vcvtph2uw, vcvtph2w, vcvttph2uw, vcvttph2w, vcvtuw2ph, vcvtw2ph: eight cases per mnemonic —
// xmm (reg, ptr, ptr_b), ymm (ptr, ptr_b), zmm (masked register form with T_rd_sae, or T_sae
// for the vcvtt* variants, then ptr and ptr_b). Source and destination widths match.
vcvtph2uw(xmm1, xmm5);
|
||||
vcvtph2uw(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2uw(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2uw(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2uw(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2uw(zmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtph2uw(zmm1, ptr [rax+0x40]);
|
||||
vcvtph2uw(zmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2w(xmm1, xmm5);
|
||||
vcvtph2w(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2w(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2w(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2w(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2w(zmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtph2w(zmm1, ptr [rax+0x40]);
|
||||
vcvtph2w(zmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uw(xmm1, xmm5);
|
||||
vcvttph2uw(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2uw(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uw(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2uw(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uw(zmm1|k2|T_z|T_sae, zmm5);
|
||||
vcvttph2uw(zmm1, ptr [rax+0x40]);
|
||||
vcvttph2uw(zmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2w(xmm1, xmm5);
|
||||
vcvttph2w(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2w(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2w(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2w(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2w(zmm1|k2|T_z|T_sae, zmm5);
|
||||
vcvttph2w(zmm1, ptr [rax+0x40]);
|
||||
vcvttph2w(zmm1, ptr_b [rax+0x40]);
|
||||
vcvtuw2ph(xmm1, xmm5);
|
||||
vcvtuw2ph(xmm1, ptr [rax+0x40]);
|
||||
vcvtuw2ph(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtuw2ph(ymm1, ptr [rax+0x40]);
|
||||
vcvtuw2ph(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtuw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtuw2ph(zmm1, ptr [rax+0x40]);
|
||||
vcvtuw2ph(zmm1, ptr_b [rax+0x40]);
|
||||
vcvtw2ph(xmm1, xmm5);
|
||||
vcvtw2ph(xmm1, ptr [rax+0x40]);
|
||||
vcvtw2ph(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtw2ph(ymm1, ptr [rax+0x40]);
|
||||
vcvtw2ph(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtw2ph(zmm1, ptr [rax+0x40]);
|
||||
vcvtw2ph(zmm1, ptr_b [rax+0x40]);
|
||||
// vcvtps2ph: register and memory destinations for xmm/ymm/zmm sources, each with a unique
// immediate (0x1..0xa). The first four cases are unmasked; the rest attach an opmask to the
// destination (including to the memory operand), and the ymm<-zmm case also carries T_sae.
vcvtps2ph(xmm1, xmm2, 0x1);
|
||||
vcvtps2ph(ptr [rax+0x40], xmm2, 0x2);
|
||||
vcvtps2ph(xmm1, ymm2, 0x3);
|
||||
vcvtps2ph(ptr [rax+0x40], ymm2, 0x4);
|
||||
vcvtps2ph(xmm1|k1|T_z, xmm2, 0x5);
|
||||
vcvtps2ph(ptr [rax+0x40]|k1, xmm3, 0x6);
|
||||
vcvtps2ph(xmm1|k2, ymm4, 0x7);
|
||||
vcvtps2ph(ptr [rax+0x40]|k2, ymm5, 0x8);
|
||||
vcvtps2ph(ymm1|k2|T_sae, zmm5, 0x9);
|
||||
vcvtps2ph(ptr [rax+0x40]|k5, zmm4, 0xa);
|
||||
// vcvtsh2usi, vcvttsh2si, vcvttsh2usi: 32-bit (ecx/eax) and 64-bit (r9/r13) integer
// destinations; register sources carry the rounding/SAE attribute on the destination operand
// (T_rd_sae for vcvtsh2usi, T_sae for the vcvtt* variants), memory sources carry none.
vcvtsh2usi(ecx|T_rd_sae, xmm1);
|
||||
vcvtsh2usi(eax, ptr [rax+0x40]);
|
||||
vcvtsh2usi(r9|T_rd_sae, xmm1);
|
||||
vcvtsh2usi(r13, ptr [rax+0x40]);
|
||||
vcvttsh2si(ecx|T_sae, xmm1);
|
||||
vcvttsh2si(eax, ptr [rax+0x40]);
|
||||
vcvttsh2si(r9|T_sae, xmm1);
|
||||
vcvttsh2si(r13, ptr [rax+0x40]);
|
||||
vcvttsh2usi(ecx|T_sae, xmm1);
|
||||
vcvttsh2usi(eax, ptr [rax+0x40]);
|
||||
vcvttsh2usi(r9|T_sae, xmm1);
|
||||
vcvttsh2usi(r13, ptr [rax+0x40]);
|
||||
// vcvttph2qq: xmm (reg, ptr, ptr_b), ymm (masked xmm source, ptr, ptr_b),
// zmm (masked xmm source with T_sae, masked ptr, masked ptr_b).
vcvttph2qq(xmm1, xmm5);
|
||||
vcvttph2qq(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2qq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2qq(ymm1|k2|T_z, xmm5);
|
||||
vcvttph2qq(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2qq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2qq(zmm1|k5|T_z|T_sae, xmm3);
|
||||
vcvttph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvttph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
// vcvtsi2sh / vcvtusi2sh: 32-bit (eax) and 64-bit (r9) register sources with T_rd_sae, and
// memory sources whose size is given explicitly as dword or qword.
vcvtsi2sh(xmm1|T_rd_sae, xmm2, eax);
|
||||
vcvtsi2sh(xmm1, xmm2, dword [rax+0x40]);
|
||||
vcvtsi2sh(xmm1|T_rd_sae, xmm2, r9);
|
||||
vcvtsi2sh(xmm1, xmm2, qword [rax+0x40]);
|
||||
vcvtusi2sh(xmm1|T_rd_sae, xmm2, eax);
|
||||
vcvtusi2sh(xmm1, xmm2, dword [rax+0x40]);
|
||||
vcvtusi2sh(xmm1|T_rd_sae, xmm2, r9);
|
||||
vcvtusi2sh(xmm1, xmm2, qword [rax+0x40]);
|
||||
// aadd / aand / aor / axor: memory destination with a register source. Each mnemonic is
// tested with a 64-bit base + 32-bit source, a 32-bit base (eax) + 32-bit source, and a
// 64-bit base + 64-bit source (r10).
aadd(ptr[rax], ecx);
|
||||
aadd(ptr[eax], ecx);
|
||||
aadd(ptr[rax], r10);
|
||||
aand(ptr[rax], ecx);
|
||||
aand(ptr[eax], ecx);
|
||||
aand(ptr[rax], r10);
|
||||
aor(ptr[rax], ecx);
|
||||
aor(ptr[eax], ecx);
|
||||
aor(ptr[rax], r10);
|
||||
axor(ptr[rax], ecx);
|
||||
axor(ptr[eax], ecx);
|
||||
axor(ptr[rax], r10);
|
||||
// cmp<cc>xadd: one case per condition-code spelling (be, b, le, l, nbe, nb, nle, nl, no, np,
// ns, nz, o, p, s, z), all with the same operands: [rax+r10*4], rcx, rdx.
cmpbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmplexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmplxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnlexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnlxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnpxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmppxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
// vsha512msg1 / vsha512msg2 / vsha512rnds2: register-only forms (note the mixed ymm/xmm operands).
// vsm3msg1 / vsm3msg2 / vsm3rnds2 and vsm4key4 / vsm4rnds4: xmm forms, each with one
// register-source and one memory-source case; vsm3rnds2 also takes an immediate.
vsha512msg1(ymm3, xmm5);
|
||||
vsha512msg2(ymm9, ymm10);
|
||||
vsha512rnds2(ymm1, ymm3, xmm2);
|
||||
vsm3msg1(xmm1, xmm2, xmm3);
|
||||
vsm3msg1(xmm1, xmm2, ptr [rax]);
|
||||
vsm3msg2(xmm5, xmm7, xmm3);
|
||||
vsm3msg2(xmm5, xmm6, ptr [rax]);
|
||||
vsm3rnds2(xmm5, xmm7, xmm3, 0x12);
|
||||
vsm3rnds2(xmm5, xmm7, ptr [rcx], 0x34);
|
||||
vsm4key4(xmm1, xmm2, xmm3);
|
||||
vsm4key4(xmm1, xmm2, ptr [rdx]);
|
||||
vsm4rnds4(xmm1, xmm2, xmm3);
|
||||
vsm4rnds4(xmm5, xmm6, ptr [rcx+rax*4]);
|
||||
// vpdpb{ss,su,uu}d[s] and vpdpw{su,us,uu}d[s]: for every mnemonic, one xmm case with a
// register source and one ymm case with a memory source. No masks or zmm forms are listed.
vpdpbssd(xmm1, xmm2, xmm3);
|
||||
vpdpbssd(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbssds(xmm1, xmm2, xmm3);
|
||||
vpdpbssds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbsud(xmm1, xmm2, xmm3);
|
||||
vpdpbsud(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbsuds(xmm1, xmm2, xmm3);
|
||||
vpdpbsuds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbuud(xmm1, xmm2, xmm3);
|
||||
vpdpbuud(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbuuds(xmm1, xmm2, xmm3);
|
||||
vpdpbuuds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwsud(xmm1, xmm2, xmm3);
|
||||
vpdpwsud(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwsuds(xmm1, xmm2, xmm3);
|
||||
vpdpwsuds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwusd(xmm1, xmm2, xmm3);
|
||||
vpdpwusd(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwusds(xmm1, xmm2, xmm3);
|
||||
vpdpwusds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwuud(xmm1, xmm2, xmm3);
|
||||
vpdpwuud(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwuuds(xmm1, xmm2, xmm3);
|
||||
vpdpwuuds(ymm1, ymm2, ptr [rax]);
|
||||
+294
@@ -0,0 +1,294 @@
|
||||
//
|
||||
// vcvtbf162ibs: xm/ym/zm destinations, each with a register, full-width memory (ptr) and
// broadcast memory (ptr_b) source. No rounding/SAE case is listed for this mnemonic.
vcvtbf162ibs(xm1, xm2);
|
||||
vcvtbf162ibs(xm1, ptr[rax+128]);
|
||||
vcvtbf162ibs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvtbf162ibs(ym1, ym2);
|
||||
vcvtbf162ibs(ym1, ptr[rax+128]);
|
||||
vcvtbf162ibs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvtbf162ibs(zm1, zm2);
|
||||
vcvtbf162ibs(zm1, ptr[rax+128]);
|
||||
vcvtbf162ibs(zm1, ptr_b[rax+128]);
|
||||
//
|
||||
// vcvtbf162iubs: xm/ym/zm destinations, each with register, ptr and ptr_b sources.
vcvtbf162iubs(xm1, xm2);
|
||||
vcvtbf162iubs(xm1, ptr[rax+128]);
|
||||
vcvtbf162iubs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvtbf162iubs(ym1, ym2);
|
||||
vcvtbf162iubs(ym1, ptr[rax+128]);
|
||||
vcvtbf162iubs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvtbf162iubs(zm1, zm2);
|
||||
vcvtbf162iubs(zm1, ptr[rax+128]);
|
||||
vcvtbf162iubs(zm1, ptr_b[rax+128]);
|
||||
//
|
||||
// vcvttbf162ibs: xm/ym/zm destinations, each with register, ptr and ptr_b sources.
vcvttbf162ibs(xm1, xm2);
|
||||
vcvttbf162ibs(xm1, ptr[rax+128]);
|
||||
vcvttbf162ibs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttbf162ibs(ym1, ym2);
|
||||
vcvttbf162ibs(ym1, ptr[rax+128]);
|
||||
vcvttbf162ibs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttbf162ibs(zm1, zm2);
|
||||
vcvttbf162ibs(zm1, ptr[rax+128]);
|
||||
vcvttbf162ibs(zm1, ptr_b[rax+128]);
|
||||
//
|
||||
// vcvttbf162iubs: xm/ym/zm destinations, each with register, ptr and ptr_b sources.
vcvttbf162iubs(xm1, xm2);
|
||||
vcvttbf162iubs(xm1, ptr[rax+128]);
|
||||
vcvttbf162iubs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttbf162iubs(ym1, ym2);
|
||||
vcvttbf162iubs(ym1, ptr[rax+128]);
|
||||
vcvttbf162iubs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttbf162iubs(zm1, zm2);
|
||||
vcvttbf162iubs(zm1, ptr[rax+128]);
|
||||
vcvttbf162iubs(zm1, ptr_b[rax+128]);
|
||||
//
|
||||
// vcvttpd2qqs: xm/ym/zm destinations with register, ptr and ptr_b sources, plus one zm
// register case where T_sae is attached to the source operand.
vcvttpd2qqs(xm1, xm2);
|
||||
vcvttpd2qqs(xm1, ptr[rax+128]);
|
||||
vcvttpd2qqs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttpd2qqs(ym1, ym2);
|
||||
vcvttpd2qqs(ym1, ptr[rax+128]);
|
||||
vcvttpd2qqs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttpd2qqs(zm1, zm2);
|
||||
vcvttpd2qqs(zm1, zm2|T_sae);
|
||||
vcvttpd2qqs(zm1, ptr[rax+128]);
|
||||
vcvttpd2qqs(zm1, ptr_b[rax+128]);
|
||||
//
|
||||
// vcvttpd2uqqs: xm/ym/zm destinations with register, ptr and ptr_b sources, plus one zm
// register case where T_sae is attached to the source operand.
vcvttpd2uqqs(xm1, xm2);
|
||||
vcvttpd2uqqs(xm1, ptr[rax+128]);
|
||||
vcvttpd2uqqs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttpd2uqqs(ym1, ym2);
|
||||
vcvttpd2uqqs(ym1, ptr[rax+128]);
|
||||
vcvttpd2uqqs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttpd2uqqs(zm1, zm2);
|
||||
vcvttpd2uqqs(zm1, zm2|T_sae);
|
||||
vcvttpd2uqqs(zm1, ptr[rax+128]);
|
||||
vcvttpd2uqqs(zm1, ptr_b[rax+128]);
|
||||
//
|
||||
// vcvtph2ibs: xm/ym/zm destinations with register, ptr and ptr_b sources, plus one zm
// register case with T_ru_sae attached to the source operand.
vcvtph2ibs(xm1, xm2);
|
||||
vcvtph2ibs(xm1, ptr[rax+128]);
|
||||
vcvtph2ibs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvtph2ibs(ym1, ym2);
|
||||
vcvtph2ibs(ym1, ptr[rax+128]);
|
||||
vcvtph2ibs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvtph2ibs(zm1, zm2);
|
||||
vcvtph2ibs(zm1, zm2|T_ru_sae);
|
||||
vcvtph2ibs(zm1, ptr[rax+128]);
|
||||
vcvtph2ibs(zm1, ptr_b[rax+128]);
|
||||
//
|
||||
// vcvtph2iubs: xm/ym/zm destinations with register, ptr and ptr_b sources, plus one zm
// register case with T_ru_sae attached to the source operand.
vcvtph2iubs(xm1, xm2);
|
||||
vcvtph2iubs(xm1, ptr[rax+128]);
|
||||
vcvtph2iubs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvtph2iubs(ym1, ym2);
|
||||
vcvtph2iubs(ym1, ptr[rax+128]);
|
||||
vcvtph2iubs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvtph2iubs(zm1, zm2);
|
||||
vcvtph2iubs(zm1, zm2|T_ru_sae);
|
||||
vcvtph2iubs(zm1, ptr[rax+128]);
|
||||
vcvtph2iubs(zm1, ptr_b[rax+128]);
|
||||
// vcvttph2ibs operand forms: xm1 / ym1 / zm1 destinations, each issued with
// a same-width register source, a ptr[rax+128] source and a ptr_b[rax+128] source.
// The zm1 register form is additionally issued with zm2|T_ru_sae.
// NOTE(review): this vcvtt* group uses T_ru_sae, while the vcvttps2dqs group in this
// file uses T_sae -- confirm against the ISA reference that T_ru_sae is intended here.
|
||||
vcvttph2ibs(xm1, xm2);
|
||||
vcvttph2ibs(xm1, ptr[rax+128]);
|
||||
vcvttph2ibs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttph2ibs(ym1, ym2);
|
||||
vcvttph2ibs(ym1, ptr[rax+128]);
|
||||
vcvttph2ibs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttph2ibs(zm1, zm2);
|
||||
vcvttph2ibs(zm1, zm2|T_ru_sae);
|
||||
vcvttph2ibs(zm1, ptr[rax+128]);
|
||||
vcvttph2ibs(zm1, ptr_b[rax+128]);
|
||||
// vcvttph2iubs operand forms: xm1 / ym1 / zm1 destinations, each issued with
// a same-width register source, a ptr[rax+128] source and a ptr_b[rax+128] source.
// The zm1 register form is additionally issued with zm2|T_ru_sae.
// NOTE(review): this vcvtt* group uses T_ru_sae, while the vcvttps2dqs group in this
// file uses T_sae -- confirm against the ISA reference that T_ru_sae is intended here.
|
||||
vcvttph2iubs(xm1, xm2);
|
||||
vcvttph2iubs(xm1, ptr[rax+128]);
|
||||
vcvttph2iubs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttph2iubs(ym1, ym2);
|
||||
vcvttph2iubs(ym1, ptr[rax+128]);
|
||||
vcvttph2iubs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttph2iubs(zm1, zm2);
|
||||
vcvttph2iubs(zm1, zm2|T_ru_sae);
|
||||
vcvttph2iubs(zm1, ptr[rax+128]);
|
||||
vcvttph2iubs(zm1, ptr_b[rax+128]);
|
||||
// vcvttps2dqs operand forms: xm1 / ym1 / zm1 destinations, each issued with
// a same-width register source, a ptr[rax+128] source and a ptr_b[rax+128] source.
// The zm1 register form is additionally issued with zm2|T_sae.
// (ptr_b / T_sae are presumably xbyak's broadcast and suppress-all-exceptions modifiers -- confirm.)
|
||||
vcvttps2dqs(xm1, xm2);
|
||||
vcvttps2dqs(xm1, ptr[rax+128]);
|
||||
vcvttps2dqs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2dqs(ym1, ym2);
|
||||
vcvttps2dqs(ym1, ptr[rax+128]);
|
||||
vcvttps2dqs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2dqs(zm1, zm2);
|
||||
vcvttps2dqs(zm1, zm2|T_sae);
|
||||
vcvttps2dqs(zm1, ptr[rax+128]);
|
||||
vcvttps2dqs(zm1, ptr_b[rax+128]);
|
||||
// vcvtps2ibs operand forms: xm1 / ym1 / zm1 destinations, each issued with
// a same-width register source, a ptr[rax+128] source and a ptr_b[rax+128] source.
// The zm1 register form is additionally issued with zm2|T_ru_sae.
// (ptr_b / T_ru_sae are presumably xbyak's broadcast and round-up+SAE modifiers -- confirm.)
|
||||
vcvtps2ibs(xm1, xm2);
|
||||
vcvtps2ibs(xm1, ptr[rax+128]);
|
||||
vcvtps2ibs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvtps2ibs(ym1, ym2);
|
||||
vcvtps2ibs(ym1, ptr[rax+128]);
|
||||
vcvtps2ibs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvtps2ibs(zm1, zm2);
|
||||
vcvtps2ibs(zm1, zm2|T_ru_sae);
|
||||
vcvtps2ibs(zm1, ptr[rax+128]);
|
||||
vcvtps2ibs(zm1, ptr_b[rax+128]);
|
||||
// vcvtps2iubs operand forms: xm1 / ym1 / zm1 destinations, each issued with
// a same-width register source, a ptr[rax+128] source and a ptr_b[rax+128] source.
// The zm1 register form is additionally issued with zm2|T_ru_sae.
// (ptr_b / T_ru_sae are presumably xbyak's broadcast and round-up+SAE modifiers -- confirm.)
|
||||
vcvtps2iubs(xm1, xm2);
|
||||
vcvtps2iubs(xm1, ptr[rax+128]);
|
||||
vcvtps2iubs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvtps2iubs(ym1, ym2);
|
||||
vcvtps2iubs(ym1, ptr[rax+128]);
|
||||
vcvtps2iubs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvtps2iubs(zm1, zm2);
|
||||
vcvtps2iubs(zm1, zm2|T_ru_sae);
|
||||
vcvtps2iubs(zm1, ptr[rax+128]);
|
||||
vcvtps2iubs(zm1, ptr_b[rax+128]);
|
||||
// vcvttps2ibs operand forms: xm1 / ym1 / zm1 destinations, each issued with
// a same-width register source, a ptr[rax+128] source and a ptr_b[rax+128] source.
// The zm1 register form is additionally issued with zm2|T_ru_sae.
// NOTE(review): this vcvtt* group uses T_ru_sae, while the vcvttps2dqs group in this
// file uses T_sae -- confirm against the ISA reference that T_ru_sae is intended here.
|
||||
vcvttps2ibs(xm1, xm2);
|
||||
vcvttps2ibs(xm1, ptr[rax+128]);
|
||||
vcvttps2ibs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2ibs(ym1, ym2);
|
||||
vcvttps2ibs(ym1, ptr[rax+128]);
|
||||
vcvttps2ibs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2ibs(zm1, zm2);
|
||||
vcvttps2ibs(zm1, zm2|T_ru_sae);
|
||||
vcvttps2ibs(zm1, ptr[rax+128]);
|
||||
vcvttps2ibs(zm1, ptr_b[rax+128]);
|
||||
// vcvttps2iubs operand forms: xm1 / ym1 / zm1 destinations, each issued with
// a same-width register source, a ptr[rax+128] source and a ptr_b[rax+128] source.
// The zm1 register form is additionally issued with zm2|T_ru_sae.
// NOTE(review): this vcvtt* group uses T_ru_sae, while the vcvttps2dqs group in this
// file uses T_sae -- confirm against the ISA reference that T_ru_sae is intended here.
|
||||
vcvttps2iubs(xm1, xm2);
|
||||
vcvttps2iubs(xm1, ptr[rax+128]);
|
||||
vcvttps2iubs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2iubs(ym1, ym2);
|
||||
vcvttps2iubs(ym1, ptr[rax+128]);
|
||||
vcvttps2iubs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2iubs(zm1, zm2);
|
||||
vcvttps2iubs(zm1, zm2|T_ru_sae);
|
||||
vcvttps2iubs(zm1, ptr[rax+128]);
|
||||
vcvttps2iubs(zm1, ptr_b[rax+128]);
|
||||
// vcvttps2udqs operand forms: xm1 / ym1 / zm1 destinations, each issued with
// a same-width register source, a ptr[rax+128] source and a ptr_b[rax+128] source.
// The zm1 register form is additionally issued with zm2|T_sae.
// (ptr_b / T_sae are presumably xbyak's broadcast and suppress-all-exceptions modifiers -- confirm.)
|
||||
vcvttps2udqs(xm1, xm2);
|
||||
vcvttps2udqs(xm1, ptr[rax+128]);
|
||||
vcvttps2udqs(xm1, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2udqs(ym1, ym2);
|
||||
vcvttps2udqs(ym1, ptr[rax+128]);
|
||||
vcvttps2udqs(ym1, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2udqs(zm1, zm2);
|
||||
vcvttps2udqs(zm1, zm2|T_sae);
|
||||
vcvttps2udqs(zm1, ptr[rax+128]);
|
||||
vcvttps2udqs(zm1, ptr_b[rax+128]);
|
||||
|
||||
// vcvttpd2dqs operand forms with a k1|T_z-decorated destination. The destination is
// narrower than or equal to the source here: xm1 <- xm2 / ym2, ym1 <- zm2.
// Memory sources carry an explicit size (xword / yword / zword, plus the *_b variants)
// rather than plain ptr -- presumably because the destination register alone does not
// determine the source width; confirm against xbyak docs.
// The zm2 register source is additionally issued with |T_sae.
|
||||
vcvttpd2dqs(xm1|k1|T_z, xm2);
|
||||
vcvttpd2dqs(xm1|k1|T_z, xword [rax+128]);
|
||||
vcvttpd2dqs(xm1|k1|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvttpd2dqs(xm1|k1|T_z, ym2);
|
||||
vcvttpd2dqs(xm1|k1|T_z, yword [rax+128]);
|
||||
vcvttpd2dqs(xm1|k1|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvttpd2dqs(ym1|k1|T_z, zm2);
|
||||
vcvttpd2dqs(ym1|k1|T_z, zm2|T_sae);
|
||||
vcvttpd2dqs(ym1|k1|T_z, zword [rax+128]);
|
||||
vcvttpd2dqs(ym1|k1|T_z, zword_b[rax+128]);
|
||||
|
||||
// vcvttpd2udqs operand forms with a k1|T_z-decorated destination. The destination is
// narrower than or equal to the source here: xm1 <- xm2 / ym2, ym1 <- zm2.
// Memory sources carry an explicit size (xword / yword / zword, plus the *_b variants)
// rather than plain ptr -- presumably because the destination register alone does not
// determine the source width; confirm against xbyak docs.
// The zm2 register source is additionally issued with |T_sae.
|
||||
vcvttpd2udqs(xm1|k1|T_z, xm2);
|
||||
vcvttpd2udqs(xm1|k1|T_z, xword [rax+128]);
|
||||
vcvttpd2udqs(xm1|k1|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvttpd2udqs(xm1|k1|T_z, ym2);
|
||||
vcvttpd2udqs(xm1|k1|T_z, yword [rax+128]);
|
||||
vcvttpd2udqs(xm1|k1|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvttpd2udqs(ym1|k1|T_z, zm2);
|
||||
vcvttpd2udqs(ym1|k1|T_z, zm2|T_sae);
|
||||
vcvttpd2udqs(ym1|k1|T_z, zword [rax+128]);
|
||||
vcvttpd2udqs(ym1|k1|T_z, zword_b[rax+128]);
|
||||
// vcvttps2qqs operand forms. The register source is narrower than or equal to the
// destination here: xm1 <- xm2, ym1 <- xm2, zm1 <- ym2; memory sources use ptr / ptr_b.
// Destinations carry k1|T_z, except for one undecorated zm1 form.
// Unlike the same-width groups in this file, T_sae is attached to the zm1 destination
// (zm1|k1|T_z|T_sae) rather than to the ym2 source.
|
||||
vcvttps2qqs(xm1|k1|T_z, xm2);
|
||||
vcvttps2qqs(xm1|k1|T_z, ptr [rax+128]);
|
||||
vcvttps2qqs(xm1|k1|T_z, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2qqs(ym1|k1|T_z, xm2);
|
||||
vcvttps2qqs(ym1|k1|T_z, ptr [rax+128]);
|
||||
vcvttps2qqs(ym1|k1|T_z, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2qqs(zm1, ym2);
|
||||
vcvttps2qqs(zm1|k1|T_z, ym2);
|
||||
vcvttps2qqs(zm1|k1|T_z|T_sae, ym2);
|
||||
vcvttps2qqs(zm1|k1|T_z, ptr [rax+128]);
|
||||
vcvttps2qqs(zm1|k1|T_z, ptr_b[rax+128]);
|
||||
|
||||
// vcvttps2uqqs operand forms. The register source is narrower than or equal to the
// destination here: xm1 <- xm2, ym1 <- xm2, zm1 <- ym2; memory sources use ptr / ptr_b.
// Destinations carry k1|T_z, except for one undecorated zm1 form.
// Unlike the same-width groups in this file, T_sae is attached to the zm1 destination
// (zm1|k1|T_z|T_sae) rather than to the ym2 source.
|
||||
vcvttps2uqqs(xm1|k1|T_z, xm2);
|
||||
vcvttps2uqqs(xm1|k1|T_z, ptr [rax+128]);
|
||||
vcvttps2uqqs(xm1|k1|T_z, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2uqqs(ym1|k1|T_z, xm2);
|
||||
vcvttps2uqqs(ym1|k1|T_z, ptr [rax+128]);
|
||||
vcvttps2uqqs(ym1|k1|T_z, ptr_b[rax+128]);
|
||||
|
||||
vcvttps2uqqs(zm1, ym2);
|
||||
vcvttps2uqqs(zm1|k1|T_z, ym2);
|
||||
vcvttps2uqqs(zm1|k1|T_z|T_sae, ym2);
|
||||
vcvttps2uqqs(zm1|k1|T_z, ptr [rax+128]);
|
||||
vcvttps2uqqs(zm1|k1|T_z, ptr_b[rax+128]);
|
||||
|
||||
// vcvttsd2sis operand forms: general-purpose destinations eax and r30, each issued with
// an xm1 source, an xm1|T_sae source and a ptr[rax+128] source.
// (r30 is presumably an APX extended 64-bit register, giving a 32-bit and a 64-bit
// destination case -- confirm.)
|
||||
vcvttsd2sis(eax, xm1);
|
||||
vcvttsd2sis(eax, xm1|T_sae);
|
||||
vcvttsd2sis(eax, ptr[rax+128]);
|
||||
|
||||
vcvttsd2sis(r30, xm1);
|
||||
vcvttsd2sis(r30, xm1|T_sae);
|
||||
vcvttsd2sis(r30, ptr[rax+128]);
|
||||
// vcvttsd2usis operand forms: general-purpose destinations eax and r30, each issued with
// an xm1 source, an xm1|T_sae source and a ptr[rax+128] source.
// (r30 is presumably an APX extended 64-bit register, giving a 32-bit and a 64-bit
// destination case -- confirm.)
|
||||
vcvttsd2usis(eax, xm1);
|
||||
vcvttsd2usis(eax, xm1|T_sae);
|
||||
vcvttsd2usis(eax, ptr[rax+128]);
|
||||
|
||||
vcvttsd2usis(r30, xm1);
|
||||
vcvttsd2usis(r30, xm1|T_sae);
|
||||
vcvttsd2usis(r30, ptr[rax+128]);
|
||||
// vcvttss2sis operand forms: general-purpose destinations eax and r30, each issued with
// an xm1 source, an xm1|T_sae source and a ptr[rax+128] source.
// (r30 is presumably an APX extended 64-bit register, giving a 32-bit and a 64-bit
// destination case -- confirm.)
|
||||
vcvttss2sis(eax, xm1);
|
||||
vcvttss2sis(eax, xm1|T_sae);
|
||||
vcvttss2sis(eax, ptr[rax+128]);
|
||||
|
||||
vcvttss2sis(r30, xm1);
|
||||
vcvttss2sis(r30, xm1|T_sae);
|
||||
vcvttss2sis(r30, ptr[rax+128]);
|
||||
// vcvttss2usis operand forms: general-purpose destinations eax and r30, each issued with
// an xm1 source, an xm1|T_sae source and a ptr[rax+128] source.
// (r30 is presumably an APX extended 64-bit register, giving a 32-bit and a 64-bit
// destination case -- confirm.)
|
||||
vcvttss2usis(eax, xm1);
|
||||
vcvttss2usis(eax, xm1|T_sae);
|
||||
vcvttss2usis(eax, ptr[rax+128]);
|
||||
|
||||
vcvttss2usis(r30, xm1);
|
||||
vcvttss2usis(r30, xm1|T_sae);
|
||||
vcvttss2usis(r30, ptr[rax+128]);
|
||||
Reference in New Issue
Block a user