Merge commit '0a097849af1bcd979ff4b430a03971f16822cbcb' as 'external/xbyak'
This commit is contained in:
210
external/xbyak/test/dataset/bf16.txt
vendored
Normal file
210
external/xbyak/test/dataset/bf16.txt
vendored
Normal file
@@ -0,0 +1,210 @@
|
||||
// add / div / max / min / mul / scalef / sub
// Every mnemonic in this group is listed with the same four operand shapes:
//   (xm1, xm2, xm3)                    - registers only
//   (ym1|k1, ym2, ptr[rax+128])        - k1-decorated destination, ptr[] source
//   (ym1|k1, ym2, ptr_b[rax+128])      - same destination, ptr_b[] source
//   (zm1|k2|T_z, zm2, ptr_b[rax+128])  - k2 and T_z on the destination, ptr_b[] source
vaddbf16(xm1, xm2, xm3);
|
||||
vaddbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vaddbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vaddbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vdivbf16(xm1, xm2, xm3);
|
||||
vdivbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vdivbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vdivbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vmaxbf16(xm1, xm2, xm3);
|
||||
vmaxbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vmaxbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vmaxbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vminbf16(xm1, xm2, xm3);
|
||||
vminbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vminbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vminbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vmulbf16(xm1, xm2, xm3);
|
||||
vmulbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vmulbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vmulbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vscalefbf16 is listed again near the end of this file with a different operand set.
vscalefbf16(xm1, xm2, xm3);
|
||||
vscalefbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vscalefbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vscalefbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vsubbf16(xm1, xm2, xm3);
|
||||
vsubbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vsubbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vsubbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
// madd: vfmadd132bf16 / vfmadd213bf16 / vfmadd231bf16.
// Each variant is listed with four operand shapes: xm registers only,
// ym|k1 with ptr[rax+128], ym|k1 with ptr_b[rax+128], and zm|k2|T_z with ptr_b[rax+128].
|
||||
vfmadd132bf16(xm1, xm2, xm3);
|
||||
vfmadd132bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmadd132bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmadd132bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfmadd213bf16(xm1, xm2, xm3);
|
||||
vfmadd213bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmadd213bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmadd213bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfmadd231bf16(xm1, xm2, xm3);
|
||||
vfmadd231bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmadd231bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmadd231bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
// nmadd: vfnmadd132bf16 / vfnmadd213bf16 / vfnmadd231bf16.
// Each variant is listed with four operand shapes: xm registers only,
// ym|k1 with ptr[rax+128], ym|k1 with ptr_b[rax+128], and zm|k2|T_z with ptr_b[rax+128].
|
||||
vfnmadd132bf16(xm1, xm2, xm3);
|
||||
vfnmadd132bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmadd132bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmadd132bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfnmadd213bf16(xm1, xm2, xm3);
|
||||
vfnmadd213bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmadd213bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmadd213bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfnmadd231bf16(xm1, xm2, xm3);
|
||||
vfnmadd231bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmadd231bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmadd231bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
// msub: vfmsub132bf16 / vfmsub213bf16 / vfmsub231bf16.
// Each variant is listed with four operand shapes: xm registers only,
// ym|k1 with ptr[rax+128], ym|k1 with ptr_b[rax+128], and zm|k2|T_z with ptr_b[rax+128].
|
||||
vfmsub132bf16(xm1, xm2, xm3);
|
||||
vfmsub132bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmsub132bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmsub132bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfmsub213bf16(xm1, xm2, xm3);
|
||||
vfmsub213bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmsub213bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmsub213bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfmsub231bf16(xm1, xm2, xm3);
|
||||
vfmsub231bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfmsub231bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfmsub231bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
// nmsub: vfnmsub132bf16 / vfnmsub213bf16 / vfnmsub231bf16.
// Each variant is listed with four operand shapes: xm registers only,
// ym|k1 with ptr[rax+128], ym|k1 with ptr_b[rax+128], and zm|k2|T_z with ptr_b[rax+128].
|
||||
vfnmsub132bf16(xm1, xm2, xm3);
|
||||
vfnmsub132bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmsub132bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmsub132bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfnmsub213bf16(xm1, xm2, xm3);
|
||||
vfnmsub213bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmsub213bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmsub213bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vfnmsub231bf16(xm1, xm2, xm3);
|
||||
vfnmsub231bf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vfnmsub231bf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vfnmsub231bf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// cmp: destinations are k1 through k6, one per case, and the trailing
// immediate argument steps from 5 to 10.
// Sources cover xm/ym/zm register pairs plus ptr[] and ptr_b[] memory operands;
// the high registers ym15, zm30 and zm20 are used as well as low-numbered ones.
vcmpbf16(k1, xm5, xm4, 5);
|
||||
vcmpbf16(k2, ym5, ym4, 6);
|
||||
vcmpbf16(k3, ym15, ptr_b[rax+128], 7);
|
||||
vcmpbf16(k4, zm30, zm20, 8);
|
||||
vcmpbf16(k5, zm1, ptr[rax+128], 9);
|
||||
vcmpbf16(k6, zm10, ptr_b[rax+128], 10);
|
||||
|
||||
// fpclass: destination is a k register, with or without a |k5 decoration.
// Sources cover xm/ym/zm registers and explicitly sized memory operands
// (xword/yword/zword) in both the plain and the _b spelling.
// The trailing immediate argument steps from 5 to 13.
vfpclassbf16(k1, xm4, 5);
|
||||
vfpclassbf16(k2|k5, ym4, 6);
|
||||
vfpclassbf16(k3|k5, zm20, 7);
|
||||
vfpclassbf16(k3|k5, xword[rax+128], 8);
|
||||
vfpclassbf16(k3, xword_b[rax+128], 9);
|
||||
// NOTE(review): the destination here is k5 decorated with k5 itself, unlike the
// neighbouring cases (k2|k5, k3|k5, k6|k5, k7|k5) - confirm this is intentional.
vfpclassbf16(k5|k5, yword[rax+128], 10);
|
||||
vfpclassbf16(k6|k5, yword_b[rax+128], 11);
|
||||
vfpclassbf16(k7|k5, zword[rax+128], 12);
|
||||
vfpclassbf16(k7|k5, zword_b[rax+128], 13);
|
||||
|
||||
// comis: two-operand cases only - an xm register source and a ptr[] source.
// No k-register decoration and no ptr_b[] operand appear in these cases.
vcomisbf16(xm2, xm3);
|
||||
vcomisbf16(xm2, ptr[rax+128]);
|
||||
|
||||
// getexp: two-operand cases with a k3-decorated destination.
// For each destination width (xm1, ym1, zm1) the source is a register,
// a ptr[rax+128] operand, then a ptr_b[rax+128] operand.
// Register sources use the long spellings xmm2/ymm2/zmm2 rather than xm2/ym2/zm2.
vgetexpbf16(xm1|k3, xmm2);
|
||||
vgetexpbf16(xm1|k3, ptr[rax+128]);
|
||||
vgetexpbf16(xm1|k3, ptr_b[rax+128]);
|
||||
|
||||
vgetexpbf16(ym1|k3, ymm2);
|
||||
vgetexpbf16(ym1|k3, ptr[rax+128]);
|
||||
vgetexpbf16(ym1|k3, ptr_b[rax+128]);
|
||||
|
||||
vgetexpbf16(zm1|k3, zmm2);
|
||||
vgetexpbf16(zm1|k3, ptr[rax+128]);
|
||||
vgetexpbf16(zm1|k3, ptr_b[rax+128]);
|
||||
|
||||
// getmant: k3-decorated destination, one source, and a trailing immediate.
// For each destination width (xm1, ym1, zm1) the source is a register (imm 3),
// a ptr[rax+128] operand (imm 5), then a ptr_b[rax+128] operand (imm 9).
// Register sources use the long spellings xmm2/ymm2/zmm2.
vgetmantbf16(xm1|k3, xmm2, 3);
|
||||
vgetmantbf16(xm1|k3, ptr[rax+128], 5);
|
||||
vgetmantbf16(xm1|k3, ptr_b[rax+128], 9);
|
||||
|
||||
vgetmantbf16(ym1|k3, ymm2, 3);
|
||||
vgetmantbf16(ym1|k3, ptr[rax+128], 5);
|
||||
vgetmantbf16(ym1|k3, ptr_b[rax+128], 9);
|
||||
|
||||
vgetmantbf16(zm1|k3, zmm2, 3);
|
||||
vgetmantbf16(zm1|k3, ptr[rax+128], 5);
|
||||
vgetmantbf16(zm1|k3, ptr_b[rax+128], 9);
|
||||
|
||||
// rcp: two-operand cases with a k5-decorated destination.
// For each destination width (xm1, ym1, zm1) the source is a register,
// a ptr[rcx+128] operand, then a ptr_b[rcx+128] operand.
// These cases use rcx as the base register, not rax.
vrcpbf16(xm1|k5, xm2);
|
||||
vrcpbf16(xm1|k5, ptr[rcx+128]);
|
||||
vrcpbf16(xm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrcpbf16(ym1|k5, ym2);
|
||||
vrcpbf16(ym1|k5, ptr[rcx+128]);
|
||||
vrcpbf16(ym1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrcpbf16(zm1|k5, zm2);
|
||||
vrcpbf16(zm1|k5, ptr[rcx+128]);
|
||||
vrcpbf16(zm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
// reduce: k4-decorated destination, one source, trailing immediate fixed at 1.
// For each destination width (xm1, ym1, zm1) the source is a register,
// a ptr[rax+128] operand, then a ptr_b[rax+128] operand.
vreducebf16(xm1|k4, xm2, 1);
|
||||
vreducebf16(xm1|k4, ptr[rax+128], 1);
|
||||
vreducebf16(xm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vreducebf16(ym1|k4, ym2, 1);
|
||||
vreducebf16(ym1|k4, ptr[rax+128], 1);
|
||||
vreducebf16(ym1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vreducebf16(zm1|k4, zm2, 1);
|
||||
vreducebf16(zm1|k4, ptr[rax+128], 1);
|
||||
vreducebf16(zm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
// rndscale: k4-decorated destination, one source, trailing immediate fixed at 1.
// For each destination width (xm1, ym1, zm1) the source is a register,
// a ptr[rax+128] operand, then a ptr_b[rax+128] operand.
vrndscalebf16(xm1|k4, xm2, 1);
|
||||
vrndscalebf16(xm1|k4, ptr[rax+128], 1);
|
||||
vrndscalebf16(xm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrndscalebf16(ym1|k4, ym2, 1);
|
||||
vrndscalebf16(ym1|k4, ptr[rax+128], 1);
|
||||
vrndscalebf16(ym1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrndscalebf16(zm1|k4, zm2, 1);
|
||||
vrndscalebf16(zm1|k4, ptr[rax+128], 1);
|
||||
vrndscalebf16(zm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
// rsqrt: two-operand cases with a k5-decorated destination.
// For each destination width (xm1, ym1, zm1) the source is a register,
// a ptr[rcx+128] operand, then a ptr_b[rcx+128] operand.
// These cases use rcx as the base register, not rax.
vrsqrtbf16(xm1|k5, xm2);
|
||||
vrsqrtbf16(xm1|k5, ptr[rcx+128]);
|
||||
vrsqrtbf16(xm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrsqrtbf16(ym1|k5, ym2);
|
||||
vrsqrtbf16(ym1|k5, ptr[rcx+128]);
|
||||
vrsqrtbf16(ym1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrsqrtbf16(zm1|k5, zm2);
|
||||
vrsqrtbf16(zm1|k5, ptr[rcx+128]);
|
||||
vrsqrtbf16(zm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
// scalef, second set: vscalefbf16 already appears earlier in this file with
// (xm1, xm2, xm3)-style operands. The cases here use a k5-decorated destination,
// a different first source per width (xm5, ym9, zm30), and rcx as the base register.
// For each width the second source is a register, ptr[rcx+128], then ptr_b[rcx+128].
vscalefbf16(xm1|k5, xm5, xm2);
|
||||
vscalefbf16(xm1|k5, xm5, ptr[rcx+128]);
|
||||
vscalefbf16(xm1|k5, xm5, ptr_b[rcx+128]);
|
||||
|
||||
vscalefbf16(ym1|k5, ym9, ym2);
|
||||
vscalefbf16(ym1|k5, ym9, ptr[rcx+128]);
|
||||
vscalefbf16(ym1|k5, ym9, ptr_b[rcx+128]);
|
||||
|
||||
vscalefbf16(zm1|k5, zm30, zm2);
|
||||
vscalefbf16(zm1|k5, zm30, ptr[rcx+128]);
|
||||
vscalefbf16(zm1|k5, zm30, ptr_b[rcx+128]);
|
||||
|
||||
// sqrt: two-operand cases with a k3-decorated destination.
// For each destination width (xm5, ym5, zm5) the source is a register,
// a ptr[rax+128] operand, then a ptr_b[rax+128] operand.
// Register sources use the long spellings xmm4/ymm4/zmm4.
vsqrtbf16(xm5|k3, xmm4);
|
||||
vsqrtbf16(xm5|k3, ptr[rax+128]);
|
||||
vsqrtbf16(xm5|k3, ptr_b[rax+128]);
|
||||
|
||||
vsqrtbf16(ym5|k3, ymm4);
|
||||
vsqrtbf16(ym5|k3, ptr[rax+128]);
|
||||
vsqrtbf16(ym5|k3, ptr_b[rax+128]);
|
||||
|
||||
vsqrtbf16(zm5|k3, zmm4);
|
||||
vsqrtbf16(zm5|k3, ptr[rax+128]);
|
||||
vsqrtbf16(zm5|k3, ptr_b[rax+128]);
|
||||
Reference in New Issue
Block a user