Merge commit '2201a0227297b9251717e44adc32554a51ca0ed6' as 'external/xbyak'

This commit is contained in:
2026-05-12 14:03:16 +02:00
146 changed files with 108693 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
-Wall -Wextra -Wsuggest-override -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -Wpointer-arith
+147
View File
@@ -0,0 +1,147 @@
# Build configuration for the xbyak test programs.
# To compile with -m32
#   apt install g++-multilib
CXX_32 = $(CXX) -m32
CXX_64 = $(CXX) -m64
# Programs built on every host; platform-specific ones are appended below.
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32 avx10_test
XBYAK_INC=../xbyak/xbyak.h ../xbyak/xbyak_mnemonic.h ../xbyak/xbyak_util.h
# Simple (:=) assignment: run uname once at parse time, not on every expansion.
UNAME_S:=$(shell uname -s)
# detect_x32 is built by this Makefile (see the detect_x32 rule); until it
# exists the probe yields an empty string and X32 is left unset.
ifeq ($(shell ./detect_x32),x32)
X32?=1
endif
# BIT is 64 only when the machine reports x86_64.
BIT=32
ifeq ($(shell uname -m),x86_64)
BIT=64
endif
# ONLY_64BIT=1 disables everything that needs a 32-bit binary.
ONLY_64BIT=0
ifeq ($(UNAME_S),Darwin)
# 32-bit binary is not supported
ONLY_64BIT=1
endif
ifeq ($(findstring MINGW64,$(UNAME_S)),MINGW64)
ONLY_64BIT=1
endif
ifeq ($(ONLY_64BIT),0)
TARGET += jmp address
endif
ifeq ($(BIT),64)
TARGET += jmp64 address64 apx
TARGET += sf_test cpumask_test
endif
all: $(TARGET)
# Simple (:=) assignment: CFLAGS is expanded by every compile recipe, and a
# recursive variable would re-run `cat` for each of those expansions.
CFLAGS_WARN:=$(shell cat CFLAGS_WARN.cfg)
CFLAGS=-O2 -Wall -I.. -I. $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
# ---- helper programs -------------------------------------------------------
# make_nm's prerequisites are declared by a separate recipe-less rule elsewhere
# in this file, so the source is named explicitly here.
make_nm:
	$(CXX) $(CFLAGS) make_nm.cpp -o $@

normalize_prefix: normalize_prefix.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

test_mmx: test_mmx.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -lpthread

# ---- tests built once with -m32 and once with -m64 -------------------------
jmp: jmp.cpp $(XBYAK_INC)
	$(CXX_32) $(CFLAGS) $< -o $@

jmp64: jmp.cpp $(XBYAK_INC)
	$(CXX_64) $(CFLAGS) $< -o $@

address: address.cpp $(XBYAK_INC)
	$(CXX_32) $(CFLAGS) $< -o $@

address64: address.cpp $(XBYAK_INC)
	$(CXX_64) $(CFLAGS) $< -o $@

# ---- tests built with the default compiler mode ----------------------------
# The *32 variants only add -DXBYAK32; they do not pass -m32.
bad_address: bad_address.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

misc: misc.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

misc32: misc.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32

cvt_test: cvt_test.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

cvt_test32: cvt_test.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32

noexception: noexception.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -fno-exceptions

apx: apx.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

avx10_test: avx10_test.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -DXBYAK64

# sf_test also depends on the two generated expectation headers.
sf_test: sf_test.cpp $(XBYAK_INC) sf_test_win.h sf_test_gcc.h
	$(CXX) $(CFLAGS) $< -o $@ #-DXBYAK64

cpumask_test: cpumask_test.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@
# Data files (under dataset/) that xed_test feeds to test_by_xed.sh.
TEST_FILES=old.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt apx.txt amx.txt
# Run test_by_xed.sh on each data file in turn and stop at the first failure.
xed_test:
	@set -e; \
	for dataset in $(TEST_FILES:%=dataset/%); do \
		./test_by_xed.sh $$dataset || exit 1; \
	done
# Regenerate the sources in ../gen, then run the script-driven checks and the
# standalone test binaries. The conditionals are evaluated when the Makefile is
# parsed, so they select which recipe lines exist at all.
test_nm: normalize_prefix $(TARGET)
	$(MAKE) -C ../gen
# Steps that are skipped on hosts flagged ONLY_64BIT=1.
ifneq ($(ONLY_64BIT),1)
	CXX=$(CXX) ./test_nm.sh
	CXX=$(CXX) ./test_nm.sh noexcept
	CXX=$(CXX) ./test_nm.sh Y
	CXX=$(CXX) ./test_nm.sh avx512
	CXX=$(CXX) ./test_address.sh
	./jmp
	./cvt_test32
endif
# Steps that run on every host.
	./bad_address
	./misc
	./misc32
	./cvt_test
# Steps that run only when BIT is 64; the test_nm.sh 64-bit modes are
# additionally skipped when X32 is 1.
ifeq ($(BIT),64)
	CXX=$(CXX) ./test_address.sh 64
ifneq ($(X32),1)
	CXX=$(CXX) ./test_nm.sh 64
	CXX=$(CXX) ./test_nm.sh Y64
endif
	./jmp64
	./apx
	./avx10_test
endif
# Run the AVX comparison scripts.
# The argument-less and "Y" modes are guarded the same way as the 32-bit steps
# of test_nm: they are skipped when ONLY_64BIT is 1. The previous guard
# (ifneq ... 0) was inverted and ran them only on 64-bit-only hosts.
# NOTE(review): presumably these two modes are the 32-bit ones, mirroring
# test_nm.sh - confirm against test_avx.sh.
test_avx: normalize_prefix
ifneq ($(ONLY_64BIT),1)
	CXX=$(CXX) ./test_avx.sh
	CXX=$(CXX) ./test_avx.sh Y
endif
ifeq ($(BIT),64)
	CXX=$(CXX) ./test_avx.sh 64
ifneq ($(X32),1)
	CXX=$(CXX) ./test_avx.sh Y64
endif
endif

# Run the AVX-512 comparison script; same guards as test_avx.
test_avx512: normalize_prefix
ifneq ($(ONLY_64BIT),1)
	CXX=$(CXX) ./test_avx512.sh
endif
ifeq ($(BIT),64)
	CXX=$(CXX) ./test_avx512.sh 64
endif
# Build (if needed) and run the AVX10 unit test.
test_avx10: avx10_test
	./avx10_test

# Small C probe whose output is compared with "x32" while this Makefile is parsed.
detect_x32: detect_x32.c
	$(CC) $< -o $@

# Full test run. Each suite is started through a sub-make after detect_x32 has
# been built.
# NOTE(review): presumably sub-makes are used so the X32 probe is re-evaluated
# with the freshly built detect_x32 - confirm.
test: detect_x32
	$(MAKE) test_nm
	$(MAKE) test_avx
	$(MAKE) test_avx512

# Regenerate sf_test_win.h and sf_test_gcc.h by building sf_test.cpp in DUMP
# mode for each calling convention and capturing the program's output.
update_sf_test: sf_test.cpp
	$(CXX) $(CFLAGS) sf_test.cpp -DXBYAK64_WIN -DDUMP -o sf_test_dump && ./sf_test_dump > sf_test_win.h
	$(CXX) $(CFLAGS) sf_test.cpp -DXBYAK64_GCC -DDUMP -o sf_test_dump && ./sf_test_dump > sf_test_gcc.h
# Remove build products and intermediate files created by the tests.
clean:
	$(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512 avx10_test detect_x32 sf_test sf_test_dump

lib_run: lib_test.cpp lib_run.cpp lib.h
	$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run

# Prerequisites for make_nm; its recipe is defined with the other build rules.
make_nm: make_nm.cpp $(XBYAK_INC)

# Every target that names a command rather than a file is declared phony, so a
# stray file called e.g. "clean" or "all" cannot make the target look up to date.
.PHONY: all clean test test_nm test_avx test_avx512 test_avx10 xed_test update_sf_test
+14
View File
@@ -0,0 +1,14 @@
# MSVC (cl) rules that regenerate ../xbyak/xbyak_mnemonic.h from the two
# generator programs in ../gen.
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS -I ../
SUB_HEADER=../xbyak/xbyak_mnemonic.h

# The header is the output of gen_code.exe followed by the output of gen_avx512.exe.
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
	../gen/gen_code.exe > $@
	../gen/gen_avx512.exe >> $@

../gen/gen_code.exe: ../gen/gen_code.cpp #../xbyak/xbyak.h
	cl ../gen/gen_code.cpp $(OPT) /Fe:$@

../gen/gen_avx512.exe: ../gen/gen_avx512.cpp #../xbyak/xbyak.h
	cl ../gen/gen_avx512.cpp $(OPT) /Fe:$@

all: $(SUB_HEADER)
+9
View File
@@ -0,0 +1,9 @@
@echo off
rem Assemble test.asm as a win32 object with nasm and then with yasm, and print
rem the listing file each assembler produces (a.lst for nasm, b.lst for yasm).
rem NOTE(review): rm and cat are not cmd.exe built-ins; this presumably expects
rem Unix-style tools on PATH - confirm.
echo 32bit
rem Remove listings left over from a previous run.
rm -rf a.lst b.lst
echo nasm
nasm -l a.lst -f win32 -DWIN32 test.asm
cat a.lst
echo yasm
yasm -l b.lst -f win32 -DWIN32 test.asm
cat b.lst
+155
View File
@@ -0,0 +1,155 @@
#include <stdio.h>
#include <string.h>
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
/*
	Print one gather instruction per addressing expression in tbl.
	@param isJIT   true: emit Xbyak call syntax followed by dump();
	               false: emit plain assembler syntax
	@param name    instruction mnemonic
	@param tbl     addressing expressions placed between the brackets
	@param tblSize number of entries in tbl
	The destination is always ymm7 and the mask operand is always ymm4.
*/
void genVsibSub(bool isJIT, const char *name, const char *tbl[], size_t tblSize)
{
	for (size_t idx = 0; idx != tblSize; ++idx) {
		const char *const addr = tbl[idx];
		if (isJIT) {
			printf("%s (ymm7, ptr[%s], ymm4); dump();\n", name, addr);
		} else {
			printf("%s ymm7, [%s], ymm4\n", name, addr);
		}
	}
}
/*
	Emit the vector-index (VSIB) addressing test cases.
	In JIT mode the lines are wrapped in a generated function
	"void genVsib() { ... }"; otherwise only the assembler lines are printed.
	vgatherdpd is exercised with xmm index expressions and vgatherqpd with ymm
	index expressions; when XBYAK64 is defined, three extra vgatherdpd cases
	using 64-bit base registers are added.
*/
void genVsib(bool isJIT)
{
	if (isJIT) {
		puts("void genVsib() {");
	}

	const char *xmmIndexTbl[] = {
		"xmm0",
		"xmm0 * 1",
		"xmm0 + 4",
		"xmm0 + eax",
		"xmm0 * 4 + ecx",
		"xmm3 * 8 + edi + 123",
		"xmm2 * 2 + 5",
		"eax + xmm0",
		"esp + xmm2",
	};
	genVsibSub(isJIT, "vgatherdpd", xmmIndexTbl, NUM_OF_ARRAY(xmmIndexTbl));

	const char *ymmIndexTbl[] = {
		"ymm0",
		"ymm0 * 1",
		"ymm0 + 4",
		"ymm0 + eax",
		"ymm0 * 4 + ecx",
		"ymm3 * 8 + edi + 123",
		"ymm2 * 2 + 5",
		"eax + ymm0",
		"esp + ymm2",
	};
	genVsibSub(isJIT, "vgatherqpd", ymmIndexTbl, NUM_OF_ARRAY(ymmIndexTbl));

#ifdef XBYAK64
	const char *xmmIndex64Tbl[] = {
		"xmm0 + r11",
		"r13 + xmm15",
		"123 + rsi + xmm2 * 4",
	};
	genVsibSub(isJIT, "vgatherdpd", xmmIndex64Tbl, NUM_OF_ARRAY(xmmIndex64Tbl));
#endif

	if (isJIT) {
		puts("}");
	}
}
/*
	Print "mov ecx, [address]" test cases for every combination of
	base register, index register, scale and displacement.
	@param isJIT     true: emit Xbyak calls, split into functions gen0(), gen1(), ...
	                 of 100 cases each, followed by genVsib() and a gen() driver
	                 that calls all of them; false: emit plain assembler lines
	@param regTbl    register names (each at most 4 characters)
	@param regTblNum number of entries in regTbl
	The loops run one step past the table: the extra iteration stands for
	"no base register" / "no index register".
*/
void genAddress(bool isJIT, const char regTbl[][5], size_t regTblNum)
{
	static const int scaleTbl[] = { 0, 1, 2, 4, 8 };
	static const int dispTbl[] = { 0, 1, 1000, -1, -1000 };
	int count = 0;
	int funcNum = 1;
	if (isJIT) {
		puts("void gen0(){");
	}
	for (size_t i = 0; i < regTblNum + 1; i++) {
		// Only index the table for real rows; i == regTblNum means "no base".
		const bool hasBase = i < regTblNum;
		const char *base = hasBase ? regTbl[i] : 0;
		for (size_t j = 0; j < regTblNum + 1; j++) {
			if (j == 4) continue; /* esp is not index register */
			// j == regTblNum means "no index"; do not touch regTbl[regTblNum].
			const bool hasIndex = j < regTblNum;
			const char *index = hasIndex ? regTbl[j] : 0;
			for (size_t k = 0; k < NUM_OF_ARRAY(scaleTbl); k++) {
				int scale = scaleTbl[k];
				for (size_t m = 0; m < NUM_OF_ARRAY(dispTbl); m++) {
					int disp = dispTbl[m];
					// true while nothing has been printed inside the brackets
					bool isFirst = true;
					if (isJIT) {
						printf("mov (ecx, ptr[");
					} else {
						printf("mov ecx, [");
					}
					if (hasBase) {
						printf("%s", base);
						isFirst = false;
					}
					if (hasIndex) {
						if (!isFirst) putchar('+');
						printf("%s", index);
						// scale 0 means "print the index without a multiplier"
						if (scale) printf("*%d", scale);
						isFirst = false;
					}
					if (isFirst) {
						// displacement only: the JIT form needs a pointer operand
						if (isJIT) printf("(void*)");
						printf("%d", disp);
					} else {
						// negative values already carry their own sign
						if (disp >= 0) {
							putchar('+');
						}
						printf("%d", disp);
					}
					if (isJIT) {
						printf("]); dump();\n");
					} else {
						printf("]\n");
					}
					if (isJIT) {
						count++;
						// start a new generated function every 100 cases
						if ((count % 100) == 0) {
							printf("}\n void gen%d(){\n", funcNum++);
						}
					}
				}
			}
		}
	}
	if (isJIT) puts("}");
	genVsib(isJIT);
	if (isJIT) {
		printf("void gen(){\n");
		for (int i = 0; i < funcNum; i++) {
			printf(" gen%d();\n", i);
		}
		puts("genVsib();");
		printf("}\n");
	}
}
/*
	Entry point of the address test generator.
	The first argument "1" selects phase 1 (32-bit register names); any other
	value, or no argument, selects phase 2 (64-bit register names, generated
	only when XBYAK64 is defined). A second argument of any value switches the
	output from assembler syntax to Xbyak (JIT) syntax.
	The selected mode is reported on stderr.
*/
int main(int argc, char *argv[])
{
	/* drop the program name */
	--argc;
	++argv;
	const bool isJIT = argc > 1;
	const bool phase = (argc > 0) && (strcmp(argv[0], "1") == 0);
	fprintf(stderr, "phase:%c %s\n", phase ? '1' : '2', isJIT ? "jit" : "asm");
	if (!phase) {
#ifdef XBYAK64
		fprintf(stderr, "64bit reg\n");
		static const char reg64Tbl[][5] = {
			"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
		};
		genAddress(isJIT, reg64Tbl, NUM_OF_ARRAY(reg64Tbl));
#endif
	} else {
		fprintf(stderr, "32bit reg\n");
		static const char reg32Tbl[][5] = {
			"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
#ifdef XBYAK64
			"r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
#endif
		};
		genAddress(isJIT, reg32Tbl, NUM_OF_ARRAY(reg32Tbl));
	}
}
+1964
View File
File diff suppressed because it is too large Load Diff
+53
View File
@@ -0,0 +1,53 @@
#include <stdio.h>
#include <string.h>
#include <string>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include <cybozu/inttype.hpp>
#include <cybozu/test.hpp>
#include <algorithm>
using namespace Xbyak;
// ymm with sae is not supported from avx10.2 rev 4.0.
// Every statement below combines a rounding/SAE modifier with xmm or ymm
// operands and is expected to throw std::exception. The checks run inside the
// constructor of a local CodeGenerator subclass, so constructing `c` executes
// them.
CYBOZU_TEST_AUTO(ymm_with_sae)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
CYBOZU_TEST_EXCEPTION(vaddpd(ymm1, ymm2, ymm3 |T_rn_sae), std::exception);
CYBOZU_TEST_EXCEPTION(vcvtph2ibs(xmm1, xmm31 | T_rd_sae), std::exception);
CYBOZU_TEST_EXCEPTION(vcvtph2ibs(ymm1, ymm31 | T_rd_sae), std::exception);
CYBOZU_TEST_EXCEPTION(vcvt2ps2phx(ymm1, ymm2, ymm3 | T_rd_sae), std::exception);
CYBOZU_TEST_EXCEPTION(vminmaxpd(ymm1, ymm2, ymm3 | T_sae, 1), std::exception);
CYBOZU_TEST_EXCEPTION(vminmaxph(ymm1, ymm2, ymm3 | T_sae, 2), std::exception);
CYBOZU_TEST_EXCEPTION(vminmaxps(ymm1, ymm2, ymm3 | T_sae, 3), std::exception);
CYBOZU_TEST_EXCEPTION(vcvtps2ibs(ym1, ym2|T_rd_sae), std::exception);
CYBOZU_TEST_EXCEPTION(vcvtps2ibs(xm1, xm2|T_rd_sae), std::exception);
}
} c;
}
// Encode vmpsadbw four times, switching the default AVX10 encoding half way
// through, and compare the generated bytes with the expected table.
CYBOZU_TEST_AUTO(vmpsadbw)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// default setting: the expected bytes below start with 0xc4 (VEX)
setDefaultEncodingAVX10();
vmpsadbw(xm1, xm3, xm15, 3); // vex(avx)
vmpsadbw(ym1, ym3, ptr[rax+128], 3); // vex(avx2)
// AVX10v2Encoding: the expected bytes below start with 0x62 (EVEX)
setDefaultEncodingAVX10(AVX10v2Encoding);
vmpsadbw(ym1, ym3, ym15, 3); // evex(avx10.2)
vmpsadbw(ym1, ym3, ptr[rax+128], 3); // evex(avx10.2)
}
} c;
// One row per instruction above, in emission order.
const uint8_t tbl[] = {
0xc4, 0xc3, 0x61, 0x42, 0xcf, 0x03,
0xc4, 0xe3, 0x65, 0x42, 0x88, 0x80, 0x00, 0x00, 0x00, 0x03,
0x62, 0xd3, 0x66, 0x28, 0x42, 0xcf, 0x03,
0x62, 0xf3, 0x66, 0x28, 0x42, 0x48, 0x04, 0x03,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// Check the total size first, then every byte.
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
+28
View File
@@ -0,0 +1,28 @@
#include <xbyak/xbyak.h>
#include <cybozu/test.hpp>
// Code generator whose constructor feeds invalid addressing expressions to the
// assembler; every statement is expected to throw std::exception.
struct Code : Xbyak::CodeGenerator {
Code()
{
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp + esp]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [ax]), std::exception); // not support
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp * 4]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 16]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
// NOTE(review): identical to the previous line - possibly a different
// expression was intended; confirm.
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0]), std::exception);
CYBOZU_TEST_EXCEPTION(fld(dword [xmm0]), std::exception);
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3), std::exception);
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3), std::exception);
// cases that are only compiled for the 64-bit build
#ifdef XBYAK64
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [rax + eax]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0]), std::exception);
#endif
}
};
// Constructing Code runs all the checks in its constructor.
CYBOZU_TEST_AUTO(exception)
{
Code c;
}
+124
View File
@@ -0,0 +1,124 @@
//#define XBYAK_CPUMASK_COMPACT 0
#define XBYAK_NO_EXCEPTION
#define XBYAK_CPUMASK_N 8
#define XBYAK_CPUMASK_BITN 3
#include <xbyak/xbyak_util.h>
#include <cybozu/test.hpp>
using namespace Xbyak::util;
// Exercise CpuMask::append / appendRange and the string form returned by
// getStr(): single values are comma separated and consecutive values are
// printed as "first-last".
CYBOZU_TEST_AUTO(append)
{
CpuMask m;
// a default-constructed mask is empty
CYBOZU_TEST_ASSERT(m.empty());
CYBOZU_TEST_EQUAL(m.size(), 0);
CYBOZU_TEST_EQUAL(m.getStr(), "");
CYBOZU_TEST_ASSERT(m.append(2));
CYBOZU_TEST_ASSERT(!m.empty());
CYBOZU_TEST_EQUAL(m.size(), 1u);
CYBOZU_TEST_EQUAL(m.get(0), 2u);
CYBOZU_TEST_EQUAL(m.getStr(), "2");
CYBOZU_TEST_ASSERT(m.append(4));
CYBOZU_TEST_EQUAL(m.size(), 2u);
CYBOZU_TEST_EQUAL(m.get(0), 2u);
CYBOZU_TEST_EQUAL(m.get(1), 4u);
CYBOZU_TEST_EQUAL(m.getStr(), "2,4");
CYBOZU_TEST_ASSERT(!m.append(3)); // not monotonically increasing
// the rejected append must leave the mask usable
CYBOZU_TEST_ASSERT(m.append(7));
CYBOZU_TEST_EQUAL(m.size(), 3u);
CYBOZU_TEST_EQUAL(m.get(0), 2u);
CYBOZU_TEST_EQUAL(m.get(1), 4u);
CYBOZU_TEST_EQUAL(m.get(2), 7u);
CYBOZU_TEST_EQUAL(m.getStr(), "2,4,7");
// consecutive values collapse into ranges
m.clear();
CYBOZU_TEST_ASSERT(m.append(1));
CYBOZU_TEST_ASSERT(m.append(2));
CYBOZU_TEST_ASSERT(m.append(3));
CYBOZU_TEST_EQUAL(m.getStr(), "1-3");
CYBOZU_TEST_ASSERT(m.append(5));
CYBOZU_TEST_ASSERT(m.append(6));
CYBOZU_TEST_ASSERT(m.append(7));
CYBOZU_TEST_EQUAL(m.getStr(), "1-3,5-7");
// appendRange produces the same string as appending each value
m.clear();
CYBOZU_TEST_ASSERT(m.appendRange(1, 3));
CYBOZU_TEST_EQUAL(m.getStr(), "1-3");
CYBOZU_TEST_ASSERT(m.appendRange(5, 7));
CYBOZU_TEST_EQUAL(m.getStr(), "1-3,5-7");
}
// Round trip: parse a string, print it, parse the printed form again and
// check that both masks and both strings agree.
CYBOZU_TEST_AUTO(str)
{
const char *s = "0-1,3,4-7";
CpuMask m, m2;
CYBOZU_TEST_ASSERT(m.setStr(s));
CYBOZU_TEST_ASSERT(m2.setStr(m.getStr()));
CYBOZU_TEST_EQUAL(m.getStr(), m2.getStr());
CYBOZU_TEST_ASSERT(m == m2);
}
// setStr() must return false for each of these malformed strings.
// XBYAK_CPUMASK_N is defined as 8 at the top of this file, so "0-8" and "-8"
// presumably fail because 8 is out of range - the other entries are syntax or
// ordering errors.
CYBOZU_TEST_AUTO(errStr)
{
const char *s[] = {
",",
",,",
"1,",
"1,,",
"-8",
"3-",
"0-8",
"0--2",
"2-0",
"2,1",
"0-1-2",
"0,a",
"0-2,",
",0-2",
"0-2,,4",
};
for (size_t i = 0; i < sizeof(s) / sizeof(s[0]); i++) {
// a fresh mask for every input
CpuMask m;
// printf("errStr test %s\n", s[i]);
CYBOZU_TEST_ASSERT(!m.setStr(s[i]));
}
}
// Exhaustive test: for every subset of the values 0 .. bit-1 (bit = 2^BITN,
// i.e. 8 with the settings at the top of this file) build the mask, check its
// size, iterate over it, and round-trip it through getStr()/setStr().
CYBOZU_TEST_AUTO(pattern)
{
const uint32_t bitN = XBYAK_CPUMASK_BITN;
const uint32_t bit = 1 << bitN;
// i is a bitmap: bit j of i set <=> value j belongs to the mask
for (uint32_t i = 0; i < (1 << bit); i++) {
CpuMask m;
uint32_t cnt = 0;
// append in increasing order, counting the members
for (uint32_t j = 0; j < bit; j++) {
if (i & (1 << j)) {
cnt++;
CYBOZU_TEST_ASSERT(m.append(j));
}
}
CYBOZU_TEST_EQUAL(m.size(), cnt);
// disabled debug dump of the current pattern
#if 0
printf("pattern (%3u) ", i);
for (int j = int(bit) - 1; j >= 0; j--) {
if (i & (uint64_t(1) << j)) printf("%d ", j);
}
printf("\n");
#endif
// every iterated value must be a member, and the iteration count must match
uint32_t idx = 0;
for (const auto& v : m) {
CYBOZU_TEST_ASSERT(i & (1 << v));
idx++;
}
CYBOZU_TEST_EQUAL(idx, cnt);
// string round trip
CpuMask m2;
std::string mstr = m.getStr();
CYBOZU_TEST_ASSERT(m2.setStr(mstr));
CYBOZU_TEST_ASSERT(m == m2);
CYBOZU_TEST_EQUAL(mstr, m2.getStr());
}
}
+191
View File
@@ -0,0 +1,191 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
#include <cybozu/inttype.hpp>
#include <cybozu/test.hpp>
using namespace Xbyak;
using namespace Xbyak::util;
// Expectation table for the cvt test: each row lists the registers that the
// cvtN() conversions are expected to map onto each other, one per width.
// reg8 is a pointer so that a row can say "no 8-bit form" with 0 (used in the
// 32-bit table for si/di/bp/sp).
#ifdef XBYAK64
const struct Ptn {
const Reg8 *reg8;
Reg16 reg16;
Reg32 reg32;
Reg64 reg64;
Xmm x;
Ymm y;
Zmm z;
} tbl[] = {
{ &al, ax, eax, rax, xmm0, ymm0, zmm0 },
{ &bl, bx, ebx, rbx, xmm3, ymm3, zmm3 },
{ &cl, cx, ecx, rcx, xmm1, ymm1, zmm1 },
{ &dl, dx, edx, rdx, xmm2, ymm2, zmm2 },
{ &sil, si, esi, rsi, xmm6, ymm6, zmm6 },
{ &dil, di, edi, rdi, xmm7, ymm7, zmm7 },
{ &bpl, bp, ebp, rbp, xmm5, ymm5, zmm5 },
{ &spl, sp, esp, rsp, xmm4, ymm4, zmm4 },
{ &r8b, r8w, r8d, r8, xmm8, ymm8, zmm8 },
{ &r9b, r9w, r9d, r9, xmm9, ymm9, zmm9 },
{ &r10b, r10w, r10d, r10, xmm10, ymm10, zmm10 },
{ &r11b, r11w, r11d, r11, xmm11, ymm11, zmm11 },
{ &r12b, r12w, r12d, r12, xmm12, ymm12, zmm12 },
{ &r13b, r13w, r13d, r13, xmm13, ymm13, zmm13 },
{ &r14b, r14w, r14d, r14, xmm14, ymm14, zmm14 },
{ &r15b, r15w, r15d, r15, xmm15, ymm15, zmm15 },
{ &r31b, r31w, r31d, r31, xmm31, ymm31, zmm31 },
};
#else
// 32-bit build: no Reg64 column.
const struct Ptn {
const Reg8 *reg8;
Reg16 reg16;
Reg32 reg32;
Xmm x;
Ymm y;
Zmm z;
} tbl[] = {
{ &al, ax, eax, xmm0, ymm0, zmm0 },
{ &bl, bx, ebx, xmm3, ymm3, zmm3 },
{ &cl, cx, ecx, xmm1, ymm1, zmm1 },
{ &dl, dx, edx, xmm2, ymm2, zmm2 },
{ 0, si, esi, xmm6, ymm6, zmm6 },
{ 0, di, edi, xmm7, ymm7, zmm7 },
{ 0, bp, ebp, xmm5, ymm5, zmm5 },
{ 0, sp, esp, xmm4, ymm4, zmm4 },
};
#endif
// For every row of tbl, check that each register converts to every other
// width listed in the same row via cvt8/cvt16/cvt32/cvt64/cvt128/cvt256/cvt512.
// Afterwards check the conversions that must throw.
CYBOZU_TEST_AUTO(cvt)
{
	for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
		// 8-bit checks are skipped for rows without an 8-bit register
		if (tbl[i].reg8) {
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt16() == tbl[i].reg16);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt32() == tbl[i].reg32);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt128() == tbl[i].x);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt256() == tbl[i].y);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt512() == tbl[i].z);
			CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].x.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].y.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].z.cvt8() == *tbl[i].reg8);
		}
		// from 16-bit
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt512() == tbl[i].z);
		// from 32-bit
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt512() == tbl[i].z);
		// from xmm
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt512() == tbl[i].z);
		// from ymm
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt512() == tbl[i].z);
		// from zmm
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt256() == tbl[i].y);
		// was tbl[i].y.cvt512(): a copy of the ymm check, which left
		// Zmm::cvt512() untested
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt512() == tbl[i].z);
#ifdef XBYAK64
		// 64-bit column
		if (tbl[i].reg8) {
			CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt64() == tbl[i].reg64);
		}
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt512() == tbl[i].z);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt64() == tbl[i].reg64);
#endif
	}
	// ah/bh/ch/dh cannot be widened: cvt16() must throw
	{
		const Reg8 errTbl[] = {
			ah, bh, ch, dh
		};
		for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
			CYBOZU_TEST_EXCEPTION(errTbl[i].cvt16(), std::exception);
		}
	}
#ifdef XBYAK32
	// in the 32-bit build si/di/bp/sp have no 8-bit form: cvt8() must throw
	{
		const Reg16 errTbl[] = {
			si, di, bp, sp
		};
		for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
			CYBOZU_TEST_EXCEPTION(errTbl[i].cvt8(), std::exception);
		}
	}
#endif
}
// Check Reg::changeBit(): within each row of the local table, every register
// must convert to every other register of that row when asked for the
// corresponding bit width from bitTbl. A 0 entry means that width does not
// exist for the row, and changeBit() to it must throw.
CYBOZU_TEST_AUTO(changeBit)
{
using namespace Xbyak::util;
#ifdef XBYAK64
const size_t N = 7;
const Reg* tbl[][N] = {
{ &al, &ax, &eax, &rax, &xmm0, &ymm0, &zmm0 },
{ &cl, &cx, &ecx, &rcx, &xmm1, &ymm1, &zmm1 },
{ &dl, &dx, &edx, &rdx, &xmm2, &ymm2, &zmm2 },
{ &bl, &bx, &ebx, &rbx, &xmm3, &ymm3, &zmm3 },
{ &spl, &sp, &esp, &rsp, &xmm4, &ymm4, &zmm4 },
{ &bpl, &bp, &ebp, &rbp, &xmm5, &ymm5, &zmm5 },
{ &sil, &si, &esi, &rsi, &xmm6, &ymm6, &zmm6 },
{ &dil, &di, &edi, &rdi, &xmm7, &ymm7, &zmm7 },
{ &r8b, &r8w, &r8d, &r8, &xmm8, &ymm8, &zmm8 },
{ &r15b, &r15w, &r15d, &r15, &xmm15, &ymm15, &zmm15 },
{ &r16b, &r16w, &r16d, &r16, &xmm16, &ymm16, &zmm16 },
{ &r31b, &r31w, &r31d, &r31, &xmm31, &ymm31, &zmm31 },
};
// bit width of each column of tbl
const int bitTbl[N] = { 8, 16, 32, 64, 128, 256, 512 };
#else
const size_t N = 6;
const Reg* tbl[][N] = {
{ &al, &ax, &eax, &xmm0, &ymm0, &zmm0 },
{ &cl, &cx, &ecx, &xmm1, &ymm1, &zmm1 },
{ &dl, &dx, &edx, &xmm2, &ymm2, &zmm2 },
{ &bl, &bx, &ebx, &xmm3, &ymm3, &zmm3 },
{ 0, &sp, &esp, &xmm4, &ymm4, &zmm4 },
{ 0, &bp, &ebp, &xmm5, &ymm5, &zmm5 },
{ 0, &si, &esi, &xmm6, &ymm6, &zmm6 },
{ 0, &di, &edi, &xmm7, &ymm7, &zmm7 },
};
// bit width of each column of tbl (no 64-bit column in the 32-bit build)
const int bitTbl[N] = { 8, 16, 32, 128, 256, 512 };
#endif
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
for (size_t j = 0; j < N; j++) {
// r1 is the source register; skip missing entries
const Reg *r1 = tbl[i][j];
if (r1 == 0) continue;
for (size_t k = 0; k < N; k++) {
if (tbl[i][k]) {
CYBOZU_TEST_ASSERT(*tbl[i][k] == r1->changeBit(bitTbl[k]));
// printf("%s->changeBit(%d)=%s %s\n", r1->toString(), bitTbl[k], r1->changeBit(bitTbl[k]).toString(), tbl[i][k]->toString());
} else {
// the target width does not exist for this row
CYBOZU_TEST_EXCEPTION(r1->changeBit(bitTbl[k]), std::exception);
}
}
}
}
#ifdef XBYAK64
// ah/bh/ch/dh must refuse to change width
const Reg8 *special8bitTbl[] = { &ah, &bh, &ch, &dh };
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(special8bitTbl); i++) {
CYBOZU_TEST_EXCEPTION(special8bitTbl[i]->changeBit(16), std::exception);
}
#endif
}
+163
View File
@@ -0,0 +1,163 @@
#pragma once
/**
@file
@brief int type definition and macros
@author MITSUNARI Shigeo(@herumi)
*/
/*
	Fixed-width integer types.
	Visual C++ up to _MSC_VER 1500 gets hand-written typedefs (guarded by
	CYBOZU_DEFINED_INTXX so they are declared only once); every other compiler
	uses <stdint.h>.
	The version test previously read MSC_VER (no leading underscore). That name
	is never defined, so it evaluated to 0 in the #if and every MSVC version
	took the typedef branch.
*/
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && !defined(CYBOZU_DEFINED_INTXX)
	#define CYBOZU_DEFINED_INTXX
	typedef __int64 int64_t;
	typedef unsigned __int64 uint64_t;
	typedef unsigned int uint32_t;
	typedef int int32_t;
	typedef unsigned short uint16_t;
	typedef short int16_t;
	typedef unsigned char uint8_t;
	typedef signed char int8_t;
#else
	#include <stdint.h>
#endif
// ssize_t: declared by hand for Visual C++ (64-bit on _WIN64, otherwise
// 32-bit); other compilers get it from <unistd.h>.
#ifdef _MSC_VER
#ifndef CYBOZU_DEFINED_SSIZE_T
#define CYBOZU_DEFINED_SSIZE_T
#ifdef _WIN64
typedef int64_t ssize_t;
#else
typedef int32_t ssize_t;
#endif
#endif
#else
#include <unistd.h> // for ssize_t
#endif
// CYBOZU_ALIGN(x): alignment specifier in the compiler's own syntax.
#ifndef CYBOZU_ALIGN
#ifdef _MSC_VER
#define CYBOZU_ALIGN(x) __declspec(align(x))
#else
#define CYBOZU_ALIGN(x) __attribute__((aligned(x)))
#endif
#endif
// CYBOZU_FORCE_INLINE: compiler-specific "always inline" marker.
#ifndef CYBOZU_FORCE_INLINE
#ifdef _MSC_VER
#define CYBOZU_FORCE_INLINE __forceinline
#else
#define CYBOZU_FORCE_INLINE __attribute__((always_inline))
#endif
#endif
// CYBOZU_UNUSED: expands to the GCC "unused" attribute, or to nothing when
// __GNUC__ is not defined.
#ifndef CYBOZU_UNUSED
#ifdef __GNUC__
#define CYBOZU_UNUSED __attribute__((unused))
#else
#define CYBOZU_UNUSED
#endif
#endif
// CYBOZU_ALLOCA(x): _malloca on Visual C++, __builtin_alloca elsewhere.
// NOTE(review): Microsoft documents that memory from _malloca must be released
// with _freea; this header defines no matching free macro - confirm callers.
#ifndef CYBOZU_ALLOCA
#ifdef _MSC_VER
#include <malloc.h>
#define CYBOZU_ALLOCA(x) _malloca(x)
#else
#define CYBOZU_ALLOCA(x) __builtin_alloca(x)
#endif
#endif
// CYBOZU_NUM_OF_ARRAY(x): element count of a true array (not a pointer).
#ifndef CYBOZU_NUM_OF_ARRAY
#define CYBOZU_NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
#endif
// CYBOZU_SNPRINTF(x, len, ...): snprintf with the result discarded; Visual C++
// older than _MSC_VER 1900 uses _snprintf_s with a count of len - 1.
#ifndef CYBOZU_SNPRINTF
#if defined(_MSC_VER) && (_MSC_VER < 1900)
#define CYBOZU_SNPRINTF(x, len, ...) (void)_snprintf_s(x, len, len - 1, __VA_ARGS__)
#else
#define CYBOZU_SNPRINTF(x, len, ...) (void)snprintf(x, len, __VA_ARGS__)
#endif
#endif
// Language-level constants; CYBOZU_CPP_VERSION is set to one of these below.
// The values are ordered so they can be compared with >=.
#define CYBOZU_CPP_VERSION_CPP03 0
#define CYBOZU_CPP_VERSION_TR1 1
#define CYBOZU_CPP_VERSION_CPP11 2
#define CYBOZU_CPP_VERSION_CPP14 3
#define CYBOZU_CPP_VERSION_CPP17 4
// CYBOZU_GNUC_PREREQ(major, minor): true when compiling with GCC (or a
// compiler defining __GNUC__) of at least the given version; 0 otherwise.
#ifdef __GNUC__
#define CYBOZU_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
#else
#define CYBOZU_GNUC_PREREQ(major, minor) 0
#endif
// Pick CYBOZU_CPP_VERSION from __cplusplus first, then from compiler-specific
// version macros. Undefined macros such as _MSC_VER on non-Microsoft compilers
// evaluate to 0 inside #if.
#if (__cplusplus >= 201703)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP17
#elif (__cplusplus >= 201402)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP14
#elif (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
#if defined(_MSC_VER) && (_MSC_VER <= 1600)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
#else
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
#endif
#elif CYBOZU_GNUC_PREREQ(4, 5) || (CYBOZU_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || (__clang_major__ >= 3)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
#else
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03
#endif
// CYBOZU_NAMESPACE_STD names the namespace to take library components from:
// boost when CYBOZU_USE_BOOST is defined, std::tr1 for TR1-level compilers
// (except on Apple), std otherwise. The _TR1_BEGIN/_END pair opens and closes
// "namespace tr1" only in the std::tr1 case.
#ifdef CYBOZU_USE_BOOST
#define CYBOZU_NAMESPACE_STD boost
#define CYBOZU_NAMESPACE_TR1_BEGIN
#define CYBOZU_NAMESPACE_TR1_END
#elif (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1) && !defined(__APPLE__)
#define CYBOZU_NAMESPACE_STD std::tr1
#define CYBOZU_NAMESPACE_TR1_BEGIN namespace tr1 {
#define CYBOZU_NAMESPACE_TR1_END }
#else
#define CYBOZU_NAMESPACE_STD std
#define CYBOZU_NAMESPACE_TR1_BEGIN
#define CYBOZU_NAMESPACE_TR1_END
#endif
// CYBOZU_OS_BIT: 64 for the listed 64-bit targets, 32 otherwise.
#ifndef CYBOZU_OS_BIT
#if defined(_WIN64) || defined(__x86_64__) || defined(__AARCH64EL__) || defined(__EMSCRIPTEN__)
#define CYBOZU_OS_BIT 64
#else
#define CYBOZU_OS_BIT 32
#endif
#endif
// CYBOZU_HOST: CPU family detected from compiler-defined macros.
#ifndef CYBOZU_HOST
#define CYBOZU_HOST_UNKNOWN 0
#define CYBOZU_HOST_INTEL 1
#define CYBOZU_HOST_ARM 2
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
#define CYBOZU_HOST CYBOZU_HOST_INTEL
#elif defined(__arm__) || defined(__AARCH64EL__)
#define CYBOZU_HOST CYBOZU_HOST_ARM
#else
#define CYBOZU_HOST CYBOZU_HOST_UNKNOWN
#endif
#endif
// CYBOZU_ENDIAN: little for Intel hosts and for ARM hosts that define
// __ARM_EABI__ or __AARCH64EL__; unknown otherwise (BIG is never selected here).
#ifndef CYBOZU_ENDIAN
#define CYBOZU_ENDIAN_UNKNOWN 0
#define CYBOZU_ENDIAN_LITTLE 1
#define CYBOZU_ENDIAN_BIG 2
#if (CYBOZU_HOST == CYBOZU_HOST_INTEL)
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
#elif (CYBOZU_HOST == CYBOZU_HOST_ARM) && (defined(__ARM_EABI__) || defined(__AARCH64EL__))
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
#else
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_UNKNOWN
#endif
#endif
// CYBOZU_NOEXCEPT: noexcept from C++11 on, throw() before.
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
#define CYBOZU_NOEXCEPT noexcept
#else
#define CYBOZU_NOEXCEPT throw()
#endif
namespace cybozu {

/**
	Accept any value and do nothing with it.
	Passing a variable here counts as a use of that variable.
*/
template<class T>
void disable_warning_unused_variable(const T&)
{
}

/**
	Convert a pointer to const S into the pointer type T by way of const void*.
	@param ptr [in] source pointer
	@return ptr converted to T
*/
template<class T, class S>
T cast(const S* ptr)
{
	const void *const raw = ptr;
	return static_cast<T>(raw);
}

/**
	Convert a pointer to non-const S into the pointer type T by way of void*.
	@param ptr [in] source pointer
	@return ptr converted to T
*/
template<class T, class S>
T cast(S* ptr)
{
	void *const raw = ptr;
	return static_cast<T>(raw);
}

} // cybozu
+373
View File
@@ -0,0 +1,373 @@
#pragma once
/**
@file
@brief unit test class
@author MITSUNARI Shigeo(@herumi)
*/
#include <stdio.h>
#include <string.h>
#include <string>
#include <list>
#include <iostream>
#include <utility>
// Fixed-width integer types: cybozu/inttype.hpp for Visual C++ up to
// _MSC_VER 1500, <stdint.h> everywhere else.
// The version test previously read MSC_VER (no leading underscore); that name
// is never defined and evaluated to 0, so every MSVC version took the first
// branch.
#if defined(_MSC_VER) && (_MSC_VER <= 1500)
	#include <cybozu/inttype.hpp>
#else
	#include <stdint.h>
#endif
namespace cybozu { namespace test {
/**
	Registry and runner for unit tests.
	Test functions are appended with a name, run() executes them in
	registration order, and set() accumulates the pass/fail counters that
	run() reports on stdout.
*/
class AutoRun {
	typedef void (*Func)();
	typedef std::list<std::pair<const char*, Func> > UnitTestList;
public:
	AutoRun()
		: init_(0), term_(0), okCount_(0), ngCount_(0), exceptionCount_(0)
	{
	}
	/**
		register functions called once before the first test and once after the last
		@param init [in] called before the tests (may be 0)
		@param term [in] called after the tests (may be 0)
	*/
	void setup(Func init, Func term)
	{
		init_ = init;
		term_ = term;
	}
	/**
		register a test
		@param name [in] module name printed while running
		@param func [in] test body
	*/
	void append(const char *name, Func func)
	{
		list_.push_back(std::make_pair(name, func));
	}
	/**
		record the result of one check
		@param isOK [in] true counts as ok, false as ng
	*/
	void set(bool isOK)
	{
		++(isOK ? okCount_ : ngCount_);
	}
	/**
		strip the directory part and everything from the first '.' of the file name
		@param name [in] path ('\\' separated on _WIN32, '/' separated otherwise)
	*/
	std::string getBaseName(const std::string& name) const
	{
#ifdef _WIN32
		const char sep = '\\';
#else
		const char sep = '/';
#endif
		// when there is no separator find_last_of returns npos and npos + 1
		// wraps to 0, so the whole name is kept
		const size_t sepPos = name.find_last_of(sep);
		const std::string file = name.substr(sepPos + 1);
		return file.substr(0, file.find('.'));
	}
	/**
		run init, every registered test and term, then print a summary line
		@param argv [in] argv[0] is used for the name in the summary
		@return 0 if no check failed and no exception escaped, 1 otherwise
	*/
	int run(int, char *argv[])
	{
		std::string fatalMsg;
		try {
			if (init_) init_();
			const UnitTestList::const_iterator last = list_.end();
			for (UnitTestList::const_iterator it = list_.begin(); it != last; ++it) {
				const char *const moduleName = it->first;
				const Func body = it->second;
				std::cout << "ctest:module=" << moduleName << std::endl;
				// an exception ends the current test only; the next one still runs
				try {
					body();
				} catch (std::exception& e) {
					exceptionCount_++;
					std::cout << "ctest: " << moduleName << " is stopped by exception " << e.what() << std::endl;
				} catch (...) {
					exceptionCount_++;
					std::cout << "ctest: " << moduleName << " is stopped by unknown exception" << std::endl;
				}
			}
			if (term_) term_();
		} catch (std::exception& e) {
			// only init_/term_ can reach here
			fatalMsg = std::string("ctest:err:") + e.what();
		} catch (...) {
			fatalMsg = "ctest:err: catch unknown exception";
		}
		fflush(stdout);
		if (!fatalMsg.empty()) {
			std::cout << fatalMsg << std::endl;
			return 1;
		}
		const int err = ngCount_ + exceptionCount_;
		const int total = okCount_ + err;
		std::cout << "ctest:name=" << getBaseName(*argv)
				  << ", module=" << list_.size()
				  << ", total=" << total
				  << ", ok=" << okCount_
				  << ", ng=" << ngCount_
				  << ", exception=" << exceptionCount_ << std::endl;
		return err > 0 ? 1 : 0;
	}
	/** the single shared instance (function-local static) */
	static inline AutoRun& getInstance()
	{
		static AutoRun instance;
		return instance;
	}
private:
	Func init_;
	Func term_;
	int okCount_;
	int ngCount_;
	int exceptionCount_;
	UnitTestList list_;
};
// Reference to the shared AutoRun instance.
static AutoRun& autoRun = AutoRun::getInstance();

/**
	record the result of one check and report it on stdout when it failed
	@param ret [in] result of the check
	@param msg [in] name of the macro that performed the check
	@param param [in] text of the checked expression(s)
	@param file [in] source file of the check
	@param line [in] source line of the check
*/
inline void test(bool ret, const std::string& msg, const std::string& param, const char *file, int line)
{
	autoRun.set(ret);
	if (ret) return;
	printf("%s(%d):ctest:%s(%s);\n", file, line, msg.c_str(), param.c_str());
}
/**
	generic comparison: whatever operator== says for the two types
*/
template<typename T, typename U>
bool isEqual(const T& lhs, const U& rhs)
{
	const bool same = (lhs == rhs);
	return same;
}
// size_t/int pairs: convert the int side to size_t explicitly so that the
// compiler does not warn about comparing integers of different signedness
inline bool isEqual(size_t lhs, int rhs)
{
	const size_t converted = size_t(rhs);
	return lhs == converted;
}
inline bool isEqual(int lhs, size_t rhs)
{
	const size_t converted = size_t(lhs);
	return converted == rhs;
}
// C strings are compared by content (strcmp), for every const/non-const
// combination of the two pointers
inline bool isEqual(const char *lhs, const char *rhs)
{
	return !strcmp(lhs, rhs);
}
inline bool isEqual(char *lhs, const char *rhs)
{
	return !strcmp(lhs, rhs);
}
inline bool isEqual(const char *lhs, char *rhs)
{
	return !strcmp(lhs, rhs);
}
inline bool isEqual(char *lhs, char *rhs)
{
	return !strcmp(lhs, rhs);
}
// float: compare the 32-bit patterns instead of using operator==
inline bool isEqual(float lhs, float rhs)
{
	union Bits32 {
		float f;
		uint32_t i;
	};
	Bits32 a;
	Bits32 b;
	a.f = lhs;
	b.f = rhs;
	return a.i == b.i;
}
// double: compare the 64-bit patterns instead of using operator==
inline bool isEqual(double lhs, double rhs)
{
	union Bits64 {
		double d;
		uint64_t i;
	};
	Bits64 a;
	Bits64 b;
	a.d = lhs;
	b.d = rhs;
	return a.i == b.i;
}
} } // cybozu::test
// Every file that includes this header gets a main() that runs all registered
// tests, unless CYBOZU_TEST_DISABLE_AUTO_RUN is defined before the include.
#ifndef CYBOZU_TEST_DISABLE_AUTO_RUN
int main(int argc, char *argv[])
{
	const int exitCode = cybozu::test::autoRun.run(argc, argv);
	return exitCode;
}
#endif
/**
record one check: ok if x converts to true, otherwise ng and the expression
text is printed together with file and line
@param x [in] expression to check
*/
#define CYBOZU_TEST_ASSERT(x) cybozu::test::test(!!(x), "CYBOZU_TEST_ASSERT", #x, __FILE__, __LINE__)
/**
record one check: ok if cybozu::test::isEqual(x, y), otherwise ng
@param x [in] left-hand value
@param y [in] right-hand value
@note on failure both values are printed, which evaluates x and y a second time
*/
#define CYBOZU_TEST_EQUAL(x, y) { \
bool _cybozu_eq = cybozu::test::isEqual(x, y); \
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
if (!_cybozu_eq) { \
std::cout << "ctest: lhs=" << (x) << std::endl; \
std::cout << "ctest: rhs=" << (y) << std::endl; \
} \
}
/**
	alert if fabs(x - y) >= eps
	@param x [in]
	@param y [in]
	@param eps [in] tolerance; parenthesized so that an expression can be passed
	@note fabs must be declared by the including file (<math.h> or <cmath>);
	this header does not include it
	@note on failure both values are printed, which evaluates x and y a second time
*/
#define CYBOZU_TEST_NEAR(x, y, eps) { \
	bool _cybozu_isNear = fabs((x) - (y)) < (eps); \
	cybozu::test::test(_cybozu_isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
	if (!_cybozu_isNear) { \
		std::cout << "ctest: lhs=" << (x) << std::endl; \
		std::cout << "ctest: rhs=" << (y) << std::endl; \
	} \
}
/**
	alert if pointer x != pointer y
	@param x [in]
	@param y [in]
	@note the operands are parenthesized so that expressions with lower
	precedence than == (e.g. a conditional expression) compare as a whole
	@note on failure both addresses are printed as const void*
*/
#define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
	bool _cybozu_eq = (x) == (y); \
	cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
	if (!_cybozu_eq) { \
		std::cout << "ctest: lhs=" << static_cast<const void*>(x) << std::endl; \
		std::cout << "ctest: rhs=" << static_cast<const void*>(y) << std::endl; \
	} \
}
/**
compare the first n elements of x and y with cybozu::test::isEqual;
every element is recorded as a separate check, and for each mismatch the
index and both element values are printed
@param x [in] first array (anything indexable with [])
@param y [in] second array
@param n [in] number of elements to compare (converted to size_t once)
*/
#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n) { \
for (size_t _cybozu_test_i = 0, _cybozu_ie = (size_t)(n); _cybozu_test_i < _cybozu_ie; _cybozu_test_i++) { \
bool _cybozu_eq = cybozu::test::isEqual((x)[_cybozu_test_i], (y)[_cybozu_test_i]); \
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_ARRAY", #x ", " #y ", " #n, __FILE__, __LINE__); \
if (!_cybozu_eq) { \
std::cout << "ctest: i=" << _cybozu_test_i << std::endl; \
std::cout << "ctest: lhs=" << (x)[_cybozu_test_i] << std::endl; \
std::cout << "ctest: rhs=" << (y)[_cybozu_test_i] << std::endl; \
} \
} \
}
/**
always record a failed check
@param msg [in] text printed with the failure
*/
#define CYBOZU_TEST_FAIL(msg) cybozu::test::test(false, "CYBOZU_TEST_FAIL", msg, __FILE__, __LINE__)
/**
verify that statement throws Exception and that its what() contains msg
@param statement [in] code expected to throw
@param Exception [in] exception type to catch (caught by const reference)
@param msg [in] text searched for in what() with std::string::find
@note the check fails when nothing is thrown (reported as "no exception"),
when what() does not contain msg ("bad exception msg"), or when a different
exception type is thrown ("unexpected exception")
*/
#define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
{ \
int _cybozu_ret = 0; \
std::string _cybozu_errMsg; \
try { \
statement; \
_cybozu_ret = 1; \
} catch (const Exception& _cybozu_e) { \
_cybozu_errMsg = _cybozu_e.what(); \
if (_cybozu_errMsg.find(msg) == std::string::npos) { \
_cybozu_ret = 2; \
} \
} catch (...) { \
_cybozu_ret = 3; \
} \
if (_cybozu_ret) { \
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
if (_cybozu_ret == 1) { \
std::cout << "ctest: no exception" << std::endl; \
} else if (_cybozu_ret == 2) { \
std::cout << "ctest: bad exception msg:" << _cybozu_errMsg << std::endl; \
} else { \
std::cout << "ctest: unexpected exception" << std::endl; \
} \
} else { \
cybozu::test::autoRun.set(true); \
} \
}
/**
verify that statement throws Exception
@param statement [in] code expected to throw
@param Exception [in] exception type to catch (caught by const reference)
@note the check fails when nothing is thrown ("no exception") or when a
different exception type is thrown ("unexpected exception")
*/
#define CYBOZU_TEST_EXCEPTION(statement, Exception) \
{ \
int _cybozu_ret = 0; \
try { \
statement; \
_cybozu_ret = 1; \
} catch (const Exception&) { \
} catch (...) { \
_cybozu_ret = 2; \
} \
if (_cybozu_ret) { \
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
if (_cybozu_ret == 1) { \
std::cout << "ctest: no exception" << std::endl; \
} else { \
std::cout << "ctest: unexpected exception" << std::endl; \
} \
} else { \
cybozu::test::autoRun.set(true); \
} \
}
/**
verify statement does not throw
@param statement [in] code expected to complete normally
@note expands to a bare try/catch statement without enclosing braces
*/
#define CYBOZU_TEST_NO_EXCEPTION(statement) \
try { \
statement; \
cybozu::test::autoRun.set(true); \
} catch (...) { \
cybozu::test::test(false, "CYBOZU_TEST_NO_EXCEPTION", #statement, __FILE__, __LINE__); \
}
/**
define a unit test and register it automatically
@param name [in] module name; used as the registered test name and as a suffix of the generated symbols
@note the macro declares cybozu_test_<name>(), defines a global object whose
constructor passes that function to cybozu::test::autoRun.append, and ends with
the function header, so the test body must follow the macro directly
*/
#define CYBOZU_TEST_AUTO(name) \
void cybozu_test_ ## name(); \
struct cybozu_test_local_ ## name { \
cybozu_test_local_ ## name() \
{ \
cybozu::test::autoRun.append(#name, cybozu_test_ ## name); \
} \
} cybozu_test_local_instance_ ## name; \
void cybozu_test_ ## name()
/**
define a unit test with a fixture and register it automatically
@param name [in] module name; used as the registered test name and as a suffix of the generated symbols
@param Fixture [in] default-constructible class; an instance is created before the test body runs
@note the registered function is the wrapper cybozu_test_real_<name>(), which
constructs a local Fixture and then calls the test body, so the fixture is
destroyed when the wrapper returns; the test body must follow the macro directly
*/
#define CYBOZU_TEST_AUTO_WITH_FIXTURE(name, Fixture) \
void cybozu_test_ ## name(); \
void cybozu_test_real_ ## name() \
{ \
Fixture f; \
cybozu_test_ ## name(); \
} \
struct cybozu_test_local_ ## name { \
cybozu_test_local_ ## name() \
{ \
cybozu::test::autoRun.append(#name, cybozu_test_real_ ## name); \
} \
} cybozu_test_local_instance_ ## name; \
void cybozu_test_ ## name()
/**
set up a global fixture
@param Fixture [in] class name of the fixture; must be default-constructible
@note the macro defines cybozu_test_local_init(), which allocates the fixture with
new, and cybozu_test_local_term(), which deletes it, and registers both through
cybozu::test::autoRun.setup from the constructor of a global object.
The constructor of Fixture is meant to run before the tests and the destructor
after them; the actual call sites are in autoRun, outside this macro.
*/
#define CYBOZU_TEST_SETUP_FIXTURE(Fixture) \
Fixture *cybozu_test_local_fixture; \
void cybozu_test_local_init() \
{ \
cybozu_test_local_fixture = new Fixture(); \
} \
void cybozu_test_local_term() \
{ \
delete cybozu_test_local_fixture; \
} \
struct cybozu_test_local_fixture_setup_ { \
cybozu_test_local_fixture_setup_() \
{ \
cybozu::test::autoRun.setup(cybozu_test_local_init, cybozu_test_local_term); \
} \
} cybozu_test_local_fixture_setup_instance_;
+94
View File
@@ -0,0 +1,94 @@
ldtilecfg(ptr[rax + rcx * 4 + 64]);
ldtilecfg(ptr [r30+r29*4+0x12]);
ldtilecfg(ptr [rax]);
sttilecfg(ptr[rsp + rax * 8 + 64]);
sttilecfg(ptr [r30+r29*4+0x12]);
sttilecfg(ptr [r30]);
tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
tileloadd(tmm2, ptr [r30+r29*4+0x12]);
tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
tileloaddt1(tmm7, ptr [r30+r29*4+0x12]);
tilerelease();
tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
tilestored(ptr [r30+r29*4+0x12], tmm1);
tilezero(tmm7);
tdpbssd(tmm1, tmm2, tmm3);
tdpbsud(tmm2, tmm3, tmm4);
tdpbusd(tmm3, tmm4, tmm5);
tdpbuud(tmm4, tmm5, tmm6);
tdpfp16ps(tmm5, tmm6, tmm7);
tdpbf16ps(tmm5, tmm6, tmm7);
tileloadd(tmm1, ptr[r8+r8]);
tileloadd(tmm1, ptr[rax+rcx*4]);
tileloadd(tmm1, ptr[r8+r9*1+0x40]);
tileloadd(tmm1, ptr[r30+r29*1+0x80]);
tileloaddrs(tmm3, ptr[rdi + rdx * 2 + 8]);
tileloaddrs(tmm7, ptr[r31 + rdx * 2 + 8]);
tileloaddrst1(tmm4, ptr[r8 + r9 + 32]);
tileloaddrst1(tmm4, ptr[r25 + r9 + 32]);
tdpbf8ps(tmm1, tmm2, tmm3);
tdpbhf8ps(tmm1, tmm2, tmm3);
tdphbf8ps(tmm1, tmm2, tmm3);
tdphf8ps(tmm1, tmm2, tmm3);
tmmultf32ps(tmm1, tmm2, tmm3);
//t2rpntlvwz0(tmm1, ptr[rax+r8*2+0x80]);
//t2rpntlvwz0(tmm7, ptr[r30+r8*2+0x80]);
//t2rpntlvwz0t1(tmm1, ptr[rax+r8*2+0x80]);
//t2rpntlvwz0t1(tmm7, ptr[r30+r8*2+0x80]);
//t2rpntlvwz1(tmm1, ptr[rax+r8*2+0x80]);
//t2rpntlvwz1(tmm7, ptr[r30+r8*2+0x80]);
//t2rpntlvwz1t1(tmm1, ptr[rax+r8*2+0x80]);
//t2rpntlvwz1t1(tmm7, ptr[r30+r8*2+0x80]);
//t2rpntlvwz0rs(tmm1, ptr[rax+r8*2+0x80]);
//t2rpntlvwz0rs(tmm7, ptr[r30+r8*2+0x80]);
//t2rpntlvwz0rst1(tmm1, ptr[rax+r8*2+0x80]);
//t2rpntlvwz0rst1(tmm7, ptr[r30+r8*2+0x80]);
//t2rpntlvwz1rs(tmm1, ptr[rax+r8*2+0x80]);
//t2rpntlvwz1rs(tmm7, ptr[r30+r8*2+0x80]);
//t2rpntlvwz1rst1(tmm1, ptr[rax+r8*2+0x80]);
//t2rpntlvwz1rst1(tmm7, ptr[r30+r8*2+0x80]);
tcmmimfp16ps(tmm1, tmm2, tmm3);
tcmmrlfp16ps(tmm1, tmm2, tmm3);
//tconjtcmmimfp16ps(tmm1, tmm2, tmm3);
//tconjtfp16(tmm1, tmm2);
tcvtrowps2bf16h(zmm1, tmm2, r30d);
tcvtrowps2bf16h(zmm29, tmm2, 0x12);
tcvtrowps2bf16l(zmm1, tmm2, r30d);
tcvtrowps2bf16l(zmm29, tmm2, 0x12);
tcvtrowps2phh(zmm1, tmm2, r30d);
tcvtrowps2phh(zmm29, tmm2, 0x12);
tcvtrowps2phl(zmm1, tmm2, r30d);
tcvtrowps2phl(zmm29, tmm2, 0x12);
tilemovrow(zmm1, tmm2, r30d);
tilemovrow(zmm29, tmm2, 0x12);
//ttcmmimfp16ps(tmm1, tmm2, tmm3);
//ttcmmrlfp16ps(tmm1, tmm2, tmm3);
//ttdpbf16ps(tmm1, tmm2, tmm3);
//ttdpfp16ps(tmm1, tmm2, tmm3);
//ttmmultf32ps(tmm1, tmm2, tmm3);
//ttransposed(tmm1, tmm2);
tcvtrowd2ps(zmm20, tmm1, r30d);
tcvtrowd2ps(zmm20, tmm1, 0x12);
+21
View File
@@ -0,0 +1,21 @@
// https://github.com/herumi/xbyak/pull/202
sal(rax, r8, 1);
sar(rax, r9, 4);
shl(rax, rdi, 8);
shr(rax, rsi, 12);
rcl(rax, r10, 16);
rcr(rax, r11, 20);
rol(rax, r14, 24);
ror(rax, r15, 28);
sal(rcx, qword[r8], 32);
sar(rcx, qword[r9], 36);
sal(rcx, qword[rdi], 40);
sar(rcx, qword[rsi], 44);
rcl(rcx, qword[r10], 48);
rcr(rcx, qword[r11], 52);
rol(rcx, qword[r14], 56);
ror(rcx, qword[r15], 60);
imul(rax, rdx, r10);
imul(rcx, r15, qword[rdi]);
+210
View File
@@ -0,0 +1,210 @@
vaddbf16(xm1, xm2, xm3);
vaddbf16(ym1|k1, ym2, ptr[rax+64]);
vaddbf16(ym1|k1, ym2, ptr_b[rax+64]);
vaddbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vdivbf16(xm1, xm2, xm3);
vdivbf16(ym1|k1, ym2, ptr[rax+64]);
vdivbf16(ym1|k1, ym2, ptr_b[rax+64]);
vdivbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vmaxbf16(xm1, xm2, xm3);
vmaxbf16(ym1|k1, ym2, ptr[rax+64]);
vmaxbf16(ym1|k1, ym2, ptr_b[rax+64]);
vmaxbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vminbf16(xm1, xm2, xm3);
vminbf16(ym1|k1, ym2, ptr[rax+64]);
vminbf16(ym1|k1, ym2, ptr_b[rax+64]);
vminbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vmulbf16(xm1, xm2, xm3);
vmulbf16(ym1|k1, ym2, ptr[rax+64]);
vmulbf16(ym1|k1, ym2, ptr_b[rax+64]);
vmulbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vscalefbf16(xm1, xm2, xm3);
vscalefbf16(ym1|k1, ym2, ptr[rax+64]);
vscalefbf16(ym1|k1, ym2, ptr_b[rax+64]);
vscalefbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vsubbf16(xm1, xm2, xm3);
vsubbf16(ym1|k1, ym2, ptr[rax+64]);
vsubbf16(ym1|k1, ym2, ptr_b[rax+64]);
vsubbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
// madd
vfmadd132bf16(xm1, xm2, xm3);
vfmadd132bf16(ym1|k1, ym2, ptr[rax+64]);
vfmadd132bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfmadd132bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vfmadd213bf16(xm1, xm2, xm3);
vfmadd213bf16(ym1|k1, ym2, ptr[rax+64]);
vfmadd213bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfmadd213bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vfmadd231bf16(xm1, xm2, xm3);
vfmadd231bf16(ym1|k1, ym2, ptr[rax+64]);
vfmadd231bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfmadd231bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
// nmadd
vfnmadd132bf16(xm1, xm2, xm3);
vfnmadd132bf16(ym1|k1, ym2, ptr[rax+64]);
vfnmadd132bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfnmadd132bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vfnmadd213bf16(xm1, xm2, xm3);
vfnmadd213bf16(ym1|k1, ym2, ptr[rax+64]);
vfnmadd213bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfnmadd213bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vfnmadd231bf16(xm1, xm2, xm3);
vfnmadd231bf16(ym1|k1, ym2, ptr[rax+64]);
vfnmadd231bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfnmadd231bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
// msub
vfmsub132bf16(xm1, xm2, xm3);
vfmsub132bf16(ym1|k1, ym2, ptr[rax+64]);
vfmsub132bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfmsub132bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vfmsub213bf16(xm1, xm2, xm3);
vfmsub213bf16(ym1|k1, ym2, ptr[rax+64]);
vfmsub213bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfmsub213bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vfmsub231bf16(xm1, xm2, xm3);
vfmsub231bf16(ym1|k1, ym2, ptr[rax+64]);
vfmsub231bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfmsub231bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
// nmsub
vfnmsub132bf16(xm1, xm2, xm3);
vfnmsub132bf16(ym1|k1, ym2, ptr[rax+64]);
vfnmsub132bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfnmsub132bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vfnmsub213bf16(xm1, xm2, xm3);
vfnmsub213bf16(ym1|k1, ym2, ptr[rax+64]);
vfnmsub213bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfnmsub213bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vfnmsub231bf16(xm1, xm2, xm3);
vfnmsub231bf16(ym1|k1, ym2, ptr[rax+64]);
vfnmsub231bf16(ym1|k1, ym2, ptr_b[rax+64]);
vfnmsub231bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
vcmpbf16(k1, xm5, xm4, 5);
vcmpbf16(k2, ym5, ym4, 6);
vcmpbf16(k3, ym15, ptr_b[rax+64], 7);
vcmpbf16(k4, zm30, zm20, 8);
vcmpbf16(k5, zm1, ptr[rax+64], 9);
vcmpbf16(k6, zm10, ptr_b[rax+64], 10);
vfpclassbf16(k1, xm4, 5);
vfpclassbf16(k2|k5, ym4, 6);
vfpclassbf16(k3|k5, zm20, 7);
vfpclassbf16(k3|k5, xword[rax+64], 8);
vfpclassbf16(k3, xword_b[rax+64], 9);
vfpclassbf16(k5|k5, yword[rax+64], 10);
vfpclassbf16(k6|k5, yword_b[rax+64], 11);
vfpclassbf16(k7|k5, zword[rax+64], 12);
vfpclassbf16(k7|k5, zword_b[rax+64], 13);
vcomisbf16(xm2, xm3);
vcomisbf16(xm2, ptr[rax+64]);
vgetexpbf16(xm1|k3, xmm2);
vgetexpbf16(xm1|k3, ptr[rax+64]);
vgetexpbf16(xm1|k3, ptr_b[rax+64]);
vgetexpbf16(ym1|k3, ymm2);
vgetexpbf16(ym1|k3, ptr[rax+64]);
vgetexpbf16(ym1|k3, ptr_b[rax+64]);
vgetexpbf16(zm1|k3, zmm2);
vgetexpbf16(zm1|k3, ptr[rax+64]);
vgetexpbf16(zm1|k3, ptr_b[rax+64]);
vgetmantbf16(xm1|k3, xmm2, 3);
vgetmantbf16(xm1|k3, ptr[rax+64], 5);
vgetmantbf16(xm1|k3, ptr_b[rax+64], 9);
vgetmantbf16(ym1|k3, ymm2, 3);
vgetmantbf16(ym1|k3, ptr[rax+64], 5);
vgetmantbf16(ym1|k3, ptr_b[rax+64], 9);
vgetmantbf16(zm1|k3, zmm2, 3);
vgetmantbf16(zm1|k3, ptr[rax+64], 5);
vgetmantbf16(zm1|k3, ptr_b[rax+64], 9);
vrcpbf16(xm1|k5, xm2);
vrcpbf16(xm1|k5, ptr[rcx+64]);
vrcpbf16(xm1|k5, ptr_b[rcx+64]);
vrcpbf16(ym1|k5, ym2);
vrcpbf16(ym1|k5, ptr[rcx+64]);
vrcpbf16(ym1|k5, ptr_b[rcx+64]);
vrcpbf16(zm1|k5, zm2);
vrcpbf16(zm1|k5, ptr[rcx+64]);
vrcpbf16(zm1|k5, ptr_b[rcx+64]);
vreducebf16(xm1|k4, xm2, 1);
vreducebf16(xm1|k4, ptr[rax+64], 1);
vreducebf16(xm1|k4, ptr_b[rax+64], 1);
vreducebf16(ym1|k4, ym2, 1);
vreducebf16(ym1|k4, ptr[rax+64], 1);
vreducebf16(ym1|k4, ptr_b[rax+64], 1);
vreducebf16(zm1|k4, zm2, 1);
vreducebf16(zm1|k4, ptr[rax+64], 1);
vreducebf16(zm1|k4, ptr_b[rax+64], 1);
vrndscalebf16(xm1|k4, xm2, 1);
vrndscalebf16(xm1|k4, ptr[rax+64], 1);
vrndscalebf16(xm1|k4, ptr_b[rax+64], 1);
vrndscalebf16(ym1|k4, ym2, 1);
vrndscalebf16(ym1|k4, ptr[rax+64], 1);
vrndscalebf16(ym1|k4, ptr_b[rax+64], 1);
vrndscalebf16(zm1|k4, zm2, 1);
vrndscalebf16(zm1|k4, ptr[rax+64], 1);
vrndscalebf16(zm1|k4, ptr_b[rax+64], 1);
vrsqrtbf16(xm1|k5, xm2);
vrsqrtbf16(xm1|k5, ptr[rcx+64]);
vrsqrtbf16(xm1|k5, ptr_b[rcx+64]);
vrsqrtbf16(ym1|k5, ym2);
vrsqrtbf16(ym1|k5, ptr[rcx+64]);
vrsqrtbf16(ym1|k5, ptr_b[rcx+64]);
vrsqrtbf16(zm1|k5, zm2);
vrsqrtbf16(zm1|k5, ptr[rcx+64]);
vrsqrtbf16(zm1|k5, ptr_b[rcx+64]);
vscalefbf16(xm1|k5, xm5, xm2);
vscalefbf16(xm1|k5, xm5, ptr[rcx+64]);
vscalefbf16(xm1|k5, xm5, ptr_b[rcx+64]);
vscalefbf16(ym1|k5, ym9, ym2);
vscalefbf16(ym1|k5, ym9, ptr[rcx+64]);
vscalefbf16(ym1|k5, ym9, ptr_b[rcx+64]);
vscalefbf16(zm1|k5, zm30, zm2);
vscalefbf16(zm1|k5, zm30, ptr[rcx+64]);
vscalefbf16(zm1|k5, zm30, ptr_b[rcx+64]);
vsqrtbf16(xm5|k3, xmm4);
vsqrtbf16(xm5|k3, ptr[rax+64]);
vsqrtbf16(xm5|k3, ptr_b[rax+64]);
vsqrtbf16(ym5|k3, ymm4);
vsqrtbf16(ym5|k3, ptr[rax+64]);
vsqrtbf16(ym5|k3, ptr_b[rax+64]);
vsqrtbf16(zm5|k3, zmm4);
vsqrtbf16(zm5|k3, ptr[rax+64]);
vsqrtbf16(zm5|k3, ptr_b[rax+64]);
+17
View File
@@ -0,0 +1,17 @@
vcomxsd(xm1, xm2|T_sae);
vcomxsd(xm1, ptr[rax+64]);
vcomxsh(xm1, xm2|T_sae);
vcomxsh(xm1, ptr[rax+64]);
vcomxss(xm1, xm2|T_sae);
vcomxss(xm1, ptr[rax+64]);
vucomxsd(xm1, xm2|T_sae);
vucomxsd(xm1, ptr[rax+64]);
vucomxsh(xm1, xm2|T_sae);
vucomxsh(xm1, ptr[rax+64]);
vucomxss(xm1, xm2|T_sae);
vucomxss(xm1, ptr[rax+64]);
+200
View File
@@ -0,0 +1,200 @@
vcvt2ps2phx(xm1|k5, xm2, xm3);
vcvt2ps2phx(xm1|k5, xm2, ptr[rax+64]);
vcvt2ps2phx(xm1|k5, xm2, ptr_b[rax+64]);
vcvt2ps2phx(ym1|k5, ym2, ym3);
vcvt2ps2phx(ym1|k5, ym2, ptr[rax+64]);
vcvt2ps2phx(ym1|k5, ym2, ptr_b[rax+64]);
vcvt2ps2phx(zm1|k5, zm2, zm3);
vcvt2ps2phx(zm1|k5, zm2, ptr[rax+64]);
vcvt2ps2phx(zm1|k5, zm2, ptr_b[rax+64]);
// vcvtbiasph2bf8
vcvtbiasph2bf8(xm1|k2, xm3, xm5);
vcvtbiasph2bf8(xm1|k2, xm3, ptr[rax+64]);
vcvtbiasph2bf8(xm1|k2, xm3, ptr_b[rax+64]);
vcvtbiasph2bf8(xm1|k2, ym3, ym5);
vcvtbiasph2bf8(xm1|k2, ym3, ptr[rax+64]);
vcvtbiasph2bf8(xm1|k2, ym3, ptr_b[rax+64]);
vcvtbiasph2bf8(ym1|k2, zm3, zm5);
vcvtbiasph2bf8(ym1|k2, zm3, ptr[rax+64]);
vcvtbiasph2bf8(ym1|k2, zm3, ptr_b[rax+64]);
// vcvtbiasph2bf8s
vcvtbiasph2bf8s(xm1|k2, xm3, xm5);
vcvtbiasph2bf8s(xm1|k2, xm3, ptr[rax+64]);
vcvtbiasph2bf8s(xm1|k2, xm3, ptr_b[rax+64]);
vcvtbiasph2bf8s(xm1|k2, ym3, ym5);
vcvtbiasph2bf8s(xm1|k2, ym3, ptr[rax+64]);
vcvtbiasph2bf8s(xm1|k2, ym3, ptr_b[rax+64]);
vcvtbiasph2bf8s(ym1|k2, zm3, zm5);
vcvtbiasph2bf8s(ym1|k2, zm3, ptr[rax+64]);
vcvtbiasph2bf8s(ym1|k2, zm3, ptr_b[rax+64]);
// vcvtbiasph2hf8
vcvtbiasph2hf8(xm1|k2, xm3, xm5);
vcvtbiasph2hf8(xm1|k2, xm3, ptr[rax+64]);
vcvtbiasph2hf8(xm1|k2, xm3, ptr_b[rax+64]);
vcvtbiasph2hf8(xm1|k2, ym3, ym5);
vcvtbiasph2hf8(xm1|k2, ym3, ptr[rax+64]);
vcvtbiasph2hf8(xm1|k2, ym3, ptr_b[rax+64]);
vcvtbiasph2hf8(ym1|k2, zm3, zm5);
vcvtbiasph2hf8(ym1|k2, zm3, ptr[rax+64]);
vcvtbiasph2hf8(ym1|k2, zm3, ptr_b[rax+64]);
// vcvtbiasph2hf8s
vcvtbiasph2hf8s(xm1|k2, xm3, xm5);
vcvtbiasph2hf8s(xm1|k2, xm3, ptr[rax+64]);
vcvtbiasph2hf8s(xm1|k2, xm3, ptr_b[rax+64]);
vcvtbiasph2hf8s(xm1|k2, ym3, ym5);
vcvtbiasph2hf8s(xm1|k2, ym3, ptr[rax+64]);
vcvtbiasph2hf8s(xm1|k2, ym3, ptr_b[rax+64]);
vcvtbiasph2hf8s(ym1|k2, zm3, zm5);
vcvtbiasph2hf8s(ym1|k2, zm3, ptr[rax+64]);
vcvtbiasph2hf8s(ym1|k2, zm3, ptr_b[rax+64]);
vcvthf82ph(xm1|k5|T_z, xm2);
vcvthf82ph(xm1|k5|T_z, ptr[rax+64]);
vcvthf82ph(ym1|k5|T_z, xm2);
vcvthf82ph(ym1|k5|T_z, ptr[rax+64]);
vcvthf82ph(zm1|k5|T_z, ym2);
vcvthf82ph(zm1|k5|T_z, ptr[rax+64]);
//
vcvt2ph2bf8(xm1|k4|T_z, xm2, xm3);
vcvt2ph2bf8(xm1|k4, xm2, ptr[rax+64]);
vcvt2ph2bf8(xm1|T_z, xm2, ptr_b[rax+64]);
vcvt2ph2bf8(ym1|k4|T_z, ym2, ym3);
vcvt2ph2bf8(ym1|k4, ym2, ptr[rax+64]);
vcvt2ph2bf8(ym1|T_z, ym2, ptr_b[rax+64]);
vcvt2ph2bf8(zm1|k4|T_z, zm2, zm3);
vcvt2ph2bf8(zm1|k4, zm2, ptr[rax+64]);
vcvt2ph2bf8(zm1|T_z, zm2, ptr_b[rax+64]);
//
vcvt2ph2bf8s(xm1|k4|T_z, xm2, xm3);
vcvt2ph2bf8s(xm1|k4, xm2, ptr[rax+64]);
vcvt2ph2bf8s(xm1|T_z, xm2, ptr_b[rax+64]);
vcvt2ph2bf8s(ym1|k4|T_z, ym2, ym3);
vcvt2ph2bf8s(ym1|k4, ym2, ptr[rax+64]);
vcvt2ph2bf8s(ym1|T_z, ym2, ptr_b[rax+64]);
vcvt2ph2bf8s(zm1|k4|T_z, zm2, zm3);
vcvt2ph2bf8s(zm1|k4, zm2, ptr[rax+64]);
vcvt2ph2bf8s(zm1|T_z, zm2, ptr_b[rax+64]);
//
vcvt2ph2hf8(xm1|k4|T_z, xm2, xm3);
vcvt2ph2hf8(xm1|k4, xm2, ptr[rax+64]);
vcvt2ph2hf8(xm1|T_z, xm2, ptr_b[rax+64]);
vcvt2ph2hf8(ym1|k4|T_z, ym2, ym3);
vcvt2ph2hf8(ym1|k4, ym2, ptr[rax+64]);
vcvt2ph2hf8(ym1|T_z, ym2, ptr_b[rax+64]);
vcvt2ph2hf8(zm1|k4|T_z, zm2, zm3);
vcvt2ph2hf8(zm1|k4, zm2, ptr[rax+64]);
vcvt2ph2hf8(zm1|T_z, zm2, ptr_b[rax+64]);
//
vcvt2ph2hf8s(xm1|k4|T_z, xm2, xm3);
vcvt2ph2hf8s(xm1|k4, xm2, ptr[rax+64]);
vcvt2ph2hf8s(xm1|T_z, xm2, ptr_b[rax+64]);
vcvt2ph2hf8s(ym1|k4|T_z, ym2, ym3);
vcvt2ph2hf8s(ym1|k4, ym2, ptr[rax+64]);
vcvt2ph2hf8s(ym1|T_z, ym2, ptr_b[rax+64]);
vcvt2ph2hf8s(zm1|k4|T_z, zm2, zm3);
vcvt2ph2hf8s(zm1|k4, zm2, ptr[rax+64]);
vcvt2ph2hf8s(zm1|T_z, zm2, ptr_b[rax+64]);
// vcvtph2bf8
vcvtph2bf8(xmm1|k2|T_z, xmm2);
vcvtph2bf8(xmm1|k2|T_z, xword [rax+64]);
vcvtph2bf8(xmm1|k2|T_z, xword_b[rax+64]);
vcvtph2bf8(xmm1|k2|T_z, ymm2);
vcvtph2bf8(xmm1|k2|T_z, yword[rax+64]);
vcvtph2bf8(xmm1|k2|T_z, yword_b[rax+64]);
vcvtph2bf8(ymm1|k2|T_z, zmm2);
vcvtph2bf8(ymm1|k2|T_z, zword[rax+64]);
vcvtph2bf8(ymm1|k2|T_z, zword_b[rax+64]);
// vcvtph2bf8s
vcvtph2bf8s(xmm1|k2|T_z, xmm2);
vcvtph2bf8s(xmm1|k2|T_z, xword [rax+64]);
vcvtph2bf8s(xmm1|k2|T_z, xword_b[rax+64]);
vcvtph2bf8s(xmm1|k2|T_z, ymm2);
vcvtph2bf8s(xmm1|k2|T_z, yword[rax+64]);
vcvtph2bf8s(xmm1|k2|T_z, yword_b[rax+64]);
vcvtph2bf8s(ymm1|k2|T_z, zmm2);
vcvtph2bf8s(ymm1|k2|T_z, zword[rax+64]);
vcvtph2bf8s(ymm1|k2|T_z, zword_b[rax+64]);
// vcvtph2hf8
vcvtph2hf8(xmm1|k2|T_z, xmm2);
vcvtph2hf8(xmm1|k2|T_z, xword [rax+64]);
vcvtph2hf8(xmm1|k2|T_z, xword_b[rax+64]);
vcvtph2hf8(xmm1|k2|T_z, ymm2);
vcvtph2hf8(xmm1|k2|T_z, yword[rax+64]);
vcvtph2hf8(xmm1|k2|T_z, yword_b[rax+64]);
vcvtph2hf8(ymm1|k2|T_z, zmm2);
vcvtph2hf8(ymm1|k2|T_z, zword[rax+64]);
vcvtph2hf8(ymm1|k2|T_z, zword_b[rax+64]);
// vcvtph2hf8s
vcvtph2hf8s(xmm1|k2|T_z, xmm2);
vcvtph2hf8s(xmm1|k2|T_z, xword [rax+64]);
vcvtph2hf8s(xmm1|k2|T_z, xword_b[rax+64]);
vcvtph2hf8s(xmm1|k2|T_z, ymm2);
vcvtph2hf8s(xmm1|k2|T_z, yword[rax+64]);
vcvtph2hf8s(xmm1|k2|T_z, yword_b[rax+64]);
vcvtph2hf8s(ymm1|k2|T_z, zmm2);
vcvtph2hf8s(ymm1|k2|T_z, zword[rax+64]);
vcvtph2hf8s(ymm1|k2|T_z, zword_b[rax+64]);
// AVX-NE-CONVERT
// broadcast forms: one xmm and one ymm destination each, like the vcvtne* pairs below
vbcstnebf162ps(xmm15, ptr[rax+64]);
vbcstnebf162ps(ymm15, ptr[rax+64]);
vbcstnesh2ps(xmm15, ptr[rax+64]);
vbcstnesh2ps(ymm15, ptr[rax+64]);
vcvtneebf162ps(xmm15, ptr[rax+64]);
vcvtneebf162ps(ymm15, ptr[rax+64]);
vcvtneeph2ps(xmm15, ptr[rax+64]);
vcvtneeph2ps(ymm15, ptr[rax+64]);
vcvtneobf162ps(xmm15, ptr[rax+64]);
vcvtneobf162ps(ymm15, ptr[rax+64]);
vcvtneoph2ps(xmm15, ptr[rax+64]);
vcvtneoph2ps(ymm15, ptr[rax+64]);
vcvtneps2bf16(xmm15, xmm3, VexEncoding);
vcvtneps2bf16(xmm15, ptr[rax+64], VexEncoding);
vcvtneps2bf16(xmm15, ymm3, VexEncoding);
vcvtneps2bf16(xmm15, ptr[rax+64], VexEncoding);
+63
View File
@@ -0,0 +1,63 @@
vminmaxbf16(xm1|k3|T_z, xm2, xm3, 5);
vminmaxbf16(xm1|k3|T_z, xm2, ptr[rax+64], 5);
vminmaxbf16(xm1|k3|T_z, xm2, ptr_b[rax+64], 5);
vminmaxbf16(ym1|k3|T_z, ym2, ym3, 5);
vminmaxbf16(ym1|k3|T_z, ym2, ptr[rax+64], 5);
vminmaxbf16(ym1|k3|T_z, ym2, ptr_b[rax+64], 5);
vminmaxbf16(zm1|k3|T_z, zm2, zm3, 5);
vminmaxbf16(zm1|k3|T_z, zm2, ptr[rax+64], 5);
vminmaxbf16(zm1|k3|T_z, zm2, ptr_b[rax+64], 5);
//
vminmaxpd(xm1|k3|T_z, xm2, xm3, 5);
vminmaxpd(xm1|k3|T_z, xm2, ptr[rax+64], 5);
vminmaxpd(xm1|k3|T_z, xm2, ptr_b[rax+64], 5);
vminmaxpd(ym1|k3|T_z, ym2, ym3, 5);
vminmaxpd(ym1|k3|T_z, ym2, ptr[rax+64], 5);
vminmaxpd(ym1|k3|T_z, ym2, ptr_b[rax+64], 5);
vminmaxpd(zm1|k3|T_z, zm2, zm3, 5);
vminmaxpd(zm1|k3|T_z, zm2, zm3|T_sae, 5);
vminmaxpd(zm1|k3|T_z, zm2, ptr[rax+64], 5);
vminmaxpd(zm1|k3|T_z, zm2, ptr_b[rax+64], 5);
//
vminmaxph(xm1|k3|T_z, xm2, xm3, 5);
vminmaxph(xm1|k3|T_z, xm2, ptr[rax+64], 5);
vminmaxph(xm1|k3|T_z, xm2, ptr[rax+64], 5);
vminmaxph(xm1|k3|T_z, xm2, ptr_b[rax+64], 5);
vminmaxph(ym1|k3|T_z, ym2, ym3, 5);
vminmaxph(ym1|k3|T_z, ym2, ptr[rax+64], 5);
vminmaxph(ym1|k3|T_z, ym2, ptr_b[rax+64], 5);
vminmaxph(zm1|k3|T_z, zm2, zm3, 5);
vminmaxph(zm1|k3|T_z, zm2, zm3|T_sae, 5);
vminmaxph(zm1|k3|T_z, zm2, ptr[rax+64], 5);
vminmaxph(zm1|k3|T_z, zm2, ptr_b[rax+64], 5);
//
vminmaxps(xm1|k3|T_z, xm2, xm3, 5);
vminmaxps(xm1|k3|T_z, xm2, ptr[rax+64], 5);
vminmaxps(xm1|k3|T_z, xm2, ptr_b[rax+64], 5);
vminmaxps(ym1|k3|T_z, ym2, ym3, 5);
vminmaxps(ym1|k3|T_z, ym2, ptr[rax+64], 5);
vminmaxps(ym1|k3|T_z, ym2, ptr_b[rax+64], 5);
vminmaxps(zm1|k3|T_z, zm2, zm3, 5);
vminmaxps(zm1|k3|T_z, zm2, zm3|T_sae, 5);
vminmaxps(zm1|k3|T_z, zm2, ptr[rax+64], 5);
vminmaxps(zm1|k3|T_z, zm2, ptr_b[rax+64], 5);
//
vminmaxsd(xm1|k3|T_z, xm2, xm3, 5);
vminmaxsd(xm1|k3|T_z, xm2, xm3|T_sae, 5);
vminmaxsd(xm1|k3|T_z, xm2, ptr[rax+64], 5);
//
vminmaxsh(xm1|k3|T_z, xm2, xm3, 5);
vminmaxsh(xm1|k3|T_z, xm2, xm3|T_sae, 5);
vminmaxsh(xm1|k3|T_z, xm2, ptr[rax+64], 5);
//
vminmaxss(xm1|k3|T_z, xm2, xm3, 5);
vminmaxss(xm1|k3|T_z, xm2, xm3|T_sae, 5);
vminmaxss(xm1|k3|T_z, xm2, ptr[rax+64], 5);
+290
View File
@@ -0,0 +1,290 @@
// AVX10 integer and FP16 VNNI, media and zero-extending
vdpphps(xm1, xm2, xm3);
vdpphps(xm1, xm2, ptr[rax+64]);
vdpphps(xm1, xm2, ptr_b[rax+64]);
vdpphps(ym1, ym2, ym3);
vdpphps(ym1, ym2, ptr[rax+64]);
vdpphps(ym1, ym2, ptr_b[rax+64]);
vdpphps(zm1, zm2, zm3);
vdpphps(zm1, zm2, ptr[rax+64]);
vdpphps(zm1, zm2, ptr_b[rax+64]);
//
vmpsadbw(xm1, xm3, xm15, 3);
vmpsadbw(xm1|T_z, xm4, ptr[rax+64], 5);
vmpsadbw(ym1|k4, ym3, ym15, 3);
vmpsadbw(ym1, ym4, ptr[rax+64], 5);
vmpsadbw(zm1|k4, zm3, zm15, 3);
vmpsadbw(zm1, zm4, ptr[rax+64], 5);
//
vpdpbssd(xm1, xm2, xm3);
vpdpbssd(xm1, xm2, ptr[rax+64]);
vpdpbssd(xm1, xm2, ptr_b[rax+64]);
vpdpbssd(ym1, ym2, ym3);
vpdpbssd(ym1, ym2, ptr[rax+64]);
vpdpbssd(ym1, ym2, ptr_b[rax+64]);
vpdpbssd(zm1, zm2, zm3);
vpdpbssd(zm1, zm2, ptr[rax+64]);
vpdpbssd(zm1, zm2, ptr_b[rax+64]);
//
vpdpbssds(xm1, xm2, xm3);
vpdpbssds(xm1, xm2, ptr[rax+64]);
vpdpbssds(xm1, xm2, ptr_b[rax+64]);
vpdpbssds(ym1, ym2, ym3);
vpdpbssds(ym1, ym2, ptr[rax+64]);
vpdpbssds(ym1, ym2, ptr_b[rax+64]);
vpdpbssds(zm1, zm2, zm3);
vpdpbssds(zm1, zm2, ptr[rax+64]);
vpdpbssds(zm1, zm2, ptr_b[rax+64]);
//
vpdpbsud(xm1, xm2, xm3);
vpdpbsud(xm1, xm2, ptr[rax+64]);
vpdpbsud(xm1, xm2, ptr_b[rax+64]);
vpdpbsud(ym1, ym2, ym3);
vpdpbsud(ym1, ym2, ptr[rax+64]);
vpdpbsud(ym1, ym2, ptr_b[rax+64]);
vpdpbsud(zm1, zm2, zm3);
vpdpbsud(zm1, zm2, ptr[rax+64]);
vpdpbsud(zm1, zm2, ptr_b[rax+64]);
//
vpdpbsuds(xm1, xm2, xm3);
vpdpbsuds(xm1, xm2, ptr[rax+64]);
vpdpbsuds(xm1, xm2, ptr_b[rax+64]);
vpdpbsuds(ym1, ym2, ym3);
vpdpbsuds(ym1, ym2, ptr[rax+64]);
vpdpbsuds(ym1, ym2, ptr_b[rax+64]);
vpdpbsuds(zm1, zm2, zm3);
vpdpbsuds(zm1, zm2, ptr[rax+64]);
vpdpbsuds(zm1, zm2, ptr_b[rax+64]);
//
vpdpbuud(xm1, xm2, xm3);
vpdpbuud(xm1, xm2, ptr[rax+64]);
vpdpbuud(xm1, xm2, ptr_b[rax+64]);
vpdpbuud(ym1, ym2, ym3);
vpdpbuud(ym1, ym2, ptr[rax+64]);
vpdpbuud(ym1, ym2, ptr_b[rax+64]);
vpdpbuud(zm1, zm2, zm3);
vpdpbuud(zm1, zm2, ptr[rax+64]);
vpdpbuud(zm1, zm2, ptr_b[rax+64]);
//
vpdpbuuds(xm1, xm2, xm3);
vpdpbuuds(xm1, xm2, ptr[rax+64]);
vpdpbuuds(xm1, xm2, ptr_b[rax+64]);
vpdpbuuds(ym1, ym2, ym3);
vpdpbuuds(ym1, ym2, ptr[rax+64]);
vpdpbuuds(ym1, ym2, ptr_b[rax+64]);
vpdpbuuds(zm1, zm2, zm3);
vpdpbuuds(zm1, zm2, ptr[rax+64]);
vpdpbuuds(zm1, zm2, ptr_b[rax+64]);
//
vpdpwsud(xm1, xm2, xm3);
vpdpwsud(xm1, xm2, ptr[rax+64]);
vpdpwsud(xm1, xm2, ptr_b[rax+64]);
vpdpwsud(ym1, ym2, ym3);
vpdpwsud(ym1, ym2, ptr[rax+64]);
vpdpwsud(ym1, ym2, ptr_b[rax+64]);
vpdpwsud(zm1, zm2, zm3);
vpdpwsud(zm1, zm2, ptr[rax+64]);
vpdpwsud(zm1, zm2, ptr_b[rax+64]);
//
vpdpwsuds(xm1, xm2, xm3);
vpdpwsuds(xm1, xm2, ptr[rax+64]);
vpdpwsuds(xm1, xm2, ptr_b[rax+64]);
vpdpwsuds(ym1, ym2, ym3);
vpdpwsuds(ym1, ym2, ptr[rax+64]);
vpdpwsuds(ym1, ym2, ptr_b[rax+64]);
vpdpwsuds(zm1, zm2, zm3);
vpdpwsuds(zm1, zm2, ptr[rax+64]);
vpdpwsuds(zm1, zm2, ptr_b[rax+64]);
//
// vpdpwusd: same operand shapes as the vpdpwsud group above
vpdpwusd(xm1, xm2, xm3);
vpdpwusd(xm1, xm2, ptr[rax+64]);
vpdpwusd(xm1, xm2, ptr_b[rax+64]);
vpdpwusd(ym1, ym2, ym3);
vpdpwusd(ym1, ym2, ptr[rax+64]);
vpdpwusd(ym1, ym2, ptr_b[rax+64]);
vpdpwusd(zm1, zm2, zm3);
vpdpwusd(zm1, zm2, ptr[rax+64]);
vpdpwusd(zm1, zm2, ptr_b[rax+64]);
// vpdpwusds: same operand shapes as the vpdpwsuds group above
vpdpwusds(xm1, xm2, xm3);
vpdpwusds(xm1, xm2, ptr[rax+64]);
vpdpwusds(xm1, xm2, ptr_b[rax+64]);
vpdpwusds(ym1, ym2, ym3);
vpdpwusds(ym1, ym2, ptr[rax+64]);
vpdpwusds(ym1, ym2, ptr_b[rax+64]);
vpdpwusds(zm1, zm2, zm3);
vpdpwusds(zm1, zm2, ptr[rax+64]);
vpdpwusds(zm1, zm2, ptr_b[rax+64]);
//
vpdpwuud(xm1, xm2, xm3);
vpdpwuud(xm1, xm2, ptr[rax+64]);
vpdpwuud(xm1, xm2, ptr_b[rax+64]);
vpdpwuud(ym1, ym2, ym3);
vpdpwuud(ym1, ym2, ptr[rax+64]);
vpdpwuud(ym1, ym2, ptr_b[rax+64]);
vpdpwuud(zm1, zm2, zm3);
vpdpwuud(zm1, zm2, ptr[rax+64]);
vpdpwuud(zm1, zm2, ptr_b[rax+64]);
//
vpdpwuuds(xm1, xm2, xm3);
vpdpwuuds(xm1, xm2, ptr[rax+64]);
vpdpwuuds(xm1, xm2, ptr_b[rax+64]);
vpdpwuuds(ym1, ym2, ym3);
vpdpwuuds(ym1, ym2, ptr[rax+64]);
vpdpwuuds(ym1, ym2, ptr_b[rax+64]);
vpdpwuuds(zm1, zm2, zm3);
vpdpwuuds(zm1, zm2, ptr[rax+64]);
vpdpwuuds(zm1, zm2, ptr_b[rax+64]);
//
vmovd(xm10, xm20);
vmovd(xm1, xm2);
vmovd(xm10, ptr[rax+64]);
vmovd(ptr[rax+64], xm30);
//
vmovw(xm1, xm20);
vmovw(xm1, xm2);
vmovw(xm3, ptr [rax+0x40]);
vmovw(ptr [rax+0x40], xm7);
//
push(rax);
push(rcx);
push(rdx);
push(rbx);
push(rsp);
push(rbp);
push(rsi);
push(rdi);
push(r8);
push(r9);
push(r10);
push(r11);
push(r12);
push(r13);
push(r14);
push(r15);
push(r16);
push(r17);
push(r18);
push(r19);
push(r20);
push(r21);
push(r22);
push(r23);
push(r24);
push(r25);
push(r26);
push(r27);
push(r28);
push(r29);
push(r30);
push(r31);
pop(rax);
pop(rcx);
pop(rdx);
pop(rbx);
pop(rsp);
pop(rbp);
pop(rsi);
pop(rdi);
pop(r8);
pop(r9);
pop(r10);
pop(r11);
pop(r12);
pop(r13);
pop(r14);
pop(r15);
pop(r16);
pop(r17);
pop(r18);
pop(r19);
pop(r20);
pop(r21);
pop(r22);
pop(r23);
pop(r24);
pop(r25);
pop(r26);
pop(r27);
pop(r28);
pop(r29);
pop(r30);
pop(r31);
movrs(rcx, ptr[rax]);
movrs(ecx, ptr[rax]);
movrs(cx, ptr[rax]);
movrs(cl, ptr[rax+rdx*4]);
prefetchnta(ptr[rcx]);
prefetcht0(ptr[rcx]);
prefetcht1(ptr[rcx]);
prefetcht2(ptr[rcx]);
prefetchit1(ptr[rip+64]);
prefetchit0(ptr[rip+64]);
prefetchrst2(ptr[rcx]);
vmovrsb(xm1|k1|T_z, ptr[rax+64]);
vmovrsb(ym1|k1|T_z, ptr[rax+64]);
vmovrsb(zm1|k1|T_z, ptr[rax+64]);
vmovrsd(xm1|k1|T_z, ptr[rax+64]);
vmovrsd(ym1|k1|T_z, ptr[rax+64]);
vmovrsd(zm1|k1|T_z, ptr[rax+64]);
vmovrsq(xm1|k1|T_z, ptr[rax+64]);
vmovrsq(ym1|k1|T_z, ptr[rax+64]);
vmovrsq(zm1|k1|T_z, ptr[rax+64]);
vmovrsw(xm1|k1|T_z, ptr[rax+64]);
vmovrsw(ym1|k1|T_z, ptr[rax+64]);
vmovrsw(zm1|k1|T_z, ptr[rax+64]);
// moved for bug of nasm 3.x
vcvtsd2si(esp, xmm4|T_rd_sae);
vcvtsd2si(r8, xmm4|T_rd_sae);
vcvtsd2usi(ecx, xmm4|T_rd_sae);
vcvtsd2usi(r14, xmm4|T_rd_sae);
vcvtss2si(ecx, xmm4|T_rd_sae);
vcvtss2si(r13, xmm4|T_rd_sae);
vcvtss2usi(esi, xmm4|T_rd_sae);
vcvtss2usi(r10, xmm4|T_rd_sae);
vcvttsd2si(ecx, xmm25|T_sae);
vcvttsd2si(r12, xmm25|T_sae);
vcvttsd2usi(edx, xmm25|T_sae);
vcvttsd2usi(rbp, xmm25|T_sae);
vcvttss2si(esp, xmm25|T_sae);
vcvttss2si(r11, xmm25|T_sae);
vcvttss2usi(edi, xmm25|T_sae);
vcvttss2usi(r14, xmm25|T_sae);
+632
View File
@@ -0,0 +1,632 @@
vaesdec(xmm20, xmm30, ptr [rcx + 64]);
vaesdec(ymm1, ymm2, ptr [rcx + 64]);
vaesdec(zmm1, zmm2, ptr [rcx + 64]);
vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
vaesenc(xmm20, xmm30, ptr [rcx + 64]);
vaesenc(ymm1, ymm2, ptr [rcx + 64]);
vaesenc(zmm1, zmm2, ptr [rcx + 64]);
vaesenclast(xmm20, xmm30, ptr [rax + 64]);
vaesenclast(ymm20, ymm30, ptr [rax + 64]);
vaesenclast(zmm20, zmm30, ptr [rax + 64]);
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
vpcompressb(ptr[rax + 64], xmm1);
vpcompressb(xmm30 | k5, xmm1);
vpcompressb(ptr[rax + 64], ymm1);
vpcompressb(ymm30 | k3 |T_z, ymm1);
vpcompressb(ptr[rax + 64], zmm1);
vpcompressb(zmm30 | k2 |T_z, zmm1);
vpcompressw(ptr[rax + 64], xmm1);
vpcompressw(xmm30 | k5, xmm1);
vpcompressw(ptr[rax + 64], ymm1);
vpcompressw(ymm30 | k3 |T_z, ymm1);
vpcompressw(ptr[rax + 64], zmm1);
vpcompressw(zmm30 | k2 |T_z, zmm1);
vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpexpandb(xmm5|k3|T_z, xmm30);
vpexpandb(ymm5|k3|T_z, ymm30);
vpexpandb(zmm5|k3|T_z, zmm30);
vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(xmm5|k3|T_z, xmm30);
vpexpandw(ymm5|k3|T_z, ymm30);
vpexpandw(zmm5|k3|T_z, zmm30);
vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
gf2p8affineinvqb(xmm1, xmm2, 3);
gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
gf2p8affineqb(xmm1, xmm2, 3);
gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
gf2p8mulb(xmm1, xmm2);
gf2p8mulb(xmm1, ptr [rax + 0x40]);
vgf2p8mulb(xmm1, xmm5, xmm2);
vgf2p8mulb(ymm1, ymm5, ymm2);
vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
vgf2p8mulb(xmm30, xmm31, xmm4);
vgf2p8mulb(ymm30, ymm31, ymm4);
vgf2p8mulb(zmm30, zmm31, zmm4);
vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]);
vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]);
vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]);
vcvtneps2bf16(xmm0, xword [rax + 64]);
vcvtneps2bf16(xmm0 | k1, yword [rax + 64]);
vcvtneps2bf16(ymm0 | k1, zword [rax + 64]);
vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]);
vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]);
vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]);
vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]);
vaddph(zmm0, zmm1, ptr[rax+64]);
vaddph(ymm0, ymm1, ptr[rax+64]);
vaddph(xmm0, xmm1, ptr[rax+64]);
vaddph(zmm0, zmm1, ptr_b[rax+64]);
vaddph(ymm0, ymm1, ptr_b[rax+64]);
vaddph(xmm0, xmm1, ptr_b[rax+64]);
vaddsh(xmm0, xmm15, ptr[rax+64]);
vaddsh(xmm0|k5|T_z|T_rd_sae, xmm15, xmm3);
vcmpph(k1, xm15, ptr[rax+64], 1);
vcmpph(k2, ym15, ptr[rax+64], 2);
vcmpph(k3, zm15, ptr[rax+64], 3);
vcmpph(k1, xm15, ptr_b[rax+64], 1);
vcmpph(k2, ym15, ptr_b[rax+64], 2);
vcmpph(k3, zm15, ptr_b[rax+64], 3);
vcmpsh(k1, xm15, ptr[rax+64], 1);
vcmpsh(k3|k5, xmm1, xmm25|T_sae, 4);
vcomish(xmm1, ptr[rax+64]);
vcomish(xmm1|T_sae, xmm15);
vucomish(xmm1, ptr [rax+0x40]);
vucomish(xmm1|T_sae, xmm15);
vfmaddsub213ph(xmm1, xmm2, ptr [rax+0x40]);
vfmaddsub213ph(xmm1, xmm2, ptr_b [rax+0x40]);
vfmaddsub213ph(xmm1|k3, xmm2, xmm5);
vfmaddsub213ph(ymm1, ymm2, ptr [rax+0x40]);
vfmaddsub213ph(ymm1, ymm2, ptr_b[rax+0x40]);
vfmaddsub213ph(ymm1|k3, ymm2, ymm5);
vfmaddsub213ph(zmm1, zmm2, ptr [rax+0x40]);
vfmaddsub213ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmaddsub213ph(zmm1|T_ru_sae, zmm2, zmm5);
vfmsubadd132ph(xmm1, xmm2, ptr [rax+0x40]);
vfmsubadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
vfmsubadd132ph(ymm1, ymm2, ptr [rax+0x40]);
vfmsubadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
vfmsubadd132ph(zmm1, zmm2, ptr [rax+0x40]);
vfmsubadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmsubadd132ph(zmm1|T_ru_sae, zmm2, zmm5);
vfmadd132ph(xmm1, xmm2, ptr [rax+0x40]);
vfmadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
vfmadd132ph(ymm1, ymm2, ptr [rax+0x40]);
vfmadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
vfmadd132ph(zmm1, zmm2, ptr [rax+0x40]);
vfmadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmadd132ph(zmm1|T_rd_sae, zmm2, zmm5);
vfmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
vfmsub231ph(xmm1, xmm2, ptr_b [rax+0x40]);
vfmsub231ph(ymm1, ymm2, ptr [rax+0x40]);
vfmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
vfmsub231ph(zmm1, zmm2, ptr [rax+0x40]);
vfmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
vfnmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
vfnmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
vfnmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfnmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
vfmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vfmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
vfnmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vfnmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
vfmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vfmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
vfnmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vfnmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
vfcmaddcph(xmm1|k1|T_z, xmm2, ptr [rax+0x40]);
vfcmaddcph(ymm1|k1|T_z, ymm2, ptr [rax+0x40]);
vfcmaddcph(zmm1|k1, zmm2, ptr [rax+0x40]);
vfcmaddcph(zmm1|k1|T_rd_sae, zmm2, zmm5);
vfcmaddcph(xmm1|k1|T_z, xmm2, ptr_b [rax+0x40]);
vfcmaddcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
vfcmaddcph(zmm1|k1|T_z, zmm2, ptr_b [rax+0x40]);
vfmaddcph(xm1, xm2, ptr[rax+0x40]);
vfmaddcph(ym1|k1|T_z, ym2, ptr_b[rax+0x40]);
vfmaddcph(zm1, zm2, ptr_b[rax+0x40]);
vfcmulcph(xmm1, xmm2, ptr [rax+0x40]);
vfcmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
vfcmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmulcph(xmm1, xmm2, ptr [rax+0x40]);
vfmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
vfmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
vrcpph(xmm1, ptr [rax+0x40]);
vrcpph(xmm1, ptr_b [rax+0x40]);
vrcpph(ymm1, ptr [rax+0x40]);
vrcpph(ymm1, ptr_b [rax+0x40]);
vrcpph(zmm1, ptr [rax+0x40]);
vrcpph(zmm1, ptr_b [rax+0x40]);
vrcpsh(xmm1, xmm3, ptr [rax+0x40]);
vrsqrtph(xmm1, ptr [rax+0x40]);
vrsqrtph(xmm1, ptr_b [rax+0x40]);
vrsqrtph(ymm2, ptr [rax+0x40]);
vrsqrtph(ymm2, ptr_b [rax+0x40]);
vrsqrtph(zmm2, ptr [rax+0x40]);
vrsqrtph(zmm2, ptr_b [rax+0x40]);
vrsqrtsh(xmm1|k5|T_z, xmm7, ptr [rax+0x40]);
vsqrtph(xmm1|k4|T_z, ptr [rax+0x40]);
vsqrtph(xmm1|k4|T_z, ptr_b [rax+0x40]);
vsqrtph(ymm1|k4|T_z, ptr_b [rax+0x40]);
vsqrtph(zmm1|k4|T_z, ptr [rax+0x40]);
vsqrtph(zmm1|k4|T_z, ptr_b [rax+0x40]);
vsqrtsh(xmm1|k4|T_z, xmm5, ptr [rax+0x40]);
vsqrtsh(xmm1|k4|T_z|T_rd_sae, xmm5, xmm7);
vscalefph(xmm1, xmm5, ptr [rax+0x40]);
vscalefph(xmm1, xmm5, ptr_b [rax+0x40]);
vscalefph(ymm1, ymm5, ptr [rax+0x40]);
vscalefph(ymm1, ymm5, ptr_b [rax+0x40]);
vscalefph(zmm1, zmm5, ptr [rax+0x40]);
vscalefph(zmm1, zmm5, ptr_b [rax+0x40]);
vscalefph(zmm1|k1|T_z|T_rd_sae, zmm5, zmm7);
vscalefsh(xmm1, xmm5, ptr [rax+0x40]);
vscalefsh(xmm1|k1|T_z|T_rd_sae, xmm5, xmm7);
vreduceph(xmm1, ptr [rax+0x40], 0x1);
vreduceph(xmm1, ptr_b [rax+0x40], 0x2);
vreduceph(ymm1, ptr [rax+0x40], 0x3);
vreduceph(ymm1, ptr_b [rax+0x40], 0x4);
vreduceph(zmm1, ptr [rax+0x40], 0x5);
vreduceph(zmm1, ptr_b [rax+0x40], 0x6);
vreduceph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
vreducesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
vreducesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
vrndscaleph(xmm1, ptr [rax+0x40], 0x1);
vrndscaleph(xmm1, ptr_b [rax+0x40], 0x2);
vrndscaleph(ymm1, ptr [rax+0x40], 0x3);
vrndscaleph(ymm1, ptr_b [rax+0x40], 0x4);
vrndscaleph(zmm1, ptr [rax+0x40], 0x5);
vrndscaleph(zmm1, ptr_b [rax+0x40], 0x6);
vrndscaleph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
vrndscalesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
vrndscalesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
vfpclassph(k1, xword [rax+0x40], 0x1);
vfpclassph(k1, xword_b[rax+0x40], 0x2);
vfpclassph(k1, yword [rax+0x40], 0x3);
vfpclassph(k1, yword_b[rax+0x40], 0x4);
vfpclassph(k1, zword [rax+0x40], 0x5);
vfpclassph(k1, zword_b[rax+0x40], 0x6);
vfpclasssh(k1|k2, xmm3, 0x5);
vfpclasssh(k1|k2, ptr [rax+0x40], 0x5);
vgetexpph(xmm1, ptr [rax+0x40]);
vgetexpph(ymm1, ptr_b [rax+0x40]);
vgetexpph(zmm1, ptr [rax+0x40]);
vgetexpph(zmm1|k1|T_z|T_sae, zmm5);
vgetexpsh(xmm1, xmm5, ptr [rax+0x40]);
vgetexpsh(xmm1|k1|T_z|T_sae, xmm3, xmm5);
vgetmantph(xmm1, ptr [rax+0x40], 0x1);
vgetmantph(ymm1, ptr_b [rax+0x40], 0x2);
vgetmantph(zmm1, ptr [rax+0x40], 0x3);
vgetmantph(zmm1|k1|T_z|T_sae, zmm5, 0x4);
vgetmantsh(xmm1, xmm5, ptr [rax+0x40], 0x5);
vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6);
vmovsh(xmm1|k1|T_z, ptr [rax+0x40]);
vmovsh(ptr [rax+0x40]|k1, xmm1);
vmovsh(xmm1|k2|T_z, xmm3, xmm5);
vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]);
vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3);
vcvtsh2sd(xmm1, xmm2, ptr [rax+0x40]);
vcvtsh2ss(xmm1|k1|T_z|T_sae, xmm2, xmm3);
vcvtsh2ss(xmm1, xmm2, ptr [rax+0x40]);
vcvtss2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vcvtss2sh(xmm1, xmm2, ptr [rax+0x40]);
vcvtsh2si(edx|T_rd_sae, xmm1);
vcvtsh2si(edx, ptr [rax+0x40]);
vcvtsh2si(rdx|T_rd_sae, xmm1);
vcvtsh2si(r8, ptr [rax+0x40]);
vcvtph2dq(xmm1, xmm5);
vcvtph2dq(xmm1, ptr [rax+0x40]);
vcvtph2dq(xmm1, ptr_b [rax+0x40]);
vcvtph2dq(ymm1|k2|T_z, xmm5);
vcvtph2dq(ymm1, ptr [rax+0x40]);
vcvtph2dq(ymm1, ptr_b [rax+0x40]);
vcvtph2dq(zmm1|k5|T_z|T_rd_sae, ymm3);
vcvtph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2psx(xmm1, xmm5);
vcvtph2psx(xmm1, ptr [rax+0x40]);
vcvtph2psx(xmm1, ptr_b [rax+0x40]);
vcvtph2psx(ymm1|k2|T_z, xmm5);
vcvtph2psx(ymm1, ptr [rax+0x40]);
vcvtph2psx(ymm1, ptr_b [rax+0x40]);
vcvtph2psx(zmm1|k5|T_z|T_sae, ymm3);
vcvtph2psx(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2psx(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2udq(xmm1, xmm5);
vcvtph2udq(xmm1, ptr [rax+0x40]);
vcvtph2udq(xmm1, ptr_b [rax+0x40]);
vcvtph2udq(ymm1|k2|T_z, xmm5);
vcvtph2udq(ymm1, ptr [rax+0x40]);
vcvtph2udq(ymm1, ptr_b [rax+0x40]);
vcvtph2udq(zmm1|k5|T_z|T_rd_sae, ymm3);
vcvtph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvttph2dq(xmm1, xmm5);
vcvttph2dq(xmm1, ptr [rax+0x40]);
vcvttph2dq(xmm1, ptr_b [rax+0x40]);
vcvttph2dq(ymm1|k2|T_z, xmm5);
vcvttph2dq(ymm1, ptr [rax+0x40]);
vcvttph2dq(ymm1, ptr_b [rax+0x40]);
vcvttph2dq(zmm1|k5|T_z|T_sae, ymm3);
vcvttph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvttph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvttph2udq(xmm1, xmm5);
vcvttph2udq(xmm1, ptr [rax+0x40]);
vcvttph2udq(xmm1, ptr_b [rax+0x40]);
vcvttph2udq(ymm1|k2|T_z, xmm5);
vcvttph2udq(ymm1, ptr [rax+0x40]);
vcvttph2udq(ymm1, ptr_b [rax+0x40]);
vcvttph2udq(zmm1|k5|T_z|T_sae, ymm3);
vcvttph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvttph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2pd(xmm1, xmm5);
vcvtph2pd(xmm1, ptr [rax+0x40]);
vcvtph2pd(xmm1, ptr_b [rax+0x40]);
vcvtph2pd(ymm1|k2|T_z, xmm5);
vcvtph2pd(ymm1, ptr [rax+0x40]);
vcvtph2pd(ymm1, ptr_b [rax+0x40]);
vcvtph2pd(zmm1|k5|T_z|T_sae, xmm3);
vcvtph2pd(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2pd(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2qq(xmm1, xmm5);
vcvtph2qq(xmm1, ptr [rax+0x40]);
vcvtph2qq(xmm1, ptr_b [rax+0x40]);
vcvtph2qq(ymm1|k2|T_z, xmm5);
vcvtph2qq(ymm1, ptr [rax+0x40]);
vcvtph2qq(ymm1, ptr_b [rax+0x40]);
vcvtph2qq(zmm1|k5|T_z|T_rd_sae, xmm3);
vcvtph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2uqq(xmm1, xmm5);
vcvtph2uqq(xmm1, ptr [rax+0x40]);
vcvtph2uqq(xmm1, ptr_b [rax+0x40]);
vcvtph2uqq(ymm1|k2|T_z, xmm5);
vcvtph2uqq(ymm1, ptr [rax+0x40]);
vcvtph2uqq(ymm1, ptr_b [rax+0x40]);
vcvtph2uqq(zmm1|k5|T_z|T_rd_sae, xmm3);
vcvtph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvttph2uqq(xmm1, xmm5);
vcvttph2uqq(xmm1, ptr [rax+0x40]);
vcvttph2uqq(xmm1, ptr_b [rax+0x40]);
vcvttph2uqq(ymm1|k2|T_z, xmm5);
vcvttph2uqq(ymm1, ptr [rax+0x40]);
vcvttph2uqq(ymm1, ptr_b [rax+0x40]);
vcvttph2uqq(zmm1|k5|T_z|T_sae, xmm3);
vcvttph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvttph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtdq2ph(xmm1, xmm5);
vcvtdq2ph(xmm1, xword [rax+0x40]);
vcvtdq2ph(xmm1, xword_b [rax+0x40]);
vcvtdq2ph(xmm1, yword [rax+0x40]);
vcvtdq2ph(xmm1, yword_b [rax+0x40]);
vcvtdq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
vcvtdq2ph(ymm1, ptr [rax+0x40]);
vcvtdq2ph(ymm1, ptr_b [rax+0x40]);
vcvtps2phx(xmm1, xmm5);
vcvtps2phx(xmm1, xword [rax+0x40]);
vcvtps2phx(xmm1, xword_b [rax+0x40]);
vcvtps2phx(xmm1, yword [rax+0x40]);
vcvtps2phx(xmm1, yword_b [rax+0x40]);
vcvtps2phx(ymm1|k2|T_z|T_rd_sae, zmm5);
vcvtps2phx(ymm1, ptr [rax+0x40]);
vcvtps2phx(ymm1, ptr_b [rax+0x40]);
vcvtudq2ph(xmm1, xmm5);
vcvtudq2ph(xmm1, xword [rax+0x40]);
vcvtudq2ph(xmm1, xword_b [rax+0x40]);
vcvtudq2ph(xmm1, yword [rax+0x40]);
vcvtudq2ph(xmm1, yword_b [rax+0x40]);
vcvtudq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
vcvtudq2ph(ymm1, ptr [rax+0x40]);
vcvtudq2ph(ymm1, ptr_b [rax+0x40]);
vcvtpd2ph(xmm1, xmm5);
vcvtpd2ph(xmm1, ymm5);
vcvtpd2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
vcvtpd2ph(xmm1, xword [rax+0x40]);
vcvtpd2ph(xmm1, xword_b [rax+0x40]);
vcvtpd2ph(xmm1, yword [rax+0x40]);
vcvtpd2ph(xmm1, yword_b [rax+0x40]);
vcvtpd2ph(xmm1, zword [rax+0x40]);
vcvtpd2ph(xmm1, zword_b [rax+0x40]);
vcvtqq2ph(xmm1, xmm5);
vcvtqq2ph(xmm1, ymm5);
vcvtqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
vcvtqq2ph(xmm1, xword [rax+0x40]);
vcvtqq2ph(xmm1, xword_b [rax+0x40]);
vcvtqq2ph(xmm1, yword [rax+0x40]);
vcvtqq2ph(xmm1, yword_b [rax+0x40]);
vcvtqq2ph(xmm1, zword [rax+0x40]);
vcvtqq2ph(xmm1, zword_b [rax+0x40]);
vcvtuqq2ph(xmm1, xmm5);
vcvtuqq2ph(xmm1, ymm5);
vcvtuqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
vcvtuqq2ph(xmm1, xword [rax+0x40]);
vcvtuqq2ph(xmm1, xword_b [rax+0x40]);
vcvtuqq2ph(xmm1, yword [rax+0x40]);
vcvtuqq2ph(xmm1, yword_b [rax+0x40]);
vcvtuqq2ph(xmm1, zword [rax+0x40]);
vcvtuqq2ph(xmm1, zword_b [rax+0x40]);
vcvtph2uw(xmm1, xmm5);
vcvtph2uw(xmm1, ptr [rax+0x40]);
vcvtph2uw(xmm1, ptr_b [rax+0x40]);
vcvtph2uw(ymm1, ptr [rax+0x40]);
vcvtph2uw(ymm1, ptr_b [rax+0x40]);
vcvtph2uw(zmm1|k2|T_z|T_rd_sae, zmm5);
vcvtph2uw(zmm1, ptr [rax+0x40]);
vcvtph2uw(zmm1, ptr_b [rax+0x40]);
vcvtph2w(xmm1, xmm5);
vcvtph2w(xmm1, ptr [rax+0x40]);
vcvtph2w(xmm1, ptr_b [rax+0x40]);
vcvtph2w(ymm1, ptr [rax+0x40]);
vcvtph2w(ymm1, ptr_b [rax+0x40]);
vcvtph2w(zmm1|k2|T_z|T_rd_sae, zmm5);
vcvtph2w(zmm1, ptr [rax+0x40]);
vcvtph2w(zmm1, ptr_b [rax+0x40]);
vcvttph2uw(xmm1, xmm5);
vcvttph2uw(xmm1, ptr [rax+0x40]);
vcvttph2uw(xmm1, ptr_b [rax+0x40]);
vcvttph2uw(ymm1, ptr [rax+0x40]);
vcvttph2uw(ymm1, ptr_b [rax+0x40]);
vcvttph2uw(zmm1|k2|T_z|T_sae, zmm5);
vcvttph2uw(zmm1, ptr [rax+0x40]);
vcvttph2uw(zmm1, ptr_b [rax+0x40]);
vcvttph2w(xmm1, xmm5);
vcvttph2w(xmm1, ptr [rax+0x40]);
vcvttph2w(xmm1, ptr_b [rax+0x40]);
vcvttph2w(ymm1, ptr [rax+0x40]);
vcvttph2w(ymm1, ptr_b [rax+0x40]);
vcvttph2w(zmm1|k2|T_z|T_sae, zmm5);
vcvttph2w(zmm1, ptr [rax+0x40]);
vcvttph2w(zmm1, ptr_b [rax+0x40]);
vcvtuw2ph(xmm1, xmm5);
vcvtuw2ph(xmm1, ptr [rax+0x40]);
vcvtuw2ph(xmm1, ptr_b [rax+0x40]);
vcvtuw2ph(ymm1, ptr [rax+0x40]);
vcvtuw2ph(ymm1, ptr_b [rax+0x40]);
vcvtuw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
vcvtuw2ph(zmm1, ptr [rax+0x40]);
vcvtuw2ph(zmm1, ptr_b [rax+0x40]);
vcvtw2ph(xmm1, xmm5);
vcvtw2ph(xmm1, ptr [rax+0x40]);
vcvtw2ph(xmm1, ptr_b [rax+0x40]);
vcvtw2ph(ymm1, ptr [rax+0x40]);
vcvtw2ph(ymm1, ptr_b [rax+0x40]);
vcvtw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
vcvtw2ph(zmm1, ptr [rax+0x40]);
vcvtw2ph(zmm1, ptr_b [rax+0x40]);
vcvtps2ph(xmm1, xmm2, 0x1);
vcvtps2ph(ptr [rax+0x40], xmm2, 0x2);
vcvtps2ph(xmm1, ymm2, 0x3);
vcvtps2ph(ptr [rax+0x40], ymm2, 0x4);
vcvtps2ph(xmm1|k1|T_z, xmm2, 0x5);
vcvtps2ph(ptr [rax+0x40]|k1, xmm3, 0x6);
vcvtps2ph(xmm1|k2, ymm4, 0x7);
vcvtps2ph(ptr [rax+0x40]|k2, ymm5, 0x8);
vcvtps2ph(ymm1|k2|T_sae, zmm5, 0x9);
vcvtps2ph(ptr [rax+0x40]|k5, zmm4, 0xa);
vcvtsh2usi(ecx|T_rd_sae, xmm1);
vcvtsh2usi(eax, ptr [rax+0x40]);
vcvtsh2usi(r9|T_rd_sae, xmm1);
vcvtsh2usi(r13, ptr [rax+0x40]);
vcvttsh2si(ecx|T_sae, xmm1);
vcvttsh2si(eax, ptr [rax+0x40]);
vcvttsh2si(r9|T_sae, xmm1);
vcvttsh2si(r13, ptr [rax+0x40]);
vcvttsh2usi(ecx|T_sae, xmm1);
vcvttsh2usi(eax, ptr [rax+0x40]);
vcvttsh2usi(r9|T_sae, xmm1);
vcvttsh2usi(r13, ptr [rax+0x40]);
vcvttph2qq(xmm1, xmm5);
vcvttph2qq(xmm1, ptr [rax+0x40]);
vcvttph2qq(xmm1, ptr_b [rax+0x40]);
vcvttph2qq(ymm1|k2|T_z, xmm5);
vcvttph2qq(ymm1, ptr [rax+0x40]);
vcvttph2qq(ymm1, ptr_b [rax+0x40]);
vcvttph2qq(zmm1|k5|T_z|T_sae, xmm3);
vcvttph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvttph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtsi2sh(xmm1|T_rd_sae, xmm2, eax);
vcvtsi2sh(xmm1, xmm2, dword [rax+0x40]);
vcvtsi2sh(xmm1|T_rd_sae, xmm2, r9);
vcvtsi2sh(xmm1, xmm2, qword [rax+0x40]);
vcvtusi2sh(xmm1|T_rd_sae, xmm2, eax);
vcvtusi2sh(xmm1, xmm2, dword [rax+0x40]);
vcvtusi2sh(xmm1|T_rd_sae, xmm2, r9);
vcvtusi2sh(xmm1, xmm2, qword [rax+0x40]);
aadd(ptr[rax], ecx);
aadd(ptr[eax], ecx);
aadd(ptr[rax], r10);
aand(ptr[rax], ecx);
aand(ptr[eax], ecx);
aand(ptr[rax], r10);
aor(ptr[rax], ecx);
aor(ptr[eax], ecx);
aor(ptr[rax], r10);
axor(ptr[rax], ecx);
axor(ptr[eax], ecx);
axor(ptr[rax], r10);
cmpbexadd(ptr[rax+r10*4], rcx, rdx);
cmpbxadd(ptr[rax+r10*4], rcx, rdx);
cmplexadd(ptr[rax+r10*4], rcx, rdx);
cmplxadd(ptr[rax+r10*4], rcx, rdx);
cmpnbexadd(ptr[rax+r10*4], rcx, rdx);
cmpnbxadd(ptr[rax+r10*4], rcx, rdx);
cmpnlexadd(ptr[rax+r10*4], rcx, rdx);
cmpnlxadd(ptr[rax+r10*4], rcx, rdx);
cmpnoxadd(ptr[rax+r10*4], rcx, rdx);
cmpnpxadd(ptr[rax+r10*4], rcx, rdx);
cmpnsxadd(ptr[rax+r10*4], rcx, rdx);
cmpnzxadd(ptr[rax+r10*4], rcx, rdx);
cmpoxadd(ptr[rax+r10*4], rcx, rdx);
cmppxadd(ptr[rax+r10*4], rcx, rdx);
cmpsxadd(ptr[rax+r10*4], rcx, rdx);
cmpzxadd(ptr[rax+r10*4], rcx, rdx);
vsha512msg1(ymm3, xmm5);
vsha512msg2(ymm9, ymm10);
vsha512rnds2(ymm1, ymm3, xmm2);
vsm3msg1(xmm1, xmm2, xmm3);
vsm3msg1(xmm1, xmm2, ptr [rax]);
vsm3msg2(xmm5, xmm7, xmm3);
vsm3msg2(xmm5, xmm6, ptr [rax]);
vsm3rnds2(xmm5, xmm7, xmm3, 0x12);
vsm3rnds2(xmm5, xmm7, ptr [rcx], 0x34);
vsm4key4(xmm1, xmm2, xmm3);
vsm4key4(xmm1, xmm2, ptr [rdx]);
vsm4rnds4(xmm1, xmm2, xmm3);
vsm4rnds4(xmm5, xmm6, ptr [rcx+rax*4]);
vpdpbssd(xmm1, xmm2, xmm3);
vpdpbssd(ymm1, ymm2, ptr [rax]);
vpdpbssds(xmm1, xmm2, xmm3);
vpdpbssds(ymm1, ymm2, ptr [rax]);
vpdpbsud(xmm1, xmm2, xmm3);
vpdpbsud(ymm1, ymm2, ptr [rax]);
vpdpbsuds(xmm1, xmm2, xmm3);
vpdpbsuds(ymm1, ymm2, ptr [rax]);
vpdpbuud(xmm1, xmm2, xmm3);
vpdpbuud(ymm1, ymm2, ptr [rax]);
vpdpbuuds(xmm1, xmm2, xmm3);
vpdpbuuds(ymm1, ymm2, ptr [rax]);
vpdpwsud(xmm1, xmm2, xmm3);
vpdpwsud(ymm1, ymm2, ptr [rax]);
vpdpwsuds(xmm1, xmm2, xmm3);
vpdpwsuds(ymm1, ymm2, ptr [rax]);
vpdpwusd(xmm1, xmm2, xmm3);
vpdpwusd(ymm1, ymm2, ptr [rax]);
vpdpwusds(xmm1, xmm2, xmm3);
vpdpwusds(ymm1, ymm2, ptr [rax]);
vpdpwuud(xmm1, xmm2, xmm3);
vpdpwuud(ymm1, ymm2, ptr [rax]);
vpdpwuuds(xmm1, xmm2, xmm3);
vpdpwuuds(ymm1, ymm2, ptr [rax]);
+294
View File
@@ -0,0 +1,294 @@
//
vcvtbf162ibs(xm1, xm2);
vcvtbf162ibs(xm1, ptr[rax+64]);
vcvtbf162ibs(xm1, ptr_b[rax+64]);
vcvtbf162ibs(ym1, ym2);
vcvtbf162ibs(ym1, ptr[rax+64]);
vcvtbf162ibs(ym1, ptr_b[rax+64]);
vcvtbf162ibs(zm1, zm2);
vcvtbf162ibs(zm1, ptr[rax+64]);
vcvtbf162ibs(zm1, ptr_b[rax+64]);
//
vcvtbf162iubs(xm1, xm2);
vcvtbf162iubs(xm1, ptr[rax+64]);
vcvtbf162iubs(xm1, ptr_b[rax+64]);
vcvtbf162iubs(ym1, ym2);
vcvtbf162iubs(ym1, ptr[rax+64]);
vcvtbf162iubs(ym1, ptr_b[rax+64]);
vcvtbf162iubs(zm1, zm2);
vcvtbf162iubs(zm1, ptr[rax+64]);
vcvtbf162iubs(zm1, ptr_b[rax+64]);
//
vcvttbf162ibs(xm1, xm2);
vcvttbf162ibs(xm1, ptr[rax+64]);
vcvttbf162ibs(xm1, ptr_b[rax+64]);
vcvttbf162ibs(ym1, ym2);
vcvttbf162ibs(ym1, ptr[rax+64]);
vcvttbf162ibs(ym1, ptr_b[rax+64]);
vcvttbf162ibs(zm1, zm2);
vcvttbf162ibs(zm1, ptr[rax+64]);
vcvttbf162ibs(zm1, ptr_b[rax+64]);
//
vcvttbf162iubs(xm1, xm2);
vcvttbf162iubs(xm1, ptr[rax+64]);
vcvttbf162iubs(xm1, ptr_b[rax+64]);
vcvttbf162iubs(ym1, ym2);
vcvttbf162iubs(ym1, ptr[rax+64]);
vcvttbf162iubs(ym1, ptr_b[rax+64]);
vcvttbf162iubs(zm1, zm2);
vcvttbf162iubs(zm1, ptr[rax+64]);
vcvttbf162iubs(zm1, ptr_b[rax+64]);
//
vcvttpd2qqs(xm1, xm2);
vcvttpd2qqs(xm1, ptr[rax+64]);
vcvttpd2qqs(xm1, ptr_b[rax+64]);
vcvttpd2qqs(ym1, ym2);
vcvttpd2qqs(ym1, ptr[rax+64]);
vcvttpd2qqs(ym1, ptr_b[rax+64]);
vcvttpd2qqs(zm1, zm2);
vcvttpd2qqs(zm1, zm2|T_sae);
vcvttpd2qqs(zm1, ptr[rax+64]);
vcvttpd2qqs(zm1, ptr_b[rax+64]);
//
vcvttpd2uqqs(xm1, xm2);
vcvttpd2uqqs(xm1, ptr[rax+64]);
vcvttpd2uqqs(xm1, ptr_b[rax+64]);
vcvttpd2uqqs(ym1, ym2);
vcvttpd2uqqs(ym1, ptr[rax+64]);
vcvttpd2uqqs(ym1, ptr_b[rax+64]);
vcvttpd2uqqs(zm1, zm2);
vcvttpd2uqqs(zm1, zm2|T_sae);
vcvttpd2uqqs(zm1, ptr[rax+64]);
vcvttpd2uqqs(zm1, ptr_b[rax+64]);
//
vcvtph2ibs(xm1, xm2);
vcvtph2ibs(xm1, ptr[rax+64]);
vcvtph2ibs(xm1, ptr_b[rax+64]);
vcvtph2ibs(ym1, ym2);
vcvtph2ibs(ym1, ptr[rax+64]);
vcvtph2ibs(ym1, ptr_b[rax+64]);
vcvtph2ibs(zm1, zm2);
vcvtph2ibs(zm1, zm2|T_ru_sae);
vcvtph2ibs(zm1, ptr[rax+64]);
vcvtph2ibs(zm1, ptr_b[rax+64]);
//
vcvtph2iubs(xm1, xm2);
vcvtph2iubs(xm1, ptr[rax+64]);
vcvtph2iubs(xm1, ptr_b[rax+64]);
vcvtph2iubs(ym1, ym2);
vcvtph2iubs(ym1, ptr[rax+64]);
vcvtph2iubs(ym1, ptr_b[rax+64]);
vcvtph2iubs(zm1, zm2);
vcvtph2iubs(zm1, zm2|T_ru_sae);
vcvtph2iubs(zm1, ptr[rax+64]);
vcvtph2iubs(zm1, ptr_b[rax+64]);
//
vcvttph2ibs(xm1, xm2);
vcvttph2ibs(xm1, ptr[rax+64]);
vcvttph2ibs(xm1, ptr_b[rax+64]);
vcvttph2ibs(ym1, ym2);
vcvttph2ibs(ym1, ptr[rax+64]);
vcvttph2ibs(ym1, ptr_b[rax+64]);
vcvttph2ibs(zm1, zm2);
vcvttph2ibs(zm1, zm2|T_ru_sae);
vcvttph2ibs(zm1, ptr[rax+64]);
vcvttph2ibs(zm1, ptr_b[rax+64]);
//
vcvttph2iubs(xm1, xm2);
vcvttph2iubs(xm1, ptr[rax+64]);
vcvttph2iubs(xm1, ptr_b[rax+64]);
vcvttph2iubs(ym1, ym2);
vcvttph2iubs(ym1, ptr[rax+64]);
vcvttph2iubs(ym1, ptr_b[rax+64]);
vcvttph2iubs(zm1, zm2);
vcvttph2iubs(zm1, zm2|T_ru_sae);
vcvttph2iubs(zm1, ptr[rax+64]);
vcvttph2iubs(zm1, ptr_b[rax+64]);
//
vcvttps2dqs(xm1, xm2);
vcvttps2dqs(xm1, ptr[rax+64]);
vcvttps2dqs(xm1, ptr_b[rax+64]);
vcvttps2dqs(ym1, ym2);
vcvttps2dqs(ym1, ptr[rax+64]);
vcvttps2dqs(ym1, ptr_b[rax+64]);
vcvttps2dqs(zm1, zm2);
vcvttps2dqs(zm1, zm2|T_sae);
vcvttps2dqs(zm1, ptr[rax+64]);
vcvttps2dqs(zm1, ptr_b[rax+64]);
//
vcvtps2ibs(xm1, xm2);
vcvtps2ibs(xm1, ptr[rax+64]);
vcvtps2ibs(xm1, ptr_b[rax+64]);
vcvtps2ibs(ym1, ym2);
vcvtps2ibs(ym1, ptr[rax+64]);
vcvtps2ibs(ym1, ptr_b[rax+64]);
vcvtps2ibs(zm1, zm2);
vcvtps2ibs(zm1, zm2|T_ru_sae);
vcvtps2ibs(zm1, ptr[rax+64]);
vcvtps2ibs(zm1, ptr_b[rax+64]);
//
vcvtps2iubs(xm1, xm2);
vcvtps2iubs(xm1, ptr[rax+64]);
vcvtps2iubs(xm1, ptr_b[rax+64]);
vcvtps2iubs(ym1, ym2);
vcvtps2iubs(ym1, ptr[rax+64]);
vcvtps2iubs(ym1, ptr_b[rax+64]);
vcvtps2iubs(zm1, zm2);
vcvtps2iubs(zm1, zm2|T_ru_sae);
vcvtps2iubs(zm1, ptr[rax+64]);
vcvtps2iubs(zm1, ptr_b[rax+64]);
//
vcvttps2ibs(xm1, xm2);
vcvttps2ibs(xm1, ptr[rax+64]);
vcvttps2ibs(xm1, ptr_b[rax+64]);
vcvttps2ibs(ym1, ym2);
vcvttps2ibs(ym1, ptr[rax+64]);
vcvttps2ibs(ym1, ptr_b[rax+64]);
vcvttps2ibs(zm1, zm2);
vcvttps2ibs(zm1, zm2|T_ru_sae);
vcvttps2ibs(zm1, ptr[rax+64]);
vcvttps2ibs(zm1, ptr_b[rax+64]);
//
vcvttps2iubs(xm1, xm2);
vcvttps2iubs(xm1, ptr[rax+64]);
vcvttps2iubs(xm1, ptr_b[rax+64]);
vcvttps2iubs(ym1, ym2);
vcvttps2iubs(ym1, ptr[rax+64]);
vcvttps2iubs(ym1, ptr_b[rax+64]);
vcvttps2iubs(zm1, zm2);
vcvttps2iubs(zm1, zm2|T_ru_sae);
vcvttps2iubs(zm1, ptr[rax+64]);
vcvttps2iubs(zm1, ptr_b[rax+64]);
//
vcvttps2udqs(xm1, xm2);
vcvttps2udqs(xm1, ptr[rax+64]);
vcvttps2udqs(xm1, ptr_b[rax+64]);
vcvttps2udqs(ym1, ym2);
vcvttps2udqs(ym1, ptr[rax+64]);
vcvttps2udqs(ym1, ptr_b[rax+64]);
vcvttps2udqs(zm1, zm2);
vcvttps2udqs(zm1, zm2|T_sae);
vcvttps2udqs(zm1, ptr[rax+64]);
vcvttps2udqs(zm1, ptr_b[rax+64]);
//
vcvttpd2dqs(xm1|k1|T_z, xm2);
vcvttpd2dqs(xm1|k1|T_z, xword [rax+64]);
vcvttpd2dqs(xm1|k1|T_z, xword_b[rax+64]);
vcvttpd2dqs(xm1|k1|T_z, ym2);
vcvttpd2dqs(xm1|k1|T_z, yword [rax+64]);
vcvttpd2dqs(xm1|k1|T_z, yword_b[rax+64]);
vcvttpd2dqs(ym1|k1|T_z, zm2);
vcvttpd2dqs(ym1|k1|T_z, zm2|T_sae);
vcvttpd2dqs(ym1|k1|T_z, zword [rax+64]);
vcvttpd2dqs(ym1|k1|T_z, zword_b[rax+64]);
//
vcvttpd2udqs(xm1|k1|T_z, xm2);
vcvttpd2udqs(xm1|k1|T_z, xword [rax+64]);
vcvttpd2udqs(xm1|k1|T_z, xword_b[rax+64]);
vcvttpd2udqs(xm1|k1|T_z, ym2);
vcvttpd2udqs(xm1|k1|T_z, yword [rax+64]);
vcvttpd2udqs(xm1|k1|T_z, yword_b[rax+64]);
vcvttpd2udqs(ym1|k1|T_z, zm2);
vcvttpd2udqs(ym1|k1|T_z, zm2|T_sae);
vcvttpd2udqs(ym1|k1|T_z, zword [rax+64]);
vcvttpd2udqs(ym1|k1|T_z, zword_b[rax+64]);
//
vcvttps2qqs(xm1|k1|T_z, xm2);
vcvttps2qqs(xm1|k1|T_z, ptr [rax+64]);
vcvttps2qqs(xm1|k1|T_z, ptr_b[rax+64]);
vcvttps2qqs(ym1|k1|T_z, xm2);
vcvttps2qqs(ym1|k1|T_z, ptr [rax+64]);
vcvttps2qqs(ym1|k1|T_z, ptr_b[rax+64]);
vcvttps2qqs(zm1, ym2);
vcvttps2qqs(zm1|k1|T_z, ym2);
vcvttps2qqs(zm1|k1|T_z|T_sae, ym2);
vcvttps2qqs(zm1|k1|T_z, ptr [rax+64]);
vcvttps2qqs(zm1|k1|T_z, ptr_b[rax+64]);
//
vcvttps2uqqs(xm1|k1|T_z, xm2);
vcvttps2uqqs(xm1|k1|T_z, ptr [rax+64]);
vcvttps2uqqs(xm1|k1|T_z, ptr_b[rax+64]);
vcvttps2uqqs(ym1|k1|T_z, xm2);
vcvttps2uqqs(ym1|k1|T_z, ptr [rax+64]);
vcvttps2uqqs(ym1|k1|T_z, ptr_b[rax+64]);
vcvttps2uqqs(zm1, ym2);
vcvttps2uqqs(zm1|k1|T_z, ym2);
vcvttps2uqqs(zm1|k1|T_z|T_sae, ym2);
vcvttps2uqqs(zm1|k1|T_z, ptr [rax+64]);
vcvttps2uqqs(zm1|k1|T_z, ptr_b[rax+64]);
//
vcvttsd2sis(eax, xm1);
vcvttsd2sis(eax, xm1|T_sae);
vcvttsd2sis(eax, ptr[rax+64]);
vcvttsd2sis(r30, xm1);
vcvttsd2sis(r30, xm1|T_sae);
vcvttsd2sis(r30, ptr[rax+64]);
//
vcvttsd2usis(eax, xm1);
vcvttsd2usis(eax, xm1|T_sae);
vcvttsd2usis(eax, ptr[rax+64]);
vcvttsd2usis(r30, xm1);
vcvttsd2usis(r30, xm1|T_sae);
vcvttsd2usis(r30, ptr[rax+64]);
//
vcvttss2sis(eax, xm1);
vcvttss2sis(eax, xm1|T_sae);
vcvttss2sis(eax, ptr[rax+64]);
vcvttss2sis(r30, xm1);
vcvttss2sis(r30, xm1|T_sae);
vcvttss2sis(r30, ptr[rax+64]);
//
vcvttss2usis(eax, xm1);
vcvttss2usis(eax, xm1|T_sae);
vcvttss2usis(eax, ptr[rax+64]);
vcvttss2usis(r30, xm1);
vcvttss2usis(r30, xm1|T_sae);
vcvttss2usis(r30, ptr[rax+64]);
+8
View File
@@ -0,0 +1,8 @@
#include <stdio.h>
/*
    Prints "x32" followed by a newline when both __x86_64__ and __ILP32__
    are defined by the compiler; prints nothing otherwise.
*/
int main()
{
#if !defined(__x86_64__) || !defined(__ILP32__)
    /* at least one of the two macros is missing: stay silent */
#else
    puts("x32");
#endif
    return 0;
}
+1883
View File
File diff suppressed because it is too large Load Diff
+63
View File
@@ -0,0 +1,63 @@
#pragma once
#include <stdio.h>
// Tiny value type that just stores an integer index.
struct Reg {
    int r_; // index passed to the constructor

    // Intentionally not explicit: the original allows implicit int -> Reg.
    Reg(int r)
        : r_(r)
    {
    }
};

// Accessors returning a function-local static Reg (index 0, 1 and 2).
// The object is constructed the first time the accessor is called.
inline const Reg& getReg0()
{
    static const Reg reg(0);
    return reg;
}

inline const Reg& getReg1()
{
    static const Reg reg(1);
    return reg;
}

inline const Reg& getReg2()
{
    static const Reg reg(2);
    return reg;
}

// Internal-linkage references bound to the objects returned above.
static const Reg& r0 = getReg0();
static const Reg& r1 = getReg1();
static const Reg& r2 = getReg2();
// Print the address and stored index of the objects bound to r0, r1 and r2.
// Each line is labelled with the reference it actually reports.
inline void putReg()
{
    puts("putReg");
    // %p expects a pointer to void, hence the explicit casts.
    printf("r0=%p, %d\n", static_cast<const void*>(&r0), r0.r_);
    printf("r1=%p, %d\n", static_cast<const void*>(&r1), r1.r_);
    printf("r2=%p, %d\n", static_cast<const void*>(&r2), r2.r_);
}
// Object that announces its construction and destruction on stdout, so the
// moment a static instance is initialized or destroyed shows up in the output.
struct A {
int a;
// Sets a to 5 and prints "A cstr".
A()
: a(5)
{
puts("A cstr");
}
// Prints "A dstr".
~A()
{
puts("A dstr");
}
// Prints the current value as "a=<value>".
void put() const
{
printf("a=%d\n", a);
}
};
// Class template that only holds a static A. The dummy parameter exists so
// that the static member can be defined here in the header (see below).
template<int dummy = 0>
struct XT {
static A a;
};
// Definition of the static data member for every instantiation.
template<int dummy>
A XT<dummy>::a;
// The instantiation used by the tests: X::a.
typedef XT<0> X;
// Declared here, defined in another source file.
void init();
// Helper whose constructor prints "Init", then calls init() and putReg().
struct Init {
Init()
{
puts("Init");
init();
putReg();
}
};
// One Init object per translation unit that includes this header; its
// constructor runs during that unit's static initialization.
static Init s_init;
+51
View File
@@ -0,0 +1,51 @@
#include <stdio.h>
// File-scope object whose constructor prints "XXX"; it is the first static
// object defined in this file.
static const struct XXX {
XXX() { puts("XXX"); }
} s_sss;
// Object that reports its construction and destruction on stdout.
struct A {
int aaa;
// Sets aaa to 123 and prints "A cstr".
A()
: aaa(123)
{
puts("A cstr");
}
// Prints "A dstr".
~A()
{
puts("A dstr");
}
// Prints the current value as "aaa=<value>"; if called before the constructor
// has run, the printed value is whatever the storage holds at that time.
void put() const
{
printf("aaa=%d\n", aaa);
}
};
// Class template that only holds a static A named sss.
template<int dummy = 0>
struct XT {
static A sss;
};
// Definition of the static data member for every instantiation.
template<int dummy>
A XT<dummy>::sss;
// The instantiation used below: X::sss.
typedef XT<0> X;
// File-scope object whose constructor prints "Init" and then X::sss.put(),
// which shows the state of X::sss at the time this object is initialized.
static struct Init {
Init()
{
puts("Init");
X::sss.put();
}
} s_init;
// Prints "f" and returns 4; used as a dynamic initializer for r below so that
// the point at which r is initialized is visible in the output.
int f() { puts("f"); return 4; }
static const int r = f();
// Prints "main", the value of r and the value held by X::sss. Everything
// printed before "main" comes from static initialization.
int main()
{
puts("main");
printf("r=%d\n", r);
X::sss.put();
}
+9
View File
@@ -0,0 +1,9 @@
#include "lib.h"
// Prints "main", then the value of X::a and the registers declared in lib.h.
int main()
{
puts("main");
X::a.put();
putReg();
}
+13
View File
@@ -0,0 +1,13 @@
#include "lib.h"
// Runs its body only on the first call: prints X::a and the registers once,
// and just prints the flag value on every later call.
void init()
{
// Note: this local flag has the same name as the function and shadows it here.
static bool init = true;
printf("in lib_test %d\n", init);
if (!init) return;
init = false;
X::a.put();
putReg();
}
+2210
View File
File diff suppressed because it is too large Load Diff
+3554
View File
File diff suppressed because it is too large Load Diff
+2474
View File
File diff suppressed because it is too large Load Diff
+37
View File
@@ -0,0 +1,37 @@
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#include <string.h>
#include <vector>
// Generates a function equivalent to `int f() { return x; }`.
struct Code : Xbyak::CodeGenerator {
Code(int x)
{
mov(eax, x);
ret();
}
};
// Creates N code generators, calls every generated function, prints the sum
// of the results and releases the generators.
// Returns 0 on success and 1 if a std::exception was reported, so that a
// failure is visible to the caller through the exit status.
int main()
	try
{
#ifdef XBYAK_USE_MMAP_ALLOCATOR
	puts("use Allocator with mmap");
#else
	puts("use Allocator with posix_memalign");
#endif
	const int N = 70000;
	std::vector<Code*> v(N);
	for (int i = 0; i < N; i++) {
		v[i] = new Code(i);
	}
	long long sum = 0;
	for (int i = 0; i < N; i++) {
		sum += v[i]->getCode<int (*)()>()();
	}
	for (int i = 0; i < N; i++) {
		delete v[i];
	}
	printf("sum=%lld\n", sum);
	return 0;
} catch (const std::exception& e) {
	printf("ERR %s\n", e.what());
	// Without an explicit return the handler would fall off the end of main and report success.
	return 1;
}
+46
View File
@@ -0,0 +1,46 @@
#include <stdio.h>
#define XBYAK_ENABLE_OMITTED_OPERAND
#include "xbyak/xbyak.h"
#define CYBOZU_TEST_DISABLE_AUTO_RUN
#include "cybozu/test.hpp"
using namespace Xbyak;
#ifdef _MSC_VER
#pragma warning(disable : 4245)
#pragma warning(disable : 4312)
#endif
// Code generator whose public members are taken verbatim from nm.cpp, which
// is included inside the class body. main() below calls its gen().
class Sample : public CodeGenerator {
// Declared but not defined: assignment is not meant to be used.
void operator=(const Sample&);
public:
#include "nm.cpp"
};
// Checks that invalid operand combinations are rejected with an exception.
class ErrorSample : public CodeGenerator {
// Declared but not defined: assignment is not meant to be used.
void operator=(const ErrorSample&);
public:
// Each statement is expected to throw std::exception. The checks are
// compiled out when XBYAK_NO_EXCEPTION is defined.
void gen()
{
#ifndef XBYAK_NO_EXCEPTION
CYBOZU_TEST_EXCEPTION(mov(ptr[eax],1), std::exception);
CYBOZU_TEST_EXCEPTION(test(ptr[eax],1), std::exception);
CYBOZU_TEST_EXCEPTION(adc(ptr[eax],1), std::exception);
CYBOZU_TEST_EXCEPTION(setz(eax), std::exception);
#endif
}
};
// Checks sizeof(Xbyak::Operand), then runs Sample::gen() and ErrorSample::gen().
// Returns 1 and prints the message on stderr if a std::exception escapes.
int main()
try
{
// the size of Operand exceeds 32 bit.
// (the check below requires it to be exactly 8 bytes)
CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 8u);
Sample s;
s.gen();
ErrorSample es;
es.gen();
} catch (std::exception& e) {
fprintf(stderr, "ERR=%s\n", e.what());
return 1;
}
+111
View File
@@ -0,0 +1,111 @@
#define XBYAK_NO_EXCEPTION
#include <xbyak/xbyak.h>
using namespace Xbyak;
// Number of failed checks / total number of checks performed so far.
int g_err = 0;
int g_test = 0;

// Counts one check; when x differs from y, reports both values and counts a failure.
void assertEq(int x, int y)
{
	const bool ok = (x == y);
	if (!ok) {
		printf("ERR x=%d y=%d\n", x, y);
		++g_err;
	}
	++g_test;
}

// Counts one check; when b is false, reports it and counts a failure.
void assertBool(bool b)
{
	if (b == false) {
		printf("ERR assertBool\n");
		++g_err;
	}
	++g_test;
}
// Generates a function returning the constant v, calls it, and checks both the
// returned value and that no error has been recorded.
void test1()
{
const int v = 123;
struct Code : CodeGenerator {
Code()
{
mov(eax, v);
ret();
}
} c;
int (*f)() = c.getCode<int (*)()>();
assertEq(f(), v);
assertEq(Xbyak::GetError(), ERR_NONE);
}
// Defines the same label twice and checks that ERR_LABEL_IS_REDEFINED is
// reported through GetError(); the error is cleared afterwards.
void test2()
{
struct Code : CodeGenerator {
Code()
{
Label lp;
L(lp);
L(lp);
}
} c;
assertEq(Xbyak::GetError(), ERR_LABEL_IS_REDEFINED);
Xbyak::ClearError();
}
// Uses an allocator whose alloc() always returns 0 and checks the error state
// while instructions are emitted: no error after the first mov, an error after
// the third one, and no error again after ClearError().
void test3()
{
static struct EmptyAllocator : Xbyak::Allocator {
uint8_t *alloc(size_t) XBYAK_OVERRIDE { return 0; }
} emptyAllocator;
struct Code : CodeGenerator {
// NOTE(review): the first argument (8) is presumably the maximum code size in
// bytes, which the three mov instructions exceed - confirm against CodeGenerator.
Code() : CodeGenerator(8, 0, &emptyAllocator)
{
mov(eax, 3);
assertBool(Xbyak::GetError() == 0);
mov(eax, 3);
mov(eax, 3);
assertBool(Xbyak::GetError() != 0);
Xbyak::ClearError();
assertBool(Xbyak::GetError() == 0);
}
} c;
}
// Checks that invalid operand combinations are reported through GetError()
// when exceptions are disabled. The error is cleared after every check so
// that each instruction is verified independently.
void test4()
{
	struct Code : CodeGenerator {
		Code()
		{
			mov(ptr[eax], 1);
			assertBool(Xbyak::GetError() != 0);
			Xbyak::ClearError();
			test(ptr[eax], 1);
			assertBool(Xbyak::GetError() != 0);
			Xbyak::ClearError();
			adc(ptr[eax], 1);
			assertBool(Xbyak::GetError() != 0);
			Xbyak::ClearError();
			setz(eax);
			assertBool(Xbyak::GetError() != 0);
			Xbyak::ClearError();
		}
	} c; // an instance is required: without it the constructor and all checks above never run
}
int main()
{
test1();
test2();
test3();
test4();
if (g_err) {
printf("err %d/%d\n", g_err, g_test);
} else {
printf("all ok %d\n", g_test);
}
return g_err != 0;
}
+56
View File
@@ -0,0 +1,56 @@
/*
normalize prefix
*/
#include <stdio.h>
#include <string>
#include <set>
#include <iostream>
#include <memory.h>
typedef unsigned char uint8_t;
/*
	Normalizes one line of hexadecimal byte codes so that lines which differ
	only in the order or repetition of leading prefixes compare equal.

	- The first '(' and the first ')' found after removing it are deleted.
	- The leading run of two-character prefixes 66, 67, F2, F3 is replaced by
	  the distinct prefixes in sorted order; the rest of the line is kept as is.

	Returns "" and prints a message on stderr if '(' has no matching ')'.
*/
std::string normalize(std::string line)
{
	size_t pos = line.find('(');
	/* nasm generates byte codes containing () for xbegin, so remove it. */
	if (pos != std::string::npos) {
		line.erase(pos, 1);
		pos = line.find(')');
		if (pos == std::string::npos) {
			fprintf(stderr, "line error {%s}\n", line.c_str());
			return "";
		}
		line.erase(pos, 1);
	}
	static const char tbl[][3] = { "66", "67", "F2", "F3" };
	const size_t tblNum = sizeof(tbl) / sizeof(tbl[0]);
	typedef std::set<std::string> StringSet;
	StringSet prefixes;
	pos = 0;
	while (pos < line.size()) {
		bool found = false;
		for (size_t i = 0; i < tblNum; i++) {
			// compare() clamps to the end of the string, so a trailing single
			// character is handled without reading past the data.
			if (line.compare(pos, 2, tbl[i]) == 0) {
				found = true;
				prefixes.insert(tbl[i]);
				break;
			}
		}
		if (!found) break;
		pos += 2;
	}
	std::string ret;
	for (StringSet::const_iterator i = prefixes.begin(), e = prefixes.end(); i != e; ++i) {
		ret += *i;
	}
	// pos <= line.size() always holds here, so this appends the untouched remainder.
	ret.append(line, pos, std::string::npos);
	return ret;
}
int main()
{
std::string line;
while (std::getline(std::cin, line)) {
std::string normalizedLine = normalize(line);
std::cout << normalizedLine << '\n';//std::endl;
}
}
+6
View File
@@ -0,0 +1,6 @@
Test scripts for Windows.
These tests require nasm.exe, yasm.exe, cl.exe, awk, and diff.
test_all ; runs all tests
+2
View File
@@ -0,0 +1,2 @@
@echo off
rem Sets OPT, the common cl.exe options used by the test scripts that call this file.
set OPT=/EHsc -I../xbyak -I./ /W4 -D_CRT_SECURE_NO_WARNINGS /nologo
+660
View File
@@ -0,0 +1,660 @@
#include <xbyak/xbyak_util.h>
#include <vector>
#include <map>
#ifdef XBYAK32
#error "this sample is for only 64-bit mode"
#endif
using namespace Xbyak::util;
#ifndef DUMP
#ifdef _MSC_VER
#pragma warning(disable : 4459)
#pragma warning(disable : 4996)
#endif
#include <cybozu/test.hpp>
#ifdef XBYAK64_WIN
#include "sf_test_win.h"
#endif
#ifdef XBYAK64_GCC
#include "sf_test_gcc.h"
#endif
/*
Code generators exercising StackFrame with different numbers of parameters,
temporaries, extra registers (UseRCX / UseRDX) and stack sizes.
Each genN() appends one function to the code buffer; the expected return
values are checked in CYBOZU_TEST_AUTO(args).
The StackFrame arguments are used as (generator, pNum, tNum, stackSizeByte),
matching the parameter names in Code2::gen; sf.p[i] and sf.t[i] are the
registers handed out for parameters and temporaries.
*/
struct Code : public Xbyak::CodeGenerator {
// returns the first parameter
void gen1()
{
StackFrame sf(this, 1);
mov(rax, sf.p[0]);
}
// returns p[0] + p[1] (computed with lea)
void gen2()
{
StackFrame sf(this, 2);
lea(rax, ptr [sf.p[0] + sf.p[1]]);
}
// returns the sum of three parameters
void gen3()
{
StackFrame sf(this, 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
}
// returns the sum of four parameters
void gen4()
{
StackFrame sf(this, 4);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
// same as gen4, but requests rcx and overwrites it before reading the parameters
void gen5()
{
StackFrame sf(this, 4, UseRCX);
xor_(rcx, rcx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
// same as gen4, but requests rcx and rdx and overwrites both first
void gen6()
{
StackFrame sf(this, 4, UseRCX | UseRDX);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
// three parameters with rcx and rdx overwritten first
void gen7()
{
StackFrame sf(this, 3, UseRCX | UseRDX);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
}
// like gen7, and additionally writes to three temporaries before summing
void gen8()
{
StackFrame sf(this, 3, 3 | UseRCX | UseRDX);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(sf.t[0], 1);
mov(sf.t[1], 2);
mov(sf.t[2], 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
}
// like gen8 with a 32-byte stack area, which is filled with four 8-byte stores
void gen9()
{
StackFrame sf(this, 3, 3 | UseRCX | UseRDX, 32);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(sf.t[0], 1);
mov(sf.t[1], 2);
mov(sf.t[2], 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
mov(ptr [rsp + 8 * 0], rax);
mov(ptr [rsp + 8 * 1], rax);
mov(ptr [rsp + 8 * 2], rax);
mov(ptr [rsp + 8 * 3], rax);
}
// four parameters, eight temporaries, rcx/rdx and a 32-byte stack area
void gen10()
{
StackFrame sf(this, 4, 8 | UseRCX | UseRDX, 32);
xor_(rcx, rcx);
xor_(rdx, rdx);
for (int i = 0; i < 8; i++) {
mov(sf.t[i], i);
}
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
mov(ptr [rsp + 8 * 0], rax);
mov(ptr [rsp + 8 * 1], rax);
mov(ptr [rsp + 8 * 2], rax);
mov(ptr [rsp + 8 * 3], rax);
}
// no parameters; overwrites rcx and returns 3
void gen11()
{
StackFrame sf(this, 0, UseRCX);
xor_(rcx, rcx);
mov(rax, 3);
}
// four parameters with only rdx requested and overwritten first
void gen12()
{
StackFrame sf(this, 4, UseRDX);
xor_(rdx, rdx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
/*
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
uses 13 temporaries, one per array element
*/
void gen13()
{
StackFrame sf(this, 1, 13);
for (int i = 0; i < 13; i++) {
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
}
mov(rax, sf.t[0]);
for (int i = 1; i < 13; i++) {
add(rax, sf.t[i]);
}
}
/*
same as gen13
but with 11 temporaries plus rcx and rdx appended to the pack
*/
void gen14()
{
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
Pack t = sf.t;
t.append(rcx);
t.append(rdx);
for (int i = 0; i < 13; i++) {
mov(t[i], ptr[sf.p[0] + i * 8]);
}
mov(rax, t[0]);
for (int i = 1; i < 13; i++) {
add(rax, t[i]);
}
}
/*
return (1 << 15) - 1;
14 temporaries plus rax hold 1 << i; they are summed into the 8-byte stack slot
*/
void gen15()
{
StackFrame sf(this, 0, 14, 8);
Pack t = sf.t;
t.append(rax);
for (int i = 0; i < 15; i++) {
mov(t[i], uint64_t(1) << i);
}
mov(qword[rsp], 0);
for (int i = 0; i < 15; i++) {
add(ptr[rsp], t[i]);
}
mov(rax, ptr[rsp]);
}
};
struct Code2 : Xbyak::CodeGenerator {
Code2()
: Xbyak::CodeGenerator(4096 * 32)
{
}
void gen(int pNum, int tNum, int stackSizeByte)
{
StackFrame sf(this, pNum, tNum, stackSizeByte);
if (tNum & UseRCX) xor_(rcx, rcx);
if (tNum & UseRDX) xor_(rdx, rdx);
for (int i = 0, n = tNum & ~(UseRCX | UseRDX); i < n; i++) {
mov(sf.t[i], 5);
}
for (int i = 0; i < stackSizeByte; i++) {
mov(byte [rsp + i], 0);
}
mov(rax, 1);
for (int i = 0; i < pNum; i++) {
add(rax, sf.p[i]);
}
}
void gen2(int pNum, int tNum, int stackSizeByte)
{
StackFrame sf(this, pNum, tNum, stackSizeByte);
mov(rax, rsp);
}
};
/*
	Calls the generated code at _f as a function taking pNum ints
	(10, 100, 1000, 10000) and checks that it returns 1 plus their sum.
	Prints an error and exits with status 1 if pNum is not in [0, 4].
*/
void verify(const uint8_t *_f, int pNum)
{
	typedef int (*F0)();
	typedef int (*F1)(int);
	typedef int (*F2)(int, int);
	typedef int (*F3)(int, int, int);
	typedef int (*F4)(int, int, int, int);
	uint8_t *f = const_cast<uint8_t*>(_f);
	if (pNum == 0) {
		CYBOZU_TEST_EQUAL(1, reinterpret_cast<F0>(f)());
		return;
	}
	if (pNum == 1) {
		CYBOZU_TEST_EQUAL(11, reinterpret_cast<F1>(f)(10));
		return;
	}
	if (pNum == 2) {
		CYBOZU_TEST_EQUAL(111, reinterpret_cast<F2>(f)(10, 100));
		return;
	}
	if (pNum == 3) {
		CYBOZU_TEST_EQUAL(1111, reinterpret_cast<F3>(f)(10, 100, 1000));
		return;
	}
	if (pNum == 4) {
		CYBOZU_TEST_EQUAL(11111, reinterpret_cast<F4>(f)(10, 100, 1000, 10000));
		return;
	}
	printf("ERR pNum=%d\n", pNum);
	exit(1);
}
/*
	Generates and verifies a function for every combination of stack size,
	parameter count, register flags and temporary count.
*/
CYBOZU_TEST_AUTO(param)
{
	// { exclusive upper bound of tNum, extra register flags } for mode 0..3
	const struct {
		int maxNum;
		int opt;
	} modeTbl[] = {
		{ 10, 0 },
		{ 9, UseRCX },
		{ 9, UseRDX },
		{ 8, UseRCX | UseRDX },
	};
	const int modeNum = int(sizeof(modeTbl) / sizeof(modeTbl[0]));
	Code2 code;
	for (int stackSize = 0; stackSize < 32; stackSize += 7) {
		for (int pNum = 0; pNum < 4; pNum++) {
			for (int mode = 0; mode < modeNum; mode++) {
				const int maxNum = modeTbl[mode].maxNum;
				const int opt = modeTbl[mode].opt;
				for (int tNum = 0; tNum < maxNum; tNum++) {
					const int tNumWithOpt = tNum | opt;
//					printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
					const uint8_t *f = code.getCurr();
					code.gen(pNum, tNumWithOpt, stackSize);
					verify(f, pNum);
					/*
						check rsp is 16-byte aligned if stackSize > 0
					*/
					if (stackSize <= 0) continue;
					Code2 c2;
					c2.gen2(pNum, tNumWithOpt, stackSize);
					uint64_t addr = c2.getCode<uint64_t (*)()>()();
					CYBOZU_TEST_EQUAL(addr % 16, 0);
				}
			}
		}
	}
}
// Runs every Code::genN() generator and checks the value returned by the
// generated function. Each function pointer is taken with getCurr() before
// the generator runs, so it points at the start of the code appended next.
CYBOZU_TEST_AUTO(args)
{
Code code;
int (*f1)(int) = code.getCurr<int (*)(int)>();
code.gen1();
CYBOZU_TEST_EQUAL(5, f1(5));
int (*f2)(int, int) = code.getCurr<int (*)(int, int)>();
code.gen2();
CYBOZU_TEST_EQUAL(9, f2(3, 6));
int (*f3)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen3();
CYBOZU_TEST_EQUAL(14, f3(1, 4, 9));
int (*f4)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen4();
CYBOZU_TEST_EQUAL(30, f4(1, 4, 9, 16));
int (*f5)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen5();
CYBOZU_TEST_EQUAL(23, f5(2, 5, 7, 9));
int (*f6)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen6();
CYBOZU_TEST_EQUAL(18, f6(3, 4, 5, 6));
int (*f7)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen7();
CYBOZU_TEST_EQUAL(12, f7(3, 4, 5));
int (*f8)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen8();
CYBOZU_TEST_EQUAL(23, f8(5, 8, 10));
int (*f9)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen9();
CYBOZU_TEST_EQUAL(60, f9(10, 20, 30));
int (*f10)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen10();
CYBOZU_TEST_EQUAL(100, f10(10, 20, 30, 40));
int (*f11)() = code.getCurr<int (*)()>();
code.gen11();
CYBOZU_TEST_EQUAL(3, f11());
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen12();
CYBOZU_TEST_EQUAL(24, f12(3, 5, 7, 9));
{
// 1 + 2 + ... + 13 = 91
int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
code.gen13();
CYBOZU_TEST_EQUAL(91, f13(tbl));
int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
code.gen14();
CYBOZU_TEST_EQUAL(91, f14(tbl));
}
int (*f15)() = code.getCurr<int (*)()>();
code.gen15();
CYBOZU_TEST_EQUAL((1 << 15) - 1, f15());
}
// Prints the name of every register in p on one line, each followed by a space.
void put(const Xbyak::util::Pack& p)
{
	const size_t regNum = p.size();
	size_t idx = 0;
	while (idx < regNum) {
		printf("%s ", p[idx].toString());
		idx++;
	}
	printf("\n");
}
// Checks that the first tblNum registers of p have the indices listed in tbl.
void verifyPack(const Xbyak::util::Pack& p, const int *tbl, size_t tblNum)
{
	size_t k = 0;
	while (k != tblNum) {
		CYBOZU_TEST_EQUAL(p[k].getIdx(), tbl[k]);
		++k;
	}
}
// Checks Pack::sub(pos, num) and Pack::sub(pos) on a pack of Reg64(0)..Reg64(9).
CYBOZU_TEST_AUTO(pack)
{
const int N = 10;
Xbyak::Reg64 regTbl[N];
for (int i = 0; i < N; i++) {
regTbl[i] = Xbyak::Reg64(i);
}
Xbyak::util::Pack p(regTbl, N);
// Each entry: start position, number of registers, expected register indices.
const struct {
int pos;
int num;
int tbl[10];
} tbl[] = {
{ 0, 10, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
{ 1, 9, { 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
{ 2, 8, { 2, 3, 4, 5, 6, 7, 8, 9 } },
{ 3, 7, { 3, 4, 5, 6, 7, 8, 9 } },
{ 4, 6, { 4, 5, 6, 7, 8, 9 } },
{ 5, 5, { 5, 6, 7, 8, 9 } },
{ 6, 4, { 6, 7, 8, 9 } },
{ 7, 3, { 7, 8, 9 } },
{ 8, 2, { 8, 9 } },
{ 9, 1, { 9 } },
{ 3, 5, { 3, 4, 5, 6, 7 } },
};
for (size_t i = 0; i < sizeof(tbl) / sizeof(*tbl); i++) {
const int pos = tbl[i].pos;
const int num = tbl[i].num;
verifyPack(p.sub(pos, num), tbl[i].tbl, num);
// When the range reaches the end of the pack, the one-argument form
// sub(pos) is expected to give the same result.
if (pos + num == N) {
verifyPack(p.sub(pos), tbl[i].tbl, num);
}
}
}
// Generates an empty StackFrame in one of three ways, selected by mode, to
// test how often close() emits code. The resulting code sizes are checked in
// CYBOZU_TEST_AUTO(close).
struct CloseCode : Xbyak::CodeGenerator {
CloseCode(size_t mode)
{
switch (mode) {
case 0:
{
StackFrame sf(this, 0);
// close() is automatically called.
}
break;
case 1:
{
// NOTE(review): the last argument (false) presumably disables the automatic close() - confirm against StackFrame.
StackFrame sf(this, 0, 0, 0, false);
sf.close(); // Explicitly call close().
setProtectModeRE(); // Ensure that no writes occur in destructor by setting read-exec
}
break;
case 2:
{
StackFrame sf(this, 0, 0, 0, false);
sf.close(); // Explicitly call close().
sf.close(); // Explicitly call close().
setProtectModeRE(); // Ensure that no writes occur in destructor by setting read-exec
}
break;
default:
CYBOZU_TEST_ASSERT(false);
}
}
};
// Checks the size of the code generated by CloseCode for each mode.
CYBOZU_TEST_AUTO(close)
{
	// expected code size for CloseCode mode 0, 1 and 2
	const size_t expectedTbl[] = { 1, 1, 2 };
	const size_t modeNum = sizeof(expectedTbl) / sizeof(expectedTbl[0]);
	for (size_t mode = 0; mode != modeNum; ++mode) {
		CloseCode c(mode);
		CYBOZU_TEST_EQUAL(c.getSize(), expectedTbl[mode]);
	}
}
#endif
/*
	Parameters of one stack-frame test case, packable into a 32-bit id.

	id layout (one byte per field, lowest byte first):
	  bits  0- 7 : pNum
	  bits  8-15 : tNum
	  bits 16-23 : useRegs >> 5
	  bits 24-31 : stackSizeByte
	This is the value the former union of uint8_t[4] and uint32_t produced on
	little-endian x86, written with shifts so that it does not rely on reading
	an inactive union member or on the byte order of the host.
*/
struct ParamId {
	int pNum;
	int tNum;
	int useRegs;
	int stackSizeByte;
	// Packs the four fields into an id; each field is truncated to 8 bits.
	uint32_t id() const
	{
		return uint32_t(uint8_t(pNum))
			| (uint32_t(uint8_t(tNum)) << 8)
			| (uint32_t(uint8_t(useRegs >> 5)) << 16)
			| (uint32_t(uint8_t(stackSizeByte)) << 24);
	}
	// Restores the four fields from an id created by id().
	void set_id(uint32_t v)
	{
		pNum = uint8_t(v);
		tNum = uint8_t(v >> 8);
		useRegs = uint8_t(v >> 16) << 5;
		stackSizeByte = uint8_t(v >> 24);
	}
};
typedef std::vector<uint8_t> Bytes;
#ifndef DUMP
/*
	Checks rhs == lhs with the test framework. If they differ, writes the bytes
	of d to "dump.bin" for later inspection and exits with status 1.
*/
void cmpAndDumpIfFailed(int rhs, int lhs, const Bytes& d)
{
	CYBOZU_TEST_EQUAL(rhs, lhs);
	if (rhs == lhs) return;
	FILE *fp = fopen("dump.bin", "wb");
	if (fp) {
		fwrite(d.data(), 1, d.size(), fp);
		fclose(fp);
	} else {
		// Do not pass a null FILE* to fwrite/fclose; report why the dump is missing.
		perror("dump.bin");
	}
	exit(1);
}
#endif
/*
Generates code for every tested combination of parameter count, temporary
count, extra register flags and stack size, and records the generated bytes
in dataMap keyed by ParamId::id().
- Without DUMP: each generated function is called and its result checked, and
  afterwards the recorded bytes are compared with the table g_dataVec.
  NOTE(review): g_dataVec is presumably defined in sf_test_win.h / sf_test_gcc.h - confirm.
- With DUMP: the recorded bytes are printed as C source that defines
  code_XXXXXXXX arrays and a g_dataVec table.
*/
void stackFrameTest()
{
struct Data {
ParamId paramId;
Bytes code;
};
typedef std::map<uint32_t, Data> DataMap;
DataMap dataMap;
// Generated function: overwrites every requested register, then returns the
// sum of its parameters (plus esp & 15 when a stack area is requested).
struct Code : Xbyak::CodeGenerator {
Code(int pNum, int tNum, int useRegs, int stackSizeByte)
{
StackFrame sf(this, pNum, tNum|useRegs, stackSizeByte);
// modify
for (int i = 0; i < tNum; i++) {
mov(sf.t[i], 12345);
}
if (useRegs & UseRCX) {
mov(rcx, 12345);
}
if (useRegs & UseRDX) {
mov(rdx, 12345);
}
if (useRegs & UseRSI) {
mov(rsi, 1000);
}
if (useRegs & UseRDI) {
mov(rdi, 2000);
}
// use rbp if UseRBP and !UseRBPAsFramePointer
if ((useRegs & UseRBPAsFramePointer) == UseRBP) {
mov(rbp, 3000);
}
// eax is sum of all params and (esp & 15) if stackSizeByte > 0
// (the checks below expect exactly the sum of the parameters, so they
// pass only when esp & 15 is 0)
if (stackSizeByte > 0) {
mov(eax, esp);
and_(eax, 15);
} else {
xor_(eax, eax);
}
for (int i = 0; i < pNum; i++) {
add(rax, sf.p[i]);
}
}
};
static const uint8_t stackSizeTbl[] = { 0, 33 };
for (int pNum = 0; pNum <= 4; pNum++) {
for (int tNum = 0; tNum <= 14; tNum++) {
// the six low bits of i select the extra registers to request
for (int i = 0; i < (1<<6); i++) {
int totalNum = pNum + tNum;
int useRegs = 0;
if (i & 1) { useRegs |= UseRCX; totalNum++; }
if (i & 2) { useRegs |= UseRDX; totalNum++; }
if (i & 4) { useRegs |= UseRSI; totalNum++; }
if (i & 8) { useRegs |= UseRDI; totalNum++; }
// UseRBP and UseRBPAsFramePointer are mutually exclusive
if (i & 16) { useRegs |= UseRBP; totalNum++; }
if (!(i & 16) && (i & 32)) { useRegs |= UseRBPAsFramePointer; totalNum++; }
// skip combinations that need more than 14 registers in total
if (totalNum > 14) continue;
for (size_t j = 0; j < sizeof(stackSizeTbl)/sizeof(stackSizeTbl[0]); j++) {
int stackSizeByte = stackSizeTbl[j];
//fprintf(stderr, "pNum=%d, tNum=%d, useRegs=0x%X stackSizeByte=%d\n", pNum, tNum, useRegs, stackSizeByte);
Code c(pNum, tNum, useRegs, stackSizeByte);
//fprintf(stderr, "code size = %d\n", int(c.getSize()));
Data d;
d.paramId.pNum = pNum;
d.paramId.tNum = tNum;
d.paramId.useRegs = useRegs;
d.paramId.stackSizeByte = stackSizeByte;
d.code.assign(c.getCode(), c.getCode() + c.getSize());
dataMap[d.paramId.id()] = d;
#ifndef DUMP
// call the generated function with pNum arguments and check the sum
switch (pNum) {
case 0:
{
int (*f)() = c.getCode<int (*)()>();
CYBOZU_TEST_EQUAL(0, f());
// cmpAndDumpIfFailed(0, f(), d.code);
break;
}
case 1:
{
int (*f1)(int) = c.getCode<int (*)(int)>();
CYBOZU_TEST_EQUAL(1, f1(1));
break;
}
case 2:
{
int (*f2)(int, int) = c.getCode<int (*)(int, int)>();
CYBOZU_TEST_EQUAL(11, f2(1, 10));
break;
}
case 3:
{
int (*f3)(int, int, int) = c.getCode<int (*)(int, int, int)>();
CYBOZU_TEST_EQUAL(111, f3(1, 10, 100));
break;
}
case 4:
{
int (*f4)(int, int, int, int) = c.getCode<int (*)(int, int, int, int)>();
CYBOZU_TEST_EQUAL(1111, f4(1, 10, 100, 1000));
break;
}
}
#endif
}
}
}
}
#ifdef DUMP
// print one byte array per test case, 16 bytes per line
for (DataMap::const_iterator it = dataMap.begin(); it != dataMap.end(); ++it) {
const Data& d = it->second;
printf("static const uint8_t code_%08x[] = {\n", d.paramId.id());
for (size_t j = 0; j < d.code.size(); j++) {
if (j % 16 == 0) {
if (j > 0) printf("\n");
printf("\t");
}
if (j > 0) printf(" ");
printf("0x%02x,", d.code[j]);
}
printf("\n};\n");
}
// print the table that refers to the arrays above
printf("static const struct {\n");
printf("\tuint32_t paramId;\n");
printf("\tconst uint8_t *code;\n");
printf("\tsize_t codeSize;\n");
printf("} g_dataVec[] = {\n");
for (DataMap::const_iterator it = dataMap.begin(); it != dataMap.end(); ++it) {
const Data& d = it->second;
printf("\t{ 0x%08x, code_%08x, %zu },\n", d.paramId.id(), d.paramId.id(), d.code.size());
}
printf("};\n");
#else
// load the expected code from g_dataVec and compare it with the generated code
DataMap dataMapExpected;
for (size_t i = 0; i < sizeof(g_dataVec) / sizeof(*g_dataVec); i++) {
const uint32_t id = g_dataVec[i].paramId;
Data d;
d.paramId.set_id(id);
d.code.assign(g_dataVec[i].code, g_dataVec[i].code + g_dataVec[i].codeSize);
dataMapExpected[id] = d;
}
CYBOZU_TEST_EQUAL(dataMap.size(), dataMapExpected.size());
for (DataMap::const_iterator it = dataMapExpected.begin(); it != dataMapExpected.end(); ++it) {
const uint32_t id = it->first;
DataMap::const_iterator it2 = dataMap.find(id);
CYBOZU_TEST_ASSERT(it2 != dataMap.end());
const Data& d = it2->second;
const Data& dExpected = it->second;
CYBOZU_TEST_EQUAL(d.code.size(), dExpected.code.size());
CYBOZU_TEST_EQUAL_ARRAY(d.code.data(), dExpected.code.data(), d.code.size());
}
#endif
}
// Entry point for stackFrameTest(): a plain main() when built with DUMP,
// otherwise a test case registered with the test framework.
#ifdef DUMP
int main()
#else
CYBOZU_TEST_AUTO(stackFrame)
#endif
{
stackFrameTest();
}
+36638
View File
File diff suppressed because it is too large Load Diff
+37414
View File
File diff suppressed because it is too large Load Diff
BIN
View File
Binary file not shown.
+37
View File
@@ -0,0 +1,37 @@
@echo off
rem Addressing test: compares the byte codes produced by nasm with those
rem produced by the generated xbyak code.
rem usage: test_address [64]   (64 selects the 64-bit test, otherwise 32-bit)
set FILTER=grep -v warning
if /i "%1"=="64" (
set OPT2=-DXBYAK64
set OPT3=win64
) else (
set OPT2=-DXBYAK32
set OPT3=win32
)
call set_opt
bmake -f Makefile.win all
rem the 64-bit test runs two variants (1 and 2), the 32-bit test only variant 1
if /i "%1"=="64" (
call :sub 1
call :sub 2
) else (
call :sub 1
)
goto end
rem :sub <variant>
rem   a.asm  -> nasm listing a.lst -> ok.lst (expected byte codes)
rem   nm.cpp -> nm_frame           -> x.lst  (byte codes from xbyak)
rem   then both lists are compared with diff
:sub
echo cl address.cpp %OPT% %OPT2%
cl address.cpp %OPT% %OPT2%
address %1% > a.asm
echo nasm -f %OPT3% -l a.lst a.asm
nasm -f %OPT3% -l a.lst a.asm
rem join listing lines whose third field ends with "-" with the following line
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
echo address %1% jit > nm.cpp
address %1% jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame > x.lst
diff -w x.lst ok.lst
wc x.lst
:end
+46
View File
@@ -0,0 +1,46 @@
#!/bin/sh
set -e
FILTER="grep -v warning"
CXX=${CXX:=g++}
CFLAGS_USER=${CFLAGS}
CFLAGS_WARN="$(cat CFLAGS_WARN.cfg)"
# sub <variant>
# Builds ./address, lets it write the assembler source (a.asm) and the xbyak
# source (nm.cpp) for the given variant, and compares the byte codes produced
# by the assembler (ok.lst) with those produced by nm_frame (x.lst).
# Reads OPT2, OPT3 and EXE, which are set by the caller.
sub()
{
CFLAGS="$CFLAGS_USER $CFLAGS_WARN -I../ $OPT2"
echo $CXX $CFLAGS address.cpp -o address
$CXX $CFLAGS address.cpp -o address
./address $1 > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# Take the third field of each listing line; a trailing "-" marks a
# continuation, so it is removed and the next line is joined to it.
awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
echo "xbyak"
./address $1 jit > nm.cpp
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame > x.lst
diff -bB ok.lst x.lst && echo "ok"
}
# "64" as the first argument selects the 64-bit test (variants 1 and 2);
# anything else runs the 32-bit test (variant 1 only).
if [ "$1" = "64" ]; then
echo "nasm(64bit)"
EXE=nasm
OPT2=-DXBYAK64
OPT3=win64
sub 1
sub 2
else
echo "nasm(32bit)"
EXE=nasm
OPT2=-DXBYAK32
OPT3=win32
sub 1
fi
+17
View File
@@ -0,0 +1,17 @@
@echo off
rem Runs all Windows test scripts in sequence.
call test_nm_all
echo *** test addressing ***
call test_address
call test_address 64
echo *** test jmp address ***
call test_jmp
rem test_misc is called three times; FILE is set to a different name before each call
echo *** test misc ***
set FILE=misc
call test_misc
echo *** test APX ***
set FILE=apx
call test_misc
echo *** test AVX10 ***
set FILE=avx10_test
call test_misc
echo *** all test end ***
+42
View File
@@ -0,0 +1,42 @@
@echo off
rem AVX test: compares the byte codes produced by the assembler with those
rem produced by the generated xbyak code.
rem usage: test_avx [Y|64|Y64]
rem   (none) nasm 32-bit, Y yasm 32-bit, 64 nasm 64-bit, Y64 yasm 64-bit
rem The 64-bit modes filter both lists through normalize_prefix.
set FILTER=cat
set Y=0
if /i "%1"=="Y" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK32
set OPT3=win32
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="Y64" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
set OPT3=win32
)
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
make_nm > a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem connect "?????-" and "??"
rem (for yasm the first line of the listing is skipped: NR > 1)
if /i "%Y%"=="1" (
awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
) else (
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
)
make_nm jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
diff -w x.lst ok.lst
wc x.lst
+53
View File
@@ -0,0 +1,53 @@
#!/bin/sh
# AVX test: compares the byte codes produced by the assembler (ok.lst) with
# those produced by the generated xbyak code (x.lst).
# usage: test_avx.sh [Y|64|Y64]
#   (none) nasm 32-bit, Y yasm 32-bit, 64 nasm 64-bit, Y64 yasm 64-bit
# CXX and CFLAGS are taken from the environment (CXX defaults to g++).
set -e
FILTER="grep -v warning"
CXX=${CXX:=g++}
CFLAGS_USER=${CFLAGS}
CFLAGS_WARN="$(cat CFLAGS_WARN.cfg)"
case $1 in
Y)
echo "yasm(32bit)"
EXE=yasm
OPT2="-DUSE_YASM -DXBYAK32"
OPT3=win32
;;
64)
echo "nasm(64bit)"
EXE=nasm
OPT2=-DXBYAK64
OPT3=win64
# 64-bit modes normalize the prefix order instead of filtering warnings
FILTER=./normalize_prefix
;;
Y64)
echo "yasm(64bit)"
EXE=yasm
OPT2="-DUSE_YASM -DXBYAK64"
OPT3=win64
FILTER=./normalize_prefix
;;
*)
echo "nasm(32bit)"
EXE=nasm
OPT2=-DXBYAK32
OPT3=win32
;;
esac
CFLAGS="$CFLAGS_USER $CFLAGS_WARN -g -I../ $OPT2 -DUSE_AVX"
echo "compile make_nm.cpp"
$CXX $CFLAGS make_nm.cpp -o make_nm
./make_nm > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# Take the third field of each listing line, skipping lines where it is "1+1";
# a trailing "-" marks a continuation, so it is removed and the next line is joined.
awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
echo "xbyak"
./make_nm jit > nm.cpp
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst
diff -bB ok.lst x.lst && echo "ok"
+31
View File
@@ -0,0 +1,31 @@
@echo off
rem AVX-512 test: compares the byte codes produced by nasm with those
rem produced by the generated xbyak code.
rem usage: test_avx512 [min|64]
rem   (none) 32-bit, 64 64-bit, min 64-bit with -DMIN_TEST
set FILTER=cat
set Y=0
if /i "%1"=="min" (
set EXE=nasm.exe
set OPT2=-DXBYAK64 -DMIN_TEST
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
set OPT3=win32
)
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
make_512 > a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem connect "?????-" and "??"
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
make_512 jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% /DXBYAK_AVX512
nm_frame |%FILTER% > x.lst
diff -w x.lst ok.lst
wc x.lst
+40
View File
@@ -0,0 +1,40 @@
#!/bin/sh
# AVX-512 test: compares the byte codes produced by nasm (ok.lst) with those
# produced by the generated xbyak code (x.lst).
# usage: test_avx512.sh [64]   (64 selects the 64-bit test, otherwise 32-bit)
# CXX and CFLAGS are taken from the environment (CXX defaults to g++).
set -e
FILTER="grep -v warning"
CXX=${CXX:=g++}
CFLAGS_USER=${CFLAGS}
CFLAGS_WARN="$(cat CFLAGS_WARN.cfg)"
case $1 in
64)
echo "nasm(64bit)"
EXE=nasm
OPT2=-DXBYAK64
OPT3=win64
# the 64-bit mode normalizes the prefix order instead of filtering warnings
FILTER=./normalize_prefix
;;
*)
echo "nasm(32bit)"
EXE=nasm
OPT2=-DXBYAK32
OPT3=win32
;;
esac
CFLAGS="$CFLAGS_USER $CFLAGS_WARN -I../ $OPT2 -DUSE_AVX512"
echo "compile make_512.cpp"
$CXX $CFLAGS make_512.cpp -o make_512
./make_512 > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# Take the third field of each listing line; a trailing "-" marks a
# continuation, so it is removed and the next line is joined to it.
awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
echo "xbyak"
./make_512 jit > nm.cpp
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
./nm_frame | $FILTER > x.lst
diff -bB ok.lst x.lst && echo "ok"
+13
View File
@@ -0,0 +1,13 @@
@echo off
rem Runs test_avx in all four assembler/bitness modes, then test_avx512 in 32-bit and 64-bit mode.
echo ** nasm-avx(32bit) ***
call test_avx
echo ** nasm-avx(64bit) ***
call test_avx 64
echo ** yasm-avx(32bit) ***
call test_avx Y
echo ** yasm-avx(64bit) ***
call test_avx Y64
echo ** nasm-avx512(32bit) ***
call test_avx512
echo ** nasm-avx512(64bit) ***
call test_avx512 64
+7
View File
@@ -0,0 +1,7 @@
@echo off
rem usage: test_by_xed <file with xbyak statements>
rem Copies the given file to tmp.cpp (included by test_by_xed.cpp), builds and
rem runs test_by_xed to write the file "bin", disassembles it with xed into
rem out.txt, and lets test_by_xed.py compare the input with the disassembly.
set XED=xed
set CFLAGS=-I ../ /EHsc /nologo
copy %1% tmp.cpp
cl %CFLAGS% test_by_xed.cpp && test_by_xed.exe
%XED% -64 -ir bin > out.txt
python3 test_by_xed.py %1% out.txt
+27
View File
@@ -0,0 +1,27 @@
#include <stdio.h>
#include <xbyak/xbyak.h>
using namespace Xbyak;
// Generates the instructions listed in tmp.cpp, which is included inside the
// constructor body, into a 4096*8 byte buffer.
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096*8)
{
// select AVX10v2Encoding as the default before emitting the instructions
setDefaultEncodingAVX10(AVX10v2Encoding);
#include "tmp.cpp"
}
};
// Generates the code and writes its bytes to the file "bin".
// Returns 0 on success and 1 if code generation throws or the file cannot be
// written completely, so that a stale "bin" is not mistaken for fresh output.
int main()
	try
{
	Code c;
	FILE *fp = fopen("bin", "wb");
	if (fp == 0) {
		perror("bin");
		return 1;
	}
	const size_t size = c.getSize();
	const size_t written = fwrite(c.getCode(), 1, size, fp);
	// fclose must run even if fwrite was short, so evaluate it first.
	const bool closed = fclose(fp) == 0;
	if (!closed || written != size) {
		fprintf(stderr, "ERR can't write bin\n");
		return 1;
	}
	return 0;
} catch (const std::exception& e) {
	printf("ERR %s\n", e.what());
	return 1;
}
+456
View File
@@ -0,0 +1,456 @@
import re
import math
import sys
class Reg:
    """A register operand, identified only by its name (e.g. 'eax', 'zmm3')."""

    def __init__(self, s):
        self.name = s

    def __str__(self):
        return self.name

    def __eq__(self, rhs):
        # Operand lists mix Reg, Memory, int and None; comparing with anything
        # that is not a Reg must yield "not equal" instead of raising
        # AttributeError on the missing .name attribute.
        if not isinstance(rhs, Reg):
            return NotImplemented
        return self.name == rhs.name

    def __lt__(self, rhs):
        # Ordering by name only; rhs may be any object with a .name attribute.
        return self.name < rhs.name
g_xmmTbl = '''
xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7
xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15
xmm16 xmm17 xmm18 xmm19 xmm20 xmm21 xmm22 xmm23
xmm24 xmm25 xmm26 xmm27 xmm28 xmm29 xmm30 xmm31
ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7
ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 ymm14 ymm15
ymm16 ymm17 ymm18 ymm19 ymm20 ymm21 ymm22 ymm23
ymm24 ymm25 ymm26 ymm27 ymm28 ymm29 ymm30 ymm31
zmm0 zmm1 zmm2 zmm3 zmm4 zmm5 zmm6 zmm7
zmm8 zmm9 zmm10 zmm11 zmm12 zmm13 zmm14 zmm15
zmm16 zmm17 zmm18 zmm19 zmm20 zmm21 zmm22 zmm23
zmm24 zmm25 zmm26 zmm27 zmm28 zmm29 zmm30 zmm31
'''.split()
g_tmmTbl = '''
tmm0 tmm1 tmm2 tmm3 tmm4 tmm5 tmm6 tmm7
'''.split()
g_regTbl = '''
eax ecx edx ebx esp ebp esi edi
ax cx dx bx sp bp si di
al cl dl bl ah ch dh bh
k1 k2 k3 k4 k5 k6 k7
rax rcx rdx rbx rsp rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15
r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31
r8d r9d r10d r11d r12d r13d r14d r15d
r16d r17d r18d r19d r20d r21d r22d r23d r24d r25d r26d r27d r28d r29d r30d r31d
r8w r9w r10w r11w r12w r13w r14w r15w
r16w r17w r18w r19w r20w r21w r22w r23w r24w r25w r26w r27w r28w r29w r30w r31w
r8b r9b r10b r11b r12b r13b r14b r15b
r16b r17b r18b r19b r20b r21b r22b r23b r24b r25b r26b r27b r28b r29b r30b r31b
spl bpl sil dil
'''.split()+g_tmmTbl+g_xmmTbl
# define global constants
# (one module-level Reg object per register name, e.g. eax, xmm0, k1)
for e in g_regTbl:
globals()[e] = Reg(e)
# opmask registers; entry i is k(i+1)
g_maskTbl = [k1, k2, k3, k4, k5, k6, k7]
# punctuation that is replaced by a space before a statement is split into tokens
g_replaceCharTbl = '{}();|,'
g_replaceChar = str.maketrans(g_replaceCharTbl, ' '*len(g_replaceCharTbl))
# size keywords; the entry at index i is mapped to the size 1<<i by parseMemory
g_sizeTbl = ['byte', 'word', 'dword', 'qword', 'xword', 'yword', 'zword']
# additional size keywords; the entry at index i is mapped to the size 1<<(i+4)
g_xedSizeTbl = ['xmmword', 'ymmword', 'zmmword']
# attribute names: g_attrXedTbl[i] is the alternative spelling of g_attrTbl[i]
g_attrTbl = ['T_sae', 'T_rn_sae', 'T_rd_sae', 'T_ru_sae', 'T_rz_sae', 'T_z']
g_attrXedTbl = ['sae', 'rne-sae', 'rd-sae', 'ru-sae', 'rz-sae', 'z']
class Attr:
    """An instruction attribute such as T_sae or T_z, identified only by its name."""

    def __init__(self, s):
        self.name = s

    def __str__(self):
        return self.name

    def __eq__(self, rhs):
        # Comparing with an object that has no .name (None, int, ...) must
        # yield "not equal" instead of raising AttributeError.
        name = getattr(rhs, 'name', None)
        if name is None:
            return NotImplemented
        return self.name == name

    def __lt__(self, rhs):
        # Ordering by name only; rhs may be any object with a .name attribute.
        return self.name < rhs.name
for e in g_attrTbl:
globals()[e] = Attr(e)
def newReg(s):
    """Return Reg(s) when s is a plain str; return any other value unchanged."""
    return Reg(s) if type(s) == str else s
# A memory operand: size in bytes (0 means unspecified, written as 'ptr'),
# base and index registers, scale, displacement, broadcast and a rip flag.
# broadcast is 0 for none, 1 for the '_b' form and N for a '{1toN}' form
# (see parseBroadcast).
class Memory:
def __init__(self, size=0, base=None, index=None, scale=0, disp=0, broadcast=0, rip=False):
self.size = size
# base and index may be given as a name (str), a Reg or None
self.base = newReg(base)
self.index = newReg(index)
self.scale = scale
self.disp = disp
self.broadcast = broadcast
self.rip = rip
# Formats the operand as '<size>[_b] [base+index*scale+disp]';
# rip-relative operands are formatted as '[rip+<hex disp>]'.
def __str__(self):
if self.rip:
return f'[rip+{hex(self.disp)}]'
if self.size == 0:
s = 'ptr'
else:
# pick the size keyword for size * max(broadcast, 1) bytes
idx = self.size * max(self.broadcast, 1)
s = g_sizeTbl[int(math.log2(idx))]
if self.broadcast > 0:
s += '_b'
s += ' ['
needPlus = False
if self.base:
s += str(self.base)
needPlus = True
if self.index:
if needPlus:
s += '+'
s += str(self.index)
if self.scale > 1:
s += f'*{self.scale}'
needPlus = True
if self.disp:
if needPlus:
s += '+'
s += hex(self.disp)
s += ']'
return s
# Xbyak uses 'ptr' when it can be automatically detected, so we should consider this in the comparison.
def __eq__(self, rhs):
# swap the operands so that self has the smaller broadcast value
if self.broadcast > rhs.broadcast:
return rhs == self
assert(self.broadcast <= rhs.broadcast)
if self.broadcast == 0:
if rhs.broadcast > 0: return False
# Xbyak uses 'ptr' when it is automatically detected.
# Therefore, the comparison is true if 'ptr' (i.e., size = 0) is used.
if 0 < self.size and 0 < rhs.size and self.size != rhs.size: return False
if self.broadcast == 1: # _b
if rhs.broadcast == 1: # compare ptr_b with ptr_b
if self.size != rhs.size:
return False
if self.size > 0 and (self.size != rhs.size * rhs.broadcast): # compare ptr_b with {1toX}
return False
else:
if self.broadcast != rhs.broadcast: return False
# the address components must match exactly
r = self.base == rhs.base and self.index == rhs.index and self.scale == rhs.scale and self.disp == rhs.disp
return r
def parseBroadcast(s):
    """Strip a broadcast marker from s and report it.

    Returns (text, broadcast): broadcast is 1 if s contains '_b' (every '_b'
    is removed), N if s contains '{1toN}' (that text is removed), else 0 with
    s unchanged.
    """
    if '_b' in s:
        return (s.replace('_b', ''), 1)
    m = re.search(r'({1to(\d+)})', s)
    if m is None:
        return (s, 0)
    marker, count = m.group(1), m.group(2)
    return (s.replace(marker, ''), int(count))
# Parses a memory operand such as 'ptr [rax+rcx*4+0x40]', 'qword ptr_b [rax]'
# or 'zmmword ptr [rip+0x10]' and returns a Memory object.
# broadcast: value already detected by the caller; when it is 0 the string
# itself is searched for a broadcast marker.
# Raises ValueError if s has no '[...]' part or names more than two registers.
def parseMemory(s, broadcast=0):
org_s = s
# spaces and letter case are not significant
s = s.replace(' ', '').lower()
size = 0
base = index = None
scale = 0
disp = 0
if broadcast == 0:
(s, broadcast) = parseBroadcast(s)
# Parse size
for i in range(len(g_sizeTbl)):
w = g_sizeTbl[i]
if s.startswith(w):
size = 1<<i
s = s[len(w):]
break
if size == 0:
for i in range(len(g_xedSizeTbl)):
w = g_xedSizeTbl[i]
if s.startswith(w):
size = 1<<(i+4)
s = s[len(w):]
break
# Remove 'ptr' if present
if s.startswith('ptr'):
s = s[3:]
if s.startswith('_b'):
broadcast = 1
s = s[2:]
# Extract the content inside brackets
r = re.match(r'\[(.*)\]', s)
if not r:
raise ValueError(f'bad format {org_s=}')
# check rip
expr = r.group(1)
r = re.match(r'rip\+([a-fx0-9]+)', expr)
if r:
# the displacement is hexadecimal if it starts with '0x', else decimal
b = 16 if r.group(1).startswith('0x') else 10
disp = int(r.group(1), b)
return Memory(size, base, index, scale, disp, broadcast, True)
# Parse components
# each element is (token, scale after '*' or '', sign character or '')
elems = re.findall(r'([a-z0-9]+)(?:\*([0-9]+))?|([+-])', expr)
for i, e in enumerate(elems):
if e[2]: # This is a '+' or '-' sign
continue
if e[0] in g_regTbl:
# the first register without a scale (or with scale 1) is the base;
# the other register is the index
if base is None and (not e[1] or int(e[1]) == 1):
base = e[0]
elif index is None:
index = e[0]
scale = int(e[1]) if e[1] else 1
else:
raise ValueError(f'bad format2 {s=}')
else:
# a number: add it to the displacement, negated if preceded by '-'
sign = -1 if i > 0 and elems[i-1][2] == '-' else 1
b = 16 if e[0].startswith('0x') else 10
disp += sign * int(e[0], b)
return Memory(size, base, index, scale, disp, broadcast)
def normalizeName(s):
  """Return the canonical spelling of a mnemonic: 'sal' becomes 'shl', anything else is unchanged."""
  return 'shl' if s == 'sal' else s
class Nmemonic:
  """One parsed instruction: mnemonic name, operand list and attribute list.

  name:  mnemonic text (compared through normalizeName, so aliases match).
  args:  operands in order; defaults to an empty list.
  attrs: attributes attached to the first operand (masks, rounding, ...);
         stored sorted so that two instructions compare equal regardless of
         the order in which the attributes were written.
  """
  def __init__(self, name, args=None, attrs=None):
    self.name = name
    # None instead of [] as default: a literal [] default is one shared list
    # for every instance created without that argument.
    self.args = [] if args is None else list(args)
    # list.sort() sorts in place and returns None, so its result must not be
    # stored; sorted() returns the ordered copy that is wanted here.
    self.attrs = [] if attrs is None else sorted(attrs)
  def __str__(self):
    """Format as 'name(arg0|attr|..., arg1, ...);'."""
    parts = [str(a) for a in self.args]
    if parts and self.attrs:
      # attributes are printed right after the first operand
      parts[0] += ''.join(f'|{e}' for e in self.attrs)
    return f'{self.name}(' + ', '.join(parts) + ');'
  def __eq__(self, rhs):
    """Equal when the normalized names, the operands and the sorted attributes all match."""
    if not isinstance(rhs, Nmemonic):
      return NotImplemented
    return normalizeName(self.name) == normalizeName(rhs.name) and self.args == rhs.args and self.attrs == rhs.attrs
def parseNmemonic(s):
  """Parse one instruction line into a Nmemonic.

  The same routine is applied to both inputs compared by run(), so it accepts
  the call-like form 'name(op, op|attr);' as well as the listing form
  'name op{k1}{z}, op'.  Steps: drop an '...Encoding' argument, extract the
  broadcast marker, expand 'xm0' style names to 'xmm0', pull out the opmask,
  replace punctuation through g_replaceChar, glue memory operands back
  together and finally classify every token.
  """
  operands = []
  flags = []

  # remove Xbyak::{Evex,Vex}Encoding
  enc = re.search(r'(,[^,]*Encoding)', s)
  if enc:
    s = s.replace(enc.group(1), '')

  s, broadcast = parseBroadcast(s)

  # replace xm0 with xmm0 (repeat until no short name is left)
  short = re.search(r'([xyz])m(\d\d?)', s)
  while short:
    s = s.replace(short.group(0), short.group(1) + 'mm' + short.group(2))
    short = re.search(r'([xyz])m(\d\d?)', s)

  # opmask, written either as 'zmm0{k7}' (digit at offset 2 of the match)
  # or as 'zmm0|k7' (digit is the last character of the match)
  for pattern, digitPos in ((r'({k[1-7]})', 2), (r'(\|\s*k[1-7])', -1)):
    hit = re.search(pattern, s)
    if hit:
      text = hit.group(1)
      flags.append(g_maskTbl[int(text[digitPos]) - 1])
      s = s.replace(text, '')

  s = s.translate(g_replaceChar)

  # reconstruct memory string: pieces that belong to one '[...]' operand were
  # separated by split() and are concatenated again here
  tokens = []
  insideMem = False
  for piece in s.split():
    if insideMem:
      tokens[-1] += piece
      insideMem = ']' not in piece
      continue
    tokens.append(piece)
    if piece in g_sizeTbl or piece in g_xedSizeTbl or piece.startswith('ptr'):
      tokens[-1] += ' ' # to avoid 'byteptr'
      insideMem = ']' not in tokens[-1]

  name = tokens[0]
  for tok in tokens[1:]:
    if tok.startswith('0x'):
      operands.append(int(tok, 16))
    elif tok[0] in '0123456789':
      operands.append(int(tok))
    elif tok in g_attrTbl:
      flags.append(Attr(tok))
    elif tok in g_attrXedTbl:
      # translate the xed spelling to the entry at the same index of g_attrTbl
      flags.append(Attr(g_attrTbl[g_attrXedTbl.index(tok)]))
    elif tok in g_regTbl:
      operands.append(Reg(tok))
    # xed special format : xmm8+3
    elif tok[:-2] in g_xmmTbl and tok.endswith('+3'):
      operands.append(Reg(tok[:-2]))
    # tmm?+1
    elif tok[:-2] in g_tmmTbl and tok.endswith('+1'):
      operands.append(Reg(tok[:-2]))
    else:
      operands.append(parseMemory(tok, broadcast))
  return Nmemonic(name, operands, flags)
def loadFile(name):
  """Read text file `name` and return its lines as a list.

  Empty lines and lines that start with '#' or '//' are left out.
  """
  with open(name) as f:
    lines = f.read().split('\n')
  return [ln for ln in lines if ln and not ln.startswith(('#', '//'))]
# Drop the first five whitespace-separated fields of a xed output line and
# return the rest joined by single spaces.
# e.g. XDIS 0: AVX512 AVX512EVEX 62F1E91858CB vaddpd ymm1{rne-sae}, ymm2, ymm3
#   -> vaddpd ymm1{rne-sae}, ymm2, ymm3
def removeExtraInfo(s):
  fields = s.split()
  del fields[:5]
  return ' '.join(fields)
def run(cppText, xedText):
  """Compare two instruction listings line by line.

  cppText: file holding the call-like instruction lines.
  xedText: file holding xed output; its five leading fields are removed
           before parsing.
  Raises Exception when the files have different line counts or when a pair
  of parsed instructions differs (the message carries the 1-based line number).
  """
  cppLines = loadFile(cppText)
  xedLines = loadFile(xedText)
  n = len(cppLines)
  if len(xedLines) != n:
    raise Exception(f'different line {n} {len(xedLines)}')
  for lineNo, (cppLine, xedLine) in enumerate(zip(cppLines, xedLines), start=1):
    stripped = removeExtraInfo(xedLine)
    lhs = parseNmemonic(cppLine)
    rhs = parseNmemonic(stripped)
    assertEqual(lhs, rhs, f'{lineNo}')
  print('run ok', n)
def assertEqualStr(a, b, msg=None):
  """Raise Exception unless str(a) and str(b) are the same text."""
  lhs, rhs = str(a), str(b)
  if lhs == rhs:
    return
  raise Exception(f'assert fail {msg}:', lhs, rhs)
def assertEqual(a, b, msg=None):
  """Raise Exception when a != b; the exception carries the label and both values as text."""
  if a != b:
    details = (f'assert fail {msg}:', str(a), str(b))
    raise Exception(*details)
def MemoryTest():
  """Check the text form of Memory objects and that a sized and an unsized Memory compare equal."""
  cases = [
    (Memory(0, rax), 'ptr [rax]'),
    (Memory(4, rax), 'dword [rax]'),
    (Memory(8, rax, rcx), 'qword [rax+rcx]'),
    (Memory(8, rax, rcx, 4), 'qword [rax+rcx*4]'),
    (Memory(8, None, rcx, 4), 'qword [rcx*4]'),
    (Memory(8, rax, None, 0, 5), 'qword [rax+0x5]'),
    (Memory(8, None, None, 0, 255), 'qword [0xff]'),
    (Memory(0, r8, r9, 1, 32), 'ptr [r8+r9+0x20]'),
  ]
  for (mem, text) in cases:
    assertEqualStr(mem, text)

  # size 0 acts as "unspecified" and must match any explicit size
  assertEqual(Memory(16, rax), Memory(0, rax))
def parseMemoryTest():
  """Exercise parseMemory: first the parsed text form, then equality between differently written operands."""
  print('parseMemoryTest')
  parseCases = [
    ('[]', Memory()),
    ('[rax]', Memory(0, rax)),
    ('ptr[rax]', Memory(0, rax)),
    ('ptr_b[rax]', Memory(0, rax, broadcast=1)),
    ('dword[rbx]', Memory(4, rbx)),
    ('xword ptr[rcx]', Memory(16, rcx)),
    ('xmmword ptr[rcx]', Memory(16, rcx)),
    ('xword ptr[rdx*8]', Memory(16, None, rdx, 8)),
    ('[12345]', Memory(0, None, None, 0, 12345)),
    ('[0x12345]', Memory(0, None, None, 0, 0x12345)),
    ('yword [rax+rdx*4]', Memory(32, rax, rdx, 4)),
    ('zword [rax+rdx*4+123]', Memory(64, rax, rdx, 4, 123)),
    ('xword_b [rax]', Memory(16, rax, None, 0, 0, 1)),
    ('dword [rax]{1to4}', Memory(16, rax, None, 0, 0, 1)),
    ('yword_b [rax]', Memory(32, rax, None, 0, 0, 1)),
    ('dword [rax]{1to8}', Memory(32, rax, None, 0, 0, 1)),
  ]
  for (text, expected) in parseCases:
    assertEqualStr(parseMemory(text), expected)

  print('compare test')
  # (left text, right text, whether the two must compare equal)
  compareCases = [
    ('ptr[rax]', 'dword[rax]', True),
    ('byte[rax]', 'dword[rax]', False),
    ('yword_b[rax]', 'dword [rax]{1to8}', True),
    ('yword_b[rax]', 'word [rax]{1to16}', True),
    ('zword_b[rax]', 'word [rax]{1to32}', True),
    ('zword_b[rax]', 'word [rax]{1to16}', False),
    ('dword [rax]{1to2}', 'dword [rax] {1to4}', False),
    ('zword_b[rax]', 'xword_b [rax]', False),
    ('ptr_b[rax]', 'word [rax]{1to32}', True), # ignore size
  ]
  for (lhs, rhs, eq) in compareCases:
    a = parseMemory(lhs)
    b = parseMemory(rhs)
    if not eq:
      assert a != b
      continue
    # equality has to hold in both directions
    assertEqual(a, b)
    assertEqual(b, a)
def parseNmemonicTest():
  """Check parseNmemonic against hand-built Nmemonic objects for both input spellings."""
  print('parseNmemonicTest')
  cases = [
    ('vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);', Nmemonic('vaddpd', [ymm1, ymm2, ymm3], [T_rn_sae])),
    ('vaddpd ymm1{rne-sae}, ymm2, ymm3', Nmemonic('vaddpd', [ymm1, ymm2, ymm3], [T_rn_sae])),
    ('mov(rax, dword ptr [rcx + rdx * 8 ] );', Nmemonic('mov', [rax, Memory(4, rcx, rdx, 8)])),
    ('mov(rax, ptr [rcx + rdx * 8 ] );', Nmemonic('mov', [rax, Memory(0, rcx, rdx, 8)])),
    ('vcmppd(k1, ymm2, ymm3 |T_sae, 3);', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])),
    ('vcmppd k1{sae}, ymm2, ymm3, 0x3', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])),
    ('v4fmaddps zmm1, zmm8+3, xmmword ptr [rdx+0x40]', Nmemonic('v4fmaddps', [zmm1, zmm8, Memory(16, rdx, None, 0, 0x40)])),
    ('vp4dpwssd zmm23{k7}{z}, zmm1+3, xmmword ptr [rax+0x40]', Nmemonic('vp4dpwssd', [zmm23, zmm1, Memory(16, rax, None, 0, 0x40)], [k7, T_z])),
    ('v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);', Nmemonic('v4fnmaddps', [zmm5, zmm2, Memory(0, rcx, None, 0, 0x80)], [k5])),
    ('vpcompressw(zmm30 | k2 |T_z, zmm1);', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
    ('vpcompressw zmm30{k2}{z}, zmm1', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
    ('vpshldw(xmm9|k3|T_z, xmm2, ptr [rax + 0x40], 5);', Nmemonic('vpshldw', [xmm9, xmm2, Memory(0, rax, None, 0, 0x40), 5], [k3, T_z])),
    ('vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 1), 5], [k3, T_z])),
    ('vpshrdd xmm5{k3}{z}, xmm2, dword ptr [rax+0x40]{1to4}, 0x5', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 4), 5], [k3, T_z])),
    ('vcmpph(k1, xmm15, ptr[rax+64], 1);', Nmemonic('vcmpph', [k1, xmm15, Memory(0, rax, None, 0, 64), 1])),
  ]
  for (text, expected) in cases:
    parsed = parseNmemonic(text)
    assertEqual(parsed, expected)
def test():
  """Run the built-in self tests in order, bracketed by start/end messages."""
  print('test start')
  for check in (MemoryTest, parseMemoryTest, parseNmemonicTest):
    check()
  print('test end')
def main():
  """Command-line entry point.

  '<script> test'                  runs the built-in self tests.
  '<script> <cpp-text> <xed-text>' compares the two listings with run().
  Any other argument count prints the usage text.
  """
  argc = len(sys.argv)
  if argc == 2 and sys.argv[1] == 'test':
    test()
  elif argc == 3:
    run(sys.argv[1], sys.argv[2])
  else:
    # sys.argv[0] is the name the script was started with; __name__ would
    # always print the literal '__main__' here.
    prog = sys.argv[0]
    print(f'{prog} <cpp-text> <xed-text> # compare cpp-text and xed-text generated by xed')
    print(f'{prog} test # for test')

if __name__ == '__main__':
  main()
+29
View File
@@ -0,0 +1,29 @@
#!/bin/sh
# Compare the instructions in one data file against what xed disassembles.
#
# usage: ./test_by_xed.sh <xbyak-cpp>
#
# Steps: copy the data file to tmp.cpp, build and run test_by_xed, run xed on
# the file named 'bin' (presumably written by test_by_xed - confirm in
# test_by_xed.cpp), then let test_by_xed.py compare the data file with xed's
# output.
#
# Environment overrides: XED, XED_OPT, CXX, CFLAGS, PYTHON.
set -e
XED=${XED:=xed}
XED_OPT=${XED_OPT:=-64 -set PREFETCHIT 1 -set PREFETCHRST 1}
#XED_OPT=${XED_OPT:=-64 -chip-check FUTURE}
CXX=${CXX:=g++}
CFLAGS_USER=${CFLAGS}
CFLAGS_WARN="$(cat CFLAGS_WARN.cfg)"
PYTHON=${PYTHON:=python3}

echo $XED

if [ $# -ne 1 ]; then
	echo "./test_by_xed.sh <xbyak-cpp>"
	exit 1
fi

TARGET=$1

CFLAGS="$CFLAGS_USER $CFLAGS_WARN -I ../"

# $TARGET is quoted wherever it is used as a file name so that a path with
# spaces stays one argument.  $XED, $XED_OPT, $CXX, $CFLAGS and $PYTHON are
# left unquoted on purpose: they may carry several words.
echo "test: $TARGET"
cp "$TARGET" tmp.cpp
$CXX $CFLAGS test_by_xed.cpp -o test_by_xed

# The handlers use { ...; } so that 'exit 1' ends this script directly; with
# ( ... ) it would only leave a subshell and the stop would depend on set -e.
./test_by_xed || { echo "ERR test_by_xed"; exit 1; }
echo "$XED ${XED_OPT} -ir bin > out.txt"
$XED ${XED_OPT} -ir bin > out.txt || { echo "ERR xed"; exit 1; }
$PYTHON test_by_xed.py "$TARGET" out.txt || { echo "ERR test_by_xed.py"; exit 1; }
+5
View File
@@ -0,0 +1,5 @@
rem Run test_by_xed.bat once for every data file listed in TARGETS.
rem Each file name is echoed and then passed as dataset\NAME.
set TARGETS=old.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt amx.txt apx.txt comp.txt
for %%f in (%TARGETS%) do (
echo %%f
call test_by_xed.bat dataset\%%f
)
+4
View File
@@ -0,0 +1,4 @@
rem Build everything through Makefile.win, compile jmp.cpp with XBYAK_TEST
rem defined (no optimization, debug info) and run the resulting jmp program.
rem NOTE(review): set_opt presumably defines the OPT variable used below - confirm.
call set_opt
bmake -f Makefile.win all
cl -I../ -I./ -DXBYAK_TEST jmp.cpp %OPT% /Od /Zi
jmp
+4
View File
@@ -0,0 +1,4 @@
rem Same flow as the jmp test, for an arbitrary source: compile FILE.cpp with
rem XBYAK_TEST defined and run the resulting program.
rem The FILE variable is not set in this script and must come from the caller.
rem NOTE(review): set_opt presumably defines the OPT variable used below - confirm.
call set_opt
bmake -f Makefile.win all
cl -I../ -I./ -DXBYAK_TEST %FILE%.cpp %OPT% /Od /Zi
%FILE%
+78
View File
@@ -0,0 +1,78 @@
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#include <stdio.h>
#include <stdlib.h>
#include "../../include.mie/mie_thread.h"
#include "xbyak/xbyak.h"
/*
	JIT-generated function "void set(int x)" that stores x into mm0.
	The argument is first brought into ecx, whose source depends on the
	calling convention of the target.
*/
class WriteMMX : public Xbyak::CodeGenerator {
public:
	WriteMMX()
	{
#if defined(XBYAK32)
		// 32-bit: the argument is on the stack just above the return address
		mov(ecx, ptr [esp + 4]);
#elif defined(XBYAK64_GCC)
		// 64-bit System V ABI: the first integer argument arrives in edi
		mov(ecx, edi);
#endif
		// 64-bit Windows: the first integer argument is already in ecx
		movd(mm0, ecx);
		ret();
	}
	// entry point of the generated code, typed as void (*)(int)
	void (*set() const)(int x) { return (void (*)(int x))getCode(); }
};
/*
	JIT-generated function "int get()" that copies mm0 into eax and returns,
	so the caller receives the current low 32 bits of mm0.
*/
class ReadMMX : public Xbyak::CodeGenerator {
public:
ReadMMX()
{
movd(eax, mm0);
ret();
}
// entry point of the generated code, typed as int (*)()
int (*get() const)() { return (int (*)())getCode(); }
};
/*
	Thread body for the MMX test: writes n_ into mm0 once, then keeps reading
	mm0 back once per MIE_Sleep(1000) call and reports when the value differs
	from n_.  The loop never ends on its own.
*/
class Test : public MIE::ThreadBase<Test> {
	int n_;
public:
	Test(int n)
		: n_(n)
	{
	}
	void threadEntry()
	{
		printf("n=%d\n", n_);
		// store this thread's value in mm0
		WriteMMX writer;
		void (*storeToMM0)(int) = writer.set();
		storeToMM0(n_);
		// poll mm0 forever
		ReadMMX reader;
		int (*loadFromMM0)() = reader.get();
		for (;;) {
			const int cur = loadFromMM0();
			printf("b=%d\n", cur);
			if (cur != n_) {
				printf("mm0 has changed!\n");
			}
			MIE::MIE_Sleep(1000);
		}
	}
	void stopThread() { }
};
/*
	Start two Test threads with different values (n and n + 1) and wait for them.
	n comes from argv[1], or is 1223 when no argument is given.
*/
int main(int argc, char *argv[])
{
#ifdef XBYAK32
	puts("32bit");
#else
	puts("64bit");
#endif
	try {
		int n = atoi(argc == 1 ? "1223" : argv[1]);
		Test test0(n), test1(n + 1);
		test0.beginThread();
		test1.beginThread();

		test0.joinThread();
		test1.joinThread();
	} catch (const std::exception& e) {
		// caught by const reference: the handler only reads the message
		printf("ERR:%s\n", e.what());
	} catch (...) {
		printf("unknown error\n");
	}
}
+46
View File
@@ -0,0 +1,46 @@
@echo off
rem Compare the bytes produced by xbyak with the bytes produced by an assembler.
rem
rem usage: test_nm [Y / 64 / Y64 / noexcept]
rem   (none)    nasm, 32-bit
rem   Y         yasm, 32-bit
rem   64        nasm, 64-bit
rem   Y64       yasm, 64-bit
rem   noexcept  nasm, 32-bit, built with XBYAK_NO_EXCEPTION
rem
rem EXE    : assembler executable
rem OPT2   : defines passed to cl
rem OPT3   : object format passed to the assembler with -f
rem FILTER : command both listings are piped through (cat, or normalize_prefix for 64-bit)
rem Y      : 1 when yasm is used; selects the awk variant that skips the first listing line
set FILTER=cat
set Y=0
if /i "%1"=="Y" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK32
set OPT3=win32
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="Y64" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="noexcept" (
set EXE=nasm.exe
set OPT2=-DXBYAK32 -DXBYAK_NO_EXCEPTION
set OPT3=win32
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
set OPT3=win32
)
rem NOTE(review): set_opt presumably defines the OPT variable used by cl below - confirm.
call set_opt
bmake -f Makefile.win all
rem build make_nm and let it write the assembler source a.asm
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
make_nm > a.asm
rm -rf a.lst
rem assemble a.asm and keep the listing a.lst
echo %EXE% -f %OPT3% -l a.lst a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem Take field 3 of every listing line. A field containing "-" marks a
rem continuation: its last character is dropped and the field of the next
rem line is appended before the joined text is printed.
if /i "%Y%"=="1" (
awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
) else (
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
)
rem generate nm.cpp with make_nm, build nm_frame.cpp and dump what it prints
make_nm jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
rem both listings must match, ignoring whitespace differences
diff -wb x.lst ok.lst && echo "ok"
+66
View File
@@ -0,0 +1,66 @@
#!/bin/sh
# Compare the bytes produced by xbyak with the bytes produced by an assembler.
#
# usage: ./test_nm.sh [Y|64|Y64|avx512|noexcept]
# Without an argument (or with an unknown one) nasm in 32-bit mode is used.
# CXX and CFLAGS may be overridden from the environment.
set -e

CXX=${CXX:-g++}
CFLAGS_USER=${CFLAGS}
CFLAGS_WARN="$(cat CFLAGS_WARN.cfg)"

# use_asm <label> <assembler> <defines> <object-format> [<filter>]
# Prints the label and records the configuration; the filter defaults to cat.
use_asm() {
	echo "$1"
	EXE=$2
	OPT2=$3
	OPT3=$4
	FILTER=${5:-cat}
}

case $1 in
Y)        use_asm "yasm(32bit)" yasm "-DUSE_YASM -DXBYAK32" win32 ;;
64)       use_asm "nasm(64bit)" nasm "-DXBYAK64" win64 ./normalize_prefix ;;
Y64)      use_asm "yasm(64bit)" yasm "-DUSE_YASM -DXBYAK64" win64 ./normalize_prefix ;;
avx512)   use_asm "nasm(64bit) + avx512" nasm "-DXBYAK64 -DUSE_AVX512" win64 ./normalize_prefix ;;
noexcept) use_asm "nasm(32bit) without exception" nasm "-DXBYAK32 -DXBYAK_NO_EXCEPTION" win32 ;;
*)        use_asm "nasm(32bit)" nasm "-DXBYAK32" win32 ;;
esac

CFLAGS="$CFLAGS_USER $CFLAGS_WARN -g -I../ -I./ $OPT2"

# reference side: make_nm writes a.asm, the assembler turns it into a listing
echo "compile make_nm.cpp with $CFLAGS"
$CXX $CFLAGS make_nm.cpp -o make_nm
./make_nm > a.asm

echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# keep field 3 of each listing line; a trailing '-' marks a continuation and
# is removed so that the next line's field follows on the same output line
awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst

# xbyak side: make_nm writes nm.cpp, nm_frame is built and its output dumped
echo "xbyak"
./make_nm jit > nm.cpp
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst

diff -bB ok.lst x.lst && echo "ok"
+11
View File
@@ -0,0 +1,11 @@
@echo off
rem Run test_nm for each assembler/bitness combination (nasm and yasm,
rem 32-bit and 64-bit), then call test_avx_all.
echo *** nasm(32bit) ***
call test_nm
echo *** yasm(32bit) ***
call test_nm Y
echo *** nasm(64bit) ***
call test_nm 64
echo *** yasm(64bit) ***
call test_nm Y64
rem NOTE(review): test_avx_all is a separate script not shown here.
call test_avx_all