Merge commit '2201a0227297b9251717e44adc32554a51ca0ed6' as 'external/xbyak'
This commit is contained in:
Vendored
+1
@@ -0,0 +1 @@
|
||||
-Wall -Wextra -Wsuggest-override -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||
Vendored
+147
@@ -0,0 +1,147 @@
|
||||
# Building the 32-bit tests (-m32) needs a multilib toolchain:
#   apt install g++-multilib
CXX_32 = $(CXX) -m32
CXX_64 = $(CXX) -m64

# Programs built on every platform; platform dependent ones are appended below.
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32 avx10_test
XBYAK_INC=../xbyak/xbyak.h ../xbyak/xbyak_mnemonic.h ../xbyak/xbyak_util.h

# Shell probes use ':=' so that each command runs once while the file is parsed
# instead of once per expansion.
UNAME_S:=$(shell uname -s)

# detect_x32 is a helper built by this Makefile; it does not exist on a fresh
# checkout, so the "not found" message of the shell is discarded.
ifeq ($(shell ./detect_x32 2>/dev/null),x32)
X32?=1
endif

BIT=32
ifeq ($(shell uname -m),x86_64)
BIT=64
endif

# ONLY_64BIT=1 : the 32-bit programs are neither built nor run.
ONLY_64BIT=0
ifeq ($(UNAME_S),Darwin)
# 32-bit binary is not supported
ONLY_64BIT=1
endif
ifeq ($(findstring MINGW64,$(UNAME_S)),MINGW64)
ONLY_64BIT=1
endif

ifeq ($(ONLY_64BIT),0)
TARGET += jmp address
endif

ifeq ($(BIT),64)
TARGET += jmp64 address64 apx
TARGET += sf_test cpumask_test
endif

all: $(TARGET)

# Extra warning options are kept in CFLAGS_WARN.cfg; read the file only once.
CFLAGS_WARN:=$(shell cat CFLAGS_WARN.cfg)

# CFLAGS stays recursive ('=') so that CXXFLAGS/CPPFLAGS/LDFLAGS are picked up at use time.
# Optional: add -std=c++0x
CFLAGS=-O2 -Wall -I.. -I. $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||
# ---- rules that compile the test programs ----
# Each program is a single translation unit; it is rebuilt when its source
# or one of the xbyak headers listed in $(XBYAK_INC) changes.

# The prerequisites of make_nm are declared separately near the end of this file.
make_nm:
	$(CXX) $(CFLAGS) make_nm.cpp -o $@

normalize_prefix: normalize_prefix.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

test_mmx: test_mmx.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -lpthread

# jmp and address are built twice from the same source: once with -m32, once with -m64.
jmp: jmp.cpp $(XBYAK_INC)
	$(CXX_32) $(CFLAGS) $< -o $@

jmp64: jmp.cpp $(XBYAK_INC)
	$(CXX_64) $(CFLAGS) $< -o $@

address: address.cpp $(XBYAK_INC)
	$(CXX_32) $(CFLAGS) $< -o $@

address64: address.cpp $(XBYAK_INC)
	$(CXX_64) $(CFLAGS) $< -o $@

bad_address: bad_address.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

# misc32 and cvt_test32 reuse the sources of misc and cvt_test with -DXBYAK32.
misc: misc.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

misc32: misc.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32

cvt_test: cvt_test.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

cvt_test32: cvt_test.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32

noexception: noexception.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -fno-exceptions

apx: apx.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@

avx10_test: avx10_test.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@ -DXBYAK64

# sf_test also depends on the two generated expectation headers.
sf_test: sf_test.cpp $(XBYAK_INC) sf_test_win.h sf_test_gcc.h
	$(CXX) $(CFLAGS) $< -o $@ #-DXBYAK64

cpumask_test: cpumask_test.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) $< -o $@
|
||||
|
||||
# Data sets (below dataset/) that are passed one by one to test_by_xed.sh.
TEST_FILES=old.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt apx.txt amx.txt

# Stop at the first data set for which test_by_xed.sh fails.
xed_test:
	@set -e; \
	for f in $(TEST_FILES:%=dataset/%); do \
		./test_by_xed.sh $$f || exit 1; \
	done
|
||||
|
||||
# Main test driver: regenerate the mnemonic header in ../gen, then run the
# script based tests and the stand-alone test programs.
test_nm: normalize_prefix $(TARGET)
	$(MAKE) -C ../gen
# 32-bit part, skipped where only 64-bit binaries are supported
ifneq ($(ONLY_64BIT),1)
	CXX=$(CXX) ./test_nm.sh
	CXX=$(CXX) ./test_nm.sh noexcept
	CXX=$(CXX) ./test_nm.sh Y
	CXX=$(CXX) ./test_nm.sh avx512
	CXX=$(CXX) ./test_address.sh
	./jmp
	./cvt_test32
endif
# programs that run on every platform
	./bad_address
	./misc
	./misc32
	./cvt_test
# 64-bit part
ifeq ($(BIT),64)
	CXX=$(CXX) ./test_address.sh 64
  # the two test_nm.sh runs below are skipped when X32 is 1
  ifneq ($(X32),1)
	CXX=$(CXX) ./test_nm.sh 64
	CXX=$(CXX) ./test_nm.sh Y64
  endif
	./jmp64
	./apx
	./avx10_test
endif
|
||||
|
||||
# Run test_avx.sh in its 32-bit and 64-bit variants.
# The 32-bit runs are guarded exactly like the 32-bit part of test_nm
# (skipped when ONLY_64BIT is 1); the previous guard was inverted and ran
# them only on platforms that cannot execute 32-bit binaries.
test_avx: normalize_prefix
ifneq ($(ONLY_64BIT),1)
	CXX=$(CXX) ./test_avx.sh
	CXX=$(CXX) ./test_avx.sh Y
endif
ifeq ($(BIT),64)
	CXX=$(CXX) ./test_avx.sh 64
  # skipped when X32 is 1
  ifneq ($(X32),1)
	CXX=$(CXX) ./test_avx.sh Y64
  endif
endif
|
||||
|
||||
# Run test_avx512.sh in its 32-bit and 64-bit variants.
# The 32-bit run uses the same guard as the 32-bit part of test_nm
# (skipped when ONLY_64BIT is 1); the previous guard was inverted.
test_avx512: normalize_prefix
ifneq ($(ONLY_64BIT),1)
	CXX=$(CXX) ./test_avx512.sh
endif
ifeq ($(BIT),64)
	CXX=$(CXX) ./test_avx512.sh 64
endif
|
||||
|
||||
test_avx10: avx10_test
	./avx10_test

detect_x32: detect_x32.c
	$(CC) $< -o $@

# detect_x32 is built first: this Makefile runs ./detect_x32 while it is
# parsed, so the sub-makes started below see the freshly built helper.
test: detect_x32
	$(MAKE) test_nm
	$(MAKE) test_avx
	$(MAKE) test_avx512

# Regenerate the expectation headers that sf_test includes.
update_sf_test: sf_test.cpp
	$(CXX) $(CFLAGS) sf_test.cpp -DXBYAK64_WIN -DDUMP -o sf_test_dump && ./sf_test_dump > sf_test_win.h
	$(CXX) $(CFLAGS) sf_test.cpp -DXBYAK64_GCC -DDUMP -o sf_test_dump && ./sf_test_dump > sf_test_gcc.h

clean:
	$(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512 avx10_test detect_x32 sf_test sf_test_dump

lib_run: lib_test.cpp lib_run.cpp lib.h
	$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run

# Prerequisites only; the recipe of make_nm is given with the other compile rules.
make_nm: make_nm.cpp $(XBYAK_INC)

# All command-like targets are phony so that a stray file of the same name
# cannot make them look up to date.
.PHONY: all clean test test_nm test_avx test_avx512 test_avx10 xed_test update_sf_test
|
||||
Vendored
+14
@@ -0,0 +1,14 @@
|
||||
# Options passed to cl for both generator programs.
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS -I ../

# The mnemonic header is the concatenated output of the two generators.
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
	../gen/gen_code.exe > $@
	../gen/gen_avx512.exe >> $@

../gen/gen_code.exe: ../gen/gen_code.cpp #../xbyak/xbyak.h
	cl ../gen/gen_code.cpp $(OPT) /Fe:$@

../gen/gen_avx512.exe: ../gen/gen_avx512.cpp #../xbyak/xbyak.h
	cl ../gen/gen_avx512.cpp $(OPT) /Fe:$@

SUB_HEADER=../xbyak/xbyak_mnemonic.h

all: $(SUB_HEADER)
|
||||
Vendored
+9
@@ -0,0 +1,9 @@
|
||||
@echo off
rem Assemble test.asm as a 32-bit (win32) object with nasm and with yasm
rem and print the listing file written by each assembler.
echo 32bit
rem remove listings left over from a previous run
rm -rf a.lst b.lst

rem nasm writes its listing to a.lst
echo nasm
nasm -l a.lst -f win32 -DWIN32 test.asm
cat a.lst

rem yasm writes its listing to b.lst
echo yasm
yasm -l b.lst -f win32 -DWIN32 test.asm
cat b.lst
|
||||
Vendored
+155
@@ -0,0 +1,155 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
/*
	print one line per address expression in tbl for the instruction 'name'
	@param isJIT true : "<name> (ymm7, ptr[<expr>], ymm4); dump();"
	             false: "<name> ymm7, [<expr>], ymm4"
	@param name instruction name
	@param tbl address expressions
	@param tblSize number of elements of tbl
*/
void genVsibSub(bool isJIT, const char *name, const char *tbl[], size_t tblSize)
{
	// the text around the address expression depends only on the output mode
	const char *open = isJIT ? "(ymm7, ptr[" : "ymm7, [";
	const char *close = isJIT ? "], ymm4); dump();\n" : "], ymm4\n";
	for (size_t pos = 0; pos < tblSize; pos++) {
		printf("%s %s%s%s", name, open, tbl[pos], close);
	}
}
|
||||
void genVsib(bool isJIT)
|
||||
{
|
||||
if (isJIT) puts("void genVsib() {");
|
||||
const char *vm32xTbl[] = {
|
||||
"xmm0",
|
||||
"xmm0 * 1",
|
||||
"xmm0 + 4",
|
||||
"xmm0 + eax",
|
||||
"xmm0 * 4 + ecx",
|
||||
"xmm3 * 8 + edi + 123",
|
||||
"xmm2 * 2 + 5",
|
||||
"eax + xmm0",
|
||||
"esp + xmm2",
|
||||
};
|
||||
const char *vm32yTbl[] = {
|
||||
"ymm0",
|
||||
"ymm0 * 1",
|
||||
"ymm0 + 4",
|
||||
"ymm0 + eax",
|
||||
"ymm0 * 4 + ecx",
|
||||
"ymm3 * 8 + edi + 123",
|
||||
"ymm2 * 2 + 5",
|
||||
"eax + ymm0",
|
||||
"esp + ymm2",
|
||||
};
|
||||
genVsibSub(isJIT, "vgatherdpd", vm32xTbl, NUM_OF_ARRAY(vm32xTbl));
|
||||
genVsibSub(isJIT, "vgatherqpd", vm32yTbl, NUM_OF_ARRAY(vm32yTbl));
|
||||
#ifdef XBYAK64
|
||||
const char *vm32x64Tbl[] = {
|
||||
"xmm0 + r11",
|
||||
"r13 + xmm15",
|
||||
"123 + rsi + xmm2 * 4",
|
||||
};
|
||||
genVsibSub(isJIT, "vgatherdpd", vm32x64Tbl, NUM_OF_ARRAY(vm32x64Tbl));
|
||||
#endif
|
||||
if (isJIT) puts("}");
|
||||
}
|
||||
|
||||
void genAddress(bool isJIT, const char regTbl[][5], size_t regTblNum)
|
||||
{
|
||||
int count = 0;
|
||||
int funcNum = 1;
|
||||
if (isJIT) {
|
||||
puts("void gen0(){");
|
||||
}
|
||||
for (size_t i = 0; i < regTblNum + 1; i++) {
|
||||
const char *base = regTbl[i];
|
||||
for (size_t j = 0; j < regTblNum + 1; j++) {
|
||||
if (j == 4) continue; /* esp is not index register */
|
||||
const char *index = regTbl[j];
|
||||
static const int scaleTbl[] = { 0, 1, 2, 4, 8 };
|
||||
for (size_t k = 0; k < NUM_OF_ARRAY(scaleTbl); k++) {
|
||||
int scale = scaleTbl[k];
|
||||
static const int dispTbl[] = { 0, 1, 1000, -1, -1000 };
|
||||
for (size_t m = 0; m < NUM_OF_ARRAY(dispTbl); m++) {
|
||||
int disp = dispTbl[m];
|
||||
bool isFirst = true;
|
||||
if (isJIT) {
|
||||
printf("mov (ecx, ptr[");
|
||||
} else {
|
||||
printf("mov ecx, [");
|
||||
}
|
||||
if (i < regTblNum) {
|
||||
printf("%s", base);
|
||||
isFirst = false;
|
||||
}
|
||||
if (j < regTblNum) {
|
||||
if (!isFirst) putchar('+');
|
||||
printf("%s", index);
|
||||
if (scale) printf("*%d", scale);
|
||||
isFirst = false;
|
||||
}
|
||||
if (isFirst) {
|
||||
if (isJIT) printf("(void*)");
|
||||
printf("%d", disp);
|
||||
} else {
|
||||
if (disp >= 0) {
|
||||
putchar('+');
|
||||
}
|
||||
printf("%d", disp);
|
||||
isFirst = false;
|
||||
}
|
||||
if (isJIT) {
|
||||
printf("]); dump();\n");
|
||||
} else {
|
||||
printf("]\n");
|
||||
}
|
||||
if (isJIT) {
|
||||
count++;
|
||||
if ((count % 100) == 0) {
|
||||
printf("}\n void gen%d(){\n", funcNum++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (isJIT) puts("}");
|
||||
genVsib(isJIT);
|
||||
if (isJIT) {
|
||||
printf("void gen(){\n");
|
||||
for (int i = 0; i < funcNum; i++) {
|
||||
printf(" gen%d();\n", i);
|
||||
}
|
||||
puts("genVsib();");
|
||||
printf("}\n");
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
argc--, argv++;
|
||||
bool phase = argc > 0 && strcmp(*argv, "1") == 0;
|
||||
bool isJIT = (argc > 1);
|
||||
fprintf(stderr, "phase:%c %s\n", phase ? '1' : '2', isJIT ? "jit" : "asm");
|
||||
if (phase) {
|
||||
fprintf(stderr, "32bit reg\n");
|
||||
static const char reg32Tbl[][5] = {
|
||||
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
|
||||
#ifdef XBYAK64
|
||||
"r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
|
||||
#endif
|
||||
};
|
||||
genAddress(isJIT, reg32Tbl, NUM_OF_ARRAY(reg32Tbl));
|
||||
} else {
|
||||
#ifdef XBYAK64
|
||||
fprintf(stderr, "64bit reg\n");
|
||||
static const char reg64Tbl[][5] = {
|
||||
"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
|
||||
};
|
||||
genAddress(isJIT, reg64Tbl, NUM_OF_ARRAY(reg64Tbl));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
Vendored
+1964
File diff suppressed because it is too large
Load Diff
Vendored
+53
@@ -0,0 +1,53 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
#include <cybozu/inttype.hpp>
|
||||
#include <cybozu/test.hpp>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// ymm with sae is not supported from avx10.2 rev 4.0.
|
||||
CYBOZU_TEST_AUTO(ymm_with_sae)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
CYBOZU_TEST_EXCEPTION(vaddpd(ymm1, ymm2, ymm3 |T_rn_sae), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vcvtph2ibs(xmm1, xmm31 | T_rd_sae), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vcvtph2ibs(ymm1, ymm31 | T_rd_sae), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vcvt2ps2phx(ymm1, ymm2, ymm3 | T_rd_sae), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vminmaxpd(ymm1, ymm2, ymm3 | T_sae, 1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vminmaxph(ymm1, ymm2, ymm3 | T_sae, 2), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vminmaxps(ymm1, ymm2, ymm3 | T_sae, 3), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vcvtps2ibs(ym1, ym2|T_rd_sae), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vcvtps2ibs(xm1, xm2|T_rd_sae), std::exception);
|
||||
}
|
||||
} c;
|
||||
}
|
||||
|
||||
/*
	vmpsadbw is emitted twice with the default encoding and twice after
	setDefaultEncodingAVX10(AVX10v2Encoding);
	the generated bytes are compared with tbl
*/
CYBOZU_TEST_AUTO(vmpsadbw)
{
	// expected code, one row per instruction generated by Code
	const uint8_t tbl[] = {
		0xc4, 0xc3, 0x61, 0x42, 0xcf, 0x03,
		0xc4, 0xe3, 0x65, 0x42, 0x88, 0x80, 0x00, 0x00, 0x00, 0x03,
		0x62, 0xd3, 0x66, 0x28, 0x42, 0xcf, 0x03,
		0x62, 0xf3, 0x66, 0x28, 0x42, 0x48, 0x04, 0x03,
	};
	const size_t n = CYBOZU_NUM_OF_ARRAY(tbl);

	struct Code : CodeGenerator {
		Code()
		{
			setDefaultEncodingAVX10();
			vmpsadbw(xm1, xm3, xm15, 3); // vex(avx)
			vmpsadbw(ym1, ym3, ptr[rax+128], 3); // vex(avx2)
			setDefaultEncodingAVX10(AVX10v2Encoding);
			vmpsadbw(ym1, ym3, ym15, 3); // evex(avx10.2)
			vmpsadbw(ym1, ym3, ptr[rax+128], 3); // evex(avx10.2)
		}
	};
	Code c;
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
Vendored
+28
@@ -0,0 +1,28 @@
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <cybozu/test.hpp>
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp + esp]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [ax]), std::exception); // not support
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp * 4]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 16]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(fld(dword [xmm0]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3), std::exception);
|
||||
#ifdef XBYAK64
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [rax + eax]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0]), std::exception);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
CYBOZU_TEST_AUTO(exception)
|
||||
{
|
||||
Code c;
|
||||
}
|
||||
Vendored
+124
@@ -0,0 +1,124 @@
|
||||
//#define XBYAK_CPUMASK_COMPACT 0
|
||||
#define XBYAK_NO_EXCEPTION
|
||||
#define XBYAK_CPUMASK_N 8
|
||||
#define XBYAK_CPUMASK_BITN 3
|
||||
#include <xbyak/xbyak_util.h>
|
||||
#include <cybozu/test.hpp>
|
||||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
/*
	CpuMask::append / appendRange and the string made by getStr()
*/
CYBOZU_TEST_AUTO(append)
{
	CpuMask m;

	// a new mask is empty
	CYBOZU_TEST_ASSERT(m.empty());
	CYBOZU_TEST_EQUAL(m.size(), 0);
	CYBOZU_TEST_EQUAL(m.getStr(), "");

	// {2}
	CYBOZU_TEST_ASSERT(m.append(2));
	CYBOZU_TEST_ASSERT(!m.empty());
	CYBOZU_TEST_EQUAL(m.size(), 1u);
	CYBOZU_TEST_EQUAL(m.get(0), 2u);
	CYBOZU_TEST_EQUAL(m.getStr(), "2");

	// {2, 4}
	CYBOZU_TEST_ASSERT(m.append(4));
	CYBOZU_TEST_EQUAL(m.size(), 2u);
	CYBOZU_TEST_EQUAL(m.get(0), 2u);
	CYBOZU_TEST_EQUAL(m.get(1), 4u);
	CYBOZU_TEST_EQUAL(m.getStr(), "2,4");

	// 3 is refused, then {2, 4, 7}
	CYBOZU_TEST_ASSERT(!m.append(3)); // not monotonically increasing
	CYBOZU_TEST_ASSERT(m.append(7));
	CYBOZU_TEST_EQUAL(m.size(), 3u);
	CYBOZU_TEST_EQUAL(m.get(0), 2u);
	CYBOZU_TEST_EQUAL(m.get(1), 4u);
	CYBOZU_TEST_EQUAL(m.get(2), 7u);
	CYBOZU_TEST_EQUAL(m.getStr(), "2,4,7");

	// consecutive values are printed as a range
	m.clear();
	CYBOZU_TEST_ASSERT(m.append(1));
	CYBOZU_TEST_ASSERT(m.append(2));
	CYBOZU_TEST_ASSERT(m.append(3));
	CYBOZU_TEST_EQUAL(m.getStr(), "1-3");
	CYBOZU_TEST_ASSERT(m.append(5));
	CYBOZU_TEST_ASSERT(m.append(6));
	CYBOZU_TEST_ASSERT(m.append(7));
	CYBOZU_TEST_EQUAL(m.getStr(), "1-3,5-7");

	// appendRange gives the same strings
	m.clear();
	CYBOZU_TEST_ASSERT(m.appendRange(1, 3));
	CYBOZU_TEST_EQUAL(m.getStr(), "1-3");
	CYBOZU_TEST_ASSERT(m.appendRange(5, 7));
	CYBOZU_TEST_EQUAL(m.getStr(), "1-3,5-7");
}
|
||||
|
||||
/*
	round trip : setStr(s) -> getStr() -> setStr() gives an equal mask
*/
CYBOZU_TEST_AUTO(str)
{
	const char *s = "0-1,3,4-7";
	CpuMask m;
	CpuMask m2;

	CYBOZU_TEST_ASSERT(m.setStr(s));
	// m2 is built from the string produced by m
	CYBOZU_TEST_ASSERT(m2.setStr(m.getStr()));
	CYBOZU_TEST_EQUAL(m.getStr(), m2.getStr());
	CYBOZU_TEST_ASSERT(m == m2);
}
|
||||
|
||||
/*
	setStr() must return false for every string in s
*/
CYBOZU_TEST_AUTO(errStr)
{
	const char *s[] = {
		",", ",,", "1,", "1,,",
		"-8", "3-", "0-8", "0--2", "2-0",
		"2,1",
		"0-1-2",
		"0,a",
		"0-2,", ",0-2", "0-2,,4",
	};
	const size_t sNum = sizeof(s) / sizeof(s[0]);
	for (size_t i = 0; i < sNum; i++) {
		// a fresh mask for every string
		CpuMask m;
//		printf("errStr test %s\n", s[i]);
		CYBOZU_TEST_ASSERT(!m.setStr(s[i]));
	}
}
|
||||
|
||||
/*
	exhaustive test : i runs over all bit patterns of (1 << XBYAK_CPUMASK_BITN) bits.
	bit j of i is set <=> j is appended to the mask
*/
CYBOZU_TEST_AUTO(pattern)
{
	const uint32_t bitN = XBYAK_CPUMASK_BITN;
	const uint32_t bit = 1 << bitN;
	for (uint32_t i = 0; i < (1 << bit); i++) {
		// build the mask from the set bits of i, lowest bit first
		CpuMask m;
		uint32_t cnt = 0;
		for (uint32_t j = 0; j < bit; j++) {
			if ((i & (1 << j)) == 0) continue;
			cnt++;
			CYBOZU_TEST_ASSERT(m.append(j));
		}
		CYBOZU_TEST_EQUAL(m.size(), cnt);
#if 0
		printf("pattern (%3u) ", i);
		for (int j = int(bit) - 1; j >= 0; j--) {
			if (i & (uint64_t(1) << j)) printf("%d ", j);
		}
		printf("\n");
#endif
		// iteration yields cnt values and each of them is a set bit of i
		uint32_t idx = 0;
		for (const auto& v : m) {
			CYBOZU_TEST_ASSERT(i & (1 << v));
			idx++;
		}
		CYBOZU_TEST_EQUAL(idx, cnt);

		// round trip through the string form
		std::string mstr = m.getStr();
		CpuMask m2;
		CYBOZU_TEST_ASSERT(m2.setStr(mstr));
		CYBOZU_TEST_ASSERT(m == m2);
		CYBOZU_TEST_EQUAL(mstr, m2.getStr());
	}
}
|
||||
Vendored
+191
@@ -0,0 +1,191 @@
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <cybozu/inttype.hpp>
|
||||
#include <cybozu/test.hpp>
|
||||
|
||||
using namespace Xbyak;
|
||||
using namespace Xbyak::util;
|
||||
|
||||
#ifdef XBYAK64
|
||||
const struct Ptn {
|
||||
const Reg8 *reg8;
|
||||
Reg16 reg16;
|
||||
Reg32 reg32;
|
||||
Reg64 reg64;
|
||||
Xmm x;
|
||||
Ymm y;
|
||||
Zmm z;
|
||||
} tbl[] = {
|
||||
{ &al, ax, eax, rax, xmm0, ymm0, zmm0 },
|
||||
{ &bl, bx, ebx, rbx, xmm3, ymm3, zmm3 },
|
||||
{ &cl, cx, ecx, rcx, xmm1, ymm1, zmm1 },
|
||||
{ &dl, dx, edx, rdx, xmm2, ymm2, zmm2 },
|
||||
{ &sil, si, esi, rsi, xmm6, ymm6, zmm6 },
|
||||
{ &dil, di, edi, rdi, xmm7, ymm7, zmm7 },
|
||||
{ &bpl, bp, ebp, rbp, xmm5, ymm5, zmm5 },
|
||||
{ &spl, sp, esp, rsp, xmm4, ymm4, zmm4 },
|
||||
{ &r8b, r8w, r8d, r8, xmm8, ymm8, zmm8 },
|
||||
{ &r9b, r9w, r9d, r9, xmm9, ymm9, zmm9 },
|
||||
{ &r10b, r10w, r10d, r10, xmm10, ymm10, zmm10 },
|
||||
{ &r11b, r11w, r11d, r11, xmm11, ymm11, zmm11 },
|
||||
{ &r12b, r12w, r12d, r12, xmm12, ymm12, zmm12 },
|
||||
{ &r13b, r13w, r13d, r13, xmm13, ymm13, zmm13 },
|
||||
{ &r14b, r14w, r14d, r14, xmm14, ymm14, zmm14 },
|
||||
{ &r15b, r15w, r15d, r15, xmm15, ymm15, zmm15 },
|
||||
{ &r31b, r31w, r31d, r31, xmm31, ymm31, zmm31 },
|
||||
};
|
||||
#else
|
||||
const struct Ptn {
|
||||
const Reg8 *reg8;
|
||||
Reg16 reg16;
|
||||
Reg32 reg32;
|
||||
Xmm x;
|
||||
Ymm y;
|
||||
Zmm z;
|
||||
} tbl[] = {
|
||||
{ &al, ax, eax, xmm0, ymm0, zmm0 },
|
||||
{ &bl, bx, ebx, xmm3, ymm3, zmm3 },
|
||||
{ &cl, cx, ecx, xmm1, ymm1, zmm1 },
|
||||
{ &dl, dx, edx, xmm2, ymm2, zmm2 },
|
||||
{ 0, si, esi, xmm6, ymm6, zmm6 },
|
||||
{ 0, di, edi, xmm7, ymm7, zmm7 },
|
||||
{ 0, bp, ebp, xmm5, ymm5, zmm5 },
|
||||
{ 0, sp, esp, xmm4, ymm4, zmm4 },
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
	for every row of tbl, cvtN() of each register must give the register of
	width N in the same row.
	the checks that involve an 8-bit register are skipped for rows without one.
	ah, bh, ch, dh (and si, di, bp, sp -> 8-bit if XBYAK32 is defined) must
	raise an exception.
*/
CYBOZU_TEST_AUTO(cvt)
{
	for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
		// 8-bit register <-> all other widths
		if (tbl[i].reg8) {
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt16() == tbl[i].reg16);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt32() == tbl[i].reg32);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt128() == tbl[i].x);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt256() == tbl[i].y);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt512() == tbl[i].z);
			CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].x.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].y.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].z.cvt8() == *tbl[i].reg8);
		}
		// from 16-bit
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt512() == tbl[i].z);
		// from 32-bit
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt512() == tbl[i].z);
		// from xmm
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt512() == tbl[i].z);
		// from ymm
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt512() == tbl[i].z);
		// from zmm
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt256() == tbl[i].y);
		// this line tested y.cvt512() a second time, so z.cvt512() was never checked
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt512() == tbl[i].z);
#ifdef XBYAK64
		// 64-bit register <-> all other widths
		if (tbl[i].reg8) {
			CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt8() == *tbl[i].reg8);
			CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt64() == tbl[i].reg64);
		}
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt16() == tbl[i].reg16);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt32() == tbl[i].reg32);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt128() == tbl[i].x);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt256() == tbl[i].y);
		CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt512() == tbl[i].z);
		CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].x.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].y.cvt64() == tbl[i].reg64);
		CYBOZU_TEST_ASSERT(tbl[i].z.cvt64() == tbl[i].reg64);
#endif
	}
	{
		// the high byte registers can not be converted
		const Reg8 errTbl[] = {
			ah, bh, ch, dh
		};
		for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
			CYBOZU_TEST_EXCEPTION(errTbl[i].cvt16(), std::exception);
		}
	}
#ifdef XBYAK32
	{
		// these registers have no 8-bit form here
		const Reg16 errTbl[] = {
			si, di, bp, sp
		};
		for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
			CYBOZU_TEST_EXCEPTION(errTbl[i].cvt8(), std::exception);
		}
	}
#endif
}
|
||||
|
||||
/*
	tbl[i][k] is the register of width bitTbl[k] in row i (0 if there is none).
	for every register r1 of a row, r1->changeBit(bitTbl[k]) must be tbl[i][k],
	or raise an exception if tbl[i][k] is 0.
*/
CYBOZU_TEST_AUTO(changeBit)
{
	using namespace Xbyak::util;
#ifdef XBYAK64
	const size_t N = 7;
	const int bitTbl[N] = { 8, 16, 32, 64, 128, 256, 512 };
	const Reg* tbl[][N] = {
		{ &al, &ax, &eax, &rax, &xmm0, &ymm0, &zmm0 },
		{ &cl, &cx, &ecx, &rcx, &xmm1, &ymm1, &zmm1 },
		{ &dl, &dx, &edx, &rdx, &xmm2, &ymm2, &zmm2 },
		{ &bl, &bx, &ebx, &rbx, &xmm3, &ymm3, &zmm3 },
		{ &spl, &sp, &esp, &rsp, &xmm4, &ymm4, &zmm4 },
		{ &bpl, &bp, &ebp, &rbp, &xmm5, &ymm5, &zmm5 },
		{ &sil, &si, &esi, &rsi, &xmm6, &ymm6, &zmm6 },
		{ &dil, &di, &edi, &rdi, &xmm7, &ymm7, &zmm7 },
		{ &r8b, &r8w, &r8d, &r8, &xmm8, &ymm8, &zmm8 },
		{ &r15b, &r15w, &r15d, &r15, &xmm15, &ymm15, &zmm15 },
		{ &r16b, &r16w, &r16d, &r16, &xmm16, &ymm16, &zmm16 },
		{ &r31b, &r31w, &r31d, &r31, &xmm31, &ymm31, &zmm31 },
	};
#else
	const size_t N = 6;
	const int bitTbl[N] = { 8, 16, 32, 128, 256, 512 };
	const Reg* tbl[][N] = {
		{ &al, &ax, &eax, &xmm0, &ymm0, &zmm0 },
		{ &cl, &cx, &ecx, &xmm1, &ymm1, &zmm1 },
		{ &dl, &dx, &edx, &xmm2, &ymm2, &zmm2 },
		{ &bl, &bx, &ebx, &xmm3, &ymm3, &zmm3 },
		{ 0, &sp, &esp, &xmm4, &ymm4, &zmm4 },
		{ 0, &bp, &ebp, &xmm5, &ymm5, &zmm5 },
		{ 0, &si, &esi, &xmm6, &ymm6, &zmm6 },
		{ 0, &di, &edi, &xmm7, &ymm7, &zmm7 },
	};
#endif

	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
		for (size_t j = 0; j < N; j++) {
			const Reg *r1 = tbl[i][j];
			if (r1 == 0) continue;
			for (size_t k = 0; k < N; k++) {
				if (tbl[i][k] == 0) {
					// the row has no register of this width
					CYBOZU_TEST_EXCEPTION(r1->changeBit(bitTbl[k]), std::exception);
					continue;
				}
				CYBOZU_TEST_ASSERT(*tbl[i][k] == r1->changeBit(bitTbl[k]));
//				printf("%s->changeBit(%d)=%s %s\n", r1->toString(), bitTbl[k], r1->changeBit(bitTbl[k]).toString(), tbl[i][k]->toString());
			}
		}
	}
#ifdef XBYAK64
	// the high byte registers can not be widened
	const Reg8 *special8bitTbl[] = { &ah, &bh, &ch, &dh };
	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(special8bitTbl); i++) {
		CYBOZU_TEST_EXCEPTION(special8bitTbl[i]->changeBit(16), std::exception);
	}
#endif
}
|
||||
+163
@@ -0,0 +1,163 @@
|
||||
#pragma once
|
||||
/**
|
||||
@file
|
||||
@brief int type definition and macros
|
||||
@author MITSUNARI Shigeo(@herumi)
|
||||
*/
|
||||
|
||||
/*
	fixed width integer types.
	the types are declared by hand only for the old Visual C++ versions
	selected by the condition below; otherwise <stdint.h> is used.
	(the condition used the undefined name MSC_VER, which evaluates to 0
	in #if, so the hand written typedefs were selected for every version
	of Visual C++)
*/
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && !defined(CYBOZU_DEFINED_INTXX)
	#define CYBOZU_DEFINED_INTXX
	typedef __int64 int64_t;
	typedef unsigned __int64 uint64_t;
	typedef unsigned int uint32_t;
	typedef int int32_t;
	typedef unsigned short uint16_t;
	typedef short int16_t;
	typedef unsigned char uint8_t;
	typedef signed char int8_t;
#else
	#include <stdint.h>
#endif
|
||||
|
||||
/* ssize_t : declared by hand for Visual C++, taken from <unistd.h> otherwise */
#ifdef _MSC_VER
	#ifndef CYBOZU_DEFINED_SSIZE_T
		#define CYBOZU_DEFINED_SSIZE_T
		#ifdef _WIN64
			typedef int64_t ssize_t;
		#else
			typedef int32_t ssize_t;
		#endif
	#endif
#else
	#include <unistd.h> // for ssize_t
#endif

/*
	compiler dependent helpers.
	each macro is defined only if the user has not defined it before
*/
#ifndef CYBOZU_ALIGN
	#ifdef _MSC_VER
		#define CYBOZU_ALIGN(x) __declspec(align(x))
	#else
		#define CYBOZU_ALIGN(x) __attribute__((aligned(x)))
	#endif
#endif
#ifndef CYBOZU_FORCE_INLINE
	#ifdef _MSC_VER
		#define CYBOZU_FORCE_INLINE __forceinline
	#else
		#define CYBOZU_FORCE_INLINE __attribute__((always_inline))
	#endif
#endif
#ifndef CYBOZU_UNUSED
	#ifdef __GNUC__
		#define CYBOZU_UNUSED __attribute__((unused))
	#else
		#define CYBOZU_UNUSED
	#endif
#endif
#ifndef CYBOZU_ALLOCA
	#ifdef _MSC_VER
		#include <malloc.h>
		#define CYBOZU_ALLOCA(x) _malloca(x)
	#else
		#define CYBOZU_ALLOCA(x) __builtin_alloca(x)
	#endif
#endif
/* number of elements of the array x */
#ifndef CYBOZU_NUM_OF_ARRAY
	#define CYBOZU_NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
#endif
/* snprintf whose return value is discarded */
#ifndef CYBOZU_SNPRINTF
	#if defined(_MSC_VER) && (_MSC_VER < 1900)
		#define CYBOZU_SNPRINTF(x, len, ...) (void)_snprintf_s(x, len, len - 1, __VA_ARGS__)
	#else
		#define CYBOZU_SNPRINTF(x, len, ...) (void)snprintf(x, len, __VA_ARGS__)
	#endif
#endif

/* values of CYBOZU_CPP_VERSION */
#define CYBOZU_CPP_VERSION_CPP03 0
#define CYBOZU_CPP_VERSION_TR1 1
#define CYBOZU_CPP_VERSION_CPP11 2
#define CYBOZU_CPP_VERSION_CPP14 3
#define CYBOZU_CPP_VERSION_CPP17 4

/* true if the GNU C version is at least major.minor; always 0 without __GNUC__ */
#ifdef __GNUC__
	#define CYBOZU_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
#else
	#define CYBOZU_GNUC_PREREQ(major, minor) 0
#endif

/* CYBOZU_CPP_VERSION : one of the CYBOZU_CPP_VERSION_* values, newest standard checked first */
#if (__cplusplus >= 201703)
	#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP17
#elif (__cplusplus >= 201402)
	#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP14
#elif (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
	#if defined(_MSC_VER) && (_MSC_VER <= 1600)
		#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
	#else
		#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
	#endif
#elif CYBOZU_GNUC_PREREQ(4, 5) || (CYBOZU_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || (__clang_major__ >= 3)
	#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
#else
	#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03
#endif

/* namespace selection : boost, std::tr1 or std */
#ifdef CYBOZU_USE_BOOST
	#define CYBOZU_NAMESPACE_STD boost
	#define CYBOZU_NAMESPACE_TR1_BEGIN
	#define CYBOZU_NAMESPACE_TR1_END
#elif (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1) && !defined(__APPLE__)
	#define CYBOZU_NAMESPACE_STD std::tr1
	#define CYBOZU_NAMESPACE_TR1_BEGIN namespace tr1 {
	#define CYBOZU_NAMESPACE_TR1_END }
#else
	#define CYBOZU_NAMESPACE_STD std
	#define CYBOZU_NAMESPACE_TR1_BEGIN
	#define CYBOZU_NAMESPACE_TR1_END
#endif

/* CYBOZU_OS_BIT : 64 or 32 */
#ifndef CYBOZU_OS_BIT
	#if defined(_WIN64) || defined(__x86_64__) || defined(__AARCH64EL__) || defined(__EMSCRIPTEN__)
		#define CYBOZU_OS_BIT 64
	#else
		#define CYBOZU_OS_BIT 32
	#endif
#endif

/* CYBOZU_HOST : one of the CYBOZU_HOST_* values */
#ifndef CYBOZU_HOST
	#define CYBOZU_HOST_UNKNOWN 0
	#define CYBOZU_HOST_INTEL 1
	#define CYBOZU_HOST_ARM 2
	#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
		#define CYBOZU_HOST CYBOZU_HOST_INTEL
	#elif defined(__arm__) || defined(__AARCH64EL__)
		#define CYBOZU_HOST CYBOZU_HOST_ARM
	#else
		#define CYBOZU_HOST CYBOZU_HOST_UNKNOWN
	#endif
#endif

/* CYBOZU_ENDIAN : one of the CYBOZU_ENDIAN_* values, derived from CYBOZU_HOST */
#ifndef CYBOZU_ENDIAN
	#define CYBOZU_ENDIAN_UNKNOWN 0
	#define CYBOZU_ENDIAN_LITTLE 1
	#define CYBOZU_ENDIAN_BIG 2
	#if (CYBOZU_HOST == CYBOZU_HOST_INTEL)
		#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
	#elif (CYBOZU_HOST == CYBOZU_HOST_ARM) && (defined(__ARM_EABI__) || defined(__AARCH64EL__))
		#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
	#else
		#define CYBOZU_ENDIAN CYBOZU_ENDIAN_UNKNOWN
	#endif
#endif

/* CYBOZU_NOEXCEPT : noexcept for C++11 or later, throw() otherwise */
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
	#define CYBOZU_NOEXCEPT noexcept
#else
	#define CYBOZU_NOEXCEPT throw()
#endif
|
||||
namespace cybozu {

/* takes any value and does nothing with it */
template<class T>
void disable_warning_unused_variable(const T&)
{
}

/* convert a pointer to const S into the pointer type T by way of const void* */
template<class T, class S>
T cast(const S* ptr)
{
	const void *raw = ptr;
	return static_cast<T>(raw);
}

/* convert a pointer to S into the pointer type T by way of void* */
template<class T, class S>
T cast(S* ptr)
{
	void *raw = ptr;
	return static_cast<T>(raw);
}

} // cybozu
|
||||
Vendored
+373
@@ -0,0 +1,373 @@
|
||||
#pragma once
|
||||
/**
|
||||
@file
|
||||
@brief unit test class
|
||||
|
||||
@author MITSUNARI Shigeo(@herumi)
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
/*
	fixed width integer types : <cybozu/inttype.hpp> for the old Visual C++
	versions selected below, <stdint.h> otherwise.
	(the condition used the undefined name MSC_VER, which evaluates to 0
	in #if, so it was true for every version of Visual C++)
*/
#if defined(_MSC_VER) && (_MSC_VER <= 1500)
	#include <cybozu/inttype.hpp>
#else
	#include <stdint.h>
#endif
|
||||
|
||||
namespace cybozu { namespace test {
|
||||
|
||||
/*
	registry and runner of the unit tests.
	tests are added with append(), assertion results are counted with set(),
	and run() executes all tests and prints a summary line.
*/
class AutoRun {
	typedef void (*Func)();
	typedef std::pair<const char*, Func> UnitTest;
	typedef std::list<UnitTest> UnitTestList;
public:
	AutoRun()
		: init_(0), term_(0), okCount_(0), ngCount_(0), exceptionCount_(0)
	{
	}
	/* set the functions called before the first test and after the last test */
	void setup(Func init, Func term)
	{
		init_ = init;
		term_ = term;
	}
	/* register the test func under name */
	void append(const char *name, Func func)
	{
		list_.push_back(UnitTest(name, func));
	}
	/* count one assertion result */
	void set(bool isOK)
	{
		int& counter = isOK ? okCount_ : ngCount_;
		counter++;
	}
	/*
		file name of a path without directory and extension:
		the part after the last path separator, cut at its first '.'
	*/
	std::string getBaseName(const std::string& name) const
	{
#ifdef _WIN32
		const char sep = '\\';
#else
		const char sep = '/';
#endif
		// npos + 1 wraps to 0, i.e. the whole name is used if there is no separator
		const size_t begin = name.find_last_of(sep) + 1;
		const size_t dot = name.find('.', begin);
		if (dot == std::string::npos) {
			return name.substr(begin);
		}
		return name.substr(begin, dot - begin);
	}
	/*
		run init, all registered tests and term, then print the summary.
		an exception thrown by a test is counted and the next test is run;
		an exception thrown by init or term aborts the run.
		@param argv argv[0] is used as the name in the summary
		@return 0 if there is no failed assertion and no exception, 1 otherwise
	*/
	int run(int, char *argv[])
	{
		std::string msg;
		try {
			if (init_) init_();
			for (UnitTestList::const_iterator it = list_.begin(); it != list_.end(); ++it) {
				const char *testName = it->first;
				std::cout << "ctest:module=" << testName << std::endl;
				try {
					(it->second)();
				} catch (std::exception& e) {
					exceptionCount_++;
					std::cout << "ctest: " << testName << " is stopped by exception " << e.what() << std::endl;
				} catch (...) {
					exceptionCount_++;
					std::cout << "ctest: " << testName << " is stopped by unknown exception" << std::endl;
				}
			}
			if (term_) term_();
		} catch (std::exception& e) {
			msg = std::string("ctest:err:") + e.what();
		} catch (...) {
			msg = "ctest:err: catch unknown exception";
		}
		fflush(stdout);
		if (!msg.empty()) {
			// init or term failed
			std::cout << msg << std::endl;
			return 1;
		}
		const int err = ngCount_ + exceptionCount_;
		const int total = okCount_ + err;
		std::cout << "ctest:name=" << getBaseName(*argv)
				  << ", module=" << list_.size()
				  << ", total=" << total
				  << ", ok=" << okCount_
				  << ", ng=" << ngCount_
				  << ", exception=" << exceptionCount_ << std::endl;
		return err > 0 ? 1 : 0;
	}
	/* the instance shared by the whole program */
	static inline AutoRun& getInstance()
	{
		static AutoRun instance;
		return instance;
	}
private:
	Func init_;
	Func term_;
	int okCount_;
	int ngCount_;
	int exceptionCount_;
	UnitTestList list_;
};
|
||||
|
||||
// Internal-linkage reference to the object returned by AutoRun::getInstance();
// the CYBOZU_TEST_* macros and main() in this header go through this name.
static AutoRun& autoRun = AutoRun::getInstance();
|
||||
|
||||
inline void test(bool ret, const std::string& msg, const std::string& param, const char *file, int line)
|
||||
{
|
||||
autoRun.set(ret);
|
||||
if (!ret) {
|
||||
printf("%s(%d):ctest:%s(%s);\n", file, line, msg.c_str(), param.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
/**
	generic comparison
	@return the result of lhs == rhs converted to bool
*/
template<typename T, typename U>
bool isEqual(const T& lhs, const U& rhs)
{
	const bool same = (lhs == rhs);
	return same;
}
|
||||
|
||||
// avoid warnings about comparing integers of different signedness:
// the int operand is converted to size_t before the comparison
inline bool isEqual(size_t lhs, int rhs)
{
	const size_t r = static_cast<size_t>(rhs);
	return lhs == r;
}
inline bool isEqual(int lhs, size_t rhs)
{
	const size_t l = static_cast<size_t>(lhs);
	return l == rhs;
}
|
||||
// C strings are compared by content (strcmp), not by address
inline bool isEqual(const char *lhs, const char *rhs)
{
	return strcmp(lhs, rhs) == 0;
}
// the remaining overloads only add const and forward to the one above
inline bool isEqual(char *lhs, const char *rhs)
{
	const char *l = lhs;
	return isEqual(l, rhs);
}
inline bool isEqual(const char *lhs, char *rhs)
{
	const char *r = rhs;
	return isEqual(lhs, r);
}
inline bool isEqual(char *lhs, char *rhs)
{
	const char *l = lhs;
	const char *r = rhs;
	return isEqual(l, r);
}
|
||||
// compare the bit patterns instead of using operator== on float
// (so +0.0f and -0.0f are different, and a NaN equals a NaN with the same bits)
inline bool isEqual(float lhs, float rhs)
{
	// memcpy is the well-defined way to read an object representation;
	// reading the inactive member of a union is undefined behavior in C++
	uint32_t lbits;
	uint32_t rbits;
	memcpy(&lbits, &lhs, sizeof(lbits));
	memcpy(&rbits, &rhs, sizeof(rbits));
	return lbits == rbits;
}
|
||||
// compare the bit patterns instead of using operator== on double
// (so +0.0 and -0.0 are different, and a NaN equals a NaN with the same bits)
inline bool isEqual(double lhs, double rhs)
{
	// memcpy is the well-defined way to read an object representation;
	// reading the inactive member of a union is undefined behavior in C++
	uint64_t lbits;
	uint64_t rbits;
	memcpy(&lbits, &lhs, sizeof(lbits));
	memcpy(&rbits, &rhs, sizeof(rbits));
	return lbits == rbits;
}
|
||||
|
||||
} } // cybozu::test
|
||||
|
||||
#ifndef CYBOZU_TEST_DISABLE_AUTO_RUN
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
return cybozu::test::autoRun.run(argc, argv);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
alert if !x
|
||||
@param x [in]
|
||||
*/
|
||||
// !!(x) turns the condition into a bool; #x passes the expression text as the report parameter
#define CYBOZU_TEST_ASSERT(x) cybozu::test::test(!!(x), "CYBOZU_TEST_ASSERT", #x, __FILE__, __LINE__)
|
||||
|
||||
/**
	alert if x != y (compared with cybozu::test::isEqual);
	on failure both operands are also written to std::cout
	@param x [in]
	@param y [in]
	@note x and y are evaluated a second time when the check fails
*/
#define CYBOZU_TEST_EQUAL(x, y) { \
	const bool _cybozu_same = cybozu::test::isEqual((x), (y)); \
	cybozu::test::test(_cybozu_same, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
	if (_cybozu_same == false) { \
		std::cout << "ctest: lhs=" << (x) << std::endl; \
		std::cout << "ctest: rhs=" << (y) << std::endl; \
	} \
}
|
||||
/**
	alert if fabs(x - y) >= eps
	@param x [in]
	@param y [in]
	@param eps [in] tolerance; the check passes only when the difference is strictly smaller
	@note fabs must be declared where the macro is used (e.g. via <math.h>);
	      this header does not include <math.h> directly
	@note x and y are evaluated a second time when the check fails
*/
#define CYBOZU_TEST_NEAR(x, y, eps) { \
	bool _cybozu_isNear = fabs((x) - (y)) < (eps); /* eps parenthesized so a?b:c etc. parses as one operand */ \
	cybozu::test::test(_cybozu_isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
	if (!_cybozu_isNear) { \
		std::cout << "ctest: lhs=" << (x) << std::endl; \
		std::cout << "ctest: rhs=" << (y) << std::endl; \
	} \
}
|
||||
|
||||
/**
	alert if the pointers x and y differ; on failure both addresses are printed
	@param x [in] pointer convertible to const void*
	@param y [in] pointer convertible to const void*
	@note x and y are evaluated a second time when the check fails
*/
#define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
	bool _cybozu_eq = (x) == (y); /* operands parenthesized so compound expressions compare as a whole */ \
	cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
	if (!_cybozu_eq) { \
		std::cout << "ctest: lhs=" << static_cast<const void*>(x) << std::endl; \
		std::cout << "ctest: rhs=" << static_cast<const void*>(y) << std::endl; \
	} \
}
|
||||
/**
	alert if x[i] != y[i] for any i in [0, n); one check is recorded per element
	@param x [in] first array-like operand (indexed with [])
	@param y [in] second array-like operand (indexed with [])
	@param n [in] number of elements to compare; evaluated once
*/
#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n) { \
	size_t _cybozu_test_i = 0; \
	const size_t _cybozu_ie = (size_t)(n); \
	for (; _cybozu_test_i < _cybozu_ie; ++_cybozu_test_i) { \
		const bool _cybozu_eq = cybozu::test::isEqual((x)[_cybozu_test_i], (y)[_cybozu_test_i]); \
		cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_ARRAY", #x ", " #y ", " #n, __FILE__, __LINE__); \
		if (_cybozu_eq) continue; \
		std::cout << "ctest: i=" << _cybozu_test_i << std::endl; \
		std::cout << "ctest: lhs=" << (x)[_cybozu_test_i] << std::endl; \
		std::cout << "ctest: rhs=" << (y)[_cybozu_test_i] << std::endl; \
	} \
}
|
||||
|
||||
/**
|
||||
always alert
|
||||
@param msg [in]
|
||||
*/
|
||||
// msg is passed as the std::string report parameter, so it must be convertible to std::string
#define CYBOZU_TEST_FAIL(msg) cybozu::test::test(false, "CYBOZU_TEST_FAIL", msg, __FILE__, __LINE__)
|
||||
|
||||
/**
	verify that statement throws Exception and that what() contains msg
	@param statement [in] code expected to throw
	@param Exception [in] exception type, caught by const reference
	@param msg [in] text searched for in what() with std::string::find
	@note internal state: 0 = ok, 1 = nothing thrown, 2 = message mismatch,
	      3 = some other exception type was thrown
*/
#define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
{ \
	int _cybozu_ret = 0; \
	std::string _cybozu_errMsg; \
	try { \
		statement; \
		_cybozu_ret = 1; \
	} catch (const Exception& _cybozu_e) { \
		_cybozu_errMsg = _cybozu_e.what(); \
		if (_cybozu_errMsg.find(msg) == std::string::npos) { \
			_cybozu_ret = 2; \
		} \
	} catch (...) { \
		_cybozu_ret = 3; \
	} \
	if (_cybozu_ret == 0) { \
		cybozu::test::autoRun.set(true); \
	} else { \
		cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
		if (_cybozu_ret == 1) { \
			std::cout << "ctest: no exception" << std::endl; \
		} else if (_cybozu_ret == 2) { \
			std::cout << "ctest: bad exception msg:" << _cybozu_errMsg << std::endl; \
		} else { \
			std::cout << "ctest: unexpected exception" << std::endl; \
		} \
	} \
}
|
||||
|
||||
/**
	verify that statement throws Exception
	@param statement [in] code expected to throw
	@param Exception [in] exception type, caught by const reference
	@note internal state: 0 = ok, 1 = nothing thrown,
	      2 = some other exception type was thrown
*/
#define CYBOZU_TEST_EXCEPTION(statement, Exception) \
{ \
	int _cybozu_ret = 0; \
	try { \
		statement; \
		_cybozu_ret = 1; \
	} catch (const Exception&) { \
	} catch (...) { \
		_cybozu_ret = 2; \
	} \
	if (_cybozu_ret == 0) { \
		cybozu::test::autoRun.set(true); \
	} else { \
		cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
		if (_cybozu_ret == 1) { \
			std::cout << "ctest: no exception" << std::endl; \
		} else { \
			std::cout << "ctest: unexpected exception" << std::endl; \
		} \
	} \
}
|
||||
|
||||
/**
|
||||
verify statement does not throw
|
||||
*/
|
||||
// Expands to a bare try/catch statement (no enclosing braces): records ok when
// statement completes, and a failure when anything is thrown.
#define CYBOZU_TEST_NO_EXCEPTION(statement) \
|
||||
try { \
|
||||
statement; \
|
||||
cybozu::test::autoRun.set(true); \
|
||||
} catch (...) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_NO_EXCEPTION", #statement, __FILE__, __LINE__); \
|
||||
}
|
||||
|
||||
/**
|
||||
append auto unit test
|
||||
@param name [in] module name
|
||||
*/
|
||||
// Declares cybozu_test_<name>, defines an object whose constructor appends that
// function to autoRun under the module name #name, and ends with the function
// header so that the user's { ... } body follows the macro.
#define CYBOZU_TEST_AUTO(name) \
|
||||
void cybozu_test_ ## name(); \
|
||||
struct cybozu_test_local_ ## name { \
|
||||
cybozu_test_local_ ## name() \
|
||||
{ \
|
||||
cybozu::test::autoRun.append(#name, cybozu_test_ ## name); \
|
||||
} \
|
||||
} cybozu_test_local_instance_ ## name; \
|
||||
void cybozu_test_ ## name()
|
||||
|
||||
/**
|
||||
append auto unit test with fixture
|
||||
@param name [in] module name
|
||||
*/
|
||||
// Like CYBOZU_TEST_AUTO, but the registered function is cybozu_test_real_<name>,
// which constructs a local Fixture and then calls the user's body, so the
// Fixture object exists only for the duration of that call.
#define CYBOZU_TEST_AUTO_WITH_FIXTURE(name, Fixture) \
|
||||
void cybozu_test_ ## name(); \
|
||||
void cybozu_test_real_ ## name() \
|
||||
{ \
|
||||
Fixture f; \
|
||||
cybozu_test_ ## name(); \
|
||||
} \
|
||||
struct cybozu_test_local_ ## name { \
|
||||
cybozu_test_local_ ## name() \
|
||||
{ \
|
||||
cybozu::test::autoRun.append(#name, cybozu_test_real_ ## name); \
|
||||
} \
|
||||
} cybozu_test_local_instance_ ## name; \
|
||||
void cybozu_test_ ## name()
|
||||
|
||||
/**
|
||||
setup fixture
|
||||
@param Fixture [in] class name of fixture
|
||||
@note the constructor of Fixture is called before the tests run and its destructor after they finish
|
||||
*/
|
||||
// Defines init/term functions that new and delete one Fixture object, plus an
// object whose constructor passes them to autoRun.setup().
#define CYBOZU_TEST_SETUP_FIXTURE(Fixture) \
|
||||
Fixture *cybozu_test_local_fixture; \
|
||||
void cybozu_test_local_init() \
|
||||
{ \
|
||||
cybozu_test_local_fixture = new Fixture(); \
|
||||
} \
|
||||
void cybozu_test_local_term() \
|
||||
{ \
|
||||
delete cybozu_test_local_fixture; \
|
||||
} \
|
||||
struct cybozu_test_local_fixture_setup_ { \
|
||||
cybozu_test_local_fixture_setup_() \
|
||||
{ \
|
||||
cybozu::test::autoRun.setup(cybozu_test_local_init, cybozu_test_local_term); \
|
||||
} \
|
||||
} cybozu_test_local_fixture_setup_instance_;
|
||||
Vendored
+94
@@ -0,0 +1,94 @@
|
||||
ldtilecfg(ptr[rax + rcx * 4 + 64]);
|
||||
ldtilecfg(ptr [r30+r29*4+0x12]);
|
||||
ldtilecfg(ptr [rax]);
|
||||
sttilecfg(ptr[rsp + rax * 8 + 64]);
|
||||
sttilecfg(ptr [r30+r29*4+0x12]);
|
||||
sttilecfg(ptr [r30]);
|
||||
tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
|
||||
tileloadd(tmm2, ptr [r30+r29*4+0x12]);
|
||||
tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
|
||||
tileloaddt1(tmm7, ptr [r30+r29*4+0x12]);
|
||||
tilerelease();
|
||||
tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
|
||||
tilestored(ptr [r30+r29*4+0x12], tmm1);
|
||||
tilezero(tmm7);
|
||||
tdpbssd(tmm1, tmm2, tmm3);
|
||||
tdpbsud(tmm2, tmm3, tmm4);
|
||||
tdpbusd(tmm3, tmm4, tmm5);
|
||||
tdpbuud(tmm4, tmm5, tmm6);
|
||||
tdpfp16ps(tmm5, tmm6, tmm7);
|
||||
tdpbf16ps(tmm5, tmm6, tmm7);
|
||||
tileloadd(tmm1, ptr[r8+r8]);
|
||||
tileloadd(tmm1, ptr[rax+rcx*4]);
|
||||
tileloadd(tmm1, ptr[r8+r9*1+0x40]);
|
||||
tileloadd(tmm1, ptr[r30+r29*1+0x80]);
|
||||
tileloaddrs(tmm3, ptr[rdi + rdx * 2 + 8]);
|
||||
tileloaddrs(tmm7, ptr[r31 + rdx * 2 + 8]);
|
||||
tileloaddrst1(tmm4, ptr[r8 + r9 + 32]);
|
||||
tileloaddrst1(tmm4, ptr[r25 + r9 + 32]);
|
||||
|
||||
tdpbf8ps(tmm1, tmm2, tmm3);
|
||||
tdpbhf8ps(tmm1, tmm2, tmm3);
|
||||
tdphbf8ps(tmm1, tmm2, tmm3);
|
||||
tdphf8ps(tmm1, tmm2, tmm3);
|
||||
|
||||
tmmultf32ps(tmm1, tmm2, tmm3);
|
||||
|
||||
//t2rpntlvwz0(tmm1, ptr[rax+r8*2+0x80]);
|
||||
//t2rpntlvwz0(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
//t2rpntlvwz0t1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
//t2rpntlvwz0t1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
//t2rpntlvwz1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
//t2rpntlvwz1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
//t2rpntlvwz1t1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
//t2rpntlvwz1t1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
//t2rpntlvwz0rs(tmm1, ptr[rax+r8*2+0x80]);
|
||||
//t2rpntlvwz0rs(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
//t2rpntlvwz0rst1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
//t2rpntlvwz0rst1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
//t2rpntlvwz1rs(tmm1, ptr[rax+r8*2+0x80]);
|
||||
//t2rpntlvwz1rs(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
//t2rpntlvwz1rst1(tmm1, ptr[rax+r8*2+0x80]);
|
||||
//t2rpntlvwz1rst1(tmm7, ptr[r30+r8*2+0x80]);
|
||||
|
||||
tcmmimfp16ps(tmm1, tmm2, tmm3);
|
||||
tcmmrlfp16ps(tmm1, tmm2, tmm3);
|
||||
|
||||
//tconjtcmmimfp16ps(tmm1, tmm2, tmm3);
|
||||
|
||||
//tconjtfp16(tmm1, tmm2);
|
||||
|
||||
tcvtrowps2bf16h(zmm1, tmm2, r30d);
|
||||
tcvtrowps2bf16h(zmm29, tmm2, 0x12);
|
||||
|
||||
tcvtrowps2bf16l(zmm1, tmm2, r30d);
|
||||
tcvtrowps2bf16l(zmm29, tmm2, 0x12);
|
||||
|
||||
tcvtrowps2phh(zmm1, tmm2, r30d);
|
||||
tcvtrowps2phh(zmm29, tmm2, 0x12);
|
||||
|
||||
tcvtrowps2phl(zmm1, tmm2, r30d);
|
||||
tcvtrowps2phl(zmm29, tmm2, 0x12);
|
||||
|
||||
tilemovrow(zmm1, tmm2, r30d);
|
||||
tilemovrow(zmm29, tmm2, 0x12);
|
||||
|
||||
//ttcmmimfp16ps(tmm1, tmm2, tmm3);
|
||||
//ttcmmrlfp16ps(tmm1, tmm2, tmm3);
|
||||
|
||||
//ttdpbf16ps(tmm1, tmm2, tmm3);
|
||||
//ttdpfp16ps(tmm1, tmm2, tmm3);
|
||||
|
||||
//ttmmultf32ps(tmm1, tmm2, tmm3);
|
||||
|
||||
//ttransposed(tmm1, tmm2);
|
||||
|
||||
tcvtrowd2ps(zmm20, tmm1, r30d);
|
||||
tcvtrowd2ps(zmm20, tmm1, 0x12);
|
||||
Vendored
+21
@@ -0,0 +1,21 @@
|
||||
// https://github.com/herumi/xbyak/pull/202
|
||||
sal(rax, r8, 1);
|
||||
sar(rax, r9, 4);
|
||||
shl(rax, rdi, 8);
|
||||
shr(rax, rsi, 12);
|
||||
rcl(rax, r10, 16);
|
||||
rcr(rax, r11, 20);
|
||||
rol(rax, r14, 24);
|
||||
ror(rax, r15, 28);
|
||||
sal(rcx, qword[r8], 32);
|
||||
sar(rcx, qword[r9], 36);
|
||||
sal(rcx, qword[rdi], 40);
|
||||
sar(rcx, qword[rsi], 44);
|
||||
rcl(rcx, qword[r10], 48);
|
||||
rcr(rcx, qword[r11], 52);
|
||||
rol(rcx, qword[r14], 56);
|
||||
ror(rcx, qword[r15], 60);
|
||||
|
||||
imul(rax, rdx, r10);
|
||||
imul(rcx, r15, qword[rdi]);
|
||||
|
||||
Vendored
+210
@@ -0,0 +1,210 @@
|
||||
vaddbf16(xm1, xm2, xm3);
|
||||
vaddbf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vaddbf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vaddbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vdivbf16(xm1, xm2, xm3);
|
||||
vdivbf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vdivbf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vdivbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vmaxbf16(xm1, xm2, xm3);
|
||||
vmaxbf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vmaxbf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vmaxbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vminbf16(xm1, xm2, xm3);
|
||||
vminbf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vminbf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vminbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vmulbf16(xm1, xm2, xm3);
|
||||
vmulbf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vmulbf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vmulbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vscalefbf16(xm1, xm2, xm3);
|
||||
vscalefbf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vscalefbf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vscalefbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vsubbf16(xm1, xm2, xm3);
|
||||
vsubbf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vsubbf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vsubbf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
// madd
|
||||
vfmadd132bf16(xm1, xm2, xm3);
|
||||
vfmadd132bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfmadd132bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfmadd132bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vfmadd213bf16(xm1, xm2, xm3);
|
||||
vfmadd213bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfmadd213bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfmadd213bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vfmadd231bf16(xm1, xm2, xm3);
|
||||
vfmadd231bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfmadd231bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfmadd231bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
// nmadd
|
||||
vfnmadd132bf16(xm1, xm2, xm3);
|
||||
vfnmadd132bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfnmadd132bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfnmadd132bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vfnmadd213bf16(xm1, xm2, xm3);
|
||||
vfnmadd213bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfnmadd213bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfnmadd213bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vfnmadd231bf16(xm1, xm2, xm3);
|
||||
vfnmadd231bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfnmadd231bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfnmadd231bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
// msub
|
||||
vfmsub132bf16(xm1, xm2, xm3);
|
||||
vfmsub132bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfmsub132bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfmsub132bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vfmsub213bf16(xm1, xm2, xm3);
|
||||
vfmsub213bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfmsub213bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfmsub213bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vfmsub231bf16(xm1, xm2, xm3);
|
||||
vfmsub231bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfmsub231bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfmsub231bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
// nmsub
|
||||
vfnmsub132bf16(xm1, xm2, xm3);
|
||||
vfnmsub132bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfnmsub132bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfnmsub132bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vfnmsub213bf16(xm1, xm2, xm3);
|
||||
vfnmsub213bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfnmsub213bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfnmsub213bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vfnmsub231bf16(xm1, xm2, xm3);
|
||||
vfnmsub231bf16(ym1|k1, ym2, ptr[rax+64]);
|
||||
vfnmsub231bf16(ym1|k1, ym2, ptr_b[rax+64]);
|
||||
vfnmsub231bf16(zm1|k2|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
vcmpbf16(k1, xm5, xm4, 5);
|
||||
vcmpbf16(k2, ym5, ym4, 6);
|
||||
vcmpbf16(k3, ym15, ptr_b[rax+64], 7);
|
||||
vcmpbf16(k4, zm30, zm20, 8);
|
||||
vcmpbf16(k5, zm1, ptr[rax+64], 9);
|
||||
vcmpbf16(k6, zm10, ptr_b[rax+64], 10);
|
||||
|
||||
vfpclassbf16(k1, xm4, 5);
|
||||
vfpclassbf16(k2|k5, ym4, 6);
|
||||
vfpclassbf16(k3|k5, zm20, 7);
|
||||
vfpclassbf16(k3|k5, xword[rax+64], 8);
|
||||
vfpclassbf16(k3, xword_b[rax+64], 9);
|
||||
vfpclassbf16(k5|k5, yword[rax+64], 10);
|
||||
vfpclassbf16(k6|k5, yword_b[rax+64], 11);
|
||||
vfpclassbf16(k7|k5, zword[rax+64], 12);
|
||||
vfpclassbf16(k7|k5, zword_b[rax+64], 13);
|
||||
|
||||
vcomisbf16(xm2, xm3);
|
||||
vcomisbf16(xm2, ptr[rax+64]);
|
||||
|
||||
vgetexpbf16(xm1|k3, xmm2);
|
||||
vgetexpbf16(xm1|k3, ptr[rax+64]);
|
||||
vgetexpbf16(xm1|k3, ptr_b[rax+64]);
|
||||
|
||||
vgetexpbf16(ym1|k3, ymm2);
|
||||
vgetexpbf16(ym1|k3, ptr[rax+64]);
|
||||
vgetexpbf16(ym1|k3, ptr_b[rax+64]);
|
||||
|
||||
vgetexpbf16(zm1|k3, zmm2);
|
||||
vgetexpbf16(zm1|k3, ptr[rax+64]);
|
||||
vgetexpbf16(zm1|k3, ptr_b[rax+64]);
|
||||
|
||||
vgetmantbf16(xm1|k3, xmm2, 3);
|
||||
vgetmantbf16(xm1|k3, ptr[rax+64], 5);
|
||||
vgetmantbf16(xm1|k3, ptr_b[rax+64], 9);
|
||||
|
||||
vgetmantbf16(ym1|k3, ymm2, 3);
|
||||
vgetmantbf16(ym1|k3, ptr[rax+64], 5);
|
||||
vgetmantbf16(ym1|k3, ptr_b[rax+64], 9);
|
||||
|
||||
vgetmantbf16(zm1|k3, zmm2, 3);
|
||||
vgetmantbf16(zm1|k3, ptr[rax+64], 5);
|
||||
vgetmantbf16(zm1|k3, ptr_b[rax+64], 9);
|
||||
|
||||
vrcpbf16(xm1|k5, xm2);
|
||||
vrcpbf16(xm1|k5, ptr[rcx+64]);
|
||||
vrcpbf16(xm1|k5, ptr_b[rcx+64]);
|
||||
|
||||
vrcpbf16(ym1|k5, ym2);
|
||||
vrcpbf16(ym1|k5, ptr[rcx+64]);
|
||||
vrcpbf16(ym1|k5, ptr_b[rcx+64]);
|
||||
|
||||
vrcpbf16(zm1|k5, zm2);
|
||||
vrcpbf16(zm1|k5, ptr[rcx+64]);
|
||||
vrcpbf16(zm1|k5, ptr_b[rcx+64]);
|
||||
|
||||
vreducebf16(xm1|k4, xm2, 1);
|
||||
vreducebf16(xm1|k4, ptr[rax+64], 1);
|
||||
vreducebf16(xm1|k4, ptr_b[rax+64], 1);
|
||||
|
||||
vreducebf16(ym1|k4, ym2, 1);
|
||||
vreducebf16(ym1|k4, ptr[rax+64], 1);
|
||||
vreducebf16(ym1|k4, ptr_b[rax+64], 1);
|
||||
|
||||
vreducebf16(zm1|k4, zm2, 1);
|
||||
vreducebf16(zm1|k4, ptr[rax+64], 1);
|
||||
vreducebf16(zm1|k4, ptr_b[rax+64], 1);
|
||||
|
||||
vrndscalebf16(xm1|k4, xm2, 1);
|
||||
vrndscalebf16(xm1|k4, ptr[rax+64], 1);
|
||||
vrndscalebf16(xm1|k4, ptr_b[rax+64], 1);
|
||||
|
||||
vrndscalebf16(ym1|k4, ym2, 1);
|
||||
vrndscalebf16(ym1|k4, ptr[rax+64], 1);
|
||||
vrndscalebf16(ym1|k4, ptr_b[rax+64], 1);
|
||||
|
||||
vrndscalebf16(zm1|k4, zm2, 1);
|
||||
vrndscalebf16(zm1|k4, ptr[rax+64], 1);
|
||||
vrndscalebf16(zm1|k4, ptr_b[rax+64], 1);
|
||||
|
||||
vrsqrtbf16(xm1|k5, xm2);
|
||||
vrsqrtbf16(xm1|k5, ptr[rcx+64]);
|
||||
vrsqrtbf16(xm1|k5, ptr_b[rcx+64]);
|
||||
|
||||
vrsqrtbf16(ym1|k5, ym2);
|
||||
vrsqrtbf16(ym1|k5, ptr[rcx+64]);
|
||||
vrsqrtbf16(ym1|k5, ptr_b[rcx+64]);
|
||||
|
||||
vrsqrtbf16(zm1|k5, zm2);
|
||||
vrsqrtbf16(zm1|k5, ptr[rcx+64]);
|
||||
vrsqrtbf16(zm1|k5, ptr_b[rcx+64]);
|
||||
|
||||
vscalefbf16(xm1|k5, xm5, xm2);
|
||||
vscalefbf16(xm1|k5, xm5, ptr[rcx+64]);
|
||||
vscalefbf16(xm1|k5, xm5, ptr_b[rcx+64]);
|
||||
|
||||
vscalefbf16(ym1|k5, ym9, ym2);
|
||||
vscalefbf16(ym1|k5, ym9, ptr[rcx+64]);
|
||||
vscalefbf16(ym1|k5, ym9, ptr_b[rcx+64]);
|
||||
|
||||
vscalefbf16(zm1|k5, zm30, zm2);
|
||||
vscalefbf16(zm1|k5, zm30, ptr[rcx+64]);
|
||||
vscalefbf16(zm1|k5, zm30, ptr_b[rcx+64]);
|
||||
|
||||
vsqrtbf16(xm5|k3, xmm4);
|
||||
vsqrtbf16(xm5|k3, ptr[rax+64]);
|
||||
vsqrtbf16(xm5|k3, ptr_b[rax+64]);
|
||||
|
||||
vsqrtbf16(ym5|k3, ymm4);
|
||||
vsqrtbf16(ym5|k3, ptr[rax+64]);
|
||||
vsqrtbf16(ym5|k3, ptr_b[rax+64]);
|
||||
|
||||
vsqrtbf16(zm5|k3, zmm4);
|
||||
vsqrtbf16(zm5|k3, ptr[rax+64]);
|
||||
vsqrtbf16(zm5|k3, ptr_b[rax+64]);
|
||||
Vendored
+17
@@ -0,0 +1,17 @@
|
||||
vcomxsd(xm1, xm2|T_sae);
|
||||
vcomxsd(xm1, ptr[rax+64]);
|
||||
|
||||
vcomxsh(xm1, xm2|T_sae);
|
||||
vcomxsh(xm1, ptr[rax+64]);
|
||||
|
||||
vcomxss(xm1, xm2|T_sae);
|
||||
vcomxss(xm1, ptr[rax+64]);
|
||||
|
||||
vucomxsd(xm1, xm2|T_sae);
|
||||
vucomxsd(xm1, ptr[rax+64]);
|
||||
|
||||
vucomxsh(xm1, xm2|T_sae);
|
||||
vucomxsh(xm1, ptr[rax+64]);
|
||||
|
||||
vucomxss(xm1, xm2|T_sae);
|
||||
vucomxss(xm1, ptr[rax+64]);
|
||||
+200
@@ -0,0 +1,200 @@
|
||||
vcvt2ps2phx(xm1|k5, xm2, xm3);
|
||||
vcvt2ps2phx(xm1|k5, xm2, ptr[rax+64]);
|
||||
vcvt2ps2phx(xm1|k5, xm2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ps2phx(ym1|k5, ym2, ym3);
|
||||
vcvt2ps2phx(ym1|k5, ym2, ptr[rax+64]);
|
||||
vcvt2ps2phx(ym1|k5, ym2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ps2phx(zm1|k5, zm2, zm3);
|
||||
vcvt2ps2phx(zm1|k5, zm2, ptr[rax+64]);
|
||||
vcvt2ps2phx(zm1|k5, zm2, ptr_b[rax+64]);
|
||||
|
||||
// vcvtbiasph2bf8
|
||||
vcvtbiasph2bf8(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2bf8(xm1|k2, xm3, ptr[rax+64]);
|
||||
vcvtbiasph2bf8(xm1|k2, xm3, ptr_b[rax+64]);
|
||||
|
||||
vcvtbiasph2bf8(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2bf8(xm1|k2, ym3, ptr[rax+64]);
|
||||
vcvtbiasph2bf8(xm1|k2, ym3, ptr_b[rax+64]);
|
||||
|
||||
vcvtbiasph2bf8(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2bf8(ym1|k2, zm3, ptr[rax+64]);
|
||||
vcvtbiasph2bf8(ym1|k2, zm3, ptr_b[rax+64]);
|
||||
|
||||
// vcvtbiasph2bf8s
|
||||
vcvtbiasph2bf8s(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2bf8s(xm1|k2, xm3, ptr[rax+64]);
|
||||
vcvtbiasph2bf8s(xm1|k2, xm3, ptr_b[rax+64]);
|
||||
|
||||
vcvtbiasph2bf8s(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2bf8s(xm1|k2, ym3, ptr[rax+64]);
|
||||
vcvtbiasph2bf8s(xm1|k2, ym3, ptr_b[rax+64]);
|
||||
|
||||
vcvtbiasph2bf8s(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2bf8s(ym1|k2, zm3, ptr[rax+64]);
|
||||
vcvtbiasph2bf8s(ym1|k2, zm3, ptr_b[rax+64]);
|
||||
|
||||
// vcvtbiasph2hf8
|
||||
vcvtbiasph2hf8(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2hf8(xm1|k2, xm3, ptr[rax+64]);
|
||||
vcvtbiasph2hf8(xm1|k2, xm3, ptr_b[rax+64]);
|
||||
|
||||
vcvtbiasph2hf8(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2hf8(xm1|k2, ym3, ptr[rax+64]);
|
||||
vcvtbiasph2hf8(xm1|k2, ym3, ptr_b[rax+64]);
|
||||
|
||||
vcvtbiasph2hf8(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2hf8(ym1|k2, zm3, ptr[rax+64]);
|
||||
vcvtbiasph2hf8(ym1|k2, zm3, ptr_b[rax+64]);
|
||||
|
||||
// vcvtbiasph2hf8s
|
||||
vcvtbiasph2hf8s(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2hf8s(xm1|k2, xm3, ptr[rax+64]);
|
||||
vcvtbiasph2hf8s(xm1|k2, xm3, ptr_b[rax+64]);
|
||||
|
||||
vcvtbiasph2hf8s(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2hf8s(xm1|k2, ym3, ptr[rax+64]);
|
||||
vcvtbiasph2hf8s(xm1|k2, ym3, ptr_b[rax+64]);
|
||||
|
||||
vcvtbiasph2hf8s(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2hf8s(ym1|k2, zm3, ptr[rax+64]);
|
||||
vcvtbiasph2hf8s(ym1|k2, zm3, ptr_b[rax+64]);
|
||||
|
||||
vcvthf82ph(xm1|k5|T_z, xm2);
|
||||
vcvthf82ph(xm1|k5|T_z, ptr[rax+64]);
|
||||
|
||||
vcvthf82ph(ym1|k5|T_z, xm2);
|
||||
vcvthf82ph(ym1|k5|T_z, ptr[rax+64]);
|
||||
|
||||
vcvthf82ph(zm1|k5|T_z, ym2);
|
||||
vcvthf82ph(zm1|k5|T_z, ptr[rax+64]);
|
||||
|
||||
//
|
||||
vcvt2ph2bf8(xm1|k4|T_z, xm2, xm3);
|
||||
vcvt2ph2bf8(xm1|k4, xm2, ptr[rax+64]);
|
||||
vcvt2ph2bf8(xm1|T_z, xm2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ph2bf8(ym1|k4|T_z, ym2, ym3);
|
||||
vcvt2ph2bf8(ym1|k4, ym2, ptr[rax+64]);
|
||||
vcvt2ph2bf8(ym1|T_z, ym2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ph2bf8(zm1|k4|T_z, zm2, zm3);
|
||||
vcvt2ph2bf8(zm1|k4, zm2, ptr[rax+64]);
|
||||
vcvt2ph2bf8(zm1|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
//
|
||||
vcvt2ph2bf8s(xm1|k4|T_z, xm2, xm3);
|
||||
vcvt2ph2bf8s(xm1|k4, xm2, ptr[rax+64]);
|
||||
vcvt2ph2bf8s(xm1|T_z, xm2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ph2bf8s(ym1|k4|T_z, ym2, ym3);
|
||||
vcvt2ph2bf8s(ym1|k4, ym2, ptr[rax+64]);
|
||||
vcvt2ph2bf8s(ym1|T_z, ym2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ph2bf8s(zm1|k4|T_z, zm2, zm3);
|
||||
vcvt2ph2bf8s(zm1|k4, zm2, ptr[rax+64]);
|
||||
vcvt2ph2bf8s(zm1|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
//
|
||||
vcvt2ph2hf8(xm1|k4|T_z, xm2, xm3);
|
||||
vcvt2ph2hf8(xm1|k4, xm2, ptr[rax+64]);
|
||||
vcvt2ph2hf8(xm1|T_z, xm2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ph2hf8(ym1|k4|T_z, ym2, ym3);
|
||||
vcvt2ph2hf8(ym1|k4, ym2, ptr[rax+64]);
|
||||
vcvt2ph2hf8(ym1|T_z, ym2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ph2hf8(zm1|k4|T_z, zm2, zm3);
|
||||
vcvt2ph2hf8(zm1|k4, zm2, ptr[rax+64]);
|
||||
vcvt2ph2hf8(zm1|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
//
|
||||
vcvt2ph2hf8s(xm1|k4|T_z, xm2, xm3);
|
||||
vcvt2ph2hf8s(xm1|k4, xm2, ptr[rax+64]);
|
||||
vcvt2ph2hf8s(xm1|T_z, xm2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ph2hf8s(ym1|k4|T_z, ym2, ym3);
|
||||
vcvt2ph2hf8s(ym1|k4, ym2, ptr[rax+64]);
|
||||
vcvt2ph2hf8s(ym1|T_z, ym2, ptr_b[rax+64]);
|
||||
|
||||
vcvt2ph2hf8s(zm1|k4|T_z, zm2, zm3);
|
||||
vcvt2ph2hf8s(zm1|k4, zm2, ptr[rax+64]);
|
||||
vcvt2ph2hf8s(zm1|T_z, zm2, ptr_b[rax+64]);
|
||||
|
||||
// vcvtph2bf8
|
||||
vcvtph2bf8(xmm1|k2|T_z, xmm2);
|
||||
vcvtph2bf8(xmm1|k2|T_z, xword [rax+64]);
|
||||
vcvtph2bf8(xmm1|k2|T_z, xword_b[rax+64]);
|
||||
|
||||
vcvtph2bf8(xmm1|k2|T_z, ymm2);
|
||||
vcvtph2bf8(xmm1|k2|T_z, yword[rax+64]);
|
||||
vcvtph2bf8(xmm1|k2|T_z, yword_b[rax+64]);
|
||||
|
||||
vcvtph2bf8(ymm1|k2|T_z, zmm2);
|
||||
vcvtph2bf8(ymm1|k2|T_z, zword[rax+64]);
|
||||
vcvtph2bf8(ymm1|k2|T_z, zword_b[rax+64]);
|
||||
|
||||
// vcvtph2bf8s
|
||||
vcvtph2bf8s(xmm1|k2|T_z, xmm2);
|
||||
vcvtph2bf8s(xmm1|k2|T_z, xword [rax+64]);
|
||||
vcvtph2bf8s(xmm1|k2|T_z, xword_b[rax+64]);
|
||||
|
||||
vcvtph2bf8s(xmm1|k2|T_z, ymm2);
|
||||
vcvtph2bf8s(xmm1|k2|T_z, yword[rax+64]);
|
||||
vcvtph2bf8s(xmm1|k2|T_z, yword_b[rax+64]);
|
||||
|
||||
vcvtph2bf8s(ymm1|k2|T_z, zmm2);
|
||||
vcvtph2bf8s(ymm1|k2|T_z, zword[rax+64]);
|
||||
vcvtph2bf8s(ymm1|k2|T_z, zword_b[rax+64]);
|
||||
|
||||
// vcvtph2hf8
|
||||
vcvtph2hf8(xmm1|k2|T_z, xmm2);
|
||||
vcvtph2hf8(xmm1|k2|T_z, xword [rax+64]);
|
||||
vcvtph2hf8(xmm1|k2|T_z, xword_b[rax+64]);
|
||||
|
||||
vcvtph2hf8(xmm1|k2|T_z, ymm2);
|
||||
vcvtph2hf8(xmm1|k2|T_z, yword[rax+64]);
|
||||
vcvtph2hf8(xmm1|k2|T_z, yword_b[rax+64]);
|
||||
|
||||
vcvtph2hf8(ymm1|k2|T_z, zmm2);
|
||||
vcvtph2hf8(ymm1|k2|T_z, zword[rax+64]);
|
||||
vcvtph2hf8(ymm1|k2|T_z, zword_b[rax+64]);
|
||||
|
||||
// vcvtph2hf8s
|
||||
vcvtph2hf8s(xmm1|k2|T_z, xmm2);
|
||||
vcvtph2hf8s(xmm1|k2|T_z, xword [rax+64]);
|
||||
vcvtph2hf8s(xmm1|k2|T_z, xword_b[rax+64]);
|
||||
|
||||
vcvtph2hf8s(xmm1|k2|T_z, ymm2);
|
||||
vcvtph2hf8s(xmm1|k2|T_z, yword[rax+64]);
|
||||
vcvtph2hf8s(xmm1|k2|T_z, yword_b[rax+64]);
|
||||
|
||||
vcvtph2hf8s(ymm1|k2|T_z, zmm2);
|
||||
vcvtph2hf8s(ymm1|k2|T_z, zword[rax+64]);
|
||||
vcvtph2hf8s(ymm1|k2|T_z, zword_b[rax+64]);
|
||||
|
||||
// AVX-NE-CONVERT
|
||||
vbcstnebf162ps(xmm15, ptr[rax+64]);
|
||||
vbcstnebf162ps(xmm15, ptr[rax+64]);
|
||||
|
||||
vbcstnesh2ps(ymm15, ptr[rax+64]);
|
||||
vbcstnesh2ps(ymm15, ptr[rax+64]);
|
||||
|
||||
vcvtneebf162ps(xmm15, ptr[rax+64]);
|
||||
vcvtneebf162ps(ymm15, ptr[rax+64]);
|
||||
|
||||
vcvtneeph2ps(xmm15, ptr[rax+64]);
|
||||
vcvtneeph2ps(ymm15, ptr[rax+64]);
|
||||
|
||||
vcvtneobf162ps(xmm15, ptr[rax+64]);
|
||||
vcvtneobf162ps(ymm15, ptr[rax+64]);
|
||||
|
||||
vcvtneoph2ps(xmm15, ptr[rax+64]);
|
||||
vcvtneoph2ps(ymm15, ptr[rax+64]);
|
||||
|
||||
vcvtneps2bf16(xmm15, xmm3, VexEncoding);
|
||||
vcvtneps2bf16(xmm15, ptr[rax+64], VexEncoding);
|
||||
vcvtneps2bf16(xmm15, ymm3, VexEncoding);
|
||||
vcvtneps2bf16(xmm15, ptr[rax+64], VexEncoding);
|
||||
+63
@@ -0,0 +1,63 @@
|
||||
vminmaxbf16(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxbf16(xm1|k3|T_z, xm2, ptr[rax+64], 5);
|
||||
vminmaxbf16(xm1|k3|T_z, xm2, ptr_b[rax+64], 5);
|
||||
|
||||
vminmaxbf16(ym1|k3|T_z, ym2, ym3, 5);
|
||||
vminmaxbf16(ym1|k3|T_z, ym2, ptr[rax+64], 5);
|
||||
vminmaxbf16(ym1|k3|T_z, ym2, ptr_b[rax+64], 5);
|
||||
|
||||
vminmaxbf16(zm1|k3|T_z, zm2, zm3, 5);
|
||||
vminmaxbf16(zm1|k3|T_z, zm2, ptr[rax+64], 5);
|
||||
vminmaxbf16(zm1|k3|T_z, zm2, ptr_b[rax+64], 5);
|
||||
//
|
||||
vminmaxpd(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxpd(xm1|k3|T_z, xm2, ptr[rax+64], 5);
|
||||
vminmaxpd(xm1|k3|T_z, xm2, ptr_b[rax+64], 5);
|
||||
|
||||
vminmaxpd(ym1|k3|T_z, ym2, ym3, 5);
|
||||
vminmaxpd(ym1|k3|T_z, ym2, ptr[rax+64], 5);
|
||||
vminmaxpd(ym1|k3|T_z, ym2, ptr_b[rax+64], 5);
|
||||
|
||||
vminmaxpd(zm1|k3|T_z, zm2, zm3, 5);
|
||||
vminmaxpd(zm1|k3|T_z, zm2, zm3|T_sae, 5);
|
||||
vminmaxpd(zm1|k3|T_z, zm2, ptr[rax+64], 5);
|
||||
vminmaxpd(zm1|k3|T_z, zm2, ptr_b[rax+64], 5);
|
||||
//
|
||||
vminmaxph(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxph(xm1|k3|T_z, xm2, ptr[rax+64], 5);
|
||||
vminmaxph(xm1|k3|T_z, xm2, ptr[rax+64], 5);
|
||||
vminmaxph(xm1|k3|T_z, xm2, ptr_b[rax+64], 5);
|
||||
|
||||
vminmaxph(ym1|k3|T_z, ym2, ym3, 5);
|
||||
vminmaxph(ym1|k3|T_z, ym2, ptr[rax+64], 5);
|
||||
vminmaxph(ym1|k3|T_z, ym2, ptr_b[rax+64], 5);
|
||||
|
||||
vminmaxph(zm1|k3|T_z, zm2, zm3, 5);
|
||||
vminmaxph(zm1|k3|T_z, zm2, zm3|T_sae, 5);
|
||||
vminmaxph(zm1|k3|T_z, zm2, ptr[rax+64], 5);
|
||||
vminmaxph(zm1|k3|T_z, zm2, ptr_b[rax+64], 5);
|
||||
//
|
||||
vminmaxps(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxps(xm1|k3|T_z, xm2, ptr[rax+64], 5);
|
||||
vminmaxps(xm1|k3|T_z, xm2, ptr_b[rax+64], 5);
|
||||
|
||||
vminmaxps(ym1|k3|T_z, ym2, ym3, 5);
|
||||
vminmaxps(ym1|k3|T_z, ym2, ptr[rax+64], 5);
|
||||
vminmaxps(ym1|k3|T_z, ym2, ptr_b[rax+64], 5);
|
||||
|
||||
vminmaxps(zm1|k3|T_z, zm2, zm3, 5);
|
||||
vminmaxps(zm1|k3|T_z, zm2, zm3|T_sae, 5);
|
||||
vminmaxps(zm1|k3|T_z, zm2, ptr[rax+64], 5);
|
||||
vminmaxps(zm1|k3|T_z, zm2, ptr_b[rax+64], 5);
|
||||
//
|
||||
vminmaxsd(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxsd(xm1|k3|T_z, xm2, xm3|T_sae, 5);
|
||||
vminmaxsd(xm1|k3|T_z, xm2, ptr[rax+64], 5);
|
||||
//
|
||||
vminmaxsh(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxsh(xm1|k3|T_z, xm2, xm3|T_sae, 5);
|
||||
vminmaxsh(xm1|k3|T_z, xm2, ptr[rax+64], 5);
|
||||
//
|
||||
vminmaxss(xm1|k3|T_z, xm2, xm3, 5);
|
||||
vminmaxss(xm1|k3|T_z, xm2, xm3|T_sae, 5);
|
||||
vminmaxss(xm1|k3|T_z, xm2, ptr[rax+64], 5);
|
||||
Vendored
+290
@@ -0,0 +1,290 @@
|
||||
// AVX10 integer and FP16 VNNI, media and zero-extending
|
||||
vdpphps(xm1, xm2, xm3);
|
||||
vdpphps(xm1, xm2, ptr[rax+64]);
|
||||
vdpphps(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vdpphps(ym1, ym2, ym3);
|
||||
vdpphps(ym1, ym2, ptr[rax+64]);
|
||||
vdpphps(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vdpphps(zm1, zm2, zm3);
|
||||
vdpphps(zm1, zm2, ptr[rax+64]);
|
||||
vdpphps(zm1, zm2, ptr_b[rax+64]);
|
||||
//
|
||||
vmpsadbw(xm1, xm3, xm15, 3);
|
||||
vmpsadbw(xm1|T_z, xm4, ptr[rax+64], 5);
|
||||
|
||||
vmpsadbw(ym1|k4, ym3, ym15, 3);
|
||||
vmpsadbw(ym1, ym4, ptr[rax+64], 5);
|
||||
|
||||
vmpsadbw(zm1|k4, zm3, zm15, 3);
|
||||
vmpsadbw(zm1, zm4, ptr[rax+64], 5);
|
||||
// vpdpbssd: xm/ym/zm widths, each with a register, ptr[] and ptr_b[] (broadcast) last operand.
|
||||
vpdpbssd(xm1, xm2, xm3);
|
||||
vpdpbssd(xm1, xm2, ptr[rax+64]);
|
||||
vpdpbssd(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbssd(ym1, ym2, ym3);
|
||||
vpdpbssd(ym1, ym2, ptr[rax+64]);
|
||||
vpdpbssd(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbssd(zm1, zm2, zm3);
|
||||
vpdpbssd(zm1, zm2, ptr[rax+64]);
|
||||
vpdpbssd(zm1, zm2, ptr_b[rax+64]);
|
||||
// vpdpbssds: same operand matrix as vpdpbssd (xm/ym/zm x register, ptr[], ptr_b[]).
|
||||
vpdpbssds(xm1, xm2, xm3);
|
||||
vpdpbssds(xm1, xm2, ptr[rax+64]);
|
||||
vpdpbssds(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbssds(ym1, ym2, ym3);
|
||||
vpdpbssds(ym1, ym2, ptr[rax+64]);
|
||||
vpdpbssds(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbssds(zm1, zm2, zm3);
|
||||
vpdpbssds(zm1, zm2, ptr[rax+64]);
|
||||
vpdpbssds(zm1, zm2, ptr_b[rax+64]);
|
||||
// vpdpbsud: xm/ym/zm widths, each with a register, ptr[] and ptr_b[] (broadcast) last operand.
|
||||
vpdpbsud(xm1, xm2, xm3);
|
||||
vpdpbsud(xm1, xm2, ptr[rax+64]);
|
||||
vpdpbsud(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbsud(ym1, ym2, ym3);
|
||||
vpdpbsud(ym1, ym2, ptr[rax+64]);
|
||||
vpdpbsud(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbsud(zm1, zm2, zm3);
|
||||
vpdpbsud(zm1, zm2, ptr[rax+64]);
|
||||
vpdpbsud(zm1, zm2, ptr_b[rax+64]);
|
||||
// vpdpbsuds: same operand matrix as vpdpbsud (xm/ym/zm x register, ptr[], ptr_b[]).
|
||||
vpdpbsuds(xm1, xm2, xm3);
|
||||
vpdpbsuds(xm1, xm2, ptr[rax+64]);
|
||||
vpdpbsuds(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbsuds(ym1, ym2, ym3);
|
||||
vpdpbsuds(ym1, ym2, ptr[rax+64]);
|
||||
vpdpbsuds(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbsuds(zm1, zm2, zm3);
|
||||
vpdpbsuds(zm1, zm2, ptr[rax+64]);
|
||||
vpdpbsuds(zm1, zm2, ptr_b[rax+64]);
|
||||
|
||||
// vpdpbuud: xm/ym/zm widths, each with a register, ptr[] and ptr_b[] (broadcast) last operand.
|
||||
vpdpbuud(xm1, xm2, xm3);
|
||||
vpdpbuud(xm1, xm2, ptr[rax+64]);
|
||||
vpdpbuud(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbuud(ym1, ym2, ym3);
|
||||
vpdpbuud(ym1, ym2, ptr[rax+64]);
|
||||
vpdpbuud(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbuud(zm1, zm2, zm3);
|
||||
vpdpbuud(zm1, zm2, ptr[rax+64]);
|
||||
vpdpbuud(zm1, zm2, ptr_b[rax+64]);
|
||||
// vpdpbuuds: same operand matrix as vpdpbuud (xm/ym/zm x register, ptr[], ptr_b[]).
|
||||
vpdpbuuds(xm1, xm2, xm3);
|
||||
vpdpbuuds(xm1, xm2, ptr[rax+64]);
|
||||
vpdpbuuds(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbuuds(ym1, ym2, ym3);
|
||||
vpdpbuuds(ym1, ym2, ptr[rax+64]);
|
||||
vpdpbuuds(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpbuuds(zm1, zm2, zm3);
|
||||
vpdpbuuds(zm1, zm2, ptr[rax+64]);
|
||||
vpdpbuuds(zm1, zm2, ptr_b[rax+64]);
|
||||
|
||||
// vpdpwsud: xm/ym/zm widths, each with a register, ptr[] and ptr_b[] (broadcast) last operand.
|
||||
vpdpwsud(xm1, xm2, xm3);
|
||||
vpdpwsud(xm1, xm2, ptr[rax+64]);
|
||||
vpdpwsud(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwsud(ym1, ym2, ym3);
|
||||
vpdpwsud(ym1, ym2, ptr[rax+64]);
|
||||
vpdpwsud(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwsud(zm1, zm2, zm3);
|
||||
vpdpwsud(zm1, zm2, ptr[rax+64]);
|
||||
vpdpwsud(zm1, zm2, ptr_b[rax+64]);
|
||||
// vpdpwsuds: same operand matrix as vpdpwsud (xm/ym/zm x register, ptr[], ptr_b[]).
|
||||
vpdpwsuds(xm1, xm2, xm3);
|
||||
vpdpwsuds(xm1, xm2, ptr[rax+64]);
|
||||
vpdpwsuds(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwsuds(ym1, ym2, ym3);
|
||||
vpdpwsuds(ym1, ym2, ptr[rax+64]);
|
||||
vpdpwsuds(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwsuds(zm1, zm2, zm3);
|
||||
vpdpwsuds(zm1, zm2, ptr[rax+64]);
|
||||
vpdpwsuds(zm1, zm2, ptr_b[rax+64]);
|
||||
// vpdpwusd (operand-signedness counterpart of vpdpwsud): xm/ym/zm widths, each with a register, ptr[] and ptr_b[] (broadcast) last operand.
|
||||
vpdpwusd(xm1, xm2, xm3);
|
||||
vpdpwusd(xm1, xm2, ptr[rax+64]);
|
||||
vpdpwusd(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwusd(ym1, ym2, ym3);
|
||||
vpdpwusd(ym1, ym2, ptr[rax+64]);
|
||||
vpdpwusd(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwusd(zm1, zm2, zm3);
|
||||
vpdpwusd(zm1, zm2, ptr[rax+64]);
|
||||
vpdpwusd(zm1, zm2, ptr_b[rax+64]);
|
||||
// vpdpwusds (operand-signedness counterpart of vpdpwsuds): xm/ym/zm widths, each with a register, ptr[] and ptr_b[] (broadcast) last operand.
|
||||
vpdpwusds(xm1, xm2, xm3);
|
||||
vpdpwusds(xm1, xm2, ptr[rax+64]);
|
||||
vpdpwusds(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwusds(ym1, ym2, ym3);
|
||||
vpdpwusds(ym1, ym2, ptr[rax+64]);
|
||||
vpdpwusds(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwusds(zm1, zm2, zm3);
|
||||
vpdpwusds(zm1, zm2, ptr[rax+64]);
|
||||
vpdpwusds(zm1, zm2, ptr_b[rax+64]);
|
||||
|
||||
// vpdpwuud: xm/ym/zm widths, each with a register, ptr[] and ptr_b[] (broadcast) last operand.
|
||||
vpdpwuud(xm1, xm2, xm3);
|
||||
vpdpwuud(xm1, xm2, ptr[rax+64]);
|
||||
vpdpwuud(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwuud(ym1, ym2, ym3);
|
||||
vpdpwuud(ym1, ym2, ptr[rax+64]);
|
||||
vpdpwuud(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwuud(zm1, zm2, zm3);
|
||||
vpdpwuud(zm1, zm2, ptr[rax+64]);
|
||||
vpdpwuud(zm1, zm2, ptr_b[rax+64]);
|
||||
// vpdpwuuds: same operand matrix as vpdpwuud (xm/ym/zm x register, ptr[], ptr_b[]).
|
||||
vpdpwuuds(xm1, xm2, xm3);
|
||||
vpdpwuuds(xm1, xm2, ptr[rax+64]);
|
||||
vpdpwuuds(xm1, xm2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwuuds(ym1, ym2, ym3);
|
||||
vpdpwuuds(ym1, ym2, ptr[rax+64]);
|
||||
vpdpwuuds(ym1, ym2, ptr_b[rax+64]);
|
||||
|
||||
vpdpwuuds(zm1, zm2, zm3);
|
||||
vpdpwuuds(zm1, zm2, ptr[rax+64]);
|
||||
vpdpwuuds(zm1, zm2, ptr_b[rax+64]);
|
||||
|
||||
// vmovd: two xmm-to-xmm cases (one using xm20 as the source), one load from ptr[] and one store to ptr[].
|
||||
vmovd(xm10, xm20);
|
||||
vmovd(xm1, xm2);
|
||||
vmovd(xm10, ptr[rax+64]);
|
||||
vmovd(ptr[rax+64], xm30);
|
||||
// vmovw: two xmm-to-xmm cases (one using xm20 as the source), one load from ptr[] and one store to ptr[].
|
||||
vmovw(xm1, xm20);
|
||||
vmovw(xm1, xm2);
|
||||
vmovw(xm3, ptr [rax+0x40]);
|
||||
vmovw(ptr [rax+0x40], xm7);
|
||||
// push of every 64-bit general-purpose register: rax..rdi, r8..r15, then r16..r31.
|
||||
push(rax);
|
||||
push(rcx);
|
||||
push(rdx);
|
||||
push(rbx);
|
||||
push(rsp);
|
||||
push(rbp);
|
||||
push(rsi);
|
||||
push(rdi);
|
||||
push(r8);
|
||||
push(r9);
|
||||
push(r10);
|
||||
push(r11);
|
||||
push(r12);
|
||||
push(r13);
|
||||
push(r14);
|
||||
push(r15);
|
||||
push(r16);
|
||||
push(r17);
|
||||
push(r18);
|
||||
push(r19);
|
||||
push(r20);
|
||||
push(r21);
|
||||
push(r22);
|
||||
push(r23);
|
||||
push(r24);
|
||||
push(r25);
|
||||
push(r26);
|
||||
push(r27);
|
||||
push(r28);
|
||||
push(r29);
|
||||
push(r30);
|
||||
push(r31);
|
||||
// pop of every 64-bit general-purpose register, in the same register order as the push cases: rax..rdi, r8..r15, then r16..r31.
pop(rax);
|
||||
pop(rcx);
|
||||
pop(rdx);
|
||||
pop(rbx);
|
||||
pop(rsp);
|
||||
pop(rbp);
|
||||
pop(rsi);
|
||||
pop(rdi);
|
||||
pop(r8);
|
||||
pop(r9);
|
||||
pop(r10);
|
||||
pop(r11);
|
||||
pop(r12);
|
||||
pop(r13);
|
||||
pop(r14);
|
||||
pop(r15);
|
||||
pop(r16);
|
||||
pop(r17);
|
||||
pop(r18);
|
||||
pop(r19);
|
||||
pop(r20);
|
||||
pop(r21);
|
||||
pop(r22);
|
||||
pop(r23);
|
||||
pop(r24);
|
||||
pop(r25);
|
||||
pop(r26);
|
||||
pop(r27);
|
||||
pop(r28);
|
||||
pop(r29);
|
||||
pop(r30);
|
||||
pop(r31);
|
||||
|
||||
// movrs: memory-to-register loads into rcx, ecx, cx and cl; the cl case uses a base+index*scale address.
movrs(rcx, ptr[rax]);
|
||||
movrs(ecx, ptr[rax]);
|
||||
movrs(cx, ptr[rax]);
|
||||
movrs(cl, ptr[rax+rdx*4]);
|
||||
|
||||
// prefetch family: prefetchit0/prefetchit1 are given rip-relative addresses, every other variant uses [rcx].
prefetchnta(ptr[rcx]);
|
||||
prefetcht0(ptr[rcx]);
|
||||
prefetcht1(ptr[rcx]);
|
||||
prefetcht2(ptr[rcx]);
|
||||
prefetchit1(ptr[rip+64]);
|
||||
prefetchit0(ptr[rip+64]);
|
||||
prefetchrst2(ptr[rcx]);
|
||||
|
||||
// vmovrsb / vmovrsd / vmovrsq / vmovrsw: loads from ptr[rax+64] into xm1/ym1/zm1, all with the k1 mask and T_z.
vmovrsb(xm1|k1|T_z, ptr[rax+64]);
|
||||
vmovrsb(ym1|k1|T_z, ptr[rax+64]);
|
||||
vmovrsb(zm1|k1|T_z, ptr[rax+64]);
|
||||
|
||||
vmovrsd(xm1|k1|T_z, ptr[rax+64]);
|
||||
vmovrsd(ym1|k1|T_z, ptr[rax+64]);
|
||||
vmovrsd(zm1|k1|T_z, ptr[rax+64]);
|
||||
|
||||
vmovrsq(xm1|k1|T_z, ptr[rax+64]);
|
||||
vmovrsq(ym1|k1|T_z, ptr[rax+64]);
|
||||
vmovrsq(zm1|k1|T_z, ptr[rax+64]);
|
||||
|
||||
vmovrsw(xm1|k1|T_z, ptr[rax+64]);
|
||||
vmovrsw(ym1|k1|T_z, ptr[rax+64]);
|
||||
vmovrsw(zm1|k1|T_z, ptr[rax+64]);
|
||||
// Scalar FP-to-integer conversions (32- and 64-bit destinations); per the original note, these cases were moved here because of a bug in nasm 3.x.
// The vcvt{sd,ss}2{si,usi} cases use xmm4|T_rd_sae; the truncating vcvtt* cases use xmm25|T_sae.
|
||||
vcvtsd2si(esp, xmm4|T_rd_sae);
|
||||
vcvtsd2si(r8, xmm4|T_rd_sae);
|
||||
vcvtsd2usi(ecx, xmm4|T_rd_sae);
|
||||
vcvtsd2usi(r14, xmm4|T_rd_sae);
|
||||
vcvtss2si(ecx, xmm4|T_rd_sae);
|
||||
vcvtss2si(r13, xmm4|T_rd_sae);
|
||||
vcvtss2usi(esi, xmm4|T_rd_sae);
|
||||
vcvtss2usi(r10, xmm4|T_rd_sae);
|
||||
vcvttsd2si(ecx, xmm25|T_sae);
|
||||
vcvttsd2si(r12, xmm25|T_sae);
|
||||
vcvttsd2usi(edx, xmm25|T_sae);
|
||||
vcvttsd2usi(rbp, xmm25|T_sae);
|
||||
vcvttss2si(esp, xmm25|T_sae);
|
||||
vcvttss2si(r11, xmm25|T_sae);
|
||||
vcvttss2usi(edi, xmm25|T_sae);
|
||||
vcvttss2usi(r14, xmm25|T_sae);
|
||||
Vendored
+632
@@ -0,0 +1,632 @@
|
||||
vaesdec(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesdec(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesdec(zmm1, zmm2, ptr [rcx + 64]);
|
||||
vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
vaesenc(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesenc(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesenc(zmm1, zmm2, ptr [rcx + 64]);
|
||||
vaesenclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesenclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesenclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
|
||||
vpcompressb(ptr[rax + 64], xmm1);
|
||||
vpcompressb(xmm30 | k5, xmm1);
|
||||
vpcompressb(ptr[rax + 64], ymm1);
|
||||
vpcompressb(ymm30 | k3 |T_z, ymm1);
|
||||
vpcompressb(ptr[rax + 64], zmm1);
|
||||
vpcompressb(zmm30 | k2 |T_z, zmm1);
|
||||
vpcompressw(ptr[rax + 64], xmm1);
|
||||
vpcompressw(xmm30 | k5, xmm1);
|
||||
vpcompressw(ptr[rax + 64], ymm1);
|
||||
vpcompressw(ymm30 | k3 |T_z, ymm1);
|
||||
vpcompressw(ptr[rax + 64], zmm1);
|
||||
vpcompressw(zmm30 | k2 |T_z, zmm1);
|
||||
vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
vpexpandb(xmm5|k3|T_z, xmm30);
|
||||
vpexpandb(ymm5|k3|T_z, ymm30);
|
||||
vpexpandb(zmm5|k3|T_z, zmm30);
|
||||
vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(xmm5|k3|T_z, xmm30);
|
||||
vpexpandw(ymm5|k3|T_z, ymm30);
|
||||
vpexpandw(zmm5|k3|T_z, zmm30);
|
||||
vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
|
||||
gf2p8affineinvqb(xmm1, xmm2, 3);
|
||||
gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
gf2p8affineqb(xmm1, xmm2, 3);
|
||||
gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
gf2p8mulb(xmm1, xmm2);
|
||||
gf2p8mulb(xmm1, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(xmm1, xmm5, xmm2);
|
||||
vgf2p8mulb(ymm1, ymm5, ymm2);
|
||||
vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(xmm30, xmm31, xmm4);
|
||||
vgf2p8mulb(ymm30, ymm31, ymm4);
|
||||
vgf2p8mulb(zmm30, zmm31, zmm4);
|
||||
vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
|
||||
vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]);
|
||||
vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]);
|
||||
vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]);
|
||||
vcvtneps2bf16(xmm0, xword [rax + 64]);
|
||||
vcvtneps2bf16(xmm0 | k1, yword [rax + 64]);
|
||||
vcvtneps2bf16(ymm0 | k1, zword [rax + 64]);
|
||||
vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]);
|
||||
vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]);
|
||||
vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]);
|
||||
vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]);
|
||||
vaddph(zmm0, zmm1, ptr[rax+64]);
|
||||
vaddph(ymm0, ymm1, ptr[rax+64]);
|
||||
vaddph(xmm0, xmm1, ptr[rax+64]);
|
||||
vaddph(zmm0, zmm1, ptr_b[rax+64]);
|
||||
vaddph(ymm0, ymm1, ptr_b[rax+64]);
|
||||
vaddph(xmm0, xmm1, ptr_b[rax+64]);
|
||||
vaddsh(xmm0, xmm15, ptr[rax+64]);
|
||||
vaddsh(xmm0|k5|T_z|T_rd_sae, xmm15, xmm3);
|
||||
vcmpph(k1, xm15, ptr[rax+64], 1);
|
||||
vcmpph(k2, ym15, ptr[rax+64], 2);
|
||||
vcmpph(k3, zm15, ptr[rax+64], 3);
|
||||
vcmpph(k1, xm15, ptr_b[rax+64], 1);
|
||||
vcmpph(k2, ym15, ptr_b[rax+64], 2);
|
||||
vcmpph(k3, zm15, ptr_b[rax+64], 3);
|
||||
vcmpsh(k1, xm15, ptr[rax+64], 1);
|
||||
vcmpsh(k3|k5, xmm1, xmm25|T_sae, 4);
|
||||
vcomish(xmm1, ptr[rax+64]);
|
||||
vcomish(xmm1|T_sae, xmm15);
|
||||
vucomish(xmm1, ptr [rax+0x40]);
|
||||
vucomish(xmm1|T_sae, xmm15);
|
||||
vfmaddsub213ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmaddsub213ph(xmm1, xmm2, ptr_b [rax+0x40]);
|
||||
vfmaddsub213ph(xmm1|k3, xmm2, xmm5);
|
||||
vfmaddsub213ph(ymm1, ymm2, ptr [rax+0x40]);
|
||||
vfmaddsub213ph(ymm1, ymm2, ptr_b[rax+0x40]);
|
||||
vfmaddsub213ph(ymm1|k3, ymm2, ymm5);
|
||||
vfmaddsub213ph(zmm1, zmm2, ptr [rax+0x40]);
|
||||
vfmaddsub213ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmaddsub213ph(zmm1|T_ru_sae, zmm2, zmm5);
|
||||
vfmsubadd132ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmsubadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
|
||||
vfmsubadd132ph(ymm1, ymm2, ptr [rax+0x40]);
|
||||
vfmsubadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
|
||||
vfmsubadd132ph(zmm1, zmm2, ptr [rax+0x40]);
|
||||
vfmsubadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmsubadd132ph(zmm1|T_ru_sae, zmm2, zmm5);
|
||||
vfmadd132ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
|
||||
vfmadd132ph(ymm1, ymm2, ptr [rax+0x40]);
|
||||
vfmadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
|
||||
vfmadd132ph(zmm1, zmm2, ptr [rax+0x40]);
|
||||
vfmadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmadd132ph(zmm1|T_rd_sae, zmm2, zmm5);
|
||||
vfmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmsub231ph(xmm1, xmm2, ptr_b [rax+0x40]);
|
||||
vfmsub231ph(ymm1, ymm2, ptr [rax+0x40]);
|
||||
vfmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
|
||||
vfmsub231ph(zmm1, zmm2, ptr [rax+0x40]);
|
||||
vfmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
|
||||
vfnmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfnmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
|
||||
vfnmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfnmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
|
||||
vfmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vfmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfnmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vfnmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vfmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfnmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vfnmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfcmaddcph(xmm1|k1|T_z, xmm2, ptr [rax+0x40]);
|
||||
vfcmaddcph(ymm1|k1|T_z, ymm2, ptr [rax+0x40]);
|
||||
vfcmaddcph(zmm1|k1, zmm2, ptr [rax+0x40]);
|
||||
vfcmaddcph(zmm1|k1|T_rd_sae, zmm2, zmm5);
|
||||
vfcmaddcph(xmm1|k1|T_z, xmm2, ptr_b [rax+0x40]);
|
||||
vfcmaddcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
|
||||
vfcmaddcph(zmm1|k1|T_z, zmm2, ptr_b [rax+0x40]);
|
||||
vfmaddcph(xm1, xm2, ptr[rax+0x40]);
|
||||
vfmaddcph(ym1|k1|T_z, ym2, ptr_b[rax+0x40]);
|
||||
vfmaddcph(zm1, zm2, ptr_b[rax+0x40]);
|
||||
vfcmulcph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfcmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
|
||||
vfcmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vfmulcph(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vfmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
|
||||
vfmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||
vrcpph(xmm1, ptr [rax+0x40]);
|
||||
vrcpph(xmm1, ptr_b [rax+0x40]);
|
||||
vrcpph(ymm1, ptr [rax+0x40]);
|
||||
vrcpph(ymm1, ptr_b [rax+0x40]);
|
||||
vrcpph(zmm1, ptr [rax+0x40]);
|
||||
vrcpph(zmm1, ptr_b [rax+0x40]);
|
||||
vrcpsh(xmm1, xmm3, ptr [rax+0x40]);
|
||||
vrsqrtph(xmm1, ptr [rax+0x40]);
|
||||
vrsqrtph(xmm1, ptr_b [rax+0x40]);
|
||||
vrsqrtph(ymm2, ptr [rax+0x40]);
|
||||
vrsqrtph(ymm2, ptr_b [rax+0x40]);
|
||||
vrsqrtph(zmm2, ptr [rax+0x40]);
|
||||
vrsqrtph(zmm2, ptr_b [rax+0x40]);
|
||||
vrsqrtsh(xmm1|k5|T_z, xmm7, ptr [rax+0x40]);
|
||||
vsqrtph(xmm1|k4|T_z, ptr [rax+0x40]);
|
||||
vsqrtph(xmm1|k4|T_z, ptr_b [rax+0x40]);
|
||||
vsqrtph(ymm1|k4|T_z, ptr_b [rax+0x40]);
|
||||
vsqrtph(zmm1|k4|T_z, ptr [rax+0x40]);
|
||||
vsqrtph(zmm1|k4|T_z, ptr_b [rax+0x40]);
|
||||
vsqrtsh(xmm1|k4|T_z, xmm5, ptr [rax+0x40]);
|
||||
vsqrtsh(xmm1|k4|T_z|T_rd_sae, xmm5, xmm7);
|
||||
vscalefph(xmm1, xmm5, ptr [rax+0x40]);
|
||||
vscalefph(xmm1, xmm5, ptr_b [rax+0x40]);
|
||||
vscalefph(ymm1, ymm5, ptr [rax+0x40]);
|
||||
vscalefph(ymm1, ymm5, ptr_b [rax+0x40]);
|
||||
vscalefph(zmm1, zmm5, ptr [rax+0x40]);
|
||||
vscalefph(zmm1, zmm5, ptr_b [rax+0x40]);
|
||||
vscalefph(zmm1|k1|T_z|T_rd_sae, zmm5, zmm7);
|
||||
vscalefsh(xmm1, xmm5, ptr [rax+0x40]);
|
||||
vscalefsh(xmm1|k1|T_z|T_rd_sae, xmm5, xmm7);
|
||||
vreduceph(xmm1, ptr [rax+0x40], 0x1);
|
||||
vreduceph(xmm1, ptr_b [rax+0x40], 0x2);
|
||||
vreduceph(ymm1, ptr [rax+0x40], 0x3);
|
||||
vreduceph(ymm1, ptr_b [rax+0x40], 0x4);
|
||||
vreduceph(zmm1, ptr [rax+0x40], 0x5);
|
||||
vreduceph(zmm1, ptr_b [rax+0x40], 0x6);
|
||||
vreduceph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
|
||||
vreducesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
|
||||
vreducesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
|
||||
vrndscaleph(xmm1, ptr [rax+0x40], 0x1);
|
||||
vrndscaleph(xmm1, ptr_b [rax+0x40], 0x2);
|
||||
vrndscaleph(ymm1, ptr [rax+0x40], 0x3);
|
||||
vrndscaleph(ymm1, ptr_b [rax+0x40], 0x4);
|
||||
vrndscaleph(zmm1, ptr [rax+0x40], 0x5);
|
||||
vrndscaleph(zmm1, ptr_b [rax+0x40], 0x6);
|
||||
vrndscaleph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
|
||||
vrndscalesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
|
||||
vrndscalesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
|
||||
vfpclassph(k1, xword [rax+0x40], 0x1);
|
||||
vfpclassph(k1, xword_b[rax+0x40], 0x2);
|
||||
vfpclassph(k1, yword [rax+0x40], 0x3);
|
||||
vfpclassph(k1, yword_b[rax+0x40], 0x4);
|
||||
vfpclassph(k1, zword [rax+0x40], 0x5);
|
||||
vfpclassph(k1, zword_b[rax+0x40], 0x6);
|
||||
vfpclasssh(k1|k2, xmm3, 0x5);
|
||||
vfpclasssh(k1|k2, ptr [rax+0x40], 0x5);
|
||||
vgetexpph(xmm1, ptr [rax+0x40]);
|
||||
vgetexpph(ymm1, ptr_b [rax+0x40]);
|
||||
vgetexpph(zmm1, ptr [rax+0x40]);
|
||||
vgetexpph(zmm1|k1|T_z|T_sae, zmm5);
|
||||
vgetexpsh(xmm1, xmm5, ptr [rax+0x40]);
|
||||
vgetexpsh(xmm1|k1|T_z|T_sae, xmm3, xmm5);
|
||||
vgetmantph(xmm1, ptr [rax+0x40], 0x1);
|
||||
vgetmantph(ymm1, ptr_b [rax+0x40], 0x2);
|
||||
vgetmantph(zmm1, ptr [rax+0x40], 0x3);
|
||||
vgetmantph(zmm1|k1|T_z|T_sae, zmm5, 0x4);
|
||||
vgetmantsh(xmm1, xmm5, ptr [rax+0x40], 0x5);
|
||||
vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6);
|
||||
vmovsh(xmm1|k1|T_z, ptr [rax+0x40]);
|
||||
vmovsh(ptr [rax+0x40]|k1, xmm1);
|
||||
vmovsh(xmm1|k2|T_z, xmm3, xmm5);
|
||||
vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3);
|
||||
vcvtsh2sd(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vcvtsh2ss(xmm1|k1|T_z|T_sae, xmm2, xmm3);
|
||||
vcvtsh2ss(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vcvtss2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vcvtss2sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vcvtsh2si(edx|T_rd_sae, xmm1);
|
||||
vcvtsh2si(edx, ptr [rax+0x40]);
|
||||
vcvtsh2si(rdx|T_rd_sae, xmm1);
|
||||
vcvtsh2si(r8, ptr [rax+0x40]);
|
||||
vcvtph2dq(xmm1, xmm5);
|
||||
vcvtph2dq(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2dq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2dq(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2dq(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2dq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2dq(zmm1|k5|T_z|T_rd_sae, ymm3);
|
||||
vcvtph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtph2psx(xmm1, xmm5);
|
||||
vcvtph2psx(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2psx(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2psx(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2psx(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2psx(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2psx(zmm1|k5|T_z|T_sae, ymm3);
|
||||
vcvtph2psx(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2psx(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtph2udq(xmm1, xmm5);
|
||||
vcvtph2udq(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2udq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2udq(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2udq(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2udq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2udq(zmm1|k5|T_z|T_rd_sae, ymm3);
|
||||
vcvtph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvttph2dq(xmm1, xmm5);
|
||||
vcvttph2dq(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2dq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2dq(ymm1|k2|T_z, xmm5);
|
||||
vcvttph2dq(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2dq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2dq(zmm1|k5|T_z|T_sae, ymm3);
|
||||
vcvttph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvttph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvttph2udq(xmm1, xmm5);
|
||||
vcvttph2udq(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2udq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2udq(ymm1|k2|T_z, xmm5);
|
||||
vcvttph2udq(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2udq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2udq(zmm1|k5|T_z|T_sae, ymm3);
|
||||
vcvttph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvttph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtph2pd(xmm1, xmm5);
|
||||
vcvtph2pd(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2pd(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2pd(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2pd(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2pd(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2pd(zmm1|k5|T_z|T_sae, xmm3);
|
||||
vcvtph2pd(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2pd(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtph2qq(xmm1, xmm5);
|
||||
vcvtph2qq(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2qq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2qq(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2qq(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2qq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2qq(zmm1|k5|T_z|T_rd_sae, xmm3);
|
||||
vcvtph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtph2uqq(xmm1, xmm5);
|
||||
vcvtph2uqq(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2uqq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2uqq(ymm1|k2|T_z, xmm5);
|
||||
vcvtph2uqq(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2uqq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2uqq(zmm1|k5|T_z|T_rd_sae, xmm3);
|
||||
vcvtph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvtph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvttph2uqq(xmm1, xmm5);
|
||||
vcvttph2uqq(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2uqq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uqq(ymm1|k2|T_z, xmm5);
|
||||
vcvttph2uqq(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2uqq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uqq(zmm1|k5|T_z|T_sae, xmm3);
|
||||
vcvttph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvttph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtdq2ph(xmm1, xmm5);
|
||||
vcvtdq2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtdq2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtdq2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtdq2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtdq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtdq2ph(ymm1, ptr [rax+0x40]);
|
||||
vcvtdq2ph(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtps2phx(xmm1, xmm5);
|
||||
vcvtps2phx(xmm1, xword [rax+0x40]);
|
||||
vcvtps2phx(xmm1, xword_b [rax+0x40]);
|
||||
vcvtps2phx(xmm1, yword [rax+0x40]);
|
||||
vcvtps2phx(xmm1, yword_b [rax+0x40]);
|
||||
vcvtps2phx(ymm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtps2phx(ymm1, ptr [rax+0x40]);
|
||||
vcvtps2phx(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtudq2ph(xmm1, xmm5);
|
||||
vcvtudq2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtudq2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtudq2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtudq2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtudq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtudq2ph(ymm1, ptr [rax+0x40]);
|
||||
vcvtudq2ph(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, xmm5);
|
||||
vcvtpd2ph(xmm1, ymm5);
|
||||
vcvtpd2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtpd2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, zword [rax+0x40]);
|
||||
vcvtpd2ph(xmm1, zword_b [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, xmm5);
|
||||
vcvtqq2ph(xmm1, ymm5);
|
||||
vcvtqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtqq2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, zword [rax+0x40]);
|
||||
vcvtqq2ph(xmm1, zword_b [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, xmm5);
|
||||
vcvtuqq2ph(xmm1, ymm5);
|
||||
vcvtuqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtuqq2ph(xmm1, xword [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, xword_b [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, yword [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, yword_b [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, zword [rax+0x40]);
|
||||
vcvtuqq2ph(xmm1, zword_b [rax+0x40]);
|
||||
vcvtph2uw(xmm1, xmm5);
|
||||
vcvtph2uw(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2uw(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2uw(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2uw(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2uw(zmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtph2uw(zmm1, ptr [rax+0x40]);
|
||||
vcvtph2uw(zmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2w(xmm1, xmm5);
|
||||
vcvtph2w(xmm1, ptr [rax+0x40]);
|
||||
vcvtph2w(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtph2w(ymm1, ptr [rax+0x40]);
|
||||
vcvtph2w(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtph2w(zmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtph2w(zmm1, ptr [rax+0x40]);
|
||||
vcvtph2w(zmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uw(xmm1, xmm5);
|
||||
vcvttph2uw(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2uw(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uw(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2uw(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2uw(zmm1|k2|T_z|T_sae, zmm5);
|
||||
vcvttph2uw(zmm1, ptr [rax+0x40]);
|
||||
vcvttph2uw(zmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2w(xmm1, xmm5);
|
||||
vcvttph2w(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2w(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2w(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2w(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2w(zmm1|k2|T_z|T_sae, zmm5);
|
||||
vcvttph2w(zmm1, ptr [rax+0x40]);
|
||||
vcvttph2w(zmm1, ptr_b [rax+0x40]);
|
||||
vcvtuw2ph(xmm1, xmm5);
|
||||
vcvtuw2ph(xmm1, ptr [rax+0x40]);
|
||||
vcvtuw2ph(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtuw2ph(ymm1, ptr [rax+0x40]);
|
||||
vcvtuw2ph(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtuw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtuw2ph(zmm1, ptr [rax+0x40]);
|
||||
vcvtuw2ph(zmm1, ptr_b [rax+0x40]);
|
||||
vcvtw2ph(xmm1, xmm5);
|
||||
vcvtw2ph(xmm1, ptr [rax+0x40]);
|
||||
vcvtw2ph(xmm1, ptr_b [rax+0x40]);
|
||||
vcvtw2ph(ymm1, ptr [rax+0x40]);
|
||||
vcvtw2ph(ymm1, ptr_b [rax+0x40]);
|
||||
vcvtw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
|
||||
vcvtw2ph(zmm1, ptr [rax+0x40]);
|
||||
vcvtw2ph(zmm1, ptr_b [rax+0x40]);
|
||||
vcvtps2ph(xmm1, xmm2, 0x1);
|
||||
vcvtps2ph(ptr [rax+0x40], xmm2, 0x2);
|
||||
vcvtps2ph(xmm1, ymm2, 0x3);
|
||||
vcvtps2ph(ptr [rax+0x40], ymm2, 0x4);
|
||||
vcvtps2ph(xmm1|k1|T_z, xmm2, 0x5);
|
||||
vcvtps2ph(ptr [rax+0x40]|k1, xmm3, 0x6);
|
||||
vcvtps2ph(xmm1|k2, ymm4, 0x7);
|
||||
vcvtps2ph(ptr [rax+0x40]|k2, ymm5, 0x8);
|
||||
vcvtps2ph(ymm1|k2|T_sae, zmm5, 0x9);
|
||||
vcvtps2ph(ptr [rax+0x40]|k5, zmm4, 0xa);
|
||||
vcvtsh2usi(ecx|T_rd_sae, xmm1);
|
||||
vcvtsh2usi(eax, ptr [rax+0x40]);
|
||||
vcvtsh2usi(r9|T_rd_sae, xmm1);
|
||||
vcvtsh2usi(r13, ptr [rax+0x40]);
|
||||
vcvttsh2si(ecx|T_sae, xmm1);
|
||||
vcvttsh2si(eax, ptr [rax+0x40]);
|
||||
vcvttsh2si(r9|T_sae, xmm1);
|
||||
vcvttsh2si(r13, ptr [rax+0x40]);
|
||||
vcvttsh2usi(ecx|T_sae, xmm1);
|
||||
vcvttsh2usi(eax, ptr [rax+0x40]);
|
||||
vcvttsh2usi(r9|T_sae, xmm1);
|
||||
vcvttsh2usi(r13, ptr [rax+0x40]);
|
||||
vcvttph2qq(xmm1, xmm5);
|
||||
vcvttph2qq(xmm1, ptr [rax+0x40]);
|
||||
vcvttph2qq(xmm1, ptr_b [rax+0x40]);
|
||||
vcvttph2qq(ymm1|k2|T_z, xmm5);
|
||||
vcvttph2qq(ymm1, ptr [rax+0x40]);
|
||||
vcvttph2qq(ymm1, ptr_b [rax+0x40]);
|
||||
vcvttph2qq(zmm1|k5|T_z|T_sae, xmm3);
|
||||
vcvttph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
|
||||
vcvttph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
|
||||
vcvtsi2sh(xmm1|T_rd_sae, xmm2, eax);
|
||||
vcvtsi2sh(xmm1, xmm2, dword [rax+0x40]);
|
||||
vcvtsi2sh(xmm1|T_rd_sae, xmm2, r9);
|
||||
vcvtsi2sh(xmm1, xmm2, qword [rax+0x40]);
|
||||
vcvtusi2sh(xmm1|T_rd_sae, xmm2, eax);
|
||||
vcvtusi2sh(xmm1, xmm2, dword [rax+0x40]);
|
||||
vcvtusi2sh(xmm1|T_rd_sae, xmm2, r9);
|
||||
vcvtusi2sh(xmm1, xmm2, qword [rax+0x40]);
|
||||
aadd(ptr[rax], ecx);
|
||||
aadd(ptr[eax], ecx);
|
||||
aadd(ptr[rax], r10);
|
||||
aand(ptr[rax], ecx);
|
||||
aand(ptr[eax], ecx);
|
||||
aand(ptr[rax], r10);
|
||||
aor(ptr[rax], ecx);
|
||||
aor(ptr[eax], ecx);
|
||||
aor(ptr[rax], r10);
|
||||
axor(ptr[rax], ecx);
|
||||
axor(ptr[eax], ecx);
|
||||
axor(ptr[rax], r10);
|
||||
cmpbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmplexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmplxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnlexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnlxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnpxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmppxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
vsha512msg1(ymm3, xmm5);
|
||||
vsha512msg2(ymm9, ymm10);
|
||||
vsha512rnds2(ymm1, ymm3, xmm2);
|
||||
vsm3msg1(xmm1, xmm2, xmm3);
|
||||
vsm3msg1(xmm1, xmm2, ptr [rax]);
|
||||
vsm3msg2(xmm5, xmm7, xmm3);
|
||||
vsm3msg2(xmm5, xmm6, ptr [rax]);
|
||||
vsm3rnds2(xmm5, xmm7, xmm3, 0x12);
|
||||
vsm3rnds2(xmm5, xmm7, ptr [rcx], 0x34);
|
||||
vsm4key4(xmm1, xmm2, xmm3);
|
||||
vsm4key4(xmm1, xmm2, ptr [rdx]);
|
||||
vsm4rnds4(xmm1, xmm2, xmm3);
|
||||
vsm4rnds4(xmm5, xmm6, ptr [rcx+rax*4]);
|
||||
vpdpbssd(xmm1, xmm2, xmm3);
|
||||
vpdpbssd(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbssds(xmm1, xmm2, xmm3);
|
||||
vpdpbssds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbsud(xmm1, xmm2, xmm3);
|
||||
vpdpbsud(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbsuds(xmm1, xmm2, xmm3);
|
||||
vpdpbsuds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbuud(xmm1, xmm2, xmm3);
|
||||
vpdpbuud(ymm1, ymm2, ptr [rax]);
|
||||
vpdpbuuds(xmm1, xmm2, xmm3);
|
||||
vpdpbuuds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwsud(xmm1, xmm2, xmm3);
|
||||
vpdpwsud(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwsuds(xmm1, xmm2, xmm3);
|
||||
vpdpwsuds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwusd(xmm1, xmm2, xmm3);
|
||||
vpdpwusd(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwusds(xmm1, xmm2, xmm3);
|
||||
vpdpwusds(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwuud(xmm1, xmm2, xmm3);
|
||||
vpdpwuud(ymm1, ymm2, ptr [rax]);
|
||||
vpdpwuuds(xmm1, xmm2, xmm3);
|
||||
vpdpwuuds(ymm1, ymm2, ptr [rax]);
|
||||
+294
@@ -0,0 +1,294 @@
|
||||
//
|
||||
vcvtbf162ibs(xm1, xm2);
|
||||
vcvtbf162ibs(xm1, ptr[rax+64]);
|
||||
vcvtbf162ibs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvtbf162ibs(ym1, ym2);
|
||||
vcvtbf162ibs(ym1, ptr[rax+64]);
|
||||
vcvtbf162ibs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvtbf162ibs(zm1, zm2);
|
||||
vcvtbf162ibs(zm1, ptr[rax+64]);
|
||||
vcvtbf162ibs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvtbf162iubs(xm1, xm2);
|
||||
vcvtbf162iubs(xm1, ptr[rax+64]);
|
||||
vcvtbf162iubs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvtbf162iubs(ym1, ym2);
|
||||
vcvtbf162iubs(ym1, ptr[rax+64]);
|
||||
vcvtbf162iubs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvtbf162iubs(zm1, zm2);
|
||||
vcvtbf162iubs(zm1, ptr[rax+64]);
|
||||
vcvtbf162iubs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttbf162ibs(xm1, xm2);
|
||||
vcvttbf162ibs(xm1, ptr[rax+64]);
|
||||
vcvttbf162ibs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttbf162ibs(ym1, ym2);
|
||||
vcvttbf162ibs(ym1, ptr[rax+64]);
|
||||
vcvttbf162ibs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttbf162ibs(zm1, zm2);
|
||||
vcvttbf162ibs(zm1, ptr[rax+64]);
|
||||
vcvttbf162ibs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttbf162iubs(xm1, xm2);
|
||||
vcvttbf162iubs(xm1, ptr[rax+64]);
|
||||
vcvttbf162iubs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttbf162iubs(ym1, ym2);
|
||||
vcvttbf162iubs(ym1, ptr[rax+64]);
|
||||
vcvttbf162iubs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttbf162iubs(zm1, zm2);
|
||||
vcvttbf162iubs(zm1, ptr[rax+64]);
|
||||
vcvttbf162iubs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttpd2qqs(xm1, xm2);
|
||||
vcvttpd2qqs(xm1, ptr[rax+64]);
|
||||
vcvttpd2qqs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttpd2qqs(ym1, ym2);
|
||||
vcvttpd2qqs(ym1, ptr[rax+64]);
|
||||
vcvttpd2qqs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttpd2qqs(zm1, zm2);
|
||||
vcvttpd2qqs(zm1, zm2|T_sae);
|
||||
vcvttpd2qqs(zm1, ptr[rax+64]);
|
||||
vcvttpd2qqs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttpd2uqqs(xm1, xm2);
|
||||
vcvttpd2uqqs(xm1, ptr[rax+64]);
|
||||
vcvttpd2uqqs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttpd2uqqs(ym1, ym2);
|
||||
vcvttpd2uqqs(ym1, ptr[rax+64]);
|
||||
vcvttpd2uqqs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttpd2uqqs(zm1, zm2);
|
||||
vcvttpd2uqqs(zm1, zm2|T_sae);
|
||||
vcvttpd2uqqs(zm1, ptr[rax+64]);
|
||||
vcvttpd2uqqs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvtph2ibs(xm1, xm2);
|
||||
vcvtph2ibs(xm1, ptr[rax+64]);
|
||||
vcvtph2ibs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvtph2ibs(ym1, ym2);
|
||||
vcvtph2ibs(ym1, ptr[rax+64]);
|
||||
vcvtph2ibs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvtph2ibs(zm1, zm2);
|
||||
vcvtph2ibs(zm1, zm2|T_ru_sae);
|
||||
vcvtph2ibs(zm1, ptr[rax+64]);
|
||||
vcvtph2ibs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvtph2iubs(xm1, xm2);
|
||||
vcvtph2iubs(xm1, ptr[rax+64]);
|
||||
vcvtph2iubs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvtph2iubs(ym1, ym2);
|
||||
vcvtph2iubs(ym1, ptr[rax+64]);
|
||||
vcvtph2iubs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvtph2iubs(zm1, zm2);
|
||||
vcvtph2iubs(zm1, zm2|T_ru_sae);
|
||||
vcvtph2iubs(zm1, ptr[rax+64]);
|
||||
vcvtph2iubs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttph2ibs(xm1, xm2);
|
||||
vcvttph2ibs(xm1, ptr[rax+64]);
|
||||
vcvttph2ibs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttph2ibs(ym1, ym2);
|
||||
vcvttph2ibs(ym1, ptr[rax+64]);
|
||||
vcvttph2ibs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttph2ibs(zm1, zm2);
|
||||
vcvttph2ibs(zm1, zm2|T_ru_sae);
|
||||
vcvttph2ibs(zm1, ptr[rax+64]);
|
||||
vcvttph2ibs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttph2iubs(xm1, xm2);
|
||||
vcvttph2iubs(xm1, ptr[rax+64]);
|
||||
vcvttph2iubs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttph2iubs(ym1, ym2);
|
||||
vcvttph2iubs(ym1, ptr[rax+64]);
|
||||
vcvttph2iubs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttph2iubs(zm1, zm2);
|
||||
vcvttph2iubs(zm1, zm2|T_ru_sae);
|
||||
vcvttph2iubs(zm1, ptr[rax+64]);
|
||||
vcvttph2iubs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttps2dqs(xm1, xm2);
|
||||
vcvttps2dqs(xm1, ptr[rax+64]);
|
||||
vcvttps2dqs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2dqs(ym1, ym2);
|
||||
vcvttps2dqs(ym1, ptr[rax+64]);
|
||||
vcvttps2dqs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2dqs(zm1, zm2);
|
||||
vcvttps2dqs(zm1, zm2|T_sae);
|
||||
vcvttps2dqs(zm1, ptr[rax+64]);
|
||||
vcvttps2dqs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvtps2ibs(xm1, xm2);
|
||||
vcvtps2ibs(xm1, ptr[rax+64]);
|
||||
vcvtps2ibs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvtps2ibs(ym1, ym2);
|
||||
vcvtps2ibs(ym1, ptr[rax+64]);
|
||||
vcvtps2ibs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvtps2ibs(zm1, zm2);
|
||||
vcvtps2ibs(zm1, zm2|T_ru_sae);
|
||||
vcvtps2ibs(zm1, ptr[rax+64]);
|
||||
vcvtps2ibs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvtps2iubs(xm1, xm2);
|
||||
vcvtps2iubs(xm1, ptr[rax+64]);
|
||||
vcvtps2iubs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvtps2iubs(ym1, ym2);
|
||||
vcvtps2iubs(ym1, ptr[rax+64]);
|
||||
vcvtps2iubs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvtps2iubs(zm1, zm2);
|
||||
vcvtps2iubs(zm1, zm2|T_ru_sae);
|
||||
vcvtps2iubs(zm1, ptr[rax+64]);
|
||||
vcvtps2iubs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttps2ibs(xm1, xm2);
|
||||
vcvttps2ibs(xm1, ptr[rax+64]);
|
||||
vcvttps2ibs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2ibs(ym1, ym2);
|
||||
vcvttps2ibs(ym1, ptr[rax+64]);
|
||||
vcvttps2ibs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2ibs(zm1, zm2);
|
||||
vcvttps2ibs(zm1, zm2|T_ru_sae);
|
||||
vcvttps2ibs(zm1, ptr[rax+64]);
|
||||
vcvttps2ibs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttps2iubs(xm1, xm2);
|
||||
vcvttps2iubs(xm1, ptr[rax+64]);
|
||||
vcvttps2iubs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2iubs(ym1, ym2);
|
||||
vcvttps2iubs(ym1, ptr[rax+64]);
|
||||
vcvttps2iubs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2iubs(zm1, zm2);
|
||||
vcvttps2iubs(zm1, zm2|T_ru_sae);
|
||||
vcvttps2iubs(zm1, ptr[rax+64]);
|
||||
vcvttps2iubs(zm1, ptr_b[rax+64]);
|
||||
//
|
||||
vcvttps2udqs(xm1, xm2);
|
||||
vcvttps2udqs(xm1, ptr[rax+64]);
|
||||
vcvttps2udqs(xm1, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2udqs(ym1, ym2);
|
||||
vcvttps2udqs(ym1, ptr[rax+64]);
|
||||
vcvttps2udqs(ym1, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2udqs(zm1, zm2);
|
||||
vcvttps2udqs(zm1, zm2|T_sae);
|
||||
vcvttps2udqs(zm1, ptr[rax+64]);
|
||||
vcvttps2udqs(zm1, ptr_b[rax+64]);
|
||||
|
||||
//
|
||||
vcvttpd2dqs(xm1|k1|T_z, xm2);
|
||||
vcvttpd2dqs(xm1|k1|T_z, xword [rax+64]);
|
||||
vcvttpd2dqs(xm1|k1|T_z, xword_b[rax+64]);
|
||||
|
||||
vcvttpd2dqs(xm1|k1|T_z, ym2);
|
||||
vcvttpd2dqs(xm1|k1|T_z, yword [rax+64]);
|
||||
vcvttpd2dqs(xm1|k1|T_z, yword_b[rax+64]);
|
||||
|
||||
vcvttpd2dqs(ym1|k1|T_z, zm2);
|
||||
vcvttpd2dqs(ym1|k1|T_z, zm2|T_sae);
|
||||
vcvttpd2dqs(ym1|k1|T_z, zword [rax+64]);
|
||||
vcvttpd2dqs(ym1|k1|T_z, zword_b[rax+64]);
|
||||
|
||||
//
|
||||
vcvttpd2udqs(xm1|k1|T_z, xm2);
|
||||
vcvttpd2udqs(xm1|k1|T_z, xword [rax+64]);
|
||||
vcvttpd2udqs(xm1|k1|T_z, xword_b[rax+64]);
|
||||
|
||||
vcvttpd2udqs(xm1|k1|T_z, ym2);
|
||||
vcvttpd2udqs(xm1|k1|T_z, yword [rax+64]);
|
||||
vcvttpd2udqs(xm1|k1|T_z, yword_b[rax+64]);
|
||||
|
||||
vcvttpd2udqs(ym1|k1|T_z, zm2);
|
||||
vcvttpd2udqs(ym1|k1|T_z, zm2|T_sae);
|
||||
vcvttpd2udqs(ym1|k1|T_z, zword [rax+64]);
|
||||
vcvttpd2udqs(ym1|k1|T_z, zword_b[rax+64]);
|
||||
//
|
||||
vcvttps2qqs(xm1|k1|T_z, xm2);
|
||||
vcvttps2qqs(xm1|k1|T_z, ptr [rax+64]);
|
||||
vcvttps2qqs(xm1|k1|T_z, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2qqs(ym1|k1|T_z, xm2);
|
||||
vcvttps2qqs(ym1|k1|T_z, ptr [rax+64]);
|
||||
vcvttps2qqs(ym1|k1|T_z, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2qqs(zm1, ym2);
|
||||
vcvttps2qqs(zm1|k1|T_z, ym2);
|
||||
vcvttps2qqs(zm1|k1|T_z|T_sae, ym2);
|
||||
vcvttps2qqs(zm1|k1|T_z, ptr [rax+64]);
|
||||
vcvttps2qqs(zm1|k1|T_z, ptr_b[rax+64]);
|
||||
|
||||
//
|
||||
vcvttps2uqqs(xm1|k1|T_z, xm2);
|
||||
vcvttps2uqqs(xm1|k1|T_z, ptr [rax+64]);
|
||||
vcvttps2uqqs(xm1|k1|T_z, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2uqqs(ym1|k1|T_z, xm2);
|
||||
vcvttps2uqqs(ym1|k1|T_z, ptr [rax+64]);
|
||||
vcvttps2uqqs(ym1|k1|T_z, ptr_b[rax+64]);
|
||||
|
||||
vcvttps2uqqs(zm1, ym2);
|
||||
vcvttps2uqqs(zm1|k1|T_z, ym2);
|
||||
vcvttps2uqqs(zm1|k1|T_z|T_sae, ym2);
|
||||
vcvttps2uqqs(zm1|k1|T_z, ptr [rax+64]);
|
||||
vcvttps2uqqs(zm1|k1|T_z, ptr_b[rax+64]);
|
||||
|
||||
//
|
||||
vcvttsd2sis(eax, xm1);
|
||||
vcvttsd2sis(eax, xm1|T_sae);
|
||||
vcvttsd2sis(eax, ptr[rax+64]);
|
||||
|
||||
vcvttsd2sis(r30, xm1);
|
||||
vcvttsd2sis(r30, xm1|T_sae);
|
||||
vcvttsd2sis(r30, ptr[rax+64]);
|
||||
//
|
||||
vcvttsd2usis(eax, xm1);
|
||||
vcvttsd2usis(eax, xm1|T_sae);
|
||||
vcvttsd2usis(eax, ptr[rax+64]);
|
||||
|
||||
vcvttsd2usis(r30, xm1);
|
||||
vcvttsd2usis(r30, xm1|T_sae);
|
||||
vcvttsd2usis(r30, ptr[rax+64]);
|
||||
//
|
||||
vcvttss2sis(eax, xm1);
|
||||
vcvttss2sis(eax, xm1|T_sae);
|
||||
vcvttss2sis(eax, ptr[rax+64]);
|
||||
|
||||
vcvttss2sis(r30, xm1);
|
||||
vcvttss2sis(r30, xm1|T_sae);
|
||||
vcvttss2sis(r30, ptr[rax+64]);
|
||||
//
|
||||
vcvttss2usis(eax, xm1);
|
||||
vcvttss2usis(eax, xm1|T_sae);
|
||||
vcvttss2usis(eax, ptr[rax+64]);
|
||||
|
||||
vcvttss2usis(r30, xm1);
|
||||
vcvttss2usis(r30, xm1|T_sae);
|
||||
vcvttss2usis(r30, ptr[rax+64]);
|
||||
Vendored
+8
@@ -0,0 +1,8 @@
|
||||
#include <stdio.h>
|
||||
|
||||
/*
	Print "x32" when both __x86_64__ and __ILP32__ are defined by the compiler;
	print nothing otherwise. The exit status is always 0.
*/
int main()
{
#if defined(__x86_64__) && defined(__ILP32__)
	puts("x32");
#endif
	return 0;
}
|
||||
Vendored
+1883
File diff suppressed because it is too large
Load Diff
Vendored
+63
@@ -0,0 +1,63 @@
|
||||
#pragma once
|
||||
#include <stdio.h>
|
||||
|
||||
// Minimal register stand-in: stores the integer it was constructed with.
struct Reg {
	int r_; // value passed to the constructor
	Reg(int r)
		: r_(r)
	{
	}
};
|
||||
|
||||
// Each accessor owns one function-local static Reg (values 0, 1, 2) and
// returns a reference to it.
inline const Reg& getReg0()
{
	static const Reg reg(0);
	return reg;
}

inline const Reg& getReg1()
{
	static const Reg reg(1);
	return reg;
}

inline const Reg& getReg2()
{
	static const Reg reg(2);
	return reg;
}

// File-local references bound to the objects returned by the accessors above.
static const Reg& r0 = getReg0();

static const Reg& r1 = getReg1();

static const Reg& r2 = getReg2();
|
||||
|
||||
inline void putReg()
|
||||
{
|
||||
puts("putReg");
|
||||
printf("r0=%p, %d\n", &r0, r0.r_);
|
||||
printf("r0=%p, %d\n", &r0, r1.r_);
|
||||
printf("r0=%p, %d\n", &r0, r2.r_);
|
||||
}
|
||||
|
||||
// Prints a trace line from its constructor and destructor so the moment of
// construction/destruction is visible on stdout.
struct A {
	int a; // set to 5 by the constructor

	A() : a(5) { puts("A cstr"); }

	~A() { puts("A dstr"); }

	// Print the current value of the member.
	void put() const { printf("a=%d\n", a); }
};
|
||||
|
||||
template<int dummy = 0>
|
||||
struct XT {
|
||||
static A a;
|
||||
};
|
||||
|
||||
template<int dummy>
|
||||
A XT<dummy>::a;
|
||||
|
||||
typedef XT<0> X;
|
||||
|
||||
void init();
|
||||
|
||||
struct Init {
|
||||
Init()
|
||||
{
|
||||
puts("Init");
|
||||
init();
|
||||
putReg();
|
||||
}
|
||||
};
|
||||
static Init s_init;
|
||||
|
||||
Vendored
+51
@@ -0,0 +1,51 @@
|
||||
#include <stdio.h>
|
||||
|
||||
// Constructor prints "XXX"; a single const file-local instance is defined
// right after the type.
struct XXX {
	XXX()
	{
		puts("XXX");
	}
};
static const XXX s_sss;
|
||||
|
||||
// Prints a trace line from its constructor and destructor.
struct A {
	int aaa; // set to 123 by the constructor

	A() : aaa(123) { puts("A cstr"); }

	~A() { puts("A dstr"); }

	// Print the current value of the member.
	void put() const { printf("aaa=%d\n", aaa); }
};
|
||||
|
||||
template<int dummy = 0>
|
||||
struct XT {
|
||||
static A sss;
|
||||
};
|
||||
|
||||
template<int dummy>
|
||||
A XT<dummy>::sss;
|
||||
|
||||
typedef XT<0> X;
|
||||
|
||||
static struct Init {
|
||||
Init()
|
||||
{
|
||||
puts("Init");
|
||||
X::sss.put();
|
||||
}
|
||||
} s_init;
|
||||
|
||||
// Prints "f" and returns 4.
int f()
{
	puts("f");
	return 4;
}

// Dynamically initialised from f(), so "f" is printed when r is initialised.
static const int r = f();
|
||||
|
||||
int main()
|
||||
{
|
||||
puts("main");
|
||||
printf("r=%d\n", r);
|
||||
X::sss.put();
|
||||
}
|
||||
Vendored
+9
@@ -0,0 +1,9 @@
|
||||
#include "lib.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
puts("main");
|
||||
X::a.put();
|
||||
putReg();
|
||||
}
|
||||
|
||||
Vendored
+13
@@ -0,0 +1,13 @@
|
||||
#include "lib.h"
|
||||
|
||||
void init()
|
||||
{
|
||||
static bool init = true;
|
||||
printf("in lib_test %d\n", init);
|
||||
if (!init) return;
|
||||
init = false;
|
||||
X::a.put();
|
||||
putReg();
|
||||
}
|
||||
|
||||
|
||||
Vendored
+2210
File diff suppressed because it is too large
Load Diff
Vendored
+3554
File diff suppressed because it is too large
Load Diff
Vendored
+2474
File diff suppressed because it is too large
Load Diff
+37
@@ -0,0 +1,37 @@
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
// Generates a two-instruction function: "mov eax, x" followed by "ret".
struct Code : Xbyak::CodeGenerator {
	Code(int x)
	{
		mov(eax, x); // value handed back to the caller in eax
		ret();
	}
};
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
#ifdef XBYAK_USE_MMAP_ALLOCATOR
|
||||
puts("use Allocator with mmap");
|
||||
#else
|
||||
puts("use Allocator with posix_memalign");
|
||||
#endif
|
||||
const int N = 70000;
|
||||
std::vector<Code*> v(N);
|
||||
for (int i = 0; i < N; i++) {
|
||||
v[i] = new Code(i);
|
||||
}
|
||||
long long sum = 0;
|
||||
for (int i = 0; i < N; i++) {
|
||||
sum += v[i]->getCode<int (*)()>()();
|
||||
}
|
||||
for (int i = 0; i < N; i++) {
|
||||
delete v[i];
|
||||
}
|
||||
printf("sum=%lld\n", sum);
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR %s\n", e.what());
|
||||
}
|
||||
Vendored
+46
@@ -0,0 +1,46 @@
|
||||
#include <stdio.h>
|
||||
#define XBYAK_ENABLE_OMITTED_OPERAND
|
||||
#include "xbyak/xbyak.h"
|
||||
#define CYBOZU_TEST_DISABLE_AUTO_RUN
|
||||
#include "cybozu/test.hpp"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4245)
|
||||
#pragma warning(disable : 4312)
|
||||
#endif
|
||||
// Code generator whose public members are pulled in textually from nm.cpp.
class Sample : public CodeGenerator {
private:
	// declared but not defined: assignment is not meant to be used
	void operator=(const Sample&);

public:
#include "nm.cpp"
};
|
||||
|
||||
|
||||
class ErrorSample : public CodeGenerator {
|
||||
void operator=(const ErrorSample&);
|
||||
public:
|
||||
void gen()
|
||||
{
|
||||
#ifndef XBYAK_NO_EXCEPTION
|
||||
CYBOZU_TEST_EXCEPTION(mov(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(test(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(adc(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(setz(eax), std::exception);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
// the size of Operand exceeds 32 bit.
|
||||
CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 8u);
|
||||
Sample s;
|
||||
s.gen();
|
||||
ErrorSample es;
|
||||
es.gen();
|
||||
} catch (std::exception& e) {
|
||||
fprintf(stderr, "ERR=%s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
Vendored
+111
@@ -0,0 +1,111 @@
|
||||
#define XBYAK_NO_EXCEPTION
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
int g_err = 0;
|
||||
int g_test = 0;
|
||||
|
||||
void assertEq(int x, int y)
|
||||
{
|
||||
if (x != y) {
|
||||
printf("ERR x=%d y=%d\n", x, y);
|
||||
g_err++;
|
||||
}
|
||||
g_test++;
|
||||
}
|
||||
|
||||
void assertBool(bool b)
|
||||
{
|
||||
if (!b) {
|
||||
printf("ERR assertBool\n");
|
||||
g_err++;
|
||||
}
|
||||
g_test++;
|
||||
}
|
||||
|
||||
void test1()
|
||||
{
|
||||
const int v = 123;
|
||||
struct Code : CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
mov(eax, v);
|
||||
ret();
|
||||
}
|
||||
} c;
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
assertEq(f(), v);
|
||||
assertEq(Xbyak::GetError(), ERR_NONE);
|
||||
}
|
||||
|
||||
void test2()
|
||||
{
|
||||
struct Code : CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
Label lp;
|
||||
L(lp);
|
||||
L(lp);
|
||||
}
|
||||
} c;
|
||||
assertEq(Xbyak::GetError(), ERR_LABEL_IS_REDEFINED);
|
||||
Xbyak::ClearError();
|
||||
}
|
||||
|
||||
void test3()
|
||||
{
|
||||
static struct EmptyAllocator : Xbyak::Allocator {
|
||||
uint8_t *alloc(size_t) XBYAK_OVERRIDE { return 0; }
|
||||
} emptyAllocator;
|
||||
struct Code : CodeGenerator {
|
||||
Code() : CodeGenerator(8, 0, &emptyAllocator)
|
||||
{
|
||||
mov(eax, 3);
|
||||
assertBool(Xbyak::GetError() == 0);
|
||||
mov(eax, 3);
|
||||
mov(eax, 3);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
assertBool(Xbyak::GetError() == 0);
|
||||
}
|
||||
} c;
|
||||
}
|
||||
|
||||
void test4()
|
||||
{
|
||||
struct Code : CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
mov(ptr[eax], 1);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
|
||||
test(ptr[eax], 1);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
|
||||
adc(ptr[eax], 1);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
|
||||
setz(eax);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test1();
|
||||
test2();
|
||||
test3();
|
||||
test4();
|
||||
if (g_err) {
|
||||
printf("err %d/%d\n", g_err, g_test);
|
||||
} else {
|
||||
printf("all ok %d\n", g_test);
|
||||
}
|
||||
return g_err != 0;
|
||||
}
|
||||
+56
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
normalize prefix
|
||||
*/
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include <memory.h>
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
|
||||
/*
	Normalize the leading prefix bytes of a hex-encoded instruction line.

	1. Remove the first '(' and the first ')' found after that removal. If
	   there is a '(' but no ')', print a diagnostic to stderr and return "".
	2. From the start of the line, consume consecutive two-character tokens
	   that are one of "66", "67", "F2", "F3".
	3. Return the distinct consumed tokens in sorted order followed by the rest
	   of the line.
*/
std::string normalize(std::string line)
{
	/* nasm generates byte codes containing () for xbegin, so remove it. */
	const size_t openPos = line.find('(');
	if (openPos != std::string::npos) {
		line.erase(openPos, 1);
		const size_t closePos = line.find(')');
		if (closePos == std::string::npos) {
			fprintf(stderr, "line error {%s}\n", line.c_str());
			return "";
		}
		line.erase(closePos, 1);
	}

	static const char prefixTbl[][3] = { "66", "67", "F2", "F3" };
	const size_t prefixNum = sizeof(prefixTbl) / sizeof(prefixTbl[0]);
	std::set<std::string> seen; // std::set keeps the tokens unique and sorted

	size_t pos = 0;
	while (pos < line.size()) {
		// index of the table entry matching the two characters at pos
		size_t hit = 0;
		while (hit < prefixNum && line.compare(pos, 2, prefixTbl[hit]) != 0) {
			++hit;
		}
		if (hit == prefixNum) break; // first non-prefix token ends the scan
		seen.insert(prefixTbl[hit]);
		pos += 2;
	}

	std::string ret;
	for (std::set<std::string>::const_iterator it = seen.begin(); it != seen.end(); ++it) {
		ret += *it;
	}
	ret += line.c_str() + pos;
	return ret;
}
|
||||
|
||||
int main()
|
||||
{
|
||||
std::string line;
|
||||
while (std::getline(std::cin, line)) {
|
||||
std::string normalizedLine = normalize(line);
|
||||
std::cout << normalizedLine << '\n';//std::endl;
|
||||
}
|
||||
}
|
||||
Vendored
+6
@@ -0,0 +1,6 @@
|
||||
|
||||
test script on Windows
|
||||
|
||||
this test requires nasm.exe, yasm.exe, cl.exe, awk, diff
|
||||
|
||||
test_all ; for all tests
|
||||
Vendored
+2
@@ -0,0 +1,2 @@
|
||||
@echo off
|
||||
set OPT=/EHsc -I../xbyak -I./ /W4 -D_CRT_SECURE_NO_WARNINGS /nologo
|
||||
Vendored
+660
@@ -0,0 +1,660 @@
|
||||
#include <xbyak/xbyak_util.h>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#ifdef XBYAK32
|
||||
#error "this sample is for only 64-bit mode"
|
||||
#endif
|
||||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
#ifndef DUMP
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4459)
|
||||
#pragma warning(disable : 4996)
|
||||
#endif
|
||||
#include <cybozu/test.hpp>
|
||||
|
||||
#ifdef XBYAK64_WIN
|
||||
#include "sf_test_win.h"
|
||||
#endif
|
||||
#ifdef XBYAK64_GCC
|
||||
#include "sf_test_gcc.h"
|
||||
#endif
|
||||
|
||||
struct Code : public Xbyak::CodeGenerator {
|
||||
void gen1()
|
||||
{
|
||||
StackFrame sf(this, 1);
|
||||
mov(rax, sf.p[0]);
|
||||
}
|
||||
void gen2()
|
||||
{
|
||||
StackFrame sf(this, 2);
|
||||
lea(rax, ptr [sf.p[0] + sf.p[1]]);
|
||||
}
|
||||
void gen3()
|
||||
{
|
||||
StackFrame sf(this, 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
void gen4()
|
||||
{
|
||||
StackFrame sf(this, 4);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
void gen5()
|
||||
{
|
||||
StackFrame sf(this, 4, UseRCX);
|
||||
xor_(rcx, rcx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
void gen6()
|
||||
{
|
||||
StackFrame sf(this, 4, UseRCX | UseRDX);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
void gen7()
|
||||
{
|
||||
StackFrame sf(this, 3, UseRCX | UseRDX);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
|
||||
void gen8()
|
||||
{
|
||||
StackFrame sf(this, 3, 3 | UseRCX | UseRDX);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(sf.t[0], 1);
|
||||
mov(sf.t[1], 2);
|
||||
mov(sf.t[2], 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
|
||||
void gen9()
|
||||
{
|
||||
StackFrame sf(this, 3, 3 | UseRCX | UseRDX, 32);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(sf.t[0], 1);
|
||||
mov(sf.t[1], 2);
|
||||
mov(sf.t[2], 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
mov(ptr [rsp + 8 * 0], rax);
|
||||
mov(ptr [rsp + 8 * 1], rax);
|
||||
mov(ptr [rsp + 8 * 2], rax);
|
||||
mov(ptr [rsp + 8 * 3], rax);
|
||||
}
|
||||
|
||||
void gen10()
|
||||
{
|
||||
StackFrame sf(this, 4, 8 | UseRCX | UseRDX, 32);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
for (int i = 0; i < 8; i++) {
|
||||
mov(sf.t[i], i);
|
||||
}
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
mov(ptr [rsp + 8 * 0], rax);
|
||||
mov(ptr [rsp + 8 * 1], rax);
|
||||
mov(ptr [rsp + 8 * 2], rax);
|
||||
mov(ptr [rsp + 8 * 3], rax);
|
||||
}
|
||||
|
||||
void gen11()
|
||||
{
|
||||
StackFrame sf(this, 0, UseRCX);
|
||||
xor_(rcx, rcx);
|
||||
mov(rax, 3);
|
||||
}
|
||||
|
||||
void gen12()
|
||||
{
|
||||
StackFrame sf(this, 4, UseRDX);
|
||||
xor_(rdx, rdx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
/*
|
||||
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
|
||||
*/
|
||||
void gen13()
|
||||
{
|
||||
StackFrame sf(this, 1, 13);
|
||||
for (int i = 0; i < 13; i++) {
|
||||
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
mov(rax, sf.t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, sf.t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
same as gen13
|
||||
*/
|
||||
void gen14()
|
||||
{
|
||||
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
|
||||
Pack t = sf.t;
|
||||
t.append(rcx);
|
||||
t.append(rdx);
|
||||
for (int i = 0; i < 13; i++) {
|
||||
mov(t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
mov(rax, t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
return (1 << 15) - 1;
|
||||
*/
|
||||
void gen15()
|
||||
{
|
||||
StackFrame sf(this, 0, 14, 8);
|
||||
Pack t = sf.t;
|
||||
t.append(rax);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
mov(t[i], uint64_t(1) << i);
|
||||
}
|
||||
mov(qword[rsp], 0);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
add(ptr[rsp], t[i]);
|
||||
}
|
||||
mov(rax, ptr[rsp]);
|
||||
}
|
||||
};
|
||||
|
||||
struct Code2 : Xbyak::CodeGenerator {
|
||||
Code2()
|
||||
: Xbyak::CodeGenerator(4096 * 32)
|
||||
{
|
||||
}
|
||||
void gen(int pNum, int tNum, int stackSizeByte)
|
||||
{
|
||||
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
||||
if (tNum & UseRCX) xor_(rcx, rcx);
|
||||
if (tNum & UseRDX) xor_(rdx, rdx);
|
||||
for (int i = 0, n = tNum & ~(UseRCX | UseRDX); i < n; i++) {
|
||||
mov(sf.t[i], 5);
|
||||
}
|
||||
for (int i = 0; i < stackSizeByte; i++) {
|
||||
mov(byte [rsp + i], 0);
|
||||
}
|
||||
mov(rax, 1);
|
||||
for (int i = 0; i < pNum; i++) {
|
||||
add(rax, sf.p[i]);
|
||||
}
|
||||
}
|
||||
void gen2(int pNum, int tNum, int stackSizeByte)
|
||||
{
|
||||
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
||||
mov(rax, rsp);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void verify(const uint8_t *_f, int pNum)
|
||||
{
|
||||
uint8_t *f = const_cast<uint8_t*>(_f);
|
||||
switch (pNum) {
|
||||
case 0:
|
||||
CYBOZU_TEST_EQUAL(1, reinterpret_cast<int (*)()>(f)());
|
||||
return;
|
||||
case 1:
|
||||
CYBOZU_TEST_EQUAL(11, reinterpret_cast<int (*)(int)>(f)(10));
|
||||
return;
|
||||
case 2:
|
||||
CYBOZU_TEST_EQUAL(111, reinterpret_cast<int (*)(int, int)>(f)(10, 100));
|
||||
return;
|
||||
case 3:
|
||||
CYBOZU_TEST_EQUAL(1111, reinterpret_cast<int (*)(int, int, int)>(f)(10, 100, 1000));
|
||||
return;
|
||||
case 4:
|
||||
CYBOZU_TEST_EQUAL(11111, reinterpret_cast<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
|
||||
return;
|
||||
default:
|
||||
printf("ERR pNum=%d\n", pNum);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
	Exercise Code2::gen for stack sizes 0, 7, 14, 21, 28, parameter counts
	0..3, the four combinations of UseRCX/UseRDX, and every temporary count
	below the limit used for that combination.
*/
CYBOZU_TEST_AUTO(param)
{
	// { exclusive upper bound for tNum, flag bits or-ed into tNum }
	static const struct {
		int maxNum;
		int opt;
	} modeTbl[] = {
		{ 10, 0 },
		{ 9, UseRCX },
		{ 9, UseRDX },
		{ 8, UseRCX | UseRDX },
	};
	const size_t modeNum = sizeof(modeTbl) / sizeof(modeTbl[0]);

	Code2 code;
	for (int stackSize = 0; stackSize < 32; stackSize += 7) {
		for (int pNum = 0; pNum < 4; pNum++) {
			for (size_t mode = 0; mode < modeNum; mode++) {
				const int maxNum = modeTbl[mode].maxNum;
				const int opt = modeTbl[mode].opt;
				for (int tNum = 0; tNum < maxNum; tNum++) {
//					printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
					// remember where the next function starts, then emit and run it
					const uint8_t *entry = code.getCurr();
					code.gen(pNum, tNum | opt, stackSize);
					verify(entry, pNum);
					/*
						check rsp is 16-byte aligned if stackSize > 0
					*/
					if (stackSize <= 0) continue;
					Code2 c2;
					c2.gen2(pNum, tNum | opt, stackSize);
					uint64_t addr = c2.getCode<uint64_t (*)()>()();
					CYBOZU_TEST_EQUAL(addr % 16, 0);
				}
			}
		}
	}
}
|
||||
|
||||
/*
	Run Code::gen1..gen15 one after another in a single Code object. For each
	generator the current position is taken as the function entry before the
	code is emitted; the function is then called and its result checked.
*/
CYBOZU_TEST_AUTO(args)
{
	typedef int (*Fn0)();
	typedef int (*Fn1)(int);
	typedef int (*Fn2)(int, int);
	typedef int (*Fn3)(int, int, int);
	typedef int (*Fn4)(int, int, int, int);
	typedef int64_t (*FnArray)(const int64_t*);

	Code code;

	const Fn1 f1 = code.getCurr<Fn1>();
	code.gen1();
	CYBOZU_TEST_EQUAL(5, f1(5));

	const Fn2 f2 = code.getCurr<Fn2>();
	code.gen2();
	CYBOZU_TEST_EQUAL(9, f2(3, 6));

	const Fn3 f3 = code.getCurr<Fn3>();
	code.gen3();
	CYBOZU_TEST_EQUAL(14, f3(1, 4, 9));

	const Fn4 f4 = code.getCurr<Fn4>();
	code.gen4();
	CYBOZU_TEST_EQUAL(30, f4(1, 4, 9, 16));

	const Fn4 f5 = code.getCurr<Fn4>();
	code.gen5();
	CYBOZU_TEST_EQUAL(23, f5(2, 5, 7, 9));

	const Fn4 f6 = code.getCurr<Fn4>();
	code.gen6();
	CYBOZU_TEST_EQUAL(18, f6(3, 4, 5, 6));

	const Fn3 f7 = code.getCurr<Fn3>();
	code.gen7();
	CYBOZU_TEST_EQUAL(12, f7(3, 4, 5));

	const Fn3 f8 = code.getCurr<Fn3>();
	code.gen8();
	CYBOZU_TEST_EQUAL(23, f8(5, 8, 10));

	const Fn3 f9 = code.getCurr<Fn3>();
	code.gen9();
	CYBOZU_TEST_EQUAL(60, f9(10, 20, 30));

	const Fn4 f10 = code.getCurr<Fn4>();
	code.gen10();
	CYBOZU_TEST_EQUAL(100, f10(10, 20, 30, 40));

	const Fn0 f11 = code.getCurr<Fn0>();
	code.gen11();
	CYBOZU_TEST_EQUAL(3, f11());

	const Fn4 f12 = code.getCurr<Fn4>();
	code.gen12();
	CYBOZU_TEST_EQUAL(24, f12(3, 5, 7, 9));

	{
		// 1 + 2 + ... + 13 = 91
		int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };

		const FnArray f13 = code.getCurr<FnArray>();
		code.gen13();
		CYBOZU_TEST_EQUAL(91, f13(tbl));

		const FnArray f14 = code.getCurr<FnArray>();
		code.gen14();
		CYBOZU_TEST_EQUAL(91, f14(tbl));
	}

	const Fn0 f15 = code.getCurr<Fn0>();
	code.gen15();
	CYBOZU_TEST_EQUAL((1 << 15) - 1, f15());
}
|
||||
|
||||
// Print toString() of every element of the pack on a single line,
// separated by spaces and terminated by a newline.
void put(const Xbyak::util::Pack& p)
{
	const size_t count = p.size();
	size_t pos = 0;
	while (pos < count) {
		printf("%s ", p[pos].toString());
		++pos;
	}
	printf("\n");
}
|
||||
|
||||
// Check that getIdx() of the first tblNum elements of p equals the
// corresponding entries of tbl.
void verifyPack(const Xbyak::util::Pack& p, const int *tbl, size_t tblNum)
{
	size_t pos = 0;
	while (pos != tblNum) {
		CYBOZU_TEST_EQUAL(p[pos].getIdx(), tbl[pos]);
		++pos;
	}
}
|
||||
|
||||
// Build a pack of Reg64(0)..Reg64(9) and verify that sub(pos, num) and
// sub(pos) select the expected run of register indices.
CYBOZU_TEST_AUTO(pack)
{
	const int N = 10;
	Xbyak::Reg64 regTbl[N];
	for (int idx = 0; idx != N; ++idx) {
		regTbl[idx] = Xbyak::Reg64(idx);
	}
	Xbyak::util::Pack p(regTbl, N);
	// pos/num are the arguments given to sub(); tbl lists the expected indices.
	const struct {
		int pos;
		int num;
		int tbl[10];
	} caseTbl[] = {
		{ 0, 10, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
		{ 1, 9, { 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
		{ 2, 8, { 2, 3, 4, 5, 6, 7, 8, 9 } },
		{ 3, 7, { 3, 4, 5, 6, 7, 8, 9 } },
		{ 4, 6, { 4, 5, 6, 7, 8, 9 } },
		{ 5, 5, { 5, 6, 7, 8, 9 } },
		{ 6, 4, { 6, 7, 8, 9 } },
		{ 7, 3, { 7, 8, 9 } },
		{ 8, 2, { 8, 9 } },
		{ 9, 1, { 9 } },
		{ 3, 5, { 3, 4, 5, 6, 7 } },
	};
	const size_t caseNum = sizeof(caseTbl) / sizeof(caseTbl[0]);
	for (size_t k = 0; k < caseNum; k++) {
		const int pos = caseTbl[k].pos;
		const int num = caseTbl[k].num;
		verifyPack(p.sub(pos, num), caseTbl[k].tbl, num);
		// The one-argument form is only checked when the range runs to the end.
		if (pos + num != N) continue;
		verifyPack(p.sub(pos), caseTbl[k].tbl, num);
	}
}
|
||||
|
||||
// Generator used by the close test. mode selects how the StackFrame is closed:
//   0 : rely on the automatic close()
//   1 : one explicit close()
//   2 : two explicit close() calls
struct CloseCode : Xbyak::CodeGenerator {
	CloseCode(size_t mode)
	{
		if (mode == 0) {
			StackFrame sf(this, 0);
			// close() is automatically called.
		} else if (mode == 1 || mode == 2) {
			StackFrame sf(this, 0, 0, 0, false);
			sf.close(); // Explicitly call close().
			if (mode == 2) {
				sf.close(); // Explicitly call close().
			}
			setProtectModeRE(); // Ensure that no writes occur in destructor by setting read-exec
		} else {
			CYBOZU_TEST_ASSERT(false);
		}
	}
};
|
||||
|
||||
// For each CloseCode mode, the generated code size must match expectedTbl[mode].
CYBOZU_TEST_AUTO(close)
{
	const size_t expectedTbl[] = {
		1, 1, 2,
	};
	const size_t modeNum = sizeof(expectedTbl) / sizeof(expectedTbl[0]);
	for (size_t mode = 0; mode != modeNum; ++mode) {
		CloseCode c(mode);
		CYBOZU_TEST_EQUAL(c.getSize(), expectedTbl[mode]);
	}
}
|
||||
#endif
|
||||
|
||||
// Parameters of one StackFrame test case, packable into a 32-bit id.
// Each field is stored in one byte of the id (useRegs is shifted right by 5
// before it is stored and shifted back by set_id), so only values that fit in
// a byte after that transformation survive an id()/set_id() round trip.
struct ParamId {
	int pNum;
	int tNum;
	int useRegs;
	int stackSizeByte;
	union av {
		uint8_t a[4];
		uint32_t v;
	};
	// Pack the four fields into one 32-bit value (byte i of the union holds field i).
	uint32_t id() const
	{
		av packed;
		packed.a[0] = uint8_t(pNum);
		packed.a[1] = uint8_t(tNum);
		packed.a[2] = uint8_t(useRegs >> 5);
		packed.a[3] = uint8_t(stackSizeByte);
		return packed.v;
	}
	// Inverse of id(): restore the four fields from a packed value.
	void set_id(uint32_t v)
	{
		av packed;
		packed.v = v;
		pNum = packed.a[0];
		tNum = packed.a[1];
		useRegs = packed.a[2] << 5;
		stackSizeByte = packed.a[3];
	}
};
|
||||
|
||||
typedef std::vector<uint8_t> Bytes;
|
||||
|
||||
#ifndef DUMP
|
||||
void cmpAndDumpIfFailed(int rhs, int lhs, const Bytes& d)
|
||||
{
|
||||
CYBOZU_TEST_EQUAL(rhs, lhs);
|
||||
if (rhs != lhs) {
|
||||
FILE *fp = fopen("dump.bin", "wb");
|
||||
fwrite(d.data(), 1, d.size(), fp);
|
||||
fclose(fp);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void stackFrameTest()
|
||||
{
|
||||
struct Data {
|
||||
ParamId paramId;
|
||||
Bytes code;
|
||||
};
|
||||
typedef std::map<uint32_t, Data> DataMap;
|
||||
DataMap dataMap;
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code(int pNum, int tNum, int useRegs, int stackSizeByte)
|
||||
{
|
||||
StackFrame sf(this, pNum, tNum|useRegs, stackSizeByte);
|
||||
// modify
|
||||
for (int i = 0; i < tNum; i++) {
|
||||
mov(sf.t[i], 12345);
|
||||
}
|
||||
if (useRegs & UseRCX) {
|
||||
mov(rcx, 12345);
|
||||
}
|
||||
if (useRegs & UseRDX) {
|
||||
mov(rdx, 12345);
|
||||
}
|
||||
if (useRegs & UseRSI) {
|
||||
mov(rsi, 1000);
|
||||
}
|
||||
if (useRegs & UseRDI) {
|
||||
mov(rdi, 2000);
|
||||
}
|
||||
// use rbp if UseRBP and !UseRBPAsFramePointer
|
||||
if ((useRegs & UseRBPAsFramePointer) == UseRBP) {
|
||||
mov(rbp, 3000);
|
||||
}
|
||||
// eax is sum of all params and (esp & 15) if stackSizeByte > 0
|
||||
if (stackSizeByte > 0) {
|
||||
mov(eax, esp);
|
||||
and_(eax, 15);
|
||||
} else {
|
||||
xor_(eax, eax);
|
||||
}
|
||||
for (int i = 0; i < pNum; i++) {
|
||||
add(rax, sf.p[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
static const uint8_t stackSizeTbl[] = { 0, 33 };
|
||||
for (int pNum = 0; pNum <= 4; pNum++) {
|
||||
for (int tNum = 0; tNum <= 14; tNum++) {
|
||||
for (int i = 0; i < (1<<6); i++) {
|
||||
int totalNum = pNum + tNum;
|
||||
int useRegs = 0;
|
||||
if (i & 1) { useRegs |= UseRCX; totalNum++; }
|
||||
if (i & 2) { useRegs |= UseRDX; totalNum++; }
|
||||
if (i & 4) { useRegs |= UseRSI; totalNum++; }
|
||||
if (i & 8) { useRegs |= UseRDI; totalNum++; }
|
||||
// UseRBP and UseRBPAsFramePointer are mutually exclusive
|
||||
if (i & 16) { useRegs |= UseRBP; totalNum++; }
|
||||
if (!(i & 16) && (i & 32)) { useRegs |= UseRBPAsFramePointer; totalNum++; }
|
||||
if (totalNum > 14) continue;
|
||||
for (size_t j = 0; j < sizeof(stackSizeTbl)/sizeof(stackSizeTbl[0]); j++) {
|
||||
int stackSizeByte = stackSizeTbl[j];
|
||||
//fprintf(stderr, "pNum=%d, tNum=%d, useRegs=0x%X stackSizeByte=%d\n", pNum, tNum, useRegs, stackSizeByte);
|
||||
Code c(pNum, tNum, useRegs, stackSizeByte);
|
||||
//fprintf(stderr, "code size = %d\n", int(c.getSize()));
|
||||
Data d;
|
||||
d.paramId.pNum = pNum;
|
||||
d.paramId.tNum = tNum;
|
||||
d.paramId.useRegs = useRegs;
|
||||
d.paramId.stackSizeByte = stackSizeByte;
|
||||
d.code.assign(c.getCode(), c.getCode() + c.getSize());
|
||||
dataMap[d.paramId.id()] = d;
|
||||
#ifndef DUMP
|
||||
switch (pNum) {
|
||||
case 0:
|
||||
{
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
CYBOZU_TEST_EQUAL(0, f());
|
||||
// cmpAndDumpIfFailed(0, f(), d.code);
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
int (*f1)(int) = c.getCode<int (*)(int)>();
|
||||
CYBOZU_TEST_EQUAL(1, f1(1));
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
int (*f2)(int, int) = c.getCode<int (*)(int, int)>();
|
||||
CYBOZU_TEST_EQUAL(11, f2(1, 10));
|
||||
break;
|
||||
}
|
||||
case 3:
|
||||
{
|
||||
int (*f3)(int, int, int) = c.getCode<int (*)(int, int, int)>();
|
||||
CYBOZU_TEST_EQUAL(111, f3(1, 10, 100));
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
int (*f4)(int, int, int, int) = c.getCode<int (*)(int, int, int, int)>();
|
||||
CYBOZU_TEST_EQUAL(1111, f4(1, 10, 100, 1000));
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef DUMP
|
||||
for (DataMap::const_iterator it = dataMap.begin(); it != dataMap.end(); ++it) {
|
||||
const Data& d = it->second;
|
||||
printf("static const uint8_t code_%08x[] = {\n", d.paramId.id());
|
||||
for (size_t j = 0; j < d.code.size(); j++) {
|
||||
if (j % 16 == 0) {
|
||||
if (j > 0) printf("\n");
|
||||
printf("\t");
|
||||
}
|
||||
if (j > 0) printf(" ");
|
||||
printf("0x%02x,", d.code[j]);
|
||||
}
|
||||
printf("\n};\n");
|
||||
}
|
||||
printf("static const struct {\n");
|
||||
printf("\tuint32_t paramId;\n");
|
||||
printf("\tconst uint8_t *code;\n");
|
||||
printf("\tsize_t codeSize;\n");
|
||||
printf("} g_dataVec[] = {\n");
|
||||
for (DataMap::const_iterator it = dataMap.begin(); it != dataMap.end(); ++it) {
|
||||
const Data& d = it->second;
|
||||
printf("\t{ 0x%08x, code_%08x, %zu },\n", d.paramId.id(), d.paramId.id(), d.code.size());
|
||||
}
|
||||
printf("};\n");
|
||||
#else
|
||||
DataMap dataMapExpected;
|
||||
for (size_t i = 0; i < sizeof(g_dataVec) / sizeof(*g_dataVec); i++) {
|
||||
const uint32_t id = g_dataVec[i].paramId;
|
||||
Data d;
|
||||
d.paramId.set_id(id);
|
||||
d.code.assign(g_dataVec[i].code, g_dataVec[i].code + g_dataVec[i].codeSize);
|
||||
dataMapExpected[id] = d;
|
||||
}
|
||||
CYBOZU_TEST_EQUAL(dataMap.size(), dataMapExpected.size());
|
||||
for (DataMap::const_iterator it = dataMapExpected.begin(); it != dataMapExpected.end(); ++it) {
|
||||
const uint32_t id = it->first;
|
||||
DataMap::const_iterator it2 = dataMap.find(id);
|
||||
CYBOZU_TEST_ASSERT(it2 != dataMap.end());
|
||||
const Data& d = it2->second;
|
||||
const Data& dExpected = it->second;
|
||||
CYBOZU_TEST_EQUAL(d.code.size(), dExpected.code.size());
|
||||
CYBOZU_TEST_EQUAL_ARRAY(d.code.data(), dExpected.code.data(), d.code.size());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef DUMP
|
||||
int main()
|
||||
#else
|
||||
CYBOZU_TEST_AUTO(stackFrame)
|
||||
#endif
|
||||
{
|
||||
stackFrameTest();
|
||||
}
|
||||
Vendored
+36638
File diff suppressed because it is too large
Load Diff
Vendored
+37414
File diff suppressed because it is too large
Load Diff
Vendored
BIN
Binary file not shown.
Vendored
+37
@@ -0,0 +1,37 @@
|
||||
@echo off
rem Usage: test_address [64]
rem For each addressing pattern set, assemble the output of address.exe with
rem nasm, extract the third field of the listing into ok.lst, and diff it
rem against the output of nm_frame (x.lst).
rem NOTE(review): %OPT% is presumably defined by set_opt - confirm.
set FILTER=grep -v warning
if /i "%1"=="64" (
	set OPT2=-DXBYAK64
	set OPT3=win64
) else (
	set OPT2=-DXBYAK32
	set OPT3=win32
)

call set_opt
bmake -f Makefile.win all

if /i "%1"=="64" (
	call :sub 1
	call :sub 2
) else (
	call :sub 1
)
goto end

rem :sub <n> - run one comparison; %1 here is the argument given to call :sub.
:sub
echo cl address.cpp %OPT% %OPT2%
cl address.cpp %OPT% %OPT2%
address %1 > a.asm
echo nasm -f %OPT3% -l a.lst a.asm
nasm -f %OPT3% -l a.lst a.asm
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
rem The redirection is escaped so that the command line is shown on the
rem console instead of being written into nm.cpp.
echo address %1 jit ^> nm.cpp
address %1 jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame > x.lst
diff -w x.lst ok.lst
wc x.lst

:end
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
#!/bin/sh
# Usage: test_address.sh [64]
# Compare the byte listing produced by nasm (ok.lst) with the output of
# nm_frame (x.lst) for the addressing patterns printed by ./address.

set -e

FILTER="grep -v warning"
: "${CXX:=g++}"
CFLAGS_USER=${CFLAGS}
CFLAGS_WARN="$(cat CFLAGS_WARN.cfg)"

# sub <n>: build ./address, run it with argument <n> and compare both listings.
# Reads EXE, OPT2 and OPT3, which the caller sets before invoking it.
sub()
{
	CFLAGS="$CFLAGS_USER $CFLAGS_WARN -I../ $OPT2"

	echo $CXX $CFLAGS address.cpp -o address
	$CXX $CFLAGS address.cpp -o address

	./address $1 > a.asm
	echo "asm"
	$EXE -f$OPT3 a.asm -l a.lst
	# Third field of the listing; a trailing '-' marks a line that continues.
	awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst

	echo "xbyak"
	./address $1 jit > nm.cpp
	echo "compile nm_frame.cpp"
	$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
	./nm_frame > x.lst
	diff -bB ok.lst x.lst && echo "ok"

}

case "$1" in
64)
	echo "nasm(64bit)"
	EXE=nasm
	OPT2=-DXBYAK64
	OPT3=win64

	sub 1
	sub 2
	;;
*)
	echo "nasm(32bit)"
	EXE=nasm
	OPT2=-DXBYAK32
	OPT3=win32
	sub 1
	;;
esac
|
||||
|
||||
Vendored
+17
@@ -0,0 +1,17 @@
|
||||
@echo off
rem Run every Windows test script in sequence.
rem test_misc is invoked three times; FILE is set before each call to select
rem what it works on (NOTE(review): test_misc presumably reads %FILE% - confirm).
call test_nm_all

echo *** test addressing ***
call test_address
call test_address 64

echo *** test jmp address ***
call test_jmp

echo *** test misc ***
set FILE=misc
call test_misc

echo *** test APX ***
set FILE=apx
call test_misc

echo *** test AVX10 ***
set FILE=avx10_test
call test_misc

echo *** all test end ***
|
||||
Vendored
+42
@@ -0,0 +1,42 @@
|
||||
@echo off
rem Usage: test_avx [Y | 64 | Y64]
rem   (none) nasm, 32-bit     64  nasm, 64-bit
rem   Y      yasm, 32-bit     Y64 yasm, 64-bit
rem Assemble the output of make_nm with the selected assembler and diff the
rem byte listing (ok.lst) against the output of nm_frame (x.lst).
set FILTER=cat
set Y=0
if /i "%1"=="Y" (
	set Y=1
	set EXE=yasm.exe
	set OPT2=-DUSE_YASM -DXBYAK32
	set OPT3=win32
) else if /i "%1"=="64" (
	set EXE=nasm.exe
	set OPT2=-DXBYAK64
	set OPT3=win64
	set FILTER=normalize_prefix
) else if /i "%1"=="Y64" (
	set Y=1
	set EXE=yasm.exe
	set OPT2=-DUSE_YASM -DXBYAK64
	set OPT3=win64
	set FILTER=normalize_prefix
) else (
	set EXE=nasm.exe
	set OPT2=-DXBYAK32
	set OPT3=win32
)
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
make_nm > a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem connect "?????-" and "??"
rem The yasm variant additionally skips the first line of the listing.
if /i "%Y%"=="1" (
	awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
) else (
	awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
)
make_nm jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
diff -w x.lst ok.lst
wc x.lst
|
||||
+53
@@ -0,0 +1,53 @@
|
||||
#!/bin/sh
# Usage: test_avx.sh [Y | 64 | Y64]
#   (none) nasm, 32-bit     64  nasm, 64-bit
#   Y      yasm, 32-bit     Y64 yasm, 64-bit
# Assemble the output of ./make_nm and compare the byte listing (ok.lst)
# with the output of ./nm_frame (x.lst).

set -e

FILTER="grep -v warning"
: "${CXX:=g++}"
CFLAGS_USER=${CFLAGS}
CFLAGS_WARN="$(cat CFLAGS_WARN.cfg)"

if [ "$1" = "Y" ]; then
	echo "yasm(32bit)"
	EXE=yasm
	OPT2="-DUSE_YASM -DXBYAK32"
	OPT3=win32
elif [ "$1" = "64" ]; then
	echo "nasm(64bit)"
	EXE=nasm
	OPT2=-DXBYAK64
	OPT3=win64
	FILTER=./normalize_prefix
elif [ "$1" = "Y64" ]; then
	echo "yasm(64bit)"
	EXE=yasm
	OPT2="-DUSE_YASM -DXBYAK64"
	OPT3=win64
	FILTER=./normalize_prefix
else
	echo "nasm(32bit)"
	EXE=nasm
	OPT2=-DXBYAK32
	OPT3=win32
fi

CFLAGS="$CFLAGS_USER $CFLAGS_WARN -g -I../ $OPT2 -DUSE_AVX"
echo "compile make_nm.cpp"
$CXX $CFLAGS make_nm.cpp -o make_nm

./make_nm > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# Third field of the listing; lines whose third field is "1+1" are dropped and
# a trailing '-' marks a line that continues on the next one.
awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst

echo "xbyak"
./make_nm jit > nm.cpp
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst
diff -bB ok.lst x.lst && echo "ok"
|
||||
Vendored
+31
@@ -0,0 +1,31 @@
|
||||
@echo off
rem Usage: test_avx512 [min | 64]
rem   (none) nasm, 32-bit
rem   64     nasm, 64-bit
rem   min    nasm, 64-bit, additionally compiled with -DMIN_TEST
rem Assemble the output of make_512 with nasm and diff the byte listing
rem (ok.lst) against the output of nm_frame (x.lst).
set FILTER=cat
set Y=0
if /i "%1"=="min" (
	set EXE=nasm.exe
	set OPT2=-DXBYAK64 -DMIN_TEST
	set OPT3=win64
	set FILTER=normalize_prefix
) else if /i "%1"=="64" (
	set EXE=nasm.exe
	set OPT2=-DXBYAK64
	set OPT3=win64
	set FILTER=normalize_prefix
) else (
	set EXE=nasm.exe
	set OPT2=-DXBYAK32
	set OPT3=win32
)
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
make_512 > a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem connect "?????-" and "??"
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
make_512 jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% /DXBYAK_AVX512
nm_frame |%FILTER% > x.lst
diff -w x.lst ok.lst
wc x.lst
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
#!/bin/sh
# Usage: test_avx512.sh [64]
# Assemble the output of ./make_512 with nasm and compare the byte listing
# (ok.lst) with the output of ./nm_frame (x.lst).

set -e

FILTER="grep -v warning"
: "${CXX:=g++}"
CFLAGS_USER=${CFLAGS}
CFLAGS_WARN="$(cat CFLAGS_WARN.cfg)"

if [ "$1" = "64" ]; then
	echo "nasm(64bit)"
	EXE=nasm
	OPT2=-DXBYAK64
	OPT3=win64
	FILTER=./normalize_prefix
else
	echo "nasm(32bit)"
	EXE=nasm
	OPT2=-DXBYAK32
	OPT3=win32
fi

CFLAGS="$CFLAGS_USER $CFLAGS_WARN -I../ $OPT2 -DUSE_AVX512"
echo "compile make_512.cpp"
$CXX $CFLAGS make_512.cpp -o make_512

./make_512 > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# Third field of the listing; a trailing '-' marks a line that continues.
awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst

echo "xbyak"
./make_512 jit > nm.cpp
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
./nm_frame | $FILTER > x.lst
diff -bB ok.lst x.lst && echo "ok"
|
||||
Vendored
+13
@@ -0,0 +1,13 @@
|
||||
@echo off
rem Run test_avx for every assembler/bitness combination, then test_avx512
rem for 32-bit and 64-bit.

echo ** nasm-avx(32bit) ***
call test_avx

echo ** nasm-avx(64bit) ***
call test_avx 64

echo ** yasm-avx(32bit) ***
call test_avx Y

echo ** yasm-avx(64bit) ***
call test_avx Y64

echo ** nasm-avx512(32bit) ***
call test_avx512

echo ** nasm-avx512(64bit) ***
call test_avx512 64
|
||||
Vendored
+7
@@ -0,0 +1,7 @@
|
||||
@echo off
rem Usage: test_by_xed <file>
rem <file> is copied to tmp.cpp, which test_by_xed.cpp includes; the resulting
rem program writes the encoded bytes to "bin". xed then decodes "bin" into
rem out.txt and test_by_xed.py compares <file> with out.txt.
set XED=xed
set CFLAGS=-I ../ /EHsc /nologo
rem "%~1" strips surrounding quotes and re-quotes, so paths with spaces work.
copy "%~1" tmp.cpp
cl %CFLAGS% test_by_xed.cpp && test_by_xed.exe
rem Stop here on failure; otherwise xed would decode a stale or missing "bin".
if errorlevel 1 exit /b 1
%XED% -64 -ir bin > out.txt
python3 test_by_xed.py "%~1" out.txt
|
||||
Vendored
+27
@@ -0,0 +1,27 @@
|
||||
#include <stdio.h>
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Code generator whose constructor body is supplied by tmp.cpp: the file is
// textually included so every statement in it runs as a member call.
struct Code : Xbyak::CodeGenerator {
	Code() : Xbyak::CodeGenerator(4096*8)
	{
		// Applied before any statement from tmp.cpp is executed.
		setDefaultEncodingAVX10(AVX10v2Encoding);
#include "tmp.cpp"
	}
};
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
Code c;
|
||||
FILE *fp = fopen("bin", "wb");
|
||||
if (fp) {
|
||||
fwrite(c.getCode(), 1, c.getSize(), fp);
|
||||
fclose(fp);
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR %s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
Vendored
+456
@@ -0,0 +1,456 @@
|
||||
import re
|
||||
import math
|
||||
import sys
|
||||
|
||||
class Reg:
  """A register operand, identified only by its name (e.g. 'rax')."""

  def __init__(self, s):
    self.name = s

  def __str__(self):
    return self.name

  def __eq__(self, rhs):
    # Operand lists mix registers with ints, memory operands and None.
    # Anything without a 'name' is simply "not equal" instead of raising
    # AttributeError.
    if not hasattr(rhs, 'name'):
      return NotImplemented
    return self.name == rhs.name

  def __lt__(self, rhs):
    # Ordering is by name; objects without a name are not comparable.
    if not hasattr(rhs, 'name'):
      return NotImplemented
    return self.name < rhs.name
|
||||
|
||||
# xmm0..xmm31, then ymm0..ymm31, then zmm0..zmm31
g_xmmTbl = [f'{prefix}mm{i}' for prefix in 'xyz' for i in range(32)]

# tmm0..tmm7
g_tmmTbl = [f'tmm{i}' for i in range(8)]

# Every register name the parsers recognize.
g_regTbl = (
  'eax ecx edx ebx esp ebp esi edi'.split()
  + 'ax cx dx bx sp bp si di'.split()
  + 'al cl dl bl ah ch dh bh'.split()
  + [f'k{i}' for i in range(1, 8)]
  + 'rax rcx rdx rbx rsp rbp rsi rdi'.split()
  + [f'r{i}' for i in range(8, 32)]
  # r8d..r31d, then r8w..r31w, then r8b..r31b
  + [f'r{i}{suffix}' for suffix in 'dwb' for i in range(8, 32)]
  + 'spl bpl sil dil'.split()
  + g_tmmTbl
  + g_xmmTbl
)
|
||||
|
||||
# Create one module-level Reg object per register name, so that e.g. the
# name 'rax' can be written as the bare identifier rax below.
for e in g_regTbl:
  globals()[e] = Reg(e)

# Mask registers; index i holds k(i+1).
g_maskTbl = [k1, k2, k3, k4, k5, k6, k7]

# Punctuation that parseNmemonic turns into blanks before splitting a line.
g_replaceCharTbl = '{}();|,'
g_replaceChar = str.maketrans(g_replaceCharTbl, ' ' * len(g_replaceCharTbl))

# Size keywords; the entry at index i stands for 1<<i bytes.
g_sizeTbl = ['byte', 'word', 'dword', 'qword', 'xword', 'yword', 'zword']
# Alternative spellings accepted for the 16, 32 and 64 byte sizes.
g_xedSizeTbl = ['xmmword', 'ymmword', 'zmmword']

# Attribute names; g_attrXedTbl[i] is the other spelling of g_attrTbl[i].
g_attrTbl = ['T_sae', 'T_rn_sae', 'T_rd_sae', 'T_ru_sae', 'T_rz_sae', 'T_z']
g_attrXedTbl = ['sae', 'rne-sae', 'rd-sae', 'ru-sae', 'rz-sae', 'z']
|
||||
|
||||
class Attr:
  """An instruction attribute such as T_sae or T_z, identified by its name."""

  def __init__(self, s):
    self.name = s

  def __str__(self):
    return self.name

  def __eq__(self, rhs):
    # Objects without a 'name' (None, ints, ...) compare unequal instead of
    # raising AttributeError.
    if not hasattr(rhs, 'name'):
      return NotImplemented
    return self.name == rhs.name

  def __lt__(self, rhs):
    # Ordering is by name; objects without a name are not comparable.
    if not hasattr(rhs, 'name'):
      return NotImplemented
    return self.name < rhs.name
|
||||
|
||||
# Create one module-level Attr object per attribute name, so that e.g. T_z
# can be written as a bare identifier.
for e in g_attrTbl:
  globals()[e] = Attr(e)
|
||||
|
||||
def newReg(s):
  """Wrap a plain string in a Reg; return any other value (including None) unchanged."""
  return Reg(s) if type(s) == str else s
|
||||
|
||||
class Memory:
  """A memory operand.

  size      -- operand size in bytes; 0 means no explicit size ('ptr')
  base      -- base register (Reg or register name) or None
  index     -- index register (Reg or register name) or None
  scale     -- multiplier applied to index
  disp      -- displacement
  broadcast -- 0: none, 1: the '_b' spelling, N > 1: the '{1toN}' spelling
  rip       -- True for a '[rip+disp]' operand
  """

  def __init__(self, size=0, base=None, index=None, scale=0, disp=0, broadcast=0, rip=False):
    self.size = size
    self.base = newReg(base)
    self.index = newReg(index)
    self.scale = scale
    self.disp = disp
    self.broadcast = broadcast
    self.rip = rip

  def __str__(self):
    if self.rip:
      return f'[rip+{hex(self.disp)}]'
    if self.size == 0:
      s = 'ptr'
    else:
      # With a broadcast the printed size is the total size (element * count).
      idx = self.size * max(self.broadcast, 1)
      s = g_sizeTbl[int(math.log2(idx))]
    if self.broadcast > 0:
      s += '_b'
    s += ' ['
    needPlus = False
    if self.base:
      s += str(self.base)
      needPlus = True
    if self.index:
      if needPlus:
        s += '+'
      s += str(self.index)
      if self.scale > 1:
        s += f'*{self.scale}'
      needPlus = True
    if self.disp:
      if needPlus:
        s += '+'
      s += hex(self.disp)
    s += ']'
    return s

  @staticmethod
  def _sameReg(a, b):
    """Compare two optional registers without ever calling == on None."""
    if a is None or b is None:
      return a is b
    return a == b

  # Xbyak uses 'ptr' when it can be automatically detected, so we should consider this in the comparison.
  def __eq__(self, rhs):
    # Operand lists also hold registers and immediates; those are never equal
    # to a memory operand and must not raise AttributeError.
    if not isinstance(rhs, Memory):
      return NotImplemented
    # Normalize so that self is the side with the smaller broadcast value.
    if self.broadcast > rhs.broadcast:
      return rhs == self
    assert(self.broadcast <= rhs.broadcast)
    if self.broadcast == 0:
      if rhs.broadcast > 0: return False
      # Xbyak uses 'ptr' when it is automatically detected.
      # Therefore, the comparison is true if 'ptr' (i.e., size = 0) is used.
      if 0 < self.size and 0 < rhs.size and self.size != rhs.size: return False
    if self.broadcast == 1: # _b
      if rhs.broadcast == 1: # compare ptr_b with ptr_b
        if self.size != rhs.size:
          return False
      if self.size > 0 and (self.size != rhs.size * rhs.broadcast): # compare ptr_b with {1toX}
        return False
    else:
      if self.broadcast != rhs.broadcast: return False
    return (Memory._sameReg(self.base, rhs.base)
        and Memory._sameReg(self.index, rhs.index)
        and self.scale == rhs.scale
        and self.disp == rhs.disp)
|
||||
|
||||
def parseBroadcast(s):
  """Strip a broadcast marker from s.

  Returns (text without the marker, broadcast):
    '_b' present      -> every '_b' removed, broadcast = 1
    '{1toN}' present  -> that marker removed, broadcast = N
    otherwise         -> s unchanged, broadcast = 0
  """
  if '_b' in s:
    return (s.replace('_b', ''), 1)
  found = re.search(r'({1to(\d+)})', s)
  if found:
    marker, count = found.group(1), found.group(2)
    return (s.replace(marker, ''), int(count))
  return (s, 0)
|
||||
|
||||
def parseMemory(s, broadcast=0):
  """Parse the text of a memory operand into a Memory object.

  s         -- operand text, e.g. 'dword ptr [rax+rcx*4+0x10]'; blanks and
               case are ignored
  broadcast -- broadcast value already determined by the caller; when 0 it is
               taken from s via parseBroadcast
  Raises ValueError when no '[...]' part is found or when more registers
  appear than base and index can hold.
  """
  org_s = s

  s = s.replace(' ', '').lower()

  size = 0
  base = index = None
  scale = 0
  disp = 0

  if broadcast == 0:
    (s, broadcast) = parseBroadcast(s)

  # Size keyword: entry i of g_sizeTbl means 1<<i bytes; the names in
  # g_xedSizeTbl are only tried when none of those matched.
  for shift, word in enumerate(g_sizeTbl):
    if s.startswith(word):
      size = 1 << shift
      s = s[len(word):]
      break
  else:
    for shift, word in enumerate(g_xedSizeTbl, 4):
      if s.startswith(word):
        size = 1 << shift
        s = s[len(word):]
        break

  # Remove 'ptr' if present
  if s.startswith('ptr'):
    s = s[len('ptr'):]

  if s.startswith('_b'):
    broadcast = 1
    s = s[len('_b'):]

  # Extract the content inside brackets
  bracket = re.match(r'\[(.*)\]', s)
  if not bracket:
    raise ValueError(f'bad format {org_s=}')
  expr = bracket.group(1)

  # check rip
  ripDisp = re.match(r'rip\+([a-fx0-9]+)', expr)
  if ripDisp:
    text = ripDisp.group(1)
    disp = int(text, 16 if text.startswith('0x') else 10)
    return Memory(size, base, index, scale, disp, broadcast, True)

  # Parse components: each item is (name-or-number, scale, sign)
  elems = re.findall(r'([a-z0-9]+)(?:\*([0-9]+))?|([+-])', expr)

  for pos, (term, mul, signCh) in enumerate(elems):
    if signCh: # This is a '+' or '-' sign
      continue

    if term not in g_regTbl:
      # A number: the sign is the '-' or '+' element right before it.
      sign = -1 if pos > 0 and elems[pos-1][2] == '-' else 1
      disp += sign * int(term, 16 if term.startswith('0x') else 10)
      continue

    # A register: the first one without a scale (or with *1) is the base,
    # the next one becomes the index.
    if base is None and (not mul or int(mul) == 1):
      base = term
    elif index is None:
      index = term
      scale = int(mul) if mul else 1
    else:
      raise ValueError(f'bad format2 {s=}')

  return Memory(size, base, index, scale, disp, broadcast)
|
||||
|
||||
def normalizeName(s):
  """Map the alias 'sal' to 'shl'; every other name is returned unchanged."""
  return 'shl' if s == 'sal' else s
|
||||
|
||||
class Nmemonic:
  """One parsed instruction: a name, its operands and its attributes.

  name  -- instruction name
  args  -- operands in order (registers, memory operands, integers)
  attrs -- attributes such as mask registers and T_* flags; stored sorted so
           that two instructions compare equal regardless of the order in
           which their attributes were written
  """

  def __init__(self, name, args=None, attrs=None):
    self.name = name
    # None instead of [] as default: a shared default list would leak state
    # between instances.
    self.args = [] if args is None else args
    # list.sort() sorts in place and returns None, which made attrs always
    # None; sorted() returns the sorted list.
    self.attrs = sorted(attrs) if attrs else []

  def __str__(self):
    s = f'{self.name}('
    for i in range(len(self.args)):
      if i > 0:
        s += ', '
      s += str(self.args[i])
      # Attributes are printed attached to the first operand: 'zmm0|k1|T_z'
      if i == 0 and self.attrs:
        for e in self.attrs:
          s += f'|{e}'
    s += ');'
    return s

  def __eq__(self, rhs):
    return normalizeName(self.name) == normalizeName(rhs.name) and self.args == rhs.args and self.attrs == rhs.attrs
|
||||
|
||||
def parseNmemonic(s):
  """Parse one instruction line into a Nmemonic.

  Both spellings that appear in the test tables are accepted, e.g.
  'vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);' and 'vaddpd ymm1{rne-sae}, ymm2, ymm3'.
  """
  operands = []
  attributes = []

  # remove Xbyak::{Evex,Vex}Encoding
  encoding = re.search(r'(,[^,]*Encoding)', s)
  if encoding:
    s = s.replace(encoding.group(1), '')

  (s, broadcast) = parseBroadcast(s)

  # replace xm0 with xmm0
  while (short := re.search(r'([xyz])m(\d\d?)', s)):
    s = s.replace(short.group(0), short.group(1) + 'mm' + short.group(2))

  # check 'zmm0{k7}'
  braceMask = re.search(r'({k[1-7]})', s)
  if braceMask:
    text = braceMask.group(1)
    attributes.append(g_maskTbl[int(text[2]) - 1])
    s = s.replace(text, '')
  # check 'zmm0|k7'
  pipeMask = re.search(r'(\|\s*k[1-7])', s)
  if pipeMask:
    text = pipeMask.group(1)
    attributes.append(g_maskTbl[int(text[-1]) - 1])
    s = s.replace(text, '')

  s = s.translate(g_replaceChar)

  # reconstruct memory string: a size keyword or 'ptr...' starts a memory
  # operand, and the following words are glued to it up to the one holding ']'
  tokens = []
  gluing = False
  for word in s.split():
    if gluing:
      tokens[-1] += word
      if ']' in word:
        gluing = False
      continue
    tokens.append(word)
    if word in g_sizeTbl or word in g_xedSizeTbl or word.startswith('ptr'):
      tokens[-1] += ' ' # to avoid 'byteptr'
      if ']' not in tokens[-1]:
        gluing = True

  name = tokens[0]
  for tok in tokens[1:]:
    if tok.startswith('0x'):
      operands.append(int(tok, 16))
    elif tok[0] in '0123456789':
      operands.append(int(tok))
    elif tok in g_attrTbl:
      attributes.append(Attr(tok))
    elif tok in g_attrXedTbl:
      attributes.append(Attr(g_attrTbl[g_attrXedTbl.index(tok)]))
    elif tok in g_regTbl:
      operands.append(Reg(tok))
    # xed special format : xmm8+3
    elif tok[:-2] in g_xmmTbl and tok.endswith('+3'):
      operands.append(Reg(tok[:-2]))
    # tmm?+1
    elif tok[:-2] in g_tmmTbl and tok.endswith('+1'):
      operands.append(Reg(tok[:-2]))
    else:
      operands.append(parseMemory(tok, broadcast))
  return Nmemonic(name, operands, attributes)
|
||||
|
||||
def loadFile(name):
  """Return the lines of file `name`, without empty lines and without lines
  that start with '#' or '//'."""
  with open(name) as f:
    text = f.read()
  return [
    line for line in text.split('\n')
    if line and not (line[0] == '#' or line.startswith('//'))
  ]
|
||||
|
||||
# Drop the first five whitespace-separated fields of a line and return the
# rest joined by single blanks.
# e.g. XDIS 0: AVX512 AVX512EVEX 62F1E91858CB vaddpd ymm1{rne-sae}, ymm2, ymm3
#   -> vaddpd ymm1{rne-sae}, ymm2, ymm3
def removeExtraInfo(s):
  fields = s.split()
  del fields[:5]
  return ' '.join(fields)
|
||||
|
||||
def run(cppText, xedText):
  """Compare two files line by line.

  cppText -- file whose lines are parsed as they are
  xedText -- file whose lines first lose their five leading fields
  Raises Exception when the files differ in line count or when a pair of
  lines parses to different instructions (the message holds the 1-based line
  number).
  """
  cpp = loadFile(cppText)
  xed = loadFile(xedText)
  n = len(cpp)
  if n != len(xed):
    raise Exception(f'different line {n} {len(xed)}')

  for lineNo, (cppLine, xedLine) in enumerate(zip(cpp, xed), 1):
    m1 = parseNmemonic(cppLine)
    m2 = parseNmemonic(removeExtraInfo(xedLine))
    assertEqual(m1, m2, f'{lineNo}')
  print('run ok', n)
|
||||
|
||||
def assertEqualStr(a, b, msg=None):
  """Raise Exception unless str(a) and str(b) are the same text."""
  textA, textB = str(a), str(b)
  if textA == textB:
    return
  raise Exception(f'assert fail {msg}:', textA, textB)
|
||||
|
||||
def assertEqual(a, b, msg=None):
  """Raise Exception when a != b; the exception carries msg and both values as text."""
  if not (a != b):
    return
  raise Exception(f'assert fail {msg}:', str(a), str(b))
|
||||
|
||||
def MemoryTest():
  """Check str() of hand-built Memory objects, and that an unsized operand
  equals a sized one."""
  cases = [
    (Memory(0, rax), 'ptr [rax]'),
    (Memory(4, rax), 'dword [rax]'),
    (Memory(8, rax, rcx), 'qword [rax+rcx]'),
    (Memory(8, rax, rcx, 4), 'qword [rax+rcx*4]'),
    (Memory(8, None, rcx, 4), 'qword [rcx*4]'),
    (Memory(8, rax, None, 0, 5), 'qword [rax+0x5]'),
    (Memory(8, None, None, 0, 255), 'qword [0xff]'),
    (Memory(0, r8, r9, 1, 32), 'ptr [r8+r9+0x20]'),
  ]
  for mem, text in cases:
    assertEqualStr(mem, text)

  assertEqual(Memory(16, rax), Memory(0, rax))
|
||||
|
||||
def parseMemoryTest():
  """Check parseMemory against expected Memory objects (compared as text),
  then check equality and inequality of parsed operand pairs."""
  print('parseMemoryTest')
  parseCases = [
    ('[]', Memory()),
    ('[rax]', Memory(0, rax)),
    ('ptr[rax]', Memory(0, rax)),
    ('ptr_b[rax]', Memory(0, rax, broadcast=1)),
    ('dword[rbx]', Memory(4, rbx)),
    ('xword ptr[rcx]', Memory(16, rcx)),
    ('xmmword ptr[rcx]', Memory(16, rcx)),
    ('xword ptr[rdx*8]', Memory(16, None, rdx, 8)),
    ('[12345]', Memory(0, None, None, 0, 12345)),
    ('[0x12345]', Memory(0, None, None, 0, 0x12345)),
    ('yword [rax+rdx*4]', Memory(32, rax, rdx, 4)),
    ('zword [rax+rdx*4+123]', Memory(64, rax, rdx, 4, 123)),
    ('xword_b [rax]', Memory(16, rax, None, 0, 0, 1)),
    ('dword [rax]{1to4}', Memory(16, rax, None, 0, 0, 1)),
    ('yword_b [rax]', Memory(32, rax, None, 0, 0, 1)),
    ('dword [rax]{1to8}', Memory(32, rax, None, 0, 0, 1)),
  ]
  for text, expected in parseCases:
    assertEqualStr(parseMemory(text), expected)

  print('compare test')
  # (left text, right text, whether the two operands must compare equal)
  compareCases = [
    ('ptr[rax]', 'dword[rax]', True),
    ('byte[rax]', 'dword[rax]', False),
    ('yword_b[rax]', 'dword [rax]{1to8}', True),
    ('yword_b[rax]', 'word [rax]{1to16}', True),
    ('zword_b[rax]', 'word [rax]{1to32}', True),
    ('zword_b[rax]', 'word [rax]{1to16}', False),
    ('dword [rax]{1to2}', 'dword [rax] {1to4}', False),
    ('zword_b[rax]', 'xword_b [rax]', False),
    ('ptr_b[rax]', 'word [rax]{1to32}', True), # ignore size
  ]
  for left, right, same in compareCases:
    a = parseMemory(left)
    b = parseMemory(right)
    if not same:
      assert(parseMemory(left) != parseMemory(right))
      continue
    # equality must hold in both directions
    assertEqual(a, b)
    assertEqual(b, a)
|
||||
|
||||
def parseNmemonicTest():
  """Self-test for parseNmemonic().

  Each case pairs one instruction line with the Nmemonic object it is
  expected to parse to. The inputs mix the Xbyak call form
  (`name(args);`) and the disassembly-style form (`name args`).
  """
  print('parseNmemonicTest')
  cases = [
    ('vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);', Nmemonic('vaddpd', [ymm1, ymm2, ymm3], [T_rn_sae])),
    ('vaddpd ymm1{rne-sae}, ymm2, ymm3', Nmemonic('vaddpd', [ymm1, ymm2, ymm3], [T_rn_sae])),
    ('mov(rax, dword ptr [rcx + rdx * 8 ] );', Nmemonic('mov', [rax, Memory(4, rcx, rdx, 8)])),
    ('mov(rax, ptr [rcx + rdx * 8 ] );', Nmemonic('mov', [rax, Memory(0, rcx, rdx, 8)])),
    ('vcmppd(k1, ymm2, ymm3 |T_sae, 3);', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])),
    ('vcmppd k1{sae}, ymm2, ymm3, 0x3', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])),
    ('v4fmaddps zmm1, zmm8+3, xmmword ptr [rdx+0x40]', Nmemonic('v4fmaddps', [zmm1, zmm8, Memory(16, rdx, None, 0, 0x40)])),
    ('vp4dpwssd zmm23{k7}{z}, zmm1+3, xmmword ptr [rax+0x40]', Nmemonic('vp4dpwssd', [zmm23, zmm1, Memory(16, rax, None, 0, 0x40)], [k7, T_z])),
    ('v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);', Nmemonic('v4fnmaddps', [zmm5, zmm2, Memory(0, rcx, None, 0, 0x80)], [k5])),
    ('vpcompressw(zmm30 | k2 |T_z, zmm1);', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
    ('vpcompressw zmm30{k2}{z}, zmm1', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
    ('vpshldw(xmm9|k3|T_z, xmm2, ptr [rax + 0x40], 5);', Nmemonic('vpshldw', [xmm9, xmm2, Memory(0, rax, None, 0, 0x40), 5], [k3, T_z])),
    ('vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 1), 5], [k3, T_z])),
    ('vpshrdd xmm5{k3}{z}, xmm2, dword ptr [rax+0x40]{1to4}, 0x5', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 4), 5], [k3, T_z])),
    ('vcmpph(k1, xmm15, ptr[rax+64], 1);', Nmemonic('vcmpph', [k1, xmm15, Memory(0, rax, None, 0, 64), 1])),
  ]
  for text, want in cases:
    assertEqual(parseNmemonic(text), want)
|
||||
|
||||
def test():
  """Run the built-in self-tests in order, between start/end banners."""
  print('test start')
  for selftest in (MemoryTest, parseMemoryTest, parseNmemonicTest):
    selftest()
  print('test end')
|
||||
|
||||
def main():
  """Command-line entry point.

  `<prog> test`                  runs the built-in self-tests.
  `<prog> <cpp-text> <xed-text>` passes both paths to run().
  Any other argument count prints the usage text.
  """
  argc = len(sys.argv)
  if argc == 2 and sys.argv[1] == 'test':
    test()
  elif argc == 3:
    run(sys.argv[1], sys.argv[2])
  else:
    # __name__ is '__main__' when run as a script, so it is useless in a
    # usage line; show the invoked program name instead. Fall back to
    # __name__ only if argv is empty (embedded interpreters).
    prog = sys.argv[0] if sys.argv else __name__
    print(f'{prog} <cpp-text> <xed-text> # compare cpp-text and xed-text generated by xed')
    print(f'{prog} test # for test')


if __name__ == '__main__':
  main()
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
#!/bin/sh
# Cross-check one Xbyak source snippet against the xed disassembler.
#
# usage: ./test_by_xed.sh <xbyak-cpp>
#
# Steps: copy the snippet to tmp.cpp, build and run test_by_xed, let xed
# disassemble the file `bin`, then let test_by_xed.py compare the snippet
# with xed's output (out.txt).
# NOTE(review): test_by_xed.cpp presumably includes tmp.cpp and writes
# `bin` - neither is visible from this script; confirm.
#
# Overridable through the environment: XED, XED_OPT, CXX, CFLAGS, PYTHON.

set -e
XED=${XED:=xed}
XED_OPT=${XED_OPT:=-64 -set PREFETCHIT 1 -set PREFETCHRST 1}
#XED_OPT=${XED_OPT:=-64 -chip-check FUTURE}
CXX=${CXX:=g++}
CFLAGS_USER=${CFLAGS}
CFLAGS_WARN="$(cat CFLAGS_WARN.cfg)"
PYTHON=${PYTHON:=python3}
echo "$XED"

if [ $# -ne 1 ]; then
	echo "./test_by_xed.sh <xbyak-cpp>"
	exit 1
fi

TARGET=$1

CFLAGS="$CFLAGS_USER $CFLAGS_WARN -I ../"

# "$TARGET" is quoted everywhere so a path containing spaces or glob
# characters reaches cp and python as one argument.
echo "test: $TARGET"
cp "$TARGET" tmp.cpp
# $CFLAGS and $XED_OPT are intentionally unquoted: each holds several options.
$CXX $CFLAGS test_by_xed.cpp -o test_by_xed
./test_by_xed || (echo "ERR test_by_xed"; exit 1)
echo "$XED ${XED_OPT} -ir bin > out.txt"
$XED ${XED_OPT} -ir bin > out.txt || (echo "ERR xed"; exit 1)
$PYTHON test_by_xed.py "$TARGET" out.txt || (echo "ERR test_by_xed.py"; exit 1)
|
||||
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
@rem Run test_by_xed.bat once for every data set named in TARGETS.
@rem Each name is echoed first and then passed as dataset\NAME.
set TARGETS=old.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt amx.txt apx.txt comp.txt
|
||||
for %%f in (%TARGETS%) do (
|
||||
echo %%f
|
||||
call test_by_xed.bat dataset\%%f
|
||||
)
|
||||
Vendored
+4
@@ -0,0 +1,4 @@
|
||||
@rem Build jmp.cpp with cl (optimisation off, debug info on) and run it.
@rem OPT is presumably set by set_opt - confirm against that script.
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
cl -I../ -I./ -DXBYAK_TEST jmp.cpp %OPT% /Od /Zi
|
||||
jmp
|
||||
Vendored
+4
@@ -0,0 +1,4 @@
|
||||
@rem Build the source named by the FILE variable with cl and run the result.
@rem FILE must be set by the caller; OPT is presumably set by set_opt - confirm.
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
cl -I../ -I./ -DXBYAK_TEST %FILE%.cpp %OPT% /Od /Zi
|
||||
%FILE%
|
||||
Vendored
+78
@@ -0,0 +1,78 @@
|
||||
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
|
||||
#pragma warning(disable:4514)
|
||||
#pragma warning(disable:4786)
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "../../include.mie/mie_thread.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
class WriteMMX : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
WriteMMX()
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
mov(ecx, ptr [esp + 4]);
|
||||
#endif
|
||||
movd(mm0, ecx);
|
||||
ret();
|
||||
}
|
||||
void (*set() const)(int x) { return (void (*)(int x))getCode(); }
|
||||
};
|
||||
|
||||
class ReadMMX : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
ReadMMX()
|
||||
{
|
||||
movd(eax, mm0);
|
||||
ret();
|
||||
}
|
||||
int (*get() const)() { return (int (*)())getCode(); }
|
||||
};
|
||||
|
||||
/*
	Thread body for the mm0 test: writes n_ into mm0 once, then reads mm0
	back forever, sleeping between reads, and reports when the value
	differs from n_.
*/
class Test : public MIE::ThreadBase<Test> {
	int n_;
public:
	Test(int n)
		: n_(n)
	{
	}

	/* never returns: the polling loop has no exit condition */
	void threadEntry()
	{
		printf("n=%d\n", n_);
		WriteMMX writer;
		writer.set()(n_);
		ReadMMX reader;
		while (true) {
			const int cur = reader.get()();
			printf("b=%d\n", cur);
			if (cur != n_) {
				printf("mm0 has changed!\n");
			}
			MIE::MIE_Sleep(1000);
		}
	}

	void stopThread() { }
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
puts("32bit");
|
||||
#else
|
||||
puts("64bit");
|
||||
#endif
|
||||
try {
|
||||
int n = atoi(argc == 1 ? "1223" : argv[1]);
|
||||
Test test0(n), test1(n + 1);
|
||||
test0.beginThread();
|
||||
test1.beginThread();
|
||||
|
||||
test0.joinThread();
|
||||
test1.joinThread();
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
}
|
||||
Vendored
+46
@@ -0,0 +1,46 @@
|
||||
@echo off
rem Compare the machine code produced by Xbyak with the output of nasm or yasm.
rem The first argument selects the mode: Y, 64, Y64 or noexcept.
rem Any other value, or no argument, selects nasm in 32-bit mode.
rem The argument is read with the tilde modifier so that a quoted
rem argument still matches.
set FILTER=cat
set Y=0
if /i "%~1"=="Y" (
	set Y=1
	set EXE=yasm.exe
	set OPT2=-DUSE_YASM -DXBYAK32
	set OPT3=win32
) else if /i "%~1"=="64" (
	set EXE=nasm.exe
	set OPT2=-DXBYAK64
	set OPT3=win64
	set FILTER=normalize_prefix
) else if /i "%~1"=="Y64" (
	set Y=1
	set EXE=yasm.exe
	set OPT2=-DUSE_YASM -DXBYAK64
	set OPT3=win64
	set FILTER=normalize_prefix
) else if /i "%~1"=="noexcept" (
	set EXE=nasm.exe
	set OPT2=-DXBYAK32 -DXBYAK_NO_EXCEPTION
	set OPT3=win32
) else (
	set EXE=nasm.exe
	set OPT2=-DXBYAK32
	set OPT3=win32
)
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
rem reference side: emit assembler source and assemble it with a listing
make_nm > a.asm
rm -rf a.lst
echo %EXE% -f %OPT3% -l a.lst a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem join a third field containing "-" with the field of the following listing line
rem the yasm branch additionally skips the first line of the listing
if /i "%Y%"=="1" (
	awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
) else (
	awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
)
rem Xbyak side: generate nm.cpp, build nm_frame and dump what it encodes
make_nm jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
diff -wb x.lst ok.lst && echo "ok"
|
||||
+66
@@ -0,0 +1,66 @@
|
||||
#!/bin/sh
# Compare the machine code produced by Xbyak with the output of nasm or yasm.
#
# usage: ./test_nm.sh [Y|64|Y64|avx512|noexcept]
#   Any other value, or no argument, selects nasm in 32-bit mode.
#
# make_nm emits assembler source (a.asm) and, with `jit`, the matching
# Xbyak calls (nm.cpp). The listing from the assembler and the output of
# nm_frame are reduced to comparable text and diffed.

set -e

CXX=${CXX:-g++}
CFLAGS_USER=$CFLAGS
CFLAGS_WARN=$(cat CFLAGS_WARN.cfg)

# Defaults are the plain nasm 32-bit configuration; each mode below only
# overrides what differs.
FILTER=cat
EXE=nasm
OPT2=-DXBYAK32
OPT3=win32

case $1 in
Y)
	echo "yasm(32bit)"
	EXE=yasm
	OPT2="-DUSE_YASM -DXBYAK32"
	;;
64)
	echo "nasm(64bit)"
	OPT2=-DXBYAK64
	OPT3=win64
	FILTER=./normalize_prefix
	;;
Y64)
	echo "yasm(64bit)"
	EXE=yasm
	OPT2="-DUSE_YASM -DXBYAK64"
	OPT3=win64
	FILTER=./normalize_prefix
	;;
avx512)
	echo "nasm(64bit) + avx512"
	OPT2="-DXBYAK64 -DUSE_AVX512"
	OPT3=win64
	FILTER=./normalize_prefix
	;;
noexcept)
	echo "nasm(32bit) without exception"
	OPT2="-DXBYAK32 -DXBYAK_NO_EXCEPTION"
	;;
*)
	echo "nasm(32bit)"
	;;
esac

CFLAGS="$CFLAGS_USER $CFLAGS_WARN -g -I../ -I./ $OPT2"
echo "compile make_nm.cpp with $CFLAGS"
$CXX $CFLAGS make_nm.cpp -o make_nm

# reference side: assemble with a listing and keep only its third field;
# a field ending in "-" is joined with the one from the next line
./make_nm > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst

# Xbyak side: generate nm.cpp, build nm_frame and dump what it encodes
echo "xbyak"
./make_nm jit > nm.cpp
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst

# the status of diff is the status of the script
diff -bB ok.lst x.lst && echo "ok"
|
||||
Vendored
+11
@@ -0,0 +1,11 @@
|
||||
@echo off
rem Run test_nm in each of its four assembler modes, then run test_avx_all.
|
||||
echo *** nasm(32bit) ***
|
||||
call test_nm
|
||||
echo *** yasm(32bit) ***
|
||||
call test_nm Y
|
||||
echo *** nasm(64bit) ***
|
||||
call test_nm 64
|
||||
echo *** yasm(64bit) ***
|
||||
call test_nm Y64
|
||||
|
|
||||
call test_avx_all
|
||||
Reference in New Issue
Block a user