From 7891b6196a1fca7c984879e5ad3e9cce9d81f1e3 Mon Sep 17 00:00:00 2001 From: dreamstalker Date: Mon, 18 May 2015 17:04:00 +0400 Subject: [PATCH] Implemented delta JIT Implemented name->index map for sv.model_precache[] Fixed CPPUnitLite tests runner Source code optimizations for optimal usage of precompiled headers on linux --- .gitignore | 1 + .../include/cppunitlite/GradleAdapter.h | 1 + dep/cppunitlite/src/GradleAdapter.cpp | 48 +- dep/cppunitlite/src/TestResult.cpp | 2 + rehlds/engine/delta.cpp | 32 +- rehlds/engine/delta.h | 33 +- rehlds/engine/delta_jit.cpp | 332 + rehlds/engine/delta_jit.h | 54 + rehlds/engine/filesystem.cpp | 6 - rehlds/engine/filesystem_.h | 6 + rehlds/engine/host_cmd.cpp | 6 - rehlds/engine/host_cmd.h | 6 + rehlds/engine/mathlib.cpp | 3 - rehlds/engine/net_ws.cpp | 4 - rehlds/engine/net_ws.h | 4 +- rehlds/engine/pmovetst.cpp | 7 +- rehlds/engine/pr_cmds.cpp | 24 + rehlds/engine/server.h | 15 + rehlds/engine/sv_main.cpp | 54 +- rehlds/engine/sv_move.cpp | 2 - rehlds/engine/sv_move.h | 2 + rehlds/engine/sys_dll.cpp | 6 +- rehlds/engine/sys_dll2.cpp | 6 - rehlds/engine/sys_dll2.h | 6 + rehlds/engine/sys_engine.cpp | 4 - rehlds/engine/sys_engine.h | 4 + rehlds/engine/tmessage.cpp | 6 - rehlds/engine/tmessage.h | 6 + rehlds/engine/unicode_strtools.cpp | 2 +- rehlds/engine/world.cpp | 6 +- rehlds/hookers/engine.h | 4 +- rehlds/hookers/main_swds.cpp | 1 + rehlds/hookers/rehlds_debug.cpp | 16 + rehlds/hookers/rehlds_debug.h | 2 + rehlds/msvc/ReHLDS.vcxproj | 67 +- rehlds/msvc/ReHLDS.vcxproj.filters | 9 + rehlds/public/rehlds/crc32.cpp | 8 + rehlds/public/rehlds/crc32.h | 2 + rehlds/public/rehlds/osconfig.h | 6 + rehlds/public/rehlds/static_map.h | 53 + rehlds/rehlds/jitasm.h | 8967 +++++++++++++++++ rehlds/unittests/delta_tests.cpp | 362 + rehlds/unittests/mathlib_tests.cpp | 34 +- 43 files changed, 10094 insertions(+), 125 deletions(-) create mode 100644 rehlds/engine/delta_jit.cpp create mode 100644 rehlds/engine/delta_jit.h create mode 100644 
rehlds/rehlds/jitasm.h create mode 100644 rehlds/unittests/delta_tests.cpp diff --git a/.gitignore b/.gitignore index a2d9bd4..d8fcef6 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ rehlds/version/appversion.h rehlds/msvc/PublishPath*.txt rehlds/_rehldsTestImg +rehlds/_dev publish diff --git a/dep/cppunitlite/include/cppunitlite/GradleAdapter.h b/dep/cppunitlite/include/cppunitlite/GradleAdapter.h index 1e26c65..739fdaf 100644 --- a/dep/cppunitlite/include/cppunitlite/GradleAdapter.h +++ b/dep/cppunitlite/include/cppunitlite/GradleAdapter.h @@ -4,6 +4,7 @@ class GradleAdapter { public: int writeAllTestsInfoToFile(const char* fname); int runTest(const char* groupName, const char* testName); + int runGroup(const char* groupName); int runAllTests(); int testsEntryPoint(int argc, char* argv[]); }; diff --git a/dep/cppunitlite/src/GradleAdapter.cpp b/dep/cppunitlite/src/GradleAdapter.cpp index 2055fb2..9c6cc23 100644 --- a/dep/cppunitlite/src/GradleAdapter.cpp +++ b/dep/cppunitlite/src/GradleAdapter.cpp @@ -35,7 +35,10 @@ int GradleAdapter::writeAllTestsInfoToFile(const char* fname) { int GradleAdapter::runTest(const char* groupName, const char* testName) { Test* curTest = TestRegistry::getFirstTest(); - while (curTest != NULL && strcmp(groupName, curTest->getGroup()) && strcmp(testName, curTest->getGroup())) { + while (curTest != NULL) { + if (!strcmp(groupName, curTest->getGroup()) && !strcmp(testName, curTest->getName())) { + break; + } curTest = curTest->getNext(); } @@ -55,11 +58,18 @@ int GradleAdapter::runTest(const char* groupName, const char* testName) { } } -int GradleAdapter::runAllTests() { +int GradleAdapter::runGroup(const char* groupName) { Test* curTest = TestRegistry::getFirstTest(); + int ranTests = 0; while (curTest != NULL) { + if (strcmp(groupName, curTest->getGroup())) { + curTest = curTest->getNext(); + continue; + } + TestResult result; curTest->run(result); + ranTests++; if (result.getFailureCount()) { return 1; @@ -68,7 +78,31 @@ 
int GradleAdapter::runAllTests() { curTest = curTest->getNext(); } - printf("There were no test failures\n"); + if (ranTests == 0) { + printf("No tests with group='%s' found\n", groupName); + return 2; + } + + printf("There were no test failures; Tests executed: %d\n", ranTests); + return 0; +} + +int GradleAdapter::runAllTests() { + Test* curTest = TestRegistry::getFirstTest(); + int ranTests = 0; + while (curTest != NULL) { + TestResult result; + curTest->run(result); + ranTests++; + + if (result.getFailureCount()) { + return 1; + } + + curTest = curTest->getNext(); + } + + printf("There were no test failures; Tests executed: %d\n", ranTests); return 0; } @@ -93,6 +127,14 @@ int GradleAdapter::testsEntryPoint(int argc, char* argv[]) { return runTest(argv[2], argv[3]); } + if (!strcmp(argv[1], "-runGroup")) { + if (argc != 3) { + printf("-runGroup requires group name\n"); + } + + return runGroup(argv[2]); + } + printf("Bad argument specified\n"); return 1; } diff --git a/dep/cppunitlite/src/TestResult.cpp b/dep/cppunitlite/src/TestResult.cpp index d902124..e593767 100644 --- a/dep/cppunitlite/src/TestResult.cpp +++ b/dep/cppunitlite/src/TestResult.cpp @@ -21,6 +21,7 @@ void TestResult::addFailure (const Failure& failure) { std::stringstream ss; ss << "Failure in test '" << failure.testName << "' :" << failure.message; std::cout << ss.str() << std::endl; + std::cout.flush(); failureCount++; } @@ -33,4 +34,5 @@ void TestResult::testsEnded () { ss << "There were no test failures"; } std::cout << ss.str() << std::endl; + std::cout.flush(); } diff --git a/rehlds/engine/delta.cpp b/rehlds/engine/delta.cpp index 6364618..c40d97a 100644 --- a/rehlds/engine/delta.cpp +++ b/rehlds/engine/delta.cpp @@ -31,19 +31,6 @@ #ifndef Defines_and_Variables_region -#define DT_BYTE BIT(0) // A byte -#define DT_SHORT BIT(1) // 2 byte field -#define DT_FLOAT BIT(2) // A floating point field -#define DT_INTEGER BIT(3) // 4 byte integer -#define DT_ANGLE BIT(4) // A floating point angle 
-#define DT_TIMEWINDOW_8 BIT(5) // A floating point timestamp relative to server time -#define DT_TIMEWINDOW_BIG BIT(6) // A floating point timestamp relative to server time (with more precision and custom multiplier) -#define DT_STRING BIT(7) // A null terminated string, sent as 8 byte chars -#define DT_SIGNED BIT(31) // sign modificator - -#define FDT_MARK BIT(0) // Delta mark for sending - - /* <23bb1> ../engine/delta.c:47 */ typedef struct delta_link_s { @@ -520,6 +507,15 @@ void DELTA_MarkSendFields(unsigned char *from, unsigned char *to, delta_t *pFiel if (*(uint32 *)&from[pTest->fieldOffset] != *(uint32 *)&to[pTest->fieldOffset]) pTest->flags |= FDT_MARK; break; + +// don't use multiplier when checking, to increase performance +#ifdef REHLDS_FIXES + case DT_TIMEWINDOW_8: + case DT_TIMEWINDOW_BIG: + if (*(uint32 *)&from[pTest->fieldOffset] != *(uint32 *)&to[pTest->fieldOffset]) + pTest->flags |= FDT_MARK; + break; +#else case DT_TIMEWINDOW_8: if ((int32)(*(float *)&from[pTest->fieldOffset] * 100.0) != (int32)(*(float *)&to[pTest->fieldOffset] * 100.0)) pTest->flags |= FDT_MARK; @@ -528,6 +524,7 @@ void DELTA_MarkSendFields(unsigned char *from, unsigned char *to, delta_t *pFiel if ((int32)(*(float *)&from[pTest->fieldOffset] * 1000.0) != (int32)(*(float *)&to[pTest->fieldOffset] * 1000.0)) pTest->flags |= FDT_MARK; break; +#endif case DT_STRING: st1 = (char*)&from[pTest->fieldOffset]; st2 = (char*)&to[pTest->fieldOffset]; @@ -691,12 +688,19 @@ int DELTA_CheckDelta(unsigned char *from, unsigned char *to, delta_t *pFields) } /* <247f5> ../engine/delta.c:949 */ -int DELTA_WriteDelta(unsigned char *from, unsigned char *to, qboolean force, delta_t *pFields, void(*callback)(void)) +NOINLINE int DELTA_WriteDelta(unsigned char *from, unsigned char *to, qboolean force, delta_t *pFields, void(*callback)(void)) { int sendfields; + +#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES) + DELTAJit_ClearAndMarkSendFields(from, to, pFields); +#else 
DELTA_ClearFlags(pFields); DELTA_MarkSendFields(from, to, pFields); +#endif + sendfields = DELTA_CountSendFields(pFields); + _DELTA_WriteDelta(from, to, force, pFields, callback, sendfields); return sendfields; } diff --git a/rehlds/engine/delta.h b/rehlds/engine/delta.h index 26282be..de253c3 100644 --- a/rehlds/engine/delta.h +++ b/rehlds/engine/delta.h @@ -25,15 +25,22 @@ * version. * */ - -#ifndef DELTA_H -#define DELTA_H -#ifdef _WIN32 #pragma once -#endif #include "maintypes.h" +#define DT_BYTE BIT(0) // A byte +#define DT_SHORT BIT(1) // 2 byte field +#define DT_FLOAT BIT(2) // A floating point field +#define DT_INTEGER BIT(3) // 4 byte integer +#define DT_ANGLE BIT(4) // A floating point angle +#define DT_TIMEWINDOW_8 BIT(5) // A floating point timestamp relative to server time +#define DT_TIMEWINDOW_BIG BIT(6) // A floating point timestamp relative to server time (with more precision and custom multiplier) +#define DT_STRING BIT(7) // A null terminated string, sent as 8 byte chars +#define DT_SIGNED BIT(31) // sign modificator + +#define FDT_MARK BIT(0) // Delta mark for sending + typedef struct delta_s delta_t; @@ -61,6 +68,8 @@ typedef struct delta_description_s delta_stats_t stats; } delta_description_t; +class CDeltaJit; + /* ../engine/delta.h:78 */ typedef struct delta_s { @@ -69,6 +78,10 @@ typedef struct delta_s char conditionalencodename[32]; encoder_t conditionalencode; delta_description_t *pdd; + +#ifdef REHLDS_FIXES + CDeltaJit* jit; +#endif } delta_t; /* <23b2a> ../engine/delta.h:104 */ @@ -88,6 +101,14 @@ typedef struct delta_definition_list_s delta_definition_list_t; typedef struct delta_registry_s delta_registry_t; typedef struct delta_info_s delta_info_t; +/* ../engine/sv_main.c:102 */ +typedef struct delta_info_s +{ + delta_info_s *next; + char *name; + char *loadfile; + delta_t *delta; +} delta_info_t; #ifdef HOOK_ENGINE #define g_defs (*pg_defs) @@ -145,5 +166,3 @@ void DELTA_PrintStats(const char *name, delta_t *p); void 
DELTA_DumpStats_f(void); void DELTA_Init(void); void DELTA_Shutdown(void); - -#endif // DELTA_H diff --git a/rehlds/engine/delta_jit.cpp b/rehlds/engine/delta_jit.cpp new file mode 100644 index 0000000..a5ea603 --- /dev/null +++ b/rehlds/engine/delta_jit.cpp @@ -0,0 +1,332 @@ +#include "precompiled.h" +#include "jitasm.h" + +CDeltaJitRegistry g_DeltaJitRegistry; + +uint32 DELTAJIT_CreateMask(int startBit, int endBit) { + if (startBit < 0) startBit = 0; + if (endBit < 0) endBit = 0; + if (startBit > 32) startBit = 32; + if (endBit > 32) endBit = 32; + + uint32 res = 0xFFFFFFFF; + res &= (0xFFFFFFFF << startBit); + res &= (0xFFFFFFFF >> (32 - endBit)); + return res; +} + +unsigned int DELTAJIT_GetFieldSize(delta_description_t* desc) { + switch (desc->fieldType & ~DT_SIGNED) { + case DT_BYTE: return 1; + case DT_SHORT: return 2; + case DT_FLOAT: + case DT_INTEGER: + case DT_ANGLE: + case DT_TIMEWINDOW_8: + case DT_TIMEWINDOW_BIG: + return 4; + + case DT_STRING: + return desc->fieldSize; + + default: + rehlds_syserror("%s: Unknown delta field type %d", __FUNCTION__, desc->fieldType); + return 0; + } +} + +void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) { + + //calculate max offset and number of memblocks + unsigned int maxOffset = 0; + for (int i = 0; i < delta->fieldCount; i++) { + delta_description_t* desc = &delta->pdd[i]; + + unsigned fieldMaxOff = DELTAJIT_GetFieldSize(desc); + fieldMaxOff += desc->fieldOffset; + if (fieldMaxOff > maxOffset) { + maxOffset = fieldMaxOff; + } + } + + unsigned int numMemBlocks = maxOffset / 16; + if (maxOffset % 16 || numMemBlocks == 0) { + numMemBlocks++; + } + + //sanity checks & pre-clean + if (numMemBlocks >= DELTAJIT_MAX_BLOCKS) { + rehlds_syserror("%s: numMemBlocks >= DELTAJIT_MAX_BLOCKS (%d >= %d)", __FUNCTION__, numMemBlocks, DELTAJIT_MAX_BLOCKS); + return; + } + + if (delta->fieldCount >= DELTAJIT_MAX_FIELDS) { + rehlds_syserror("%s: fieldCount >= DELTAJIT_MAX_FIELDS (%d >= %d)", __FUNCTION__, 
delta->fieldCount, DELTAJIT_MAX_FIELDS); + return; + } + + memset(&jitdesc, 0, sizeof(jitdesc)); + jitdesc.numblocks = numMemBlocks; + jitdesc.numFields = delta->fieldCount; + + //create descriptions (primitive types) + for (int i = 0; i < delta->fieldCount; i++) { + delta_description_t* fieldDesc = &delta->pdd[i]; + + unsigned int blockId = fieldDesc->fieldOffset / 16; + unsigned int blockStart = blockId * 16; + + unsigned int fieldSize = DELTAJIT_GetFieldSize(fieldDesc); + + + auto jitField = &jitdesc.fields[i]; + jitField->id = i; + jitField->offset = fieldDesc->fieldOffset; + jitField->type = fieldDesc->fieldType; + jitField->length = fieldSize; + + if (fieldDesc->fieldType != DT_STRING) { + bool firstBlock = true; + deltajit_memblock_field* blockField = NULL; + while (blockStart < fieldDesc->fieldOffset + fieldSize) { + deltajit_memblock* memblock = &jitdesc.blocks[blockId]; + uint32 mask = DELTAJIT_CreateMask(fieldDesc->fieldOffset - blockStart, fieldDesc->fieldOffset + fieldSize - blockStart); + blockField = &memblock->fields[memblock->numFields++]; + blockField->field = jitField; + jitField->numBlocks++; + blockField->first = firstBlock; + blockField->mask = mask; + + blockStart += 16; + blockId++; + firstBlock = false; + } + blockField->last = true; + } + } + +} + +class CDeltaClearMarkFieldsJIT : public jitasm::function { +public: + deltajitdata_t *jitdesc; + + + CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc) : jitdesc(_jitdesc) { + } + + void checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField); + void main(Addr src, Addr dst, Addr fields); + void processStrings(Addr src, Addr dst, Addr fields); +}; + +void CDeltaClearMarkFieldsJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField) { + test(mask, (uint16)jitField->mask); + setnz(al); + movzx(dx, al); +} + +void CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr fields) +{ + +#ifndef REHLDS_FIXES + sub(esp, 12); //some local storage is 
required for precise DT_TIMEWINDOW marking +#endif + + mov(esi, ptr[src]); + mov(edi, ptr[dst]); + mov(ecx, ptr[fields]); + movdqu(xmm3, xmmword_ptr[esi]); + movdqu(xmm4, xmmword_ptr[edi]); + + auto zero_xmm = xmm2; + pxor(zero_xmm, zero_xmm); + + auto mask = ebx; + xor_(mask, mask); + for (unsigned int i = 0; i < jitdesc->numblocks; i++) { + movdqa(xmm0, xmm3); + movdqa(xmm1, xmm4); + + //prefetch next blocks + if (i < jitdesc->numblocks) { + mov(esi, ptr[src]); + mov(edi, ptr[dst]); + movdqu(xmm3, xmmword_ptr[esi + ((i + 1) * 16)]); + movdqu(xmm4, xmmword_ptr[edi + ((i + 1) * 16)]); + } + + pxor(xmm0, xmm1); + pcmpeqb(xmm0, xmm2); + pmovmskb(mask, xmm0); + not_(mask); + + + auto block = &jitdesc->blocks[i]; + for (unsigned int j = 0; j < block->numFields; j++) { + auto jitField = &block->fields[j]; + +#ifndef REHLDS_FIXES + if (jitField->field->type == DT_TIMEWINDOW_8 || jitField->field->type == DT_TIMEWINDOW_BIG) { + if (jitField->last) { + + float multiplier = (jitField->field->type == DT_TIMEWINDOW_8) ? 
100.0f : 1000.0f; + uint32 m32 = *(uint32*)(&multiplier); + mov(dword_ptr[esp], m32); + + fld(dword_ptr[esi + jitField->field->offset]); + fld(dword_ptr[esp]); + fmulp(); + fstp(qword_ptr[esp + 4]); + cvttsd2si(eax, mmword_ptr[esp + 4]); + + fld(dword_ptr[edi + jitField->field->offset]); + fld(dword_ptr[esp]); + fmulp(); + fstp(qword_ptr[esp + 4]); + cvttsd2si(edx, mmword_ptr[esp + 4]); + + cmp(eax, edx); + setne(al); + movzx(dx, al); + + } else { + continue; + } + } else { + checkFieldMask(mask, jitField); + } +#else + checkFieldMask(mask, jitField); +#endif + + int flagsOffset = (jitField->field->id * sizeof(delta_description_t) + offsetof(delta_description_t, flags)); + if (jitField->first) { + mov(word_ptr[ecx + flagsOffset], dx); + } + else { + or_(word_ptr[ecx + flagsOffset], dx); + } + } + } + + processStrings(src, dst, fields); + +#ifndef REHLDS_FIXES + add(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking +#endif +} + +void CDeltaClearMarkFieldsJIT::processStrings(Addr src, Addr dst, Addr fields) { + //strings + for (unsigned int i = 0; i < jitdesc->numFields; i++) { + auto jitField = &jitdesc->fields[i]; + if (jitField->type != DT_STRING) + continue; + + mov(esi, ptr[src]); + mov(edi, ptr[dst]); + add(esi, jitField->offset); + add(edi, jitField->offset); + + push(esi); + push(edi); + mov(ecx, (size_t)&Q_stricmp); + call(ecx); + add(esp, 8); + test(eax, eax); + setnz(cl); + movzx(cx, cl); + + mov(edx, ptr[fields]); + int flagsOffset = (jitField->id * sizeof(delta_description_t) + offsetof(delta_description_t, flags)); + mov(word_ptr[edx + flagsOffset], cx); + } +} + +class CDeltaJit { +public: + CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc; + delta_t* delta; + + CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc) { + delta = _delta; + cleanMarkCheckFunc = _cleanMarkCheckFunc; + } + + virtual ~CDeltaJit() { + if (cleanMarkCheckFunc) { + delete cleanMarkCheckFunc; + cleanMarkCheckFunc = NULL; + } + } +}; + 
+CDeltaJitRegistry::CDeltaJitRegistry() { + +} + +void CDeltaJitRegistry::RegisterDeltaJit(delta_t* delta, CDeltaJit* deltaJit) { + void* key = delta; +#ifndef REHLDS_FIXES + m_DeltaToJITMap.put(key, deltaJit); +#else + delta->jit = deltaJit; +#endif +} + +CDeltaJit* CDeltaJitRegistry::GetJITByDelta(delta_t* delta) { +#ifndef REHLDS_FIXES + void* key = delta; + auto node = m_DeltaToJITMap.get(key); + return (node != NULL) ? node->val : NULL; +#else + return delta->jit; +#endif +} + +void CDeltaJitRegistry::CreateAndRegisterDeltaJIT(delta_t* delta) { + deltajitdata_t data; + DELTAJIT_CreateDescription(delta, data); + + CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc = new CDeltaClearMarkFieldsJIT(&data); + cleanMarkCheckFunc->Assemble(); + cleanMarkCheckFunc->jitdesc = NULL; + + CDeltaJit* deltaJit = new CDeltaJit(delta, cleanMarkCheckFunc); + RegisterDeltaJit(delta, deltaJit); +} + +NOINLINE void DELTAJit_ClearAndMarkSendFields(unsigned char *from, unsigned char *to, delta_t *pFields) { + CDeltaJit* deltaJit = g_DeltaJitRegistry.GetJITByDelta(pFields); + if (!deltaJit) { + rehlds_syserror("%s: JITted delta encoder not found for delta %p", __FUNCTION__, pFields); + return; + } + + CDeltaClearMarkFieldsJIT &func = *deltaJit->cleanMarkCheckFunc; + func(from, to, pFields->pdd); + + if (pFields->conditionalencode) + pFields->conditionalencode(pFields, from, to); + +} + +void CDeltaJitRegistry::Cleanup() { +#ifndef REHLDS_FIXES + for (auto itr = m_DeltaToJITMap.iterator(); itr.hasElement(); itr.next()) { + auto node = itr.current(); + CDeltaJit* deltaJit = node->val; + delete deltaJit; + } + + m_DeltaToJITMap.clear(); +#else + delta_info_t* cur = g_sv_delta; + while (cur) { + delete cur->delta->jit; + cur->delta->jit = NULL; + cur = cur->next; + } +#endif +} diff --git a/rehlds/engine/delta_jit.h b/rehlds/engine/delta_jit.h new file mode 100644 index 0000000..9af8a29 --- /dev/null +++ b/rehlds/engine/delta_jit.h @@ -0,0 +1,54 @@ +#pragma once + +#include "maintypes.h" + 
+#define DELTAJIT_MAX_BLOCKS 32 +#define DELTAJIT_MAX_FIELDS 96 + +struct deltajit_field { + unsigned int id; + unsigned int offset; + unsigned int length; + int type; + unsigned int numBlocks; +}; + +struct deltajit_memblock_field { + deltajit_field* field; + uint16 mask; + bool first; + bool last; +}; + +struct deltajit_memblock { + unsigned int numFields; + deltajit_memblock_field fields[24]; +}; + +struct deltajitdata_t { + unsigned int numblocks; + deltajit_memblock blocks[DELTAJIT_MAX_BLOCKS]; + + unsigned int numFields; + deltajit_field fields[DELTAJIT_MAX_FIELDS]; +}; + +class CDeltaJit; + +class CDeltaJitRegistry { +private: +#ifndef REHLDS_FIXES + CStaticMap m_DeltaToJITMap; +#endif + +public: + CDeltaJitRegistry(); + void RegisterDeltaJit(delta_t* delta, CDeltaJit* deltaJit); + CDeltaJit* GetJITByDelta(delta_t* delta); + void CreateAndRegisterDeltaJIT(delta_t* delta); + void Cleanup(); +}; + +extern CDeltaJitRegistry g_DeltaJitRegistry; + +extern void DELTAJit_ClearAndMarkSendFields(unsigned char *from, unsigned char *to, delta_t *pFields); diff --git a/rehlds/engine/filesystem.cpp b/rehlds/engine/filesystem.cpp index 660f654..d256790 100644 --- a/rehlds/engine/filesystem.cpp +++ b/rehlds/engine/filesystem.cpp @@ -28,12 +28,6 @@ #include "precompiled.h" -#ifdef _WIN32 - #define FILESYSTEM_DLL_NAME "filesystem_stdio.dll" -#else - #define FILESYSTEM_DLL_NAME "filesystem_stdio.so" -#endif - CUtlVector g_fallbackLocalizationFiles; char s_pBaseDir[512]; bool bLowViolenceBuild; diff --git a/rehlds/engine/filesystem_.h b/rehlds/engine/filesystem_.h index 105a20b..7648c48 100644 --- a/rehlds/engine/filesystem_.h +++ b/rehlds/engine/filesystem_.h @@ -36,6 +36,12 @@ #include "iregistry.h" #include "utlvector.h" +#ifdef _WIN32 +#define FILESYSTEM_DLL_NAME "filesystem_stdio.dll" +#else +#define FILESYSTEM_DLL_NAME "filesystem_stdio.so" +#endif + #ifdef HOOK_ENGINE #define g_fallbackLocalizationFiles (*pg_fallbackLocalizationFiles) #define s_pBaseDir (*ps_pBaseDir) 
diff --git a/rehlds/engine/host_cmd.cpp b/rehlds/engine/host_cmd.cpp index bb5b1aa..aa3542e 100644 --- a/rehlds/engine/host_cmd.cpp +++ b/rehlds/engine/host_cmd.cpp @@ -28,12 +28,6 @@ #include "precompiled.h" -#define FILETIME_TO_QWORD(ft) \ - ((((uint64)ft.dwHighDateTime) << 32) + ft.dwLowDateTime) - -#define FILETIME_TO_PAIR(f,h)\ - (((uint64)f << 32) | h) - /* <3d3ff> ../engine/host_cmd.c:4378 */ typedef int(*SV_BLENDING_INTERFACE_FUNC)(int, struct sv_blending_interface_s **, struct server_studio_api_s *, float *, float *); diff --git a/rehlds/engine/host_cmd.h b/rehlds/engine/host_cmd.h index 1c53ba2..bc26ac2 100644 --- a/rehlds/engine/host_cmd.h +++ b/rehlds/engine/host_cmd.h @@ -36,6 +36,12 @@ #include "eiface.h" #include "FileSystem.h" +#define FILETIME_TO_QWORD(ft) \ + ((((uint64)ft.dwHighDateTime) << 32) + ft.dwLowDateTime) + +#define FILETIME_TO_PAIR(f,h)\ + (((uint64)f << 32) | h) + /* <3d22f> ../engine/host_cmd.c:83 */ typedef void (*SV_SAVEGAMECOMMENT_FUNC)(char *, int); diff --git a/rehlds/engine/mathlib.cpp b/rehlds/engine/mathlib.cpp index 72b5a1c..d11c540 100644 --- a/rehlds/engine/mathlib.cpp +++ b/rehlds/engine/mathlib.cpp @@ -28,9 +28,6 @@ #include "precompiled.h" -#include -#include - vec3_t vec3_origin; //int nanmask; //short int new_cw; diff --git a/rehlds/engine/net_ws.cpp b/rehlds/engine/net_ws.cpp index 5b86b98..bcd66b7 100644 --- a/rehlds/engine/net_ws.cpp +++ b/rehlds/engine/net_ws.cpp @@ -28,10 +28,6 @@ #include "precompiled.h" -// Create general message queues -#define NUM_MSG_QUEUES 40 -#define MSG_QUEUE_SIZE 1536 - #ifdef _WIN32 CRITICAL_SECTION net_cs; #endif // _WIN32 diff --git a/rehlds/engine/net_ws.h b/rehlds/engine/net_ws.h index ea7d7c7..f1c86f3 100644 --- a/rehlds/engine/net_ws.h +++ b/rehlds/engine/net_ws.h @@ -56,7 +56,9 @@ //1400 - 9 = 1391 #define SPLIT_SIZE (MAX_ROUTEABLE_PACKET - sizeof(SPLITPACKET)) - +// Create general message queues +#define NUM_MSG_QUEUES 40 +#define MSG_QUEUE_SIZE 1536 /* ../engine/net_ws.c:137 */ 
typedef struct loopmsg_s diff --git a/rehlds/engine/pmovetst.cpp b/rehlds/engine/pmovetst.cpp index 26ee92e..9170e25 100644 --- a/rehlds/engine/pmovetst.cpp +++ b/rehlds/engine/pmovetst.cpp @@ -28,9 +28,6 @@ #include "precompiled.h" -// 1/32 epsilon to keep floating point happy -#define DIST_EPSILON (0.03125f) - int g_contentsresult; hull_t box_hull_0; box_clipnodes_t box_clipnodes_0; @@ -645,6 +642,8 @@ qboolean PM_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, cons float t2; float midf; + float DIST_EPSILON = 0.03125f; + if (num < 0) { if (num == CONTENTS_SOLID) @@ -792,6 +791,8 @@ qboolean PM_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, cons float midf; vec3_t custom_p1; // for holding custom p1 value + float DIST_EPSILON = 0.03125f; + while (1) { if (num < 0) diff --git a/rehlds/engine/pr_cmds.cpp b/rehlds/engine/pr_cmds.cpp index ff41f86..452dcf3 100644 --- a/rehlds/engine/pr_cmds.cpp +++ b/rehlds/engine/pr_cmds.cpp @@ -139,7 +139,14 @@ void PF_setmodel_I(edict_t *e, const char *m) for (; *check; i++, check++) #endif { + + //use case-sensitive names to increase performance +#ifdef REHLDS_FIXES + if (!Q_strcmp(*check, m)) +#else if (!Q_stricmp(*check, m)) +#endif + { e->v.modelindex = i; model_t *mod = g_psv.models[i]; @@ -1423,14 +1430,25 @@ int PF_precache_model_I(const char *s) #else g_psv.model_precache[i] = s; #endif // REHLDS_FIXES + +#ifdef REHLDS_OPT_PEDANTIC + g_rehlds_sv.modelsMap.put(g_psv.model_precache[i], i); +#endif //REHLDS_OPT_PEDANTIC + g_psv.models[i] = Mod_ForName(s, 1, 1); if (!iOptional) g_psv.model_precache_flags[i] |= 1u; return i; } + //use case-sensitive names to increase performance +#ifdef REHLDS_FIXES + if (!Q_strcmp(g_psv.model_precache[i], s)) + return i; +#else if (!Q_stricmp(g_psv.model_precache[i], s)) return i; +#endif } Host_Error( "PF_precache_model_I: Model '%s' failed to precache because the item count is over the %d limit.\nReduce the number of brush models and/or regular models in the 
map to correct this.", @@ -1441,8 +1459,14 @@ int PF_precache_model_I(const char *s) { for (int i = 0; i < HL_MODEL_MAX; i++) { + //use case-sensitive names to increase performance +#ifdef REHLDS_FIXES + if (!Q_strcmp(g_psv.model_precache[i], s)) + return i; +#else if (!Q_stricmp(g_psv.model_precache[i], s)) return i; +#endif } Host_Error("PF_precache_model_I: '%s' Precache can only be done in spawn functions", s); } diff --git a/rehlds/engine/server.h b/rehlds/engine/server.h index 6d0bf1c..7f2acd5 100644 --- a/rehlds/engine/server.h +++ b/rehlds/engine/server.h @@ -164,6 +164,19 @@ typedef struct server_s unsigned char signon_data[32768]; } server_t; + +struct rehlds_server_t { + + //map for sv.model_precache (for faster resolving of model index by its name) +#if defined(REHLDS_FIXES) + CStringKeyStaticMap modelsMap; //case-sensitive keys for better performance +#elif defined(REHLDS_OPT_PEDANTIC) + CICaseStringKeyStaticMap modelsMap; //use case-insensitive keys to conform original engine's behavior +#endif + +}; + + /* <3b30a> ../engine/server.h:163 */ typedef struct client_frame_s { @@ -430,6 +443,8 @@ extern globalvars_t gGlobalVariables; extern server_static_t g_psvs; extern server_t g_psv; +extern rehlds_server_t g_rehlds_sv; + extern cvar_t sv_lan; extern cvar_t sv_lan_rate; extern cvar_t sv_aim; diff --git a/rehlds/engine/sv_main.cpp b/rehlds/engine/sv_main.cpp index 57cb025..da6d579 100644 --- a/rehlds/engine/sv_main.cpp +++ b/rehlds/engine/sv_main.cpp @@ -35,16 +35,6 @@ typedef struct full_packet_entities_s entity_state_t entities[MAX_PACKET_ENTITIES]; } full_packet_entities_t; -/* ../engine/sv_main.c:102 */ -typedef struct delta_info_s -{ - delta_info_s *next; - char *name; - char *loadfile; - delta_t *delta; -} delta_info_t; - - int sv_lastnum; extra_baselines_t g_sv_instance_baselines; @@ -66,6 +56,8 @@ globalvars_t gGlobalVariables; server_static_t g_psvs; server_t g_psv; +rehlds_server_t g_rehlds_sv; + decalname_t sv_decalnames[512]; int 
sv_decalnamecount; @@ -695,8 +687,15 @@ void SV_FindModelNumbers(void) { if (!g_psv.model_precache[i]) break; + + //use case-sensitive names to increase performance +#ifdef REHLDS_FIXES if (!Q_stricmp(g_psv.model_precache[i], "models/player.mdl")) sv_playermodel = i; +#else + if (!Q_stricmp(g_psv.model_precache[i], "models/player.mdl")) + sv_playermodel = i; +#endif } } @@ -4853,6 +4852,12 @@ int SV_ModelIndex(const char *name) if (!name || !name[0]) return 0; +#ifdef REHLDS_OPT_PEDANTIC + auto node = g_rehlds_sv.modelsMap.get(name); + if (node) { + return node->val; + } +#else for (int i = 0; i < HL_MODEL_MAX; i++) { if (!g_psv.model_precache[i]) @@ -4861,6 +4866,7 @@ int SV_ModelIndex(const char *name) if (!Q_stricmp(g_psv.model_precache[i], name)) return i; }; +#endif Sys_Error("SV_ModelIndex: model %s not precached", name); } @@ -5465,6 +5471,11 @@ int SV_SpawnServer(qboolean bIsDemo, char *server, char *startspot) SV_AllocClientFrames(); Q_memset(&g_psv, 0, sizeof(server_t)); + +#ifdef REHLDS_OPT_PEDANTIC + g_rehlds_sv.modelsMap.clear(); +#endif + Q_strncpy(g_psv.oldname, oldname, sizeof(oldname) - 1); g_psv.oldname[sizeof(oldname) - 1] = 0; Q_strncpy(g_psv.name, server, sizeof(g_psv.name) - 1); @@ -5577,6 +5588,13 @@ int SV_SpawnServer(qboolean bIsDemo, char *server, char *startspot) g_psv.model_precache_flags[1] |= RES_FATALIFMISSING; g_psv.model_precache[1] = g_psv.modelname; +#ifdef REHLDS_OPT_PEDANTIC + { + int __itmp = 1; + g_rehlds_sv.modelsMap.put(ED_NewString(g_psv.modelname), __itmp); + } +#endif + g_psv.sound_precache[0] = pr_strings; g_psv.model_precache[0] = pr_strings; g_psv.generic_precache[0] = pr_strings; @@ -5586,6 +5604,13 @@ int SV_SpawnServer(qboolean bIsDemo, char *server, char *startspot) g_psv.model_precache[i + 1] = localmodels[i]; g_psv.models[i + 1] = Mod_ForName(localmodels[i], FALSE, FALSE); g_psv.model_precache_flags[i + 1] |= RES_FATALIFMISSING; + +#ifdef REHLDS_OPT_PEDANTIC + { + int __itmp = i + 1; + 
g_rehlds_sv.modelsMap.put(g_psv.model_precache[i + 1], __itmp); + } +#endif } Q_memset(&g_psv.edicts->v, 0, sizeof(entvars_t)); @@ -6789,6 +6814,10 @@ void SV_RegisterDelta(char *name, char *loadfile) p->delta = pdesc; p->next = g_sv_delta; g_sv_delta = p; + +#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES) + g_DeltaJitRegistry.CreateAndRegisterDeltaJIT(pdesc); +#endif } /* ../engine/sv_main.c:9284 */ @@ -6826,6 +6855,10 @@ void SV_InitDeltas(void) g_peventdelta = SV_LookupDelta("event_t"); if (!g_peventdelta) Sys_Error("No event_t encoder on server!\n"); + +#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES) + g_DeltaJitRegistry.CreateAndRegisterDeltaJIT(&g_MetaDelta[0]); +#endif } /* ../engine/sv_main.c:9339 */ @@ -7012,6 +7045,7 @@ void SV_Init(void) /* ../engine/sv_main.c:9558 */ void SV_Shutdown(void) { + g_DeltaJitRegistry.Cleanup(); delta_info_t *p = g_sv_delta; while (p) { diff --git a/rehlds/engine/sv_move.cpp b/rehlds/engine/sv_move.cpp index 644bef8..37adfa9 100644 --- a/rehlds/engine/sv_move.cpp +++ b/rehlds/engine/sv_move.cpp @@ -28,8 +28,6 @@ #include "precompiled.h" -#define DI_NODIR -1 - /* * Local initialization */ diff --git a/rehlds/engine/sv_move.h b/rehlds/engine/sv_move.h index e0b8128..a244ca1 100644 --- a/rehlds/engine/sv_move.h +++ b/rehlds/engine/sv_move.h @@ -35,6 +35,8 @@ #include "maintypes.h" #include "server.h" +#define DI_NODIR -1 + #ifdef HOOK_ENGINE #define c_yes (*pc_yes) diff --git a/rehlds/engine/sys_dll.cpp b/rehlds/engine/sys_dll.cpp index 9286884..bd71ec6 100644 --- a/rehlds/engine/sys_dll.cpp +++ b/rehlds/engine/sys_dll.cpp @@ -222,7 +222,7 @@ void Sys_SetupFPUOptions() __asm { fldenv byte ptr fpuOpts } } -__declspec(noinline) void Sys_InitFPUControlWords() +NOINLINE void Sys_InitFPUControlWords() { int fpucw = 0; __asm { fnstcw fpucw } @@ -280,13 +280,13 @@ void __cdecl Sys_InitHardwareTimer() } int g_SavedFPUCW1 = 0; -__declspec(noinline) void Sys_FPUCW_Push_Prec64() { +NOINLINE void Sys_FPUCW_Push_Prec64() 
{ uint16 tmp = g_FPUCW_Mask_Prec_64Bit; __asm { fnstcw g_SavedFPUCW1 } __asm { fldcw tmp } } -__declspec(noinline) void Sys_FPUCW_Pop_Prec64() { +NOINLINE void Sys_FPUCW_Pop_Prec64() { uint16 tmp = g_SavedFPUCW1; __asm { fldcw tmp } } diff --git a/rehlds/engine/sys_dll2.cpp b/rehlds/engine/sys_dll2.cpp index 7c56766..7ae9f3b 100644 --- a/rehlds/engine/sys_dll2.cpp +++ b/rehlds/engine/sys_dll2.cpp @@ -28,12 +28,6 @@ #include "precompiled.h" -#define FIFTEEN_MB (15 * 1024 * 1024) -#define MINIMUM_WIN_MEMORY 0x0e00000 -#define WARNING_MEMORY 0x0200000 -#define MAXIMUM_WIN_MEMORY 0x2800000 // Ask for 40 MB max -#define MAXIMUM_DEDICATED_MEMORY 0x2800000 // Ask for 40 MB max - IDedicatedExports *dedicated_; qboolean g_bIsWin95; qboolean g_bIsWin98; diff --git a/rehlds/engine/sys_dll2.h b/rehlds/engine/sys_dll2.h index fed5bdb..cf4db2c 100644 --- a/rehlds/engine/sys_dll2.h +++ b/rehlds/engine/sys_dll2.h @@ -40,6 +40,12 @@ #include "engine_launcher_api.h" #include "idedicatedexports.h" +#define FIFTEEN_MB (15 * 1024 * 1024) +#define MINIMUM_WIN_MEMORY 0x0e00000 +#define WARNING_MEMORY 0x0200000 +#define MAXIMUM_WIN_MEMORY 0x2800000 // Ask for 40 MB max +#define MAXIMUM_DEDICATED_MEMORY 0x2800000 // Ask for 40 MB max + #ifdef HOOK_ENGINE #define dedicated_ (*pdedicated) #define g_bIsWin95 (*pg_bIsWin95) diff --git a/rehlds/engine/sys_engine.cpp b/rehlds/engine/sys_engine.cpp index 6ba3525..bcb304a 100644 --- a/rehlds/engine/sys_engine.cpp +++ b/rehlds/engine/sys_engine.cpp @@ -28,10 +28,6 @@ #include "precompiled.h" -// sleep time when not focus -#define NOT_FOCUS_SLEEP 50 -#define MINIMIZED_SLEEP 20 - /* * Globals initialization */ diff --git a/rehlds/engine/sys_engine.h b/rehlds/engine/sys_engine.h index 1a91cc8..5f7ad79 100644 --- a/rehlds/engine/sys_engine.h +++ b/rehlds/engine/sys_engine.h @@ -33,6 +33,10 @@ #include "igame.h" #include "iengine.h" +// sleep time when not focus +#define NOT_FOCUS_SLEEP 50 +#define MINIMIZED_SLEEP 20 + #ifdef HOOK_ENGINE #define game 
(*pgame) #define eng (*peng) diff --git a/rehlds/engine/tmessage.cpp b/rehlds/engine/tmessage.cpp index 817186a..9e0a898 100644 --- a/rehlds/engine/tmessage.cpp +++ b/rehlds/engine/tmessage.cpp @@ -28,12 +28,6 @@ #include "precompiled.h" -#define MSGFILE_NAME 0 -#define MSGFILE_TEXT 1 - -#define NAME_HEAP_SIZE 16384 -#define MAX_MESSAGES 1000 - char gNetworkTextMessageBuffer[MAX_NETMESSAGE][512]; client_textmessage_t gMessageParms; diff --git a/rehlds/engine/tmessage.h b/rehlds/engine/tmessage.h index dffc511..6d69cd8 100644 --- a/rehlds/engine/tmessage.h +++ b/rehlds/engine/tmessage.h @@ -42,6 +42,12 @@ #define MAX_NETMESSAGE 4 +#define MSGFILE_NAME 0 +#define MSGFILE_TEXT 1 + +#define NAME_HEAP_SIZE 16384 +#define MAX_MESSAGES 1000 + #ifdef HOOK_ENGINE #define gMessageTable (*pgMessageTable) diff --git a/rehlds/engine/unicode_strtools.cpp b/rehlds/engine/unicode_strtools.cpp index c049df8..a37a43f 100644 --- a/rehlds/engine/unicode_strtools.cpp +++ b/rehlds/engine/unicode_strtools.cpp @@ -485,4 +485,4 @@ qboolean Q_StripUnprintableAndSpace(char *pch) free(pwch_alloced); return bStrippedAny; -} \ No newline at end of file +} diff --git a/rehlds/engine/world.cpp b/rehlds/engine/world.cpp index cb82ff9..9f09f6c 100644 --- a/rehlds/engine/world.cpp +++ b/rehlds/engine/world.cpp @@ -28,8 +28,6 @@ #include "precompiled.h" -#define DIST_EPSILON (0.03125f) - hull_t box_hull; hull_t beam_hull; box_clipnodes_t box_clipnodes; @@ -656,6 +654,8 @@ qboolean SV_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, cons float midf; float pdif = p2f - p1f; + float DIST_EPSILON = 0.03125f; + if (num >= 0) { if (num < hull->firstclipnode || num > hull->lastclipnode || !hull->planes) @@ -792,6 +792,8 @@ qboolean SV_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, cons float pdif; vec3_t custom_p1; // for holding custom p1 value + float DIST_EPSILON = 0.03125f; + while (num >= 0) { pdif = p2f - p1f; diff --git a/rehlds/hookers/engine.h 
b/rehlds/hookers/engine.h index b28d161..b330c46 100644 --- a/rehlds/hookers/engine.h +++ b/rehlds/hookers/engine.h @@ -30,6 +30,7 @@ #include "keys.h" #include "decal.h" #include "delta.h" +#include "delta_jit.h" #include "server.h" #include "sys_dll.h" #include "sys_dll2.h" @@ -64,9 +65,6 @@ #include "net_ws.h" #include "net_chan.h" - - - #include "tmessage.h" #include "traceinit.h" #include "wad.h" diff --git a/rehlds/hookers/main_swds.cpp b/rehlds/hookers/main_swds.cpp index d470c6b..951e9a7 100644 --- a/rehlds/hookers/main_swds.cpp +++ b/rehlds/hookers/main_swds.cpp @@ -43,6 +43,7 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) return (FALSE); TestSuite_Init(NULL, &hlds_exe, NULL); + Rehlds_Debug_Init(NULL); #endif } diff --git a/rehlds/hookers/rehlds_debug.cpp b/rehlds/hookers/rehlds_debug.cpp index 0b8b8b8..c67366e 100644 --- a/rehlds/hookers/rehlds_debug.cpp +++ b/rehlds/hookers/rehlds_debug.cpp @@ -554,6 +554,22 @@ void Rehlds_Debug_logFree(void* ptr) g_RehldsDebugLog.flush(); } +void Rehlds_Debug_LogDeltaFlags(delta_t* delta, int counter, bool verbose) { + unsigned int cksum = 0; + + for (int i = 0; i < delta->fieldCount; i++) { + cksum = _mm_crc32_u16(cksum, delta->pdd[i].flags); + } + + g_RehldsDebugLog << "DF(c=" << counter << " crc=" << cksum << ")\n"; + if (verbose) { + for (int i = 0; i < delta->fieldCount; i++) { + g_RehldsDebugLog << "DeltaFlagsVerbose(counter=" << counter << " id=" << delta->pdd[i].fieldName << " flags= " << delta->pdd[i].flags << ")\n"; + } + } + g_RehldsDebugLog.flush(); +} + void Rehlds_Debug_Init(Module* engine) { diff --git a/rehlds/hookers/rehlds_debug.h b/rehlds/hookers/rehlds_debug.h index 031de8c..b2b257d 100644 --- a/rehlds/hookers/rehlds_debug.h +++ b/rehlds/hookers/rehlds_debug.h @@ -41,6 +41,8 @@ extern void Rehlds_Debug_logAlloc(size_t sz, void* ptr); extern void Rehlds_Debug_logRealloc(size_t sz, void* oldPtr, void* newPtr); extern void Rehlds_Debug_logFree(void* ptr); +extern void 
Rehlds_Debug_LogDeltaFlags(delta_t* delta, int counter, bool verbose); + extern void Rehlds_Debug_Init(Module* engine); #endif diff --git a/rehlds/msvc/ReHLDS.vcxproj b/rehlds/msvc/ReHLDS.vcxproj index f82e3ef..b2e0e3f 100644 --- a/rehlds/msvc/ReHLDS.vcxproj +++ b/rehlds/msvc/ReHLDS.vcxproj @@ -53,6 +53,7 @@ + @@ -229,37 +230,53 @@ true true - - true - true - true - true - true - true - - - true - true - true - true - true - false - false - - - true - true - true + true true true true true - - + true + true + true + + + true + true true true true + true + true + true + + + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true + true + true + + + true + true + true + true + true + true true true @@ -358,6 +375,7 @@ + @@ -709,7 +727,7 @@ Level3 Disabled true - REHLDS_OPT_PEDANTIC;REHLDS_SELF;REHLDS_FIXES;REHLDS_CHECKS;USE_BREAKPAD_HANDLER;DEDICATED;SWDS;_CRT_SECURE_NO_WARNINGS;_DEBUG;%(PreprocessorDefinitions) + REHLDS_OPT_PEDANTIC;REHLDS_SELF;REHLDS_CHECKS;USE_BREAKPAD_HANDLER;DEDICATED;SWDS;_CRT_SECURE_NO_WARNINGS;_DEBUG;%(PreprocessorDefinitions) Precise /arch:IA32 %(AdditionalOptions) MultiThreadedDebug @@ -861,6 +879,7 @@ MultiThreadedDebug Use precompiled.h + NoExtensions true diff --git a/rehlds/msvc/ReHLDS.vcxproj.filters b/rehlds/msvc/ReHLDS.vcxproj.filters index 84bece4..4713188 100644 --- a/rehlds/msvc/ReHLDS.vcxproj.filters +++ b/rehlds/msvc/ReHLDS.vcxproj.filters @@ -325,6 +325,12 @@ unittests + + engine + + + unittests + @@ -1032,6 +1038,9 @@ public\rehlds + + engine + diff --git a/rehlds/public/rehlds/crc32.cpp b/rehlds/public/rehlds/crc32.cpp index 887d154..872a9f4 100644 --- a/rehlds/public/rehlds/crc32.cpp +++ b/rehlds/public/rehlds/crc32.cpp @@ -131,3 +131,11 @@ uint32 crc32_t(uint32 iCRC, const uint8 *s, unsigned int len) { uint32 crc32(const uint8 *buf, unsigned int len) { return crc32_t(0, buf, len); } + +uint32 crc32_t8_sse(uint32 iCRC, uint8 data) { + return _mm_crc32_u8(iCRC, data); +} + +uint32 
crc32_t8_nosse(uint32 iCRC, uint8 data) { + return crc32_tab[(iCRC ^ data) & 0xff] ^ (iCRC >> 8); +} diff --git a/rehlds/public/rehlds/crc32.h b/rehlds/public/rehlds/crc32.h index 58e9030..6ca4458 100644 --- a/rehlds/public/rehlds/crc32.h +++ b/rehlds/public/rehlds/crc32.h @@ -18,4 +18,6 @@ uint32 crc32(const uint8 *buf, unsigned int len); uint32 crc32_t(uint32 iCRC, const uint8 *s, unsigned int len); +uint32 crc32_t8_sse(uint32 iCRC, uint8 data); +uint32 crc32_t8_nosse(uint32 iCRC, uint8 data); diff --git a/rehlds/public/rehlds/osconfig.h b/rehlds/public/rehlds/osconfig.h index 2494149..772d730 100644 --- a/rehlds/public/rehlds/osconfig.h +++ b/rehlds/public/rehlds/osconfig.h @@ -43,6 +43,9 @@ #include #include +#include +#include + #ifdef _WIN32 // WINDOWS #include #include @@ -85,6 +88,9 @@ #include #include +#include +#include + #ifdef _WIN32 // WINDOWS #define _CRT_SECURE_NO_WARNINGS #define WIN32_LEAN_AND_MEAN diff --git a/rehlds/public/rehlds/static_map.h b/rehlds/public/rehlds/static_map.h index bc6662a..010ffc9 100644 --- a/rehlds/public/rehlds/static_map.h +++ b/rehlds/public/rehlds/static_map.h @@ -202,3 +202,56 @@ public: return Iterator(this); } }; + +template +class CStringKeyStaticMap : public CStaticMap { +protected: + virtual uint32 hash(const char* const &val) { + return crc32((const unsigned char*)val, strlen(val)); + } + + virtual bool equals(const char* const &val1, const char* const &val2) { + return !strcmp(val1, val2); + } + +public: + CStringKeyStaticMap() { + } + +}; + +template +class CICaseStringKeyStaticMap : public CStaticMap { +protected: + virtual uint32 hash(const char* const &val) { /* case-insensitive checksum of val; byte step is chosen by cpuinfo.sse4_2 */ + uint32 cksum = 0; + const char* pcc = val; + if (cpuinfo.sse4_2) { + while (*pcc) { + char cc = *(pcc++); + if (cc >= 'A' && cc <= 'Z') { /* fold ASCII upper-case only, so hash() stays consistent with the _stricmp-based equals() */ + cc |= 0x20; + } + cksum = crc32_t8_sse(cksum, cc); + } + } else { + while (*pcc) { + char cc = *(pcc++); + if (cc >= 'A' && cc <= 'Z') { /* same folding rule as the SSE4.2 branch; '||' here was true for every byte */ + cc |= 0x20; + } + cksum = crc32_t8_nosse(cksum, cc); + } + } + return
cksum; + } + + virtual bool equals(const char* const &val1, const char* const &val2) { + return !_stricmp(val1, val2); + } + +public: + CICaseStringKeyStaticMap() { + } + +}; diff --git a/rehlds/rehlds/jitasm.h b/rehlds/rehlds/jitasm.h new file mode 100644 index 0000000..233706b --- /dev/null +++ b/rehlds/rehlds/jitasm.h @@ -0,0 +1,8967 @@ +// Copyright (c) 2009-2011, Hikaru Inoue, Akihiro Yamasaki, +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * The names of the contributors may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once +#ifndef JITASM_H +#define JITASM_H + +#if defined(_WIN32) +#define JITASM_WIN // Windows +#endif + +#if (defined(_WIN64) && (defined(_M_AMD64) || defined(_M_X64))) || defined(__x86_64__) +#define JITASM64 +#endif + +#if defined(__GNUC__) +#define JITASM_GCC +#endif + +#if !defined(JITASM_MMINTRIN) + #if !defined(__GNUC__) || defined(__MMX__) + #define JITASM_MMINTRIN 1 + #else + #define JITASM_MMINTRIN 0 + #endif +#endif +#if !defined(JITASM_XMMINTRIN) + #if !defined(__GNUC__) || defined(__SSE__) + #define JITASM_XMMINTRIN 1 + #else + #define JITASM_XMMINTRIN 0 + #endif +#endif +#if !defined(JITASM_EMMINTRIN) + #if !defined(__GNUC__) || defined(__SSE2__) + #define JITASM_EMMINTRIN 1 + #else + #define JITASM_EMMINTRIN 0 + #endif +#endif + + +#include +#include +#include +#include +#include +#include + +#if defined(JITASM_WIN) +#include +#else +#include +#include +#include +#endif + +#if JITASM_MMINTRIN + #include +#endif +#if JITASM_XMMINTRIN + #include +#endif +#if JITASM_EMMINTRIN + #include +#endif + +#if _MSC_VER >= 1400 // VC8 or later +#include +#endif + +#if defined(_MSC_VER) +#pragma warning( push ) +#pragma warning( disable : 4127 ) // conditional expression is constant. +#pragma warning( disable : 4201 ) // nonstandard extension used : nameless struct/union +#endif + +#ifdef ASSERT +#define JITASM_ASSERT ASSERT +#else +#include +#define JITASM_ASSERT assert +#endif + +//#define JITASM_DEBUG_DUMP +#ifdef JITASM_DEBUG_DUMP + #if defined(JITASM_GCC) + #include + #define JITASM_TRACE printf + #else + #define JITASM_TRACE jitasm::detail::Trace + #endif +#elif defined(JITASM_GCC) + #define JITASM_TRACE(...) 
((void)0) +#else + #define JITASM_TRACE __noop +#endif + +namespace jitasm +{ + +typedef signed char sint8; +typedef signed short sint16; +typedef signed int sint32; +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +#if defined(JITASM_GCC) +typedef signed long long sint64; +typedef unsigned long long uint64; +#else +typedef signed __int64 sint64; +typedef unsigned __int64 uint64; +#endif + +template inline void avoid_unused_warn(const T&) {} + +namespace detail +{ +#if defined(JITASM_GCC) + inline long interlocked_increment(long *addend) { return __sync_add_and_fetch(addend, 1); } + inline long interlocked_decrement(long *addend) { return __sync_sub_and_fetch(addend, 1); } + inline long interlocked_exchange(long *target, long value) { return __sync_lock_test_and_set(target, value); } +#elif defined(JITASM_WIN) + inline long interlocked_increment(long *addend) { return _InterlockedIncrement(addend); } + inline long interlocked_decrement(long *addend) { return _InterlockedDecrement(addend); } + inline long interlocked_exchange(long *target, long value) { return _InterlockedExchange(target, value); } +#endif +} // namespace detail + +/// Physical register ID +enum PhysicalRegID +{ + INVALID=-1, + EAX=0, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D, + AL=0, CL, DL, BL, AH, CH, DH, BH, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B, + AX=0, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W, + RAX=0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, + ST0=0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, + MM0=0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, + YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15, +}; + +enum +{ + /** \var NUM_OF_PHYSICAL_REG + * Number of physical register + */ + /** \var SIZE_OF_GP_REG 
+ * Size of general-purpose register + */ +#ifdef JITASM64 + NUM_OF_PHYSICAL_REG = 16, + SIZE_OF_GP_REG = 8 +#else + NUM_OF_PHYSICAL_REG = 8, + SIZE_OF_GP_REG = 4 +#endif +}; + +/// Register type +enum RegType +{ + R_TYPE_GP, ///< General purpose register + R_TYPE_MMX, ///< MMX register + R_TYPE_XMM, ///< XMM register + R_TYPE_YMM, ///< YMM register + R_TYPE_FPU, ///< FPU register + R_TYPE_SYMBOLIC_GP, ///< Symbolic general purpose register + R_TYPE_SYMBOLIC_MMX, ///< Symbolic MMX register + R_TYPE_SYMBOLIC_XMM, ///< Symbolic XMM register + R_TYPE_SYMBOLIC_YMM ///< Symbolic YMM register +}; + +/// Register identifier +struct RegID +{ + RegType type; + int id; ///< PhysicalRegID or symbolic register id + + bool operator==(const RegID& rhs) const {return type == rhs.type && id == rhs.id;} + bool operator!=(const RegID& rhs) const {return !(*this == rhs);} + bool operator<(const RegID& rhs) const {return type != rhs.type ? type < rhs.type : id < rhs.id;} + bool IsInvalid() const {return type == R_TYPE_GP && id == INVALID;} + bool IsSymbolic() const {return type == R_TYPE_SYMBOLIC_GP || type == R_TYPE_SYMBOLIC_MMX || type == R_TYPE_SYMBOLIC_XMM;} + + static RegID Invalid() { + RegID reg; + reg.type = R_TYPE_GP; + reg.id = INVALID; + return reg; + } + static RegID CreatePhysicalRegID(RegType type_, PhysicalRegID id_) { + RegID reg; + reg.type = type_; + reg.id = id_; + return reg; + } + static RegID CreateSymbolicRegID(RegType type_) { + static long s_id = 0; + RegID reg; + reg.type = type_; + reg.id = static_cast(detail::interlocked_increment(&s_id)); + return reg; + } +}; + +/// Operand type +enum OpdType +{ + O_TYPE_NONE, + O_TYPE_REG, + O_TYPE_MEM, + O_TYPE_IMM, + O_TYPE_TYPE_MASK = 0x0F, + + O_TYPE_DUMMY = 1 << 8, ///< The operand which has this flag is not encoded. This is for register allocator. + O_TYPE_READ = 1 << 9, ///< The operand is used for reading. + O_TYPE_WRITE = 1 << 10 ///< The operand is used for writing. 
+}; + +/// Operand size +enum OpdSize +{ + O_SIZE_8 = 8, + O_SIZE_16 = 16, + O_SIZE_32 = 32, + O_SIZE_64 = 64, + O_SIZE_80 = 80, + O_SIZE_128 = 128, + O_SIZE_224 = 224, + O_SIZE_256 = 256, + O_SIZE_864 = 864, + O_SIZE_4096 = 4096, +}; + +namespace detail +{ + /// Operand base class + struct Opd + { + OpdType opdtype_; + OpdSize opdsize_; + + union { + // REG + struct { + RegID reg_; + uint32 reg_assignable_; + }; + // MEM + struct { + RegID base_; + RegID index_; + sint64 scale_; + sint64 disp_; + OpdSize addrsize_; + }; + // IMM + sint64 imm_; + }; + + /// NONE + Opd() : opdtype_(O_TYPE_NONE) {} + /// REG + Opd(OpdSize opdsize, const RegID& reg, uint32 reg_assignable = 0xFFFFFFFF) : opdtype_(O_TYPE_REG), opdsize_(opdsize), reg_(reg), reg_assignable_(reg_assignable) {} + /// MEM + Opd(OpdSize opdsize, OpdSize addrsize, const RegID& base, const RegID& index, sint64 scale, sint64 disp) + : opdtype_(O_TYPE_MEM), opdsize_(opdsize), base_(base), index_(index), scale_(scale), disp_(disp), addrsize_(addrsize) {} + protected: + /// IMM + explicit Opd(OpdSize opdsize, sint64 imm) : opdtype_(O_TYPE_IMM), opdsize_(opdsize), imm_(imm) {} + + public: + bool IsNone() const {return (opdtype_ & O_TYPE_TYPE_MASK) == O_TYPE_NONE;} + bool IsReg() const {return (opdtype_ & O_TYPE_TYPE_MASK) == O_TYPE_REG;} + bool IsGpReg() const {return IsReg() && (reg_.type == R_TYPE_GP || reg_.type == R_TYPE_SYMBOLIC_GP);} + bool IsFpuReg() const {return IsReg() && reg_.type == R_TYPE_FPU;} + bool IsMmxReg() const {return IsReg() && (reg_.type == R_TYPE_MMX || reg_.type == R_TYPE_SYMBOLIC_MMX);} + bool IsXmmReg() const {return IsReg() && (reg_.type == R_TYPE_XMM || reg_.type == R_TYPE_SYMBOLIC_XMM);} + bool IsYmmReg() const {return IsReg() && (reg_.type == R_TYPE_YMM || reg_.type == R_TYPE_SYMBOLIC_YMM);} + bool IsMem() const {return (opdtype_ & O_TYPE_TYPE_MASK) == O_TYPE_MEM;} + bool IsImm() const {return (opdtype_ & O_TYPE_TYPE_MASK) == O_TYPE_IMM;} + bool IsDummy() const {return (opdtype_ & 
O_TYPE_DUMMY) != 0;} + + OpdSize GetSize() const {return opdsize_;} + OpdSize GetAddressSize() const {return addrsize_;} + RegID GetReg() const {JITASM_ASSERT(IsReg()); return reg_;} + RegID GetBase() const {JITASM_ASSERT(IsMem()); return base_;} + RegID GetIndex() const {JITASM_ASSERT(IsMem()); return index_;} + sint64 GetScale() const {JITASM_ASSERT(IsMem()); return scale_;} + sint64 GetDisp() const {JITASM_ASSERT(IsMem()); return disp_;} + sint64 GetImm() const {JITASM_ASSERT(IsImm()); return imm_;} + + bool operator==(const Opd& rhs) const + { /* equal when kind, size and the kind-specific payload all match; NONE operands carry no size (Opd() leaves opdsize_ unset), so size is compared only for REG/MEM/IMM */ + if ((opdtype_ & O_TYPE_TYPE_MASK) != (rhs.opdtype_ & O_TYPE_TYPE_MASK) || (!IsNone() && opdsize_ != rhs.opdsize_)) {return false;} + if (IsReg()) {return reg_ == rhs.reg_ && reg_assignable_ == rhs.reg_assignable_;} + if (IsMem()) {return base_ == rhs.base_ && index_ == rhs.index_ && scale_ == rhs.scale_ && disp_ == rhs.disp_ && addrsize_ == rhs.addrsize_;} + if (IsImm()) {return imm_ == rhs.imm_;} + return true; + } + bool operator!=(const Opd& rhs) const {return !(*this == rhs);} + }; + + /// Add O_TYPE_DUMMY to the specified operand + inline Opd Dummy(const Opd& opd) + { + Opd o(opd); + o.opdtype_ = static_cast(static_cast(o.opdtype_) | O_TYPE_DUMMY); + return o; + } + + /// Add O_TYPE_DUMMY to the specified operand and constraint of register assignment + inline Opd Dummy(const Opd& opd, const Opd& constraint) + { + JITASM_ASSERT(opd.IsReg() && (opd.opdtype_ & O_TYPE_TYPE_MASK) == (constraint.opdtype_ & O_TYPE_TYPE_MASK) && !constraint.GetReg().IsSymbolic()); + Opd o(opd); + o.opdtype_ = static_cast(static_cast(o.opdtype_) | O_TYPE_DUMMY); + o.reg_assignable_ = (1 << constraint.reg_.id); + return o; + } + + /// Add O_TYPE_READ to the specified operand + inline Opd R(const Opd& opd) + { + Opd o(opd); + o.opdtype_ = static_cast(static_cast(o.opdtype_) | O_TYPE_READ); + return o; + } + + /// Add O_TYPE_WRITE to the specified operand + inline Opd W(const Opd& opd) + { + Opd o(opd); + o.opdtype_ = static_cast(static_cast(o.opdtype_) | 
O_TYPE_WRITE); + return o; + } + + /// Add O_TYPE_READ | O_TYPE_WRITE to the specified operand + inline Opd RW(const Opd& opd) + { + Opd o(opd); + o.opdtype_ = static_cast(static_cast(o.opdtype_) | O_TYPE_READ | O_TYPE_WRITE); + return o; + } + + template + struct OpdT : Opd + { + /// NONE + OpdT() : Opd() {} + /// REG + explicit OpdT(const RegID& reg, uint32 reg_assignable = 0xFFFFFFFF) : Opd(static_cast(Size), reg, reg_assignable) {} + /// MEM + OpdT(OpdSize addrsize, const RegID& base, const RegID& index, sint64 scale, sint64 disp) + : Opd(static_cast(Size), addrsize, base, index, scale, disp) {} + protected: + /// IMM + OpdT(sint64 imm) : Opd(static_cast(Size), imm) {} + }; + +} // namespace detail + +typedef detail::OpdT Opd8; +typedef detail::OpdT Opd16; +typedef detail::OpdT Opd32; +typedef detail::OpdT Opd64; +typedef detail::OpdT Opd80; +typedef detail::OpdT Opd128; +typedef detail::OpdT Opd224; // FPU environment +typedef detail::OpdT Opd256; +typedef detail::OpdT Opd864; // FPU state +typedef detail::OpdT Opd4096; // FPU, MMX, XMM, MXCSR state + +/// 8bit general purpose register +struct Reg8 : Opd8 { + Reg8() : Opd8(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_GP), 0xFFFFFF0F) {} + explicit Reg8(PhysicalRegID id) : Opd8(RegID::CreatePhysicalRegID(R_TYPE_GP, id)) {} +}; +/// 16bit general purpose register +struct Reg16 : Opd16 { + Reg16() : Opd16(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_GP)) {} + explicit Reg16(PhysicalRegID id) : Opd16(RegID::CreatePhysicalRegID(R_TYPE_GP, id)) {} +}; +/// 32bit general purpose register +struct Reg32 : Opd32 { + Reg32() : Opd32(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_GP)) {} + explicit Reg32(PhysicalRegID id) : Opd32(RegID::CreatePhysicalRegID(R_TYPE_GP, id)) {} +}; +#ifdef JITASM64 +/// 64bit general purpose register +struct Reg64 : Opd64 { + Reg64() : Opd64(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_GP)) {} + explicit Reg64(PhysicalRegID id) : Opd64(RegID::CreatePhysicalRegID(R_TYPE_GP, id)) {} +}; +typedef Reg64 
Reg; +#else +typedef Reg32 Reg; +#endif +/// FPU register +struct FpuReg : Opd80 { + explicit FpuReg(PhysicalRegID id) : Opd80(RegID::CreatePhysicalRegID(R_TYPE_FPU, id)) {} +}; +/// MMX register +struct MmxReg : Opd64 { + MmxReg() : Opd64(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_MMX)) {} + explicit MmxReg(PhysicalRegID id) : Opd64(RegID::CreatePhysicalRegID(R_TYPE_MMX, id)) {} +}; +/// XMM register +struct XmmReg : Opd128 { + XmmReg() : Opd128(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_XMM)) {} + explicit XmmReg(PhysicalRegID id) : Opd128(RegID::CreatePhysicalRegID(R_TYPE_XMM, id)) {} +}; +/// YMM register +struct YmmReg : Opd256 { + YmmReg() : Opd256(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_YMM)) {} + explicit YmmReg(PhysicalRegID id) : Opd256(RegID::CreatePhysicalRegID(R_TYPE_YMM, id)) {} +}; + +struct Reg8_al : Reg8 {Reg8_al() : Reg8(AL) {}}; +struct Reg8_cl : Reg8 {Reg8_cl() : Reg8(CL) {}}; +struct Reg16_ax : Reg16 {Reg16_ax() : Reg16(AX) {}}; +struct Reg16_dx : Reg16 {Reg16_dx() : Reg16(DX) {}}; +struct Reg32_eax : Reg32 {Reg32_eax() : Reg32(EAX) {}}; +#ifdef JITASM64 +struct Reg64_rax : Reg64 {Reg64_rax() : Reg64(RAX) {}}; +#endif +struct FpuReg_st0 : FpuReg {FpuReg_st0() : FpuReg(ST0) {}}; + +template +struct MemT : OpdN +{ + MemT(OpdSize addrsize, const RegID& base, const RegID& index, sint64 scale, sint64 disp) : OpdN(addrsize, base, index, scale, disp) {} +}; +typedef MemT Mem8; +typedef MemT Mem16; +typedef MemT Mem32; +typedef MemT Mem64; +typedef MemT Mem80; +typedef MemT Mem128; +typedef MemT Mem224; // FPU environment +typedef MemT Mem256; +typedef MemT Mem864; // FPU state +typedef MemT Mem4096; // FPU, MMX, XMM, MXCSR state + +struct MemOffset64 +{ + sint64 offset_; + explicit MemOffset64(sint64 offset) : offset_(offset) {} + sint64 GetOffset() const {return offset_;} +}; + +template +struct ImmT : OpdN +{ + ImmT(U imm) : OpdN((S) imm) {} +}; +typedef ImmT Imm8; ///< 1 byte immediate +typedef ImmT Imm16; ///< 2 byte immediate +typedef ImmT 
Imm32; ///< 4 byte immediate +typedef ImmT Imm64; ///< 8 byte immediate + +namespace detail +{ + inline bool IsInt8(sint64 n) {return (sint8) n == n;} + inline bool IsInt16(sint64 n) {return (sint16) n == n;} + inline bool IsInt32(sint64 n) {return (sint32) n == n;} + inline Opd ImmXor8(const Imm16& imm) {return IsInt8(imm.GetImm()) ? (Opd) Imm8((sint8) imm.GetImm()) : (Opd) imm;} + inline Opd ImmXor8(const Imm32& imm) {return IsInt8(imm.GetImm()) ? (Opd) Imm8((sint8) imm.GetImm()) : (Opd) imm;} + inline Opd ImmXor8(const Imm64& imm) {return IsInt8(imm.GetImm()) ? (Opd) Imm8((sint8) imm.GetImm()) : (Opd) imm;} +} // namespace detail + + +/// 32bit address (base, displacement) +struct Addr32 +{ + RegID reg_; + sint64 disp_; + Addr32(const Reg32& obj) : reg_(obj.reg_), disp_(0) {} // implicit + Addr32(const RegID& reg, sint64 disp) : reg_(reg), disp_(disp) {} +}; +inline Addr32 operator+(const Reg32& lhs, sint64 rhs) {return Addr32(lhs.reg_, rhs);} +inline Addr32 operator+(sint64 lhs, const Reg32& rhs) {return rhs + lhs;} +inline Addr32 operator-(const Reg32& lhs, sint64 rhs) {return lhs + -rhs;} +inline Addr32 operator+(const Addr32& lhs, sint64 rhs) {return Addr32(lhs.reg_, lhs.disp_ + rhs);} +inline Addr32 operator+(sint64 lhs, const Addr32& rhs) {return rhs + lhs;} +inline Addr32 operator-(const Addr32& lhs, sint64 rhs) {return lhs + -rhs;} + +/// 32bit address (base, index, displacement) +struct Addr32BI +{ + RegID base_; + RegID index_; + sint64 disp_; + Addr32BI(const RegID& base, const RegID& index, sint64 disp) : base_(base), index_(index), disp_(disp) {} +}; +inline Addr32BI operator+(const Addr32& lhs, const Addr32& rhs) {return Addr32BI(rhs.reg_, lhs.reg_, lhs.disp_ + rhs.disp_);} +inline Addr32BI operator+(const Addr32BI& lhs, sint64 rhs) {return Addr32BI(lhs.base_, lhs.index_, lhs.disp_ + rhs);} +inline Addr32BI operator+(sint64 lhs, const Addr32BI& rhs) {return rhs + lhs;} +inline Addr32BI operator-(const Addr32BI& lhs, sint64 rhs) {return lhs + -rhs;} 
+ +/// 32bit address (index, scale, displacement) +struct Addr32SI +{ + RegID index_; + sint64 scale_; + sint64 disp_; + Addr32SI(const RegID& index, sint64 scale, sint64 disp) : index_(index), scale_(scale), disp_(disp) {} +}; +inline Addr32SI operator*(const Reg32& lhs, sint64 rhs) {return Addr32SI(lhs.reg_, rhs, 0);} +inline Addr32SI operator*(sint64 lhs, const Reg32& rhs) {return rhs * lhs;} +inline Addr32SI operator*(const Addr32SI& lhs, sint64 rhs) {return Addr32SI(lhs.index_, lhs.scale_ * rhs, lhs.disp_);} +inline Addr32SI operator*(sint64 lhs, const Addr32SI& rhs) {return rhs * lhs;} +inline Addr32SI operator+(const Addr32SI& lhs, sint64 rhs) {return Addr32SI(lhs.index_, lhs.scale_, lhs.disp_ + rhs);} +inline Addr32SI operator+(sint64 lhs, const Addr32SI& rhs) {return rhs + lhs;} +inline Addr32SI operator-(const Addr32SI& lhs, sint64 rhs) {return lhs + -rhs;} + +/// 32bit address (base, index, scale, displacement) +struct Addr32SIB +{ + RegID base_; + RegID index_; + sint64 scale_; + sint64 disp_; + Addr32SIB(const RegID& base, const RegID& index, sint64 scale, sint64 disp) : base_(base), index_(index), scale_(scale), disp_(disp) {} +}; +inline Addr32SIB operator+(const Addr32& lhs, const Addr32SI& rhs) {return Addr32SIB(lhs.reg_, rhs.index_, rhs.scale_, lhs.disp_ + rhs.disp_);} +inline Addr32SIB operator+(const Addr32SI& lhs, const Addr32& rhs) {return rhs + lhs;} +inline Addr32SIB operator+(const Addr32SIB& lhs, sint64 rhs) {return Addr32SIB(lhs.base_, lhs.index_, lhs.scale_, lhs.disp_ + rhs);} +inline Addr32SIB operator+(sint64 lhs, const Addr32SIB& rhs) {return rhs + lhs;} +inline Addr32SIB operator-(const Addr32SIB& lhs, sint64 rhs) {return lhs + -rhs;} + +#ifdef JITASM64 +/// 64bit address (base, displacement) +struct Addr64 +{ + RegID reg_; + sint64 disp_; + Addr64(const Reg64& obj) : reg_(obj.reg_), disp_(0) {} // implicit + Addr64(const RegID& reg, sint64 disp) : reg_(reg), disp_(disp) {} +}; +inline Addr64 operator+(const Reg64& lhs, sint64 rhs) 
{return Addr64(lhs.reg_, rhs);} +inline Addr64 operator+(sint64 lhs, const Reg64& rhs) {return rhs + lhs;} +inline Addr64 operator-(const Reg64& lhs, sint64 rhs) {return lhs + -rhs;} +inline Addr64 operator+(const Addr64& lhs, sint64 rhs) {return Addr64(lhs.reg_, lhs.disp_ + rhs);} +inline Addr64 operator+(sint64 lhs, const Addr64& rhs) {return rhs + lhs;} +inline Addr64 operator-(const Addr64& lhs, sint64 rhs) {return lhs + -rhs;} + +/// 64bit address (base, index, displacement) +struct Addr64BI +{ + RegID base_; + RegID index_; + sint64 disp_; + Addr64BI(const RegID& base, const RegID& index, sint64 disp) : base_(base), index_(index), disp_(disp) {} +}; +inline Addr64BI operator+(const Addr64& lhs, const Addr64& rhs) {return Addr64BI(rhs.reg_, lhs.reg_, lhs.disp_ + rhs.disp_);} +inline Addr64BI operator+(const Addr64BI& lhs, sint64 rhs) {return Addr64BI(lhs.base_, lhs.index_, lhs.disp_ + rhs);} +inline Addr64BI operator+(sint64 lhs, const Addr64BI& rhs) {return rhs + lhs;} +inline Addr64BI operator-(const Addr64BI& lhs, sint64 rhs) {return lhs + -rhs;} + +/// 64bit address (index, scale, displacement) +struct Addr64SI +{ + RegID index_; + sint64 scale_; + sint64 disp_; + Addr64SI(const RegID& index, sint64 scale, sint64 disp) : index_(index), scale_(scale), disp_(disp) {} +}; +inline Addr64SI operator*(const Reg64& lhs, sint64 rhs) {return Addr64SI(lhs.reg_, rhs, 0);} +inline Addr64SI operator*(sint64 lhs, const Reg64& rhs) {return rhs * lhs;} +inline Addr64SI operator*(const Addr64SI& lhs, sint64 rhs) {return Addr64SI(lhs.index_, lhs.scale_ * rhs, lhs.disp_);} +inline Addr64SI operator*(sint64 lhs, const Addr64SI& rhs) {return rhs * lhs;} +inline Addr64SI operator+(const Addr64SI& lhs, sint64 rhs) {return Addr64SI(lhs.index_, lhs.scale_, lhs.disp_ + rhs);} +inline Addr64SI operator+(sint64 lhs, const Addr64SI& rhs) {return rhs + lhs;} +inline Addr64SI operator-(const Addr64SI& lhs, sint64 rhs) {return lhs + -rhs;} + +/// 64bit address (base, index, scale, 
displacement) +struct Addr64SIB +{ + RegID base_; + RegID index_; + sint64 scale_; + sint64 disp_; + Addr64SIB(const RegID& base, const RegID& index, sint64 scale, sint64 disp) : base_(base), index_(index), scale_(scale), disp_(disp) {} +}; +inline Addr64SIB operator+(const Addr64& lhs, const Addr64SI& rhs) {return Addr64SIB(lhs.reg_, rhs.index_, rhs.scale_, lhs.disp_ + rhs.disp_);} +inline Addr64SIB operator+(const Addr64SI& lhs, const Addr64& rhs) {return rhs + lhs;} +inline Addr64SIB operator+(const Addr64SIB& lhs, sint64 rhs) {return Addr64SIB(lhs.base_, lhs.index_, lhs.scale_, lhs.disp_ + rhs);} +inline Addr64SIB operator+(sint64 lhs, const Addr64SIB& rhs) {return rhs + lhs;} +inline Addr64SIB operator-(const Addr64SIB& lhs, sint64 rhs) {return lhs + -rhs;} + +typedef Addr64 Addr; +typedef Addr64BI AddrBI; +typedef Addr64SI AddrSI; +typedef Addr64SIB AddrSIB; +#else +typedef Addr32 Addr; +typedef Addr32BI AddrBI; +typedef Addr32SI AddrSI; +typedef Addr32SIB AddrSIB; +#endif + +template +struct AddressingPtr +{ + // 32bit-Addressing + MemT operator[](const Addr32& obj) {return MemT(O_SIZE_32, obj.reg_, RegID::Invalid(), 0, obj.disp_);} + MemT operator[](const Addr32BI& obj) {return MemT(O_SIZE_32, obj.base_, obj.index_, 0, obj.disp_);} + MemT operator[](const Addr32SI& obj) {return MemT(O_SIZE_32, RegID::Invalid(), obj.index_, obj.scale_, obj.disp_);} + MemT operator[](const Addr32SIB& obj) {return MemT(O_SIZE_32, obj.base_, obj.index_, obj.scale_, obj.disp_);} +#ifdef JITASM64 + MemT operator[](sint32 disp) {return MemT(O_SIZE_64, RegID::Invalid(), RegID::Invalid(), 0, disp);} + MemT operator[](uint32 disp) {return MemT(O_SIZE_64, RegID::Invalid(), RegID::Invalid(), 0, (sint32) disp);} +#else + MemT operator[](sint32 disp) {return MemT(O_SIZE_32, RegID::Invalid(), RegID::Invalid(), 0, disp);} + MemT operator[](uint32 disp) {return MemT(O_SIZE_32, RegID::Invalid(), RegID::Invalid(), 0, (sint32) disp);} +#endif + +#ifdef JITASM64 + // 64bit-Addressing + MemT 
operator[](const Addr64& obj) {return MemT(O_SIZE_64, obj.reg_, RegID::Invalid(), 0, obj.disp_);} + MemT operator[](const Addr64BI& obj) {return MemT(O_SIZE_64, obj.base_, obj.index_, 0, obj.disp_);} + MemT operator[](const Addr64SI& obj) {return MemT(O_SIZE_64, RegID::Invalid(), obj.index_, obj.scale_, obj.disp_);} + MemT operator[](const Addr64SIB& obj) {return MemT(O_SIZE_64, obj.base_, obj.index_, obj.scale_, obj.disp_);} + MemOffset64 operator[](sint64 offset) {return MemOffset64(offset);} + MemOffset64 operator[](uint64 offset) {return MemOffset64((sint64) offset);} +#endif +}; + +/// Instruction ID +enum InstrID +{ + I_ADC, I_ADD, I_AND, + I_BSF, I_BSR, I_BSWAP, I_BT, I_BTC, I_BTR, I_BTS, + I_CALL, I_CBW, I_CLC, I_CLD, I_CLI, I_CLTS, I_CMC, I_CMOVCC, I_CMP, I_CMPS_B, I_CMPS_W, I_CMPS_D, I_CMPS_Q, I_CMPXCHG, + I_CMPXCHG8B, I_CMPXCHG16B, I_CPUID, I_CWD, I_CDQ, I_CQO, + I_DEC, I_DIV, + I_ENTER, + I_HLT, + I_IDIV, I_IMUL, I_IN, I_INC, I_INS_B, I_INS_W, I_INS_D, I_INVD, I_INVLPG, I_INT3, I_INTN, I_INTO, I_IRET, I_IRETD, I_IRETQ, + I_JMP, I_JCC, + I_LAR, I_LEA, I_LEAVE, I_LLDT, I_LMSW, I_LSL, I_LTR, I_LODS_B, I_LODS_W, I_LODS_D, I_LODS_Q, I_LOOP, + I_MOV, I_MOVBE, I_MOVS_B, I_MOVS_W, I_MOVS_D, I_MOVS_Q, I_MOVZX, I_MOVSX, I_MOVSXD, I_MUL, + I_NEG, I_NOP, I_NOT, + I_OR, I_OUT, I_OUTS_B, I_OUTS_W, I_OUTS_D, + I_POP, I_POPAD, I_POPF, I_POPFD, I_POPFQ, I_PUSH, I_PUSHAD, I_PUSHF, I_PUSHFD, I_PUSHFQ, + I_RDMSR, I_RDPMC, I_RDTSC, I_RET, I_RCL, I_RCR, I_ROL, I_ROR, I_RSM, + I_SAR, I_SHL, I_SHR, I_SBB, I_SCAS_B, I_SCAS_W, I_SCAS_D, I_SCAS_Q, I_SETCC, I_SHLD, I_SHRD, I_SGDT, I_SIDT, I_SLDT, I_SMSW, I_STC, I_STD, I_STI, + I_STOS_B, I_STOS_W, I_STOS_D, I_STOS_Q, I_SUB, I_SWAPGS, I_SYSCALL, I_SYSENTER, I_SYSEXIT, I_SYSRET, + I_TEST, + I_UD2, + I_VERR, I_VERW, + I_WAIT, I_WBINVD, I_WRMSR, + I_XADD, I_XCHG, I_XGETBV, I_XLATB, I_XOR, + + I_F2XM1, I_FABS, I_FADD, I_FADDP, I_FIADD, + I_FBLD, I_FBSTP, I_FCHS, I_FCLEX, I_FNCLEX, I_FCMOVCC, I_FCOM, I_FCOMP, I_FCOMPP, I_FCOMI, I_FCOMIP, 
I_FCOS, + I_FDECSTP, I_FDIV, I_FDIVP, I_FIDIV, I_FDIVR, I_FDIVRP, I_FIDIVR, + I_FFREE, + I_FICOM, I_FICOMP, I_FILD, I_FINCSTP, I_FINIT, I_FNINIT, I_FIST, I_FISTP, + I_FLD, I_FLD1, I_FLDCW, I_FLDENV, I_FLDL2E, I_FLDL2T, I_FLDLG2, I_FLDLN2, I_FLDPI, I_FLDZ, + I_FMUL, I_FMULP, I_FIMUL, + I_FNOP, + I_FPATAN, I_FPREM, I_FPREM1, I_FPTAN, + I_FRNDINT, I_FRSTOR, + I_FSAVE, I_FNSAVE, I_FSCALE, I_FSIN, I_FSINCOS, I_FSQRT, I_FST, I_FSTP, I_FSTCW, I_FNSTCW, I_FSTENV, I_FNSTENV, I_FSTSW, I_FNSTSW, + I_FSUB, I_FSUBP, I_FISUB, I_FSUBR, I_FSUBRP, I_FISUBR, + I_FTST, + I_FUCOM, I_FUCOMP, I_FUCOMPP, I_FUCOMI, I_FUCOMIP, + I_FXAM, I_FXCH, I_FXRSTOR, I_FXSAVE, I_FXTRACT, + I_FYL2X, I_FYL2XP1, + + I_ADDPS, I_ADDSS, I_ADDPD, I_ADDSD, I_ADDSUBPS, I_ADDSUBPD, I_ANDPS, I_ANDPD, I_ANDNPS, I_ANDNPD, + I_BLENDPS, I_BLENDPD, I_BLENDVPS, I_BLENDVPD, + I_CLFLUSH, I_CMPPS, I_CMPSS, I_CMPPD, I_CMPSD, I_COMISS, I_COMISD, I_CRC32, + I_CVTDQ2PD, I_CVTDQ2PS, I_CVTPD2DQ, I_CVTPD2PI, I_CVTPD2PS, I_CVTPI2PD, I_CVTPI2PS, I_CVTPS2DQ, I_CVTPS2PD, I_CVTPS2PI, I_CVTSD2SI, + I_CVTSD2SS, I_CVTSI2SD, I_CVTSI2SS, I_CVTSS2SD, I_CVTSS2SI, I_CVTTPD2DQ, I_CVTTPD2PI, I_CVTTPS2DQ, I_CVTTPS2PI, I_CVTTSD2SI, I_CVTTSS2SI, + I_DIVPS, I_DIVSS, I_DIVPD, I_DIVSD, I_DPPS, I_DPPD, + I_EMMS, I_EXTRACTPS, + I_FISTTP, + I_HADDPS, I_HADDPD, I_HSUBPS, I_HSUBPD, + I_INSERTPS, + I_LDDQU, I_LDMXCSR, I_LFENCE, + I_MASKMOVDQU, I_MASKMOVQ, I_MAXPS, I_MAXSS, I_MAXPD, I_MAXSD, I_MFENCE, I_MINPS, I_MINSS, I_MINPD, I_MINSD, I_MONITOR, + I_MOVAPD, I_MOVAPS, I_MOVD, I_MOVDDUP, I_MOVDQA, I_MOVDQU, I_MOVDQ2Q, I_MOVHLPS, I_MOVLHPS, I_MOVHPS, I_MOVHPD, I_MOVLPS, I_MOVLPD, + I_MOVMSKPS, I_MOVMSKPD, I_MOVNTDQ, I_MOVNTDQA, I_MOVNTI, I_MOVNTPD, I_MOVNTPS, I_MOVNTQ, I_MOVQ, I_MOVQ2DQ, I_MOVSD, I_MOVSS, + I_MOVSHDUP, I_MOVSLDUP, I_MOVUPS, I_MOVUPD, I_MPSADBW, I_MULPS, I_MULSS, I_MULPD, I_MULSD, I_MWAIT, + I_ORPS, I_ORPD, + I_PABSB, I_PABSD, I_PABSW, I_PACKSSDW, I_PACKSSWB, I_PACKUSDW, I_PACKUSWB, I_PADDB, I_PADDD, I_PADDQ, I_PADDSB, I_PADDSW, I_PADDUSB, 
+ I_PADDUSW, I_PADDW, I_PALIGNR, I_PAND, I_PANDN, I_PAUSE, I_PAVGB, I_PAVGW, + I_PBLENDVB, I_PBLENDW, + I_PCMPEQB, I_PCMPEQW, I_PCMPEQD, I_PCMPEQQ, I_PCMPESTRI, I_PCMPESTRM, I_PCMPISTRI, I_PCMPISTRM, I_PCMPGTB, I_PCMPGTW, I_PCMPGTD, I_PCMPGTQ, + I_PEXTRB, I_PEXTRW, I_PEXTRD, I_PEXTRQ, + I_PHADDW, I_PHADDD, I_PHADDSW, I_PHMINPOSUW, I_PHSUBW, I_PHSUBD, I_PHSUBSW, + I_PINSRB, I_PINSRW, I_PINSRD, I_PINSRQ, + I_PMADDUBSW, I_PMADDWD, I_PMAXSB, I_PMAXSW, I_PMAXSD, I_PMAXUB, I_PMAXUW, I_PMAXUD, I_PMINSB, I_PMINSW, I_PMINSD, I_PMINUB, I_PMINUW, + I_PMINUD, I_PMOVMSKB, I_PMOVSXBW, I_PMOVSXBD, I_PMOVSXBQ, I_PMOVSXWD, I_PMOVSXWQ, I_PMOVSXDQ, I_PMOVZXBW, I_PMOVZXBD, I_PMOVZXBQ, I_PMOVZXWD, + I_PMOVZXWQ, I_PMOVZXDQ, I_PMULDQ, I_PMULHRSW, I_PMULHUW, I_PMULHW, I_PMULLW, I_PMULLD, I_PMULUDQ, + I_POPCNT, I_POR, + I_PREFETCH, + I_PSADBW, I_PSHUFB, I_PSHUFD, I_PSHUFHW, I_PSHUFLW, I_PSHUFW, I_PSIGNB, I_PSIGNW, I_PSIGND, I_PSLLW, I_PSLLD, I_PSLLQ, I_PSLLDQ, I_PSRAW, + I_PSRAD, I_PSRLW, I_PSRLD, I_PSRLQ, I_PSRLDQ, I_PSUBB, I_PSUBW, I_PSUBD, I_PSUBQ, I_PSUBSB, I_PSUBSW, I_PSUBUSB, I_PSUBUSW, + I_PTEST, + I_PUNPCKHBW, I_PUNPCKHWD, I_PUNPCKHDQ, I_PUNPCKHQDQ, I_PUNPCKLBW, I_PUNPCKLWD, I_PUNPCKLDQ, I_PUNPCKLQDQ, + I_PXOR, + I_RCPPS, I_RCPSS, I_ROUNDPS, I_ROUNDPD, I_ROUNDSS, I_ROUNDSD, I_RSQRTPS, I_RSQRTSS, + I_SFENCE, I_SHUFPS, I_SHUFPD, I_SQRTPS, I_SQRTSS, I_SQRTPD, I_SQRTSD, I_STMXCSR, I_SUBPS, I_SUBSS, I_SUBPD, I_SUBSD, + I_UCOMISS, I_UCOMISD, I_UNPCKHPS, I_UNPCKHPD, I_UNPCKLPS, I_UNPCKLPD, + I_XORPS, I_XORPD, + + I_VBROADCASTSS, I_VBROADCASTSD, I_VBROADCASTF128, + I_VEXTRACTF128, + I_VINSERTF128, + I_VMASKMOVPS, I_VMASKMOVPD, + I_VPERMILPD, I_VPERMILPS, I_VPERM2F128, + I_VTESTPS, I_VTESTPD, + I_VZEROALL, I_VZEROUPPER, + + I_AESENC, I_AESENCLAST, I_AESDEC, I_AESDECLAST, I_AESIMC, I_AESKEYGENASSIST, + I_PCLMULQDQ, + + // FMA + I_VFMADD132PD, I_VFMADD213PD, I_VFMADD231PD, I_VFMADD132PS, I_VFMADD213PS, I_VFMADD231PS, + I_VFMADD132SD, I_VFMADD213SD, I_VFMADD231SD, I_VFMADD132SS, I_VFMADD213SS, 
I_VFMADD231SS, + I_VFMADDSUB132PD, I_VFMADDSUB213PD, I_VFMADDSUB231PD, I_VFMADDSUB132PS, I_VFMADDSUB213PS, I_VFMADDSUB231PS, + I_VFMSUBADD132PD, I_VFMSUBADD213PD, I_VFMSUBADD231PD, I_VFMSUBADD132PS, I_VFMSUBADD213PS, I_VFMSUBADD231PS, + I_VFMSUB132PD, I_VFMSUB213PD, I_VFMSUB231PD, I_VFMSUB132PS, I_VFMSUB213PS, I_VFMSUB231PS, + I_VFMSUB132SD, I_VFMSUB213SD, I_VFMSUB231SD, I_VFMSUB132SS, I_VFMSUB213SS, I_VFMSUB231SS, + I_VFNMADD132PD, I_VFNMADD213PD, I_VFNMADD231PD, I_VFNMADD132PS, I_VFNMADD213PS, I_VFNMADD231PS, + I_VFNMADD132SD, I_VFNMADD213SD, I_VFNMADD231SD, I_VFNMADD132SS, I_VFNMADD213SS, I_VFNMADD231SS, + I_VFNMSUB132PD, I_VFNMSUB213PD, I_VFNMSUB231PD, I_VFNMSUB132PS, I_VFNMSUB213PS, I_VFNMSUB231PS, + I_VFNMSUB132SD, I_VFNMSUB213SD, I_VFNMSUB231SD, I_VFNMSUB132SS, I_VFNMSUB213SS, I_VFNMSUB231SS, + + // F16C + I_RDFSBASE, I_RDGSBASE, I_RDRAND, I_WRFSBASE, I_WRGSBASE, I_VCVTPH2PS, I_VCVTPS2PH, + + // XOP + I_VFRCZPD, I_VFRCZPS, I_VFRCZSD, I_VFRCZSS, + I_VPCMOV, I_VPCOMB, I_VPCOMD, I_VPCOMQ, I_VPCOMUB, I_VPCOMUD, I_VPCOMUQ, I_VPCOMUW, I_VPCOMW, I_VPERMIL2PD, I_VPERMIL2PS, + I_VPHADDBD, I_VPHADDBQ, I_VPHADDBW, I_VPHADDDQ, I_VPHADDUBD, I_VPHADDUBQ, I_VPHADDUBW, I_VPHADDUDQ, I_VPHADDUWD, I_VPHADDUWQ, + I_VPHADDWD, I_VPHADDWQ, I_VPHSUBBW, I_VPHSUBDQ, I_VPHSUBWD, + I_VPMACSDD, I_VPMACSDQH, I_VPMACSDQL, I_VPMACSSDD, I_VPMACSSDQH, I_VPMACSSDQL, I_VPMACSSWD, I_VPMACSSWW, I_VPMACSWD, I_VPMACSWW, + I_VPMADCSSWD, I_VPMADCSWD, + I_VPPERM, I_VPROTB, I_VPROTD, I_VPROTQ, I_VPROTW, I_VPSHAB, I_VPSHAD, I_VPSHAQ, I_VPSHAW, I_VPSHLB, I_VPSHLD, I_VPSHLQ, I_VPSHLW, + + // FMA4 + I_VFMADDPD, I_VFMADDPS, I_VFMADDSD, I_VFMADDSS, + I_VFMADDSUBPD, I_VFMADDSUBPS, + I_VFMSUBADDPD, I_VFMSUBADDPS, + I_VFMSUBPD, I_VFMSUBPS, I_VFMSUBSD, I_VFMSUBSS, + I_VFNMADDPD, I_VFNMADDPS, I_VFNMADDSD, I_VFNMADDSS, + I_VFNMSUBPD, I_VFNMSUBPS, I_VFNMSUBSD, I_VFNMSUBSS, + + // jitasm compiler instructions + I_COMPILER_DECLARE_REG_ARG, ///< Declare register argument + I_COMPILER_DECLARE_STACK_ARG, ///< Declare 
stack argument + I_COMPILER_DECLARE_RESULT_REG, ///< Declare result register (eax/rax/xmm0) + I_COMPILER_PROLOG, ///< Function prolog + I_COMPILER_EPILOG ///< Function epilog +}; + +enum JumpCondition +{ + JCC_O, JCC_NO, JCC_B, JCC_AE, JCC_E, JCC_NE, JCC_BE, JCC_A, JCC_S, JCC_NS, JCC_P, JCC_NP, JCC_L, JCC_GE, JCC_LE, JCC_G, + JCC_CXZ, JCC_ECXZ, JCC_RCXZ, +}; + +enum EncodingFlags +{ + E_SPECIAL = 1 << 0, + E_OPERAND_SIZE_PREFIX = 1 << 1, ///< Operand-size override prefix + E_REP_PREFIX = 1 << 2, ///< REP prefix + E_REXW_PREFIX = 1 << 3, ///< REX.W + E_MANDATORY_PREFIX_66 = 1 << 4, ///< Mandatory prefix 66 + E_MANDATORY_PREFIX_F2 = 1 << 5, ///< Mandatory prefix F2 + E_MANDATORY_PREFIX_F3 = 1 << 6, ///< Mandatory prefix F3 + E_VEX = 1 << 7, + E_XOP = 1 << 8, + E_VEX_L = 1 << 9, + E_VEX_W = 1 << 10, + E_VEX_MMMMM_SHIFT = 11, + E_VEX_MMMMM_MASK = 0x1F << E_VEX_MMMMM_SHIFT, + E_VEX_0F = 1 << E_VEX_MMMMM_SHIFT, + E_VEX_0F38 = 2 << E_VEX_MMMMM_SHIFT, + E_VEX_0F3A = 3 << E_VEX_MMMMM_SHIFT, + E_XOP_M00011 = 3 << E_VEX_MMMMM_SHIFT, + E_XOP_M01000 = 8 << E_VEX_MMMMM_SHIFT, + E_XOP_M01001 = 9 << E_VEX_MMMMM_SHIFT, + E_VEX_PP_SHIFT = 16, + E_VEX_PP_MASK = 0x3 << E_VEX_PP_SHIFT, + E_VEX_66 = 1 << E_VEX_PP_SHIFT, + E_VEX_F3 = 2 << E_VEX_PP_SHIFT, + E_VEX_F2 = 3 << E_VEX_PP_SHIFT, + E_XOP_P00 = 0 << E_VEX_PP_SHIFT, + E_XOP_P01 = 1 << E_VEX_PP_SHIFT, + + E_VEX_128 = E_VEX, + E_VEX_256 = E_VEX | E_VEX_L, + E_VEX_LIG = E_VEX, + E_VEX_LZ = E_VEX, + E_VEX_66_0F = E_VEX_66 | E_VEX_0F, + E_VEX_66_0F38 = E_VEX_66 | E_VEX_0F38, + E_VEX_66_0F3A = E_VEX_66 | E_VEX_0F3A, + E_VEX_F2_0F = E_VEX_F2 | E_VEX_0F, + E_VEX_F2_0F38 = E_VEX_F2 | E_VEX_0F38, + E_VEX_F2_0F3A = E_VEX_F2 | E_VEX_0F3A, + E_VEX_F3_0F = E_VEX_F3 | E_VEX_0F, + E_VEX_F3_0F38 = E_VEX_F3 | E_VEX_0F38, + E_VEX_F3_0F3A = E_VEX_F3 | E_VEX_0F3A, + E_VEX_W0 = 0, + E_VEX_W1 = E_VEX_W, + E_VEX_WIG = 0, + E_XOP_128 = E_XOP, + E_XOP_256 = E_XOP | E_VEX_L, + E_XOP_W0 = 0, + E_XOP_W1 = E_VEX_W, + + // Aliases + E_VEX_128_0F_WIG = E_VEX_128 
| E_VEX_0F | E_VEX_WIG, + E_VEX_256_0F_WIG = E_VEX_256 | E_VEX_0F | E_VEX_WIG, + E_VEX_128_66_0F_WIG = E_VEX_128 | E_VEX_66_0F | E_VEX_WIG, + E_VEX_256_66_0F_WIG = E_VEX_256 | E_VEX_66_0F | E_VEX_WIG, + E_VEX_128_66_0F38_WIG = E_VEX_128 | E_VEX_66_0F38 | E_VEX_WIG, + E_VEX_256_66_0F38_WIG = E_VEX_256 | E_VEX_66_0F38 | E_VEX_WIG, + E_VEX_128_66_0F38_W0 = E_VEX_128 | E_VEX_66_0F38 | E_VEX_W0, + E_VEX_256_66_0F38_W0 = E_VEX_256 | E_VEX_66_0F38 | E_VEX_W0, + E_VEX_128_66_0F38_W1 = E_VEX_128 | E_VEX_66_0F38 | E_VEX_W1, + E_VEX_256_66_0F38_W1 = E_VEX_256 | E_VEX_66_0F38 | E_VEX_W1, + E_VEX_128_66_0F3A_W0 = E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, + E_VEX_256_66_0F3A_W0 = E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0, +}; + +/// Instruction +struct Instr +{ + static const size_t MAX_OPERAND_COUNT = 6; + + InstrID id_; ///< Instruction ID + uint32 opcode_; ///< Opcode + uint32 encoding_flag_; ///< EncodingFlags + detail::Opd opd_[MAX_OPERAND_COUNT]; ///< Operands + + Instr(InstrID id, uint32 opcode, uint32 encoding_flag, const detail::Opd& opd1 = detail::Opd(), const detail::Opd& opd2 = detail::Opd(), const detail::Opd& opd3 = detail::Opd(), const detail::Opd& opd4 = detail::Opd(), const detail::Opd& opd5 = detail::Opd(), const detail::Opd& opd6 = detail::Opd()) + : id_(id), opcode_(opcode), encoding_flag_(encoding_flag) {opd_[0] = opd1, opd_[1] = opd2, opd_[2] = opd3, opd_[3] = opd4, opd_[4] = opd5, opd_[5] = opd6;} + + InstrID GetID() const {return id_;} + const detail::Opd& GetOpd(size_t index) const {return opd_[index];} + detail::Opd& GetOpd(size_t index) {return opd_[index];} +}; + +/// Assembler backend +struct Backend +{ + uint8* pbuff_; + size_t buffsize_; + size_t size_; + + Backend(void* pbuff = NULL, size_t buffsize = 0) : pbuff_((uint8*) pbuff), buffsize_(buffsize), size_(0) + { + memset(pbuff, 0xCC, buffsize); // INT3 + } + + size_t GetSize() const + { + return size_; + } + + void put_bytes(void* p, size_t n) + { + uint8* pb = (uint8*) p; + while (n--) { + if (pbuff_) { 
+ if (size_ == buffsize_) JITASM_ASSERT(0); + pbuff_[size_] = *pb++; + } + size_++; + } + } + void db(uint64 b) {put_bytes(&b, 1);} + void dw(uint64 w) {put_bytes(&w, 2);} + void dd(uint64 d) {put_bytes(&d, 4);} + void dq(uint64 q) {put_bytes(&q, 8);} + + uint8 GetWRXB(int w, const detail::Opd& reg, const detail::Opd& r_m) + { + uint8 wrxb = w ? 8 : 0; + if (reg.IsReg()) { + if (!reg.GetReg().IsInvalid() && reg.GetReg().id >= R8) wrxb |= 4; + } + if (r_m.IsReg()) { + if (r_m.GetReg().id >= R8) wrxb |= 1; + } + if (r_m.IsMem()) { + if (!r_m.GetIndex().IsInvalid() && r_m.GetIndex().id >= R8) wrxb |= 2; + if (!r_m.GetBase().IsInvalid() && r_m.GetBase().id >= R8) wrxb |= 1; + } + return wrxb; + } + + void EncodePrefixes(uint32 flag, const detail::Opd& reg, const detail::Opd& r_m, const detail::Opd& vex) + { + if (flag & (E_VEX | E_XOP)) { + // Encode VEX prefix +#ifdef JITASM64 + if (r_m.IsMem() && r_m.GetAddressSize() != O_SIZE_64) db(0x67); +#endif + uint8 vvvv = vex.IsReg() ? 0xF - (uint8) vex.GetReg().id : 0xF; + uint8 mmmmm = (flag & E_VEX_MMMMM_MASK) >> E_VEX_MMMMM_SHIFT; + uint8 pp = static_cast((flag & E_VEX_PP_MASK) >> E_VEX_PP_SHIFT); + uint8 wrxb = GetWRXB(flag & E_VEX_W, reg, r_m); + if (flag & E_XOP) { + db(0x8F); + db((~wrxb & 7) << 5 | mmmmm); + db((wrxb & 8) << 4 | vvvv << 3 | (flag & E_VEX_L ? 4 : 0) | pp); + } else if (wrxb & 0xB || (flag & E_VEX_MMMMM_MASK) == E_VEX_0F38 || (flag & E_VEX_MMMMM_MASK) == E_VEX_0F3A) { + db(0xC4); + db((~wrxb & 7) << 5 | mmmmm); + db((wrxb & 8) << 4 | vvvv << 3 | (flag & E_VEX_L ? 4 : 0) | pp); + } else { + db(0xC5); + db((~wrxb & 4) << 5 | vvvv << 3 | (flag & E_VEX_L ? 4 : 0) | pp); + } + } else { + uint8 wrxb = GetWRXB(flag & E_REXW_PREFIX, reg, r_m); + if (wrxb) { + // Encode REX prefix + JITASM_ASSERT(!reg.IsReg() || reg.GetSize() != O_SIZE_8 || reg.GetReg().id < AH || reg.GetReg().id >= R8B); // AH, BH, CH, or DH may not be used with REX. 
+ JITASM_ASSERT(!r_m.IsReg() || r_m.GetSize() != O_SIZE_8 || r_m.GetReg().id < AH || r_m.GetReg().id >= R8B); // AH, BH, CH, or DH may not be used with REX. + + if (flag & E_REP_PREFIX) db(0xF3); +#ifdef JITASM64 + if (r_m.IsMem() && r_m.GetAddressSize() != O_SIZE_64) db(0x67); +#endif + if (flag & E_OPERAND_SIZE_PREFIX) db(0x66); + + if (flag & E_MANDATORY_PREFIX_66) db(0x66); + else if (flag & E_MANDATORY_PREFIX_F2) db(0xF2); + else if (flag & E_MANDATORY_PREFIX_F3) db(0xF3); + + db(0x40 | wrxb); + } else { + if (flag & E_MANDATORY_PREFIX_66) db(0x66); + else if (flag & E_MANDATORY_PREFIX_F2) db(0xF2); + else if (flag & E_MANDATORY_PREFIX_F3) db(0xF3); + + if (flag & E_REP_PREFIX) db(0xF3); +#ifdef JITASM64 + if (r_m.IsMem() && r_m.GetAddressSize() != O_SIZE_64) db(0x67); +#endif + if (flag & E_OPERAND_SIZE_PREFIX) db(0x66); + } + } + } + + void EncodeModRM(uint8 reg, const detail::Opd& r_m) + { + reg &= 0x7; + + if (r_m.IsReg()) { + db(0xC0 | (reg << 3) | (r_m.GetReg().id & 0x7)); + } else if (r_m.IsMem()) { + JITASM_ASSERT(r_m.GetBase().type == R_TYPE_GP && r_m.GetIndex().type == R_TYPE_GP); + int base = r_m.GetBase().id; if (base != INVALID) base &= 0x7; + int index = r_m.GetIndex().id; if (index != INVALID) index &= 0x7; + + if (base == INVALID && index == INVALID) { +#ifdef JITASM64 + db(reg << 3 | 4); + db(0x25); +#else + db(reg << 3 | 5); +#endif + dd(r_m.GetDisp()); + } else { + JITASM_ASSERT(base != ESP || index != ESP); + JITASM_ASSERT(index != ESP || r_m.GetScale() == 0); + + if (index == ESP) { + index = base; + base = ESP; + } + bool sib = index != INVALID || r_m.GetScale() || base == ESP; + + // ModR/M + uint8 mod = 0; + if (r_m.GetDisp() == 0 || (sib && base == INVALID)) mod = base != EBP ? 0 : 1; + else if (detail::IsInt8(r_m.GetDisp())) mod = 1; + else if (detail::IsInt32(r_m.GetDisp())) mod = 2; + else JITASM_ASSERT(0); + db(mod << 6 | reg << 3 | (sib ? 
4 : base)); + + // SIB + if (sib) { + uint8 ss = 0; + if (r_m.GetScale() == 0) ss = 0; + else if (r_m.GetScale() == 2) ss = 1; + else if (r_m.GetScale() == 4) ss = 2; + else if (r_m.GetScale() == 8) ss = 3; + else JITASM_ASSERT(0); + if (index != INVALID && base != INVALID) { + db(ss << 6 | index << 3 | base); + } else if (base != INVALID) { + db(ss << 6 | 4 << 3 | base); + } else if (index != INVALID) { + db(ss << 6 | index << 3 | 5); + } else { + JITASM_ASSERT(0); + } + } + + // Displacement + if (mod == 0 && sib && base == INVALID) dd(r_m.GetDisp()); + if (mod == 1) db(r_m.GetDisp()); + if (mod == 2) dd(r_m.GetDisp()); + } + } else { + JITASM_ASSERT(0); + } + } + + void EncodeOpcode(uint32 opcode) + { + if (opcode & 0xFF000000) db((opcode >> 24) & 0xFF); + if (opcode & 0xFFFF0000) db((opcode >> 16) & 0xFF); + if (opcode & 0xFFFFFF00) db((opcode >> 8) & 0xFF); + db(opcode & 0xFF); + } + + void EncodeImm(const detail::Opd& imm) + { + const OpdSize size = imm.GetSize(); + if (size == O_SIZE_8) db(imm.GetImm()); + else if (size == O_SIZE_16) dw(imm.GetImm()); + else if (size == O_SIZE_32) dd(imm.GetImm()); + else if (size == O_SIZE_64) dq(imm.GetImm()); + else JITASM_ASSERT(0); + } + + void Encode(const Instr& instr) + { + uint32 opcode = instr.opcode_; + + const detail::Opd& opd1 = instr.GetOpd(0).IsDummy() ? detail::Opd() : instr.GetOpd(0); JITASM_ASSERT(!(opd1.IsReg() && opd1.GetReg().IsSymbolic())); + const detail::Opd& opd2 = instr.GetOpd(1).IsDummy() ? detail::Opd() : instr.GetOpd(1); JITASM_ASSERT(!(opd2.IsReg() && opd2.GetReg().IsSymbolic())); + const detail::Opd& opd3 = instr.GetOpd(2).IsDummy() ? detail::Opd() : instr.GetOpd(2); JITASM_ASSERT(!(opd3.IsReg() && opd3.GetReg().IsSymbolic())); + const detail::Opd& opd4 = instr.GetOpd(3).IsDummy() ? 
detail::Opd() : instr.GetOpd(3); JITASM_ASSERT(!(opd4.IsReg() && opd4.GetReg().IsSymbolic())); + + // +rb, +rw, +rd, +ro + if (opd1.IsReg() && (opd2.IsNone() || opd2.IsImm())) { + opcode += opd1.GetReg().id & 0x7; + } + + if ((opd1.IsImm() || opd1.IsReg()) && (opd2.IsReg() || opd2.IsMem())) { // ModR/M + const detail::Opd& reg = opd1; + const detail::Opd& r_m = opd2; + const detail::Opd& vex = opd3; + EncodePrefixes(instr.encoding_flag_, reg, r_m, vex); + EncodeOpcode(opcode); + EncodeModRM((uint8) (reg.IsImm() ? reg.GetImm() : reg.GetReg().id), r_m); + + // /is4 + if (opd4.IsReg()) { + EncodeImm(Imm8(static_cast(opd4.GetReg().id << 4))); + } + } else { + const detail::Opd& reg = detail::Opd(); + const detail::Opd& r_m = opd1.IsReg() ? opd1 : detail::Opd(); + const detail::Opd& vex = detail::Opd(); + EncodePrefixes(instr.encoding_flag_, reg, r_m, vex); + EncodeOpcode(opcode); + } + + if (opd1.IsImm() && !opd2.IsReg() && !opd2.IsMem()) EncodeImm(opd1); + if (opd2.IsImm()) EncodeImm(opd2); + if (opd3.IsImm()) EncodeImm(opd3); + if (opd4.IsImm()) EncodeImm(opd4); + } + + void EncodeALU(const Instr& instr, uint32 opcode) + { + const detail::Opd& reg = instr.GetOpd(1); + const detail::Opd& imm = instr.GetOpd(2); + JITASM_ASSERT(instr.GetOpd(0).IsImm() && reg.IsReg() && imm.IsImm()); + + if (reg.GetReg().id == EAX && (reg.GetSize() == O_SIZE_8 || !detail::IsInt8(imm.GetImm()))) { + opcode |= (reg.GetSize() == O_SIZE_8 ? 0 : 1); + Encode(Instr(instr.GetID(), opcode, instr.encoding_flag_, reg, imm)); + } else { + Encode(instr); + } + } + + void EncodeJMP(const Instr& instr) + { + const detail::Opd& imm = instr.GetOpd(0); + if (instr.GetID() == I_JMP) { + Encode(Instr(instr.GetID(), imm.GetSize() == O_SIZE_8 ? 
0xEB : 0xE9, instr.encoding_flag_, imm)); + } else if (instr.GetID() == I_JCC) { +#ifndef JITASM64 + uint32 tttn = instr.opcode_; + if (tttn == JCC_CXZ) Encode(Instr(instr.GetID(), 0x67E3, instr.encoding_flag_, imm)); + else if (tttn == JCC_ECXZ) Encode(Instr(instr.GetID(), 0xE3, instr.encoding_flag_, imm)); + else Encode(Instr(instr.GetID(), (imm.GetSize() == O_SIZE_8 ? 0x70 : 0x0F80) | tttn, instr.encoding_flag_, imm)); +#else + uint32 tttn = instr.opcode_; + if (tttn == JCC_ECXZ) Encode(Instr(instr.GetID(), 0x67E3, instr.encoding_flag_, imm)); + else if (tttn == JCC_RCXZ) Encode(Instr(instr.GetID(), 0xE3, instr.encoding_flag_, imm)); + else Encode(Instr(instr.GetID(), (imm.GetSize() == O_SIZE_8 ? 0x70 : 0x0F80) | tttn, instr.encoding_flag_, imm)); +#endif + } else if (instr.GetID() == I_LOOP) { + Encode(Instr(instr.GetID(), instr.opcode_, instr.encoding_flag_, imm)); + } else { + JITASM_ASSERT(0); + } + } + + void EncodeMOV(const Instr& instr) + { +#ifndef JITASM64 + const detail::Opd& reg = instr.GetOpd(0); + const detail::Opd& mem = instr.GetOpd(1); + JITASM_ASSERT(reg.IsReg() && mem.IsMem()); + + if (reg.GetReg().id == EAX && mem.GetBase().IsInvalid() && mem.GetIndex().IsInvalid()) { + uint32 opcode = 0xA0 | (~instr.opcode_ & 0x2) | (instr.opcode_ & 1); + Encode(Instr(instr.GetID(), opcode, instr.encoding_flag_, Imm32((sint32) mem.GetDisp()))); + } else { + Encode(instr); + } +#else + Encode(instr); +#endif + } + + void EncodeTEST(const Instr& instr) + { + const detail::Opd& reg = instr.GetOpd(1); + const detail::Opd& imm = instr.GetOpd(2); + JITASM_ASSERT(instr.GetOpd(0).IsImm() && reg.IsReg() && imm.IsImm()); + + if (reg.GetReg().id == EAX) { + uint32 opcode = 0xA8 | (reg.GetSize() == O_SIZE_8 ? 
0 : 1); + Encode(Instr(instr.GetID(), opcode, instr.encoding_flag_, reg, imm)); + } else { + Encode(instr); + } + } + + void EncodeXCHG(const Instr& instr) + { + const detail::Opd& dst = instr.GetOpd(0); + const detail::Opd& src = instr.GetOpd(1); + JITASM_ASSERT(dst.IsReg() && src.IsReg()); + + if (dst.GetReg().id == EAX) { + Encode(Instr(instr.GetID(), 0x90, instr.encoding_flag_, src)); + } else if (src.GetReg().id == EAX) { + Encode(Instr(instr.GetID(), 0x90, instr.encoding_flag_, dst)); + } else { + Encode(instr); + } + } + + void Assemble(const Instr& instr) + { + if (instr.encoding_flag_ & E_SPECIAL) { + switch (instr.GetID()) { + case I_ADD: EncodeALU(instr, 0x04); break; + case I_OR: EncodeALU(instr, 0x0C); break; + case I_ADC: EncodeALU(instr, 0x14); break; + case I_SBB: EncodeALU(instr, 0x1C); break; + case I_AND: EncodeALU(instr, 0x24); break; + case I_SUB: EncodeALU(instr, 0x2C); break; + case I_XOR: EncodeALU(instr, 0x34); break; + case I_CMP: EncodeALU(instr, 0x3C); break; + case I_JMP: EncodeJMP(instr); break; + case I_JCC: EncodeJMP(instr); break; + case I_LOOP: EncodeJMP(instr); break; + case I_MOV: EncodeMOV(instr); break; + case I_TEST: EncodeTEST(instr); break; + case I_XCHG: EncodeXCHG(instr); break; + default: JITASM_ASSERT(0); break; + } + } else { + Encode(instr); + } + } + + static size_t GetInstrCodeSize(const Instr& instr) + { + Backend backend; + backend.Assemble(instr); + return backend.GetSize(); + } +}; + +namespace detail +{ + /// Counting 1-Bits + inline uint32 Count1Bits(uint32 x) + { + x = x - ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0F0F0F0F; + x = x + (x >> 8); + x = x + (x >> 16); + return x & 0x0000003F; + } + + /// The bit position of the first bit 1. 
+ inline uint32 bit_scan_forward(uint32 x) + { + JITASM_ASSERT(x != 0); +#if defined(JITASM_GCC) + return __builtin_ctz(x); +#else + unsigned long index; + _BitScanForward(&index, x); + return index; +#endif + } + + /// The bit position of the last bit 1. + inline uint32 bit_scan_reverse(uint32 x) + { + JITASM_ASSERT(x != 0); +#if defined(JITASM_GCC) + return 31 - __builtin_clz(x); +#else + unsigned long index; + _BitScanReverse(&index, x); + return index; +#endif + } + + /// Prior iterator + template It prior(const It &it) { + It i = it; + return --i; + } + + /// Next iterator + template It next(const It &it) { + It i = it; + return ++i; + } + + /// Iterator range + template struct Range : std::pair { + typedef It Iterator; + Range() : std::pair() {} + Range(const It& f, const It& s) : std::pair(f, s) {} + Range(T& container) : std::pair(container.begin(), container.end()) {} + bool empty() const {return this->first == this->second;} + size_t size() const {return std::distance(this->first, this->second);} + }; + + /// Const iterator range + template struct ConstRange : Range { + ConstRange() : Range() {} + ConstRange(const typename T::const_iterator& f, const typename T::const_iterator& s) : Range(f, s) {} + ConstRange(const T& container) : Range(container.begin(), container.end()) {} + }; + + inline void append_num(std::string& str, size_t num) + { + if (num >= 10) + append_num(str, num / 10); + str.append(1, static_cast('0' + num % 10)); + } + +#if defined(JITASM_WIN) + /// Debug trace + inline void Trace(const char *format, ...) 
+ { + char szBuf[256]; + va_list args; + va_start(args, format); +#if _MSC_VER >= 1400 // VC8 or later + _vsnprintf_s(szBuf, sizeof(szBuf) / sizeof(char), format, args); +#else + vsnprintf(szBuf, sizeof(szBuf) / sizeof(char), format, args); +#endif + va_end(args); + ::OutputDebugStringA(szBuf); + } +#endif + + /// Executable code buffer + class CodeBuffer + { + void* pbuff_; + size_t codesize_; + size_t buffsize_; + + public: + CodeBuffer() : pbuff_(NULL), codesize_(0), buffsize_(0) {} + ~CodeBuffer() {Reset(0);} + + void* GetPointer() const {return pbuff_;} + size_t GetCodeSize() const {return codesize_;} + size_t GetBufferSize() const {return buffsize_;} + + bool Reset(size_t codesize) + { + if (pbuff_) { +#if defined(JITASM_WIN) + ::VirtualFree(pbuff_, 0, MEM_RELEASE); +#else + munmap(pbuff_, buffsize_); +#endif + pbuff_ = NULL; + codesize_ = 0; + buffsize_ = 0; + } + if (codesize) { +#if defined(JITASM_WIN) + void* pbuff = ::VirtualAlloc(NULL, codesize, MEM_COMMIT, PAGE_EXECUTE_READWRITE); + if (!pbuff) { + JITASM_ASSERT(0); + return false; + } + MEMORY_BASIC_INFORMATION info; + ::VirtualQuery(pbuff, &info, sizeof(info)); + buffsize_ = info.RegionSize; +#else + int pagesize = getpagesize(); + size_t buffsize = (codesize + pagesize - 1) / pagesize * pagesize; + void* pbuff = mmap(NULL, buffsize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); + if (!pbuff) { + JITASM_ASSERT(0); + return false; + } + buffsize_ = buffsize; +#endif + + pbuff_ = pbuff; + codesize_ = codesize; + } + return true; + } + }; + + /// Stack manager + /** + * Stack layout + * \verbatim + * +-----------------------+ + * | Caller return address | + * +=======================+======== + * | ebp (rbp) | + * +-----------------------+ <-- ebp (rbp) + * | Saved gp registers | + * +-----------------------+ + * | Padding for alignment | + * +-----------------------+ <-- Stack base + * | Spill slots and | + * | local variable | + * +-----------------------+ <-- esp (rsp) + * 
\endverbatim + */ + class StackManager + { + private: + Addr stack_base_; + uint32 stack_size_; + + public: + StackManager() : stack_base_(RegID::CreatePhysicalRegID(R_TYPE_GP, EBX), 0), stack_size_(0) {} + + /// Get allocated stack size + uint32 GetSize() const {return (stack_size_ + 15) / 16 * 16; /* 16 bytes aligned*/} + + /// Get stack base + Addr GetStackBase() const {return stack_base_;} + + /// Set stack base + void SetStackBase(const Addr& stack_base) {stack_base_ = stack_base;} + + /// Allocate stack + Addr Alloc(uint32 size, uint32 alignment) + { + stack_size_ = (stack_size_ + alignment - 1) / alignment * alignment; + stack_size_ += size; + return stack_base_ - stack_size_; + } + }; + + /// Spin lock + class SpinLock + { + long lock_; + public: + SpinLock() : lock_(0) {} + void Lock() {while (interlocked_exchange(&lock_, 1));} + void Unlock() {interlocked_exchange(&lock_, 0);} + }; + + template + class ScopedLock + { + Ty& lock_; + ScopedLock& operator=(const ScopedLock&); + public: + ScopedLock(Ty& lock) : lock_(lock) {lock.Lock();} + ~ScopedLock() {lock_.Unlock();} + }; +} // namespace detail + +// compiler prototype declaration +struct Frontend; +namespace compiler { + void Compile(Frontend& f); +} + +/// jitasm frontend +struct Frontend +{ + typedef jitasm::Addr Addr; + typedef jitasm::Reg Reg; + typedef jitasm::Reg8 Reg8; + typedef jitasm::Reg16 Reg16; + typedef jitasm::Reg32 Reg32; +#ifdef JITASM64 + typedef jitasm::Reg64 Reg64; +#endif + typedef jitasm::MmxReg MmxReg; + typedef jitasm::XmmReg XmmReg; + typedef jitasm::YmmReg YmmReg; + + Reg8_al al; + Reg8_cl cl; + Reg8 dl, bl, ah, ch, dh, bh; + Reg16_ax ax; + Reg16_dx dx; + Reg16 cx, bx, sp, bp, si, di; + Reg32_eax eax; + Reg32 ecx, edx, ebx, esp, ebp, esi, edi; + FpuReg_st0 st0; + FpuReg st1, st2, st3, st4, st5, st6, st7; + MmxReg mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7; + XmmReg xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + YmmReg ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; +#ifdef 
JITASM64 + Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b; + Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w; + Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d; + Reg64_rax rax; + Reg64 rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15; + XmmReg xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15; + YmmReg ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15; +#endif + + AddressingPtr byte_ptr; + AddressingPtr word_ptr; + AddressingPtr dword_ptr; + AddressingPtr qword_ptr; + AddressingPtr mmword_ptr; + AddressingPtr xmmword_ptr; + AddressingPtr ymmword_ptr; + AddressingPtr real4_ptr; + AddressingPtr real8_ptr; + AddressingPtr real10_ptr; + AddressingPtr m2byte_ptr; + AddressingPtr m28byte_ptr; + AddressingPtr m108byte_ptr; + AddressingPtr m512byte_ptr; + + Reg zcx, zdx, zbx, zsp, zbp, zsi, zdi; +#ifdef JITASM64 + Reg64_rax zax; + AddressingPtr ptr; +#else + Reg32_eax zax; + AddressingPtr ptr; +#endif + + Frontend() + : dl(DL), bl(BL), ah(AH), ch(CH), dh(DH), bh(BH), + cx(CX), bx(BX), sp(SP), bp(BP), si(SI), di(DI), + ecx(ECX), edx(EDX), ebx(EBX), esp(ESP), ebp(EBP), esi(ESI), edi(EDI), + st1(ST1), st2(ST2), st3(ST3), st4(ST4), st5(ST5), st6(ST6), st7(ST7), + mm0(MM0), mm1(MM1), mm2(MM2), mm3(MM3), mm4(MM4), mm5(MM5), mm6(MM6), mm7(MM7), + xmm0(XMM0), xmm1(XMM1), xmm2(XMM2), xmm3(XMM3), xmm4(XMM4), xmm5(XMM5), xmm6(XMM6), xmm7(XMM7), + ymm0(YMM0), ymm1(YMM1), ymm2(YMM2), ymm3(YMM3), ymm4(YMM4), ymm5(YMM5), ymm6(YMM6), ymm7(YMM7), +#ifdef JITASM64 + r8b(R8B), r9b(R9B), r10b(R10B), r11b(R11B), r12b(R12B), r13b(R13B), r14b(R14B), r15b(R15B), + r8w(R8W), r9w(R9W), r10w(R10W), r11w(R11W), r12w(R12W), r13w(R13W), r14w(R14W), r15w(R15W), + r8d(R8D), r9d(R9D), r10d(R10D), r11d(R11D), r12d(R12D), r13d(R13D), r14d(R14D), r15d(R15D), + rcx(RCX), rdx(RDX), rbx(RBX), rsp(RSP), rbp(RBP), rsi(RSI), rdi(RDI), + r8(R8), r9(R9), r10(R10), r11(R11), r12(R12), r13(R13), r14(R14), r15(R15), + xmm8(XMM8), xmm9(XMM9), xmm10(XMM10), xmm11(XMM11), 
xmm12(XMM12), xmm13(XMM13), xmm14(XMM14), xmm15(XMM15), + ymm8(YMM8), ymm9(YMM9), ymm10(YMM10), ymm11(YMM11), ymm12(YMM12), ymm13(YMM13), ymm14(YMM14), ymm15(YMM15), + zcx(RCX), zdx(RDX), zbx(RBX), zsp(RSP), zbp(RBP), zsi(RSI), zdi(RDI), +#else + zcx(ECX), zdx(EDX), zbx(EBX), zsp(ESP), zbp(EBP), zsi(ESI), zdi(EDI), +#endif + assembled_(false) + { + } + + virtual ~Frontend() {} + + typedef std::vector InstrList; + InstrList instrs_; + bool assembled_; + detail::CodeBuffer codebuff_; + detail::SpinLock codelock_; + detail::StackManager stack_manager_; + + struct Label + { + std::string name; + size_t instr_number; + explicit Label(const std::string& name_) : name(name_), instr_number(0) {} + }; + typedef std::deque