2
0
mirror of https://github.com/rehlds/rehlds.git synced 2025-01-19 01:58:02 +03:00

Merge pull request #19 from dreamstalker/further_delta_opt

JITted DELTA_CountSendFields
This commit is contained in:
dreamstalker 2015-05-20 19:23:15 +04:00
commit ed6e5b4a94
5 changed files with 125 additions and 35 deletions

View File

@ -681,9 +681,15 @@ void DELTA_WriteMarkedFields(unsigned char *from, unsigned char *to, delta_t *pF
int DELTA_CheckDelta(unsigned char *from, unsigned char *to, delta_t *pFields) int DELTA_CheckDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
{ {
int sendfields; int sendfields;
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
sendfields = DELTAJit_Feilds_Clear_Mark_Check(from, to, pFields);
#else
DELTA_ClearFlags(pFields); DELTA_ClearFlags(pFields);
DELTA_MarkSendFields(from, to, pFields); DELTA_MarkSendFields(from, to, pFields);
#endif
sendfields = DELTA_CountSendFields(pFields); sendfields = DELTA_CountSendFields(pFields);
return sendfields; return sendfields;
} }
@ -693,14 +699,14 @@ NOINLINE int DELTA_WriteDelta(unsigned char *from, unsigned char *to, qboolean f
int sendfields; int sendfields;
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES) #if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
DELTAJit_ClearAndMarkSendFields(from, to, pFields); sendfields = DELTAJit_Feilds_Clear_Mark_Check(from, to, pFields);
#else #else
DELTA_ClearFlags(pFields); DELTA_ClearFlags(pFields);
DELTA_MarkSendFields(from, to, pFields); DELTA_MarkSendFields(from, to, pFields);
sendfields = DELTA_CountSendFields(pFields);
#endif #endif
sendfields = DELTA_CountSendFields(pFields); //sendfields = DELTA_CountSendFields(pFields);
_DELTA_WriteDelta(from, to, force, pFields, callback, sendfields); _DELTA_WriteDelta(from, to, force, pFields, callback, sendfields);
return sendfields; return sendfields;
} }

View File

@ -107,17 +107,21 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) {
} }
class CDeltaClearMarkFieldsJIT : public jitasm::function<void, CDeltaClearMarkFieldsJIT, void*, void*, void*> { class CDeltaClearMarkFieldsJIT : public jitasm::function<int, CDeltaClearMarkFieldsJIT, void*, void*, void*> {
public: public:
deltajitdata_t *jitdesc; deltajitdata_t *jitdesc;
deltajit_marked_count_type_t countType;
CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc) : jitdesc(_jitdesc) { CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc, deltajit_marked_count_type_t _countType)
: jitdesc(_jitdesc), countType(_countType) {
} }
void checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField); void checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField);
void main(Addr src, Addr dst, Addr fields); Result main(Addr src, Addr dst, Addr delta);
void processStrings(Addr src, Addr dst, Addr fields); void processStrings(Addr src, Addr dst, Addr delta);
void callConditionalEncoder(Addr src, Addr dst, Addr delta);
void countMarkedFields();
}; };
void CDeltaClearMarkFieldsJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField) { void CDeltaClearMarkFieldsJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField) {
@ -126,7 +130,60 @@ void CDeltaClearMarkFieldsJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, del
movzx(dx, al); movzx(dx, al);
} }
void CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr fields) void CDeltaClearMarkFieldsJIT::callConditionalEncoder(Addr src, Addr dst, Addr delta) {
// Emits code that invokes the delta's optional conditional encoder.
// The emitted code loads the delta_t pointer from the 'delta' argument,
// reads its 'conditionalencode' member and calls it only when that
// pointer is non-null.
//
// This generator expects the following registers to be already initialized:
// esi = src
// edi = dst
// The 'src' and 'dst' parameters are not read here; esi/edi are pushed instead.
int condEncoderOffset = (offsetof(delta_t, conditionalencode));
mov(eax, ptr[delta]); // eax = delta_t*
mov(ecx, dword_ptr[eax + condEncoderOffset]); // ecx = delta->conditionalencode
If(ecx != 0);
// Arguments are pushed in reverse, so from the top of the stack down the
// callee finds: delta (eax), src (esi), dst (edi).
push(edi);
push(esi);
push(eax);
call(ecx);
add(esp, 12); // remove the three 4-byte arguments pushed above
EndIf();
}
void CDeltaClearMarkFieldsJIT::countMarkedFields() {
// Emits the code that leaves the generated function's result in eax.
//
// This generator expects the following registers to be already initialized:
// ebx = delta->pdd
//
// Result (in eax):
//   DJ_M_DONT_COUNT - eax is zeroed and nothing else is emitted.
//   otherwise       - a 'changed' flag: bit 0 of each visited field's flags
//                     word is OR-ed into ax, so eax ends up 0 or 1. This is
//                     a flag, not a count of marked fields.
// NOTE(review): bit 0 is presumably the "marked" bit written by the marking code - confirm.
xor_(eax, eax);
if (countType == DJ_M_DONT_COUNT) {
return;
}
for (unsigned int i = 0; i < jitdesc->numFields; i++) {
int fieldId = jitdesc->fields[i].id;
// Byte offset of this field's 'flags' member inside the delta_description_t array at ebx.
int flagsOffset = (fieldId * sizeof(delta_description_t) + offsetof(delta_description_t, flags));
if (i & 1) { //alternate between cx and dx to decrease instruction result dependencies
mov(cx, word_ptr[ebx + flagsOffset]);
and_(cx, 1);
or_(ax, cx);
} else {
mov(dx, word_ptr[ebx + flagsOffset]);
and_(dx, 1);
or_(ax, dx);
}
// Emit an early-exit check after fields 0, 8, 16, ... (whenever i is a
// multiple of 8). The jnz consumes the flags of the or_ emitted just
// above, so the generated code jumps to the end as soon as ax is non-zero.
if ((i & 7) == 0) {
jnz("countMarkedFields_finish");
}
}
// Target of the early exits; also reached by falling through after the last field.
L("countMarkedFields_finish");
}
CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr delta)
{ {
#ifndef REHLDS_FIXES #ifndef REHLDS_FIXES
@ -135,14 +192,16 @@ void CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr fields)
mov(esi, ptr[src]); mov(esi, ptr[src]);
mov(edi, ptr[dst]); mov(edi, ptr[dst]);
mov(ecx, ptr[fields]); mov(ebx, ptr[delta]);
int fieldsOffset = (offsetof(delta_t, pdd));
mov(ebx, dword_ptr[ebx + fieldsOffset]);
movdqu(xmm3, xmmword_ptr[esi]); movdqu(xmm3, xmmword_ptr[esi]);
movdqu(xmm4, xmmword_ptr[edi]); movdqu(xmm4, xmmword_ptr[edi]);
auto zero_xmm = xmm2; auto zero_xmm = xmm2;
pxor(zero_xmm, zero_xmm); pxor(zero_xmm, zero_xmm);
auto mask = ebx; auto mask = ecx;
xor_(mask, mask); xor_(mask, mask);
for (unsigned int i = 0; i < jitdesc->numblocks; i++) { for (unsigned int i = 0; i < jitdesc->numblocks; i++) {
movdqa(xmm0, xmm3); movdqa(xmm0, xmm3);
@ -150,8 +209,6 @@ void CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr fields)
//prefetch next blocks //prefetch next blocks
if (i < jitdesc->numblocks) { if (i < jitdesc->numblocks) {
mov(esi, ptr[src]);
mov(edi, ptr[dst]);
movdqu(xmm3, xmmword_ptr[esi + ((i + 1) * 16)]); movdqu(xmm3, xmmword_ptr[esi + ((i + 1) * 16)]);
movdqu(xmm4, xmmword_ptr[edi + ((i + 1) * 16)]); movdqu(xmm4, xmmword_ptr[edi + ((i + 1) * 16)]);
} }
@ -202,35 +259,46 @@ void CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr fields)
int flagsOffset = (jitField->field->id * sizeof(delta_description_t) + offsetof(delta_description_t, flags)); int flagsOffset = (jitField->field->id * sizeof(delta_description_t) + offsetof(delta_description_t, flags));
if (jitField->first) { if (jitField->first) {
mov(word_ptr[ecx + flagsOffset], dx); mov(word_ptr[ebx + flagsOffset], dx);
} }
else { else {
or_(word_ptr[ecx + flagsOffset], dx); or_(word_ptr[ebx + flagsOffset], dx);
} }
} }
} }
processStrings(src, dst, fields); processStrings(src, dst, delta);
callConditionalEncoder(src, dst, delta);
countMarkedFields();
#ifndef REHLDS_FIXES #ifndef REHLDS_FIXES
add(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking add(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking
#endif #endif
return eax;
} }
void CDeltaClearMarkFieldsJIT::processStrings(Addr src, Addr dst, Addr fields) { void CDeltaClearMarkFieldsJIT::processStrings(Addr src, Addr dst, Addr delta) {
//This generator expects that following registers are already initialized:
// ebx = delta->pdd
// esi = src
// edi = dst
//strings //strings
for (unsigned int i = 0; i < jitdesc->numFields; i++) { for (unsigned int i = 0; i < jitdesc->numFields; i++) {
auto jitField = &jitdesc->fields[i]; auto jitField = &jitdesc->fields[i];
if (jitField->type != DT_STRING) if (jitField->type != DT_STRING)
continue; continue;
mov(esi, ptr[src]); mov(eax, esi);
mov(edi, ptr[dst]); mov(edx, edi);
add(esi, jitField->offset); add(eax, jitField->offset);
add(edi, jitField->offset); add(edx, jitField->offset);
push(esi); push(eax);
push(edi); push(edx);
mov(ecx, (size_t)&Q_stricmp); mov(ecx, (size_t)&Q_stricmp);
call(ecx); call(ecx);
add(esp, 8); add(esp, 8);
@ -238,9 +306,9 @@ void CDeltaClearMarkFieldsJIT::processStrings(Addr src, Addr dst, Addr fields) {
setnz(cl); setnz(cl);
movzx(cx, cl); movzx(cx, cl);
mov(edx, ptr[fields]);
int flagsOffset = (jitField->id * sizeof(delta_description_t) + offsetof(delta_description_t, flags)); int flagsOffset = (jitField->id * sizeof(delta_description_t) + offsetof(delta_description_t, flags));
mov(word_ptr[edx + flagsOffset], cx); mov(word_ptr[ebx + flagsOffset], cx);
} }
} }
@ -289,7 +357,7 @@ void CDeltaJitRegistry::CreateAndRegisterDeltaJIT(delta_t* delta) {
deltajitdata_t data; deltajitdata_t data;
DELTAJIT_CreateDescription(delta, data); DELTAJIT_CreateDescription(delta, data);
CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc = new CDeltaClearMarkFieldsJIT(&data); CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc = new CDeltaClearMarkFieldsJIT(&data, DJ_M_CHECK);
cleanMarkCheckFunc->Assemble(); cleanMarkCheckFunc->Assemble();
cleanMarkCheckFunc->jitdesc = NULL; cleanMarkCheckFunc->jitdesc = NULL;
@ -305,11 +373,18 @@ NOINLINE void DELTAJit_ClearAndMarkSendFields(unsigned char *from, unsigned char
} }
CDeltaClearMarkFieldsJIT &func = *deltaJit->cleanMarkCheckFunc; CDeltaClearMarkFieldsJIT &func = *deltaJit->cleanMarkCheckFunc;
func(from, to, pFields->pdd); func(from, to, pFields);
}
if (pFields->conditionalencode) NOINLINE int DELTAJit_Feilds_Clear_Mark_Check(unsigned char *from, unsigned char *to, delta_t *pFields) {
pFields->conditionalencode(pFields, from, to); CDeltaJit* deltaJit = g_DeltaJitRegistry.GetJITByDelta(pFields);
if (!deltaJit) {
rehlds_syserror("%s: JITted delta encoder not found for delta %p", __FUNCTION__, pFields);
return 0;
}
CDeltaClearMarkFieldsJIT &func = *deltaJit->cleanMarkCheckFunc;
return func(from, to, pFields);
} }
void CDeltaJitRegistry::Cleanup() { void CDeltaJitRegistry::Cleanup() {

View File

@ -33,6 +33,12 @@ struct deltajitdata_t {
deltajit_field fields[DELTAJIT_MAX_FIELDS]; deltajit_field fields[DELTAJIT_MAX_FIELDS];
}; };
// Selects what the JIT-generated clear/mark function leaves in eax
// (consumed by CDeltaClearMarkFieldsJIT::countMarkedFields).
enum deltajit_marked_count_type_t {
DJ_M_DONT_COUNT, // eax is only zeroed; no per-field code is emitted
DJ_M_CHECK, // eax becomes non-zero when any field's flags word has bit 0 set
//DJ_M_COUNT, //not implemented yet
};
class CDeltaJit; class CDeltaJit;
class CDeltaJitRegistry { class CDeltaJitRegistry {
@ -52,3 +58,4 @@ public:
extern CDeltaJitRegistry g_DeltaJitRegistry; extern CDeltaJitRegistry g_DeltaJitRegistry;
extern void DELTAJit_ClearAndMarkSendFields(unsigned char *from, unsigned char *to, delta_t *pFields); extern void DELTAJit_ClearAndMarkSendFields(unsigned char *from, unsigned char *to, delta_t *pFields);
extern int DELTAJit_Feilds_Clear_Mark_Check(unsigned char *from, unsigned char *to, delta_t *pFields);

View File

@ -652,14 +652,16 @@ void SV_Users_f(void)
/* <a651b> ../engine/sv_main.c:762 */ /* <a651b> ../engine/sv_main.c:762 */
void SV_CountPlayers(int *clients) void SV_CountPlayers(int *clients)
{ {
*clients = 0; int count = 0;
client_s *cl = g_psvs.clients;
client_s *cl = g_psvs.clients;
for (int i = 0; i < g_psvs.maxclients; i++, cl++) for (int i = 0; i < g_psvs.maxclients; i++, cl++)
{ {
if (cl->active || cl->spawned || cl->connected) if (cl->active | cl->spawned | cl->connected)
(*clients)++; count++;
} }
*clients = count;
} }
/* <a68a4> ../engine/sv_main.c:786 */ /* <a68a4> ../engine/sv_main.c:786 */

View File

@ -239,7 +239,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release Play|Win32'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release Play|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release Swds Play|Win32'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release Swds Play|Win32'">true</ExcludedFromBuild>
</ClCompile> </ClCompile>
<ClCompile Include="..\unittests\mathlib_tests.cpp"> <ClCompile Include="..\unittests\mathlib_tests.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug Swds|Win32'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug Swds|Win32'">true</ExcludedFromBuild>
@ -1027,7 +1027,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>REHLDS_OPT_PEDANTIC;REHLDS_SELF;REHLDS_CHECKS;USE_BREAKPAD_HANDLER;DEDICATED;SWDS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>REHLDS_FIXES;REHLDS_OPT_PEDANTIC;REHLDS_SELF;REHLDS_CHECKS;USE_BREAKPAD_HANDLER;DEDICATED;SWDS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary> <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<AdditionalOptions>/arch:IA32 %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions>/arch:IA32 %(AdditionalOptions)</AdditionalOptions>
<PrecompiledHeader>Use</PrecompiledHeader> <PrecompiledHeader>Use</PrecompiledHeader>