mirror of
https://github.com/rehlds/rehlds.git
synced 2025-01-06 03:55:32 +03:00
JITted DELTA_TestDelta.
Made a unit test for DELTA_TestDelta. Unrolled tail recursion in SV_FindTouchedLeafs and SV_LinkContents. Other small optimizations and fixes.
This commit is contained in:
parent
bc88790bb1
commit
7a973d1afb
@ -1253,9 +1253,12 @@ NOXREF float MSG_ReadHiresAngle(void)
|
|||||||
|
|
||||||
void MSG_ReadUsercmd(usercmd_t *to, usercmd_t* from)
|
void MSG_ReadUsercmd(usercmd_t *to, usercmd_t* from)
|
||||||
{
|
{
|
||||||
delta_t *pdesc = SV_LookupDelta("usercmd_t");
|
|
||||||
MSG_StartBitReading(&net_message);
|
MSG_StartBitReading(&net_message);
|
||||||
DELTA_ParseDelta((byte *)from, (byte *)to, pdesc);
|
#ifdef REHLDS_OPT_PEDANTIC
|
||||||
|
DELTA_ParseDelta((byte *)from, (byte *)to, g_pusercmddelta);
|
||||||
|
#else
|
||||||
|
DELTA_ParseDelta((byte *)from, (byte *)to, SV_LookupDelta("usercmd_t"));
|
||||||
|
#endif
|
||||||
MSG_EndBitReading(&net_message);
|
MSG_EndBitReading(&net_message);
|
||||||
COM_NormalizeAngles(to->viewangles);
|
COM_NormalizeAngles(to->viewangles);
|
||||||
}
|
}
|
||||||
|
@ -422,6 +422,9 @@ void DELTA_ClearFlags(delta_t *pFields)
|
|||||||
/* <241d2> ../engine/delta.c:473 */
|
/* <241d2> ../engine/delta.c:473 */
|
||||||
int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
|
int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
|
||||||
{
|
{
|
||||||
|
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
|
||||||
|
return DELTAJit_TestDelta(from, to, pFields);
|
||||||
|
#else
|
||||||
int i;
|
int i;
|
||||||
char *st1, *st2;
|
char *st1, *st2;
|
||||||
delta_description_t *pTest;
|
delta_description_t *pTest;
|
||||||
@ -452,9 +455,10 @@ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
|
|||||||
break;
|
break;
|
||||||
#ifdef REHLDS_FIXES
|
#ifdef REHLDS_FIXES
|
||||||
// don't use multiplier when checking, to increase performance
|
// don't use multiplier when checking, to increase performance
|
||||||
|
// check values binary like it does in jit
|
||||||
case DT_TIMEWINDOW_8:
|
case DT_TIMEWINDOW_8:
|
||||||
case DT_TIMEWINDOW_BIG:
|
case DT_TIMEWINDOW_BIG:
|
||||||
different = (int32)(*(float *)&from[pTest->fieldOffset]) != (int32)(*(float *)&to[pTest->fieldOffset]);
|
different = (*(int32 *)&from[pTest->fieldOffset]) != (*(int32 *)&to[pTest->fieldOffset]);
|
||||||
break;
|
break;
|
||||||
#else
|
#else
|
||||||
case DT_TIMEWINDOW_8:
|
case DT_TIMEWINDOW_8:
|
||||||
@ -471,8 +475,10 @@ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
|
|||||||
{
|
{
|
||||||
#ifdef REHLDS_FIXES
|
#ifdef REHLDS_FIXES
|
||||||
different = TRUE;
|
different = TRUE;
|
||||||
#endif // REHLDS_FIXES
|
length = Q_strlen(st2) * 8;
|
||||||
|
#else // REHLDS_FIXES
|
||||||
length = Q_strlen(st2);
|
length = Q_strlen(st2);
|
||||||
|
#endif // REHLDS_FIXES
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -487,12 +493,13 @@ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (neededBits != -1)
|
if (highestBit != -1)
|
||||||
{
|
{
|
||||||
neededBits += highestBit / 8 * 8 + 8;
|
neededBits += highestBit / 8 * 8 + 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
return neededBits;
|
return neededBits;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* <24309> ../engine/delta.c:602 */
|
/* <24309> ../engine/delta.c:602 */
|
||||||
@ -749,7 +756,6 @@ qboolean DELTA_CheckDelta(unsigned char *from, unsigned char *to, delta_t *pFiel
|
|||||||
NOINLINE qboolean DELTA_WriteDelta(unsigned char *from, unsigned char *to, qboolean force, delta_t *pFields, void(*callback)(void))
|
NOINLINE qboolean DELTA_WriteDelta(unsigned char *from, unsigned char *to, qboolean force, delta_t *pFields, void(*callback)(void))
|
||||||
{
|
{
|
||||||
qboolean sendfields;
|
qboolean sendfields;
|
||||||
int bytecount;
|
|
||||||
|
|
||||||
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
|
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
|
||||||
sendfields = DELTAJit_Fields_Clear_Mark_Check(from, to, pFields, NULL);
|
sendfields = DELTAJit_Fields_Clear_Mark_Check(from, to, pFields, NULL);
|
||||||
|
@ -27,7 +27,7 @@ unsigned int DELTAJIT_GetFieldSize(delta_description_t* desc) {
|
|||||||
return 4;
|
return 4;
|
||||||
|
|
||||||
case DT_STRING:
|
case DT_STRING:
|
||||||
return desc->fieldSize;
|
return 0;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
rehlds_syserror("%s: Unknown delta field type %d", __FUNCTION__, desc->fieldType);
|
rehlds_syserror("%s: Unknown delta field type %d", __FUNCTION__, desc->fieldType);
|
||||||
@ -84,6 +84,7 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) {
|
|||||||
jitField->offset = fieldDesc->fieldOffset;
|
jitField->offset = fieldDesc->fieldOffset;
|
||||||
jitField->type = fieldDesc->fieldType;
|
jitField->type = fieldDesc->fieldType;
|
||||||
jitField->length = fieldSize;
|
jitField->length = fieldSize;
|
||||||
|
jitField->significantBits = fieldDesc->significant_bits;
|
||||||
|
|
||||||
if (fieldDesc->fieldType != DT_STRING) {
|
if (fieldDesc->fieldType != DT_STRING) {
|
||||||
bool firstBlock = true;
|
bool firstBlock = true;
|
||||||
@ -127,50 +128,207 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class CDeltaClearMarkFieldsJIT;
|
class CDeltaClearMarkFieldsJIT;
|
||||||
|
class CDeltaTestDeltaJIT;
|
||||||
|
|
||||||
class CDeltaJit {
|
class CDeltaJit {
|
||||||
public:
|
public:
|
||||||
CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc;
|
CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc;
|
||||||
|
CDeltaTestDeltaJIT* testDeltaFunc;
|
||||||
delta_t* delta;
|
delta_t* delta;
|
||||||
|
delta_marked_mask_t marked_fields_mask;
|
||||||
|
delta_marked_mask_t originalMarkedFieldsMask; //mask based on data, before calling the conditional encoder
|
||||||
int markedFieldsMaskSize;
|
int markedFieldsMaskSize;
|
||||||
|
|
||||||
delta_marked_mask_t marked_fields_mask;
|
CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc, CDeltaTestDeltaJIT* _testDeltaFunc);
|
||||||
int mfm_sse_highbits[2]; //High 64 bits for manipulating marked_fields_mask via SSE registers
|
|
||||||
|
|
||||||
delta_marked_mask_t originalMarkedFieldsMask; //mask based on data, before calling the conditional encoder
|
|
||||||
int omfm_sse_highbits[2]; //High 64 bits for manipulating marked_fields_mask via SSE registers
|
|
||||||
|
|
||||||
CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc);
|
|
||||||
|
|
||||||
virtual ~CDeltaJit();
|
virtual ~CDeltaJit();
|
||||||
};
|
};
|
||||||
|
|
||||||
class CDeltaClearMarkFieldsJIT : public jitasm::function<int, CDeltaClearMarkFieldsJIT, void*, void*, void*, void*> {
|
class CDeltaCheckJIT : public jitasm::function<void, CDeltaCheckJIT>
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
deltajitdata_t *jitdesc;
|
void main() {}
|
||||||
deltajit_marked_count_type_t countType;
|
virtual void onFieldChecked(deltajit_field* jitField) {}
|
||||||
|
virtual void onStringChecked(deltajit_field* jitField) {}
|
||||||
XmmReg marked_fields_mask = xmm7;
|
|
||||||
|
|
||||||
|
|
||||||
CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc, deltajit_marked_count_type_t _countType)
|
|
||||||
: jitdesc(_jitdesc), countType(_countType) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField);
|
void checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField);
|
||||||
Result main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask);
|
void iterateBlocks(deltajitdata_t *jitdesc);
|
||||||
void processStrings(Addr src, Addr dst);
|
void iterateStrings(deltajitdata_t *jitdesc);
|
||||||
void callConditionalEncoder(Addr src, Addr dst, Addr deltaJit);
|
|
||||||
void calculateBytecount();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void CDeltaClearMarkFieldsJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField) {
|
void CDeltaCheckJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField)
|
||||||
test(mask, (uint16)jitField->mask);
|
{
|
||||||
|
test(mask, jitField->mask);
|
||||||
setnz(al);
|
setnz(al);
|
||||||
movzx(edx, al);
|
movzx(edx, al);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the code that compares src and dst in 16-byte memory blocks and, for
// every field of every block, leaves a "changed" flag in edx before invoking
// the onFieldChecked() hook so the concrete generator can consume it.
//
// Expects esi = src and edi = dst to be loaded by the caller.
void CDeltaCheckJIT::iterateBlocks(deltajitdata_t *jitdesc)
{
#ifndef REHLDS_FIXES
	sub(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking
#endif

	/*
	Registers usage:
		esi = src
		edi = dst
		ecx = blockMask
		xmm0-xmm2: loaded src
		xmm3-xmm5: loaded dst
		xmm6: not touched here (CDeltaClearMarkFieldsJIT::onFieldChecked uses it as temp)
	Already initialized:
		esi, edi
	*/

	jitasm::Frontend::XmmReg src_xmm[3] = {xmm0, xmm1, xmm2};
	jitasm::Frontend::XmmReg dst_xmm[3] = {xmm3, xmm4, xmm5};
	int dataXmmCounter = 0; // from 0 to 2 => 3 pairs of registers

	// preload up to the first three blocks, one src/dst register pair per block
	for (unsigned int k = 0; k < 3 && k < jitdesc->numItrBlocks; k++) {
		movdqu(src_xmm[k], xmmword_ptr[esi + ( jitdesc->itrBlocks[k].memblockId * 16 )]);
		movdqu(dst_xmm[k], xmmword_ptr[edi + ( jitdesc->itrBlocks[k].memblockId * 16 )]);
	}

	auto blockMask = ecx;
	xor_(blockMask, blockMask);

	for (unsigned int i = 0; i < jitdesc->numItrBlocks; i++) {
		auto itrBlock = &jitdesc->itrBlocks[i];
		auto block = itrBlock->memblock;

		//do far prefetch
		if (itrBlock->prefetchBlockId != -1) {
			prefetcht0(byte_ptr[esi + ( itrBlock->prefetchBlockId * 16 )]);
			prefetcht0(byte_ptr[edi + ( itrBlock->prefetchBlockId * 16 )]);
		}

		// create mask for changed bytes: pcmpeqb marks the equal bytes,
		// so the collected byte mask is inverted afterwards
		pcmpeqb(src_xmm[dataXmmCounter], dst_xmm[dataXmmCounter]);
		pmovmskb(blockMask, src_xmm[dataXmmCounter]);
		not_(blockMask);

		//preload next blocks: the register pair that was just consumed is
		//refilled with the block that will be processed three iterations later
		if (i + 3 < jitdesc->numItrBlocks) {
			movdqu(src_xmm[dataXmmCounter], xmmword_ptr[esi + ( jitdesc->itrBlocks[i + 3].memblockId * 16 )]);
			movdqu(dst_xmm[dataXmmCounter], xmmword_ptr[edi + ( jitdesc->itrBlocks[i + 3].memblockId * 16 )]);
		}

		// rotate through the three register pairs
		dataXmmCounter++;
		if (dataXmmCounter > 2) {
			dataXmmCounter -= 3;
		}

		// iterate fields contained in block
		for (unsigned int j = 0; j < block->numFields; j++) {
			auto jitField = &block->fields[j];

#ifndef REHLDS_FIXES
			// precise floats comparison
			if (jitField->field->type == DT_TIMEWINDOW_8 || jitField->field->type == DT_TIMEWINDOW_BIG) {
				// a time window field is compared once, at its last block entry
				if (!jitField->last) {
					continue;
				}

				// [esp] holds the multiplier, [esp + 4] the scaled value as a double
				float multiplier = ( jitField->field->type == DT_TIMEWINDOW_8 ) ? 100.0f : 1000.0f;
				uint32 m32 = *(uint32*)( &multiplier );
				mov(dword_ptr[esp], m32);

				// eax = (int)(src value * multiplier)
				fld(dword_ptr[esi + jitField->field->offset]);
				fld(dword_ptr[esp]);
				fmulp();
				fstp(qword_ptr[esp + 4]);
				cvttsd2si(eax, mmword_ptr[esp + 4]);

				// edx = (int)(dst value * multiplier)
				fld(dword_ptr[edi + jitField->field->offset]);
				fld(dword_ptr[esp]);
				fmulp();
				fstp(qword_ptr[esp + 4]);
				cvttsd2si(edx, mmword_ptr[esp + 4]);

				// edx = 1 when the truncated values differ, otherwise 0
				cmp(eax, edx);
				setne(al);
				movzx(edx, al);
			} else {
				checkFieldMask(blockMask, jitField);
			}
#else
			checkFieldMask(blockMask, jitField);
#endif

			// call handler
			onFieldChecked(jitField->field);
		}
	}

#ifndef REHLDS_FIXES
	add(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking
#endif // REHLDS_FIXES
}
|
||||||
|
|
||||||
|
// Emits, for every DT_STRING field of the description, a call to Q_stricmp on
// the src and dst copies of the string, and then invokes the onStringChecked()
// hook while the two pushed arguments are still on the stack.
//
// Expects esi = src and edi = dst to be loaded by the caller.
void CDeltaCheckJIT::iterateStrings(deltajitdata_t *jitdesc)
{
	for (unsigned int idx = 0; idx < jitdesc->numFields; idx++) {
		auto strField = &jitdesc->fields[idx];

		if (strField->type == DT_STRING) {
			// will be parallel
			lea(eax, ptr[esi + strField->offset]);
			lea(edx, ptr[edi + strField->offset]);

			// dst pointer ends up on top of the stack, src pointer below it
			push(eax);
			push(edx);
			mov(ecx, (size_t)&Q_stricmp);
			call(ecx);

			// call handler; eax holds the comparison result at this point
			onStringChecked(strField);

			// drop the two pushed arguments
			add(esp, 8);
		}
	}
}
|
||||||
|
|
||||||
|
class CDeltaClearMarkFieldsJIT : public jitasm::function<int, CDeltaClearMarkFieldsJIT, void*, void*, void*, void*>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc);
|
||||||
|
|
||||||
|
Result main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask);
|
||||||
|
|
||||||
|
void callConditionalEncoder(Addr src, Addr dst, Addr deltaJit);
|
||||||
|
void calculateBytecount();
|
||||||
|
|
||||||
|
// first two virtual functions must be same as in CDeltaCheckJIT
|
||||||
|
virtual void onFieldChecked(deltajit_field* jitField);
|
||||||
|
virtual void onStringChecked(deltajit_field* jitField);
|
||||||
|
|
||||||
|
deltajitdata_t *jitdesc;
|
||||||
|
|
||||||
|
private:
|
||||||
|
jitasm::XmmReg xmm_tmp = xmm6;
|
||||||
|
jitasm::XmmReg marked_fields_mask = xmm7;
|
||||||
|
};
|
||||||
|
|
||||||
|
CDeltaClearMarkFieldsJIT::CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc) : jitdesc(_jitdesc)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
void CDeltaClearMarkFieldsJIT::callConditionalEncoder(Addr src, Addr dst, Addr deltaJit) {
|
void CDeltaClearMarkFieldsJIT::callConditionalEncoder(Addr src, Addr dst, Addr deltaJit) {
|
||||||
// This generator expects that following registers are already initialized:
|
// This generator expects that following registers are already initialized:
|
||||||
// esi = src
|
// esi = src
|
||||||
@ -260,12 +418,34 @@ void CDeltaClearMarkFieldsJIT::calculateBytecount() {
|
|||||||
mov(dword_ptr[ebx + delta_masksize_offset], edx);
|
mov(dword_ptr[ebx + delta_masksize_offset], edx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Field hook: emits code that merges the "changed" flag of one field into the
// mask register at the bit position given by the field id.
//
// Expects edx = 1 if the field changed, otherwise 0.
void CDeltaClearMarkFieldsJIT::onFieldChecked(deltajit_field* field)
{
	// move the flag into the temp register and shift it up to bit 'id'
	movd(xmm_tmp, edx);
	psllq(xmm_tmp, field->id);

	// set bit in send mask
	por(marked_fields_mask, xmm_tmp);
}
|
||||||
|
|
||||||
|
// String hook: emits code that sets the field's bit in
// CDeltaJit::marked_fields_mask when the string comparison reported a difference.
//
// Expects:
//   eax = result of strings comparison (non-zero => different)
//   ebx = deltaJit
void CDeltaClearMarkFieldsJIT::onStringChecked(deltajit_field* field)
{
	size_t maskOffset = offsetof(CDeltaJit, marked_fields_mask);

	// fields 32 and above live in the second 32-bit word of the mask
	int highWordOffset = (field->id > 31) ? 4 : 0;

	// ecx = (eax != 0) ? 1 : 0
	xor_(ecx, ecx);
	test(eax, eax);
	setnz(cl);

	// move the flag to the field's bit inside its 32-bit word and merge it in
	shl(ecx, field->id & 31);
	or_(ptr[ebx + maskOffset + highWordOffset], ecx);
}
|
||||||
|
|
||||||
CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask)
|
CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask)
|
||||||
{
|
{
|
||||||
#ifndef REHLDS_FIXES
|
|
||||||
sub(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Registers usage:
|
Registers usage:
|
||||||
esi = src
|
esi = src
|
||||||
@ -278,124 +458,27 @@ CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr d
|
|||||||
mov(esi, ptr[src]);
|
mov(esi, ptr[src]);
|
||||||
mov(edi, ptr[dst]);
|
mov(edi, ptr[dst]);
|
||||||
|
|
||||||
int dataXmmCounter = 0; // from 0 to 2 => 3 pairs of registers
|
|
||||||
jitasm::Frontend::XmmReg src_xmm[3] = { xmm0, xmm1, xmm2 };
|
|
||||||
jitasm::Frontend::XmmReg dst_xmm[3] = { xmm3, xmm4, xmm5 };
|
|
||||||
// overall mask is in xmm7 (marked_fields_mask)
|
|
||||||
auto xmm_tmp = xmm6;
|
|
||||||
|
|
||||||
|
|
||||||
if (jitdesc->numItrBlocks > 0) {
|
|
||||||
movdqu(src_xmm[0], xmmword_ptr[esi + (jitdesc->itrBlocks[0].memblockId * 16)]);
|
|
||||||
movdqu(dst_xmm[0], xmmword_ptr[edi + (jitdesc->itrBlocks[0].memblockId * 16)]);
|
|
||||||
}
|
|
||||||
if (jitdesc->numItrBlocks > 1) {
|
|
||||||
movdqu(src_xmm[1], xmmword_ptr[esi + (jitdesc->itrBlocks[1].memblockId * 16)]);
|
|
||||||
movdqu(dst_xmm[1], xmmword_ptr[edi + (jitdesc->itrBlocks[1].memblockId * 16)]);
|
|
||||||
}
|
|
||||||
if (jitdesc->numItrBlocks > 2) {
|
|
||||||
movdqu(src_xmm[2], xmmword_ptr[esi + (jitdesc->itrBlocks[2].memblockId * 16)]);
|
|
||||||
movdqu(dst_xmm[2], xmmword_ptr[edi + (jitdesc->itrBlocks[2].memblockId * 16)]);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto blockMask = ecx;
|
|
||||||
xor_(blockMask, blockMask);
|
|
||||||
|
|
||||||
pxor(marked_fields_mask, marked_fields_mask);
|
pxor(marked_fields_mask, marked_fields_mask);
|
||||||
|
|
||||||
for (unsigned int i = 0; i < jitdesc->numItrBlocks; i++) {
|
// check changed blocks
|
||||||
auto block = jitdesc->itrBlocks[i].memblock;
|
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateBlocks))(jitdesc);
|
||||||
auto itrBlock = &jitdesc->itrBlocks[i];
|
|
||||||
|
|
||||||
//do far prefetch
|
|
||||||
if (itrBlock->prefetchBlockId != -1) {
|
|
||||||
prefetcht0(byte_ptr[esi + (itrBlock->prefetchBlockId * 16)]);
|
|
||||||
prefetcht0(byte_ptr[edi + (itrBlock->prefetchBlockId * 16)]);
|
|
||||||
}
|
|
||||||
|
|
||||||
pcmpeqb(src_xmm[dataXmmCounter], dst_xmm[dataXmmCounter]);
|
|
||||||
pmovmskb(blockMask, src_xmm[dataXmmCounter]);
|
|
||||||
not_(blockMask);
|
|
||||||
|
|
||||||
//preload next blocks
|
|
||||||
if (i + 3 < jitdesc->numItrBlocks) {
|
|
||||||
movdqu(src_xmm[dataXmmCounter], xmmword_ptr[esi + (jitdesc->itrBlocks[i + 3].memblockId * 16)]);
|
|
||||||
movdqu(dst_xmm[dataXmmCounter], xmmword_ptr[edi + (jitdesc->itrBlocks[i + 3].memblockId * 16)]);
|
|
||||||
}
|
|
||||||
|
|
||||||
dataXmmCounter++;
|
|
||||||
if (dataXmmCounter > 2) {
|
|
||||||
dataXmmCounter -= 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned int j = 0; j < block->numFields; j++) {
|
|
||||||
auto jitField = &block->fields[j];
|
|
||||||
|
|
||||||
#ifndef REHLDS_FIXES
|
|
||||||
if (jitField->field->type == DT_TIMEWINDOW_8 || jitField->field->type == DT_TIMEWINDOW_BIG) {
|
|
||||||
if (jitField->last) {
|
|
||||||
|
|
||||||
float multiplier = (jitField->field->type == DT_TIMEWINDOW_8) ? 100.0f : 1000.0f;
|
|
||||||
uint32 m32 = *(uint32*)(&multiplier);
|
|
||||||
mov(dword_ptr[esp], m32);
|
|
||||||
|
|
||||||
fld(dword_ptr[esi + jitField->field->offset]);
|
|
||||||
fld(dword_ptr[esp]);
|
|
||||||
fmulp();
|
|
||||||
fstp(qword_ptr[esp + 4]);
|
|
||||||
cvttsd2si(eax, mmword_ptr[esp + 4]);
|
|
||||||
|
|
||||||
fld(dword_ptr[edi + jitField->field->offset]);
|
|
||||||
fld(dword_ptr[esp]);
|
|
||||||
fmulp();
|
|
||||||
fstp(qword_ptr[esp + 4]);
|
|
||||||
cvttsd2si(edx, mmword_ptr[esp + 4]);
|
|
||||||
|
|
||||||
cmp(eax, edx);
|
|
||||||
setne(al);
|
|
||||||
movzx(edx, al);
|
|
||||||
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
checkFieldMask(blockMask, jitField);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
checkFieldMask(blockMask, jitField);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// set bit in send mask
|
|
||||||
movd(xmm_tmp, edx);
|
|
||||||
psllq(xmm_tmp, jitField->field->id);
|
|
||||||
por(marked_fields_mask, xmm_tmp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//apply 'force mark' mask if it's present
|
//apply 'force mark' mask if it's present
|
||||||
mov(eax, ptr[pForceMarkMask]);
|
mov(eax, ptr[pForceMarkMask]);
|
||||||
If(eax != 0);
|
If(eax != 0);
|
||||||
//mask for cleaning garbage in high 64 bits
|
movq(xmm_tmp, qword_ptr[eax]);
|
||||||
mov(edx, -1);
|
por(marked_fields_mask, xmm_tmp);
|
||||||
movd(xmm0, edx);
|
|
||||||
movd(xmm1, edx);
|
|
||||||
psllq(xmm0, 32);
|
|
||||||
por(xmm0, xmm1);
|
|
||||||
|
|
||||||
movdqu(xmm_tmp, xmmword_ptr[eax]);
|
|
||||||
pand(xmm_tmp, xmm0); //clean high 64 bits
|
|
||||||
|
|
||||||
por(marked_fields_mask, xmm_tmp); //apply the 'force' mask
|
|
||||||
EndIf();
|
EndIf();
|
||||||
|
|
||||||
size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask);
|
size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask);
|
||||||
|
|
||||||
//Save mask from SSE register to CDeltaJit::marked_fields_mask and CDeltaJit::originalMarkedFieldsMask
|
//Save mask from SSE register to CDeltaJit::marked_fields_mask and CDeltaJit::originalMarkedFieldsMask
|
||||||
mov(ebx, ptr[deltaJit]);
|
mov(ebx, ptr[deltaJit]);
|
||||||
movdqu(xmmword_ptr[ebx + delta_markbits_offset], marked_fields_mask);
|
movq(qword_ptr[ebx + delta_markbits_offset], marked_fields_mask);
|
||||||
movdqu(xmmword_ptr[ebx + offsetof(CDeltaJit, originalMarkedFieldsMask)], marked_fields_mask);
|
movq(qword_ptr[ebx + offsetof(CDeltaJit, originalMarkedFieldsMask)], marked_fields_mask);
|
||||||
|
|
||||||
processStrings(src, dst);
|
// check changed strings
|
||||||
|
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateStrings))(jitdesc);
|
||||||
|
|
||||||
//emit conditional encoder call
|
//emit conditional encoder call
|
||||||
callConditionalEncoder(src, dst, deltaJit);
|
callConditionalEncoder(src, dst, deltaJit);
|
||||||
@ -413,54 +496,125 @@ CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr d
|
|||||||
mov(dword_ptr[ebx + delta_masksize_offset], edx);
|
mov(dword_ptr[ebx + delta_masksize_offset], edx);
|
||||||
EndIf();
|
EndIf();
|
||||||
|
|
||||||
#ifndef REHLDS_FIXES
|
|
||||||
add(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking
|
|
||||||
#endif // REHLDS_FIXES
|
|
||||||
|
|
||||||
return edx;
|
return edx;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CDeltaClearMarkFieldsJIT::processStrings(Addr src, Addr dst) {
|
class CDeltaTestDeltaJIT : public jitasm::function<int, CDeltaTestDeltaJIT, void*, void*, void*>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CDeltaTestDeltaJIT(deltajitdata_t *_jitdesc);
|
||||||
|
|
||||||
|
Result main(Addr src, Addr dst, Addr deltaJit);
|
||||||
|
|
||||||
|
virtual void onFieldChecked(deltajit_field* jitField);
|
||||||
|
virtual void onStringChecked(deltajit_field* jitField);
|
||||||
|
|
||||||
|
deltajitdata_t *jitdesc;
|
||||||
|
|
||||||
|
private:
|
||||||
|
jitasm::Reg32 neededBits = ebx;
|
||||||
|
jitasm::Reg32 highestBit = ebp;
|
||||||
|
size_t highest_id;
|
||||||
|
};
|
||||||
|
|
||||||
|
CDeltaTestDeltaJIT::CDeltaTestDeltaJIT(deltajitdata_t *_jitdesc) : jitdesc(_jitdesc)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// Field hook: emits code that, when the field changed, raises highestBit to
// the field id (if greater) and adds the field's significant bits to neededBits.
//
// Expects edx = 1 if field changed, otherwise 0.
void CDeltaTestDeltaJIT::onFieldChecked(deltajit_field* field)
{
	// eax = candidate for the new highestBit
	mov(eax, field->id);

	if (field->id < highest_id)
	{
		// a field with a greater id was already processed, so the runtime
		// highestBit may exceed this id: select greatest from field->id and highestBit
		cmp(eax, highestBit);
		cmovl(eax, highestBit); // eax = id < highestBit ? highestBit : id
	}
	else
	{
		// highestBit set so far can't be > field->id, no runtime compare needed
		highest_id = field->id;
	}

	// neg turns edx into 0 (unchanged) or -1 (changed) and sets the flags
	neg(edx);
	cmovnz(highestBit, eax);

	// edx = changed ? significantBits : 0
	and_(edx, field->significantBits);
	add(neededBits, edx);
}
|
||||||
|
|
||||||
|
void CDeltaTestDeltaJIT::onStringChecked(deltajit_field* field)
|
||||||
|
{
|
||||||
// This generator expects that following registers are already initialized:
|
// This generator expects that following registers are already initialized:
|
||||||
// esi = src
|
// eax = result of strings comparison
|
||||||
// edi = dst
|
// ebp = highestBit
|
||||||
// ebx = deltaJit
|
// [esp] = dest string
|
||||||
|
|
||||||
size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask);
|
If(eax != 0); // changed
|
||||||
|
mov(ecx, (size_t)&Q_strlen);
|
||||||
|
call(ecx); // dest already in top of stack
|
||||||
|
|
||||||
//strings
|
lea(neededBits, ptr[neededBits + eax * 8 + 8]); // add size of string in bits + EOS byte
|
||||||
for (unsigned int i = 0; i < jitdesc->numFields; i++) {
|
|
||||||
auto jitField = &jitdesc->fields[i];
|
|
||||||
if (jitField->type != DT_STRING)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// will be parallel
|
mov(eax, field->id);
|
||||||
lea(eax, ptr[esi + jitField->offset]);
|
cmp(eax, highestBit);
|
||||||
lea(edx, ptr[edi + jitField->offset]);
|
cmovg(highestBit, eax);
|
||||||
|
EndIf();
|
||||||
push(eax);
|
|
||||||
push(edx);
|
|
||||||
mov(ecx, (size_t)&Q_stricmp);
|
|
||||||
call(ecx);
|
|
||||||
add(esp, 8);
|
|
||||||
xor_(ecx, ecx);
|
|
||||||
test(eax, eax);
|
|
||||||
setnz(cl);
|
|
||||||
|
|
||||||
shl(ecx, jitField->id & 31);
|
|
||||||
or_(ptr[ebx + delta_markbits_offset + ((jitField->id > 31) ? 4 : 0)], ecx);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CDeltaJit::CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc) {
|
CDeltaClearMarkFieldsJIT::Result CDeltaTestDeltaJIT::main(Addr src, Addr dst, Addr deltaJit)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Registers usage:
|
||||||
|
esi = src
|
||||||
|
edi = dst
|
||||||
|
ebx = neededBits;
|
||||||
|
ebp = highestBit;
|
||||||
|
xmm0-xmm2: loaded src
|
||||||
|
xmm3-xmm5: loaded dst
|
||||||
|
xmm6: temp
|
||||||
|
*/
|
||||||
|
mov(esi, ptr[src]);
|
||||||
|
mov(edi, ptr[dst]);
|
||||||
|
|
||||||
|
// neededBits 0; highestBit = -1
|
||||||
|
xor_(neededBits, neededBits);
|
||||||
|
xor_(highestBit, highestBit);
|
||||||
|
dec(highestBit);
|
||||||
|
|
||||||
|
// can save some operations
|
||||||
|
highest_id = 0;
|
||||||
|
|
||||||
|
// check changed fields
|
||||||
|
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateBlocks))(jitdesc);
|
||||||
|
|
||||||
|
#ifdef REHLDS_FIXES
|
||||||
|
// check changed strings
|
||||||
|
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateStrings))(jitdesc);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
If(highestBit >= 0);
|
||||||
|
//neededBits += highestBit / 8 * 8 + 8;
|
||||||
|
shr(highestBit, 3);
|
||||||
|
lea(neededBits, ptr[neededBits + highestBit * 8 + 8]);
|
||||||
|
EndIf();
|
||||||
|
|
||||||
|
return neededBits;
|
||||||
|
}
|
||||||
|
|
||||||
|
CDeltaJit::CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc, CDeltaTestDeltaJIT* _testDeltaFunc) {
|
||||||
delta = _delta;
|
delta = _delta;
|
||||||
cleanMarkCheckFunc = _cleanMarkCheckFunc;
|
cleanMarkCheckFunc = _cleanMarkCheckFunc;
|
||||||
|
testDeltaFunc = _testDeltaFunc;
|
||||||
}
|
}
|
||||||
|
|
||||||
CDeltaJit::~CDeltaJit() {
|
CDeltaJit::~CDeltaJit() {
|
||||||
if (cleanMarkCheckFunc) {
|
if (cleanMarkCheckFunc) {
|
||||||
delete cleanMarkCheckFunc;
|
delete cleanMarkCheckFunc;
|
||||||
|
delete testDeltaFunc;
|
||||||
cleanMarkCheckFunc = NULL;
|
cleanMarkCheckFunc = NULL;
|
||||||
|
testDeltaFunc = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -491,20 +645,29 @@ void CDeltaJitRegistry::CreateAndRegisterDeltaJIT(delta_t* delta) {
|
|||||||
deltajitdata_t data;
|
deltajitdata_t data;
|
||||||
DELTAJIT_CreateDescription(delta, data);
|
DELTAJIT_CreateDescription(delta, data);
|
||||||
|
|
||||||
CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc = new CDeltaClearMarkFieldsJIT(&data, DJ_M_CHECK);
|
CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc = new CDeltaClearMarkFieldsJIT(&data);
|
||||||
cleanMarkCheckFunc->Assemble();
|
cleanMarkCheckFunc->Assemble();
|
||||||
cleanMarkCheckFunc->jitdesc = NULL;
|
cleanMarkCheckFunc->jitdesc = NULL;
|
||||||
|
|
||||||
CDeltaJit* deltaJit = new CDeltaJit(delta, cleanMarkCheckFunc);
|
CDeltaTestDeltaJIT* testDeltaFunc = new CDeltaTestDeltaJIT(&data);
|
||||||
|
testDeltaFunc->Assemble();
|
||||||
|
testDeltaFunc->jitdesc = NULL;
|
||||||
|
|
||||||
|
// align to 16
|
||||||
|
CDeltaJit* deltaJit = new CDeltaJit(delta, cleanMarkCheckFunc, testDeltaFunc);
|
||||||
RegisterDeltaJit(delta, deltaJit);
|
RegisterDeltaJit(delta, deltaJit);
|
||||||
}
|
}
|
||||||
|
|
||||||
CDeltaJit* DELTAJit_LookupDeltaJit(const char* callsite, delta_t *pFields) {
|
CDeltaJit* DELTAJit_LookupDeltaJit(const char* callsite, delta_t *pFields) {
|
||||||
CDeltaJit* deltaJit = g_DeltaJitRegistry.GetJITByDelta(pFields);
|
CDeltaJit* deltaJit = g_DeltaJitRegistry.GetJITByDelta(pFields);
|
||||||
|
|
||||||
|
#ifndef REHLDS_FIXES
|
||||||
|
// only for testing
|
||||||
if (!deltaJit) {
|
if (!deltaJit) {
|
||||||
rehlds_syserror("%s: JITted delta encoder not found for delta %p", callsite, pFields);
|
rehlds_syserror("%s: JITted delta encoder not found for delta %p", callsite, pFields);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
#endif // REHLDS_FIXES
|
||||||
|
|
||||||
return deltaJit;
|
return deltaJit;
|
||||||
}
|
}
|
||||||
@ -515,6 +678,13 @@ NOINLINE int DELTAJit_Fields_Clear_Mark_Check(unsigned char *from, unsigned char
|
|||||||
return func(from, to, deltaJit, pForceMarkMask);
|
return func(from, to, deltaJit, pForceMarkMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Looks up the JIT object registered for pFields and runs its generated
// test-delta function on the from/to buffers, returning that function's result.
NOINLINE int DELTAJit_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
{
	CDeltaJit* jit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);

	// invoke the generated code through the generator's call operator
	return (*jit->testDeltaFunc)(from, to, jit);
}
|
||||||
|
|
||||||
void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount) {
|
void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount) {
|
||||||
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
|
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
|
||||||
|
|
||||||
@ -523,32 +693,22 @@ void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount) {
|
|||||||
*bytecount = deltaJit->markedFieldsMaskSize;
|
*bytecount = deltaJit->markedFieldsMaskSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber) {
|
void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber)
|
||||||
|
{
|
||||||
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
|
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
|
||||||
|
deltaJit->marked_fields_mask.u32[fieldNumber >> 5] |= (1 << (fieldNumber & 31));
|
||||||
if (fieldNumber > 31)
|
|
||||||
deltaJit->marked_fields_mask.u32[1] |= (1 << (fieldNumber & 0x1F));
|
|
||||||
else
|
|
||||||
deltaJit->marked_fields_mask.u32[0] |= (1 << fieldNumber);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber) {
|
void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber)
|
||||||
|
{
|
||||||
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
|
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
|
||||||
|
deltaJit->marked_fields_mask.u32[fieldNumber >> 5] &= ~(1 << (fieldNumber & 31));
|
||||||
if (fieldNumber > 31)
|
|
||||||
deltaJit->marked_fields_mask.u32[1] &= ~(1 << (fieldNumber & 0x1F));
|
|
||||||
else
|
|
||||||
deltaJit->marked_fields_mask.u32[0] &= ~(1 << fieldNumber);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests whether field `fieldNumber` is set in the marked-fields mask of the
// CDeltaJit associated with pFields.
//
// Returns the masked bit itself (non-zero when the field is marked, zero
// otherwise); the value is not normalized to 1. fieldNumber is not
// range-checked here.
qboolean DELTAJit_IsFieldMarked(delta_t* pFields, int fieldNumber)
{
	CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);

	// Unsigned literal: shifting a signed 1 by 31 would move it into the sign bit.
	return deltaJit->marked_fields_mask.u32[fieldNumber >> 5] & (1u << (fieldNumber & 31));
}
|
||||||
|
|
||||||
uint64 DELTAJit_GetOriginalMask(delta_t* pFields) {
|
uint64 DELTAJit_GetOriginalMask(delta_t* pFields) {
|
||||||
|
@ -11,6 +11,7 @@ struct deltajit_field {
|
|||||||
unsigned int length;
|
unsigned int length;
|
||||||
int type;
|
int type;
|
||||||
unsigned int numBlocks;
|
unsigned int numBlocks;
|
||||||
|
unsigned int significantBits;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct deltajit_memblock_field {
|
struct deltajit_memblock_field {
|
||||||
@ -42,12 +43,6 @@ struct deltajitdata_t {
|
|||||||
deltajit_memblock_itr_t itrBlocks[DELTAJIT_MAX_BLOCKS];
|
deltajit_memblock_itr_t itrBlocks[DELTAJIT_MAX_BLOCKS];
|
||||||
};
|
};
|
||||||
|
|
||||||
enum deltajit_marked_count_type_t {
|
|
||||||
DJ_M_DONT_COUNT,
|
|
||||||
DJ_M_CHECK,
|
|
||||||
//DJ_M_COUNT, //not implemented yet
|
|
||||||
};
|
|
||||||
|
|
||||||
class CDeltaJit;
|
class CDeltaJit;
|
||||||
|
|
||||||
class CDeltaJitRegistry {
|
class CDeltaJitRegistry {
|
||||||
@ -74,6 +69,7 @@ union delta_marked_mask_t {
|
|||||||
extern CDeltaJitRegistry g_DeltaJitRegistry;
|
extern CDeltaJitRegistry g_DeltaJitRegistry;
|
||||||
|
|
||||||
extern int DELTAJit_Fields_Clear_Mark_Check(unsigned char *from, unsigned char *to, delta_t *pFields, void* pForceMarkMask);
|
extern int DELTAJit_Fields_Clear_Mark_Check(unsigned char *from, unsigned char *to, delta_t *pFields, void* pForceMarkMask);
|
||||||
|
extern int DELTAJit_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields);
|
||||||
extern void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount);
|
extern void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount);
|
||||||
extern void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber);
|
extern void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber);
|
||||||
extern void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber);
|
extern void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber);
|
||||||
|
@ -561,6 +561,9 @@ extern delta_t *g_pentitydelta;
|
|||||||
extern delta_t *g_pcustomentitydelta;
|
extern delta_t *g_pcustomentitydelta;
|
||||||
extern delta_t *g_pclientdelta;
|
extern delta_t *g_pclientdelta;
|
||||||
extern delta_t *g_pweapondelta;
|
extern delta_t *g_pweapondelta;
|
||||||
|
#ifdef REHLDS_OPT_PEDANTIC
|
||||||
|
extern delta_t *g_pusercmddelta;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
extern unsigned char fatpvs[1024];
|
extern unsigned char fatpvs[1024];
|
||||||
|
@ -44,6 +44,9 @@ delta_t *g_pentitydelta;
|
|||||||
delta_t *g_pcustomentitydelta;
|
delta_t *g_pcustomentitydelta;
|
||||||
delta_t *g_pclientdelta;
|
delta_t *g_pclientdelta;
|
||||||
delta_t *g_pweapondelta;
|
delta_t *g_pweapondelta;
|
||||||
|
#ifdef REHLDS_OPT_PEDANTIC
|
||||||
|
delta_t *g_pusercmddelta;
|
||||||
|
#endif
|
||||||
|
|
||||||
int hashstrings_collisions;
|
int hashstrings_collisions;
|
||||||
|
|
||||||
@ -415,14 +418,18 @@ void SV_ReallocateDynamicData(void)
|
|||||||
if (g_moved_edict)
|
if (g_moved_edict)
|
||||||
{
|
{
|
||||||
Con_Printf("Reallocate on moved_edict\n");
|
Con_Printf("Reallocate on moved_edict\n");
|
||||||
// TODO: Free memory to prevent mem leaks?
|
#ifdef REHLDS_FIXES
|
||||||
|
Mem_Free(g_moved_edict);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
g_moved_edict = (edict_t **)Mem_ZeroMalloc(sizeof(edict_t *) * nSize);
|
g_moved_edict = (edict_t **)Mem_ZeroMalloc(sizeof(edict_t *) * nSize);
|
||||||
|
|
||||||
if (g_moved_from)
|
if (g_moved_from)
|
||||||
{
|
{
|
||||||
Con_Printf("Reallocate on moved_from\n");
|
Con_Printf("Reallocate on moved_from\n");
|
||||||
// TODO: Free memory to prevent mem leaks?
|
#ifdef REHLDS_FIXES
|
||||||
|
Mem_Free(g_moved_from);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
g_moved_from = (vec3_t *)Mem_ZeroMalloc(sizeof(vec3_t) * nSize);
|
g_moved_from = (vec3_t *)Mem_ZeroMalloc(sizeof(vec3_t) * nSize);
|
||||||
}
|
}
|
||||||
@ -437,7 +444,9 @@ void SV_AllocClientFrames(void)
|
|||||||
if (cl->frames)
|
if (cl->frames)
|
||||||
{
|
{
|
||||||
Con_DPrintf("Allocating over frame pointer?\n");
|
Con_DPrintf("Allocating over frame pointer?\n");
|
||||||
// TODO: Free memory to prevent mem leaks?
|
#ifdef REHLDS_FIXES
|
||||||
|
Mem_Free(cl->frames);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
cl->frames = (client_frame_t *)Mem_ZeroMalloc(sizeof(client_frame_t) * SV_UPDATE_BACKUP);
|
cl->frames = (client_frame_t *)Mem_ZeroMalloc(sizeof(client_frame_t) * SV_UPDATE_BACKUP);
|
||||||
}
|
}
|
||||||
@ -4084,23 +4093,21 @@ void SV_InvokeCallback(void)
|
|||||||
int SV_FindBestBaseline(int index, entity_state_t ** baseline, entity_state_t *to, int num, qboolean custom)
|
int SV_FindBestBaseline(int index, entity_state_t ** baseline, entity_state_t *to, int num, qboolean custom)
|
||||||
{
|
{
|
||||||
int bestbitnumber;
|
int bestbitnumber;
|
||||||
|
delta_t* delta;
|
||||||
|
|
||||||
if (custom)
|
if (custom)
|
||||||
{
|
{
|
||||||
bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], g_pcustomentitydelta);
|
delta = g_pcustomentitydelta;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (SV_IsPlayerIndex(num))
|
if (SV_IsPlayerIndex(num))
|
||||||
{
|
delta = g_pplayerdelta;
|
||||||
bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], g_pplayerdelta);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
delta = g_pentitydelta;
|
||||||
bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], g_pentitydelta);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], delta);
|
||||||
bestbitnumber -= 6;
|
bestbitnumber -= 6;
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
@ -4111,21 +4118,8 @@ int SV_FindBestBaseline(int index, entity_state_t ** baseline, entity_state_t *t
|
|||||||
{
|
{
|
||||||
if (to[index].entityType == to[i].entityType)
|
if (to[index].entityType == to[i].entityType)
|
||||||
{
|
{
|
||||||
if (custom)
|
bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], delta);
|
||||||
{
|
|
||||||
bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], g_pcustomentitydelta);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (SV_IsPlayerIndex(num))
|
|
||||||
{
|
|
||||||
bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], g_pplayerdelta);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], g_pentitydelta);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (bitnumber < bestbitnumber)
|
if (bitnumber < bestbitnumber)
|
||||||
{
|
{
|
||||||
bestbitnumber = bitnumber;
|
bestbitnumber = bitnumber;
|
||||||
@ -6907,6 +6901,12 @@ void SV_InitDeltas(void)
|
|||||||
if (!g_peventdelta)
|
if (!g_peventdelta)
|
||||||
Sys_Error("No event_t encoder on server!\n");
|
Sys_Error("No event_t encoder on server!\n");
|
||||||
|
|
||||||
|
#ifdef REHLDS_OPT_PEDANTIC
|
||||||
|
g_pusercmddelta = SV_LookupDelta("usercmd_t");
|
||||||
|
if (!g_pusercmddelta)
|
||||||
|
Sys_Error("No usercmd_t encoder on server!\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
|
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
|
||||||
g_DeltaJitRegistry.CreateAndRegisterDeltaJIT(&g_MetaDelta[0]);
|
g_DeltaJitRegistry.CreateAndRegisterDeltaJIT(&g_MetaDelta[0]);
|
||||||
#endif
|
#endif
|
||||||
|
@ -283,7 +283,6 @@ areanode_t *SV_CreateAreaNode(int depth, vec_t *mins, vec_t *maxs)
|
|||||||
anode->children[1] = SV_CreateAreaNode(depth + 1, mins1, maxs1);
|
anode->children[1] = SV_CreateAreaNode(depth + 1, mins1, maxs1);
|
||||||
|
|
||||||
return anode;
|
return anode;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* <ca877> ../engine/world.c:353 */
|
/* <ca877> ../engine/world.c:353 */
|
||||||
@ -375,6 +374,7 @@ void SV_TouchLinks(edict_t *ent, areanode_t *node)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* <ca8f2> ../engine/world.c:467 */
|
/* <ca8f2> ../engine/world.c:467 */
|
||||||
|
#ifndef REHLDS_OPT_PEDANTIC
|
||||||
void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode)
|
void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode)
|
||||||
{
|
{
|
||||||
mplane_t *splitplane;
|
mplane_t *splitplane;
|
||||||
@ -435,6 +435,87 @@ void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode)
|
|||||||
if (sides & 2)
|
if (sides & 2)
|
||||||
SV_FindTouchedLeafs(ent, node->children[1], topnode);
|
SV_FindTouchedLeafs(ent, node->children[1], topnode);
|
||||||
}
|
}
|
||||||
|
#else // REHLDS_OPT_PEDANTIC
|
||||||
|
// unrolled some tail recursion
|
||||||
|
// REHLDS_OPT_PEDANTIC variant: walks the tree from `node`, appending the
// leaves reached by the entity's absmin/absmax box to ent->leafnums.
// Descents into a single child, and into the second child of an axial split,
// are done by looping instead of recursing.
// *topnode receives the index of the first visited node whose plane splits
// the box, if it is still -1 at that point.
void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode)
{
	for (;;)
	{
		// Solid nodes contribute nothing.
		if (node->contents == CONTENTS_SOLID)
			return;

		// Any other negative contents value: the node is handled as a leaf.
		if (node->contents < 0)
		{
			if (ent->num_leafs >= MAX_ENT_LEAFS)
			{
				// No room left in leafnums; store one past the limit.
				ent->num_leafs = MAX_ENT_LEAFS + 1;
				return;
			}

			mleaf_t *leaf = (mleaf_t *)node;
			int leafnum = leaf - g_psv.worldmodel->leafs - 1;
			ent->leafnums[ent->num_leafs++] = leafnum;
			return;
		}

		mplane_t *splitplane = node->plane;

		if (splitplane->type >= 3)
		{
			// Plane types >= 3 go through the general box/plane test and
			// still recurse into each touched child.
			int sides = BoxOnPlaneSide(ent->v.absmin, ent->v.absmax, splitplane);

			if (sides == 3 && *topnode == -1)
				*topnode = node - g_psv.worldmodel->nodes;

			if (sides & 1)
				SV_FindTouchedLeafs(ent, node->children[0], topnode);

			if (sides & 2)
				SV_FindTouchedLeafs(ent, node->children[1], topnode);

			return;
		}

		// Plane types 0..2 index absmin/absmax directly.
		if (!(splitplane->dist > ent->v.absmin[splitplane->type]))
		{
			// sides = 1: only children[0]
			node = node->children[0];
			continue;
		}

		if (splitplane->dist < ent->v.absmax[splitplane->type])
		{
			// sides = 3: the plane cuts the box, so visit both children;
			// children[0] by recursion, children[1] by continuing the loop.
			if (*topnode == -1)
				*topnode = node - g_psv.worldmodel->nodes;

			SV_FindTouchedLeafs(ent, node->children[0], topnode);
		}

		// sides = 2 or the second half of sides = 3
		node = node->children[1];
	}
}
|
||||||
|
#endif // REHLDS_OPT_PEDANTIC
|
||||||
|
|
||||||
/* <caab0> ../engine/world.c:517 */
|
/* <caab0> ../engine/world.c:517 */
|
||||||
void SV_LinkEdict(edict_t *ent, qboolean touch_triggers)
|
void SV_LinkEdict(edict_t *ent, qboolean touch_triggers)
|
||||||
@ -547,6 +628,11 @@ int SV_LinkContents(areanode_t *node, const vec_t *pos)
|
|||||||
vec3_t localPosition;
|
vec3_t localPosition;
|
||||||
vec3_t offset;
|
vec3_t offset;
|
||||||
|
|
||||||
|
#ifdef REHLDS_OPT_PEDANTIC
|
||||||
|
// unroll tail recursion
|
||||||
|
while (1)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
for (l = node->solid_edicts.next; l != &node->solid_edicts; l = next)
|
for (l = node->solid_edicts.next; l != &node->solid_edicts; l = next)
|
||||||
{
|
{
|
||||||
next = l->next;
|
next = l->next;
|
||||||
@ -589,15 +675,31 @@ int SV_LinkContents(areanode_t *node, const vec_t *pos)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (node->axis == -1)
|
if (node->axis == -1)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
#ifndef REHLDS_OPT_PEDANTIC
|
||||||
if (pos[node->axis] > node->dist)
|
if (pos[node->axis] > node->dist)
|
||||||
return SV_LinkContents(node->children[0], pos);
|
return SV_LinkContents(node->children[0], pos);
|
||||||
|
|
||||||
if (pos[node->axis] < node->dist)
|
if (pos[node->axis] < node->dist)
|
||||||
return SV_LinkContents(node->children[1], pos);
|
return SV_LinkContents(node->children[1], pos);
|
||||||
|
#else // REHLDS_OPT_PEDANTIC
|
||||||
|
if (pos[node->axis] > node->dist)
|
||||||
|
{
|
||||||
|
node = node->children[0];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pos[node->axis] < node->dist)
|
||||||
|
{
|
||||||
|
node = node->children[1];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
#endif // REHLDS_OPT_PEDANTIC
|
||||||
|
}
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -166,22 +166,9 @@ NOINLINE void _GetBitmaskAndBytecount(delta_t* delta, int* bits, int* bytecount,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE void _CompareDeltaResults(const char* callsite, delta_res_t* def, delta_res_t* jit, int testscount)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < testscount; i++)
|
|
||||||
{
|
|
||||||
if (!!def[i].sendfields != !!jit[i].sendfields)
|
|
||||||
rehlds_syserror("%s: Test %i: !!sendfields not equals %i|%i", callsite, i, !!def[i].sendfields, !!jit[i].sendfields);
|
|
||||||
if (memcmp(def[i].bits, jit[i].bits, 8))
|
|
||||||
rehlds_syserror("%s: Test %i: bits not equals %p.%p|%p.%p", callsite, i, def[i].bits[0], def[i].bits[1], jit[i].bits[0], jit[i].bits[1]);
|
|
||||||
if (def[i].bytecount != jit[i].bytecount)
|
|
||||||
rehlds_syserror("%s: Test %i: bytecount not equal %i|%i", callsite, i, def[i].bytecount, jit[i].bytecount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
NOINLINE delta_t* _CreateTestDeltaDesc() {
|
NOINLINE delta_t* _CreateTestDeltaDesc() {
|
||||||
static delta_description_t _fields[32];
|
static delta_description_t _fields[32];
|
||||||
delta_test_struct_t d; // "use" d variable
|
delta_test_struct_t d; d; // "use" d variable
|
||||||
|
|
||||||
_InitDeltaField(&_fields[0], 0x00, DT_BYTE, "b_00", offsetof(delta_test_struct_t, b_00), 1, 8, 1.0f, 1.0f);
|
_InitDeltaField(&_fields[0], 0x00, DT_BYTE, "b_00", offsetof(delta_test_struct_t, b_00), 1, 8, 1.0f, 1.0f);
|
||||||
_InitDeltaField(&_fields[1], 0x01, DT_BYTE, "b_01", offsetof(delta_test_struct_t, b_01), 1, 8, 1.0f, 1.0f);
|
_InitDeltaField(&_fields[1], 0x01, DT_BYTE, "b_01", offsetof(delta_test_struct_t, b_01), 1, 8, 1.0f, 1.0f);
|
||||||
@ -331,3 +318,51 @@ TEST(MarkFieldsTest_TimeWindow, Delta, 1000) {
|
|||||||
|
|
||||||
SV_Shutdown();
|
SV_Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the value returned by DELTA_TestDelta for four prepared structures
// against hand-computed expectations.
// Each expectation adds the significant_bits of the changed fields to a
// trailing term of the form (index / 8 * 8 + 8).
// NOTE(review): that trailing term looks like the size of the field mask
// derived from the highest changed field index - confirm against
// DELTA_TestDelta.
TEST(TestDelta_Test, Delta, 1000)
{
	delta_t* desc = _CreateTestDeltaDesc();

	delta_test_struct_t cases[4], base;
	int expected[4];

	// Every structure starts from the same 0xCC fill pattern.
	for (size_t n = 0; n < 4; n++)
		_FillTestDelta(&cases[n], 0xCC);
	_FillTestDelta(&base, 0xCC);

	// case 0: identical to the base
	expected[0] = 0;

	// case 1: byte + short + float changed
	cases[1].b_01 = 1;
	cases[1].s_12 = 1.0;
	cases[1].f_08 = 1.0;
	expected[1] = desc->pdd[1].significant_bits
		+ desc->pdd[8].significant_bits
		+ desc->pdd[4].significant_bits
		+ (8 / 8 * 8 + 8);

	// case 2: float + float + string changed
	cases[2].f_18 = 2.0;
	cases[2].wb_20 = 2.0;
	strcpy(cases[2].s_24, "TestDelta_Test");
#ifdef REHLDS_FIXES
	// With REHLDS_FIXES the string adds 8 bits per character plus 8 more.
	expected[2] = desc->pdd[10].significant_bits
		+ desc->pdd[12].significant_bits
		+ strlen(cases[2].s_24) * 8 + 8
		+ (13 / 8 * 8 + 8);
#else
	expected[2] = desc->pdd[10].significant_bits
		+ desc->pdd[12].significant_bits
		+ (13 / 8 * 8 + 8);
#endif

	// case 3: byte + int + float + short changed
	cases[3].b_4D = 4;
	cases[3].i_14 = 4;
	cases[3].w8_0C = 4.0;
	cases[3].s_12 = 4;
	expected[3] = desc->pdd[14].significant_bits
		+ desc->pdd[9].significant_bits
		+ desc->pdd[5].significant_bits
		+ desc->pdd[8].significant_bits
		+ (14 / 8 * 8 + 8);

	for (size_t n = 0; n < 4; n++)
	{
		int actual = DELTA_TestDelta((uint8 *)&base, (uint8 *)&cases[n], desc);

		if (actual == expected[n])
			continue;

		rehlds_syserror("TestDelta_Test: returned bitcount %i is not equal to true value %i", actual, expected[n]);
	}

	SV_Shutdown();
}
|
||||||
|
Loading…
Reference in New Issue
Block a user