2
0
mirror of https://github.com/rehlds/rehlds.git synced 2025-01-19 10:08:04 +03:00

JITted DELTA_TestDelta.

Made a unit test for DELTA_TestDelta.
Unrolled tail recursion in SV_FindTouchedLeafs and SV_LinkContents.
Other small optimizations and fixes.
This commit is contained in:
asmodai 2015-05-29 23:21:20 +03:00
parent bc88790bb1
commit 7a973d1afb
8 changed files with 587 additions and 282 deletions

View File

@ -1253,9 +1253,12 @@ NOXREF float MSG_ReadHiresAngle(void)
void MSG_ReadUsercmd(usercmd_t *to, usercmd_t* from)
{
delta_t *pdesc = SV_LookupDelta("usercmd_t");
MSG_StartBitReading(&net_message);
DELTA_ParseDelta((byte *)from, (byte *)to, pdesc);
#ifdef REHLDS_OPT_PEDANTIC
DELTA_ParseDelta((byte *)from, (byte *)to, g_pusercmddelta);
#else
DELTA_ParseDelta((byte *)from, (byte *)to, SV_LookupDelta("usercmd_t"));
#endif
MSG_EndBitReading(&net_message);
COM_NormalizeAngles(to->viewangles);
}

View File

@ -422,6 +422,9 @@ void DELTA_ClearFlags(delta_t *pFields)
/* <241d2> ../engine/delta.c:473 */
int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
{
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
return DELTAJit_TestDelta(from, to, pFields);
#else
int i;
char *st1, *st2;
delta_description_t *pTest;
@ -452,9 +455,10 @@ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
break;
#ifdef REHLDS_FIXES
// don't use multiplier when checking, to increase performance
// check values binary like it does in jit
case DT_TIMEWINDOW_8:
case DT_TIMEWINDOW_BIG:
different = (int32)(*(float *)&from[pTest->fieldOffset]) != (int32)(*(float *)&to[pTest->fieldOffset]);
different = (*(int32 *)&from[pTest->fieldOffset]) != (*(int32 *)&to[pTest->fieldOffset]);
break;
#else
case DT_TIMEWINDOW_8:
@ -471,8 +475,10 @@ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
{
#ifdef REHLDS_FIXES
different = TRUE;
#endif // REHLDS_FIXES
length = Q_strlen(st2) * 8;
#else // REHLDS_FIXES
length = Q_strlen(st2);
#endif // REHLDS_FIXES
}
break;
default:
@ -487,12 +493,13 @@ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
}
}
if (neededBits != -1)
if (highestBit != -1)
{
neededBits += highestBit / 8 * 8 + 8;
}
return neededBits;
#endif
}
/* <24309> ../engine/delta.c:602 */
@ -749,7 +756,6 @@ qboolean DELTA_CheckDelta(unsigned char *from, unsigned char *to, delta_t *pFiel
NOINLINE qboolean DELTA_WriteDelta(unsigned char *from, unsigned char *to, qboolean force, delta_t *pFields, void(*callback)(void))
{
qboolean sendfields;
int bytecount;
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
sendfields = DELTAJit_Fields_Clear_Mark_Check(from, to, pFields, NULL);

View File

@ -27,7 +27,7 @@ unsigned int DELTAJIT_GetFieldSize(delta_description_t* desc) {
return 4;
case DT_STRING:
return desc->fieldSize;
return 0;
default:
rehlds_syserror("%s: Unknown delta field type %d", __FUNCTION__, desc->fieldType);
@ -84,6 +84,7 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) {
jitField->offset = fieldDesc->fieldOffset;
jitField->type = fieldDesc->fieldType;
jitField->length = fieldSize;
jitField->significantBits = fieldDesc->significant_bits;
if (fieldDesc->fieldType != DT_STRING) {
bool firstBlock = true;
@ -127,52 +128,209 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) {
}
class CDeltaClearMarkFieldsJIT;
class CDeltaTestDeltaJIT;
class CDeltaJit {
public:
CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc;
CDeltaTestDeltaJIT* testDeltaFunc;
delta_t* delta;
delta_marked_mask_t marked_fields_mask;
delta_marked_mask_t originalMarkedFieldsMask; //mask based on data, before calling the conditional encoder
int markedFieldsMaskSize;
delta_marked_mask_t marked_fields_mask;
int mfm_sse_highbits[2]; //High 64 bits for manipulating marked_fields_mask via SSE registers
delta_marked_mask_t originalMarkedFieldsMask; //mask based on data, before calling the conditional encoder
int omfm_sse_highbits[2]; //High 64 bits for manipulating marked_fields_mask via SSE registers
CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc);
CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc, CDeltaTestDeltaJIT* _testDeltaFunc);
virtual ~CDeltaJit();
};
class CDeltaClearMarkFieldsJIT : public jitasm::function<int, CDeltaClearMarkFieldsJIT, void*, void*, void*, void*> {
class CDeltaCheckJIT : public jitasm::function<void, CDeltaCheckJIT>
{
public:
deltajitdata_t *jitdesc;
deltajit_marked_count_type_t countType;
XmmReg marked_fields_mask = xmm7;
CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc, deltajit_marked_count_type_t _countType)
: jitdesc(_jitdesc), countType(_countType) {
}
void main() {}
virtual void onFieldChecked(deltajit_field* jitField) {}
virtual void onStringChecked(deltajit_field* jitField) {}
void checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField);
Result main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask);
void processStrings(Addr src, Addr dst);
void callConditionalEncoder(Addr src, Addr dst, Addr deltaJit);
void calculateBytecount();
void iterateBlocks(deltajitdata_t *jitdesc);
void iterateStrings(deltajitdata_t *jitdesc);
};
void CDeltaClearMarkFieldsJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField) {
test(mask, (uint16)jitField->mask);
void CDeltaCheckJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField)
{
test(mask, jitField->mask);
setnz(al);
movzx(edx, al);
}
// Emits the code that compares the src and dst structures in 16-byte blocks.
// For every field part listed in every iterated block, the emitted code leaves
// edx = 1 (changed) or 0 (unchanged) and then onFieldChecked() is invoked so the
// handler can emit its own reaction to that value.
//
// jitdesc: generation-time description of the blocks and the fields they contain.
// Expects esi = src and edi = dst to be loaded by the caller's emitted code.
void CDeltaCheckJIT::iterateBlocks(deltajitdata_t *jitdesc)
{
#ifndef REHLDS_FIXES
// 12 bytes: [esp] holds the float multiplier, [esp + 4] an 8-byte scratch value
sub(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking
#endif
/*
Registers usage:
esi = src
edi = dst
ecx = blockMask
xmm0-xmm2: loaded src
xmm3-xmm5: loaded dst
xmm6: temp
Already initialized:
esi, edi
*/
int dataXmmCounter = 0; // from 0 to 2 => 3 pairs of registers
jitasm::Frontend::XmmReg src_xmm[3] = {xmm0, xmm1, xmm2};
jitasm::Frontend::XmmReg dst_xmm[3] = {xmm3, xmm4, xmm5};
// NOTE(review): xmm_tmp is not referenced again inside this function
auto xmm_tmp = xmm6;
// Preload up to the first three blocks, one src/dst register pair per block
if (jitdesc->numItrBlocks > 0) {
movdqu(src_xmm[0], xmmword_ptr[esi + ( jitdesc->itrBlocks[0].memblockId * 16 )]);
movdqu(dst_xmm[0], xmmword_ptr[edi + ( jitdesc->itrBlocks[0].memblockId * 16 )]);
}
if (jitdesc->numItrBlocks > 1) {
movdqu(src_xmm[1], xmmword_ptr[esi + ( jitdesc->itrBlocks[1].memblockId * 16 )]);
movdqu(dst_xmm[1], xmmword_ptr[edi + ( jitdesc->itrBlocks[1].memblockId * 16 )]);
}
if (jitdesc->numItrBlocks > 2) {
movdqu(src_xmm[2], xmmword_ptr[esi + ( jitdesc->itrBlocks[2].memblockId * 16 )]);
movdqu(dst_xmm[2], xmmword_ptr[edi + ( jitdesc->itrBlocks[2].memblockId * 16 )]);
}
auto blockMask = ecx;
xor_(blockMask, blockMask);
for (unsigned int i = 0; i < jitdesc->numItrBlocks; i++) {
auto block = jitdesc->itrBlocks[i].memblock;
auto itrBlock = &jitdesc->itrBlocks[i];
//do far prefetch
if (itrBlock->prefetchBlockId != -1) {
prefetcht0(byte_ptr[esi + ( itrBlock->prefetchBlockId * 16 )]);
prefetcht0(byte_ptr[edi + ( itrBlock->prefetchBlockId * 16 )]);
}
// create mask for changed bytes
// pcmpeqb marks equal bytes, pmovmskb packs one bit per byte into blockMask,
// not_ inverts it so that set bits denote differing bytes.
// NOTE(review): not_ also sets bits 16-31 of blockMask; this is harmless only if
// every field mask stays within the low 16 bits - confirm.
pcmpeqb(src_xmm[dataXmmCounter], dst_xmm[dataXmmCounter]);
pmovmskb(blockMask, src_xmm[dataXmmCounter]);
not_(blockMask);
//preload next blocks
// the register pair just consumed is reused for the block three positions ahead
if (i + 3 < jitdesc->numItrBlocks) {
movdqu(src_xmm[dataXmmCounter], xmmword_ptr[esi + ( jitdesc->itrBlocks[i + 3].memblockId * 16 )]);
movdqu(dst_xmm[dataXmmCounter], xmmword_ptr[edi + ( jitdesc->itrBlocks[i + 3].memblockId * 16 )]);
}
// rotate through the three register pairs
dataXmmCounter++;
if (dataXmmCounter > 2) {
dataXmmCounter -= 3;
}
// iterate fields contained in block
for (unsigned int j = 0; j < block->numFields; j++) {
auto jitField = &block->fields[j];
#ifndef REHLDS_FIXES
// precise floats comparison
// Time-window fields are compared after scaling by 100 (DT_TIMEWINDOW_8) or
// 1000 (DT_TIMEWINDOW_BIG) and truncating to an integer, instead of bytewise.
if (jitField->field->type == DT_TIMEWINDOW_8 || jitField->field->type == DT_TIMEWINDOW_BIG) {
// emit the comparison once per field, on the part flagged as last
if (jitField->last) {
float multiplier = ( jitField->field->type == DT_TIMEWINDOW_8 ) ? 100.0f : 1000.0f;
uint32 m32 = *(uint32*)( &multiplier );
mov(dword_ptr[esp], m32);
// eax = (int)(src value * multiplier)
fld(dword_ptr[esi + jitField->field->offset]);
fld(dword_ptr[esp]);
fmulp();
fstp(qword_ptr[esp + 4]);
cvttsd2si(eax, mmword_ptr[esp + 4]);
// edx = (int)(dst value * multiplier)
fld(dword_ptr[edi + jitField->field->offset]);
fld(dword_ptr[esp]);
fmulp();
fstp(qword_ptr[esp + 4]);
cvttsd2si(edx, mmword_ptr[esp + 4]);
// edx = 1 if the truncated values differ, otherwise 0
cmp(eax, edx);
setne(al);
movzx(edx, al);
} else {
// other parts of a time-window field: no handler call is emitted
continue;
}
} else {
checkFieldMask(blockMask, jitField);
}
#else
// checkFieldMask leaves edx = 1 when any byte selected by the field mask differs
checkFieldMask(blockMask, jitField);
#endif
// call handler
onFieldChecked(jitField->field);
}
}
#ifndef REHLDS_FIXES
add(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking
#endif // REHLDS_FIXES
}
// Emits, for every DT_STRING field of the description, a call to Q_stricmp on the
// dst and src copies of the string, followed by whatever onStringChecked() emits.
//
// The handler's code is placed between the call and the stack cleanup, so while it
// runs eax holds the Q_stricmp result and the two pushed pointers are still on the
// stack (dst at [esp], src at [esp + 4]).
void CDeltaCheckJIT::iterateStrings(deltajitdata_t *jitdesc)
{
// This generator expects that following registers are already initialized:
// esi = src
// edi = dst
for (unsigned int i = 0; i < jitdesc->numFields; i++) {
auto jitField = &jitdesc->fields[i];
// only string fields are handled here
if (jitField->type != DT_STRING)
continue;
// will be parallel
lea(eax, ptr[esi + jitField->offset]);
lea(edx, ptr[edi + jitField->offset]);
// arguments are pushed right to left: Q_stricmp(dst string, src string)
push(eax);
push(edx);
mov(ecx, (size_t)&Q_stricmp);
call(ecx);
// call handler
onStringChecked(jitField);
// remove the two pushed pointers
add(esp, 8);
}
}
// JIT generator for the function that compares two structures and builds the mask
// of marked (changed) fields. The generated function takes four pointer arguments
// (src, dst, deltaJit, pForceMarkMask) and returns an int.
class CDeltaClearMarkFieldsJIT : public jitasm::function<int, CDeltaClearMarkFieldsJIT, void*, void*, void*, void*>
{
public:
CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc);
// entry point of the code generator
Result main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask);
void callConditionalEncoder(Addr src, Addr dst, Addr deltaJit);
void calculateBytecount();
// first two virtual functions must be same as in CDeltaCheckJIT
// (main() calls CDeltaCheckJIT members on this object through a reinterpret_cast,
// so presumably the virtual slot order has to match - keep these two first)
virtual void onFieldChecked(deltajit_field* jitField);
virtual void onStringChecked(deltajit_field* jitField);
// description used while generating code
deltajitdata_t *jitdesc;
private:
// scratch register used when setting a bit in the mask
jitasm::XmmReg xmm_tmp = xmm6;
// register accumulating the marked-fields mask
jitasm::XmmReg marked_fields_mask = xmm7;
};
// Stores the description the code generator will read while assembling.
CDeltaClearMarkFieldsJIT::CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc)
{
	jitdesc = _jitdesc;
}
void CDeltaClearMarkFieldsJIT::callConditionalEncoder(Addr src, Addr dst, Addr deltaJit) {
//This generator expects that following registers are already initialized:
// This generator expects that following registers are already initialized:
// esi = src
// edi = dst
@ -192,8 +350,8 @@ void CDeltaClearMarkFieldsJIT::callConditionalEncoder(Addr src, Addr dst, Addr d
}
void CDeltaClearMarkFieldsJIT::calculateBytecount() {
//This generator expects that following registers are already initialized:
//ebx = delta
// This generator expects that following registers are already initialized:
// ebx = delta
size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask);
mov(eax, dword_ptr[ebx + delta_markbits_offset]);
@ -260,12 +418,34 @@ void CDeltaClearMarkFieldsJIT::calculateBytecount() {
mov(dword_ptr[ebx + delta_masksize_offset], edx);
}
// Emits the code that records the "changed" flag of a field in the mask register:
// the 0/1 value in edx is shifted to bit position field->id and OR-ed into
// marked_fields_mask (xmm7).
void CDeltaClearMarkFieldsJIT::onFieldChecked(deltajit_field* field)
{
// This generator expects that following registers are already initialized:
// edx = is field changed
movd(xmm_tmp, edx); // set bit in send mask
psllq(xmm_tmp, field->id); // shift left
por(marked_fields_mask, xmm_tmp);
}
// Emits the code that records the result of a string comparison: when eax is
// non-zero, the bit of this field is OR-ed into CDeltaJit::marked_fields_mask.
// Unlike onFieldChecked, the mask is updated in memory (through ebx), not in xmm7.
void CDeltaClearMarkFieldsJIT::onStringChecked(deltajit_field* field)
{
// This generator expects that following registers are already initialized:
// eax = result of strings comparison
// ebx = deltaJit
size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask);
// ecx = (eax != 0) ? 1 : 0
xor_(ecx, ecx);
test(eax, eax);
setnz(cl);
// bit index inside the 32-bit word; ids above 31 go to the second word (+4 bytes)
shl(ecx, field->id & 31);
or_(ptr[ebx + delta_markbits_offset + ((field->id > 31) ? 4 : 0)], ecx);
}
CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask)
{
#ifndef REHLDS_FIXES
sub(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking
#endif
/*
Registers usage:
esi = src
@ -273,129 +453,32 @@ CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr d
xmm0-xmm2: loaded src
xmm3-xmm5: loaded dst
xmm6: temp
xmm7: marked fields mask
xmm7: marked fields mask
*/
mov(esi, ptr[src]);
mov(edi, ptr[dst]);
int dataXmmCounter = 0; // from 0 to 2 => 3 pairs of registers
jitasm::Frontend::XmmReg src_xmm[3] = { xmm0, xmm1, xmm2 };
jitasm::Frontend::XmmReg dst_xmm[3] = { xmm3, xmm4, xmm5 };
// overall mask is in xmm7 (marked_fields_mask)
auto xmm_tmp = xmm6;
if (jitdesc->numItrBlocks > 0) {
movdqu(src_xmm[0], xmmword_ptr[esi + (jitdesc->itrBlocks[0].memblockId * 16)]);
movdqu(dst_xmm[0], xmmword_ptr[edi + (jitdesc->itrBlocks[0].memblockId * 16)]);
}
if (jitdesc->numItrBlocks > 1) {
movdqu(src_xmm[1], xmmword_ptr[esi + (jitdesc->itrBlocks[1].memblockId * 16)]);
movdqu(dst_xmm[1], xmmword_ptr[edi + (jitdesc->itrBlocks[1].memblockId * 16)]);
}
if (jitdesc->numItrBlocks > 2) {
movdqu(src_xmm[2], xmmword_ptr[esi + (jitdesc->itrBlocks[2].memblockId * 16)]);
movdqu(dst_xmm[2], xmmword_ptr[edi + (jitdesc->itrBlocks[2].memblockId * 16)]);
}
auto blockMask = ecx;
xor_(blockMask, blockMask);
pxor(marked_fields_mask, marked_fields_mask);
for (unsigned int i = 0; i < jitdesc->numItrBlocks; i++) {
auto block = jitdesc->itrBlocks[i].memblock;
auto itrBlock = &jitdesc->itrBlocks[i];
//do far prefetch
if (itrBlock->prefetchBlockId != -1) {
prefetcht0(byte_ptr[esi + (itrBlock->prefetchBlockId * 16)]);
prefetcht0(byte_ptr[edi + (itrBlock->prefetchBlockId * 16)]);
}
pcmpeqb(src_xmm[dataXmmCounter], dst_xmm[dataXmmCounter]);
pmovmskb(blockMask, src_xmm[dataXmmCounter]);
not_(blockMask);
//preload next blocks
if (i + 3 < jitdesc->numItrBlocks) {
movdqu(src_xmm[dataXmmCounter], xmmword_ptr[esi + (jitdesc->itrBlocks[i + 3].memblockId * 16)]);
movdqu(dst_xmm[dataXmmCounter], xmmword_ptr[edi + (jitdesc->itrBlocks[i + 3].memblockId * 16)]);
}
dataXmmCounter++;
if (dataXmmCounter > 2) {
dataXmmCounter -= 3;
}
for (unsigned int j = 0; j < block->numFields; j++) {
auto jitField = &block->fields[j];
#ifndef REHLDS_FIXES
if (jitField->field->type == DT_TIMEWINDOW_8 || jitField->field->type == DT_TIMEWINDOW_BIG) {
if (jitField->last) {
float multiplier = (jitField->field->type == DT_TIMEWINDOW_8) ? 100.0f : 1000.0f;
uint32 m32 = *(uint32*)(&multiplier);
mov(dword_ptr[esp], m32);
fld(dword_ptr[esi + jitField->field->offset]);
fld(dword_ptr[esp]);
fmulp();
fstp(qword_ptr[esp + 4]);
cvttsd2si(eax, mmword_ptr[esp + 4]);
fld(dword_ptr[edi + jitField->field->offset]);
fld(dword_ptr[esp]);
fmulp();
fstp(qword_ptr[esp + 4]);
cvttsd2si(edx, mmword_ptr[esp + 4]);
cmp(eax, edx);
setne(al);
movzx(edx, al);
} else {
continue;
}
} else {
checkFieldMask(blockMask, jitField);
}
#else
checkFieldMask(blockMask, jitField);
#endif
// set bit in send mask
movd(xmm_tmp, edx);
psllq(xmm_tmp, jitField->field->id);
por(marked_fields_mask, xmm_tmp);
}
}
// check changed blocks
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateBlocks))(jitdesc);
//apply 'force mark' mask if it's present
mov(eax, ptr[pForceMarkMask]);
If(eax != 0);
//mask for cleaning garbage in high 64 bits
mov(edx, -1);
movd(xmm0, edx);
movd(xmm1, edx);
psllq(xmm0, 32);
por(xmm0, xmm1);
movdqu(xmm_tmp, xmmword_ptr[eax]);
pand(xmm_tmp, xmm0); //clean high 64 bits
por(marked_fields_mask, xmm_tmp); //apply the 'force' mask
movq(xmm_tmp, qword_ptr[eax]);
por(marked_fields_mask, xmm_tmp);
EndIf();
size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask);
//Save mask from SSE register to CDeltaJit::marked_fields_mask and CDeltaJit::originalMarkedFieldsMask
mov(ebx, ptr[deltaJit]);
movdqu(xmmword_ptr[ebx + delta_markbits_offset], marked_fields_mask);
movdqu(xmmword_ptr[ebx + offsetof(CDeltaJit, originalMarkedFieldsMask)], marked_fields_mask);
movq(qword_ptr[ebx + delta_markbits_offset], marked_fields_mask);
movq(qword_ptr[ebx + offsetof(CDeltaJit, originalMarkedFieldsMask)], marked_fields_mask);
processStrings(src, dst);
// check changed strings
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateStrings))(jitdesc);
//emit conditional encoder call
callConditionalEncoder(src, dst, deltaJit);
@ -413,54 +496,125 @@ CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr d
mov(dword_ptr[ebx + delta_masksize_offset], edx);
EndIf();
#ifndef REHLDS_FIXES
add(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking
#endif // REHLDS_FIXES
return edx;
}
void CDeltaClearMarkFieldsJIT::processStrings(Addr src, Addr dst) {
//This generator expects that following registers are already initialized:
// esi = src
// edi = dst
// ebx = deltaJit
class CDeltaTestDeltaJIT : public jitasm::function<int, CDeltaTestDeltaJIT, void*, void*, void*>
{
public:
CDeltaTestDeltaJIT(deltajitdata_t *_jitdesc);
size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask);
Result main(Addr src, Addr dst, Addr deltaJit);
//strings
for (unsigned int i = 0; i < jitdesc->numFields; i++) {
auto jitField = &jitdesc->fields[i];
if (jitField->type != DT_STRING)
continue;
virtual void onFieldChecked(deltajit_field* jitField);
virtual void onStringChecked(deltajit_field* jitField);
// will be parallel
lea(eax, ptr[esi + jitField->offset]);
lea(edx, ptr[edi + jitField->offset]);
deltajitdata_t *jitdesc;
push(eax);
push(edx);
mov(ecx, (size_t)&Q_stricmp);
call(ecx);
add(esp, 8);
xor_(ecx, ecx);
test(eax, eax);
setnz(cl);
private:
jitasm::Reg32 neededBits = ebx;
jitasm::Reg32 highestBit = ebp;
size_t highest_id;
};
shl(ecx, jitField->id & 31);
or_(ptr[ebx + delta_markbits_offset + ((jitField->id > 31) ? 4 : 0)], ecx);
}
// Stores the description used during code generation.
// highest_id is generation-time bookkeeping read by onFieldChecked(); it is
// initialised here so the object never holds an indeterminate value, even though
// main() resets it before emitting the field checks.
CDeltaTestDeltaJIT::CDeltaTestDeltaJIT(deltajitdata_t *_jitdesc) : jitdesc(_jitdesc), highest_id(0)
{
}
CDeltaJit::CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc) {
// Emits the code that accounts for one checked field: when the field changed
// (edx = 1), highestBit becomes max(highestBit, field->id) and the field's
// significantBits are added to neededBits; when it did not change (edx = 0),
// both registers keep their values.
//
// NOTE(review): iterateBlocks() invokes this handler once per (block, field) entry.
// If a field can be split across two 16-byte blocks, its significantBits could be
// added twice when both parts differ - confirm against DELTAJIT_CreateDescription.
void CDeltaTestDeltaJIT::onFieldChecked(deltajit_field* field)
{
mov(eax, field->id);
if (field->id >= highest_id)
{
// Generation-time shortcut: every id emitted so far is <= highest_id, so the
// run-time highestBit can't be > field->id and no comparison has to be emitted
highest_id = field->id;
}
else
{
// select greatest from field->id and highestBit
cmp(eax, highestBit);
cmovl(eax, highestBit); // eax = id < highestBit ? highestBit : id
}
// edx = 1 if field changed, otherwise 0
// neg turns it into an all-ones/zero mask and clears ZF only when the field changed
neg(edx);
cmovnz(highestBit, eax);
// edx = significantBits if changed, otherwise 0
and_(edx, field->significantBits);
add(neededBits, edx);
}
// Emits the code that accounts for one compared string field: when the strings
// differ, the bit length of the dest string plus one terminating byte is added to
// neededBits and highestBit becomes max(highestBit, field->id).
void CDeltaTestDeltaJIT::onStringChecked(deltajit_field* field)
{
// This generator expects that following registers are already initialized:
// eax = result of strings comparison
// ebp = highestBit
// [esp] = dest string
If(eax != 0); // changed
// the pointer pushed for Q_stricmp is reused as the Q_strlen argument
mov(ecx, (size_t)&Q_strlen);
call(ecx); // dest already in top of stack
lea(neededBits, ptr[neededBits + eax * 8 + 8]); // add size of string in bits + EOS byte
// highestBit = max(highestBit, field->id)
mov(eax, field->id);
cmp(eax, highestBit);
cmovg(highestBit, eax);
EndIf();
}
CDeltaClearMarkFieldsJIT::Result CDeltaTestDeltaJIT::main(Addr src, Addr dst, Addr deltaJit)
{
/*
Registers usage:
esi = src
edi = dst
ebx = neededBits;
ebp = highestBit;
xmm0-xmm2: loaded src
xmm3-xmm5: loaded dst
xmm6: temp
*/
mov(esi, ptr[src]);
mov(edi, ptr[dst]);
// neededBits 0; highestBit = -1
xor_(neededBits, neededBits);
xor_(highestBit, highestBit);
dec(highestBit);
// can save some operations
highest_id = 0;
// check changed fields
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateBlocks))(jitdesc);
#ifdef REHLDS_FIXES
// check changed strings
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateStrings))(jitdesc);
#endif
If(highestBit >= 0);
//neededBits += highestBit / 8 * 8 + 8;
shr(highestBit, 3);
lea(neededBits, ptr[neededBits + highestBit * 8 + 8]);
EndIf();
return neededBits;
}
// Binds a delta description to its two generated functions.
// Only these three members are set here; the mask members are not touched.
CDeltaJit::CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc, CDeltaTestDeltaJIT* _testDeltaFunc)
	: cleanMarkCheckFunc(_cleanMarkCheckFunc)
	, testDeltaFunc(_testDeltaFunc)
	, delta(_delta)
{
}
// Releases both generated functions owned by this object.
// Each pointer is deleted independently: previously testDeltaFunc was released only
// when cleanMarkCheckFunc happened to be non-null. Deleting a null pointer is a no-op.
CDeltaJit::~CDeltaJit() {
	delete cleanMarkCheckFunc;
	cleanMarkCheckFunc = NULL;

	delete testDeltaFunc;
	testDeltaFunc = NULL;
}
@ -491,20 +645,29 @@ void CDeltaJitRegistry::CreateAndRegisterDeltaJIT(delta_t* delta) {
deltajitdata_t data;
DELTAJIT_CreateDescription(delta, data);
CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc = new CDeltaClearMarkFieldsJIT(&data, DJ_M_CHECK);
CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc = new CDeltaClearMarkFieldsJIT(&data);
cleanMarkCheckFunc->Assemble();
cleanMarkCheckFunc->jitdesc = NULL;
CDeltaJit* deltaJit = new CDeltaJit(delta, cleanMarkCheckFunc);
CDeltaTestDeltaJIT* testDeltaFunc = new CDeltaTestDeltaJIT(&data);
testDeltaFunc->Assemble();
testDeltaFunc->jitdesc = NULL;
// align to 16
CDeltaJit* deltaJit = new CDeltaJit(delta, cleanMarkCheckFunc, testDeltaFunc);
RegisterDeltaJit(delta, deltaJit);
}
// Returns the JIT object registered for pFields.
// callsite: name of the calling function, used only in the error message.
// Without REHLDS_FIXES a missing entry is reported through rehlds_syserror;
// with REHLDS_FIXES the registry result is returned unchecked and may be NULL.
CDeltaJit* DELTAJit_LookupDeltaJit(const char* callsite, delta_t *pFields) {
	CDeltaJit* const registered = g_DeltaJitRegistry.GetJITByDelta(pFields);

#ifndef REHLDS_FIXES
	// only for testing
	if (registered == NULL) {
		rehlds_syserror("%s: JITted delta encoder not found for delta %p", callsite, pFields);
		return NULL;
	}
#endif // REHLDS_FIXES

	return registered;
}
@ -515,6 +678,13 @@ NOINLINE int DELTAJit_Fields_Clear_Mark_Check(unsigned char *from, unsigned char
return func(from, to, deltaJit, pForceMarkMask);
}
// Runs the generated TestDelta function registered for pFields on the two
// structures and returns its result.
// NOTE(review): the lookup result is dereferenced without a null check; under
// REHLDS_FIXES DELTAJit_LookupDeltaJit does not report a missing entry - confirm
// that every delta passed here has been registered.
NOINLINE int DELTAJit_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields)
{
	CDeltaJit* const jit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
	return (*jit->testDeltaFunc)(from, to, jit);
}
void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount) {
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
@ -523,32 +693,22 @@ void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount) {
*bytecount = deltaJit->markedFieldsMaskSize;
}
void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber) {
void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber)
{
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
if (fieldNumber > 31)
deltaJit->marked_fields_mask.u32[1] |= (1 << (fieldNumber & 0x1F));
else
deltaJit->marked_fields_mask.u32[0] |= (1 << fieldNumber);
deltaJit->marked_fields_mask.u32[fieldNumber >> 5] |= (1 << (fieldNumber & 31));
}
void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber) {
void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber)
{
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
if (fieldNumber > 31)
deltaJit->marked_fields_mask.u32[1] &= ~(1 << (fieldNumber & 0x1F));
else
deltaJit->marked_fields_mask.u32[0] &= ~(1 << fieldNumber);
deltaJit->marked_fields_mask.u32[fieldNumber >> 5] &= ~(1 << (fieldNumber & 31));
}
qboolean DELTAJit_IsFieldMarked(delta_t* pFields, int fieldNumber) {
qboolean DELTAJit_IsFieldMarked(delta_t* pFields, int fieldNumber)
{
CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields);
if (fieldNumber > 31)
return deltaJit->marked_fields_mask.u32[1] & (1 << (fieldNumber & 0x1F));
return deltaJit->marked_fields_mask.u32[0] & (1 << fieldNumber);
return deltaJit->marked_fields_mask.u32[fieldNumber >> 5] & (1 << (fieldNumber & 31));
}
uint64 DELTAJit_GetOriginalMask(delta_t* pFields) {

View File

@ -11,6 +11,7 @@ struct deltajit_field {
unsigned int length;
int type;
unsigned int numBlocks;
unsigned int significantBits;
};
struct deltajit_memblock_field {
@ -42,12 +43,6 @@ struct deltajitdata_t {
deltajit_memblock_itr_t itrBlocks[DELTAJIT_MAX_BLOCKS];
};
enum deltajit_marked_count_type_t {
DJ_M_DONT_COUNT,
DJ_M_CHECK,
//DJ_M_COUNT, //not implemented yet
};
class CDeltaJit;
class CDeltaJitRegistry {
@ -74,6 +69,7 @@ union delta_marked_mask_t {
extern CDeltaJitRegistry g_DeltaJitRegistry;
extern int DELTAJit_Fields_Clear_Mark_Check(unsigned char *from, unsigned char *to, delta_t *pFields, void* pForceMarkMask);
extern int DELTAJit_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields);
extern void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount);
extern void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber);
extern void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber);

View File

@ -561,6 +561,9 @@ extern delta_t *g_pentitydelta;
extern delta_t *g_pcustomentitydelta;
extern delta_t *g_pclientdelta;
extern delta_t *g_pweapondelta;
#ifdef REHLDS_OPT_PEDANTIC
extern delta_t *g_pusercmddelta;
#endif
extern unsigned char fatpvs[1024];

View File

@ -44,6 +44,9 @@ delta_t *g_pentitydelta;
delta_t *g_pcustomentitydelta;
delta_t *g_pclientdelta;
delta_t *g_pweapondelta;
#ifdef REHLDS_OPT_PEDANTIC
delta_t *g_pusercmddelta;
#endif
int hashstrings_collisions;
@ -415,14 +418,18 @@ void SV_ReallocateDynamicData(void)
if (g_moved_edict)
{
Con_Printf("Reallocate on moved_edict\n");
// TODO: Free memory to prevent mem leaks?
#ifdef REHLDS_FIXES
Mem_Free(g_moved_edict);
#endif
}
g_moved_edict = (edict_t **)Mem_ZeroMalloc(sizeof(edict_t *) * nSize);
if (g_moved_from)
{
Con_Printf("Reallocate on moved_from\n");
// TODO: Free memory to prevent mem leaks?
#ifdef REHLDS_FIXES
Mem_Free(g_moved_from);
#endif
}
g_moved_from = (vec3_t *)Mem_ZeroMalloc(sizeof(vec3_t) * nSize);
}
@ -437,7 +444,9 @@ void SV_AllocClientFrames(void)
if (cl->frames)
{
Con_DPrintf("Allocating over frame pointer?\n");
// TODO: Free memory to prevent mem leaks?
#ifdef REHLDS_FIXES
Mem_Free(cl->frames);
#endif
}
cl->frames = (client_frame_t *)Mem_ZeroMalloc(sizeof(client_frame_t) * SV_UPDATE_BACKUP);
}
@ -4084,23 +4093,21 @@ void SV_InvokeCallback(void)
int SV_FindBestBaseline(int index, entity_state_t ** baseline, entity_state_t *to, int num, qboolean custom)
{
int bestbitnumber;
delta_t* delta;
if (custom)
{
bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], g_pcustomentitydelta);
delta = g_pcustomentitydelta;
}
else
{
if (SV_IsPlayerIndex(num))
{
bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], g_pplayerdelta);
}
delta = g_pplayerdelta;
else
{
bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], g_pentitydelta);
}
delta = g_pentitydelta;
}
bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], delta);
bestbitnumber -= 6;
int i = 0;
@ -4111,21 +4118,8 @@ int SV_FindBestBaseline(int index, entity_state_t ** baseline, entity_state_t *t
{
if (to[index].entityType == to[i].entityType)
{
if (custom)
{
bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], g_pcustomentitydelta);
}
else
{
if (SV_IsPlayerIndex(num))
{
bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], g_pplayerdelta);
}
else
{
bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], g_pentitydelta);
}
}
bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], delta);
if (bitnumber < bestbitnumber)
{
bestbitnumber = bitnumber;
@ -6907,6 +6901,12 @@ void SV_InitDeltas(void)
if (!g_peventdelta)
Sys_Error("No event_t encoder on server!\n");
#ifdef REHLDS_OPT_PEDANTIC
g_pusercmddelta = SV_LookupDelta("usercmd_t");
if (!g_pusercmddelta)
Sys_Error("No usercmd_t encoder on server!\n");
#endif
#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES)
g_DeltaJitRegistry.CreateAndRegisterDeltaJIT(&g_MetaDelta[0]);
#endif

View File

@ -283,7 +283,6 @@ areanode_t *SV_CreateAreaNode(int depth, vec_t *mins, vec_t *maxs)
anode->children[1] = SV_CreateAreaNode(depth + 1, mins1, maxs1);
return anode;
}
/* <ca877> ../engine/world.c:353 */
@ -375,6 +374,7 @@ void SV_TouchLinks(edict_t *ent, areanode_t *node)
}
/* <ca8f2> ../engine/world.c:467 */
#ifndef REHLDS_OPT_PEDANTIC
void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode)
{
mplane_t *splitplane;
@ -435,6 +435,87 @@ void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode)
if (sides & 2)
SV_FindTouchedLeafs(ent, node->children[1], topnode);
}
#else // REHLDS_OPT_PEDANTIC
// unrolled some tail recursion
// Walks the BSP tree from `node` and records in ent->leafnums every non-solid leaf
// reached by following the entity's absmin/absmax box across the split planes.
// When more than MAX_ENT_LEAFS leafs are hit, ent->num_leafs is set to
// MAX_ENT_LEAFS + 1. *topnode receives the index of the first node whose plane
// splits the box, if it still holds -1 at that point.
//
// For axis-aligned planes (type < 3) the side test is done inline and the last
// descent is turned into a loop iteration instead of a recursive call.
void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode)
{
	for (;;)
	{
		if (node->contents == CONTENTS_SOLID)
			return;

		// negative contents: this node is a leaf
		if (node->contents < 0)
		{
			if (ent->num_leafs >= MAX_ENT_LEAFS)
			{
				ent->num_leafs = MAX_ENT_LEAFS + 1;
				return;
			}

			mleaf_t *hitLeaf = (mleaf_t *)node;
			int leafIndex = hitLeaf - g_psv.worldmodel->leafs - 1;
			ent->leafnums[ent->num_leafs] = leafIndex;
			ent->num_leafs++;
			return;
		}

		mplane_t *plane = node->plane;

		// non-axial plane: use the generic box test and recurse into the touched sides
		if (plane->type >= 3)
		{
			int touched = BoxOnPlaneSide(ent->v.absmin, ent->v.absmax, plane);

			if (touched == 3 && *topnode == -1)
				*topnode = node - g_psv.worldmodel->nodes;

			if (touched & 1)
				SV_FindTouchedLeafs(ent, node->children[0], topnode);

			if (touched & 2)
				SV_FindTouchedLeafs(ent, node->children[1], topnode);

			return;
		}

		// axial plane, box entirely on the front side (sides = 1)
		if (!(plane->dist > ent->v.absmin[plane->type]))
		{
			node = node->children[0];
			continue;
		}

		// plane cuts the box (sides = 3): front child by recursion, back child by looping
		if (plane->dist < ent->v.absmax[plane->type])
		{
			if (*topnode == -1)
				*topnode = node - g_psv.worldmodel->nodes;

			SV_FindTouchedLeafs(ent, node->children[0], topnode);
		}

		// sides = 2 or the second half of sides = 3
		node = node->children[1];
	}
}
#endif // REHLDS_OPT_PEDANTIC
/* <caab0> ../engine/world.c:517 */
void SV_LinkEdict(edict_t *ent, qboolean touch_triggers)
@ -547,58 +628,79 @@ int SV_LinkContents(areanode_t *node, const vec_t *pos)
vec3_t localPosition;
vec3_t offset;
for (l = node->solid_edicts.next; l != &node->solid_edicts; l = next)
#ifdef REHLDS_OPT_PEDANTIC
// unroll tail recursion
while (1)
#endif
{
next = l->next;
touch = (edict_t *)((char *)l - offsetof(edict_t, area));
if (!touch->v.solid)
for (l = node->solid_edicts.next; l != &node->solid_edicts; l = next)
{
if (touch->v.groupinfo)
next = l->next;
touch = (edict_t *)((char *)l - offsetof(edict_t, area));
if (!touch->v.solid)
{
if (g_groupop)
if (touch->v.groupinfo)
{
if (g_groupop == GROUP_OP_NAND && (touch->v.groupinfo & g_groupmask))
continue;
if (g_groupop)
{
if (g_groupop == GROUP_OP_NAND && (touch->v.groupinfo & g_groupmask))
continue;
}
else
{
if (!(touch->v.groupinfo & g_groupmask))
continue;
}
}
else
pModel = g_psv.models[touch->v.modelindex];
if (pModel
&& !pModel->type
&& pos[0] <= (double)touch->v.absmax[0]
&& pos[1] <= (double)touch->v.absmax[1]
&& pos[2] <= (double)touch->v.absmax[2]
&& pos[0] >= (double)touch->v.absmin[0]
&& pos[1] >= (double)touch->v.absmin[1]
&& pos[2] >= (double)touch->v.absmin[2])
{
if (!(touch->v.groupinfo & g_groupmask))
continue;
int contents = touch->v.skin;
if (contents < -100 || contents > 100)
Con_DPrintf("Invalid contents on trigger field: %s\n", &pr_strings[touch->v.classname]);
hull = SV_HullForBsp(touch, vec3_origin, vec3_origin, offset);
localPosition[0] = pos[0] - offset[0];
localPosition[1] = pos[1] - offset[1];
localPosition[2] = pos[2] - offset[2];
if (SV_HullPointContents(hull, hull->firstclipnode, localPosition) != -1)
return contents;
}
}
pModel = g_psv.models[touch->v.modelindex];
if (pModel
&& !pModel->type
&& pos[0] <= (double)touch->v.absmax[0]
&& pos[1] <= (double)touch->v.absmax[1]
&& pos[2] <= (double)touch->v.absmax[2]
&& pos[0] >= (double)touch->v.absmin[0]
&& pos[1] >= (double)touch->v.absmin[1]
&& pos[2] >= (double)touch->v.absmin[2])
{
int contents = touch->v.skin;
if (contents < -100 || contents > 100)
Con_DPrintf("Invalid contents on trigger field: %s\n", &pr_strings[touch->v.classname]);
hull = SV_HullForBsp(touch, vec3_origin, vec3_origin, offset);
localPosition[0] = pos[0] - offset[0];
localPosition[1] = pos[1] - offset[1];
localPosition[2] = pos[2] - offset[2];
if (SV_HullPointContents(hull, hull->firstclipnode, localPosition) != -1)
return contents;
}
}
if (node->axis == -1)
return -1;
#ifndef REHLDS_OPT_PEDANTIC
if (pos[node->axis] > node->dist)
return SV_LinkContents(node->children[0], pos);
if (pos[node->axis] < node->dist)
return SV_LinkContents(node->children[1], pos);
#else // REHLDS_OPT_PEDANTIC
if (pos[node->axis] > node->dist)
{
node = node->children[0];
continue;
}
if (pos[node->axis] < node->dist)
{
node = node->children[1];
continue;
}
break;
#endif // REHLDS_OPT_PEDANTIC
}
if (node->axis == -1)
return -1;
if (pos[node->axis] > node->dist)
return SV_LinkContents(node->children[0], pos);
if (pos[node->axis] < node->dist)
return SV_LinkContents(node->children[1], pos);
return -1;
}

View File

@ -166,22 +166,9 @@ NOINLINE void _GetBitmaskAndBytecount(delta_t* delta, int* bits, int* bytecount,
}
}
NOINLINE void _CompareDeltaResults(const char* callsite, delta_res_t* def, delta_res_t* jit, int testscount)
{
for (int i = 0; i < testscount; i++)
{
if (!!def[i].sendfields != !!jit[i].sendfields)
rehlds_syserror("%s: Test %i: !!sendfields not equals %i|%i", callsite, i, !!def[i].sendfields, !!jit[i].sendfields);
if (memcmp(def[i].bits, jit[i].bits, 8))
rehlds_syserror("%s: Test %i: bits not equals %p.%p|%p.%p", callsite, i, def[i].bits[0], def[i].bits[1], jit[i].bits[0], jit[i].bits[1]);
if (def[i].bytecount != jit[i].bytecount)
rehlds_syserror("%s: Test %i: bytecount not equal %i|%i", callsite, i, def[i].bytecount, jit[i].bytecount);
}
}
NOINLINE delta_t* _CreateTestDeltaDesc() {
static delta_description_t _fields[32];
delta_test_struct_t d; // "use" d variable
delta_test_struct_t d; d; // "use" d variable
_InitDeltaField(&_fields[0], 0x00, DT_BYTE, "b_00", offsetof(delta_test_struct_t, b_00), 1, 8, 1.0f, 1.0f);
_InitDeltaField(&_fields[1], 0x01, DT_BYTE, "b_01", offsetof(delta_test_struct_t, b_01), 1, 8, 1.0f, 1.0f);
@ -331,3 +318,51 @@ TEST(MarkFieldsTest_TimeWindow, Delta, 1000) {
SV_Shutdown();
}
// Checks the bit count returned by DELTA_TestDelta for four target structures
// against hand-computed values. Each expected value is the sum of significant_bits
// of the descriptors listed for the changed fields, plus (N / 8 * 8 + 8) where N is
// the highest descriptor index expected to change; an unchanged structure yields 0.
TEST(TestDelta_Test, Delta, 1000)
{
delta_t* delta = _CreateTestDeltaDesc();
delta_test_struct_t testdata[4], from;
int result[4];
// start with all structures filled identically, then modify selected fields
for (size_t i = 0; i < 4; i++)
_FillTestDelta(&testdata[i], 0xCC);
_FillTestDelta(&from, 0xCC);
// equal
result[0] = 0;
// change byte + short + float
testdata[1].b_01 = 1;
testdata[1].s_12 = 1.0;
testdata[1].f_08 = 1.0;
result[1] = delta->pdd[1].significant_bits + delta->pdd[8].significant_bits + delta->pdd[4].significant_bits + (8 / 8 * 8 + 8);
// change float + float + string
testdata[2].f_18 = 2.0;
testdata[2].wb_20 = 2.0;
strcpy(testdata[2].s_24, "TestDelta_Test" );
// the changed string contributes its length in bits plus 8 only under REHLDS_FIXES
#ifdef REHLDS_FIXES
result[2] = delta->pdd[10].significant_bits + delta->pdd[12].significant_bits + strlen(testdata[2].s_24) * 8 + 8 + (13 / 8 * 8 + 8);
#else
result[2] = delta->pdd[10].significant_bits + delta->pdd[12].significant_bits + (13 / 8 * 8 + 8);
#endif
// change byte + int + float + short
testdata[3].b_4D = 4;
testdata[3].i_14 = 4;
testdata[3].w8_0C = 4.0;
testdata[3].s_12 = 4;
result[3] = delta->pdd[14].significant_bits + delta->pdd[9].significant_bits + delta->pdd[5].significant_bits + delta->pdd[8].significant_bits + (14 / 8 * 8 + 8);
// every target is tested against the same untouched `from` structure
for (size_t i = 0; i < 4; i++)
{
int tested = DELTA_TestDelta((uint8 *)&from, (uint8 *)&testdata[i], delta);
if (tested != result[i])
rehlds_syserror("TestDelta_Test: returned bitcount %i is not equal to true value %i", tested, result[i]);
}
SV_Shutdown();
}