diff --git a/rehlds/engine/common.cpp b/rehlds/engine/common.cpp index d5f9989..9d19ede 100644 --- a/rehlds/engine/common.cpp +++ b/rehlds/engine/common.cpp @@ -1253,9 +1253,12 @@ NOXREF float MSG_ReadHiresAngle(void) void MSG_ReadUsercmd(usercmd_t *to, usercmd_t* from) { - delta_t *pdesc = SV_LookupDelta("usercmd_t"); MSG_StartBitReading(&net_message); - DELTA_ParseDelta((byte *)from, (byte *)to, pdesc); +#ifdef REHLDS_OPT_PEDANTIC + DELTA_ParseDelta((byte *)from, (byte *)to, g_pusercmddelta); +#else + DELTA_ParseDelta((byte *)from, (byte *)to, SV_LookupDelta("usercmd_t")); +#endif MSG_EndBitReading(&net_message); COM_NormalizeAngles(to->viewangles); } diff --git a/rehlds/engine/delta.cpp b/rehlds/engine/delta.cpp index bcac7f1..aef81f4 100644 --- a/rehlds/engine/delta.cpp +++ b/rehlds/engine/delta.cpp @@ -422,6 +422,9 @@ void DELTA_ClearFlags(delta_t *pFields) /* <241d2> ../engine/delta.c:473 */ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields) { +#if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES) + return DELTAJit_TestDelta(from, to, pFields); +#else int i; char *st1, *st2; delta_description_t *pTest; @@ -452,9 +455,10 @@ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields) break; #ifdef REHLDS_FIXES // don't use multiplier when checking, to increase performance + // check values binary like it does in jit case DT_TIMEWINDOW_8: case DT_TIMEWINDOW_BIG: - different = (int32)(*(float *)&from[pTest->fieldOffset]) != (int32)(*(float *)&to[pTest->fieldOffset]); + different = (*(int32 *)&from[pTest->fieldOffset]) != (*(int32 *)&to[pTest->fieldOffset]); break; #else case DT_TIMEWINDOW_8: @@ -471,8 +475,10 @@ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields) { #ifdef REHLDS_FIXES different = TRUE; -#endif // REHLDS_FIXES + length = Q_strlen(st2) * 8; +#else // REHLDS_FIXES length = Q_strlen(st2); +#endif // REHLDS_FIXES } break; default: @@ -487,12 +493,13 @@ int DELTA_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields) } } - if (neededBits != -1) + if (highestBit != -1) { neededBits += highestBit / 8 * 8 + 8; } return neededBits; +#endif } /* <24309> ../engine/delta.c:602 */ @@ -749,7 +756,6 @@ qboolean DELTA_CheckDelta(unsigned char *from, unsigned char *to, delta_t *pFiel NOINLINE qboolean DELTA_WriteDelta(unsigned char *from, unsigned char *to, qboolean force, delta_t *pFields, void(*callback)(void)) { qboolean sendfields; - int bytecount; #if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES) sendfields = DELTAJit_Fields_Clear_Mark_Check(from, to, pFields, NULL); diff --git a/rehlds/engine/delta_jit.cpp b/rehlds/engine/delta_jit.cpp index 34baf51..b2efbf6 100644 --- a/rehlds/engine/delta_jit.cpp +++ b/rehlds/engine/delta_jit.cpp @@ -27,7 +27,7 @@ unsigned int DELTAJIT_GetFieldSize(delta_description_t* desc) { return 4; case DT_STRING: - return desc->fieldSize; + return 0; default: rehlds_syserror("%s: Unknown delta field type %d", __FUNCTION__, desc->fieldType); @@ -84,6 +84,7 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) { jitField->offset = fieldDesc->fieldOffset; jitField->type = fieldDesc->fieldType; jitField->length = fieldSize; + jitField->significantBits = fieldDesc->significant_bits; if (fieldDesc->fieldType != DT_STRING) { bool firstBlock = true; @@ -127,52 +128,209 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) { } class CDeltaClearMarkFieldsJIT; +class CDeltaTestDeltaJIT; class CDeltaJit { public: CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc; + CDeltaTestDeltaJIT* testDeltaFunc; delta_t* delta; - + delta_marked_mask_t marked_fields_mask; + delta_marked_mask_t originalMarkedFieldsMask; //mask based on data, before calling the conditional encoder int markedFieldsMaskSize; - delta_marked_mask_t marked_fields_mask; - int mfm_sse_highbits[2]; //High 64 bits for manipulating marked_fields_mask via SSE registers - - delta_marked_mask_t originalMarkedFieldsMask; //mask based on data, before calling the conditional encoder - int omfm_sse_highbits[2]; //High 64 bits for manipulating marked_fields_mask via SSE registers - - CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc); + CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc, CDeltaTestDeltaJIT* _testDeltaFunc); virtual ~CDeltaJit(); }; -class CDeltaClearMarkFieldsJIT : public jitasm::function { +class CDeltaCheckJIT : public jitasm::function +{ public: - deltajitdata_t *jitdesc; - deltajit_marked_count_type_t countType; - - XmmReg marked_fields_mask = xmm7; - - - CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc, deltajit_marked_count_type_t _countType) - : jitdesc(_jitdesc), countType(_countType) { - } + void main() {} + virtual void onFieldChecked(deltajit_field* jitField) {} + virtual void onStringChecked(deltajit_field* jitField) {} void checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField); - Result main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask); - void processStrings(Addr src, Addr dst); - void callConditionalEncoder(Addr src, Addr dst, Addr deltaJit); - void calculateBytecount(); + void iterateBlocks(deltajitdata_t *jitdesc); + void iterateStrings(deltajitdata_t *jitdesc); }; -void CDeltaClearMarkFieldsJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField) { - test(mask, (uint16)jitField->mask); +void CDeltaCheckJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField) +{ + test(mask, jitField->mask); setnz(al); movzx(edx, al); } +void CDeltaCheckJIT::iterateBlocks(deltajitdata_t *jitdesc) +{ +#ifndef REHLDS_FIXES + sub(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking +#endif + + /* + Registers usage: + esi = src + edi = dst + ecx = blockMask + xmm0-xmm2: loaded src + xmm3-xmm5: loaded dst + xmm6: temp + Already initialized: + esi, edi + */ + + int dataXmmCounter = 0; // from 0 to 2 => 3 pairs of registers + jitasm::Frontend::XmmReg src_xmm[3] = {xmm0, xmm1, xmm2}; + jitasm::Frontend::XmmReg dst_xmm[3] = {xmm3, xmm4, xmm5}; + + auto xmm_tmp = xmm6; + + if (jitdesc->numItrBlocks > 0) { + movdqu(src_xmm[0], xmmword_ptr[esi + ( jitdesc->itrBlocks[0].memblockId * 16 )]); + movdqu(dst_xmm[0], xmmword_ptr[edi + ( jitdesc->itrBlocks[0].memblockId * 16 )]); + } + if (jitdesc->numItrBlocks > 1) { + movdqu(src_xmm[1], xmmword_ptr[esi + ( jitdesc->itrBlocks[1].memblockId * 16 )]); + movdqu(dst_xmm[1], xmmword_ptr[edi + ( jitdesc->itrBlocks[1].memblockId * 16 )]); + } + if (jitdesc->numItrBlocks > 2) { + movdqu(src_xmm[2], xmmword_ptr[esi + ( jitdesc->itrBlocks[2].memblockId * 16 )]); + movdqu(dst_xmm[2], xmmword_ptr[edi + ( jitdesc->itrBlocks[2].memblockId * 16 )]); + } + + auto blockMask = ecx; + xor_(blockMask, blockMask); + + for (unsigned int i = 0; i < jitdesc->numItrBlocks; i++) { + auto block = jitdesc->itrBlocks[i].memblock; + auto itrBlock = &jitdesc->itrBlocks[i]; + + //do far prefetch + if (itrBlock->prefetchBlockId != -1) { + prefetcht0(byte_ptr[esi + ( itrBlock->prefetchBlockId * 16 )]); + prefetcht0(byte_ptr[edi + ( itrBlock->prefetchBlockId * 16 )]); + } + + // create mask for changed bytes + pcmpeqb(src_xmm[dataXmmCounter], dst_xmm[dataXmmCounter]); + pmovmskb(blockMask, src_xmm[dataXmmCounter]); + not_(blockMask); + + //preload next blocks + if (i + 3 < jitdesc->numItrBlocks) { + movdqu(src_xmm[dataXmmCounter], xmmword_ptr[esi + ( jitdesc->itrBlocks[i + 3].memblockId * 16 )]); + movdqu(dst_xmm[dataXmmCounter], xmmword_ptr[edi + ( jitdesc->itrBlocks[i + 3].memblockId * 16 )]); + } + + dataXmmCounter++; + if (dataXmmCounter > 2) { + dataXmmCounter -= 3; + } + + // iterate fields contained in block + for (unsigned int j = 0; j < block->numFields; j++) { + auto jitField = &block->fields[j]; + +#ifndef REHLDS_FIXES + // precise floats comparison + if (jitField->field->type == DT_TIMEWINDOW_8 || jitField->field->type == DT_TIMEWINDOW_BIG) { + if (jitField->last) { + + float multiplier = ( jitField->field->type == DT_TIMEWINDOW_8 ) ? 100.0f : 1000.0f; + uint32 m32 = *(uint32*)( &multiplier ); + mov(dword_ptr[esp], m32); + + fld(dword_ptr[esi + jitField->field->offset]); + fld(dword_ptr[esp]); + fmulp(); + fstp(qword_ptr[esp + 4]); + cvttsd2si(eax, mmword_ptr[esp + 4]); + + fld(dword_ptr[edi + jitField->field->offset]); + fld(dword_ptr[esp]); + fmulp(); + fstp(qword_ptr[esp + 4]); + cvttsd2si(edx, mmword_ptr[esp + 4]); + + cmp(eax, edx); + setne(al); + movzx(edx, al); + + } else { + continue; + } + } else { + checkFieldMask(blockMask, jitField); + } +#else + checkFieldMask(blockMask, jitField); +#endif + // call handler + onFieldChecked(jitField->field); + } + } + +#ifndef REHLDS_FIXES + add(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking +#endif // REHLDS_FIXES +} + +void CDeltaCheckJIT::iterateStrings(deltajitdata_t *jitdesc) +{ + // This generator expects that following registers are already initialized: + // esi = src + // edi = dst + + for (unsigned int i = 0; i < jitdesc->numFields; i++) { + auto jitField = &jitdesc->fields[i]; + if (jitField->type != DT_STRING) + continue; + + // will be parallel + lea(eax, ptr[esi + jitField->offset]); + lea(edx, ptr[edi + jitField->offset]); + + push(eax); + push(edx); + mov(ecx, (size_t)&Q_stricmp); + call(ecx); + + // call handler + onStringChecked(jitField); + + add(esp, 8); + } +} + +class CDeltaClearMarkFieldsJIT : public jitasm::function +{ +public: + CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc); + + Result main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask); + + void callConditionalEncoder(Addr src, Addr dst, Addr deltaJit); + void calculateBytecount(); + + // first two virtual functions must be same as in CDeltaCheckJIT + virtual void onFieldChecked(deltajit_field* jitField); + virtual void onStringChecked(deltajit_field* jitField); + + deltajitdata_t *jitdesc; + +private: + jitasm::XmmReg xmm_tmp = xmm6; + jitasm::XmmReg marked_fields_mask = xmm7; +}; + +CDeltaClearMarkFieldsJIT::CDeltaClearMarkFieldsJIT(deltajitdata_t *_jitdesc) : jitdesc(_jitdesc) +{ +} + void CDeltaClearMarkFieldsJIT::callConditionalEncoder(Addr src, Addr dst, Addr deltaJit) { - //This generator expects that following registers are already initialized: + // This generator expects that following registers are already initialized: // esi = src // edi = dst @@ -192,8 +350,8 @@ void CDeltaClearMarkFieldsJIT::callConditionalEncoder(Addr src, Addr dst, Addr d } void CDeltaClearMarkFieldsJIT::calculateBytecount() { - //This generator expects that following registers are already initialized: - //ebx = delta + // This generator expects that following registers are already initialized: + // ebx = delta size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask); mov(eax, dword_ptr[ebx + delta_markbits_offset]); @@ -260,12 +418,34 @@ void CDeltaClearMarkFieldsJIT::calculateBytecount() { mov(dword_ptr[ebx + delta_masksize_offset], edx); } +void CDeltaClearMarkFieldsJIT::onFieldChecked(deltajit_field* field) +{ + // This generator expects that following registers are already initialized: + // edx = is field changed + + movd(xmm_tmp, edx); // set bit in send mask + psllq(xmm_tmp, field->id); // shift left + por(marked_fields_mask, xmm_tmp); +} + +void CDeltaClearMarkFieldsJIT::onStringChecked(deltajit_field* field) +{ + // This generator expects that following registers are already initialized: + // eax = result of strings comparison + // ebx = deltaJit + + size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask); + + xor_(ecx, ecx); + test(eax, eax); + setnz(cl); + + shl(ecx, field->id & 31); + or_(ptr[ebx + delta_markbits_offset + ((field->id > 31) ? 4 : 0)], ecx); +} + CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask) { -#ifndef REHLDS_FIXES - sub(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking -#endif - /* Registers usage: esi = src @@ -273,129 +453,32 @@ CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr d xmm0-xmm2: loaded src xmm3-xmm5: loaded dst xmm6: temp - xmm7: marked fields mask + xmm7: marked fields mask */ mov(esi, ptr[src]); mov(edi, ptr[dst]); - - int dataXmmCounter = 0; // from 0 to 2 => 3 pairs of registers - jitasm::Frontend::XmmReg src_xmm[3] = { xmm0, xmm1, xmm2 }; - jitasm::Frontend::XmmReg dst_xmm[3] = { xmm3, xmm4, xmm5 }; - // overall mask is in xmm7 (marked_fields_mask) - auto xmm_tmp = xmm6; - - - if (jitdesc->numItrBlocks > 0) { - movdqu(src_xmm[0], xmmword_ptr[esi + (jitdesc->itrBlocks[0].memblockId * 16)]); - movdqu(dst_xmm[0], xmmword_ptr[edi + (jitdesc->itrBlocks[0].memblockId * 16)]); - } - if (jitdesc->numItrBlocks > 1) { - movdqu(src_xmm[1], xmmword_ptr[esi + (jitdesc->itrBlocks[1].memblockId * 16)]); - movdqu(dst_xmm[1], xmmword_ptr[edi + (jitdesc->itrBlocks[1].memblockId * 16)]); - } - if (jitdesc->numItrBlocks > 2) { - movdqu(src_xmm[2], xmmword_ptr[esi + (jitdesc->itrBlocks[2].memblockId * 16)]); - movdqu(dst_xmm[2], xmmword_ptr[edi + (jitdesc->itrBlocks[2].memblockId * 16)]); - } - - auto blockMask = ecx; - xor_(blockMask, blockMask); pxor(marked_fields_mask, marked_fields_mask); - for (unsigned int i = 0; i < jitdesc->numItrBlocks; i++) { - auto block = jitdesc->itrBlocks[i].memblock; - auto itrBlock = &jitdesc->itrBlocks[i]; - - //do far prefetch - if (itrBlock->prefetchBlockId != -1) { - prefetcht0(byte_ptr[esi + (itrBlock->prefetchBlockId * 16)]); - prefetcht0(byte_ptr[edi + (itrBlock->prefetchBlockId * 16)]); - } - - pcmpeqb(src_xmm[dataXmmCounter], dst_xmm[dataXmmCounter]); - pmovmskb(blockMask, src_xmm[dataXmmCounter]); - not_(blockMask); - - //preload next blocks - if (i + 3 < jitdesc->numItrBlocks) { - movdqu(src_xmm[dataXmmCounter], xmmword_ptr[esi + (jitdesc->itrBlocks[i + 3].memblockId * 16)]); - movdqu(dst_xmm[dataXmmCounter], xmmword_ptr[edi + (jitdesc->itrBlocks[i + 3].memblockId * 16)]); - } - - dataXmmCounter++; - if (dataXmmCounter > 2) { - dataXmmCounter -= 3; - } - - for (unsigned int j = 0; j < block->numFields; j++) { - auto jitField = &block->fields[j]; - -#ifndef REHLDS_FIXES - if (jitField->field->type == DT_TIMEWINDOW_8 || jitField->field->type == DT_TIMEWINDOW_BIG) { - if (jitField->last) { - - float multiplier = (jitField->field->type == DT_TIMEWINDOW_8) ? 100.0f : 1000.0f; - uint32 m32 = *(uint32*)(&multiplier); - mov(dword_ptr[esp], m32); - - fld(dword_ptr[esi + jitField->field->offset]); - fld(dword_ptr[esp]); - fmulp(); - fstp(qword_ptr[esp + 4]); - cvttsd2si(eax, mmword_ptr[esp + 4]); - - fld(dword_ptr[edi + jitField->field->offset]); - fld(dword_ptr[esp]); - fmulp(); - fstp(qword_ptr[esp + 4]); - cvttsd2si(edx, mmword_ptr[esp + 4]); - - cmp(eax, edx); - setne(al); - movzx(edx, al); - - } else { - continue; - } - } else { - checkFieldMask(blockMask, jitField); - } -#else - checkFieldMask(blockMask, jitField); -#endif - - // set bit in send mask - movd(xmm_tmp, edx); - psllq(xmm_tmp, jitField->field->id); - por(marked_fields_mask, xmm_tmp); - } - } + // check changed blocks + (reinterpret_cast(this)->*(&CDeltaCheckJIT::iterateBlocks))(jitdesc); //apply 'force mark' mask if it's present mov(eax, ptr[pForceMarkMask]); If(eax != 0); - //mask for cleaning garbage in high 64 bits - mov(edx, -1); - movd(xmm0, edx); - movd(xmm1, edx); - psllq(xmm0, 32); - por(xmm0, xmm1); - - movdqu(xmm_tmp, xmmword_ptr[eax]); - pand(xmm_tmp, xmm0); //clean high 64 bits - - por(marked_fields_mask, xmm_tmp); //apply the 'force' mask + movq(xmm_tmp, qword_ptr[eax]); + por(marked_fields_mask, xmm_tmp); EndIf(); size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask); //Save mask from SSE register to CDeltaJit::marked_fields_mask and CDeltaJit::originalMarkedFieldsMask mov(ebx, ptr[deltaJit]); - movdqu(xmmword_ptr[ebx + delta_markbits_offset], marked_fields_mask); - movdqu(xmmword_ptr[ebx + offsetof(CDeltaJit, originalMarkedFieldsMask)], marked_fields_mask); + movq(qword_ptr[ebx + delta_markbits_offset], marked_fields_mask); + movq(qword_ptr[ebx + offsetof(CDeltaJit, originalMarkedFieldsMask)], marked_fields_mask); - processStrings(src, dst); + // check changed strings + (reinterpret_cast(this)->*(&CDeltaCheckJIT::iterateStrings))(jitdesc); //emit conditional encoder call callConditionalEncoder(src, dst, deltaJit); @@ -413,54 +496,125 @@ CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr d mov(dword_ptr[ebx + delta_masksize_offset], edx); EndIf(); -#ifndef REHLDS_FIXES - add(esp, 12); //some local storage is required for precise DT_TIMEWINDOW marking -#endif // REHLDS_FIXES - return edx; } -void CDeltaClearMarkFieldsJIT::processStrings(Addr src, Addr dst) { - //This generator expects that following registers are already initialized: - // esi = src - // edi = dst - // ebx = deltaJit +class CDeltaTestDeltaJIT : public jitasm::function +{ +public: + CDeltaTestDeltaJIT(deltajitdata_t *_jitdesc); - size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask); + Result main(Addr src, Addr dst, Addr deltaJit); - //strings - for (unsigned int i = 0; i < jitdesc->numFields; i++) { - auto jitField = &jitdesc->fields[i]; - if (jitField->type != DT_STRING) - continue; + virtual void onFieldChecked(deltajit_field* jitField); + virtual void onStringChecked(deltajit_field* jitField); - // will be parallel - lea(eax, ptr[esi + jitField->offset]); - lea(edx, ptr[edi + jitField->offset]); + deltajitdata_t *jitdesc; - push(eax); - push(edx); - mov(ecx, (size_t)&Q_stricmp); - call(ecx); - add(esp, 8); - xor_(ecx, ecx); - test(eax, eax); - setnz(cl); +private: + jitasm::Reg32 neededBits = ebx; + jitasm::Reg32 highestBit = ebp; + size_t highest_id; +}; - shl(ecx, jitField->id & 31); - or_(ptr[ebx + delta_markbits_offset + ((jitField->id > 31) ? 4 : 0)], ecx); - } +CDeltaTestDeltaJIT::CDeltaTestDeltaJIT(deltajitdata_t *_jitdesc) : jitdesc(_jitdesc) +{ } -CDeltaJit::CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc) { +void CDeltaTestDeltaJIT::onFieldChecked(deltajit_field* field) +{ + mov(eax, field->id); + + if (field->id >= highest_id) + { + // setted highestBit can't be > field->id + highest_id = field->id; + } + else + { + // select greatest from field->id and highestBit + cmp(eax, highestBit); + cmovl(eax, highestBit); // eax = id < highestBit ? highestBit : id + } + + // edx = 1 if field changed, otherwise 0 + neg(edx); + cmovnz(highestBit, eax); + and_(edx, field->significantBits); + add(neededBits, edx); +} + +void CDeltaTestDeltaJIT::onStringChecked(deltajit_field* field) +{ + // This generator expects that following registers are already initialized: + // eax = result of strings comparison + // ebp = highestBit + // [esp] = dest string + + If(eax != 0); // changed + mov(ecx, (size_t)&Q_strlen); + call(ecx); // dest already in top of stack + + lea(neededBits, ptr[neededBits + eax * 8 + 8]); // add size of string in bits + EOS byte + + mov(eax, field->id); + cmp(eax, highestBit); + cmovg(highestBit, eax); + EndIf(); +} + +CDeltaClearMarkFieldsJIT::Result CDeltaTestDeltaJIT::main(Addr src, Addr dst, Addr deltaJit) +{ + /* + Registers usage: + esi = src + edi = dst + ebx = neededBits; + ebp = highestBit; + xmm0-xmm2: loaded src + xmm3-xmm5: loaded dst + xmm6: temp + */ + mov(esi, ptr[src]); + mov(edi, ptr[dst]); + + // neededBits 0; highestBit = -1 + xor_(neededBits, neededBits); + xor_(highestBit, highestBit); + dec(highestBit); + + // can save some operations + highest_id = 0; + + // check changed fields + (reinterpret_cast(this)->*(&CDeltaCheckJIT::iterateBlocks))(jitdesc); + +#ifdef REHLDS_FIXES + // check changed strings + (reinterpret_cast(this)->*(&CDeltaCheckJIT::iterateStrings))(jitdesc); +#endif + + If(highestBit >= 0); + //neededBits += highestBit / 8 * 8 + 8; + shr(highestBit, 3); + lea(neededBits, ptr[neededBits + highestBit * 8 + 8]); + EndIf(); + + return neededBits; +} + +CDeltaJit::CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckFunc, CDeltaTestDeltaJIT* _testDeltaFunc) { delta = _delta; cleanMarkCheckFunc = _cleanMarkCheckFunc; + testDeltaFunc = _testDeltaFunc; } CDeltaJit::~CDeltaJit() { if (cleanMarkCheckFunc) { delete cleanMarkCheckFunc; + delete testDeltaFunc; cleanMarkCheckFunc = NULL; + testDeltaFunc = NULL; } } @@ -491,20 +645,29 @@ void CDeltaJitRegistry::CreateAndRegisterDeltaJIT(delta_t* delta) { deltajitdata_t data; DELTAJIT_CreateDescription(delta, data); - CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc = new CDeltaClearMarkFieldsJIT(&data, DJ_M_CHECK); + CDeltaClearMarkFieldsJIT* cleanMarkCheckFunc = new CDeltaClearMarkFieldsJIT(&data); cleanMarkCheckFunc->Assemble(); cleanMarkCheckFunc->jitdesc = NULL; - CDeltaJit* deltaJit = new CDeltaJit(delta, cleanMarkCheckFunc); + CDeltaTestDeltaJIT* testDeltaFunc = new CDeltaTestDeltaJIT(&data); + testDeltaFunc->Assemble(); + testDeltaFunc->jitdesc = NULL; + + // align to 16 + CDeltaJit* deltaJit = new CDeltaJit(delta, cleanMarkCheckFunc, testDeltaFunc); RegisterDeltaJit(delta, deltaJit); } CDeltaJit* DELTAJit_LookupDeltaJit(const char* callsite, delta_t *pFields) { CDeltaJit* deltaJit = g_DeltaJitRegistry.GetJITByDelta(pFields); + +#ifndef REHLDS_FIXES + // only for testing if (!deltaJit) { rehlds_syserror("%s: JITted delta encoder not found for delta %p", callsite, pFields); return NULL; } +#endif // REHLDS_FIXES return deltaJit; } @@ -515,6 +678,13 @@ NOINLINE int DELTAJit_Fields_Clear_Mark_Check(unsigned char *from, unsigned char return func(from, to, deltaJit, pForceMarkMask); } +NOINLINE int DELTAJit_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields) +{ + CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields); + CDeltaTestDeltaJIT &func = *deltaJit->testDeltaFunc; + return func(from, to, deltaJit); +} + void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount) { CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields); @@ -523,32 +693,22 @@ void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount) { *bytecount = deltaJit->markedFieldsMaskSize; } -void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber) { +void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber) +{ CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields); - - if (fieldNumber > 31) - deltaJit->marked_fields_mask.u32[1] |= (1 << (fieldNumber & 0x1F)); - else - deltaJit->marked_fields_mask.u32[0] |= (1 << fieldNumber); - + deltaJit->marked_fields_mask.u32[fieldNumber >> 5] |= (1 << (fieldNumber & 31)); } -void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber) { +void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber) +{ CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields); - - if (fieldNumber > 31) - deltaJit->marked_fields_mask.u32[1] &= ~(1 << (fieldNumber & 0x1F)); - else - deltaJit->marked_fields_mask.u32[0] &= ~(1 << fieldNumber); + deltaJit->marked_fields_mask.u32[fieldNumber >> 5] &= ~(1 << (fieldNumber & 31)); } -qboolean DELTAJit_IsFieldMarked(delta_t* pFields, int fieldNumber) { +qboolean DELTAJit_IsFieldMarked(delta_t* pFields, int fieldNumber) +{ CDeltaJit* deltaJit = DELTAJit_LookupDeltaJit(__FUNCTION__, pFields); - - if (fieldNumber > 31) - return deltaJit->marked_fields_mask.u32[1] & (1 << (fieldNumber & 0x1F)); - - return deltaJit->marked_fields_mask.u32[0] & (1 << fieldNumber); + return deltaJit->marked_fields_mask.u32[fieldNumber >> 5] & (1 << (fieldNumber & 31)); } uint64 DELTAJit_GetOriginalMask(delta_t* pFields) { diff --git a/rehlds/engine/delta_jit.h b/rehlds/engine/delta_jit.h index adf9b17..5732536 100644 --- a/rehlds/engine/delta_jit.h +++ b/rehlds/engine/delta_jit.h @@ -11,6 +11,7 @@ struct deltajit_field { unsigned int length; int type; unsigned int numBlocks; + unsigned int significantBits; }; struct deltajit_memblock_field { @@ -42,12 +43,6 @@ struct deltajitdata_t { deltajit_memblock_itr_t itrBlocks[DELTAJIT_MAX_BLOCKS]; }; -enum deltajit_marked_count_type_t { - DJ_M_DONT_COUNT, - DJ_M_CHECK, - //DJ_M_COUNT, //not implemented yet -}; - class CDeltaJit; class CDeltaJitRegistry { @@ -74,6 +69,7 @@ union delta_marked_mask_t { extern CDeltaJitRegistry g_DeltaJitRegistry; extern int DELTAJit_Fields_Clear_Mark_Check(unsigned char *from, unsigned char *to, delta_t *pFields, void* pForceMarkMask); +extern int DELTAJit_TestDelta(unsigned char *from, unsigned char *to, delta_t *pFields); extern void DELTAJit_SetSendFlagBits(delta_t *pFields, int *bits, int *bytecount); extern void DELTAJit_SetFieldByIndex(struct delta_s *pFields, int fieldNumber); extern void DELTAJit_UnsetFieldByIndex(struct delta_s *pFields, int fieldNumber); diff --git a/rehlds/engine/server.h b/rehlds/engine/server.h index 7f2acd5..1b90876 100644 --- a/rehlds/engine/server.h +++ b/rehlds/engine/server.h @@ -561,6 +561,9 @@ extern delta_t *g_pentitydelta; extern delta_t *g_pcustomentitydelta; extern delta_t *g_pclientdelta; extern delta_t *g_pweapondelta; +#ifdef REHLDS_OPT_PEDANTIC +extern delta_t *g_pusercmddelta; +#endif extern unsigned char fatpvs[1024]; diff --git a/rehlds/engine/sv_main.cpp b/rehlds/engine/sv_main.cpp index 0f2cad4..66e9683 100644 --- a/rehlds/engine/sv_main.cpp +++ b/rehlds/engine/sv_main.cpp @@ -44,6 +44,9 @@ delta_t *g_pentitydelta; delta_t *g_pcustomentitydelta; delta_t *g_pclientdelta; delta_t *g_pweapondelta; +#ifdef REHLDS_OPT_PEDANTIC +delta_t *g_pusercmddelta; +#endif int hashstrings_collisions; @@ -415,14 +418,18 @@ void SV_ReallocateDynamicData(void) if (g_moved_edict) { Con_Printf("Reallocate on moved_edict\n"); - // TODO: Free memory to prevent mem leaks? +#ifdef REHLDS_FIXES + Mem_Free(g_moved_edict); +#endif } g_moved_edict = (edict_t **)Mem_ZeroMalloc(sizeof(edict_t *) * nSize); if (g_moved_from) { Con_Printf("Reallocate on moved_from\n"); - // TODO: Free memory to prevent mem leaks? +#ifdef REHLDS_FIXES + Mem_Free(g_moved_from); +#endif } g_moved_from = (vec3_t *)Mem_ZeroMalloc(sizeof(vec3_t) * nSize); } @@ -437,7 +444,9 @@ void SV_AllocClientFrames(void) if (cl->frames) { Con_DPrintf("Allocating over frame pointer?\n"); - // TODO: Free memory to prevent mem leaks? +#ifdef REHLDS_FIXES + Mem_Free(cl->frames); +#endif } cl->frames = (client_frame_t *)Mem_ZeroMalloc(sizeof(client_frame_t) * SV_UPDATE_BACKUP); } @@ -4084,23 +4093,21 @@ void SV_InvokeCallback(void) int SV_FindBestBaseline(int index, entity_state_t ** baseline, entity_state_t *to, int num, qboolean custom) { int bestbitnumber; + delta_t* delta; if (custom) { - bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], g_pcustomentitydelta); + delta = g_pcustomentitydelta; } else { if (SV_IsPlayerIndex(num)) - { - bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], g_pplayerdelta); - } + delta = g_pplayerdelta; else - { - bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], g_pentitydelta); - } + delta = g_pentitydelta; } + bestbitnumber = DELTA_TestDelta((byte *)*baseline, (byte *)&to[index], delta); bestbitnumber -= 6; int i = 0; @@ -4111,21 +4118,8 @@ int SV_FindBestBaseline(int index, entity_state_t ** baseline, entity_state_t *t { if (to[index].entityType == to[i].entityType) { - if (custom) - { - bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], g_pcustomentitydelta); - } - else - { - if (SV_IsPlayerIndex(num)) - { - bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], g_pplayerdelta); - } - else - { - bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], g_pentitydelta); - } - } + bitnumber = DELTA_TestDelta((byte *)&to[i], (byte *)&to[index], delta); + if (bitnumber < bestbitnumber) { bestbitnumber = bitnumber; @@ -6907,6 +6901,12 @@ void SV_InitDeltas(void) if (!g_peventdelta) Sys_Error("No event_t encoder on server!\n"); +#ifdef REHLDS_OPT_PEDANTIC + g_pusercmddelta = SV_LookupDelta("usercmd_t"); + if (!g_pusercmddelta) + Sys_Error("No usercmd_t encoder on server!\n"); +#endif + #if defined(REHLDS_OPT_PEDANTIC) || defined(REHLDS_FIXES) g_DeltaJitRegistry.CreateAndRegisterDeltaJIT(&g_MetaDelta[0]); #endif diff --git a/rehlds/engine/world.cpp b/rehlds/engine/world.cpp index 9f09f6c..134b8c0 100644 --- a/rehlds/engine/world.cpp +++ b/rehlds/engine/world.cpp @@ -283,7 +283,6 @@ areanode_t *SV_CreateAreaNode(int depth, vec_t *mins, vec_t *maxs) anode->children[1] = SV_CreateAreaNode(depth + 1, mins1, maxs1); return anode; - } /* ../engine/world.c:353 */ @@ -375,6 +374,7 @@ void SV_TouchLinks(edict_t *ent, areanode_t *node) } /* ../engine/world.c:467 */ +#ifndef REHLDS_OPT_PEDANTIC void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode) { mplane_t *splitplane; @@ -435,6 +435,87 @@ void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode) if (sides & 2) SV_FindTouchedLeafs(ent, node->children[1], topnode); } +#else // REHLDS_OPT_PEDANTIC +// unrolled some tail recursion +void SV_FindTouchedLeafs(edict_t *ent, mnode_t *node, int *topnode) +{ + mplane_t *splitplane; + int sides; + + while (1) + { + if (node->contents == CONTENTS_SOLID) + return; + + if (node->contents < 0) + { + if (ent->num_leafs < MAX_ENT_LEAFS) + { + mleaf_t *leaf = (mleaf_t *)node; + int leafnum = leaf - g_psv.worldmodel->leafs - 1; + ent->leafnums[ent->num_leafs] = leafnum; + ent->num_leafs++; + } + else + { + ent->num_leafs = MAX_ENT_LEAFS + 1; + } + return; + } + + splitplane = node->plane; + if (splitplane->type >= 3) + { + sides = BoxOnPlaneSide(ent->v.absmin, ent->v.absmax, splitplane); + + if (sides == 3) + { + if (*topnode == -1) + *topnode = node - g_psv.worldmodel->nodes; + } + + if (sides & 1) + SV_FindTouchedLeafs(ent, node->children[0], topnode); + + if (sides & 2) + SV_FindTouchedLeafs(ent, node->children[1], topnode); + } + else + { + if (splitplane->dist > ent->v.absmin[splitplane->type]) + { + if (splitplane->dist < ent->v.absmax[splitplane->type]) + { + // sides = 3; + // do both children nodes + if (*topnode == -1) + *topnode = node - g_psv.worldmodel->nodes; + + SV_FindTouchedLeafs(ent, node->children[0], topnode); + node = node->children[1]; + continue; + } + else + { + // sides = 2; + // do only SV_FindTouchedLeafs(ent, node->children[1], topnode); + node = node->children[1]; + continue; + } + } + else + { + // sides = 1; + // do only SV_FindTouchedLeafs(ent, node->children[0], topnode); + node = node->children[0]; + continue; + } + } + + break; + } +} +#endif // REHLDS_OPT_PEDANTIC /* ../engine/world.c:517 */ void SV_LinkEdict(edict_t *ent, qboolean touch_triggers) @@ -547,58 +628,79 @@ int SV_LinkContents(areanode_t *node, const vec_t *pos) vec3_t localPosition; vec3_t offset; - for (l = node->solid_edicts.next; l != &node->solid_edicts; l = next) +#ifdef REHLDS_OPT_PEDANTIC + // unroll tail recursion + while (1) +#endif { - next = l->next; - touch = (edict_t *)((char *)l - offsetof(edict_t, area)); - if (!touch->v.solid) + for (l = node->solid_edicts.next; l != &node->solid_edicts; l = next) { - if (touch->v.groupinfo) + next = l->next; + touch = (edict_t *)((char *)l - offsetof(edict_t, area)); + if (!touch->v.solid) { - if (g_groupop) + if (touch->v.groupinfo) { - if (g_groupop == GROUP_OP_NAND && (touch->v.groupinfo & g_groupmask)) - continue; + if (g_groupop) + { + if (g_groupop == GROUP_OP_NAND && (touch->v.groupinfo & g_groupmask)) + continue; + } + else + { + if (!(touch->v.groupinfo & g_groupmask)) + continue; + } } - else + pModel = g_psv.models[touch->v.modelindex]; + if (pModel + && !pModel->type + && pos[0] <= (double)touch->v.absmax[0] + && pos[1] <= (double)touch->v.absmax[1] + && pos[2] <= (double)touch->v.absmax[2] + && pos[0] >= (double)touch->v.absmin[0] + && pos[1] >= (double)touch->v.absmin[1] + && pos[2] >= (double)touch->v.absmin[2]) { - if (!(touch->v.groupinfo & g_groupmask)) - continue; + int contents = touch->v.skin; + if (contents < -100 || contents > 100) + Con_DPrintf("Invalid contents on trigger field: %s\n", &pr_strings[touch->v.classname]); + hull = SV_HullForBsp(touch, vec3_origin, vec3_origin, offset); + localPosition[0] = pos[0] - offset[0]; + localPosition[1] = pos[1] - offset[1]; + localPosition[2] = pos[2] - offset[2]; + if (SV_HullPointContents(hull, hull->firstclipnode, localPosition) != -1) + return contents; } } - pModel = g_psv.models[touch->v.modelindex]; - if (pModel - && !pModel->type - && pos[0] <= (double)touch->v.absmax[0] - && pos[1] <= (double)touch->v.absmax[1] - && pos[2] <= (double)touch->v.absmax[2] - && pos[0] >= (double)touch->v.absmin[0] - && pos[1] >= (double)touch->v.absmin[1] - && pos[2] >= (double)touch->v.absmin[2]) - { - int contents = touch->v.skin; - if (contents < -100 || contents > 100) - Con_DPrintf("Invalid contents on trigger field: %s\n", &pr_strings[touch->v.classname]); - hull = SV_HullForBsp(touch, vec3_origin, vec3_origin, offset); - localPosition[0] = pos[0] - offset[0]; - localPosition[1] = pos[1] - offset[1]; - localPosition[2] = pos[2] - offset[2]; - if (SV_HullPointContents(hull, hull->firstclipnode, localPosition) != -1) - return contents; - } } + + if (node->axis == -1) + return -1; + +#ifndef REHLDS_OPT_PEDANTIC + if (pos[node->axis] > node->dist) + return SV_LinkContents(node->children[0], pos); + + if (pos[node->axis] < node->dist) + return SV_LinkContents(node->children[1], pos); +#else // REHLDS_OPT_PEDANTIC + if (pos[node->axis] > node->dist) + { + node = node->children[0]; + continue; + } + + if (pos[node->axis] < node->dist) + { + node = node->children[1]; + continue; + } + + break; +#endif // REHLDS_OPT_PEDANTIC } - - if (node->axis == -1) - return -1; - - if (pos[node->axis] > node->dist) - return SV_LinkContents(node->children[0], pos); - - if (pos[node->axis] < node->dist) - return SV_LinkContents(node->children[1], pos); - return -1; } diff --git a/rehlds/unittests/delta_tests.cpp b/rehlds/unittests/delta_tests.cpp index 2572837..cd4e0e5 100644 --- a/rehlds/unittests/delta_tests.cpp +++ b/rehlds/unittests/delta_tests.cpp @@ -166,22 +166,9 @@ NOINLINE void _GetBitmaskAndBytecount(delta_t* delta, int* bits, int* bytecount, } } -NOINLINE void _CompareDeltaResults(const char* callsite, delta_res_t* def, delta_res_t* jit, int testscount) -{ - for (int i = 0; i < testscount; i++) - { - if (!!def[i].sendfields != !!jit[i].sendfields) - rehlds_syserror("%s: Test %i: !!sendfields not equals %i|%i", callsite, i, !!def[i].sendfields, !!jit[i].sendfields); - if (memcmp(def[i].bits, jit[i].bits, 8)) - rehlds_syserror("%s: Test %i: bits not equals %p.%p|%p.%p", callsite, i, def[i].bits[0], def[i].bits[1], jit[i].bits[0], jit[i].bits[1]); - if (def[i].bytecount != jit[i].bytecount) - rehlds_syserror("%s: Test %i: bytecount not equal %i|%i", callsite, i, def[i].bytecount, jit[i].bytecount); - } -} - NOINLINE delta_t* _CreateTestDeltaDesc() { static delta_description_t _fields[32]; - delta_test_struct_t d; // "use" d variable + delta_test_struct_t d; d; // "use" d variable _InitDeltaField(&_fields[0], 0x00, DT_BYTE, "b_00", offsetof(delta_test_struct_t, b_00), 1, 8, 1.0f, 1.0f); _InitDeltaField(&_fields[1], 0x01, DT_BYTE, "b_01", offsetof(delta_test_struct_t, b_01), 1, 8, 1.0f, 1.0f); @@ -331,3 +318,51 @@ TEST(MarkFieldsTest_TimeWindow, Delta, 1000) { SV_Shutdown(); } + +TEST(TestDelta_Test, Delta, 1000) +{ + delta_t* delta = _CreateTestDeltaDesc(); + + delta_test_struct_t testdata[4], from; + int result[4]; + + for (size_t i = 0; i < 4; i++) + _FillTestDelta(&testdata[i], 0xCC); + _FillTestDelta(&from, 0xCC); + + // equal + result[0] = 0; + + // change byte + short + float + testdata[1].b_01 = 1; + testdata[1].s_12 = 1.0; + testdata[1].f_08 = 1.0; + result[1] = delta->pdd[1].significant_bits + delta->pdd[8].significant_bits + delta->pdd[4].significant_bits + (8 / 8 * 8 + 8); + + // change float + float + string + testdata[2].f_18 = 2.0; + testdata[2].wb_20 = 2.0; + strcpy(testdata[2].s_24, "TestDelta_Test" ); +#ifdef REHLDS_FIXES + result[2] = delta->pdd[10].significant_bits + delta->pdd[12].significant_bits + strlen(testdata[2].s_24) * 8 + 8 + (13 / 8 * 8 + 8); +#else + result[2] = delta->pdd[10].significant_bits + delta->pdd[12].significant_bits + (13 / 8 * 8 + 8); +#endif + + // change byte + int + float + short + testdata[3].b_4D = 4; + testdata[3].i_14 = 4; + testdata[3].w8_0C = 4.0; + testdata[3].s_12 = 4; + result[3] = delta->pdd[14].significant_bits + delta->pdd[9].significant_bits + delta->pdd[5].significant_bits + delta->pdd[8].significant_bits + (14 / 8 * 8 + 8); + + for (size_t i = 0; i < 4; i++) + { + int tested = DELTA_TestDelta((uint8 *)&from, (uint8 *)&testdata[i], delta); + + if (tested != result[i]) + rehlds_syserror("TestDelta_Test: returned bitcount %i is not equal to true value %i", tested, result[i]); + } + + SV_Shutdown(); +}