2
0
mirror of https://github.com/rehlds/rehlds.git synced 2025-01-07 12:35:33 +03:00

Fixed DELTAJIT_MAX_FIELDS limit check

Optimizations in delta jit
This commit is contained in:
asmodai 2016-01-12 19:16:38 +03:00
parent 308ca9936a
commit 4aad7ec954
2 changed files with 74 additions and 41 deletions

View File

@ -55,13 +55,13 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) {
} }
//sanity checks & pre-clean //sanity checks & pre-clean
if (numMemBlocks >= DELTAJIT_MAX_BLOCKS) { if (numMemBlocks > DELTAJIT_MAX_BLOCKS) {
rehlds_syserror("%s: numMemBlocks >= DELTAJIT_MAX_BLOCKS (%d >= %d)", __FUNCTION__, numMemBlocks, DELTAJIT_MAX_BLOCKS); rehlds_syserror("%s: numMemBlocks > DELTAJIT_MAX_BLOCKS (%d > %d)", __FUNCTION__, numMemBlocks, DELTAJIT_MAX_BLOCKS);
return; return;
} }
if (delta->fieldCount >= DELTAJIT_MAX_FIELDS) { if (delta->fieldCount > DELTAJIT_MAX_FIELDS) {
rehlds_syserror("%s: fieldCount >= DELTAJIT_MAX_FIELDS (%d >= %d)", __FUNCTION__, delta->fieldCount, DELTAJIT_MAX_FIELDS); rehlds_syserror("%s: fieldCount > DELTAJIT_MAX_FIELDS (%d > %d)", __FUNCTION__, delta->fieldCount, DELTAJIT_MAX_FIELDS);
return; return;
} }
@ -75,10 +75,8 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) {
unsigned int blockId = fieldDesc->fieldOffset / 16; unsigned int blockId = fieldDesc->fieldOffset / 16;
unsigned int blockStart = blockId * 16; unsigned int blockStart = blockId * 16;
unsigned int fieldSize = DELTAJIT_GetFieldSize(fieldDesc); unsigned int fieldSize = DELTAJIT_GetFieldSize(fieldDesc);
auto jitField = &jitdesc.fields[i]; auto jitField = &jitdesc.fields[i];
jitField->id = i; jitField->id = i;
jitField->offset = fieldDesc->fieldOffset; jitField->offset = fieldDesc->fieldOffset;
@ -127,9 +125,24 @@ void DELTAJIT_CreateDescription(delta_t* delta, deltajitdata_t &jitdesc) {
jitdesc.itrBlocks[i].prefetchBlockId = prefetchBlkId; jitdesc.itrBlocks[i].prefetchBlockId = prefetchBlkId;
} }
} }
// Generates a label name that is unique across the whole process by appending
// a monotonically increasing decimal counter to a caller-supplied base name.
//
// The JIT generators in this file emit the same code pattern several times
// within one generated function (e.g. once per string field), so every jump
// target needs its own distinct label name.
//
// Not thread-safe: the counter is a plain static without synchronization.
class CUniqueLabel {
public:
	// name: human-readable base of the label; must not be null.
	// The resulting label has the form "<name>_<index>".
	CUniqueLabel(const char* name) : m_name(name) {
		// Append the index as decimal text. Adding the raw size_t with
		// `+=` would select operator+=(char): the value would be narrowed
		// to one character (an embedded '\0' for index 0) and would wrap
		// after 256 labels, breaking the uniqueness guarantee.
		// The '_' separator keeps names unambiguous even when the base
		// name itself ends in digits ("x1" + 2 vs. "x" + 12).
		m_name += '_';
		m_name += std::to_string(m_unique_index++);
	}

	// Allows passing the object wherever a std::string label name is expected.
	operator std::string&() {
		return m_name;
	}

private:
	std::string m_name;            // base name plus unique suffix
	static size_t m_unique_index;  // next index to hand out
};

// Zero-initialized; the first label created receives index 0.
size_t CUniqueLabel::m_unique_index = 0;
class CDeltaClearMarkFieldsJIT; class CDeltaClearMarkFieldsJIT;
class CDeltaTestDeltaJIT; class CDeltaTestDeltaJIT;
@ -157,6 +170,9 @@ public:
void checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField); void checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField);
void iterateBlocks(deltajitdata_t *jitdesc); void iterateBlocks(deltajitdata_t *jitdesc);
void iterateStrings(deltajitdata_t *jitdesc); void iterateStrings(deltajitdata_t *jitdesc);
private:
jitasm::XmmReg xmm_tmp = xmm6;
}; };
void CDeltaCheckJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField) void CDeltaCheckJIT::checkFieldMask(jitasm::Frontend::Reg32& mask, deltajit_memblock_field* jitField)
@ -188,8 +204,6 @@ void CDeltaCheckJIT::iterateBlocks(deltajitdata_t *jitdesc)
jitasm::Frontend::XmmReg src_xmm[3] = {xmm0, xmm1, xmm2}; jitasm::Frontend::XmmReg src_xmm[3] = {xmm0, xmm1, xmm2};
jitasm::Frontend::XmmReg dst_xmm[3] = {xmm3, xmm4, xmm5}; jitasm::Frontend::XmmReg dst_xmm[3] = {xmm3, xmm4, xmm5};
auto xmm_tmp = xmm6;
if (jitdesc->numItrBlocks > 0) { if (jitdesc->numItrBlocks > 0) {
movdqu(src_xmm[0], xmmword_ptr[esi + ( jitdesc->itrBlocks[0].memblockId * 16 )]); movdqu(src_xmm[0], xmmword_ptr[esi + ( jitdesc->itrBlocks[0].memblockId * 16 )]);
movdqu(dst_xmm[0], xmmword_ptr[edi + ( jitdesc->itrBlocks[0].memblockId * 16 )]); movdqu(dst_xmm[0], xmmword_ptr[edi + ( jitdesc->itrBlocks[0].memblockId * 16 )]);
@ -217,7 +231,7 @@ void CDeltaCheckJIT::iterateBlocks(deltajitdata_t *jitdesc)
} }
// create mask for changed bytes // create mask for changed bytes
pcmpeqb(src_xmm[dataXmmCounter], dst_xmm[dataXmmCounter]); pcmpeqb(src_xmm[dataXmmCounter], dst_xmm[dataXmmCounter]); // 0..15: byte[s] = (byte[s] == byte[d]) ? 0xFF : 0x00
pmovmskb(blockMask, src_xmm[dataXmmCounter]); pmovmskb(blockMask, src_xmm[dataXmmCounter]);
not_(blockMask); not_(blockMask);
@ -285,6 +299,7 @@ void CDeltaCheckJIT::iterateStrings(deltajitdata_t *jitdesc)
// This generator expects that following registers are already initialized: // This generator expects that following registers are already initialized:
// esi = src // esi = src
// edi = dst // edi = dst
size_t pushed_size = 0;
for (unsigned int i = 0; i < jitdesc->numFields; i++) { for (unsigned int i = 0; i < jitdesc->numFields; i++) {
auto jitField = &jitdesc->fields[i]; auto jitField = &jitdesc->fields[i];
@ -295,16 +310,18 @@ void CDeltaCheckJIT::iterateStrings(deltajitdata_t *jitdesc)
lea(eax, ptr[esi + jitField->offset]); lea(eax, ptr[esi + jitField->offset]);
lea(edx, ptr[edi + jitField->offset]); lea(edx, ptr[edi + jitField->offset]);
mov(ecx, (size_t)&Q_stricmp);
push(eax); push(eax);
push(edx); push(edx);
mov(ecx, (size_t)&Q_stricmp);
call(ecx); call(ecx);
// call handler // call handler
onStringChecked(jitField); onStringChecked(jitField);
add(esp, 8); pushed_size += 8;
} }
add(esp, pushed_size);
} }
class CDeltaClearMarkFieldsJIT : public jitasm::function<int, CDeltaClearMarkFieldsJIT, void*, void*, void*, void*> class CDeltaClearMarkFieldsJIT : public jitasm::function<int, CDeltaClearMarkFieldsJIT, void*, void*, void*, void*>
@ -337,19 +354,22 @@ void CDeltaClearMarkFieldsJIT::callConditionalEncoder(Addr src, Addr dst, Addr d
// esi = src // esi = src
// edi = dst // edi = dst
CUniqueLabel no_encoder("no_encoder");
int deltaOffset = (offsetof(CDeltaJit, delta)); int deltaOffset = (offsetof(CDeltaJit, delta));
int condEncoderOffset = (offsetof(delta_t, conditionalencode)); int condEncoderOffset = (offsetof(delta_t, conditionalencode));
mov(eax, ptr[deltaJit]); mov(eax, ptr[deltaJit]);
mov(eax, ptr[eax + deltaOffset]); mov(eax, ptr[eax + deltaOffset]);
mov(ecx, dword_ptr[eax + condEncoderOffset]); mov(ecx, dword_ptr[eax + condEncoderOffset]);
If(ecx != 0);
test(ecx, ecx);
jz(no_encoder);
push(edi); push(edi);
push(esi); push(esi);
push(eax); push(eax);
call(ecx); call(ecx);
add(esp, 12); add(esp, 12);
EndIf(); L(no_encoder);
} }
void CDeltaClearMarkFieldsJIT::calculateBytecount() { void CDeltaClearMarkFieldsJIT::calculateBytecount() {
@ -361,15 +381,14 @@ void CDeltaClearMarkFieldsJIT::calculateBytecount() {
xor_(edx, edx); xor_(edx, edx);
// 0-7 // 0-7
mov(ecx, 1); test(al, al);
test(eax, 0xFF); setnz(dl);
cmovnz(edx, ecx);
// 8-15 // 8-15
if (jitdesc->numFields > 7) if (jitdesc->numFields > 7)
{ {
mov(esi, 2); mov(esi, 2);
test(eax, 0xFF00); test(ah, ah);
cmovnz(edx, esi); cmovnz(edx, esi);
} }
@ -395,14 +414,14 @@ void CDeltaClearMarkFieldsJIT::calculateBytecount() {
// 32-39 // 32-39
mov(ecx, 5); mov(ecx, 5);
test(eax, 0xFF); test(al, al);
cmovnz(edx, ecx); cmovnz(edx, ecx);
// 40-47 // 40-47
if (jitdesc->numFields > 39) if (jitdesc->numFields > 39)
{ {
mov(esi, 6); mov(esi, 6);
test(eax, 0xFF00); test(ah, ah);
cmovnz(edx, esi); cmovnz(edx, esi);
} }
@ -439,12 +458,11 @@ void CDeltaClearMarkFieldsJIT::onStringChecked(deltajit_field* field)
size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask); size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask);
xor_(ecx, ecx);
test(eax, eax); test(eax, eax);
setnz(cl); setnz(cl);
shl(ecx, field->id & 31); shl(cl, field->id & 7);
or_(ptr[ebx + delta_markbits_offset + ((field->id > 31) ? 4 : 0)], ecx); or_(byte_ptr[ebx + delta_markbits_offset + (field->id / 8)], cl);
} }
CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask) CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr dst, Addr deltaJit, Addr pForceMarkMask)
@ -464,14 +482,16 @@ CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr d
pxor(marked_fields_mask, marked_fields_mask); pxor(marked_fields_mask, marked_fields_mask);
// check changed blocks // check changed blocks
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateBlocks))(jitdesc); reinterpret_cast<CDeltaCheckJIT*>(this)->iterateBlocks(jitdesc);
//apply 'force mark' mask if it's present //apply 'force mark' mask if it's present
CUniqueLabel no_forcemask("no_forcemask");
mov(eax, ptr[pForceMarkMask]); mov(eax, ptr[pForceMarkMask]);
If(eax != 0); test(eax, eax);
movq(xmm_tmp, qword_ptr[eax]); jz(no_forcemask);
movq(xmm_tmp, qword_ptr[eax]);
por(marked_fields_mask, xmm_tmp); por(marked_fields_mask, xmm_tmp);
EndIf(); L(no_forcemask);
size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask); size_t delta_markbits_offset = offsetof(CDeltaJit, marked_fields_mask);
@ -481,7 +501,7 @@ CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr d
movq(qword_ptr[ebx + offsetof(CDeltaJit, originalMarkedFieldsMask)], marked_fields_mask); movq(qword_ptr[ebx + offsetof(CDeltaJit, originalMarkedFieldsMask)], marked_fields_mask);
// check changed strings // check changed strings
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateStrings))(jitdesc); reinterpret_cast<CDeltaCheckJIT*>(this)->iterateStrings(jitdesc);
//emit conditional encoder call //emit conditional encoder call
callConditionalEncoder(src, dst, deltaJit); callConditionalEncoder(src, dst, deltaJit);
@ -490,14 +510,19 @@ CDeltaClearMarkFieldsJIT::Result CDeltaClearMarkFieldsJIT::main(Addr src, Addr d
mov(edi, dword_ptr[ebx + delta_markbits_offset]); mov(edi, dword_ptr[ebx + delta_markbits_offset]);
or_(edi, dword_ptr[ebx + delta_markbits_offset + 4]); or_(edi, dword_ptr[ebx + delta_markbits_offset + 4]);
If(edi != 0); CUniqueLabel no_markedbits("no_markedbits");
CUniqueLabel calculated("calculated");
test(edi, edi);
jz(no_markedbits);
calculateBytecount(); calculateBytecount();
Else(); jmp(calculated);
L(no_markedbits);
//set maskSize to 0 if there are no marked fields //set maskSize to 0 if there are no marked fields
size_t delta_masksize_offset = offsetof(CDeltaJit, markedFieldsMaskSize); size_t delta_masksize_offset = offsetof(CDeltaJit, markedFieldsMaskSize);
xor_(edx, edx); xor_(edx, edx);
mov(dword_ptr[ebx + delta_masksize_offset], edx); mov(dword_ptr[ebx + delta_masksize_offset], edx);
EndIf(); L(calculated);
return edx; return edx;
} }
@ -554,7 +579,10 @@ void CDeltaTestDeltaJIT::onStringChecked(deltajit_field* field)
// ebp = highestBit // ebp = highestBit
// [esp] = dest string // [esp] = dest string
If(eax != 0); // changed CUniqueLabel not_changed("not_changed");
test(eax, eax);
jz(not_changed); // changed
mov(ecx, (size_t)&Q_strlen); mov(ecx, (size_t)&Q_strlen);
call(ecx); // dest already in top of stack call(ecx); // dest already in top of stack
@ -563,7 +591,7 @@ void CDeltaTestDeltaJIT::onStringChecked(deltajit_field* field)
mov(eax, field->id); mov(eax, field->id);
cmp(eax, highestBit); cmp(eax, highestBit);
cmovg(highestBit, eax); cmovg(highestBit, eax);
EndIf(); L(not_changed);
} }
CDeltaClearMarkFieldsJIT::Result CDeltaTestDeltaJIT::main(Addr src, Addr dst, Addr deltaJit) CDeltaClearMarkFieldsJIT::Result CDeltaTestDeltaJIT::main(Addr src, Addr dst, Addr deltaJit)
@ -582,26 +610,29 @@ CDeltaClearMarkFieldsJIT::Result CDeltaTestDeltaJIT::main(Addr src, Addr dst, Ad
mov(edi, ptr[dst]); mov(edi, ptr[dst]);
// neededBits 0; highestBit = -1 // neededBits 0; highestBit = -1
xor_(neededBits, neededBits);
xor_(highestBit, highestBit); xor_(highestBit, highestBit);
xor_(neededBits, neededBits);
dec(highestBit); dec(highestBit);
// can save some operations // can save some operations
highest_id = 0; highest_id = 0;
// check changed fields // check changed fields
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateBlocks))(jitdesc); reinterpret_cast<CDeltaCheckJIT*>(this)->iterateBlocks(jitdesc);
#ifdef REHLDS_FIXES #ifdef REHLDS_FIXES
// check changed strings // check changed strings
(reinterpret_cast<CDeltaCheckJIT*>(this)->*(&CDeltaCheckJIT::iterateStrings))(jitdesc); reinterpret_cast<CDeltaCheckJIT*>(this)->iterateStrings(jitdesc);
#endif #endif
If(highestBit >= 0); CUniqueLabel highest_not_set("highest_not_set");
test(highestBit, highestBit);
js(highest_not_set);
//neededBits += highestBit / 8 * 8 + 8; //neededBits += highestBit / 8 * 8 + 8;
shr(highestBit, 3); shr(highestBit, 3);
lea(neededBits, ptr[neededBits + highestBit * 8 + 8]); lea(neededBits, ptr[neededBits + highestBit * 8 + 8]);
EndIf(); L(highest_not_set);
return neededBits; return neededBits;
} }
@ -616,8 +647,10 @@ CDeltaJit::CDeltaJit(delta_t* _delta, CDeltaClearMarkFieldsJIT* _cleanMarkCheckF
CDeltaJit::~CDeltaJit() { CDeltaJit::~CDeltaJit() {
if (cleanMarkCheckFunc) { if (cleanMarkCheckFunc) {
delete cleanMarkCheckFunc; delete cleanMarkCheckFunc;
delete testDeltaFunc;
cleanMarkCheckFunc = NULL; cleanMarkCheckFunc = NULL;
}
if (testDeltaFunc) {
delete testDeltaFunc;
testDeltaFunc = NULL; testDeltaFunc = NULL;
} }
} }

View File

@ -16,7 +16,7 @@ struct deltajit_field {
struct deltajit_memblock_field { struct deltajit_memblock_field {
deltajit_field* field; deltajit_field* field;
uint16 mask; uint16 mask; // one bit for each memblock byte
bool first; bool first;
bool last; bool last;
}; };