From 904394b6ea2dbaeaeae5f0498a02a55531708a45 Mon Sep 17 00:00:00 2001 From: Andrey Date: Mon, 11 May 2015 20:09:51 +0300 Subject: [PATCH] Added SSE 4.1 implementation of dot product, added SinCos function, maked fast version of SV_Move for point hull (trace line), small copying optimization in SV_RunCmd. --- rehlds/engine/mathlib.cpp | 77 ++++++++++++++-- rehlds/engine/mathlib_e.h | 10 +- rehlds/engine/pmovetst.cpp | 13 ++- rehlds/engine/pr_cmds.cpp | 5 + rehlds/engine/sys_dll2.cpp | 2 + rehlds/engine/world.cpp | 137 ++++++++++++++++++++++++---- rehlds/engine/world.h | 4 + rehlds/public/rehlds/crc32.cpp | 2 +- rehlds/public/rehlds/sys_shared.cpp | 31 +++++-- rehlds/public/rehlds/sys_shared.h | 9 +- 10 files changed, 245 insertions(+), 45 deletions(-) diff --git a/rehlds/engine/mathlib.cpp b/rehlds/engine/mathlib.cpp index 83dd6ce..1347fd7 100644 --- a/rehlds/engine/mathlib.cpp +++ b/rehlds/engine/mathlib.cpp @@ -28,7 +28,8 @@ #include "precompiled.h" - +#include +#include vec3_t vec3_origin; //int nanmask; @@ -125,16 +126,34 @@ NOBODY int InvertMatrix(const float *m, float *out); // float *r3; // 161 //} +#ifdef REHLDS_FIXES +void SinCos(float radians, float *sine, float *cosine) +{ + __asm + { + fld dword ptr [radians]; + fsincos; + fstp dword ptr [cosine]; + fstp dword ptr [sine]; + } +} +#endif // REHLDS_FIXES + /* <47067> ../engine/mathlib.c:267 */ void AngleVectors(const vec_t *angles, vec_t *forward, vec_t *right, vec_t *up) { - float angle; float sr, sp, sy, cr, cp, cy; #ifndef SWDS g_engdstAddrs.pfnAngleVectors(&angles, &forward, &right, &up); #endif // SWDS +#ifdef REHLDS_FIXES + SinCos(DEG2RAD(angles[YAW]), &sy, &cy); + SinCos(DEG2RAD(angles[PITCH]), &sp, &cp); + SinCos(DEG2RAD(angles[ROLL]), &sr, &cr); +#else + float angle; angle = (float)(angles[YAW] * (M_PI * 2 / 360)); sy = sin(angle); cy = cos(angle); @@ -144,6 +163,7 @@ void AngleVectors(const vec_t *angles, vec_t *forward, vec_t *right, vec_t *up) angle = (float)(angles[ROLL] * (M_PI * 2 / 360)); sr = sin(angle); cr = cos(angle); +#endif if (forward) { @@ -168,9 +188,14 @@ void AngleVectors(const vec_t *angles, vec_t *forward, vec_t *right, vec_t *up) /* <4712e> ../engine/mathlib.c:304 */ void AngleVectorsTranspose(const vec_t *angles, vec_t *forward, vec_t *right, vec_t *up) { - float angle; float sr, sp, sy, cr, cp, cy; +#ifdef REHLDS_FIXES + SinCos(DEG2RAD(angles[YAW]), &sy, &cy); + SinCos(DEG2RAD(angles[PITCH]), &sp, &cp); + SinCos(DEG2RAD(angles[ROLL]), &sr, &cr); +#else + float angle; angle = (float)(angles[YAW] * (M_PI * 2 / 360)); sy = sin(angle); cy = cos(angle); @@ -180,6 +205,7 @@ void AngleVectorsTranspose(const vec_t *angles, vec_t *forward, vec_t *right, ve angle = (float)(angles[ROLL] * (M_PI * 2 / 360)); sr = sin(angle); cr = cos(angle); +#endif if (forward) { @@ -204,18 +230,24 @@ void AngleVectorsTranspose(const vec_t *angles, vec_t *forward, vec_t *right, ve /* <471e9> ../engine/mathlib.c:340 */ void AngleMatrix(const vec_t *angles, float(*matrix)[4]) { - float angle; float sr, sp, sy, cr, cp, cy; - angle = (float)(angles[2] * (M_PI * 2 / 360)); +#ifdef REHLDS_FIXES + SinCos(DEG2RAD(angles[ROLL]), &sy, &cy); + SinCos(DEG2RAD(angles[YAW]), &sp, &cp); + SinCos(DEG2RAD(angles[PITCH]), &sr, &cr); +#else + float angle; + angle = (float)(angles[ROLL] * (M_PI * 2 / 360)); sy = sin(angle); cy = cos(angle); - angle = (float)(angles[1] * (M_PI * 2 / 360)); + angle = (float)(angles[YAW] * (M_PI * 2 / 360)); sp = sin(angle); cp = cos(angle); - angle = (float)(angles[0] * (M_PI * 2 / 360)); + angle = (float)(angles[PITCH] * (M_PI * 2 / 360)); sr = sin(angle); cr = cos(angle); +#endif float tmp1, tmp2; @@ -296,11 +328,28 @@ void VectorMA(const vec_t *veca, float scale, const vec_t *vecb, vec_t *vecc) vecc[2] = scale * vecb[2] + veca[2]; } +#ifndef REHLDS_FIXES /* <4757a> ../engine/mathlib.c:484 */ -float _DotProduct(vec_t *v1, vec_t *v2) +long double _DotProduct(const vec_t *v1, const vec_t *v2) { return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]; } +#else // REHLDS_FIXES +float _DotProduct(const vec_t *v1, const vec_t *v2) +{ +#ifdef REHLDS_FIXES + // _mm_loadu_ps - load xmm from unaligned address + // _mm_cvtss_f32 - return low float value of xmm + // _mm_dp_ps - dot product + // 0x71 = 0b01110001 - mask for multiplying operands and result + // dpps isn't binary compatible with separate sse2 instructions (max difference is about 0.0002f, but usually < 0.00001f) + if (cpuinfo.sse4_1) + return _mm_cvtss_f32(_mm_dp_ps(_mm_loadu_ps(v1), _mm_loadu_ps(v2), 0x71)); +#endif // REHLDS_FIXES + + return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]; +} +#endif // REHLDS_FIXES /* <475b4> ../engine/mathlib.c:489 */ NOBODY void _VectorSubtract(vec_t *veca, vec_t *vecb, vec_t *out); @@ -331,6 +380,14 @@ void CrossProduct(const vec_t *v1, const vec_t *v2, vec_t *cross) /* <476d8> ../engine/mathlib.c:519 */ float Length(const vec_t *v) { +#ifdef REHLDS_FIXES + // based on dot product + if (cpuinfo.sse4_1) + { + return _mm_cvtss_f32(_mm_sqrt_ps(_mm_dp_ps(_mm_loadu_ps(v), _mm_loadu_ps(v), 0x71))); + } +#endif // REHLDS_FIXES + float length; length = 0.0f; @@ -346,8 +403,12 @@ float VectorNormalize(vec3_t v) { float length, ilength; +#ifdef REHLDS_FIXES + length = Length(v); +#else // REHLDS_FIXES length = v[0] * v[0] + v[1] * v[1] + v[2] * v[2]; length = sqrt(length); // FIXME +#endif // REHLDS_FIXES if (length) { diff --git a/rehlds/engine/mathlib_e.h b/rehlds/engine/mathlib_e.h index 8df6f1a..2495c2f 100644 --- a/rehlds/engine/mathlib_e.h +++ b/rehlds/engine/mathlib_e.h @@ -42,9 +42,11 @@ // fall over #define ROLL 2 +#define RAD2DEG(x) ((float)(x) * (float)(180.f / M_PI)) +#define DEG2RAD(x) ((float)(x) * (float)(M_PI / 180.f)) + #ifdef HOOK_ENGINE #define vec3_origin (*pvec3_origin) - #endif // HOOK_ENGINE @@ -69,7 +71,11 @@ NOBODY void InterpolateAngles(float *start, float *end, float *output, float fra void VectorTransform(const vec_t *in1, float *in2, vec_t *out); int VectorCompare(const vec_t *v1, const vec_t *v2); void VectorMA(const vec_t *veca, float scale, const vec_t *vecb, vec_t *vecc); -NOBODY float _DotProduct(vec_t *v1, vec_t *v2); +#ifdef REHLDS_FIXES +float _DotProduct(const vec_t *v1, const vec_t *v2); // with sse support +#else // REHLDS_FIXES +long double _DotProduct(const vec_t *v1, const vec_t *v2); // precise +#endif // REHLDS_FIXES NOBODY void _VectorSubtract(vec_t *veca, vec_t *vecb, vec_t *out); void _VectorAdd(vec_t *veca, vec_t *vecb, vec_t *out); NOBODY void _VectorCopy(vec_t *in, vec_t *out); diff --git a/rehlds/engine/pmovetst.cpp b/rehlds/engine/pmovetst.cpp index eb20404..59c1df6 100644 --- a/rehlds/engine/pmovetst.cpp +++ b/rehlds/engine/pmovetst.cpp @@ -645,7 +645,6 @@ struct pmtrace_s *PM_TraceLineEx(float *start, float *end, int flags, int usehul /* <6ef4a> ../engine/pmovetst.c:844 */ qboolean PM_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, const vec_t *p1, const vec_t *p2, pmtrace_t *trace) { - qboolean retval; dclipnode_t *node; mplane_t *plane; vec3_t mid; @@ -687,8 +686,8 @@ qboolean PM_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, cons plane = &hull->planes[node->planenum]; if (plane->type >= 3u) { - t1 = p1[2] * plane->normal[2] + p1[1] * plane->normal[1] + p1[0] * plane->normal[0] - plane->dist; - t2 = p2[2] * plane->normal[2] + p2[1] * plane->normal[1] + p2[0] * plane->normal[0] - plane->dist; + t1 = _DotProduct(p1, plane->normal) - plane->dist; + t2 = _DotProduct(p2, plane->normal) - plane->dist; } else { @@ -700,14 +699,14 @@ qboolean PM_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, cons if (t1 >= 0.0) { - midf = t1 - 0.03125f; + midf = t1 - DIST_EPSILON; } else { if (t2 < 0.0) return PM_RecursiveHullCheck(hull, node->children[1], p1f, p2f, p1, p2, trace); - midf = t1 + 0.03125f; + midf = t1 + DIST_EPSILON; } midf = midf / (t1 - t2); if (midf >= 0.0) @@ -837,8 +836,8 @@ qboolean PM_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, cons plane = &hull->planes[node->planenum]; if (plane->type >= 3u) { - t1 = p1[1] * plane->normal[1] + p1[2] * plane->normal[2] + p1[0] * plane->normal[0] - plane->dist; - t2 = p2[1] * plane->normal[1] + p2[2] * plane->normal[2] + plane->normal[0] * p2[0] - plane->dist; + t1 = _DotProduct(p1, plane->normal) - plane->dist; + t2 = _DotProduct(p2, plane->normal) - plane->dist; } else { diff --git a/rehlds/engine/pr_cmds.cpp b/rehlds/engine/pr_cmds.cpp index 5b80154..f9eb06f 100644 --- a/rehlds/engine/pr_cmds.cpp +++ b/rehlds/engine/pr_cmds.cpp @@ -341,7 +341,12 @@ void PF_sound_I(edict_t *entity, int channel, const char *sample, float volume, /* <78cdd> ../engine/pr_cmds.c:491 */ void PF_traceline_Shared(const float *v1, const float *v2, int nomonsters, edict_t *ent) { +#ifdef REHLDS_OPT_PEDANTIC + trace_t trace = SV_Move_Point(v1, v2, nomonsters, ent); +#else // REHLDS_OPT_PEDANTIC trace_t trace = SV_Move(v1, vec3_origin, vec3_origin, v2, nomonsters, ent, 0); +#endif // REHLDS_OPT_PEDANTIC + gGlobalVariables.trace_flags = 0; SV_SetGlobalTrace(&trace); } diff --git a/rehlds/engine/sys_dll2.cpp b/rehlds/engine/sys_dll2.cpp index 6e67d49..7c56766 100644 --- a/rehlds/engine/sys_dll2.cpp +++ b/rehlds/engine/sys_dll2.cpp @@ -514,6 +514,8 @@ int Sys_InitGame(char *lpOrgCmdLine, char *pBaseDir, void *pwnd, int bIsDedicate Sys_InitHardwareTimer(); #endif // _WIN32 + Sys_CheckCpuInstructionsSupport(); + #ifndef SWDS Sys_InitFloatTime(); #endif // SWDS diff --git a/rehlds/engine/world.cpp b/rehlds/engine/world.cpp index e306a44..cb82ff9 100644 --- a/rehlds/engine/world.cpp +++ b/rehlds/engine/world.cpp @@ -529,7 +529,7 @@ int SV_HullPointContents(hull_t *hull, int num, const vec_t *p) node = &hull->clipnodes[i]; plane = &hull->planes[node->planenum]; if (plane->type > 2) - d = plane->normal[0] * *p + plane->normal[1] * p[1] + plane->normal[2] * p[2] - plane->dist; + d = _DotProduct(plane->normal, p) - plane->dist; else d = p[plane->type] - plane->dist; i = node->children[(d >= 0.0f) ? 0 : 1]; @@ -644,7 +644,7 @@ edict_t *SV_TestEntityPosition(edict_t *ent) #ifndef REHLDS_OPT_PEDANTIC /* ../engine/world.c:804 */ -qboolean SV_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, vec_t *p1, vec_t *p2, trace_t *trace) +qboolean SV_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, const vec_t *p1, const vec_t *p2, trace_t *trace) { dclipnode_t *node; mplane_t *plane; @@ -665,8 +665,8 @@ qboolean SV_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, vec_ plane = &hull->planes[hull->clipnodes[num].planenum]; if (plane->type >= 3) { - t1 = p1[1] * plane->normal[1] + p1[2] * plane->normal[2] + p1[0] * plane->normal[0] - plane->dist; - t2 = p2[1] * plane->normal[1] + p2[2] * plane->normal[2] + plane->normal[0] * p2[0] - plane->dist; + t1 = _DotProduct(p1, plane->normal) - plane->dist; + t2 = _DotProduct(p2, plane->normal) - plane->dist; } else { @@ -678,14 +678,14 @@ qboolean SV_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, vec_ if (t1 >= 0.0f) { - midf = t1 - 0.03125f; + midf = t1 - DIST_EPSILON; } else { if (t2 < 0.0f) return SV_RecursiveHullCheck(hull, node->children[1], p1f, p2f, p1, p2, trace); - midf = t1 + 0.03125f; + midf = t1 + DIST_EPSILON; } midf = midf / (t1 - t2); @@ -698,7 +698,7 @@ qboolean SV_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, vec_ { midf = 0.0f; } - if (((*reinterpret_cast(&midf)) & nanmask) != nanmask) + if (!IS_NAN(midf)) // not a number { frac = pdif * midf + p1f; mid[0] = (p2[0] - p1[0]) * midf + p1[0]; @@ -803,8 +803,8 @@ qboolean SV_RecursiveHullCheck(hull_t *hull, int num, float p1f, float p2f, cons plane = &hull->planes[hull->clipnodes[num].planenum]; if (plane->type >= 3) { - t1 = p1[1] * plane->normal[1] + p1[2] * plane->normal[2] + p1[0] * plane->normal[0] - plane->dist; - t2 = p2[1] * plane->normal[1] + p2[2] * plane->normal[2] + plane->normal[0] * p2[0] - plane->dist; + t1 = _DotProduct(p1, plane->normal) - plane->dist; + t2 = _DotProduct(p2, plane->normal) - plane->dist; } else { @@ -974,14 +974,14 @@ void SV_SingleClipMoveToEntity(edict_t *ent, const vec_t *start, const vec_t *mi AngleVectors(ent->v.angles, forward, right, up); temp[0] = start_l[0]; temp[1] = start_l[1]; temp[2] = start_l[2]; - start_l[0] = forward[2] * temp[2] + forward[1] * temp[1] + forward[0] * temp[0]; - start_l[1] = -(right[0] * temp[0] + right[2] * temp[2] + right[1] * temp[1]); - start_l[2] = up[1] * temp[1] + up[0] * temp[0] + up[2] * temp[2]; - + start_l[0] = _DotProduct(forward, temp); + start_l[1] = -_DotProduct(right, temp); + start_l[2] = _DotProduct(up, temp); + temp[0] = end_l[0]; temp[1] = end_l[1]; temp[2] = end_l[2]; - end_l[0] = forward[2] * temp[2] + forward[1] * temp[1] + forward[0] * temp[0]; - end_l[1] = -(right[0] * temp[0] + right[2] * temp[2] + right[1] * temp[1]); - end_l[2] = up[1] * temp[1] + up[0] * temp[0] + up[2] * temp[2]; + end_l[0] = _DotProduct(forward, temp); + end_l[1] = -_DotProduct(right, temp); + end_l[2] = _DotProduct(up, temp); rotated = 1; } @@ -1033,9 +1033,9 @@ void SV_SingleClipMoveToEntity(edict_t *ent, const vec_t *start, const vec_t *mi temp[1] = trace->plane.normal[1]; temp[2] = trace->plane.normal[2]; - trace->plane.normal[0] = up[2] * temp[2] + up[1] * temp[1] + up[0] * temp[0]; - trace->plane.normal[1] = right[2] * temp[2] + right[1] * temp[1] + right[0] * temp[0]; - trace->plane.normal[2] = forward[2] * temp[2] + forward[1] * temp[1] + forward[0] * temp[0]; + trace->plane.normal[0] = _DotProduct(up, temp); + trace->plane.normal[1] = _DotProduct(right, temp); + trace->plane.normal[2] = _DotProduct(forward, temp); } trace->endpos[0] = (end[0] - start[0]) * trace->fraction + start[0]; @@ -1274,7 +1274,7 @@ trace_t SV_Move(const vec_t *start, const vec_t *mins, const vec_t *maxs, const clip.end = worldEndPoint; worldFraction = clip.trace.fraction; - clip.type = (unsigned char)type; + clip.type = type & 0xFF; clip.ignoretrans = type >> 8; clip.trace.fraction = 1.0f; clip.start = start; @@ -1307,3 +1307,100 @@ trace_t SV_Move(const vec_t *start, const vec_t *mins, const vec_t *maxs, const return clip.trace; } + +#ifdef REHLDS_OPT_PEDANTIC +// Optimized version of SV_Move routines for moving point hull throw world +void SV_SingleClipMoveToPoint(const vec_t *start, const vec_t *end, trace_t *trace) +{ + hull_t *hull; + + Q_memset(trace, 0, sizeof(trace_t)); + trace->fraction = 1.0f; + trace->allsolid = TRUE; + trace->endpos[0] = end[0]; + trace->endpos[1] = end[1]; + trace->endpos[2] = end[2]; + + hull = &g_psv.models[1]->hulls[0]; // world point hull + SV_RecursiveHullCheck(hull, hull->firstclipnode, 0.0f, 1.0f, start, end, trace); + + if (trace->fraction != 1.0f) + { + trace->endpos[0] = ( end[0] - start[0] ) * trace->fraction + start[0]; + trace->endpos[1] = ( end[1] - start[1] ) * trace->fraction + start[1]; + trace->endpos[2] = ( end[2] - start[2] ) * trace->fraction + start[2]; + } + + if (trace->fraction < 1.0f || trace->startsolid) + trace->ent = &g_psv.edicts[0]; +} + +void SV_MoveBounds_Point(const vec_t *start, const vec_t *end, vec_t *boxmins, vec_t *boxmaxs) +{ + for (int i = 0; i < 3; i++) + { + if (end[i] > start[i]) + { + boxmins[i] = start[i] - 1.0f; + boxmaxs[i] = end[i] + 1.0f; + } + else + { + boxmins[i] = end[i] - 1.0f; + boxmaxs[i] = start[i] + 1.0f; + } + } +} + +trace_t SV_Move_Point(const vec_t *start, const vec_t *end, int type, edict_t *passedict) +{ + moveclip_t clip; + vec3_t worldEndPoint; + float worldFraction; + + Q_memset(&clip, 0, sizeof(clip)); + SV_SingleClipMoveToPoint(start, end, &clip.trace); + + if (clip.trace.fraction != 0.0f) + { + worldEndPoint[0] = clip.trace.endpos[0]; + worldEndPoint[1] = clip.trace.endpos[1]; + worldEndPoint[2] = clip.trace.endpos[2]; + + clip.end = worldEndPoint; + worldFraction = clip.trace.fraction; + + clip.type = type & 0xFF; + clip.ignoretrans = type >> 8; + clip.trace.fraction = 1.0f; + clip.start = start; + clip.mins = vec3_origin; + clip.maxs = vec3_origin; + clip.passedict = passedict; + clip.monsterClipBrush = 0; + if (type == 2) + { + for (int i = 0; i < 3; i++) + { + clip.mins2[i] = -15.0f; + clip.maxs2[i] = +15.0f; + } + } + else + { + clip.mins2[0] = 0.0; + clip.mins2[1] = 0.0; + clip.mins2[2] = 0.0; + clip.maxs2[0] = 0.0; + clip.maxs2[1] = 0.0; + clip.maxs2[2] = 0.0; + } + SV_MoveBounds_Point(start, worldEndPoint, clip.boxmins, clip.boxmaxs); + SV_ClipToLinks(sv_areanodes, &clip); + gGlobalVariables.trace_ent = clip.trace.ent; + clip.trace.fraction = worldFraction * clip.trace.fraction; + } + + return clip.trace; +} +#endif // REHLDS_OPT_PEDANTIC diff --git a/rehlds/engine/world.h b/rehlds/engine/world.h index 57cb8fa..0b5ed4a 100644 --- a/rehlds/engine/world.h +++ b/rehlds/engine/world.h @@ -124,4 +124,8 @@ void SV_MoveBounds(const vec_t *start, const vec_t *mins, const vec_t *maxs, con trace_t SV_MoveNoEnts(const vec_t *start, vec_t *mins, vec_t *maxs, const vec_t *end, int type, edict_t *passedict); trace_t SV_Move(const vec_t *start, const vec_t *mins, const vec_t *maxs, const vec_t *end, int type, edict_t *passedict, qboolean monsterClipBrush); +#ifdef REHLDS_OPT_PEDANTIC +trace_t SV_Move_Point(const vec_t *start, const vec_t *end, int type, edict_t *passedict); +#endif // REHLDS_OPT_PEDANTIC + #endif // WORLD_H diff --git a/rehlds/public/rehlds/crc32.cpp b/rehlds/public/rehlds/crc32.cpp index 4c6e12e..887d154 100644 --- a/rehlds/public/rehlds/crc32.cpp +++ b/rehlds/public/rehlds/crc32.cpp @@ -110,7 +110,7 @@ uint32 crc32_t_nosse(uint32 iCRC, const uint8 *s, unsigned int len) { } uint32 crc32_t(uint32 iCRC, const uint8 *s, unsigned int len) { - if (!g_HasSSE42) { + if (!cpuinfo.sse4_2) { return crc32_t_nosse(iCRC, s, len); } diff --git a/rehlds/public/rehlds/sys_shared.cpp b/rehlds/public/rehlds/sys_shared.cpp index aad58e8..0f56b2e 100644 --- a/rehlds/public/rehlds/sys_shared.cpp +++ b/rehlds/public/rehlds/sys_shared.cpp @@ -27,19 +27,38 @@ */ #include "sys_shared.h" -bool Sys_ChechSSE42Support(); +#define SSE3_FLAG (1<<0) +#define SSSE3_FLAG (1<<9) +#define SSE4_1_FLAG (1<<19) +#define SSE4_2_FLAG (1<<20) +#define AVX_FLAG (1<<28) +#define AVX2_FLAG (1<<5) -bool g_HasSSE42 = Sys_ChechSSE42Support(); +cpuinfo_t cpuinfo; - -bool Sys_ChechSSE42Support() { +void Sys_CheckCpuInstructionsSupport(void) +{ unsigned int cpuid_data[4]; + // eax = 1, ecx = 0 #if defined(__GNUC__) __get_cpuid(0x1, &cpuid_data[0], &cpuid_data[1], &cpuid_data[2], &cpuid_data[3]); #else //__GNUC__ __cpuidex((int*)cpuid_data, 1, 0); #endif //__GNUC__ - return (0 != (cpuid_data[2] & (1 << 20))); -} + cpuinfo.sse3 = (cpuid_data[2] & SSE3_FLAG) ? 1 : 0; // ecx + cpuinfo.ssse3 = (cpuid_data[2] & SSSE3_FLAG) ? 1 : 0; + cpuinfo.sse4_1 = (cpuid_data[2] & SSE4_1_FLAG) ? 1 : 0; + cpuinfo.sse4_2 = (cpuid_data[2] & SSE4_2_FLAG) ? 1 : 0; + cpuinfo.avx = (cpuid_data[2] & AVX_FLAG) ? 1 : 0; + + // eax = 7, ecx = 0 +#if defined(__GNUC__) + __get_cpuid(0x7, &cpuid_data[0], &cpuid_data[1], &cpuid_data[2], &cpuid_data[3]); +#else //__GNUC__ + __cpuidex((int*)cpuid_data, 7, 0); +#endif //__GNUC__ + + cpuinfo.avx2 = (cpuid_data[1] & AVX2_FLAG) ? 1 : 0; // ebx +} \ No newline at end of file diff --git a/rehlds/public/rehlds/sys_shared.h b/rehlds/public/rehlds/sys_shared.h index 8187370..54c608b 100644 --- a/rehlds/public/rehlds/sys_shared.h +++ b/rehlds/public/rehlds/sys_shared.h @@ -27,4 +27,11 @@ */ #pragma once -extern bool g_HasSSE42; +typedef struct cpuinfo_s +{ + uint8 sse3, ssse3, sse4_1, sse4_2, avx, avx2; +} cpuinfo_t; + +extern cpuinfo_t cpuinfo; + +void Sys_CheckCpuInstructionsSupport(void);