From 418ed98b4886f7e9afd2dd9e67c772a36bd5691e Mon Sep 17 00:00:00 2001 From: asmodai Date: Fri, 23 Dec 2016 00:39:22 +0300 Subject: [PATCH] Use SSE for some typical floating point operations --- rehlds/engine/common.h | 6 +++++ rehlds/engine/host.cpp | 4 +-- rehlds/engine/mathlib_e.h | 53 +++++++++++++++++++++++++++++++------- rehlds/engine/net_ws.cpp | 2 +- rehlds/engine/r_studio.cpp | 2 +- rehlds/engine/sv_main.cpp | 6 ++--- rehlds/public/utlrbtree.h | 2 +- 7 files changed, 57 insertions(+), 18 deletions(-) diff --git a/rehlds/engine/common.h b/rehlds/engine/common.h index e10aa1a..60e11a0 100644 --- a/rehlds/engine/common.h +++ b/rehlds/engine/common.h @@ -154,6 +154,9 @@ extern int loadsize; #define Q_atoi atoi #define Q_atof atof #define Q_sqrt M_sqrt +#define Q_min M_min +#define Q_max M_max +#define Q_clamp M_clamp //#define Q_strtoull strtoull //#define Q_FileNameCmp FileNameCmp #define Q_vsnprintf _vsnprintf @@ -182,6 +185,9 @@ extern int loadsize; #define Q_atoi atoi #define Q_atof atof #define Q_sqrt sqrt +#define Q_min min +#define Q_max max +#define Q_clamp clamp //#define Q_strtoull strtoull //#define Q_FileNameCmp FileNameCmp #define Q_vsnprintf _vsnprintf diff --git a/rehlds/engine/host.cpp b/rehlds/engine/host.cpp index 7ca68e6..c42eb7a 100644 --- a/rehlds/engine/host.cpp +++ b/rehlds/engine/host.cpp @@ -477,9 +477,9 @@ void SV_DropClient_internal(client_t *cl, qboolean crash, const char *string) MSG_WriteByte(&cl->netchan.message, svc_disconnect); MSG_WriteString(&cl->netchan.message, string); final[0] = svc_disconnect; - Q_strncpy((char *)&final[1], string, min(sizeof(final) - 1, Q_strlen(string) + 1)); + Q_strncpy((char *)&final[1], string, Q_min(sizeof(final) - 1, Q_strlen(string) + 1)); final[sizeof(final) - 1] = 0; - i = 1 + min(sizeof(final) - 1, Q_strlen(string) + 1); + i = 1 + Q_min(sizeof(final) - 1, Q_strlen(string) + 1); } if (cl->edict && cl->spawned) gEntityInterface.pfnClientDisconnect(cl->edict); diff --git 
a/rehlds/engine/mathlib_e.h b/rehlds/engine/mathlib_e.h index 0a02545..87978e5 100644 --- a/rehlds/engine/mathlib_e.h +++ b/rehlds/engine/mathlib_e.h @@ -56,24 +56,57 @@ static const int nanmask = 0x7F800000; #define IS_NAN(fvar) ((*reinterpret_cast<int *>(&(fvar)) & nanmask) == nanmask) -inline double M_sqrt(int value) { - return sqrt(value); -} - inline float M_sqrt(float value) { return _mm_cvtss_f32(_mm_sqrt_ss(_mm_load_ss(&value))); } inline double M_sqrt(double value) { - double ret; auto v = _mm_load_sd(&value); - _mm_store_sd(&ret, _mm_sqrt_sd(v, v)); - return ret; + return _mm_cvtsd_f64(_mm_sqrt_sd(v, v)); } -inline double M_sqrt(long double value) -{ - return M_sqrt(double(value)); +template <typename T> +inline double M_sqrt(T value) { + return sqrt(value); +} + +inline float M_min(float a, float b) { + return _mm_cvtss_f32(_mm_min_ss(_mm_load_ss(&a), _mm_load_ss(&b))); +} + +inline double M_min(double a, double b) { + return _mm_cvtsd_f64(_mm_min_sd(_mm_load_sd(&a), _mm_load_sd(&b))); +} + +template <typename T> +inline T M_min(T a, T b) { + return min(a, b); +} + +inline float M_max(float a, float b) { + return _mm_cvtss_f32(_mm_max_ss(_mm_load_ss(&a), _mm_load_ss(&b))); +} + +inline double M_max(double a, double b) { + return _mm_cvtsd_f64(_mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&b))); +} + +template <typename T> +inline T M_max(T a, T b) { + return max(a, b); +} + +inline float M_clamp(float a, float min, float max) { + return _mm_cvtss_f32(_mm_min_ss(_mm_max_ss(_mm_load_ss(&a), _mm_load_ss(&min)), _mm_load_ss(&max))); +} + +inline double M_clamp(double a, double min, double max) { + return _mm_cvtsd_f64(_mm_min_sd(_mm_max_sd(_mm_load_sd(&a), _mm_load_sd(&min)), _mm_load_sd(&max))); +} + +template <typename T> +inline T M_clamp(T a, T min, T max) { + return clamp(a, min, max); } float anglemod(float a); diff --git a/rehlds/engine/net_ws.cpp b/rehlds/engine/net_ws.cpp index 76fa193..4a066cf 100644 --- a/rehlds/engine/net_ws.cpp +++ b/rehlds/engine/net_ws.cpp @@ -1360,7 +1360,7 @@ int NET_SendLong(netsrc_t 
sock, int s, const char *buf, int len, int flags, cons while (len > 0) { - size = min(int(SPLIT_SIZE), len); + size = Q_min(int(SPLIT_SIZE), len); pPacket->packetID = (packetNumber << 4) + packetCount; diff --git a/rehlds/engine/r_studio.cpp b/rehlds/engine/r_studio.cpp index cac83f1..27ba9c4 100644 --- a/rehlds/engine/r_studio.cpp +++ b/rehlds/engine/r_studio.cpp @@ -883,7 +883,7 @@ qboolean SV_CheckSphereIntersection(edict_t *ent, const vec_t *start, const vec_ pseqdesc += ent->v.sequence; for (int i = 0; i < 3; i++) { - maxDim[i] = max(fabs(pseqdesc->bbmax[i]), fabs(pseqdesc->bbmin[i])); + maxDim[i] = Q_max(fabs(pseqdesc->bbmax[i]), fabs(pseqdesc->bbmin[i])); } radiusSquared = maxDim[0] * maxDim[0] + maxDim[1] * maxDim[1] + maxDim[2] * maxDim[2]; return DoesSphereIntersect(ent->v.origin, radiusSquared, traceOrg, traceDir) != 0; diff --git a/rehlds/engine/sv_main.cpp b/rehlds/engine/sv_main.cpp index 213f50c..f9f3eef 100644 --- a/rehlds/engine/sv_main.cpp +++ b/rehlds/engine/sv_main.cpp @@ -719,7 +719,7 @@ void SV_StartParticle(const vec_t *org, const vec_t *dir, int color, int count) for (int i = 0; i < 3; i++) { - MSG_WriteChar(&g_psv.datagram, clamp((int)(dir[i] * 16.0f), -128, 127)); + MSG_WriteChar(&g_psv.datagram, Q_clamp((int)(dir[i] * 16.0f), -128, 127)); } MSG_WriteByte(&g_psv.datagram, count); @@ -1771,7 +1771,7 @@ int SV_GetFragmentSize(void *state) if (val[0] != 0) { size = Q_atoi( val ); - size = clamp(size, FRAGMENT_S2C_MIN_SIZE, FRAGMENT_S2C_MAX_SIZE); + size = Q_clamp(size, FRAGMENT_S2C_MIN_SIZE, FRAGMENT_S2C_MAX_SIZE); } } @@ -4937,7 +4937,7 @@ void SV_ExtractFromUserinfo(client_t *cl) if (val[0] != 0) { i = Q_atoi(val); - cl->netchan.rate = clamp(float(i), MIN_RATE, MAX_RATE); + cl->netchan.rate = Q_clamp(float(i), MIN_RATE, MAX_RATE); } val = Info_ValueForKey(userinfo, "topcolor"); diff --git a/rehlds/public/utlrbtree.h b/rehlds/public/utlrbtree.h index 4fc0673..8c5bb4c 100644 --- a/rehlds/public/utlrbtree.h +++ b/rehlds/public/utlrbtree.h @@ 
-1119,7 +1119,7 @@ int CUtlRBTree<T, I>::Depth( I node ) const int depthright = Depth( RightChild(node) ); int depthleft = Depth( LeftChild(node) ); - return max(depthright, depthleft) + 1; + return Q_max(depthright, depthleft) + 1; }