Merge pull request #82 from theAsmodai/master

Added optimized stdlib functions from Agner's asmlib
This commit is contained in:
theAsmodai 2015-10-18 00:23:54 +03:00
commit 8e6a3cac68
10 changed files with 262 additions and 47 deletions

View File

@ -142,7 +142,7 @@ void setupToolchain(NativeBinarySpec b) {
cfg.linkerOptions.baseAddress = '0x4970000'
}
cfg.projectLibpath(project, '/lib')
cfg.extraLibs 'steam_api.lib', 'psapi.lib', 'ws2_32.lib', 'kernel32.lib', 'user32.lib', 'advapi32.lib'
cfg.extraLibs 'steam_api.lib', 'psapi.lib', 'ws2_32.lib', 'kernel32.lib', 'user32.lib', 'advapi32.lib', 'libacof32.lib'
} else if (cfg instanceof GccToolchainConfig) {
cfg.compilerOptions.pchConfig = new GccToolchainConfig.PrecompilerHeaderOptions(
enabled: true,
@ -158,7 +158,7 @@ void setupToolchain(NativeBinarySpec b) {
])
cfg.compilerOptions.args '-Qoption,cpp,--treat_func_as_string_literal_cpp', '-fno-rtti'
cfg.projectLibpath(project, '/lib/linux32')
cfg.extraLibs 'rt', 'dl', 'm', 'steam_api'
cfg.extraLibs 'rt', 'dl', 'm', 'steam_api', 'aelf32'
}
if (!unitTestExecutable && !swdsLib) {

View File

@ -117,26 +117,26 @@ extern int loadsize;
#define _strlwr(p) for (int i = 0; p[i] != 0; i++) p[i] = tolower(p[i]);
#endif
#define Q_memset memset
#define Q_memcpy memcpy
#define Q_memmove memmove
#define Q_strlen strlen
#define Q_memcmp memcmp
#define Q_strcpy strcpy
#define Q_memset A_memset
#define Q_memcpy A_memcpy
#define Q_memmove A_memmove
#define Q_strlen A_strlen
#define Q_memcmp A_memcmp
#define Q_strcpy A_strcpy
#define Q_strncpy strncpy
#define Q_strrchr strrchr
#define Q_strcat strcat
#define Q_strcat A_strcat
#define Q_strncat strncat
#define Q_strcmp strcmp
#define Q_strcmp A_strcmp
#define Q_strncmp strncmp
//#define Q_strcasecmp _stricmp // Use Q_stricmp
//#define Q_strncasecmp _strnicmp // Use Q_strnicmp
#define Q_stricmp _stricmp
#define Q_stricmp A_stricmp
#define Q_strnicmp _strnicmp
#define Q_strstr strstr
#define Q_strstr A_strstr
#define Q_strchr strchr
#define Q_strrchr strrchr
#define Q_strlwr _strlwr
#define Q_strlwr A_strtolower
#define Q_sprintf sprintf
#define Q_snprintf _snprintf
#define Q_atoi atoi

View File

@ -6440,7 +6440,7 @@ void SV_FailDownload(const char *filename)
//-----------------------------------------------------------------------------
// Finds a string in another string with a case insensitive test
//-----------------------------------------------------------------------------
const char *Q_stristr(const char *pStr, const char *pSearch)
/*const char *Q_stristr(const char *pStr, const char *pSearch)
{
if (!pStr || !pSearch)
return NULL;
@ -6478,7 +6478,7 @@ const char *Q_stristr(const char *pStr, const char *pSearch)
}
return NULL;
}
}*/
/* <aa4f4> ../engine/sv_main.c:8736 */
qboolean IsSafeFileToDownload(const char *filename)
@ -6516,21 +6516,21 @@ qboolean IsSafeFileToDownload(const char *filename)
|| first != last
|| !first
|| Q_strlen(first) != 4
|| Q_stristr(lwrfilename, ".cfg")
|| Q_stristr(lwrfilename, ".lst")
|| Q_stristr(lwrfilename, ".exe")
|| Q_stristr(lwrfilename, ".vbs")
|| Q_stristr(lwrfilename, ".com")
|| Q_stristr(lwrfilename, ".bat")
|| Q_stristr(lwrfilename, ".dll")
|| Q_stristr(lwrfilename, ".ini")
|| Q_stristr(lwrfilename, ".log")
|| Q_stristr(lwrfilename, "halflife.wad")
|| Q_stristr(lwrfilename, "pak0.pak")
|| Q_stristr(lwrfilename, "xeno.wad")
|| Q_stristr(lwrfilename, ".so")
|| Q_stristr(lwrfilename, ".dylib")
|| Q_stristr(lwrfilename, ".sys"))
|| Q_strstr(lwrfilename, ".cfg")
|| Q_strstr(lwrfilename, ".lst")
|| Q_strstr(lwrfilename, ".exe")
|| Q_strstr(lwrfilename, ".vbs")
|| Q_strstr(lwrfilename, ".com")
|| Q_strstr(lwrfilename, ".bat")
|| Q_strstr(lwrfilename, ".dll")
|| Q_strstr(lwrfilename, ".ini")
|| Q_strstr(lwrfilename, ".log")
|| Q_strstr(lwrfilename, "halflife.wad")
|| Q_strstr(lwrfilename, "pak0.pak")
|| Q_strstr(lwrfilename, "xeno.wad")
|| Q_strstr(lwrfilename, ".so")
|| Q_strstr(lwrfilename, ".dylib")
|| Q_strstr(lwrfilename, ".sys"))
{
return FALSE;
}

BIN
rehlds/lib/libacof32.lib Normal file

Binary file not shown.

Binary file not shown.

View File

@ -769,7 +769,7 @@
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>psapi.lib;ws2_32.lib;$(ProjectDir)../lib/steam_api.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>psapi.lib;ws2_32.lib;$(ProjectDir)../lib/steam_api.lib;$(ProjectDir)../lib/libacof32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<MinimumRequiredVersion>
</MinimumRequiredVersion>
<ModuleDefinitionFile>

224
rehlds/public/asmlib.h Normal file
View File

@ -0,0 +1,224 @@
/*************************** asmlib.h ***************************************
* Author: Agner Fog
* Date created: 2003-12-12
* Last modified: 2013-10-04
* Project: asmlib.zip
* Source URL: www.agner.org/optimize
*
* Description:
* Header file for the asmlib function library.
* This library is available in many versions for different platforms.
* See asmlib-instructions.pdf for details.
*
* (c) Copyright 2003 - 2013 by Agner Fog.
* GNU General Public License http://www.gnu.org/licenses/gpl.html
*****************************************************************************/
#ifndef ASMLIB_H
#define ASMLIB_H
/***********************************************************************
Define compiler-specific types and directives
***********************************************************************/
// Define type size_t
#ifndef _SIZE_T_DEFINED
#include "stddef.h"
#endif
// Define integer types with known size: int32_t, uint32_t, int64_t, uint64_t.
// If this doesn't work then insert compiler-specific definitions here:
#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1600)
// Compilers supporting C99 or C++0x have stdint.h defining these integer types
#include <stdint.h>
#define INT64_SUPPORTED // Remove this if the compiler doesn't support 64-bit integers
#elif defined(_MSC_VER)
// Older Microsoft compilers have their own definition
typedef signed __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef signed __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
#define INT64_SUPPORTED // Remove this if the compiler doesn't support 64-bit integers
#else
// This works with most compilers
typedef signed short int int16_t;
typedef unsigned short int uint16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef long long int64_t;
typedef unsigned long long uint64_t;
#define INT64_SUPPORTED // Remove this if the compiler doesn't support 64-bit integers
#endif
// Turn off name mangling
#ifdef __cplusplus
extern "C" {
#endif
/***********************************************************************
Function prototypes, memory and string functions
***********************************************************************/
void * A_memcpy (void * dest, const void * src, size_t count); // Copy count bytes from src to dest
void * A_memmove(void * dest, const void * src, size_t count); // Same as memcpy, allows overlap between src and dest
void * A_memset (void * dest, int c, size_t count); // Set count bytes in dest to (char)c
int A_memcmp (const void * buf1, const void * buf2, size_t num); // Compares two blocks of memory
size_t GetMemcpyCacheLimit(void); // Data blocks bigger than this will be copied uncached by memcpy and memmove
void SetMemcpyCacheLimit(size_t); // Change limit in GetMemcpyCacheLimit
size_t GetMemsetCacheLimit(void); // Data blocks bigger than this will be stored uncached by memset
void SetMemsetCacheLimit(size_t); // Change limit in GetMemsetCacheLimit
char * A_strcat (char * dest, const char * src); // Concatenate strings dest and src. Store result in dest
char * A_strcpy (char * dest, const char * src); // Copy string src to dest
size_t A_strlen (const char * str); // Get length of zero-terminated string
int A_strcmp (const char * a, const char * b); // Compare strings. Case sensitive
int A_stricmp (const char *string1, const char *string2); // Compare strings. Case insensitive for A-Z only
char * A_strstr (char * haystack, const char * needle); // Search for substring in string
void A_strtolower(char * string); // Convert string to lower case for A-Z only
void A_strtoupper(char * string); // Convert string to upper case for a-z only
size_t A_substring(char * dest, const char * source, size_t pos, size_t len); // Copy a substring for source into dest
size_t A_strspn (const char * str, const char * set); // Find span of characters that belong to set
size_t A_strcspn(const char * str, const char * set); // Find span of characters that don't belong to set
size_t strCountInSet(const char * str, const char * set); // Count characters that belong to set
size_t strcount_UTF8(const char * str); // Counts the number of characters in a UTF-8 encoded string
/***********************************************************************
Function prototypes, miscellaneous functions
***********************************************************************/
uint32_t A_popcount(uint32_t x); // Count 1-bits in 32-bit integer
int RoundD (double x); // Round to nearest or even
int RoundF (float x); // Round to nearest or even
int InstructionSet(void); // Tell which instruction set is supported
char * ProcessorName(void); // ASCIIZ text describing microprocessor
void CpuType(int * vendor, int * family, int * model); // Get CPU vendor, family and model
size_t DataCacheSize(int level); // Get size of data cache
void A_DebugBreak(void); // Makes a debug breakpoint
#ifdef INT64_SUPPORTED
uint64_t ReadTSC(void); // Read microprocessor internal clock (64 bits)
#else
uint32_t ReadTSC(void); // Read microprocessor internal clock (only 32 bits supported by compiler)
#endif
void cpuid_ex (int abcd[4], int eax, int ecx); // call CPUID instruction
static inline void cpuid_abcd (int abcd[4], int eax) {
cpuid_ex(abcd, eax, 0);}
#ifdef __cplusplus
} // end of extern "C"
// Define overloaded versions if compiling as C++
static inline int Round (double x) { // Overload name Round
return RoundD(x);}
static inline int Round (float x) { // Overload name Round
return RoundF(x);}
static inline const char * A_strstr(const char * haystack, const char * needle) {
return A_strstr((char*)haystack, needle);} // Overload A_strstr with const char * version
#endif // __cplusplus
/***********************************************************************
Function prototypes, integer division functions
***********************************************************************/
// Turn off name mangling
#ifdef __cplusplus
extern "C" {
#endif
void setdivisori32(int buffer[2], int d); // Set divisor for repeated division
int dividefixedi32(const int buffer[2], int x); // Fast division with previously set divisor
void setdivisoru32(uint32_t buffer[2], uint32_t d); // Set divisor for repeated division
uint32_t dividefixedu32(const uint32_t buffer[2], uint32_t x); // Fast division with previously set divisor
// Test if emmintrin.h is included and __m128i defined
#if defined(__GNUC__) && defined(_EMMINTRIN_H_INCLUDED) && !defined(__SSE2__)
#error Please compile with -sse2 or higher
#endif
#if defined(_INCLUDED_EMM) || (defined(_EMMINTRIN_H_INCLUDED) && defined(__SSE2__))
#define VECTORDIVISIONDEFINED
// Integer vector division functions. These functions divide an integer vector by a scalar:
// Set divisor for repeated integer vector division
void setdivisorV8i16(__m128i buf[2], int16_t d); // Set divisor for repeated division
void setdivisorV8u16(__m128i buf[2], uint16_t d); // Set divisor for repeated division
void setdivisorV4i32(__m128i buf[2], int32_t d); // Set divisor for repeated division
void setdivisorV4u32(__m128i buf[2], uint32_t d); // Set divisor for repeated division
// Fast division of vector by previously set divisor
__m128i dividefixedV8i16(const __m128i buf[2], __m128i x); // Fast division with previously set divisor
__m128i dividefixedV8u16(const __m128i buf[2], __m128i x); // Fast division with previously set divisor
__m128i dividefixedV4i32(const __m128i buf[2], __m128i x); // Fast division with previously set divisor
__m128i dividefixedV4u32(const __m128i buf[2], __m128i x); // Fast division with previously set divisor
#endif // defined(_INCLUDED_EMM) || (defined(_EMMINTRIN_H_INCLUDED) && defined(__SSE2__))
#ifdef __cplusplus
} // end of extern "C"
#endif // __cplusplus
#ifdef __cplusplus
// Define classes and operator '/' for fast division with fixed divisor
class div_i32;
class div_u32;
static inline int32_t operator / (int32_t x, div_i32 const &D);
static inline uint32_t operator / (uint32_t x, div_u32 const & D);
class div_i32 { // Signed 32 bit integer division
public:
div_i32() { // Default constructor
buffer[0] = buffer[1] = 0;
}
div_i32(int d) { // Constructor with divisor
setdivisor(d);
}
void setdivisor(int d) { // Set divisor
setdivisori32(buffer, d);
}
protected:
int buffer[2]; // Internal memory
friend int32_t operator / (int32_t x, div_i32 const & D);
};
static inline int32_t operator / (int32_t x, div_i32 const &D){// Overloaded operator '/'
return dividefixedi32(D.buffer, x);
}
static inline int32_t operator /= (int32_t &x, div_i32 const &D){// Overloaded operator '/='
return x = x / D;
}
class div_u32 { // Unsigned 32 bit integer division
public:
div_u32() { // Default constructor
buffer[0] = buffer[1] = 0;
}
div_u32(uint32_t d) { // Constructor with divisor
setdivisor(d);
}
void setdivisor(uint32_t d) { // Set divisor
setdivisoru32(buffer, d);
}
protected:
uint32_t buffer[2]; // Internal memory
friend uint32_t operator / (uint32_t x, div_u32 const & D);
};
static inline uint32_t operator / (uint32_t x, div_u32 const & D){ // Overloaded operator '/'
return dividefixedu32(D.buffer, x);
}
static inline uint32_t operator /= (uint32_t &x, div_u32 const &D){// Overloaded operator '/='
return x = x / D;
}
#endif // __cplusplus
#endif // ASMLIB_H

View File

@ -42,14 +42,9 @@ cpuinfo_t cpuinfo;
void Sys_CheckCpuInstructionsSupport(void)
{
unsigned int cpuid_data[4];
int cpuid_data[4];
// eax = 1, ecx = 0
#if defined(__GNUC__)
__get_cpuid(0x1, &cpuid_data[0], &cpuid_data[1], &cpuid_data[2], &cpuid_data[3]);
#else //__GNUC__
__cpuidex((int*)cpuid_data, 1, 0);
#endif //__GNUC__
cpuid_ex(cpuid_data, 1, 0);
cpuinfo.sse3 = (cpuid_data[2] & SSE3_FLAG) ? 1 : 0; // ecx
cpuinfo.ssse3 = (cpuid_data[2] & SSSE3_FLAG) ? 1 : 0;
@ -57,12 +52,7 @@ void Sys_CheckCpuInstructionsSupport(void)
cpuinfo.sse4_2 = (cpuid_data[2] & SSE4_2_FLAG) ? 1 : 0;
cpuinfo.avx = (cpuid_data[2] & AVX_FLAG) ? 1 : 0;
// eax = 7, ecx = 0
#if defined(__GNUC__)
__get_cpuid(0x7, &cpuid_data[0], &cpuid_data[1], &cpuid_data[2], &cpuid_data[3]);
#else //__GNUC__
__cpuidex((int*)cpuid_data, 7, 0);
#endif //__GNUC__
cpuid_ex(cpuid_data, 7, 0);
cpuinfo.avx2 = (cpuid_data[1] & AVX2_FLAG) ? 1 : 0; // ebx
}

View File

@ -5,6 +5,7 @@
#include "osconfig.h"
#include "archtypes.h"
#include "asmlib.h"
#include "mathlib.h"
#include "sys_shared.h"

View File

@ -1,9 +1,9 @@
#include "precompiled.h"
cvar_t sv_rehlds_movecmdrate_max_avg = { "sv_rehlds_movecmdrate_max_avg", "750", 0, 750.0f, NULL };
cvar_t sv_rehlds_movecmdrate_max_burst = { "sv_rehlds_movecmdrate_max_burst", "3500", 0, 3500.0f, NULL };
cvar_t sv_rehlds_stringcmdrate_max_avg = {"sv_rehlds_stringcmdrate_max_avg", "32", 0, 32.0f, NULL};
cvar_t sv_rehlds_stringcmdrate_max_burst = {"sv_rehlds_stringcmdrate_max_burst", "80", 0, 80.0f, NULL};
cvar_t sv_rehlds_movecmdrate_max_burst = { "sv_rehlds_movecmdrate_max_burst", "1125", 0, 1125.0f, NULL };
cvar_t sv_rehlds_stringcmdrate_max_avg = {"sv_rehlds_stringcmdrate_max_avg", "64", 0, 32.0f, NULL};
cvar_t sv_rehlds_stringcmdrate_max_burst = {"sv_rehlds_stringcmdrate_max_burst", "96", 0, 80.0f, NULL};
CMoveCommandRateLimiter g_MoveCommandRateLimiter;
CStringCommandsRateLimiter g_StringCommandsRateLimiter;