From a44d20b26b35a2d9846f8f15fa71a0aab30013f8 Mon Sep 17 00:00:00 2001 From: Arkshine Date: Mon, 7 Jul 2014 20:49:38 +0200 Subject: [PATCH] Regex: Add regex_match_all_c and regex_match_all natives (by Nextra) --- dlls/regex/CRegEx.cpp | 102 ++++++++++++++++++++++++++++++++------ dlls/regex/CRegEx.h | 13 ++++- dlls/regex/module.cpp | 90 +++++++++++++++++++++++---------- plugins/include/regex.inc | 57 ++++++++++++++++++++- 4 files changed, 219 insertions(+), 43 deletions(-) diff --git a/dlls/regex/CRegEx.cpp b/dlls/regex/CRegEx.cpp index efa374c4..4073621f 100755 --- a/dlls/regex/CRegEx.cpp +++ b/dlls/regex/CRegEx.cpp @@ -42,7 +42,7 @@ RegEx::RegEx() re = NULL; mFree = true; subject = NULL; - mSubStrings = 0; + mSubStrings.clear(); } void RegEx::Clear() @@ -54,9 +54,9 @@ void RegEx::Clear() re = NULL; mFree = true; if (subject) - delete [] subject; + delete[] subject; subject = NULL; - mSubStrings = 0; + mSubStrings.clear(); } RegEx::~RegEx() @@ -152,11 +152,11 @@ int RegEx::Match(const char *str) if (mFree || re == NULL) return -1; - + this->ClearMatch(); //save str - subject = new char[strlen(str)+1]; + subject = new char[strlen(str) + 1]; strcpy(subject, str); rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30); @@ -166,37 +166,110 @@ int RegEx::Match(const char *str) if (rc == PCRE_ERROR_NOMATCH) { return 0; - } else { + } + else { mErrorOffset = rc; return -1; } } - mSubStrings = rc; + RegExSub res; + mSubStrings.ensure(rc); + + for (int s = 0; s < rc; ++s) + { + res.start = ovector[2 * s]; + res.end = ovector[2 * s + 1]; + mSubStrings.append(res); + } return 1; } + +int RegEx::MatchAll(const char *str) +{ + int rc = 0; + int rr = 0; + int offset = 0; + + if (mFree || re == NULL) + return -1; + + this->ClearMatch(); + + //save str + subject = new char[strlen(str) + 1]; + strcpy(subject, str); + + RegExSub sub, whole; + while ((rr = pcre_exec(re, NULL, subject, (int)strlen(subject), offset, 0, ovector, 30))) + { + if (rr < 0) + { + if (rr 
== PCRE_ERROR_NOMATCH) + { + break; + } + else + { + mErrorOffset = rr; + + if (rc) + this->ClearMatch(); + + return -1; + } + } + + rc += rr; + mSubStrings.ensure(rc); + + for (int s = 1; s < rr; ++s) + { + sub.start = ovector[2 * s]; + sub.end = ovector[2 * s + 1]; + mSubStrings.append(sub); + } + + offset = ovector[1]; + } + + if (!rc) + return 0; + + sub = mSubStrings.at(0); + whole.start = sub.start; + sub = mSubStrings.back(); + whole.end = sub.end; + + mSubStrings.insert(0, whole); + + return 1; +} + void RegEx::ClearMatch() { // Clears match results mErrorOffset = 0; mError = NULL; if (subject) - delete [] subject; + delete[] subject; subject = NULL; - mSubStrings = 0; + mSubStrings.clear(); } const char *RegEx::GetSubstring(int s, char buffer[], int max) { int i = 0; - if (s >= mSubStrings || s < 0) + if ((size_t)s >= mSubStrings.length() || s < 0) return NULL; - char *substr_a = subject + ovector[2*s]; - int substr_l = ovector[2*s+1] - ovector[2*s]; + RegExSub sub = mSubStrings.at(s); - for (i = 0; i= max) break; @@ -206,5 +279,4 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max) buffer[i] = '\0'; return buffer; -} - +} \ No newline at end of file diff --git a/dlls/regex/CRegEx.h b/dlls/regex/CRegEx.h index 498ab929..ed88824f 100755 --- a/dlls/regex/CRegEx.h +++ b/dlls/regex/CRegEx.h @@ -32,29 +32,40 @@ */ #ifndef _INCLUDE_CREGEX_H #define _INCLUDE_CREGEX_H + +#include class RegEx { public: + struct RegExSub { + int start, end; + }; + RegEx(); ~RegEx(); + bool isFree(bool set=false, bool val=false); void Clear(); int Compile(const char *pattern, const char* flags = NULL); int Compile(const char *pattern, int iFlags); int Match(const char *str); + int MatchAll(const char *str); void ClearMatch(); const char *GetSubstring(int s, char buffer[], int max); + public: int mErrorOffset; const char *mError; - int mSubStrings; + int Count() { return mSubStrings.length(); } + private: pcre *re; bool mFree; int ovector[30]; char *subject; + ke::Vector 
mSubStrings; }; #endif //_INCLUDE_CREGEX_H diff --git a/dlls/regex/module.cpp b/dlls/regex/module.cpp index f175ce6d..d3048a1b 100755 --- a/dlls/regex/module.cpp +++ b/dlls/regex/module.cpp @@ -79,7 +79,7 @@ static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params) { int len; const char *regex = MF_GetAmxString(amx, params[1], 0, &len); - + int id = GetPEL(); RegEx *x = PEL[id]; @@ -95,9 +95,7 @@ static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params) return id + 1; } -// 1.8 includes the last parameter -// Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = ""); -static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params) +cell match(AMX *amx, cell *params, bool all) { int len; const char *str = MF_GetAmxString(amx, params[1], 0, &len); @@ -105,24 +103,29 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params) int id = GetPEL(); RegEx *x = PEL[id]; - + char* flags = NULL; - + if ((params[0] / sizeof(cell)) >= 6) // compiled with 1.8's extra parameter { flags = MF_GetAmxString(amx, params[6], 2, &len); } - + if (x->Compile(regex, flags) == 0) { cell *eOff = MF_GetAmxAddr(amx, params[3]); const char *err = x->mError; *eOff = x->mErrorOffset; - MF_SetAmxString(amx, params[4], err?err:"unknown", params[5]); + MF_SetAmxString(amx, params[4], err ? err : "unknown", params[5]); return -1; } - int e = x->Match(str); + int e; + if (all) + e = x->MatchAll(str); + else + e = x->Match(str); + if (e == -1) { /* there was a match error. destroy this and move on. 
*/ @@ -130,24 +133,38 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params) *res = x->mErrorOffset; x->Clear(); return -2; - } else if (e == 0) { + } + else if (e == 0) { cell *res = MF_GetAmxAddr(amx, params[3]); *res = 0; x->Clear(); return 0; - } else { + } + else { cell *res = MF_GetAmxAddr(amx, params[3]); - *res = x->mSubStrings; + *res = x->Count(); } - return id+1; + return id + 1; } -// native regex_match_c(const string[], Regex:id, &ret); -static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params) +// 1.8 includes the last parameter +// Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = ""); +static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params) +{ + return match(amx, params, false); +} + +// Regex:regex_match_all(const string[], const pattern[], &ret, error[], maxLen, const flags[] = ""); +static cell AMX_NATIVE_CALL regex_match_all(AMX *amx, cell *params) +{ + return match(amx, params, true); +} + +cell match_c(AMX *amx, cell *params, bool all) { int len; - int id = params[2]-1; + int id = params[2] - 1; const char *str = MF_GetAmxString(amx, params[1], 0, &len); if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree()) @@ -155,31 +172,50 @@ static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params) MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id); return 0; } - + RegEx *x = PEL[id]; - int e = x->Match(str); + int e; + if (all) + e = x->MatchAll(str); + else + e = x->Match(str); + if (e == -1) { /* there was a match error. move on. 
*/ cell *res = MF_GetAmxAddr(amx, params[3]); *res = x->mErrorOffset; /* only clear the match results, since the regex object - may still be referenced later */ + may still be referenced later */ x->ClearMatch(); return -2; - } else if (e == 0) { + } + else if (e == 0) { cell *res = MF_GetAmxAddr(amx, params[3]); *res = 0; /* only clear the match results, since the regex object - may still be referenced later */ + may still be referenced later */ x->ClearMatch(); return 0; - } else { - cell *res = MF_GetAmxAddr(amx, params[3]); - *res = x->mSubStrings; - return x->mSubStrings; } + else { + cell *res = MF_GetAmxAddr(amx, params[3]); + *res = x->Count(); + return x->Count(); + } +} + +// native regex_match_c(const string[], Regex:id, &ret); +static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params) +{ + return match_c(amx, params, false); +} + +// native regex_match_all_c(const string[], Regex:id, &ret); +static cell AMX_NATIVE_CALL regex_match_all_c(AMX *amx, cell *params) +{ + return match_c(amx, params, true); } // native regex_match_ex(Regex:id, const string[], &RegexError:ret = REGEX_ERROR_NONE); @@ -219,7 +255,7 @@ static cell AMX_NATIVE_CALL regex_match_ex(AMX *amx, cell *params) } else { - return x->mSubStrings; + return x->Count(); } } @@ -270,6 +306,8 @@ AMX_NATIVE_INFO regex_Natives[] = { {"regex_match", regex_match}, {"regex_match_c", regex_match_c}, {"regex_match_ex", regex_match_ex}, + {"regex_match_all", regex_match_all}, + {"regex_match_all_c", regex_match_all_c}, {"regex_substr", regex_substr}, {"regex_free", regex_free}, {NULL, NULL}, diff --git a/plugins/include/regex.inc b/plugins/include/regex.inc index 976e25d1..32f9646c 100755 --- a/plugins/include/regex.inc +++ b/plugins/include/regex.inc @@ -266,6 +266,61 @@ native Regex:regex_compile_ex(const pattern[], flags = 0, error[]= "", maxLen = */ native regex_match_ex(Handle:regex, const str[], &RegexError:ret = REGEX_ERROR_NONE); +/** + * Matches a string against a pre-compiled regular 
expression pattern, matching all + * occurrences of the pattern inside the string. This is similar to using the "g" flag + * in perl regex. + * + * + * @param pattern The regular expression pattern. + * @param string The string to check. + * @param ret Error code, if applicable, or number of results on success. + * + * @return -2 = Matching error (error code is stored in ret) + * 0 = No match. + * >1 = Number of results. + * + * @note You should free the returned handle (with regex_free()) + * when you are done with this pattern. + * + * @note Use the regex handle passed to this function to extract + * matches with regex_substr(). + */ +native regex_match_all_c(const string[], Regex:pattern, &ret); + +/** + * Matches a string against a regular expression pattern, matching all occurrences of the + * pattern inside the string. This is similar to using the "g" flag in perl regex. + * + * @note If you intend on using the same regular expression pattern + * multiple times, consider using regex_compile and regex_match_c + * instead of making this function reparse the expression each time. + * + * @param string The string to check. + * @param pattern The regular expression pattern. + * @param ret Error code, or result state of the match. + * @param error Error message, if applicable. + * @param maxLen Maximum length of the error buffer. + * @param flags General flags for the regular expression. + * i = Ignore case + * m = Multilines (affects ^ and $ so that they match + * the start/end of a line rather than matching the + * start/end of the string). + * s = Single line (affects . so that it matches any character, + * even new line characters). + * x = Pattern extension (ignore whitespace and # comments). + * + * @return -2 = Matching error (error code is stored in ret) + * -1 = Error in pattern (error message and offset # in error and ret) + * 0 = No match. 
+ * >1 = Handle for getting more information (via regex_substr) + * + * @note Flags only exist in amxmodx 1.8 and later. + * @note You should free the returned handle (with regex_free()) + * when you are done extracting all of the substrings. + */ +native Regex:regex_match_all(const string[], const pattern[], &ret, error[], maxLen, const flags[] = ""); + /** * Matches a string against a regular expression pattern. * @@ -298,4 +353,4 @@ stock regex_match_simple(const str[], const pattern[], flags = 0, error[]="", ma regex_free(regex); return substrings; -} +} \ No newline at end of file