From c52943b03c89f09fc0dd37172851f489d926e368 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Tue, 15 Apr 2008 05:36:50 +0000 Subject: [PATCH] oops, forgot a file i hope this broke the nightlies --- amxmodx/sm_trie_tpl.h | 1102 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1102 insertions(+) create mode 100644 amxmodx/sm_trie_tpl.h diff --git a/amxmodx/sm_trie_tpl.h b/amxmodx/sm_trie_tpl.h new file mode 100644 index 00000000..61b8f1e5 --- /dev/null +++ b/amxmodx/sm_trie_tpl.h @@ -0,0 +1,1102 @@ +/** + * vim: set ts=4 : + * ============================================================================= + * SourceMod + * Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved. + * ============================================================================= + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License, version 3.0, as published by the + * Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, AlliedModders LLC gives you permission to link the + * code of this program (as well as its derivative works) to "Half-Life 2," the + * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software + * by the Valve Corporation. You must obey the GNU General Public License in + * all respects for all other code used. Additionally, AlliedModders LLC grants + * this exception to all derivative works. AlliedModders LLC defines further + * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007), + * or <http://www.sourcemod.net/license.php>.
// * Version: $Id$ (tail of the license header opened above)
// */

#ifndef _INCLUDE_SOURCEMOD_TEMPLATED_TRIE_H_
#define _INCLUDE_SOURCEMOD_TEMPLATED_TRIE_H_

/* NOTE(review): the header names were unreadable in the reviewed copy of this
 * file. These four cover every library symbol used below: placement new,
 * malloc/realloc/free, memcpy/memset/strcmp/strlen/strcpy, and assert.
 */
#include <new>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

/**
 * @brief Usage state of one slot in the flattened trie array.
 *
 * Node_Unused must stay 0: slots are recycled with memset(0).
 */
enum NodeType
{
	Node_Unused = 0,		/* Node is not being used (sparse) */
	Node_Arc,				/* Node is part of an arc and does not terminate */
	Node_Term,				/* Node is a terminator */
};

/**
 * @brief Trie class for storing key/value pairs, based on double array tries.
 * @file sm_trie_tpl.h
 *
 * Keys are NUL-terminated byte strings. Values of type K are copy-constructed
 * into raw storage with placement new and destroyed explicitly, so K needs a
 * copy constructor; insert() additionally default-constructs and assigns a
 * temporary K when it has to split an existing terminator.
 *
 * For full works cited and implementation overview, there is a big comment
 * block at the bottom of this file.
 */
template <typename K>
class KTrie
{
	class KTrieNode;
public:
	/**
	 * @brief Clears all set objects in the trie, including the empty-string key.
	 */
	void clear()
	{
		/* The empty key lives outside m_base, so run_destructors() never sees it.
		 * Drop it here, otherwise it would survive while size() reports 0.
		 */
		if (m_empty != NULL && m_empty->valset)
		{
			m_empty->value.~K();
			m_empty->valset = false;
		}

		run_destructors();
		internal_clear();
	}

	/**
	 * @brief Removes a key from the trie.
	 *
	 * Only the stored value is destroyed; the node itself stays in place.
	 *
	 * @param key		Key to remove.
	 * @return			True on success, false if key was never set.
	 */
	bool remove(const char *key)
	{
		KTrieNode *node = internal_retrieve(key);
		if (!node || !node->valset)
		{
			return false;
		}

		node->value.~K();
		node->valset = false;

		m_numElements--;

		return true;
	}

	/**
	 * @brief Retrieves a pointer to the object stored at a given key.
	 *
	 * @param key		Key to retrieve.
	 * @return			Pointer to object, or NULL if key was not found or not set.
	 */
	K * retrieve(const char *key)
	{
		KTrieNode *node = internal_retrieve(key);
		if (!node || !node->valset)
		{
			return NULL;
		}
		return &node->value;
	}

	/**
	 * @brief Inserts or updates the object stored at a key.
	 *
	 * @param key		Key to update or insert.
	 * @param obj		Object to store at the key.
	 * @return			True on success, false on failure.
	 */
	bool replace(const char *key, const K & obj)
	{
		KTrieNode *prev_node = internal_retrieve(key);
		if (!prev_node)
		{
			return insert(key, obj);
		}

		if (prev_node->valset)
		{
			prev_node->value.~K();
		}
		else
		{
			/* The node exists (interior arc, or a key that was removed) but
			 * holds no value yet. Mark it set and count it, otherwise the
			 * value constructed below could never be retrieved.
			 */
			prev_node->valset = true;
			m_numElements++;
		}

		new (&prev_node->value) K(obj);

		return true;
	}

	/**
	 * @brief Inserts an object at a key.
	 *
	 * @param key		Key to insert at.
	 * @param obj		Object to store at the key.
	 * @return			True on success, false if the key is already set or
	 *					insertion otherwise failed.
	 */
	bool insert(const char *key, const K & obj)
	{
		unsigned int lastidx = 1;		/* the last node index */
		unsigned int curidx;			/* current node index */
		const char *keyptr = key;		/* input stream at current token */
		KTrieNode *node = NULL;			/* current node being processed */
		unsigned int q;					/* temporary var for x_check results */

		/**
		 * Empty strings are a special case, since there are no productions.  We could
		 * probably rework it to use BASE[0] but this hack is easier.
		 */
		if (*key == '\0')
		{
			if (m_empty != NULL && m_empty->valset)
			{
				return false;
			}

			if (m_empty == NULL)
			{
				m_empty = (KTrieNode *)malloc(sizeof(KTrieNode));
				if (m_empty == NULL)
				{
					return false;
				}
			}

			m_empty->valset = true;
			new (&m_empty->value) K(obj);

			m_numElements++;

			return true;
		}

		/* Start traversing at the root node (1) */
		do
		{
			/* Find where the next character is, then advance */
			curidx = m_base[lastidx].idx + charval(*keyptr);
			keyptr++;

			/* Check if this slot is supposed to be empty.  If so, we need to handle CASES 1/2:
			 * Insertion without collisions
			 */
			if ( (curidx > m_baseSize) || (m_base[curidx].mode == Node_Unused) )
			{
				/* A base index never exceeds m_baseSize and an offset never exceeds 255,
				 * so one doubling (the array starts at 256) always brings curidx in range.
				 */
				if (curidx > m_baseSize && !grow())
				{
					return false;
				}
				node = &m_base[curidx];
				node->parent = lastidx;
				if (*keyptr == '\0')
				{
					node->mode = Node_Arc;
				}
				else
				{
					node->idx = x_addstring(keyptr);
					node->mode = Node_Term;
				}
				node->valset = true;
				new (&node->value) K(obj);

				m_numElements++;

				return true;
			}

			node = &m_base[curidx];

			if (node->parent != lastidx)
			{
				/* Collision!  We have to split up the tree here.  CASE 4:
				 * Insertion when a new word is inserted with a collision.
				 * NOTE: This is the hardest case to handle.  All below examples are based on:
				 * BACHELOR, BADGE, inserting BABY.
				 * The problematic production here is A -> B, where B is already being used.
				 *
				 * This process has to rotate one half of the 'A' arc.  We generate two lists:
				 *  Outgoing Arcs - Anything leaving this 'A'
				 *  Incoming Arcs - Anything going to this 'A'
				 * Whichever list is smaller will be moved.  Note that this works because the intersection
				 * affects both arc chains, and moving one will make the slot available to either.
				 */
				KTrieNode *cur;

				/* Find every node arcing from the last node.
				 * I.e. for BACHELOR, BADGE, BABY,
				 * The arcs leaving A will be C and D, but our current node is B -> *.
				 * Thus, we use the last index (A) to find the base for arcs leaving A.
				 */
				unsigned int outgoing_base = m_base[lastidx].idx;
				unsigned int outgoing_list[256];
				unsigned int outgoing_count = 0;	/* count the current index here */
				unsigned int outgoing_limit = 255;
				cur = &m_base[outgoing_base] + 1;

				if (outgoing_base + outgoing_limit > m_baseSize)
				{
					outgoing_limit = m_baseSize - outgoing_base;
				}

				for (unsigned int i=1; i<=outgoing_limit; i++,cur++)
				{
					if (cur->mode == Node_Unused || cur->parent != lastidx)
					{
						continue;
					}
					outgoing_list[outgoing_count++] = i;
				}
				/* The slot we want is listed last so it can be popped off again below */
				outgoing_list[outgoing_count++] = curidx - outgoing_base;

				/* Now we need to find all the arcs leaving our parent...
				 * Note: the inconsistency is the base of our parent.
				 */
				assert(m_base[node->parent].mode == Node_Arc);
				unsigned int incoming_list[256];
				unsigned int incoming_base = m_base[node->parent].idx;
				unsigned int incoming_count = 0;
				unsigned int incoming_limit = 255;
				cur = &m_base[incoming_base] + 1;

				if (incoming_base + incoming_limit > m_baseSize)
				{
					incoming_limit = m_baseSize - incoming_base;
				}

				assert(incoming_limit > 0 && incoming_limit <= 255);

				for (unsigned int i=1; i<=incoming_limit; i++,cur++)
				{
					if ((cur->mode == Node_Arc || cur->mode == Node_Term)
						&& cur->parent == node->parent)
					{
						incoming_list[incoming_count++] = i;
					}
				}

				if (incoming_count < outgoing_count + 1)
				{
					q = x_check_multi(incoming_list, incoming_count);

					/* x_check_multi() may have grown (and moved) the array */
					node = &m_base[curidx];

					/* If we're incoming, we need to modify our parent */
					m_base[node->parent].idx = q;

					/* For each node in the "to move" list,
					 * Relocate the node's info to the new position.
					 */
					for (unsigned int i=0; i<incoming_count; i++)
					{
						unsigned int oldidx = incoming_base + incoming_list[i];
						unsigned int newidx = q + incoming_list[i];
						if (oldidx == lastidx)
						{
							/* The node we came from is a sibling being moved; follow it */
							lastidx = newidx;
						}
						relocate_node(oldidx, newidx);
					}
					/* The colliding node was part of the move, so 'node' now names a free slot */
				}
				else
				{
					q = x_check_multi(outgoing_list, outgoing_count);

					/* If we're outgoing, we need to modify our own base */
					m_base[lastidx].idx = q;

					/* Take the last index (curidx) out of the list.  Technically we are not moving this,
					 * since it's already being used by something else.
					 */
					outgoing_count--;

					/* For each node in the "to move" list,
					 * Relocate the node's info to the new position.
					 */
					for (unsigned int i=0; i<outgoing_count; i++)
					{
						relocate_node(outgoing_base + outgoing_list[i], q + outgoing_list[i]);
					}

					/* Take the invisible node and use it as our new node */
					node = &m_base[q + outgoing_list[outgoing_count]];
				}

				/* We're finally done! */
				node->parent = lastidx;
				if (*keyptr == '\0')
				{
					node->mode = Node_Arc;
				}
				else
				{
					node->idx = x_addstring(keyptr);
					node->mode = Node_Term;
				}
				node->valset = true;
				new (&node->value) K(obj);

				m_numElements++;

				return true;
			}

			/* See what's in the next node - special case if terminator! */
			if (node->mode == Node_Term)
			{
				/* If we're a terminator, we need to handle CASE 3:
				 * Insertion when a terminating collision occurs
				 */
				char *term = &m_stringtab[node->idx];
				/* Do an initial browsing to make sure they're not the same string */
				if (strcmp(keyptr, term) == 0)
				{
					if (!node->valset)
					{
						node->valset = true;
						new (&node->value) K(obj);
						m_numElements++;
						return true;
					}
					/* Same string.  We can't insert. */
					return false;
				}
				/* For each matching character pair, we need to disband the terminator.
				 * This splits the similar prefix into a single arc path.
				 * First, save the old values so we can move them to a new node.
				 * Next, for each loop:
				 *  Take the current (invalid) node, and point it to the next arc base.
				 *  Set the current node to the node at the next arc.
				 */
				K oldvalue;
				bool oldvalset = node->valset;
				if (oldvalset)
				{
					oldvalue = node->value;
				}
				if (*term == *keyptr)
				{
					while (*term == *keyptr)
					{
						/* Find the next free slot in the check array.
						 * This is the "vector base" essentially
						 */
						q = x_check(*term);
						node = &m_base[curidx];
						/* Point the node to the next new base */
						node->idx = q;
						node->mode = Node_Arc;
						if (node->valset == true)
						{
							node->value.~K();
							node->valset = false;
						}
						/* Advance the input stream and local variables */
						lastidx = curidx;
						curidx = q + charval(*term);
						node = &m_base[curidx];
						/* Make sure the new current node has its parent set. */
						node->parent = lastidx;
						node->mode = Node_Arc;	/* Just in case we run x_check again */
						*term = '\0';			/* Unmark the string table here */
						term++;
						keyptr++;
					}
				}
				else if (node->valset)
				{
					node->valset = false;
					node->value.~K();
				}
				/* We're done inserting new pairs.  If one of them is exhausted,
				 * we take special shortcuts.
				 */
				if (*term == '\0')		//EX: BADGERHOUSE added over B -> ADGER.
				{
					/* First backpatch the current node - it ends the newly split terminator.
					 * In the example, this would mean the node is the production from R -> ?
					 * This node ends the old BADGER, so we set it here.
					 */
					node->valset = oldvalset;
					if (node->valset)
					{
						new (&node->value) K(oldvalue);
					}

					/* The terminator was split up, but pieces of keyptr remain.
					 * We need to generate a new production, in this example, R -> H,
					 * with H being a terminator to OUSE.  Thus we get:
					 * B,A,D,G,E,R*,H*->OUSE (* = value set).
					 * NOTE: parent was last set at the end of the while loop.
					 */
					/* Get the new base and apply re-basing */
					q = x_check(*keyptr);
					node = &m_base[curidx];

					node->idx = q;
					node->mode = Node_Arc;
					lastidx = curidx;
					/* Finish the final node */
					curidx = q + charval(*keyptr);
					node = &m_base[curidx];
					keyptr++;
					/* Optimize - don't add to string table if there's nothing more to eat */
					if (*keyptr == '\0')
					{
						node->mode = Node_Arc;
					}
					else
					{
						node->idx = x_addstring(keyptr);
						node->mode = Node_Term;
					}
					node->parent = lastidx;
					node->valset = true;
					new (&node->value) K(obj);
				}
				else if (*keyptr == '\0')
				{						//EX: BADGER added over B -> ADGERHOUSE
					/* First backpatch the current node - it ends newly split input string.
					 * This is the exact opposite of the above procedure.
					 */
					node->valset = true;
					new (&node->value) K(obj);

					/* Get the new base and apply re-basing */
					q = x_check(*term);
					node = &m_base[curidx];

					node->idx = q;
					node->mode = Node_Arc;
					lastidx = curidx;
					/* Finish the final node */
					curidx = q + charval(*term);
					node = &m_base[curidx];
					term++;
					/* Optimize - don't add to string table if there's nothing more to eat */
					if (*term == '\0')
					{
						node->mode = Node_Arc;
					}
					else
					{
						node->idx = (term - m_stringtab);	/* Already in the string table! */
						node->mode = Node_Term;
					}
					node->parent = lastidx;
					node->valset = oldvalset;
					if (node->valset)
					{
						new (&node->value) K(oldvalue);
					}
				}
				else
				{
					/* Finally, we have to create two new nodes instead of just one. */
					node->mode = Node_Arc;

					/* Get the new base and apply re-basing */
					q = x_check2(*keyptr, *term);
					node = &m_base[curidx];

					node->idx = q;
					lastidx = curidx;

					/* Re-create the old terminated node */
					curidx = q + charval(*term);
					node = &m_base[curidx];
					term++;
					node->valset = oldvalset;
					if (node->valset)
					{
						new (&node->value) K(oldvalue);
					}
					node->parent = lastidx;
					if (*term == '\0')
					{
						node->mode = Node_Arc;
					}
					else
					{
						node->mode = Node_Term;
						/* Must be taken before x_addstring() below, which may move the table */
						node->idx = (term - m_stringtab);	/* Already in the string table! */
					}

					/* Create the new keyed input node */
					curidx = q + charval(*keyptr);
					node = &m_base[curidx];
					keyptr++;
					node->valset = true;
					new (&node->value) K(obj);
					node->parent = lastidx;
					if (*keyptr == '\0')
					{
						node->mode = Node_Arc;
					}
					else
					{
						node->mode = Node_Term;
						node->idx = x_addstring(keyptr);
					}
				}

				m_numElements++;

				/* Phew! */
				return true;
			}

			assert(node->mode == Node_Arc);
			lastidx = curidx;
		} while (*keyptr != '\0');

		assert(node);

		/* If we've exhausted the string and we have a valid reached node,
		 * the production rule already existed.  Make sure it's valid to set first.
		 */

		/* We have to be an Arc.  If the last result was anything else, we would have returned a new
		 * production earlier.
		 */
		assert(node->mode == Node_Arc);

		if (!node->valset)
		{
			node->valset = true;
			new (&node->value) K(obj);
			m_numElements++;
			return true;
		}

		return false;
	}

	/**
	 * @brief Iterates over the trie returning all known values.
	 *
	 * Note: This function is for debugging.  Do not use it as a
	 * production iterator since it's inefficient.  Keys are visited in
	 * ascending byte order.  The empty-string key is stored outside the
	 * node array and is not visited.  Keys longer than the buffer are
	 * reported truncated.
	 *
	 * The callback function takes:
	 *  (KTrie)          - Pointer to this Trie
	 *  (const char *)   - String containing key name.
	 *  (K &)            - By-reference object at the key.
	 *  (data)           - User pointer.
	 *
	 * @param buffer		Buffer to use as a key name cache.
	 * @param maxlength		Maximum length of the key name buffer.
	 * @param data			User pointer for passing to the iterator.
	 * @param func			Iterator callback function.
	 */
	void bad_iterator(char *buffer,
					  size_t maxlength,
					  void *data,
					  void (*func)(KTrie *, const char *, K & obj, void *data))
	{
		/* A zero-length buffer cannot even hold the terminator, and the
		 * "maxlength - 1" bounds used by the walker would wrap around.
		 */
		if (buffer == NULL || maxlength == 0)
		{
			return;
		}

		bad_iterator_r(buffer, maxlength, 0, data, func, 1);
	}

private:
	/**
	 * @brief Recursive worker for bad_iterator().
	 *
	 * @param buffer		Key name buffer shared by all recursion levels.
	 * @param maxlength		Size of buffer, in bytes (must be non-zero).
	 * @param buf_pos		Number of key bytes already in buffer.
	 * @param data			User pointer forwarded to func.
	 * @param func			Callback invoked for every node with a value.
	 * @param root			Index of the arc node whose children are visited.
	 */
	void bad_iterator_r(char *buffer,
						size_t maxlength,
						size_t buf_pos,
						void *data,
						void (*func)(KTrie *, const char *, K & obj, void *data),
						unsigned int root)
	{
		char *term;
		unsigned int idx, limit, start;

		limit = 255;
		start = m_base[root].idx;

		/* Bound our limits */
		if (start + limit > m_baseSize)
		{
			limit = m_baseSize - start;
		}

		/* Search for strings */
		for (unsigned int i = 1; i <= limit; i++)
		{
			idx = start + i;
			if (m_base[idx].mode == Node_Unused
				|| m_base[idx].parent != root)
			{
				continue;
			}

			/* Restored after each child.  An unconditional decrement would
			 * corrupt the position whenever the buffer was already full and
			 * nothing had been appended.
			 */
			size_t save_buf_pos = buf_pos;

			if (m_base[idx].mode == Node_Arc)
			{
				if (buf_pos < maxlength - 1)
				{
					buffer[buf_pos++] = (char)i;
				}

				if (m_base[idx].valset)
				{
					buffer[buf_pos] = '\0';
					func(this, buffer, m_base[idx].value, data);
				}

				bad_iterator_r(buffer,
							   maxlength,
							   buf_pos,
							   data,
							   func,
							   idx);

				buf_pos = save_buf_pos;
			}
			else if (m_base[idx].mode == Node_Term
					 && m_base[idx].valset == true)
			{
				if (buf_pos < maxlength - 1)
				{
					buffer[buf_pos++] = (char)i;
				}
				if (buf_pos < maxlength - 1)
				{
					size_t destlen, j;

					/* The rest of the key lives in the string table */
					term = &m_stringtab[m_base[idx].idx];
					destlen = strlen(term);
					for (j = 0;
						 j < destlen && j + buf_pos < maxlength - 1;
						 j++)
					{
						buffer[buf_pos + j] = term[j];
					}
					buf_pos += j;
				}
				buffer[buf_pos] = '\0';

				func(this, buffer, m_base[idx].value, data);

				buf_pos = save_buf_pos;
			}
		}
	}
public:
	/**
	 * @brief Creates an empty trie with 256 usable nodes and a 256 byte string table.
	 *
	 * NOTE(review): the two allocations are not checked for failure.
	 */
	KTrie()
	{
		m_base = (KTrieNode *)malloc(sizeof(KTrieNode) * (256 + 1));
		m_stringtab = (char *)malloc(sizeof(char) * 256);
		m_baseSize = 256;
		m_stSize = 256;
		m_empty = NULL;
		m_numElements = 0;

		internal_clear();
	}
	/**
	 * @brief Destroys every stored value and releases all storage.
	 */
	~KTrie()
	{
		if (m_empty != NULL && m_empty->valset)
		{
			m_empty->value.~K();
			m_empty->valset = false;
		}
		free(m_empty);
		run_destructors();
		free(m_base);
		free(m_stringtab);
	}
	/**
	 * @brief Passes every stored value to a custom destructor and marks it unset.
	 *
	 * The element count is left untouched.  Values marked unset here are not
	 * destroyed again by clear() or the class destructor.
	 *
	 * @param dtor		Function called with a pointer to each stored value.
	 */
	void run_destructor(void (*dtor)(K * ptr))
	{
		for (size_t i = 0; i <= m_baseSize; i++)
		{
			if (m_base[i].valset)
			{
				dtor(&m_base[i].value);
				m_base[i].valset = false;
			}
		}

		/* The empty key is stored outside m_base; treat it like any other value */
		if (m_empty != NULL && m_empty->valset)
		{
			dtor(&m_empty->value);
			m_empty->valset = false;
		}
	}
private:
	class KTrieNode
	{
		friend class KTrie;
	private:
		/**
		 * For Node_Arc, this index stores the 'base' offset to the next arc chain.
		 * I.e. to jump from this arc to character C, it will be at base[idx+C].
		 * For Node_Term, this is an index into the string table.
		 */
		unsigned int idx;

		/**
		 * This contains the prior arc that we must have come from.
		 * For example, if arc 63 has a base jump of index 12, and we want to see if
		 * there is a valid character C, the parent of 12+C must be 63.
		 */
		unsigned int parent;
		K value;				/* Value associated with this node */
		NodeType mode;			/* Current usage type of the node */
		bool valset;			/* Whether or not a value is set */
	};
private:
	/**
	 * @brief Walks the trie and returns the node a key ends on.
	 *
	 * The returned node may hold no value (valset == false); callers check.
	 *
	 * @param key		Key to look up.
	 * @return			Node for the key, or NULL if no such path exists.  For
	 *					the empty key this is m_empty, which may be NULL.
	 */
	KTrieNode *internal_retrieve(const char *key)
	{
		unsigned int lastidx = 1;		/* the last node index */
		unsigned int curidx;			/* current node index */
		const char *keyptr = key;		/* input stream at current token */
		KTrieNode *node = NULL;			/* current node being processed */

		if (!*key)
		{
			return m_empty;
		}

		/* Start traversing at the root node */
		do
		{
			/* Find where the next character is, then advance */
			curidx = m_base[lastidx].idx + charval(*keyptr);
			keyptr++;

			/* Bounds first, so no pointer past the array is ever formed */
			if (curidx > m_baseSize)
			{
				return NULL;
			}
			node = &m_base[curidx];

			/* Check if this slot is supposed to be empty or is a collision */
			if (node->mode == Node_Unused || node->parent != lastidx)
			{
				return NULL;
			}

			if (node->mode == Node_Term)
			{
				/* The remainder of the key must equal the stored tail exactly */
				char *term = &m_stringtab[node->idx];
				return (strcmp(keyptr, term) == 0) ? node : NULL;
			}

			lastidx = curidx;
		} while (*keyptr != '\0');

		return node;
	}
	/**
	 * @brief Doubles the size of the node array.
	 *
	 * Every pointer into m_base is invalid after a successful call.
	 *
	 * @return			True on success, false if allocation failed (the trie
	 *					is left unchanged in that case).
	 */
	bool grow()
	{
		/* The current # of nodes in the tree is trie->baseSize + 1 */
		unsigned int cur_size = m_baseSize;
		unsigned int new_size = cur_size * 2;

		KTrieNode *new_base = (KTrieNode *)malloc((new_size + 1) * sizeof(KTrieNode));
		if (!new_base)
		{
			return false;
		}

		/* Bulk-copy the bookkeeping fields, zero the new tail (== Node_Unused) */
		memcpy(new_base, m_base, sizeof(KTrieNode) * (m_baseSize + 1));
		memset(&new_base[cur_size + 1], 0, (new_size - cur_size) * sizeof(KTrieNode));

		for (size_t i = 0; i <= m_baseSize; i++)
		{
			if (m_base[i].valset)
			{
				/* Placement construct+copy the object, then placement destroy the old. */
				new (&new_base[i].value) K(m_base[i].value);
				m_base[i].value.~K();
			}
		}

		free(m_base);
		m_base = new_base;
		m_baseSize = new_size;

		return true;
	}
	/**
	 * @brief Maps a key character to its array offset (0-255), whatever the signedness of char.
	 */
	inline unsigned char charval(char c)
	{
		return (unsigned char)c;
	}
	/**
	 * @brief Resets the node array, string table and element count.
	 *
	 * Does not run destructors and does not touch m_empty.
	 */
	void internal_clear()
	{
		m_tail = 0;
		m_numElements = 0;

		memset(m_base, 0, sizeof(KTrieNode) * (m_baseSize + 1));
		memset(m_stringtab, 0, sizeof(char) * m_stSize);

		/* Sentinel root node */
		m_base[1].idx = 1;
		m_base[1].mode = Node_Arc;
		m_base[1].parent = 1;
	}
	/**
	 * @brief Destroys every value stored in the node array.
	 *
	 * valset flags are left as they are; callers either wipe the array
	 * afterwards (clear) or free it (destructor).
	 */
	void run_destructors()
	{
		for (size_t i = 0; i <= m_baseSize; i++)
		{
			if (m_base[i].valset)
			{
				m_base[i].value.~K();
			}
		}
	}
	/**
	 * @brief Moves one node to a free slot and re-parents its children.
	 *
	 * NOTE(review): the relocation loops of insert() were unreadable in the
	 * reviewed copy of this file.  This helper is reconstructed from the
	 * surviving tail of those loops and the surrounding comments; compare it
	 * against the upstream SourceMod header before merging.
	 *
	 * @param oldidx	Index the node currently occupies.
	 * @param newidx	Unused index to move it to.
	 */
	void relocate_node(unsigned int oldidx, unsigned int newidx)
	{
		/* Copy the bookkeeping fields, then move the value properly */
		memcpy(&m_base[newidx], &m_base[oldidx], sizeof(KTrieNode));
		if (m_base[oldidx].valset)
		{
			new (&m_base[newidx].value) K(m_base[oldidx].value);
			m_base[oldidx].value.~K();
		}
		assert(m_base[m_base[newidx].parent].mode == Node_Arc);

		/* Zeroing marks the old slot Node_Unused again */
		memset(&m_base[oldidx], 0, sizeof(KTrieNode));

		/* If we are not a terminator, our children still name the old slot as parent */
		if (m_base[newidx].mode == Node_Arc)
		{
			KTrieNode *check_base = &m_base[m_base[newidx].idx] + 1;
			unsigned int outgoing_limit = m_baseSize - m_base[newidx].idx;
			if (outgoing_limit > 255)
			{
				outgoing_limit = 255;
			}
			for (unsigned int j=1; j<=outgoing_limit; j++, check_base++)
			{
				if (check_base->parent == oldidx)
				{
					check_base->parent = newidx;
				}
			}
		}
	}
	/**
	 * @brief Appends a string (with terminator) to the string table.
	 *
	 * May move m_stringtab; pointers into the table are invalid afterwards.
	 * NOTE(review): the realloc() result is not checked, as in the original.
	 *
	 * @param ptr		String to append.
	 * @return			Offset of the copy inside the string table.
	 */
	unsigned int x_addstring(const char *ptr)
	{
		size_t len = strlen(ptr) + 1;

		if (m_tail + len >= m_stSize)
		{
			while (m_tail + len >= m_stSize)
			{
				m_stSize *= 2;
			}
			m_stringtab = (char *)realloc(m_stringtab,m_stSize);
		}

		unsigned int tail = m_tail;
		strcpy(&m_stringtab[tail], ptr);
		m_tail += len;

		return tail;
	}
	/**
	 * @brief Finds a base index whose slot for one character is unused.
	 *
	 * Grows the array (invalidating node pointers) when nothing fits.
	 * NOTE(review): a failed grow() is not detected here and would recurse forever.
	 *
	 * @param c			Character that must fit.
	 * @param start		First base index to try.
	 * @return			Base index i such that m_base[i + c] is unused.
	 */
	unsigned int x_check(char c, unsigned int start=1)
	{
		unsigned char _c = charval(c);
		unsigned int to_check = m_baseSize - _c;
		for (unsigned int i=start; i<=to_check; i++)
		{
			if (m_base[i+_c].mode == Node_Unused)
			{
				return i;
			}
		}

		grow();

		return x_check(c, to_check+1);
	}
	/**
	 * @brief Like x_check(), but the slots for two characters must both be unused.
	 *
	 * @param c1		First character that must fit.
	 * @param c2		Second character that must fit.
	 * @param start		First base index to try.
	 * @return			Base index i such that both m_base[i + c1] and m_base[i + c2] are unused.
	 */
	unsigned int x_check2(char c1, char c2, unsigned int start=1)
	{
		unsigned char _c1 = charval(c1);
		unsigned char _c2 = charval(c2);
		unsigned int to_check = m_baseSize - (_c1 > _c2 ? _c1 : _c2);
		for (unsigned int i=start; i<=to_check; i++)
		{
			if (m_base[i+_c1].mode == Node_Unused
				&& m_base[i+_c2].mode == Node_Unused)
			{
				return i;
			}
		}

		grow();

		return x_check2(c1, c2, to_check+1);
	}
	/**
	 * @brief Like x_check(), but for an arbitrary list of offsets.
	 *
	 * @param offsets	Offsets that must all be free relative to the result.
	 * @param count		Number of entries in offsets.
	 * @param start		First base index to try.
	 * @return			Base index i such that m_base[i + offsets[j]] is unused for every j.
	 */
	unsigned int x_check_multi(
		unsigned int offsets[],
		unsigned int count,
		unsigned int start=1)
	{
		KTrieNode *cur;
		unsigned int to_check = m_baseSize;
		unsigned int highest = 0;

		/* The largest offset decides how far the base may go without leaving the array */
		for (unsigned int i=0; i<count; i++)
		{
			if (offsets[i] > highest)
			{
				highest = offsets[i];
			}
		}

		to_check -= highest;

		for (unsigned int i=start; i<=to_check; i++)
		{
			bool okay = true;
			for (unsigned int j=0; j<count; j++)
			{
				cur = &m_base[i+offsets[j]];
				if (cur->mode != Node_Unused)
				{
					okay = false;
					break;
				}
			}
			if (okay)
			{
				return i;
			}
		}

		grow();

		return x_check_multi(offsets, count, to_check+1);
	}
public:
	/**
	 * @brief Returns the bytes held by the node array and the string table.
	 *
	 * The separately allocated empty-key node is not counted.
	 */
	size_t mem_usage()
	{
		return (sizeof(KTrieNode) * (m_baseSize))
			+ m_stSize
			+ sizeof(KTrieNode);
	}
	/**
	 * @brief Returns the number of keys that currently hold a value.
	 */
	size_t size()
	{
		return m_numElements;
	}
private:
	KTrieNode *m_base;			/* Base array for the sparse tables */
	KTrieNode *m_empty;			/* Special case for empty strings */
	char *m_stringtab;			/* String table pointer */
	unsigned int m_baseSize;	/* Size of the base array, in members */
	unsigned int m_stSize;		/* Size of the string table, in bytes */
	unsigned int m_tail;		/* Current unused offset into the string table */
	size_t m_numElements;		/* Number of elements in use */
};

/**
 * Double Array Trie algorithm, based on:
 *  An Efficient Implementation of Trie Structures, by
 *   Jun-ichi Aoe, Katsushi Morimoto, and Takashi Sato
 *    from Software - Practice and Experience, Vol. 22(9), 695-721 (September 1992)
 *
 * A Trie is a simple data structure which stores strings as DFAs, with each
 * transition state being a string entry.  For example, observe the following strings:
 *
 * BAILOPAN, BAT, BACON, BACK
 * These transition as the following production rules:
 *  B ->  ...                B
 *   A -> ...                BA
 *    I -> ...               BAI
 *     LOPAN                 BAILOPAN
 *    T -> ...               BAT
 *    C ->                   BAC
 *     O -> ...              BACO
 *      N                    BACON
 *     K                     BACK
 *
 * The standard implementation for this - using lists - gives a slow linear lookup, somewhere between
 * O(N+M) and O(log n).  A faster implementation is proposed in the paper above, which is based on compacting
 * the transition states into two arrays.  In the paper's implementation, two arrays are used, and thus it is
 * called the "Double Array" algorithm.  However, the CHECK array's size is maintained the same as BASE,
 * so they can be combined into one structure.  The array seems complex at first, but is very simple: it is a
 * tree structure flattened out into a single vector.  I am calling this implementation the Flat Array Trie.
 *
 * BASE[] is an array where each member is a node in the Trie.  The node can either be UNUSED (empty), an ARC
 * (containing an offset to the next set of ARCs), or a TERMINATOR (contains the rest of a string).
 * Each node has an index which must be interpreted based on the node type.  If the node is a TERMINATOR, then the
 * index is an index into a string table, to find the rest of the string.
 * If the node is an ARC, the index is another index into BASE.  For each possible token that can follow the
 * current token, the value of those tokens can be added to the index given in the ARC.  Thus, given a current
 * position and the next desired token, the current arc will jump to another arc which can contain either:
 *  1) An invalid production (collision, no entry exists)
 *  2) An empty production (no entry exists)
 *  3) Another arc label (the string ends here or continues into more productions)
 *  4) A TERMINATOR (the string ends here and contains an unused set of productions)
 *
 * So, given current offset N (starting at N=1), jumping to token C means the next offset will be:
 *  offs = BASE[N] + C
 * Thus, the next node will be at:
 *  BASE[BASE[N] + C]
 *
 * This allows each ARC to specify the base offset for any of its ARC children, like a tree.  Each node specifies
 * its parent ARC -- so if an invalid offset is specified, the parent will not match, and thus no such derived
 * string exists.
 *
 * This means that arrays can be laid out "sparsely," maximizing their usage.  Note that N need not be related to
 * the range of tokens (1-255; token 0 is the string terminator).  I.e., a base index does not have to be at
 * 1, 256, 512, et cetera.  This is why insertion comes with a good deal of complexity.  To insert a new set of
 * tokens T, the algorithm finds a new BASE index N such that BASE[N+T[i]] is unused for each T[i].  Thus,
 * indirection is not necessarily linear; traversing a chain of ARC nodes can _and will_ jump around BASE.
 *
 * Of course, given this level of flexibility in the array organization, there are collisions.  This is largely
 * where insertions become slow, as the old chain must be relocated before the new one is used.  Relocation means
 * finding one or more new base indexes, and this means traversing BASE until an acceptable index is found, such
 * that each offset is unused (see description in previous paragraph).
 *
 * However, it is not insertion time we are concerned about.  The "trie" name comes from reTRIEval.  We are only
 * concerned with lookup and deletion.  Both lookup and deletion are O(k), where k is relative to the length of the
 * input string.  Note that it is best case O(1) and worst case O(k).  Deleting the entire trie needs no tree
 * traversal; clear() makes one linear pass over BASE to run destructors and zero the array.
 */

#endif //_INCLUDE_SOURCEMOD_TEMPLATED_TRIE_H_