/**
 * vim: set ts=4 :
 * =============================================================================
 * SourceMod
 * Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved.
 * =============================================================================
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, version 3.0, as published by the
 * Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * As a special exception, AlliedModders LLC gives you permission to link the
 * code of this program (as well as its derivative works) to "Half-Life 2," the
 * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
 * by the Valve Corporation. You must obey the GNU General Public License in
 * all respects for all other code used. Additionally, AlliedModders LLC grants
 * this exception to all derivative works. AlliedModders LLC defines further
 * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
 * or <http://www.sourcemod.net/license.php>.
 *
 * Version: $Id$
 */

#ifndef _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
#define _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_

#include <string.h>	// size_t

/**
 * @file ITextParsers.h
 * @brief Defines various text/file parsing functions, as well as UTF-8 support code.
 */

//namespace SourceMod
//{

#define SMINTERFACE_TEXTPARSERS_NAME		"ITextParsers"
#define SMINTERFACE_TEXTPARSERS_VERSION		4

/**
 * The INI file format is defined as:
 * WHITESPACE: 0x20, \n, \t, \r
 * IDENTIFIER: A-Z a-z 0-9 _ - , + . $ ? /
 * STRING: Any set of symbols
 *
 * Basic syntax is comprised of SECTIONs.
 * A SECTION is defined as:
 * [SECTIONNAME]
 * OPTION
 * OPTION
 * OPTION...
 *
 * SECTIONNAME is an IDENTIFIER.
 * OPTION can be repeated any number of times, once per line.
 * OPTION is defined as one of:
 *  KEY = "VALUE"
 *  KEY = VALUE
 *  KEY
 * Where KEY is an IDENTIFIER and VALUE is a STRING.
 *
 * WHITESPACE should always be omitted.
 * COMMENTS should be stripped, and are defined as text occurring in:
 * ;<TEXT>
 *
 * Example file below. Note that the second line is technically invalid.
 * The event handler must decide whether this should be allowed.
 * --FILE BELOW--
 * [gaben]
 * hi = clams
 * bye = "NO CLAMS"
 *
 * [valve]
 * cannot
 * maintain
 * products
 */

/**
 * @brief Contains parse events for INI files.
 *
 * Every hook has a default inline body, so a listener only needs to override
 * the events it cares about.
 */
class ITextListener_INI
{
public:
	/**
	 * @brief Returns version number.
	 */
	virtual unsigned int GetTextParserVersion1()
	{
		return SMINTERFACE_TEXTPARSERS_VERSION;
	}
public:
	/**
	 * @brief Called when starting parsing.
	 */
	virtual void ReadINI_ParseStart()
	{
	}

	/**
	 * @brief Called when ending parsing.
	 *
	 * @param halted			True if abnormally halted, false otherwise.
	 */
	virtual void ReadINI_ParseEnd(bool halted)
	{
	}

	/**
	 * @brief Called when a new section is encountered in an INI file.
	 *
	 * @param section			Name of section in between the [ and ] characters.
	 * @param invalid_tokens	True if invalid tokens were detected in the name.
	 * @param close_bracket		True if a closing bracket was detected, false otherwise.
	 * @param extra_tokens		True if extra tokens were detected on the line.
	 * @param curtok			Contains current token in the line where the section name starts.
	 *							You can add to this offset when failing to point to a token.
	 * @return					True to keep parsing, false otherwise.
	 */
	virtual bool ReadINI_NewSection(const char *section,
									bool invalid_tokens,
									bool close_bracket,
									bool extra_tokens,
									unsigned int *curtok)
	{
		return true;
	}

	/**
	 * @brief Called when encountering a key/value pair in an INI file.
	 *
	 * @param key				Name of key.
	 * @param value				String containing value (with quotes stripped, if any).
	 * @param invalid_tokens	Whether or not the key contained invalid tokens.
	 * @param equal_token		There was an '=' sign present (in case the value is missing).
	 * @param quotes			Whether value was enclosed in quotes.
	 * @param curtok			Contains the token index of the start of the value string.
	 *							This can be changed when returning false.
	 * @return					True to keep parsing, false otherwise.
	 */
	virtual bool ReadINI_KeyValue(const char *key,
								  const char *value,
								  bool invalid_tokens,
								  bool equal_token,
								  bool quotes,
								  unsigned int *curtok)
	{
		return true;
	}

	/**
	 * @brief Called after a line has been preprocessed, if it has text.
	 *
	 * @param line				Contents of line.
	 * @param curtok			Pointer to optionally store failed position in string.
	 *
	 * @return					True to keep parsing, false otherwise.
	 */
	virtual bool ReadINI_RawLine(const char *line, unsigned int *curtok)
	{
		return true;
	}
};

/**
 * :TODO: write this in CFG (context free grammar) format so it makes sense
 *
 * The SMC file format is defined as:
 * WHITESPACE: 0x20, \n, \t, \r
 * IDENTIFIER: Any ASCII character EXCLUDING ", {, }, ;, //, / *, or WHITESPACE.
 * STRING: Any set of symbols enclosed in quotes.
 * Note: if a STRING does not have quotes, it is parsed as an IDENTIFIER.
 *
 * Basic syntax is comprised of SECTIONBLOCKs.
 * A SECTIONBLOCK is defined as:
 *
 * SECTIONNAME
 * {
 *  OPTION
 * }
 *
 * OPTION can be repeated any number of times inside a SECTIONBLOCK.
 * A new line will terminate an OPTION, but there can be more than one OPTION per line.
 * OPTION is defined as any of:
 *  "KEY" "VALUE"
 *  SECTIONBLOCK
 *
 * SECTIONNAME, KEY, VALUE, and SINGLEKEY are strings
 * SECTIONNAME cannot have trailing characters if quoted, but the quotes can be optionally removed.
 * If SECTIONNAME is not enclosed in quotes, the entire sectionname string is used (minus surrounding whitespace).
 * If KEY is not enclosed in quotes, the key is terminated at first whitespace.
 * If VALUE is not properly enclosed in quotes, the entire value string is used (minus surrounding whitespace).
 * The VALUE may have inner quotes, but the key string may not.
 *
 * For an example, see configs/permissions.cfg
 *
 * WHITESPACE should be ignored.
 * Comments are text occurring inside the following tokens, and should be stripped
 * unless they are inside literal strings:
 * ;<TEXT>
 * //<TEXT>
 * / *<TEXT>
 */

/**
 * @brief Lists actions to take when an SMC parse hook is done.
 */
enum SMCResult
{
	SMCResult_Continue,			/**< Continue parsing */
	SMCResult_Halt,				/**< Stop parsing here */
	SMCResult_HaltFail			/**< Stop parsing and return SMCError_Custom */
};

/**
 * @brief Lists error codes possible from parsing an SMC file.
 */
enum SMCError
{
	SMCError_Okay = 0,			/**< No error */
	SMCError_StreamOpen,		/**< Stream failed to open */
	SMCError_StreamError,		/**< The stream died... somehow */
	SMCError_Custom,			/**< A custom handler threw an error */
	SMCError_InvalidSection1,	/**< A section was declared without quotes, and had extra tokens */
	SMCError_InvalidSection2,	/**< A section was declared without any header */
	SMCError_InvalidSection3,	/**< A section ending was declared with too many unknown tokens */
	SMCError_InvalidSection4,	/**< A section ending has no matching beginning */
	SMCError_InvalidSection5,	/**< A section beginning has no matching ending */
	SMCError_InvalidTokens,		/**< There were too many unidentifiable strings on one line */
	SMCError_TokenOverflow,		/**< The token buffer overflowed */
	SMCError_InvalidProperty1	/**< A property was declared outside of any section */
};

/**
 * @brief States for line/column
 */
struct SMCStates
{
	unsigned int line;			/**< Current line */
	unsigned int col;			/**< Current col */
};

/**
 * @brief Describes the events available for reading an SMC stream.
 *
 * Every hook has a default inline body; the SMCResult-returning hooks
 * default to SMCResult_Continue.
 */
class ITextListener_SMC
{
public:
	/**
	 * @brief Returns version number.
	 */
	virtual unsigned int GetTextParserVersion2()
	{
		return SMINTERFACE_TEXTPARSERS_VERSION;
	}
public:
	/**
	 * @brief Called when starting parsing.
	 */
	virtual void ReadSMC_ParseStart()
	{
	}

	/**
	 * @brief Called when ending parsing.
	 *
	 * @param halted			True if abnormally halted, false otherwise.
	 * @param failed			True if parsing failed, false otherwise.
	 */
	virtual void ReadSMC_ParseEnd(bool halted, bool failed)
	{
	}

	/**
	 * @brief Called when entering a new section
	 *
	 * @param states		Parsing states.
	 * @param name			Name of section, with the colon omitted.
	 * @return				SMCResult directive.
	 */
	virtual SMCResult ReadSMC_NewSection(const SMCStates *states, const char *name)
	{
		return SMCResult_Continue;
	}

	/**
	 * @brief Called when encountering a key/value pair in a section.
	 *
	 * @param states		Parsing states.
	 * @param key			Key string.
	 * @param value			Value string. If no quotes were specified, this will be NULL,
	 *						and key will contain the entire string.
	 * @return				SMCResult directive.
	 */
	virtual SMCResult ReadSMC_KeyValue(const SMCStates *states, const char *key, const char *value)
	{
		return SMCResult_Continue;
	}

	/**
	 * @brief Called when leaving the current section.
	 *
	 * @param states		Parsing states.
	 * @return				SMCResult directive.
	 */
	virtual SMCResult ReadSMC_LeavingSection(const SMCStates *states)
	{
		return SMCResult_Continue;
	}

	/**
	 * @brief Called after an input line has been preprocessed.
	 *
	 * @param states		Parsing states.
	 * @param line			Contents of the line, null terminated at the position
	 *						of the newline character (thus, no newline will exist).
	 * @return				SMCResult directive.
	 */
	virtual SMCResult ReadSMC_RawLine(const SMCStates *states, const char *line)
	{
		return SMCResult_Continue;
	}
};

/**
 * @brief Contains various text stream parsing functions.
 */
class ITextParsers /*: public SMInterface*/
{
public:
	virtual const char *GetInterfaceName()
	{
		return SMINTERFACE_TEXTPARSERS_NAME;
	}
	virtual unsigned int GetInterfaceVersion()
	{
		return SMINTERFACE_TEXTPARSERS_VERSION;
	}
	/**
	 * @brief Returns whether the requested interface version is supported.
	 *
	 * @param version		Requested version number.
	 * @return				False for versions below 2, true otherwise.
	 */
	virtual bool IsVersionCompatible(unsigned int version)
	{
		/* Was meant to defer to SMInterface::IsVersionCompatible(version)
		 * once the (currently commented-out) SMInterface base is restored.
		 */
		return version >= 2;
	}
public:
	/**
	 * @brief Parses an INI-format file.
	 *
	 * @param file			Path to file.
	 * @param ini_listener	Event handler for reading file.
	 * @param line			If non-NULL, will contain last line parsed (0 if file could not be opened).
	 * @param col			If non-NULL, will contain last column parsed (undefined if file could not be opened).
	 * @param inline_comment	Whether inline comment is allowed.
	 * @return				True if parsing succeeded, false if file couldn't be opened or there was a syntax error.
	 */
	virtual bool ParseFile_INI(const char *file,
								ITextListener_INI *ini_listener,
								unsigned int *line,
								unsigned int *col,
								bool inline_comment = true) =0;

	/**
	 * @brief Parses an SMC-format text file.
	 * Note that the parser makes every effort to obey broken syntax.
	 * For example, if an open brace is missing, but the section name has a colon,
	 * it will let you know. It is up to the event handlers to decide whether to be strict or not.
	 *
	 * @param file			Path to file.
	 * @param smc_listener	Event handler for reading file.
	 * @param states		Optional pointer to store last known states.
	 * @return				An SMCError result code.
	 */
	virtual SMCError ParseFile_SMC(const char *file,
									ITextListener_SMC *smc_listener,
									SMCStates *states) =0;

	/**
	 * @brief Converts an SMCError to a string.
	 *
	 * @param err			SMCError.
	 * @return				String error message, or NULL if none.
	 */
	virtual const char *GetSMCErrorString(SMCError err) =0;

public:
	/**
	 * @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream.
	 * If the current character is not multi-byte, the function returns 1.
	 *
	 * @param stream		Pointer to multi-byte ANSI character string.
	 * @return				Number of bytes in current character.
	 */
	virtual unsigned int GetUTF8CharBytes(const char *stream) =0;

	/**
	 * @brief Returns whether the first multi-byte character in the given stream
	 * is a whitespace character.
	 *
	 * @param stream		Pointer to multi-byte character string.
	 * @return				True if first character is whitespace, false otherwise.
	 */
	virtual bool IsWhitespace(const char *stream) =0;

	/**
	 * @brief Same as ParseFile_SMC, but with an extended error buffer.
	 *
	 * @param file			Path to file.
	 * @param smc_listener	Event handler for reading file.
	 * @param states		Optional pointer to store last known states.
	 * @param buffer		Error message buffer.
	 * @param maxsize		Maximum size of the error buffer.
	 * @return				Error code.
	 */
	virtual SMCError ParseSMCFile(const char *file,
								  ITextListener_SMC *smc_listener,
								  SMCStates *states,
								  char *buffer,
								  size_t maxsize) =0;

	/**
	 * @brief Parses a raw UTF8 stream as an SMC file.
	 *
	 * @param stream		Memory containing data.
	 * @param length		Number of bytes in the stream.
	 * @param smc_listener	Event handler for reading file.
	 * @param states		Optional pointer to store last known states.
	 * @param buffer		Error message buffer.
	 * @param maxsize		Maximum size of the error buffer.
	 * @return				Error code.
	 */
	virtual SMCError ParseSMCStream(const char *stream,
									size_t length,
									ITextListener_SMC *smc_listener,
									SMCStates *states,
									char *buffer,
									size_t maxsize) =0;
};

/**
 * @brief Returns the byte length implied by the first byte of a UTF-8 sequence.
 *
 * Only the byte at *stream is read. If bit 7 is clear the result is 1;
 * otherwise bits 5 and 4 select 2, 3 or 4. Bit 6 and any following bytes
 * are never examined, so the input is not validated: a byte in the
 * 0x80-0xBF range still yields a length of 2 to 4.
 *
 * @param stream		Pointer to the first byte of the character; must be readable.
 * @return				1, 2, 3 or 4.
 */
inline unsigned int _GetUTF8CharBytes(const char *stream)
{
	/* Read through unsigned char so the bit tests do not depend on whether
	 * plain char is signed, without casting away const. */
	const unsigned char c = static_cast<unsigned char>(*stream);

	if (!(c & (1<<7)))
	{
		return 1;
	}
	if (!(c & (1<<5)))
	{
		return 2;
	}
	if (!(c & (1<<4)))
	{
		return 3;
	}
	return 4;
}

//}

extern ITextParsers *textparsers;

#endif //_INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_