amxmodx/public/ITextParsers.h
Arkshine 0cf5a2e12f Expose ParseFile_INI to API.
Some minor changes have been made in parser:

- Added ReadINI_ParseStart and ReadINI_ParseEnd call.
- Function returns SMCResult instead of bool. To avoid unnecessary complexity and duplicated natives, it feels more appropriate to let these functions share some SMCResult and SMCParse constants. Since this is properly documented, it should be ok.
- Made sure curtok is set to 0 when ptr_val is null, otherwise unexpected value would be passed into the forward.
2014-08-06 09:17:45 +02:00

460 lines
14 KiB
C++

/**
* vim: set ts=4 :
* =============================================================================
* SourceMod
* Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved.
* =============================================================================
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, version 3.0, as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, AlliedModders LLC gives you permission to link the
* code of this program (as well as its derivative works) to "Half-Life 2," the
* "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
* by the Valve Corporation. You must obey the GNU General Public License in
* all respects for all other code used. Additionally, AlliedModders LLC grants
* this exception to all derivative works. AlliedModders LLC defines further
* exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
* or <http://www.sourcemod.net/license.php>.
*
* Version: $Id$
*/
#ifndef _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
#define _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
/**
* @file ITextParsers.h
* @brief Defines various text/file parsing functions, as well as UTF-8 support code.
*/
namespace SourceMod
{
/** Interface name string returned by ITextParsers::GetInterfaceName(). */
#define SMINTERFACE_TEXTPARSERS_NAME "ITextParsers"
/**
 * Interface version returned by ITextParsers::GetInterfaceVersion() and by the
 * listeners' GetTextParserVersion1() / GetTextParserVersion2().
 */
#define SMINTERFACE_TEXTPARSERS_VERSION 4
/**
* The INI file format is defined as:
* WHITESPACE: 0x20, \n, \t, \r
* IDENTIFIER: A-Z a-z 0-9 _ - , + . $ ? /
* STRING: Any set of symbols
*
* Basic syntax is comprised of SECTIONs.
* A SECTION is defined as:
* [SECTIONNAME]
* OPTION
* OPTION
* OPTION...
*
* SECTIONNAME is an IDENTIFIER.
* OPTION can be repeated any number of times, once per line.
* OPTION is defined as one of:
* KEY = "VALUE"
* KEY = VALUE
* KEY
* Where KEY is an IDENTIFIER and VALUE is a STRING.
*
* WHITESPACE should always be omitted.
* COMMENTS should be stripped, and are defined as text occurring in:
* ;<TEXT>
*
* Example file below. Note that
* the second line is technically invalid. The event handler
* must decide whether this should be allowed.
* --FILE BELOW--
* [gaben]
* hi = clams
* bye = "NO CLAMS"
*
* [valve]
* cannot
* maintain
* products
*/
/**
 * @brief Directive a parse hook returns to tell the parser how to proceed.
 *
 * The enumerator values are written out explicitly; they are the same values
 * the declaration order would assign implicitly (0, 1, 2).
 */
enum SMCResult
{
    SMCResult_Continue = 0, /**< Continue parsing */
    SMCResult_Halt     = 1, /**< Stop parsing here */
    SMCResult_HaltFail = 2  /**< Stop parsing and return SMCError_Custom */
};
/**
 * @brief Error codes that parsing an SMC file can produce.
 *
 * The enumerator values are written out explicitly; they are the same values
 * the declaration order would assign implicitly (0 through 11).
 */
enum SMCError
{
    SMCError_Okay             = 0,  /**< No error */
    SMCError_StreamOpen       = 1,  /**< Stream failed to open */
    SMCError_StreamError      = 2,  /**< The stream died... somehow */
    SMCError_Custom           = 3,  /**< A custom handler threw an error */
    SMCError_InvalidSection1  = 4,  /**< A section was declared without quotes, and had extra tokens */
    SMCError_InvalidSection2  = 5,  /**< A section was declared without any header */
    SMCError_InvalidSection3  = 6,  /**< A section ending was declared with too many unknown tokens */
    SMCError_InvalidSection4  = 7,  /**< A section ending has no matching beginning */
    SMCError_InvalidSection5  = 8,  /**< A section beginning has no matching ending */
    SMCError_InvalidTokens    = 9,  /**< There were too many unidentifiable strings on one line */
    SMCError_TokenOverflow    = 10, /**< The token buffer overflowed */
    SMCError_InvalidProperty1 = 11  /**< A property was declared outside of any section */
};
/**
 * @brief Listener interface receiving the events raised while an INI file is parsed.
 *
 * Every hook has a default body, so an implementation overrides only what it
 * needs: the start/end notifications do nothing and the SMCResult hooks return
 * SMCResult_Continue.
 *
 * The declaration order of the virtual functions is kept as-is on purpose;
 * reordering them would change the virtual table layout.
 */
class ITextListener_INI
{
public:
    /**
     * @brief Returns the text parser interface version.
     *
     * @return              SMINTERFACE_TEXTPARSERS_VERSION.
     */
    virtual unsigned int GetTextParserVersion1()
    {
        return SMINTERFACE_TEXTPARSERS_VERSION;
    }

    /**
     * @brief Called when starting parsing.
     */
    virtual void ReadINI_ParseStart()
    {
    }

    /**
     * @brief Called when ending parsing.
     *
     * @param halted        True if abnormally halted, false otherwise.
     * @param failed        True if parsing failed, false otherwise.
     */
    virtual void ReadINI_ParseEnd(bool halted, bool failed)
    {
    }

    /**
     * @brief Called when a new section is encountered in an INI file.
     *
     * @param section           Name of the section, i.e. the text between the [ and ] characters.
     * @param invalid_tokens    True if invalid tokens were detected in the name.
     * @param close_bracket     True if a closing bracket was detected, false otherwise.
     * @param extra_tokens      True if extra tokens were detected on the line.
     * @param curtok            Contains the current token in the line where the section name starts.
     *                          When failing, the handler can add to this offset to point at a token.
     * @return                  SMCResult directive.
     */
    virtual SMCResult ReadINI_NewSection(const char *section, bool invalid_tokens, bool close_bracket, bool extra_tokens, unsigned int *curtok)
    {
        return SMCResult_Continue;
    }

    /**
     * @brief Called when encountering a key/value pair in an INI file.
     *
     * @param key               Name of the key.
     * @param value             String containing the value (with quotes stripped, if any).
     * @param invalid_tokens    Whether or not the key contained invalid tokens.
     * @param equal_token       Whether an '=' sign was present (in case the value is missing).
     * @param quotes            Whether the value was enclosed in quotes.
     * @param curtok            Contains the token index of the start of the value string.
     *                          The handler can change it when reporting a failure.
     *                          NOTE(review): the earlier wording was "when returning false",
     *                          but this hook returns SMCResult; confirm which result applies.
     * @return                  SMCResult directive.
     */
    virtual SMCResult ReadINI_KeyValue(const char *key, const char *value, bool invalid_tokens, bool equal_token, bool quotes, unsigned int *curtok)
    {
        return SMCResult_Continue;
    }

    /**
     * @brief Called after a line has been preprocessed, if it has text.
     *
     * @param line          Contents of the line.
     * @param lineno        The line number it occurs on.
     * @param curtok        Pointer in which the handler may store the failed position in the string.
     * @return              SMCResult directive.
     */
    virtual SMCResult ReadINI_RawLine(const char *line, unsigned int lineno, unsigned int *curtok)
    {
        return SMCResult_Continue;
    }
};
/**
* :TODO: write this in CFG (context free grammar) format so it makes sense
*
* The SMC file format is defined as:
* WHITESPACE: 0x20, \n, \t, \r
* IDENTIFIER: Any ASCII character EXCLUDING ", {, }, ;, //, / *, or WHITESPACE.
* STRING: Any set of symbols enclosed in quotes.
* Note: if a STRING does not have quotes, it is parsed as an IDENTIFIER.
*
* Basic syntax is comprised of SECTIONBLOCKs.
* A SECTIONBLOCK is defined as:
*
* SECTIONNAME
* {
* OPTION
* }
*
* OPTION can be repeated any number of times inside a SECTIONBLOCK.
* A new line will terminate an OPTION, but there can be more than one OPTION per line.
* OPTION is defined as any of:
* "KEY" "VALUE"
* SECTIONBLOCK
*
* SECTIONNAME, KEY, VALUE, and SINGLEKEY are strings
* SECTIONNAME cannot have trailing characters if quoted, but the quotes can be optionally removed.
* If SECTIONNAME is not enclosed in quotes, the entire sectionname string is used (minus surrounding whitespace).
* If KEY is not enclosed in quotes, the key is terminated at first whitespace.
* If VALUE is not properly enclosed in quotes, the entire value string is used (minus surrounding whitespace).
* The VALUE may have inner quotes, but the key string may not.
*
* For an example, see configs/permissions.cfg
*
* WHITESPACE should be ignored.
* Comments are text occurring inside the following tokens, and should be stripped
* unless they are inside literal strings:
* ;<TEXT>
* //<TEXT>
* / *<TEXT> */
/**
 * @brief Line/column position reported by the SMC parser.
 */
struct SMCStates
{
    /** Current line */
    unsigned int line;
    /** Current col */
    unsigned int col;
};
/**
 * @brief Listener interface receiving the events raised while an SMC stream is read.
 *
 * Every hook has a default body, so an implementation overrides only what it
 * needs: the start/end notifications do nothing and the SMCResult hooks return
 * SMCResult_Continue.
 *
 * The declaration order of the virtual functions is kept as-is on purpose;
 * reordering them would change the virtual table layout.
 */
class ITextListener_SMC
{
public:
    /**
     * @brief Returns the text parser interface version.
     *
     * @return              SMINTERFACE_TEXTPARSERS_VERSION.
     */
    virtual unsigned int GetTextParserVersion2()
    {
        return SMINTERFACE_TEXTPARSERS_VERSION;
    }

    /**
     * @brief Called when starting parsing.
     */
    virtual void ReadSMC_ParseStart()
    {
    }

    /**
     * @brief Called when ending parsing.
     *
     * @param halted        True if abnormally halted, false otherwise.
     * @param failed        True if parsing failed, false otherwise.
     */
    virtual void ReadSMC_ParseEnd(bool halted, bool failed)
    {
    }

    /**
     * @brief Called when entering a new section.
     *
     * @param states        Parsing states.
     * @param name          Name of the section, with the colon omitted.
     * @return              SMCResult directive.
     */
    virtual SMCResult ReadSMC_NewSection(const SMCStates *states, const char *name)
    {
        return SMCResult_Continue;
    }

    /**
     * @brief Called when encountering a key/value pair in a section.
     *
     * @param states        Parsing states.
     * @param key           Key string.
     * @param value         Value string. If no quotes were specified, this will be NULL,
     *                      and key will contain the entire string.
     * @return              SMCResult directive.
     */
    virtual SMCResult ReadSMC_KeyValue(const SMCStates *states, const char *key, const char *value)
    {
        return SMCResult_Continue;
    }

    /**
     * @brief Called when leaving the current section.
     *
     * @param states        Parsing states.
     * @return              SMCResult directive.
     */
    virtual SMCResult ReadSMC_LeavingSection(const SMCStates *states)
    {
        return SMCResult_Continue;
    }

    /**
     * @brief Called after an input line has been preprocessed.
     *
     * @param states        Parsing states.
     * @param line          Contents of the line, null terminated at the position
     *                      of the newline character (thus, no newline will exist).
     * @return              SMCResult directive.
     */
    virtual SMCResult ReadSMC_RawLine(const SMCStates *states, const char *line)
    {
        return SMCResult_Continue;
    }
};
/**
 * @brief Interface exposing the INI/SMC file parsers and the UTF-8 helper routines.
 *
 * The declaration order of the virtual functions is kept as-is on purpose;
 * reordering them would change the virtual table layout.
 *
 * NOTE: several signatures use size_t, and this header includes nothing itself,
 * so the including translation unit has to make size_t visible beforehand.
 */
class ITextParsers /*: public SMInterface*/
{
public:
    /**
     * @brief Returns the interface name.
     *
     * @return              SMINTERFACE_TEXTPARSERS_NAME.
     */
    virtual const char *GetInterfaceName()
    {
        return SMINTERFACE_TEXTPARSERS_NAME;
    }

    /**
     * @brief Returns the interface version.
     *
     * @return              SMINTERFACE_TEXTPARSERS_VERSION.
     */
    virtual unsigned int GetInterfaceVersion()
    {
        return SMINTERFACE_TEXTPARSERS_VERSION;
    }

    /**
     * @brief Tells whether a requested interface version can be served.
     *
     * @param version       Requested version.
     * @return              False for versions below 2, true otherwise.
     */
    virtual bool IsVersionCompatible(unsigned int version)
    {
        /* Stands in for the disabled SMInterface::IsVersionCompatible(version) call. */
        return version >= 2;
    }

    /**
     * @brief Parses an INI-format file.
     *
     * @param file          Path to file.
     * @param ini_listener  Event handler for reading file.
     * @param line          If non-NULL, will contain last line parsed (0 if file could not be opened).
     * @param col           If non-NULL, will contain last column parsed (undefined if file could not be opened).
     * @return              An SMCError result code.
     */
    virtual SMCError ParseFile_INI(const char *file,
                                   ITextListener_INI *ini_listener,
                                   unsigned int *line,
                                   unsigned int *col) = 0;

    /**
     * @brief Parses an SMC-format text file.
     * Note that the parser makes every effort to obey broken syntax.
     * For example, if an open brace is missing, but the section name has a colon,
     * it will let you know. It is up to the event handlers to decide whether to be strict or not.
     *
     * @param file          Path to file.
     * @param smc_listener  Event handler for reading file.
     * @param states        Optional pointer to store last known states.
     * @return              An SMCError result code.
     */
    virtual SMCError ParseFile_SMC(const char *file,
                                   ITextListener_SMC *smc_listener,
                                   SMCStates *states) = 0;

    /**
     * @brief Converts an SMCError to a string.
     *
     * @param err           SMCError.
     * @return              String error message, or NULL if none.
     */
    virtual const char *GetSMCErrorString(SMCError err) = 0;

    /**
     * @brief Returns the number of bytes that a multi-byte character contains in a UTF-8 stream.
     * If the current character is not multi-byte, the function returns 1.
     *
     * @param stream        Pointer to the character to measure in a multi-byte string.
     * @return              Number of bytes in current character.
     */
    virtual unsigned int GetUTF8CharBytes(const char *stream) = 0;

    /**
     * @brief Returns whether the first multi-byte character in the given stream
     * is a whitespace character.
     *
     * @param stream        Pointer to multi-byte character string.
     * @return              True if first character is whitespace, false otherwise.
     */
    virtual bool IsWhitespace(const char *stream) = 0;

    /**
     * @brief Same as ParseFile_SMC, but with an extended error buffer.
     *
     * @param file          Path to file.
     * @param smc_listener  Event handler for reading file.
     * @param states        Optional pointer to store last known states.
     * @param buffer        Error message buffer.
     * @param maxsize       Maximum size of the error buffer.
     * @return              Error code.
     */
    virtual SMCError ParseSMCFile(const char *file,
                                  ITextListener_SMC *smc_listener,
                                  SMCStates *states,
                                  char *buffer,
                                  size_t maxsize) = 0;

    /**
     * @brief Parses a raw UTF8 stream as an SMC file.
     *
     * @param stream        Memory containing data.
     * @param length        Number of bytes in the stream.
     * @param smc_listener  Event handler for reading file.
     * @param states        Optional pointer to store last known states.
     * @param buffer        Error message buffer.
     * @param maxsize       Maximum size of the error buffer.
     * @return              Error code.
     */
    virtual SMCError ParseSMCStream(const char *stream,
                                    size_t length,
                                    ITextListener_SMC *smc_listener,
                                    SMCStates *states,
                                    char *buffer,
                                    size_t maxsize) = 0;
};
/**
 * @brief Returns the length in bytes of the UTF-8 character starting at @p stream.
 *
 * Only the first byte is inspected and classified by its lead-byte pattern:
 *   0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
 *
 * Any other byte (a stray continuation byte 10xxxxxx, or 0xF8-0xFF, which is
 * never a valid lead byte) is reported as a single byte. This way malformed
 * input advances one byte at a time instead of being treated as the start of
 * a 2-4 byte sequence, which could make a caller step over the terminating
 * NUL.
 *
 * The trailing bytes are not validated: for a truncated sequence the returned
 * length can still exceed the bytes actually present, so callers must not
 * assume the whole sequence exists.
 *
 * @param stream        Pointer to the byte to classify; must not be NULL.
 * @return              Number of bytes in the current character (1 to 4).
 */
inline unsigned int _GetUTF8CharBytes(const char *stream)
{
    const unsigned char lead = static_cast<unsigned char>(*stream);

    if ((lead & 0x80) == 0x00)
    {
        return 1; /* plain ASCII (this includes the NUL terminator) */
    }
    if ((lead & 0xE0) == 0xC0)
    {
        return 2;
    }
    if ((lead & 0xF0) == 0xE0)
    {
        return 3;
    }
    if ((lead & 0xF8) == 0xF0)
    {
        return 4;
    }

    /* Continuation byte or invalid lead byte: consume just this byte. */
    return 1;
}
}
/**
 * Global pointer to the text parser interface. It is only declared here, so
 * the definition has to be supplied elsewhere.
 */
extern SourceMod::ITextParsers *textparsers;
#endif //_INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_