From d79f292729bc76b3152ee1467b1446d682dbf220 Mon Sep 17 00:00:00 2001 From: Sandu Liviu Catalin Date: Fri, 16 Jul 2021 20:42:34 +0300 Subject: [PATCH] Replace JSMN with SAJSON. --- module/CMakeLists.txt | 2 +- module/Library/JSON.cpp | 277 +-- module/Library/JSON.hpp | 4 +- vendor/CMakeLists.txt | 2 +- vendor/JSMN/CMakeLists.txt | 7 - vendor/JSMN/include/jsmn.h | 202 --- vendor/JSMN/include/jsmn_defines.h | 137 -- vendor/JSMN/jsmn.c | 857 --------- vendor/SAJSON/CMakeLists.txt | 5 + vendor/SAJSON/include/sajson.h | 2600 ++++++++++++++++++++++++++++ vendor/SAJSON/sajson.cpp | 1 + 11 files changed, 2702 insertions(+), 1392 deletions(-) delete mode 100644 vendor/JSMN/CMakeLists.txt delete mode 100644 vendor/JSMN/include/jsmn.h delete mode 100644 vendor/JSMN/include/jsmn_defines.h delete mode 100644 vendor/JSMN/jsmn.c create mode 100644 vendor/SAJSON/CMakeLists.txt create mode 100644 vendor/SAJSON/include/sajson.h create mode 100644 vendor/SAJSON/sajson.cpp diff --git a/module/CMakeLists.txt b/module/CMakeLists.txt index a70eb93d..f8ec602f 100644 --- a/module/CMakeLists.txt +++ b/module/CMakeLists.txt @@ -133,7 +133,7 @@ if(WIN32 OR MINGW) target_link_libraries(SqModule wsock32 ws2_32 shlwapi) endif() # Link to base libraries -target_link_libraries(SqModule Squirrel fmt::fmt SimpleINI TinyDir ConcurrentQueue JSMN CPR PUGIXML maxminddb libzmq-static) +target_link_libraries(SqModule Squirrel fmt::fmt SimpleINI TinyDir ConcurrentQueue SAJSON CPR PUGIXML maxminddb libzmq-static) # Link to POCO libraries target_link_libraries(SqModule Poco::Foundation Poco::Crypto Poco::Data Poco::Net Poco::JSON Poco::XML) # Does POCO have SQLite support? 
diff --git a/module/Library/JSON.cpp b/module/Library/JSON.cpp index 6683bbd3..1ec4d4d7 100644 --- a/module/Library/JSON.cpp +++ b/module/Library/JSON.cpp @@ -17,90 +17,76 @@ static SQInteger SqToJSON(HSQUIRRELVM vm) noexcept } // ------------------------------------------------------------------------------------------------ -static SQInteger SqFromJson_Push(HSQUIRRELVM vm, const char * str, jsmntok * t, size_t count) noexcept +static SQInteger SqFromJson_Push(HSQUIRRELVM vm, const sajson::value & node) noexcept { - // Are there any elements to process? - if (count == 0) + // Operation result + SQInteger r = SQ_OK; + // Identify element type + switch (node.get_type()) { - // We still need something on the stack - sq_pushnull(vm); - // No token consumed - return 0; - } - // Is this a primitive type? - else if (t->type & JSMN_PRIMITIVE) - { - // Primitive length (in characters) - const jsmnint l = (t->end - t->start); - // Primitive start (character offset) - const char * v = (str + t->start); - // Is this a floating point? - if (memchr(v, '.', l) || memchr(v, 'e', l) || memchr(v, 'E', l)) - { - sq_pushfloat(vm, ConvNum< SQFloat >::FromStr(v)); - } - // Is this an integer? - else if (((v[0] >= '0') && (v[0] <= '9')) || (v[0] == '-') || (v[0] == '+')) - { - sq_pushinteger(vm, ConvNum< SQInteger >::FromStr(v)); - } - // Is this a boolean true? - else if (v[0] == 't') - { - sq_pushbool(vm, SQTrue); - } - // Is this a boolean false? - else if (v[0] == 'f') - { - sq_pushbool(vm, SQFalse); - } - // Is this null? 
- else if (v[0] == 'n') - { + case sajson::TYPE_INTEGER: { + sq_pushinteger(vm, static_cast< SQInteger >(node.get_integer_value())); + } break; + case sajson::TYPE_DOUBLE: { + sq_pushfloat(vm, static_cast< SQFloat >(node.get_double_value())); + } break; + case sajson::TYPE_NULL: { sq_pushnull(vm); - } - // Should never really get here because it should be sanitized by the JSON parser - // But doesn't hurt to have it here in case something out of our scope goes wrong - else - { - return sq_throwerrorf(vm, _SC("Unrecognized JSON primitive: '%.*s'"), l, v); - } - // One token was consumed - return 1; - } - // Is this a string? - else if (t->type & JSMN_STRING) - { - sq_pushstring(vm, (str + t->start), static_cast< SQInteger >(t->end - t->start)); - // One token was consumed - return 1; - } - // Is this an object? - else if (t->type & JSMN_OBJECT) - { - // Number of tokens consumed by this object - SQInteger c = 0, r = SQ_OK; - // Create a new table on the stack - sq_newtableex(vm, static_cast< SQInteger >(t->size)); - // Process object elements - for (jsmnint i = 0; i < t->size; i++) - { - // Locate key token relative to the current token - jsmntok * k = (t + 1 + c); - // Transform the key into a script object on the stack - r = SqFromJson_Push(vm, str, k, count - c); - // Did we fail? - if (SQ_FAILED(r)) - { - break; // Abort - } - // Update consumed tokens - c += r; - // Does the key have an associated value? 
- if (k->size > 0) + } break; + case sajson::TYPE_FALSE: { + sq_pushbool(vm, SQFalse); + } break; + case sajson::TYPE_TRUE: { + sq_pushbool(vm, SQTrue); + } break; + case sajson::TYPE_STRING: { + sq_pushstring(vm, node.as_cstring(), static_cast< SQInteger >(node.get_string_length())); + } break; + case sajson::TYPE_ARRAY: { + // Array length + const size_t n = node.get_length(); + // Create a new array on the stack + sq_newarrayex(vm, static_cast< SQInteger >(n)); + // Process array elements + for (size_t i = 0; i < n; ++i) { // Transform the value into a script object on the stack - r = SqFromJson_Push(vm, str, (t + 1 + c), count - c); + r = SqFromJson_Push(vm, node.get_array_element(i)); + // Did we fail? + if (SQ_FAILED(r)) + { + break; // Abort + } + // At this point we have a value on the stack + r = sq_arrayappend(vm, -2); + // Did we fail? + if (SQ_FAILED(r)) + { + // Discard the value + sq_poptop(vm); + // Abort + break; + } + } + // Anything bad happened? + if (SQ_FAILED(r)) + { + sq_poptop(vm); // Discard the array + } + } break; + case sajson::TYPE_OBJECT: { + // Object length + const size_t n = node.get_length(); + // Create a new table on the stack + sq_newtableex(vm, static_cast< SQInteger >(n)); + // + for (size_t i = 0; i < n; ++i) + { + const auto k = node.get_object_key(i); + // Transform the key into a script object on the stack + sq_pushstring(vm, k.data(), static_cast< SQInteger >(k.length())); + // Transform the value into a script object on the stack + r = SqFromJson_Push(vm, node.get_object_value(i)); // Did we fail? if (SQ_FAILED(r)) { @@ -109,79 +95,30 @@ static SQInteger SqFromJson_Push(HSQUIRRELVM vm, const char * str, jsmntok * t, // Abort break; } - // Update consumed tokens - c += r; + // At this point we have a key and a value on the stack + r = sq_newslot(vm, -3, SQFalse); + // Did we fail? 
+ if (SQ_FAILED(r)) + { + // Discard the key/value pair + sq_pop(vm, 2); + // Abort + break; + } } - else - { - sq_pushnull(vm); // Default to null because a value must exist - } - // At this point we have a key and a value on the stack - r = sq_newslot(vm, -3, SQFalse); - // Did we fail? + // Anything bad happened? if (SQ_FAILED(r)) { - // Discard the key/value pair - sq_pop(vm, 2); - // Abort - break; + sq_poptop(vm); // Discard the table } - } - // Anything bad happened? - if (SQ_FAILED(r)) - { - // Discard the table - sq_poptop(vm); - // Propagate the error - return r; - } - // Return consumed tokens - return c + 1; + } break; + default: + // Should never really get here because it should be sanitized by the JSON parser + // But doesn't hurt to have it here in case something out of our scope goes wrong + r = sq_throwerror(vm, _SC("Unrecognized JSON type")); } - // Is this an array? - else if (t->type & JSMN_ARRAY) - { - // Number of tokens consumed by this array - SQInteger c = 0, r = SQ_OK; - // Create a new array on the stack - sq_newarrayex(vm, static_cast< SQInteger >(t->size)); - // Process array elements - for (jsmnint i = 0; i < t->size; i++) - { - // Transform the value into a script object on the stack - r = SqFromJson_Push(vm, str, (t + 1 + c), count - c); - // Did we fail? - if (SQ_FAILED(r)) - { - break; // Abort - } - // Update consumed tokens - c += r; - // At this point we have a value on the stack - r = sq_arrayappend(vm, -2); - // Did we fail? - if (SQ_FAILED(r)) - { - // Discard the value - sq_poptop(vm); - // Abort - break; - } - } - // Anything bad happened? 
- if (SQ_FAILED(r)) - { - // Discard the array - sq_poptop(vm); - // Propagate the error - return r; - } - // Return consumed tokens - return c + 1; - } - // Should never really get here because it should be sanitized by the JSON parser - // But doesn't hurt to have it here in case something out of our scope goes wrong - return sq_throwerror(vm, _SC("Unrecognized JSON type")); + // Return the result + return r; } // ------------------------------------------------------------------------------------------------ @@ -201,45 +138,17 @@ static SQInteger SqFromJSON(HSQUIRRELVM vm) noexcept { return s.mRes; // Propagate the error } - // Parser context - jsmnparser p; - // Initialize parser - jsmn_init(&p); - // Estimate the number of tokens necessary to parse the specified JSON string - jsmnint r = jsmn_parse(&p, s.mPtr, static_cast< size_t >(s.mLen), nullptr, 0); - // Is there anything to parse? - if (r == 0) - { - // Default to null - sq_pushnull(vm); - // A value was returned - return 1; - } - // See if there was an error - switch (r) - { - case jsmnint(JSMN_ERROR_NOMEM): - return sq_throwerror(vm, _SC("Not enough token memory was provided")); - case jsmnint(JSMN_ERROR_LEN): - return sq_throwerror(vm, _SC("Input data too long")); - case jsmnint(JSMN_ERROR_INVAL): - return sq_throwerror(vm, _SC("Invalid character inside JSON string")); - case jsmnint(JSMN_ERROR_PART): - return sq_throwerror(vm, _SC("The string is not a full JSON packet, more bytes expected")); - case jsmnint(JSMN_ERROR_UNMATCHED_BRACKETS): - return sq_throwerror(vm, _SC("The JSON string has unmatched brackets")); - default: break; // Nothing bad happened - } - // Initialize the token array - std::vector< jsmntok > tks(static_cast< size_t >(r) + 16); - // Initialize parser - jsmn_init(&p); // Attempt to parse the specified JSON string - r = jsmn_parse(&p, s.mPtr, static_cast< size_t >(s.mLen), tks.data(), tks.size()); - // Process the tokens that were parsed from the string - SQInteger res = 
SqFromJson_Push(vm, s.mPtr, tks.data(), p.toknext); + const sajson::document & document = sajson::parse(sajson::dynamic_allocation(), sajson::string(s.mPtr, static_cast<size_t>(s.mLen))); + // See if there was an error + if (!document.is_valid()) + { + return sq_throwerror(vm, document.get_error_message_as_cstring()); + } + // Process the nodes that were parsed from the string + SQInteger r = SqFromJson_Push(vm, document.get_root()); // We either have a value to return or we propagate some error - return SQ_SUCCEEDED(res) ? 1 : res; + return SQ_SUCCEEDED(r) ? 1 : r; } // ================================================================================================ diff --git a/module/Library/JSON.hpp b/module/Library/JSON.hpp index dceb8264..4d7d05e0 100644 --- a/module/Library/JSON.hpp +++ b/module/Library/JSON.hpp @@ -5,9 +5,7 @@ #include "Library/IO/Buffer.hpp" // ------------------------------------------------------------------------------------------------ -extern "C" { - #include <jsmn.h> -} +#include <sajson.h> // ------------------------------------------------------------------------------------------------ namespace SqMod { diff --git a/vendor/CMakeLists.txt b/vendor/CMakeLists.txt index 115809d1..2e32404c 100644 --- a/vendor/CMakeLists.txt +++ b/vendor/CMakeLists.txt @@ -3,7 +3,7 @@ add_subdirectory(Fmt) add_subdirectory(Squirrel) add_subdirectory(SimpleIni) add_subdirectory(TinyDir) -add_subdirectory(JSMN) +add_subdirectory(SAJSON) add_subdirectory(CPR) add_subdirectory(PUGIXML) set(BUILD_TESTING OFF CACHE INTERNAL "" FORCE) diff --git a/vendor/JSMN/CMakeLists.txt b/vendor/JSMN/CMakeLists.txt deleted file mode 100644 index 632c9bb6..00000000 --- a/vendor/JSMN/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Create the JSMN library -add_library(JSMN STATIC include/jsmn.h jsmn.c) -# Library includes -target_include_directories(JSMN PRIVATE ${CMAKE_CURRENT_LIST_DIR}) -target_include_directories(JSMN PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include) -# Compile time options
-target_compile_definitions(JSMN PUBLIC JSMN_UTF8=1) diff --git a/vendor/JSMN/include/jsmn.h b/vendor/JSMN/include/jsmn.h deleted file mode 100644 index d6920f77..00000000 --- a/vendor/JSMN/include/jsmn.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2010 Serge Zaitsev - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef JSMN_H -#define JSMN_H - -#include <stddef.h> - -#include "jsmn_defines.h" - -#ifdef JSMN_SHORT_TOKENS -typedef unsigned short jsmnint; -#else -typedef unsigned int jsmnint; -#endif -#define JSMN_NEG ((jsmnint)-1) - -/** - * JSON type identifier.
Basic types are: - */ -typedef enum jsmntype { - JSMN_UNDEFINED = 0x0000, - JSMN_OBJECT = 0x0001, /*!< Object */ - JSMN_ARRAY = 0x0002, /*!< Array */ - JSMN_STRING = 0x0004, /*!< String */ - JSMN_PRIMITIVE = - 0x0008, /*!< Other primitive: number, boolean (true/false) or null */ - - JSMN_KEY = 0x0010, /*!< is a key */ - JSMN_VALUE = 0x0020, /*!< is a value */ - - /* Complex elements */ - JSMN_CONTAINER = JSMN_OBJECT | JSMN_ARRAY, -#ifndef JSMN_PERMISSIVE_KEY - JSMN_KEY_TYPE = JSMN_STRING, -#else - JSMN_KEY_TYPE = JSMN_STRING | JSMN_PRIMITIVE, -#endif - JSMN_ANY_TYPE = JSMN_OBJECT | JSMN_ARRAY | JSMN_STRING | JSMN_PRIMITIVE, - - JSMN_OBJ_VAL = JSMN_OBJECT | JSMN_VALUE, - JSMN_ARR_VAL = JSMN_ARRAY | JSMN_VALUE, - JSMN_STR_KEY = JSMN_STRING | JSMN_KEY, - JSMN_STR_VAL = JSMN_STRING | JSMN_VALUE, - JSMN_PRI_VAL = JSMN_PRIMITIVE | JSMN_VALUE, -#ifdef JSMN_PERMISSIVE_KEY - JSMN_OBJ_KEY = JSMN_OBJECT | JSMN_KEY, - JSMN_ARR_KEY = JSMN_ARRAY | JSMN_KEY, - JSMN_PRI_KEY = JSMN_PRIMITIVE | JSMN_KEY, -#endif - - /* Primitive extension */ - JSMN_PRI_LITERAL = 0x0040, /*!< true, false, null */ - JSMN_PRI_INTEGER = 0x0080, /*!< 0, 1 - 9 */ - JSMN_PRI_SIGN = 0x0100, /*!< minus sign, '-' or plus sign, '+' */ - JSMN_PRI_DECIMAL = 0x0200, /*!< deminal point '.' 
*/ - JSMN_PRI_EXPONENT = 0x0400, /*!< exponent, 'e' or 'E' */ - - JSMN_PRI_MINUS = JSMN_PRI_SIGN, - - /* Parsing validation, expectations, and state information */ - JSMN_PRI_CONTINUE = 0x0800, /*!< Allow a continuation of a PRIMITIVE */ - JSMN_CLOSE = 0x1000, /*!< Close OBJECT '}' or ARRAY ']' */ - JSMN_COLON = 0x2000, /*!< Colon ':' expected after KEY */ - JSMN_COMMA = 0x4000, /*!< Comma ',' expected after VALUE */ - JSMN_INSD_OBJ = 0x8000, /*!< Inside an OBJECT */ - - /* Parsing rules */ - JSMN_ROOT_INIT = JSMN_ANY_TYPE | JSMN_VALUE, -#ifndef JSMN_PERMISSIVE -#ifndef JSMN_MULTIPLE_JSON - JSMN_ROOT = JSMN_UNDEFINED, -#else - JSMN_ROOT = JSMN_ANY_TYPE | JSMN_VALUE, -#endif - JSMN_OPEN_OBJECT = JSMN_KEY_TYPE | JSMN_KEY | JSMN_CLOSE | JSMN_INSD_OBJ, - JSMN_AFTR_OBJ_KEY = JSMN_VALUE | JSMN_INSD_OBJ | JSMN_COLON, - JSMN_AFTR_OBJ_VAL = JSMN_KEY | JSMN_CLOSE | JSMN_INSD_OBJ | JSMN_COMMA, - JSMN_OPEN_ARRAY = JSMN_ANY_TYPE | JSMN_VALUE | JSMN_CLOSE, - JSMN_AFTR_ARR_VAL = JSMN_VALUE | JSMN_CLOSE | JSMN_COMMA, - JSMN_AFTR_CLOSE = JSMN_CLOSE | JSMN_COMMA, - JSMN_AFTR_COLON = JSMN_ANY_TYPE | JSMN_VALUE | JSMN_INSD_OBJ, - JSMN_AFTR_COMMA_O = JSMN_KEY_TYPE | JSMN_KEY | JSMN_INSD_OBJ, - JSMN_AFTR_COMMA_A = JSMN_ANY_TYPE | JSMN_VALUE, -#else - JSMN_ROOT = JSMN_ANY_TYPE | JSMN_COLON | JSMN_COMMA, - JSMN_ROOT_AFTR_O = JSMN_ANY_TYPE | JSMN_COMMA, - JSMN_OPEN_OBJECT = JSMN_KEY_TYPE | JSMN_KEY | JSMN_CLOSE | JSMN_INSD_OBJ, - JSMN_AFTR_OBJ_KEY = JSMN_VALUE | JSMN_INSD_OBJ | JSMN_COLON, - JSMN_AFTR_OBJ_VAL = JSMN_ANY_TYPE | JSMN_CLOSE | JSMN_INSD_OBJ | JSMN_COMMA, - JSMN_OPEN_ARRAY = JSMN_ANY_TYPE | JSMN_VALUE | JSMN_CLOSE, - JSMN_AFTR_ARR_VAL = JSMN_ANY_TYPE | JSMN_CLOSE | JSMN_COMMA, - JSMN_AFTR_CLOSE = JSMN_ANY_TYPE | JSMN_CLOSE | JSMN_COMMA, - JSMN_AFTR_COLON = JSMN_ANY_TYPE | JSMN_VALUE | JSMN_INSD_OBJ, - JSMN_AFTR_COLON_R = JSMN_ANY_TYPE | JSMN_VALUE, - JSMN_AFTR_COMMA_O = JSMN_KEY_TYPE | JSMN_KEY | JSMN_INSD_OBJ, - JSMN_AFTR_COMMA_A = JSMN_ANY_TYPE | JSMN_VALUE, - 
JSMN_AFTR_COMMA_R = JSMN_ANY_TYPE, -#endif -} jsmntype; - -/*! - * JSMN Error Codes - */ -typedef enum jsmnerr { - JSMN_SUCCESS = 0, - JSMN_ERROR_NOMEM = -1, /*!< Not enough tokens were provided */ - JSMN_ERROR_LEN = -2, /*!< Input data too long */ - JSMN_ERROR_INVAL = -3, /*!< Invalid character inside JSON string */ - JSMN_ERROR_PART = - -4, /*!< The string is not a full JSON packet, more bytes expected */ - JSMN_ERROR_UNMATCHED_BRACKETS = - -5, /*!< The JSON string has unmatched brackets */ -} jsmnerr; - -/*! - * JSMN Boolean - */ -typedef enum jsmnbool { - JSMN_FALSE = 0, - JSMN_TRUE = 1, -} jsmnbool; - -/** - * JSON token description. - */ -typedef struct jsmntok { - jsmntype type; /*!< type (object, array, string etc.) */ - jsmnint start; /*!< start position in JSON data string */ - jsmnint end; /*!< end position in JSON data string */ - jsmnint size; /*!< number of children */ -#ifdef JSMN_PARENT_LINKS - jsmnint parent; /*!< parent id */ -#endif -#ifdef JSMN_NEXT_SIBLING - jsmnint next_sibling; /*!< next sibling id */ -#endif -} jsmntok; - -/** - * JSON parser - * - * Contains an array of token blocks available. Also stores - * the string being parsed now and current position in that string. - */ -typedef struct jsmnparser { - jsmnint pos; /*!< offset in the JSON string */ - jsmnint toknext; /*!< next token to allocate */ - /*!< when tokens == NULL, keeps track of container types to a depth of - * (sizeof(jsmnint) * 8) */ - jsmnint toksuper; /*!< superior token node, e.g. parent object or array */ - /*!< when tokens == NULL, toksuper represents container depth */ - jsmntype expected; /*!< Expected jsmn type(s) */ -} jsmnparser; - -/** - * @brief Create JSON parser over an array of tokens - * - * @param[out] parser jsmn parser - */ -JSMN_API -void jsmn_init(jsmnparser *parser); - -/** - * @brief Run JSON parser - * - * It parses a JSON data string into and array of tokens, each - * describing a single JSON object. 
- * - * @param[in,out] parser jsmn parser - * @param[in] js JSON data string - * @param[in] len JSON data string length - * @param[in,out] tokens pointer to memory allocated for tokens or NULL - * @param[in] num_tokens number of tokens allocated - * @return jsmnint number of tokens found or ERRNO - */ -JSMN_API -jsmnint jsmn_parse(jsmnparser *parser, const char *js, const size_t len, - jsmntok *tokens, const size_t num_tokens); - -#endif /* JSMN_H */ diff --git a/vendor/JSMN/include/jsmn_defines.h b/vendor/JSMN/include/jsmn_defines.h deleted file mode 100644 index c0122090..00000000 --- a/vendor/JSMN/include/jsmn_defines.h +++ /dev/null @@ -1,137 +0,0 @@ -#ifndef JSMN_DEFINES -#define JSMN_DEFINES - -/*! - * If nothing is defined, the default definitions are JSMN_PARENT_LINKS and * - * JSMN_NEXT_SIBLING with a jsmntok field size of 4 bytes (unsigned int). * - * This will parse one json object in a buffer at a time and return after a * - * successful json object parse. To check if there is more data in the * - * buffer that hasn't been parsed, run jsmn_eof. !*/ - -/*! @def JSMN_PARENT_LINKS - * @brief Adds a parent field to the token - * - * This decreases the initial time required to parse a json buffer and - * simplifies the post-processing of token array by adding a link to the id of - * a token's parent. - * This is enabled by default and highly recommended. - */ - -/*! @def JSMN_NEXT_SIBLING - * @brief Adds a next_sibling field to the token - * - * This simplifies the post-processing of token array by adding a link to the id - * of a token's next sibling. - * This is enabled by default and highly recommended. - */ - -/*! @def JSMN_UTF8 - * @brief Add UTF-8 functionality - * - * This allows for stricter parsing of json strings and also allows for the - * conversion of escaped characters (\uXXXX) to UTF-8 and back. - */ - -/*! 
@def JSMN_LOW_MEMORY - * @brief Enables defintions that reduce jsmn's memory footprint for small - * devices and doesn't enable definitions that increase it's footprint. - * - * This enables definitions that reduce jsmn's memory footprint at the cost of - * CPU usage. This is useful for small devices that don't parse json objects - * often and have restrictive memory requirements. - */ - -/*! @def JSMN_SHORT_TOKENS - * @brief Changes the tokens field size from a uint32_t to a uint16_t - * - * This reduces the jsmntok size by half by changing jsmntok field sizes - * from an unsigned int to an unsigned short. NOTE: This reduces the maximum - * possible json string length from 4,294,967,295 to 65,535 minus the size of - * jsmnerr. - */ - -/*! @def JSMN_PERMISSIVE - * @brief Enables all PERMISSIVE definitions - * - * Enables JSMN_PERMISSIVE_KEY, JSMN_PERMISSIVE_PRIMITIVE, and - * JSMN_MULTIPLE_JSON - */ - -/*! @def JSMN_PERMISSIVE_KEY - * @brief Allows PRIMITIVEs to be OBJECT KEYs - */ - -/*! @def JSMN_PERMISSIVE_PRIMITIVE - * @brief Allows PRIMITIVEs to be any contiguous value - * - * This allows PRIMIVITEs to be any contiguous value that does not contain a - * character that has a special meaning to json (`{}[]",:`). NOTE: There is no - * validation of JSMN_PRI_MINUS, JSNM_PRI_DECIMAL, or JSMN_PRI_EXPONENT; - * everything is the base type JSMN_PRIMITIVE. - */ - -/*! @def JSMN_MULTIPLE_JSON - * @brief Allows multiple json objects in a complete buffer - * - * This allows jsmn to parse multiple json objects in a single buffer. - * NOTE: If a single json object is malformed jsmn_parse will return with - * an error. - */ - -/*! @def JSMN_MULTIPLE_JSON_FAIL - * @brief Fails if there is more than one json object in a buffer. 
- */ - -#ifndef JSMN_API -# ifdef JSMN_STATIC -# define JSMN_API static -# else -# define JSMN_API extern -# endif -#endif - -#ifndef JSMN_LOW_MEMORY - -# ifndef JSMN_PARENT_LINKS -# define JSMN_PARENT_LINKS -# endif -# ifndef JSMN_NEXT_SIBLING -# define JSMN_NEXT_SIBLING -# endif - -#else - -# ifndef JSMN_SHORT_TOKENS -# define JSMN_SHORT_TOKENS -# endif - -#endif - -#ifdef JSMN_PERMISSIVE -# ifndef JSMN_PERMISSIVE_KEY -# define JSMN_PERMISSIVE_KEY -# endif -# ifndef JSMN_PERMISSIVE_PRIMITIVE -# define JSMN_PERMISSIVE_PRIMITIVE -# endif -# ifndef JSMN_MULTIPLE_JSON -# define JSMN_MULTIPLE_JSON -# endif -#endif - -#ifdef JSMN_MULTIPLE_JSON_FAIL -# undef JSMN_MULTIPLE_JSON -#endif - -#if (defined(__linux__) || defined(__APPLE__) || defined(ARDUINO)) -# define JSMN_EXPORT __attribute__((visibility("default"))) -# define JSMN_LOCAL __attribute__((visibility("hidden"))) -#elif (defined(_WIN32)) -# define JSMN_EXPORT __declspec(dllexport) -# define JSMN_LOCAL -#else -# define JSMN_EXPORT -# define JSMN_LOCAL -#endif - -#endif /* JSMN_DEFINES */ diff --git a/vendor/JSMN/jsmn.c b/vendor/JSMN/jsmn.c deleted file mode 100644 index 274063d4..00000000 --- a/vendor/JSMN/jsmn.c +++ /dev/null @@ -1,857 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2010 Serge Zaitsev - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "jsmn.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Allocates a fresh unused token from the token pool. - */ -static jsmntok *jsmn_alloc_token(jsmnparser *parser, jsmntok *tokens, - const size_t num_tokens) { - if (parser->toknext >= num_tokens) { - return NULL; - } - - jsmntok *tok; - tok = &tokens[parser->toknext++]; - tok->start = tok->end = JSMN_NEG; - tok->size = 0; -#ifdef JSMN_PARENT_LINKS - tok->parent = JSMN_NEG; -#endif -#ifdef JSMN_NEXT_SIBLING - tok->next_sibling = JSMN_NEG; -#endif - return tok; -} - -/** - * Fills token type and boundaries. 
- */ -static void jsmn_fill_token(jsmntok *token, const jsmntype type, - const jsmnint start, const jsmnint end) { - token->type = type; - token->start = start; - token->end = end; - token->size = 0; -} - -#ifdef JSMN_NEXT_SIBLING -/** - * Set previous child's next_sibling to current token - */ -static void jsmn_next_sibling(jsmnparser *parser, jsmntok *tokens) { - jsmnint sibling; - - /* Start with parent's first child */ - if (parser->toksuper != JSMN_NEG) { - sibling = parser->toksuper + 1; - } else { - sibling = 0; - } - - /* If the first child is the current token */ - if (sibling == parser->toknext - 1) { - return; - } - - /* Loop until we find previous sibling */ - while (tokens[sibling].next_sibling != JSMN_NEG) { - sibling = tokens[sibling].next_sibling; - } - - /* Set previous sibling's next_sibling to current token */ - tokens[sibling].next_sibling = parser->toknext - 1; -} -#endif - -static jsmnbool is_whitespace(const char c) { - if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { - return JSMN_TRUE; - } - return JSMN_FALSE; -} - -static jsmnbool is_hexadecimal(const char c) { - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || - (c >= 'a' && c <= 'f')) { - return JSMN_TRUE; - } - return JSMN_FALSE; -} - -/* TODO: Confusing function name */ -static jsmnbool is_character(const char c) { - if ((c >= 0x20 && c <= 0x21) || (c >= 0x23 && c <= 0x5B) || (c >= 0x5D)) { - return JSMN_TRUE; - } - return JSMN_FALSE; -} - -static jsmnbool is_special(const char c) { - if (c == '{' || c == '}' || c == '[' || c == ']' || c == '"' || c == ':' || - c == ',') { - return JSMN_TRUE; - } - return JSMN_FALSE; -} - -/** - * Fills next available token with JSON primitive. 
- */ -static jsmnint jsmn_parse_primitive(jsmnparser *parser, const char *js, - const size_t len, jsmntok *tokens, - const size_t num_tokens) { - /* If a PRIMITIVE wasn't expected */ - if (!(parser->expected & (JSMN_PRIMITIVE | JSMN_PRI_CONTINUE))) { - return JSMN_ERROR_INVAL; - } - - jsmnint pos; - jsmntype type; - jsmntype expected = JSMN_CLOSE; - - /** - * Find beginning of the primitive - * TODO: See if it is really necessary. - * Shouldn't parser stay at the last valid state in case of an error? - * In this case it should be right before primitive is parsed. - */ - if (!(parser->expected & JSMN_PRI_CONTINUE)) { - pos = parser->pos; - } else { - if (tokens != NULL) { - pos = tokens[parser->toknext - 1].start; - } else { - pos = parser->pos; - while (pos != JSMN_NEG && !is_whitespace(js[pos]) && - !is_special(js[pos]) && is_character(js[pos])) { - pos--; - } - pos++; - } - } - type = JSMN_PRIMITIVE; - -#ifndef JSMN_PERMISSIVE_PRIMITIVE - if (js[pos] == 't' || js[pos] == 'f' || js[pos] == 'n') { - char *literal = NULL; - jsmnint size = 0; - if (js[pos] == 't') { - literal = "true"; - size = 4; - } else if (js[pos] == 'f') { - literal = "false"; - size = 5; - } else if (js[pos] == 'n') { - literal = "null"; - size = 4; - } - jsmnint i; - for (i = 1, pos++; i < size; i++, pos++) { - if (pos == len || js[pos] == '\0') { - return JSMN_ERROR_PART; - } else if (js[pos] != literal[i]) { - return JSMN_ERROR_INVAL; - } - } - type |= JSMN_PRI_LITERAL; - if (pos == len || js[pos] == '\0') { - goto found; - } - } else { - expected = JSMN_PRI_MINUS | JSMN_PRI_INTEGER; - for (; pos < len && js[pos] != '\0'; pos++) { - switch (js[pos]) { - case '0': - if (!(expected & JSMN_PRI_INTEGER)) { - return JSMN_ERROR_INVAL; - } - if (type & JSMN_PRI_EXPONENT) { - expected = JSMN_PRI_INTEGER | JSMN_CLOSE; - } else if (type & JSMN_PRI_DECIMAL) { - expected = JSMN_PRI_INTEGER | JSMN_PRI_EXPONENT | JSMN_CLOSE; - } else if (parser->pos == pos || - (parser->pos + 1 == pos && (type & 
JSMN_PRI_MINUS))) { - expected = JSMN_PRI_DECIMAL | JSMN_PRI_EXPONENT | JSMN_CLOSE; - } else { - expected = JSMN_PRI_INTEGER | JSMN_PRI_DECIMAL | JSMN_PRI_EXPONENT | - JSMN_CLOSE; - } - break; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (!(expected & JSMN_PRI_INTEGER)) { - return JSMN_ERROR_INVAL; - } - if (type & JSMN_PRI_EXPONENT) { - expected = JSMN_PRI_INTEGER | JSMN_CLOSE; - } else if (type & JSMN_PRI_DECIMAL) { - expected = JSMN_PRI_INTEGER | JSMN_PRI_EXPONENT | JSMN_CLOSE; - } else { - expected = JSMN_PRI_INTEGER | JSMN_PRI_DECIMAL | JSMN_PRI_EXPONENT | - JSMN_CLOSE; - } - break; - case '-': - if (!(expected & JSMN_PRI_MINUS)) { - return JSMN_ERROR_INVAL; - } - expected = JSMN_PRI_INTEGER; - if (parser->pos == pos) { - type |= JSMN_PRI_MINUS; - } - break; - case '+': - if (!(expected & JSMN_PRI_SIGN)) { - return JSMN_ERROR_INVAL; - } - expected = JSMN_PRI_INTEGER; - break; - case '.': - if (!(expected & JSMN_PRI_DECIMAL)) { - return JSMN_ERROR_INVAL; - } - type |= JSMN_PRI_DECIMAL; - expected = JSMN_PRI_INTEGER; - break; - case 'e': - case 'E': - if (!(expected & JSMN_PRI_EXPONENT)) { - return JSMN_ERROR_INVAL; - } - type |= JSMN_PRI_EXPONENT; - expected = JSMN_PRI_SIGN | JSMN_PRI_INTEGER; - break; - default: - if (!(expected & JSMN_CLOSE)) { - return JSMN_ERROR_INVAL; - } - goto check_primitive_border; - } - } - if (!(expected & JSMN_CLOSE)) { - return JSMN_ERROR_INVAL; - } else { - goto found; - } - } -check_primitive_border: - switch (js[pos]) { - case ' ': - case '\t': - case '\n': - case '\r': - case ',': - case '}': - case ']': - goto found; - case '"': - case ':': - case '{': - case '[': - return JSMN_ERROR_INVAL; - case '\0': - goto found; - default: - return JSMN_ERROR_INVAL; - } -#else - for (; pos < len && js[pos] != '\0'; pos++) { - switch (js[pos]) { - case ' ': - case '\t': - case '\n': - case '\r': - case ',': - case '}': - case ']': - case ':': - - case '{': - case '[': - 
case '"': - goto found; - default: /* to quiet a warning from gcc */ - break; - } - if (!is_character(js[pos])) { - return JSMN_ERROR_INVAL; - } - } -#endif - -found: - expected = parser->expected; - if (parser->toksuper != JSMN_NEG) { - /* OBJECT KEY, strict query */ - if ((parser->expected & (JSMN_KEY | JSMN_INSD_OBJ)) == - (JSMN_KEY | JSMN_INSD_OBJ)) { - parser->expected = JSMN_AFTR_OBJ_KEY; - type |= JSMN_KEY | JSMN_INSD_OBJ; - /* OBJECT VALUE, VALUE is implicit */ - } else if (parser->expected & JSMN_INSD_OBJ) { - parser->expected = JSMN_AFTR_OBJ_VAL; - type |= JSMN_VALUE | JSMN_INSD_OBJ; -#ifdef JSMN_PERMISSIVE - /* OBJECT VALUE at the ROOT level */ - } else if (parser->toksuper == JSMN_NEG) { - parser->expected = JSMN_ROOT_AFTR_O; - type |= JSMN_VALUE; -#endif - /* ARRAY VALUE, VALUE is implicit */ - } else { - parser->expected = JSMN_AFTR_ARR_VAL; - type |= JSMN_VALUE; - } - } else { - parser->expected = JSMN_ROOT; - type |= JSMN_VALUE; - } - if (pos == len || js[pos] == '\0') { - parser->expected |= JSMN_PRI_CONTINUE; - } - - if (tokens == NULL) { - parser->pos = pos - 1; - return JSMN_SUCCESS; - } - - jsmntok *token; - if (!(expected & JSMN_PRI_CONTINUE)) { - token = jsmn_alloc_token(parser, tokens, num_tokens); - if (token == NULL) { - parser->expected = expected; - return JSMN_ERROR_NOMEM; - } - jsmn_fill_token(token, type, parser->pos, pos); - } else { - token = &tokens[parser->toknext - 1]; - jsmn_fill_token(token, type, token->start, pos); - } - parser->pos = pos; -#ifdef JSMN_PARENT_LINKS - token->parent = parser->toksuper; -#endif -#ifdef JSMN_NEXT_SIBLING - jsmn_next_sibling(parser, tokens); -#endif - - if (parser->toksuper != JSMN_NEG) { - if (!(expected & JSMN_PRI_CONTINUE)) { - tokens[parser->toksuper].size++; - } - - if (!(tokens[parser->toksuper].type & JSMN_CONTAINER)) { -#ifdef JSMN_PARENT_LINKS - parser->toksuper = tokens[parser->toksuper].parent; -#else - jsmnint i; - for (i = parser->toksuper; i != JSMN_NEG; i--) { - if (tokens[i].type & 
JSMN_CONTAINER && tokens[i].end == JSMN_NEG) { - parser->toksuper = i; - break; - } - } -#ifdef JSMN_PERMISSIVE - if (i == JSMN_NEG) { - parser->toksuper = i; - } -#endif -#endif - } - } - parser->pos--; - - return JSMN_SUCCESS; -} - -/** - * Fills next token with JSON string. - */ -static jsmnint jsmn_parse_string(jsmnparser *parser, const char *js, - const size_t len, jsmntok *tokens, - const size_t num_tokens) { - /* If a STRING wasn't expected */ - if (!(parser->expected & JSMN_STRING)) { - return JSMN_ERROR_INVAL; - } - - if (len >= JSMN_NEG) { - return JSMN_ERROR_LEN; - } - - jsmnint pos; - pos = parser->pos; - - /* Skip starting quote */ - pos++; - - char c; - for (; pos < len && js[pos] != '\0'; pos++) { - c = js[pos]; - - /* Quote: end of string */ - if (c == '\"') { - jsmntype expected = parser->expected; - jsmntype type; - if (parser->toksuper != JSMN_NEG) { - /* OBJECT KEY, strict query */ - if ((parser->expected & (JSMN_INSD_OBJ | JSMN_KEY)) == - (JSMN_INSD_OBJ | JSMN_KEY)) { - parser->expected = JSMN_AFTR_OBJ_KEY; - type = JSMN_STRING | JSMN_KEY | JSMN_INSD_OBJ; - /* OBJECT VALUE, VALUE is implicit */ - } else if (parser->expected & JSMN_INSD_OBJ) { - parser->expected = JSMN_AFTR_OBJ_VAL; - type = JSMN_STRING | JSMN_VALUE | JSMN_INSD_OBJ; -#ifdef JSMN_PERMISSIVE - /* OBJECT VALUE at the ROOT level */ - } else if (parser->toksuper == JSMN_NEG) { - parser->expected = JSMN_ROOT_AFTR_O; - type = JSMN_STRING | JSMN_VALUE; -#endif - /* ARRAY VALUE, VALUE is implicit */ - } else { - parser->expected = JSMN_AFTR_ARR_VAL; - type = JSMN_STRING | JSMN_VALUE; - } - } else { - parser->expected = JSMN_ROOT; - type = JSMN_STRING | JSMN_VALUE; - } - - if (tokens == NULL) { - parser->pos = pos; - return JSMN_SUCCESS; - } - - jsmntok *token; - token = jsmn_alloc_token(parser, tokens, num_tokens); - if (token == NULL) { - parser->expected = expected; - return JSMN_ERROR_NOMEM; - } - jsmn_fill_token(token, type, parser->pos + 1, pos); - parser->pos = pos; -#ifdef 
JSMN_PARENT_LINKS - token->parent = parser->toksuper; -#endif -#ifdef JSMN_NEXT_SIBLING - jsmn_next_sibling(parser, tokens); -#endif - - if (parser->toksuper != JSMN_NEG) { - tokens[parser->toksuper].size++; - - if (!(tokens[parser->toksuper].type & JSMN_CONTAINER)) { -#ifdef JSMN_PARENT_LINKS - parser->toksuper = tokens[parser->toksuper].parent; -#else - jsmnint i; - for (i = parser->toksuper; i != JSMN_NEG; i--) { - if (tokens[i].type & JSMN_CONTAINER && tokens[i].end == JSMN_NEG) { - parser->toksuper = i; - break; - } - } -#ifdef JSMN_PERMISSIVE - if (i == JSMN_NEG) { - parser->toksuper = i; - } -#endif -#endif - } - } - - return JSMN_SUCCESS; - } - - /* Backslash: Quoted symbol expected */ - if (c == '\\' && pos + 1 < len) { - pos++; - switch (js[pos]) { - /* Allowed escaped symbols */ - case '\"': - case '\\': - case '/': - case 'b': - case 'f': - case 'n': - case 'r': - case 't': - break; - /* Allows escaped symbol \uXXXX */ - case 'u': - pos++; - jsmnint i; - for (i = pos + 4; pos < i; pos++) { - if (pos == len || js[pos] == '\0') { - return JSMN_ERROR_PART; - } - /* If it isn't a hex character we have an error */ - if (!is_hexadecimal(js[pos])) { - return JSMN_ERROR_INVAL; - } - } - pos--; - break; - /* Unexpected symbol */ - default: - return JSMN_ERROR_INVAL; - } - } - - /* form feed, new line, carraige return, tab, and vertical tab not allowed - */ - else if (c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v') { - return JSMN_ERROR_INVAL; - } - } - return JSMN_ERROR_PART; -} - -static jsmnint jsmn_parse_container_open(jsmnparser *parser, const char c, - jsmntok *tokens, - const size_t num_tokens) { - /* If an OBJECT or ARRAY wasn't expected */ - if (!(parser->expected & JSMN_CONTAINER)) { - return JSMN_ERROR_INVAL; - } - - jsmntype type; - if (c == '{') { - parser->expected = JSMN_OPEN_OBJECT; - type = JSMN_OBJECT | JSMN_VALUE; - } else { - parser->expected = JSMN_OPEN_ARRAY; - type = JSMN_ARRAY | JSMN_VALUE; - } - - if (tokens == NULL) { - 
parser->toksuper++; - if (parser->toksuper < (sizeof(jsmnint) * 8) && - parser->expected & JSMN_INSD_OBJ) { - parser->toknext |= (1 << parser->toksuper); - } - return JSMN_SUCCESS; - } - - if (parser->toksuper != JSMN_NEG && - tokens[parser->toksuper].type & JSMN_INSD_OBJ) { - type |= JSMN_INSD_OBJ; - } - - jsmntok *token; - token = jsmn_alloc_token(parser, tokens, num_tokens); - if (token == NULL) { - return JSMN_ERROR_NOMEM; - } - jsmn_fill_token(token, type, parser->pos, JSMN_NEG); -#ifdef JSMN_PARENT_LINKS - token->parent = parser->toksuper; -#endif -#ifdef JSMN_NEXT_SIBLING - jsmn_next_sibling(parser, tokens); -#endif - - if (parser->toksuper != JSMN_NEG) { - tokens[parser->toksuper].size++; - } - parser->toksuper = parser->toknext - 1; - - return JSMN_SUCCESS; -} - -static jsmnint jsmn_parse_container_close(jsmnparser *parser, const char c, - jsmntok *tokens) { - /* If an OBJECT or ARRAY CLOSE wasn't expected */ - if (!(parser->expected & JSMN_CLOSE)) { - return JSMN_ERROR_INVAL; - } - - if (tokens == NULL) { - if (parser->toksuper < (sizeof(jsmnint) * 8)) { - jsmntype type; - type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY); - if ((((parser->toknext & (1 << parser->toksuper)) == 1) && - !(type & JSMN_OBJECT)) || - (((parser->toknext & (1 << parser->toksuper)) == 0) && - !(type & JSMN_ARRAY))) { - return JSMN_ERROR_UNMATCHED_BRACKETS; - } - parser->toknext &= ~(1 << parser->toksuper); - } - parser->toksuper--; - } else { - jsmntype type; - jsmntok *token; - - type = (c == '}' ? 
JSMN_OBJECT : JSMN_ARRAY); -#ifdef JSMN_PERMISSIVE - if (parser->toksuper == JSMN_NEG) { - return JSMN_ERROR_UNMATCHED_BRACKETS; - } -#endif - token = &tokens[parser->toksuper]; - if (!(token->type & type) || token->end != JSMN_NEG) { - return JSMN_ERROR_UNMATCHED_BRACKETS; - } - token->end = parser->pos + 1; -#ifdef JSMN_PARENT_LINKS - if (token->type & JSMN_INSD_OBJ) { - if (tokens[token->parent].type & JSMN_CONTAINER) { - parser->toksuper = token->parent; - } else { - parser->toksuper = tokens[token->parent].parent; - } - } else { - parser->toksuper = token->parent; - } -#else - jsmnint i; - for (i = parser->toksuper - 1; i != JSMN_NEG; i--) { - if (tokens[i].type & JSMN_CONTAINER && tokens[i].end == JSMN_NEG) { - parser->toksuper = i; - break; - } - } - if (i == JSMN_NEG) { - parser->toksuper = i; - } -#endif - } - - if (parser->toksuper != JSMN_NEG) { - parser->expected = JSMN_AFTR_CLOSE; - } else { - parser->expected = JSMN_ROOT; - } - - return JSMN_SUCCESS; -} - -static jsmnint jsmn_parse_colon(jsmnparser *parser, jsmntok *tokens) { - /* If a COLON wasn't expected; strict check because it is a complex enum */ - if (!((parser->expected & JSMN_COLON) == JSMN_COLON)) { - return JSMN_ERROR_INVAL; - } - - if (parser->toksuper != JSMN_NEG) { - parser->expected = JSMN_AFTR_COLON; -#ifdef JSMN_PERMISSIVE - } else { - parser->expected = JSMN_AFTR_COLON_R; -#endif - } - - if (tokens == NULL) { - return JSMN_SUCCESS; - } - -#ifdef JSMN_PERMISSIVE - tokens[parser->toknext - 1].type &= ~JSMN_VALUE; - tokens[parser->toknext - 1].type |= JSMN_KEY; -#endif - - parser->toksuper = parser->toknext - 1; - - return JSMN_SUCCESS; -} - -static jsmnint jsmn_parse_comma(jsmnparser *parser, jsmntok *tokens) { - /* If a COMMA wasn't expected; strict check because it is a complex enum */ - if (!((parser->expected & JSMN_COMMA) == JSMN_COMMA)) { - return JSMN_ERROR_INVAL; - } - - jsmntype type = JSMN_UNDEFINED; - if (tokens == NULL) { - if (parser->toksuper < (sizeof(jsmnint) * 8) && - 
parser->toknext & (1 << parser->toksuper)) { - type = JSMN_INSD_OBJ; - } - } else { - if (parser->toksuper != JSMN_NEG) { - type = tokens[parser->toksuper].type; - } - } - - if (parser->toksuper != JSMN_NEG) { - if (type & (JSMN_OBJECT | JSMN_INSD_OBJ)) { - parser->expected = JSMN_AFTR_COMMA_O; - } else { - parser->expected = JSMN_AFTR_COMMA_A; - } -#ifdef JSMN_PERMISSIVE - } else { - parser->expected = JSMN_AFTR_COMMA_R; -#endif - } - - if (tokens == NULL) { - return JSMN_SUCCESS; - } - -#ifdef JSMN_PERMISSIVE - tokens[parser->toknext - 1].type |= JSMN_VALUE; -#endif - - return JSMN_SUCCESS; -} - -/** - * Parse JSON string and fill tokens. - */ -JSMN_API -jsmnint jsmn_parse(jsmnparser *parser, const char *js, const size_t len, - jsmntok *tokens, const size_t num_tokens) { - jsmnint r; - jsmnint count = parser->toknext; - - char c; - for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { -#ifndef JSMN_MULTIPLE_JSON_FAIL - if (parser->expected == JSMN_UNDEFINED) { - break; - } -#endif - c = js[parser->pos]; - switch (c) { - case '{': - case '[': - r = jsmn_parse_container_open(parser, c, tokens, num_tokens); - if (r != JSMN_SUCCESS) { - return r; - } - count++; - break; - case '}': - case ']': - r = jsmn_parse_container_close(parser, c, tokens); - if (r != JSMN_SUCCESS) { - return r; - } - break; - case '\"': - r = jsmn_parse_string(parser, js, len, tokens, num_tokens); - if (r != JSMN_SUCCESS) { - return r; - } - count++; - break; - case ':': - r = jsmn_parse_colon(parser, tokens); - if (r != JSMN_SUCCESS) { - return r; - } - break; - case ',': - r = jsmn_parse_comma(parser, tokens); - if (r != JSMN_SUCCESS) { - return r; - } - break; - /* Valid whitespace */ - case ' ': - case '\t': - case '\n': - case '\r': - break; -#ifndef JSMN_PERMISSIVE_PRIMITIVE - /* rfc8259: PRIMITIVEs are numbers and booleans */ - case '-': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case 't': - 
case 'f': - case 'n': -#else - /* In permissive mode every unquoted value is a PRIMITIVE */ - default: -#endif - r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); - if (r != JSMN_SUCCESS) { - return r; - } - count++; - break; - -#ifndef JSMN_PERMISSIVE - /* Unexpected char */ - default: - return JSMN_ERROR_INVAL; -#endif - } - } - - if (parser->toksuper != JSMN_NEG) { - return JSMN_ERROR_PART; - } - - if (count == 0) { - return JSMN_ERROR_INVAL; - } - - while (parser->pos < len && is_whitespace(js[parser->pos])) { - parser->pos++; - } - - return count; -} - -/** - * Creates a new parser based over a given buffer with an array of tokens - * available. - */ -JSMN_API -void jsmn_init(jsmnparser *parser) { - parser->pos = 0; - parser->toknext = 0; - parser->toksuper = JSMN_NEG; - parser->expected = JSMN_ROOT_INIT; -} - -#ifdef __cplusplus -} -#endif diff --git a/vendor/SAJSON/CMakeLists.txt b/vendor/SAJSON/CMakeLists.txt new file mode 100644 index 00000000..0be735e4 --- /dev/null +++ b/vendor/SAJSON/CMakeLists.txt @@ -0,0 +1,5 @@ +# Create the SAJSON library +add_library(SAJSON STATIC include/sajson.h sajson.cpp) +# Library includes +target_include_directories(SAJSON PRIVATE ${CMAKE_CURRENT_LIST_DIR}) +target_include_directories(SAJSON PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include) diff --git a/vendor/SAJSON/include/sajson.h b/vendor/SAJSON/include/sajson.h new file mode 100644 index 00000000..d9b07f82 --- /dev/null +++ b/vendor/SAJSON/include/sajson.h @@ -0,0 +1,2600 @@ +/* + * Copyright (c) 2012-2017 Chad Austin + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef SAJSON_NO_STD_STRING +#include // for convenient access to error messages and string values. +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define SAJSON_LIKELY(x) __builtin_expect(!!(x), 1) +#define SAJSON_UNLIKELY(x) __builtin_expect(!!(x), 0) +#define SAJSON_ALWAYS_INLINE __attribute__((always_inline)) +#define SAJSON_UNREACHABLE() __builtin_unreachable() +#define SAJSON_snprintf snprintf +#elif defined(_MSC_VER) +#define SAJSON_LIKELY(x) x +#define SAJSON_UNLIKELY(x) x +#define SAJSON_ALWAYS_INLINE __forceinline +#define SAJSON_UNREACHABLE() __assume(0) +#if (_MSC_VER <= 1800) +#define SAJSON_snprintf _snprintf +#else +#define SAJSON_snprintf snprintf +#endif +#else +#define SAJSON_LIKELY(x) x +#define SAJSON_UNLIKELY(x) x +#define SAJSON_ALWAYS_INLINE inline +#define SAJSON_UNREACHABLE() assert(!"unreachable") +#define SAJSON_snprintf snprintf +#endif + +/** + * sajson Public API + */ +namespace sajson { + +/** + * Indicates a JSON value's type. + * + * In early versions of sajson, this was the tag value directly from the parsed + * AST storage, but, to preserve API compatibility, it is now synthesized.
+ */ +enum type : uint8_t { + TYPE_INTEGER, + TYPE_DOUBLE, + TYPE_NULL, + TYPE_FALSE, + TYPE_TRUE, + TYPE_STRING, + TYPE_ARRAY, + TYPE_OBJECT, +}; + +namespace internal { + +/** + * get_value_of_key for objects is O(lg N), but most objects have + * small, bounded key sets, and the sort adds parsing overhead when a + * linear scan would be fast anyway and the code consuming objects may + * never lookup values by name! Therefore, only binary search for + * large numbers of keys. + */ +constexpr inline bool should_binary_search(size_t length) { +#ifdef SAJSON_UNSORTED_OBJECT_KEYS + return false; +#else + return length > 100; +#endif +} + +/** + * The low bits of every AST word indicate the value's type. This representation + * is internal and subject to change. + */ +enum class tag : uint8_t { + integer, + double_, + null, + false_, + true_, + string, + array, + object, +}; + +static const size_t TAG_BITS = 3; +static const size_t TAG_MASK = (1 << TAG_BITS) - 1; +static const size_t VALUE_MASK = ~size_t{} >> TAG_BITS; + +static const size_t ROOT_MARKER = VALUE_MASK; + +constexpr inline tag get_element_tag(size_t s) { + return static_cast(s & TAG_MASK); +} + +constexpr inline size_t get_element_value(size_t s) { return s >> TAG_BITS; } + +constexpr inline size_t make_element(tag t, size_t value) { + // assert((value & ~VALUE_MASK) == 0); + // value &= VALUE_MASK; + return static_cast(t) | (value << TAG_BITS); +} + +// This template utilizes the One Definition Rule to create global arrays in a +// header. This trick courtesy of Rich Geldreich's Purple JSON parser. +template +struct globals_struct { +// clang-format off + + // bit 0 (1) - set if: plain ASCII string character + // bit 1 (2) - set if: whitespace + // bit 4 (0x10) - set if: 0-9 e E . 
+ constexpr static const uint8_t parse_flags[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 + 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0x11,1, // 2 + 0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, 0x11,0x11,1, 1, 1, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + + // 128-255 + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0 + }; + +// clang-format on +}; +typedef globals_struct<> globals; + +constexpr inline bool is_plain_string_character(char c) { + // return c >= 0x20 && c <= 0x7f && c != 0x22 && c != 0x5c; + return (globals::parse_flags[static_cast(c)] & 1) != 0; +} + +constexpr inline bool is_whitespace(char c) { + // return c == '\r' || c == '\n' || c == '\t' || c == ' '; + return (globals::parse_flags[static_cast(c)] & 2) != 0; +} + +class allocated_buffer { +public: + allocated_buffer() + : memory(0) {} + + explicit allocated_buffer(size_t length) { + // throws std::bad_alloc upon allocation failure + void* buffer = operator new(sizeof(size_t) + length); + memory = static_cast(buffer); + memory->refcount = 1; + } + + allocated_buffer(const allocated_buffer& that) + : memory(that.memory) { + incref(); + } + + allocated_buffer(allocated_buffer&& that) + : memory(that.memory) { + that.memory = 0; + } + + ~allocated_buffer() { decref(); } + + allocated_buffer& operator=(const allocated_buffer& that) { + if (this != &that) { + decref(); + memory = that.memory; + incref(); + } + return *this; + } + + allocated_buffer& operator=(allocated_buffer&& 
that) { + if (this != &that) { + decref(); + memory = that.memory; + that.memory = 0; + } + return *this; + } + + char* get_data() const { return memory ? memory->data : 0; } + +private: + void incref() const { + if (memory) { + ++(memory->refcount); + } + } + + void decref() const { + if (memory && --(memory->refcount) == 0) { + operator delete(memory); + } + } + + struct layout { + size_t refcount; + char data[]; + }; + + layout* memory; +}; +} // namespace internal + +/// A simple type encoding a pointer to some memory and a length (in bytes). +/// Does not maintain any memory. +class string { +public: + string(const char* text_, size_t length) + : text(text_) + , _length(length) {} + + const char* data() const { return text; } + + size_t length() const { return _length; } + +#ifndef SAJSON_NO_STD_STRING + std::string as_string() const { return std::string(text, text + _length); } +#endif + +private: + const char* const text; + const size_t _length; + + string(); /*=delete*/ +}; + +/// A convenient way to parse JSON from a string literal. The string ends +/// at its first NUL character. +class literal : public string { +public: + template + explicit literal(const char (&text_)[sz]) + : string(text_, sz - 1) { + static_assert(sz > 0, "!"); + } +}; + +/// A pointer to a mutable buffer, its size in bytes, and strong ownership of +/// any copied memory. +class mutable_string_view { +public: + /// Creates an empty, zero-sized view. + mutable_string_view() + : length_(0) + , data(0) + , buffer() {} + + /// Given a length in bytes and a pointer, constructs a view + /// that does not allocate a copy of the data or maintain its life. + /// The given pointer must stay valid for the duration of the parse and the + /// resulting \ref document's life. + mutable_string_view(size_t length, char* data_) + : length_(length) + , data(data_) + , buffer() {} + + /// Allocates a copy of the given \ref literal string and exposes a + /// mutable view into it. 
Throws std::bad_alloc if allocation fails. + mutable_string_view(const literal& s) + : length_(s.length()) + , buffer(length_) { + data = buffer.get_data(); + memcpy(data, s.data(), length_); + } + + /// Allocates a copy of the given \ref string and exposes a mutable view + /// into it. Throws std::bad_alloc if allocation fails. + mutable_string_view(const string& s) + : length_(s.length()) + , buffer(length_) { + data = buffer.get_data(); + memcpy(data, s.data(), length_); + } + + /// Copies a mutable_string_view. If any backing memory has been + /// allocated, its refcount is incremented - both views can safely + /// use the memory. + mutable_string_view(const mutable_string_view& that) + : length_(that.length_) + , data(that.data) + , buffer(that.buffer) {} + + /// Move constructor - neuters the old mutable_string_view. + mutable_string_view(mutable_string_view&& that) + : length_(that.length_) + , data(that.data) + , buffer(std::move(that.buffer)) { + that.length_ = 0; + that.data = 0; + } + + mutable_string_view& operator=(mutable_string_view&& that) { + if (this != &that) { + length_ = that.length_; + data = that.data; + buffer = std::move(that.buffer); + that.length_ = 0; + that.data = 0; + } + return *this; + } + + mutable_string_view& operator=(const mutable_string_view& that) { + if (this != &that) { + length_ = that.length_; + data = that.data; + buffer = that.buffer; + } + return *this; + } + + size_t length() const { return length_; } + + char* get_data() const { return data; } + +private: + size_t length_; + char* data; + internal::allocated_buffer buffer; // may not be allocated +}; + +namespace internal { +struct object_key_record { + size_t key_start; + size_t key_end; + size_t value; + + bool match(const char* object_data, const string& str) const { + size_t length = key_end - key_start; + return length == str.length() + && 0 == memcmp(str.data(), object_data + key_start, length); + } +}; + +struct object_key_comparator { + 
object_key_comparator(const char* object_data) + : data(object_data) {} + + bool operator()(const object_key_record& lhs, const string& rhs) const { + const size_t lhs_length = lhs.key_end - lhs.key_start; + const size_t rhs_length = rhs.length(); + if (lhs_length < rhs_length) { + return true; + } else if (lhs_length > rhs_length) { + return false; + } + return memcmp(data + lhs.key_start, rhs.data(), lhs_length) < 0; + } + + bool operator()(const string& lhs, const object_key_record& rhs) const { + return !(*this)(rhs, lhs); + } + + bool + operator()(const object_key_record& lhs, const object_key_record& rhs) { + const size_t lhs_length = lhs.key_end - lhs.key_start; + const size_t rhs_length = rhs.key_end - rhs.key_start; + if (lhs_length < rhs_length) { + return true; + } else if (lhs_length > rhs_length) { + return false; + } + return memcmp(data + lhs.key_start, data + rhs.key_start, lhs_length) + < 0; + } + + const char* data; +}; +} // namespace internal + +namespace integer_storage { +enum { word_length = 1 }; + +inline int load(const size_t* location) { + int value; + memcpy(&value, location, sizeof(value)); + return value; +} + +inline void store(size_t* location, int value) { + // NOTE: Most modern compilers optimize away this constant-size + // memcpy into a single instruction. If any don't, and treat + // punning through a union as legal, they can be special-cased. + static_assert( + sizeof(value) <= sizeof(*location), + "size_t must not be smaller than int"); + memcpy(location, &value, sizeof(value)); +} +} // namespace integer_storage + +namespace double_storage { +enum { word_length = sizeof(double) / sizeof(size_t) }; + +inline double load(const size_t* location) { + double value; + memcpy(&value, location, sizeof(double)); + return value; +} + +inline void store(size_t* location, double value) { + // NOTE: Most modern compilers optimize away this constant-size + // memcpy into a single instruction. 
If any don't, and treat + // punning through a union as legal, they can be special-cased. + memcpy(location, &value, sizeof(double)); +} +} // namespace double_storage + +/// Represents a JSON value. First, call get_type() to check its type, +/// which determines which methods are available. +/// +/// Note that \ref value does not maintain any backing memory, only the +/// corresponding \ref document does. It is illegal to access a \ref value +/// after its \ref document has been destroyed. +class value { +public: + value() + : value_tag{ tag::null } + , payload{ nullptr } + , text{ nullptr } {} + + /// Returns the JSON value's \ref type. + type get_type() const { + // As of 2020, current versions of MSVC generate a jump table for this + // conversion. If it matters, a more clever mapping with knowledge of + // the specific values is possible. gcc and clang generate good code -- + // at worst a table lookup. + switch (value_tag) { + case tag::integer: + return TYPE_INTEGER; + case tag::double_: + return TYPE_DOUBLE; + case tag::null: + return TYPE_NULL; + case tag::false_: + return TYPE_FALSE; + case tag::true_: + return TYPE_TRUE; + case tag::string: + return TYPE_STRING; + case tag::array: + return TYPE_ARRAY; + case tag::object: + return TYPE_OBJECT; + } + SAJSON_UNREACHABLE(); + } + + bool is_boolean() const { + return value_tag == tag::false_ || value_tag == tag::true_; + } + + bool get_boolean_value() const { + switch (value_tag) { + case tag::true_: + return true; + case tag::false_: + return false; + default: + assert(false); + return false; + } + } + + /// Returns the length of the object or array. + /// Only legal if get_type() is TYPE_ARRAY or TYPE_OBJECT. + size_t get_length() const { + assert_tag_2(tag::array, tag::object); + return payload[0]; + } + + /// Returns the nth element of an array. Calling with an out-of-bound + /// index is undefined behavior. + /// Only legal if get_type() is TYPE_ARRAY. 
+ value get_array_element(size_t index) const { + using namespace internal; + assert_tag(tag::array); + size_t element = payload[1 + index]; + return value( + get_element_tag(element), + payload + get_element_value(element), + text); + } + + /// Returns the nth key of an object. Calling with an out-of-bound + /// index is undefined behavior. + /// Only legal if get_type() is TYPE_OBJECT. + string get_object_key(size_t index) const { + assert_tag(tag::object); + const size_t* s = payload + 1 + index * 3; + return string(text + s[0], s[1] - s[0]); + } + + /// Returns the nth value of an object. Calling with an out-of-bound + /// index is undefined behavior. Only legal if get_type() is TYPE_OBJECT. + value get_object_value(size_t index) const { + using namespace internal; + assert_tag(tag::object); + size_t element = payload[3 + index * 3]; + return value( + get_element_tag(element), + payload + get_element_value(element), + text); + } + + /// Given a string key, returns the value with that key or a null value + /// if the key is not found. Running time is O(lg N). + /// Only legal if get_type() is TYPE_OBJECT. + value get_value_of_key(const string& key) const { + assert_tag(tag::object); + size_t i = find_object_key(key); + if (i < get_length()) { + return get_object_value(i); + } else { + return value(tag::null, 0, 0); + } + } + + /// Given a string key, returns the index of the associated value if + /// one exists. Returns get_length() if there is no such key. + /// Note: sajson sorts object keys, so the running time is O(lg N). 
+ /// Only legal if get_type() is TYPE_OBJECT + size_t find_object_key(const string& key) const { + using namespace internal; + assert_tag(tag::object); + size_t length = get_length(); + const object_key_record* start + = reinterpret_cast(payload + 1); + const object_key_record* end = start + length; + if (SAJSON_UNLIKELY(should_binary_search(length))) { + const object_key_record* i = std::lower_bound( + start, end, key, object_key_comparator(text)); + if (i != end && i->match(text, key)) { + return i - start; + } + } else { + for (size_t i = 0; i < length; ++i) { + if (start[i].match(text, key)) { + return i; + } + } + } + return length; + } + + /// If a numeric value was parsed as a 32-bit integer, returns it. + /// Only legal if get_type() is TYPE_INTEGER. + int get_integer_value() const { + assert_tag(tag::integer); + return integer_storage::load(payload); + } + + /// If a numeric value was parsed as a double, returns it. + /// Only legal if get_type() is TYPE_DOUBLE. + double get_double_value() const { + assert_tag(tag::double_); + return double_storage::load(payload); + } + + /// Returns a numeric value as a double-precision float. + /// Only legal if get_type() is TYPE_INTEGER or TYPE_DOUBLE. + double get_number_value() const { + assert_tag_2(tag::integer, tag::double_); + if (value_tag == tag::integer) { + return get_integer_value(); + } else { + return get_double_value(); + } + } + + /// Returns true and writes to the output argument if the numeric value + /// fits in a 53-bit integer. This is useful for timestamps and other + /// situations where integral values with greater than 32-bit precision + /// are used, as 64-bit values are not understood by all JSON + /// implementations or languages. + /// Returns false if the value is not an integer or not in range. + /// Only legal if get_type() is TYPE_INTEGER or TYPE_DOUBLE. 
+ bool get_int53_value(int64_t* out) const { + // Make sure the output variable is always defined to avoid any + // possible situation like + // https://gist.github.com/chadaustin/2c249cb850619ddec05b23ca42cf7a18 + *out = 0; + + assert_tag_2(tag::integer, tag::double_); + switch (value_tag) { + case tag::integer: + *out = get_integer_value(); + return true; + case tag::double_: { + double v = get_double_value(); + if (v < -(1LL << 53) || v > (1LL << 53)) { + return false; + } + int64_t as_int = static_cast(v); + if (as_int != v) { + return false; + } + *out = as_int; + return true; + } + default: + return false; + } + } + + /// Returns the length of the string. + /// Only legal if get_type() is TYPE_STRING. + size_t get_string_length() const { + assert_tag(tag::string); + return payload[1] - payload[0]; + } + + /// Returns a pointer to the beginning of a string value's data. + /// WARNING: Calling this function and using the return value as a + /// C-style string (that is, without also using get_string_length()) + /// will cause the string to appear truncated if the string has + /// embedded NULs. + /// Only legal if get_type() is TYPE_STRING. + const char* as_cstring() const { + assert_tag(tag::string); + return text + payload[0]; + } + +#ifndef SAJSON_NO_STD_STRING + /// Returns a string's value as a std::string. + /// Only legal if get_type() is TYPE_STRING. 
+ std::string as_string() const { + assert_tag(tag::string); + return std::string(text + payload[0], text + payload[1]); + } +#endif + + /// \cond INTERNAL + const size_t* _internal_get_payload() const { return payload; } + /// \endcond + +private: + using tag = internal::tag; + + explicit value(tag value_tag_, const size_t* payload_, const char* text_) + : value_tag(value_tag_) + , payload(payload_) + , text(text_) {} + + void assert_tag(tag expected) const { assert(expected == value_tag); } + + void assert_tag_2(tag e1, tag e2) const { + assert(e1 == value_tag || e2 == value_tag); + } + + void assert_in_bounds(size_t i) const { assert(i < get_length()); } + + const tag value_tag; + const size_t* const payload; + const char* const text; + + friend class document; +}; + +/// Error code indicating why parse failed. +enum error { + ERROR_NO_ERROR, + ERROR_OUT_OF_MEMORY, + ERROR_UNEXPECTED_END, + ERROR_MISSING_ROOT_ELEMENT, + ERROR_BAD_ROOT, + ERROR_EXPECTED_COMMA, + ERROR_MISSING_OBJECT_KEY, + ERROR_EXPECTED_COLON, + ERROR_EXPECTED_END_OF_INPUT, + ERROR_UNEXPECTED_COMMA, + ERROR_EXPECTED_VALUE, + ERROR_EXPECTED_NULL, + ERROR_EXPECTED_FALSE, + ERROR_EXPECTED_TRUE, + ERROR_INVALID_NUMBER, + ERROR_MISSING_EXPONENT, + ERROR_ILLEGAL_CODEPOINT, + ERROR_INVALID_UNICODE_ESCAPE, + ERROR_UNEXPECTED_END_OF_UTF16, + ERROR_EXPECTED_U, + ERROR_INVALID_UTF16_TRAIL_SURROGATE, + ERROR_UNKNOWN_ESCAPE, + ERROR_INVALID_UTF8, + ERROR_UNINITIALIZED, +}; + +namespace internal { +class ownership { +public: + ownership() = delete; + ownership(const ownership&) = delete; + void operator=(const ownership&) = delete; + + explicit ownership(size_t* p_) + : p(p_) {} + + ownership(ownership&& p_) + : p(p_.p) { + p_.p = 0; + } + + ~ownership() { delete[] p; } + + bool is_valid() const { return !!p; } + +private: + size_t* p; +}; + +inline const char* get_error_text(error error_code) { + switch (error_code) { + case ERROR_NO_ERROR: + return "no error"; + case ERROR_OUT_OF_MEMORY: + return "out of 
memory"; + case ERROR_UNEXPECTED_END: + return "unexpected end of input"; + case ERROR_MISSING_ROOT_ELEMENT: + return "missing root element"; + case ERROR_BAD_ROOT: + return "document root must be object or array"; + case ERROR_EXPECTED_COMMA: + return "expected ,"; + case ERROR_MISSING_OBJECT_KEY: + return "missing object key"; + case ERROR_EXPECTED_COLON: + return "expected :"; + case ERROR_EXPECTED_END_OF_INPUT: + return "expected end of input"; + case ERROR_UNEXPECTED_COMMA: + return "unexpected comma"; + case ERROR_EXPECTED_VALUE: + return "expected value"; + case ERROR_EXPECTED_NULL: + return "expected 'null'"; + case ERROR_EXPECTED_FALSE: + return "expected 'false'"; + case ERROR_EXPECTED_TRUE: + return "expected 'true'"; + case ERROR_INVALID_NUMBER: + return "invalid number"; + case ERROR_MISSING_EXPONENT: + return "missing exponent"; + case ERROR_ILLEGAL_CODEPOINT: + return "illegal unprintable codepoint in string"; + case ERROR_INVALID_UNICODE_ESCAPE: + return "invalid character in unicode escape"; + case ERROR_UNEXPECTED_END_OF_UTF16: + return "unexpected end of input during UTF-16 surrogate pair"; + case ERROR_EXPECTED_U: + return "expected \\u"; + case ERROR_INVALID_UTF16_TRAIL_SURROGATE: + return "invalid UTF-16 trail surrogate"; + case ERROR_UNKNOWN_ESCAPE: + return "unknown escape"; + case ERROR_INVALID_UTF8: + return "invalid UTF-8"; + case ERROR_UNINITIALIZED: + return "uninitialized document"; + } + + SAJSON_UNREACHABLE(); +} +} // namespace internal + +/** + * Represents the result of a JSON parse: either is_valid() and the document + * contains a root value or parse error information is available. + * + * Note that the document holds a strong reference to any memory allocated: + * any mutable copy of the input text and any memory allocated for the + * AST data structure. Thus, the document must not be deallocated while any + * \ref value is in use. 
+ */
+class document {
+public:
+    /// Constructs an invalid document whose error code is
+    /// ERROR_UNINITIALIZED.
+    document()
+        : document{ mutable_string_view{}, 0, 0, ERROR_UNINITIALIZED, 0 } {}
+
+    /// Move constructor: takes over rhs's AST ownership and copies the
+    /// remaining fields, including the formatted error message.
+    document(document&& rhs)
+        : input(rhs.input)
+        , structure(std::move(rhs.structure))
+        , root_tag(rhs.root_tag)
+        , root(rhs.root)
+        , error_line(rhs.error_line)
+        , error_column(rhs.error_column)
+        , error_code(rhs.error_code)
+        , error_arg(rhs.error_arg) {
+        // Yikes... but strcpy is okay here because formatted_error is
+        // guaranteed to be null-terminated.
+        strcpy(formatted_error_message, rhs.formatted_error_message);
+        // should rhs's fields be zeroed too?
+    }
+
+    /**
+     * Returns true if the document was parsed successfully.
+     * If true, call get_root() to access the document's root value.
+     * If false, call get_error_line(), get_error_column(), and
+     * get_error_message_as_cstring() to see why the parse failed.
+     */
+    bool is_valid() const {
+        return root_tag == tag::array || root_tag == tag::object;
+    }
+
+    /// If is_valid(), returns the document's root \ref value.
+    value get_root() const { return value(root_tag, root, input.get_data()); }
+
+    /// If not is_valid(), returns the one-based line number where the parse
+    /// failed.
+    size_t get_error_line() const { return error_line; }
+
+    /// If not is_valid(), returns the one-based column number where the parse
+    /// failed.
+    size_t get_error_column() const { return error_column; }
+
+#ifndef SAJSON_NO_STD_STRING
+    /// If not is_valid(), returns a std::string indicating why the parse
+    /// failed.
+    std::string get_error_message_as_string() const {
+        return formatted_error_message;
+    }
+#endif
+
+    /// If not is_valid(), returns a null-terminated C string indicating why the
+    /// parse failed.
+    const char* get_error_message_as_cstring() const {
+        return formatted_error_message;
+    }
+
+    /// \cond INTERNAL
+
+    // WARNING: Internal function which is subject to change
+    error _internal_get_error_code() const { return error_code; }
+
+    // WARNING: Internal function which is subject to change
+    int _internal_get_error_argument() const { return error_arg; }
+
+    // WARNING: Internal function which is subject to change
+    const char* _internal_get_error_text() const {
+        return internal::get_error_text(error_code);
+    }
+
+    // WARNING: Internal function exposed only for high-performance language
+    // bindings.
+    internal::tag _internal_get_root_tag() const { return root_tag; }
+
+    // WARNING: Internal function exposed only for high-performance language
+    // bindings.
+    const size_t* _internal_get_root() const { return root; }
+
+    // WARNING: Internal function exposed only for high-performance language
+    // bindings.
+    const mutable_string_view& _internal_get_input() const { return input; }
+
+    /// \endcond
+
+private:
+    using tag = internal::tag;
+
+    document(const document&) = delete;
+    void operator=(const document&) = delete;
+
+    // Success constructor: stores the root tag/pointer, takes ownership of
+    // the AST memory and leaves the error message empty.
+    explicit document(
+        const mutable_string_view& input_,
+        internal::ownership&& structure_,
+        tag root_tag_,
+        const size_t* root_)
+        : input(input_)
+        , structure(std::move(structure_))
+        , root_tag(root_tag_)
+        , root(root_)
+        , error_line(0)
+        , error_column(0)
+        , error_code(ERROR_NO_ERROR)
+        , error_arg(0) {
+        formatted_error_message[0] = 0;
+    }
+
+    // Failure constructor: records the error position and code, and formats
+    // the message as "<text>: <arg>" when the argument is significant,
+    // otherwise as "<text>".
+    explicit document(
+        const mutable_string_view& input_,
+        size_t error_line_,
+        size_t error_column_,
+        const error error_code_,
+        int error_arg_)
+        : input(input_)
+        , structure(0)
+        , root_tag(tag::null)
+        , root(0)
+        , error_line(error_line_)
+        , error_column(error_column_)
+        , error_code(error_code_)
+        , error_arg(error_arg_) {
+        // Terminate the buffer up front; snprintf is only given the first
+        // ERROR_BUFFER_LENGTH - 1 bytes.
+        formatted_error_message[ERROR_BUFFER_LENGTH - 1] = 0;
+        int written = has_significant_error_arg()
+            ? SAJSON_snprintf(
+                formatted_error_message,
+                ERROR_BUFFER_LENGTH - 1,
+                "%s: %d",
+                _internal_get_error_text(),
+                error_arg)
+            : SAJSON_snprintf(
+                formatted_error_message,
+                ERROR_BUFFER_LENGTH - 1,
+                "%s",
+                _internal_get_error_text());
+        (void)written;
+        assert(written >= 0 && written < ERROR_BUFFER_LENGTH);
+    }
+
+    // Only ERROR_ILLEGAL_CODEPOINT carries an argument worth printing.
+    bool has_significant_error_arg() const {
+        return error_code == ERROR_ILLEGAL_CODEPOINT;
+    }
+
+    mutable_string_view input;
+    internal::ownership structure;
+    const tag root_tag;
+    const size_t* const root;
+    const size_t error_line;
+    const size_t error_column;
+    const error error_code;
+    const int error_arg;
+
+    enum { ERROR_BUFFER_LENGTH = 128 };
+    char formatted_error_message[ERROR_BUFFER_LENGTH];
+
+    // parse() and parser need access to the private constructors above.
+    template <typename AllocationStrategy, typename StringType>
+    friend document
+    parse(const AllocationStrategy& strategy, const StringType& string);
+    template <typename Allocator>
+    friend class parser;
+};
+
+/// Allocation policy that allocates one large buffer guaranteed to hold the
+/// resulting AST. This allocation policy is the fastest since it requires
+/// no conditionals to see if more memory must be allocated.
+class single_allocation {
+public:
+    /// \cond INTERNAL
+
+    // Parse stack that grows upward from the bottom of the shared buffer.
+    // No capacity checks are performed: push/reserve always succeed.
+    class stack_head {
+    public:
+        stack_head(stack_head&& other)
+            : stack_bottom(other.stack_bottom)
+            , stack_top(other.stack_top) {}
+
+        bool push(size_t element) {
+            *stack_top++ = element;
+            return true;
+        }
+
+        size_t* reserve(size_t amount, bool* success) {
+            size_t* rv = stack_top;
+            stack_top += amount;
+            *success = true;
+            return rv;
+        }
+
+        // The compiler does not see the stack_head (stored in a local)
+        // and the allocator (stored as a field) have the same stack_bottom
+        // values, so it does a bit of redundant work.
+        // So there's a microoptimization available here: introduce a type
+        // "stack_mark" and make it polymorphic on the allocator. For
+        // single_allocation, it merely needs to be a single pointer.
+
+        void reset(size_t new_top) { stack_top = stack_bottom + new_top; }
+
+        size_t get_size() { return stack_top - stack_bottom; }
+
+        size_t* get_top() { return stack_top; }
+
+        size_t* get_pointer_from_offset(size_t offset) {
+            return stack_bottom + offset;
+        }
+
+    private:
+        stack_head() = delete;
+        stack_head(const stack_head&) = delete;
+        void operator=(const stack_head&) = delete;
+
+        explicit stack_head(size_t* base)
+            : stack_bottom(base)
+            , stack_top(base) {}
+
+        size_t* const stack_bottom;
+        size_t* stack_top;
+
+        friend class single_allocation;
+    };
+
+    // AST writer that fills the buffer downward from its end. The buffer is
+    // deleted on destruction only when should_deallocate is set.
+    class allocator {
+    public:
+        allocator() = delete;
+        allocator(const allocator&) = delete;
+        void operator=(const allocator&) = delete;
+
+        explicit allocator(
+            size_t* buffer, size_t input_size, bool should_deallocate_)
+            : structure(buffer)
+            , structure_end(buffer ? buffer + input_size : 0)
+            , write_cursor(structure_end)
+            , should_deallocate(should_deallocate_) {}
+
+        explicit allocator(std::nullptr_t)
+            : structure(0)
+            , structure_end(0)
+            , write_cursor(0)
+            , should_deallocate(false) {}
+
+        allocator(allocator&& other)
+            : structure(other.structure)
+            , structure_end(other.structure_end)
+            , write_cursor(other.write_cursor)
+            , should_deallocate(other.should_deallocate) {
+            other.structure = 0;
+            other.structure_end = 0;
+            other.write_cursor = 0;
+            other.should_deallocate = false;
+        }
+
+        ~allocator() {
+            if (should_deallocate) {
+                delete[] structure;
+            }
+        }
+
+        stack_head get_stack_head(bool* success) {
+            *success = true;
+            return stack_head(structure);
+        }
+
+        // Offsets are measured backwards from the end of the buffer.
+        size_t get_write_offset() { return structure_end - write_cursor; }
+
+        size_t* get_write_pointer_of(size_t v) { return structure_end - v; }
+
+        size_t* reserve(size_t size, bool* success) {
+            *success = true;
+            write_cursor -= size;
+            return write_cursor;
+        }
+
+        size_t* get_ast_root() { return write_cursor; }
+
+        // Hands the buffer to the returned ownership object when this
+        // allocator owns it; otherwise returns an empty ownership. Either
+        // way the allocator's pointers are cleared.
+        internal::ownership transfer_ownership() {
+            auto p = structure;
+            structure = 0;
+            structure_end = 0;
+            write_cursor = 0;
+            if (should_deallocate) {
+                return internal::ownership(p);
+            } else {
+                return internal::ownership(0);
+            }
+        }
+
+    private:
+        size_t* structure;
+        size_t* structure_end;
+        size_t* write_cursor;
+        bool should_deallocate;
+    };
+
+    /// \endcond
+
+    /// Allocate a single worst-case AST buffer with one word per byte in
+    /// the input document.
+    single_allocation()
+        : has_existing_buffer(false)
+        , existing_buffer(0)
+        , existing_buffer_size(0) {}
+
+    /// Write the AST into an existing buffer. Will fail with an out of
+    /// memory error if the buffer is not guaranteed to be big enough for
+    /// the document. The caller must guarantee the memory is valid for
+    /// the duration of the parse and the AST traversal.
+    single_allocation(size_t* existing_buffer_, size_t size_in_words)
+        : has_existing_buffer(true)
+        , existing_buffer(existing_buffer_)
+        , existing_buffer_size(size_in_words) {}
+
+    /// Convenience wrapper for single_allocation(size_t*, size_t) that
+    /// automatically infers the length of a given array.
+    template <size_t N>
+    explicit single_allocation(size_t (&existing_buffer_)[N])
+        : single_allocation(existing_buffer_, N) {}
+
+    /// \cond INTERNAL
+
+    // Builds the allocator for a parse. With a caller-supplied buffer the
+    // call fails when the buffer has fewer words than the input has bytes;
+    // otherwise a buffer of that many words is allocated with nothrow new.
+    allocator
+    make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
+        if (has_existing_buffer) {
+            if (existing_buffer_size < input_document_size_in_bytes) {
+                *succeeded = false;
+                return allocator(nullptr);
+            }
+            *succeeded = true;
+            return allocator(
+                existing_buffer, input_document_size_in_bytes, false);
+        } else {
+            size_t* buffer
+                = new (std::nothrow) size_t[input_document_size_in_bytes];
+            if (!buffer) {
+                *succeeded = false;
+                return allocator(nullptr);
+            }
+            *succeeded = true;
+            return allocator(buffer, input_document_size_in_bytes, true);
+        }
+    }
+
+    /// \endcond
+
+private:
+    bool has_existing_buffer;
+    size_t* existing_buffer;
+    size_t existing_buffer_size;
+};
+
+/// Allocation policy that uses dynamically-growing buffers for both the
+/// parse stack and the AST.
This allocation policy minimizes peak memory
+/// usage at the cost of some allocation and copying churn.
+class dynamic_allocation {
+public:
+    /// \cond INTERNAL
+
+    // Heap-allocated parse stack that doubles its capacity on demand.
+    class stack_head {
+    public:
+        stack_head(stack_head&& other)
+            : stack_top(other.stack_top)
+            , stack_bottom(other.stack_bottom)
+            , stack_limit(other.stack_limit) {
+            other.stack_top = 0;
+            other.stack_bottom = 0;
+            other.stack_limit = 0;
+        }
+
+        ~stack_head() { delete[] stack_bottom; }
+
+        // Appends one element; returns false if the stack could not grow.
+        bool push(size_t element) {
+            if (can_grow(1)) {
+                *stack_top++ = element;
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+        // Reserves `amount` contiguous slots and returns a pointer to the
+        // first one, or null with *success == false if growth failed.
+        size_t* reserve(size_t amount, bool* success) {
+            if (can_grow(amount)) {
+                size_t* rv = stack_top;
+                stack_top += amount;
+                *success = true;
+                return rv;
+            } else {
+                *success = false;
+                return 0;
+            }
+        }
+
+        void reset(size_t new_top) { stack_top = stack_bottom + new_top; }
+
+        size_t get_size() { return stack_top - stack_bottom; }
+
+        size_t* get_top() { return stack_top; }
+
+        size_t* get_pointer_from_offset(size_t offset) {
+            return stack_bottom + offset;
+        }
+
+    private:
+        stack_head(const stack_head&) = delete;
+        void operator=(const stack_head&) = delete;
+
+        explicit stack_head(size_t initial_capacity, bool* success) {
+            assert(initial_capacity);
+            stack_bottom = new (std::nothrow) size_t[initial_capacity];
+            stack_top = stack_bottom;
+            if (stack_bottom) {
+                stack_limit = stack_bottom + initial_capacity;
+            } else {
+                stack_limit = 0;
+            }
+            *success = !!stack_bottom;
+        }
+
+        // Ensures there is room for `amount` more elements, doubling the
+        // capacity until the request fits. On allocation failure the
+        // existing buffer is released, all pointers are nulled and false
+        // is returned.
+        bool can_grow(size_t amount) {
+            if (SAJSON_LIKELY(
+                    amount <= static_cast<size_t>(stack_limit - stack_top))) {
+                return true;
+            }
+
+            size_t current_size = stack_top - stack_bottom;
+            size_t old_capacity = stack_limit - stack_bottom;
+            size_t new_capacity = old_capacity * 2;
+            while (new_capacity < amount + current_size) {
+                new_capacity *= 2;
+            }
+            size_t* new_stack = new (std::nothrow) size_t[new_capacity];
+            if (!new_stack) {
+                // Free the old buffer before dropping the pointer to it;
+                // the destructor would otherwise see null and leak it.
+                delete[] stack_bottom;
+                stack_top = 0;
+                stack_bottom = 0;
+                stack_limit = 0;
+                return false;
+            }
+
+            memcpy(new_stack, stack_bottom, current_size * sizeof(size_t));
+            delete[] stack_bottom;
+            stack_top = new_stack + current_size;
+            stack_bottom = new_stack;
+            stack_limit = stack_bottom + new_capacity;
+            return true;
+        }
+
+        size_t* stack_top; // stack grows up: stack_top >= stack_bottom
+        size_t* stack_bottom;
+        size_t* stack_limit;
+
+        friend class dynamic_allocation;
+    };
+
+    // AST writer over a heap buffer that is filled downward from its top
+    // and reallocated at double the capacity when it runs out of room.
+    class allocator {
+    public:
+        allocator() = delete;
+        allocator(const allocator&) = delete;
+        void operator=(const allocator&) = delete;
+
+        explicit allocator(
+            size_t* buffer_,
+            size_t current_capacity,
+            size_t initial_stack_capacity_)
+            : ast_buffer_bottom(buffer_)
+            , ast_buffer_top(buffer_ + current_capacity)
+            , ast_write_head(ast_buffer_top)
+            , initial_stack_capacity(initial_stack_capacity_) {}
+
+        explicit allocator(std::nullptr_t)
+            : ast_buffer_bottom(0)
+            , ast_buffer_top(0)
+            , ast_write_head(0)
+            , initial_stack_capacity(0) {}
+
+        allocator(allocator&& other)
+            : ast_buffer_bottom(other.ast_buffer_bottom)
+            , ast_buffer_top(other.ast_buffer_top)
+            , ast_write_head(other.ast_write_head)
+            , initial_stack_capacity(other.initial_stack_capacity) {
+            other.ast_buffer_bottom = 0;
+            other.ast_buffer_top = 0;
+            other.ast_write_head = 0;
+        }
+
+        ~allocator() { delete[] ast_buffer_bottom; }
+
+        stack_head get_stack_head(bool* success) {
+            return stack_head(initial_stack_capacity, success);
+        }
+
+        // Offsets are measured backwards from the top of the buffer, so
+        // they stay valid when the buffer is reallocated.
+        size_t get_write_offset() { return ast_buffer_top - ast_write_head; }
+
+        size_t* get_write_pointer_of(size_t v) { return ast_buffer_top - v; }
+
+        // Reserves `size` words below the current write head; returns null
+        // with *success == false if the buffer could not grow.
+        size_t* reserve(size_t size, bool* success) {
+            if (can_grow(size)) {
+                ast_write_head -= size;
+                *success = true;
+                return ast_write_head;
+            } else {
+                *success = false;
+                return 0;
+            }
+        }
+
+        size_t* get_ast_root() { return ast_write_head; }
+
+        // Hands the buffer to the returned ownership object and clears
+        // this allocator's pointers.
+        internal::ownership transfer_ownership() {
+            auto p = ast_buffer_bottom;
+            ast_buffer_bottom = 0;
+            ast_buffer_top = 0;
+            ast_write_head = 0;
+            return internal::ownership(p);
+        }
+
+    private:
+        // Ensures `amount` more words fit below the write head, doubling
+        // the capacity until they do. Existing contents are copied to the
+        // top of the new buffer. On allocation failure the existing buffer
+        // is released, all pointers are nulled and false is returned.
+        bool can_grow(size_t amount) {
+            if (SAJSON_LIKELY(
+                    amount <= static_cast<size_t>(
+                        ast_write_head - ast_buffer_bottom))) {
+                return true;
+            }
+            size_t current_capacity = ast_buffer_top - ast_buffer_bottom;
+
+            size_t current_size = ast_buffer_top - ast_write_head;
+            size_t new_capacity = current_capacity * 2;
+            while (new_capacity < amount + current_size) {
+                new_capacity *= 2;
+            }
+
+            size_t* old_buffer = ast_buffer_bottom;
+            size_t* new_buffer = new (std::nothrow) size_t[new_capacity];
+            if (!new_buffer) {
+                // Free the old buffer before dropping the pointer to it;
+                // the destructor would otherwise see null and leak it.
+                delete[] old_buffer;
+                ast_buffer_bottom = 0;
+                ast_buffer_top = 0;
+                ast_write_head = 0;
+                return false;
+            }
+
+            size_t* old_write_head = ast_write_head;
+            ast_buffer_bottom = new_buffer;
+            ast_buffer_top = new_buffer + new_capacity;
+            ast_write_head = ast_buffer_top - current_size;
+            memcpy(
+                ast_write_head, old_write_head, current_size * sizeof(size_t));
+            delete[] old_buffer;
+
+            return true;
+        }
+
+        size_t*
+            ast_buffer_bottom; // base address of the ast buffer - it grows down
+        size_t* ast_buffer_top;
+        size_t* ast_write_head;
+        size_t initial_stack_capacity;
+    };
+
+    /// \endcond
+
+    /// Creates a dynamic_allocation policy with the given initial AST
+    /// and stack buffer sizes.
+    dynamic_allocation(
+        size_t initial_ast_capacity_ = 0, size_t initial_stack_capacity_ = 0)
+        : initial_ast_capacity(initial_ast_capacity_)
+        , initial_stack_capacity(initial_stack_capacity_) {}
+
+    /// \cond INTERNAL
+
+    // Allocates the initial AST buffer. A capacity of zero selects the
+    // defaults below (1024 words for the AST, 256 for the stack).
+    allocator
+    make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
+        size_t capacity = initial_ast_capacity;
+        if (!capacity) {
+            // TODO: guess based on input document size
+            capacity = 1024;
+        }
+
+        size_t* buffer = new (std::nothrow) size_t[capacity];
+        if (!buffer) {
+            *succeeded = false;
+            return allocator(nullptr);
+        }
+
+        size_t stack_capacity = initial_stack_capacity;
+        if (!stack_capacity) {
+            stack_capacity = 256;
+        }
+
+        *succeeded = true;
+        return allocator(buffer, capacity, stack_capacity);
+    }
+
+    /// \endcond
+
+private:
+    size_t initial_ast_capacity;
+    size_t initial_stack_capacity;
+};
+
+/// Allocation policy that attempts to fit the parsed AST into an existing
+/// memory buffer. This allocation policy is useful when using sajson in
+/// a zero-allocation context or when there are constraints on the amount
+/// of memory that can be used.
+class bounded_allocation {
+public:
+    /// \cond INTERNAL
+
+    class allocator;
+
+    // Parse stack view over the allocator's buffer. All state lives in the
+    // source allocator: the stack grows upward from `structure` while the
+    // AST grows downward from `structure_end`.
+    class stack_head {
+    public:
+        stack_head(stack_head&& other)
+            : source_allocator(other.source_allocator) {
+            other.source_allocator = 0;
+        }
+
+        // Appends one element; returns false when stack and AST would
+        // collide.
+        bool push(size_t element) {
+            if (SAJSON_LIKELY(source_allocator->can_grow(1))) {
+                *(source_allocator->stack_top)++ = element;
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+        // Reserves `amount` contiguous slots and returns a pointer to the
+        // first one, or null with *success == false if they do not fit.
+        size_t* reserve(size_t amount, bool* success) {
+            if (SAJSON_LIKELY(source_allocator->can_grow(amount))) {
+                size_t* rv = source_allocator->stack_top;
+                source_allocator->stack_top += amount;
+                *success = true;
+                return rv;
+            } else {
+                *success = false;
+                return 0;
+            }
+        }
+
+        void reset(size_t new_top) {
+            source_allocator->stack_top = source_allocator->structure + new_top;
+        }
+
+        size_t get_size() {
+            return source_allocator->stack_top - source_allocator->structure;
+        }
+
+        size_t* get_top() { return source_allocator->stack_top; }
+
+        size_t* get_pointer_from_offset(size_t offset) {
+            return source_allocator->structure + offset;
+        }
+
+    private:
+        stack_head(const stack_head&) = delete;
+        void operator=(const stack_head&) = delete;
+
+        explicit stack_head(allocator* source_allocator_)
+            : source_allocator(source_allocator_) {}
+
+        allocator* source_allocator;
+
+        friend class bounded_allocation;
+    };
+
+    // Allocator over a caller-supplied buffer; it never allocates or frees.
+    class allocator {
+    public:
+        allocator() = delete;
+        allocator(const allocator&) = delete;
+        void operator=(const allocator&) = delete;
+
+        explicit allocator(size_t* existing_buffer, size_t existing_buffer_size)
+            : structure(existing_buffer)
+            , structure_end(existing_buffer + existing_buffer_size)
+            , write_cursor(structure_end)
+            , stack_top(structure) {}
+
+        allocator(allocator&& other)
+            : structure(other.structure)
+            , structure_end(other.structure_end)
+            , write_cursor(other.write_cursor)
+            , stack_top(other.stack_top) {
+            other.structure = 0;
+            other.structure_end = 0;
+            other.write_cursor = 0;
+            other.stack_top = 0;
+        }
+
+        // The returned stack_head keeps a pointer to this allocator.
+        stack_head get_stack_head(bool* success) {
+            *success = true;
+            return stack_head(this);
+        }
+
+        // Offsets are measured backwards from the end of the buffer.
+        size_t get_write_offset() { return structure_end - write_cursor; }
+
+        size_t* get_write_pointer_of(size_t v) { return structure_end - v; }
+
+        // Reserves `size` words below the write cursor; returns null with
+        // *success == false if that would overlap the parse stack.
+        size_t* reserve(size_t size, bool* success) {
+            if (can_grow(size)) {
+                write_cursor -= size;
+                *success = true;
+                return write_cursor;
+            } else {
+                *success = false;
+                return 0;
+            }
+        }
+
+        size_t* get_ast_root() { return write_cursor; }
+
+        // The buffer belongs to the caller, so an empty ownership object is
+        // returned.
+        internal::ownership transfer_ownership() {
+            structure = 0;
+            structure_end = 0;
+            write_cursor = 0;
+            return internal::ownership(0);
+        }
+
+    private:
+        bool can_grow(size_t amount) {
+            // invariant: stack_top <= write_cursor
+            // thus: write_cursor - stack_top is positive
+            return static_cast<size_t>(write_cursor - stack_top) >= amount;
+        }
+
+        size_t* structure;
+        size_t* structure_end;
+        size_t* write_cursor;
+        size_t* stack_top;
+
+        friend class bounded_allocation;
+    };
+
+    /// \endcond
+
+    /// Uses an existing buffer to hold the parsed AST, if it fits. The
+    /// specified buffer must not be deallocated until after the document
+    /// is parsed and the AST traversed.
+    bounded_allocation(size_t* existing_buffer_, size_t size_in_words)
+        : existing_buffer(existing_buffer_)
+        , existing_buffer_size(size_in_words) {}
+
+    /// Convenience wrapper for bounded_allocation(size_t*, size_t) that
+    /// automatically infers the size of the given array.
+    template <size_t N>
+    explicit bounded_allocation(size_t (&existing_buffer_)[N])
+        : bounded_allocation(existing_buffer_, N) {}
+
+    /// \cond INTERNAL
+
+    // Always succeeds; the input size is not consulted because running out
+    // of room is detected while parsing.
+    allocator
+    make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
+        *succeeded = true;
+        return allocator(existing_buffer, existing_buffer_size);
+    }
+
+    /// \endcond
+
+private:
+    size_t* existing_buffer;
+    size_t existing_buffer_size;
+};
+
+// I thought about putting parser in the internal namespace but I don't
+// want to indent it further...
+/// \cond INTERNAL +template +class parser { +public: + parser(const mutable_string_view& msv, Allocator&& allocator_) + : input(msv) + , input_end(input.get_data() + input.length()) + , allocator(std::move(allocator_)) + , root_tag(internal::tag::null) + , error_line(0) + , error_column(0) {} + + document get_document() { + if (parse()) { + size_t* ast_root = allocator.get_ast_root(); + return document( + input, allocator.transfer_ownership(), root_tag, ast_root); + } else { + return document( + input, error_line, error_column, error_code, error_arg); + } + } + +private: + struct error_result { + operator bool() const { return false; } + operator char*() const { return 0; } + }; + + bool at_eof(const char* p) { return p == input_end; } + + char* skip_whitespace(char* p) { + // There is an opportunity to make better use of superscalar + // hardware here* but if someone cares about JSON parsing + // performance the first thing they do is minify, so prefer + // to optimize for code size here. 
+ // * + // https://github.com/chadaustin/Web-Benchmarks/blob/master/json/third-party/pjson/pjson.h#L1873 + for (;;) { + if (SAJSON_UNLIKELY(p == input_end)) { + return 0; + } else if (internal::is_whitespace(*p)) { + ++p; + } else { + return p; + } + } + } + + error_result oom(char* p, const char* /*reason*/) { + return make_error(p, ERROR_OUT_OF_MEMORY); + } + + error_result unexpected_end() { + return make_error(0, ERROR_UNEXPECTED_END); + } + + error_result unexpected_end(char* p) { + return make_error(p, ERROR_UNEXPECTED_END); + } + + error_result make_error(char* p, error code, int arg = 0) { + if (!p) { + p = input_end; + } + + error_line = 1; + error_column = 1; + + char* c = input.get_data(); + while (c < p) { + if (*c == '\r') { + if (c + 1 < p && c[1] == '\n') { + ++error_line; + error_column = 1; + ++c; + } else { + ++error_line; + error_column = 1; + } + } else if (*c == '\n') { + ++error_line; + error_column = 1; + } else { + // TODO: count UTF-8 characters + ++error_column; + } + ++c; + } + + error_code = code; + error_arg = arg; + return error_result(); + } + + bool parse() { + using namespace internal; + + // p points to the character currently being parsed + char* p = input.get_data(); + + bool success; + auto stack = allocator.get_stack_head(&success); + if (SAJSON_UNLIKELY(!success)) { + return oom(p, "failed to get stack head"); + } + + p = skip_whitespace(p); + if (SAJSON_UNLIKELY(!p)) { + return make_error(p, ERROR_MISSING_ROOT_ELEMENT); + } + + // current_base is an offset to the first element of the current + // structure (object or array) + size_t current_base = stack.get_size(); + tag current_structure_tag; + if (*p == '[') { + current_structure_tag = tag::array; + bool s + = stack.push(make_element(current_structure_tag, ROOT_MARKER)); + if (SAJSON_UNLIKELY(!s)) { + return oom(p, "stack.push array"); + } + goto array_close_or_element; + } else if (*p == '{') { + current_structure_tag = tag::object; + bool s + = 
stack.push(make_element(current_structure_tag, ROOT_MARKER)); + if (SAJSON_UNLIKELY(!s)) { + printf("oom 3\n"); + return oom(p, "stack.push object"); + } + goto object_close_or_element; + } else { + return make_error(p, ERROR_BAD_ROOT); + } + + // BEGIN STATE MACHINE + + size_t pop_element; // used as an argument into the `pop` routine + + if (0) { // purely for structure + + // ASSUMES: byte at p SHOULD be skipped + array_close_or_element: + p = skip_whitespace(p + 1); + if (SAJSON_UNLIKELY(!p)) { + return unexpected_end(); + } + if (*p == ']') { + goto pop_array; + } else { + goto next_element; + } + SAJSON_UNREACHABLE(); + + // ASSUMES: byte at p SHOULD be skipped + object_close_or_element: + p = skip_whitespace(p + 1); + if (SAJSON_UNLIKELY(!p)) { + return unexpected_end(); + } + if (*p == '}') { + goto pop_object; + } else { + goto object_key; + } + SAJSON_UNREACHABLE(); + + // ASSUMES: byte at p SHOULD NOT be skipped + structure_close_or_comma: + p = skip_whitespace(p); + if (SAJSON_UNLIKELY(!p)) { + return unexpected_end(); + } + + if (current_structure_tag == tag::array) { + if (*p == ']') { + goto pop_array; + } else { + if (SAJSON_UNLIKELY(*p != ',')) { + return make_error(p, ERROR_EXPECTED_COMMA); + } + ++p; + goto next_element; + } + } else { + assert(current_structure_tag == tag::object); + if (*p == '}') { + goto pop_object; + } else { + if (SAJSON_UNLIKELY(*p != ',')) { + return make_error(p, ERROR_EXPECTED_COMMA); + } + ++p; + goto object_key; + } + } + SAJSON_UNREACHABLE(); + + // ASSUMES: *p == '}' + pop_object : { + ++p; + size_t* base_ptr = stack.get_pointer_from_offset(current_base); + pop_element = *base_ptr; + if (SAJSON_UNLIKELY( + !install_object(base_ptr + 1, stack.get_top()))) { + return oom(p, "install_object"); + } + goto pop; + } + + // ASSUMES: *p == ']' + pop_array : { + ++p; + size_t* base_ptr = stack.get_pointer_from_offset(current_base); + pop_element = *base_ptr; + if (SAJSON_UNLIKELY( + !install_array(base_ptr + 1, 
stack.get_top()))) { + return oom(p, "install_array"); + } + goto pop; + } + + // ASSUMES: byte at p SHOULD NOT be skipped + object_key : { + p = skip_whitespace(p); + if (SAJSON_UNLIKELY(!p)) { + return unexpected_end(); + } + if (SAJSON_UNLIKELY(*p != '"')) { + return make_error(p, ERROR_MISSING_OBJECT_KEY); + } + bool success_; + size_t* out = stack.reserve(2, &success_); + if (SAJSON_UNLIKELY(!success_)) { + return oom(p, "reserve for object key"); + } + p = parse_string(p, out); + if (SAJSON_UNLIKELY(!p)) { + return false; + } + p = skip_whitespace(p); + if (SAJSON_UNLIKELY(!p || *p != ':')) { + return make_error(p, ERROR_EXPECTED_COLON); + } + ++p; + goto next_element; + } + + // ASSUMES: byte at p SHOULD NOT be skipped + next_element: + p = skip_whitespace(p); + if (SAJSON_UNLIKELY(!p)) { + return unexpected_end(); + } + + tag value_tag_result; + switch (*p) { + case 0: + return unexpected_end(p); + case 'n': + p = parse_null(p); + if (!p) { + return false; + } + value_tag_result = tag::null; + break; + case 'f': + p = parse_false(p); + if (!p) { + return false; + } + value_tag_result = tag::false_; + break; + case 't': + p = parse_true(p); + if (!p) { + return false; + } + value_tag_result = tag::true_; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': { + auto result = parse_number(p); + p = result.first; + if (!p) { + return false; + } + value_tag_result = result.second; + break; + } + case '"': { + bool success_; + size_t* string_tag = allocator.reserve(2, &success_); + if (SAJSON_UNLIKELY(!success_)) { + return oom(p, "reserve for string tag"); + } + p = parse_string(p, string_tag); + if (!p) { + return false; + } + value_tag_result = tag::string; + break; + } + + case '[': { + size_t previous_base = current_base; + current_base = stack.get_size(); + bool s = stack.push( + make_element(current_structure_tag, previous_base)); + if (SAJSON_UNLIKELY(!s)) { + return 
oom(p, "stack.push array"); + } + current_structure_tag = tag::array; + goto array_close_or_element; + } + case '{': { + size_t previous_base = current_base; + current_base = stack.get_size(); + bool s = stack.push( + make_element(current_structure_tag, previous_base)); + if (SAJSON_UNLIKELY(!s)) { + return oom(p, "stack.push object"); + } + current_structure_tag = tag::object; + goto object_close_or_element; + } + pop : { + size_t parent = get_element_value(pop_element); + if (parent == ROOT_MARKER) { + root_tag = current_structure_tag; + p = skip_whitespace(p); + if (SAJSON_UNLIKELY(p)) { + return make_error(p, ERROR_EXPECTED_END_OF_INPUT); + } + return true; + } + stack.reset(current_base); + current_base = parent; + value_tag_result = current_structure_tag; + current_structure_tag = get_element_tag(pop_element); + break; + } + + case ',': + return make_error(p, ERROR_UNEXPECTED_COMMA); + default: + return make_error(p, ERROR_EXPECTED_VALUE); + } + + bool s = stack.push( + make_element(value_tag_result, allocator.get_write_offset())); + if (SAJSON_UNLIKELY(!s)) { + return oom(p, "stack.push value"); + } + + goto structure_close_or_comma; + } + + SAJSON_UNREACHABLE(); + } + + bool has_remaining_characters(char* p, ptrdiff_t remaining) { + return input_end - p >= remaining; + } + + char* parse_null(char* p) { + if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) { + make_error(p, ERROR_UNEXPECTED_END); + return 0; + } + char p1 = p[1]; + char p2 = p[2]; + char p3 = p[3]; + if (SAJSON_UNLIKELY(p1 != 'u' || p2 != 'l' || p3 != 'l')) { + make_error(p, ERROR_EXPECTED_NULL); + return 0; + } + return p + 4; + } + + char* parse_false(char* p) { + if (SAJSON_UNLIKELY(!has_remaining_characters(p, 5))) { + return make_error(p, ERROR_UNEXPECTED_END); + } + char p1 = p[1]; + char p2 = p[2]; + char p3 = p[3]; + char p4 = p[4]; + if (SAJSON_UNLIKELY(p1 != 'a' || p2 != 'l' || p3 != 's' || p4 != 'e')) { + return make_error(p, ERROR_EXPECTED_FALSE); + } + return p + 5; + } + + 
char* parse_true(char* p) { + if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) { + return make_error(p, ERROR_UNEXPECTED_END); + } + char p1 = p[1]; + char p2 = p[2]; + char p3 = p[3]; + if (SAJSON_UNLIKELY(p1 != 'r' || p2 != 'u' || p3 != 'e')) { + return make_error(p, ERROR_EXPECTED_TRUE); + } + return p + 4; + } + + static double pow10(int64_t exponent) { + if (SAJSON_UNLIKELY(exponent > 308)) { + return std::numeric_limits::infinity(); + } else if (SAJSON_UNLIKELY(exponent < -323)) { + return 0.0; + } + + // clang-format off + static const double constants[] = { + 1e-323,1e-322,1e-321,1e-320,1e-319,1e-318,1e-317,1e-316,1e-315,1e-314, + 1e-313,1e-312,1e-311,1e-310,1e-309,1e-308,1e-307,1e-306,1e-305,1e-304, + 1e-303,1e-302,1e-301,1e-300,1e-299,1e-298,1e-297,1e-296,1e-295,1e-294, + 1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284, + 1e-283,1e-282,1e-281,1e-280,1e-279,1e-278,1e-277,1e-276,1e-275,1e-274, + 1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264, + 1e-263,1e-262,1e-261,1e-260,1e-259,1e-258,1e-257,1e-256,1e-255,1e-254, + 1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244, + 1e-243,1e-242,1e-241,1e-240,1e-239,1e-238,1e-237,1e-236,1e-235,1e-234, + 1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224, + 1e-223,1e-222,1e-221,1e-220,1e-219,1e-218,1e-217,1e-216,1e-215,1e-214, + 1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204, + 1e-203,1e-202,1e-201,1e-200,1e-199,1e-198,1e-197,1e-196,1e-195,1e-194, + 1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184, + 1e-183,1e-182,1e-181,1e-180,1e-179,1e-178,1e-177,1e-176,1e-175,1e-174, + 1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164, + 1e-163,1e-162,1e-161,1e-160,1e-159,1e-158,1e-157,1e-156,1e-155,1e-154, + 1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144, + 1e-143,1e-142,1e-141,1e-140,1e-139,1e-138,1e-137,1e-136,1e-135,1e-134, + 
1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124, + 1e-123,1e-122,1e-121,1e-120,1e-119,1e-118,1e-117,1e-116,1e-115,1e-114, + 1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104, + 1e-103,1e-102,1e-101,1e-100,1e-99,1e-98,1e-97,1e-96,1e-95,1e-94,1e-93, + 1e-92,1e-91,1e-90,1e-89,1e-88,1e-87,1e-86,1e-85,1e-84,1e-83,1e-82,1e-81, + 1e-80,1e-79,1e-78,1e-77,1e-76,1e-75,1e-74,1e-73,1e-72,1e-71,1e-70,1e-69, + 1e-68,1e-67,1e-66,1e-65,1e-64,1e-63,1e-62,1e-61,1e-60,1e-59,1e-58,1e-57, + 1e-56,1e-55,1e-54,1e-53,1e-52,1e-51,1e-50,1e-49,1e-48,1e-47,1e-46,1e-45, + 1e-44,1e-43,1e-42,1e-41,1e-40,1e-39,1e-38,1e-37,1e-36,1e-35,1e-34,1e-33, + 1e-32,1e-31,1e-30,1e-29,1e-28,1e-27,1e-26,1e-25,1e-24,1e-23,1e-22,1e-21, + 1e-20,1e-19,1e-18,1e-17,1e-16,1e-15,1e-14,1e-13,1e-12,1e-11,1e-10,1e-9, + 1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5,1e6,1e7, + 1e8,1e9,1e10,1e11,1e12,1e13,1e14,1e15,1e16,1e17,1e18,1e19,1e20,1e21, + 1e22,1e23,1e24,1e25,1e26,1e27,1e28,1e29,1e30,1e31,1e32,1e33,1e34,1e35, + 1e36,1e37,1e38,1e39,1e40,1e41,1e42,1e43,1e44,1e45,1e46,1e47,1e48,1e49, + 1e50,1e51,1e52,1e53,1e54,1e55,1e56,1e57,1e58,1e59,1e60,1e61,1e62,1e63, + 1e64,1e65,1e66,1e67,1e68,1e69,1e70,1e71,1e72,1e73,1e74,1e75,1e76,1e77, + 1e78,1e79,1e80,1e81,1e82,1e83,1e84,1e85,1e86,1e87,1e88,1e89,1e90,1e91, + 1e92,1e93,1e94,1e95,1e96,1e97,1e98,1e99,1e100,1e101,1e102,1e103,1e104, + 1e105,1e106,1e107,1e108,1e109,1e110,1e111,1e112,1e113,1e114,1e115,1e116, + 1e117,1e118,1e119,1e120,1e121,1e122,1e123,1e124,1e125,1e126,1e127,1e128, + 1e129,1e130,1e131,1e132,1e133,1e134,1e135,1e136,1e137,1e138,1e139,1e140, + 1e141,1e142,1e143,1e144,1e145,1e146,1e147,1e148,1e149,1e150,1e151,1e152, + 1e153,1e154,1e155,1e156,1e157,1e158,1e159,1e160,1e161,1e162,1e163,1e164, + 1e165,1e166,1e167,1e168,1e169,1e170,1e171,1e172,1e173,1e174,1e175,1e176, + 1e177,1e178,1e179,1e180,1e181,1e182,1e183,1e184,1e185,1e186,1e187,1e188, + 1e189,1e190,1e191,1e192,1e193,1e194,1e195,1e196,1e197,1e198,1e199,1e200, + 
1e201,1e202,1e203,1e204,1e205,1e206,1e207,1e208,1e209,1e210,1e211,1e212,
+            1e213,1e214,1e215,1e216,1e217,1e218,1e219,1e220,1e221,1e222,1e223,1e224,
+            1e225,1e226,1e227,1e228,1e229,1e230,1e231,1e232,1e233,1e234,1e235,1e236,
+            1e237,1e238,1e239,1e240,1e241,1e242,1e243,1e244,1e245,1e246,1e247,1e248,
+            1e249,1e250,1e251,1e252,1e253,1e254,1e255,1e256,1e257,1e258,1e259,1e260,
+            1e261,1e262,1e263,1e264,1e265,1e266,1e267,1e268,1e269,1e270,1e271,1e272,
+            1e273,1e274,1e275,1e276,1e277,1e278,1e279,1e280,1e281,1e282,1e283,1e284,
+            1e285,1e286,1e287,1e288,1e289,1e290,1e291,1e292,1e293,1e294,1e295,1e296,
+            1e297,1e298,1e299,1e300,1e301,1e302,1e303,1e304,1e305,1e306,1e307,1e308
+        };
+        // clang-format on
+
+        return constants[exponent + 323];
+    }
+
+    // Parses the JSON number that starts at p.
+    //
+    // On success the value is written through the allocator and the result
+    // is {pointer just past the number, tag::integer or tag::double_}. On
+    // failure the result is {make_error(...) or oom(...), tag::null}.
+    //
+    // Digits accumulate in the int `i` until a fraction, an exponent, or the
+    // overflow guard below forces the switch to the double `d`. Every
+    // advance of p is followed by an at_eof(p) check that fails the parse
+    // with ERROR_UNEXPECTED_END.
+    std::pair<char*, internal::tag> parse_number(char* p) {
+        using internal::tag;
+
+        bool negative = false;
+        if ('-' == *p) {
+            ++p;
+            negative = true;
+
+            if (SAJSON_UNLIKELY(at_eof(p))) {
+                return std::make_pair(
+                    make_error(p, ERROR_UNEXPECTED_END), tag::null);
+            }
+        }
+
+        bool try_double = false;
+
+        int i = 0;
+        double d = 0.0; // gcc complains that d might be used uninitialized
+                        // which isn't true. appease the warning anyway.
+        // A leading '0' is consumed on its own; digits that follow it are not
+        // folded into the integer part.
+        if (*p == '0') {
+            ++p;
+            if (SAJSON_UNLIKELY(at_eof(p))) {
+                return std::make_pair(
+                    make_error(p, ERROR_UNEXPECTED_END), tag::null);
+            }
+        } else {
+            unsigned char c = *p;
+            if (c < '0' || c > '9') {
+                return std::make_pair(
+                    make_error(p, ERROR_INVALID_NUMBER), tag::null);
+            }
+
+            do {
+                ++p;
+                if (SAJSON_UNLIKELY(at_eof(p))) {
+                    return std::make_pair(
+                        make_error(p, ERROR_UNEXPECTED_END), tag::null);
+                }
+
+                unsigned char digit = c - '0';
+
+                // Conservative guard: while i <= INT_MAX / 10 - 9 the
+                // update 10 * i + digit cannot overflow an int.
+                if (SAJSON_UNLIKELY(!try_double && i > INT_MAX / 10 - 9)) {
+                    // TODO: could split this into two loops
+                    try_double = true;
+                    d = i;
+                }
+                if (SAJSON_UNLIKELY(try_double)) {
+                    d = 10.0 * d + digit;
+                } else {
+                    i = 10 * i + digit;
+                }
+
+                c = *p;
+            } while (c >= '0' && c <= '9');
+        }
+
+        // Decimal exponent applied to d at the end: decremented once per
+        // fractional digit, then adjusted by the explicit e/E exponent.
+        int64_t exponent = 0;
+
+        if ('.' == *p) {
+            if (!try_double) {
+                try_double = true;
+                d = i;
+            }
+            ++p;
+            if (SAJSON_UNLIKELY(at_eof(p))) {
+                return std::make_pair(
+                    make_error(p, ERROR_UNEXPECTED_END), tag::null);
+            }
+            // At least one digit must follow the decimal point.
+            char c = *p;
+            if (c < '0' || c > '9') {
+                return std::make_pair(
+                    make_error(p, ERROR_INVALID_NUMBER), tag::null);
+            }
+
+            do {
+                ++p;
+                if (SAJSON_UNLIKELY(at_eof(p))) {
+                    return std::make_pair(
+                        make_error(p, ERROR_UNEXPECTED_END), tag::null);
+                }
+                d = d * 10 + (c - '0');
+                // One option to avoid underflow would be to clamp
+                // to INT_MIN, but int64 subtraction is cheap and
+                // in the absurd case of parsing 2 GB of digits
+                // with an extremely high exponent, this will
+                // produce accurate results.  Instead, we just
+                // leave exponent as int64_t and it will never
+                // underflow.
+                --exponent;
+
+                c = *p;
+            } while (c >= '0' && c <= '9');
+        }
+
+        char e = *p;
+        if ('e' == e || 'E' == e) {
+            if (!try_double) {
+                try_double = true;
+                d = i;
+            }
+            ++p;
+            if (SAJSON_UNLIKELY(at_eof(p))) {
+                return std::make_pair(
+                    make_error(p, ERROR_UNEXPECTED_END), tag::null);
+            }
+
+            // Optional sign of the exponent.
+            bool negativeExponent = false;
+            if ('-' == *p) {
+                negativeExponent = true;
+                ++p;
+                if (SAJSON_UNLIKELY(at_eof(p))) {
+                    return std::make_pair(
+                        make_error(p, ERROR_UNEXPECTED_END), tag::null);
+                }
+            } else if ('+' == *p) {
+                ++p;
+                if (SAJSON_UNLIKELY(at_eof(p))) {
+                    return std::make_pair(
+                        make_error(p, ERROR_UNEXPECTED_END), tag::null);
+                }
+            }
+
+            int exp = 0;
+
+            char c = *p;
+            if (SAJSON_UNLIKELY(c < '0' || c > '9')) {
+                return std::make_pair(
+                    make_error(p, ERROR_MISSING_EXPONENT), tag::null);
+            }
+            for (;;) {
+                // c guaranteed to be between '0' and '9', inclusive
+                unsigned char digit = c - '0';
+                if (exp > (INT_MAX - digit) / 10) {
+                    // The exponent overflowed.  Keep parsing, but
+                    // it will definitely be out of range when
+                    // pow10 is called.
+                    exp = INT_MAX;
+                } else {
+                    exp = 10 * exp + digit;
+                }
+
+                ++p;
+                if (SAJSON_UNLIKELY(at_eof(p))) {
+                    return std::make_pair(
+                        make_error(p, ERROR_UNEXPECTED_END), tag::null);
+                }
+
+                c = *p;
+                if (c < '0' || c > '9') {
+                    break;
+                }
+            }
+            static_assert(
+                -INT_MAX >= INT_MIN, "exp can be negated without loss or UB");
+            exponent += (negativeExponent ? -exp : exp);
+        }
+
+        if (exponent) {
+            assert(try_double);
+            // If d is zero but the exponent is huge, don't
+            // multiply zero by inf which gives nan.
+            if (d != 0.0) {
+                d *= pow10(exponent);
+            }
+        }
+
+        // Apply the sign to whichever representation is in use. In the
+        // integer case "-0" becomes the plain integer 0.
+        if (negative) {
+            if (try_double) {
+                d = -d;
+            } else {
+                i = -i;
+            }
+        }
+        // Reserve storage for the value and write it; a failed reservation
+        // is reported through oom().
+        if (try_double) {
+            bool success;
+            size_t* out
+                = allocator.reserve(double_storage::word_length, &success);
+            if (SAJSON_UNLIKELY(!success)) {
+                return std::make_pair(oom(p, "double"), tag::null);
+            }
+            double_storage::store(out, d);
+            return std::make_pair(p, tag::double_);
+        } else {
+            bool success;
+            size_t* out
+                = allocator.reserve(integer_storage::word_length, &success);
+            if (SAJSON_UNLIKELY(!success)) {
+                return std::make_pair(oom(p, "integer"), tag::null);
+            }
+            integer_storage::store(out, i);
+            return std::make_pair(p, tag::integer);
+        }
+    }
+
+    // Copies the element words in [array_base, array_end) into length + 1
+    // newly reserved words: new_base[0] receives the element count and the
+    // elements follow in their original order. Each element keeps its tag;
+    // its value is read as a distance back from structure_end and rewritten
+    // as an offset relative to new_base. Returns false if the reservation
+    // fails.
+    bool install_array(size_t* array_base, size_t* array_end) {
+        using namespace sajson::internal;
+
+        const size_t length = array_end - array_base;
+        bool success;
+        size_t* const new_base = allocator.reserve(length + 1, &success);
+        if (SAJSON_UNLIKELY(!success)) {
+            return false;
+        }
+        // Filled back to front, starting one past the last reserved word.
+        size_t* out = new_base + length + 1;
+        size_t* const structure_end = allocator.get_write_pointer_of(0);
+
+        while (array_end > array_base) {
+            size_t element = *--array_end;
+            tag element_type = get_element_tag(element);
+            size_t element_value = get_element_value(element);
+            size_t* element_ptr = structure_end - element_value;
+            *--out = make_element(element_type, element_ptr - new_base);
+        }
+        *--out = length;
+        return true;
+    }
+
+    // Object counterpart of install_array. [object_base, object_end) holds
+    // records of three words each (asserted). The last word of a record is
+    // an element that is rebased like an array element; the other two words
+    // are copied unchanged. When should_binary_search(length) holds, the
+    // records are sorted with object_key_comparator first. Returns false if
+    // the reservation fails.
+    // NOTE(review): the two unchanged words are presumably the key start/end
+    // offsets produced by parse_string -- confirm against the caller.
+    bool install_object(size_t* object_base, size_t* object_end) {
+        using
namespace internal; + + assert((object_end - object_base) % 3 == 0); + const size_t length_times_3 = object_end - object_base; + const size_t length = length_times_3 / 3; + if (SAJSON_UNLIKELY(should_binary_search(length))) { + std::sort( + reinterpret_cast(object_base), + reinterpret_cast(object_end), + object_key_comparator(input.get_data())); + } + + bool success; + size_t* const new_base + = allocator.reserve(length_times_3 + 1, &success); + if (SAJSON_UNLIKELY(!success)) { + return false; + } + size_t* out = new_base + length_times_3 + 1; + size_t* const structure_end = allocator.get_write_pointer_of(0); + + while (object_end > object_base) { + size_t element = *--object_end; + tag element_type = get_element_tag(element); + size_t element_value = get_element_value(element); + size_t* element_ptr = structure_end - element_value; + + *--out = make_element(element_type, element_ptr - new_base); + *--out = *--object_end; + *--out = *--object_end; + } + *--out = length; + return true; + } + + char* parse_string(char* p, size_t* tag) { + using namespace internal; + + ++p; // " + size_t start = p - input.get_data(); + char* input_end_local = input_end; + while (input_end_local - p >= 4) { + if (!is_plain_string_character(p[0])) { + goto found; + } + if (!is_plain_string_character(p[1])) { + p += 1; + goto found; + } + if (!is_plain_string_character(p[2])) { + p += 2; + goto found; + } + if (!is_plain_string_character(p[3])) { + p += 3; + goto found; + } + p += 4; + } + for (;;) { + if (SAJSON_UNLIKELY(p >= input_end_local)) { + return make_error(p, ERROR_UNEXPECTED_END); + } + + if (!is_plain_string_character(*p)) { + break; + } + + ++p; + } + found: + if (SAJSON_LIKELY(*p == '"')) { + tag[0] = start; + tag[1] = p - input.get_data(); + *p = '\0'; + return p + 1; + } + + if (*p >= 0 && *p < 0x20) { + return make_error(p, ERROR_ILLEGAL_CODEPOINT, static_cast(*p)); + } else { + // backslash or >0x7f + return parse_string_slow(p, tag, start); + } + } + + char* 
read_hex(char* p, unsigned& u) { + unsigned v = 0; + int i = 4; + while (i--) { + unsigned char c = *p++; + if (c >= '0' && c <= '9') { + c -= '0'; + } else if (c >= 'a' && c <= 'f') { + c = c - 'a' + 10; + } else if (c >= 'A' && c <= 'F') { + c = c - 'A' + 10; + } else { + return make_error(p, ERROR_INVALID_UNICODE_ESCAPE); + } + v = (v << 4) + c; + } + + u = v; + return p; + } + + void write_utf8(unsigned codepoint, char*& end) { + if (codepoint < 0x80) { + *end++ = codepoint; + } else if (codepoint < 0x800) { + *end++ = 0xC0 | (codepoint >> 6); + *end++ = 0x80 | (codepoint & 0x3F); + } else if (codepoint < 0x10000) { + *end++ = 0xE0 | (codepoint >> 12); + *end++ = 0x80 | ((codepoint >> 6) & 0x3F); + *end++ = 0x80 | (codepoint & 0x3F); + } else { + assert(codepoint < 0x200000); + *end++ = 0xF0 | (codepoint >> 18); + *end++ = 0x80 | ((codepoint >> 12) & 0x3F); + *end++ = 0x80 | ((codepoint >> 6) & 0x3F); + *end++ = 0x80 | (codepoint & 0x3F); + } + } + + char* parse_string_slow(char* p, size_t* tag, size_t start) { + char* end = p; + char* input_end_local = input_end; + + for (;;) { + if (SAJSON_UNLIKELY(p >= input_end_local)) { + return make_error(p, ERROR_UNEXPECTED_END); + } + + if (SAJSON_UNLIKELY(*p >= 0 && *p < 0x20)) { + return make_error( + p, ERROR_ILLEGAL_CODEPOINT, static_cast(*p)); + } + + switch (*p) { + case '"': + tag[0] = start; + tag[1] = end - input.get_data(); + *end = '\0'; + return p + 1; + + case '\\': + ++p; + if (SAJSON_UNLIKELY(p >= input_end_local)) { + return make_error(p, ERROR_UNEXPECTED_END); + } + + char replacement; + switch (*p) { + case '"': + replacement = '"'; + goto replace; + case '\\': + replacement = '\\'; + goto replace; + case '/': + replacement = '/'; + goto replace; + case 'b': + replacement = '\b'; + goto replace; + case 'f': + replacement = '\f'; + goto replace; + case 'n': + replacement = '\n'; + goto replace; + case 'r': + replacement = '\r'; + goto replace; + case 't': + replacement = '\t'; + goto replace; + replace: 
+ *end++ = replacement; + ++p; + break; + case 'u': { + ++p; + if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) { + return make_error(p, ERROR_UNEXPECTED_END); + } + unsigned u = 0; // gcc's complaining that this could be used + // uninitialized. wrong. + p = read_hex(p, u); + if (!p) { + return 0; + } + if (u >= 0xD800 && u <= 0xDBFF) { + if (SAJSON_UNLIKELY(!has_remaining_characters(p, 6))) { + return make_error(p, ERROR_UNEXPECTED_END_OF_UTF16); + } + char p0 = p[0]; + char p1 = p[1]; + if (p0 != '\\' || p1 != 'u') { + return make_error(p, ERROR_EXPECTED_U); + } + p += 2; + unsigned v = 0; // gcc's complaining that this could be + // used uninitialized. wrong. + p = read_hex(p, v); + if (!p) { + return p; + } + + if (v < 0xDC00 || v > 0xDFFF) { + return make_error( + p, ERROR_INVALID_UTF16_TRAIL_SURROGATE); + } + u = 0x10000 + (((u - 0xD800) << 10) | (v - 0xDC00)); + } + write_utf8(u, end); + break; + } + default: + return make_error(p, ERROR_UNKNOWN_ESCAPE); + } + break; + + default: + // validate UTF-8 + unsigned char c0 = p[0]; + if (c0 < 128) { + *end++ = *p++; + } else if (c0 < 224) { + if (SAJSON_UNLIKELY(!has_remaining_characters(p, 2))) { + return unexpected_end(p); + } + unsigned char c1 = p[1]; + if (c1 < 128 || c1 >= 192) { + return make_error(p + 1, ERROR_INVALID_UTF8); + } + end[0] = c0; + end[1] = c1; + end += 2; + p += 2; + } else if (c0 < 240) { + if (SAJSON_UNLIKELY(!has_remaining_characters(p, 3))) { + return unexpected_end(p); + } + unsigned char c1 = p[1]; + if (c1 < 128 || c1 >= 192) { + return make_error(p + 1, ERROR_INVALID_UTF8); + } + unsigned char c2 = p[2]; + if (c2 < 128 || c2 >= 192) { + return make_error(p + 2, ERROR_INVALID_UTF8); + } + end[0] = c0; + end[1] = c1; + end[2] = c2; + end += 3; + p += 3; + } else if (c0 < 248) { + if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) { + return unexpected_end(p); + } + unsigned char c1 = p[1]; + if (c1 < 128 || c1 >= 192) { + return make_error(p + 1, ERROR_INVALID_UTF8); + } + 
unsigned char c2 = p[2]; + if (c2 < 128 || c2 >= 192) { + return make_error(p + 2, ERROR_INVALID_UTF8); + } + unsigned char c3 = p[3]; + if (c3 < 128 || c3 >= 192) { + return make_error(p + 3, ERROR_INVALID_UTF8); + } + end[0] = c0; + end[1] = c1; + end[2] = c2; + end[3] = c3; + end += 4; + p += 4; + } else { + return make_error(p, ERROR_INVALID_UTF8); + } + break; + } + } + } + + mutable_string_view input; + char* const input_end; + Allocator allocator; + + internal::tag root_tag; + size_t error_line; + size_t error_column; + error error_code; + int error_arg; // optional argument for the error +}; +/// \endcond + +/** + * Parses a string of JSON bytes into a \ref document, given an allocation + * strategy instance. Any kind of string type is valid as long as a + * mutable_string_view can be constructed from it. + * + * Valid allocation strategies are \ref single_allocation, + * \ref dynamic_allocation, and \ref bounded_allocation. + * + * A \ref document is returned whether or not the parse succeeds: success + * state is available by calling document::is_valid(). + */ +template +document parse(const AllocationStrategy& strategy, const StringType& string) { + mutable_string_view input(string); + + bool success; + auto allocator = strategy.make_allocator(input.length(), &success); + if (!success) { + return document(input, 1, 1, ERROR_OUT_OF_MEMORY, 0); + } + + return parser( + input, std::move(allocator)) + .get_document(); +} +} // namespace sajson diff --git a/vendor/SAJSON/sajson.cpp b/vendor/SAJSON/sajson.cpp new file mode 100644 index 00000000..46bfad83 --- /dev/null +++ b/vendor/SAJSON/sajson.cpp @@ -0,0 +1 @@ +#include \ No newline at end of file