json/include/nlohmann/detail/input/parser.hpp

501 lines
18 KiB
C++
Raw Normal View History

2018-01-10 17:18:31 +08:00
#pragma once
2017-08-14 23:02:40 +08:00
#include <cmath> // isfinite
#include <cstdint> // uint8_t
#include <functional> // function
#include <string> // string
#include <utility> // move
2019-03-15 21:55:13 +08:00
#include <vector> // vector
2017-08-14 23:02:40 +08:00
#include <nlohmann/detail/exceptions.hpp>
#include <nlohmann/detail/input/input_adapters.hpp>
2018-02-27 03:08:12 +08:00
#include <nlohmann/detail/input/json_sax.hpp>
#include <nlohmann/detail/input/lexer.hpp>
2019-03-17 19:01:49 +08:00
#include <nlohmann/detail/macro_scope.hpp>
#include <nlohmann/detail/meta/is_sax.hpp>
#include <nlohmann/detail/string_concat.hpp>
#include <nlohmann/detail/value_t.hpp>
2017-08-14 23:02:40 +08:00
namespace nlohmann
{
namespace detail
{
////////////
// parser //
////////////
2021-08-08 19:24:17 +08:00
/// Events reported to a parser callback while a JSON text is being parsed.
/// The underlying type is std::uint8_t to keep the enum small.
enum class parse_event_t : std::uint8_t
{
    /// the parser read `{` and started to process a JSON object
    object_start,
    /// the parser read `}` and finished processing a JSON object
    object_end,
    /// the parser read `[` and started to process a JSON array
    array_start,
    /// the parser read `]` and finished processing a JSON array
    array_end,
    /// the parser read a key of a value in an object
    key,
    /// the parser finished reading a JSON value
    value
};
template<typename BasicJsonType>
using parser_callback_t =
:rotating_light: add new CI and fix warnings (#2561) * :alembic: move CI targets to CMake * :recycle: add target for cpplint * :recycle: add target for self-contained binaries * :recycle: add targets for iwyu and infer * :loud_sound: add version output * :recycle: add target for oclint * :rotating_light: fix warnings * :recycle: rename targets * :recycle: use iwyu properly * :rotating_light: fix warnings * :recycle: use iwyu properly * :recycle: add target for benchmarks * :recycle: add target for CMake flags * :construction_worker: use GitHub Actions * :alembic: try to install Clang 11 * :alembic: try to install GCC 11 * :alembic: try to install Clang 11 * :alembic: try to install GCC 11 * :alembic: add clang analyze target * :fire: remove Google Benchmark * :arrow_up: Google Benchmark 1.5.2 * :fire: use fetchcontent * :penguin: add target to download a Linux version of CMake * :hammer: fix dependency * :rotating_light: fix includes * :rotating_light: fix comment * :wrench: adjust flags for GCC 11.0.0 20210110 (experimental) * :whale: user Docker image to run CI * :wrench: add target for Valgrind * :construction_worker: add target for Valgrind tests * :alembic: add Dart * :rewind: remove Dart * :alembic: do not call ctest in test subdirectory * :alembic: download test data explicitly * :alembic: only execute Valgrind tests * :alembic: fix labels * :fire: remove unneeded jobs * :hammer: cleanup * :bug: fix OCLint call * :white_check_mark: add targets for offline and git-independent tests * :white_check_mark: add targets for C++ language versions and reproducible tests * :hammer: clean up * :construction_worker: add CI steps for cppcheck and cpplint * :rotating_light: fix warnings from Clang-Tidy * :construction_worker: add CI steps for Clang-Tidy * :rotating_light: fix warnings * :wrench: select proper binary * :rotating_light: fix warnings * :rotating_light: suppress some unhelpful warnings * :rotating_light: fix warnings * :art: fix format * :rotating_light: fix 
warnings * :construction_worker: add CI steps for Sanitizers * :rotating_light: fix warnings * :zap: add optimization to sanitizer build * :rotating_light: fix warnings * :rotating_light: add missing header * :rotating_light: fix warnings * :construction_worker: add CI step for coverage * :construction_worker: add CI steps for disabled exceptions and implicit conversions * :rotating_light: fix warnings * :construction_worker: add CI steps for checking indentation * :bug: fix variable use * :green_heart: fix build * :heavy_minus_sign: remove CircleCI * :construction_worker: add CI step for diagnostics * :rotating_light: fix warning * :fire: clean Travis
2021-03-24 14:15:18 +08:00
std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>;
2020-02-19 23:32:49 +08:00
2017-08-14 23:02:40 +08:00
/*!
@brief syntax analysis
2020-01-16 00:40:03 +08:00
This class implements a recursive descent parser.
2017-08-14 23:02:40 +08:00
*/
2020-02-20 03:59:31 +08:00
template<typename BasicJsonType, typename InputAdapterType>
2017-08-14 23:02:40 +08:00
class parser
{
using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using number_float_t = typename BasicJsonType::number_float_t;
using string_t = typename BasicJsonType::string_t;
2020-02-19 23:32:49 +08:00
using lexer_t = lexer<BasicJsonType, InputAdapterType>;
2017-08-14 23:02:40 +08:00
using token_type = typename lexer_t::token_type;
public:
/// a parser reading from an input adapter
2020-02-20 03:59:31 +08:00
explicit parser(InputAdapterType&& adapter,
2020-02-19 23:32:49 +08:00
const parser_callback_t<BasicJsonType> cb = nullptr,
2020-06-18 04:03:14 +08:00
const bool allow_exceptions_ = true,
const bool skip_comments = false)
: callback(cb)
, m_lexer(std::move(adapter), skip_comments)
, allow_exceptions(allow_exceptions_)
2018-03-07 01:17:07 +08:00
{
// read first token
get_token();
}
2018-02-25 01:04:07 +08:00
2017-08-14 23:02:40 +08:00
/*!
@brief public parser interface
@param[in] strict whether to expect the last token to be EOF
@param[in,out] result parsed JSON value
@throw parse_error.101 in case of an unexpected token
@throw parse_error.102 if to_unicode fails or surrogate error
@throw parse_error.103 if to_unicode fails
*/
void parse(const bool strict, BasicJsonType& result)
{
2018-03-07 01:17:07 +08:00
if (callback)
{
json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
sax_parse_internal(&sdp);
// in strict mode, input must be completely read
if (strict && (get_token() != token_type::end_of_input))
{
sdp.parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(),
exception_message(token_type::end_of_input, "value"), nullptr));
}
// in case of an error, return discarded value
if (sdp.is_errored())
{
result = value_t::discarded;
return;
}
2017-08-14 23:02:40 +08:00
2018-03-07 01:17:07 +08:00
// set top-level value to null if it was discarded by the callback
// function
if (result.is_discarded())
{
result = nullptr;
}
2017-08-14 23:02:40 +08:00
}
2018-03-07 01:17:07 +08:00
else
2017-08-14 23:02:40 +08:00
{
2018-03-07 01:17:07 +08:00
json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
sax_parse_internal(&sdp);
// in strict mode, input must be completely read
if (strict && (get_token() != token_type::end_of_input))
2018-03-07 01:17:07 +08:00
{
sdp.parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
2018-03-07 01:17:07 +08:00
}
// in case of an error, return discarded value
if (sdp.is_errored())
{
result = value_t::discarded;
return;
}
2017-08-14 23:02:40 +08:00
}
2021-01-11 05:40:50 +08:00
result.assert_invariant();
2017-08-14 23:02:40 +08:00
}
/*!
@brief public accept interface
@param[in] strict whether to expect the last token to be EOF
@return whether the input is a proper JSON text
*/
bool accept(const bool strict = true)
{
json_sax_acceptor<BasicJsonType> sax_acceptor;
2018-03-21 05:39:08 +08:00
return sax_parse(&sax_acceptor, strict);
}
template<typename SAX>
2019-07-02 04:37:30 +08:00
JSON_HEDLEY_NON_NULL(2)
bool sax_parse(SAX* sax, const bool strict = true)
2018-03-21 05:39:08 +08:00
{
(void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
2018-03-21 05:39:08 +08:00
const bool result = sax_parse_internal(sax);
2018-03-21 05:39:08 +08:00
// strict mode: next byte must be EOF
if (result && strict && (get_token() != token_type::end_of_input))
2017-08-14 23:02:40 +08:00
{
2018-03-21 05:39:08 +08:00
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
2017-08-14 23:02:40 +08:00
}
2018-03-21 05:39:08 +08:00
return result;
2018-02-25 01:04:07 +08:00
}
2017-08-14 23:02:40 +08:00
private:
template<typename SAX>
2019-07-02 04:37:30 +08:00
JSON_HEDLEY_NON_NULL(2)
bool sax_parse_internal(SAX* sax)
2018-02-25 01:04:07 +08:00
{
2018-10-28 21:20:20 +08:00
// stack to remember the hierarchy of structured values we are parsing
// true = array; false = object
std::vector<bool> states;
// value to avoid a goto (see comment where set to true)
bool skip_to_state_evaluation = false;
while (true)
2018-02-25 01:04:07 +08:00
{
if (!skip_to_state_evaluation)
2018-02-25 01:04:07 +08:00
{
// invariant: get_token() was called before each iteration
switch (last_token)
2018-02-25 01:04:07 +08:00
{
case token_type::begin_object:
{
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
{
return false;
}
2018-02-25 01:04:07 +08:00
// closing } -> we are done
2018-03-30 06:38:18 +08:00
if (get_token() == token_type::end_object)
{
if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
{
return false;
}
break;
}
2018-02-25 01:04:07 +08:00
// parse key
2019-07-02 04:37:30 +08:00
if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
}
if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
{
return false;
}
// parse separator (:)
2019-07-02 04:37:30 +08:00
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
}
// remember we are now inside an object
states.push_back(false);
// parse values
get_token();
continue;
}
case token_type::begin_array:
2018-02-25 01:04:07 +08:00
{
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
2018-02-25 01:04:07 +08:00
{
return false;
}
// closing ] -> we are done
2018-03-30 06:38:18 +08:00
if (get_token() == token_type::end_array)
{
if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
{
return false;
}
break;
}
// remember we are now inside an array
states.push_back(true);
// parse values (no need to call get_token)
continue;
2018-02-25 01:04:07 +08:00
}
case token_type::value_float:
2018-02-25 01:04:07 +08:00
{
const auto res = m_lexer.get_number_float();
if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res)))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr));
}
2019-03-19 16:17:14 +08:00
if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
{
2019-03-19 16:17:14 +08:00
return false;
}
2019-03-19 16:17:14 +08:00
break;
2018-02-25 01:04:07 +08:00
}
case token_type::literal_false:
2018-02-25 01:04:07 +08:00
{
if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
{
return false;
}
break;
2018-02-25 01:04:07 +08:00
}
case token_type::literal_null:
2018-02-25 01:04:07 +08:00
{
if (JSON_HEDLEY_UNLIKELY(!sax->null()))
{
return false;
}
break;
2018-02-25 01:04:07 +08:00
}
case token_type::literal_true:
2018-02-25 01:04:07 +08:00
{
if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
{
return false;
}
break;
2018-02-25 01:04:07 +08:00
}
2018-03-12 05:47:25 +08:00
case token_type::value_integer:
{
if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
{
return false;
}
break;
2018-02-25 01:04:07 +08:00
}
case token_type::value_string:
2018-02-25 01:04:07 +08:00
{
if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
{
return false;
}
break;
2018-02-25 01:04:07 +08:00
}
case token_type::value_unsigned:
2018-02-25 01:04:07 +08:00
{
if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
{
return false;
}
break;
2018-02-25 01:04:07 +08:00
}
case token_type::parse_error:
2018-02-25 01:04:07 +08:00
{
// using "uninitialized" to avoid "expected" message
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr));
2018-02-25 01:04:07 +08:00
}
case token_type::uninitialized:
case token_type::end_array:
case token_type::end_object:
case token_type::name_separator:
case token_type::value_separator:
case token_type::end_of_input:
case token_type::literal_or_value:
default: // the last token was unexpected
2018-02-25 01:04:07 +08:00
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr));
2018-02-25 01:04:07 +08:00
}
}
}
else
2018-02-25 01:04:07 +08:00
{
skip_to_state_evaluation = false;
2018-02-25 01:04:07 +08:00
}
// we reached this line after we successfully parsed a value
if (states.empty())
2018-02-25 01:04:07 +08:00
{
2018-10-28 21:20:20 +08:00
// empty stack: we reached the end of the hierarchy: done
return true;
2018-02-25 01:04:07 +08:00
}
2019-03-19 16:17:14 +08:00
if (states.back()) // array
2018-02-25 01:04:07 +08:00
{
2019-03-19 16:17:14 +08:00
// comma -> next value
if (get_token() == token_type::value_separator)
{
// parse a new value
get_token();
continue;
}
// closing ]
2019-07-02 04:37:30 +08:00
if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
{
if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
{
2019-03-19 16:17:14 +08:00
return false;
}
2019-03-19 16:17:14 +08:00
// We are done with this array. Before we can parse a
// new value, we need to evaluate the new state first.
// By setting skip_to_state_evaluation to false, we
// are effectively jumping to the beginning of this if.
JSON_ASSERT(!states.empty());
2019-03-19 16:17:14 +08:00
states.pop_back();
skip_to_state_evaluation = true;
continue;
}
2018-02-25 01:04:07 +08:00
2019-03-19 16:17:14 +08:00
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), nullptr));
2019-03-19 16:17:14 +08:00
}
2018-02-25 01:04:07 +08:00
2021-01-27 19:54:46 +08:00
// states.back() is false -> object
2019-03-19 16:17:14 +08:00
// comma -> next value
if (get_token() == token_type::value_separator)
{
// parse key
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
2019-03-19 16:17:14 +08:00
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
2021-01-27 19:54:46 +08:00
}
2018-02-25 01:04:07 +08:00
2021-01-27 19:54:46 +08:00
if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
{
return false;
2019-03-19 16:17:14 +08:00
}
// parse separator (:)
if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
{
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
2019-03-19 16:17:14 +08:00
}
2021-01-27 19:54:46 +08:00
// parse values
get_token();
continue;
}
2019-03-19 16:17:14 +08:00
2021-01-27 19:54:46 +08:00
// closing }
if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
{
if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
{
return false;
}
2019-03-19 16:17:14 +08:00
2021-01-27 19:54:46 +08:00
// We are done with this object. Before we can parse a
// new value, we need to evaluate the new state first.
// By setting skip_to_state_evaluation to false, we
// are effectively jumping to the beginning of this if.
JSON_ASSERT(!states.empty());
states.pop_back();
skip_to_state_evaluation = true;
continue;
2018-02-25 01:04:07 +08:00
}
2021-01-27 19:54:46 +08:00
return sax->parse_error(m_lexer.get_position(),
m_lexer.get_token_string(),
parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), nullptr));
2018-02-25 01:04:07 +08:00
}
}
2017-08-14 23:02:40 +08:00
/// get next token from lexer
token_type get_token()
{
2019-03-18 20:53:48 +08:00
return last_token = m_lexer.scan();
2017-08-14 23:02:40 +08:00
}
std::string exception_message(const token_type expected, const std::string& context)
2017-08-14 23:02:40 +08:00
{
std::string error_msg = "syntax error ";
if (!context.empty())
{
error_msg += concat("while parsing ", context, ' ');
}
error_msg += "- ";
2017-08-14 23:02:40 +08:00
if (last_token == token_type::parse_error)
{
error_msg += concat(m_lexer.get_error_message(), "; last read: '",
m_lexer.get_token_string(), '\'');
2017-08-14 23:02:40 +08:00
}
else
{
error_msg += concat("unexpected ", lexer_t::token_type_name(last_token));
2017-08-14 23:02:40 +08:00
}
if (expected != token_type::uninitialized)
{
error_msg += concat("; expected ", lexer_t::token_type_name(expected));
2017-08-14 23:02:40 +08:00
}
return error_msg;
2017-08-14 23:02:40 +08:00
}
private:
/// callback function
2020-02-19 23:32:49 +08:00
const parser_callback_t<BasicJsonType> callback = nullptr;
2017-08-14 23:02:40 +08:00
/// the type of the last read token
token_type last_token = token_type::uninitialized;
/// the lexer
lexer_t m_lexer;
/// whether to throw exceptions in case of errors
const bool allow_exceptions = true;
};
:rotating_light: add new CI and fix warnings (#2561) * :alembic: move CI targets to CMake * :recycle: add target for cpplint * :recycle: add target for self-contained binaries * :recycle: add targets for iwyu and infer * :loud_sound: add version output * :recycle: add target for oclint * :rotating_light: fix warnings * :recycle: rename targets * :recycle: use iwyu properly * :rotating_light: fix warnings * :recycle: use iwyu properly * :recycle: add target for benchmarks * :recycle: add target for CMake flags * :construction_worker: use GitHub Actions * :alembic: try to install Clang 11 * :alembic: try to install GCC 11 * :alembic: try to install Clang 11 * :alembic: try to install GCC 11 * :alembic: add clang analyze target * :fire: remove Google Benchmark * :arrow_up: Google Benchmark 1.5.2 * :fire: use fetchcontent * :penguin: add target to download a Linux version of CMake * :hammer: fix dependency * :rotating_light: fix includes * :rotating_light: fix comment * :wrench: adjust flags for GCC 11.0.0 20210110 (experimental) * :whale: user Docker image to run CI * :wrench: add target for Valgrind * :construction_worker: add target for Valgrind tests * :alembic: add Dart * :rewind: remove Dart * :alembic: do not call ctest in test subdirectory * :alembic: download test data explicitly * :alembic: only execute Valgrind tests * :alembic: fix labels * :fire: remove unneeded jobs * :hammer: cleanup * :bug: fix OCLint call * :white_check_mark: add targets for offline and git-independent tests * :white_check_mark: add targets for C++ language versions and reproducible tests * :hammer: clean up * :construction_worker: add CI steps for cppcheck and cpplint * :rotating_light: fix warnings from Clang-Tidy * :construction_worker: add CI steps for Clang-Tidy * :rotating_light: fix warnings * :wrench: select proper binary * :rotating_light: fix warnings * :rotating_light: suppress some unhelpful warnings * :rotating_light: fix warnings * :art: fix format * :rotating_light: fix 
warnings * :construction_worker: add CI steps for Sanitizers * :rotating_light: fix warnings * :zap: add optimization to sanitizer build * :rotating_light: fix warnings * :rotating_light: add missing header * :rotating_light: fix warnings * :construction_worker: add CI step for coverage * :construction_worker: add CI steps for disabled exceptions and implicit conversions * :rotating_light: fix warnings * :construction_worker: add CI steps for checking indentation * :bug: fix variable use * :green_heart: fix build * :heavy_minus_sign: remove CircleCI * :construction_worker: add CI step for diagnostics * :rotating_light: fix warning * :fire: clean Travis
2021-03-24 14:15:18 +08:00
} // namespace detail
} // namespace nlohmann