mirror of
https://github.com/nlohmann/json.git
synced 2024-12-18 08:50:00 +08:00
Merge branch 'develop' into feature/convert_char
This commit is contained in:
commit
5a6bdf5934
9
Makefile
9
Makefile
@ -48,6 +48,7 @@ all:
|
||||
@echo "cppcheck - analyze code with cppcheck"
|
||||
@echo "doctest - compile example files and check their output"
|
||||
@echo "fuzz_testing - prepare fuzz testing of the JSON parser"
|
||||
@echo "fuzz_testing_bson - prepare fuzz testing of the BSON parser"
|
||||
@echo "fuzz_testing_cbor - prepare fuzz testing of the CBOR parser"
|
||||
@echo "fuzz_testing_msgpack - prepare fuzz testing of the MessagePack parser"
|
||||
@echo "fuzz_testing_ubjson - prepare fuzz testing of the UBJSON parser"
|
||||
@ -220,6 +221,14 @@ fuzz_testing:
|
||||
find test/data/json_tests -size -5k -name *json | xargs -I{} cp "{}" fuzz-testing/testcases
|
||||
@echo "Execute: afl-fuzz -i fuzz-testing/testcases -o fuzz-testing/out fuzz-testing/fuzzer"
|
||||
|
||||
# Prepare an AFL fuzzing session for the BSON parser: rebuild the fuzzer with
# afl-clang++ and seed the corpus with every *.bson file below test/data that
# is smaller than 5k.
fuzz_testing_bson:
	rm -fr fuzz-testing
	mkdir -p fuzz-testing fuzz-testing/testcases fuzz-testing/out
	$(MAKE) parse_bson_fuzzer -C test CXX=afl-clang++
	mv test/parse_bson_fuzzer fuzz-testing/fuzzer
	# the pattern is quoted so the shell hands it to find unexpanded
	find test/data -size -5k -name '*.bson' | xargs -I{} cp "{}" fuzz-testing/testcases
	@echo "Execute: afl-fuzz -i fuzz-testing/testcases -o fuzz-testing/out fuzz-testing/fuzzer"
|
||||
|
||||
fuzz_testing_cbor:
|
||||
rm -fr fuzz-testing
|
||||
mkdir -p fuzz-testing fuzz-testing/testcases fuzz-testing/out
|
||||
|
18
README.md
18
README.md
@ -27,7 +27,7 @@
|
||||
- [JSON Merge Patch](#json-merge-patch)
|
||||
- [Implicit conversions](#implicit-conversions)
|
||||
- [Conversions to/from arbitrary types](#arbitrary-types-conversions)
|
||||
- [Binary formats (CBOR, MessagePack, and UBJSON)](#binary-formats-cbor-messagepack-and-ubjson)
|
||||
- [Binary formats (CBOR, BSON, MessagePack, and UBJSON)](#binary-formats-cbor-bson-messagepack-and-ubjson)
|
||||
- [Supported compilers](#supported-compilers)
|
||||
- [License](#license)
|
||||
- [Contact](#contact)
|
||||
@ -309,7 +309,7 @@ std::cout << j_string << " == " << serialized_string << std::endl;
|
||||
|
||||
[`.dump()`](https://nlohmann.github.io/json/classnlohmann_1_1basic__json_a5adea76fedba9898d404fef8598aa663.html#a5adea76fedba9898d404fef8598aa663) always returns the serialized value, and [`.get<std::string>()`](https://nlohmann.github.io/json/classnlohmann_1_1basic__json_a16f9445f7629f634221a42b967cdcd43.html#a16f9445f7629f634221a42b967cdcd43) returns the originally stored string value.
|
||||
|
||||
Note the library only supports UTF-8. When you store strings with different encodings in the library, calling [`dump()`](https://nlohmann.github.io/json/classnlohmann_1_1basic__json_a5adea76fedba9898d404fef8598aa663.html#a5adea76fedba9898d404fef8598aa663) may throw an exception.
|
||||
Note the library only supports UTF-8. When you store strings with different encodings in the library, calling [`dump()`](https://nlohmann.github.io/json/classnlohmann_1_1basic__json_a5adea76fedba9898d404fef8598aa663.html#a5adea76fedba9898d404fef8598aa663) may throw an exception unless `json::error_handler_t::replace` or `json::error_handler_t::ignore` are used as error handlers.
|
||||
|
||||
#### To/from streams (e.g. files, string streams)
|
||||
|
||||
@ -874,14 +874,22 @@ struct bad_serializer
|
||||
};
|
||||
```
|
||||
|
||||
### Binary formats (CBOR, MessagePack, and UBJSON)
|
||||
### Binary formats (CBOR, BSON, MessagePack, and UBJSON)
|
||||
|
||||
Though JSON is a ubiquitous data format, it is not a very compact format suitable for data exchange, for instance over a network. Hence, the library supports [CBOR](http://cbor.io) (Concise Binary Object Representation), [MessagePack](http://msgpack.org), and [UBJSON](http://ubjson.org) (Universal Binary JSON Specification) to efficiently encode JSON values to byte vectors and to decode such vectors.
|
||||
Though JSON is a ubiquitous data format, it is not a very compact format suitable for data exchange, for instance over a network. Hence, the library supports [BSON](http://bsonspec.org) (Binary JSON), [CBOR](http://cbor.io) (Concise Binary Object Representation), [MessagePack](http://msgpack.org), and [UBJSON](http://ubjson.org) (Universal Binary JSON Specification) to efficiently encode JSON values to byte vectors and to decode such vectors.
|
||||
|
||||
```cpp
|
||||
// create a JSON value
|
||||
json j = R"({"compact": true, "schema": 0})"_json;
|
||||
|
||||
// serialize to BSON
|
||||
std::vector<std::uint8_t> v_bson = json::to_bson(j);
|
||||
|
||||
// 0x1B, 0x00, 0x00, 0x00, 0x08, 0x63, 0x6F, 0x6D, 0x70, 0x61, 0x63, 0x74, 0x00, 0x01, 0x10, 0x73, 0x63, 0x68, 0x65, 0x6D, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
|
||||
// roundtrip
|
||||
json j_from_bson = json::from_bson(v_bson);
|
||||
|
||||
// serialize to CBOR
|
||||
std::vector<std::uint8_t> v_cbor = json::to_cbor(j);
|
||||
|
||||
@ -1138,6 +1146,8 @@ I deeply appreciate the help of the following people.
|
||||
- [Henry Schreiner](https://github.com/henryiii) added support for GCC 4.8.
|
||||
- [knilch](https://github.com/knilch0r) made sure the test suite does not stall when run in the wrong directory.
|
||||
- [Antonio Borondo](https://github.com/antonioborondo) fixed an MSVC 2017 warning.
|
||||
- [efp](https://github.com/efp) added line and column information to parse errors.
|
||||
- [julian-becker](https://github.com/julian-becker) added BSON support.
|
||||
|
||||
Thanks a lot for helping out! Please [let me know](mailto:mail@nlohmann.me) if I forgot someone.
|
||||
|
||||
|
@ -30,7 +30,7 @@ int main()
|
||||
<< j_string.dump(-1, ' ', true) << '\n';
|
||||
|
||||
// create JSON value with invalid UTF-8 byte sequence
|
||||
json j_invalid = "\xF0\xA4\xAD\xC0";
|
||||
json j_invalid = "ä\xA9ü";
|
||||
try
|
||||
{
|
||||
std::cout << j_invalid.dump() << std::endl;
|
||||
@ -39,4 +39,10 @@ int main()
|
||||
{
|
||||
std::cout << e.what() << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "string with replaced invalid characters: "
|
||||
<< j_invalid.dump(-1, ' ', false, json::error_handler_t::replace)
|
||||
<< "\nstring with ignored invalid characters: "
|
||||
<< j_invalid.dump(-1, ' ', false, json::error_handler_t::ignore)
|
||||
<< '\n';
|
||||
}
|
||||
|
@ -1 +1 @@
|
||||
<a target="_blank" href="https://wandbox.org/permlink/uC4kna7QsQ0rAt80"><b>online</b></a>
|
||||
<a target="_blank" href="https://wandbox.org/permlink/KtH6hJIe10abhHMi"><b>online</b></a>
|
@ -50,4 +50,6 @@ arrays:
|
||||
strings:
|
||||
"Hellö 😀!"
|
||||
"Hell\u00f6 \ud83d\ude00!"
|
||||
[json.exception.type_error.316] invalid UTF-8 byte at index 3: 0xC0
|
||||
[json.exception.type_error.316] invalid UTF-8 byte at index 2: 0xA9
|
||||
string with replaced invalid characters: "ä<>ü"
|
||||
string with ignored invalid characters: "äü"
|
||||
|
@ -93,6 +93,7 @@ json.exception.parse_error.109 | parse error: array index 'one' is not a number
|
||||
json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read.
|
||||
json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read.
|
||||
json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read.
|
||||
json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet).
|
||||
|
||||
@note For an input with n bytes, 1 is the index of the first character and n+1
|
||||
is the index of the terminating null byte or the end of file. This also
|
||||
@ -236,6 +237,7 @@ json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten
|
||||
json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers.
|
||||
json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive.
|
||||
json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. |
|
||||
json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) |
|
||||
|
||||
@liveexample{The following code shows how a `type_error` exception can be
|
||||
caught.,type_error}
|
||||
@ -278,8 +280,9 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not
|
||||
json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved.
|
||||
json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value.
|
||||
json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored without changing it to NaN or INF.
|
||||
json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integers numbers up to 9223372036854775807. |
|
||||
json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. |
|
||||
json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. |
|
||||
json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string |
|
||||
|
||||
@liveexample{The following code shows how an `out_of_range` exception can be
|
||||
caught.,out_of_range}
|
||||
|
@ -80,6 +80,10 @@ class binary_reader
|
||||
result = parse_ubjson_internal();
|
||||
break;
|
||||
|
||||
case input_format_t::bson:
|
||||
result = parse_bson_internal();
|
||||
break;
|
||||
|
||||
// LCOV_EXCL_START
|
||||
default:
|
||||
assert(false);
|
||||
@ -121,6 +125,216 @@ class binary_reader
|
||||
}
|
||||
|
||||
private:
|
||||
//////////
|
||||
// BSON //
|
||||
//////////
|
||||
|
||||
/*!
|
||||
@brief Reads in a BSON-object and passes it to the SAX-parser.
|
||||
@return whether a valid BSON-value was passed to the SAX parser
|
||||
*/
|
||||
bool parse_bson_internal()
|
||||
{
|
||||
std::int32_t documentSize;
|
||||
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
|
||||
|
||||
if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return sax->end_object();
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Parses a C-style string from the BSON input.
|
||||
@param[in, out] result A reference to the string variable where the read
|
||||
string is to be stored.
|
||||
@return `true` if the \x00-byte indicating the end of the string was
|
||||
encountered before the EOF; false` indicates an unexpected EOF.
|
||||
*/
|
||||
bool get_bson_cstr(string_t& result)
|
||||
{
|
||||
auto out = std::back_inserter(result);
|
||||
while (true)
|
||||
{
|
||||
get();
|
||||
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (current == 0x00)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
*out++ = static_cast<char>(current);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Parses a zero-terminated string of length @a len from the BSON
|
||||
input.
|
||||
@param[in] len The length (including the zero-byte at the end) of the
|
||||
string to be read.
|
||||
@param[in, out] result A reference to the string variable where the read
|
||||
string is to be stored.
|
||||
@tparam NumberType The type of the length @a len
|
||||
@pre len > 0
|
||||
@return `true` if the string was successfully parsed
|
||||
*/
|
||||
template<typename NumberType>
|
||||
bool get_bson_string(const NumberType len, string_t& result)
|
||||
{
|
||||
return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof();
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Read a BSON document element of the given @a element_type.
|
||||
@param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
|
||||
@param[in] element_type_parse_position The position in the input stream,
|
||||
where the `element_type` was read.
|
||||
@warning Not all BSON element types are supported yet. An unsupported
|
||||
@a element_type will give rise to a parse_error.114:
|
||||
Unsupported BSON record type 0x...
|
||||
@return whether a valid BSON-object/array was passed to the SAX parser
|
||||
*/
|
||||
bool parse_bson_element_internal(const int element_type,
|
||||
const std::size_t element_type_parse_position)
|
||||
{
|
||||
switch (element_type)
|
||||
{
|
||||
case 0x01: // double
|
||||
{
|
||||
double number;
|
||||
return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), "");
|
||||
}
|
||||
|
||||
case 0x02: // string
|
||||
{
|
||||
std::int32_t len;
|
||||
string_t value;
|
||||
return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value);
|
||||
}
|
||||
|
||||
case 0x03: // object
|
||||
{
|
||||
return parse_bson_internal();
|
||||
}
|
||||
|
||||
case 0x04: // array
|
||||
{
|
||||
return parse_bson_array();
|
||||
}
|
||||
|
||||
case 0x08: // boolean
|
||||
{
|
||||
return sax->boolean(static_cast<bool>(get()));
|
||||
}
|
||||
|
||||
case 0x0A: // null
|
||||
{
|
||||
return sax->null();
|
||||
}
|
||||
|
||||
case 0x10: // int32
|
||||
{
|
||||
std::int32_t value;
|
||||
return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value);
|
||||
}
|
||||
|
||||
case 0x12: // int64
|
||||
{
|
||||
std::int64_t value;
|
||||
return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value);
|
||||
}
|
||||
|
||||
default: // anything else not supported (yet)
|
||||
{
|
||||
char cr[3];
|
||||
snprintf(cr, sizeof(cr), "%.2hhX", static_cast<unsigned char>(element_type));
|
||||
return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Read a BSON element list (as specified in the BSON-spec)
|
||||
|
||||
The same binary layout is used for objects and arrays, hence it must be
|
||||
indicated with the argument @a is_array which one is expected
|
||||
(true --> array, false --> object).
|
||||
|
||||
@param[in] is_array Determines if the element list being read is to be
|
||||
treated as an object (@a is_array == false), or as an
|
||||
array (@a is_array == true).
|
||||
@return whether a valid BSON-object/array was passed to the SAX parser
|
||||
*/
|
||||
bool parse_bson_element_list(const bool is_array)
|
||||
{
|
||||
string_t key;
|
||||
while (int element_type = get())
|
||||
{
|
||||
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::size_t element_type_parse_position = chars_read;
|
||||
if (JSON_UNLIKELY(not get_bson_cstr(key)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (not is_array)
|
||||
{
|
||||
sax->key(key);
|
||||
}
|
||||
|
||||
if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// get_bson_cstr only appends
|
||||
key.clear();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Reads an array from the BSON input and passes it to the SAX-parser.
|
||||
@return whether a valid BSON-array was passed to the SAX parser
|
||||
*/
|
||||
bool parse_bson_array()
|
||||
{
|
||||
std::int32_t documentSize;
|
||||
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
|
||||
|
||||
if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1))))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return sax->end_array();
|
||||
}
|
||||
|
||||
//////////
|
||||
// CBOR //
|
||||
//////////
|
||||
|
||||
/*!
|
||||
@param[in] get_char whether a new character should be retrieved from the
|
||||
input (true, default) or whether the last read
|
||||
@ -459,6 +673,191 @@ class binary_reader
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief reads a CBOR string
|
||||
|
||||
This function first reads starting bytes to determine the expected
|
||||
string length and then copies this number of bytes into a string.
|
||||
Additionally, CBOR's strings with indefinite lengths are supported.
|
||||
|
||||
@param[out] result created string
|
||||
|
||||
@return whether string creation completed
|
||||
*/
|
||||
bool get_cbor_string(string_t& result)
|
||||
{
|
||||
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (current)
|
||||
{
|
||||
// UTF-8 string (0x00..0x17 bytes follow)
|
||||
case 0x60:
|
||||
case 0x61:
|
||||
case 0x62:
|
||||
case 0x63:
|
||||
case 0x64:
|
||||
case 0x65:
|
||||
case 0x66:
|
||||
case 0x67:
|
||||
case 0x68:
|
||||
case 0x69:
|
||||
case 0x6A:
|
||||
case 0x6B:
|
||||
case 0x6C:
|
||||
case 0x6D:
|
||||
case 0x6E:
|
||||
case 0x6F:
|
||||
case 0x70:
|
||||
case 0x71:
|
||||
case 0x72:
|
||||
case 0x73:
|
||||
case 0x74:
|
||||
case 0x75:
|
||||
case 0x76:
|
||||
case 0x77:
|
||||
{
|
||||
return get_string(input_format_t::cbor, current & 0x1F, result);
|
||||
}
|
||||
|
||||
case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
|
||||
{
|
||||
uint8_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
|
||||
{
|
||||
uint16_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
|
||||
{
|
||||
uint32_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
|
||||
{
|
||||
uint64_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x7F: // UTF-8 string (indefinite length)
|
||||
{
|
||||
while (get() != 0xFF)
|
||||
{
|
||||
string_t chunk;
|
||||
if (not get_cbor_string(chunk))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
result.append(chunk);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
auto last_token = get_token_string();
|
||||
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] len the length of the array or std::size_t(-1) for an
|
||||
array of indefinite size
|
||||
@return whether array creation completed
|
||||
*/
|
||||
bool get_cbor_array(const std::size_t len)
|
||||
{
|
||||
if (JSON_UNLIKELY(not sax->start_array(len)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (len != std::size_t(-1))
|
||||
{
|
||||
for (std::size_t i = 0; i < len; ++i)
|
||||
{
|
||||
if (JSON_UNLIKELY(not parse_cbor_internal()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (get() != 0xFF)
|
||||
{
|
||||
if (JSON_UNLIKELY(not parse_cbor_internal(false)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sax->end_array();
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] len the length of the object or std::size_t(-1) for an
|
||||
object of indefinite size
|
||||
@return whether object creation completed
|
||||
*/
|
||||
bool get_cbor_object(const std::size_t len)
|
||||
{
|
||||
if (not JSON_UNLIKELY(sax->start_object(len)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
string_t key;
|
||||
if (len != std::size_t(-1))
|
||||
{
|
||||
for (std::size_t i = 0; i < len; ++i)
|
||||
{
|
||||
get();
|
||||
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (JSON_UNLIKELY(not parse_cbor_internal()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
key.clear();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (get() != 0xFF)
|
||||
{
|
||||
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (JSON_UNLIKELY(not parse_cbor_internal()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
key.clear();
|
||||
}
|
||||
}
|
||||
|
||||
return sax->end_object();
|
||||
}
|
||||
|
||||
/////////////
|
||||
// MsgPack //
|
||||
/////////////
|
||||
|
||||
/*!
|
||||
@return whether a valid MessagePack value was passed to the SAX parser
|
||||
*/
|
||||
@ -821,300 +1220,6 @@ class binary_reader
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] get_char whether a new character should be retrieved from the
|
||||
input (true, default) or whether the last read
|
||||
character should be considered instead
|
||||
|
||||
@return whether a valid UBJSON value was passed to the SAX parser
|
||||
*/
|
||||
bool parse_ubjson_internal(const bool get_char = true)
|
||||
{
|
||||
return get_ubjson_value(get_char ? get_ignore_noop() : current);
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief get next character from the input
|
||||
|
||||
This function provides the interface to the used input adapter. It does
|
||||
not throw in case the input reached EOF, but returns a -'ve valued
|
||||
`std::char_traits<char>::eof()` in that case.
|
||||
|
||||
@return character read from the input
|
||||
*/
|
||||
int get()
|
||||
{
|
||||
++chars_read;
|
||||
return (current = ia->get_character());
|
||||
}
|
||||
|
||||
/*!
|
||||
@return character read from the input after ignoring all 'N' entries
|
||||
*/
|
||||
int get_ignore_noop()
|
||||
{
|
||||
do
|
||||
{
|
||||
get();
|
||||
}
|
||||
while (current == 'N');
|
||||
|
||||
return current;
|
||||
}
|
||||
|
||||
/*
|
||||
@brief read a number from the input
|
||||
|
||||
@tparam NumberType the type of the number
|
||||
@param[in] format the current format (for diagnostics)
|
||||
@param[out] result number of type @a NumberType
|
||||
|
||||
@return whether conversion completed
|
||||
|
||||
@note This function needs to respect the system's endianess, because
|
||||
bytes in CBOR, MessagePack, and UBJSON are stored in network order
|
||||
(big endian) and therefore need reordering on little endian systems.
|
||||
*/
|
||||
template<typename NumberType>
|
||||
bool get_number(const input_format_t format, NumberType& result)
|
||||
{
|
||||
// step 1: read input into array with system's byte order
|
||||
std::array<uint8_t, sizeof(NumberType)> vec;
|
||||
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
|
||||
{
|
||||
get();
|
||||
if (JSON_UNLIKELY(not unexpect_eof(format, "number")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// reverse byte order prior to conversion if necessary
|
||||
if (is_little_endian)
|
||||
{
|
||||
vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current);
|
||||
}
|
||||
else
|
||||
{
|
||||
vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE
|
||||
}
|
||||
}
|
||||
|
||||
// step 2: convert array into number of type T and return
|
||||
std::memcpy(&result, vec.data(), sizeof(NumberType));
|
||||
return true;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief create a string by reading characters from the input
|
||||
|
||||
@tparam NumberType the type of the number
|
||||
@param[in] format the current format (for diagnostics)
|
||||
@param[in] len number of characters to read
|
||||
@param[out] result string created by reading @a len bytes
|
||||
|
||||
@return whether string creation completed
|
||||
|
||||
@note We can not reserve @a len bytes for the result, because @a len
|
||||
may be too large. Usually, @ref unexpect_eof() detects the end of
|
||||
the input before we run out of string memory.
|
||||
*/
|
||||
template<typename NumberType>
|
||||
bool get_string(const input_format_t format, const NumberType len, string_t& result)
|
||||
{
|
||||
bool success = true;
|
||||
std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
|
||||
{
|
||||
get();
|
||||
if (JSON_UNLIKELY(not unexpect_eof(format, "string")))
|
||||
{
|
||||
success = false;
|
||||
}
|
||||
return static_cast<char>(current);
|
||||
});
|
||||
return success;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief reads a CBOR string
|
||||
|
||||
This function first reads starting bytes to determine the expected
|
||||
string length and then copies this number of bytes into a string.
|
||||
Additionally, CBOR's strings with indefinite lengths are supported.
|
||||
|
||||
@param[out] result created string
|
||||
|
||||
@return whether string creation completed
|
||||
*/
|
||||
bool get_cbor_string(string_t& result)
|
||||
{
|
||||
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (current)
|
||||
{
|
||||
// UTF-8 string (0x00..0x17 bytes follow)
|
||||
case 0x60:
|
||||
case 0x61:
|
||||
case 0x62:
|
||||
case 0x63:
|
||||
case 0x64:
|
||||
case 0x65:
|
||||
case 0x66:
|
||||
case 0x67:
|
||||
case 0x68:
|
||||
case 0x69:
|
||||
case 0x6A:
|
||||
case 0x6B:
|
||||
case 0x6C:
|
||||
case 0x6D:
|
||||
case 0x6E:
|
||||
case 0x6F:
|
||||
case 0x70:
|
||||
case 0x71:
|
||||
case 0x72:
|
||||
case 0x73:
|
||||
case 0x74:
|
||||
case 0x75:
|
||||
case 0x76:
|
||||
case 0x77:
|
||||
{
|
||||
return get_string(input_format_t::cbor, current & 0x1F, result);
|
||||
}
|
||||
|
||||
case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
|
||||
{
|
||||
uint8_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
|
||||
{
|
||||
uint16_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
|
||||
{
|
||||
uint32_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
|
||||
{
|
||||
uint64_t len;
|
||||
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
|
||||
}
|
||||
|
||||
case 0x7F: // UTF-8 string (indefinite length)
|
||||
{
|
||||
while (get() != 0xFF)
|
||||
{
|
||||
string_t chunk;
|
||||
if (not get_cbor_string(chunk))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
result.append(chunk);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
auto last_token = get_token_string();
|
||||
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] len the length of the array or std::size_t(-1) for an
|
||||
array of indefinite size
|
||||
@return whether array creation completed
|
||||
*/
|
||||
bool get_cbor_array(const std::size_t len)
|
||||
{
|
||||
if (JSON_UNLIKELY(not sax->start_array(len)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (len != std::size_t(-1))
|
||||
{
|
||||
for (std::size_t i = 0; i < len; ++i)
|
||||
{
|
||||
if (JSON_UNLIKELY(not parse_cbor_internal()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (get() != 0xFF)
|
||||
{
|
||||
if (JSON_UNLIKELY(not parse_cbor_internal(false)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sax->end_array();
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] len the length of the object or std::size_t(-1) for an
|
||||
object of indefinite size
|
||||
@return whether object creation completed
|
||||
*/
|
||||
bool get_cbor_object(const std::size_t len)
|
||||
{
|
||||
if (not JSON_UNLIKELY(sax->start_object(len)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
string_t key;
|
||||
if (len != std::size_t(-1))
|
||||
{
|
||||
for (std::size_t i = 0; i < len; ++i)
|
||||
{
|
||||
get();
|
||||
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (JSON_UNLIKELY(not parse_cbor_internal()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
key.clear();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (get() != 0xFF)
|
||||
{
|
||||
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (JSON_UNLIKELY(not parse_cbor_internal()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
key.clear();
|
||||
}
|
||||
}
|
||||
|
||||
return sax->end_object();
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief reads a MessagePack string
|
||||
|
||||
@ -1249,6 +1354,22 @@ class binary_reader
|
||||
return sax->end_object();
|
||||
}
|
||||
|
||||
////////////
|
||||
// UBJSON //
|
||||
////////////
|
||||
|
||||
/*!
|
||||
@param[in] get_char whether a new character should be retrieved from the
|
||||
input (true, default) or whether the last read
|
||||
character should be considered instead
|
||||
|
||||
@return whether a valid UBJSON value was passed to the SAX parser
|
||||
*/
|
||||
bool parse_ubjson_internal(const bool get_char = true)
|
||||
{
|
||||
return get_ubjson_value(get_char ? get_ignore_noop() : current);
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief reads a UBJSON string
|
||||
|
||||
@ -1663,6 +1784,113 @@ class binary_reader
|
||||
return sax->end_object();
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// Utility functions //
|
||||
///////////////////////
|
||||
|
||||
/*!
|
||||
@brief get next character from the input
|
||||
|
||||
This function provides the interface to the used input adapter. It does
|
||||
not throw in case the input reached EOF, but returns a -'ve valued
|
||||
`std::char_traits<char>::eof()` in that case.
|
||||
|
||||
@return character read from the input
|
||||
*/
|
||||
int get()
|
||||
{
|
||||
++chars_read;
|
||||
return (current = ia->get_character());
|
||||
}
|
||||
|
||||
/*!
|
||||
@return character read from the input after ignoring all 'N' entries
|
||||
*/
|
||||
int get_ignore_noop()
|
||||
{
|
||||
do
|
||||
{
|
||||
get();
|
||||
}
|
||||
while (current == 'N');
|
||||
|
||||
return current;
|
||||
}
|
||||
|
||||
/*
|
||||
@brief read a number from the input
|
||||
|
||||
@tparam NumberType the type of the number
|
||||
@param[in] format the current format (for diagnostics)
|
||||
@param[out] result number of type @a NumberType
|
||||
|
||||
@return whether conversion completed
|
||||
|
||||
@note This function needs to respect the system's endianess, because
|
||||
bytes in CBOR, MessagePack, and UBJSON are stored in network order
|
||||
(big endian) and therefore need reordering on little endian systems.
|
||||
*/
|
||||
template<typename NumberType, bool InputIsLittleEndian = false>
|
||||
bool get_number(const input_format_t format, NumberType& result)
|
||||
{
|
||||
// step 1: read input into array with system's byte order
|
||||
std::array<uint8_t, sizeof(NumberType)> vec;
|
||||
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
|
||||
{
|
||||
get();
|
||||
if (JSON_UNLIKELY(not unexpect_eof(format, "number")))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// reverse byte order prior to conversion if necessary
|
||||
if (is_little_endian && !InputIsLittleEndian)
|
||||
{
|
||||
vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current);
|
||||
}
|
||||
else
|
||||
{
|
||||
vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE
|
||||
}
|
||||
}
|
||||
|
||||
// step 2: convert array into number of type T and return
|
||||
std::memcpy(&result, vec.data(), sizeof(NumberType));
|
||||
return true;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief create a string by reading characters from the input
|
||||
|
||||
@tparam NumberType the type of the number
|
||||
@param[in] format the current format (for diagnostics)
|
||||
@param[in] len number of characters to read
|
||||
@param[out] result string created by reading @a len bytes
|
||||
|
||||
@return whether string creation completed
|
||||
|
||||
@note We can not reserve @a len bytes for the result, because @a len
|
||||
may be too large. Usually, @ref unexpect_eof() detects the end of
|
||||
the input before we run out of string memory.
|
||||
*/
|
||||
template<typename NumberType>
|
||||
bool get_string(const input_format_t format,
|
||||
const NumberType len,
|
||||
string_t& result)
|
||||
{
|
||||
bool success = true;
|
||||
std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
|
||||
{
|
||||
get();
|
||||
if (JSON_UNLIKELY(not unexpect_eof(format, "string")))
|
||||
{
|
||||
success = false;
|
||||
}
|
||||
return static_cast<char>(current);
|
||||
});
|
||||
return success;
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] format the current format (for diagnostics)
|
||||
@param[in] context further context information (for diagnostics)
|
||||
@ -1688,7 +1916,6 @@ class binary_reader
|
||||
return std::string{cr};
|
||||
}
|
||||
|
||||
private:
|
||||
/*!
|
||||
@param[in] format the current format
|
||||
@param[in] detail a detailed error message
|
||||
@ -1715,6 +1942,10 @@ class binary_reader
|
||||
error_msg += "UBJSON";
|
||||
break;
|
||||
|
||||
case input_format_t::bson:
|
||||
error_msg += "BSON";
|
||||
break;
|
||||
|
||||
// LCOV_EXCL_START
|
||||
default:
|
||||
assert(false);
|
||||
@ -1724,6 +1955,7 @@ class binary_reader
|
||||
return error_msg + " " + context + ": " + detail;
|
||||
}
|
||||
|
||||
private:
|
||||
/// input adapter
|
||||
input_adapter_t ia = nullptr;
|
||||
|
||||
|
@ -18,7 +18,7 @@ namespace nlohmann
|
||||
namespace detail
|
||||
{
|
||||
/// the supported input formats
|
||||
enum class input_format_t { json, cbor, msgpack, ubjson };
|
||||
enum class input_format_t { json, cbor, msgpack, ubjson, bson };
|
||||
|
||||
////////////////////
|
||||
// input adapters //
|
||||
|
@ -35,7 +35,33 @@ class binary_writer
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief[in] j JSON value to serialize
|
||||
@param[in] j JSON value to serialize
|
||||
@pre j.type() == value_t::object
|
||||
*/
|
||||
void write_bson(const BasicJsonType& j)
|
||||
{
|
||||
switch (j.type())
|
||||
{
|
||||
case value_t::object:
|
||||
{
|
||||
write_bson_object(*j.m_value.object);
|
||||
break;
|
||||
}
|
||||
|
||||
case value_t::discarded:
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name())));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] j JSON value to serialize
|
||||
*/
|
||||
void write_cbor(const BasicJsonType& j)
|
||||
{
|
||||
@ -279,7 +305,7 @@ class binary_writer
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief[in] j JSON value to serialize
|
||||
@param[in] j JSON value to serialize
|
||||
*/
|
||||
void write_msgpack(const BasicJsonType& j)
|
||||
{
|
||||
@ -679,33 +705,362 @@ class binary_writer
|
||||
}
|
||||
|
||||
private:
|
||||
/*
|
||||
@brief write a number to output input
|
||||
//////////
|
||||
// BSON //
|
||||
//////////
|
||||
|
||||
@param[in] n number of type @a NumberType
|
||||
@tparam NumberType the type of the number
|
||||
|
||||
@note This function needs to respect the system's endianess, because bytes
|
||||
in CBOR, MessagePack, and UBJSON are stored in network order (big
|
||||
endian) and therefore need reordering on little endian systems.
|
||||
/*!
|
||||
@return The size of a BSON document entry header, including the id marker
|
||||
and the entry name size (and its null-terminator).
|
||||
*/
|
||||
template<typename NumberType>
|
||||
void write_number(const NumberType n)
|
||||
static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name)
|
||||
{
|
||||
// step 1: write number to array of length NumberType
|
||||
std::array<CharType, sizeof(NumberType)> vec;
|
||||
std::memcpy(vec.data(), &n, sizeof(NumberType));
|
||||
|
||||
// step 2: write array to output (with possible reordering)
|
||||
if (is_little_endian)
|
||||
const auto it = name.find(static_cast<typename BasicJsonType::string_t::value_type>(0));
|
||||
if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos))
|
||||
{
|
||||
// reverse byte order prior to conversion if necessary
|
||||
std::reverse(vec.begin(), vec.end());
|
||||
JSON_THROW(out_of_range::create(409,
|
||||
"BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")"));
|
||||
}
|
||||
|
||||
oa->write_characters(vec.data(), sizeof(NumberType));
|
||||
return /*id*/ 1ul + name.size() + /*zero-terminator*/1u;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Writes the given @a element_type and @a name to the output adapter
|
||||
*/
|
||||
void write_bson_entry_header(const typename BasicJsonType::string_t& name,
|
||||
std::uint8_t element_type)
|
||||
{
|
||||
oa->write_character(to_char_type(element_type)); // boolean
|
||||
oa->write_characters(
|
||||
reinterpret_cast<const CharType*>(name.c_str()),
|
||||
name.size() + 1u);
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Writes a BSON element with key @a name and boolean value @a value
|
||||
*/
|
||||
void write_bson_boolean(const typename BasicJsonType::string_t& name,
|
||||
const bool value)
|
||||
{
|
||||
write_bson_entry_header(name, 0x08);
|
||||
oa->write_character(value ? to_char_type(0x01) : to_char_type(0x00));
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Writes a BSON element with key @a name and double value @a value
|
||||
*/
|
||||
void write_bson_double(const typename BasicJsonType::string_t& name,
|
||||
const double value)
|
||||
{
|
||||
write_bson_entry_header(name, 0x01);
|
||||
write_number<double, true>(value);
|
||||
}
|
||||
|
||||
/*!
|
||||
@return The size of the BSON-encoded string in @a value
|
||||
*/
|
||||
static std::size_t calc_bson_string_size(const typename BasicJsonType::string_t& value)
|
||||
{
|
||||
return sizeof(std::int32_t) + value.size() + 1ul;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Writes a BSON element with key @a name and string value @a value
|
||||
*/
|
||||
void write_bson_string(const typename BasicJsonType::string_t& name,
|
||||
const typename BasicJsonType::string_t& value)
|
||||
{
|
||||
write_bson_entry_header(name, 0x02);
|
||||
|
||||
write_number<std::int32_t, true>(static_cast<std::int32_t>(value.size() + 1ul));
|
||||
oa->write_characters(
|
||||
reinterpret_cast<const CharType*>(value.c_str()),
|
||||
value.size() + 1);
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Writes a BSON element with key @a name and null value
|
||||
*/
|
||||
void write_bson_null(const typename BasicJsonType::string_t& name)
|
||||
{
|
||||
write_bson_entry_header(name, 0x0A);
|
||||
}
|
||||
|
||||
/*!
@return The size of the BSON-encoded integer @a value: 4 bytes if it fits
        into an int32, 8 bytes otherwise
*/
static std::size_t calc_bson_integer_size(const std::int64_t value)
{
    const bool fits_int32 = (std::numeric_limits<std::int32_t>::min)() <= value
                            and value <= (std::numeric_limits<std::int32_t>::max)();

    return fits_int32 ? sizeof(std::int32_t) : sizeof(std::int64_t);
}
|
||||
|
||||
/*!
|
||||
@brief Writes a BSON element with key @a name and integer @a value
|
||||
*/
|
||||
void write_bson_integer(const typename BasicJsonType::string_t& name,
|
||||
const std::int64_t value)
|
||||
{
|
||||
if ((std::numeric_limits<std::int32_t>::min)() <= value and value <= (std::numeric_limits<std::int32_t>::max)())
|
||||
{
|
||||
write_bson_entry_header(name, 0x10); // int32
|
||||
write_number<std::int32_t, true>(static_cast<std::int32_t>(value));
|
||||
}
|
||||
else
|
||||
{
|
||||
write_bson_entry_header(name, 0x12); // int64
|
||||
write_number<std::int64_t, true>(static_cast<std::int64_t>(value));
|
||||
}
|
||||
}
|
||||
|
||||
/*!
@return The size of the BSON-encoded unsigned integer @a value: 4 bytes if
        it does not exceed the largest int32, 8 bytes otherwise
*/
static std::size_t calc_bson_unsigned_size(const std::uint64_t value)
{
    const auto int32_max = static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)());

    return (value <= int32_max) ? sizeof(std::int32_t) : sizeof(std::int64_t);
}
|
||||
|
||||
/*!
|
||||
@brief Writes a BSON element with key @a name and unsigned @a value
|
||||
*/
|
||||
void write_bson_unsigned(const typename BasicJsonType::string_t& name,
|
||||
const std::uint64_t value)
|
||||
{
|
||||
if (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)()))
|
||||
{
|
||||
write_bson_entry_header(name, 0x10); // int32
|
||||
write_number<std::int32_t, true>(static_cast<std::int32_t>(value));
|
||||
}
|
||||
else if (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int64_t>::max)()))
|
||||
{
|
||||
write_bson_entry_header(name, 0x12); // int64
|
||||
write_number<std::int64_t, true>(static_cast<std::int64_t>(value));
|
||||
}
|
||||
else
|
||||
{
|
||||
JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(value)));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Writes a BSON element with key @a name and object @a value
|
||||
*/
|
||||
void write_bson_object_entry(const typename BasicJsonType::string_t& name,
|
||||
const typename BasicJsonType::object_t& value)
|
||||
{
|
||||
write_bson_entry_header(name, 0x03); // object
|
||||
write_bson_object(value);
|
||||
}
|
||||
|
||||
/*!
|
||||
@return The size of the BSON-encoded array @a value
|
||||
*/
|
||||
static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value)
|
||||
{
|
||||
std::size_t embedded_document_size = 0ul;
|
||||
std::size_t array_index = 0ul;
|
||||
|
||||
for (const auto& el : value)
|
||||
{
|
||||
embedded_document_size += calc_bson_element_size(std::to_string(array_index++), el);
|
||||
}
|
||||
|
||||
return sizeof(std::int32_t) + embedded_document_size + 1ul;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Writes a BSON element with key @a name and array @a value
|
||||
*/
|
||||
void write_bson_array(const typename BasicJsonType::string_t& name,
|
||||
const typename BasicJsonType::array_t& value)
|
||||
{
|
||||
write_bson_entry_header(name, 0x04); // array
|
||||
write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_array_size(value)));
|
||||
|
||||
std::size_t array_index = 0ul;
|
||||
|
||||
for (const auto& el : value)
|
||||
{
|
||||
write_bson_element(std::to_string(array_index++), el);
|
||||
}
|
||||
|
||||
oa->write_character(to_char_type(0x00));
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Calculates the size necessary to serialize the JSON value @a j with its @a name
|
||||
@return The calculated size for the BSON document entry for @a j with the given @a name.
|
||||
*/
|
||||
static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name,
|
||||
const BasicJsonType& j)
|
||||
{
|
||||
const auto header_size = calc_bson_entry_header_size(name);
|
||||
switch (j.type())
|
||||
{
|
||||
case value_t::discarded:
|
||||
return 0ul;
|
||||
|
||||
case value_t::object:
|
||||
return header_size + calc_bson_object_size(*j.m_value.object);
|
||||
|
||||
case value_t::array:
|
||||
return header_size + calc_bson_array_size(*j.m_value.array);
|
||||
|
||||
case value_t::boolean:
|
||||
return header_size + 1ul;
|
||||
|
||||
case value_t::number_float:
|
||||
return header_size + 8ul;
|
||||
|
||||
case value_t::number_integer:
|
||||
return header_size + calc_bson_integer_size(j.m_value.number_integer);
|
||||
|
||||
case value_t::number_unsigned:
|
||||
return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned);
|
||||
|
||||
case value_t::string:
|
||||
return header_size + calc_bson_string_size(*j.m_value.string);
|
||||
|
||||
case value_t::null:
|
||||
return header_size + 0ul;
|
||||
|
||||
// LCOV_EXCL_START
|
||||
default:
|
||||
assert(false);
|
||||
return 0ul;
|
||||
// LCOV_EXCL_STOP
|
||||
};
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Serializes the JSON value @a j to BSON and associates it with the
|
||||
key @a name.
|
||||
@param name The name to associate with the JSON entity @a j within the
|
||||
current BSON document
|
||||
@return The size of the BSON entry
|
||||
*/
|
||||
void write_bson_element(const typename BasicJsonType::string_t& name,
|
||||
const BasicJsonType& j)
|
||||
{
|
||||
switch (j.type())
|
||||
{
|
||||
case value_t::discarded:
|
||||
return;
|
||||
|
||||
case value_t::object:
|
||||
return write_bson_object_entry(name, *j.m_value.object);
|
||||
|
||||
case value_t::array:
|
||||
return write_bson_array(name, *j.m_value.array);
|
||||
|
||||
case value_t::boolean:
|
||||
return write_bson_boolean(name, j.m_value.boolean);
|
||||
|
||||
case value_t::number_float:
|
||||
return write_bson_double(name, j.m_value.number_float);
|
||||
|
||||
case value_t::number_integer:
|
||||
return write_bson_integer(name, j.m_value.number_integer);
|
||||
|
||||
case value_t::number_unsigned:
|
||||
return write_bson_unsigned(name, j.m_value.number_unsigned);
|
||||
|
||||
case value_t::string:
|
||||
return write_bson_string(name, *j.m_value.string);
|
||||
|
||||
case value_t::null:
|
||||
return write_bson_null(name);
|
||||
|
||||
// LCOV_EXCL_START
|
||||
default:
|
||||
assert(false);
|
||||
return;
|
||||
// LCOV_EXCL_STOP
|
||||
};
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Calculates the size of the BSON serialization of the given
|
||||
JSON-object @a j.
|
||||
@param[in] j JSON value to serialize
|
||||
@pre j.type() == value_t::object
|
||||
*/
|
||||
static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value)
|
||||
{
|
||||
std::size_t document_size = 0;
|
||||
|
||||
for (const auto& el : value)
|
||||
{
|
||||
document_size += calc_bson_element_size(el.first, el.second);
|
||||
}
|
||||
|
||||
return sizeof(std::int32_t) + document_size + 1ul;
|
||||
}
|
||||
|
||||
/*!
|
||||
@param[in] j JSON value to serialize
|
||||
@pre j.type() == value_t::object
|
||||
*/
|
||||
void write_bson_object(const typename BasicJsonType::object_t& value)
|
||||
{
|
||||
write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_object_size(value)));
|
||||
|
||||
for (const auto& el : value)
|
||||
{
|
||||
write_bson_element(el.first, el.second);
|
||||
}
|
||||
|
||||
oa->write_character(to_char_type(0x00));
|
||||
}
|
||||
|
||||
//////////
|
||||
// CBOR //
|
||||
//////////
|
||||
|
||||
/// @return the CBOR byte 0xFA that announces a single-precision float; the
///         unnamed parameter only selects this overload
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
|
||||
{
|
||||
return to_char_type(0xFA); // Single-Precision Float
|
||||
}
|
||||
|
||||
/// @return the CBOR byte 0xFB that announces a double-precision float; the
///         unnamed parameter only selects this overload
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
|
||||
{
|
||||
return to_char_type(0xFB); // Double-Precision Float
|
||||
}
|
||||
|
||||
/////////////
|
||||
// MsgPack //
|
||||
/////////////
|
||||
|
||||
/// @return the MessagePack byte 0xCA that announces a float 32; the
///         unnamed parameter only selects this overload
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
|
||||
{
|
||||
return to_char_type(0xCA); // float 32
|
||||
}
|
||||
|
||||
/// @return the MessagePack byte 0xCB that announces a float 64; the
///         unnamed parameter only selects this overload
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
|
||||
{
|
||||
return to_char_type(0xCB); // float 64
|
||||
}
|
||||
|
||||
////////////
|
||||
// UBJSON //
|
||||
////////////
|
||||
|
||||
// UBJSON: write number (floating point)
|
||||
template<typename NumberType, typename std::enable_if<
|
||||
std::is_floating_point<NumberType>::value, int>::type = 0>
|
||||
@ -906,26 +1261,6 @@ class binary_writer
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
|
||||
{
|
||||
return to_char_type(0xFA); // Single-Precision Float
|
||||
}
|
||||
|
||||
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
|
||||
{
|
||||
return to_char_type(0xFB); // Double-Precision Float
|
||||
}
|
||||
|
||||
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
|
||||
{
|
||||
return to_char_type(0xCA); // float 32
|
||||
}
|
||||
|
||||
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
|
||||
{
|
||||
return to_char_type(0xCB); // float 64
|
||||
}
|
||||
|
||||
static constexpr CharType get_ubjson_float_prefix(float /*unused*/)
|
||||
{
|
||||
return 'd'; // float 32
|
||||
@ -936,6 +1271,38 @@ class binary_writer
|
||||
return 'D'; // float 64
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// Utility functions //
|
||||
///////////////////////
|
||||
|
||||
/*
|
||||
@brief write a number to output input
|
||||
@param[in] n number of type @a NumberType
|
||||
@tparam NumberType the type of the number
|
||||
@tparam OutputIsLittleEndian Set to true if output data is
|
||||
required to be little endian
|
||||
|
||||
@note This function needs to respect the system's endianess, because bytes
|
||||
in CBOR, MessagePack, and UBJSON are stored in network order (big
|
||||
endian) and therefore need reordering on little endian systems.
|
||||
*/
|
||||
template<typename NumberType, bool OutputIsLittleEndian = false>
|
||||
void write_number(const NumberType n)
|
||||
{
|
||||
// step 1: write number to array of length NumberType
|
||||
std::array<CharType, sizeof(NumberType)> vec;
|
||||
std::memcpy(vec.data(), &n, sizeof(NumberType));
|
||||
|
||||
// step 2: write array to output (with possible reordering)
|
||||
if (is_little_endian and not OutputIsLittleEndian)
|
||||
{
|
||||
// reverse byte order prior to conversion if necessary
|
||||
std::reverse(vec.begin(), vec.end());
|
||||
}
|
||||
|
||||
oa->write_characters(vec.data(), sizeof(NumberType));
|
||||
}
|
||||
|
||||
// The following to_char_type functions implement the conversion
|
||||
// between uint8_t and CharType. In case CharType is not unsigned,
|
||||
// such a conversion is required to allow values greater than 127.
|
||||
|
@ -28,6 +28,14 @@ namespace detail
|
||||
// serialization //
|
||||
///////////////////
|
||||
|
||||
/// how the serializer treats invalid UTF-8 found while dumping strings
|
||||
enum class error_handler_t
|
||||
{
|
||||
strict, ///< throw a type_error exception (316) in case of invalid UTF-8
|
||||
replace, ///< replace invalid UTF-8 sequences with U+FFFD
|
||||
ignore ///< ignore invalid UTF-8 sequences (they are left out of the output)
|
||||
};
|
||||
|
||||
template<typename BasicJsonType>
|
||||
class serializer
|
||||
{
|
||||
@ -42,12 +50,17 @@ class serializer
|
||||
/*!
|
||||
@param[in] s output stream to serialize to
|
||||
@param[in] ichar indentation character to use
|
||||
@param[in] error_handler_ how to react on decoding errors
|
||||
*/
|
||||
serializer(output_adapter_t<char> s, const char ichar)
|
||||
: o(std::move(s)), loc(std::localeconv()),
|
||||
thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)),
|
||||
decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point)),
|
||||
indent_char(ichar), indent_string(512, indent_char)
|
||||
serializer(output_adapter_t<char> s, const char ichar,
|
||||
error_handler_t error_handler_ = error_handler_t::strict)
|
||||
: o(std::move(s))
|
||||
, loc(std::localeconv())
|
||||
, thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep))
|
||||
, decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point))
|
||||
, indent_char(ichar)
|
||||
, indent_string(512, indent_char)
|
||||
, error_handler(error_handler_)
|
||||
{}
|
||||
|
||||
// delete because of pointer members
|
||||
@ -287,6 +300,10 @@ class serializer
|
||||
uint8_t state = UTF8_ACCEPT;
|
||||
std::size_t bytes = 0; // number of bytes written to string_buffer
|
||||
|
||||
// number of bytes written at the point of the last valid byte
|
||||
std::size_t bytes_after_last_accept = 0;
|
||||
std::size_t undumped_chars = 0;
|
||||
|
||||
for (std::size_t i = 0; i < s.size(); ++i)
|
||||
{
|
||||
const auto byte = static_cast<uint8_t>(s[i]);
|
||||
@ -384,14 +401,69 @@ class serializer
|
||||
o->write_characters(string_buffer.data(), bytes);
|
||||
bytes = 0;
|
||||
}
|
||||
|
||||
// remember the byte position of this accept
|
||||
bytes_after_last_accept = bytes;
|
||||
undumped_chars = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
case UTF8_REJECT: // decode found invalid UTF-8 byte
|
||||
{
|
||||
std::string sn(3, '\0');
|
||||
snprintf(&sn[0], sn.size(), "%.2X", byte);
|
||||
JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
|
||||
switch (error_handler)
|
||||
{
|
||||
case error_handler_t::strict:
|
||||
{
|
||||
std::string sn(3, '\0');
|
||||
snprintf(&sn[0], sn.size(), "%.2X", byte);
|
||||
JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
|
||||
}
|
||||
|
||||
case error_handler_t::ignore:
|
||||
case error_handler_t::replace:
|
||||
{
|
||||
// in case we saw this character the first time, we
|
||||
// would like to read it again, because the byte
|
||||
// may be OK for itself, but just not OK for the
|
||||
// previous sequence
|
||||
if (undumped_chars > 0)
|
||||
{
|
||||
--i;
|
||||
}
|
||||
|
||||
// reset length buffer to the last accepted index;
|
||||
// thus removing/ignoring the invalid characters
|
||||
bytes = bytes_after_last_accept;
|
||||
|
||||
if (error_handler == error_handler_t::replace)
|
||||
{
|
||||
// add a replacement character
|
||||
if (ensure_ascii)
|
||||
{
|
||||
string_buffer[bytes++] = '\\';
|
||||
string_buffer[bytes++] = 'u';
|
||||
string_buffer[bytes++] = 'f';
|
||||
string_buffer[bytes++] = 'f';
|
||||
string_buffer[bytes++] = 'f';
|
||||
string_buffer[bytes++] = 'd';
|
||||
}
|
||||
else
|
||||
{
|
||||
string_buffer[bytes++] = '\xEF';
|
||||
string_buffer[bytes++] = '\xBF';
|
||||
string_buffer[bytes++] = '\xBD';
|
||||
}
|
||||
bytes_after_last_accept = bytes;
|
||||
}
|
||||
|
||||
undumped_chars = 0;
|
||||
|
||||
// continue processing the string
|
||||
state = UTF8_ACCEPT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default: // decode found yet incomplete multi-byte code point
|
||||
@ -401,11 +473,13 @@ class serializer
|
||||
// code point will not be escaped - copy byte to buffer
|
||||
string_buffer[bytes++] = s[i];
|
||||
}
|
||||
++undumped_chars;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we finished processing the string
|
||||
if (JSON_LIKELY(state == UTF8_ACCEPT))
|
||||
{
|
||||
// write buffer
|
||||
@ -417,9 +491,38 @@ class serializer
|
||||
else
|
||||
{
|
||||
// we finish reading, but do not accept: string was incomplete
|
||||
std::string sn(3, '\0');
|
||||
snprintf(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back()));
|
||||
JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
|
||||
switch (error_handler)
|
||||
{
|
||||
case error_handler_t::strict:
|
||||
{
|
||||
std::string sn(3, '\0');
|
||||
snprintf(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back()));
|
||||
JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
|
||||
}
|
||||
|
||||
case error_handler_t::ignore:
|
||||
{
|
||||
// write all accepted bytes
|
||||
o->write_characters(string_buffer.data(), bytes_after_last_accept);
|
||||
break;
|
||||
}
|
||||
|
||||
case error_handler_t::replace:
|
||||
{
|
||||
// write all accepted bytes
|
||||
o->write_characters(string_buffer.data(), bytes_after_last_accept);
|
||||
// add a replacement character
|
||||
if (ensure_ascii)
|
||||
{
|
||||
o->write_characters("\\ufffd", 6);
|
||||
}
|
||||
else
|
||||
{
|
||||
o->write_characters("\xEF\xBF\xBD", 3);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -629,6 +732,9 @@ class serializer
|
||||
const char indent_char;
|
||||
/// the indentation string
|
||||
string_t indent_string;
|
||||
|
||||
/// error_handler how to react on decoding errors
|
||||
const error_handler_t error_handler;
|
||||
};
|
||||
} // namespace detail
|
||||
} // namespace nlohmann
|
||||
|
@ -208,6 +208,8 @@ class basic_json
|
||||
using json_pointer = ::nlohmann::json_pointer<basic_json>;
|
||||
template<typename T, typename SFINAE>
|
||||
using json_serializer = JSONSerializer<T, SFINAE>;
|
||||
/// how to treat decoding errors
|
||||
using error_handler_t = detail::error_handler_t;
|
||||
/// helper type for initializer lists of basic_json values
|
||||
using initializer_list_t = std::initializer_list<detail::json_ref<basic_json>>;
|
||||
|
||||
@ -1932,6 +1934,10 @@ class basic_json
|
||||
@param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters
|
||||
in the output are escaped with `\uXXXX` sequences, and the result consists
|
||||
of ASCII characters only.
|
||||
@param[in] error_handler how to react on decoding errors; there are three
|
||||
possible values: `strict` (throws an exception in case a decoding error
|
||||
occurs; default), `replace` (replace invalid UTF-8 sequences with U+FFFD),
|
||||
and `ignore` (ignore invalid UTF-8 sequences during serialization).
|
||||
|
||||
@return string containing the serialization of the JSON value
|
||||
|
||||
@ -1950,13 +1956,16 @@ class basic_json
|
||||
@see https://docs.python.org/2/library/json.html#json.dump
|
||||
|
||||
@since version 1.0.0; indentation character @a indent_char, option
|
||||
@a ensure_ascii and exceptions added in version 3.0.0
|
||||
@a ensure_ascii and exceptions added in version 3.0.0; error
|
||||
handlers added in version 3.4.0.
|
||||
*/
|
||||
string_t dump(const int indent = -1, const char indent_char = ' ',
|
||||
const bool ensure_ascii = false) const
|
||||
string_t dump(const int indent = -1,
|
||||
const char indent_char = ' ',
|
||||
const bool ensure_ascii = false,
|
||||
const error_handler_t error_handler = error_handler_t::strict) const
|
||||
{
|
||||
string_t result;
|
||||
serializer s(detail::output_adapter<char, string_t>(result), indent_char);
|
||||
serializer s(detail::output_adapter<char, string_t>(result), indent_char, error_handler);
|
||||
|
||||
if (indent >= 0)
|
||||
{
|
||||
@ -6618,6 +6627,87 @@ class basic_json
|
||||
binary_writer<char>(o).write_ubjson(j, use_size, use_type);
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
@brief Serializes the given JSON object `j` to BSON and returns a vector
|
||||
containing the corresponding BSON-representation.
|
||||
|
||||
BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are
|
||||
stored as a single entity (a so-called document).
|
||||
|
||||
The library uses the following mapping from JSON values types to BSON types:
|
||||
|
||||
JSON value type | value/range | BSON type | marker
|
||||
--------------- | --------------------------------- | ----------- | ------
|
||||
null | `null` | null | 0x0A
|
||||
boolean | `true`, `false` | boolean | 0x08
|
||||
number_integer | -9223372036854775808..-2147483649 | int64 | 0x12
|
||||
number_integer | -2147483648..2147483647 | int32 | 0x10
|
||||
number_integer | 2147483648..9223372036854775807 | int64 | 0x12
|
||||
number_unsigned | 0..2147483647 | int32 | 0x10
|
||||
number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12
|
||||
number_unsigned | 9223372036854775808..18446744073709551615| -- | --
|
||||
number_float | *any value* | double | 0x01
|
||||
string | *any value* | string | 0x02
|
||||
array | *any value* | document | 0x04
|
||||
object | *any value* | document | 0x03
|
||||
|
||||
@warning The mapping is **incomplete**, since only JSON-objects (and things
|
||||
contained therein) can be serialized to BSON.
|
||||
Also, integers larger than 9223372036854775807 cannot be serialized to BSON,
|
||||
and the keys may not contain U+0000, since they are serialized as
|
||||
zero-terminated c-strings.
|
||||
|
||||
@throw out_of_range.407 if `j.is_number_unsigned() && j.get<std::uint64_t>() > 9223372036854775807`
|
||||
@throw out_of_range.409 if a key in `j` contains a NULL (U+0000)
|
||||
@throw type_error.317 if `!j.is_object()`
|
||||
|
||||
@pre The input `j` is required to be an object: `j.is_object() == true`.
|
||||
|
||||
@note Any BSON output created via @ref to_bson can be successfully parsed
|
||||
by @ref from_bson.
|
||||
|
||||
@param[in] j JSON value to serialize
|
||||
@return BSON serialization as byte vector
|
||||
|
||||
@complexity Linear in the size of the JSON value @a j.
|
||||
|
||||
@sa http://bsonspec.org/spec.html
|
||||
@sa @ref from_bson(detail::input_adapter, const bool strict) for the
|
||||
analogous deserialization
|
||||
@sa @ref to_ubjson(const basic_json&) for the related UBJSON format
|
||||
@sa @ref to_cbor(const basic_json&) for the related CBOR format
|
||||
@sa @ref to_msgpack(const basic_json&) for the related MessagePack format
|
||||
*/
|
||||
static std::vector<uint8_t> to_bson(const basic_json& j)
|
||||
{
|
||||
std::vector<uint8_t> result;
|
||||
to_bson(j, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/*!
|
||||
@brief Serializes the given JSON object `j` to BSON and forwards the
|
||||
corresponding BSON-representation to the given output_adapter `o`.
|
||||
@param j The JSON object to convert to BSON.
|
||||
@param o The output adapter that receives the binary BSON representation.
|
||||
@pre The input `j` shall be an object: `j.is_object() == true`
|
||||
@sa @ref to_bson(const basic_json&)
|
||||
*/
|
||||
static void to_bson(const basic_json& j, detail::output_adapter<uint8_t> o)
|
||||
{
|
||||
binary_writer<uint8_t>(o).write_bson(j);
|
||||
}
|
||||
|
||||
/*!
|
||||
@copydoc to_bson(const basic_json&, detail::output_adapter<uint8_t>)
|
||||
*/
|
||||
static void to_bson(const basic_json& j, detail::output_adapter<char> o)
|
||||
{
|
||||
binary_writer<char>(o).write_bson(j);
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
@brief create a JSON value from an input in CBOR format
|
||||
|
||||
@ -6812,6 +6902,8 @@ class basic_json
|
||||
related CBOR format
|
||||
@sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for
|
||||
the related UBJSON format
|
||||
@sa @ref from_bson(detail::input_adapter, const bool, const bool) for
|
||||
the related BSON format
|
||||
|
||||
@since version 2.0.9; parameter @a start_index since 2.1.1; changed to
|
||||
consume input adapters, removed start_index parameter, and added
|
||||
@ -6897,6 +6989,8 @@ class basic_json
|
||||
related CBOR format
|
||||
@sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for
|
||||
the related MessagePack format
|
||||
@sa @ref from_bson(detail::input_adapter, const bool, const bool) for
|
||||
the related BSON format
|
||||
|
||||
@since version 3.1.0; added @a allow_exceptions parameter since 3.2.0
|
||||
*/
|
||||
@ -6925,6 +7019,91 @@ class basic_json
|
||||
return res ? result : basic_json(value_t::discarded);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
@brief Create a JSON value from an input in BSON format
|
||||
|
||||
Deserializes a given input @a i to a JSON value using the BSON (Binary JSON)
|
||||
serialization format.
|
||||
|
||||
The library maps BSON record types to JSON value types as follows:
|
||||
|
||||
BSON type | BSON marker byte | JSON value type
|
||||
--------------- | ---------------- | ---------------------------
|
||||
double | 0x01 | number_float
|
||||
string | 0x02 | string
|
||||
document | 0x03 | object
|
||||
array | 0x04 | array
|
||||
binary | 0x05 | still unsupported
|
||||
undefined | 0x06 | still unsupported
|
||||
ObjectId | 0x07 | still unsupported
|
||||
boolean | 0x08 | boolean
|
||||
UTC Date-Time | 0x09 | still unsupported
|
||||
null | 0x0A | null
|
||||
Regular Expr. | 0x0B | still unsupported
|
||||
DB Pointer | 0x0C | still unsupported
|
||||
JavaScript Code | 0x0D | still unsupported
|
||||
Symbol | 0x0E | still unsupported
|
||||
JavaScript Code w/ Scope | 0x0F | still unsupported
|
||||
int32 | 0x10 | number_integer
|
||||
Timestamp | 0x11 | still unsupported
|
||||
128-bit decimal float | 0x13 | still unsupported
|
||||
Max Key | 0x7F | still unsupported
|
||||
Min Key | 0xFF | still unsupported
|
||||
|
||||
|
||||
@warning The mapping is **incomplete**. The unsupported mappings
|
||||
are indicated in the table above.
|
||||
|
||||
@param[in] i an input in BSON format convertible to an input adapter
|
||||
@param[in] strict whether to expect the input to be consumed until EOF
|
||||
(true by default)
|
||||
@param[in] allow_exceptions whether to throw exceptions in case of a
|
||||
parse error (optional, true by default)
|
||||
|
||||
@return deserialized JSON value
|
||||
|
||||
@throw parse_error.114 if an unsupported BSON record type is encountered
|
||||
|
||||
@sa http://bsonspec.org/spec.html
|
||||
@sa @ref to_bson(const basic_json&) for the
|
||||
analogous serialization
|
||||
@sa @ref from_cbor(detail::input_adapter&&, const bool, const bool) for the
|
||||
related CBOR format
|
||||
@sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for
|
||||
the related MessagePack format
|
||||
@sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the
|
||||
related UBJSON format
|
||||
*/
|
||||
static basic_json from_bson(detail::input_adapter&& i,
|
||||
const bool strict = true,
|
||||
const bool allow_exceptions = true)
|
||||
{
|
||||
basic_json result;
|
||||
detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions);
|
||||
const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::bson, &sdp, strict);
|
||||
return res ? result : basic_json(value_t::discarded);
|
||||
}
|
||||
|
||||
/*!
|
||||
@copydoc from_bson(detail::input_adapter&&, const bool, const bool)
|
||||
*/
|
||||
template<typename A1, typename A2,
|
||||
detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0>
|
||||
static basic_json from_bson(A1 && a1, A2 && a2,
|
||||
const bool strict = true,
|
||||
const bool allow_exceptions = true)
|
||||
{
|
||||
basic_json result;
|
||||
detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions);
|
||||
const bool res = binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).sax_parse(input_format_t::bson, &sdp, strict);
|
||||
return res ? result : basic_json(value_t::discarded);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// @}
|
||||
|
||||
//////////////////////////
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -10,6 +10,7 @@ SOURCES = src/unit.cpp \
|
||||
src/unit-algorithms.cpp \
|
||||
src/unit-allocator.cpp \
|
||||
src/unit-alt-string.cpp \
|
||||
src/unit-bson.cpp \
|
||||
src/unit-capacity.cpp \
|
||||
src/unit-cbor.cpp \
|
||||
src/unit-class_const_iterator.cpp \
|
||||
@ -90,12 +91,15 @@ check: $(OBJECTS) $(TESTCASES)
|
||||
##############################################################################
|
||||
|
||||
FUZZER_ENGINE = src/fuzzer-driver_afl.cpp
|
||||
FUZZERS = parse_afl_fuzzer parse_cbor_fuzzer parse_msgpack_fuzzer parse_ubjson_fuzzer
|
||||
FUZZERS = parse_afl_fuzzer parse_bson_fuzzer parse_cbor_fuzzer parse_msgpack_fuzzer parse_ubjson_fuzzer
|
||||
fuzzers: $(FUZZERS)
|
||||
|
||||
parse_afl_fuzzer:
|
||||
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(FUZZER_ENGINE) src/fuzzer-parse_json.cpp -o $@
|
||||
|
||||
parse_bson_fuzzer:
|
||||
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(FUZZER_ENGINE) src/fuzzer-parse_bson.cpp -o $@
|
||||
|
||||
parse_cbor_fuzzer:
|
||||
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(FUZZER_ENGINE) src/fuzzer-parse_cbor.cpp -o $@
|
||||
|
||||
|
BIN
test/data/json.org/1.json.bson
Normal file
BIN
test/data/json.org/1.json.bson
Normal file
Binary file not shown.
BIN
test/data/json.org/2.json.bson
Normal file
BIN
test/data/json.org/2.json.bson
Normal file
Binary file not shown.
BIN
test/data/json.org/3.json.bson
Normal file
BIN
test/data/json.org/3.json.bson
Normal file
Binary file not shown.
BIN
test/data/json.org/4.json.bson
Normal file
BIN
test/data/json.org/4.json.bson
Normal file
Binary file not shown.
BIN
test/data/json.org/5.json.bson
Normal file
BIN
test/data/json.org/5.json.bson
Normal file
Binary file not shown.
BIN
test/data/json_tests/pass3.json.bson
Normal file
BIN
test/data/json_tests/pass3.json.bson
Normal file
Binary file not shown.
73
test/src/fuzzer-parse_bson.cpp
Normal file
73
test/src/fuzzer-parse_bson.cpp
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
__ _____ _____ _____
|
||||
__| | __| | | | JSON for Modern C++ (fuzz test support)
|
||||
| | |__ | | | | | | version 3.3.0
|
||||
|_____|_____|_____|_|___| https://github.com/nlohmann/json
|
||||
|
||||
This file implements a parser test suitable for fuzz testing. Given a byte
|
||||
array data, it performs the following steps:
|
||||
|
||||
- j1 = from_bson(data)
|
||||
- vec = to_bson(j1)
|
||||
- j2 = from_bson(vec)
|
||||
- assert(to_bson(j2) == vec)
|
||||
|
||||
The provided function `LLVMFuzzerTestOneInput` can be used in different fuzzer
|
||||
drivers.
|
||||
|
||||
Licensed under the MIT License <http://opensource.org/licenses/MIT>.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
// see http://llvm.org/docs/LibFuzzer.html
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
|
||||
{
|
||||
try
|
||||
{
|
||||
// step 1: parse input
|
||||
std::vector<uint8_t> vec1(data, data + size);
|
||||
json j1 = json::from_bson(vec1);
|
||||
|
||||
if (j1.is_discarded())
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
// step 2: round trip
|
||||
std::vector<uint8_t> vec2 = json::to_bson(j1);
|
||||
|
||||
// parse serialization
|
||||
json j2 = json::from_bson(vec2);
|
||||
|
||||
// serializations must match
|
||||
assert(json::to_bson(j2) == vec2);
|
||||
}
|
||||
catch (const json::parse_error&)
|
||||
{
|
||||
// parsing a BSON serialization must not fail
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
catch (const json::parse_error&)
|
||||
{
|
||||
// parse errors are ok, because input may be random bytes
|
||||
}
|
||||
catch (const json::type_error&)
|
||||
{
|
||||
// type errors can occur during parsing, too
|
||||
}
|
||||
catch (const json::out_of_range&)
|
||||
{
|
||||
// out of range errors can occur during parsing, too
|
||||
}
|
||||
|
||||
// return 0 - non-zero return values are reserved for future use
|
||||
return 0;
|
||||
}
|
@ -60,10 +60,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
|
||||
// parse errors are ok, because input may be random bytes
|
||||
}
|
||||
catch (const json::out_of_range&)
|
||||
{
|
||||
// parse errors are ok, because input may be random bytes
|
||||
}
|
||||
catch (const json::out_of_range&)
|
||||
{
|
||||
// out of range errors may happen if provided sizes are excessive
|
||||
}
|
||||
|
1241
test/src/unit-bson.cpp
Normal file
1241
test/src/unit-bson.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -139,10 +139,10 @@ bool operator==(Data const& lhs, Data const& rhs)
|
||||
return lhs.a == rhs.a && lhs.b == rhs.b;
|
||||
}
|
||||
|
||||
bool operator!=(Data const& lhs, Data const& rhs)
|
||||
{
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
//bool operator!=(Data const& lhs, Data const& rhs)
|
||||
//{
|
||||
// return !(lhs == rhs);
|
||||
//}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
@ -94,4 +94,80 @@ TEST_CASE("serialization")
|
||||
"[\n\t\"foo\",\n\t1,\n\t2,\n\t3,\n\tfalse,\n\t{\n\t\t\"one\": 1\n\t}\n]");
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("dump")
|
||||
{
|
||||
SECTION("invalid character")
|
||||
{
|
||||
json j = "ä\xA9ü";
|
||||
|
||||
CHECK_THROWS_AS(j.dump(), json::type_error&);
|
||||
CHECK_THROWS_WITH(j.dump(), "[json.exception.type_error.316] invalid UTF-8 byte at index 2: 0xA9");
|
||||
CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&);
|
||||
CHECK_THROWS_WITH(j.dump(1, ' ', false, json::error_handler_t::strict), "[json.exception.type_error.316] invalid UTF-8 byte at index 2: 0xA9");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"äü\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"ä\xEF\xBF\xBDü\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"\\u00e4\\ufffd\\u00fc\"");
|
||||
}
|
||||
|
||||
SECTION("ending with incomplete character")
|
||||
{
|
||||
json j = "123\xC2";
|
||||
|
||||
CHECK_THROWS_AS(j.dump(), json::type_error&);
|
||||
CHECK_THROWS_WITH(j.dump(), "[json.exception.type_error.316] incomplete UTF-8 string; last byte: 0xC2");
|
||||
CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&);
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd\"");
|
||||
}
|
||||
|
||||
SECTION("unexpected character")
|
||||
{
|
||||
json j = "123\xF1\xB0\x34\x35\x36";
|
||||
|
||||
CHECK_THROWS_AS(j.dump(), json::type_error&);
|
||||
CHECK_THROWS_WITH(j.dump(), "[json.exception.type_error.316] invalid UTF-8 byte at index 5: 0x34");
|
||||
CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&);
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\"");
|
||||
CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\x34\x35\x36\"");
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd456\"");
|
||||
}
|
||||
|
||||
SECTION("U+FFFD Substitution of Maximal Subparts")
|
||||
{
|
||||
// Some tests (mostly) from
|
||||
// https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf
|
||||
// Section 3.9 -- U+FFFD Substitution of Maximal Subparts
|
||||
|
||||
auto test = [&](std::string const & input, std::string const & expected)
|
||||
{
|
||||
json j = input;
|
||||
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"" + expected + "\"");
|
||||
};
|
||||
|
||||
test("\xC2", "\\ufffd");
|
||||
test("\xC2\x41\x42", "\\ufffd" "\x41" "\x42");
|
||||
test("\xC2\xF4", "\\ufffd" "\\ufffd");
|
||||
|
||||
test("\xF0\x80\x80\x41", "\\ufffd" "\\ufffd" "\\ufffd" "\x41");
|
||||
test("\xF1\x80\x80\x41", "\\ufffd" "\x41");
|
||||
test("\xF2\x80\x80\x41", "\\ufffd" "\x41");
|
||||
test("\xF3\x80\x80\x41", "\\ufffd" "\x41");
|
||||
test("\xF4\x80\x80\x41", "\\ufffd" "\x41");
|
||||
test("\xF5\x80\x80\x41", "\\ufffd" "\\ufffd" "\\ufffd" "\x41");
|
||||
|
||||
test("\xF0\x90\x80\x41", "\\ufffd" "\x41");
|
||||
test("\xF1\x90\x80\x41", "\\ufffd" "\x41");
|
||||
test("\xF2\x90\x80\x41", "\\ufffd" "\x41");
|
||||
test("\xF3\x90\x80\x41", "\\ufffd" "\x41");
|
||||
test("\xF4\x90\x80\x41", "\\ufffd" "\\ufffd" "\\ufffd" "\x41");
|
||||
test("\xF5\x90\x80\x41", "\\ufffd" "\\ufffd" "\\ufffd" "\x41");
|
||||
|
||||
test("\xC0\xAF\xE0\x80\xBF\xF0\x81\x82\x41", "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\x41");
|
||||
test("\xED\xA0\x80\xED\xBF\xBF\xED\xAF\x41", "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\x41");
|
||||
test("\xF4\x91\x92\x93\xFF\x41\x80\xBF\x42", "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\x41" "\\ufffd""\\ufffd" "\x42");
|
||||
test("\xE1\x80\xE2\xF0\x91\x92\xF1\xBF\x41", "\\ufffd" "\\ufffd" "\\ufffd" "\\ufffd" "\x41");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -39,6 +39,80 @@ using nlohmann::json;
|
||||
extern size_t calls;
|
||||
size_t calls = 0;
|
||||
|
||||
void check_utf8dump(bool success_expected, int byte1, int byte2, int byte3, int byte4);
|
||||
|
||||
void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
|
||||
{
|
||||
std::string json_string;
|
||||
|
||||
CAPTURE(byte1);
|
||||
CAPTURE(byte2);
|
||||
CAPTURE(byte3);
|
||||
CAPTURE(byte4);
|
||||
|
||||
json_string += std::string(1, static_cast<char>(byte1));
|
||||
|
||||
if (byte2 != -1)
|
||||
{
|
||||
json_string += std::string(1, static_cast<char>(byte2));
|
||||
}
|
||||
|
||||
if (byte3 != -1)
|
||||
{
|
||||
json_string += std::string(1, static_cast<char>(byte3));
|
||||
}
|
||||
|
||||
if (byte4 != -1)
|
||||
{
|
||||
json_string += std::string(1, static_cast<char>(byte4));
|
||||
}
|
||||
|
||||
CAPTURE(json_string);
|
||||
|
||||
// store the string in a JSON value
|
||||
json j = json_string;
|
||||
json j2 = "abc" + json_string + "xyz";
|
||||
|
||||
// dumping with ignore/replace must not throw in any case
|
||||
auto s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore);
|
||||
auto s_ignored2 = j2.dump(-1, ' ', false, json::error_handler_t::ignore);
|
||||
auto s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore);
|
||||
auto s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore);
|
||||
auto s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace);
|
||||
auto s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace);
|
||||
auto s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace);
|
||||
auto s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace);
|
||||
|
||||
if (success_expected)
|
||||
{
|
||||
// strict mode must not throw if success is expected
|
||||
auto s_strict = j.dump();
|
||||
// all dumps should agree on the string
|
||||
CHECK(s_strict == s_ignored);
|
||||
CHECK(s_strict == s_replaced);
|
||||
}
|
||||
else
|
||||
{
|
||||
// strict mode must throw if success is not expected
|
||||
CHECK_THROWS_AS(j.dump(), json::type_error&);
|
||||
// ignore and replace must create different dumps
|
||||
CHECK(s_ignored != s_replaced);
|
||||
|
||||
// check that replace string contains a replacement character
|
||||
CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos);
|
||||
}
|
||||
|
||||
// check that prefix and suffix are preserved
|
||||
CHECK(s_ignored2.substr(1, 3) == "abc");
|
||||
CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz");
|
||||
CHECK(s_ignored2_ascii.substr(1, 3) == "abc");
|
||||
CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz");
|
||||
CHECK(s_replaced2.substr(1, 3) == "abc");
|
||||
CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz");
|
||||
CHECK(s_replaced2_ascii.substr(1, 3) == "abc");
|
||||
CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz");
|
||||
}
|
||||
|
||||
void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4);
|
||||
|
||||
// create and check a JSON string with up to four UTF-8 bytes
|
||||
@ -115,11 +189,13 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte1 = 0x80; byte1 <= 0xC1; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
|
||||
for (int byte1 = 0xF5; byte1 <= 0xFF; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -152,6 +228,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
|
||||
// all other characters are OK
|
||||
check_utf8string(true, byte1);
|
||||
check_utf8dump(true, byte1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -165,6 +242,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2);
|
||||
check_utf8dump(true, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -174,6 +252,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte1 = 0xC2; byte1 <= 0xDF; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -190,6 +269,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2);
|
||||
check_utf8dump(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -206,6 +286,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3);
|
||||
check_utf8dump(true, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -216,6 +297,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte1 = 0xE0; byte1 <= 0xE0; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -226,6 +308,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte2 = 0xA0; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
check_utf8dump(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -245,6 +328,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -265,6 +349,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -282,6 +367,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3);
|
||||
check_utf8dump(true, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -292,6 +378,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte1 = 0xE1; byte1 <= 0xEC; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -302,6 +389,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
check_utf8dump(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -321,6 +409,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -341,6 +430,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -358,6 +448,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3);
|
||||
check_utf8dump(true, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -368,6 +459,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte1 = 0xED; byte1 <= 0xED; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -378,6 +470,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte2 = 0x80; byte2 <= 0x9F; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
check_utf8dump(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -397,6 +490,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -417,6 +511,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -434,6 +529,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3);
|
||||
check_utf8dump(true, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -444,6 +540,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte1 = 0xEE; byte1 <= 0xEF; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -454,6 +551,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
check_utf8dump(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -473,6 +571,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -493,6 +592,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -512,6 +612,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(true, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -523,6 +624,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte1 = 0xF0; byte1 <= 0xF0; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -533,6 +635,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte2 = 0x90; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
check_utf8dump(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -546,6 +649,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -568,6 +672,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -591,6 +696,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -614,6 +720,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -634,6 +741,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(true, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -645,6 +753,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -655,6 +764,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
check_utf8dump(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -668,6 +778,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -690,6 +801,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -713,6 +825,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -736,6 +849,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -756,6 +870,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(true, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(true, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -767,6 +882,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte1 = 0xF4; byte1 <= 0xF4; ++byte1)
|
||||
{
|
||||
check_utf8string(false, byte1);
|
||||
check_utf8dump(false, byte1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -777,6 +893,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte2 = 0x80; byte2 <= 0x8F; ++byte2)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2);
|
||||
check_utf8dump(false, byte1, byte2);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -790,6 +907,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3);
|
||||
check_utf8dump(false, byte1, byte2, byte3);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -812,6 +930,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -835,6 +954,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
|
||||
{
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -858,6 +978,7 @@ TEST_CASE("Unicode", "[hide]")
|
||||
}
|
||||
|
||||
check_utf8string(false, byte1, byte2, byte3, byte4);
|
||||
check_utf8dump(false, byte1, byte2, byte3, byte4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user