🎨 clean up binary formats

This commit is contained in:
Niels Lohmann 2018-10-24 23:39:30 +02:00
parent 4d1eaace8c
commit 1968e5c793
No known key found for this signature in database
GPG Key ID: 7F3CEA63AE251B69
5 changed files with 1274 additions and 1122 deletions

View File

@ -125,14 +125,38 @@ class binary_reader
}
private:
//////////
// BSON //
//////////
/*!
@brief Reads in a BSON-object and passes it to the SAX-parser.
@return whether a valid BSON-value was passed to the SAX parser
*/
bool parse_bson_internal()
{
std::int32_t documentSize;
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
{
return false;
}
if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
{
return false;
}
return sax->end_object();
}
/*!
@brief Parses a C-style string from the BSON input.
@param [out] result A reference to the string variable where the read string
is to be stored.
@return `true` if the \x00-byte indicating the end of the
string was encountered before the EOF.
`false` indicates an unexpected EOF.
@param[in, out] result A reference to the string variable where the read
string is to be stored.
@return `true` if the \x00-byte indicating the end of the string was
encountered before the EOF; false` indicates an unexpected EOF.
*/
bool get_bson_cstr(string_t& result)
{
@ -155,107 +179,107 @@ class binary_reader
}
/*!
@brief Parses a zero-terminated string of length @a len from the BSON input.
@param [in] len The length (including the zero-byte at the end) of the string to be read.
@param [out] result A reference to the string variable where the read string
is to be stored.
@brief Parses a zero-terminated string of length @a len from the BSON
input.
@param[in] len The length (including the zero-byte at the end) of the
string to be read.
@param[in, out] result A reference to the string variable where the read
string is to be stored.
@tparam NumberType The type of the length @a len
@pre len > 0
@return `true` if the string was successfully parsed
*/
template <typename NumberType>
template<typename NumberType>
bool get_bson_string(const NumberType len, string_t& result)
{
return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result)
&& get() != std::char_traits<char>::eof();
}
/*!
@return A hexadecimal string representation of the given @a byte
@param byte The byte to convert to a string
*/
static std::string byte_hexstring(unsigned char byte)
{
char cr[3];
snprintf(cr, sizeof(cr), "%02hhX", byte);
return std::string{cr};
return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof();
}
/*!
@brief Read a BSON document element of the given @a element_type.
@param element_type The BSON element type, c.f. http://bsonspec.org/spec.html
@param element_type_parse_position The position in the input stream, where the `element_type` was read.
@warning Not all BSON element types are supported yet. An unsupported @a element_type will
give rise to a parse_error.114: Unsupported BSON record type 0x...
@param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
@param[in] element_type_parse_position The position in the input stream,
where the `element_type` was read.
@warning Not all BSON element types are supported yet. An unsupported
@a element_type will give rise to a parse_error.114:
Unsupported BSON record type 0x...
@return whether a valid BSON-object/array was passed to the SAX parser
*/
bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position)
bool parse_bson_element_internal(const int element_type,
const std::size_t element_type_parse_position)
{
switch (element_type)
{
case 0x01: // double
{
double number;
return get_number<double, true>(input_format_t::bson, number)
&& sax->number_float(static_cast<number_float_t>(number), "");
return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), "");
}
case 0x02: // string
{
std::int32_t len;
string_t value;
return get_number<std::int32_t, true>(input_format_t::bson, len)
&& get_bson_string(len, value)
&& sax->string(value);
}
case 0x08: // boolean
{
return sax->boolean(static_cast<bool>(get()));
}
case 0x10: // int32
{
std::int32_t value;
return get_number<std::int32_t, true>(input_format_t::bson, value)
&& sax->number_integer(static_cast<std::int32_t>(value));
}
case 0x12: // int64
{
std::int64_t value;
return get_number<std::int64_t, true>(input_format_t::bson, value)
&& sax->number_integer(static_cast<std::int64_t>(value));
}
case 0x0A: // null
{
return sax->null();
return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value);
}
case 0x03: // object
{
return parse_bson_internal();
}
case 0x04: // array
{
return parse_bson_array();
}
case 0x08: // boolean
{
return sax->boolean(static_cast<bool>(get()));
}
case 0x0A: // null
{
return sax->null();
}
case 0x10: // int32
{
std::int32_t value;
return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(static_cast<std::int32_t>(value));
}
case 0x12: // int64
{
std::int64_t value;
return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(static_cast<std::int64_t>(value));
}
default: // anything else not supported (yet)
{
auto element_type_str = byte_hexstring(element_type);
return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str));
char cr[3];
snprintf(cr, sizeof(cr), "%.2hhX", static_cast<unsigned char>(element_type));
return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr)));
}
}
}
/*!
@brief Read a BSON element list (as specified in the BSON-spec) from the input
and passes it to the SAX-parser.
The same binary layout is used for objects and arrays, hence it must
be indicated with the argument @a is_array which one is expected
(true --> array, false --> object).
@param is_array Determines if the element list being read is to be treated as
an object (@a is_array == false), or as an array (@a is_array == true).
@brief Read a BSON element list (as specified in the BSON-spec)
The same binary layout is used for objects and arrays, hence it must be
indicated with the argument @a is_array which one is expected
(true --> array, false --> object).
@param[in] is_array Determines if the element list being read is to be
treated as an object (@a is_array == false), or as an
array (@a is_array == true).
@return whether a valid BSON-object/array was passed to the SAX parser
*/
bool parse_bson_element_list(bool is_array)
bool parse_bson_element_list(const bool is_array)
{
while (auto element_type = get())
string_t key;
while (int element_type = get())
{
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list")))
{
@ -263,13 +287,12 @@ class binary_reader
}
const std::size_t element_type_parse_position = chars_read;
string_t key;
if (JSON_UNLIKELY(not get_bson_cstr(key)))
{
return false;
}
if (!is_array)
if (not is_array)
{
sax->key(key);
}
@ -278,7 +301,11 @@ class binary_reader
{
return false;
}
// get_bson_cstr only appends
key.clear();
}
return true;
}
@ -291,7 +318,7 @@ class binary_reader
std::int32_t documentSize;
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
if (JSON_UNLIKELY(not sax->start_array(-1)))
if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1))))
{
return false;
}
@ -304,27 +331,9 @@ class binary_reader
return sax->end_array();
}
/*!
@brief Reads in a BSON-object and pass it to the SAX-parser.
@return whether a valid BSON-value was passed to the SAX parser
*/
bool parse_bson_internal()
{
std::int32_t documentSize;
get_number<std::int32_t, true>(input_format_t::bson, documentSize);
if (JSON_UNLIKELY(not sax->start_object(-1)))
{
return false;
}
if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
{
return false;
}
return sax->end_object();
}
//////////
// CBOR //
//////////
/*!
@param[in] get_char whether a new character should be retrieved from the
@ -664,6 +673,191 @@ class binary_reader
}
}
/*!
@brief reads a CBOR string
This function first reads starting bytes to determine the expected
string length and then copies this number of bytes into a string.
Additionally, CBOR's strings with indefinite lengths are supported.
@param[out] result created string
@return whether string creation completed
*/
bool get_cbor_string(string_t& result)
{
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
{
return false;
}
switch (current)
{
// UTF-8 string (0x00..0x17 bytes follow)
case 0x60:
case 0x61:
case 0x62:
case 0x63:
case 0x64:
case 0x65:
case 0x66:
case 0x67:
case 0x68:
case 0x69:
case 0x6A:
case 0x6B:
case 0x6C:
case 0x6D:
case 0x6E:
case 0x6F:
case 0x70:
case 0x71:
case 0x72:
case 0x73:
case 0x74:
case 0x75:
case 0x76:
case 0x77:
{
return get_string(input_format_t::cbor, current & 0x1F, result);
}
case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
{
uint8_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
{
uint16_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
{
uint32_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
{
uint64_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0x7F: // UTF-8 string (indefinite length)
{
while (get() != 0xFF)
{
string_t chunk;
if (not get_cbor_string(chunk))
{
return false;
}
result.append(chunk);
}
return true;
}
default:
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
}
}
}
/*!
@param[in] len the length of the array or std::size_t(-1) for an
array of indefinite size
@return whether array creation completed
*/
bool get_cbor_array(const std::size_t len)
{
if (JSON_UNLIKELY(not sax->start_array(len)))
{
return false;
}
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
{
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
}
}
else
{
while (get() != 0xFF)
{
if (JSON_UNLIKELY(not parse_cbor_internal(false)))
{
return false;
}
}
}
return sax->end_array();
}
/*!
@param[in] len the length of the object or std::size_t(-1) for an
object of indefinite size
@return whether object creation completed
*/
bool get_cbor_object(const std::size_t len)
{
if (not JSON_UNLIKELY(sax->start_object(len)))
{
return false;
}
string_t key;
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
{
get();
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
}
}
else
{
while (get() != 0xFF)
{
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
}
}
return sax->end_object();
}
/////////////
// MsgPack //
/////////////
/*!
@return whether a valid MessagePack value was passed to the SAX parser
*/
@ -1026,301 +1220,6 @@ class binary_reader
}
}
/*!
@param[in] get_char whether a new character should be retrieved from the
input (true, default) or whether the last read
character should be considered instead
@return whether a valid UBJSON value was passed to the SAX parser
*/
bool parse_ubjson_internal(const bool get_char = true)
{
return get_ubjson_value(get_char ? get_ignore_noop() : current);
}
/*!
@brief get next character from the input
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns a -'ve valued
`std::char_traits<char>::eof()` in that case.
@return character read from the input
*/
int get()
{
++chars_read;
return (current = ia->get_character());
}
/*!
@return character read from the input after ignoring all 'N' entries
*/
int get_ignore_noop()
{
do
{
get();
}
while (current == 'N');
return current;
}
/*
@brief read a number from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[out] result number of type @a NumberType
@return whether conversion completed
@note This function needs to respect the system's endianess, because
bytes in CBOR, MessagePack, and UBJSON are stored in network order
(big endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool InputIsLittleEndian = false>
bool get_number(const input_format_t format, NumberType& result)
{
// step 1: read input into array with system's byte order
std::array<uint8_t, sizeof(NumberType)> vec;
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "number")))
{
return false;
}
// reverse byte order prior to conversion if necessary
if (is_little_endian && !InputIsLittleEndian)
{
vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current);
}
else
{
vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE
}
}
// step 2: convert array into number of type T and return
std::memcpy(&result, vec.data(), sizeof(NumberType));
return true;
}
/*!
@brief create a string by reading characters from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[in] len number of characters to read
@param[out] result string created by reading @a len bytes
@return whether string creation completed
@note We can not reserve @a len bytes for the result, because @a len
may be too large. Usually, @ref unexpect_eof() detects the end of
the input before we run out of string memory.
*/
template<typename NumberType>
bool get_string(const input_format_t format, const NumberType len, string_t& result)
{
bool success = true;
std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "string")))
{
success = false;
}
return static_cast<char>(current);
});
return success;
}
/*!
@brief reads a CBOR string
This function first reads starting bytes to determine the expected
string length and then copies this number of bytes into a string.
Additionally, CBOR's strings with indefinite lengths are supported.
@param[out] result created string
@return whether string creation completed
*/
bool get_cbor_string(string_t& result)
{
if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
{
return false;
}
switch (current)
{
// UTF-8 string (0x00..0x17 bytes follow)
case 0x60:
case 0x61:
case 0x62:
case 0x63:
case 0x64:
case 0x65:
case 0x66:
case 0x67:
case 0x68:
case 0x69:
case 0x6A:
case 0x6B:
case 0x6C:
case 0x6D:
case 0x6E:
case 0x6F:
case 0x70:
case 0x71:
case 0x72:
case 0x73:
case 0x74:
case 0x75:
case 0x76:
case 0x77:
{
return get_string(input_format_t::cbor, current & 0x1F, result);
}
case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
{
uint8_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
{
uint16_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
{
uint32_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
{
uint64_t len;
return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
}
case 0x7F: // UTF-8 string (indefinite length)
{
while (get() != 0xFF)
{
string_t chunk;
if (not get_cbor_string(chunk))
{
return false;
}
result.append(chunk);
}
return true;
}
default:
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
}
}
}
/*!
@param[in] len the length of the array or std::size_t(-1) for an
array of indefinite size
@return whether array creation completed
*/
bool get_cbor_array(const std::size_t len)
{
if (JSON_UNLIKELY(not sax->start_array(len)))
{
return false;
}
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
{
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
}
}
else
{
while (get() != 0xFF)
{
if (JSON_UNLIKELY(not parse_cbor_internal(false)))
{
return false;
}
}
}
return sax->end_array();
}
/*!
@param[in] len the length of the object or std::size_t(-1) for an
object of indefinite size
@return whether object creation completed
*/
bool get_cbor_object(const std::size_t len)
{
if (not JSON_UNLIKELY(sax->start_object(len)))
{
return false;
}
string_t key;
if (len != std::size_t(-1))
{
for (std::size_t i = 0; i < len; ++i)
{
get();
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
}
}
else
{
while (get() != 0xFF)
{
if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
{
return false;
}
if (JSON_UNLIKELY(not parse_cbor_internal()))
{
return false;
}
key.clear();
}
}
return sax->end_object();
}
/*!
@brief reads a MessagePack string
@ -1455,6 +1354,22 @@ class binary_reader
return sax->end_object();
}
////////////
// UBJSON //
////////////
/*!
@param[in] get_char whether a new character should be retrieved from the
input (true, default) or whether the last read
character should be considered instead
@return whether a valid UBJSON value was passed to the SAX parser
*/
bool parse_ubjson_internal(const bool get_char = true)
{
return get_ubjson_value(get_char ? get_ignore_noop() : current);
}
/*!
@brief reads a UBJSON string
@ -1869,6 +1784,113 @@ class binary_reader
return sax->end_object();
}
///////////////////////
// Utility functions //
///////////////////////
/*!
@brief get next character from the input
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns a -'ve valued
`std::char_traits<char>::eof()` in that case.
@return character read from the input
*/
int get()
{
++chars_read;
return (current = ia->get_character());
}
/*!
@return character read from the input after ignoring all 'N' entries
*/
int get_ignore_noop()
{
do
{
get();
}
while (current == 'N');
return current;
}
/*
@brief read a number from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[out] result number of type @a NumberType
@return whether conversion completed
@note This function needs to respect the system's endianess, because
bytes in CBOR, MessagePack, and UBJSON are stored in network order
(big endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool InputIsLittleEndian = false>
bool get_number(const input_format_t format, NumberType& result)
{
// step 1: read input into array with system's byte order
std::array<uint8_t, sizeof(NumberType)> vec;
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "number")))
{
return false;
}
// reverse byte order prior to conversion if necessary
if (is_little_endian && !InputIsLittleEndian)
{
vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current);
}
else
{
vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE
}
}
// step 2: convert array into number of type T and return
std::memcpy(&result, vec.data(), sizeof(NumberType));
return true;
}
/*!
@brief create a string by reading characters from the input
@tparam NumberType the type of the number
@param[in] format the current format (for diagnostics)
@param[in] len number of characters to read
@param[out] result string created by reading @a len bytes
@return whether string creation completed
@note We can not reserve @a len bytes for the result, because @a len
may be too large. Usually, @ref unexpect_eof() detects the end of
the input before we run out of string memory.
*/
template<typename NumberType>
bool get_string(const input_format_t format,
const NumberType len,
string_t& result)
{
bool success = true;
std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
{
get();
if (JSON_UNLIKELY(not unexpect_eof(format, "string")))
{
success = false;
}
return static_cast<char>(current);
});
return success;
}
/*!
@param[in] format the current format (for diagnostics)
@param[in] context further context information (for diagnostics)
@ -1894,7 +1916,6 @@ class binary_reader
return std::string{cr};
}
private:
/*!
@param[in] format the current format
@param[in] detail a detailed error message
@ -1934,6 +1955,7 @@ class binary_reader
return error_msg + " " + context + ": " + detail;
}
private:
/// input adapter
input_adapter_t ia = nullptr;

View File

@ -35,7 +35,34 @@ class binary_writer
}
/*!
@brief[in] j JSON value to serialize
@param[in] j JSON value to serialize
@pre j.type() == value_t::object
*/
void write_bson(const BasicJsonType& j)
{
switch (j.type())
{
case value_t::object:
{
write_bson_object(*j.m_value.object);
break;
}
case value_t::discarded:
{
break;
}
default:
{
JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name())));
break;
}
}
}
/*!
@param[in] j JSON value to serialize
*/
void write_cbor(const BasicJsonType& j)
{
@ -279,7 +306,7 @@ class binary_writer
}
/*!
@brief[in] j JSON value to serialize
@param[in] j JSON value to serialize
*/
void write_msgpack(const BasicJsonType& j)
{
@ -678,13 +705,19 @@ class binary_writer
}
}
private:
//////////
// BSON //
//////////
/*!
@return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator).
@return The size of a BSON document entry header, including the id marker
and the entry name size (and its null-terminator).
*/
static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name)
{
const auto it = name.find(static_cast<typename BasicJsonType::string_t::value_type>(0));
if (it != BasicJsonType::string_t::npos)
if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos))
{
JSON_THROW(out_of_range::create(409,
"BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")"));
@ -696,7 +729,8 @@ class binary_writer
/*!
@brief Writes the given @a element_type and @a name to the output adapter
*/
void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type)
void write_bson_entry_header(const typename BasicJsonType::string_t& name,
std::uint8_t element_type)
{
oa->write_character(static_cast<CharType>(element_type)); // boolean
oa->write_characters(
@ -707,7 +741,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and boolean value @a value
*/
void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value)
void write_bson_boolean(const typename BasicJsonType::string_t& name,
const bool value)
{
write_bson_entry_header(name, 0x08);
oa->write_character(value ? static_cast<CharType>(0x01) : static_cast<CharType>(0x00));
@ -716,7 +751,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and double value @a value
*/
void write_bson_double(const typename BasicJsonType::string_t& name, const double value)
void write_bson_double(const typename BasicJsonType::string_t& name,
const double value)
{
write_bson_entry_header(name, 0x01);
write_number<double, true>(value);
@ -733,7 +769,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and string value @a value
*/
void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value)
void write_bson_string(const typename BasicJsonType::string_t& name,
const typename BasicJsonType::string_t& value)
{
write_bson_entry_header(name, 0x02);
@ -769,7 +806,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and integer @a value
*/
void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value)
void write_bson_integer(const typename BasicJsonType::string_t& name,
const std::int64_t value)
{
if ((std::numeric_limits<std::int32_t>::min)() <= value and value <= (std::numeric_limits<std::int32_t>::max)())
{
@ -783,7 +821,6 @@ class binary_writer
}
}
/*!
@return The size of the BSON-encoded unsigned integer in @a j
*/
@ -802,7 +839,8 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and unsigned @a value
*/
void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value)
void write_bson_unsigned(const typename BasicJsonType::string_t& name,
const std::uint64_t value)
{
if (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)()))
{
@ -824,13 +862,13 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and object @a value
*/
void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value)
void write_bson_object_entry(const typename BasicJsonType::string_t& name,
const typename BasicJsonType::object_t& value)
{
write_bson_entry_header(name, 0x03); // object
write_bson_object(value);
}
/*!
@return The size of the BSON-encoded array @a value
*/
@ -849,10 +887,11 @@ class binary_writer
/*!
@brief Writes a BSON element with key @a name and array @a value
*/
void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value)
void write_bson_array(const typename BasicJsonType::string_t& name,
const typename BasicJsonType::array_t& value)
{
write_bson_entry_header(name, 0x04); // array
write_number<std::int32_t, true>(calc_bson_array_size(value));
write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_array_size(value)));
for (const auto& el : value)
{
@ -862,75 +901,95 @@ class binary_writer
oa->write_character(static_cast<CharType>(0x00));
}
/*!
@brief Calculates the size necessary to serialize the JSON value @a j with its @a name
@return The calculated size for the BSON document entry for @a j with the given @a name.
*/
static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j)
static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name,
const BasicJsonType& j)
{
const auto header_size = calc_bson_entry_header_size(name);
switch (j.type())
{
case value_t::discarded:
return 0ul;
case value_t::object:
return header_size + calc_bson_object_size(*j.m_value.object);
case value_t::array:
return header_size + calc_bson_array_size(*j.m_value.array);
case value_t::boolean:
return header_size + 1ul;
case value_t::number_float:
return header_size + 8ul;
case value_t::number_integer:
return header_size + calc_bson_integer_size(j.m_value.number_integer);
case value_t::number_unsigned:
return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned);
case value_t::string:
return header_size + calc_bson_string_size(*j.m_value.string);
case value_t::null:
return header_size + 0ul;
// LCOV_EXCL_START
default:
assert(false);
return 0ul;
// LCOV_EXCL_STOP
case value_t::discarded:
return 0ul;
case value_t::object:
return header_size + calc_bson_object_size(*j.m_value.object);
case value_t::array:
return header_size + calc_bson_array_size(*j.m_value.array);
case value_t::boolean:
return header_size + 1ul;
case value_t::number_float:
return header_size + 8ul;
case value_t::number_integer:
return header_size + calc_bson_integer_size(j.m_value.number_integer);
case value_t::number_unsigned:
return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned);
case value_t::string:
return header_size + calc_bson_string_size(*j.m_value.string);
case value_t::null:
return header_size + 0ul;
// LCOV_EXCL_STOP
};
}
/*!
@brief Serializes the JSON value @a j to BSON and associates it with the key @a name.
@param name The name to associate with the JSON entity @a j within the current BSON document
@return The size of the bson entry
@brief Serializes the JSON value @a j to BSON and associates it with the
key @a name.
@param name The name to associate with the JSON entity @a j within the
current BSON document
@return The size of the BSON entry
*/
void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j)
void write_bson_element(const typename BasicJsonType::string_t& name,
const BasicJsonType& j)
{
switch (j.type())
{
case value_t::discarded:
return;
case value_t::object:
return write_bson_object_entry(name, *j.m_value.object);
case value_t::array:
return write_bson_array(name, *j.m_value.array);
case value_t::boolean:
return write_bson_boolean(name, j.m_value.boolean);
case value_t::number_float:
return write_bson_double(name, j.m_value.number_float);
case value_t::number_integer:
return write_bson_integer(name, j.m_value.number_integer);
case value_t::number_unsigned:
return write_bson_unsigned(name, j.m_value.number_unsigned);
case value_t::string:
return write_bson_string(name, *j.m_value.string);
case value_t::null:
return write_bson_null(name);
// LCOV_EXCL_START
default:
assert(false);
return;
// LCOV_EXCL_STOP
case value_t::discarded:
return;
case value_t::object:
return write_bson_object_entry(name, *j.m_value.object);
case value_t::array:
return write_bson_array(name, *j.m_value.array);
case value_t::boolean:
return write_bson_boolean(name, j.m_value.boolean);
case value_t::number_float:
return write_bson_double(name, j.m_value.number_float);
case value_t::number_integer:
return write_bson_integer(name, j.m_value.number_integer);
case value_t::number_unsigned:
return write_bson_unsigned(name, j.m_value.number_unsigned);
case value_t::string:
return write_bson_string(name, *j.m_value.string);
case value_t::null:
return write_bson_null(name);
// LCOV_EXCL_STOP
};
}
@ -958,7 +1017,7 @@ class binary_writer
*/
void write_bson_object(const typename BasicJsonType::object_t& value)
{
write_number<std::int32_t, true>(calc_bson_object_size(value));
write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_object_size(value)));
for (const auto& el : value)
{
@ -968,64 +1027,38 @@ class binary_writer
oa->write_character(static_cast<CharType>(0x00));
}
/*!
@param[in] j JSON value to serialize
@pre j.type() == value_t::object
*/
void write_bson(const BasicJsonType& j)
//////////
// CBOR //
//////////
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
{
switch (j.type())
{
case value_t::object:
{
write_bson_object(*j.m_value.object);
break;
}
case value_t::discarded:
{
break;
}
default:
{
JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name())));
break;
}
}
return static_cast<CharType>(0xFA); // Single-Precision Float
}
private:
/*
@brief write a number to output input
@param[in] n number of type @a NumberType
@tparam NumberType the type of the number
@tparam OutputIsLittleEndian Set to true if output data is
required to be little endian
@note This function needs to respect the system's endianess, because bytes
in CBOR, MessagePack, and UBJSON are stored in network order (big
endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool OutputIsLittleEndian = false>
void write_number(const NumberType n)
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
{
// step 1: write number to array of length NumberType
std::array<CharType, sizeof(NumberType)> vec;
std::memcpy(vec.data(), &n, sizeof(NumberType));
// step 2: write array to output (with possible reordering)
if (is_little_endian and not OutputIsLittleEndian)
{
// reverse byte order prior to conversion if necessary
std::reverse(vec.begin(), vec.end());
}
oa->write_characters(vec.data(), sizeof(NumberType));
return static_cast<CharType>(0xFB); // Double-Precision Float
}
/////////////
// MsgPack //
/////////////
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
{
return static_cast<CharType>(0xCA); // float 32
}
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xCB); // float 64
}
////////////
// UBJSON //
////////////
// UBJSON: write number (floating point)
template<typename NumberType, typename std::enable_if<
std::is_floating_point<NumberType>::value, int>::type = 0>
@ -1226,26 +1259,6 @@ class binary_writer
}
}
static constexpr CharType get_cbor_float_prefix(float /*unused*/)
{
return static_cast<CharType>(0xFA); // Single-Precision Float
}
static constexpr CharType get_cbor_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xFB); // Double-Precision Float
}
static constexpr CharType get_msgpack_float_prefix(float /*unused*/)
{
return static_cast<CharType>(0xCA); // float 32
}
static constexpr CharType get_msgpack_float_prefix(double /*unused*/)
{
return static_cast<CharType>(0xCB); // float 64
}
static constexpr CharType get_ubjson_float_prefix(float /*unused*/)
{
return 'd'; // float 32
@ -1256,6 +1269,39 @@ class binary_writer
return 'D'; // float 64
}
///////////////////////
// Utility functions //
///////////////////////
/*
@brief write a number to output input
@param[in] n number of type @a NumberType
@tparam NumberType the type of the number
@tparam OutputIsLittleEndian Set to true if output data is
required to be little endian
@note This function needs to respect the system's endianess, because bytes
in CBOR, MessagePack, and UBJSON are stored in network order (big
endian) and therefore need reordering on little endian systems.
*/
template<typename NumberType, bool OutputIsLittleEndian = false>
void write_number(const NumberType n)
{
// step 1: write number to array of length NumberType
std::array<CharType, sizeof(NumberType)> vec;
std::memcpy(vec.data(), &n, sizeof(NumberType));
// step 2: write array to output (with possible reordering)
if (is_little_endian and not OutputIsLittleEndian)
{
// reverse byte order prior to conversion if necessary
std::reverse(vec.begin(), vec.end());
}
oa->write_characters(vec.data(), sizeof(NumberType));
}
private:
/// whether we can assume little endianess
const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess();

File diff suppressed because it is too large Load Diff

View File

@ -48,7 +48,7 @@ TEST_CASE("BSON")
SECTION("null")
{
json j = nullptr;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is null");
}
@ -57,14 +57,14 @@ TEST_CASE("BSON")
SECTION("true")
{
json j = true;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean");
}
SECTION("false")
{
json j = false;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean");
}
}
@ -72,29 +72,29 @@ TEST_CASE("BSON")
SECTION("number")
{
json j = 42;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number");
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number");
}
SECTION("float")
{
json j = 4.2;
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number");
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number");
}
SECTION("string")
{
json j = "not supported";
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string");
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string");
}
SECTION("array")
{
json j = std::vector<int> {1, 2, 3, 4, 5, 6, 7};
REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array");
CHECK_THROWS_AS(json::to_bson(j), json::type_error&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array");
}
}
@ -104,7 +104,7 @@ TEST_CASE("BSON")
{
{ std::string("en\0try", 6), true }
};
REQUIRE_THROWS_AS(json::to_bson(j), json::out_of_range&);
CHECK_THROWS_AS(json::to_bson(j), json::out_of_range&);
CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.409] BSON key cannot contain code point U+0000 (at byte 2)");
}
@ -541,6 +541,30 @@ TEST_CASE("BSON")
CHECK(json::from_bson(result, true, false) == j);
}
}
SECTION("Examples from http://bsonspec.org/faq.html")
{
SECTION("Example 1")
{
std::vector<std::uint8_t> input = {0x16, 0x00, 0x00, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x06, 0x00, 0x00, 0x00, 'w', 'o', 'r', 'l', 'd', 0x00, 0x00};
json parsed = json::from_bson(input);
json expected = {{"hello", "world"}};
CHECK(parsed == expected);
auto dumped = json::to_bson(parsed);
CHECK(dumped == input);
}
SECTION("Example 2")
{
std::vector<std::uint8_t> input = {0x31, 0x00, 0x00, 0x00, 0x04, 'B', 'S', 'O', 'N', 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x30, 0x00, 0x08, 0x00, 0x00, 0x00, 'a', 'w', 'e', 's', 'o', 'm', 'e', 0x00, 0x01, 0x31, 0x00, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x14, 0x40, 0x10, 0x32, 0x00, 0xc2, 0x07, 0x00, 0x00, 0x00, 0x00};
json parsed = json::from_bson(input);
json expected = {{"BSON", {"awesome", 5.05, 1986}}};
CHECK(parsed == expected);
auto dumped = json::to_bson(parsed);
//CHECK(dumped == input); // see https://github.com/nlohmann/json/pull/1254#issuecomment-432831216
CHECK(json::from_bson(dumped) == expected);
}
}
}
TEST_CASE("BSON input/output_adapters")
@ -601,10 +625,6 @@ TEST_CASE("BSON input/output_adapters")
}
}
class SaxCountdown
{
public:
@ -675,86 +695,84 @@ class SaxCountdown
int events_left = 0;
};
TEST_CASE("Incomplete BSON INPUT")
TEST_CASE("Incomplete BSON Input")
{
std::vector<uint8_t> incomplete_bson =
SECTION("Incomplete BSON Input 1")
{
0x0D, 0x00, 0x00, 0x00, // size (little endian)
0x08, // entry: boolean
'e', 'n', 't' // unexpected EOF
};
std::vector<uint8_t> incomplete_bson =
{
0x0D, 0x00, 0x00, 0x00, // size (little endian)
0x08, // entry: boolean
'e', 'n', 't' // unexpected EOF
};
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input");
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
TEST_CASE("Incomplete BSON INPUT 2")
{
std::vector<uint8_t> incomplete_bson =
SECTION("Incomplete BSON Input 2")
{
0x0D, 0x00, 0x00, 0x00, // size (little endian)
0x08, // entry: boolean, unexpected EOF
};
std::vector<uint8_t> incomplete_bson =
{
0x0D, 0x00, 0x00, 0x00, // size (little endian)
0x08, // entry: boolean, unexpected EOF
};
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
TEST_CASE("Incomplete BSON INPUT 3")
{
std::vector<uint8_t> incomplete_bson =
SECTION("Incomplete BSON Input 3")
{
0x41, 0x00, 0x00, 0x00, // size (little endian)
0x04, /// entry: embedded document
'e', 'n', 't', 'r', 'y', '\x00',
std::vector<uint8_t> incomplete_bson =
{
0x41, 0x00, 0x00, 0x00, // size (little endian)
0x04, /// entry: embedded document
'e', 'n', 't', 'r', 'y', '\x00',
0x35, 0x00, 0x00, 0x00, // size (little endian)
0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x10, 0x00, 0x02, 0x00, 0x00, 0x00
// missing input data...
};
0x35, 0x00, 0x00, 0x00, // size (little endian)
0x10, 0x00, 0x01, 0x00, 0x00, 0x00,
0x10, 0x00, 0x02, 0x00, 0x00, 0x00
// missing input data...
};
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(1);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
SaxCountdown scp(1);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
TEST_CASE("Incomplete BSON INPUT 4")
{
std::vector<uint8_t> incomplete_bson =
SECTION("Incomplete BSON Input 4")
{
0x0D, 0x00, // size (incomplete), unexpected EOF
};
std::vector<uint8_t> incomplete_bson =
{
0x0D, 0x00, // size (incomplete), unexpected EOF
};
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&);
CHECK_THROWS_WITH(json::from_bson(incomplete_bson),
"[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input");
CHECK(json::from_bson(incomplete_bson, true, false).is_discarded());
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
SaxCountdown scp(0);
CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson));
}
}
TEST_CASE("Unsupported BSON input")
{
std::vector<uint8_t> bson =
@ -774,8 +792,6 @@ TEST_CASE("Unsupported BSON input")
CHECK(not json::sax_parse(bson, &scp, json::input_format_t::bson));
}
TEST_CASE("BSON numerical data")
{
SECTION("number")
@ -1205,12 +1221,12 @@ TEST_CASE("BSON roundtrips", "[hide]")
(std::istreambuf_iterator<char>(f_bson)),
std::istreambuf_iterator<char>());
SECTION(filename + ": output adapters: std::vector<uint8_t>")
{
std::vector<uint8_t> vec;
json::to_bson(j1, vec);
CHECK(vec == packed);
}
SECTION(filename + ": output adapters: std::vector<uint8_t>")
{
std::vector<uint8_t> vec;
json::to_bson(j1, vec);
CHECK(vec == packed);
}
}
}
}

View File

@ -139,10 +139,10 @@ bool operator==(Data const& lhs, Data const& rhs)
return lhs.a == rhs.a && lhs.b == rhs.b;
}
bool operator!=(Data const& lhs, Data const& rhs)
{
return !(lhs == rhs);
}
//bool operator!=(Data const& lhs, Data const& rhs)
//{
// return !(lhs == rhs);
//}
}
/////////////////////////////////////////////////////////////////////