From 012c9665ac748d050b5cf636cebd3255f362bc78 Mon Sep 17 00:00:00 2001 From: Michael Reilly Date: Fri, 5 Jul 2019 00:13:25 -0400 Subject: [PATCH] Add binary type support to all binary file formats, as well as an internally represented binary type --- .../nlohmann/detail/conversions/to_json.hpp | 22 + .../nlohmann/detail/input/binary_reader.hpp | 316 +++++ include/nlohmann/detail/input/json_sax.hpp | 63 +- .../detail/iterators/internal_iterator.hpp | 2 + include/nlohmann/detail/macro_scope.hpp | 5 +- .../nlohmann/detail/output/binary_writer.hpp | 216 ++- include/nlohmann/detail/output/serializer.hpp | 68 +- include/nlohmann/detail/value_t.hpp | 11 +- include/nlohmann/json.hpp | 464 ++++++- include/nlohmann/json_fwd.hpp | 3 +- single_include/nlohmann/json.hpp | 1170 ++++++++++++++++- test/data/binary_data/cbor_binary.cbor | Bin 0 -> 541 bytes test/data/binary_data/cbor_binary.out | 17 + test/src/unit-bson.cpp | 35 + test/src/unit-cbor.cpp | 178 ++- test/src/unit-class_parser.cpp | 20 + test/src/unit-deserialization.cpp | 15 + test/src/unit-msgpack.cpp | 274 +++- test/src/unit-regression.cpp | 2 +- test/src/unit-ubjson.cpp | 230 ++++ test/src/unit-udt.cpp | 3 +- 21 files changed, 3008 insertions(+), 106 deletions(-) create mode 100644 test/data/binary_data/cbor_binary.cbor create mode 100644 test/data/binary_data/cbor_binary.out diff --git a/include/nlohmann/detail/conversions/to_json.hpp b/include/nlohmann/detail/conversions/to_json.hpp index a1def699f..10b21d16b 100644 --- a/include/nlohmann/detail/conversions/to_json.hpp +++ b/include/nlohmann/detail/conversions/to_json.hpp @@ -67,6 +67,28 @@ struct external_constructor } }; +template<> +struct external_constructor +{ + template + static void construct(BasicJsonType& j, const typename BasicJsonType::binary_t& b) + { + j.m_type = value_t::binary; + typename BasicJsonType::internal_binary_t value{b}; + j.m_value = value; + j.assert_invariant(); + } + + template + static void construct(BasicJsonType& j, typename BasicJsonType::binary_t&& b) + { + j.m_type = value_t::binary; + typename BasicJsonType::internal_binary_t value{std::move(b)}; + j.m_value = value; + j.assert_invariant(); + } +}; + template<> struct external_constructor { diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 25c51fc00..b8d9bec0a 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -38,6 +38,7 @@ class binary_reader using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + using internal_binary_t = typename BasicJsonType::internal_binary_t; using json_sax_t = SAX; public: @@ -207,6 +208,30 @@ class binary_reader return get_string(input_format_t::bson, len - static_cast(1), result) and get() != std::char_traits::eof(); } + /*! + @brief Parses a byte array input of length @a len from the BSON input. + @param[in] len The length of the byte array to be read. + @param[in, out] result A reference to the binary variable where the read + array is to be stored. + @tparam NumberType The type of the length @a len + @pre len >= 0 + @return `true` if the byte array was successfully parsed + */ + template + bool get_bson_binary(const NumberType len, internal_binary_t& result) + { + if (JSON_HEDLEY_UNLIKELY(len < 0)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary"))); + } + + result.has_subtype = true; // All BSON binary values have a subtype + get_number(input_format_t::bson, result.subtype); + + return get_binary(input_format_t::bson, len, result); + } + /*! @brief Read a BSON document element of the given @a element_type. @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html @@ -245,6 +270,13 @@ class binary_reader return parse_bson_array(); } + case 0x05: // binary + { + std::int32_t len; + internal_binary_t value; + return get_number(input_format_t::bson, len) and get_bson_binary(len, value) and sax->binary(value); + } + case 0x08: // boolean { return sax->boolean(get() != 0); @@ -291,6 +323,7 @@ class binary_reader bool parse_bson_element_list(const bool is_array) { string_t key; + while (int element_type = get()) { if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) @@ -465,6 +498,41 @@ class binary_reader - static_cast(number)); } + // Binary data (0x00..0x17 bytes follow) + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: // Binary data (one-byte uint8_t for n follows) + case 0x59: // Binary data (two-byte uint16_t for n follow) + case 0x5A: // Binary data (four-byte uint32_t for n follow) + case 0x5B: // Binary data (eight-byte uint64_t for n follow) + case 0x5F: // Binary data (indefinite length) + { + internal_binary_t b; + return get_cbor_binary(b) and sax->binary(b); + } + // UTF-8 string (0x00..0x17 bytes follow) case 0x60: case 0x61: @@ -780,6 +848,101 @@ class binary_reader } } + /*! + @brief reads a CBOR byte array + + This function first reads starting bytes to determine the expected + byte array length and then copies this number of bytes into the byte array. + Additionally, CBOR's byte arrays with indefinite lengths are supported. + + @param[out] result created byte array + + @return whether byte array creation completed + */ + bool get_cbor_binary(internal_binary_t& result) + { + if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor, "binary"))) + { + return false; + } + + switch (current) + { + // Binary data (0x00..0x17 bytes follow) + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + { + return get_binary(input_format_t::cbor, static_cast(current) & 0x1Fu, result); + } + + case 0x58: // Binary data (one-byte uint8_t for n follows) + { + std::uint8_t len; + return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result); + } + + case 0x59: // Binary data (two-byte uint16_t for n follow) + { + std::uint16_t len; + return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result); + } + + case 0x5A: // Binary data (four-byte uint32_t for n follow) + { + std::uint32_t len; + return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result); + } + + case 0x5B: // Binary data (eight-byte uint64_t for n follow) + { + std::uint64_t len; + return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result); + } + + case 0x5F: // Binary data (indefinite length) + { + while (get() != 0xFF) + { + internal_binary_t chunk; + if (not get_cbor_binary(chunk)) + { + return false; + } + result.insert(result.end(), chunk.begin(), chunk.end()); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary"))); + } + } + } + /*! @param[in] len the length of the array or std::size_t(-1) for an array of indefinite size @@ -1100,6 +1263,22 @@ class binary_reader case 0xC3: // true return sax->boolean(true); + case 0xC4: // bin 8 + case 0xC5: // bin 16 + case 0xC6: // bin 32 + case 0xC7: // ext 8 + case 0xC8: // ext 16 + case 0xC9: // ext 32 + case 0xD4: // fixext 1 + case 0xD5: // fixext 2 + case 0xD6: // fixext 4 + case 0xD7: // fixext 8 + case 0xD8: // fixext 16 + { + internal_binary_t b; + return get_msgpack_binary(b) and sax->binary(b); + } + case 0xCA: // float 32 { float number; @@ -1309,6 +1488,108 @@ class binary_reader } } + /*! + @brief reads a MessagePack byte array + + This function first reads starting bytes to determine the expected + byte array length and then copies this number of bytes into a byte array. + + @param[out] result created byte array + + @return whether byte array creation completed + */ + bool get_msgpack_binary(internal_binary_t& result) + { + if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "binary"))) + { + return false; + } + + switch (current) + { + case 0xC4: // bin 8 + { + std::uint8_t len; + return get_number(input_format_t::msgpack, len) and get_binary(input_format_t::msgpack, len, result); + } + + case 0xC5: // bin 16 + { + std::uint16_t len; + return get_number(input_format_t::msgpack, len) and get_binary(input_format_t::msgpack, len, result); + } + + case 0xC6: // bin 32 + { + std::uint32_t len; + return get_number(input_format_t::msgpack, len) and get_binary(input_format_t::msgpack, len, result); + } + + case 0xC7: // ext 8 + { + std::uint8_t len; + result.has_subtype = true; + return get_number(input_format_t::msgpack, len) and + get_number(input_format_t::msgpack, result.subtype) and + get_binary(input_format_t::msgpack, len, result); + } + + case 0xC8: // ext 16 + { + std::uint16_t len; + result.has_subtype = true; + return get_number(input_format_t::msgpack, len) and + get_number(input_format_t::msgpack, result.subtype) and + get_binary(input_format_t::msgpack, len, result); + } + + case 0xC9: // ext 32 + { + std::uint32_t len; + result.has_subtype = true; + return get_number(input_format_t::msgpack, len) and + get_number(input_format_t::msgpack, result.subtype) and + get_binary(input_format_t::msgpack, len, result); + } + + case 0xD4: // fixext 1 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 1, result); + } + + case 0xD5: // fixext 2 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 2, result); + } + + case 0xD6: // fixext 4 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 4, result); + } + + case 0xD7: // fixext 8 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 8, result); + } + + case 0xD8: // fixext 16 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 16, result); + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected binary type specification (0xC4-0xC9, 0xD4-0xD8); last byte: 0x" + last_token, "binary"))); + } + } + } + /*! @param[in] len the length of the array @return whether array creation completed @@ -1793,6 +2074,9 @@ class binary_reader return sax->end_object(); } + // Note, no reader for UBJSON binary types is implemented because they do + // not exist + /////////////////////// // Utility functions // /////////////////////// @@ -1900,6 +2184,38 @@ class binary_reader return success; } + /*! + @brief create a byte array by reading bytes from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of bytes to read + @param[out] result byte array created by reading @a len bytes + + @return whether byte array creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of memory. + */ + template + bool get_binary(const input_format_t format, + const NumberType len, + internal_binary_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(format, "binary"))) + { + success = false; + } + return static_cast(current); + }); + return success; + } + /*! @param[in] format the current format (for diagnostics) @param[in] context further context information (for diagnostics) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 606b7862e..47c859df0 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -31,6 +31,7 @@ struct json_sax using number_float_t = typename BasicJsonType::number_float_t; /// type for strings using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; /*! @brief a null value was read @@ -75,6 +76,14 @@ struct json_sax */ virtual bool string(string_t& val) = 0; + /*! + @brief a binary string was read + @param[in] val binary value + @return whether parsing should proceed + @note It is safe to move the passed binary. + */ + virtual bool binary(binary_t& val) = 0; + /*! @brief the beginning of an object was read @param[in] elements number of object elements or -1 if unknown @@ -149,6 +158,7 @@ class json_sax_dom_parser using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; /*! @param[in, out] r reference to a JSON value that is manipulated while @@ -202,6 +212,12 @@ class json_sax_dom_parser return true; } + bool binary(binary_t& val) + { + handle_binary(val); + return true; + } + bool start_object(std::size_t len) { ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); @@ -311,6 +327,36 @@ class json_sax_dom_parser return object_element; } + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + JSON_HEDLEY_RETURNS_NON_NULL + BasicJsonType* handle_binary(BinaryValue&& v) + { + if (ref_stack.empty()) + { + root = BasicJsonType::binary_array(std::forward(v)); + return &root; + } + + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->emplace_back(BasicJsonType::binary_array(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + + assert(ref_stack.back()->is_object()); + assert(object_element); + *object_element = BasicJsonType::binary_array(std::forward(v)); + return object_element; + } + /// the parsed JSON value BasicJsonType& root; /// stack to model hierarchy of values @@ -331,6 +377,7 @@ class json_sax_dom_callback_parser using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; using parser_callback_t = typename BasicJsonType::parser_callback_t; using parse_event_t = typename BasicJsonType::parse_event_t; @@ -385,6 +432,12 @@ class json_sax_dom_callback_parser return true; } + bool binary(binary_t& val) + { + handle_value(val); + return true; + } + bool start_object(std::size_t len) { // check callback for object start @@ -635,6 +688,7 @@ class json_sax_acceptor using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; bool null() { @@ -666,7 +720,12 @@ class json_sax_acceptor return true; } - bool start_object(std::size_t /*unused*/ = std::size_t(-1)) + bool binary(binary_t& /*unused*/) + { + return true; + } + + bool start_object(std::size_t /*unused*/ = std::size_t(-1)) { return true; } @@ -681,7 +740,7 @@ class json_sax_acceptor return true; } - bool start_array(std::size_t /*unused*/ = std::size_t(-1)) + bool start_array(std::size_t /*unused*/ = std::size_t(-1)) { return true; } diff --git a/include/nlohmann/detail/iterators/internal_iterator.hpp b/include/nlohmann/detail/iterators/internal_iterator.hpp index 2c81f723f..71539a6ac 100644 --- a/include/nlohmann/detail/iterators/internal_iterator.hpp +++ b/include/nlohmann/detail/iterators/internal_iterator.hpp @@ -18,6 +18,8 @@ template struct internal_iterator typename BasicJsonType::object_t::iterator object_iterator {}; /// iterator for JSON arrays typename BasicJsonType::array_t::iterator array_iterator {}; + /// iterator for JSON binary arrays + typename BasicJsonType::binary_t::iterator binary_iterator {}; /// generic iterator for all other types primitive_iterator_t primitive_iterator {}; }; diff --git a/include/nlohmann/detail/macro_scope.hpp b/include/nlohmann/detail/macro_scope.hpp index 27dddc6be..25e6b76eb 100644 --- a/include/nlohmann/detail/macro_scope.hpp +++ b/include/nlohmann/detail/macro_scope.hpp @@ -113,9 +113,10 @@ class StringType, class BooleanType, class NumberIntegerType, \ class NumberUnsignedType, class NumberFloatType, \ template class AllocatorType, \ - template class JSONSerializer> + template class JSONSerializer, \ + class BinaryType> #define NLOHMANN_BASIC_JSON_TPL \ basic_json + AllocatorType, JSONSerializer, BinaryType> diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 0a80d26b0..a899d0fe6 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -26,6 +26,7 @@ template class binary_writer { using string_t = typename BasicJsonType::string_t; + using internal_binary_t = typename BasicJsonType::internal_binary_t; public: /*! @@ -258,6 +259,45 @@ class binary_writer break; } + case value_t::binary: + { + // step 1: write control byte and the binary array size + const auto N = j.m_value.binary->size(); + if (N <= 0x17) + { + write_number(static_cast(0x40 + N)); + } + else if (N <= (std::numeric_limits::max)()) + { + oa->write_character(to_char_type(0x58)); + write_number(static_cast(N)); + } + else if (N <= (std::numeric_limits::max)()) + { + oa->write_character(to_char_type(0x59)); + write_number(static_cast(N)); + } + else if (N <= (std::numeric_limits::max)()) + { + oa->write_character(to_char_type(0x5A)); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= (std::numeric_limits::max)()) + { + oa->write_character(to_char_type(0x5B)); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // step 2: write each element + oa->write_characters( + reinterpret_cast(j.m_value.binary->data()), + N); + + break; + } + case value_t::object: { // step 1: write control byte and the object size @@ -506,6 +546,101 @@ class binary_writer break; } + case value_t::binary: + { + // step 0: determine if the binary type has a set subtype to + // determine whether or not to use the ext or fixext types + const bool use_ext = j.m_value.binary->has_subtype; + + // step 1: write control byte and the byte string length + const auto N = j.m_value.binary->size(); + if (N <= (std::numeric_limits::max)()) + { + std::uint8_t output_type; + bool fixed = true; + if (use_ext) + { + switch (N) + { + case 1: + output_type = 0xD4; // fixext 1 + break; + case 2: + output_type = 0xD5; // fixext 2 + break; + case 4: + output_type = 0xD6; // fixext 4 + break; + case 8: + output_type = 0xD7; // fixext 8 + break; + case 16: + output_type = 0xD8; // fixext 16 + break; + default: + output_type = 0xC7; // ext 8 + fixed = false; + break; + } + + } + else + { + output_type = 0xC4; // bin 8 + fixed = false; + } + + oa->write_character(to_char_type(output_type)); + if (not fixed) + { + write_number(static_cast(N)); + } + } + else if (N <= (std::numeric_limits::max)()) + { + std::uint8_t output_type; + if (use_ext) + { + output_type = 0xC8; // ext 16 + } + else + { + output_type = 0xC5; // bin 16 + } + + oa->write_character(to_char_type(output_type)); + write_number(static_cast(N)); + } + else if (N <= (std::numeric_limits::max)()) + { + std::uint8_t output_type; + if (use_ext) + { + output_type = 0xC9; // ext 32 + } + else + { + output_type = 0xC6; // bin 32 + } + + oa->write_character(to_char_type(output_type)); + write_number(static_cast(N)); + } + + // step 1.5: if this is an ext type, write the subtype + if (use_ext) + { + write_number(j.m_value.binary->subtype); + } + + // step 2: write the byte string + oa->write_characters( + reinterpret_cast(j.m_value.binary->data()), + N); + + break; + } + case value_t::object: { // step 1: write control byte and the object size @@ -649,6 +784,49 @@ class binary_writer break; } + case value_t::binary: + { + if (add_prefix) + { + oa->write_character(to_char_type('[')); + } + + if (use_type and not j.m_value.binary->empty()) + { + assert(use_count); + oa->write_character(to_char_type('$')); + oa->write_character('U'); + } + + if (use_count) + { + oa->write_character(to_char_type('#')); + write_number_with_ubjson_prefix(j.m_value.binary->size(), true); + } + + if (use_type) + { + oa->write_characters( + reinterpret_cast(j.m_value.binary->data()), + j.m_value.binary->size()); + } + else + { + for (size_t i = 0; i < j.m_value.binary->size(); ++i) + { + oa->write_character(to_char_type('U')); + oa->write_character(j.m_value.binary->data()[i]); + } + } + + if (not use_count) + { + oa->write_character(to_char_type(']')); + } + + break; + } + case value_t::object: { if (add_prefix) @@ -871,6 +1049,14 @@ class binary_writer return sizeof(std::int32_t) + embedded_document_size + 1ul; } + /*! + @return The size of the BSON-encoded binary array @a value + */ + static std::size_t calc_bson_binary_size(const typename BasicJsonType::internal_binary_t& value) + { + return sizeof(std::int32_t) + value.size() + 1ul; + } + /*! @brief Writes a BSON element with key @a name and array @a value */ @@ -890,6 +1076,27 @@ class binary_writer oa->write_character(to_char_type(0x00)); } + /*! + @brief Writes a BSON element with key @a name and binary value @a value + */ + void write_bson_binary(const string_t& name, + const internal_binary_t& value) + { + write_bson_entry_header(name, 0x05); + + write_number(static_cast(value.size())); + std::uint8_t subtype = 0x00; // Generic Binary Subtype + if (value.has_subtype) + { + subtype = value.subtype; + } + write_number(subtype); + + oa->write_characters( + reinterpret_cast(value.data()), + value.size()); + } + /*! @brief Calculates the size necessary to serialize the JSON value @a j with its @a name @return The calculated size for the BSON document entry for @a j with the given @a name. @@ -906,6 +1113,9 @@ class binary_writer case value_t::array: return header_size + calc_bson_array_size(*j.m_value.array); + case value_t::binary: + return header_size + calc_bson_binary_size(*j.m_value.binary); + case value_t::boolean: return header_size + 1ul; @@ -950,6 +1160,9 @@ class binary_writer case value_t::array: return write_bson_array(name, *j.m_value.array); + case value_t::binary: + return write_bson_binary(name, *j.m_value.binary); + case value_t::boolean: return write_bson_boolean(name, j.m_value.boolean); @@ -1230,7 +1443,8 @@ class binary_writer case value_t::string: return 'S'; - case value_t::array: + case value_t::array: // fallthrough + case value_t::binary: return '['; case value_t::object: diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 0a4ddbdaa..169012e0e 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -45,6 +45,7 @@ class serializer using number_float_t = typename BasicJsonType::number_float_t; using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using binary_t = typename BasicJsonType::binary_t; static constexpr std::uint8_t UTF8_ACCEPT = 0; static constexpr std::uint8_t UTF8_REJECT = 1; @@ -83,16 +84,19 @@ class serializer - strings and object keys are escaped using `escape_string()` - integer numbers are converted implicitly via `operator<<` - floating-point numbers are converted to a string using `"%g"` format + - if specified to, binary values are output using the syntax `b[]`, otherwise an exception is thrown - @param[in] val value to serialize - @param[in] pretty_print whether the output shall be pretty-printed - @param[in] indent_step the indent level - @param[in] current_indent the current indent level (only used internally) + @param[in] val value to serialize + @param[in] pretty_print whether the output shall be pretty-printed + @param[in] indent_step the indent level + @param[in] current_indent the current indent level (only used internally) + @param[in] serialize_binary whether the output shall include non-standard binary output */ void dump(const BasicJsonType& val, const bool pretty_print, const bool ensure_ascii, const unsigned int indent_step, - const unsigned int current_indent = 0) + const unsigned int current_indent = 0, + const bool serialize_binary = false) { switch (val.m_type) { @@ -236,6 +240,55 @@ class serializer return; } + case value_t::binary: + { + if (not serialize_binary) + { + JSON_THROW(type_error::create(317, "cannot serialize binary data to text JSON")); + } + + if (val.m_value.binary->empty()) + { + o->write_characters("b[]", 3); + } + else if (pretty_print) + { + o->write_characters("b[", 2); + for (auto i = val.m_value.binary->cbegin(); + i != val.m_value.binary->cend() - 1; ++i) + { + dump_integer(*i); + o->write_character(','); + int index = i - val.m_value.binary->cbegin(); + if (index % 16 == 0) + { + o->write_character('\n'); + } + else + { + o->write_character(' '); + } + } + + dump_integer(val.m_value.binary->back()); + o->write_character(']'); + } + else + { + o->write_characters("b[", 2); + for (auto i = val.m_value.binary->cbegin(); + i != val.m_value.binary->cend() - 1; ++i) + { + dump_integer(*i); + o->write_character(','); + } + + dump_integer(val.m_value.binary->back()); + o->write_character(']'); + } + return; + } + case value_t::boolean: { if (val.m_value.boolean) @@ -592,7 +645,8 @@ class serializer */ template::value or - std::is_same::value, + std::is_same::value or + std::is_same::value, int> = 0> void dump_integer(NumberType x) { @@ -630,7 +684,7 @@ class serializer if (is_negative) { *buffer_ptr = '-'; - abs_value = remove_sign(x); + abs_value = remove_sign(static_cast(x)); // account one more byte for the minus sign n_chars = 1 + count_digits(abs_value); diff --git a/include/nlohmann/detail/value_t.hpp b/include/nlohmann/detail/value_t.hpp index 301fbe003..86b4630b9 100644 --- a/include/nlohmann/detail/value_t.hpp +++ b/include/nlohmann/detail/value_t.hpp @@ -48,6 +48,7 @@ enum class value_t : std::uint8_t number_integer, ///< number value (signed integer) number_unsigned, ///< number value (unsigned integer) number_float, ///< number value (floating-point) + binary, ///< binary array (ordered collection of bytes) discarded ///< discarded by the parser callback function }; @@ -55,17 +56,21 @@ enum class value_t : std::uint8_t @brief comparison operator for JSON types Returns an ordering that is similar to Python: -- order: null < boolean < number < object < array < string +- order: null < boolean < number < object < array < string < binary - furthermore, each type is not smaller than itself - discarded values are not comparable +- binary is represented as a b"" string in python and directly comparable to a + string; however, making a binary array directly comparable with a string would + be surprising behavior in a JSON file. @since version 1.0.0 */ inline bool operator<(const value_t lhs, const value_t rhs) noexcept { - static constexpr std::array order = {{ + static constexpr std::array order = {{ 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, - 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */ + 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */, + 6 /* binary */ } }; diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index bdffe109d..88b82efb8 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -97,6 +97,9 @@ default; will be used in @ref number_integer_t) `uint64_t` by default; will be used in @ref number_unsigned_t) @tparam NumberFloatType type for JSON floating-point numbers (`double` by default; will be used in @ref number_float_t) +@tparam BinaryType type for packed binary data for compatibility with binary +serialization formats (`std::vector` by default; will be used in +@ref binary_t) @tparam AllocatorType type of the allocator to use (`std::allocator` by default) @tparam JSONSerializer the serializer to resolve internal calls to `to_json()` @@ -818,6 +821,85 @@ class basic_json */ using number_float_t = NumberFloatType; + /*! + @brief a type for a packed binary type + + This type is a type designed to carry binary data that appears in various + serialized formats, such as CBOR's Major Type 2, MessagePack's bin, and + BSON's generic binary subtype. This type is NOT a part of standard JSON and + exists solely for compatibility with these binary types. As such, it is + simply defined as an ordered sequence of zero or more byte values. + + Additionally, as an implementation detail, the subtype of the binary data is + carried around as a `unint8_t`, which is compatible with both of the binary + data formats that use binary subtyping, (though the specific numbering is + incompatible with each other, and it is up to the user to translate between + them). + + [CBOR's RFC 7049](https://tools.ietf.org/html/rfc7049) describes this type + as: + > Major type 2: a byte string. The string's length in bytes is + > represented following the rules for positive integers (major type + > 0). + + [MessagePack's documentation on the bin type + family](https://github.com/msgpack/msgpack/blob/master/spec.md#bin-format-family) + describes this type as: + > Bin format family stores an byte array in 2, 3, or 5 bytes of extra bytes + > in addition to the size of the byte array. + + [BSON's specifications](http://bsonspec.org/spec.html) describe several + binary types; however, this type is intended to represent the generic binary + type which has the description: + > Generic binary subtype - This is the most commonly used binary subtype and + > should be the 'default' for drivers and tools. + + None of these impose any limitations on the internal representation other + than the basic unit of storage be some type of array whose parts are + decomposible into bytes. + + The default representation of this binary format is a + `std::vector`, which is a very common way to represent a byte + array in modern C++. + + #### Default type + + The default values for @a BinaryType is `std::vector` + + #### Storage + + Binary Arrays are stored as pointers in a @ref basic_json type. That is, + for any access to array values, a pointer of the type `binary_t*` must be + dereferenced. + + @sa @ref array_t -- type for an array value + + @since version 3.8.0 + */ + + using binary_t = BinaryType; + + /*! + @brief an internal type for a backed binary type + + This type is designed to be `binary_t` but with the subtype implementation + detail. This type exists so that the user does not have to specify a struct + his- or herself with a specific naming scheme in order to override the + binary type. I.e. it's for ease of use. + */ + struct internal_binary_t : public BinaryType + { + using BinaryType::BinaryType; + internal_binary_t() : BinaryType() {} + internal_binary_t(BinaryType const& bint) : BinaryType(bint) {} + internal_binary_t(BinaryType&& bint) : BinaryType(std::move(bint)) {} + + // TOOD: If minimum C++ version is ever bumped to C++17, this field + // deserves to be a std::optional + std::uint8_t subtype = 0; + bool has_subtype = false; + }; + /// @} private: @@ -860,6 +942,7 @@ class basic_json number | number_integer | @ref number_integer_t number | number_unsigned | @ref number_unsigned_t number | number_float | @ref number_float_t + binary | binary | pointer to @ref internal_binary_t null | null | *no value is stored* @note Variable-length types (objects, arrays, and strings) are stored as @@ -876,6 +959,8 @@ class basic_json array_t* array; /// string (stored with pointer to save storage) string_t* string; + /// binary (stored with pointer to save storage) + internal_binary_t* binary; /// boolean boolean_t boolean; /// number (integer) @@ -918,6 +1003,12 @@ class basic_json break; } + case value_t::binary: + { + binary = create(); + break; + } + case value_t::boolean: { boolean = boolean_t(false); @@ -996,6 +1087,18 @@ class basic_json array = create(std::move(value)); } + /// constructor for binary arrays + json_value(const binary_t& value) + { + binary = create(value); + } + + /// constructor for rvalue binary arrays + json_value(binary_t&& value) + { + binary = create(std::move(value)); + } + void destroy(value_t t) noexcept { // flatten the current json_value to a heap-allocated stack @@ -1071,6 +1174,14 @@ class basic_json break; } + case value_t::binary: + { + AllocatorType alloc; + std::allocator_traits::destroy(alloc, binary); + std::allocator_traits::deallocate(alloc, binary, 1); + break; + } + default: { break; @@ -1191,6 +1302,7 @@ class basic_json number | `0` object | `{}` array | `[]` + binary | empty array @param[in] v the type of the value to create @@ -1262,6 +1374,12 @@ class basic_json @ref number_float_t, and all convertible number types such as `int`, `size_t`, `int64_t`, `float` or `double` can be used. - **boolean**: @ref boolean_t / `bool` can be used. + - **binary**: @ref binary_t / `std::vector` may be used, + unfortunately because string literals cannot be distinguished from binary + character arrays by the C++ type system, all types compatible with `const + char*` will be directed to the string constructor instead. This is both + for backwards compatibility, and due to the fact that a binary type is not + a standard JSON type. See the examples below. @@ -1343,6 +1461,7 @@ class basic_json using other_string_t = typename BasicJsonType::string_t; using other_object_t = typename BasicJsonType::object_t; using other_array_t = typename BasicJsonType::array_t; + using other_binary_t = typename BasicJsonType::internal_binary_t; switch (val.type()) { @@ -1367,6 +1486,9 @@ class basic_json case value_t::array: JSONSerializer::to_json(*this, val.template get_ref()); break; + case value_t::binary: + JSONSerializer::to_json(*this, val.template get_ref()); + break; case value_t::null: *this = nullptr; break; @@ -1505,6 +1627,78 @@ class basic_json assert_invariant(); } + /*! + @brief explicitly create a binary array from an already constructed copy of + its base type + + Creates a JSON binary array value from a given `binary_t`. Binary values are + part of various binary formats, such as CBOR, MsgPack, and BSON. And this + constructor is used to create a value for serialization to those formats. + + @note Note, this function exists because of the difficulty in correctly + specifying the correct template overload in the standard value ctor, as both + JSON arrays and JSON binary arrays are backed with some form of a + `std::vector`. Because JSON binary arrays are a non-standard extension it + was decided that it would be best to prevent automatic initialization of a + binary array type, for backwards compatibility and so it does not happen on + accident. + + @param[in] init `binary_t` with JSON values to create a binary array from + + @return JSON binary array value + + @complexity Linear in the size of @a init. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @since version 3.8.0 + */ + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json binary_array(binary_t const& init) + { + auto res = basic_json(); + res.m_type = value_t::binary; + res.m_value = init; + return res; + } + + /*! + @brief explicitly create a binary array from an already constructed rvalue + copy of its base type + + Creates a JSON binary array value from a given `binary_t`. Binary values are + part of various binary formats, such as CBOR, MsgPack, and BSON. And this + constructor is used to create a value for serialization to those formats. + + @note Note, this function exists because of the difficulty in correctly + specifying the correct template overload in the standard value ctor, as both + JSON arrays and JSON binary arrays are backed with some form of a + `std::vector`. Because JSON binary arrays are a non-standard extension it + was decided that it would be best to prevent automatic initialization of a + binary array type, for backwards compatibility and so it doesn't happen on + accident. + + @param[in] init `binary_t` with JSON values to create a binary array from + + @return JSON binary array value + + @complexity Linear in the size of @a init. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @since version 3.8.0 + */ + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json binary_array(binary_t&& init) + { + auto res = basic_json(); + res.m_type = value_t::binary; + res.m_value = std::move(init); + return res; + } + /*! @brief explicitly create an array from an initializer list @@ -1760,6 +1954,13 @@ class basic_json break; } + case value_t::binary: + { + m_value.binary = create(first.m_it.binary_iterator, + last.m_it.binary_iterator); + break; + } + default: JSON_THROW(invalid_iterator::create(206, "cannot construct with iterators from " + std::string(first.m_object->type_name()))); @@ -1853,6 +2054,12 @@ class basic_json break; } + case value_t::binary: + { + m_value = *other.m_value.binary; + break; + } + default: break; } @@ -1993,6 +2200,11 @@ class basic_json possible values: `strict` (throws and exception in case a decoding error occurs; default), `replace` (replace invalid UTF-8 sequences with U+FFFD), and `ignore` (ignore invalid UTF-8 sequences during serialization). + @param[in] serialize_binary Whether or not to allow serialization of binary + types to JSON. Because binary types are non-standard, this will produce + non-conformant JSON, and is disabled by default. This flag is primarily + useful for debugging. Will output the binary value as a list of 8-bit + numbers prefixed by "b" (e.g. "bindata" = b[3, 0, 42, 255]). @return string containing the serialization of the JSON value @@ -2017,18 +2229,19 @@ class basic_json string_t dump(const int indent = -1, const char indent_char = ' ', const bool ensure_ascii = false, - const error_handler_t error_handler = error_handler_t::strict) const + const error_handler_t error_handler = error_handler_t::strict, + const bool serialize_binary = false) const { string_t result; serializer s(detail::output_adapter(result), indent_char, error_handler); if (indent >= 0) { - s.dump(*this, true, ensure_ascii, static_cast(indent)); + s.dump(*this, true, ensure_ascii, static_cast(indent), serialize_binary); } else { - s.dump(*this, false, ensure_ascii, 0); + s.dump(*this, false, ensure_ascii, 0, serialize_binary); } return result; @@ -2051,6 +2264,7 @@ class basic_json number (floating-point) | value_t::number_float object | value_t::object array | value_t::array + binary | value_t::binary discarded | value_t::discarded @complexity Constant. @@ -2093,12 +2307,13 @@ class basic_json @sa @ref is_string() -- returns whether JSON value is a string @sa @ref is_boolean() -- returns whether JSON value is a boolean @sa @ref is_number() -- returns whether JSON value is a number + @sa @ref is_binary() -- returns whether JSON value is a binary array @since version 1.0.0 */ constexpr bool is_primitive() const noexcept { - return is_null() or is_string() or is_boolean() or is_number(); + return is_null() or is_string() or is_boolean() or is_number() or is_binary(); } /*! @@ -2353,6 +2568,28 @@ class basic_json return m_type == value_t::string; } + /*! + @brief return whether value is a binary array + + This function returns true if and only if the JSON value is a binary array. + + @return `true` if type is binary array, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_binary()` for all JSON + types.,is_binary} + + @since version 3.8.0 + */ + constexpr bool is_binary() const noexcept + { + return m_type == value_t::binary; + } + /*! @brief return whether value is discarded @@ -2508,6 +2745,18 @@ class basic_json return is_number_float() ? &m_value.number_float : nullptr; } + /// get a pointer to the value (binary) + binary_t* get_impl_ptr(binary_t* /*unused*/) noexcept + { + return is_binary() ? m_value.binary : nullptr; + } + + /// get a pointer to the value (binary) + constexpr const binary_t* get_impl_ptr(const binary_t* /*unused*/) const noexcept + { + return is_binary() ? m_value.binary : nullptr; + } + /*! @brief helper function to implement get_ref() @@ -3555,6 +3804,120 @@ class basic_json return value(ptr, string_t(default_value)); } + /*! + @brief return the binary subtype + + Returns the numerical subtype of the JSON value, if the JSON value is of + type "binary", and it has a subtype. If it does not have a subtype (or the + object is not of type binary) this function will return size_t(-1) as a + sentinel value. + + @return the numerical subtype of the binary JSON value + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @sa @ref set_subtype() -- sets the binary subtype + @sa @ref clear_subtype() -- clears the binary subtype + @sa @ref has_subtype() -- returns whether or not the binary value has a + subtype + + @since version 3.8.0 + */ + std::size_t get_subtype() const noexcept + { + if (is_binary() and m_value.binary->has_subtype) + { + return m_value.binary->subtype; + } + + return std::size_t(-1); + } + + /*! + @brief sets the binary subtype + + Sets the binary subtype of the JSON value, also flags a binary JSON value as + having a subtype, which has implications for serialization to msgpack (will + prefer ext file formats over bin). If the JSON value is not a binary value, + this function does nothing. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @sa @ref get_subtype() -- return the binary subtype + @sa @ref clear_subtype() -- clears the binary subtype + @sa @ref has_subtype() -- returns whether or not the binary value has a + subtype + + @since version 3.8.0 + */ + + void set_subtype(std::uint8_t subtype) noexcept + { + if (is_binary()) + { + m_value.binary->has_subtype = true; + m_value.binary->subtype = subtype; + } + } + + /*! + @brief clears the binary subtype + + Clears the binary subtype of the JSON value, also flags a binary JSON value + as not having a subtype, which has implications for serialization to msgpack + (will prefer bin file formats over ext). If the JSON value is not a binary + value, this function does nothing. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @sa @ref get_subtype() -- return the binary subtype + @sa @ref set_subtype() -- sets the binary subtype + @sa @ref has_subtype() -- returns whether or not the binary value has a + subtype + + @since version 3.8.0 + */ + void clear_subtype() noexcept + { + if (is_binary()) + { + m_value.binary->has_subtype = false; + m_value.binary->subtype = 0; + } + } + + /*! + @brief return whether or not the binary subtype has a value + + Returns whether or not the binary subtype has a value. + + @return whether or not the binary subtype has a value. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @sa @ref get_subtype() -- return the binary subtype + @sa @ref set_subtype() -- sets the binary subtype + @sa @ref clear_subtype() -- clears the binary subtype + + @since version 3.8.0 + */ + bool has_subtype() const noexcept + { + return is_binary() and m_value.binary->has_subtype; + } + /*! @brief access the first element @@ -3562,8 +3925,8 @@ class basic_json container `c`, the expression `c.front()` is equivalent to `*c.begin()`. @return In case of a structured type (array or object), a reference to the - first element is returned. In case of number, string, or boolean values, a - reference to the value is returned. + first element is returned. In case of number, string, boolean, or binary + values, a reference to the value is returned. @complexity Constant. @@ -3605,8 +3968,8 @@ class basic_json @endcode @return In case of a structured type (array or object), a reference to the - last element is returned. In case of number, string, or boolean values, a - reference to the value is returned. + last element is returned. In case of number, string, boolean, or binary + values, a reference to the value is returned. @complexity Constant. @@ -3672,7 +4035,7 @@ class basic_json @complexity The complexity depends on the type: - objects: amortized constant - arrays: linear in distance between @a pos and the end of the container - - strings: linear in the length of the string + - strings and binary: linear in the length of the member - other types: constant @liveexample{The example shows the result of `erase()` for different JSON @@ -3708,6 +4071,7 @@ class basic_json case value_t::number_integer: case value_t::number_unsigned: case value_t::string: + case value_t::binary: { if (JSON_HEDLEY_UNLIKELY(not pos.m_it.primitive_iterator.is_begin())) { @@ -3721,6 +4085,13 @@ class basic_json std::allocator_traits::deallocate(alloc, m_value.string, 1); m_value.string = nullptr; } + else if (is_binary()) + { + AllocatorType alloc; + std::allocator_traits::destroy(alloc, m_value.binary); + std::allocator_traits::deallocate(alloc, m_value.binary, 1); + m_value.binary = nullptr; + } m_type = value_t::null; assert_invariant(); @@ -3778,7 +4149,7 @@ class basic_json - objects: `log(size()) + std::distance(first, last)` - arrays: linear in the distance between @a first and @a last, plus linear in the distance between @a last and end of the container - - strings: linear in the length of the string + - strings and binary: linear in the length of the member - other types: constant @liveexample{The example shows the result of `erase()` for different JSON @@ -3813,6 +4184,7 @@ class basic_json case value_t::number_integer: case value_t::number_unsigned: case value_t::string: + case value_t::binary: { if (JSON_HEDLEY_LIKELY(not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end())) @@ -3827,6 +4199,13 @@ class basic_json std::allocator_traits::deallocate(alloc, m_value.string, 1); m_value.string = nullptr; } + else if (is_binary()) + { + AllocatorType alloc; + std::allocator_traits::destroy(alloc, m_value.binary); + std::allocator_traits::deallocate(alloc, m_value.binary, 1); + m_value.binary = nullptr; + } m_type = value_t::null; assert_invariant(); @@ -4546,6 +4925,7 @@ class basic_json boolean | `false` string | `false` number | `false` + binary | `false` object | result of function `object_t::empty()` array | result of function `array_t::empty()` @@ -4617,6 +4997,7 @@ class basic_json boolean | `1` string | `1` number | `1` + binary | `1` object | result of function object_t::size() array | result of function array_t::size() @@ -4691,6 +5072,7 @@ class basic_json boolean | `1` (same as `size()`) string | `1` (same as `size()`) number | `1` (same as `size()`) + binary | `1` (same as `size()`) object | result of function `object_t::max_size()` array | result of function `array_t::max_size()` @@ -4763,6 +5145,7 @@ class basic_json boolean | `false` string | `""` number | `0` + binary | An empty byte vector object | `{}` array | `[]` @@ -4820,6 +5203,12 @@ class basic_json break; } + case value_t::binary: + { + m_value.binary->clear(); + break; + } + case value_t::array: { m_value.array->clear(); @@ -5696,6 +6085,9 @@ class basic_json case value_t::number_float: return lhs.m_value.number_float == rhs.m_value.number_float; + case value_t::binary: + return *lhs.m_value.binary == *rhs.m_value.binary; + default: return false; } @@ -5856,6 +6248,9 @@ class basic_json case value_t::number_float: return (lhs.m_value.number_float) < (rhs.m_value.number_float); + case value_t::binary: + return (lhs.m_value.binary) < (rhs.m_value.binary); + default: return false; } @@ -6420,6 +6815,7 @@ class basic_json number | `"number"` (for all number types) object | `"object"` array | `"array"` + binary | `"binary"` discarded | `"discarded"` @exceptionsafety No-throw guarantee: this function never throws exceptions. @@ -6451,6 +6847,8 @@ class basic_json return "string"; case value_t::boolean: return "boolean"; + case value_t::binary: + return "binary"; case value_t::discarded: return "discarded"; default: @@ -6526,6 +6924,11 @@ class basic_json object | *size*: 256..65535 | map (2 bytes follow) | 0xB9 object | *size*: 65536..4294967295 | map (4 bytes follow) | 0xBA object | *size*: 4294967296..18446744073709551615 | map (8 bytes follow) | 0xBB + binary | *size*: 0..23 | byte string | 0x40..0x57 + binary | *size*: 23..255 | byte string (1 byte follow) | 0x58 + binary | *size*: 256..65535 | byte string (2 bytes follow) | 0x59 + binary | *size*: 65536..4294967295 | byte string (4 bytes follow) | 0x5A + binary | *size*: 4294967296..18446744073709551615 | byte string (8 bytes follow) | 0x5B @note The mapping is **complete** in the sense that any JSON value type can be converted to a CBOR value. @@ -6535,10 +6938,10 @@ class basic_json function which serializes NaN or Infinity to `null`. @note The following CBOR types are not used in the conversion: - - byte strings (0x40..0x5F) - UTF-8 strings terminated by "break" (0x7F) - arrays terminated by "break" (0x9F) - maps terminated by "break" (0xBF) + - byte strings terminated by "break" (0x5F) - date/time (0xC0..0xC1) - bignum (0xC2..0xC3) - decimal fraction (0xC4) @@ -6625,20 +7028,21 @@ class basic_json object | *size*: 0..15 | fix map | 0x80..0x8F object | *size*: 16..65535 | map 16 | 0xDE object | *size*: 65536..4294967295 | map 32 | 0xDF + binary | *size*: 0..255 | bin 8 | 0xC4 + binary | *size*: 256..65535 | bin 16 | 0xC5 + binary | *size*: 65536..4294967295 | bin 32 | 0xC6 @note The mapping is **complete** in the sense that any JSON value type can be converted to a MessagePack value. @note The following values can **not** be converted to a MessagePack value: - strings with more than 4294967295 bytes + - byte strings with more than 4294967295 bytes - arrays with more than 4294967295 elements - objects with more than 4294967295 elements @note The following MessagePack types are not used in the conversion: - - bin 8 - bin 32 (0xC4..0xC6) - - ext 8 - ext 32 (0xC7..0xC9) - float 32 (0xCA) - - fixext 1 - fixext 16 (0xD4..0xD8) @note Any MessagePack output created @ref to_msgpack can be successfully parsed by @ref from_msgpack. @@ -6741,6 +7145,12 @@ class basic_json the benefit of this parameter is that the receiving side is immediately informed on the number of elements of the container. + @note If the JSON data contains the binary type, the value stored is a list + of integers, as suggested by the UBJSON documentation. In particular, + this means that serialization and the deserialization of a JSON + containing binary values into UBJSON and back will result in a + different JSON object. + @param[in] j JSON value to serialize @param[in] use_size whether to add size annotations to container types @param[in] use_type whether to add type annotations to container types @@ -6805,6 +7215,7 @@ class basic_json string | *any value* | string | 0x02 array | *any value* | document | 0x04 object | *any value* | document | 0x03 + binary | *any value* | binary | 0x05 @warning The mapping is **incomplete**, since only JSON-objects (and things contained therein) can be serialized to BSON. @@ -6886,7 +7297,11 @@ class basic_json Negative integer | number_integer | 0x39 Negative integer | number_integer | 0x3A Negative integer | number_integer | 0x3B - Negative integer | number_integer | 0x40..0x57 + Byte string | binary | 0x40..0x57 + Byte string | binary | 0x58 + Byte string | binary | 0x59 + Byte string | binary | 0x5A + Byte string | binary | 0x5B UTF-8 string | string | 0x60..0x77 UTF-8 string | string | 0x78 UTF-8 string | string | 0x79 @@ -6915,7 +7330,6 @@ class basic_json @warning The mapping is **incomplete** in the sense that not all CBOR types can be converted to a JSON value. The following CBOR types are not supported and will yield parse errors (parse_error.112): - - byte strings (0x40..0x5F) - date/time (0xC0..0xC1) - bignum (0xC2..0xC3) - decimal fraction (0xC4) @@ -7026,15 +7440,19 @@ class basic_json array 32 | array | 0xDD map 16 | object | 0xDE map 32 | object | 0xDF + bin 8 | binary | 0xC4 + bin 16 | binary | 0xC5 + bin 32 | binary | 0xC6 + ext 8 | binary | 0xC7 + ext 16 | binary | 0xC8 + ext 32 | binary | 0xC9 + fixext 1 | binary | 0xD4 + fixext 2 | binary | 0xD5 + fixext 4 | binary | 0xD6 + fixext 8 | binary | 0xD7 + fixext 16 | binary | 0xD8 negative fixint | number_integer | 0xE0-0xFF - @warning The mapping is **incomplete** in the sense that not all - MessagePack types can be converted to a JSON value. The following - MessagePack types are not supported and will yield parse errors: - - bin 8 - bin 32 (0xC4..0xC6) - - ext 8 - ext 32 (0xC7..0xC9) - - fixext 1 - fixext 16 (0xD4..0xD8) - @note Any MessagePack output created @ref to_msgpack can be successfully parsed by @ref from_msgpack. diff --git a/include/nlohmann/json_fwd.hpp b/include/nlohmann/json_fwd.hpp index 28fd10d45..824e86a1f 100644 --- a/include/nlohmann/json_fwd.hpp +++ b/include/nlohmann/json_fwd.hpp @@ -33,7 +33,8 @@ template class ObjectType = class NumberFloatType = double, template class AllocatorType = std::allocator, template class JSONSerializer = - adl_serializer> + adl_serializer, + class BinaryType = std::vector> class basic_json; /*! diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index de146ed61..19ab76f58 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -2133,12 +2133,13 @@ JSON_HEDLEY_DIAGNOSTIC_POP class StringType, class BooleanType, class NumberIntegerType, \ class NumberUnsignedType, class NumberFloatType, \ template class AllocatorType, \ - template class JSONSerializer> + template class JSONSerializer, \ + class BinaryType> #define NLOHMANN_BASIC_JSON_TPL \ basic_json + AllocatorType, JSONSerializer, BinaryType> namespace nlohmann @@ -2733,7 +2734,8 @@ template class ObjectType = class NumberFloatType = double, template class AllocatorType = std::allocator, template class JSONSerializer = - adl_serializer> + adl_serializer, + class BinaryType = std::vector> class basic_json; /*! @@ -3177,6 +3179,7 @@ enum class value_t : std::uint8_t number_integer, ///< number value (signed integer) number_unsigned, ///< number value (unsigned integer) number_float, ///< number value (floating-point) + binary, ///< binary array (ordered collection of bytes) discarded ///< discarded by the parser callback function }; @@ -3184,17 +3187,21 @@ enum class value_t : std::uint8_t @brief comparison operator for JSON types Returns an ordering that is similar to Python: -- order: null < boolean < number < object < array < string +- order: null < boolean < number < object < array < string < binary - furthermore, each type is not smaller than itself - discarded values are not comparable +- binary is represented as a b"" string in python and directly comparable to a + string; however, making a binary array directly comparable with a string would + be surprising behavior in a JSON file. @since version 1.0.0 */ inline bool operator<(const value_t lhs, const value_t rhs) noexcept { - static constexpr std::array order = {{ + static constexpr std::array order = {{ 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, - 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */ + 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */, + 6 /* binary */ } }; @@ -3827,6 +3834,28 @@ struct external_constructor } }; +template<> +struct external_constructor +{ + template + static void construct(BasicJsonType& j, const typename BasicJsonType::binary_t& b) + { + j.m_type = value_t::binary; + typename BasicJsonType::internal_binary_t value{b}; + j.m_value = value; + j.assert_invariant(); + } + + template + static void construct(BasicJsonType& j, typename BasicJsonType::binary_t&& b) + { + j.m_type = value_t::binary; + typename BasicJsonType::internal_binary_t value{std::move(b)}; + j.m_value = value; + j.assert_invariant(); + } +}; + template<> struct external_constructor { @@ -4656,6 +4685,7 @@ struct json_sax using number_float_t = typename BasicJsonType::number_float_t; /// type for strings using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; /*! @brief a null value was read @@ -4700,6 +4730,14 @@ struct json_sax */ virtual bool string(string_t& val) = 0; + /*! + @brief a binary string was read + @param[in] val binary value + @return whether parsing should proceed + @note It is safe to move the passed binary. + */ + virtual bool binary(binary_t& val) = 0; + /*! @brief the beginning of an object was read @param[in] elements number of object elements or -1 if unknown @@ -4774,6 +4812,7 @@ class json_sax_dom_parser using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; /*! @param[in, out] r reference to a JSON value that is manipulated while @@ -4827,6 +4866,12 @@ class json_sax_dom_parser return true; } + bool binary(binary_t& val) + { + handle_binary(val); + return true; + } + bool start_object(std::size_t len) { ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); @@ -4936,6 +4981,36 @@ class json_sax_dom_parser return object_element; } + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + JSON_HEDLEY_RETURNS_NON_NULL + BasicJsonType* handle_binary(BinaryValue&& v) + { + if (ref_stack.empty()) + { + root = BasicJsonType::binary_array(std::forward(v)); + return &root; + } + + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->emplace_back(BasicJsonType::binary_array(std::forward(v))); + return &(ref_stack.back()->m_value.array->back()); + } + + assert(ref_stack.back()->is_object()); + assert(object_element); + *object_element = BasicJsonType::binary_array(std::forward(v)); + return object_element; + } + /// the parsed JSON value BasicJsonType& root; /// stack to model hierarchy of values @@ -4956,6 +5031,7 @@ class json_sax_dom_callback_parser using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; using parser_callback_t = typename BasicJsonType::parser_callback_t; using parse_event_t = typename BasicJsonType::parse_event_t; @@ -5010,6 +5086,12 @@ class json_sax_dom_callback_parser return true; } + bool binary(binary_t& val) + { + handle_value(val); + return true; + } + bool start_object(std::size_t len) { // check callback for object start @@ -5260,6 +5342,7 @@ class json_sax_acceptor using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; bool null() { @@ -5291,7 +5374,12 @@ class json_sax_acceptor return true; } - bool start_object(std::size_t /*unused*/ = std::size_t(-1)) + bool binary(binary_t& /*unused*/) + { + return true; + } + + bool start_object(std::size_t /*unused*/ = std::size_t(-1)) { return true; } @@ -5306,7 +5394,7 @@ class json_sax_acceptor return true; } - bool start_array(std::size_t /*unused*/ = std::size_t(-1)) + bool start_array(std::size_t /*unused*/ = std::size_t(-1)) { return true; } @@ -5494,6 +5582,7 @@ class binary_reader using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; + using internal_binary_t = typename BasicJsonType::internal_binary_t; using json_sax_t = SAX; public: @@ -5663,6 +5752,30 @@ class binary_reader return get_string(input_format_t::bson, len - static_cast(1), result) and get() != std::char_traits::eof(); } + /*! + @brief Parses a byte array input of length @a len from the BSON input. + @param[in] len The length of the byte array to be read. + @param[in, out] result A reference to the binary variable where the read + array is to be stored. + @tparam NumberType The type of the length @a len + @pre len >= 0 + @return `true` if the byte array was successfully parsed + */ + template + bool get_bson_binary(const NumberType len, internal_binary_t& result) + { + if (JSON_HEDLEY_UNLIKELY(len < 0)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary"))); + } + + result.has_subtype = true; // All BSON binary values have a subtype + get_number(input_format_t::bson, result.subtype); + + return get_binary(input_format_t::bson, len, result); + } + /*! @brief Read a BSON document element of the given @a element_type. @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html @@ -5701,6 +5814,13 @@ class binary_reader return parse_bson_array(); } + case 0x05: // binary + { + std::int32_t len; + internal_binary_t value; + return get_number(input_format_t::bson, len) and get_bson_binary(len, value) and sax->binary(value); + } + case 0x08: // boolean { return sax->boolean(get() != 0); @@ -5747,6 +5867,7 @@ class binary_reader bool parse_bson_element_list(const bool is_array) { string_t key; + while (int element_type = get()) { if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) @@ -5921,6 +6042,41 @@ class binary_reader - static_cast(number)); } + // Binary data (0x00..0x17 bytes follow) + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: // Binary data (one-byte uint8_t for n follows) + case 0x59: // Binary data (two-byte uint16_t for n follow) + case 0x5A: // Binary data (four-byte uint32_t for n follow) + case 0x5B: // Binary data (eight-byte uint64_t for n follow) + case 0x5F: // Binary data (indefinite length) + { + internal_binary_t b; + return get_cbor_binary(b) and sax->binary(b); + } + // UTF-8 string (0x00..0x17 bytes follow) case 0x60: case 0x61: @@ -6236,6 +6392,101 @@ class binary_reader } } + /*! + @brief reads a CBOR byte array + + This function first reads starting bytes to determine the expected + byte array length and then copies this number of bytes into the byte array. + Additionally, CBOR's byte arrays with indefinite lengths are supported. + + @param[out] result created byte array + + @return whether byte array creation completed + */ + bool get_cbor_binary(internal_binary_t& result) + { + if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor, "binary"))) + { + return false; + } + + switch (current) + { + // Binary data (0x00..0x17 bytes follow) + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + { + return get_binary(input_format_t::cbor, static_cast(current) & 0x1Fu, result); + } + + case 0x58: // Binary data (one-byte uint8_t for n follows) + { + std::uint8_t len; + return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result); + } + + case 0x59: // Binary data (two-byte uint16_t for n follow) + { + std::uint16_t len; + return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result); + } + + case 0x5A: // Binary data (four-byte uint32_t for n follow) + { + std::uint32_t len; + return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result); + } + + case 0x5B: // Binary data (eight-byte uint64_t for n follow) + { + std::uint64_t len; + return get_number(input_format_t::cbor, len) and get_binary(input_format_t::cbor, len, result); + } + + case 0x5F: // Binary data (indefinite length) + { + while (get() != 0xFF) + { + internal_binary_t chunk; + if (not get_cbor_binary(chunk)) + { + return false; + } + result.insert(result.end(), chunk.begin(), chunk.end()); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary"))); + } + } + } + /*! @param[in] len the length of the array or std::size_t(-1) for an array of indefinite size @@ -6556,6 +6807,22 @@ class binary_reader case 0xC3: // true return sax->boolean(true); + case 0xC4: // bin 8 + case 0xC5: // bin 16 + case 0xC6: // bin 32 + case 0xC7: // ext 8 + case 0xC8: // ext 16 + case 0xC9: // ext 32 + case 0xD4: // fixext 1 + case 0xD5: // fixext 2 + case 0xD6: // fixext 4 + case 0xD7: // fixext 8 + case 0xD8: // fixext 16 + { + internal_binary_t b; + return get_msgpack_binary(b) and sax->binary(b); + } + case 0xCA: // float 32 { float number; @@ -6765,6 +7032,108 @@ class binary_reader } } + /*! + @brief reads a MessagePack byte array + + This function first reads starting bytes to determine the expected + byte array length and then copies this number of bytes into a byte array. + + @param[out] result created byte array + + @return whether byte array creation completed + */ + bool get_msgpack_binary(internal_binary_t& result) + { + if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "binary"))) + { + return false; + } + + switch (current) + { + case 0xC4: // bin 8 + { + std::uint8_t len; + return get_number(input_format_t::msgpack, len) and get_binary(input_format_t::msgpack, len, result); + } + + case 0xC5: // bin 16 + { + std::uint16_t len; + return get_number(input_format_t::msgpack, len) and get_binary(input_format_t::msgpack, len, result); + } + + case 0xC6: // bin 32 + { + std::uint32_t len; + return get_number(input_format_t::msgpack, len) and get_binary(input_format_t::msgpack, len, result); + } + + case 0xC7: // ext 8 + { + std::uint8_t len; + result.has_subtype = true; + return get_number(input_format_t::msgpack, len) and + get_number(input_format_t::msgpack, result.subtype) and + get_binary(input_format_t::msgpack, len, result); + } + + case 0xC8: // ext 16 + { + std::uint16_t len; + result.has_subtype = true; + return get_number(input_format_t::msgpack, len) and + get_number(input_format_t::msgpack, result.subtype) and + get_binary(input_format_t::msgpack, len, result); + } + + case 0xC9: // ext 32 + { + std::uint32_t len; + result.has_subtype = true; + return get_number(input_format_t::msgpack, len) and + get_number(input_format_t::msgpack, result.subtype) and + get_binary(input_format_t::msgpack, len, result); + } + + case 0xD4: // fixext 1 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 1, result); + } + + case 0xD5: // fixext 2 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 2, result); + } + + case 0xD6: // fixext 4 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 4, result); + } + + case 0xD7: // fixext 8 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 8, result); + } + + case 0xD8: // fixext 16 + { + result.has_subtype = true; + return get_number(input_format_t::msgpack, result.subtype) and get_binary(input_format_t::msgpack, 16, result); + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected binary type specification (0xC4-0xC9, 0xD4-0xD8); last byte: 0x" + last_token, "binary"))); + } + } + } + /*! @param[in] len the length of the array @return whether array creation completed @@ -7249,6 +7618,9 @@ class binary_reader return sax->end_object(); } + // Note, no reader for UBJSON binary types is implemented because they do + // not exist + /////////////////////// // Utility functions // /////////////////////// @@ -7356,6 +7728,38 @@ class binary_reader return success; } + /*! + @brief create a byte array by reading bytes from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of bytes to read + @param[out] result byte array created by reading @a len bytes + + @return whether byte array creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of memory. + */ + template + bool get_binary(const input_format_t format, + const NumberType len, + internal_binary_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(format, "binary"))) + { + success = false; + } + return static_cast(current); + }); + return success; + } + /*! @param[in] format the current format (for diagnostics) @param[in] context further context information (for diagnostics) @@ -9606,6 +10010,8 @@ template struct internal_iterator typename BasicJsonType::object_t::iterator object_iterator {}; /// iterator for JSON arrays typename BasicJsonType::array_t::iterator array_iterator {}; + /// iterator for JSON binary arrays + typename BasicJsonType::binary_t::iterator binary_iterator {}; /// generic iterator for all other types primitive_iterator_t primitive_iterator {}; }; @@ -11634,6 +12040,7 @@ template class binary_writer { using string_t = typename BasicJsonType::string_t; + using internal_binary_t = typename BasicJsonType::internal_binary_t; public: /*! @@ -11866,6 +12273,45 @@ class binary_writer break; } + case value_t::binary: + { + // step 1: write control byte and the binary array size + const auto N = j.m_value.binary->size(); + if (N <= 0x17) + { + write_number(static_cast(0x40 + N)); + } + else if (N <= (std::numeric_limits::max)()) + { + oa->write_character(to_char_type(0x58)); + write_number(static_cast(N)); + } + else if (N <= (std::numeric_limits::max)()) + { + oa->write_character(to_char_type(0x59)); + write_number(static_cast(N)); + } + else if (N <= (std::numeric_limits::max)()) + { + oa->write_character(to_char_type(0x5A)); + write_number(static_cast(N)); + } + // LCOV_EXCL_START + else if (N <= (std::numeric_limits::max)()) + { + oa->write_character(to_char_type(0x5B)); + write_number(static_cast(N)); + } + // LCOV_EXCL_STOP + + // step 2: write each element + oa->write_characters( + reinterpret_cast(j.m_value.binary->data()), + N); + + break; + } + case value_t::object: { // step 1: write control byte and the object size @@ -12114,6 +12560,101 @@ class binary_writer break; } + case value_t::binary: + { + // step 0: determine if the binary type has a set subtype to + // determine whether or not to use the ext or fixext types + const bool use_ext = j.m_value.binary->has_subtype; + + // step 1: write control byte and the byte string length + const auto N = j.m_value.binary->size(); + if (N <= (std::numeric_limits::max)()) + { + std::uint8_t output_type; + bool fixed = true; + if (use_ext) + { + switch (N) + { + case 1: + output_type = 0xD4; // fixext 1 + break; + case 2: + output_type = 0xD5; // fixext 2 + break; + case 4: + output_type = 0xD6; // fixext 4 + break; + case 8: + output_type = 0xD7; // fixext 8 + break; + case 16: + output_type = 0xD8; // fixext 16 + break; + default: + output_type = 0xC7; // ext 8 + fixed = false; + break; + } + + } + else + { + output_type = 0xC4; // bin 8 + fixed = false; + } + + oa->write_character(to_char_type(output_type)); + if (not fixed) + { + write_number(static_cast(N)); + } + } + else if (N <= (std::numeric_limits::max)()) + { + std::uint8_t output_type; + if (use_ext) + { + output_type = 0xC8; // ext 16 + } + else + { + output_type = 0xC5; // bin 16 + } + + oa->write_character(to_char_type(output_type)); + write_number(static_cast(N)); + } + else if (N <= (std::numeric_limits::max)()) + { + std::uint8_t output_type; + if (use_ext) + { + output_type = 0xC9; // ext 32 + } + else + { + output_type = 0xC6; // bin 32 + } + + oa->write_character(to_char_type(output_type)); + write_number(static_cast(N)); + } + + // step 1.5: if this is an ext type, write the subtype + if (use_ext) + { + write_number(j.m_value.binary->subtype); + } + + // step 2: write the byte string + oa->write_characters( + reinterpret_cast(j.m_value.binary->data()), + N); + + break; + } + case value_t::object: { // step 1: write control byte and the object size @@ -12257,6 +12798,49 @@ class binary_writer break; } + case value_t::binary: + { + if (add_prefix) + { + oa->write_character(to_char_type('[')); + } + + if (use_type and not j.m_value.binary->empty()) + { + assert(use_count); + oa->write_character(to_char_type('$')); + oa->write_character('U'); + } + + if (use_count) + { + oa->write_character(to_char_type('#')); + write_number_with_ubjson_prefix(j.m_value.binary->size(), true); + } + + if (use_type) + { + oa->write_characters( + reinterpret_cast(j.m_value.binary->data()), + j.m_value.binary->size()); + } + else + { + for (size_t i = 0; i < j.m_value.binary->size(); ++i) + { + oa->write_character(to_char_type('U')); + oa->write_character(j.m_value.binary->data()[i]); + } + } + + if (not use_count) + { + oa->write_character(to_char_type(']')); + } + + break; + } + case value_t::object: { if (add_prefix) @@ -12479,6 +13063,14 @@ class binary_writer return sizeof(std::int32_t) + embedded_document_size + 1ul; } + /*! + @return The size of the BSON-encoded binary array @a value + */ + static std::size_t calc_bson_binary_size(const typename BasicJsonType::internal_binary_t& value) + { + return sizeof(std::int32_t) + value.size() + 1ul; + } + /*! @brief Writes a BSON element with key @a name and array @a value */ @@ -12498,6 +13090,27 @@ class binary_writer oa->write_character(to_char_type(0x00)); } + /*! + @brief Writes a BSON element with key @a name and binary value @a value + */ + void write_bson_binary(const string_t& name, + const internal_binary_t& value) + { + write_bson_entry_header(name, 0x05); + + write_number(static_cast(value.size())); + std::uint8_t subtype = 0x00; // Generic Binary Subtype + if (value.has_subtype) + { + subtype = value.subtype; + } + write_number(subtype); + + oa->write_characters( + reinterpret_cast(value.data()), + value.size()); + } + /*! @brief Calculates the size necessary to serialize the JSON value @a j with its @a name @return The calculated size for the BSON document entry for @a j with the given @a name. @@ -12514,6 +13127,9 @@ class binary_writer case value_t::array: return header_size + calc_bson_array_size(*j.m_value.array); + case value_t::binary: + return header_size + calc_bson_binary_size(*j.m_value.binary); + case value_t::boolean: return header_size + 1ul; @@ -12558,6 +13174,9 @@ class binary_writer case value_t::array: return write_bson_array(name, *j.m_value.array); + case value_t::binary: + return write_bson_binary(name, *j.m_value.binary); + case value_t::boolean: return write_bson_boolean(name, j.m_value.boolean); @@ -12838,7 +13457,8 @@ class binary_writer case value_t::string: return 'S'; - case value_t::array: + case value_t::array: // fallthrough + case value_t::binary: return '['; case value_t::object: @@ -14106,6 +14726,7 @@ class serializer using number_float_t = typename BasicJsonType::number_float_t; using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using binary_t = typename BasicJsonType::binary_t; static constexpr std::uint8_t UTF8_ACCEPT = 0; static constexpr std::uint8_t UTF8_REJECT = 1; @@ -14144,16 +14765,19 @@ class serializer - strings and object keys are escaped using `escape_string()` - integer numbers are converted implicitly via `operator<<` - floating-point numbers are converted to a string using `"%g"` format + - if specified to, binary values are output using the syntax `b[]`, otherwise an exception is thrown - @param[in] val value to serialize - @param[in] pretty_print whether the output shall be pretty-printed - @param[in] indent_step the indent level - @param[in] current_indent the current indent level (only used internally) + @param[in] val value to serialize + @param[in] pretty_print whether the output shall be pretty-printed + @param[in] indent_step the indent level + @param[in] current_indent the current indent level (only used internally) + @param[in] serialize_binary whether the output shall include non-standard binary output */ void dump(const BasicJsonType& val, const bool pretty_print, const bool ensure_ascii, const unsigned int indent_step, - const unsigned int current_indent = 0) + const unsigned int current_indent = 0, + const bool serialize_binary = false) { switch (val.m_type) { @@ -14297,6 +14921,55 @@ class serializer return; } + case value_t::binary: + { + if (not serialize_binary) + { + JSON_THROW(type_error::create(317, "cannot serialize binary data to text JSON")); + } + + if (val.m_value.binary->empty()) + { + o->write_characters("b[]", 3); + } + else if (pretty_print) + { + o->write_characters("b[", 2); + for (auto i = val.m_value.binary->cbegin(); + i != val.m_value.binary->cend() - 1; ++i) + { + dump_integer(*i); + o->write_character(','); + int index = i - val.m_value.binary->cbegin(); + if (index % 16 == 0) + { + o->write_character('\n'); + } + else + { + o->write_character(' '); + } + } + + dump_integer(val.m_value.binary->back()); + o->write_character(']'); + } + else + { + o->write_characters("b[", 2); + for (auto i = val.m_value.binary->cbegin(); + i != val.m_value.binary->cend() - 1; ++i) + { + dump_integer(*i); + o->write_character(','); + } + + dump_integer(val.m_value.binary->back()); + o->write_character(']'); + } + return; + } + case value_t::boolean: { if (val.m_value.boolean) @@ -14653,7 +15326,8 @@ class serializer */ template::value or - std::is_same::value, + std::is_same::value or + std::is_same::value, int> = 0> void dump_integer(NumberType x) { @@ -14691,7 +15365,7 @@ class serializer if (is_negative) { *buffer_ptr = '-'; - abs_value = remove_sign(x); + abs_value = remove_sign(static_cast(x)); // account one more byte for the minus sign n_chars = 1 + count_digits(abs_value); @@ -14956,6 +15630,9 @@ default; will be used in @ref number_integer_t) `uint64_t` by default; will be used in @ref number_unsigned_t) @tparam NumberFloatType type for JSON floating-point numbers (`double` by default; will be used in @ref number_float_t) +@tparam BinaryType type for packed binary data for compatibility with binary +serialization formats (`std::vector` by default; will be used in +@ref binary_t) @tparam AllocatorType type of the allocator to use (`std::allocator` by default) @tparam JSONSerializer the serializer to resolve internal calls to `to_json()` @@ -15677,6 +16354,85 @@ class basic_json */ using number_float_t = NumberFloatType; + /*! + @brief a type for a packed binary type + + This type is a type designed to carry binary data that appears in various + serialized formats, such as CBOR's Major Type 2, MessagePack's bin, and + BSON's generic binary subtype. This type is NOT a part of standard JSON and + exists solely for compatibility with these binary types. As such, it is + simply defined as an ordered sequence of zero or more byte values. + + Additionally, as an implementation detail, the subtype of the binary data is + carried around as a `unint8_t`, which is compatible with both of the binary + data formats that use binary subtyping, (though the specific numbering is + incompatible with each other, and it is up to the user to translate between + them). + + [CBOR's RFC 7049](https://tools.ietf.org/html/rfc7049) describes this type + as: + > Major type 2: a byte string. The string's length in bytes is + > represented following the rules for positive integers (major type + > 0). + + [MessagePack's documentation on the bin type + family](https://github.com/msgpack/msgpack/blob/master/spec.md#bin-format-family) + describes this type as: + > Bin format family stores an byte array in 2, 3, or 5 bytes of extra bytes + > in addition to the size of the byte array. + + [BSON's specifications](http://bsonspec.org/spec.html) describe several + binary types; however, this type is intended to represent the generic binary + type which has the description: + > Generic binary subtype - This is the most commonly used binary subtype and + > should be the 'default' for drivers and tools. + + None of these impose any limitations on the internal representation other + than the basic unit of storage be some type of array whose parts are + decomposible into bytes. + + The default representation of this binary format is a + `std::vector`, which is a very common way to represent a byte + array in modern C++. + + #### Default type + + The default values for @a BinaryType is `std::vector` + + #### Storage + + Binary Arrays are stored as pointers in a @ref basic_json type. That is, + for any access to array values, a pointer of the type `binary_t*` must be + dereferenced. + + @sa @ref array_t -- type for an array value + + @since version 3.8.0 + */ + + using binary_t = BinaryType; + + /*! + @brief an internal type for a backed binary type + + This type is designed to be `binary_t` but with the subtype implementation + detail. This type exists so that the user does not have to specify a struct + his- or herself with a specific naming scheme in order to override the + binary type. I.e. it's for ease of use. + */ + struct internal_binary_t : public BinaryType + { + using BinaryType::BinaryType; + internal_binary_t() : BinaryType() {} + internal_binary_t(BinaryType const& bint) : BinaryType(bint) {} + internal_binary_t(BinaryType&& bint) : BinaryType(std::move(bint)) {} + + // TOOD: If minimum C++ version is ever bumped to C++17, this field + // deserves to be a std::optional + std::uint8_t subtype = 0; + bool has_subtype = false; + }; + /// @} private: @@ -15719,6 +16475,7 @@ class basic_json number | number_integer | @ref number_integer_t number | number_unsigned | @ref number_unsigned_t number | number_float | @ref number_float_t + binary | binary | pointer to @ref internal_binary_t null | null | *no value is stored* @note Variable-length types (objects, arrays, and strings) are stored as @@ -15735,6 +16492,8 @@ class basic_json array_t* array; /// string (stored with pointer to save storage) string_t* string; + /// binary (stored with pointer to save storage) + internal_binary_t* binary; /// boolean boolean_t boolean; /// number (integer) @@ -15777,6 +16536,12 @@ class basic_json break; } + case value_t::binary: + { + binary = create(); + break; + } + case value_t::boolean: { boolean = boolean_t(false); @@ -15855,6 +16620,18 @@ class basic_json array = create(std::move(value)); } + /// constructor for binary arrays + json_value(const binary_t& value) + { + binary = create(value); + } + + /// constructor for rvalue binary arrays + json_value(binary_t&& value) + { + binary = create(std::move(value)); + } + void destroy(value_t t) noexcept { // flatten the current json_value to a heap-allocated stack @@ -15930,6 +16707,14 @@ class basic_json break; } + case value_t::binary: + { + AllocatorType alloc; + std::allocator_traits::destroy(alloc, binary); + std::allocator_traits::deallocate(alloc, binary, 1); + break; + } + default: { break; @@ -16050,6 +16835,7 @@ class basic_json number | `0` object | `{}` array | `[]` + binary | empty array @param[in] v the type of the value to create @@ -16121,6 +16907,12 @@ class basic_json @ref number_float_t, and all convertible number types such as `int`, `size_t`, `int64_t`, `float` or `double` can be used. - **boolean**: @ref boolean_t / `bool` can be used. + - **binary**: @ref binary_t / `std::vector` may be used, + unfortunately because string literals cannot be distinguished from binary + character arrays by the C++ type system, all types compatible with `const + char*` will be directed to the string constructor instead. This is both + for backwards compatibility, and due to the fact that a binary type is not + a standard JSON type. See the examples below. @@ -16202,6 +16994,7 @@ class basic_json using other_string_t = typename BasicJsonType::string_t; using other_object_t = typename BasicJsonType::object_t; using other_array_t = typename BasicJsonType::array_t; + using other_binary_t = typename BasicJsonType::internal_binary_t; switch (val.type()) { @@ -16226,6 +17019,9 @@ class basic_json case value_t::array: JSONSerializer::to_json(*this, val.template get_ref()); break; + case value_t::binary: + JSONSerializer::to_json(*this, val.template get_ref()); + break; case value_t::null: *this = nullptr; break; @@ -16364,6 +17160,78 @@ class basic_json assert_invariant(); } + /*! + @brief explicitly create a binary array from an already constructed copy of + its base type + + Creates a JSON binary array value from a given `binary_t`. Binary values are + part of various binary formats, such as CBOR, MsgPack, and BSON. And this + constructor is used to create a value for serialization to those formats. + + @note Note, this function exists because of the difficulty in correctly + specifying the correct template overload in the standard value ctor, as both + JSON arrays and JSON binary arrays are backed with some form of a + `std::vector`. Because JSON binary arrays are a non-standard extension it + was decided that it would be best to prevent automatic initialization of a + binary array type, for backwards compatibility and so it does not happen on + accident. + + @param[in] init `binary_t` with JSON values to create a binary array from + + @return JSON binary array value + + @complexity Linear in the size of @a init. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @since version 3.8.0 + */ + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json binary_array(binary_t const& init) + { + auto res = basic_json(); + res.m_type = value_t::binary; + res.m_value = init; + return res; + } + + /*! + @brief explicitly create a binary array from an already constructed rvalue + copy of its base type + + Creates a JSON binary array value from a given `binary_t`. Binary values are + part of various binary formats, such as CBOR, MsgPack, and BSON. And this + constructor is used to create a value for serialization to those formats. + + @note Note, this function exists because of the difficulty in correctly + specifying the correct template overload in the standard value ctor, as both + JSON arrays and JSON binary arrays are backed with some form of a + `std::vector`. Because JSON binary arrays are a non-standard extension it + was decided that it would be best to prevent automatic initialization of a + binary array type, for backwards compatibility and so it doesn't happen on + accident. + + @param[in] init `binary_t` with JSON values to create a binary array from + + @return JSON binary array value + + @complexity Linear in the size of @a init. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @since version 3.8.0 + */ + JSON_HEDLEY_WARN_UNUSED_RESULT + static basic_json binary_array(binary_t&& init) + { + auto res = basic_json(); + res.m_type = value_t::binary; + res.m_value = std::move(init); + return res; + } + /*! @brief explicitly create an array from an initializer list @@ -16619,6 +17487,13 @@ class basic_json break; } + case value_t::binary: + { + m_value.binary = create(first.m_it.binary_iterator, + last.m_it.binary_iterator); + break; + } + default: JSON_THROW(invalid_iterator::create(206, "cannot construct with iterators from " + std::string(first.m_object->type_name()))); @@ -16712,6 +17587,12 @@ class basic_json break; } + case value_t::binary: + { + m_value = *other.m_value.binary; + break; + } + default: break; } @@ -16852,6 +17733,11 @@ class basic_json possible values: `strict` (throws and exception in case a decoding error occurs; default), `replace` (replace invalid UTF-8 sequences with U+FFFD), and `ignore` (ignore invalid UTF-8 sequences during serialization). + @param[in] serialize_binary Whether or not to allow serialization of binary + types to JSON. Because binary types are non-standard, this will produce + non-conformant JSON, and is disabled by default. This flag is primarily + useful for debugging. Will output the binary value as a list of 8-bit + numbers prefixed by "b" (e.g. "bindata" = b[3, 0, 42, 255]). @return string containing the serialization of the JSON value @@ -16876,18 +17762,19 @@ class basic_json string_t dump(const int indent = -1, const char indent_char = ' ', const bool ensure_ascii = false, - const error_handler_t error_handler = error_handler_t::strict) const + const error_handler_t error_handler = error_handler_t::strict, + const bool serialize_binary = false) const { string_t result; serializer s(detail::output_adapter(result), indent_char, error_handler); if (indent >= 0) { - s.dump(*this, true, ensure_ascii, static_cast(indent)); + s.dump(*this, true, ensure_ascii, static_cast(indent), serialize_binary); } else { - s.dump(*this, false, ensure_ascii, 0); + s.dump(*this, false, ensure_ascii, 0, serialize_binary); } return result; @@ -16910,6 +17797,7 @@ class basic_json number (floating-point) | value_t::number_float object | value_t::object array | value_t::array + binary | value_t::binary discarded | value_t::discarded @complexity Constant. @@ -16952,12 +17840,13 @@ class basic_json @sa @ref is_string() -- returns whether JSON value is a string @sa @ref is_boolean() -- returns whether JSON value is a boolean @sa @ref is_number() -- returns whether JSON value is a number + @sa @ref is_binary() -- returns whether JSON value is a binary array @since version 1.0.0 */ constexpr bool is_primitive() const noexcept { - return is_null() or is_string() or is_boolean() or is_number(); + return is_null() or is_string() or is_boolean() or is_number() or is_binary(); } /*! @@ -17212,6 +18101,28 @@ class basic_json return m_type == value_t::string; } + /*! + @brief return whether value is a binary array + + This function returns true if and only if the JSON value is a binary array. + + @return `true` if type is binary array, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_binary()` for all JSON + types.,is_binary} + + @since version 3.8.0 + */ + constexpr bool is_binary() const noexcept + { + return m_type == value_t::binary; + } + /*! @brief return whether value is discarded @@ -17367,6 +18278,18 @@ class basic_json return is_number_float() ? &m_value.number_float : nullptr; } + /// get a pointer to the value (binary) + binary_t* get_impl_ptr(binary_t* /*unused*/) noexcept + { + return is_binary() ? m_value.binary : nullptr; + } + + /// get a pointer to the value (binary) + constexpr const binary_t* get_impl_ptr(const binary_t* /*unused*/) const noexcept + { + return is_binary() ? m_value.binary : nullptr; + } + /*! @brief helper function to implement get_ref() @@ -18414,6 +19337,120 @@ class basic_json return value(ptr, string_t(default_value)); } + /*! + @brief return the binary subtype + + Returns the numerical subtype of the JSON value, if the JSON value is of + type "binary", and it has a subtype. If it does not have a subtype (or the + object is not of type binary) this function will return size_t(-1) as a + sentinel value. + + @return the numerical subtype of the binary JSON value + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @sa @ref set_subtype() -- sets the binary subtype + @sa @ref clear_subtype() -- clears the binary subtype + @sa @ref has_subtype() -- returns whether or not the binary value has a + subtype + + @since version 3.8.0 + */ + std::size_t get_subtype() const noexcept + { + if (is_binary() and m_value.binary->has_subtype) + { + return m_value.binary->subtype; + } + + return std::size_t(-1); + } + + /*! + @brief sets the binary subtype + + Sets the binary subtype of the JSON value, also flags a binary JSON value as + having a subtype, which has implications for serialization to msgpack (will + prefer ext file formats over bin). If the JSON value is not a binary value, + this function does nothing. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @sa @ref get_subtype() -- return the binary subtype + @sa @ref clear_subtype() -- clears the binary subtype + @sa @ref has_subtype() -- returns whether or not the binary value has a + subtype + + @since version 3.8.0 + */ + + void set_subtype(std::uint8_t subtype) noexcept + { + if (is_binary()) + { + m_value.binary->has_subtype = true; + m_value.binary->subtype = subtype; + } + } + + /*! + @brief clears the binary subtype + + Clears the binary subtype of the JSON value, also flags a binary JSON value + as not having a subtype, which has implications for serialization to msgpack + (will prefer bin file formats over ext). If the JSON value is not a binary + value, this function does nothing. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @sa @ref get_subtype() -- return the binary subtype + @sa @ref set_subtype() -- sets the binary subtype + @sa @ref has_subtype() -- returns whether or not the binary value has a + subtype + + @since version 3.8.0 + */ + void clear_subtype() noexcept + { + if (is_binary()) + { + m_value.binary->has_subtype = false; + m_value.binary->subtype = 0; + } + } + + /*! + @brief return whether or not the binary subtype has a value + + Returns whether or not the binary subtype has a value. + + @return whether or not the binary subtype has a value. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @sa @ref get_subtype() -- return the binary subtype + @sa @ref set_subtype() -- sets the binary subtype + @sa @ref clear_subtype() -- clears the binary subtype + + @since version 3.8.0 + */ + bool has_subtype() const noexcept + { + return is_binary() and m_value.binary->has_subtype; + } + /*! @brief access the first element @@ -18421,8 +19458,8 @@ class basic_json container `c`, the expression `c.front()` is equivalent to `*c.begin()`. @return In case of a structured type (array or object), a reference to the - first element is returned. In case of number, string, or boolean values, a - reference to the value is returned. + first element is returned. In case of number, string, boolean, or binary + values, a reference to the value is returned. @complexity Constant. @@ -18464,8 +19501,8 @@ class basic_json @endcode @return In case of a structured type (array or object), a reference to the - last element is returned. In case of number, string, or boolean values, a - reference to the value is returned. + last element is returned. In case of number, string, boolean, or binary + values, a reference to the value is returned. @complexity Constant. @@ -18531,7 +19568,7 @@ class basic_json @complexity The complexity depends on the type: - objects: amortized constant - arrays: linear in distance between @a pos and the end of the container - - strings: linear in the length of the string + - strings and binary: linear in the length of the member - other types: constant @liveexample{The example shows the result of `erase()` for different JSON @@ -18567,6 +19604,7 @@ class basic_json case value_t::number_integer: case value_t::number_unsigned: case value_t::string: + case value_t::binary: { if (JSON_HEDLEY_UNLIKELY(not pos.m_it.primitive_iterator.is_begin())) { @@ -18580,6 +19618,13 @@ class basic_json std::allocator_traits::deallocate(alloc, m_value.string, 1); m_value.string = nullptr; } + else if (is_binary()) + { + AllocatorType alloc; + std::allocator_traits::destroy(alloc, m_value.binary); + std::allocator_traits::deallocate(alloc, m_value.binary, 1); + m_value.binary = nullptr; + } m_type = value_t::null; assert_invariant(); @@ -18637,7 +19682,7 @@ class basic_json - objects: `log(size()) + std::distance(first, last)` - arrays: linear in the distance between @a first and @a last, plus linear in the distance between @a last and end of the container - - strings: linear in the length of the string + - strings and binary: linear in the length of the member - other types: constant @liveexample{The example shows the result of `erase()` for different JSON @@ -18672,6 +19717,7 @@ class basic_json case value_t::number_integer: case value_t::number_unsigned: case value_t::string: + case value_t::binary: { if (JSON_HEDLEY_LIKELY(not first.m_it.primitive_iterator.is_begin() or not last.m_it.primitive_iterator.is_end())) @@ -18686,6 +19732,13 @@ class basic_json std::allocator_traits::deallocate(alloc, m_value.string, 1); m_value.string = nullptr; } + else if (is_binary()) + { + AllocatorType alloc; + std::allocator_traits::destroy(alloc, m_value.binary); + std::allocator_traits::deallocate(alloc, m_value.binary, 1); + m_value.binary = nullptr; + } m_type = value_t::null; assert_invariant(); @@ -19405,6 +20458,7 @@ class basic_json boolean | `false` string | `false` number | `false` + binary | `false` object | result of function `object_t::empty()` array | result of function `array_t::empty()` @@ -19476,6 +20530,7 @@ class basic_json boolean | `1` string | `1` number | `1` + binary | `1` object | result of function object_t::size() array | result of function array_t::size() @@ -19550,6 +20605,7 @@ class basic_json boolean | `1` (same as `size()`) string | `1` (same as `size()`) number | `1` (same as `size()`) + binary | `1` (same as `size()`) object | result of function `object_t::max_size()` array | result of function `array_t::max_size()` @@ -19622,6 +20678,7 @@ class basic_json boolean | `false` string | `""` number | `0` + binary | An empty byte vector object | `{}` array | `[]` @@ -19679,6 +20736,12 @@ class basic_json break; } + case value_t::binary: + { + m_value.binary->clear(); + break; + } + case value_t::array: { m_value.array->clear(); @@ -20555,6 +21618,9 @@ class basic_json case value_t::number_float: return lhs.m_value.number_float == rhs.m_value.number_float; + case value_t::binary: + return *lhs.m_value.binary == *rhs.m_value.binary; + default: return false; } @@ -20715,6 +21781,9 @@ class basic_json case value_t::number_float: return (lhs.m_value.number_float) < (rhs.m_value.number_float); + case value_t::binary: + return (lhs.m_value.binary) < (rhs.m_value.binary); + default: return false; } @@ -21279,6 +22348,7 @@ class basic_json number | `"number"` (for all number types) object | `"object"` array | `"array"` + binary | `"binary"` discarded | `"discarded"` @exceptionsafety No-throw guarantee: this function never throws exceptions. @@ -21310,6 +22380,8 @@ class basic_json return "string"; case value_t::boolean: return "boolean"; + case value_t::binary: + return "binary"; case value_t::discarded: return "discarded"; default: @@ -21385,6 +22457,11 @@ class basic_json object | *size*: 256..65535 | map (2 bytes follow) | 0xB9 object | *size*: 65536..4294967295 | map (4 bytes follow) | 0xBA object | *size*: 4294967296..18446744073709551615 | map (8 bytes follow) | 0xBB + binary | *size*: 0..23 | byte string | 0x40..0x57 + binary | *size*: 23..255 | byte string (1 byte follow) | 0x58 + binary | *size*: 256..65535 | byte string (2 bytes follow) | 0x59 + binary | *size*: 65536..4294967295 | byte string (4 bytes follow) | 0x5A + binary | *size*: 4294967296..18446744073709551615 | byte string (8 bytes follow) | 0x5B @note The mapping is **complete** in the sense that any JSON value type can be converted to a CBOR value. @@ -21394,10 +22471,10 @@ class basic_json function which serializes NaN or Infinity to `null`. @note The following CBOR types are not used in the conversion: - - byte strings (0x40..0x5F) - UTF-8 strings terminated by "break" (0x7F) - arrays terminated by "break" (0x9F) - maps terminated by "break" (0xBF) + - byte strings terminated by "break" (0x5F) - date/time (0xC0..0xC1) - bignum (0xC2..0xC3) - decimal fraction (0xC4) @@ -21484,20 +22561,21 @@ class basic_json object | *size*: 0..15 | fix map | 0x80..0x8F object | *size*: 16..65535 | map 16 | 0xDE object | *size*: 65536..4294967295 | map 32 | 0xDF + binary | *size*: 0..255 | bin 8 | 0xC4 + binary | *size*: 256..65535 | bin 16 | 0xC5 + binary | *size*: 65536..4294967295 | bin 32 | 0xC6 @note The mapping is **complete** in the sense that any JSON value type can be converted to a MessagePack value. @note The following values can **not** be converted to a MessagePack value: - strings with more than 4294967295 bytes + - byte strings with more than 4294967295 bytes - arrays with more than 4294967295 elements - objects with more than 4294967295 elements @note The following MessagePack types are not used in the conversion: - - bin 8 - bin 32 (0xC4..0xC6) - - ext 8 - ext 32 (0xC7..0xC9) - float 32 (0xCA) - - fixext 1 - fixext 16 (0xD4..0xD8) @note Any MessagePack output created @ref to_msgpack can be successfully parsed by @ref from_msgpack. @@ -21600,6 +22678,12 @@ class basic_json the benefit of this parameter is that the receiving side is immediately informed on the number of elements of the container. + @note If the JSON data contains the binary type, the value stored is a list + of integers, as suggested by the UBJSON documentation. In particular, + this means that serialization and the deserialization of a JSON + containing binary values into UBJSON and back will result in a + different JSON object. + @param[in] j JSON value to serialize @param[in] use_size whether to add size annotations to container types @param[in] use_type whether to add type annotations to container types @@ -21664,6 +22748,7 @@ class basic_json string | *any value* | string | 0x02 array | *any value* | document | 0x04 object | *any value* | document | 0x03 + binary | *any value* | binary | 0x05 @warning The mapping is **incomplete**, since only JSON-objects (and things contained therein) can be serialized to BSON. @@ -21745,7 +22830,11 @@ class basic_json Negative integer | number_integer | 0x39 Negative integer | number_integer | 0x3A Negative integer | number_integer | 0x3B - Negative integer | number_integer | 0x40..0x57 + Byte string | binary | 0x40..0x57 + Byte string | binary | 0x58 + Byte string | binary | 0x59 + Byte string | binary | 0x5A + Byte string | binary | 0x5B UTF-8 string | string | 0x60..0x77 UTF-8 string | string | 0x78 UTF-8 string | string | 0x79 @@ -21774,7 +22863,6 @@ class basic_json @warning The mapping is **incomplete** in the sense that not all CBOR types can be converted to a JSON value. The following CBOR types are not supported and will yield parse errors (parse_error.112): - - byte strings (0x40..0x5F) - date/time (0xC0..0xC1) - bignum (0xC2..0xC3) - decimal fraction (0xC4) @@ -21885,15 +22973,19 @@ class basic_json array 32 | array | 0xDD map 16 | object | 0xDE map 32 | object | 0xDF + bin 8 | binary | 0xC4 + bin 16 | binary | 0xC5 + bin 32 | binary | 0xC6 + ext 8 | binary | 0xC7 + ext 16 | binary | 0xC8 + ext 32 | binary | 0xC9 + fixext 1 | binary | 0xD4 + fixext 2 | binary | 0xD5 + fixext 4 | binary | 0xD6 + fixext 8 | binary | 0xD7 + fixext 16 | binary | 0xD8 negative fixint | number_integer | 0xE0-0xFF - @warning The mapping is **incomplete** in the sense that not all - MessagePack types can be converted to a JSON value. The following - MessagePack types are not supported and will yield parse errors: - - bin 8 - bin 32 (0xC4..0xC6) - - ext 8 - ext 32 (0xC7..0xC9) - - fixext 1 - fixext 16 (0xD4..0xD8) - @note Any MessagePack output created @ref to_msgpack can be successfully parsed by @ref from_msgpack. diff --git a/test/data/binary_data/cbor_binary.cbor b/test/data/binary_data/cbor_binary.cbor new file mode 100644 index 0000000000000000000000000000000000000000..ee6d9bdfc1ea0ee33256e013717ecc7dcb33bf3f GIT binary patch literal 541 zcma!daAb61WO8O?Vs-&y7FQ6#>INp++#wXZ2bAXUgfTe1;4Ch01e4nbiOu7S%;EJz z;qm#Sa`^+$_yU3G0)jypB0|BKLc$?f#6&`|35tec6%`A|E-W5l@c)E&!-0y(`i^Hp z`}x<{Pu{`Q@WEI~{JCUXMd+mI+yBV8X$Ac{d;aiM5w?H}=|43i8A#R=#lS$irf8rh R5FpvM_;}>#`;Wr;4*=eQcXI#$ literal 0 HcmV?d00001 diff --git a/test/data/binary_data/cbor_binary.out b/test/data/binary_data/cbor_binary.out new file mode 100644 index 000000000..7904806b6 --- /dev/null +++ b/test/data/binary_data/cbor_binary.out @@ -0,0 +1,17 @@ +  + +  +  +  +  +  +  +  +  +  +  +  +  + ÿÈK€ÀxYˆæ¿¬?“¸€ð3"ç†xU’—·üF*RúÍÏÃÕPÐgù)ÿÈK€ÀxYˆæ¿¬?“¸€ð3"ç†xU’—·üF*RúÍÏÃÕPÐgù)ÿÈK€ÀxYˆæ¿¬?“¸€ð3"ç†xU’—·üF*RúÍÏÃÕPÐgù)ÿÈK€ÀxYˆæ¿¬?“¸€ð3"ç†xU’—·üF*RúÍÏÃÕPÐgù) +  + \ No newline at end of file diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 459544fbc..3f803edfb 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -492,6 +492,36 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } + SECTION("non-empty object with binary member") + { + const size_t N = 10; + const auto s = std::vector(N, 'x'); + json j = + { + { "entry", json::binary_array(s) } + }; + + std::vector expected = + { + 0x1B, 0x00, 0x00, 0x00, // size (little endian) + 0x05, // entry: binary + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x0A, 0x00, 0x00, 0x00, // size of binary (little endian) + 0x00, // Generic binary subtype + 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, 0x78, + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + SECTION("Some more complex document") { // directly encoding uint64 is not supported in bson (only for timestamp values) @@ -646,6 +676,11 @@ class SaxCountdown return events_left-- > 0; } + bool binary(std::vector&) + { + return events_left-- > 0; + } + bool start_object(std::size_t) { return events_left-- > 0; diff --git a/test/src/unit-cbor.cpp b/test/src/unit-cbor.cpp index e4196545a..c4c73426a 100644 --- a/test/src/unit-cbor.cpp +++ b/test/src/unit-cbor.cpp @@ -36,6 +36,7 @@ using nlohmann::json; #include #include #include +#include #include namespace @@ -76,6 +77,11 @@ class SaxCountdown return events_left-- > 0; } + bool binary(std::vector&) + { + return events_left-- > 0; + } + bool start_object(std::size_t) { return events_left-- > 0; @@ -1285,10 +1291,156 @@ TEST_CASE("CBOR") CHECK(json::from_cbor(result, true, false) == j); } } + + SECTION("binary") + { + SECTION("N = 0..23") + { + for (size_t N = 0; N <= 0x17; ++N) + { + CAPTURE(N) + + // create JSON value with byte array containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector + std::vector expected; + expected.push_back(static_cast(0x40 + N)); + for (size_t i = 0; i < N; ++i) + { + expected.push_back(0x78); + } + + // compare result + size + const auto result = json::to_cbor(j); + CHECK(result == expected); + CHECK(result.size() == N + 1); + // check that no null byte is appended + if (N > 0) + { + CHECK(result.back() != '\x00'); + } + + // roundtrip + CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); + } + } + + SECTION("N = 24..255") + { + for (size_t N = 24; N <= 255; ++N) + { + CAPTURE(N) + + // create JSON value with string containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector + std::vector expected; + expected.push_back(0x58); + expected.push_back(static_cast(N)); + for (size_t i = 0; i < N; ++i) + { + expected.push_back('x'); + } + + // compare result + size + const auto result = json::to_cbor(j); + CHECK(result == expected); + CHECK(result.size() == N + 2); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip + CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); + } + } + + SECTION("N = 256..65535") + { + for (size_t N : + { + 256u, 999u, 1025u, 3333u, 2048u, 65535u + }) + { + CAPTURE(N) + + // create JSON value with string containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector (hack: create string first) + std::vector expected(N, 'x'); + // reverse order of commands, because we insert at begin() + expected.insert(expected.begin(), static_cast(N & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 8) & 0xff)); + expected.insert(expected.begin(), 0x59); + + // compare result + size + const auto result = json::to_cbor(j); + CHECK(result == expected); + CHECK(result.size() == N + 3); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip + CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); + } + } + + SECTION("N = 65536..4294967295") + { + for (size_t N : + { + 65536u, 77777u, 1048576u + }) + { + CAPTURE(N) + + // create JSON value with string containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector (hack: create string first) + std::vector expected(N, 'x'); + // reverse order of commands, because we insert at begin() + expected.insert(expected.begin(), static_cast(N & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 8) & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 16) & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 24) & 0xff)); + expected.insert(expected.begin(), 0x5a); + + // compare result + size + const auto result = json::to_cbor(j); + CHECK(result == expected); + CHECK(result.size() == N + 5); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip + CHECK(json::from_cbor(result) == j); + CHECK(json::from_cbor(result, true, false) == j); + } + } + } } SECTION("additional deserialization") { + SECTION("0x5b (byte array)") + { + std::vector given = {0x5b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x61 + }; + json j = json::from_cbor(given); + CHECK(j == json::binary_array(std::vector {'a'})); + } + SECTION("0x7b (string)") { std::vector given = {0x7b, 0x00, 0x00, 0x00, 0x00, @@ -1455,14 +1607,8 @@ TEST_CASE("CBOR") 0x1c, 0x1d, 0x1e, 0x1f, // ? 0x3c, 0x3d, 0x3e, 0x3f, - // byte strings - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - // byte strings - 0x58, 0x59, 0x5a, 0x5b, // ? 0x5c, 0x5d, 0x5e, - // byte string - 0x5f, // ? 0x7c, 0x7d, 0x7e, // ? @@ -1929,12 +2075,6 @@ TEST_CASE("all CBOR first bytes") { //// types not supported by this library - // byte strings - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - // byte strings - 0x58, 0x59, 0x5a, 0x5b, 0x5f, // date/time 0xc0, 0xc1, // bignum @@ -2144,6 +2284,20 @@ TEST_CASE("examples from RFC 7049 Appendix A") CHECK(json::parse("\"streaming\"") == json::from_cbor(std::vector({0x7f, 0x65, 0x73, 0x74, 0x72, 0x65, 0x61, 0x64, 0x6d, 0x69, 0x6e, 0x67, 0xff}))); } + SECTION("byte arrays") + { + std::ifstream f_cbor("test/data/binary_data/cbor_binary.cbor", std::ios::binary); + std::vector packed((std::istreambuf_iterator(f_cbor)), + std::istreambuf_iterator()); + json j; + CHECK_NOTHROW(j = json::from_cbor(packed)); + + std::ifstream f_bin("test/data/binary_data/cbor_binary.out", std::ios::binary); + std::vector expected((std::istreambuf_iterator(f_bin)), + std::istreambuf_iterator()); + CHECK(j == json::binary_array(expected)); + } + SECTION("arrays") { CHECK(json::to_cbor(json::parse("[]")) == std::vector({0x80})); diff --git a/test/src/unit-class_parser.cpp b/test/src/unit-class_parser.cpp index 88438a4d1..188f446c0 100644 --- a/test/src/unit-class_parser.cpp +++ b/test/src/unit-class_parser.cpp @@ -77,6 +77,21 @@ class SaxEventLogger return true; } + bool binary(std::vector& val) + { + std::string binary_contents = "binary("; + std::string comma_space = ""; + for (auto b : val) + { + binary_contents.append(comma_space); + binary_contents.append(std::to_string(static_cast(b))); + comma_space = ", "; + } + binary_contents.append(")"); + events.push_back(binary_contents); + return true; + } + bool start_object(std::size_t elements) { if (elements == std::size_t(-1)) @@ -168,6 +183,11 @@ class SaxCountdown : public nlohmann::json::json_sax_t return events_left-- > 0; } + bool binary(std::vector&) override + { + return events_left-- > 0; + } + bool start_object(std::size_t) override { return events_left-- > 0; diff --git a/test/src/unit-deserialization.cpp b/test/src/unit-deserialization.cpp index 253345876..7f2a0d381 100644 --- a/test/src/unit-deserialization.cpp +++ b/test/src/unit-deserialization.cpp @@ -76,6 +76,21 @@ struct SaxEventLogger : public nlohmann::json_sax return true; } + bool binary(std::vector& val) + { + std::string binary_contents = "binary("; + std::string comma_space = ""; + for (auto b : val) + { + binary_contents.append(comma_space); + binary_contents.append(std::to_string(static_cast(b))); + comma_space = ", "; + } + binary_contents.append(")"); + events.push_back(binary_contents); + return true; + } + bool start_object(std::size_t elements) override { if (elements == std::size_t(-1)) diff --git a/test/src/unit-msgpack.cpp b/test/src/unit-msgpack.cpp index 205e63ce9..f5d81f728 100644 --- a/test/src/unit-msgpack.cpp +++ b/test/src/unit-msgpack.cpp @@ -75,6 +75,11 @@ class SaxCountdown return events_left-- > 0; } + bool binary(std::vector&) + { + return events_left-- > 0; + } + bool start_object(std::size_t) { return events_left-- > 0; @@ -1115,6 +1120,261 @@ TEST_CASE("MessagePack") CHECK(json::from_msgpack(result, true, false) == j); } } + + SECTION("extension") + { + SECTION("N = 0..255") + { + for (size_t N = 0; N <= 0xFF; ++N) + { + CAPTURE(N) + + // create JSON value with byte array containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + std::uint8_t subtype = 42; + j.set_subtype(subtype); + + // create expected byte vector + std::vector expected; + switch (N) + { + case 1: + expected.push_back(static_cast(0xD4)); + break; + case 2: + expected.push_back(static_cast(0xD5)); + break; + case 4: + expected.push_back(static_cast(0xD6)); + break; + case 8: + expected.push_back(static_cast(0xD7)); + break; + case 16: + expected.push_back(static_cast(0xD8)); + break; + default: + expected.push_back(static_cast(0xC7)); + expected.push_back(static_cast(N)); + break; + } + expected.push_back(subtype); + + for (size_t i = 0; i < N; ++i) + { + expected.push_back(0x78); + } + + // compare result + size + const auto result = json::to_msgpack(j); + CHECK(result == expected); + switch (N) + { + case 1: + case 2: + case 4: + case 8: + case 16: + CHECK(result.size() == N + 2); + break; + default: + CHECK(result.size() == N + 3); + break; + } + + // check that no null byte is appended + if (N > 0) + { + CHECK(result.back() != '\x00'); + } + + // roundtrip + CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); + } + } + + SECTION("N = 256..65535") + { + for (std::size_t N : + { + 256u, 999u, 1025u, 3333u, 2048u, 65535u + }) + { + CAPTURE(N) + + // create JSON value with string containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + std::uint8_t subtype = 42; + j.set_subtype(subtype); + + // create expected byte vector (hack: create string first) + std::vector expected(N, 'x'); + // reverse order of commands, because we insert at begin() + expected.insert(expected.begin(), subtype); + expected.insert(expected.begin(), static_cast(N & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 8) & 0xff)); + expected.insert(expected.begin(), 0xC8); + + // compare result + size + const auto result = json::to_msgpack(j); + CHECK(result == expected); + CHECK(result.size() == N + 4); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip + CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); + } + } + + SECTION("N = 65536..4294967295") + { + for (std::size_t N : + { + 65536u, 77777u, 1048576u + }) + { + CAPTURE(N) + + // create JSON value with string containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + std::uint8_t subtype = 42; + j.set_subtype(subtype); + + // create expected byte vector (hack: create string first) + std::vector expected(N, 'x'); + // reverse order of commands, because we insert at begin() + expected.insert(expected.begin(), subtype); + expected.insert(expected.begin(), static_cast(N & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 8) & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 16) & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 24) & 0xff)); + expected.insert(expected.begin(), 0xC9); + + // compare result + size + const auto result = json::to_msgpack(j); + CHECK(result == expected); + CHECK(result.size() == N + 6); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip + CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); + } + } + } + + SECTION("binary") + { + SECTION("N = 0..255") + { + for (std::size_t N = 0; N <= 0xFF; ++N) + { + CAPTURE(N) + + // create JSON value with byte array containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector + std::vector expected; + expected.push_back(static_cast(0xC4)); + expected.push_back(static_cast(N)); + for (size_t i = 0; i < N; ++i) + { + expected.push_back(0x78); + } + + // compare result + size + const auto result = json::to_msgpack(j); + CHECK(result == expected); + CHECK(result.size() == N + 2); + // check that no null byte is appended + if (N > 0) + { + CHECK(result.back() != '\x00'); + } + + // roundtrip + CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); + } + } + + SECTION("N = 256..65535") + { + for (std::size_t N : + { + 256u, 999u, 1025u, 3333u, 2048u, 65535u + }) + { + CAPTURE(N) + + // create JSON value with string containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector (hack: create string first) + std::vector expected(N, 'x'); + // reverse order of commands, because we insert at begin() + expected.insert(expected.begin(), static_cast(N & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 8) & 0xff)); + expected.insert(expected.begin(), 0xC5); + + // compare result + size + const auto result = json::to_msgpack(j); + CHECK(result == expected); + CHECK(result.size() == N + 3); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip + CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); + } + } + + SECTION("N = 65536..4294967295") + { + for (std::size_t N : + { + 65536u, 77777u, 1048576u + }) + { + CAPTURE(N) + + // create JSON value with string containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector (hack: create string first) + std::vector expected(N, 'x'); + // reverse order of commands, because we insert at begin() + expected.insert(expected.begin(), static_cast(N & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 8) & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 16) & 0xff)); + expected.insert(expected.begin(), static_cast((N >> 24) & 0xff)); + expected.insert(expected.begin(), 0xC6); + + // compare result + size + const auto result = json::to_msgpack(j); + CHECK(result == expected); + CHECK(result.size() == N + 5); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip + CHECK(json::from_msgpack(result) == j); + CHECK(json::from_msgpack(result, true, false) == j); + } + } + } } SECTION("from float32") @@ -1226,12 +1486,6 @@ TEST_CASE("MessagePack") CHECK_THROWS_AS(_ = json::from_msgpack(std::vector({0xc1})), json::parse_error&); CHECK_THROWS_WITH(_ = json::from_msgpack(std::vector({0xc1})), "[json.exception.parse_error.112] parse error at byte 1: syntax error while parsing MessagePack value: invalid byte: 0xC1"); - CHECK(json::from_msgpack(std::vector({0xc6}), true, false).is_discarded()); - - CHECK_THROWS_AS(_ = json::from_msgpack(std::vector({0xc6})), json::parse_error&); - CHECK_THROWS_WITH(_ = json::from_msgpack(std::vector({0xc6})), - "[json.exception.parse_error.112] parse error at byte 1: syntax error while parsing MessagePack value: invalid byte: 0xC6"); - CHECK(json::from_msgpack(std::vector({0xc6}), true, false).is_discarded()); } SECTION("all unsupported bytes") @@ -1239,13 +1493,7 @@ TEST_CASE("MessagePack") for (auto byte : { // never used - 0xc1, - // bin - 0xc4, 0xc5, 0xc6, - // ext - 0xc7, 0xc8, 0xc9, - // fixext - 0xd4, 0xd5, 0xd6, 0xd7, 0xd8 + 0xc1 }) { json _; diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index f5139264c..3d86d33da 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -107,7 +107,7 @@ struct foo_serializer < T, typename std::enable_if < !std::is_same::valu } using foo_json = nlohmann::basic_json; + std::uint64_t, double, std::allocator, ns::foo_serializer, std::vector>; ///////////////////////////////////////////////////////////////////// // for #805 diff --git a/test/src/unit-ubjson.cpp b/test/src/unit-ubjson.cpp index dc5da9792..dc356221f 100644 --- a/test/src/unit-ubjson.cpp +++ b/test/src/unit-ubjson.cpp @@ -73,6 +73,11 @@ class SaxCountdown return events_left-- > 0; } + bool binary(std::vector&) + { + return events_left-- > 0; + } + bool start_object(std::size_t) { return events_left-- > 0; @@ -905,6 +910,231 @@ TEST_CASE("UBJSON") } } + SECTION("binary") + { + SECTION("N = 0..127") + { + for (std::size_t N = 0; N <= 127; ++N) + { + CAPTURE(N) + + // create JSON value with byte array containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector + std::vector expected; + expected.push_back(static_cast('[')); + if (N != 0) + { + expected.push_back(static_cast('$')); + expected.push_back(static_cast('U')); + } + expected.push_back(static_cast('#')); + expected.push_back(static_cast('i')); + expected.push_back(static_cast(N)); + for (size_t i = 0; i < N; ++i) + { + expected.push_back(0x78); + } + + // compare result + size + const auto result = json::to_ubjson(j, true, true); + CHECK(result == expected); + if (N == 0) + { + CHECK(result.size() == N + 4); + } + else + { + CHECK(result.size() == N + 6); + } + + // check that no null byte is appended + if (N > 0) + { + CHECK(result.back() != '\x00'); + } + + // roundtrip only works to an array of numbers + json j_out = s; + CHECK(json::from_ubjson(result) == j_out); + CHECK(json::from_ubjson(result, true, false) == j_out); + } + } + + SECTION("N = 128..255") + { + for (std::size_t N = 128; N <= 255; ++N) + { + CAPTURE(N) + + // create JSON value with byte array containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector + std::vector expected; + expected.push_back(static_cast('[')); + expected.push_back(static_cast('$')); + expected.push_back(static_cast('U')); + expected.push_back(static_cast('#')); + expected.push_back(static_cast('U')); + expected.push_back(static_cast(N)); + for (size_t i = 0; i < N; ++i) + { + expected.push_back(0x78); + } + + // compare result + size + const auto result = json::to_ubjson(j, true, true); + CHECK(result == expected); + CHECK(result.size() == N + 6); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip only works to an array of numbers + json j_out = s; + CHECK(json::from_ubjson(result) == j_out); + CHECK(json::from_ubjson(result, true, false) == j_out); + } + } + + SECTION("N = 256..32767") + { + for (std::size_t N : + { + 256u, 999u, 1025u, 3333u, 2048u, 32767u + }) + { + CAPTURE(N) + + // create JSON value with byte array containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector + std::vector expected(N + 7, 'x'); + expected[0] = '['; + expected[1] = '$'; + expected[2] = 'U'; + expected[3] = '#'; + expected[4] = 'I'; + expected[5] = static_cast((N >> 8) & 0xFF); + expected[6] = static_cast(N & 0xFF); + + // compare result + size + const auto result = json::to_ubjson(j, true, true); + CHECK(result == expected); + CHECK(result.size() == N + 7); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip only works to an array of numbers + json j_out = s; + CHECK(json::from_ubjson(result) == j_out); + CHECK(json::from_ubjson(result, true, false) == j_out); + } + } + + SECTION("N = 32768..2147483647") + { + for (std::size_t N : + { + 32768u, 77777u, 1048576u + }) + { + CAPTURE(N) + + // create JSON value with byte array containing of N * 'x' + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + // create expected byte vector + std::vector expected(N + 9, 'x'); + expected[0] = '['; + expected[1] = '$'; + expected[2] = 'U'; + expected[3] = '#'; + expected[4] = 'l'; + expected[5] = static_cast((N >> 24) & 0xFF); + expected[6] = static_cast((N >> 16) & 0xFF); + expected[7] = static_cast((N >> 8) & 0xFF); + expected[8] = static_cast(N & 0xFF); + + // compare result + size + const auto result = json::to_ubjson(j, true, true); + CHECK(result == expected); + CHECK(result.size() == N + 9); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip only works to an array of numbers + json j_out = s; + CHECK(json::from_ubjson(result) == j_out); + CHECK(json::from_ubjson(result, true, false) == j_out); + } + } + + SECTION("Other Serializations") + { + const std::size_t N = 10; + const auto s = std::vector(N, 'x'); + json j = json::binary_array(s); + + SECTION("No Count No Type") + { + std::vector expected; + expected.push_back(static_cast('[')); + for (std::size_t i = 0; i < N; ++i) + { + expected.push_back(static_cast('U')); + expected.push_back(static_cast(0x78)); + } + expected.push_back(static_cast(']')); + + // compare result + size + const auto result = json::to_ubjson(j, false, false); + CHECK(result == expected); + CHECK(result.size() == N + 12); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip only works to an array of numbers + json j_out = s; + CHECK(json::from_ubjson(result) == j_out); + CHECK(json::from_ubjson(result, true, false) == j_out); + } + + SECTION("Yes Count No Type") + { + std::vector expected; + expected.push_back(static_cast('[')); + expected.push_back(static_cast('#')); + expected.push_back(static_cast('i')); + expected.push_back(static_cast(N)); + + for (size_t i = 0; i < N; ++i) + { + expected.push_back(static_cast('U')); + expected.push_back(static_cast(0x78)); + } + + // compare result + size + const auto result = json::to_ubjson(j, true, false); + CHECK(result == expected); + CHECK(result.size() == N + 14); + // check that no null byte is appended + CHECK(result.back() != '\x00'); + + // roundtrip only works to an array of numbers + json j_out = s; + CHECK(json::from_ubjson(result) == j_out); + CHECK(json::from_ubjson(result, true, false) == j_out); + } + } + } + SECTION("array") { SECTION("empty") diff --git a/test/src/unit-udt.cpp b/test/src/unit-udt.cpp index bb3ae72c0..86db2f23e 100644 --- a/test/src/unit-udt.cpp +++ b/test/src/unit-udt.cpp @@ -653,8 +653,7 @@ TEST_CASE("custom serializer for pods" * doctest::test_suite("udt")) { using custom_json = nlohmann::basic_json; + std::int64_t, std::uint64_t, double, std::allocator, pod_serializer>; auto p = udt::small_pod{42, '/', 42}; custom_json j = p;