2018-01-10 17:18:31 +08:00
# pragma once
2017-08-14 23:50:24 +08:00
2017-08-15 01:28:01 +08:00
# include <algorithm> // generate_n
# include <array> // array
# include <cmath> // ldexp
# include <cstddef> // size_t
# include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
2018-05-31 21:32:21 +08:00
# include <cstdio> // snprintf
2017-08-15 01:28:01 +08:00
# include <cstring> // memcpy
# include <iterator> // back_inserter
# include <limits> // numeric_limits
# include <string> // char_traits, string
# include <utility> // make_pair, move
2020-12-27 00:56:16 +08:00
# include <vector> // vector
2017-08-14 23:50:24 +08:00
2021-01-10 02:21:18 +08:00
# include <nlohmann/detail/diagnostics_t.hpp>
2019-03-17 19:01:49 +08:00
# include <nlohmann/detail/exceptions.hpp>
2018-01-29 18:21:11 +08:00
# include <nlohmann/detail/input/input_adapters.hpp>
2018-03-12 01:47:38 +08:00
# include <nlohmann/detail/input/json_sax.hpp>
2020-07-20 15:42:37 +08:00
# include <nlohmann/detail/input/lexer.hpp>
2018-01-29 18:21:11 +08:00
# include <nlohmann/detail/macro_scope.hpp>
2018-07-24 20:47:41 +08:00
# include <nlohmann/detail/meta/is_sax.hpp>
2018-01-29 18:21:11 +08:00
# include <nlohmann/detail/value_t.hpp>
2017-08-14 23:50:24 +08:00
namespace nlohmann
{
namespace detail
{
2020-02-20 23:19:29 +08:00
2020-07-12 22:51:43 +08:00
/*!
@brief how to treat CBOR tags

Selects what the CBOR parser does when the input contains a tagged item.
*/
enum class cbor_tag_handler_t
{
    /// report a parse_error in case of a tag
    error,
    /// ignore tags
    ignore
};
2020-02-20 23:19:29 +08:00
/*!
@brief determine system byte order

@param[in] num  probe value whose first byte in memory is inspected
                (defaults to 1)

@return true if and only if the byte stored at the lowest address of @a num
        equals 1; with the default argument this is the case if and only if
        the system's byte order is little endian

@note from https://stackoverflow.com/a/1001328/266378
*/
static inline bool little_endianess(int num = 1) noexcept
{
    // view the integer as raw bytes and look at the one at the lowest address
    const char* const first_byte = reinterpret_cast<const char*>(&num);
    return first_byte[0] == 1;
}
2017-08-14 23:50:24 +08:00
///////////////////
// binary reader //
///////////////////
/*!
2018-03-19 06:00:45 +08:00
@brief deserialization of BSON, CBOR, MessagePack, and UBJSON values
2017-08-14 23:50:24 +08:00
*/
2020-02-20 03:59:31 +08:00
template < typename BasicJsonType , typename InputAdapterType , typename SAX = json_sax_dom_parser < BasicJsonType > >
2017-08-14 23:50:24 +08:00
class binary_reader
{
using number_integer_t = typename BasicJsonType : : number_integer_t ;
using number_unsigned_t = typename BasicJsonType : : number_unsigned_t ;
2018-06-23 16:28:04 +08:00
using number_float_t = typename BasicJsonType : : number_float_t ;
2018-02-01 15:01:01 +08:00
using string_t = typename BasicJsonType : : string_t ;
2020-05-18 04:50:27 +08:00
using binary_t = typename BasicJsonType : : binary_t ;
2018-07-02 16:14:37 +08:00
using json_sax_t = SAX ;
2020-06-06 20:30:17 +08:00
using char_type = typename InputAdapterType : : char_type ;
using char_int_type = typename std : : char_traits < char_type > : : int_type ;
2021-01-10 02:21:18 +08:00
using diagnostics_t = detail : : diagnostics_t < BasicJsonType > ;
2017-08-14 23:50:24 +08:00
public :
/*!
@ brief create a binary reader
@ param [ in ] adapter input adapter to read from
*/
2020-02-20 03:59:31 +08:00
explicit binary_reader ( InputAdapterType & & adapter ) : ia ( std : : move ( adapter ) )
2017-08-14 23:50:24 +08:00
{
2018-07-24 20:47:41 +08:00
( void ) detail : : is_sax_static_asserts < SAX , BasicJsonType > { } ;
2017-08-14 23:50:24 +08:00
}
2019-03-17 19:01:49 +08:00
// make class move-only
binary_reader ( const binary_reader & ) = delete ;
2019-03-17 22:20:22 +08:00
binary_reader ( binary_reader & & ) = default ;
2019-03-17 19:01:49 +08:00
binary_reader & operator = ( const binary_reader & ) = delete ;
2019-03-17 22:20:22 +08:00
binary_reader & operator = ( binary_reader & & ) = default ;
2019-03-17 19:01:49 +08:00
~ binary_reader ( ) = default ;
2017-08-14 23:50:24 +08:00
/*!
2018-03-20 05:48:13 +08:00
@ param [ in ] format the binary format to parse
@ param [ in ] sax_ a SAX event processor
2017-08-14 23:50:24 +08:00
@ param [ in ] strict whether to expect the input to be consumed completed
2020-07-12 22:51:43 +08:00
@ param [ in ] tag_handler how to treat CBOR tags
2017-08-14 23:50:24 +08:00
2018-03-20 05:48:13 +08:00
@ return
2017-08-14 23:50:24 +08:00
*/
2019-07-02 04:37:30 +08:00
JSON_HEDLEY_NON_NULL ( 3 )
2018-03-21 05:39:08 +08:00
bool sax_parse ( const input_format_t format ,
json_sax_t * sax_ ,
2020-07-12 22:51:43 +08:00
const bool strict = true ,
const cbor_tag_handler_t tag_handler = cbor_tag_handler_t : : error )
2017-08-14 23:50:24 +08:00
{
2018-03-20 05:48:13 +08:00
sax = sax_ ;
2018-08-18 08:12:19 +08:00
bool result = false ;
2017-08-14 23:50:24 +08:00
2018-03-20 05:48:13 +08:00
switch ( format )
2017-08-14 23:50:24 +08:00
{
2018-10-28 00:31:03 +08:00
case input_format_t : : bson :
result = parse_bson_internal ( ) ;
break ;
2018-03-21 05:39:08 +08:00
case input_format_t : : cbor :
2020-07-12 22:51:43 +08:00
result = parse_cbor_internal ( true , tag_handler ) ;
2018-03-20 05:48:13 +08:00
break ;
2017-08-14 23:50:24 +08:00
2018-03-21 05:39:08 +08:00
case input_format_t : : msgpack :
2018-03-20 05:48:13 +08:00
result = parse_msgpack_internal ( ) ;
break ;
2018-01-14 17:27:30 +08:00
2018-03-21 05:39:08 +08:00
case input_format_t : : ubjson :
2018-03-20 05:48:13 +08:00
result = parse_ubjson_internal ( ) ;
break ;
2018-03-21 05:39:08 +08:00
2019-03-18 20:53:48 +08:00
default : // LCOV_EXCL_LINE
2020-07-06 18:22:31 +08:00
JSON_ASSERT ( false ) ; // LCOV_EXCL_LINE
2018-03-20 05:48:13 +08:00
}
2018-01-14 17:27:30 +08:00
2018-03-20 05:48:13 +08:00
// strict mode: next byte must be EOF
2020-06-03 20:20:36 +08:00
if ( result & & strict )
2018-01-14 17:27:30 +08:00
{
2018-03-21 05:39:08 +08:00
if ( format = = input_format_t : : ubjson )
2018-03-20 05:48:13 +08:00
{
get_ignore_noop ( ) ;
}
else
{
get ( ) ;
}
2020-06-06 20:30:17 +08:00
if ( JSON_HEDLEY_UNLIKELY ( current ! = std : : char_traits < char_type > : : eof ( ) ) )
2018-03-20 05:48:13 +08:00
{
2018-10-17 18:15:58 +08:00
return sax - > parse_error ( chars_read , get_token_string ( ) ,
2021-01-10 02:21:18 +08:00
parse_error : : create ( 110 , chars_read , exception_message ( format , " expected end of input; last byte: 0x " + get_token_string ( ) , " value " ) , diagnostics_t ( ) ) ) ;
2018-03-20 05:48:13 +08:00
}
2018-01-14 17:27:30 +08:00
}
2018-03-20 05:48:13 +08:00
2018-03-12 01:47:38 +08:00
return result ;
2018-01-14 17:27:30 +08:00
}
2017-08-14 23:50:24 +08:00
private :
2018-10-25 05:39:30 +08:00
//////////
// BSON //
//////////
/*!
@ brief Reads in a BSON - object and passes it to the SAX - parser .
@ return whether a valid BSON - value was passed to the SAX parser
*/
bool parse_bson_internal ( )
{
2020-06-23 04:32:21 +08:00
std : : int32_t document_size { } ;
2018-10-28 00:31:03 +08:00
get_number < std : : int32_t , true > ( input_format_t : : bson , document_size ) ;
2018-10-25 05:39:30 +08:00
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( std : : size_t ( - 1 ) ) ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_bson_element_list ( /*is_array*/ false ) ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
return sax - > end_object ( ) ;
}
2018-09-15 04:58:22 +08:00
2018-09-15 09:08:50 +08:00
/*!
2018-10-07 13:52:12 +08:00
@ brief Parses a C - style string from the BSON input .
2018-10-25 05:39:30 +08:00
@ param [ in , out ] result A reference to the string variable where the read
string is to be stored .
@ return ` true ` if the \ x00 - byte indicating the end of the string was
encountered before the EOF ; false ` indicates an unexpected EOF .
2018-09-15 09:08:50 +08:00
*/
2018-09-15 17:33:24 +08:00
bool get_bson_cstr ( string_t & result )
2018-09-15 09:08:50 +08:00
{
2018-09-29 17:50:01 +08:00
auto out = std : : back_inserter ( result ) ;
while ( true )
2018-09-15 09:08:50 +08:00
{
get ( ) ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : bson , " cstring " ) ) )
2018-09-15 09:08:50 +08:00
{
2018-09-29 17:50:01 +08:00
return false ;
}
if ( current = = 0x00 )
{
return true ;
2018-09-15 09:08:50 +08:00
}
2020-06-06 20:30:17 +08:00
* out + + = static_cast < typename string_t : : value_type > ( current ) ;
2018-09-29 17:50:01 +08:00
}
2018-09-15 09:08:50 +08:00
}
2018-10-07 13:52:12 +08:00
/*!
2018-10-25 05:39:30 +08:00
@ brief Parses a zero - terminated string of length @ a len from the BSON
input .
@ param [ in ] len The length ( including the zero - byte at the end ) of the
string to be read .
@ param [ in , out ] result A reference to the string variable where the read
string is to be stored .
2018-10-07 13:52:12 +08:00
@ tparam NumberType The type of the length @ a len
2018-10-28 16:16:40 +08:00
@ pre len > = 1
2018-10-07 13:52:12 +08:00
@ return ` true ` if the string was successfully parsed
*/
2018-10-25 05:39:30 +08:00
template < typename NumberType >
2018-10-07 13:52:12 +08:00
bool get_bson_string ( const NumberType len , string_t & result )
{
2019-07-02 04:37:30 +08:00
if ( JSON_HEDLEY_UNLIKELY ( len < 1 ) )
2018-10-28 16:16:40 +08:00
{
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read , exception_message ( input_format_t : : bson , " string length must be at least 1, is " + std : : to_string ( len ) , " string " ) , diagnostics_t ( ) ) ) ;
2018-10-28 16:16:40 +08:00
}
2020-06-27 19:14:48 +08:00
return get_string ( input_format_t : : bson , len - static_cast < NumberType > ( 1 ) , result ) & & get ( ) ! = std : : char_traits < char_type > : : eof ( ) ;
2018-10-07 13:52:12 +08:00
}
2019-07-05 12:13:25 +08:00
/*!
@ brief Parses a byte array input of length @ a len from the BSON input .
@ param [ in ] len The length of the byte array to be read .
@ param [ in , out ] result A reference to the binary variable where the read
array is to be stored .
@ tparam NumberType The type of the length @ a len
@ pre len > = 0
@ return ` true ` if the byte array was successfully parsed
*/
template < typename NumberType >
2020-05-18 04:50:27 +08:00
bool get_bson_binary ( const NumberType len , binary_t & result )
2019-07-05 12:13:25 +08:00
{
if ( JSON_HEDLEY_UNLIKELY ( len < 0 ) )
{
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read , exception_message ( input_format_t : : bson , " byte array length cannot be negative, is " + std : : to_string ( len ) , " binary " ) , diagnostics_t ( ) ) ) ;
2019-07-05 12:13:25 +08:00
}
2020-05-17 19:51:59 +08:00
// All BSON binary values have a subtype
2020-06-23 04:32:21 +08:00
std : : uint8_t subtype { } ;
2020-05-17 19:51:59 +08:00
get_number < std : : uint8_t > ( input_format_t : : bson , subtype ) ;
result . set_subtype ( subtype ) ;
2019-07-05 12:13:25 +08:00
return get_binary ( input_format_t : : bson , len , result ) ;
}
2018-10-07 13:52:12 +08:00
/*!
@ brief Read a BSON document element of the given @ a element_type .
2018-10-25 05:39:30 +08:00
@ param [ in ] element_type The BSON element type , c . f . http : //bsonspec.org/spec.html
@ param [ in ] element_type_parse_position The position in the input stream ,
where the ` element_type ` was read .
@ warning Not all BSON element types are supported yet . An unsupported
@ a element_type will give rise to a parse_error .114 :
Unsupported BSON record type 0 x . . .
2018-10-07 13:52:12 +08:00
@ return whether a valid BSON - object / array was passed to the SAX parser
*/
2020-06-06 20:30:17 +08:00
bool parse_bson_element_internal ( const char_int_type element_type ,
2018-10-25 05:39:30 +08:00
const std : : size_t element_type_parse_position )
2018-10-07 13:52:12 +08:00
{
switch ( element_type )
{
case 0x01 : // double
{
2020-06-23 04:32:21 +08:00
double number { } ;
2020-06-03 20:20:36 +08:00
return get_number < double , true > ( input_format_t : : bson , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
2018-10-07 13:52:12 +08:00
}
2018-10-25 05:39:30 +08:00
2018-10-07 13:52:12 +08:00
case 0x02 : // string
{
2020-06-23 04:32:21 +08:00
std : : int32_t len { } ;
2018-10-07 13:52:12 +08:00
string_t value ;
2020-06-03 20:20:36 +08:00
return get_number < std : : int32_t , true > ( input_format_t : : bson , len ) & & get_bson_string ( len , value ) & & sax - > string ( value ) ;
2018-10-07 13:52:12 +08:00
}
2018-10-25 05:39:30 +08:00
case 0x03 : // object
2018-10-07 13:52:12 +08:00
{
2018-10-25 05:39:30 +08:00
return parse_bson_internal ( ) ;
2018-10-07 13:52:12 +08:00
}
2018-10-25 05:39:30 +08:00
case 0x04 : // array
2018-10-07 13:52:12 +08:00
{
2018-10-25 05:39:30 +08:00
return parse_bson_array ( ) ;
2018-10-07 13:52:12 +08:00
}
2018-10-25 05:39:30 +08:00
2019-07-05 12:13:25 +08:00
case 0x05 : // binary
{
2020-06-23 04:32:21 +08:00
std : : int32_t len { } ;
2020-05-18 04:50:27 +08:00
binary_t value ;
2020-06-03 20:20:36 +08:00
return get_number < std : : int32_t , true > ( input_format_t : : bson , len ) & & get_bson_binary ( len , value ) & & sax - > binary ( value ) ;
2019-07-05 12:13:25 +08:00
}
2018-10-25 05:39:30 +08:00
case 0x08 : // boolean
2018-10-07 13:52:12 +08:00
{
2018-11-22 04:17:38 +08:00
return sax - > boolean ( get ( ) ! = 0 ) ;
2018-10-07 13:52:12 +08:00
}
2018-10-25 05:39:30 +08:00
2018-10-07 13:52:12 +08:00
case 0x0A : // null
{
return sax - > null ( ) ;
}
2018-10-25 05:39:30 +08:00
case 0x10 : // int32
2018-10-07 13:52:12 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int32_t value { } ;
2020-06-03 20:20:36 +08:00
return get_number < std : : int32_t , true > ( input_format_t : : bson , value ) & & sax - > number_integer ( value ) ;
2018-10-07 13:52:12 +08:00
}
2018-10-25 05:39:30 +08:00
case 0x12 : // int64
2018-10-07 13:52:12 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int64_t value { } ;
2020-06-03 20:20:36 +08:00
return get_number < std : : int64_t , true > ( input_format_t : : bson , value ) & & sax - > number_integer ( value ) ;
2018-10-07 13:52:12 +08:00
}
2018-10-25 05:39:30 +08:00
2018-10-07 13:52:12 +08:00
default : // anything else not supported (yet)
{
2019-03-20 18:06:39 +08:00
std : : array < char , 3 > cr { { } } ;
2019-03-17 07:27:44 +08:00
( std : : snprintf ) ( cr . data ( ) , cr . size ( ) , " %.2hhX " , static_cast < unsigned char > ( element_type ) ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( element_type_parse_position , std : : string ( cr . data ( ) ) , parse_error : : create ( 114 , element_type_parse_position , " Unsupported BSON record type 0x " + std : : string ( cr . data ( ) ) , diagnostics_t ( ) ) ) ;
2018-10-07 13:52:12 +08:00
}
}
}
/*!
2018-10-25 05:39:30 +08:00
@ brief Read a BSON element list ( as specified in the BSON - spec )
The same binary layout is used for objects and arrays , hence it must be
indicated with the argument @ a is_array which one is expected
( true - - > array , false - - > object ) .
@ param [ in ] is_array Determines if the element list being read is to be
treated as an object ( @ a is_array = = false ) , or as an
array ( @ a is_array = = true ) .
2018-10-07 13:52:12 +08:00
@ return whether a valid BSON - object / array was passed to the SAX parser
*/
2018-10-25 05:39:30 +08:00
bool parse_bson_element_list ( const bool is_array )
2018-09-15 04:58:22 +08:00
{
2018-10-25 05:39:30 +08:00
string_t key ;
2019-07-05 12:13:25 +08:00
2020-06-06 20:30:17 +08:00
while ( auto element_type = get ( ) )
2018-09-15 09:08:50 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : bson , " element list " ) ) )
2018-10-17 01:13:07 +08:00
{
return false ;
}
2018-10-07 13:52:12 +08:00
const std : : size_t element_type_parse_position = chars_read ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_bson_cstr ( key ) ) )
2018-09-26 02:34:25 +08:00
{
return false ;
}
2020-06-03 20:20:36 +08:00
if ( ! is_array & & ! sax - > key ( key ) )
2018-09-15 20:08:38 +08:00
{
2019-03-18 20:53:48 +08:00
return false ;
2018-09-15 20:08:38 +08:00
}
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_bson_element_internal ( element_type , element_type_parse_position ) ) )
2018-09-15 09:08:50 +08:00
{
2018-10-07 13:52:12 +08:00
return false ;
2018-09-15 09:08:50 +08:00
}
2018-10-25 05:39:30 +08:00
// get_bson_cstr only appends
key . clear ( ) ;
2018-09-15 09:08:50 +08:00
}
2018-10-25 05:39:30 +08:00
2018-09-26 02:34:25 +08:00
return true ;
2018-09-15 19:54:08 +08:00
}
2018-10-07 13:52:12 +08:00
/*!
@ brief Reads an array from the BSON input and passes it to the SAX - parser .
@ return whether a valid BSON - array was passed to the SAX parser
*/
2018-09-15 19:54:08 +08:00
bool parse_bson_array ( )
{
2020-06-23 04:32:21 +08:00
std : : int32_t document_size { } ;
2018-10-28 00:31:03 +08:00
get_number < std : : int32_t , true > ( input_format_t : : bson , document_size ) ;
2018-09-15 19:54:08 +08:00
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_array ( std : : size_t ( - 1 ) ) ) )
2018-09-15 19:54:08 +08:00
{
return false ;
}
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_bson_element_list ( /*is_array*/ true ) ) )
2018-09-26 02:34:25 +08:00
{
return false ;
}
2018-09-15 19:54:08 +08:00
2018-09-26 02:34:25 +08:00
return sax - > end_array ( ) ;
2018-09-15 19:54:08 +08:00
}
2018-10-25 05:39:30 +08:00
//////////
// CBOR //
//////////
2018-09-15 04:58:22 +08:00
2017-08-14 23:50:24 +08:00
/*!
@ param [ in ] get_char whether a new character should be retrieved from the
2020-07-24 20:18:39 +08:00
input ( true ) or whether the last read character should
be considered instead ( false )
2020-07-12 22:51:43 +08:00
@ param [ in ] tag_handler how CBOR tags should be treated
2018-03-19 06:00:45 +08:00
@ return whether a valid CBOR value was passed to the SAX parser
2017-08-14 23:50:24 +08:00
*/
2020-07-24 20:18:39 +08:00
bool parse_cbor_internal ( const bool get_char ,
const cbor_tag_handler_t tag_handler )
2017-08-14 23:50:24 +08:00
{
switch ( get_char ? get ( ) : current )
{
// EOF
2020-06-06 20:30:17 +08:00
case std : : char_traits < char_type > : : eof ( ) :
2018-10-17 18:15:58 +08:00
return unexpect_eof ( input_format_t : : cbor , " value " ) ;
2017-08-14 23:50:24 +08:00
// Integer 0x00..0x17 (0..23)
case 0x00 :
case 0x01 :
case 0x02 :
case 0x03 :
case 0x04 :
case 0x05 :
case 0x06 :
case 0x07 :
case 0x08 :
case 0x09 :
case 0x0A :
case 0x0B :
case 0x0C :
case 0x0D :
case 0x0E :
case 0x0F :
case 0x10 :
case 0x11 :
case 0x12 :
case 0x13 :
case 0x14 :
case 0x15 :
case 0x16 :
case 0x17 :
2018-03-12 01:47:38 +08:00
return sax - > number_unsigned ( static_cast < number_unsigned_t > ( current ) ) ;
2017-08-14 23:50:24 +08:00
case 0x18 : // Unsigned integer (one-byte uint8_t follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint8_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_unsigned ( number ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x19 : // Unsigned integer (two-byte uint16_t follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint16_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_unsigned ( number ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x1A : // Unsigned integer (four-byte uint32_t follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint32_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_unsigned ( number ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x1B : // Unsigned integer (eight-byte uint64_t follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint64_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_unsigned ( number ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
// Negative integer -1-0x00..-1-0x17 (-1..-24)
case 0x20 :
case 0x21 :
case 0x22 :
case 0x23 :
case 0x24 :
case 0x25 :
case 0x26 :
case 0x27 :
case 0x28 :
case 0x29 :
case 0x2A :
case 0x2B :
case 0x2C :
case 0x2D :
case 0x2E :
case 0x2F :
case 0x30 :
case 0x31 :
case 0x32 :
case 0x33 :
case 0x34 :
case 0x35 :
case 0x36 :
case 0x37 :
2019-03-17 07:27:44 +08:00
return sax - > number_integer ( static_cast < std : : int8_t > ( 0x20 - 1 - current ) ) ;
2017-08-14 23:50:24 +08:00
case 0x38 : // Negative integer (one-byte uint8_t follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint8_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_integer ( static_cast < number_integer_t > ( - 1 ) - number ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x39 : // Negative integer -1-n (two-byte uint16_t follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint16_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_integer ( static_cast < number_integer_t > ( - 1 ) - number ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x3A : // Negative integer -1-n (four-byte uint32_t follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint32_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_integer ( static_cast < number_integer_t > ( - 1 ) - number ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x3B : // Negative integer -1-n (eight-byte uint64_t follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint64_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_integer ( static_cast < number_integer_t > ( - 1 )
2018-03-19 06:00:45 +08:00
- static_cast < number_integer_t > ( number ) ) ;
}
2017-08-14 23:50:24 +08:00
2019-07-05 12:13:25 +08:00
// Binary data (0x00..0x17 bytes follow)
case 0x40 :
case 0x41 :
case 0x42 :
case 0x43 :
case 0x44 :
case 0x45 :
case 0x46 :
case 0x47 :
case 0x48 :
case 0x49 :
case 0x4A :
case 0x4B :
case 0x4C :
case 0x4D :
case 0x4E :
case 0x4F :
case 0x50 :
case 0x51 :
case 0x52 :
case 0x53 :
case 0x54 :
case 0x55 :
case 0x56 :
case 0x57 :
case 0x58 : // Binary data (one-byte uint8_t for n follows)
case 0x59 : // Binary data (two-byte uint16_t for n follow)
case 0x5A : // Binary data (four-byte uint32_t for n follow)
case 0x5B : // Binary data (eight-byte uint64_t for n follow)
case 0x5F : // Binary data (indefinite length)
{
2020-05-18 04:50:27 +08:00
binary_t b ;
2020-06-03 20:20:36 +08:00
return get_cbor_binary ( b ) & & sax - > binary ( b ) ;
2019-07-05 12:13:25 +08:00
}
2017-08-14 23:50:24 +08:00
// UTF-8 string (0x00..0x17 bytes follow)
case 0x60 :
case 0x61 :
case 0x62 :
case 0x63 :
case 0x64 :
case 0x65 :
case 0x66 :
case 0x67 :
case 0x68 :
case 0x69 :
case 0x6A :
case 0x6B :
case 0x6C :
case 0x6D :
case 0x6E :
case 0x6F :
case 0x70 :
case 0x71 :
case 0x72 :
case 0x73 :
case 0x74 :
case 0x75 :
case 0x76 :
case 0x77 :
case 0x78 : // UTF-8 string (one-byte uint8_t for n follows)
case 0x79 : // UTF-8 string (two-byte uint16_t for n follow)
case 0x7A : // UTF-8 string (four-byte uint32_t for n follow)
case 0x7B : // UTF-8 string (eight-byte uint64_t for n follow)
case 0x7F : // UTF-8 string (indefinite length)
2018-03-19 06:00:45 +08:00
{
string_t s ;
2020-06-03 20:20:36 +08:00
return get_cbor_string ( s ) & & sax - > string ( s ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
// array (0x00..0x17 data items follow)
case 0x80 :
case 0x81 :
case 0x82 :
case 0x83 :
case 0x84 :
case 0x85 :
case 0x86 :
case 0x87 :
case 0x88 :
case 0x89 :
case 0x8A :
case 0x8B :
case 0x8C :
case 0x8D :
case 0x8E :
case 0x8F :
case 0x90 :
case 0x91 :
case 0x92 :
case 0x93 :
case 0x94 :
case 0x95 :
case 0x96 :
case 0x97 :
2020-07-24 20:18:39 +08:00
return get_cbor_array ( static_cast < std : : size_t > ( static_cast < unsigned int > ( current ) & 0x1Fu ) , tag_handler ) ;
2017-08-14 23:50:24 +08:00
case 0x98 : // array (one-byte uint8_t for n follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint8_t len { } ;
2020-07-24 20:18:39 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_array ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x99 : // array (two-byte uint16_t for n follow)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint16_t len { } ;
2020-07-24 20:18:39 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_array ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x9A : // array (four-byte uint32_t for n follow)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint32_t len { } ;
2020-07-24 20:18:39 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_array ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x9B : // array (eight-byte uint64_t for n follow)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint64_t len { } ;
2020-07-24 20:18:39 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_array ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0x9F : // array (indefinite length)
2020-07-24 20:18:39 +08:00
return get_cbor_array ( std : : size_t ( - 1 ) , tag_handler ) ;
2017-08-14 23:50:24 +08:00
// map (0x00..0x17 pairs of data items follow)
case 0xA0 :
case 0xA1 :
case 0xA2 :
case 0xA3 :
case 0xA4 :
case 0xA5 :
case 0xA6 :
case 0xA7 :
case 0xA8 :
case 0xA9 :
case 0xAA :
case 0xAB :
case 0xAC :
case 0xAD :
case 0xAE :
case 0xAF :
case 0xB0 :
case 0xB1 :
case 0xB2 :
case 0xB3 :
case 0xB4 :
case 0xB5 :
case 0xB6 :
case 0xB7 :
2020-07-24 20:18:39 +08:00
return get_cbor_object ( static_cast < std : : size_t > ( static_cast < unsigned int > ( current ) & 0x1Fu ) , tag_handler ) ;
2017-08-14 23:50:24 +08:00
case 0xB8 : // map (one-byte uint8_t for n follows)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint8_t len { } ;
2020-07-24 20:18:39 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_object ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0xB9 : // map (two-byte uint16_t for n follow)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint16_t len { } ;
2020-07-24 20:18:39 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_object ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0xBA : // map (four-byte uint32_t for n follow)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint32_t len { } ;
2020-07-24 20:18:39 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_object ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0xBB : // map (eight-byte uint64_t for n follow)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint64_t len { } ;
2020-07-24 20:18:39 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_cbor_object ( static_cast < std : : size_t > ( len ) , tag_handler ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0xBF : // map (indefinite length)
2020-07-24 20:18:39 +08:00
return get_cbor_object ( std : : size_t ( - 1 ) , tag_handler ) ;
2017-08-14 23:50:24 +08:00
2020-07-12 22:51:43 +08:00
case 0xC6 : // tagged item
case 0xC7 :
case 0xC8 :
case 0xC9 :
case 0xCA :
case 0xCB :
case 0xCC :
case 0xCD :
case 0xCE :
case 0xCF :
case 0xD0 :
case 0xD1 :
case 0xD2 :
case 0xD3 :
case 0xD4 :
case 0xD8 : // tagged item (1 bytes follow)
case 0xD9 : // tagged item (2 bytes follow)
case 0xDA : // tagged item (4 bytes follow)
case 0xDB : // tagged item (8 bytes follow)
{
switch ( tag_handler )
{
case cbor_tag_handler_t : : error :
{
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read , exception_message ( input_format_t : : cbor , " invalid byte: 0x " + last_token , " value " ) , diagnostics_t ( ) ) ) ;
2020-07-12 22:51:43 +08:00
}
case cbor_tag_handler_t : : ignore :
{
switch ( current )
{
case 0xD8 :
{
std : : uint8_t len { } ;
get_number ( input_format_t : : cbor , len ) ;
break ;
}
case 0xD9 :
{
std : : uint16_t len { } ;
get_number ( input_format_t : : cbor , len ) ;
break ;
}
case 0xDA :
{
std : : uint32_t len { } ;
get_number ( input_format_t : : cbor , len ) ;
break ;
}
case 0xDB :
{
std : : uint64_t len { } ;
get_number ( input_format_t : : cbor , len ) ;
break ;
}
default :
break ;
}
return parse_cbor_internal ( true , tag_handler ) ;
}
2020-08-11 02:48:39 +08:00
default : // LCOV_EXCL_LINE
2020-07-12 22:51:43 +08:00
JSON_ASSERT ( false ) ; // LCOV_EXCL_LINE
2020-08-11 02:48:39 +08:00
return false ; // LCOV_EXCL_LINE
2020-07-12 22:51:43 +08:00
}
}
2017-08-14 23:50:24 +08:00
case 0xF4 : // false
2018-03-12 01:47:38 +08:00
return sax - > boolean ( false ) ;
2017-08-14 23:50:24 +08:00
case 0xF5 : // true
2018-03-12 01:47:38 +08:00
return sax - > boolean ( true ) ;
2017-08-14 23:50:24 +08:00
case 0xF6 : // null
2018-03-12 01:47:38 +08:00
return sax - > null ( ) ;
2017-08-14 23:50:24 +08:00
case 0xF9 : // Half-Precision Float (two-byte IEEE 754)
{
2020-06-06 20:30:17 +08:00
const auto byte1_raw = get ( ) ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : cbor , " number " ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
2020-06-06 20:30:17 +08:00
const auto byte2_raw = get ( ) ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : cbor , " number " ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
2017-08-14 23:50:24 +08:00
2018-10-08 00:39:18 +08:00
const auto byte1 = static_cast < unsigned char > ( byte1_raw ) ;
const auto byte2 = static_cast < unsigned char > ( byte2_raw ) ;
2018-10-06 22:26:47 +08:00
2017-08-14 23:50:24 +08:00
// code from RFC 7049, Appendix D, Figure 3:
// As half-precision floating-point numbers were only added
// to IEEE 754 in 2008, today's programming platforms often
// still only have limited support for them. It is very
// easy to include at least decoding support for them even
// without such support. An example of a small decoder for
// half-precision floating-point numbers in the C language
// is shown in Fig. 3.
2019-03-17 07:27:44 +08:00
const auto half = static_cast < unsigned int > ( ( byte1 < < 8u ) + byte2 ) ;
2018-06-23 23:05:04 +08:00
const double val = [ & half ]
2017-08-14 23:50:24 +08:00
{
2019-03-17 07:27:44 +08:00
const int exp = ( half > > 10u ) & 0x1Fu ;
2019-03-15 22:55:52 +08:00
const unsigned int mant = half & 0x3FFu ;
2020-07-11 20:04:40 +08:00
JSON_ASSERT ( 0 < = exp & & exp < = 32 ) ;
2020-07-06 18:22:31 +08:00
JSON_ASSERT ( mant < = 1024 ) ;
2018-06-23 23:05:04 +08:00
switch ( exp )
{
case 0 :
return std : : ldexp ( mant , - 24 ) ;
case 31 :
return ( mant = = 0 )
? std : : numeric_limits < double > : : infinity ( )
: std : : numeric_limits < double > : : quiet_NaN ( ) ;
default :
return std : : ldexp ( mant + 1024 , exp - 25 ) ;
}
} ( ) ;
2019-03-15 22:55:52 +08:00
return sax - > number_float ( ( half & 0x8000u ) ! = 0
2018-06-23 16:28:04 +08:00
? static_cast < number_float_t > ( - val )
: static_cast < number_float_t > ( val ) , " " ) ;
2017-08-14 23:50:24 +08:00
}
case 0xFA : // Single-Precision Float (four-byte IEEE 754)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
float number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0xFB : // Double-Precision Float (eight-byte IEEE 754)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
double number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
default : // anything else (0xFF is handled inside the other types)
{
2018-03-19 06:00:45 +08:00
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read , exception_message ( input_format_t : : cbor , " invalid byte: 0x " + last_token , " value " ) , diagnostics_t ( ) ) ) ;
2017-08-14 23:50:24 +08:00
}
}
}
2018-03-19 06:00:45 +08:00
/*!
2018-10-25 05:39:30 +08:00
@ brief reads a CBOR string
This function first reads starting bytes to determine the expected
string length and then copies this number of bytes into a string .
Additionally , CBOR ' s strings with indefinite lengths are supported .
@ param [ out ] result created string
@ return whether string creation completed
2018-03-19 06:00:45 +08:00
*/
2018-10-25 05:39:30 +08:00
bool get_cbor_string ( string_t & result )
2017-08-14 23:50:24 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : cbor , " string " ) ) )
2017-08-14 23:50:24 +08:00
{
2018-10-25 05:39:30 +08:00
return false ;
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
switch ( current )
{
// UTF-8 string (0x00..0x17 bytes follow)
2017-08-14 23:50:24 +08:00
case 0x60 :
case 0x61 :
case 0x62 :
case 0x63 :
case 0x64 :
case 0x65 :
case 0x66 :
case 0x67 :
case 0x68 :
case 0x69 :
case 0x6A :
case 0x6B :
case 0x6C :
case 0x6D :
case 0x6E :
case 0x6F :
case 0x70 :
case 0x71 :
case 0x72 :
case 0x73 :
case 0x74 :
case 0x75 :
case 0x76 :
case 0x77 :
2018-10-25 05:39:30 +08:00
{
2019-03-15 22:55:52 +08:00
return get_string ( input_format_t : : cbor , static_cast < unsigned int > ( current ) & 0x1Fu , result ) ;
2018-10-25 05:39:30 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0x78 : // UTF-8 string (one-byte uint8_t for n follows)
{
2020-06-23 04:32:21 +08:00
std : : uint8_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_string ( input_format_t : : cbor , len , result ) ;
2018-10-25 05:39:30 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0x79 : // UTF-8 string (two-byte uint16_t for n follow)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint16_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_string ( input_format_t : : cbor , len , result ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0x7A : // UTF-8 string (four-byte uint32_t for n follow)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint32_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_string ( input_format_t : : cbor , len , result ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0x7B : // UTF-8 string (eight-byte uint64_t for n follow)
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint64_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , len ) & & get_string ( input_format_t : : cbor , len , result ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0x7F : // UTF-8 string (indefinite length)
2018-03-19 06:00:45 +08:00
{
2018-10-25 05:39:30 +08:00
while ( get ( ) ! = 0xFF )
{
string_t chunk ;
2020-06-03 20:20:36 +08:00
if ( ! get_cbor_string ( chunk ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
result . append ( chunk ) ;
}
return true ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
default :
2017-08-14 23:50:24 +08:00
{
2018-03-19 06:00:45 +08:00
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 113 , chars_read , exception_message ( input_format_t : : cbor , " expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x " + last_token , " string " ) , diagnostics_t ( ) ) ) ;
2017-08-14 23:50:24 +08:00
}
}
}
2019-07-05 12:13:25 +08:00
/*!
@ brief reads a CBOR byte array
This function first reads starting bytes to determine the expected
byte array length and then copies this number of bytes into the byte array .
Additionally , CBOR ' s byte arrays with indefinite lengths are supported .
@ param [ out ] result created byte array
@ return whether byte array creation completed
*/
2020-05-18 04:50:27 +08:00
bool get_cbor_binary ( binary_t & result )
2019-07-05 12:13:25 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : cbor , " binary " ) ) )
2019-07-05 12:13:25 +08:00
{
return false ;
}
switch ( current )
{
// Binary data (0x00..0x17 bytes follow)
case 0x40 :
case 0x41 :
case 0x42 :
case 0x43 :
case 0x44 :
case 0x45 :
case 0x46 :
case 0x47 :
case 0x48 :
case 0x49 :
case 0x4A :
case 0x4B :
case 0x4C :
case 0x4D :
case 0x4E :
case 0x4F :
case 0x50 :
case 0x51 :
case 0x52 :
case 0x53 :
case 0x54 :
case 0x55 :
case 0x56 :
case 0x57 :
{
return get_binary ( input_format_t : : cbor , static_cast < unsigned int > ( current ) & 0x1Fu , result ) ;
}
case 0x58 : // Binary data (one-byte uint8_t for n follows)
{
2020-06-23 04:32:21 +08:00
std : : uint8_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , len ) & &
2020-05-17 19:51:59 +08:00
get_binary ( input_format_t : : cbor , len , result ) ;
2019-07-05 12:13:25 +08:00
}
case 0x59 : // Binary data (two-byte uint16_t for n follow)
{
2020-06-23 04:32:21 +08:00
std : : uint16_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , len ) & &
2020-05-17 19:51:59 +08:00
get_binary ( input_format_t : : cbor , len , result ) ;
2019-07-05 12:13:25 +08:00
}
case 0x5A : // Binary data (four-byte uint32_t for n follow)
{
2020-06-23 04:32:21 +08:00
std : : uint32_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , len ) & &
2020-05-17 19:51:59 +08:00
get_binary ( input_format_t : : cbor , len , result ) ;
2019-07-05 12:13:25 +08:00
}
case 0x5B : // Binary data (eight-byte uint64_t for n follow)
{
2020-06-23 04:32:21 +08:00
std : : uint64_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : cbor , len ) & &
2020-05-17 19:51:59 +08:00
get_binary ( input_format_t : : cbor , len , result ) ;
2019-07-05 12:13:25 +08:00
}
case 0x5F : // Binary data (indefinite length)
{
while ( get ( ) ! = 0xFF )
{
2020-05-18 04:50:27 +08:00
binary_t chunk ;
2020-06-03 20:20:36 +08:00
if ( ! get_cbor_binary ( chunk ) )
2019-07-05 12:13:25 +08:00
{
return false ;
}
result . insert ( result . end ( ) , chunk . begin ( ) , chunk . end ( ) ) ;
}
return true ;
}
default :
{
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 113 , chars_read , exception_message ( input_format_t : : cbor , " expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x " + last_token , " binary " ) , diagnostics_t ( ) ) ) ;
2019-07-05 12:13:25 +08:00
}
}
}
2018-01-14 17:27:30 +08:00
/*!
2018-10-25 05:39:30 +08:00
@ param [ in ] len the length of the array or std : : size_t ( - 1 ) for an
array of indefinite size
2020-07-24 20:18:39 +08:00
@ param [ in ] tag_handler how CBOR tags should be treated
2018-10-25 05:39:30 +08:00
@ return whether array creation completed
2018-01-14 17:27:30 +08:00
*/
2020-07-24 20:18:39 +08:00
bool get_cbor_array ( const std : : size_t len ,
const cbor_tag_handler_t tag_handler )
2018-01-14 17:27:30 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_array ( len ) ) )
2018-01-14 17:27:30 +08:00
{
2018-10-25 05:39:30 +08:00
return false ;
2018-01-14 17:27:30 +08:00
}
2018-10-25 05:39:30 +08:00
if ( len ! = std : : size_t ( - 1 ) )
2017-08-14 23:50:24 +08:00
{
2018-10-25 05:39:30 +08:00
for ( std : : size_t i = 0 ; i < len ; + + i )
2017-08-14 23:50:24 +08:00
{
2020-07-24 20:18:39 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_cbor_internal ( true , tag_handler ) ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
2017-08-14 23:50:24 +08:00
}
2018-10-25 05:39:30 +08:00
}
else
{
while ( get ( ) ! = 0xFF )
2017-08-14 23:50:24 +08:00
{
2020-07-24 20:18:39 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_cbor_internal ( false , tag_handler ) ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
2017-08-14 23:50:24 +08:00
}
}
2018-10-25 05:39:30 +08:00
return sax - > end_array ( ) ;
2017-08-14 23:50:24 +08:00
}
/*!
2018-10-25 05:39:30 +08:00
@ param [ in ] len the length of the object or std : : size_t ( - 1 ) for an
object of indefinite size
2020-07-24 20:18:39 +08:00
@ param [ in ] tag_handler how CBOR tags should be treated
2018-10-25 05:39:30 +08:00
@ return whether object creation completed
2017-08-14 23:50:24 +08:00
*/
2020-07-24 20:18:39 +08:00
bool get_cbor_object ( const std : : size_t len ,
const cbor_tag_handler_t tag_handler )
2017-08-14 23:50:24 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( len ) ) )
2017-08-14 23:50:24 +08:00
{
2018-10-25 05:39:30 +08:00
return false ;
}
string_t key ;
if ( len ! = std : : size_t ( - 1 ) )
{
for ( std : : size_t i = 0 ; i < len ; + + i )
2018-03-19 06:00:45 +08:00
{
2018-10-25 05:39:30 +08:00
get ( ) ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_cbor_string ( key ) | | ! sax - > key ( key ) ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
2020-07-24 20:18:39 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_cbor_internal ( true , tag_handler ) ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
key . clear ( ) ;
2018-03-19 06:00:45 +08:00
}
2018-10-25 05:39:30 +08:00
}
else
{
while ( get ( ) ! = 0xFF )
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_cbor_string ( key ) | | ! sax - > key ( key ) ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
2017-08-14 23:50:24 +08:00
2020-07-24 20:18:39 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_cbor_internal ( true , tag_handler ) ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
key . clear ( ) ;
}
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
return sax - > end_object ( ) ;
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
/////////////
// MsgPack //
/////////////
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
/*!
@ return whether a valid MessagePack value was passed to the SAX parser
2017-08-14 23:50:24 +08:00
*/
2018-10-25 05:39:30 +08:00
bool parse_msgpack_internal ( )
2017-08-14 23:50:24 +08:00
{
2018-10-25 05:39:30 +08:00
switch ( get ( ) )
2017-08-14 23:50:24 +08:00
{
2018-10-25 05:39:30 +08:00
// EOF
2020-06-06 20:30:17 +08:00
case std : : char_traits < char_type > : : eof ( ) :
2018-10-25 05:39:30 +08:00
return unexpect_eof ( input_format_t : : msgpack , " value " ) ;
// positive fixint
case 0x00 :
case 0x01 :
case 0x02 :
case 0x03 :
case 0x04 :
case 0x05 :
case 0x06 :
case 0x07 :
case 0x08 :
case 0x09 :
case 0x0A :
case 0x0B :
case 0x0C :
case 0x0D :
case 0x0E :
case 0x0F :
case 0x10 :
case 0x11 :
case 0x12 :
case 0x13 :
case 0x14 :
case 0x15 :
case 0x16 :
case 0x17 :
case 0x18 :
case 0x19 :
case 0x1A :
case 0x1B :
case 0x1C :
case 0x1D :
case 0x1E :
case 0x1F :
case 0x20 :
case 0x21 :
case 0x22 :
case 0x23 :
case 0x24 :
case 0x25 :
case 0x26 :
case 0x27 :
case 0x28 :
case 0x29 :
case 0x2A :
case 0x2B :
case 0x2C :
case 0x2D :
case 0x2E :
case 0x2F :
case 0x30 :
case 0x31 :
case 0x32 :
case 0x33 :
case 0x34 :
case 0x35 :
case 0x36 :
case 0x37 :
case 0x38 :
case 0x39 :
case 0x3A :
case 0x3B :
case 0x3C :
case 0x3D :
case 0x3E :
case 0x3F :
case 0x40 :
case 0x41 :
case 0x42 :
case 0x43 :
case 0x44 :
case 0x45 :
case 0x46 :
case 0x47 :
case 0x48 :
case 0x49 :
case 0x4A :
case 0x4B :
case 0x4C :
case 0x4D :
case 0x4E :
case 0x4F :
case 0x50 :
case 0x51 :
case 0x52 :
case 0x53 :
case 0x54 :
case 0x55 :
case 0x56 :
case 0x57 :
case 0x58 :
case 0x59 :
case 0x5A :
case 0x5B :
case 0x5C :
case 0x5D :
case 0x5E :
case 0x5F :
2017-08-14 23:50:24 +08:00
case 0x60 :
case 0x61 :
case 0x62 :
case 0x63 :
case 0x64 :
case 0x65 :
case 0x66 :
case 0x67 :
case 0x68 :
case 0x69 :
case 0x6A :
case 0x6B :
case 0x6C :
case 0x6D :
case 0x6E :
case 0x6F :
case 0x70 :
case 0x71 :
case 0x72 :
case 0x73 :
case 0x74 :
case 0x75 :
case 0x76 :
case 0x77 :
2018-10-25 05:39:30 +08:00
case 0x78 :
case 0x79 :
case 0x7A :
case 0x7B :
case 0x7C :
case 0x7D :
case 0x7E :
case 0x7F :
return sax - > number_unsigned ( static_cast < number_unsigned_t > ( current ) ) ;
// fixmap
case 0x80 :
case 0x81 :
case 0x82 :
case 0x83 :
case 0x84 :
case 0x85 :
case 0x86 :
case 0x87 :
case 0x88 :
case 0x89 :
case 0x8A :
case 0x8B :
case 0x8C :
case 0x8D :
case 0x8E :
case 0x8F :
2019-03-15 22:55:52 +08:00
return get_msgpack_object ( static_cast < std : : size_t > ( static_cast < unsigned int > ( current ) & 0x0Fu ) ) ;
2018-10-25 05:39:30 +08:00
// fixarray
case 0x90 :
case 0x91 :
case 0x92 :
case 0x93 :
case 0x94 :
case 0x95 :
case 0x96 :
case 0x97 :
case 0x98 :
case 0x99 :
case 0x9A :
case 0x9B :
case 0x9C :
case 0x9D :
case 0x9E :
case 0x9F :
2019-03-15 22:55:52 +08:00
return get_msgpack_array ( static_cast < std : : size_t > ( static_cast < unsigned int > ( current ) & 0x0Fu ) ) ;
2018-10-25 05:39:30 +08:00
// fixstr
case 0xA0 :
case 0xA1 :
case 0xA2 :
case 0xA3 :
case 0xA4 :
case 0xA5 :
case 0xA6 :
case 0xA7 :
case 0xA8 :
case 0xA9 :
case 0xAA :
case 0xAB :
case 0xAC :
case 0xAD :
case 0xAE :
case 0xAF :
case 0xB0 :
case 0xB1 :
case 0xB2 :
case 0xB3 :
case 0xB4 :
case 0xB5 :
case 0xB6 :
case 0xB7 :
case 0xB8 :
case 0xB9 :
case 0xBA :
case 0xBB :
case 0xBC :
case 0xBD :
case 0xBE :
case 0xBF :
2019-07-21 20:04:49 +08:00
case 0xD9 : // str 8
case 0xDA : // str 16
case 0xDB : // str 32
2018-03-19 06:00:45 +08:00
{
2018-10-25 05:39:30 +08:00
string_t s ;
2020-06-03 20:20:36 +08:00
return get_msgpack_string ( s ) & & sax - > string ( s ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0xC0 : // nil
return sax - > null ( ) ;
case 0xC2 : // false
return sax - > boolean ( false ) ;
case 0xC3 : // true
return sax - > boolean ( true ) ;
2019-07-05 12:13:25 +08:00
case 0xC4 : // bin 8
case 0xC5 : // bin 16
case 0xC6 : // bin 32
case 0xC7 : // ext 8
case 0xC8 : // ext 16
case 0xC9 : // ext 32
case 0xD4 : // fixext 1
case 0xD5 : // fixext 2
case 0xD6 : // fixext 4
case 0xD7 : // fixext 8
case 0xD8 : // fixext 16
{
2020-05-18 04:50:27 +08:00
binary_t b ;
2020-06-03 20:20:36 +08:00
return get_msgpack_binary ( b ) & & sax - > binary ( b ) ;
2019-07-05 12:13:25 +08:00
}
2018-10-25 05:39:30 +08:00
case 0xCA : // float 32
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
float number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0xCB : // float 64
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
double number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0xCC : // uint 8
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint8_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_unsigned ( number ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0xCD : // uint 16
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint16_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_unsigned ( number ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
2018-10-25 05:39:30 +08:00
case 0xCE : // uint 32
2017-08-14 23:50:24 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint32_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_unsigned ( number ) ;
2018-10-25 05:39:30 +08:00
}
case 0xCF : // uint 64
{
2020-06-23 04:32:21 +08:00
std : : uint64_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_unsigned ( number ) ;
2018-10-25 05:39:30 +08:00
}
case 0xD0 : // int 8
{
2020-06-23 04:32:21 +08:00
std : : int8_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_integer ( number ) ;
2018-10-25 05:39:30 +08:00
}
case 0xD1 : // int 16
{
2020-06-23 04:32:21 +08:00
std : : int16_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_integer ( number ) ;
2018-10-25 05:39:30 +08:00
}
case 0xD2 : // int 32
{
2020-06-23 04:32:21 +08:00
std : : int32_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_integer ( number ) ;
2018-10-25 05:39:30 +08:00
}
case 0xD3 : // int 64
{
2020-06-23 04:32:21 +08:00
std : : int64_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , number ) & & sax - > number_integer ( number ) ;
2017-08-14 23:50:24 +08:00
}
2018-10-25 05:39:30 +08:00
case 0xDC : // array 16
2018-03-12 01:47:38 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint16_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & & get_msgpack_array ( static_cast < std : : size_t > ( len ) ) ;
2018-03-12 01:47:38 +08:00
}
2018-10-25 05:39:30 +08:00
case 0xDD : // array 32
2018-03-12 01:47:38 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint32_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & & get_msgpack_array ( static_cast < std : : size_t > ( len ) ) ;
2018-03-12 01:47:38 +08:00
}
2018-10-25 05:39:30 +08:00
case 0xDE : // map 16
2018-03-12 01:47:38 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint16_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & & get_msgpack_object ( static_cast < std : : size_t > ( len ) ) ;
2018-03-12 01:47:38 +08:00
}
2018-10-25 05:39:30 +08:00
case 0xDF : // map 32
2018-03-12 01:47:38 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint32_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & & get_msgpack_object ( static_cast < std : : size_t > ( len ) ) ;
2018-10-25 05:39:30 +08:00
}
2018-03-12 01:47:38 +08:00
2018-10-25 05:39:30 +08:00
// negative fixint
case 0xE0 :
case 0xE1 :
case 0xE2 :
case 0xE3 :
case 0xE4 :
case 0xE5 :
case 0xE6 :
case 0xE7 :
case 0xE8 :
case 0xE9 :
case 0xEA :
case 0xEB :
case 0xEC :
case 0xED :
case 0xEE :
case 0xEF :
case 0xF0 :
case 0xF1 :
case 0xF2 :
case 0xF3 :
case 0xF4 :
case 0xF5 :
case 0xF6 :
case 0xF7 :
case 0xF8 :
case 0xF9 :
case 0xFA :
case 0xFB :
case 0xFC :
case 0xFD :
case 0xFE :
case 0xFF :
2019-03-17 07:27:44 +08:00
return sax - > number_integer ( static_cast < std : : int8_t > ( current ) ) ;
2018-10-25 05:39:30 +08:00
default : // anything else
{
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read , exception_message ( input_format_t : : msgpack , " invalid byte: 0x " + last_token , " value " ) , diagnostics_t ( ) ) ) ;
2018-03-12 01:47:38 +08:00
}
}
2017-08-14 23:50:24 +08:00
}
/*!
@ brief reads a MessagePack string
This function first reads starting bytes to determine the expected
string length and then copies this number of bytes into a string .
2018-03-19 06:00:45 +08:00
@ param [ out ] result created string
2017-08-14 23:50:24 +08:00
2018-03-19 06:00:45 +08:00
@ return whether string creation completed
2017-08-14 23:50:24 +08:00
*/
2018-03-19 06:00:45 +08:00
bool get_msgpack_string ( string_t & result )
2017-08-14 23:50:24 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : msgpack , " string " ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
2017-08-14 23:50:24 +08:00
switch ( current )
{
// fixstr
case 0xA0 :
case 0xA1 :
case 0xA2 :
case 0xA3 :
case 0xA4 :
case 0xA5 :
case 0xA6 :
case 0xA7 :
case 0xA8 :
case 0xA9 :
case 0xAA :
case 0xAB :
case 0xAC :
case 0xAD :
case 0xAE :
case 0xAF :
case 0xB0 :
case 0xB1 :
case 0xB2 :
case 0xB3 :
case 0xB4 :
case 0xB5 :
case 0xB6 :
case 0xB7 :
case 0xB8 :
case 0xB9 :
case 0xBA :
case 0xBB :
case 0xBC :
case 0xBD :
case 0xBE :
case 0xBF :
2018-03-19 06:00:45 +08:00
{
2019-03-15 22:55:52 +08:00
return get_string ( input_format_t : : msgpack , static_cast < unsigned int > ( current ) & 0x1Fu , result ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0xD9 : // str 8
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint8_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & & get_string ( input_format_t : : msgpack , len , result ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0xDA : // str 16
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint16_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & & get_string ( input_format_t : : msgpack , len , result ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
case 0xDB : // str 32
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint32_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & & get_string ( input_format_t : : msgpack , len , result ) ;
2018-03-19 06:00:45 +08:00
}
2017-08-14 23:50:24 +08:00
default :
{
2018-03-19 06:00:45 +08:00
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 113 , chars_read , exception_message ( input_format_t : : msgpack , " expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x " + last_token , " string " ) , diagnostics_t ( ) ) ) ;
2017-08-14 23:50:24 +08:00
}
}
}
2019-07-05 12:13:25 +08:00
/*!
@ brief reads a MessagePack byte array
This function first reads starting bytes to determine the expected
byte array length and then copies this number of bytes into a byte array .
@ param [ out ] result created byte array
@ return whether byte array creation completed
*/
2020-05-18 04:50:27 +08:00
bool get_msgpack_binary ( binary_t & result )
2019-07-05 12:13:25 +08:00
{
2020-05-17 19:51:59 +08:00
// helper function to set the subtype
auto assign_and_return_true = [ & result ] ( std : : int8_t subtype )
{
result . set_subtype ( static_cast < std : : uint8_t > ( subtype ) ) ;
return true ;
} ;
2019-07-05 12:13:25 +08:00
switch ( current )
{
case 0xC4 : // bin 8
{
2020-06-23 04:32:21 +08:00
std : : uint8_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & &
2020-05-17 19:51:59 +08:00
get_binary ( input_format_t : : msgpack , len , result ) ;
2019-07-05 12:13:25 +08:00
}
case 0xC5 : // bin 16
{
2020-06-23 04:32:21 +08:00
std : : uint16_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & &
2020-05-17 19:51:59 +08:00
get_binary ( input_format_t : : msgpack , len , result ) ;
2019-07-05 12:13:25 +08:00
}
case 0xC6 : // bin 32
{
2020-06-23 04:32:21 +08:00
std : : uint32_t len { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & &
2020-05-17 19:51:59 +08:00
get_binary ( input_format_t : : msgpack , len , result ) ;
2019-07-05 12:13:25 +08:00
}
case 0xC7 : // ext 8
{
2020-06-23 04:32:21 +08:00
std : : uint8_t len { } ;
std : : int8_t subtype { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & &
get_number ( input_format_t : : msgpack , subtype ) & &
get_binary ( input_format_t : : msgpack , len , result ) & &
2020-05-17 19:51:59 +08:00
assign_and_return_true ( subtype ) ;
2019-07-05 12:13:25 +08:00
}
case 0xC8 : // ext 16
{
2020-06-23 04:32:21 +08:00
std : : uint16_t len { } ;
std : : int8_t subtype { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & &
get_number ( input_format_t : : msgpack , subtype ) & &
get_binary ( input_format_t : : msgpack , len , result ) & &
2020-05-17 19:51:59 +08:00
assign_and_return_true ( subtype ) ;
2019-07-05 12:13:25 +08:00
}
case 0xC9 : // ext 32
{
2020-06-23 04:32:21 +08:00
std : : uint32_t len { } ;
std : : int8_t subtype { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , len ) & &
get_number ( input_format_t : : msgpack , subtype ) & &
get_binary ( input_format_t : : msgpack , len , result ) & &
2020-05-17 19:51:59 +08:00
assign_and_return_true ( subtype ) ;
2019-07-05 12:13:25 +08:00
}
case 0xD4 : // fixext 1
{
2020-06-23 04:32:21 +08:00
std : : int8_t subtype { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , subtype ) & &
get_binary ( input_format_t : : msgpack , 1 , result ) & &
2020-05-17 19:51:59 +08:00
assign_and_return_true ( subtype ) ;
2019-07-05 12:13:25 +08:00
}
case 0xD5 : // fixext 2
{
2020-06-23 04:32:21 +08:00
std : : int8_t subtype { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , subtype ) & &
get_binary ( input_format_t : : msgpack , 2 , result ) & &
2020-05-17 19:51:59 +08:00
assign_and_return_true ( subtype ) ;
2019-07-05 12:13:25 +08:00
}
case 0xD6 : // fixext 4
{
2020-06-23 04:32:21 +08:00
std : : int8_t subtype { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , subtype ) & &
get_binary ( input_format_t : : msgpack , 4 , result ) & &
2020-05-17 19:51:59 +08:00
assign_and_return_true ( subtype ) ;
2019-07-05 12:13:25 +08:00
}
case 0xD7 : // fixext 8
{
2020-06-23 04:32:21 +08:00
std : : int8_t subtype { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , subtype ) & &
get_binary ( input_format_t : : msgpack , 8 , result ) & &
2020-05-17 19:51:59 +08:00
assign_and_return_true ( subtype ) ;
2019-07-05 12:13:25 +08:00
}
case 0xD8 : // fixext 16
{
2020-06-23 04:32:21 +08:00
std : : int8_t subtype { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : msgpack , subtype ) & &
get_binary ( input_format_t : : msgpack , 16 , result ) & &
2020-05-17 19:51:59 +08:00
assign_and_return_true ( subtype ) ;
2019-07-05 12:13:25 +08:00
}
2020-05-14 03:28:43 +08:00
default : // LCOV_EXCL_LINE
return false ; // LCOV_EXCL_LINE
2019-07-05 12:13:25 +08:00
}
}
2018-03-19 06:00:45 +08:00
/*!
@ param [ in ] len the length of the array
@ return whether array creation completed
*/
bool get_msgpack_array ( const std : : size_t len )
2017-08-14 23:50:24 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_array ( len ) ) )
2017-08-14 23:50:24 +08:00
{
2018-03-12 01:47:38 +08:00
return false ;
}
for ( std : : size_t i = 0 ; i < len ; + + i )
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_msgpack_internal ( ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
}
return sax - > end_array ( ) ;
2017-08-14 23:50:24 +08:00
}
2018-03-19 06:00:45 +08:00
/*!
@ param [ in ] len the length of the object
@ return whether object creation completed
*/
bool get_msgpack_object ( const std : : size_t len )
2017-08-14 23:50:24 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( len ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2018-03-22 03:12:06 +08:00
string_t key ;
2018-03-12 01:47:38 +08:00
for ( std : : size_t i = 0 ; i < len ; + + i )
2017-08-14 23:50:24 +08:00
{
get ( ) ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_msgpack_string ( key ) | | ! sax - > key ( key ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_msgpack_internal ( ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2018-03-22 03:12:06 +08:00
key . clear ( ) ;
2018-03-12 01:47:38 +08:00
}
return sax - > end_object ( ) ;
2017-08-14 23:50:24 +08:00
}
2018-10-25 05:39:30 +08:00
////////////
// UBJSON //
////////////
/*!
@brief reads one UBJSON value and forwards it to the SAX consumer

@param[in] get_char  whether a new character should be retrieved from the
                     input (true, default) or whether the last read
                     character should be considered instead

@return whether a valid UBJSON value was passed to the SAX parser
*/
bool parse_ubjson_internal(const bool get_char = true)
{
    if (get_char)
    {
        // fetch the prefix via get_ignore_noop()
        return get_ubjson_value(get_ignore_noop());
    }

    // reuse the character that was read last
    return get_ubjson_value(current);
}
2018-01-14 17:27:30 +08:00
/*!
@ brief reads a UBJSON string
This function is either called after reading the ' S ' byte explicitly
indicating a string , or in case of an object key where the ' S ' byte can be
left out .
2018-03-19 06:00:45 +08:00
@ param [ out ] result created string
2018-01-14 17:27:30 +08:00
@ param [ in ] get_char whether a new character should be retrieved from the
input ( true , default ) or whether the last read
character should be considered instead
2018-03-19 06:00:45 +08:00
@ return whether string creation completed
2018-01-14 17:27:30 +08:00
*/
2018-03-19 06:00:45 +08:00
bool get_ubjson_string(string_t& result, const bool get_char = true)
{
    // When get_char is false, the byte already stored in `current` is
    // interpreted as the length-type marker instead of reading a new one.
    if (get_char)
    {
        get();  // TODO(niels): may we ignore N here?
    }

    if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
    {
        return false;
    }

    // `current` selects the integer type of the length prefix; the length is
    // read first and then that many characters are appended to `result`.
    switch (current)
    {
        case 'U':
        {
            std::uint8_t length{};
            if (!get_number(input_format_t::ubjson, length))
            {
                return false;
            }
            return get_string(input_format_t::ubjson, length, result);
        }

        case 'i':
        {
            std::int8_t length{};
            if (!get_number(input_format_t::ubjson, length))
            {
                return false;
            }
            return get_string(input_format_t::ubjson, length, result);
        }

        case 'I':
        {
            std::int16_t length{};
            if (!get_number(input_format_t::ubjson, length))
            {
                return false;
            }
            return get_string(input_format_t::ubjson, length, result);
        }

        case 'l':
        {
            std::int32_t length{};
            if (!get_number(input_format_t::ubjson, length))
            {
                return false;
            }
            return get_string(input_format_t::ubjson, length, result);
        }

        case 'L':
        {
            std::int64_t length{};
            if (!get_number(input_format_t::ubjson, length))
            {
                return false;
            }
            return get_string(input_format_t::ubjson, length, result);
        }

        default:
        {
            // any other marker is not a valid length type
            const auto last_byte = get_token_string();
            return sax->parse_error(chars_read, last_byte, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_byte, "string"), diagnostics_t()));
        }
    }
}
2018-03-19 06:00:45 +08:00
/*!
@ param [ out ] result determined size
@ return whether size determination completed
*/
bool get_ubjson_size_value ( std : : size_t & result )
2018-03-12 01:47:38 +08:00
{
switch ( get_ignore_noop ( ) )
{
case ' U ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint8_t number { } ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format_t : : ubjson , number ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
2018-03-12 01:47:38 +08:00
case ' i ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int8_t number { } ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format_t : : ubjson , number ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
2018-03-12 01:47:38 +08:00
case ' I ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int16_t number { } ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format_t : : ubjson , number ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
2018-03-12 01:47:38 +08:00
case ' l ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int32_t number { } ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format_t : : ubjson , number ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
2018-03-12 01:47:38 +08:00
case ' L ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int64_t number { } ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_number ( input_format_t : : ubjson , number ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
result = static_cast < std : : size_t > ( number ) ;
return true ;
}
2018-03-12 01:47:38 +08:00
default :
2018-03-19 06:00:45 +08:00
{
2018-03-21 01:49:10 +08:00
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 113 , chars_read , exception_message ( input_format_t : : ubjson , " expected length type specification (U, i, I, l, L) after '#'; last byte: 0x " + last_token , " size " ) , diagnostics_t ( ) ) ) ;
2018-03-19 06:00:45 +08:00
}
2018-03-12 01:47:38 +08:00
}
}
2018-01-14 17:27:30 +08:00
/*!
@ brief determine the type and size for a container
In the optimized UBJSON format , a type and a size can be provided to allow
for a more compact representation .
2018-03-19 06:00:45 +08:00
@ param [ out ] result pair of the size and the type
@ return whether pair creation completed
2018-01-14 17:27:30 +08:00
*/
2020-06-06 20:30:17 +08:00
bool get_ubjson_size_type ( std : : pair < std : : size_t , char_int_type > & result )
2018-01-14 17:27:30 +08:00
{
2018-03-19 06:00:45 +08:00
result . first = string_t : : npos ; // size
result . second = 0 ; // type
2018-01-14 17:27:30 +08:00
get_ignore_noop ( ) ;
if ( current = = ' $ ' )
{
2018-03-19 06:00:45 +08:00
result . second = get ( ) ; // must not ignore 'N', because 'N' maybe the type
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : ubjson , " type " ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
2018-01-14 17:27:30 +08:00
get_ignore_noop ( ) ;
2019-07-02 04:37:30 +08:00
if ( JSON_HEDLEY_UNLIKELY ( current ! = ' # ' ) )
2018-01-14 17:27:30 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : ubjson , " value " ) ) )
2018-03-21 01:49:10 +08:00
{
return false ;
}
2018-03-19 06:00:45 +08:00
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read , exception_message ( input_format_t : : ubjson , " expected '#' after type information; last byte: 0x " + last_token , " size " ) , diagnostics_t ( ) ) ) ;
2018-01-14 17:27:30 +08:00
}
2018-03-19 06:00:45 +08:00
return get_ubjson_size_value ( result . first ) ;
2018-01-14 17:27:30 +08:00
}
2019-03-17 07:27:44 +08:00
if ( current = = ' # ' )
2018-01-14 17:27:30 +08:00
{
2018-03-19 06:00:45 +08:00
return get_ubjson_size_value ( result . first ) ;
2018-01-14 17:27:30 +08:00
}
2019-03-17 07:27:44 +08:00
2018-03-19 06:00:45 +08:00
return true ;
2018-01-14 17:27:30 +08:00
}
2018-03-19 06:00:45 +08:00
/*!
@ param prefix the previously read or set type prefix
@ return whether value creation completed
*/
2020-06-06 20:30:17 +08:00
bool get_ubjson_value ( const char_int_type prefix )
2018-01-14 17:27:30 +08:00
{
switch ( prefix )
{
2020-06-06 20:30:17 +08:00
case std : : char_traits < char_type > : : eof ( ) : // EOF
2018-10-17 18:15:58 +08:00
return unexpect_eof ( input_format_t : : ubjson , " value " ) ;
2018-01-14 17:27:30 +08:00
case ' T ' : // true
2018-03-12 01:47:38 +08:00
return sax - > boolean ( true ) ;
2018-01-14 17:27:30 +08:00
case ' F ' : // false
2018-03-12 01:47:38 +08:00
return sax - > boolean ( false ) ;
2018-01-14 17:27:30 +08:00
case ' Z ' : // null
2018-03-12 01:47:38 +08:00
return sax - > null ( ) ;
2018-01-14 17:27:30 +08:00
case ' U ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : uint8_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : ubjson , number ) & & sax - > number_unsigned ( number ) ;
2018-03-19 06:00:45 +08:00
}
2018-01-14 17:27:30 +08:00
case ' i ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int8_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : ubjson , number ) & & sax - > number_integer ( number ) ;
2018-03-19 06:00:45 +08:00
}
2018-01-14 17:27:30 +08:00
case ' I ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int16_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : ubjson , number ) & & sax - > number_integer ( number ) ;
2018-03-19 06:00:45 +08:00
}
2018-01-14 17:27:30 +08:00
case ' l ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int32_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : ubjson , number ) & & sax - > number_integer ( number ) ;
2018-03-19 06:00:45 +08:00
}
2018-01-14 17:27:30 +08:00
case ' L ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
std : : int64_t number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : ubjson , number ) & & sax - > number_integer ( number ) ;
2018-03-19 06:00:45 +08:00
}
2018-01-14 17:27:30 +08:00
case ' d ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
float number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : ubjson , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
2018-03-19 06:00:45 +08:00
}
2018-01-14 17:27:30 +08:00
case ' D ' :
2018-03-19 06:00:45 +08:00
{
2020-06-23 04:32:21 +08:00
double number { } ;
2020-06-03 20:20:36 +08:00
return get_number ( input_format_t : : ubjson , number ) & & sax - > number_float ( static_cast < number_float_t > ( number ) , " " ) ;
2018-03-19 06:00:45 +08:00
}
2018-01-14 17:27:30 +08:00
2020-07-20 15:42:37 +08:00
case ' H ' :
{
2020-07-23 18:16:18 +08:00
return get_ubjson_high_precision_number ( ) ;
2020-07-20 15:42:37 +08:00
}
2018-01-14 17:27:30 +08:00
case ' C ' : // char
{
get ( ) ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : ubjson , " char " ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
2019-07-02 04:37:30 +08:00
if ( JSON_HEDLEY_UNLIKELY ( current > 127 ) )
2018-01-14 17:27:30 +08:00
{
2018-03-19 06:00:45 +08:00
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 113 , chars_read , exception_message ( input_format_t : : ubjson , " byte after 'C' must be in range 0x00..0x7F; last byte: 0x " + last_token , " char " ) , diagnostics_t ( ) ) ) ;
2018-01-14 17:27:30 +08:00
}
2020-06-06 20:30:17 +08:00
string_t s ( 1 , static_cast < typename string_t : : value_type > ( current ) ) ;
2018-03-22 03:12:06 +08:00
return sax - > string ( s ) ;
2018-01-14 17:27:30 +08:00
}
case ' S ' : // string
2018-03-19 06:00:45 +08:00
{
string_t s ;
2020-06-03 20:20:36 +08:00
return get_ubjson_string ( s ) & & sax - > string ( s ) ;
2018-03-19 06:00:45 +08:00
}
2018-01-14 17:27:30 +08:00
case ' [ ' : // array
2018-03-19 06:00:45 +08:00
return get_ubjson_array ( ) ;
2018-01-14 17:27:30 +08:00
case ' { ' : // object
2018-03-19 06:00:45 +08:00
return get_ubjson_object ( ) ;
2018-01-14 17:27:30 +08:00
default : // anything else
2018-03-19 06:00:45 +08:00
{
auto last_token = get_token_string ( ) ;
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , last_token , parse_error : : create ( 112 , chars_read , exception_message ( input_format_t : : ubjson , " invalid byte: 0x " + last_token , " value " ) , diagnostics_t ( ) ) ) ;
2018-03-19 06:00:45 +08:00
}
2018-01-14 17:27:30 +08:00
}
}
2018-03-19 06:00:45 +08:00
/*!
@ return whether array creation completed
*/
bool get_ubjson_array ( )
2018-01-14 17:27:30 +08:00
{
2020-06-06 20:30:17 +08:00
std : : pair < std : : size_t , char_int_type > size_and_type ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_size_type ( size_and_type ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
2018-01-14 17:27:30 +08:00
2018-02-01 15:01:01 +08:00
if ( size_and_type . first ! = string_t : : npos )
2018-01-14 17:27:30 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_array ( size_and_type . first ) ) )
2018-02-07 05:38:53 +08:00
{
2018-03-12 01:47:38 +08:00
return false ;
2018-02-07 05:38:53 +08:00
}
2018-01-14 17:27:30 +08:00
if ( size_and_type . second ! = 0 )
{
if ( size_and_type . second ! = ' N ' )
2018-02-07 05:38:53 +08:00
{
2018-03-12 01:47:38 +08:00
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
2018-02-07 05:38:53 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_value ( size_and_type . second ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
}
2018-02-07 05:38:53 +08:00
}
2018-01-14 17:27:30 +08:00
}
else
{
2018-03-12 01:47:38 +08:00
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
2018-01-14 17:27:30 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_ubjson_internal ( ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
}
2018-01-14 17:27:30 +08:00
}
}
else
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_array ( std : : size_t ( - 1 ) ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2018-01-14 17:27:30 +08:00
while ( current ! = ' ] ' )
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_ubjson_internal ( false ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2018-01-14 17:27:30 +08:00
get_ignore_noop ( ) ;
}
}
2018-03-12 01:47:38 +08:00
return sax - > end_array ( ) ;
2018-01-14 17:27:30 +08:00
}
2018-03-19 06:00:45 +08:00
/*!
@ return whether object creation completed
*/
bool get_ubjson_object ( )
2018-01-14 17:27:30 +08:00
{
2020-06-06 20:30:17 +08:00
std : : pair < std : : size_t , char_int_type > size_and_type ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_size_type ( size_and_type ) ) )
2018-03-19 06:00:45 +08:00
{
return false ;
}
2018-01-14 17:27:30 +08:00
2018-03-22 03:12:06 +08:00
string_t key ;
2018-02-01 15:01:01 +08:00
if ( size_and_type . first ! = string_t : : npos )
2018-01-14 17:27:30 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( size_and_type . first ) ) )
2018-02-07 05:38:53 +08:00
{
2018-03-12 01:47:38 +08:00
return false ;
2018-02-07 05:38:53 +08:00
}
2018-01-14 17:27:30 +08:00
if ( size_and_type . second ! = 0 )
{
2018-03-12 01:47:38 +08:00
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
2018-01-14 17:27:30 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_string ( key ) | | ! sax - > key ( key ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_value ( size_and_type . second ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2018-03-22 03:12:06 +08:00
key . clear ( ) ;
2018-03-12 01:47:38 +08:00
}
2018-01-14 17:27:30 +08:00
}
else
{
2018-03-12 01:47:38 +08:00
for ( std : : size_t i = 0 ; i < size_and_type . first ; + + i )
2018-01-14 17:27:30 +08:00
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_string ( key ) | | ! sax - > key ( key ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_ubjson_internal ( ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2018-03-22 03:12:06 +08:00
key . clear ( ) ;
2018-03-12 01:47:38 +08:00
}
2018-01-14 17:27:30 +08:00
}
}
else
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! sax - > start_object ( std : : size_t ( - 1 ) ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2018-01-14 17:27:30 +08:00
while ( current ! = ' } ' )
{
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! get_ubjson_string ( key , false ) | | ! sax - > key ( key ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! parse_ubjson_internal ( ) ) )
2018-03-12 01:47:38 +08:00
{
return false ;
}
2018-01-14 17:27:30 +08:00
get_ignore_noop ( ) ;
2018-03-22 03:12:06 +08:00
key . clear ( ) ;
2018-01-14 17:27:30 +08:00
}
}
2018-03-12 01:47:38 +08:00
return sax - > end_object ( ) ;
2018-01-14 17:27:30 +08:00
}
2019-07-05 12:13:25 +08:00
// Note, no reader for UBJSON binary types is implemented because they do
// not exist
2020-07-23 18:16:18 +08:00
bool get_ubjson_high_precision_number ( )
{
// get size of following number string
std : : size_t size { } ;
auto res = get_ubjson_size_value ( size ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! res ) )
{
return res ;
}
// get number string
std : : vector < char > number_vector ;
for ( std : : size_t i = 0 ; i < size ; + + i )
{
get ( ) ;
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( input_format_t : : ubjson , " number " ) ) )
{
return false ;
}
number_vector . push_back ( static_cast < char > ( current ) ) ;
}
// parse number string
auto number_ia = detail : : input_adapter ( std : : forward < decltype ( number_vector ) > ( number_vector ) ) ;
auto number_lexer = detail : : lexer < BasicJsonType , decltype ( number_ia ) > ( std : : move ( number_ia ) , false ) ;
const auto result_number = number_lexer . scan ( ) ;
const auto number_string = number_lexer . get_token_string ( ) ;
const auto result_remainder = number_lexer . scan ( ) ;
using token_type = typename detail : : lexer_base < BasicJsonType > : : token_type ;
if ( JSON_HEDLEY_UNLIKELY ( result_remainder ! = token_type : : end_of_input ) )
{
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , number_string , parse_error : : create ( 115 , chars_read , exception_message ( input_format_t : : ubjson , " invalid number text: " + number_lexer . get_token_string ( ) , " high-precision number " ) , diagnostics_t ( ) ) ) ;
2020-07-23 18:16:18 +08:00
}
switch ( result_number )
{
case token_type : : value_integer :
return sax - > number_integer ( number_lexer . get_number_integer ( ) ) ;
case token_type : : value_unsigned :
return sax - > number_unsigned ( number_lexer . get_number_unsigned ( ) ) ;
case token_type : : value_float :
return sax - > number_float ( number_lexer . get_number_float ( ) , std : : move ( number_string ) ) ;
default :
2021-01-10 02:21:18 +08:00
return sax - > parse_error ( chars_read , number_string , parse_error : : create ( 115 , chars_read , exception_message ( input_format_t : : ubjson , " invalid number text: " + number_lexer . get_token_string ( ) , " high-precision number " ) , diagnostics_t ( ) ) ) ;
2020-07-23 18:16:18 +08:00
}
}
2018-10-25 05:39:30 +08:00
///////////////////////
// Utility functions //
///////////////////////
/*!
@ brief get next character from the input
This function provides the interface to the used input adapter . It does
not throw in case the input reached EOF, but returns a negative-valued
2020-06-06 20:30:17 +08:00
` std : : char_traits < char_type > : : eof ( ) ` in that case .
2018-10-25 05:39:30 +08:00
@ return character read from the input
*/
2020-06-06 20:30:17 +08:00
char_int_type get()
{
    // count the read attempt, then cache the byte (or EOF) in `current`
    chars_read += 1;
    current = ia.get_character();
    return current;
}
/*!
@ return character read from the input after ignoring all ' N ' entries
*/
2020-06-06 20:30:17 +08:00
char_int_type get_ignore_noop()
{
    // always read at least one byte, then keep reading past 'N' bytes
    get();
    while (current == 'N')
    {
        get();
    }
    return current;
}
/*
@ brief read a number from the input
@ tparam NumberType the type of the number
@ param [ in ] format the current format ( for diagnostics )
@ param [ out ] result number of type @ a NumberType
@ return whether conversion completed
@note This function needs to respect the system's endianness, because
bytes in CBOR , MessagePack , and UBJSON are stored in network order
( big endian ) and therefore need reordering on little endian systems .
*/
template < typename NumberType , bool InputIsLittleEndian = false >
bool get_number ( const input_format_t format , NumberType & result )
{
// step 1: read input into array with system's byte order
2019-03-17 07:27:44 +08:00
std : : array < std : : uint8_t , sizeof ( NumberType ) > vec ;
2018-10-25 05:39:30 +08:00
for ( std : : size_t i = 0 ; i < sizeof ( NumberType ) ; + + i )
{
get ( ) ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( format , " number " ) ) )
2018-10-25 05:39:30 +08:00
{
return false ;
}
// reverse byte order prior to conversion if necessary
2019-02-21 18:55:21 +08:00
if ( is_little_endian ! = InputIsLittleEndian )
2018-10-25 05:39:30 +08:00
{
2019-03-17 07:27:44 +08:00
vec [ sizeof ( NumberType ) - i - 1 ] = static_cast < std : : uint8_t > ( current ) ;
2018-10-25 05:39:30 +08:00
}
else
{
2019-03-17 07:27:44 +08:00
vec [ i ] = static_cast < std : : uint8_t > ( current ) ; // LCOV_EXCL_LINE
2018-10-25 05:39:30 +08:00
}
}
// step 2: convert array into number of type T and return
std : : memcpy ( & result , vec . data ( ) , sizeof ( NumberType ) ) ;
return true ;
}
/*!
@ brief create a string by reading characters from the input
@ tparam NumberType the type of the number
@ param [ in ] format the current format ( for diagnostics )
@ param [ in ] len number of characters to read
@ param [ out ] result string created by reading @ a len bytes
@ return whether string creation completed
@ note We can not reserve @ a len bytes for the result , because @ a len
may be too large . Usually , @ ref unexpect_eof ( ) detects the end of
the input before we run out of string memory .
*/
template < typename NumberType >
bool get_string ( const input_format_t format ,
const NumberType len ,
string_t & result )
{
bool success = true ;
2020-07-19 16:57:17 +08:00
for ( NumberType i = 0 ; i < len ; i + + )
2018-10-25 05:39:30 +08:00
{
get ( ) ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( format , " string " ) ) )
2018-10-25 05:39:30 +08:00
{
success = false ;
2020-07-19 15:35:49 +08:00
break ;
2018-10-25 05:39:30 +08:00
}
2020-07-22 05:00:56 +08:00
result . push_back ( static_cast < typename string_t : : value_type > ( current ) ) ;
2020-12-26 21:55:19 +08:00
}
2018-10-25 05:39:30 +08:00
return success ;
}
2019-07-05 12:13:25 +08:00
/*!
@ brief create a byte array by reading bytes from the input
@ tparam NumberType the type of the number
@ param [ in ] format the current format ( for diagnostics )
@ param [ in ] len number of bytes to read
@ param [ out ] result byte array created by reading @ a len bytes
@ return whether byte array creation completed
@ note We can not reserve @ a len bytes for the result , because @ a len
may be too large . Usually , @ ref unexpect_eof ( ) detects the end of
the input before we run out of memory .
*/
template < typename NumberType >
bool get_binary ( const input_format_t format ,
const NumberType len ,
2020-05-18 04:50:27 +08:00
binary_t & result )
2019-07-05 12:13:25 +08:00
{
bool success = true ;
2020-07-19 16:57:17 +08:00
for ( NumberType i = 0 ; i < len ; i + + )
2019-07-05 12:13:25 +08:00
{
get ( ) ;
2020-06-03 20:20:36 +08:00
if ( JSON_HEDLEY_UNLIKELY ( ! unexpect_eof ( format , " binary " ) ) )
2019-07-05 12:13:25 +08:00
{
success = false ;
2020-07-19 15:35:49 +08:00
break ;
2019-07-05 12:13:25 +08:00
}
2020-07-19 16:51:13 +08:00
result . push_back ( static_cast < std : : uint8_t > ( current ) ) ;
2020-07-19 15:35:49 +08:00
}
2019-07-05 12:13:25 +08:00
return success ;
}
2018-01-28 20:15:03 +08:00
/*!
2018-10-17 18:15:58 +08:00
@ param [ in ] format the current format ( for diagnostics )
@ param [ in ] context further context information ( for diagnostics )
2018-03-19 06:00:45 +08:00
@ return whether the last read character is not EOF
2018-01-28 20:15:03 +08:00
*/
2019-07-02 04:37:30 +08:00
JSON_HEDLEY_NON_NULL(3)
bool unexpect_eof(const input_format_t format, const char* context) const
{
    const bool at_eof = (current == std::char_traits<char_type>::eof());
    if (JSON_HEDLEY_UNLIKELY(at_eof))
    {
        // report the premature end of input; the SAX consumer's return
        // value is passed through to the caller
        const auto message = exception_message(format, "unexpected end of input", context);
        return sax->parse_error(chars_read, "<end of file>",
                                parse_error::create(110, chars_read, message, diagnostics_t()));
    }
    return true;
}
/*!
@ return a string representation of the last read byte
*/
std : : string get_token_string ( ) const
{
2019-03-20 18:06:39 +08:00
std : : array < char , 3 > cr { { } } ;
2019-03-17 07:27:44 +08:00
( std : : snprintf ) ( cr . data ( ) , cr . size ( ) , " %.2hhX " , static_cast < unsigned char > ( current ) ) ;
return std : : string { cr . data ( ) } ;
2017-08-14 23:50:24 +08:00
}
2018-10-17 18:15:58 +08:00
/*!
@ param [ in ] format the current format
@ param [ in ] detail a detailed error message
2019-10-19 17:59:46 +08:00
@ param [ in ] context further context information
2018-10-17 18:15:58 +08:00
@ return a message string to use in the parse_error exceptions
*/
std : : string exception_message ( const input_format_t format ,
const std : : string & detail ,
const std : : string & context ) const
{
std : : string error_msg = " syntax error while parsing " ;
switch ( format )
{
case input_format_t : : cbor :
error_msg + = " CBOR " ;
break ;
case input_format_t : : msgpack :
error_msg + = " MessagePack " ;
break ;
case input_format_t : : ubjson :
error_msg + = " UBJSON " ;
break ;
2018-10-18 01:06:22 +08:00
case input_format_t : : bson :
error_msg + = " BSON " ;
break ;
2019-03-18 20:53:48 +08:00
default : // LCOV_EXCL_LINE
2020-07-06 18:22:31 +08:00
JSON_ASSERT ( false ) ; // LCOV_EXCL_LINE
2018-10-17 18:15:58 +08:00
}
return error_msg + " " + context + " : " + detail ;
}
2018-10-25 05:39:30 +08:00
private :
2017-08-14 23:50:24 +08:00
/// input adapter
2020-02-20 03:59:31 +08:00
InputAdapterType ia ;
2017-08-14 23:50:24 +08:00
/// the current character
2020-06-06 20:30:17 +08:00
char_int_type current = std : : char_traits < char_type > : : eof ( ) ;
2017-08-14 23:50:24 +08:00
/// the number of characters read
std : : size_t chars_read = 0 ;
/// whether we can assume little endianess
const bool is_little_endian = little_endianess ( ) ;
2018-03-19 06:00:45 +08:00
/// the SAX parser
json_sax_t * sax = nullptr ;
2017-08-14 23:50:24 +08:00
} ;
2018-10-08 00:39:18 +08:00
} // namespace detail
} // namespace nlohmann