Minor BJData fixes (#3637)

* Replace vector/map LUTs in binary_reader with arrays

* Replace string_t::npos in binary_reader
This commit is contained in:
Florian Albrechtskirchinger 2022-08-03 09:15:37 +02:00 committed by GitHub
parent ac9e668e63
commit 32242022f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 143 additions and 46 deletions

View File

@ -20,7 +20,6 @@
#include <string> // char_traits, string #include <string> // char_traits, string
#include <utility> // make_pair, move #include <utility> // make_pair, move
#include <vector> // vector #include <vector> // vector
#include <map> // map
#include <nlohmann/detail/exceptions.hpp> #include <nlohmann/detail/exceptions.hpp>
#include <nlohmann/detail/input/input_adapters.hpp> #include <nlohmann/detail/input/input_adapters.hpp>
@ -1953,7 +1952,7 @@ class binary_reader
return false; return false;
} }
if (size_and_type.first != string_t::npos) if (size_and_type.first != npos)
{ {
if (size_and_type.second != 0) if (size_and_type.second != 0)
{ {
@ -2186,7 +2185,7 @@ class binary_reader
for (auto i : dim) for (auto i : dim)
{ {
result *= i; result *= i;
if (result == 0 || result == string_t::npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be string_t::npos as it is used to initialize size in get_ubjson_size_type() if (result == 0 || result == npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be npos as it is used to initialize size in get_ubjson_size_type()
{ {
return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr)); return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr));
} }
@ -2232,7 +2231,7 @@ class binary_reader
*/ */
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false) bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false)
{ {
result.first = string_t::npos; // size result.first = npos; // size
result.second = 0; // type result.second = 0; // type
bool is_ndarray = false; bool is_ndarray = false;
@ -2240,10 +2239,9 @@ class binary_reader
if (current == '$') if (current == '$')
{ {
std::vector<char_int_type> bjdx = {'[', '{', 'S', 'H', 'T', 'F', 'N', 'Z'}; // excluded markers in bjdata optimized type
result.second = get(); // must not ignore 'N', because 'N' maybe the type result.second = get(); // must not ignore 'N', because 'N' maybe the type
if (JSON_HEDLEY_UNLIKELY( input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() )) if (input_format == input_format_t::bjdata
&& JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second)))
{ {
auto last_token = get_token_string(); auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
@ -2492,23 +2490,23 @@ class binary_reader
// if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
{ {
std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"},
{'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"}
};
size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker
auto it = std::lower_bound(bjd_types_map.begin(), bjd_types_map.end(), size_and_type.second, [](const bjd_type & p, char_int_type t)
{
return p.first < t;
});
string_t key = "_ArrayType_"; string_t key = "_ArrayType_";
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0)) if (JSON_HEDLEY_UNLIKELY(it == bjd_types_map.end() || it->first != size_and_type.second))
{ {
auto last_token = get_token_string(); auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr)); exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
} }
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) string_t type = it->second; // sax->string() takes a reference
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type)))
{ {
return false; return false;
} }
@ -2535,7 +2533,7 @@ class binary_reader
return (sax->end_array() && sax->end_object()); return (sax->end_array() && sax->end_object());
} }
if (size_and_type.first != string_t::npos) if (size_and_type.first != npos)
{ {
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
{ {
@ -2598,7 +2596,7 @@ class binary_reader
} }
// do not accept ND-array size in objects in BJData // do not accept ND-array size in objects in BJData
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
{ {
auto last_token = get_token_string(); auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
@ -2606,7 +2604,7 @@ class binary_reader
} }
string_t key; string_t key;
if (size_and_type.first != string_t::npos) if (size_and_type.first != npos)
{ {
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
{ {
@ -2950,6 +2948,8 @@ class binary_reader
} }
private: private:
static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast<std::size_t>(-1);
/// input adapter /// input adapter
InputAdapterType ia; InputAdapterType ia;
@ -2967,7 +2967,44 @@ class binary_reader
/// the SAX parser /// the SAX parser
json_sax_t* sax = nullptr; json_sax_t* sax = nullptr;
// excluded markers in bjdata optimized type
#define JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ \
make_array<char_int_type>('F', 'H', 'N', 'S', 'T', 'Z', '[', '{')
#define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \
make_array<bjd_type>( \
bjd_type{'C', "char"}, \
bjd_type{'D', "double"}, \
bjd_type{'I', "int16"}, \
bjd_type{'L', "int64"}, \
bjd_type{'M', "uint64"}, \
bjd_type{'U', "uint8"}, \
bjd_type{'d', "single"}, \
bjd_type{'i', "int8"}, \
bjd_type{'l', "int32"}, \
bjd_type{'m', "uint32"}, \
bjd_type{'u', "uint16"})
JSON_PRIVATE_UNLESS_TESTED:
// lookup tables
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers =
JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_;
using bjd_type = std::pair<char_int_type, string_t>;
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map =
JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_;
#undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_
#undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_
}; };
#ifndef JSON_HAS_CPP_17
template<typename BasicJsonType, typename InputAdapterType, typename SAX>
constexpr std::size_t binary_reader<BasicJsonType, InputAdapterType, SAX>::npos;
#endif
} // namespace detail } // namespace detail
NLOHMANN_JSON_NAMESPACE_END NLOHMANN_JSON_NAMESPACE_END

View File

@ -9,6 +9,7 @@
#pragma once #pragma once
#include <array> // array
#include <cstddef> // size_t #include <cstddef> // size_t
#include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type #include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type
#include <utility> // index_sequence, make_index_sequence, index_sequence_for #include <utility> // index_sequence, make_index_sequence, index_sequence_for
@ -152,15 +153,19 @@ template<> struct priority_tag<0> {};
template<typename T> template<typename T>
struct static_const struct static_const
{ {
static constexpr T value{}; static JSON_INLINE_VARIABLE constexpr T value{};
}; };
#ifndef JSON_HAS_CPP_17 #ifndef JSON_HAS_CPP_17
template<typename T> template<typename T>
constexpr T static_const<T>::value; // NOLINT(readability-redundant-declaration) constexpr T static_const<T>::value;
#endif #endif
template<typename T, typename... Args>
inline constexpr std::array<T, sizeof...(Args)> make_array(Args&& ... args)
{
return std::array<T, sizeof...(Args)> {{static_cast<T>(std::forward<Args>(args))...}};
}
} // namespace detail } // namespace detail
NLOHMANN_JSON_NAMESPACE_END NLOHMANN_JSON_NAMESPACE_END

View File

@ -3028,6 +3028,7 @@ NLOHMANN_JSON_NAMESPACE_END
#include <array> // array
#include <cstddef> // size_t #include <cstddef> // size_t
#include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type #include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type
#include <utility> // index_sequence, make_index_sequence, index_sequence_for #include <utility> // index_sequence, make_index_sequence, index_sequence_for
@ -3172,16 +3173,20 @@ template<> struct priority_tag<0> {};
template<typename T> template<typename T>
struct static_const struct static_const
{ {
static constexpr T value{}; static JSON_INLINE_VARIABLE constexpr T value{};
}; };
#ifndef JSON_HAS_CPP_17 #ifndef JSON_HAS_CPP_17
template<typename T> template<typename T>
constexpr T static_const<T>::value; // NOLINT(readability-redundant-declaration) constexpr T static_const<T>::value;
#endif #endif
template<typename T, typename... Args>
inline constexpr std::array<T, sizeof...(Args)> make_array(Args&& ... args)
{
return std::array<T, sizeof...(Args)> {{static_cast<T>(std::forward<Args>(args))...}};
}
} // namespace detail } // namespace detail
NLOHMANN_JSON_NAMESPACE_END NLOHMANN_JSON_NAMESPACE_END
@ -5977,7 +5982,6 @@ NLOHMANN_JSON_NAMESPACE_END
#include <string> // char_traits, string #include <string> // char_traits, string
#include <utility> // make_pair, move #include <utility> // make_pair, move
#include <vector> // vector #include <vector> // vector
#include <map> // map
// #include <nlohmann/detail/exceptions.hpp> // #include <nlohmann/detail/exceptions.hpp>
@ -10943,7 +10947,7 @@ class binary_reader
return false; return false;
} }
if (size_and_type.first != string_t::npos) if (size_and_type.first != npos)
{ {
if (size_and_type.second != 0) if (size_and_type.second != 0)
{ {
@ -11176,7 +11180,7 @@ class binary_reader
for (auto i : dim) for (auto i : dim)
{ {
result *= i; result *= i;
if (result == 0 || result == string_t::npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be string_t::npos as it is used to initialize size in get_ubjson_size_type() if (result == 0 || result == npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be npos as it is used to initialize size in get_ubjson_size_type()
{ {
return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr)); return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr));
} }
@ -11222,7 +11226,7 @@ class binary_reader
*/ */
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false) bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false)
{ {
result.first = string_t::npos; // size result.first = npos; // size
result.second = 0; // type result.second = 0; // type
bool is_ndarray = false; bool is_ndarray = false;
@ -11230,10 +11234,9 @@ class binary_reader
if (current == '$') if (current == '$')
{ {
std::vector<char_int_type> bjdx = {'[', '{', 'S', 'H', 'T', 'F', 'N', 'Z'}; // excluded markers in bjdata optimized type
result.second = get(); // must not ignore 'N', because 'N' maybe the type result.second = get(); // must not ignore 'N', because 'N' maybe the type
if (JSON_HEDLEY_UNLIKELY( input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() )) if (input_format == input_format_t::bjdata
&& JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second)))
{ {
auto last_token = get_token_string(); auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
@ -11482,23 +11485,23 @@ class binary_reader
// if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
{ {
std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"},
{'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"}
};
size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker
auto it = std::lower_bound(bjd_types_map.begin(), bjd_types_map.end(), size_and_type.second, [](const bjd_type & p, char_int_type t)
{
return p.first < t;
});
string_t key = "_ArrayType_"; string_t key = "_ArrayType_";
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0)) if (JSON_HEDLEY_UNLIKELY(it == bjd_types_map.end() || it->first != size_and_type.second))
{ {
auto last_token = get_token_string(); auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr)); exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
} }
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) string_t type = it->second; // sax->string() takes a reference
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type)))
{ {
return false; return false;
} }
@ -11525,7 +11528,7 @@ class binary_reader
return (sax->end_array() && sax->end_object()); return (sax->end_array() && sax->end_object());
} }
if (size_and_type.first != string_t::npos) if (size_and_type.first != npos)
{ {
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
{ {
@ -11588,7 +11591,7 @@ class binary_reader
} }
// do not accept ND-array size in objects in BJData // do not accept ND-array size in objects in BJData
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
{ {
auto last_token = get_token_string(); auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
@ -11596,7 +11599,7 @@ class binary_reader
} }
string_t key; string_t key;
if (size_and_type.first != string_t::npos) if (size_and_type.first != npos)
{ {
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
{ {
@ -11940,6 +11943,8 @@ class binary_reader
} }
private: private:
static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast<std::size_t>(-1);
/// input adapter /// input adapter
InputAdapterType ia; InputAdapterType ia;
@ -11957,8 +11962,45 @@ class binary_reader
/// the SAX parser /// the SAX parser
json_sax_t* sax = nullptr; json_sax_t* sax = nullptr;
// excluded markers in bjdata optimized type
#define JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ \
make_array<char_int_type>('F', 'H', 'N', 'S', 'T', 'Z', '[', '{')
#define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \
make_array<bjd_type>( \
bjd_type{'C', "char"}, \
bjd_type{'D', "double"}, \
bjd_type{'I', "int16"}, \
bjd_type{'L', "int64"}, \
bjd_type{'M', "uint64"}, \
bjd_type{'U', "uint8"}, \
bjd_type{'d', "single"}, \
bjd_type{'i', "int8"}, \
bjd_type{'l', "int32"}, \
bjd_type{'m', "uint32"}, \
bjd_type{'u', "uint16"})
JSON_PRIVATE_UNLESS_TESTED:
// lookup tables
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers =
JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_;
using bjd_type = std::pair<char_int_type, string_t>;
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map =
JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_;
#undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_
#undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_
}; };
#ifndef JSON_HAS_CPP_17
template<typename BasicJsonType, typename InputAdapterType, typename SAX>
constexpr std::size_t binary_reader<BasicJsonType, InputAdapterType, SAX>::npos;
#endif
} // namespace detail } // namespace detail
NLOHMANN_JSON_NAMESPACE_END NLOHMANN_JSON_NAMESPACE_END

View File

@ -8,11 +8,13 @@
#include "doctest_compatibility.h" #include "doctest_compatibility.h"
#include <climits> #define JSON_TESTS_PRIVATE
#include <limits>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using nlohmann::json; using nlohmann::json;
#include <algorithm>
#include <climits>
#include <limits>
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <set> #include <set>
@ -205,6 +207,17 @@ TEST_CASE_TEMPLATE_INVOKE(value_in_range_of_test, \
TEST_CASE("BJData") TEST_CASE("BJData")
{ {
SECTION("binary_reader BJData LUT arrays are sorted")
{
std::vector<std::uint8_t> data;
auto ia = nlohmann::detail::input_adapter(data);
// NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg)
nlohmann::detail::binary_reader<json, decltype(ia)> br{std::move(ia), json::input_format_t::bjdata};
CHECK(std::is_sorted(br.bjd_optimized_type_markers.begin(), br.bjd_optimized_type_markers.end()));
CHECK(std::is_sorted(br.bjd_types_map.begin(), br.bjd_types_map.end()));
}
SECTION("individual values") SECTION("individual values")
{ {
SECTION("discarded") SECTION("discarded")