// json/include/nlohmann/detail/input/input_adapters.hpp
// (455 lines, 15 KiB, C++)
#pragma once
2017-08-14 22:48:55 +08:00
2019-03-17 07:27:44 +08:00
#include <array> // array
#include <cstddef> // size_t
2019-03-17 19:01:49 +08:00
#include <cstdio> //FILE *
#include <cstring> // strlen
#include <istream> // istream
#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
#include <memory> // shared_ptr, make_shared, addressof
#include <numeric> // accumulate
#include <string> // string, char_traits
#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
#include <utility> // pair, declval
2017-08-14 22:48:55 +08:00
2019-01-13 22:31:22 +08:00
#include <nlohmann/detail/iterators/iterator_traits.hpp>
#include <nlohmann/detail/macro_scope.hpp>
2017-08-14 22:48:55 +08:00
namespace nlohmann
{
namespace detail
{
2018-03-21 05:39:08 +08:00
/// the supported input formats (one enumerator per format)
enum class input_format_t
{
    json,
    cbor,
    msgpack,
    ubjson,
    bson
};
2018-03-21 05:39:08 +08:00
2017-08-14 22:48:55 +08:00
////////////////////
// input adapters //
////////////////////
/*!
Input adapter for stdio file access. This adapter read only 1 byte and do not use any
2018-12-13 03:15:49 +08:00
buffer. This adapter is a very low level adapter.
*/
2020-02-20 03:59:31 +08:00
class file_input_adapter
{
public:
2020-05-28 00:40:04 +08:00
using char_type = char;
2019-07-02 04:37:30 +08:00
JSON_HEDLEY_NON_NULL(2)
2020-07-16 20:45:39 +08:00
explicit file_input_adapter(std::FILE* f) noexcept
: m_file(f)
{}
2019-03-17 19:01:49 +08:00
// make class move-only
file_input_adapter(const file_input_adapter&) = delete;
2019-03-18 05:25:18 +08:00
file_input_adapter(file_input_adapter&&) = default;
2019-03-17 19:01:49 +08:00
file_input_adapter& operator=(const file_input_adapter&) = delete;
file_input_adapter& operator=(file_input_adapter&&) = delete;
2019-03-17 19:01:49 +08:00
2020-02-20 03:59:31 +08:00
std::char_traits<char>::int_type get_character() noexcept
{
return std::fgetc(m_file);
}
2019-03-17 19:01:49 +08:00
private:
/// the file pointer to read from
std::FILE* m_file;
};
2017-08-14 22:48:55 +08:00
/*!
Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
beginning of input. Does not support changing the underlying std::streambuf
in mid-input. Maintains underlying std::istream and std::streambuf to support
subsequent use of standard std::istream operations to process any input
characters following those used in parsing the JSON input. Clears the
std::istream flags; any input errors (e.g., EOF) will be detected by the first
subsequent call for input from the std::istream.
*/
2020-02-20 03:59:31 +08:00
class input_stream_adapter
2017-08-14 22:48:55 +08:00
{
public:
2020-05-28 00:40:04 +08:00
using char_type = char;
2020-02-20 03:59:31 +08:00
~input_stream_adapter()
2017-08-14 22:48:55 +08:00
{
// clear stream flags; we use underlying streambuf I/O, do not
// maintain ifstream flags, except eof
2020-06-23 04:32:21 +08:00
if (is != nullptr)
2020-02-20 03:59:31 +08:00
{
is->clear(is->rdstate() & std::ios::eofbit);
}
2017-08-14 22:48:55 +08:00
}
explicit input_stream_adapter(std::istream& i)
2020-02-20 03:59:31 +08:00
: is(&i), sb(i.rdbuf())
{}
2017-08-14 22:48:55 +08:00
// delete because of pointer members
input_stream_adapter(const input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&& rhs) = delete;
2020-02-20 03:59:31 +08:00
2020-07-16 20:45:39 +08:00
input_stream_adapter(input_stream_adapter&& rhs) noexcept : is(rhs.is), sb(rhs.sb)
2020-02-20 03:59:31 +08:00
{
rhs.is = nullptr;
rhs.sb = nullptr;
}
2017-08-14 22:48:55 +08:00
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
// ensure that std::char_traits<char>::eof() and the character 0xFF do not
// end up as the same value, eg. 0xFFFFFFFF.
2020-02-20 03:59:31 +08:00
std::char_traits<char>::int_type get_character()
2017-08-14 22:48:55 +08:00
{
2020-02-20 03:59:31 +08:00
auto res = sb->sbumpc();
// set eof manually, as we don't use the istream interface.
2020-05-28 14:29:53 +08:00
if (JSON_HEDLEY_UNLIKELY(res == EOF))
2018-11-10 04:10:32 +08:00
{
2020-02-20 03:59:31 +08:00
is->clear(is->rdstate() | std::ios::eofbit);
2018-11-10 04:10:32 +08:00
}
return res;
2017-08-14 22:48:55 +08:00
}
private:
/// the associated input stream
2020-02-20 03:59:31 +08:00
std::istream* is = nullptr;
std::streambuf* sb = nullptr;
2017-08-14 22:48:55 +08:00
};
2020-05-28 00:40:04 +08:00
// General-purpose iterator-based adapter. It might not be as fast as
// theoretically possible for some containers, but it is extremely versatile.
// The adapter walks the half-open range [first, last) one element at a time.
template<typename IteratorType>
class iterator_input_adapter
{
  public:
    using char_type = typename std::iterator_traits<IteratorType>::value_type;

    /// @param[in] first  iterator to the first element to read
    /// @param[in] last   iterator one past the last element to read
    iterator_input_adapter(IteratorType first, IteratorType last)
        : current(std::move(first)), end(std::move(last))
    {}

    /// @return the next element converted with to_int_type, or eof() of the
    ///         matching char_traits once the range is exhausted
    typename std::char_traits<char_type>::int_type get_character()
    {
        if (JSON_HEDLEY_LIKELY(current != end))
        {
            auto result = std::char_traits<char_type>::to_int_type(*current);
            std::advance(current, 1);
            return result;
        }

        return std::char_traits<char_type>::eof();
    }

  private:
    /// position of the next element to read
    IteratorType current;
    /// end of the range
    IteratorType end;

    // the wide-string helpers need empty() to test for input before reading
    template<typename BaseInputAdapter, std::size_t T>
    friend struct wide_string_input_helper;

    /// @return whether the range is exhausted
    bool empty() const
    {
        return current == end;
    }
};
2020-05-28 00:40:04 +08:00
// Converts one wide character read from a base adapter into UTF-8 bytes.
// The second template argument is the size in bytes of the wide character
// type; only the specializations are defined.
template<typename BaseInputAdapter, std::size_t T>
struct wide_string_input_helper;

template<typename BaseInputAdapter>
struct wide_string_input_helper<BaseInputAdapter, 4>
{
    // UTF-32
    /// Reads one character from @a input and stores its UTF-8 encoding.
    /// @param[in,out] input            adapter to read the character from
    /// @param[out] utf8_bytes          receives the encoded bytes (or eof())
    /// @param[out] utf8_bytes_index    always reset to 0
    /// @param[out] utf8_bytes_filled   number of valid entries in @a utf8_bytes
    static void fill_buffer(BaseInputAdapter& input,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        using utf8_int_type = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        if (JSON_HEDLEY_UNLIKELY(input.empty()))
        {
            // no input left: hand a single eof marker to the caller
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = input.get_character();
        const auto cp = static_cast<unsigned int>(wc);

        // UTF-32 to UTF-8 encoding
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<utf8_int_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<utf8_int_type>(0xC0u | ((cp >> 6u) & 0x1Fu));
            utf8_bytes[1] = static_cast<utf8_int_type>(0x80u | (cp & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (wc <= 0xFFFF)
        {
            utf8_bytes[0] = static_cast<utf8_int_type>(0xE0u | ((cp >> 12u) & 0x0Fu));
            utf8_bytes[1] = static_cast<utf8_int_type>(0x80u | ((cp >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<utf8_int_type>(0x80u | (cp & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (wc <= 0x10FFFF)
        {
            utf8_bytes[0] = static_cast<utf8_int_type>(0xF0u | ((cp >> 18u) & 0x07u));
            utf8_bytes[1] = static_cast<utf8_int_type>(0x80u | ((cp >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<utf8_int_type>(0x80u | ((cp >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<utf8_int_type>(0x80u | (cp & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // unknown character (beyond U+10FFFF): passed through unencoded
            utf8_bytes[0] = static_cast<utf8_int_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};
2020-05-28 00:40:04 +08:00
template<typename BaseInputAdapter>
struct wide_string_input_helper<BaseInputAdapter, 2>
{
    // UTF-16
    /// Reads one code unit (two for a surrogate pair) from @a input and
    /// stores the UTF-8 encoding.
    /// @param[in,out] input            adapter to read the code unit(s) from
    /// @param[out] utf8_bytes          receives the encoded bytes (or eof())
    /// @param[out] utf8_bytes_index    always reset to 0
    /// @param[out] utf8_bytes_filled   number of valid entries in @a utf8_bytes
    static void fill_buffer(BaseInputAdapter& input,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        using utf8_int_type = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        if (JSON_HEDLEY_UNLIKELY(input.empty()))
        {
            // no input left: hand a single eof marker to the caller
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = input.get_character();
        const auto unit = static_cast<unsigned int>(wc);

        // UTF-16 to UTF-8 encoding
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<utf8_int_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<utf8_int_type>(0xC0u | (unit >> 6u));
            utf8_bytes[1] = static_cast<utf8_int_type>(0x80u | (unit & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (0xD800 > wc || wc >= 0xE000)
        {
            // code unit outside the surrogate range: three-byte sequence
            utf8_bytes[0] = static_cast<utf8_int_type>(0xE0u | (unit >> 12u));
            utf8_bytes[1] = static_cast<utf8_int_type>(0x80u | ((unit >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<utf8_int_type>(0x80u | (unit & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (JSON_HEDLEY_LIKELY(!input.empty()))
        {
            // surrogate range: combine with the following code unit.
            // NOTE(review): neither unit is checked to be a proper high/low
            // surrogate; malformed pairs are combined as-is.
            const auto wc2 = static_cast<unsigned int>(input.get_character());
            const auto charcode = 0x10000u + (((unit & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
            utf8_bytes[0] = static_cast<utf8_int_type>(0xF0u | (charcode >> 18u));
            utf8_bytes[1] = static_cast<utf8_int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<utf8_int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<utf8_int_type>(0x80u | (charcode & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // surrogate at the very end of the input: passed through unencoded
            utf8_bytes[0] = static_cast<utf8_int_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};
2020-05-28 00:40:04 +08:00
// Wraps another input apdater to convert wide character types into individual bytes.
template<typename BaseInputAdapter, typename WideCharType>
2020-02-20 03:59:31 +08:00
class wide_string_input_adapter
{
public:
2020-06-06 20:30:17 +08:00
using char_type = char;
2020-05-28 00:40:04 +08:00
wide_string_input_adapter(BaseInputAdapter base)
: base_adapter(base) {}
2020-05-28 00:40:04 +08:00
typename std::char_traits<char>::int_type get_character() noexcept
{
// check if buffer needs to be filled
if (utf8_bytes_index == utf8_bytes_filled)
{
2020-05-28 00:40:04 +08:00
fill_buffer<sizeof(WideCharType)>();
JSON_ASSERT(utf8_bytes_filled > 0);
JSON_ASSERT(utf8_bytes_index == 0);
}
// use buffer
JSON_ASSERT(utf8_bytes_filled > 0);
JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled);
return utf8_bytes[utf8_bytes_index++];
}
private:
2020-05-28 00:40:04 +08:00
BaseInputAdapter base_adapter;
template<size_t T>
void fill_buffer()
{
2020-05-28 00:40:04 +08:00
wide_string_input_helper<BaseInputAdapter, T>::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
}
2018-10-05 02:42:19 +08:00
/// a buffer for UTF-8 bytes
std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
/// index to the utf8_codes array for the next valid byte
std::size_t utf8_bytes_index = 0;
/// number of valid bytes in the utf8_codes array
std::size_t utf8_bytes_filled = 0;
};
2020-02-19 23:32:49 +08:00
2020-05-28 00:40:04 +08:00
template<typename IteratorType, typename Enable = void>
struct iterator_input_adapter_factory
2020-02-19 23:32:49 +08:00
{
2020-05-28 00:40:04 +08:00
using iterator_type = IteratorType;
using char_type = typename std::iterator_traits<iterator_type>::value_type;
using adapter_type = iterator_input_adapter<iterator_type>;
2020-02-19 23:32:49 +08:00
2020-05-28 14:29:53 +08:00
static adapter_type create(IteratorType first, IteratorType last)
2020-05-28 00:40:04 +08:00
{
2020-05-28 14:29:53 +08:00
return adapter_type(std::move(first), std::move(last));
2020-05-28 00:40:04 +08:00
}
};
2020-02-19 23:32:49 +08:00
2020-05-28 14:29:53 +08:00
// Trait: `value` is nonzero when the iterator's value_type is wider than
// one byte.
template<typename T>
struct is_iterator_of_multibyte
{
    using value_type = typename std::iterator_traits<T>::value_type;
    enum
    {
        value = sizeof(value_type) > 1
    };
};
2020-02-19 23:32:49 +08:00
2020-05-28 14:29:53 +08:00
// Specialization for iterators whose value_type is wider than one byte: the
// plain iterator adapter is wrapped in a wide_string_input_adapter, which
// delivers individual bytes.
template<typename IteratorType>
struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
{
    using iterator_type = IteratorType;
    using char_type = typename std::iterator_traits<iterator_type>::value_type;
    using base_adapter_type = iterator_input_adapter<iterator_type>;
    using adapter_type = wide_string_input_adapter<base_adapter_type, char_type>;

    /// @return a byte-delivering adapter over the range [first, last)
    static adapter_type create(IteratorType first, IteratorType last)
    {
        return adapter_type(base_adapter_type(std::move(first), std::move(last)));
    }
};
2020-02-19 23:32:49 +08:00
2020-05-28 00:40:04 +08:00
// General purpose iterator-based input
template<typename IteratorType>
2020-05-28 14:29:53 +08:00
typename iterator_input_adapter_factory<IteratorType>::adapter_type input_adapter(IteratorType first, IteratorType last)
2020-02-19 23:32:49 +08:00
{
2020-05-28 00:40:04 +08:00
using factory_type = iterator_input_adapter_factory<IteratorType>;
2020-05-28 14:29:53 +08:00
return factory_type::create(first, last);
2020-02-19 23:32:49 +08:00
}
2020-05-28 00:40:04 +08:00
// Convenience shorthand from container to iterator
// The returned adapter refers to the container's elements through its
// iterators, so the container must stay alive while the adapter is in use.
template<typename ContainerType>
auto input_adapter(const ContainerType& container) -> decltype(input_adapter(begin(container), end(container)))
{
    // Enable ADL
    using std::begin;
    using std::end;

    return input_adapter(begin(container), end(container));
}
2020-05-28 00:40:04 +08:00
// Special cases with fast paths
// Reads from a stdio file handle through a file_input_adapter.
inline file_input_adapter input_adapter(std::FILE* file)
{
    return file_input_adapter(file);
}
2020-05-28 00:40:04 +08:00
// Reads from a std::istream through an input_stream_adapter.
inline input_stream_adapter input_adapter(std::istream& stream)
{
    return input_stream_adapter(stream);
}
2017-08-14 22:48:55 +08:00
2020-05-28 00:40:04 +08:00
// Overload for temporary streams. The adapter only stores a pointer to the
// stream, so it must be used up before the temporary is destroyed.
inline input_stream_adapter input_adapter(std::istream&& stream)
{
    return input_stream_adapter(stream);
}
2020-05-28 00:40:04 +08:00
// The adapter type produced by input_adapter for a pair of `const char*`
// iterators.
using contiguous_bytes_input_adapter =
    decltype(input_adapter(std::declval<const char*>(),
                           std::declval<const char*>()));
2020-05-28 00:40:04 +08:00
// Null-delimited strings, and the like.
template < typename CharT,
typename std::enable_if <
std::is_pointer<CharT>::value&&
!std::is_array<CharT>::value&&
std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
sizeof(typename std::remove_pointer<CharT>::type) == 1,
int >::type = 0 >
2020-05-28 00:40:04 +08:00
contiguous_bytes_input_adapter input_adapter(CharT b)
2020-02-19 23:32:49 +08:00
{
2020-05-28 00:40:04 +08:00
auto length = std::strlen(reinterpret_cast<const char*>(b));
2020-06-23 04:32:21 +08:00
const auto* ptr = reinterpret_cast<const char*>(b);
2020-05-28 00:40:04 +08:00
return input_adapter(ptr, ptr + length);
2020-02-19 23:32:49 +08:00
}
// Built-in arrays: adapts all N elements of the array, i.e. the range
// [array, array + N).
template<typename T, std::size_t N>
auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N))
{
    return input_adapter(array, array + N);
}
2020-02-19 23:32:49 +08:00
// This class only handles inputs of input_buffer_adapter type.
// It's required so that expressions like {ptr, len} can be implicitely casted
// to the correct adapter.
class span_input_adapter
{
public:
template < typename CharT,
typename std::enable_if <
std::is_pointer<CharT>::value&&
std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
sizeof(typename std::remove_pointer<CharT>::type) == 1,
int >::type = 0 >
2020-02-19 23:32:49 +08:00
span_input_adapter(CharT b, std::size_t l)
2020-05-28 00:40:04 +08:00
: ia(reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(b) + l) {}
2017-08-14 22:48:55 +08:00
template<class IteratorType,
typename std::enable_if<
std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
2017-08-14 22:48:55 +08:00
int>::type = 0>
2020-02-19 23:32:49 +08:00
span_input_adapter(IteratorType first, IteratorType last)
: ia(input_adapter(first, last)) {}
2017-08-14 22:48:55 +08:00
2020-05-28 00:40:04 +08:00
contiguous_bytes_input_adapter&& get()
2017-08-14 22:48:55 +08:00
{
2020-02-20 03:59:31 +08:00
return std::move(ia);
2017-08-14 22:48:55 +08:00
}
private:
2020-05-28 00:40:04 +08:00
contiguous_bytes_input_adapter ia;
2017-08-14 22:48:55 +08:00
};
} // namespace detail
} // namespace nlohmann