json/include/nlohmann/detail/input/input_adapters.hpp

440 lines
16 KiB
C++
Raw Normal View History

2018-01-10 17:18:31 +08:00
#pragma once
2017-08-14 22:48:55 +08:00
2019-03-17 07:27:44 +08:00
#include <array> // array
#include <cassert> // assert
#include <cstddef> // size_t
2019-03-17 19:01:49 +08:00
#include <cstdio> //FILE *
#include <cstring> // strlen
#include <istream> // istream
#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
#include <memory> // shared_ptr, make_shared, addressof
#include <numeric> // accumulate
#include <string> // string, char_traits
#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
#include <utility> // pair, declval
2017-08-14 22:48:55 +08:00
2019-01-13 22:31:22 +08:00
#include <nlohmann/detail/iterators/iterator_traits.hpp>
#include <nlohmann/detail/macro_scope.hpp>
2017-08-14 22:48:55 +08:00
namespace nlohmann
{
namespace detail
{
2018-03-21 05:39:08 +08:00
/// the supported input formats
enum class input_format_t { json, cbor, msgpack, ubjson, bson };
2018-03-21 05:39:08 +08:00
2017-08-14 22:48:55 +08:00
////////////////////
// input adapters //
////////////////////
/*!
@brief abstract input adapter interface
Produces a stream of std::char_traits<char>::int_type characters from a
2018-04-03 04:25:17 +08:00
std::istream, a buffer, or some other input type. Accepts the return of
exactly one non-EOF character for future input. The int_type characters
returned consist of all valid char values as positive values (typically
unsigned char), plus an EOF value outside that range, specified by the value
of the function std::char_traits<char>::eof(). This value is typically -1, but
could be any arbitrary value which is not a valid char value.
2017-08-14 22:48:55 +08:00
*/
struct input_adapter_protocol
{
/// get a character [0,255] or std::char_traits<char>::eof().
virtual std::char_traits<char>::int_type get_character() = 0;
virtual ~input_adapter_protocol() = default;
};
/// a type to simplify interfaces
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/*!
Input adapter for stdio file access. This adapter read only 1 byte and do not use any
2018-12-13 03:15:49 +08:00
buffer. This adapter is a very low level adapter.
*/
class file_input_adapter : public input_adapter_protocol
{
public:
explicit file_input_adapter(std::FILE* f) noexcept
: m_file(f)
{}
2019-03-17 19:01:49 +08:00
// make class move-only
file_input_adapter(const file_input_adapter&) = delete;
2019-03-18 05:25:18 +08:00
file_input_adapter(file_input_adapter&&) = default;
2019-03-17 19:01:49 +08:00
file_input_adapter& operator=(const file_input_adapter&) = delete;
2019-03-18 05:25:18 +08:00
file_input_adapter& operator=(file_input_adapter&&) = default;
2019-03-17 19:01:49 +08:00
~file_input_adapter() override = default;
std::char_traits<char>::int_type get_character() noexcept override
{
return std::fgetc(m_file);
}
2019-03-17 19:01:49 +08:00
private:
/// the file pointer to read from
std::FILE* m_file;
};
2017-08-14 22:48:55 +08:00
/*!
Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
beginning of input. Does not support changing the underlying std::streambuf
in mid-input. Maintains underlying std::istream and std::streambuf to support
subsequent use of standard std::istream operations to process any input
characters following those used in parsing the JSON input. Clears the
std::istream flags; any input errors (e.g., EOF) will be detected by the first
subsequent call for input from the std::istream.
*/
class input_stream_adapter : public input_adapter_protocol
{
public:
~input_stream_adapter() override
{
// clear stream flags; we use underlying streambuf I/O, do not
// maintain ifstream flags, except eof
is.clear(is.rdstate() & std::ios::eofbit);
2017-08-14 22:48:55 +08:00
}
explicit input_stream_adapter(std::istream& i)
: is(i), sb(*i.rdbuf())
{}
2017-08-14 22:48:55 +08:00
// delete because of pointer members
input_stream_adapter(const input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&) = delete;
input_stream_adapter(input_stream_adapter&&) = delete;
input_stream_adapter& operator=(input_stream_adapter&&) = delete;
2017-08-14 22:48:55 +08:00
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
// ensure that std::char_traits<char>::eof() and the character 0xFF do not
// end up as the same value, eg. 0xFFFFFFFF.
std::char_traits<char>::int_type get_character() override
{
auto res = sb.sbumpc();
// set eof manually, as we don't use the istream interface.
if (res == EOF)
2018-11-10 04:10:32 +08:00
{
is.clear(is.rdstate() | std::ios::eofbit);
2018-11-10 04:10:32 +08:00
}
return res;
2017-08-14 22:48:55 +08:00
}
private:
/// the associated input stream
std::istream& is;
std::streambuf& sb;
};
/// input adapter for buffer input
class input_buffer_adapter : public input_adapter_protocol
{
public:
2018-10-28 00:31:03 +08:00
input_buffer_adapter(const char* b, const std::size_t l) noexcept
: cursor(b), limit(b + l)
{}
2017-08-14 22:48:55 +08:00
// delete because of pointer members
input_buffer_adapter(const input_buffer_adapter&) = delete;
input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
input_buffer_adapter(input_buffer_adapter&&) = delete;
input_buffer_adapter& operator=(input_buffer_adapter&&) = delete;
~input_buffer_adapter() override = default;
2017-08-14 22:48:55 +08:00
std::char_traits<char>::int_type get_character() noexcept override
{
if (JSON_LIKELY(cursor < limit))
{
return std::char_traits<char>::to_int_type(*(cursor++));
}
return std::char_traits<char>::eof();
}
private:
/// pointer to the current character
const char* cursor;
/// pointer past the last character
2018-04-03 04:25:17 +08:00
const char* const limit;
2017-08-14 22:48:55 +08:00
};
2018-10-03 20:51:49 +08:00
template<typename WideStringType, size_t T>
struct wide_string_input_helper
{
2018-10-03 20:51:49 +08:00
// UTF-32
2019-03-17 07:27:44 +08:00
static void fill_buffer(const WideStringType& str,
size_t& current_wchar,
std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
size_t& utf8_bytes_index,
size_t& utf8_bytes_filled)
{
2018-10-03 20:51:49 +08:00
utf8_bytes_index = 0;
if (current_wchar == str.size())
{
utf8_bytes[0] = std::char_traits<char>::eof();
utf8_bytes_filled = 1;
}
else
{
2018-10-03 20:51:49 +08:00
// get the current character
2019-03-15 22:55:52 +08:00
const auto wc = static_cast<unsigned int>(str[current_wchar++]);
2018-10-03 20:51:49 +08:00
// UTF-32 to UTF-8 encoding
if (wc < 0x80)
{
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
utf8_bytes_filled = 1;
}
2018-10-03 20:51:49 +08:00
else if (wc <= 0x7FF)
{
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u) & 0x1Fu));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
2018-10-03 20:51:49 +08:00
utf8_bytes_filled = 2;
}
else if (wc <= 0xFFFF)
{
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u) & 0x0Fu));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
2018-10-03 20:51:49 +08:00
utf8_bytes_filled = 3;
}
else if (wc <= 0x10FFFF)
{
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | ((wc >> 18u) & 0x07u));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 12u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
2018-10-03 20:51:49 +08:00
utf8_bytes_filled = 4;
}
else
{
2018-10-03 20:51:49 +08:00
// unknown character
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
2018-10-03 20:51:49 +08:00
utf8_bytes_filled = 1;
}
}
}
2018-10-03 20:51:49 +08:00
};
2018-10-03 20:51:49 +08:00
template<typename WideStringType>
struct wide_string_input_helper<WideStringType, 2>
{
// UTF-16
2019-03-17 07:27:44 +08:00
static void fill_buffer(const WideStringType& str,
size_t& current_wchar,
std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
size_t& utf8_bytes_index,
size_t& utf8_bytes_filled)
{
utf8_bytes_index = 0;
if (current_wchar == str.size())
{
utf8_bytes[0] = std::char_traits<char>::eof();
utf8_bytes_filled = 1;
}
else
{
// get the current character
2019-03-15 22:55:52 +08:00
const auto wc = static_cast<unsigned int>(str[current_wchar++]);
// UTF-16 to UTF-8 encoding
if (wc < 0x80)
{
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
utf8_bytes_filled = 1;
}
else if (wc <= 0x7FF)
{
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
utf8_bytes_filled = 2;
}
else if (0xD800 > wc or wc >= 0xE000)
{
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u)));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
utf8_bytes_filled = 3;
}
else
{
if (current_wchar < str.size())
{
2019-03-15 22:55:52 +08:00
const auto wc2 = static_cast<unsigned int>(str[current_wchar++]);
const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
utf8_bytes_filled = 4;
}
else
{
// unknown character
++current_wchar;
2019-03-17 07:27:44 +08:00
utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
utf8_bytes_filled = 1;
}
}
}
}
2018-10-03 20:51:49 +08:00
};
template<typename WideStringType>
class wide_string_input_adapter : public input_adapter_protocol
{
public:
2018-10-28 00:31:03 +08:00
explicit wide_string_input_adapter(const WideStringType& w) noexcept
: str(w)
{}
std::char_traits<char>::int_type get_character() noexcept override
{
// check if buffer needs to be filled
if (utf8_bytes_index == utf8_bytes_filled)
{
fill_buffer<sizeof(typename WideStringType::value_type)>();
assert(utf8_bytes_filled > 0);
assert(utf8_bytes_index == 0);
}
// use buffer
assert(utf8_bytes_filled > 0);
assert(utf8_bytes_index < utf8_bytes_filled);
return utf8_bytes[utf8_bytes_index++];
}
private:
template<size_t T>
void fill_buffer()
{
wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
}
2018-10-05 02:42:19 +08:00
/// the wstring to process
const WideStringType& str;
/// index of the current wchar in str
std::size_t current_wchar = 0;
/// a buffer for UTF-8 bytes
std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
/// index to the utf8_codes array for the next valid byte
std::size_t utf8_bytes_index = 0;
/// number of valid bytes in the utf8_codes array
std::size_t utf8_bytes_filled = 0;
};
2017-08-14 22:48:55 +08:00
class input_adapter
{
public:
// native support
2018-12-14 20:33:28 +08:00
input_adapter(std::FILE* file)
: ia(std::make_shared<file_input_adapter>(file)) {}
2017-08-14 22:48:55 +08:00
/// input adapter for input stream
input_adapter(std::istream& i)
: ia(std::make_shared<input_stream_adapter>(i)) {}
/// input adapter for input stream
input_adapter(std::istream&& i)
: ia(std::make_shared<input_stream_adapter>(i)) {}
input_adapter(const std::wstring& ws)
: ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {}
input_adapter(const std::u16string& ws)
: ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {}
input_adapter(const std::u32string& ws)
: ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {}
2017-08-14 22:48:55 +08:00
/// input adapter for buffer
template<typename CharT,
typename std::enable_if<
std::is_pointer<CharT>::value and
std::is_integral<typename std::remove_pointer<CharT>::type>::value and
sizeof(typename std::remove_pointer<CharT>::type) == 1,
int>::type = 0>
input_adapter(CharT b, std::size_t l)
: ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {}
// derived support
/// input adapter for string literal
template<typename CharT,
typename std::enable_if<
std::is_pointer<CharT>::value and
std::is_integral<typename std::remove_pointer<CharT>::type>::value and
sizeof(typename std::remove_pointer<CharT>::type) == 1,
int>::type = 0>
input_adapter(CharT b)
: input_adapter(reinterpret_cast<const char*>(b),
std::strlen(reinterpret_cast<const char*>(b))) {}
/// input adapter for iterator range with contiguous storage
template<class IteratorType,
typename std::enable_if<
std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
2017-08-14 22:48:55 +08:00
int>::type = 0>
input_adapter(IteratorType first, IteratorType last)
{
2018-09-10 16:02:38 +08:00
#ifndef NDEBUG
2017-08-14 22:48:55 +08:00
// assertion to check that the iterator range is indeed contiguous,
// see http://stackoverflow.com/a/35008842/266378 for more discussion
2018-09-10 16:02:38 +08:00
const auto is_contiguous = std::accumulate(
first, last, std::pair<bool, int>(true, 0),
[&first](std::pair<bool, int> res, decltype(*first) val)
2017-08-14 22:48:55 +08:00
{
res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
return res;
2018-09-10 16:02:38 +08:00
}).first;
assert(is_contiguous);
#endif
2017-08-14 22:48:55 +08:00
// assertion to check that each element is 1 byte long
static_assert(
sizeof(typename iterator_traits<IteratorType>::value_type) == 1,
2017-08-14 22:48:55 +08:00
"each element in the iterator range must have the size of 1 byte");
const auto len = static_cast<size_t>(std::distance(first, last));
if (JSON_LIKELY(len > 0))
{
// there is at least one element: use the address of first
ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len);
}
else
{
// the address of first cannot be used: use nullptr
ia = std::make_shared<input_buffer_adapter>(nullptr, len);
}
}
/// input adapter for array
template<class T, std::size_t N>
input_adapter(T (&array)[N])
: input_adapter(std::begin(array), std::end(array)) {}
/// input adapter for contiguous container
template<class ContiguousContainer, typename
std::enable_if<not std::is_pointer<ContiguousContainer>::value and
std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
2017-08-14 22:48:55 +08:00
int>::type = 0>
input_adapter(const ContiguousContainer& c)
: input_adapter(std::begin(c), std::end(c)) {}
operator input_adapter_t()
{
return ia;
}
private:
/// the actual adapter
input_adapter_t ia = nullptr;
};
} // namespace detail
} // namespace nlohmann