add ensure_ascii parameter to dump. #330

This commit is contained in:
Ryan Mulder 2017-07-11 13:41:56 -04:00
parent 7dee868a42
commit 71597be294
3 changed files with 77 additions and 42 deletions

View File

@ -6423,6 +6423,7 @@ class serializer
@param[in] current_indent the current indent level (only used internally)
*/
void dump(const BasicJsonType& val, const bool pretty_print,
const bool ensure_ascii,
const unsigned int indent_step,
const unsigned int current_indent = 0)
{
@ -6453,9 +6454,9 @@ class serializer
{
o->write_characters(indent_string.c_str(), new_indent);
o->write_character('\"');
dump_escaped(i->first);
dump_escaped(i->first, ensure_ascii);
o->write_characters("\": ", 3);
dump(i->second, true, indent_step, new_indent);
dump(i->second, true, ensure_ascii, indent_step, new_indent);
o->write_characters(",\n", 2);
}
@ -6463,9 +6464,9 @@ class serializer
assert(i != val.m_value.object->cend());
o->write_characters(indent_string.c_str(), new_indent);
o->write_character('\"');
dump_escaped(i->first);
dump_escaped(i->first, ensure_ascii);
o->write_characters("\": ", 3);
dump(i->second, true, indent_step, new_indent);
dump(i->second, true, ensure_ascii, indent_step, new_indent);
o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
@ -6480,18 +6481,18 @@ class serializer
for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
{
o->write_character('\"');
dump_escaped(i->first);
dump_escaped(i->first, ensure_ascii);
o->write_characters("\":", 2);
dump(i->second, false, indent_step, current_indent);
dump(i->second, false, ensure_ascii, indent_step, current_indent);
o->write_character(',');
}
// last element
assert(i != val.m_value.object->cend());
o->write_character('\"');
dump_escaped(i->first);
dump_escaped(i->first, ensure_ascii);
o->write_characters("\":", 2);
dump(i->second, false, indent_step, current_indent);
dump(i->second, false, ensure_ascii, indent_step, current_indent);
o->write_character('}');
}
@ -6523,14 +6524,14 @@ class serializer
i != val.m_value.array->cend() - 1; ++i)
{
o->write_characters(indent_string.c_str(), new_indent);
dump(*i, true, indent_step, new_indent);
dump(*i, true, ensure_ascii, indent_step, new_indent);
o->write_characters(",\n", 2);
}
// last element
assert(not val.m_value.array->empty());
o->write_characters(indent_string.c_str(), new_indent);
dump(val.m_value.array->back(), true, indent_step, new_indent);
dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent);
o->write_character('\n');
o->write_characters(indent_string.c_str(), current_indent);
@ -6544,13 +6545,13 @@ class serializer
for (auto i = val.m_value.array->cbegin();
i != val.m_value.array->cend() - 1; ++i)
{
dump(*i, false, indent_step, current_indent);
dump(*i, false, ensure_ascii, indent_step, current_indent);
o->write_character(',');
}
// last element
assert(not val.m_value.array->empty());
dump(val.m_value.array->back(), false, indent_step, current_indent);
dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent);
o->write_character(']');
}
@ -6561,7 +6562,7 @@ class serializer
case value_t::string:
{
o->write_character('\"');
dump_escaped(*val.m_value.string);
dump_escaped(*val.m_value.string, ensure_ascii);
o->write_character('\"');
return;
}
@ -6616,14 +6617,15 @@ class serializer
@brief calculates the extra space to escape a JSON string
@param[in] s the string to escape
@param[in] ensure_ascii whether to escape non-ASCII characters with \uXXXX sequences
@return the number of characters required to escape string @a s
@complexity Linear in the length of string @a s.
*/
static std::size_t extra_space(const string_t& s) noexcept
static std::size_t extra_space(const string_t& s, const bool ensure_ascii) noexcept
{
return std::accumulate(s.begin(), s.end(), size_t{},
[](size_t res, typename string_t::value_type c)
[ensure_ascii](size_t res, typename string_t::value_type c)
{
switch (c)
{
@ -6673,6 +6675,11 @@ class serializer
default:
{
if (c & 0x80 and ensure_ascii)
{
// from c (1 byte) to \uxxxx (6 bytes)
return res + 5;
}
return res;
}
}
@ -6688,12 +6695,13 @@ class serializer
representation. The escaped string is written to output stream @a o.
@param[in] s the string to escape
@param[in] ensure_ascii whether to escape non-ASCII characters with \uXXXX sequences
@complexity Linear in the length of string @a s.
*/
void dump_escaped(const string_t& s) const
void dump_escaped(const string_t& s, const bool ensure_ascii) const
{
const auto space = extra_space(s);
const auto space = extra_space(s, ensure_ascii);
if (space == 0)
{
o->write_characters(s.c_str(), s.size());
@ -6704,6 +6712,27 @@ class serializer
string_t result(s.size() + space, '\\');
std::size_t pos = 0;
auto escape_character = [&result, &pos](const typename string_t::value_type c)
{
// convert a number 0..15 to its hex representation
// (0..f)
static const char hexify[16] =
{
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
// print character c as \uxxxx
for (const char m :
{ 'u', '0', '0', hexify[(c >> 4) & 0x0f], hexify[c & 0x0f]
})
{
result[++pos] = m;
}
++pos;
};
for (const auto& c : s)
{
switch (c)
@ -6792,28 +6821,21 @@ class serializer
case 0x1e:
case 0x1f:
{
// convert a number 0..15 to its hex representation
// (0..f)
static const char hexify[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
// print character c as \uxxxx
for (const char m :
{'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
})
{
result[++pos] = m;
}
++pos;
escape_character(c);
break;
}
default:
{
// all other characters are added as-is
result[pos++] = c;
if (c & 0x80 and ensure_ascii)
{
escape_character(c);
}
else
{
// all other characters are added as-is
result[pos++] = c;
}
break;
}
}
@ -9017,7 +9039,7 @@ class basic_json
Serialization function for JSON values. The function tries to mimic
Python's `json.dumps()` function, and currently supports its @a indent
parameter.
and @a ensure_ascii parameters.
@param[in] indent If indent is nonnegative, then array elements and object
members will be pretty-printed with that indent level. An indent level of
@ -9025,30 +9047,33 @@ class basic_json
representation.
@param[in] indent_char The character to use for indentation if @a indent is
greater than `0`. The default is ` ` (space).
@param[in] ensure_ascii If ensure_ascii is true (the default), all non-ASCII
characters in the output are escaped with \uXXXX sequences, and the result
consists of ASCII characters only.
@return string containing the serialization of the JSON value
@complexity Linear.
@liveexample{The following example shows the effect of different @a indent
parameters to the result of the serialization.,dump}
parameters to the result of the serialization.dump}
@see https://docs.python.org/2/library/json.html#json.dump
@since version 1.0.0; indentation character added in version 3.0.0
*/
string_t dump(const int indent = -1, const char indent_char = ' ') const
string_t dump(const int indent = -1, const char indent_char = ' ', const bool ensure_ascii = false) const
{
string_t result;
serializer s(detail::output_adapter_factory<char>::create(result), indent_char);
if (indent >= 0)
{
s.dump(*this, true, static_cast<unsigned int>(indent));
s.dump(*this, true, ensure_ascii, static_cast<unsigned int>(indent));
}
else
{
s.dump(*this, false, 0);
s.dump(*this, false, ensure_ascii, 0);
}
return result;
@ -12715,7 +12740,7 @@ class basic_json
// do the actual serialization
serializer s(detail::output_adapter_factory<char>::create(o), o.fill());
s.dump(j, pretty_print, static_cast<unsigned int>(indentation));
s.dump(j, pretty_print, false, static_cast<unsigned int>(indentation));
return o;
}

View File

@ -50,11 +50,12 @@ TEST_CASE("convenience functions")
SECTION("string escape")
{
const auto check_escaped = [](const char* original,
const char* escaped)
const char* escaped,
const bool ensure_ascii = false)
{
std::stringstream ss;
json::serializer s(nlohmann::detail::output_adapter_factory<char>::create(ss), ' ');
s.dump_escaped(original);
s.dump_escaped(original, ensure_ascii);
CHECK(ss.str() == escaped);
};
@ -97,5 +98,7 @@ TEST_CASE("convenience functions")
check_escaped("\x1d", "\\u001d");
check_escaped("\x1e", "\\u001e");
check_escaped("\x1f", "\\u001f");
check_escaped("\xA9", "\xA9");
check_escaped("\xA9", "\\u00a9", true);
}
}

View File

@ -250,6 +250,13 @@ TEST_CASE("object inspection")
CHECK(json("❤️").dump() == "\"❤️\"");
}
SECTION("dump with ensure_ascii and non-ASCII characters")
{
CHECK(json("ä").dump(-1, ' ', true) == R"("\u00c3\u00a4")");
CHECK(json("Ö").dump(-1, ' ', true) == R"("\u00c3\u0096")");
CHECK(json("❤️").dump(-1, ' ', true) == R"("\u00e2\u009d\u00a4\u00ef\u00b8\u008f")");
}
SECTION("serialization of discarded element")
{
json j_discarded(json::value_t::discarded);