Merge pull request #450 from nlohmann/TurpentineDistillery-feature/locale_independent_str_to_num

TurpentineDistillery feature/locale independent str to num
2025-01-19 15:53:00 +08:00 · 2017-02-14 07:18:35 +01:00 · 2017-02-14 07:18:35 +01:00 · 22b9a301d6
commit 22b9a301d6
parent c95ff863bf 265c5b5207
6 changed files with 634 additions and 364 deletions
--- a/2
+++ b/2
@ -94,7 +94,7 @@ cppcheck:

 # run clang sanitize (we are overrding the CXXFLAGS provided by travis in order to use gcc's libstdc++)
 clang_sanitize: clean
-	CXX=clang++ CXXFLAGS="-g -O2 -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" $(MAKE)
+	CXX=clang++ CXXFLAGS="-g -O2 -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" $(MAKE) check


 ##########################################################################
--- a/src/json.hpp
+++ b/src/json.hpp
@ -9444,7 +9444,9 @@ class basic_json
            literal_false,   ///< the `false` literal
            literal_null,    ///< the `null` literal
            value_string,    ///< a string -- use get_string() for actual value
-            value_number,    ///< a number -- use get_number() for actual value
+            value_unsigned,  ///< an unsigned integer -- use get_number() for actual value
+            value_integer,   ///< a signed integer -- use get_number() for actual value
+            value_float,     ///< a floating point number -- use get_number() for actual value
            begin_array,     ///< the character for array begin `[`
            begin_object,    ///< the character for object begin `{`
            end_array,       ///< the character for array end `]`
@ -9596,7 +9598,9 @@ class basic_json
                    return "null literal";
                case token_type::value_string:
                    return "string literal";
-                case token_type::value_number:
+                case lexer::token_type::value_unsigned:
+                case lexer::token_type::value_integer:
+                case lexer::token_type::value_float:
                    return "number literal";
                case token_type::begin_array:
                    return "'['";
@ -9869,11 +9873,11 @@ basic_json_parser_12:
                    }
                    if (yych <= '0')
                    {
-                        goto basic_json_parser_13;
+                        goto basic_json_parser_43;
                    }
                    if (yych <= '9')
                    {
-                        goto basic_json_parser_15;
+                        goto basic_json_parser_45;
                    }
                    goto basic_json_parser_5;
 basic_json_parser_13:
@ -9883,23 +9887,23 @@ basic_json_parser_13:
                    {
                        if (yych == '.')
                        {
-                            goto basic_json_parser_43;
+                            goto basic_json_parser_47;
                        }
                    }
                    else
                    {
                        if (yych <= 'E')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                        if (yych == 'e')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                    }
 basic_json_parser_14:
                    {
-                        last_token_type = token_type::value_number;
+                        last_token_type = token_type::value_unsigned;
                        break;
                    }
 basic_json_parser_15:
@ -9918,7 +9922,7 @@ basic_json_parser_15:
                    {
                        if (yych == '.')
                        {
-                            goto basic_json_parser_43;
+                            goto basic_json_parser_47;
                        }
                        goto basic_json_parser_14;
                    }
@ -9926,11 +9930,11 @@ basic_json_parser_15:
                    {
                        if (yych <= 'E')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                        if (yych == 'e')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                        goto basic_json_parser_14;
                    }
@ -9957,7 +9961,7 @@ basic_json_parser_23:
                    yych = *(m_marker = ++m_cursor);
                    if (yych == 'a')
                    {
-                        goto basic_json_parser_45;
+                        goto basic_json_parser_49;
                    }
                    goto basic_json_parser_5;
 basic_json_parser_24:
@ -9965,7 +9969,7 @@ basic_json_parser_24:
                    yych = *(m_marker = ++m_cursor);
                    if (yych == 'u')
                    {
-                        goto basic_json_parser_46;
+                        goto basic_json_parser_50;
                    }
                    goto basic_json_parser_5;
 basic_json_parser_25:
@ -9973,7 +9977,7 @@ basic_json_parser_25:
                    yych = *(m_marker = ++m_cursor);
                    if (yych == 'r')
                    {
-                        goto basic_json_parser_47;
+                        goto basic_json_parser_51;
                    }
                    goto basic_json_parser_5;
 basic_json_parser_26:
@ -10055,13 +10059,27 @@ basic_json_parser_31:
                    }
 basic_json_parser_32:
                    m_cursor = m_marker;
-                    if (yyaccept == 0)
+                    if (yyaccept <= 1)
                    {
-                        goto basic_json_parser_5;
+                        if (yyaccept == 0)
+                        {
+                            goto basic_json_parser_5;
+                        }
+                        else
+                        {
+                            goto basic_json_parser_14;
+                        }
                    }
                    else
                    {
-                        goto basic_json_parser_14;
+                        if (yyaccept == 2)
+                        {
+                            goto basic_json_parser_44;
+                        }
+                        else
+                        {
+                            goto basic_json_parser_55;
+                        }
                    }
 basic_json_parser_33:
                    ++m_cursor;
@ -10142,7 +10160,7 @@ basic_json_parser_35:
                                }
                                if (yych <= 'u')
                                {
-                                    goto basic_json_parser_48;
+                                    goto basic_json_parser_52;
                                }
                                goto basic_json_parser_32;
                            }
@ -10261,6 +10279,71 @@ basic_json_parser_42:
                    }
                    goto basic_json_parser_32;
 basic_json_parser_43:
+                    yyaccept = 2;
+                    yych = *(m_marker = ++m_cursor);
+                    if (yych <= 'D')
+                    {
+                        if (yych == '.')
+                        {
+                            goto basic_json_parser_47;
+                        }
+                    }
+                    else
+                    {
+                        if (yych <= 'E')
+                        {
+                            goto basic_json_parser_48;
+                        }
+                        if (yych == 'e')
+                        {
+                            goto basic_json_parser_48;
+                        }
+                    }
+basic_json_parser_44:
+                    {
+                        last_token_type = token_type::value_integer;
+                        break;
+                    }
+basic_json_parser_45:
+                    yyaccept = 2;
+                    m_marker = ++m_cursor;
+                    if ((m_limit - m_cursor) < 3)
+                    {
+                        fill_line_buffer(3);    // LCOV_EXCL_LINE
+                    }
+                    yych = *m_cursor;
+                    if (yych <= '9')
+                    {
+                        if (yych == '.')
+                        {
+                            goto basic_json_parser_47;
+                        }
+                        if (yych <= '/')
+                        {
+                            goto basic_json_parser_44;
+                        }
+                        goto basic_json_parser_45;
+                    }
+                    else
+                    {
+                        if (yych <= 'E')
+                        {
+                            if (yych <= 'D')
+                            {
+                                goto basic_json_parser_44;
+                            }
+                            goto basic_json_parser_48;
+                        }
+                        else
+                        {
+                            if (yych == 'e')
+                            {
+                                goto basic_json_parser_48;
+                            }
+                            goto basic_json_parser_44;
+                        }
+                    }
+basic_json_parser_47:
                    yych = *++m_cursor;
                    if (yych <= '/')
                    {
@ -10268,16 +10351,16 @@ basic_json_parser_43:
                    }
                    if (yych <= '9')
                    {
-                        goto basic_json_parser_49;
+                        goto basic_json_parser_53;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_44:
+basic_json_parser_48:
                    yych = *++m_cursor;
                    if (yych <= ',')
                    {
                        if (yych == '+')
                        {
-                            goto basic_json_parser_51;
+                            goto basic_json_parser_56;
                        }
                        goto basic_json_parser_32;
                    }
@ -10285,7 +10368,7 @@ basic_json_parser_44:
                    {
                        if (yych <= '-')
                        {
-                            goto basic_json_parser_51;
+                            goto basic_json_parser_56;
                        }
                        if (yych <= '/')
                        {
@ -10293,32 +10376,32 @@ basic_json_parser_44:
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_52;
+                            goto basic_json_parser_57;
                        }
                        goto basic_json_parser_32;
                    }
-basic_json_parser_45:
+basic_json_parser_49:
                    yych = *++m_cursor;
                    if (yych == 'l')
                    {
-                        goto basic_json_parser_54;
+                        goto basic_json_parser_59;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_46:
+basic_json_parser_50:
                    yych = *++m_cursor;
                    if (yych == 'l')
                    {
-                        goto basic_json_parser_55;
+                        goto basic_json_parser_60;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_47:
+basic_json_parser_51:
                    yych = *++m_cursor;
                    if (yych == 'u')
                    {
-                        goto basic_json_parser_56;
+                        goto basic_json_parser_61;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_48:
+basic_json_parser_52:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10333,7 +10416,7 @@ basic_json_parser_48:
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_57;
+                            goto basic_json_parser_62;
                        }
                        goto basic_json_parser_32;
                    }
@ -10341,7 +10424,7 @@ basic_json_parser_48:
                    {
                        if (yych <= 'F')
                        {
-                            goto basic_json_parser_57;
+                            goto basic_json_parser_62;
                        }
                        if (yych <= '`')
                        {
@ -10349,12 +10432,12 @@ basic_json_parser_48:
                        }
                        if (yych <= 'f')
                        {
-                            goto basic_json_parser_57;
+                            goto basic_json_parser_62;
                        }
                        goto basic_json_parser_32;
                    }
-basic_json_parser_49:
-                    yyaccept = 1;
+basic_json_parser_53:
+                    yyaccept = 3;
                    m_marker = ++m_cursor;
                    if ((m_limit - m_cursor) < 3)
                    {
@ -10365,27 +10448,30 @@ basic_json_parser_49:
                    {
                        if (yych <= '/')
                        {
-                            goto basic_json_parser_14;
+                            goto basic_json_parser_55;
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_49;
+                            goto basic_json_parser_53;
                        }
-                        goto basic_json_parser_14;
                    }
                    else
                    {
                        if (yych <= 'E')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
                        if (yych == 'e')
                        {
-                            goto basic_json_parser_44;
+                            goto basic_json_parser_48;
                        }
-                        goto basic_json_parser_14;
                    }
-basic_json_parser_51:
+basic_json_parser_55:
+                    {
+                        last_token_type = token_type::value_float;
+                        break;
+                    }
+basic_json_parser_56:
                    yych = *++m_cursor;
                    if (yych <= '/')
                    {
@ -10395,7 +10481,7 @@ basic_json_parser_51:
                    {
                        goto basic_json_parser_32;
                    }
-basic_json_parser_52:
+basic_json_parser_57:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10404,35 +10490,35 @@ basic_json_parser_52:
                    yych = *m_cursor;
                    if (yych <= '/')
                    {
-                        goto basic_json_parser_14;
+                        goto basic_json_parser_55;
                    }
                    if (yych <= '9')
                    {
-                        goto basic_json_parser_52;
+                        goto basic_json_parser_57;
                    }
-                    goto basic_json_parser_14;
-basic_json_parser_54:
+                    goto basic_json_parser_55;
+basic_json_parser_59:
                    yych = *++m_cursor;
                    if (yych == 's')
                    {
-                        goto basic_json_parser_58;
+                        goto basic_json_parser_63;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_55:
+basic_json_parser_60:
                    yych = *++m_cursor;
                    if (yych == 'l')
                    {
-                        goto basic_json_parser_59;
+                        goto basic_json_parser_64;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_56:
+basic_json_parser_61:
                    yych = *++m_cursor;
                    if (yych == 'e')
                    {
-                        goto basic_json_parser_61;
+                        goto basic_json_parser_66;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_57:
+basic_json_parser_62:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10447,7 +10533,7 @@ basic_json_parser_57:
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_63;
+                            goto basic_json_parser_68;
                        }
                        goto basic_json_parser_32;
                    }
@ -10455,7 +10541,7 @@ basic_json_parser_57:
                    {
                        if (yych <= 'F')
                        {
-                            goto basic_json_parser_63;
+                            goto basic_json_parser_68;
                        }
                        if (yych <= '`')
                        {
@ -10463,30 +10549,30 @@ basic_json_parser_57:
                        }
                        if (yych <= 'f')
                        {
-                            goto basic_json_parser_63;
+                            goto basic_json_parser_68;
                        }
                        goto basic_json_parser_32;
                    }
-basic_json_parser_58:
+basic_json_parser_63:
                    yych = *++m_cursor;
                    if (yych == 'e')
                    {
-                        goto basic_json_parser_64;
+                        goto basic_json_parser_69;
                    }
                    goto basic_json_parser_32;
-basic_json_parser_59:
+basic_json_parser_64:
                    ++m_cursor;
                    {
                        last_token_type = token_type::literal_null;
                        break;
                    }
-basic_json_parser_61:
+basic_json_parser_66:
                    ++m_cursor;
                    {
                        last_token_type = token_type::literal_true;
                        break;
                    }
-basic_json_parser_63:
+basic_json_parser_68:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10501,7 +10587,7 @@ basic_json_parser_63:
                        }
                        if (yych <= '9')
                        {
-                            goto basic_json_parser_66;
+                            goto basic_json_parser_71;
                        }
                        goto basic_json_parser_32;
                    }
@ -10509,7 +10595,7 @@ basic_json_parser_63:
                    {
                        if (yych <= 'F')
                        {
-                            goto basic_json_parser_66;
+                            goto basic_json_parser_71;
                        }
                        if (yych <= '`')
                        {
@ -10517,17 +10603,17 @@ basic_json_parser_63:
                        }
                        if (yych <= 'f')
                        {
-                            goto basic_json_parser_66;
+                            goto basic_json_parser_71;
                        }
                        goto basic_json_parser_32;
                    }
-basic_json_parser_64:
+basic_json_parser_69:
                    ++m_cursor;
                    {
                        last_token_type = token_type::literal_false;
                        break;
                    }
-basic_json_parser_66:
+basic_json_parser_71:
                    ++m_cursor;
                    if (m_limit <= m_cursor)
                    {
@ -10838,59 +10924,155 @@ basic_json_parser_66:
            return result;
        }

-        /*!
-        @brief parse floating point number
-
-        This function (and its overloads) serves to select the most appropriate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
-
-        @param[in,out] endptr receives a pointer to the first character after
-        the number
-
-        @return the floating point number
-        */
-        long double str_to_float_t(long double* /* type */, char** endptr) const
-        {
-            return std::strtold(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
-        }

        /*!
-        @brief parse floating point number
+        @brief parse string into a built-in arithmetic type as if the current
+               locale is POSIX.

-        This function (and its overloads) serves to select the most appropriate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
+        @note in floating-point case strtod may parse past the token's end -
+              this is not an error

-        @param[in,out] endptr  receives a pointer to the first character after
-        the number
-
-        @return the floating point number
+        @note any leading blanks are not handled
        */
-        double str_to_float_t(double* /* type */, char** endptr) const
+        struct strtonum
        {
-            return std::strtod(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
-        }
+          public:
+            strtonum(const char* start, const char* end)
+                : m_start(start), m_end(end)
+            {}

-        /*!
-        @brief parse floating point number
+            /*!
+            @return true iff parsed successfully as number of type T

-        This function (and its overloads) serves to select the most appropriate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
+            @param[in,out] val shall contain parsed value, or undefined value
+            if could not parse
+            */
+            template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+            bool to(T& val) const
+            {
+                return parse(val, std::is_integral<T>());
+            }

-        @param[in,out] endptr  receives a pointer to the first character after
-        the number
+          private:
+            const char* const m_start = nullptr;
+            const char* const m_end = nullptr;

-        @return the floating point number
-        */
-        float str_to_float_t(float* /* type */, char** endptr) const
-        {
-            return std::strtof(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
-        }
+            // floating-point conversion
+
+            // overloaded wrappers for strtod/strtof/strtold
+            // that will be called from parse<floating_point_t>
+            static void strtof(float& f, const char* str, char** endptr)
+            {
+                f = std::strtof(str, endptr);
+            }
+
+            static void strtof(double& f, const char* str, char** endptr)
+            {
+                f = std::strtod(str, endptr);
+            }
+
+            static void strtof(long double& f, const char* str, char** endptr)
+            {
+                f = std::strtold(str, endptr);
+            }
+
+            template<typename T>
+            bool parse(T& value, /*is_integral=*/std::false_type) const
+            {
+                // replace decimal separator with locale-specific version,
+                // when necessary; data will point to either the original
+                // string, or buf, or tempstr containing the fixed string.
+                std::string tempstr;
+                std::array<char, 64> buf;
+                const size_t len = static_cast<size_t>(m_end - m_start);
+
+                // lexer will reject empty numbers
+                assert(len > 0);
+
+                // since dealing with strtod family of functions, we're
+                // getting the decimal point char from the C locale facilities
+                // instead of C++'s numpunct facet of the current std::locale
+                const auto loc = localeconv();
+                assert(loc != nullptr);
+                const char decimal_point_char = (loc->decimal_point == nullptr) ? '.' : loc->decimal_point[0];
+
+                const char* data = m_start;
+
+                if (decimal_point_char != '.')
+                {
+                    const size_t ds_pos = static_cast<size_t>(std::find(m_start, m_end, '.') - m_start);
+
+                    if (ds_pos != len)
+                    {
+                        // copy the data into the local buffer or tempstr, if
+                        // buffer is too small; replace decimal separator, and
+                        // update data to point to the modified bytes
+                        if ((len + 1) < buf.size())
+                        {
+                            std::copy(m_start, m_end, buf.data());
+                            buf[len] = 0;
+                            buf[ds_pos] = decimal_point_char;
+                            data = buf.data();
+                        }
+                        else
+                        {
+                            tempstr.assign(m_start, m_end);
+                            tempstr[ds_pos] = decimal_point_char;
+                            data = tempstr.c_str();
+                        }
+                    }
+                }
+
+                char* endptr = nullptr;
+                value = 0;
+                // this calls appropriate overload depending on T
+                strtof(value, data, &endptr);
+
+                // parsing was successful iff strtof parsed exactly the number
+                // of characters determined by the lexer (len)
+                const bool ok = (endptr == (data + len));
+
+                if (ok and (value == 0.0) and (*data == '-'))
+                {
+                    // some implementations forget to negate the zero
+                    value = -0.0;
+                }
+
+                return ok;
+            }
+
+            // integral conversion
+
+            signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const
+            {
+                return std::strtoll(m_start, endptr, 10);
+            }
+
+            unsigned long long parse_integral(char** endptr, /*is_signed*/std::false_type) const
+            {
+                return std::strtoull(m_start, endptr, 10);
+            }
+
+            template<typename T>
+            bool parse(T& value, /*is_integral=*/std::true_type) const
+            {
+                char* endptr = nullptr;
+                errno = 0; // these are thread-local
+                const auto x = parse_integral(&endptr, std::is_signed<T>());
+
+                // called right overload?
+                static_assert(std::is_signed<T>() == std::is_signed<decltype(x)>(), "");
+
+                value = static_cast<T>(x);
+
+                return (x == static_cast<decltype(x)>(value)) // x fits into destination T
+                       and (x < 0) == (value < 0)             // preserved sign
+                       //and ((x != 0) or is_integral())        // strto[u]ll did not fail
+                       and (errno == 0)                       // strto[u]ll did not overflow
+                       and (m_start < m_end)                  // token was not empty
+                       and (endptr == m_end);                 // parsed entire token exactly
+            }
+        };

        /*!
        @brief return number value for number tokens
@ -10899,125 +11081,84 @@ basic_json_parser_66:
        number type (either integer, unsigned integer or floating point),
        which is passed back to the caller via the result parameter.

-        This function parses the integer component up to the radix point or
-        exponent while collecting information about the 'floating point
-        representation', which it stores in the result parameter. If there is
-        no radix point or exponent, and the number can fit into a @ref
-        number_integer_t or @ref number_unsigned_t then it sets the result
-        parameter accordingly.
+        integral numbers that don't fit into the range of the respective
+        type are parsed as number_float_t

-        If the number is a floating point number the number is then parsed
-        using @a std:strtod (or @a std:strtof or @a std::strtold).
+        floating-point values that do not satisfy the std::isfinite predicate
+        are converted to value_t::null

-        @param[out] result  @ref basic_json object to receive the number, or
-        NAN if the conversion read past the current token. The latter case
-        needs to be treated by the caller function.
+        returns false if the entire string [m_start .. m_cursor) cannot be
+        interpreted as a number; returns true otherwise
+
+        @param[out] result  @ref basic_json object to receive the number.
+        @param[in]  token   the type of the number token
        */
-        void get_number(basic_json& result) const
+        bool get_number(basic_json& result, const token_type token) const
        {
            assert(m_start != nullptr);
+            assert(m_start < m_cursor);
+            assert((token == token_type::value_unsigned) or
+                   (token == token_type::value_integer) or
+                   (token == token_type::value_float));

-            const lexer::lexer_char_t* curptr = m_start;
+            strtonum num_converter(reinterpret_cast<const char*>(m_start),
+                                   reinterpret_cast<const char*>(m_cursor));

-            // accumulate the integer conversion result (unsigned for now)
-            number_unsigned_t value = 0;
-
-            // maximum absolute value of the relevant integer type
-            number_unsigned_t max;
-
-            // temporarily store the type to avoid unnecessary bitfield access
-            value_t type;
-
-            // look for sign
-            if (*curptr == '-')
+            switch (token)
            {
-                type = value_t::number_integer;
-                max = static_cast<uint64_t>((std::numeric_limits<number_integer_t>::max)()) + 1;
-                curptr++;
-            }
-            else
-            {
-                type = value_t::number_unsigned;
-                max = static_cast<uint64_t>((std::numeric_limits<number_unsigned_t>::max)());
-            }
-
-            // count the significant figures
-            for (; curptr < m_cursor; curptr++)
-            {
-                // quickly skip tests if a digit
-                if (*curptr < '0' or* curptr > '9')
+                case lexer::token_type::value_unsigned:
                {
-                    if (*curptr == '.')
+                    number_unsigned_t val;
+                    if (num_converter.to(val))
                    {
-                        // don't count '.' but change to float
-                        type = value_t::number_float;
-                        continue;
+                        // parsing successful
+                        result.m_type = value_t::number_unsigned;
+                        result.m_value = val;
+                        return true;
                    }
-                    // assume exponent (if not then will fail parse): change to
-                    // float, stop counting and record exponent details
-                    type = value_t::number_float;
                    break;
                }

-                // skip if definitely not an integer
-                if (type != value_t::number_float)
+                case lexer::token_type::value_integer:
                {
-                    auto digit = static_cast<number_unsigned_t>(*curptr - '0');
-
-                    // overflow if value * 10 + digit > max, move terms around
-                    // to avoid overflow in intermediate values
-                    if (value > (max - digit) / 10)
+                    number_integer_t val;
+                    if (num_converter.to(val))
                    {
-                        // overflow
-                        type = value_t::number_float;
-                    }
-                    else
-                    {
-                        // no overflow
-                        value = value * 10 + digit;
+                        // parsing successful
+                        result.m_type = value_t::number_integer;
+                        result.m_value = val;
+                        return true;
                    }
+                    break;
+                }
+
+                default:
+                {
+                    break;
                }
            }

-            // save the value (if not a float)
-            if (type == value_t::number_unsigned)
+            // parse float (either explicitly or because a previous conversion
+            // failed)
+            number_float_t val;
+            if (num_converter.to(val))
            {
-                result.m_value.number_unsigned = value;
-            }
-            else if (type == value_t::number_integer)
-            {
-                // invariant: if we parsed a '-', the absolute value is between
-                // 0 (we allow -0) and max == -INT64_MIN
-                assert(value >= 0);
-                assert(value <= max);
-
-                if (value == max)
-                {
-                    // we cannot simply negate value (== max == -INT64_MIN),
-                    // see https://github.com/nlohmann/json/issues/389
-                    result.m_value.number_integer = static_cast<number_integer_t>(INT64_MIN);
-                }
-                else
-                {
-                    // all other values can be negated safely
-                    result.m_value.number_integer = -static_cast<number_integer_t>(value);
-                }
-            }
-            else
-            {
-                // parse with strtod
-                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), nullptr);
+                // parsing successful
+                result.m_type = value_t::number_float;
+                result.m_value = val;

                // replace infinity and NAN by null
                if (not std::isfinite(result.m_value.number_float))
                {
-                    type = value_t::null;
+                    result.m_type  = value_t::null;
                    result.m_value = basic_json::json_value();
                }
+
+                return true;
            }

-            // save the type
-            result.m_type = type;
+            // couldn't parse number in any format
+            return false;
        }

      private:
@ -11261,10 +11402,20 @@ basic_json_parser_66:
                    break;
                }

-                case lexer::token_type::value_number:
+                case lexer::token_type::value_unsigned:
+                case lexer::token_type::value_integer:
+                case lexer::token_type::value_float:
                {
-                    m_lexer.get_number(result);
+                    const bool ok = m_lexer.get_number(result, last_token);
                    get_token();
+
+                    // if number conversion was unsuccessful, then it is
+                    // because the number was directly followed by an
+                    // unexpected character (e.g. "01" where "1" is unexpected)
+                    if (not ok)
+                    {
+                        unexpect(last_token);
+                    }
                    break;
                }

--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@ -9444,7 +9444,9 @@ class basic_json
            literal_false,   ///< the `false` literal
            literal_null,    ///< the `null` literal
            value_string,    ///< a string -- use get_string() for actual value
-            value_number,    ///< a number -- use get_number() for actual value
+            value_unsigned,  ///< an unsigned integer -- use get_number() for actual value
+            value_integer,   ///< a signed integer -- use get_number() for actual value
+            value_float,     ///< a floating point number -- use get_number() for actual value
            begin_array,     ///< the character for array begin `[`
            begin_object,    ///< the character for object begin `{`
            end_array,       ///< the character for array end `]`
@ -9596,7 +9598,9 @@ class basic_json
                    return "null literal";
                case token_type::value_string:
                    return "string literal";
-                case token_type::value_number:
+                case lexer::token_type::value_unsigned:
+                case lexer::token_type::value_integer:
+                case lexer::token_type::value_float:
                    return "number literal";
                case token_type::begin_array:
                    return "'['";
@ -9684,18 +9688,22 @@ class basic_json
                    "false" { last_token_type = token_type::literal_false; break; }

                    // number
-                    decimal_point = ".";
-                    digit         = [0-9];
-                    digit_1_9     = [1-9];
-                    e             = "e" | "E";
-                    minus         = "-";
-                    plus          = "+";
-                    zero          = "0";
-                    exp           = e (minus | plus)? digit+;
-                    frac          = decimal_point digit+;
-                    int           = (zero | digit_1_9 digit*);
-                    number        = minus? int frac? exp?;
-                    number        { last_token_type = token_type::value_number; break; }
+                    decimal_point   = ".";
+                    digit           = [0-9];
+                    digit_1_9       = [1-9];
+                    e               = "e" | "E";
+                    minus           = "-";
+                    plus            = "+";
+                    zero            = "0";
+                    exp             = e (minus | plus)? digit+;
+                    frac            = decimal_point digit+;
+                    int             = (zero | digit_1_9 digit*);
+                    number_unsigned = int;
+                    number_unsigned { last_token_type = token_type::value_unsigned; break; }
+                    number_integer  = minus int;
+                    number_integer  { last_token_type = token_type::value_integer; break; }
+                    number_float    = minus? int frac? exp?;
+                    number_float    { last_token_type = token_type::value_float; break; }

                    // string
                    quotation_mark  = "\"";
@ -9988,59 +9996,155 @@ class basic_json
            return result;
        }

-        /*!
-        @brief parse floating point number
-
-        This function (and its overloads) serves to select the most appropriate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
-
-        @param[in,out] endptr receives a pointer to the first character after
-        the number
-
-        @return the floating point number
-        */
-        long double str_to_float_t(long double* /* type */, char** endptr) const
-        {
-            return std::strtold(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
-        }

        /*!
-        @brief parse floating point number
+        @brief parse string into a built-in arithmetic type as if the current
+               locale is POSIX.

-        This function (and its overloads) serves to select the most appropriate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
+        @note in floating-point case strtod may parse past the token's end -
+              this is not an error

-        @param[in,out] endptr  receives a pointer to the first character after
-        the number
-
-        @return the floating point number
+        @note any leading blanks are not handled
        */
-        double str_to_float_t(double* /* type */, char** endptr) const
+        struct strtonum
        {
-            return std::strtod(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
-        }
+          public:
+            strtonum(const char* start, const char* end)
+                : m_start(start), m_end(end)
+            {}

-        /*!
-        @brief parse floating point number
+            /*!
+            @return true iff parsed successfully as number of type T

-        This function (and its overloads) serves to select the most appropriate
-        standard floating point number parsing function based on the type
-        supplied via the first parameter.  Set this to @a
-        static_cast<number_float_t*>(nullptr).
+            @param[out] val receives the parsed value; its content is unspecified
+            if parsing failed
+            */
+            template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+            bool to(T& val) const
+            {
+                return parse(val, std::is_integral<T>());
+            }

-        @param[in,out] endptr  receives a pointer to the first character after
-        the number
+          private:
+            const char* const m_start = nullptr;
+            const char* const m_end = nullptr;

-        @return the floating point number
-        */
-        float str_to_float_t(float* /* type */, char** endptr) const
-        {
-            return std::strtof(reinterpret_cast<typename string_t::const_pointer>(m_start), endptr);
-        }
+            // floating-point conversion
+
+            // overloaded wrappers for strtod/strtof/strtold
+            // that will be called from parse<floating_point_t>
+            static void strtof(float& f, const char* str, char** endptr)
+            {
+                f = std::strtof(str, endptr);
+            }
+
+            static void strtof(double& f, const char* str, char** endptr)
+            {
+                f = std::strtod(str, endptr);
+            }
+
+            static void strtof(long double& f, const char* str, char** endptr)
+            {
+                f = std::strtold(str, endptr);
+            }
+
+            template<typename T>
+            bool parse(T& value, /*is_integral=*/std::false_type) const
+            {
+                // replace decimal separator with locale-specific version,
+                // when necessary; data will point to either the original
+                // string, or buf, or tempstr containing the fixed string.
+                std::string tempstr;
+                std::array<char, 64> buf;
+                const size_t len = static_cast<size_t>(m_end - m_start);
+
+                // lexer will reject empty numbers
+                assert(len > 0);
+
+                // since dealing with strtod family of functions, we're
+                // getting the decimal point char from the C locale facilities
+                // instead of C++'s numpunct facet of the current std::locale
+                const auto loc = localeconv();
+                assert(loc != nullptr);
+                const char decimal_point_char = (loc->decimal_point == nullptr) ? '.' : loc->decimal_point[0];
+
+                const char* data = m_start;
+
+                if (decimal_point_char != '.')
+                {
+                    const size_t ds_pos = static_cast<size_t>(std::find(m_start, m_end, '.') - m_start);
+
+                    if (ds_pos != len)
+                    {
+                        // copy the data into the local buffer or tempstr, if
+                        // buffer is too small; replace decimal separator, and
+                        // update data to point to the modified bytes
+                        if ((len + 1) < buf.size())
+                        {
+                            std::copy(m_start, m_end, buf.data());
+                            buf[len] = 0;
+                            buf[ds_pos] = decimal_point_char;
+                            data = buf.data();
+                        }
+                        else
+                        {
+                            tempstr.assign(m_start, m_end);
+                            tempstr[ds_pos] = decimal_point_char;
+                            data = tempstr.c_str();
+                        }
+                    }
+                }
+
+                char* endptr = nullptr;
+                value = 0;
+                // this calls appropriate overload depending on T
+                strtof(value, data, &endptr);
+
+                // parsing was successful iff strtof parsed exactly the number
+                // of characters determined by the lexer (len)
+                const bool ok = (endptr == (data + len));
+
+                if (ok and (value == 0.0) and (*data == '-'))
+                {
+                    // some implementations forget to negate the zero
+                    value = -0.0;
+                }
+
+                return ok;
+            }
+
+            // integral conversion
+
+            signed long long parse_integral(char** endptr, /*is_signed*/std::true_type) const
+            {
+                return std::strtoll(m_start, endptr, 10);
+            }
+
+            unsigned long long parse_integral(char** endptr, /*is_signed*/std::false_type) const
+            {
+                return std::strtoull(m_start, endptr, 10);
+            }
+
+            template<typename T>
+            bool parse(T& value, /*is_integral=*/std::true_type) const
+            {
+                char* endptr = nullptr;
+                errno = 0; // errno is thread-local; clear it so the check below only reflects this call
+                const auto x = parse_integral(&endptr, std::is_signed<T>());
+
+                // compile-time check: the overload selected above has the same signedness as T
+                static_assert(std::is_signed<T>() == std::is_signed<decltype(x)>(), "");
+
+                value = static_cast<T>(x);
+
+                return (x == static_cast<decltype(x)>(value)) // x fits into destination T (round-trips)
+                       and (x < 0) == (value < 0)             // preserved sign
+                       //and ((x != 0) or is_integral())        // strto[u]ll did not fail
+                       and (errno == 0)                       // strto[u]ll did not overflow
+                       and (m_start < m_end)                  // token was not empty
+                       and (endptr == m_end);                 // parsed entire token exactly
+            }
+        };

        /*!
        @brief return number value for number tokens
@ -10049,125 +10153,84 @@ class basic_json
        number type (either integer, unsigned integer or floating point),
        which is passed back to the caller via the result parameter.

-        This function parses the integer component up to the radix point or
-        exponent while collecting information about the 'floating point
-        representation', which it stores in the result parameter. If there is
-        no radix point or exponent, and the number can fit into a @ref
-        number_integer_t or @ref number_unsigned_t then it sets the result
-        parameter accordingly.
+        integral numbers that don't fit into the range of the respective
+        type are parsed as number_float_t

-        If the number is a floating point number the number is then parsed
-        using @a std:strtod (or @a std:strtof or @a std::strtold).
+        floating-point values that do not satisfy the std::isfinite predicate
+        are converted to value_t::null

-        @param[out] result  @ref basic_json object to receive the number, or
-        NAN if the conversion read past the current token. The latter case
-        needs to be treated by the caller function.
+        returns false if the entire string [m_start .. m_cursor) cannot be
+        interpreted as a number; returns true otherwise
+
+        @param[out] result  @ref basic_json object to receive the number.
+        @param[in]  token   the type of the number token
        */
-        void get_number(basic_json& result) const
+        bool get_number(basic_json& result, const token_type token) const
        {
            assert(m_start != nullptr);
+            assert(m_start < m_cursor);
+            assert((token == token_type::value_unsigned) or
+                   (token == token_type::value_integer) or
+                   (token == token_type::value_float));

-            const lexer::lexer_char_t* curptr = m_start;
+            strtonum num_converter(reinterpret_cast<const char*>(m_start),
+                                   reinterpret_cast<const char*>(m_cursor));

-            // accumulate the integer conversion result (unsigned for now)
-            number_unsigned_t value = 0;
-
-            // maximum absolute value of the relevant integer type
-            number_unsigned_t max;
-
-            // temporarily store the type to avoid unnecessary bitfield access
-            value_t type;
-
-            // look for sign
-            if (*curptr == '-')
+            switch (token)
            {
-                type = value_t::number_integer;
-                max = static_cast<uint64_t>((std::numeric_limits<number_integer_t>::max)()) + 1;
-                curptr++;
-            }
-            else
-            {
-                type = value_t::number_unsigned;
-                max = static_cast<uint64_t>((std::numeric_limits<number_unsigned_t>::max)());
-            }
-
-            // count the significant figures
-            for (; curptr < m_cursor; curptr++)
-            {
-                // quickly skip tests if a digit
-                if (*curptr < '0' or* curptr > '9')
+                case lexer::token_type::value_unsigned:
                {
-                    if (*curptr == '.')
+                    number_unsigned_t val;
+                    if (num_converter.to(val))
                    {
-                        // don't count '.' but change to float
-                        type = value_t::number_float;
-                        continue;
+                        // parsing successful
+                        result.m_type = value_t::number_unsigned;
+                        result.m_value = val;
+                        return true;
                    }
-                    // assume exponent (if not then will fail parse): change to
-                    // float, stop counting and record exponent details
-                    type = value_t::number_float;
                    break;
                }

-                // skip if definitely not an integer
-                if (type != value_t::number_float)
+                case lexer::token_type::value_integer:
                {
-                    auto digit = static_cast<number_unsigned_t>(*curptr - '0');
-
-                    // overflow if value * 10 + digit > max, move terms around
-                    // to avoid overflow in intermediate values
-                    if (value > (max - digit) / 10)
+                    number_integer_t val;
+                    if (num_converter.to(val))
                    {
-                        // overflow
-                        type = value_t::number_float;
-                    }
-                    else
-                    {
-                        // no overflow
-                        value = value * 10 + digit;
+                        // parsing successful
+                        result.m_type = value_t::number_integer;
+                        result.m_value = val;
+                        return true;
                    }
+                    break;
+                }
+
+                default:
+                {
+                    break;
                }
            }

-            // save the value (if not a float)
-            if (type == value_t::number_unsigned)
+            // parse float (either explicitly or because a previous conversion
+            // failed)
+            number_float_t val;
+            if (num_converter.to(val))
            {
-                result.m_value.number_unsigned = value;
-            }
-            else if (type == value_t::number_integer)
-            {
-                // invariant: if we parsed a '-', the absolute value is between
-                // 0 (we allow -0) and max == -INT64_MIN
-                assert(value >= 0);
-                assert(value <= max);
-
-                if (value == max)
-                {
-                    // we cannot simply negate value (== max == -INT64_MIN),
-                    // see https://github.com/nlohmann/json/issues/389
-                    result.m_value.number_integer = static_cast<number_integer_t>(INT64_MIN);
-                }
-                else
-                {
-                    // all other values can be negated safely
-                    result.m_value.number_integer = -static_cast<number_integer_t>(value);
-                }
-            }
-            else
-            {
-                // parse with strtod
-                result.m_value.number_float = str_to_float_t(static_cast<number_float_t*>(nullptr), nullptr);
+                // parsing successful
+                result.m_type = value_t::number_float;
+                result.m_value = val;

                // replace infinity and NAN by null
                if (not std::isfinite(result.m_value.number_float))
                {
-                    type = value_t::null;
+                    result.m_type  = value_t::null;
                    result.m_value = basic_json::json_value();
                }
+
+                return true;
            }

-            // save the type
-            result.m_type = type;
+            // couldn't parse number in any format
+            return false;
        }

      private:
@ -10411,10 +10474,20 @@ class basic_json
                    break;
                }

-                case lexer::token_type::value_number:
+                case lexer::token_type::value_unsigned:
+                case lexer::token_type::value_integer:
+                case lexer::token_type::value_float:
                {
-                    m_lexer.get_number(result);
+                    const bool ok = m_lexer.get_number(result, last_token);
                    get_token();
+
+                    // if number conversion was unsuccessful, then it is
+                    // because the number was directly followed by an
+                    // unexpected character (e.g. "01" where "1" is unexpected)
+                    if (not ok)
+                    {
+                        unexpect(last_token);
+                    }
                    break;
                }

--- a/test/src/unit-class_lexer.cpp
+++ b/test/src/unit-class_lexer.cpp
@ -65,25 +65,37 @@ TEST_CASE("lexer class")
        SECTION("numbers")
        {
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("0"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("2"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("3"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("4"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("5"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("6"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("7"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("8"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("9"),
-                               1).scan() == json::lexer::token_type::value_number));
+                               1).scan() == json::lexer::token_type::value_unsigned));
+
+            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-0"),
+                               2).scan() == json::lexer::token_type::value_integer));
+            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-1"),
+                               2).scan() == json::lexer::token_type::value_integer));
+
+            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1.1"),
+                               3).scan() == json::lexer::token_type::value_float));
+            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("-1.1"),
+                               4).scan() == json::lexer::token_type::value_float));
+            CHECK((json::lexer(reinterpret_cast<const json::lexer::lexer_char_t*>("1E10"),
+                               4).scan() == json::lexer::token_type::value_float));
        }

        SECTION("whitespace")
@ -109,7 +121,9 @@ TEST_CASE("lexer class")
        CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_false) == "false literal"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::literal_null) == "null literal"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_string) == "string literal"));
-        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_number) == "number literal"));
+        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_unsigned) == "number literal"));
+        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_integer) == "number literal"));
+        CHECK((json::lexer::token_type_name(json::lexer::token_type::value_float) == "number literal"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_array) == "'['"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::begin_object) == "'{'"));
        CHECK((json::lexer::token_type_name(json::lexer::token_type::end_array) == "']'"));
--- a/test/src/unit-class_parser.cpp
+++ b/test/src/unit-class_parser.cpp
@ -101,6 +101,7 @@ TEST_CASE("parser class")
                CHECK_THROWS_WITH(json::parser("\"\b\"").parse(), "parse error - unexpected '\"'");
                // improve code coverage
                CHECK_THROWS_AS(json::parser("\uFF01").parse(), std::invalid_argument);
+                CHECK_THROWS_AS(json::parser("[-4:1,]").parse(), std::invalid_argument);
                // unescaped control characters
                CHECK_THROWS_AS(json::parser("\"\x00\"").parse(), std::invalid_argument);
                CHECK_THROWS_AS(json::parser("\"\x01\"").parse(), std::invalid_argument);
@ -269,6 +270,11 @@ TEST_CASE("parser class")
                }
            }

+            SECTION("overflow")
+            {
+                CHECK(json::parser("1.18973e+4932").parse() == json()); // NOTE(review): exponent presumably exceeds the float type's range; expected result equals a default-constructed json
+            }
+
            SECTION("invalid numbers")
            {
                CHECK_THROWS_AS(json::parser("01").parse(), std::invalid_argument);
@ -293,7 +299,7 @@ TEST_CASE("parser class")
                CHECK_THROWS_AS(json::parser("+0").parse(), std::invalid_argument);

                CHECK_THROWS_WITH(json::parser("01").parse(),
-                                  "parse error - unexpected number literal; expected end of input");
+                                  "parse error - unexpected number literal");
                CHECK_THROWS_WITH(json::parser("--1").parse(), "parse error - unexpected '-'");
                CHECK_THROWS_WITH(json::parser("1.").parse(),
                                  "parse error - unexpected '.'; expected end of input");
--- a/test/src/unit-regression.cpp
+++ b/test/src/unit-regression.cpp
@ -383,7 +383,7 @@ TEST_CASE("regression tests")
        };

        // change locale to mess with decimal points
-        std::locale::global(std::locale(std::locale(), new CommaDecimalSeparator));
+        auto orig_locale = std::locale::global(std::locale(std::locale(), new CommaDecimalSeparator));

        CHECK(j1a.dump() == "23.42");
        CHECK(j1b.dump() == "23.42");
@ -407,8 +407,34 @@ TEST_CASE("regression tests")
        CHECK(j3c.dump() == "10000");
        //CHECK(j3b.dump() == "1E04"); // roundtrip error
        //CHECK(j3c.dump() == "1e04"); // roundtrip error
+
+        std::locale::global(orig_locale);
    }

+    SECTION("issue #379 - locale-independent str-to-num")
+    {
+        setlocale(LC_NUMERIC, "de_DE.UTF-8"); // NOTE(review): result is not checked and LC_NUMERIC is not restored in this section
+
+        // the strtod sanity checks below are skipped for MSVC, because
+        // locale-specific behavior is not triggered on AppVeyor for some reason
+#ifndef _MSC_VER
+        {
+            // verify that strtod now uses a comma as decimal separator
+            CHECK(std::strtod("3,14", nullptr) == 3.14);
+
+            // verify that strtod does not treat a dot as decimal separator (conversion yields 3)
+            CHECK(std::strtod("3.14", nullptr) == 3);
+        }
+#endif
+
+        // verify that the number is parsed correctly despite strtod being used internally
+        CHECK(json::parse("3.14").get<double>() == 3.14);
+
+        // check a different code path (literal with a long run of fractional zeros)
+        CHECK(json::parse("1.000000000000000000000000000000000000000000000000000000000000000000000000").get<double>() == 1.0);
+    }
+
+
    SECTION("issue #233 - Can't use basic_json::iterator as a base iterator for std::move_iterator")
    {
        json source = {"a", "b", "c"};