diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index efb5dfb2c3..c7cdfe3c4f 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -606,12 +606,11 @@ public: int last_occurrence = -1; xml_buf_size = MIN(xml_buf_size, int(file_size)); fseek( file, -xml_buf_size, SEEK_END ); - std::vector xml_buf_(xml_buf_size+2); // find the last occurrence of for(;;) { int line_offset = (int)ftell( file ); - const char* ptr0 = this->gets(&xml_buf_[0], xml_buf_size ); + const char* ptr0 = this->gets(xml_buf_size); const char* ptr = NULL; if( !ptr0 ) break; @@ -693,8 +692,8 @@ public: } else { - const size_t buf_size0 = 1 << 20; - size_t buf_size = buf_size0; + const size_t buf_size0 = 40; + buffer.resize(buf_size0); if( mem_mode ) { strbuf = (char*)filename_or_buf; @@ -704,8 +703,7 @@ public: const char* yaml_signature = "%YAML"; const char* json_signature = "{"; const char* xml_signature = "gets( buf, sizeof(buf)-2 ); + char* buf = this->gets(16); char* bufPtr = cv_skip_BOM(buf); size_t bufOffset = bufPtr - buf; @@ -720,23 +718,8 @@ public: else CV_Error(CV_BADARG_ERR, "Unsupported file storage format"); - if( !isGZ ) - { - if( !mem_mode ) - { - fseek( file, 0, SEEK_END ); - buf_size = ftell( file ); - } - else - buf_size = strbufsize; - buf_size = MIN( buf_size, buf_size0 ); - buf_size = MAX( buf_size, (size_t)(CV_FS_MAX_LEN*6 + 1024) ); - } rewind(); strbufpos = bufOffset; - - buffer.reserve(buf_size + 256); - buffer.resize(buf_size); bufofs = 0; try @@ -809,56 +792,70 @@ public: CV_Error( CV_StsError, "The storage is not opened" ); } - char* gets( char* str, int maxCount ) + char* getsFromFile( char* buf, int count ) + { + if( file ) + return fgets( buf, count, file ); + #if USE_ZLIB + if( gzfile ) + return gzgets( gzfile, buf, count ); + #endif + CV_Error(CV_StsError, "The storage is not opened"); + } + + char* gets( size_t maxCount ) { if( strbuf ) { size_t i = strbufpos, len = strbufsize; - int j = 0; const char* instr = strbuf; - while( i < len && j < maxCount-1 ) + for( ; i < len; i++ ) { - char c = instr[i++]; - if( c == '\0' ) - break; - str[j++] = c; - if( c == '\n' ) + char c = instr[i]; + if( c == '\0' || c == '\n' ) + { + if( c == '\n' ) + i++; break; + } } - str[j++] = '\0'; + size_t count = i - strbufpos; + if( maxCount == 0 || maxCount > count ) + maxCount = count; + buffer.resize(std::max(buffer.size(), maxCount + 8)); + memcpy(&buffer[0], instr + strbufpos, maxCount); + buffer[maxCount] = '\0'; strbufpos = i; - if (maxCount > 256 && !(flags & cv::FileStorage::BASE64)) - CV_Assert(j < maxCount - 1 && "OpenCV persistence doesn't support very long lines"); - return j > 1 ? str : 0; + return maxCount > 0 ? &buffer[0] : 0; } - if( file ) + + const size_t MAX_BLOCK_SIZE = INT_MAX/2; // hopefully, that will be enough + if( maxCount == 0 ) + maxCount = MAX_BLOCK_SIZE; + else + CV_Assert(maxCount < MAX_BLOCK_SIZE); + size_t ofs = 0; + + for(;;) { - char* ptr = fgets( str, maxCount, file ); - if (ptr && maxCount > 256 && !(flags & cv::FileStorage::BASE64)) - { - size_t sz = strnlen(ptr, maxCount); - CV_Assert(sz < (size_t)(maxCount - 1) && "OpenCV persistence doesn't support very long lines"); - } - return ptr; + int count = (int)std::min(buffer.size() - ofs - 16, maxCount); + char* ptr = getsFromFile( &buffer[ofs], count+1 ); + if( !ptr ) + break; + int delta = (int)strlen(ptr); + ofs += delta; + maxCount -= delta; + if( ptr[delta-1] == '\n' || maxCount == 0 ) + break; + if( delta == count ) + buffer.resize((size_t)(buffer.size()*1.5)); } -#if USE_ZLIB - if( gzfile ) - { - char* ptr = gzgets( gzfile, str, maxCount ); - if (ptr && maxCount > 256 && !(flags & cv::FileStorage::BASE64)) - { - size_t sz = strnlen(ptr, maxCount); - CV_Assert(sz < (size_t)(maxCount - 1) && "OpenCV persistence doesn't support very long lines"); - } - return ptr; - } -#endif - CV_Error(CV_StsError, "The storage is not opened"); + return ofs > 0 ? &buffer[0] : 0; } char* gets() { - char* ptr = this->gets(bufferStart(), (int)(bufferEnd() - bufferStart())); + char* ptr = this->gets(0); if( !ptr ) { ptr = bufferStart(); // FIXIT Why do we need this hack? What is about other parsers JSON/YAML? @@ -868,10 +865,12 @@ public: } else { - FileStorage_API* fs = this; - int l = (int)strlen(ptr); + size_t l = strlen(ptr); if( l > 0 && ptr[l-1] != '\n' && ptr[l-1] != '\r' && !eof() ) - CV_PARSE_ERROR_CPP("Too long string or a last string w/o newline"); + { + ptr[l] = '\n'; + ptr[l+1] = '\0'; + } } lineno++; return ptr; diff --git a/modules/core/src/persistence_json.cpp b/modules/core/src/persistence_json.cpp index 5ea6dd3b03..667895fbc5 100644 --- a/modules/core/src/persistence_json.cpp +++ b/modules/core/src/persistence_json.cpp @@ -411,7 +411,10 @@ public: if( *ptr != '"' ) CV_PARSE_ERROR_CPP( "Key must end with \'\"\'" ); - const char * end = ptr; + if( ptr == beg ) + CV_PARSE_ERROR_CPP( "Key is empty" ); + value_placeholder = fs->addNode(collection, std::string(beg, (size_t)(ptr - beg)), FileNode::NONE); + ptr++; ptr = skipSpaces( ptr ); if( !ptr || !*ptr ) @@ -420,11 +423,6 @@ public: if( *ptr != ':' ) CV_PARSE_ERROR_CPP( "Missing \':\' between key and value" ); - /* [beg, end) */ - if( end <= beg ) - CV_PARSE_ERROR_CPP( "Key is empty" ); - - value_placeholder = fs->addNode(collection, std::string(beg, (size_t)(end - beg)), FileNode::NONE); return ++ptr; } @@ -768,8 +766,6 @@ public: CV_PARSE_ERROR_CPP( "left-brace of top level is missing" ); } - if( !ptr || !*ptr ) - CV_PARSE_ERROR_CPP( "Unexpected End-Of-File" ); return true; } diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp index 8ed4eea886..d1749e4906 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -1669,4 +1669,46 @@ TEST(Core_InputOutput, FileStorage_YAML_parse_multiple_documents) ASSERT_EQ(0, std::remove(filename.c_str())); } +TEST(Core_InputOutput, FileStorage_JSON_VeryLongLines) +{ + for( int iter = 0; iter < 2; iter++ ) + { + std::string temp_path = cv::tempfile("temp.json"); + { + std::ofstream ofs(temp_path); + ofs << "{ "; + int prev_len = 0, start = 0; + for (int i = 0; i < 52500; i++) + { + std::string str = cv::format("\"KEY%d\"", i); + ofs << str; + if(iter == 1 && i - start > prev_len) + { + // build a stairway with increasing text row width + ofs << "\n"; + prev_len = i - start; + start = i; + } + str = cv::format(": \"VALUE%d\", ", i); + ofs << str; + } + ofs << "}"; + } + + { + cv::FileStorage fs(temp_path, cv::FileStorage::READ); + char key[16], val0[16]; + std::string val; + for(int i = 0; i < 52500; i += 100) + { + sprintf(key, "KEY%d", i); + sprintf(val0, "VALUE%d", i); + fs[key] >> val; + ASSERT_EQ(val, val0); + } + } + remove(temp_path.c_str()); + } +} + }} // namespace