mirror of
https://github.com/opencv/opencv.git
synced 2025-06-06 00:43:52 +08:00
Merge pull request #16488 from vpisarev:filestorage_longlines
trying to fix handling file storages with extremely long lines * trying to fix handling of file storages with extremely long lines: https://github.com/opencv/opencv/issues/11061 * * fixed errorneous pointer access in JSON parser. * it's now crash-test time! temporarily set the initial parser buffer size to just 40 bytes. let's run all the test and check if the buffer is always correctly resized and handled * fixed pointer use in JSON parser; added the proper test to catch this case * fixed the test to make it more challenging. generate test json with * ** *** etc. shape
This commit is contained in:
parent
aa2777ed61
commit
3efa78311a
@ -606,12 +606,11 @@ public:
|
||||
int last_occurrence = -1;
|
||||
xml_buf_size = MIN(xml_buf_size, int(file_size));
|
||||
fseek( file, -xml_buf_size, SEEK_END );
|
||||
std::vector<char> xml_buf_(xml_buf_size+2);
|
||||
// find the last occurrence of </opencv_storage>
|
||||
for(;;)
|
||||
{
|
||||
int line_offset = (int)ftell( file );
|
||||
const char* ptr0 = this->gets(&xml_buf_[0], xml_buf_size );
|
||||
const char* ptr0 = this->gets(xml_buf_size);
|
||||
const char* ptr = NULL;
|
||||
if( !ptr0 )
|
||||
break;
|
||||
@ -693,8 +692,8 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
const size_t buf_size0 = 1 << 20;
|
||||
size_t buf_size = buf_size0;
|
||||
const size_t buf_size0 = 40;
|
||||
buffer.resize(buf_size0);
|
||||
if( mem_mode )
|
||||
{
|
||||
strbuf = (char*)filename_or_buf;
|
||||
@ -704,8 +703,7 @@ public:
|
||||
const char* yaml_signature = "%YAML";
|
||||
const char* json_signature = "{";
|
||||
const char* xml_signature = "<?xml";
|
||||
char buf[16];
|
||||
this->gets( buf, sizeof(buf)-2 );
|
||||
char* buf = this->gets(16);
|
||||
char* bufPtr = cv_skip_BOM(buf);
|
||||
size_t bufOffset = bufPtr - buf;
|
||||
|
||||
@ -720,23 +718,8 @@ public:
|
||||
else
|
||||
CV_Error(CV_BADARG_ERR, "Unsupported file storage format");
|
||||
|
||||
if( !isGZ )
|
||||
{
|
||||
if( !mem_mode )
|
||||
{
|
||||
fseek( file, 0, SEEK_END );
|
||||
buf_size = ftell( file );
|
||||
}
|
||||
else
|
||||
buf_size = strbufsize;
|
||||
buf_size = MIN( buf_size, buf_size0 );
|
||||
buf_size = MAX( buf_size, (size_t)(CV_FS_MAX_LEN*6 + 1024) );
|
||||
}
|
||||
rewind();
|
||||
strbufpos = bufOffset;
|
||||
|
||||
buffer.reserve(buf_size + 256);
|
||||
buffer.resize(buf_size);
|
||||
bufofs = 0;
|
||||
|
||||
try
|
||||
@ -809,56 +792,70 @@ public:
|
||||
CV_Error( CV_StsError, "The storage is not opened" );
|
||||
}
|
||||
|
||||
char* gets( char* str, int maxCount )
|
||||
char* getsFromFile( char* buf, int count )
|
||||
{
|
||||
if( file )
|
||||
return fgets( buf, count, file );
|
||||
#if USE_ZLIB
|
||||
if( gzfile )
|
||||
return gzgets( gzfile, buf, count );
|
||||
#endif
|
||||
CV_Error(CV_StsError, "The storage is not opened");
|
||||
}
|
||||
|
||||
char* gets( size_t maxCount )
|
||||
{
|
||||
if( strbuf )
|
||||
{
|
||||
size_t i = strbufpos, len = strbufsize;
|
||||
int j = 0;
|
||||
const char* instr = strbuf;
|
||||
while( i < len && j < maxCount-1 )
|
||||
for( ; i < len; i++ )
|
||||
{
|
||||
char c = instr[i++];
|
||||
if( c == '\0' )
|
||||
break;
|
||||
str[j++] = c;
|
||||
if( c == '\n' )
|
||||
char c = instr[i];
|
||||
if( c == '\0' || c == '\n' )
|
||||
{
|
||||
if( c == '\n' )
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
str[j++] = '\0';
|
||||
size_t count = i - strbufpos;
|
||||
if( maxCount == 0 || maxCount > count )
|
||||
maxCount = count;
|
||||
buffer.resize(std::max(buffer.size(), maxCount + 8));
|
||||
memcpy(&buffer[0], instr + strbufpos, maxCount);
|
||||
buffer[maxCount] = '\0';
|
||||
strbufpos = i;
|
||||
if (maxCount > 256 && !(flags & cv::FileStorage::BASE64))
|
||||
CV_Assert(j < maxCount - 1 && "OpenCV persistence doesn't support very long lines");
|
||||
return j > 1 ? str : 0;
|
||||
return maxCount > 0 ? &buffer[0] : 0;
|
||||
}
|
||||
if( file )
|
||||
|
||||
const size_t MAX_BLOCK_SIZE = INT_MAX/2; // hopefully, that will be enough
|
||||
if( maxCount == 0 )
|
||||
maxCount = MAX_BLOCK_SIZE;
|
||||
else
|
||||
CV_Assert(maxCount < MAX_BLOCK_SIZE);
|
||||
size_t ofs = 0;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
char* ptr = fgets( str, maxCount, file );
|
||||
if (ptr && maxCount > 256 && !(flags & cv::FileStorage::BASE64))
|
||||
{
|
||||
size_t sz = strnlen(ptr, maxCount);
|
||||
CV_Assert(sz < (size_t)(maxCount - 1) && "OpenCV persistence doesn't support very long lines");
|
||||
}
|
||||
return ptr;
|
||||
int count = (int)std::min(buffer.size() - ofs - 16, maxCount);
|
||||
char* ptr = getsFromFile( &buffer[ofs], count+1 );
|
||||
if( !ptr )
|
||||
break;
|
||||
int delta = (int)strlen(ptr);
|
||||
ofs += delta;
|
||||
maxCount -= delta;
|
||||
if( ptr[delta-1] == '\n' || maxCount == 0 )
|
||||
break;
|
||||
if( delta == count )
|
||||
buffer.resize((size_t)(buffer.size()*1.5));
|
||||
}
|
||||
#if USE_ZLIB
|
||||
if( gzfile )
|
||||
{
|
||||
char* ptr = gzgets( gzfile, str, maxCount );
|
||||
if (ptr && maxCount > 256 && !(flags & cv::FileStorage::BASE64))
|
||||
{
|
||||
size_t sz = strnlen(ptr, maxCount);
|
||||
CV_Assert(sz < (size_t)(maxCount - 1) && "OpenCV persistence doesn't support very long lines");
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
#endif
|
||||
CV_Error(CV_StsError, "The storage is not opened");
|
||||
return ofs > 0 ? &buffer[0] : 0;
|
||||
}
|
||||
|
||||
char* gets()
|
||||
{
|
||||
char* ptr = this->gets(bufferStart(), (int)(bufferEnd() - bufferStart()));
|
||||
char* ptr = this->gets(0);
|
||||
if( !ptr )
|
||||
{
|
||||
ptr = bufferStart(); // FIXIT Why do we need this hack? What is about other parsers JSON/YAML?
|
||||
@ -868,10 +865,12 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
FileStorage_API* fs = this;
|
||||
int l = (int)strlen(ptr);
|
||||
size_t l = strlen(ptr);
|
||||
if( l > 0 && ptr[l-1] != '\n' && ptr[l-1] != '\r' && !eof() )
|
||||
CV_PARSE_ERROR_CPP("Too long string or a last string w/o newline");
|
||||
{
|
||||
ptr[l] = '\n';
|
||||
ptr[l+1] = '\0';
|
||||
}
|
||||
}
|
||||
lineno++;
|
||||
return ptr;
|
||||
|
@ -411,7 +411,10 @@ public:
|
||||
if( *ptr != '"' )
|
||||
CV_PARSE_ERROR_CPP( "Key must end with \'\"\'" );
|
||||
|
||||
const char * end = ptr;
|
||||
if( ptr == beg )
|
||||
CV_PARSE_ERROR_CPP( "Key is empty" );
|
||||
value_placeholder = fs->addNode(collection, std::string(beg, (size_t)(ptr - beg)), FileNode::NONE);
|
||||
|
||||
ptr++;
|
||||
ptr = skipSpaces( ptr );
|
||||
if( !ptr || !*ptr )
|
||||
@ -420,11 +423,6 @@ public:
|
||||
if( *ptr != ':' )
|
||||
CV_PARSE_ERROR_CPP( "Missing \':\' between key and value" );
|
||||
|
||||
/* [beg, end) */
|
||||
if( end <= beg )
|
||||
CV_PARSE_ERROR_CPP( "Key is empty" );
|
||||
|
||||
value_placeholder = fs->addNode(collection, std::string(beg, (size_t)(end - beg)), FileNode::NONE);
|
||||
return ++ptr;
|
||||
}
|
||||
|
||||
@ -768,8 +766,6 @@ public:
|
||||
CV_PARSE_ERROR_CPP( "left-brace of top level is missing" );
|
||||
}
|
||||
|
||||
if( !ptr || !*ptr )
|
||||
CV_PARSE_ERROR_CPP( "Unexpected End-Of-File" );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1669,4 +1669,46 @@ TEST(Core_InputOutput, FileStorage_YAML_parse_multiple_documents)
|
||||
ASSERT_EQ(0, std::remove(filename.c_str()));
|
||||
}
|
||||
|
||||
TEST(Core_InputOutput, FileStorage_JSON_VeryLongLines)
|
||||
{
|
||||
for( int iter = 0; iter < 2; iter++ )
|
||||
{
|
||||
std::string temp_path = cv::tempfile("temp.json");
|
||||
{
|
||||
std::ofstream ofs(temp_path);
|
||||
ofs << "{ ";
|
||||
int prev_len = 0, start = 0;
|
||||
for (int i = 0; i < 52500; i++)
|
||||
{
|
||||
std::string str = cv::format("\"KEY%d\"", i);
|
||||
ofs << str;
|
||||
if(iter == 1 && i - start > prev_len)
|
||||
{
|
||||
// build a stairway with increasing text row width
|
||||
ofs << "\n";
|
||||
prev_len = i - start;
|
||||
start = i;
|
||||
}
|
||||
str = cv::format(": \"VALUE%d\", ", i);
|
||||
ofs << str;
|
||||
}
|
||||
ofs << "}";
|
||||
}
|
||||
|
||||
{
|
||||
cv::FileStorage fs(temp_path, cv::FileStorage::READ);
|
||||
char key[16], val0[16];
|
||||
std::string val;
|
||||
for(int i = 0; i < 52500; i += 100)
|
||||
{
|
||||
sprintf(key, "KEY%d", i);
|
||||
sprintf(val0, "VALUE%d", i);
|
||||
fs[key] >> val;
|
||||
ASSERT_EQ(val, val0);
|
||||
}
|
||||
}
|
||||
remove(temp_path.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
}} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user