mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
Fix issue 809: invalid hOCR output file on Windows when the input filename has non-ASCII characters.
Add release date to vs2008/doc/versions.html git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@828 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
e8f7dc8b54
commit
db52047420
@ -1074,6 +1074,20 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
|
||||
if (input_file_ == NULL)
|
||||
SetInputName(NULL);
|
||||
|
||||
#ifdef _WIN32
  // Convert the input name from the ANSI code page to UTF-8 before it is
  // written into the hOCR page title below. The conversion goes through
  // UTF-16 because the Win32 API offers no direct ANSI -> UTF-8 call.
  // NOTE(review): the result is stored back into *input_file_, so running
  // this again on the same name would re-interpret the UTF-8 bytes as ANSI
  // -- confirm this executes at most once per input name.
  if (input_file_ != NULL) {
    // Sizing call: the source length is -1 (NUL-terminated), so the returned
    // count includes the terminator. The output size argument must be 0.
    int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
                                        NULL, 0);
    if (str16_len > 0) {
      wchar_t *uni16_str = new WCHAR[str16_len];
      str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
                                      uni16_str, str16_len);
      if (str16_len > 0) {
        // str16_len still covers the terminator, so the UTF-8 result is
        // NUL-terminated as well.
        int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len,
                                           NULL, 0, NULL, NULL);
        if (utf8_len > 0) {
          char *utf8_str = new char[utf8_len];
          // Only replace the stored name when the conversion succeeded; on
          // failure the original name is left untouched.
          if (WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
                                  utf8_len, NULL, NULL) > 0) {
            *input_file_ = utf8_str;
          }
          delete[] utf8_str;
        }
      }
      delete[] uni16_str;
    }
  }
#endif
|
||||
|
||||
hocr_str.add_str_int(" <div class='ocr_page' id='page_", page_id);
|
||||
hocr_str += "' title='image \"";
|
||||
hocr_str += input_file_ ? *input_file_ : "unknown";
|
||||
|
@ -57,7 +57,7 @@
|
||||
<div class="section" id="version-notes">
|
||||
<h1>Version Notes<a class="headerlink" href="#version-notes" title="Permalink to this headline">¶</a></h1>
|
||||
<div class="section" id="february-2012">
|
||||
<h2>3.02 – February ??, 2012<a class="headerlink" href="#february-2012" title="Permalink to this headline">¶</a></h2>
|
||||
<h2>3.02.02 – October 31, 2012<a class="headerlink" href="#february-2012" title="Permalink to this headline">¶</a></h2>
|
||||
<ul>
|
||||
<li><p class="first">Created a completely new Visual Studio 2008 solution from scratch.</p>
|
||||
</li>
|
||||
@ -235,7 +235,7 @@ ccutil\unichar.h
|
||||
<li class="toctree-l1"><a class="reference internal" href="maintenance.html">Maintaining the VS2008 directory</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="vs2010-notes.html">Using Visual Studio 2010</a></li>
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="">Version Notes</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="#february-2012">3.02 – February ??, 2012</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="#february-2012">3.02.02 – October 31, 2012</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
Loading…
Reference in New Issue
Block a user