Fix issue 809: invalid hOCR output file on Windows when the input filename contains non-ASCII characters.

Add release date to vs2008/doc/versions.html

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@828 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
zdenop@gmail.com 2013-02-23 15:01:21 +00:00
parent e8f7dc8b54
commit db52047420
2 changed files with 16 additions and 2 deletions

View File

@ -1074,6 +1074,20 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
if (input_file_ == NULL)
SetInputName(NULL);
#ifdef _WIN32
// convert input name from ANSI encoding to utf-8
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, NULL, NULL);
wchar_t *uni16_str = new WCHAR[str16_len];
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, uni16_str, str16_len);
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, NULL, NULL, NULL);
char *utf8_str = new char[utf8_len];
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, NULL, NULL);
*input_file_ = utf8_str;
delete[] uni16_str;
delete[] utf8_str;
#endif
hocr_str.add_str_int(" <div class='ocr_page' id='page_", page_id);
hocr_str += "' title='image \"";
hocr_str += input_file_ ? *input_file_ : "unknown";

View File

@ -57,7 +57,7 @@
<div class="section" id="version-notes">
<h1>Version Notes<a class="headerlink" href="#version-notes" title="Permalink to this headline"></a></h1>
<div class="section" id="february-2012">
<h2>3.02 &#8211; February ??, 2012<a class="headerlink" href="#february-2012" title="Permalink to this headline"></a></h2>
<h2>3.02.02 &#8211; October 31, 2012<a class="headerlink" href="#february-2012" title="Permalink to this headline"></a></h2>
<ul>
<li><p class="first">Created a completely new Visual Studio 2008 solution from scratch.</p>
</li>
@ -235,7 +235,7 @@ ccutil\unichar.h
<li class="toctree-l1"><a class="reference internal" href="maintenance.html">Maintaining the VS2008 directory</a></li>
<li class="toctree-l1"><a class="reference internal" href="vs2010-notes.html">Using Visual Studio 2010</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="">Version Notes</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#february-2012">3.02 &#8211; February ??, 2012</a></li>
<li class="toctree-l2"><a class="reference internal" href="#february-2012">3.02.02 &#8211; October 31, 2012</a></li>
</ul>
</li>
</ul>