mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-28 05:39:35 +08:00
Fix Issue 645, a char signed/unsigned issue in paragraphs.cpp.
When constructing our debug strings, our simple UTF-8 processing should skip all non-ASCII chars.

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@706 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
1563c01565
commit
a91778397b
@@ -16,9 +16,6 @@
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
-#ifdef _MSC_VER
|
||||
-#define __func__ __FUNCTION__
|
||||
-#endif
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
@@ -2328,7 +2325,8 @@ void InitializeRowInfo(const MutableIterator &it, RowInfo *info) {
|
||||
char *text = it.GetUTF8Text(RIL_TEXTLINE);
|
||||
int trailing_ws_idx = strlen(text); // strip trailing space
|
||||
while (trailing_ws_idx > 0 &&
|
||||
-text[trailing_ws_idx - 1] < 128 && // isspace() only takes ASCII
|
||||
+// isspace() only takes ASCII
|
||||
+((text[trailing_ws_idx - 1] & 0x80) == 0) &&
|
||||
isspace(text[trailing_ws_idx - 1]))
|
||||
trailing_ws_idx--;
|
||||
if (trailing_ws_idx > 0) {
|
||||
|
Loading…
Reference in New Issue
Block a user