mirror of
https://github.com/microsoft/PowerToys.git
synced 2025-06-07 09:28:03 +08:00
[TextExtractor]Fix error blanks in Japanese OCR (#22443)
* Fix erroneous blanks in Japanese OCR: Kanji, Hiragana, Katakana, and Hankaku-Katakana (half-width Katakana) do not need blanks between characters (this applies beyond the CJKUnifiedIdeographs range). There may be more symbols that don't require spaces, such as \u3001 and \u3002, but leaving those to the OCR engine to improve may be a better choice. * Update ImageMethods.cs: fix spelling * Update expect.txt: add Hankaku * Update ImageMethods.cs
This commit is contained in:
parent
08d569ccf6
commit
a8a618af1d
1
.github/actions/spell-check/expect.txt
vendored
1
.github/actions/spell-check/expect.txt
vendored
@ -618,6 +618,7 @@ HACCEL
|
||||
handlekeyboardhookevent
|
||||
handlerroutine
|
||||
hangeul
|
||||
Hankaku
|
||||
hanselman
|
||||
Hanzi
|
||||
Hardlines
|
||||
|
@ -147,7 +147,10 @@ internal class ImageMethods
|
||||
}
|
||||
else
|
||||
{
|
||||
var cjkRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}");
|
||||
// Kanji, Hiragana, Katakana, and Hankaku-Katakana (half-width Katakana) do not need blanks between characters (not only the symbols in CJKUnifiedIdeographs).
|
||||
// There may be more symbols that don't require spaces, such as \u3001 (ideographic comma) and \u3002 (ideographic full stop).
|
||||
// var cjkRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}|\p{IsHiragana}|\p{IsKatakana}|[\uFF61-\uFF9F]|[\u3000-\u3003]");
|
||||
var cjkRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}|\p{IsHiragana}|\p{IsKatakana}|[\uFF61-\uFF9F]");
|
||||
|
||||
foreach (OcrLine ocrLine in ocrResult.Lines)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user