diff --git a/src/api/capi.h b/src/api/capi.h index 0939b1e7d..47f2b2d70 100644 --- a/src/api/capi.h +++ b/src/api/capi.h @@ -30,6 +30,7 @@ # include "renderer.h" #else # include "platform.h" +# include <stdbool.h> # include <stdio.h> #endif diff --git a/src/ccutil/scanutils.h b/src/ccutil/scanutils.h index dc3dfe085..b2f155804 100644 --- a/src/ccutil/scanutils.h +++ b/src/ccutil/scanutils.h @@ -45,12 +45,12 @@ uintmax_t streamtoumax(FILE* s, int base); // Parse a file stream according to the given format. See the fscanf manpage // for more information, as this function attempts to mimic its behavior. -// Note that scientific loating-point notation is not supported. +// Note that scientific floating-point notation is not supported. int fscanf(FILE* stream, const char *format, ...); // Parse a file stream according to the given format. See the fscanf manpage // for more information, as this function attempts to mimic its behavior. -// Note that scientific loating-point notation is not supported. +// Note that scientific floating-point notation is not supported. int vfscanf(FILE* stream, const char *format, va_list ap); // Create a file at the specified path. See the creat manpage for more diff --git a/unlvtests/README.md b/unlvtests/README.md index ae3a23850..98ef8c258 100644 --- a/unlvtests/README.md +++ b/unlvtests/README.md @@ -1,15 +1,16 @@ -How to run UNLV tests. +## How to run UNLV tests. The scripts in this directory make it possible to duplicate the tests published in the Fourth Annual Test of OCR Accuracy. 
-See http://www.isri.unlv.edu/downloads/AT-1995.pdf +See http://www.expervision.com/wp-content/uploads/2012/12/1995.The_Fourth_Annual_Test_of_OCR_Accuracy.pdf but first you have to get the tools and data used by UNLV: -Step 1: to download the images go to +### Step 1: to download the images go to https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/ and get doe3.3B.tar.gz, bus.3B.tar.gz, mag.3B.tar.gz and news.3B.tar.gz spn.3B.tar.gz is incorrect in this repo, so get it from code.google +``` mkdir -p ~/isri-downloads cd ~/isri-downloads curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/bus.3B.tar.gz > bus.3B.tar.gz @@ -17,12 +18,15 @@ curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/do curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/mag.3B.tar.gz > mag.3B.tar.gz curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/news.3B.tar.gz > news.3B.tar.gz curl -L https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/isri-ocr-evaluation-tools/spn.3B.tar.gz > spn.3B.tar.gz +``` -Step 2: extract the files. It doesn't really matter where +### Step 2: extract the files. +It doesn't really matter where in your filesystem you put them, but they must go under a common root so you have directories doe3.3B, bus.3B, mag.3B and news.3B. in, for example, ~/ISRI-OCRtk. +``` mkdir -p ~/ISRI-OCRtk cd ~/ISRI-OCRtk tar xzvf ~/isri-downloads/bus.3B.tar.gz @@ -30,26 +34,37 @@ tar xzvf ~/isri-downloads/doe3.3B.tar.gz tar xzvf ~/isri-downloads/mag.3B.tar.gz tar xzvf ~/isri-downloads/news.3B.tar.gz tar xzvf ~/isri-downloads/spn.3B.tar.gz +``` -**** Edit ~/ISRI-OCRtk/spn.3B/pages +Edit *~/ISRI-OCRtk/spn.3B/pages* delete the line containing the following imagename as it crashes tesseract. 
+ 7733_005.3B.tif -Step 4: Download the modified ISRI toolkit and make and install the tools : +### Step 3: Download the modified ISRI toolkit, make and install the tools: +These will be installed in /usr/local/bin. +``` git clone https://github.com/Shreeshrii/ocr-evaluation-tools.git cd ~/ocr-evaluation-tools sudo make install +``` -Step 6: cd back to your main tesseract-ocr dir and Build tesseract. +### Step 4: cd back to your main tesseract-ocr dir and build tesseract. -Step 7: run unlvtests/runalltests.sh with the root ISRI data dir and testname, tessdata-dir and language: +### Step 5: run unlvtests/runalltests.sh with the root ISRI data dir, testname, tessdata-dir and language: +``` unlvtests/runalltests.sh ~/ISRI-OCRtk 4_fast_eng ../tessdata_fast eng -and go to the gym, have lunch etc. +``` +and go to the gym, have lunch etc. It takes a while to run. -Step 8: There should be a file -unlvtests/reports/4-beta_fast.summary that contains the final summarized accuracy +### Step 6: There should be a RELEASE.summary file +*unlvtests/reports/4_fast_eng.summary* that contains the final summarized accuracy report and comparison with the 1995 results. +### Step 7: run the test for Spanish. + +``` unlvtests/runalltests.sh ~/ISRI-OCRtk 4_fast_spa ../tessdata_fast spa +```