diff --git a/Makefile.am b/Makefile.am
index 426f023e..e427170e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -64,3 +64,17 @@ doc-clean:
 
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = tesseract.pc
+
+# fuzzer-api is used for fuzzing tests.
+# They are run by OSS-Fuzz https://oss-fuzz.com/, but can also be run locally.
+# Note: -fsanitize=fuzzer currently requires the clang++ compiler.
+fuzzer-api: all
+fuzzer-api: $(top_srcdir)/unittest/fuzzers/fuzzer-api.cpp
+	$(CXX) $(CXXFLAGS) -g -fsanitize=fuzzer \
+		-I $(top_srcdir)/src/api \
+		-I $(top_srcdir)/src/ccmain \
+		-I $(top_srcdir)/src/ccstruct \
+		-I $(top_srcdir)/src/ccutil \
+		-I src/api \
+		$< \
+		src/api/.libs/libtesseract.a $(LEPTONICA_LIBS) $(libarchive_LIBS) -o $@
diff --git a/unittest/fuzzers/fuzzer-api.cpp b/unittest/fuzzers/fuzzer-api.cpp
new file mode 100644
index 00000000..f3fb1d81
--- /dev/null
+++ b/unittest/fuzzers/fuzzer-api.cpp
@@ -0,0 +1,109 @@
+#include "baseapi.h"
+#include "leptonica/allheaders.h"
+
+#include <libgen.h>  // for dirname
+#include <cstddef>   // for size_t
+#include <cstdint>   // for uint8_t
+#include <cstdio>    // for printf
+#include <cstdlib>   // for std::abort, std::getenv, setenv (POSIX)
+#include <string>    // for std::string
+
+// Hands out the fuzzer input one bit at a time, least significant bit of
+// each byte first. Once the input is exhausted every further read yields 0.
+class BitReader {
+ private:
+  uint8_t const* data;  // next byte to read from (not owned)
+  size_t size;          // number of bytes left, including *data
+  size_t shift;         // index (0-7) of the next bit within *data
+
+ public:
+  BitReader(const uint8_t* data, size_t size)
+      : data(data), size(size), shift(0) {}
+
+  // Returns the next bit (0 or 1), or 0 when no input is left.
+  int Read(void) {
+    if (size == 0) {
+      return 0;
+    }
+
+    const int ret = ((*data) >> shift) & 1;
+
+    shift++;
+    if (shift >= 8) {
+      // Current byte is used up: advance to the next one.
+      shift = 0;
+      data++;
+      size--;
+    }
+
+    return ret;
+  }
+};
+
+// Single API instance, created in LLVMFuzzerInitialize and reused by every
+// LLVMFuzzerTestOneInput call.
+static tesseract::TessBaseAPI* api = nullptr;
+
+// One-time setup called by libFuzzer before the first input is run.
+// If TESSDATA_PREFIX is unset, it is pointed at the "tessdata" directory
+// next to the fuzzer binary. Aborts if the API cannot be initialized.
+extern "C" int LLVMFuzzerInitialize(int* /*pArgc*/, char*** pArgv) {
+  if (std::getenv("TESSDATA_PREFIX") == nullptr) {
+    // dirname may modify its argument, so work on a private copy of argv[0].
+    std::string binary_path = (*pArgv)[0];
+    const std::string filepath = dirname(&binary_path[0]);
+
+    const std::string tessdata_path = filepath + "/" + "tessdata";
+    if (setenv("TESSDATA_PREFIX", tessdata_path.c_str(), 1) != 0) {
+      printf("Setenv failed\n");
+      std::abort();
+    }
+  }
+
+  api = new tesseract::TessBaseAPI();
+  if (api->Init(nullptr, "eng") != 0) {
+    printf("Cannot initialize API\n");
+    std::abort();
+  }
+
+  /* Silence output */
+  api->SetVariable("debug_file", "/dev/null");
+
+  return 0;
+}
+
+// Builds a 1 bit per pixel image of the given size whose pixels are taken,
+// column by column, from BR. The caller must release it with pixDestroy.
+static PIX* createPix(BitReader& BR, const size_t width, const size_t height) {
+  Pix* pix = pixCreate(width, height, 1);
+
+  if (pix == nullptr) {
+    printf("pix creation failed\n");
+    std::abort();
+  }
+
+  for (size_t i = 0; i < width; i++) {
+    for (size_t j = 0; j < height; j++) {
+      pixSetPixel(pix, i, j, BR.Read());
+    }
+  }
+
+  return pix;
+}
+
+// libFuzzer entry point: turns the input bytes into a 100x100 bitmap, hands
+// it to the API and asks for the resulting UTF-8 text.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  BitReader BR(data, size);
+
+  auto pix = createPix(BR, 100, 100);
+
+  api->SetImage(pix);
+
+  char* outText = api->GetUTF8Text();
+
+  pixDestroy(&pix);
+  delete[] outText;
+
+  return 0;
+}