2019-10-29 15:41:47 +08:00
|
|
|
#include <tesseract/baseapi.h>
|
2021-01-05 21:57:14 +08:00
|
|
|
#include <allheaders.h>
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 18:05:38 +08:00
|
|
|
#include <libgen.h> // for dirname
|
|
|
|
#include <cstdio> // for printf
|
|
|
|
#include <cstdlib> // for std::getenv, std::setenv
|
|
|
|
#include <string> // for std::string
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2020-01-03 22:33:13 +08:00
|
|
|
// Dimensions, in pixels, of the image that is synthesised from each fuzzer
// input. Both default to 100 and can be overridden at build time by defining
// the macro beforehand (e.g. -DTESSERACT_FUZZER_WIDTH=512).
#ifndef TESSERACT_FUZZER_WIDTH
#define TESSERACT_FUZZER_WIDTH 100
#endif

#ifndef TESSERACT_FUZZER_HEIGHT
#define TESSERACT_FUZZER_HEIGHT 100
#endif
|
|
|
|
|
2019-03-05 23:59:24 +08:00
|
|
|
/// Sequential single-bit reader over a byte buffer.
///
/// Bits are handed out least-significant first within each byte. Once the
/// buffer is exhausted, every further Read() yields 0, so callers may request
/// more bits than the input actually provides.
class BitReader {
 private:
  uint8_t const* data;  // next byte to consume (not owned)
  size_t size;          // bytes still available, including the current one
  size_t shift;         // position (0..7) of the next bit inside *data

 public:
  /// @param data  start of the input buffer; a null pointer is treated as an
  ///              empty buffer regardless of `size`.
  /// @param size  number of readable bytes at `data`.
  BitReader(const uint8_t* data, size_t size)
      : data(data), size(data != nullptr ? size : 0), shift(0) {}

  /// @return the next bit (0 or 1), or 0 when no input is left.
  int Read(void) {
    if (size == 0) {
      return 0;
    }

    const int ret = ((*data) >> shift) & 1;

    shift++;
    if (shift >= 8) {
      // Current byte fully consumed: advance to the next one.
      shift = 0;
      data++;
      size--;
    }

    return ret;
  }
};
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 18:05:38 +08:00
|
|
|
// Shared Tesseract engine: allocated and initialised once in
// LLVMFuzzerInitialize and reused by every LLVMFuzzerTestOneInput call.
// It is not freed anywhere in this file.
static tesseract::TessBaseAPI* api = nullptr;
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 18:05:38 +08:00
|
|
|
extern "C" int LLVMFuzzerInitialize(int* /*pArgc*/, char*** pArgv) {
|
|
|
|
if (std::getenv("TESSDATA_PREFIX") == nullptr) {
|
|
|
|
std::string binary_path = *pArgv[0];
|
|
|
|
const std::string filepath = dirname(&binary_path[0]);
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 17:54:41 +08:00
|
|
|
const std::string tessdata_path = filepath + "/" + "tessdata";
|
|
|
|
if (setenv("TESSDATA_PREFIX", tessdata_path.c_str(), 1) != 0) {
|
|
|
|
printf("Setenv failed\n");
|
2019-03-26 18:05:38 +08:00
|
|
|
std::abort();
|
2019-03-05 23:59:24 +08:00
|
|
|
}
|
2019-03-26 17:54:41 +08:00
|
|
|
}
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 17:54:41 +08:00
|
|
|
api = new tesseract::TessBaseAPI();
|
|
|
|
if (api->Init(nullptr, "eng") != 0) {
|
|
|
|
printf("Cannot initialize API\n");
|
|
|
|
abort();
|
|
|
|
}
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 17:54:41 +08:00
|
|
|
/* Silence output */
|
|
|
|
api->SetVariable("debug_file", "/dev/null");
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 17:54:41 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2019-03-05 23:59:24 +08:00
|
|
|
|
|
|
|
/// Builds a 1-bit-per-pixel image of the requested size whose pixels are taken
/// bit by bit from `BR`.
///
/// Pixels are filled column by column (x in the outer loop, y in the inner
/// loop); this order defines how fuzzer input bytes map onto the image, so it
/// must not be changed without invalidating existing corpora.
///
/// @param BR      bit source; yields 0 once its input is exhausted.
/// @param width   image width in pixels.
/// @param height  image height in pixels.
/// @return a newly created image that the caller must release with
///         pixDestroy(). Aborts the process if creation fails.
static PIX* createPix(BitReader& BR, const size_t width, const size_t height) {
  // Leptonica takes 32-bit signed dimensions/coordinates; make the narrowing
  // from size_t explicit.
  PIX* pix = pixCreate(static_cast<int>(width), static_cast<int>(height), 1);

  if (pix == nullptr) {
    printf("pix creation failed\n");
    abort();
  }

  for (size_t i = 0; i < width; i++) {
    for (size_t j = 0; j < height; j++) {
      pixSetPixel(pix, static_cast<int>(i), static_cast<int>(j), BR.Read());
    }
  }

  return pix;
}
|
|
|
|
|
2019-03-26 17:54:41 +08:00
|
|
|
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
|
|
|
BitReader BR(data, size);
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2020-01-03 22:33:13 +08:00
|
|
|
auto pix = createPix(BR, TESSERACT_FUZZER_WIDTH, TESSERACT_FUZZER_HEIGHT);
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 17:54:41 +08:00
|
|
|
api->SetImage(pix);
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 17:54:41 +08:00
|
|
|
char* outText = api->GetUTF8Text();
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 17:54:41 +08:00
|
|
|
pixDestroy(&pix);
|
|
|
|
delete[] outText;
|
2019-03-05 23:59:24 +08:00
|
|
|
|
2019-03-26 17:54:41 +08:00
|
|
|
return 0;
|
2019-03-05 23:59:24 +08:00
|
|
|
}
|