Add TessdataManager::SaveArchiveFile and a "-t" option to convert a .traineddata file to a zip archive

This commit is contained in:
Sadra Barikbin 2024-02-04 09:27:33 +03:30
parent 8ee020e14c
commit fc70f76e2e
3 changed files with 49 additions and 1 deletions

View File

@ -85,6 +85,32 @@ bool TessdataManager::LoadArchiveFile(const char *filename) {
}
return result;
}
bool TessdataManager::SaveArchiveFile(const char *filename) const{
bool result = false;
archive *a = archive_write_new();
archive_entry *ae = archive_entry_new();
if (a != nullptr) {
archive_write_set_format_zip(a);
archive_write_open_filename(a, filename);
std::string filename_str = filename;
filename_str += ".";
archive_entry_set_filetype(ae, AE_IFREG);
archive_entry_set_perm(ae, 333);
for (unsigned i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
if (!entries_[i].empty()) {
archive_entry_set_pathname(ae, (filename_str + kTessdataFileSuffixes[i]).c_str());
archive_entry_set_size(ae, entries_[i].size());
archive_write_header(a, ae);
archive_write_data(a, &entries_[i][0], entries_[i].size());
}
}
result = archive_write_close(a) == ARCHIVE_OK;
archive_write_free(a);
return result;
}
return result;
}
#endif
bool TessdataManager::Init(const char *data_file_name) {
@ -162,12 +188,16 @@ void TessdataManager::OverwriteEntry(TessdataType type, const char *data, int si
// Saves to the given filename.
bool TessdataManager::SaveFile(const char *filename, FileWriter writer) const {
// TODO: With a custom writer, or without libarchive, this method writes only the proprietary file format.
ASSERT_HOST(is_loaded_);
std::vector<char> data;
Serialize(&data);
if (writer == nullptr) {
#if defined(HAVE_LIBARCHIVE)
return SaveArchiveFile(filename);
#else
return SaveDataToFile(data, filename);
#endif
} else {
return (*writer)(data, filename);
}

View File

@ -223,6 +223,7 @@ public:
private:
// Use libarchive.
bool LoadArchiveFile(const char *filename);
bool SaveArchiveFile(const char *filename) const;
/**
* Fills type with TessdataType of the tessdata component represented by the

View File

@ -219,6 +219,19 @@ int main(int argc, char **argv) {
tprintf("Failed to write modified traineddata:%s!\n", argv[2]);
return EXIT_FAILURE;
}
} else if (argc == 3 && strcmp(argv[1], "-t") == 0) {
#if defined(HAVE_LIBARCHIVE)
if (!tm.Init(argv[2])) {
tprintf("Failed to read %s\n", argv[2]);
return EXIT_FAILURE;
}
if (!tm.SaveFile(argv[2], nullptr)) {
tprintf("Failed to tranform traineddata:%s!\n", argv[2]);
return EXIT_FAILURE;
}
#else
tprintf("Failed to load libarchive. Is tesseract compiled with libarchive support?\n");
#endif
} else if (argc == 3 && strcmp(argv[1], "-d") == 0) {
return list_components(tm, argv[2]);
} else if (argc == 3 && strcmp(argv[1], "-l") == 0) {
@ -272,6 +285,10 @@ int main(int argc, char **argv) {
"Usage for compacting LSTM component to int:\n"
" %s -c traineddata_file\n",
argv[0]);
printf(
"Usage for transforming the proprietary .traineddata file to a zip archive:\n"
" %s -t traineddata_file\n",
argv[0]);
return EXIT_FAILURE;
}
tm.Directory();