More changes to ccmain for 3.00

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@287 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith 2009-07-11 02:07:25 +00:00
parent 109d1c8f21
commit 96e8b51feb
83 changed files with 21234 additions and 0 deletions

650
ccmain/Makefile Normal file
View File

@ -0,0 +1,650 @@
# Makefile.in generated by automake 1.10.1 from Makefile.am.
# ccmain/Makefile. Generated from Makefile.in by configure.
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
pkgdatadir = $(datadir)/tesseract
pkglibdir = $(libdir)/tesseract
pkgincludedir = $(includedir)/tesseract
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = x86_64-unknown-linux-gnu
host_triplet = x86_64-unknown-linux-gnu
subdir = ccmain
DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(SHELL) $(top_srcdir)/config/mkinstalldirs
CONFIG_HEADER = $(top_builddir)/config_auto.h
CONFIG_CLEAN_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"
libLIBRARIES_INSTALL = $(INSTALL_DATA)
LIBRARIES = $(lib_LIBRARIES)
AR = ar
ARFLAGS = cru
libtesseract_main_a_AR = $(AR) $(ARFLAGS)
libtesseract_main_a_LIBADD =
am_libtesseract_main_a_OBJECTS = adaptions.$(OBJEXT) \
ambigsrecog.$(OBJEXT) applybox.$(OBJEXT) blobcmp.$(OBJEXT) \
callnet.$(OBJEXT) charcut.$(OBJEXT) charsample.$(OBJEXT) \
control.$(OBJEXT) docqual.$(OBJEXT) expandblob.$(OBJEXT) \
fixspace.$(OBJEXT) fixxht.$(OBJEXT) imgscale.$(OBJEXT) \
matmatch.$(OBJEXT) osdetect.$(OBJEXT) output.$(OBJEXT) \
pagewalk.$(OBJEXT) paircmp.$(OBJEXT) pgedit.$(OBJEXT) \
reject.$(OBJEXT) scaleimg.$(OBJEXT) tessbox.$(OBJEXT) \
tessedit.$(OBJEXT) tesseractclass.$(OBJEXT) tessvars.$(OBJEXT) \
tfacepp.$(OBJEXT) thresholder.$(OBJEXT) tstruct.$(OBJEXT) \
varabled.$(OBJEXT) werdit.$(OBJEXT)
libtesseract_main_a_OBJECTS = $(am_libtesseract_main_a_OBJECTS)
DEFAULT_INCLUDES = -I. -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/config/depcomp
am__depfiles_maybe = depfiles
CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
CXXLD = $(CXX)
CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
-o $@
SOURCES = $(libtesseract_main_a_SOURCES)
DIST_SOURCES = $(libtesseract_main_a_SOURCES)
RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
html-recursive info-recursive install-data-recursive \
install-dvi-recursive install-exec-recursive \
install-html-recursive install-info-recursive \
install-pdf-recursive install-ps-recursive install-recursive \
installcheck-recursive installdirs-recursive pdf-recursive \
ps-recursive uninstall-recursive
includeHEADERS_INSTALL = $(INSTALL_HEADER)
HEADERS = $(include_HEADERS)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
ETAGS = etags
CTAGS = ctags
DIST_SUBDIRS = $(SUBDIRS)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = aclocal-1.10
AMTAR = tar
AUTOCONF = autoconf
AUTOHEADER = autoheader
AUTOMAKE = automake-1.10
AWK = gawk
CC = gcc
CCDEPMODE = depmode=gcc3
CFLAGS = -g -O2
CPP = gcc -E
CPPFLAGS = -I/usr/local/include/liblept
CXX = g++
CXXCPP = g++ -E
CXXDEPMODE = depmode=gcc3
CXXFLAGS = -g -O2
CYGPATH_W = echo
DEFS = -DHAVE_CONFIG_H
DEPDIR = .deps
ECHO_C =
ECHO_N = -n
ECHO_T =
EGREP = /bin/grep -E
EXEEXT =
GREP = /bin/grep
INSTALL = /usr/bin/install -c
INSTALL_DATA = ${INSTALL} -m 644
INSTALL_PROGRAM = ${INSTALL}
INSTALL_SCRIPT = ${INSTALL}
INSTALL_STRIP_PROGRAM = $(install_sh) -c -s
LDFLAGS =
LIBOBJS =
LIBS = -llept -ltiff -lpthread -ljpeg -lpng -lz -lm
LIBTIFF_CFLAGS =
LIBTIFF_LIBS = -ltiff
LTLIBOBJS =
MAINT = #
MAKEINFO = makeinfo
MKDIR_P = /bin/mkdir -p
OBJEXT = o
PACKAGE = tesseract
PACKAGE_BUGREPORT = theraysmith@gmail.com
PACKAGE_DATE = 05/29
PACKAGE_NAME = tesseract
PACKAGE_STRING = tesseract 3.00
PACKAGE_TARNAME = tesseract
PACKAGE_VERSION = 3.00
PACKAGE_YEAR = 2009
PATH_SEPARATOR = :
RANLIB = ranlib
SET_MAKE =
SHELL = /bin/sh
STRIP =
VERSION = 3.00
abs_builddir = /home/rays/src/opensrc/tesseract-3.00.src/ccmain
abs_srcdir = /home/rays/src/opensrc/tesseract-3.00.src/ccmain
abs_top_builddir = /home/rays/src/opensrc/tesseract-3.00.src
abs_top_srcdir = /home/rays/src/opensrc/tesseract-3.00.src
ac_ct_CC = gcc
ac_ct_CXX = g++
am__include = include
am__leading_dot = .
am__quote =
am__tar = ${AMTAR} chof - "$$tardir"
am__untar = ${AMTAR} xf -
bindir = ${exec_prefix}/bin
build = x86_64-unknown-linux-gnu
build_alias =
build_cpu = x86_64
build_os = linux-gnu
build_vendor = unknown
builddir = .
datadir = ${datarootdir}
datarootdir = ${prefix}/share
docdir = ${datarootdir}/doc/${PACKAGE_TARNAME}
dvidir = ${docdir}
exec_prefix = ${prefix}
host = x86_64-unknown-linux-gnu
host_alias =
host_cpu = x86_64
host_os = linux-gnu
host_vendor = unknown
htmldir = ${docdir}
includedir = ${prefix}/include/tesseract
infodir = ${datarootdir}/info
install_sh = $(SHELL) /home/rays/src/opensrc/tesseract-3.00.src/config/install-sh
libdir = ${exec_prefix}/lib
libexecdir = ${exec_prefix}/libexec
localedir = ${datarootdir}/locale
localstatedir = ${prefix}/var
mandir = ${datarootdir}/man
mkdir_p = /bin/mkdir -p
oldincludedir = /usr/include
pdfdir = ${docdir}
prefix = /usr/local
program_transform_name = s,x,x,
psdir = ${docdir}
sbindir = ${exec_prefix}/sbin
sharedstatedir = ${prefix}/com
srcdir = .
sysconfdir = ${prefix}/etc
target_alias =
top_builddir = ..
top_srcdir = ..
SUBDIRS =
AM_CPPFLAGS = \
-I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \
-I$(top_srcdir)/image -I$(top_srcdir)/viewer \
-I$(top_srcdir)/ccops -I$(top_srcdir)/dict \
-I$(top_srcdir)/classify \
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
-I$(top_srcdir)/textord
EXTRA_DIST = tessembedded.cpp ccmain.vcproj
include_HEADERS = \
adaptions.h applybox.h blobcmp.h \
callnet.h charcut.h charsample.h control.h \
docqual.h expandblob.h fixspace.h fixxht.h \
imgscale.h matmatch.h osdetect.h output.h \
pagewalk.h paircmp.h pgedit.h reject.h scaleimg.h \
tessbox.h tessedit.h tessembedded.h tesseractclass.h \
tessio.h tessvars.h tfacep.h tfacepp.h thresholder.h tstruct.h \
varabled.h werdit.h
lib_LIBRARIES = libtesseract_main.a
libtesseract_main_a_SOURCES = \
adaptions.cpp ambigsrecog.cpp applybox.cpp \
blobcmp.cpp \
callnet.cpp charcut.cpp charsample.cpp control.cpp \
docqual.cpp expandblob.cpp fixspace.cpp fixxht.cpp \
imgscale.cpp matmatch.cpp osdetect.cpp output.cpp \
pagewalk.cpp paircmp.cpp pgedit.cpp reject.cpp scaleimg.cpp \
tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \
tfacepp.cpp thresholder.cpp tstruct.cpp \
varabled.cpp werdit.cpp
all: all-recursive
.SUFFIXES:
.SUFFIXES: .cpp .o .obj
$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
&& exit 0; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu ccmain/Makefile'; \
cd $(top_srcdir) && \
$(AUTOMAKE) --gnu ccmain/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: # $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): # $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
install-libLIBRARIES: $(lib_LIBRARIES)
@$(NORMAL_INSTALL)
test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
@list='$(lib_LIBRARIES)'; for p in $$list; do \
if test -f $$p; then \
f=$(am__strip_dir) \
echo " $(libLIBRARIES_INSTALL) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \
$(libLIBRARIES_INSTALL) "$$p" "$(DESTDIR)$(libdir)/$$f"; \
else :; fi; \
done
@$(POST_INSTALL)
@list='$(lib_LIBRARIES)'; for p in $$list; do \
if test -f $$p; then \
p=$(am__strip_dir) \
echo " $(RANLIB) '$(DESTDIR)$(libdir)/$$p'"; \
$(RANLIB) "$(DESTDIR)$(libdir)/$$p"; \
else :; fi; \
done
uninstall-libLIBRARIES:
@$(NORMAL_UNINSTALL)
@list='$(lib_LIBRARIES)'; for p in $$list; do \
p=$(am__strip_dir) \
echo " rm -f '$(DESTDIR)$(libdir)/$$p'"; \
rm -f "$(DESTDIR)$(libdir)/$$p"; \
done
clean-libLIBRARIES:
-test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES)
libtesseract_main.a: $(libtesseract_main_a_OBJECTS) $(libtesseract_main_a_DEPENDENCIES)
-rm -f libtesseract_main.a
$(libtesseract_main_a_AR) libtesseract_main.a $(libtesseract_main_a_OBJECTS) $(libtesseract_main_a_LIBADD)
$(RANLIB) libtesseract_main.a
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
include ./$(DEPDIR)/adaptions.Po
include ./$(DEPDIR)/ambigsrecog.Po
include ./$(DEPDIR)/applybox.Po
include ./$(DEPDIR)/blobcmp.Po
include ./$(DEPDIR)/callnet.Po
include ./$(DEPDIR)/charcut.Po
include ./$(DEPDIR)/charsample.Po
include ./$(DEPDIR)/control.Po
include ./$(DEPDIR)/docqual.Po
include ./$(DEPDIR)/expandblob.Po
include ./$(DEPDIR)/fixspace.Po
include ./$(DEPDIR)/fixxht.Po
include ./$(DEPDIR)/imgscale.Po
include ./$(DEPDIR)/matmatch.Po
include ./$(DEPDIR)/osdetect.Po
include ./$(DEPDIR)/output.Po
include ./$(DEPDIR)/pagewalk.Po
include ./$(DEPDIR)/paircmp.Po
include ./$(DEPDIR)/pgedit.Po
include ./$(DEPDIR)/reject.Po
include ./$(DEPDIR)/scaleimg.Po
include ./$(DEPDIR)/tessbox.Po
include ./$(DEPDIR)/tessedit.Po
include ./$(DEPDIR)/tesseractclass.Po
include ./$(DEPDIR)/tessvars.Po
include ./$(DEPDIR)/tfacepp.Po
include ./$(DEPDIR)/thresholder.Po
include ./$(DEPDIR)/tstruct.Po
include ./$(DEPDIR)/varabled.Po
include ./$(DEPDIR)/werdit.Po
.cpp.o:
$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
# source='$<' object='$@' libtool=no \
# DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \
# $(CXXCOMPILE) -c -o $@ $<
.cpp.obj:
$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
# source='$<' object='$@' libtool=no \
# DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \
# $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
install-includeHEADERS: $(include_HEADERS)
@$(NORMAL_INSTALL)
test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)"
@list='$(include_HEADERS)'; for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
f=$(am__strip_dir) \
echo " $(includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(includedir)/$$f'"; \
$(includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(includedir)/$$f"; \
done
uninstall-includeHEADERS:
@$(NORMAL_UNINSTALL)
@list='$(include_HEADERS)'; for p in $$list; do \
f=$(am__strip_dir) \
echo " rm -f '$(DESTDIR)$(includedir)/$$f'"; \
rm -f "$(DESTDIR)$(includedir)/$$f"; \
done
# This directory's subdirectories are mostly independent; you can cd
# into them and run `make' without going through this Makefile.
# To change the values of `make' variables: instead of editing Makefiles,
# (1) if the variable is set in `config.status', edit `config.status'
# (which will cause the Makefiles to be regenerated when you run `make');
# (2) otherwise, pass the desired values on the `make' command line.
$(RECURSIVE_TARGETS):
@failcom='exit 1'; \
for f in x $$MAKEFLAGS; do \
case $$f in \
*=* | --[!k]*);; \
*k*) failcom='fail=yes';; \
esac; \
done; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
list='$(SUBDIRS)'; for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
(cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
$(RECURSIVE_CLEAN_TARGETS):
@failcom='exit 1'; \
for f in x $$MAKEFLAGS; do \
case $$f in \
*=* | --[!k]*);; \
*k*) failcom='fail=yes';; \
esac; \
done; \
dot_seen=no; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
rev=''; for subdir in $$list; do \
if test "$$subdir" = "."; then :; else \
rev="$$subdir $$rev"; \
fi; \
done; \
rev="$$rev ."; \
target=`echo $@ | sed s/-recursive//`; \
for subdir in $$rev; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
(cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done && test -z "$$fail"
tags-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
done
ctags-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
done
# Build an ID database with mkid over the de-duplicated list of sources and
# headers.  Files not found in the build directory are looked up in $(srcdir).
# The awk filter prints each distinct name once, and only when at least one
# name was read.  The flag variable must be spelled the same where it is set
# and where it is tested (it was misspelled "nonemtpy" when set, which left
# the list permanently empty).
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
	unique=`for i in $$list; do \
	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
	  done | \
	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
	      END { if (nonempty) { for (i in files) print i; }; }'`; \
	mkid -fID $$unique
tags: TAGS
TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$tags $$unique; \
fi
ctags: CTAGS
CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
test -z "$(CTAGS_ARGS)$$tags$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$tags $$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& cd $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) $$here
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
fi; \
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
else \
test -f $(distdir)/$$file \
|| cp -p $$d/$$file $(distdir)/$$file \
|| exit 1; \
fi; \
done
list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
distdir=`$(am__cd) $(distdir) && pwd`; \
top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
(cd $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$top_distdir" \
distdir="$$distdir/$$subdir" \
am__remove_distdir=: \
am__skip_length_check=: \
distdir) \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-recursive
all-am: Makefile $(LIBRARIES) $(HEADERS)
installdirs: installdirs-recursive
installdirs-am:
for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
install-strip:
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-recursive
clean-am: clean-generic clean-libLIBRARIES mostlyclean-am
distclean: distclean-recursive
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
info: info-recursive
info-am:
install-data-am: install-includeHEADERS
install-dvi: install-dvi-recursive
install-exec-am: install-libLIBRARIES
install-html: install-html-recursive
install-info: install-info-recursive
install-man:
install-pdf: install-pdf-recursive
install-ps: install-ps-recursive
installcheck-am:
maintainer-clean: maintainer-clean-recursive
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-compile mostlyclean-generic
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am: uninstall-includeHEADERS uninstall-libLIBRARIES
.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
install-strip
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
all all-am check check-am clean clean-generic \
clean-libLIBRARIES ctags ctags-recursive distclean \
distclean-compile distclean-generic distclean-tags distdir dvi \
dvi-am html html-am info info-am install install-am \
install-data install-data-am install-dvi install-dvi-am \
install-exec install-exec-am install-html install-html-am \
install-includeHEADERS install-info install-info-am \
install-libLIBRARIES install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic pdf pdf-am ps ps-am tags tags-recursive \
uninstall uninstall-am uninstall-includeHEADERS \
uninstall-libLIBRARIES
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

179
ccmain/ambigsrecog.cpp Normal file
View File

@ -0,0 +1,179 @@
///////////////////////////////////////////////////////////////////////
// File: ambigsrecog.cpp
// Description: Functions for producing classifications
// for the input to ambigstraining.
// Author: Daria Antonova
// Created: Mon Jun 23 11:26:43 PDT 2008
//
// (C) Copyright 2007, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "ambigs.h"
#include "applybox.h"
#include "boxread.h"
#include "control.h"
#include "permute.h"
#include "ratngs.h"
#include "reject.h"
#include "stopper.h"
#include "tesseractclass.h"
namespace tesseract {
// Puts the recognizer into ambigs training mode and opens the file that the
// training output is written to.
//
// The settings changed here restrict permutation to the top choice, switch
// off adaption, context checking and the document dictionary, and enable
// saving of the best and raw choices.
//
// fname: name of the input file. Everything from its last '.' onwards is
//        replaced with ".txt" to form the output file name; if there is no
//        '.', ".txt" is simply appended.
// Returns the output file, opened with mode "a+". When the file cannot be
// opened, CANTOPENFILE.error() is called with the EXIT action.
FILE *Tesseract::init_ambigs_training(const STRING &fname) {
  permute_only_top = 1;                        // use only top choice permuter
  tessedit_tess_adaption_mode.set_value(0);    // turn off adaption
  tessedit_ok_mode.set_value(0);               // turn off context checking
  tessedit_enable_doc_dict.set_value(0);       // turn off document dictionary
  save_best_choices.set_value(1);              // save individual char choices
  stopper_no_acceptable_choices.set_value(1);  // explore all segmentations
  save_raw_choices.set_value(1);               // save raw choices

  // Derive the output file name: cut at the last '.', then add ".txt".
  STRING output_fname = fname;
  const char *lastdot = strrchr(output_fname.string(), '.');
  if (lastdot != NULL) {
    output_fname[lastdot - output_fname.string()] = '\0';
  }
  output_fname += ".txt";

  // Open the ambigs output file.
  FILE *output_file = fopen(output_fname.string(), "a+");
  if (output_file == NULL) {
    // Name the file that really failed: this is the ambigs output file,
    // not a box file.
    CANTOPENFILE.error("ambigs_training", EXIT,
                       "Can't open ambigs output file %s\n",
                       output_fname.string());
  }
  return output_file;
}
// This function takes tif/box pair of files and runs recognition on the image,
// while making sure that the word bounds that tesseract identified roughly
// match to those specified by the input box file. For each word (ngram in a
// single bounding box from the input box file) it outputs the ocred result,
// the correct label, rating and certainty.
void Tesseract::ambigs_training_segmented(const STRING &fname,
PAGE_RES *page_res,
volatile ETEXT_DESC *monitor,
FILE *output_file) {
STRING box_fname = fname;
const char *lastdot = strrchr(box_fname.string(), '.');
if (lastdot != NULL) {
box_fname[lastdot - box_fname.string()] = '\0';
}
box_fname += ".box";
FILE *box_file;
if (!(box_file = fopen(box_fname.string(), "r"))) {
CANTOPENFILE.error("ambigs_training", EXIT,
"Can't open box file %s\n", box_fname.string());
}
static PAGE_RES_IT page_res_it;
page_res_it.page_res = page_res;
page_res_it.restart_page();
int x_min, y_min, x_max, y_max;
char label[UNICHAR_LEN * 10];
// Process all the words on this page.
while (page_res_it.word() != NULL &&
read_next_box(applybox_page, box_file, label,
&x_min, &y_min, &x_max, &y_max)) {
// Init bounding box of the current word bounding box and from box file.
TBOX box = TBOX(ICOORD(x_min, y_min), ICOORD(x_max, y_max));
TBOX word_box(page_res_it.word()->word->bounding_box());
bool one_word = true;
// Check whether the bounding box of the next word overlaps with the
// current box from box file.
while (page_res_it.next_word() != NULL &&
box.x_overlap(page_res_it.next_word()->word->bounding_box())) {
word_box = word_box.bounding_union(
page_res_it.next_word()->word->bounding_box());
page_res_it.forward();
one_word = false;
}
if (!word_box.major_overlap(box)) {
if (!word_box.x_overlap(box)) {
// We must be looking at the word that belongs in the "next" bounding
// box from the box file. The ngram that was supposed to appear in
// the current box read from the box file must have been dropped by
// tesseract as noise.
tprintf("Word %s was dropped as noise.\n", label);
continue; // stay on this blob, but read next box from box file
} else {
tprintf("Error: Insufficient overlap for word box"
" and box from file for %s\n", label);
word_box.print();
box.print();
exit(1);
}
}
// Skip recognizing the ngram if tesseract is sure it's not
// one word, otherwise run one recognition pass on this word.
if (!one_word) {
tprintf("Tesseract segmented %s as multiple words\n", label);
} else {
ambigs_classify_and_output(&page_res_it, label, output_file);
}
page_res_it.forward();
}
fclose(box_file);
}
// Classifies the word the iterator currently points at (via
// classify_word_pass1()) and writes training output for it.
//
// The label is first measured in unichars; if it contains a position where
// the unicharset reports a step of 0, nothing is written and a message is
// printed instead. When the label is a single unichar and the best choice
// holds exactly one blob choice list, every entry of that list with a valid
// unichar id is written as "<unichar>\t<label>\t<rating>\t<certainty>".
// Finally the dictionary's PrintAmbigAlternatives() is called for the label.
//
// page_res_it: iterator positioned on the word to classify.
// label:       correct text for the word, taken from the box file.
// output_file: destination for the training output.
void Tesseract::ambigs_classify_and_output(PAGE_RES_IT *page_res_it,
                                           const char *label,
                                           FILE *output_file) {
  // Classify word.
  classify_word_pass1(page_res_it->word(), page_res_it->row()->row,
                      page_res_it->block()->block,
                      FALSE, NULL, NULL);
  WERD_CHOICE *best_choice = page_res_it->word()->best_choice;
  ASSERT_HOST(best_choice != NULL);
  ASSERT_HOST(best_choice->blob_choices() != NULL);

  // Count the unichars in the label, stopping early on a non-positive step.
  int label_num_unichars = 0;
  int step = 1;  // must be positive so that the first iteration runs
  int offset = 0;
  while (label[offset] != '\0' && step > 0) {
    step = getDict().getUnicharset().step(label + offset);
    offset += step;
    ++label_num_unichars;
  }
  if (step == 0) {
    tprintf("Not outputting illegal unichar %s\n", label);
    return;
  }

  // Output all classifier choices for the unigrams (1-1 classifications).
  if (label_num_unichars == 1 && best_choice->blob_choices()->length() == 1) {
    BLOB_CHOICE_LIST_C_IT list_it;
    list_it.set_to_list(best_choice->blob_choices());
    BLOB_CHOICE_IT choice_it;
    choice_it.set_to_list(list_it.data());
    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      BLOB_CHOICE *choice = choice_it.data();
      if (choice->unichar_id() == INVALID_UNICHAR_ID)
        continue;  // nothing printable for this entry
      fprintf(output_file, "%s\t%s\t%.4f\t%.4f\n",
              unicharset.id_to_unichar(choice->unichar_id()),
              label, choice->rating(), choice->certainty());
    }
  }

  // Output the raw choice for successful non 1-1 classifications.
  getDict().PrintAmbigAlternatives(output_file, label, label_num_unichars);
}
} // namespace tesseract

29
ccmain/blckerr.h Normal file
View File

@ -0,0 +1,29 @@
/**********************************************************************
* File: blckerr.h (Formerly blockerr.h)
* Description: Error codes for the page block classes.
* Author: Ray Smith
* Created: Tue Mar 19 17:43:30 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLCKERR_H
#define BLCKERR_H
#include "errcode.h"
// Error codes for the page block classes. Each constant is an ERRCODE
// initialized from the message text shown.
// A y coordinate lies outside the block.
const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
// No rectangle could be found for a line.
const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
// A gradient is on the wrong side of an edge step.
const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
// A word does not contain blobs of the requested type.
const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
#endif  // BLCKERR_H

805
ccmain/blobbox.cpp Normal file
View File

@ -0,0 +1,805 @@
/**********************************************************************
* File: blobbox.cpp (Formerly blobnbox.c)
* Description: Code for the textord blob class.
* Author: Ray Smith
* Created: Thu Jul 30 09:08:51 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "blobbox.h"
// Margin constant; the value was chosen arbitrarily. It is not referenced in
// the visible part of this file -- presumably used with projection profiles
// (TODO confirm).
#define PROJECTION_MARGIN 10 //arbitrary
// EXTERN expands to nothing here, so the variable macros below are emitted
// without any extra keyword in front of them.
#define EXTERN
// Tunable: weighting for error in believability (initial value 3).
EXTERN double_VAR (textord_error_weight, 3,
"Weighting for error in believability");
// Tunable: enables the projection profile bug fix (initial value TRUE).
EXTERN BOOL_VAR (pitsync_projection_fix, TRUE,
"Fix bug in projection profile");
// List macros for BLOBNBOX, TO_ROW and TO_BLOCK; presumably these expand to
// the list-class implementations for those types -- confirm against the
// list macro headers.
ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK)
/**********************************************************************
 * BLOBNBOX::merge
 *
 * Absorb the given blob, which should follow this one: its box is
 * merged into this blob's box and it is flagged as joined.
 **********************************************************************/
void BLOBNBOX::merge(BLOBNBOX *nextblob) {  // blob to join with
  box += nextblob->box;      // combine the two boxes into ours
  nextblob->joined = TRUE;   // the other blob is now part of this one
}
/**********************************************************************
 * BLOBNBOX::chop
 *
 * Divide this blob into equal-width pieces, with the number of pieces
 * taken from the box width over the x height. No outline is cut:
 * for every piece but the leftmost a fake BLOBNBOX carrying only a
 * bounding box is added after end_it, and this blob's own box is
 * shrunk to the leftmost piece. Pieces are produced right to left;
 * a piece whose y range comes out empty is skipped.
 **********************************************************************/
void BLOBNBOX::chop(BLOBNBOX_IT *start_it,  // location of this
                    BLOBNBOX_IT *end_it,    // iterator
                    FCOORD rotation,        // for landscape
                    float xheight) {        // of line
  float part_ymin, part_ymax;               // limits of part blob
  BLOBNBOX_IT scan_it;                      // blob iterator

  // get no of chops
  inT16 piece_count = (inT16) floor (box.width () / xheight);
  if (piece_count <= 1 || (blob_ptr == NULL && cblob_ptr == NULL))
    return;                                 // nothing to chop

  // width of each
  float piece_width = (float) (box.width () + 1) / piece_count;
  float right_edge = box.right ();          // right edge to scan

  for (inT16 piece = piece_count - 1; piece >= 0;
       piece--, right_edge -= piece_width) {
    float ymin = (float) MAX_INT32;         // limits of new blob
    float ymax = (float) -MAX_INT32;

    // Collect the y extent of all blobs from start_it to end_it that
    // falls inside the current vertical strip.
    scan_it = *start_it;
    BLOBNBOX *blob;                         // current blob
    do {
      blob = scan_it.data ();
      if (blob->blob_ptr != NULL) {
        find_blob_limits (blob->blob_ptr, right_edge - piece_width,
                          right_edge, rotation, part_ymin, part_ymax);
      } else {
        find_cblob_vlimits (blob->cblob_ptr, right_edge - piece_width,
                            right_edge,
                            /*rotation, */ part_ymin, part_ymax);
      }
      scan_it.forward ();
      if (part_ymin < ymin)
        ymin = part_ymin;
      if (part_ymax > ymax)
        ymax = part_ymax;
    } while (blob != end_it->data ());

    if (!(ymin < ymax))
      continue;                             // nothing in this strip

    inT16 left_edge = (inT16) floor (right_edge - piece_width);
    if (left_edge < box.left ())
      left_edge = box.left ();              // clip to real box

    // corners of box
    ICOORD bl = ICOORD (left_edge, (inT16) floor (ymin));
    ICOORD tr = ICOORD ((inT16) ceil (right_edge), (inT16) ceil (ymax));
    if (piece == 0) {
      box = TBOX (bl, tr);                  // change box
    } else {
      BLOBNBOX *fake_blob = new BLOBNBOX;   // fake blob
      fake_blob->box = TBOX (bl, tr);       // box is all it has
      end_it->add_after_stay_put (fake_blob);  // stay on current
    }
  }
}
/**********************************************************************
 * find_blob_limits
 *
 * Report the lowest and highest y of the part of a poly blob that lies
 * between leftx and rightx. Every outline point and its edge vector are
 * rotated by rotation first. Three things contribute to the range:
 * the y at which an edge crosses leftx, the y of a point whose x is in
 * [leftx, rightx], and the y at which an edge crosses rightx.
 * If nothing contributes, ymin is left at MAX_INT32 and ymax at
 * -MAX_INT32.
 **********************************************************************/
void find_blob_limits(                  //get y limits
                      PBLOB *blob,      //blob to search
                      float leftx,      //x limits
                      float rightx,
                      FCOORD rotation,  //for landscape
                      float &ymin,      //output y limits
                      float &ymax) {
  const float boundaries[2] = { leftx, rightx };
  OUTLINE_IT outline_it = blob->out_list ();
  POLYPT_IT point_it;

  ymin = (float) MAX_INT32;
  ymax = (float) -MAX_INT32;
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    point_it.set_to_list (outline_it.data ()->polypts ());
    for (point_it.mark_cycle_pt (); !point_it.cycled_list ();
         point_it.forward ()) {
      FCOORD start = point_it.data ()->pos;
      FCOORD edge = point_it.data ()->vec;
      start.rotate (rotation);
      edge.rotate (rotation);

      // An edge that strictly straddles a boundary contributes the y of
      // the crossing. Straddling implies edge.x () != 0, so the division
      // is safe.
      for (int side = 0; side < 2; side++) {
        const float limit = boundaries[side];
        if ((start.x () < limit && start.x () + edge.x () > limit)
            || (start.x () > limit && start.x () + edge.x () < limit)) {
          const float cross_y =
            start.y () + edge.y () * (limit - start.x ()) / edge.x ();
          if (cross_y < ymin)
            ymin = cross_y;
          if (cross_y > ymax)
            ymax = cross_y;
        }
      }

      // A point inside the slice contributes its own y.
      if (start.x () >= leftx && start.x () <= rightx) {
        if (start.y () > ymax)
          ymax = start.y ();
        if (start.y () < ymin)
          ymin = start.y ();
      }
    }
  }
}
/**********************************************************************
 * find_cblob_limits
 *
 * Step round each outline in the out_list of an edgestep blob, rotating
 * the start position and every step by rotation, and report the lowest
 * and highest y among the visited positions whose x is in
 * [leftx, rightx]. If no position qualifies, ymin is left at MAX_INT32
 * and ymax at -MAX_INT32.
 **********************************************************************/
void find_cblob_limits(                  //get y limits
                       C_BLOB *blob,     //blob to search
                       float leftx,      //x limits
                       float rightx,
                       FCOORD rotation,  //for landscape
                       float &ymin,      //output y limits
                       float &ymax) {
  C_OUTLINE_IT outline_it = blob->out_list ();

  ymin = (float) MAX_INT32;
  ymax = (float) -MAX_INT32;
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    C_OUTLINE *path = outline_it.data ();
    ICOORD here = path->start_pos ();
    here.rotate (rotation);

    inT16 step_no = 0;
    while (step_no < path->pathlength ()) {
      const bool in_slice = here.x () >= leftx && here.x () <= rightx;
      if (in_slice) {
        if (here.y () > ymax)
          ymax = here.y ();
        if (here.y () < ymin)
          ymin = here.y ();
      }
      // Advance by the rotated edge step.
      ICOORD move = path->step (step_no);
      move.rotate (rotation);
      here += move;
      step_no++;
    }
  }
}
/**********************************************************************
 * find_cblob_vlimits
 *
 * Step round each outline in the out_list of an edgestep blob (no
 * rotation is applied) and report the lowest and highest y among the
 * visited positions whose x is in [leftx, rightx]. If no position
 * qualifies, ymin is left at MAX_INT32 and ymax at -MAX_INT32.
 **********************************************************************/
void find_cblob_vlimits(               //get y limits
                        C_BLOB *blob,  //blob to search
                        float leftx,   //x limits
                        float rightx,
                        float &ymin,   //output y limits
                        float &ymax) {
  C_OUTLINE_IT outline_it = blob->out_list ();

  ymin = (float) MAX_INT32;
  ymax = (float) -MAX_INT32;
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    C_OUTLINE *path = outline_it.data ();
    ICOORD here = path->start_pos ();

    inT16 step_no = 0;
    while (step_no < path->pathlength ()) {
      const bool in_slice = here.x () >= leftx && here.x () <= rightx;
      if (in_slice) {
        if (here.y () > ymax)
          ymax = here.y ();
        if (here.y () < ymin)
          ymin = here.y ();
      }
      ICOORD move = path->step (step_no);
      here += move;              //move to next
      step_no++;
    }
  }
}
/**********************************************************************
 * find_cblob_hlimits
 *
 * Step round each outline in the out_list of an edgestep blob and
 * report the lowest and highest x among the visited positions whose y
 * is in [bottomy, topy]. If no position qualifies, xmin is left at
 * MAX_INT32 and xmax at -MAX_INT32.
 **********************************************************************/
void find_cblob_hlimits(                //get x limits
                        C_BLOB *blob,   //blob to search
                        float bottomy,  //y limits
                        float topy,
                        float &xmin,    //output x limits
                        float &xmax) {
  C_OUTLINE_IT outline_it = blob->out_list ();

  xmin = (float) MAX_INT32;
  xmax = (float) -MAX_INT32;
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    C_OUTLINE *path = outline_it.data ();
    ICOORD here = path->start_pos ();

    inT16 step_no = 0;
    while (step_no < path->pathlength ()) {
      const bool in_band = here.y () >= bottomy && here.y () <= topy;
      if (in_band) {
        if (here.x () > xmax)
          xmax = here.x ();
        if (here.x () < xmin)
          xmin = here.x ();
      }
      ICOORD move = path->step (step_no);
      here += move;              //move to next
      step_no++;
    }
  }
}
/**********************************************************************
 * rotate_blob
 *
 * Copy the poly blob and rotate every point (position and edge vector)
 * of the copy by the given vector, recomputing each outline's bounding
 * box afterwards. The input blob is not modified; the caller receives
 * a blob allocated with new.
 **********************************************************************/
PBLOB *rotate_blob(                 //rotate a copy
                   PBLOB *blob,     //blob to copy
                   FCOORD rotation  //vector to rotate by
                  ) {
  PBLOB *rotated = new PBLOB;
  *rotated = *blob;              //deep copy

  OUTLINE_IT outline_it (rotated->out_list ());
  POLYPT_IT point_it;
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    OUTLINE *outline = outline_it.data ();
    point_it.set_to_list (outline->polypts ());
    point_it.mark_cycle_pt ();
    while (!point_it.cycled_list ()) {
      POLYPT *point = point_it.data ();
      point->pos.rotate (rotation);
      point->vec.rotate (rotation);
      point_it.forward ();
    }
    // The points moved, so the cached box of the outline is stale.
    outline->compute_bb ();
  }
  return rotated;
}
/**********************************************************************
 * rotate_cblob
 *
 * Build a poly blob from the edgestep blob (PBLOB (blob, xheight)) and
 * rotate every point (position and edge vector) of it by the given
 * vector, recomputing each outline's bounding box afterwards.
 * The caller receives a blob allocated with new.
 **********************************************************************/
PBLOB *rotate_cblob(                 //rotate it
                    C_BLOB *blob,    //blob to convert
                    float xheight,   //for poly approx
                    FCOORD rotation  //for landscape
                   ) {
  PBLOB *rotated = new PBLOB (blob, xheight);

  OUTLINE_IT outline_it (rotated->out_list ());
  POLYPT_IT point_it;
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    OUTLINE *outline = outline_it.data ();
    point_it.set_to_list (outline->polypts ());
    point_it.mark_cycle_pt ();
    while (!point_it.cycled_list ()) {
      POLYPT *point = point_it.data ();
      point->pos.rotate (rotation);
      point->vec.rotate (rotation);
      point_it.forward ();
    }
    // The points moved, so the cached box of the outline is stale.
    outline->compute_bb ();
  }
  return rotated;
}
/**********************************************************************
 * crotate_cblob
 *
 * Make a rotated C_OUTLINE from each outline in the blob's out_list
 * and return a new C_BLOB built from them. The input blob is not
 * modified; the caller receives a blob allocated with new.
 **********************************************************************/
C_BLOB *crotate_cblob(                 //rotate it
                      C_BLOB *blob,    //blob to rotate
                      FCOORD rotation  //for landscape
                     ) {
  C_OUTLINE_LIST rotated_outlines;
  C_OUTLINE_IT dest_it (&rotated_outlines);
  C_OUTLINE_IT src_it (blob->out_list ());

  src_it.mark_cycle_pt ();
  while (!src_it.cycled_list ()) {
    C_OUTLINE *turned = new C_OUTLINE (src_it.data (), rotation);
    dest_it.add_after_then_move (turned);
    src_it.forward ();
  }
  return new C_BLOB (&rotated_outlines);
}
/**********************************************************************
 * box_next
 *
 * Return the bounding box of the blob under the iterator, extended by
 * the boxes of any following fake blobs (those with neither a poly nor
 * an edgestep blob, i.e. pre-chopped pieces). The iterator is left on
 * the next blob that is real and not joined to its predecessor.
 **********************************************************************/
TBOX box_next(                 //get bounding box
              BLOBNBOX_IT *it  //iterator to blobs
             ) {
  TBOX total_box = it->data ()->bounding_box ();

  for (;;) {
    it->forward ();
    BLOBNBOX *candidate = it->data ();
    const bool is_fake =
      candidate->blob () == NULL && candidate->cblob () == NULL;
    if (is_fake) {
      // Pre-chopped piece: absorb its box and keep going.
      total_box += candidate->bounding_box ();
    }
    else if (!candidate->joined_to_prev ()) {
      break;                     //start of the next real blob
    }
  }
  return total_box;
}
/**********************************************************************
 * box_next_pre_chopped
 *
 * Return the bounding box of the blob under the iterator (fake,
 * pre-chopped pieces are treated as blobs in their own right), then
 * move the iterator forward to the next blob that is not joined to
 * its predecessor.
 **********************************************************************/
TBOX box_next_pre_chopped(                 //get bounding box
                          BLOBNBOX_IT *it  //iterator to blobs
                         ) {
  TBOX first_box = it->data ()->bounding_box ();

  // Always advance at least once, then skip anything joined to prev.
  it->forward ();
  while (it->data ()->joined_to_prev ())
    it->forward ();
  return first_box;
}
/**********************************************************************
 * TO_ROW::TO_ROW
 *
 * Constructor to make a row from a blob. The row starts with limits
 * [bottom, top]. A row taller than row_size is shrunk symmetrically to
 * row_size; a row shorter than a third of row_size is grown
 * symmetrically to row_size / 3. initial_y_min always keeps the
 * uncorrected bottom.
 **********************************************************************/
TO_ROW::TO_ROW (                 //constructor
                BLOBNBOX * blob, //first blob
                float top,       //corrected top
                float bottom,    //of row
                float row_size   //ideal
               ): y_min(bottom), y_max(top), initial_y_min(bottom),
                  num_repeated_sets_(-1) {
  BLOBNBOX_IT blob_it (&blobs);
  blob_it.add_to_end (blob);

  const float excess = top - bottom - row_size;
  if (excess > 0) {
    // Too tall: trim half the excess from each side.
    y_max -= excess / 2;
    y_min += excess / 2;
  }
  else if ((top - bottom) * 3 < row_size) {
    // Very small object: pad half the shortfall onto each side.
    const float shortfall = row_size / 3 + bottom - top;
    y_max += shortfall / 2;
    y_min -= shortfall / 2;
  }
}
/**********************************************************************
 * TO_ROW::add_blob
 *
 * Add the blob to the end of the row and, while the row is still
 * thinner than row_size, expand the row limits towards the blob.
 * "allowed" is the remaining room (row_size minus the current height);
 * "available" is twice the total amount by which [bottom, top] sticks
 * out of the row, but never less than allowed. Each limit the blob
 * overshoots moves by overshoot * allowed / available.
 **********************************************************************/
void TO_ROW::add_blob(                 //add to row
                      BLOBNBOX *blob,  //blob to add
                      float top,       //corrected top
                      float bottom,    //of blob
                      float row_size   //ideal
                     ) {
  BLOBNBOX_IT blob_it (&blobs);
  blob_it.add_to_end (blob);

  const float allowed = row_size + y_min - y_max;
  // Negated comparisons so that a NaN takes the same path as before.
  if (!(allowed > 0))
    return;                      //row already full height

  float available = 0;
  if (top > y_max)
    available = top - y_max;
  if (bottom < y_min)
    available += y_min - bottom; //total available
  if (!(available > 0))
    return;                      //blob fits inside the row

  available += available;        //do it gradually
  if (available < allowed)
    available = allowed;
  if (bottom < y_min)
    y_min -= (y_min - bottom) * allowed / available;
  if (top > y_max)
    y_max += (top - y_max) * allowed / available;
}
/**********************************************************************
 * TO_ROW::insert_blob
 *
 * Add the blob to the row in order of left edge: it goes in front of
 * the first existing blob whose box starts strictly to the right of
 * the new blob's left edge, or at the end if there is none.
 **********************************************************************/
void TO_ROW::insert_blob(                //put in row in order
                         BLOBNBOX *blob  //blob to insert
                        ) {
  BLOBNBOX_IT blob_it (&blobs);

  if (blob_it.empty ()) {
    blob_it.add_before_then_move (blob);
    return;
  }

  const inT16 new_left = blob->bounding_box ().left ();
  for (blob_it.mark_cycle_pt ();
       !blob_it.cycled_list ()
       && blob_it.data ()->bounding_box ().left () <= new_left;
       blob_it.forward ()) {
    // skip blobs that start at or before the new one
  }

  if (blob_it.cycled_list ())
    blob_it.add_to_end (blob);
  else
    blob_it.add_before_stay_put (blob);
}
/**********************************************************************
* TO_ROW::compute_vertical_projection
*
* Compute the vertical projection of a TO_ROW from its blobs.
**********************************************************************/
void TO_ROW::compute_vertical_projection() { //project whole row
TBOX row_box; //bound of row
BLOBNBOX *blob; //current blob
TBOX blob_box; //bounding box
BLOBNBOX_IT blob_it = blob_list ();
if (blob_it.empty ())
return;
row_box = blob_it.data ()->bounding_box ();
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ())
row_box += blob_it.data ()->bounding_box ();
projection.set_range (row_box.left () - PROJECTION_MARGIN,
row_box.right () + PROJECTION_MARGIN);
projection_left = row_box.left () - PROJECTION_MARGIN;
projection_right = row_box.right () + PROJECTION_MARGIN;
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
blob = blob_it.data ();
if (blob->blob () != NULL)
vertical_blob_projection (blob->blob (), &projection);
else if (blob->cblob () != NULL)
vertical_cblob_projection (blob->cblob (), &projection);
}
}
/**********************************************************************
 * vertical_blob_projection
 *
 * Add the vertical projection of every outline in the poly blob's
 * out_list to the given STATS.
 **********************************************************************/
void vertical_blob_projection(              //project outlines
                              PBLOB *blob,  //blob to project
                              STATS *stats  //output
                             ) {
  OUTLINE_IT outline_it (blob->out_list ());

  outline_it.mark_cycle_pt ();
  while (!outline_it.cycled_list ()) {
    vertical_outline_projection (outline_it.data (), stats);
    outline_it.forward ();
  }
}
/**********************************************************************
* vertical_outline_projection
*
* Compute the vertical projection of a poly outline from its polygon
* edges and add it to the given STATS, then do the same recursively
* for each child outline.
*
* For every edge with a non-zero x component, and for every integer
* column xcoord the edge passes through, the value
* floor (ymean * width / 2 + 0.5) is added to bucket xcoord, where
* width is the first x of the edge's span inside the column minus the
* second, and ymean is the sum of the edge's y at those two x values.
* Edges with a zero x component contribute nothing.
**********************************************************************/
void vertical_outline_projection( //project outlines
OUTLINE *outline, //outline to project
STATS *stats //output
) {
POLYPT *polypt; //current point
inT32 xcoord; //current pixel coord
float end_x; //end of vec
POLYPT_IT poly_it = outline->polypts ();
OUTLINE_IT out_it = outline->child ();
float ymean; //amount to add
float width; //amount of x
for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ()) {
polypt = poly_it.data ();
end_x = polypt->pos.x () + polypt->vec.x ();
//edge runs left to right
if (polypt->vec.x () > 0) {
for (xcoord = (inT32) floor (polypt->pos.x ());
xcoord < end_x; xcoord++) {
//first end of the span: the column's left side, or the edge start
//if the edge starts inside this column
if (polypt->pos.x () < xcoord) {
width = (float) xcoord;
ymean =
polypt->vec.y () * (xcoord -
polypt->pos.x ()) / polypt->vec.x () +
polypt->pos.y ();
}
else {
width = polypt->pos.x ();
ymean = polypt->pos.y ();
}
//second end of the span: the column's right side, or the edge end
//if the edge finishes inside this column
if (end_x > xcoord + 1) {
width -= xcoord + 1;
ymean +=
polypt->vec.y () * (xcoord + 1 -
polypt->pos.x ()) / polypt->vec.x () +
polypt->pos.y ();
}
else {
width -= end_x;
ymean += polypt->pos.y () + polypt->vec.y ();
}
//ymean holds the SUM of the two end y values, hence the / 2
ymean = ymean * width / 2;
stats->add (xcoord, (inT32) floor (ymean + 0.5));
}
}
//edge runs right to left
else if (polypt->vec.x () < 0) {
for (xcoord = (inT32) floor (end_x);
xcoord < polypt->pos.x (); xcoord++) {
//first end of the span: the column's right side, or the edge start
//if the edge starts inside this column
if (polypt->pos.x () > xcoord + 1) {
width = xcoord + 1.0f;
ymean =
polypt->vec.y () * (xcoord + 1 -
polypt->pos.x ()) / polypt->vec.x () +
polypt->pos.y ();
}
else {
width = polypt->pos.x ();
ymean = polypt->pos.y ();
}
//second end of the span: the column's left side, or the edge end
//if the edge finishes inside this column
if (end_x < xcoord) {
width -= xcoord;
ymean +=
polypt->vec.y () * (xcoord -
polypt->pos.x ()) / polypt->vec.x () +
polypt->pos.y ();
}
else {
width -= end_x;
ymean += polypt->pos.y () + polypt->vec.y ();
}
//ymean holds the SUM of the two end y values, hence the / 2
ymean = ymean * width / 2;
stats->add (xcoord, (inT32) floor (ymean + 0.5));
}
}
}
//children are projected into the same STATS
for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
vertical_outline_projection (out_it.data (), stats);
}
}
/**********************************************************************
 * vertical_cblob_projection
 *
 * Add the vertical projection of every outline in the edgestep blob's
 * out_list to the given STATS.
 **********************************************************************/
void vertical_cblob_projection(               //project outlines
                               C_BLOB *blob,  //blob to project
                               STATS *stats   //output
                              ) {
  C_OUTLINE_IT outline_it (blob->out_list ());

  outline_it.mark_cycle_pt ();
  while (!outline_it.cycled_list ()) {
    vertical_coutline_projection (outline_it.data (), stats);
    outline_it.forward ();
  }
}
/**********************************************************************
 * vertical_coutline_projection
 *
 * Compute the vertical projection of an edgestep outline and add it to
 * the given STATS, then do the same recursively for each child outline.
 *
 * Each step with a positive x component adds the current y to bucket
 * x; each step with a negative x component adds minus the current y to
 * bucket x - 1. When pitsync_projection_fix is set both signs are
 * flipped. Vertical steps contribute nothing.
 **********************************************************************/
void vertical_coutline_projection(                     //project outlines
                                  C_OUTLINE *outline,  //outline to project
                                  STATS *stats         //output
                                 ) {
  ICOORD here = outline->start_pos ();
  const inT32 step_count = outline->pathlength ();

  for (inT16 step_no = 0; step_no < step_count; step_no++) {
    ICOORD move = outline->step (step_no);
    if (move.x () > 0) {
      stats->add (here.x (),
                  pitsync_projection_fix ? -here.y () : here.y ());
    }
    else if (move.x () < 0) {
      stats->add (here.x () - 1,
                  pitsync_projection_fix ? here.y () : -here.y ());
    }
    here += move;
  }

  // Children are projected into the same STATS.
  C_OUTLINE_IT child_it = outline->child ();
  child_it.mark_cycle_pt ();
  while (!child_it.cycled_list ()) {
    vertical_coutline_projection (child_it.data (), stats);
    child_it.forward ();
  }
}
/**********************************************************************
 * TO_BLOCK::TO_BLOCK
 *
 * Constructor to make a TO_BLOCK from a real block. Only the block
 * pointer is recorded; no other member is set here.
 **********************************************************************/
TO_BLOCK::TO_BLOCK(                  //make a block
                   BLOCK *src_block  //real block
                  ): block(src_block) {
}
// Deletes the poly blob and the edgestep blob (whichever are present) of
// every BLOBNBOX on the list. A BLOBNBOX generally doesn't own its blobs,
// so where it does they have to be deleted explicitly like this.
// The BLOBNBOXes themselves are left on the list.
static void clear_blobnboxes(BLOBNBOX_LIST* boxes) {
  BLOBNBOX_IT box_it(boxes);
  box_it.mark_cycle_pt();
  while (!box_it.cycled_list()) {
    PBLOB* poly = box_it.data()->blob();
    C_BLOB* edgestep = box_it.data()->cblob();
    if (poly != NULL)
      delete poly;
    if (edgestep != NULL)
      delete edgestep;
    box_it.forward();
  }
}
// Any residual BLOBNBOXes at this stage own their blobs, so the blobs on
// all five lists are deleted before the lists themselves go away.
TO_BLOCK::~TO_BLOCK() {
  BLOBNBOX_LIST* const owned_lists[] = {
    &blobs, &underlines, &noise_blobs, &small_blobs, &large_blobs
  };
  const int list_count = sizeof(owned_lists) / sizeof(owned_lists[0]);
  for (int i = 0; i < list_count; ++i)
    clear_blobnboxes(owned_lists[i]);
}
// Draw the blobs on the various lists in the block in different colors.
// Lists are drawn in the order noise, small, large, medium, so later
// lists paint over earlier ones.
void TO_BLOCK::plot_graded_blobs(ScrollView* to_win) {
  struct ListColours {
    BLOBNBOX_LIST* list;         // blobs to draw
    ScrollView::Color body;      // colour of outer bits
    ScrollView::Color child;     // colour of holes
  };
  ListColours passes[] = {
    { &noise_blobs, ScrollView::CORAL, ScrollView::BLUE },
    { &small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW },
    { &large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW },
    { &blobs, ScrollView::WHITE, ScrollView::BROWN }
  };
  const int pass_count = sizeof(passes) / sizeof(passes[0]);
  for (int i = 0; i < pass_count; ++i) {
    plot_blob_list(to_win, passes[i].list, passes[i].body, passes[i].child);
  }
}
/**********************************************************************
 * plot_blob_list
 *
 * Draw a list of blobs by calling BLOBNBOX::plot on every element.
 *
 * win           window to draw in
 * list          blobs to draw
 * body_colour   colour for the outer bits of each blob
 * child_colour  colour for the holes of each blob
 *
 * BLOBNBOX::plot only exists when GRAPHICS_DISABLED is not defined, so
 * in a graphics-disabled build this function compiles to a no-op
 * instead of failing to compile.
 **********************************************************************/
void plot_blob_list(ScrollView* win,                   // window to draw in
                    BLOBNBOX_LIST *list,               // blob list
                    ScrollView::Color body_colour,     // colour to draw
                    ScrollView::Color child_colour) {  // colour of child
#ifndef GRAPHICS_DISABLED
  BLOBNBOX_IT it = list;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    it.data()->plot(win, body_colour, child_colour);
  }
#endif
}

557
ccmain/blobbox.h Normal file
View File

@ -0,0 +1,557 @@
/**********************************************************************
* File: blobbox.h (Formerly blobnbox.h)
* Description: Code for the textord blob class.
* Author: Ray Smith
* Created: Thu Jul 30 09:08:51 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLOBBOX_H
#define BLOBBOX_H
#include "varable.h"
#include "clst.h"
#include "elst2.h"
#include "werd.h"
#include "ocrblock.h"
#include "statistc.h"
extern double_VAR_H (textord_error_weight, 3,
"Weighting for error in believability");
// Pitch classification; this is the type of the pitch_decision members
// of TO_ROW and TO_BLOCK ("how strong is decision").
// NOTE(review): the meanings marked "presumably" are read from the
// enumerator names only -- confirm against the pitch-detection code.
enum PITCH_TYPE
{
PITCH_DUNNO, //insufficient data
PITCH_DEF_FIXED, //definitely fixed
PITCH_MAYBE_FIXED, //could be
PITCH_DEF_PROP, //presumably definitely proportional
PITCH_MAYBE_PROP, //presumably could be proportional
PITCH_CORR_FIXED, //presumably corrected to fixed
PITCH_CORR_PROP //presumably corrected to proportional
};
// The possible tab-stop types of each side of a BLOBNBOX.
// Stored per blob in left_tab_type_ and right_tab_type_, both of which
// BLOBNBOX::Init() sets to TT_NONE.
enum TabType {
TT_NONE, // Not a tab.
TT_DELETED, // Not a tab after detailed analysis.
TT_UNCONFIRMED, // Initial designation of a tab-stop candidate.
TT_FAKE, // Added by interpolation.
TT_CONFIRMED, // Aligned with neighbours.
TT_VLINE // Detected as a vertical line.
};
// The possible region types of a BLOBNBOX.
// Note: keep all the text types > BRT_UNKNOWN and all the image types less.
// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor below.
// BLOBNBOX::Init() starts every blob off as BRT_UNKNOWN.
enum BlobRegionType {
BRT_NOISE, // Neither text nor image.
BRT_HLINE, // Horizontal separator line.
BRT_RECTIMAGE, // Rectangular image.
BRT_POLYIMAGE, // Non-rectangular image.
BRT_UNKNOWN, // Not determined yet.
BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
BRT_TEXT, // Convincing text.
BRT_COUNT // Number of possibilities (must stay last).
};
namespace tesseract {
class ColPartition;
}
class BLOBNBOX;
ELISTIZEH (BLOBNBOX)
// A list element that pairs a blob with its bounding box and the
// per-blob data gathered by layout analysis. A BLOBNBOX refers to a poly
// blob (PBLOB), to an edgestep blob (C_BLOB), or to neither: the default
// constructor makes such a "fake" blob, which has nothing but a box.
// The blobs are referred to by plain pointer and are not deleted by this
// class.
class BLOBNBOX:public ELIST_LINK
{
 public:
  // Makes a fake blob: no poly blob, no edgestep blob, zero area.
  BLOBNBOX() {
    blob_ptr = NULL;
    cblob_ptr = NULL;
    area = 0;
    Init();
  }
  // Wraps a poly blob, taking its bounding box and area. srcblob must not
  // be NULL.
  explicit BLOBNBOX(PBLOB *srcblob) {
    blob_ptr = srcblob;
    cblob_ptr = NULL;
    box = srcblob->bounding_box ();
    area = (int) srcblob->area ();
    Init();
  }
  // Wraps an edgestep blob, taking its bounding box and area. srcblob must
  // not be NULL.
  explicit BLOBNBOX(C_BLOB *srcblob) {
    blob_ptr = NULL;
    cblob_ptr = srcblob;
    box = srcblob->bounding_box ();
    area = (int) srcblob->area ();
    Init();
  }

  // Rotates the stored box only; the blob itself is untouched.
  void rotate_box(FCOORD vec) {
    box.rotate(vec);
  }
  // Moves the stored box only; the blob itself is untouched.
  void translate_box(ICOORD v) {
    box.move(v);
  }
  void merge(BLOBNBOX *nextblob);
  void chop(                        // fake chop blob
            BLOBNBOX_IT *start_it,  // location of this
            BLOBNBOX_IT *blob_it,   // iterator
            FCOORD rotation,        // for landscape
            float xheight);         // line height

  // Simple accessors.
  const TBOX& bounding_box() const {
    return box;
  }
  // Refreshes the stored box from the blob, preferring the edgestep blob
  // when both are present. A fake blob has no blob to measure, so its
  // stored box is left unchanged rather than dereferencing a NULL pointer.
  void compute_bounding_box() {
    if (cblob_ptr != NULL)
      box = cblob_ptr->bounding_box();
    else if (blob_ptr != NULL)
      box = blob_ptr->bounding_box();
  }
  const TBOX& reduced_box() const {
    return red_box;
  }
  // Stores the reduced box and records that it has been set.
  void set_reduced_box(TBOX new_box) {
    red_box = new_box;
    reduced = TRUE;
  }
  inT32 enclosed_area() const {
    return area;
  }
  bool joined_to_prev() const {
    return joined != 0;
  }
  bool red_box_set() const {
    return reduced != 0;
  }
  int repeated_set() const {
    return repeated_set_;
  }
  void set_repeated_set(int set_id) {
    repeated_set_ = set_id;
  }
  PBLOB *blob() const {
    return blob_ptr;
  }
  C_BLOB *cblob() const {
    return cblob_ptr;
  }
  TabType left_tab_type() const {
    return left_tab_type_;
  }
  void set_left_tab_type(TabType new_type) {
    left_tab_type_ = new_type;
  }
  TabType right_tab_type() const {
    return right_tab_type_;
  }
  void set_right_tab_type(TabType new_type) {
    right_tab_type_ = new_type;
  }
  BlobRegionType region_type() const {
    return region_type_;
  }
  void set_region_type(BlobRegionType new_type) {
    region_type_ = new_type;
  }
  int left_rule() const {
    return left_rule_;
  }
  void set_left_rule(int new_left) {
    left_rule_ = new_left;
  }
  int right_rule() const {
    return right_rule_;
  }
  void set_right_rule(int new_right) {
    right_rule_ = new_right;
  }
  int left_crossing_rule() const {
    return left_crossing_rule_;
  }
  void set_left_crossing_rule(int new_left) {
    left_crossing_rule_ = new_left;
  }
  int right_crossing_rule() const {
    return right_crossing_rule_;
  }
  void set_right_crossing_rule(int new_right) {
    right_crossing_rule_ = new_right;
  }
  float horz_stroke_width() const {
    return horz_stroke_width_;
  }
  void set_horz_stroke_width(float width) {
    horz_stroke_width_ = width;
  }
  float vert_stroke_width() const {
    return vert_stroke_width_;
  }
  void set_vert_stroke_width(float width) {
    vert_stroke_width_ = width;
  }
  tesseract::ColPartition* owner() const {
    return owner_;
  }
  void set_owner(tesseract::ColPartition* new_owner) {
    owner_ = new_owner;
  }
  void set_noise_flag(bool flag) {
    noise_flag_ = flag;
  }
  bool noise_flag() const {
    return noise_flag_;
  }

#ifndef GRAPHICS_DISABLED
  // Returns the display colour for this blob's region type.
  // Keep in sync with BlobRegionType.
  ScrollView::Color BoxColor() const {
    switch (region_type_) {
    case BRT_HLINE:
      return ScrollView::YELLOW;
    case BRT_RECTIMAGE:
      return ScrollView::RED;
    case BRT_POLYIMAGE:
      return ScrollView::ORANGE;
    case BRT_UNKNOWN:
      return ScrollView::CYAN;
    case BRT_VERT_TEXT:
      return ScrollView::GREEN;
    case BRT_TEXT:
      return ScrollView::BLUE;
    case BRT_NOISE:
    default:
      return ScrollView::GREY;
    }
  }

  // Draws whichever of the poly and edgestep blobs are present; a fake
  // blob draws nothing.
  void plot(ScrollView* window,                // window to draw in
            ScrollView::Color blob_colour,     // for outer bits
            ScrollView::Color child_colour) {  // for holes
    if (blob_ptr != NULL)
      blob_ptr->plot(window, blob_colour, child_colour);
    if (cblob_ptr != NULL)
      cblob_ptr->plot(window, blob_colour, child_colour);
  }
#endif

  NEWDELETE2(BLOBNBOX)

 private:
  // Initializes the bulk of the members to default values.
  // blob_ptr, cblob_ptr, box and area are set by the constructors.
  void Init() {
    joined = false;
    reduced = false;
    repeated_set_ = 0;
    left_tab_type_ = TT_NONE;
    right_tab_type_ = TT_NONE;
    region_type_ = BRT_UNKNOWN;
    left_rule_ = 0;
    right_rule_ = 0;
    left_crossing_rule_ = 0;
    right_crossing_rule_ = 0;
    horz_stroke_width_ = 0.0f;
    vert_stroke_width_ = 0.0f;
    owner_ = NULL;
    noise_flag_ = false;
  }

  PBLOB *blob_ptr;               // poly blob
  C_BLOB *cblob_ptr;             // edgestep blob
  TBOX box;                      // bounding box
  TBOX red_box;                  // reduced bounding box
  int area:30;                   // enclosed area
  // The two flags are unsigned: a signed 1-bit field can only hold 0 and
  // -1, so it cannot portably represent TRUE (1).
  unsigned int joined:1;         // joined to prev
  unsigned int reduced:1;        // reduced box set
  int repeated_set_;             // id of the set of repeated blobs
  TabType left_tab_type_;        // Indicates tab-stop assessment
  TabType right_tab_type_;       // Indicates tab-stop assessment
  BlobRegionType region_type_;   // Type of region this blob belongs to
  inT16 left_rule_;              // x-coord of nearest but not crossing rule line
  inT16 right_rule_;             // x-coord of nearest but not crossing rule line
  inT16 left_crossing_rule_;     // x-coord of nearest or crossing rule line
  inT16 right_crossing_rule_;    // x-coord of nearest or crossing rule line
  float horz_stroke_width_;      // Median horizontal stroke width
  float vert_stroke_width_;      // Median vertical stroke width
  tesseract::ColPartition* owner_;  // Who will delete me when I am not needed
  // Was the blob flagged as noise in the initial filtering step
  bool noise_flag_;
};
// A row of blobs under construction by text ordering: the blob list, the
// vertical limits of the row, the fitted baseline data and the pitch and
// spacing estimates for the row.
// NOTE(review): apart from num_repeated_sets_ (and, in the blob
// constructor, the y limits) no member is initialised by the
// constructors visible here.
class TO_ROW:public ELIST2_LINK
{
public:
TO_ROW() {
num_repeated_sets_ = -1;
} //empty
TO_ROW( //constructor
BLOBNBOX *blob, //from first blob
float top, //top of row
float bottom, //bottom of row
float row_size); //target height
float max_y() const { //access function
return y_max;
}
float min_y() const {
return y_min;
}
float mean_y() const { //middle of the row limits
return (y_min + y_max) / 2.0f;
}
float initial_min_y() const { //bottom given to the constructor
return initial_y_min;
}
float line_m() const { //access to line fit
return m;
}
float line_c() const {
return c;
}
float line_error() const {
return error;
}
float parallel_c() const {
return para_c;
}
float parallel_error() const {
return para_error;
}
float believability() const { //baseline goodness
return credibility;
}
float intercept() const { //real parallel_c
return y_origin;
}
void add_blob( //put in row
BLOBNBOX *blob, //blob to add
float top, //top of blob
float bottom, //bottom of blob
float row_size); //target height
void insert_blob( //put in row in order
BLOBNBOX *blob);
BLOBNBOX_LIST *blob_list() { //get list
return &blobs;
}
void set_line( //set line spec
float new_m, //line to set
float new_c,
float new_error) {
m = new_m;
c = new_c;
error = new_error;
}
//Besides storing new_c and new_error this derives credibility
//(blob count minus textord_error_weight * new_error) and y_origin
//(new_c / sqrt (1 + gradient * gradient)).
void set_parallel_line( //set fixed gradient line
float gradient, //page gradient
float new_c,
float new_error) {
para_c = new_c;
para_error = new_error;
credibility =
(float) (blobs.length () - textord_error_weight * new_error);
y_origin = (float) (new_c / sqrt (1 + gradient * gradient));
//real intercept
}
void set_limits( //set min,max
float new_min, //bottom and
float new_max) { //top of row
y_min = new_min;
y_max = new_max;
}
void compute_vertical_projection();
//get projection
//num_repeated_sets_ == -1 means repeated blobs have not been searched for
bool rep_chars_marked() const {
return num_repeated_sets_ != -1;
}
void clear_rep_chars_marked() {
num_repeated_sets_ = -1;
}
int num_repeated_sets() const {
return num_repeated_sets_;
}
void set_num_repeated_sets(int num_sets) {
num_repeated_sets_ = num_sets;
}
// merged: true when dead
NEWDELETE2 (TO_ROW) BOOL8 merged;
BOOL8 all_caps; // had no ascenders
BOOL8 used_dm_model; // in guessing pitch
inT16 projection_left; // start of projection
inT16 projection_right; // end of projection
PITCH_TYPE pitch_decision; // how strong is decision
float fixed_pitch; // pitch or 0
float fp_space; // sp if fixed pitch
float fp_nonsp; // nonsp if fixed pitch
float pr_space; // sp if prop
float pr_nonsp; // non sp if prop
float spacing; // to "next" row
float xheight; // of line
int xheight_evidence; // number of blobs of height xheight
float ascrise; // ascenders
float descdrop; // descenders
inT32 min_space; // min size for real space
inT32 max_nonspace; // max size of non-space
inT32 space_threshold; // space vs nonspace
float kern_size; // average non-space
float space_size; // average space
WERD_LIST rep_words; // repeated chars
ICOORDELT_LIST char_cells; // fixed pitch cells
QSPLINE baseline; // curved baseline
STATS projection; // vertical projection
private:
BLOBNBOX_LIST blobs; //blobs in row
float y_min; //coords
float y_max;
float initial_y_min; //bottom passed to the constructor
float m, c; //line spec
float error; //line error
float para_c; //constrained fit
float para_error;
float y_origin; //rotated para_c;
float credibility; //baseline believability
int num_repeated_sets_; // number of sets of repeated blobs
// set to -1 if we have not searched
// for repeated blobs in this row yet
};
ELIST2IZEH (TO_ROW)
// Text-ordering view of a BLOCK: the block's blobs graded onto five lists,
// the temporary rows made from them, and block-level size, spacing and
// pitch estimates. The destructor deletes the blobs still referenced from
// the five blob lists.
// NOTE(review): neither constructor initialises the estimate members, and
// the default constructor leaves the block pointer unset as well.
class TO_BLOCK:public ELIST_LINK
{
public:
TO_BLOCK() {
} //empty
TO_BLOCK( //constructor
BLOCK *src_block); //real block
~TO_BLOCK();
TO_ROW_LIST *get_rows() { //access function
return &row_list;
}
//Prints one line per row: y range, parallel_c and number of blobs.
void print_rows() { //debug info
TO_ROW_IT row_it = &row_list;
TO_ROW *row;
for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
row_it.forward ()) {
row = row_it.data ();
printf ("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT
"\n", row->min_y (), row->max_y (), row->parallel_c (),
row->blob_list ()->length ());
}
}
// Draw the blobs on the various lists in the block in different colors.
void plot_graded_blobs(ScrollView* to_win);
BLOBNBOX_LIST blobs; //medium size
BLOBNBOX_LIST underlines; //underline blobs
BLOBNBOX_LIST noise_blobs; //very small
BLOBNBOX_LIST small_blobs; //fairly small
BLOBNBOX_LIST large_blobs; //big blobs
BLOCK *block; //real block
PITCH_TYPE pitch_decision; //how strong is decision
float line_spacing; //estimate
float line_size; //estimate
float max_blob_size; //line assignment limit
float baseline_offset; //phase shift
float xheight; //median blob size
float fixed_pitch; //pitch or 0
float kern_size; //average non-space
float space_size; //average space
inT32 min_space; //min definite space
inT32 max_nonspace; //max definite
float fp_space; //sp if fixed pitch
float fp_nonsp; //nonsp if fixed pitch
float pr_space; //sp if prop
float pr_nonsp; //non sp if prop
TO_ROW *key_row; //starting row
NEWDELETE2 (TO_BLOCK) private:
TO_ROW_LIST row_list; //temporary rows
};
ELISTIZEH (TO_BLOCK)
extern double_VAR_H (textord_error_weight, 3,
"Weighting for error in believability");
//Prototypes for blob limit finding, rotation, bounding-box stepping,
//projection and plotting helpers. The definitions are not in this header.
void find_blob_limits( //get y limits
PBLOB *blob, //blob to search
float leftx, //x limits
float rightx,
FCOORD rotation, //for landscape
float &ymin, //output y limits
float &ymax);
void find_cblob_limits( //get y limits
C_BLOB *blob, //blob to search
float leftx, //x limits
float rightx,
FCOORD rotation, //for landscape
float &ymin, //output y limits
float &ymax);
void find_cblob_vlimits( //get y limits
C_BLOB *blob, //blob to search
float leftx, //x limits
float rightx,
float &ymin, //output y limits
float &ymax);
void find_cblob_hlimits( //get x limits
C_BLOB *blob, //blob to search
float bottomy, //y limits
float topy,
float &xmin, //output x limits
float &xymax); //NOTE(review): name looks like a typo for xmax - confirm
PBLOB *rotate_blob( //rotate it
PBLOB *blob, //blob to rotate
FCOORD rotation //vector to rotate by
);
PBLOB *rotate_cblob( //rotate it
C_BLOB *blob, //blob to rotate
float xheight, //for poly approx
FCOORD rotation //for landscape
);
C_BLOB *crotate_cblob( //rotate it
C_BLOB *blob, //blob to rotate
FCOORD rotation //for landscape
);
TBOX box_next( //get bounding box
BLOBNBOX_IT *it //iterator to blobs
);
TBOX box_next_pre_chopped( //get bounding box
BLOBNBOX_IT *it //iterator to blobs
);
void vertical_blob_projection( //project outlines
PBLOB *blob, //blob to project
STATS *stats //output
);
//project outlines
void vertical_outline_projection(OUTLINE *outline, //outline to project
STATS *stats //output
);
void vertical_cblob_projection( //project outlines
C_BLOB *blob, //blob to project
STATS *stats //output
);
void vertical_coutline_projection( //project outlines
C_OUTLINE *outline, //outline to project
STATS *stats //output
);
//draw a list of blobs in a window
void plot_blob_list(ScrollView* win, // window to draw in
BLOBNBOX_LIST *list, // blob list
ScrollView::Color body_colour, // colour to draw
ScrollView::Color child_colour); // colour of child
#endif

247
ccmain/blobs.cpp Normal file
View File

@ -0,0 +1,247 @@
/* -*-C-*-
********************************************************************************
*
* File: blobs.c (Formerly blobs.c)
* Description: Blob definition
* Author: Mark Seaman, OCR Technology
* Created: Fri Oct 27 15:39:52 1989
* Modified: Thu Mar 28 15:33:26 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Experimental (Do Not Distribute)
*
* (c) Copyright 1989, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*********************************************************************************/
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "mfcpch.h"
#include "blobs.h"
#include "cutil.h"
#include "emalloc.h"
#include "structures.h"
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
/**********************************************************************
 * blob_origin
 *
 * Find the origin of a single compound blob.  The origin is defined as
 * the centre of the blob's bounding box and is written to *origin.
 **********************************************************************/
void blob_origin(TBLOB *blob,       /*blob to compute on */
                 TPOINT *origin) {  /*return value */
  TPOINT box_topleft;            /*corners of the bounding box */
  TPOINT box_botright;

  blob_bounding_box(blob, &box_topleft, &box_botright);

  /*midpoint of the two corners, one axis at a time */
  origin->x = (box_topleft.x + box_botright.x) / 2;
  origin->y = (box_topleft.y + box_botright.y) / 2;
}
/**********************************************************************
 * blob_bounding_box
 *
 * Compute the bounding box of a compound blob: the extreme coordinates
 * over the stored boxes of every outline on the blob's outline chain.
 * "Top" is the larger y value and "bottom" the smaller one.
 *
 * A NULL blob, or a blob with no outlines, produces a box with both
 * corners at (0, 0).
 **********************************************************************/
void blob_bounding_box(TBLOB *blob,                 /*blob to compute on */
                       register TPOINT *topleft,    /*bounding box */
                       register TPOINT *botright) {
  TESSLINE *current;             /*outline being merged in */

  if (blob == NULL || blob->outlines == NULL) {
    /*nothing to measure: default value */
    topleft->x = topleft->y = 0;
    *botright = *topleft;
    return;
  }

  /*seed the box from the first outline, then widen it with the rest */
  current = blob->outlines;
  *topleft = current->topleft;
  *botright = current->botright;

  current = current->next;
  while (current != NULL) {
    if (current->topleft.x < topleft->x)
      topleft->x = current->topleft.x;      /*further left */
    if (current->botright.x > botright->x)
      botright->x = current->botright.x;    /*further right */
    if (current->topleft.y > topleft->y)
      topleft->y = current->topleft.y;      /*higher */
    if (current->botright.y < botright->y)
      botright->y = current->botright.y;    /*lower */
    current = current->next;
  }
}
/**********************************************************************
 * blobs_bounding_box
 *
 * Compute the bounding box that encloses every blob in a list (i.e. a
 * whole word).  The result is seeded from the first blob and then
 * stretched by each blob visited by iterate_blobs.
 **********************************************************************/
void blobs_bounding_box(TBLOB *blobs, TPOINT *topleft, TPOINT *botright) {
  TPOINT one_topleft;            /*box of the blob being merged */
  TPOINT one_botright;
  TBLOB *current;

  /* Seed the result with the box of the first blob */
  blob_bounding_box(blobs, topleft, botright);

  iterate_blobs(current, blobs) {
    blob_bounding_box(current, &one_topleft, &one_botright);

    if (one_topleft.x < topleft->x)
      topleft->x = one_topleft.x;
    if (one_topleft.y > topleft->y)
      topleft->y = one_topleft.y;
    if (one_botright.x > botright->x)
      botright->x = one_botright.x;
    if (one_botright.y < botright->y)
      botright->y = one_botright.y;
  }
}
/**********************************************************************
 * blobs_origin
 *
 * Find the origin of a list of blobs.  The origin is defined as the
 * centre of the bounding box around the whole list (see
 * blobs_bounding_box) and is written to *origin.
 **********************************************************************/
void blobs_origin(TBLOB *blobs,     /*blobs to compute on */
                  TPOINT *origin) { /*return value */
  TPOINT list_topleft;           /*corners of the combined box */
  TPOINT list_botright;

  blobs_bounding_box(blobs, &list_topleft, &list_botright);

  /*midpoint of the two corners, one axis at a time */
  origin->x = (list_topleft.x + list_botright.x) / 2;
  origin->y = (list_topleft.y + list_botright.y) / 2;
}
/**********************************************************************
 * blobs_widths
 *
 * Compute the widths of a list of blobs.  Returns a newly allocated
 * WIDTH_RECORD in which num_chars is the number of blobs and widths[]
 * holds, in order: width of blob 0, gap between blobs 0 and 1, width
 * of blob 1, gap between blobs 1 and 2, ... width of the last blob
 * (2 * num_chars - 1 entries).  Widths and gaps are differences of
 * bounding-box x coordinates.
 *
 * An empty (NULL) list yields a record with num_chars == 0 and a single
 * zero entry in widths[], instead of dereferencing the NULL list head
 * and writing through a zero-sized allocation.
 *
 * The record comes from memalloc and is not freed here.
 **********************************************************************/
WIDTH_RECORD *blobs_widths(TBLOB *blobs) {  /*blobs to compute on */
  WIDTH_RECORD *width_record;
  TPOINT topleft;                /*bounding box */
  TPOINT botright;
  TBLOB *blob;                   /*current blob */
  int i = 0;                     /*next free slot in widths[] */
  int blob_end;                  /*right edge of the previous blob */
  int num_blobs;

  if (blobs == NULL) {
    /* Empty word: hand back a minimal, fully initialized record */
    width_record = (WIDTH_RECORD *) memalloc (sizeof (WIDTH_RECORD));
    width_record->num_chars = 0;
    width_record->widths[0] = 0;
    return (width_record);
  }

  num_blobs = count_blobs (blobs);
  /* Get memory: one int for num_chars plus 2 * num_blobs - 1 ints for
     the interleaved widths and gaps */
  width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
  width_record->num_chars = num_blobs;

  /* First width */
  blob_bounding_box(blobs, &topleft, &botright);
  width_record->widths[i++] = botright.x - topleft.x;
  blob_end = botright.x;

  /* Every later blob contributes the gap before it, then its width */
  iterate_blobs (blob, blobs->next) {
    blob_bounding_box(blob, &topleft, &botright);
    width_record->widths[i++] = topleft.x - blob_end;
    width_record->widths[i++] = botright.x - topleft.x;
    blob_end = botright.x;
  }
  return (width_record);
}
/**********************************************************************
 * count_blobs
 *
 * Return the number of blobs visited by iterate_blobs when started at
 * the given blob, i.e. the length of the list headed by `blobs`.
 **********************************************************************/
int count_blobs(TBLOB *blobs) {
  TBLOB *cursor;
  int total = 0;

  iterate_blobs (cursor, blobs) {
    ++total;
  }
  return total;
}
/**********************************************************************
 * delete_word
 *
 * Release a word and everything hanging off it: each blob, each of the
 * blob's outlines, each outline's child outlines, the edge point loops
 * of all of those outlines, and finally the `correct` string (if any)
 * and the word itself.
 **********************************************************************/
void delete_word(TWERD *word) {
  TBLOB *blob = word->blobs;

  while (blob != NULL) {
    /* Read every link before the node that holds it is released */
    TBLOB *following_blob = blob->next;
    TESSLINE *outline = blob->outlines;

    while (outline != NULL) {
      TESSLINE *following_outline = outline->next;
      TESSLINE *child;

      delete_edgepts (outline->loop);

      child = outline->child;
      while (child != NULL) {
        TESSLINE *following_child = child->next;

        delete_edgepts (child->loop);
        oldoutline(child);
        child = following_child;
      }

      oldoutline(outline);
      outline = following_outline;
    }

    oldblob(blob);
    blob = following_blob;
  }

  if (word->correct != NULL)
    strfree (word->correct);     /* Reclaim memory */
  oldword(word);
}
/**********************************************************************
* delete_edgepts
*
* Delete a list of EDGEPT structures.
**********************************************************************/
void delete_edgepts(register EDGEPT *edgepts) {
register EDGEPT *this_edge;
register EDGEPT *next_edge;
if (edgepts == NULL)
return;
this_edge = edgepts;
do {
next_edge = this_edge->next;
oldedgept(this_edge);
this_edge = next_edge;
}
while (this_edge != edgepts);
}

119
ccmain/blobs.h Normal file
View File

@ -0,0 +1,119 @@
/* -*-C-*-
********************************************************************************
*
* File: blobs.h (Formerly blobs.h)
* Description: Blob definition
* Author: Mark Seaman, OCR Technology
* Created: Fri Oct 27 15:39:52 1989
* Modified: Thu Mar 28 15:33:38 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Experimental (Do Not Distribute)
*
* (c) Copyright 1989, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*********************************************************************************/
#ifndef BLOBS_H
#define BLOBS_H
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "vecfuncs.h"
#include "tessclas.h"
/*----------------------------------------------------------------------
T y p e s
----------------------------------------------------------------------*/
/* Variable-length record of blob widths and gaps for one word.  It is
   allocated larger than sizeof(WIDTH_RECORD) by blobs_widths(), which
   stores 2 * num_chars - 1 ints in widths[]: widths and gaps alternate,
   starting and ending with a width. */
typedef struct
{ /* Widths of pieces */
int num_chars; /* Number of blobs measured */
int widths[1]; /* First of the interleaved width/gap entries */
} WIDTH_RECORD;
/*----------------------------------------------------------------------
M a c r o s
----------------------------------------------------------------------*/
/**********************************************************************
 * free_widths
 *
 * Free the memory taken up by a width array.  A NULL argument is
 * ignored.  Note that the argument is evaluated twice.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: a bare "if" would capture the "else" of an
 * enclosing if/else written around a call to this macro.
 **********************************************************************/
#define free_widths(w) \
  do { \
    if (w) memfree (w); \
  } while (0)
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
/* Centre of the bounding box of one blob */
void blob_origin(TBLOB *blob, /*blob to compute on */
TPOINT *origin); /*return value */
/* Bounding box of one blob, taken over its chain of outlines; a NULL
   blob or a blob without outlines gives both corners at (0, 0) */
void blob_bounding_box(TBLOB *blob, /*blob to compute on */
register TPOINT *topleft, /*bounding box */
register TPOINT *botright);
/* Bounding box enclosing every blob in a list */
void blobs_bounding_box(TBLOB *blobs, TPOINT *topleft, TPOINT *botright);
/* Centre of the bounding box of a list of blobs */
void blobs_origin(TBLOB *blobs, /*blobs to compute on */
TPOINT *origin); /*return value */
/* Newly allocated record of the widths of, and gaps between, the blobs
   in a list; release it with free_widths */
WIDTH_RECORD *blobs_widths(TBLOB *blobs);
/* Number of blobs in the list starting at blobs */
int count_blobs(TBLOB *blobs);
/* Release a word together with its blobs, outlines and edge points */
void delete_word(TWERD *word);
/* Release every EDGEPT on a loop; NULL is ignored */
void delete_edgepts(register EDGEPT *edgepts);
/*
#if defined(__STDC__) || defined(__cplusplus)
# define _ARGS(s) s
#else
# define _ARGS(s) ()
#endif*/
/* blobs.c
void blob_origin
_ARGS((BLOB *blob,
TPOINT *origin));
void blob_bounding_box
_ARGS((BLOB *blob,
TPOINT *topleft,
TPOINT *botright));
void blobs_bounding_box
_ARGS((BLOB *blobs,
TPOINT *topleft,
TPOINT *botright));
void blobs_origin
_ARGS((BLOB *blobs,
TPOINT *origin));
WIDTH_RECORD *blobs_widths
_ARGS((BLOB *blobs));
int count_blobs
_ARGS((BLOB *blobs));
void delete_word
_ARGS((TWERD *word));
void delete_edgepts
_ARGS((EDGEPT *edgepts));
#undef _ARGS
*/
#endif

73
ccmain/blread.cpp Normal file
View File

@ -0,0 +1,73 @@
/**********************************************************************
* File: blread.cpp (Formerly pdread.c)
* Description: Friend function of BLOCK to read the uscan pd file.
* Author: Ray Smith
* Created: Mon Mar 18 14:39:00 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <stdlib.h>
#ifdef __UNIX__
#include <assert.h>
#endif
#include "scanutils.h"
#include "fileerr.h"
#include "blread.h"
#define UNLV_EXT ".uzn" // unlv zone file
/**********************************************************************
 * read_unlv_file
 *
 * Read a whole unlv zone file to make a list of blocks.
 *
 * UNLV_EXT is appended to `name` and the resulting file is read as a
 * sequence of records "x y width height <token>", where the trailing
 * token is skipped.  x and y are top-down image coordinates; each
 * record becomes one rectangular BLOCK whose y range is flipped using
 * ysize, appended to the end of `blocks`.  Reading stops at the first
 * record that does not supply all four numbers.
 *
 * Returns false if the zone file cannot be opened, true otherwise
 * (even when no record was read).  xsize is not used.
 **********************************************************************/
bool read_unlv_file(                     //read zone file
                    STRING name,         //basename of file
                    inT32 xsize,         //image size
                    inT32 ysize,         //image size
                    BLOCK_LIST *blocks   //output list
                   ) {
  int left;                      //zone origin, top-down coords
  int top;
  int zone_width;                //zone extent
  int zone_height;
  FILE *zone_file;               //the open .uzn file
  BLOCK_IT block_it = blocks;    //appends to the output list

  name += UNLV_EXT;              //add extension
  zone_file = fopen (name.string (), "r");
  if (zone_file == NULL)
    return false;                //didn't read one

  while (fscanf (zone_file, "%d %d %d %d %*s",
                 &left, &top, &zone_width, &zone_height) >= 4) {
    //rectangular block, with the y axis flipped to bottom-up
    BLOCK *zone = new BLOCK (name.string (), TRUE, 0, 0,
                             (inT16) left,
                             (inT16) (ysize - top - zone_height),
                             (inT16) (left + zone_width),
                             (inT16) (ysize - top));
    block_it.add_to_end (zone);  //on end of list
  }
  fclose(zone_file);
  return true;
}
void FullPageBlock(int width, int height, BLOCK_LIST *blocks) {
BLOCK_IT block_it(blocks);
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
block_it.add_to_end(block);
}

33
ccmain/blread.h Normal file
View File

@ -0,0 +1,33 @@
/**********************************************************************
* File: blread.h (Formerly pdread.h)
* Description: Friend function of BLOCK to read the uscan pd file.
* Author: Ray Smith
* Created: Mon Mar 18 14:39:00 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLREAD_H
#define BLREAD_H
#include "varable.h"
#include "ocrblock.h"
//Read the unlv zone file for `name` and append one BLOCK per zone to
//`blocks`. Returns false if the zone file could not be opened.
bool read_unlv_file( //read zone file
STRING name, //basename of file
inT32 xsize, //image size
inT32 ysize, //image size
BLOCK_LIST *blocks //output list
);
//Append a single block covering (0, 0) to (width, height) to `blocks`.
void FullPageBlock(int width, int height, BLOCK_LIST *blocks);
#endif

172
ccmain/callcpp.cpp Normal file
View File

@ -0,0 +1,172 @@
/**********************************************************************
* File: callcpp.cpp
* Description: extern C interface calling C++ from C.
* Author: Ray Smith
* Created: Sun Feb 04 20:39:23 MST 1996
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "errcode.h"
#ifdef __UNIX__
#include <assert.h>
#include <stdarg.h>
#endif
#include <time.h>
#include "memry.h"
#include "scrollview.h"
//#include "evnts.h"
#include "varable.h"
#include "callcpp.h"
#include "tprintf.h"
//#include "strace.h"
#include "host.h"
#include "unichar.h"
//extern "C" {
//Tunable variables. NOTE(review): each macro appears to take
//(name, default value, description) - confirm against varable.h.
INT_VAR (tess_cp_mapping0, 0, "Mappings for class pruner distance");
INT_VAR (tess_cp_mapping1, 1, "Mappings for class pruner distance");
INT_VAR (tess_cp_mapping2, 2, "Mappings for class pruner distance");
INT_VAR (tess_cp_mapping3, 3, "Mappings for class pruner distance");
INT_VAR (record_matcher_output, 0, "Record detailed matcher info");
INT_VAR (il1_adaption_test, 0, "Dont adapt to i/I at beginning of word");
double_VAR (permuter_pending_threshold, 0.0,
"Worst conf for using pending dictionary");
//Global matcher info from the class pruner.
//Filled from tess_cp_mapping0..3 by setup_cp_maps().
inT32 cp_maps[4];
//Global info to control writes of matcher info
char blob_answer[UNICHAR_LEN + 1]; //correct char
char *word_answer; //correct word
inT32 bits_in_states; //no of bits in states
void setup_cp_maps() {
cp_maps[0] = tess_cp_mapping0;
cp_maps[1] = tess_cp_mapping1;
cp_maps[2] = tess_cp_mapping2;
cp_maps[3] = tess_cp_mapping3;
}
// printf-style trace output: formats the arguments into a local buffer
// and passes the result to tprintf as a single "%s" argument.
//
// Formatting is bounded by the buffer size, so a message longer than
// the buffer is truncated instead of overrunning the stack (the earlier
// vsprintf call had no limit).
void
cprintf (                        //Trace printf
const char *format, ...          //special message
) {
  va_list args;                  //variable args
  char msg[1000];

  msg[0] = '\0';                 //defined content if formatting fails
  va_start(args, format);        //variable list
  vsnprintf(msg, sizeof(msg), format, args);  //Format into msg
  va_end(args);
  // Some older C runtimes do not terminate the buffer on truncation.
  msg[sizeof(msg) - 1] = '\0';

  tprintf ("%s", msg);
}
#ifndef GRAPHICS_DISABLED
// Creates a new ScrollView window and returns it.  Only the extent of
// each scrolling range (xmax - xmin, ymax - ymin) is forwarded to the
// ScrollView constructor; the last constructor argument is always true.
ScrollView *c_create_window(                /*create a window */
                            const char *name, /*name/title of window */
                            inT16 xpos,     /*coords of window */
                            inT16 ypos,     /*coords of window */
                            inT16 xsize,    /*size of window */
                            inT16 ysize,    /*size of window */
                            double xmin,    /*scrolling limits */
                            double xmax,    /*to stop users */
                            double ymin,    /*getting lost in */
                            double ymax     /*empty space */
                           ) {
  const double x_extent = xmax - xmin;
  const double y_extent = ymax - ymin;

  return new ScrollView(name, xpos, ypos, xsize, ysize, x_extent, y_extent, true);
}
void c_line_color_index( /*set color */
void *win,
C_COL index) {
// The colors are the same as the SV ones except that SV has COLOR:NONE --> offset of 1
ScrollView* window = (ScrollView*) win;
window->Pen((ScrollView::Color) (index + 1));
}
void c_move( /*move pen */
void *win,
double x,
double y) {
ScrollView* window = (ScrollView*) win;
window->SetCursor((int) x, (int) y);
}
void c_draw( /*move pen */
void *win,
double x,
double y) {
ScrollView* window = (ScrollView*) win;
window->DrawTo((int) x, (int) y);
}
void c_make_current( /*move pen */
void *win) {
ScrollView* window = (ScrollView*) win;
window->Update();
}
void c_clear_window( /*move pen */
void *win) {
ScrollView* window = (ScrollView*) win;
window->Clear();
}
// Blocks until the window delivers an input or a click event; every
// other event is deleted and ignored.  Returns the first character of
// the event parameter for an input event, or '\0' for a click.
char window_wait(ScrollView* win) {
  char key = '\0';

  for (;;) {
    SVEvent* event = win->AwaitEvent(SVET_ANY);
    // Copy what is needed before the event is deleted.
    const SVEventType kind = event->type;
    if (kind == SVET_INPUT)
      key = event->parameter[0];
    delete event;

    if (kind == SVET_INPUT || kind == SVET_CLICK)
      break;
  }
  return key;
}
#endif
// Reverses, in place, the order of the 4 bytes starting at ptr
// (byte 0 <-> byte 3, byte 1 <-> byte 2).
void reverse32(void *ptr) {
  char *bytes = (char *) ptr;

  for (int lo = 0, hi = 3; lo < hi; ++lo, --hi) {
    char saved = bytes[lo];
    bytes[lo] = bytes[hi];
    bytes[hi] = saved;
  }
}
// Swaps, in place, the 2 bytes starting at ptr.
void reverse16(void *ptr) {
  char *bytes = (char *) ptr;
  char first = bytes[0];

  bytes[0] = bytes[1];
  bytes[1] = first;
}
//};

923
ccmain/ccmain.vcproj Executable file
View File

@ -0,0 +1,923 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="ccmain"
ProjectGUID="{EFE93017-B435-46EA-9199-1C83A869595A}"
RootNamespace="ccmain"
Keyword="Win32Proj"
TargetFrameworkVersion="196613"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="4"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\ccutil,..\cutil,..\viewer,..\image,..\ccstruct,..\dict,..\classify,..\wordrec,..\textord,..\include"
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__MSW32__;_CRT_SECURE_NO_WARNINGS;HAVE_LIBLEPT;LEPTONLIB_IMPORTS"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="2"
WarningLevel="3"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="4"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
AdditionalIncludeDirectories="..\ccutil,..\cutil,..\viewer,..\image,..\ccstruct,..\dict,..\classify,..\wordrec,..\textord,..\include"
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__MSW32__;_CRT_SECURE_NO_WARNINGS;HAVE_LIBLEPT;LEPTONLIB_IMPORTS"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="2"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\adaptions.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\ambigsrecog.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\applybox.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\blobcmp.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\callnet.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\charcut.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\charsample.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\control.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\docqual.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\expandblob.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\fixspace.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\fixxht.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\imgscale.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\matmatch.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ccutil\mfcpch.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="1"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="1"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\osdetect.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\output.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\pagewalk.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\paircmp.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\pgedit.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\reject.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\scaleimg.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\tessbox.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\tessedit.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\tesseractclass.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\tessvars.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\tfacepp.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\thresholder.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\tstruct.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\varabled.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\werdit.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PrecompiledHeaderThrough="mfcpch.h"
/>
</FileConfiguration>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\adaptions.h"
>
</File>
<File
RelativePath=".\applybox.h"
>
</File>
<File
RelativePath=".\blobcmp.h"
>
</File>
<File
RelativePath=".\callnet.h"
>
</File>
<File
RelativePath=".\charcut.h"
>
</File>
<File
RelativePath=".\charsample.h"
>
</File>
<File
RelativePath=".\control.h"
>
</File>
<File
RelativePath=".\docqual.h"
>
</File>
<File
RelativePath=".\expandblob.h"
>
</File>
<File
RelativePath=".\fixspace.h"
>
</File>
<File
RelativePath=".\fixxht.h"
>
</File>
<File
RelativePath=".\imgscale.h"
>
</File>
<File
RelativePath=".\matmatch.h"
>
</File>
<File
RelativePath=".\osdetect.h"
>
</File>
<File
RelativePath=".\output.h"
>
</File>
<File
RelativePath=".\pagewalk.h"
>
</File>
<File
RelativePath=".\paircmp.h"
>
</File>
<File
RelativePath=".\pgedit.h"
>
</File>
<File
RelativePath=".\reject.h"
>
</File>
<File
RelativePath=".\scaleimg.h"
>
</File>
<File
RelativePath=".\tessbox.h"
>
</File>
<File
RelativePath=".\tessedit.h"
>
</File>
<File
RelativePath=".\tessembedded.h"
>
</File>
<File
RelativePath=".\tesseractclass.h"
>
</File>
<File
RelativePath=".\tessio.h"
>
</File>
<File
RelativePath=".\tessvars.h"
>
</File>
<File
RelativePath=".\tfacep.h"
>
</File>
<File
RelativePath=".\tfacepp.h"
>
</File>
<File
RelativePath=".\thresholder.h"
>
</File>
<File
RelativePath=".\tstruct.h"
>
</File>
<File
RelativePath=".\varabled.h"
>
</File>
<File
RelativePath=".\werdit.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

29
ccmain/ccstruct.cpp Normal file
View File

@ -0,0 +1,29 @@
///////////////////////////////////////////////////////////////////////
// File: ccstruct.cpp
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "ccstruct.h"
namespace tesseract {

// Builds the CCStruct, giving the image_ member a pointer back to this
// object.
CCStruct::CCStruct() : image_(this) {}

// No explicit clean-up is performed here.
CCStruct::~CCStruct() {}

}  // namespace tesseract

52
ccmain/ccstruct.h Normal file
View File

@ -0,0 +1,52 @@
///////////////////////////////////////////////////////////////////////
// File: ccstruct.h
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H__
#define TESSERACT_CCSTRUCT_CCSTRUCT_H__
#include "cutil.h"
#include "image.h"
class PBLOB;
class DENORM;
class WERD;
class BLOB_CHOICE_LIST;
namespace tesseract {

// Extends the CUtil base with an Image member.
class CCStruct : public CUtil {
 public:
  CCStruct();
  ~CCStruct();

 protected:
  Image image_;  // Available to derived classes.
};

class Tesseract;  // Forward declaration only.

}  // namespace tesseract
// Pointer to a tesseract::Tesseract member function that takes three PBLOBs,
// a WERD, a DENORM, a BLOB_CHOICE_LIST and a C string, and returns nothing.
typedef void (tesseract::Tesseract::*POLY_MATCHER)
(PBLOB *, PBLOB *, PBLOB *, WERD *,
DENORM *, BLOB_CHOICE_LIST *, const char*);
/*
typedef void (tesseract::Tesseract::*POLY_TESTER)
(const STRING&, PBLOB *, DENORM *, BOOL8, char *,
inT32, BLOB_CHOICE_LIST *);
*/
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H__

1037
ccmain/ccstruct.vcproj Executable file

File diff suppressed because it is too large Load Diff

214
ccmain/charsample.h Normal file
View File

@ -0,0 +1,214 @@
/**********************************************************************
* File: charsample.h (Formerly charsample.h)
* Description: Class to contain character samples and match scores
* to be used for adaption
* Author: Chris Newton
* Created: Thu Oct 7 13:40:37 BST 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef CHARSAMPLE_H
#define CHARSAMPLE_H
#include "elst.h"
#include "pageres.h"
#include "memry.h"
#include "notdll.h"
// Score constant equal to the largest inT32.
// NOTE(review): presumably marks an unusable match - confirm at the use sites.
#define BAD_SCORE MAX_INT32
// '!' to '~' covers the printable ASCII characters other than space.
#define FIRST_CHAR '!'
#define LAST_CHAR '~'
namespace tesseract {
class Tesseract; // Fwd decl.
}
// Type tag stored in CHAR_SAMPLES::type.
enum ClusterType
{ UNKNOWN, BLOB_CLUSTER, IMAGE_CLUSTER };
// Each class is forward declared so that the list macro can be invoked
// before the class body appears.
class CHAR_SAMPLE; //forward decl
ELISTIZEH (CHAR_SAMPLE)
class CHAR_SAMPLES; //forward decl
ELISTIZEH (CHAR_SAMPLES)
class CHAR_PROTO; //forward decl
// A single character sample - held as a blob plus denorm and/or as an image -
// together with running statistics of the matches made against it.
class CHAR_SAMPLE : public ELIST_LINK {
 public:
  CHAR_SAMPLE();  // Empty constructor.

  // Builds a sample from a blob, its denorm and the character it represents.
  CHAR_SAMPLE(PBLOB *blob, DENORM *denorm, char c);

  // Builds a sample from an image and the character it represents.
  CHAR_SAMPLE(IMAGE *image, char c);

  ~CHAR_SAMPLE() {
    // We own the image, so it has to be deleted.
    if (sample_image != NULL) {
      delete sample_image;
    }
  }

  // Matches test_sample against this sample and returns the score.
  // NOTE(review): `updating` presumably controls whether the match
  // statistics below are updated - confirm in the implementation.
  float match_sample(CHAR_SAMPLE *test_sample, BOOL8 updating,
                     tesseract::Tesseract* tess);

  // Plain accessors.
  inT32 n_matches() { return n_samples_matched; }
  IMAGE *image() { return sample_image; }
  PBLOB *blob() { return sample_blob; }
  DENORM *denorm() { return sample_denorm; }

  double mean_score();
  double variance();

  char character() { return ch; }

  void print(FILE *f);
  void reset_match_statistics();

  NEWDELETE2(CHAR_SAMPLE)

 private:
  IMAGE *sample_image;        // Deleted by the destructor.
  PBLOB *sample_blob;         // Not deleted by the destructor.
  DENORM *sample_denorm;      // Not deleted by the destructor.
  inT32 n_samples_matched;    // Value returned by n_matches().
  double total_match_scores;
  double sumsq_match_scores;
  char ch;                    // Value returned by character().
};
// A list of CHAR_SAMPLEs for one character, with an optional prototype and
// a designated best sample.
class CHAR_SAMPLES : public ELIST_LINK {
 public:
  CHAR_SAMPLES();  // Empty constructor.
  CHAR_SAMPLES(CHAR_SAMPLE *sample);

  ~CHAR_SAMPLES() {}  // Destructor: nothing is released explicitly.

  // Number of samples currently in the list.
  inT32 n_samples() { return samples.length(); }

  void add_sample(CHAR_SAMPLE *sample, tesseract::Tesseract*);

  // Prototype maintenance.
  void build_prototype();
  void rebuild_prototype(inT32 new_xsize, inT32 new_ysize);
  void add_sample_to_prototype(CHAR_SAMPLE *sample);
  CHAR_PROTO *prototype() { return proto; }

  void find_best_sample();

  // Match scores of a sample against this cluster.
  float match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess);
  float nn_match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess);

  char character() { return ch; }

  void assign_to_char();
  void print(FILE *f);

  NEWDELETE2(CHAR_SAMPLES)

 private:
  ClusterType type;
  char ch;                    // Value returned by character().
  CHAR_PROTO *proto;          // Value returned by prototype().
  CHAR_SAMPLE *best_sample;
  CHAR_SAMPLE_LIST samples;   // The samples themselves.
};
// A prototype for a character: an x_size by y_size grid of floats reachable
// through data(), plus the number of samples and the character it stands for.
class CHAR_PROTO {
 public:
  CHAR_PROTO();  // Empty constructor.

  // Builds a prototype of the given dimensions.
  CHAR_PROTO(inT32 x_size, inT32 y_size, inT32 n_samples,
             float initial_value, char c);

  // Builds a prototype from a single sample.
  CHAR_PROTO(CHAR_SAMPLE *sample);

  ~CHAR_PROTO();

  // Matching against a sample or against another prototype.
  float match_sample(CHAR_SAMPLE *test_sample);
  float match(CHAR_PROTO *test_proto);

  // Plain accessors.
  inT32 n_samples() { return nsamples; }
  inT32 x_size() { return xsize; }
  inT32 y_size() { return ysize; }
  float **data() { return proto; }
  char character() { return ch; }

  void enlarge_prototype(inT32 new_xsize, inT32 new_ysize);
  void add_sample(CHAR_SAMPLE *sample);
  IMAGE *make_image();
  void print(FILE *f);

  NEWDELETE2(CHAR_PROTO)

 private:
  inT32 xsize;
  inT32 ysize;
  float *proto_data;
  float **proto;     // Value returned by data().
  inT32 nsamples;
  char ch;
};
#endif

686
ccmain/coutln.cpp Normal file
View File

@ -0,0 +1,686 @@
/**********************************************************************
* File: coutln.cpp (Formerly coutline.c)
* Description: Code for the C_OUTLINE class.
* Author: Ray Smith
* Created: Mon Oct 07 16:01:57 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <string.h>
#ifdef __UNIX__
#include <assert.h>
#endif
#include "coutln.h"
ELISTIZE_S (C_OUTLINE)
// Unit step vector for each 2-bit chain code:
// 0 = (-1, 0), 1 = (0, -1), 2 = (1, 0), 3 = (0, 1).
ICOORD C_OUTLINE::step_coords[4] = {
ICOORD (-1, 0), ICOORD (0, -1), ICOORD (1, 0), ICOORD (0, 1)
};
/**********************************************************************
 * C_OUTLINE::C_OUTLINE
 *
 * Builds a C_OUTLINE from a linked loop of CRACKEDGEs.
 *   startpt             first edge of the loop; its pos becomes the start
 *   bot_left/top_right  bounding box of the outline
 *   length              number of edges to copy
 * A length of zero produces an outline with no step array.
 **********************************************************************/
C_OUTLINE::C_OUTLINE(CRACKEDGE *startpt, ICOORD bot_left, ICOORD top_right,
                     inT16 length)
    : box(bot_left, top_right), start(startpt->pos) {
  stepcount = length;
  if (length == 0) {
    steps = NULL;
    return;
  }

  // Allocate the packed step array and clear it.
  steps = (uinT8 *) alloc_mem(step_mem());
  memset(steps, 0, step_mem());

  // Walk the edge list, storing each edge's chain code.
  CRACKEDGE *edge = startpt;
  inT16 index = 0;
  while (index < length) {
    set_step(index, edge->stepdir);
    edge = edge->next;
    index++;
  }
}
/**********************************************************************
* C_OUTLINE::C_OUTLINE
*
* Constructor to build a C_OUTLINE from a C_OUTLINE_FRAG.
*
* Copies the `length` directions in new_steps into the packed step array,
* beginning at startpt. A step that exactly reverses the step before it
* (direction difference of +/-64) is cancelled together with that previous
* step. The bounding box is grown to cover every position visited.
* The path must end back at startpt and at least 4 steps must survive,
* otherwise ASSERT_HOST fires.
* NOTE(review): new_steps[length - 1] is read unconditionally, so length
* is assumed to be > 0 - confirm that callers guarantee this.
**********************************************************************/
C_OUTLINE::C_OUTLINE (
//constructor
//steps to copy
ICOORD startpt, DIR128 * new_steps,
inT16 length //length of loop
):start (startpt) {
inT8 dirdiff; //direction difference
DIR128 prevdir; //previous direction
DIR128 dir; //current direction
DIR128 lastdir; //dir of last step
TBOX new_box; //easy bounding
inT16 stepindex; //index to step
inT16 srcindex; //source steps
ICOORD pos; //current position
pos = startpt;
stepcount = length; //no of steps
//get memory
// Sized for the full input length; the final stepcount may be smaller.
steps = (uinT8 *) alloc_mem (step_mem());
memset(steps, 0, step_mem());
// The loop is closed, so the step "before" the first one is the last one.
lastdir = new_steps[length - 1];
prevdir = lastdir;
// srcindex walks the input; stepindex is the write position in the output
// and can move backwards when a pair of steps is cancelled.
for (stepindex = 0, srcindex = 0; srcindex < length;
stepindex++, srcindex++) {
// Grow the bounding box to include the current position.
new_box = TBOX (pos, pos);
box += new_box;
//copy steps
dir = new_steps[srcindex];
set_step(stepindex, dir);
dirdiff = dir - prevdir;
pos += step (stepindex);
if ((dirdiff == 64 || dirdiff == -64) && stepindex > 0) {
// After the loop's stepindex++ the write position ends up one lower than
// before, so both the reversing step and its predecessor get overwritten.
stepindex -= 2; //cancel there-and-back
// stepindex can be -1 here, in which case no written step remains.
prevdir = stepindex >= 0 ? step_dir (stepindex) : lastdir;
}
else
prevdir = dir;
}
// The copied path must be a closed loop.
ASSERT_HOST (pos.x () == startpt.x () && pos.y () == startpt.y ());
// Cancel there-and-back pairs that straddle the start point: while the last
// kept step reverses step 0, advance start along step 0, drop both steps and
// shift the remaining steps down by one slot.
do {
dirdiff = step_dir (stepindex - 1) - step_dir (0);
if (dirdiff == 64 || dirdiff == -64) {
start += step (0);
stepindex -= 2; //cancel there-and-back
for (int i = 0; i < stepindex; ++i)
set_step(i, step_dir(i + 1));
}
}
while (stepindex > 1 && (dirdiff == 64 || dirdiff == -64));
// Record how many steps were actually kept.
stepcount = stepindex;
ASSERT_HOST (stepcount >= 4);
}
/**********************************************************************
* C_OUTLINE::C_OUTLINE
*
* Constructor to build a C_OUTLINE from a rotation of a C_OUTLINE.
*
* Each source position is rotated, and unit steps are generated to walk
* from the previous rotated position to the next one. U-turns (adjacent
* steps whose directions differ by +/-64) are removed as they appear.
* Up to two passes are made with the round1/round2 offsets swapped; the
* second pass only runs if the first produced fewer than 4 steps.
* stepcount is first set to twice the source count (the allocation size)
* and replaced at the end by the number of steps actually generated.
* A source outline with no steps yields an outline with no step array and
* the rotated source bounding box.
**********************************************************************/
C_OUTLINE::C_OUTLINE( //constructor
C_OUTLINE *srcline, //outline to
FCOORD rotation //rotate
) {
TBOX new_box; //easy bounding
inT16 stepindex; //index to step
inT16 dirdiff; //direction change
ICOORD pos; //current position
ICOORD prevpos; //previous dest point
ICOORD destpos; //destination point
inT16 destindex; //index to step
DIR128 dir; //coded direction
uinT8 new_step;
// Provisional count: used only to size the step array.
stepcount = srcline->stepcount * 2;
if (stepcount == 0) {
steps = NULL;
box = srcline->box;
box.rotate(rotation);
return;
}
//get memory
steps = (uinT8 *) alloc_mem (step_mem());
memset(steps, 0, step_mem());
for (int iteration = 0; iteration < 2; ++iteration) {
// Offsets added to the direction of the first and second step emitted for
// a direction with (new_step & 31) != 0; swapped on the second pass.
DIR128 round1 = iteration == 0 ? 32 : 0;
DIR128 round2 = iteration != 0 ? 32 : 0;
// Restart from the rotated source start point on every pass.
pos = srcline->start;
prevpos = pos;
prevpos.rotate (rotation);
start = prevpos;
box = TBOX (start, start);
destindex = 0;
for (stepindex = 0; stepindex < srcline->stepcount; stepindex++) {
pos += srcline->step (stepindex);
destpos = pos;
destpos.rotate (rotation);
// printf("%i %i %i %i ", destpos.x(), destpos.y(), pos.x(), pos.y());
// Emit steps until the generated path reaches the rotated position.
while (destpos.x () != prevpos.x () || destpos.y () != prevpos.y ()) {
dir = DIR128 (FCOORD (destpos - prevpos));
dir += 64; //turn to step style
new_step = dir.get_dir ();
// printf(" %i\n", new_step);
if (new_step & 31) {
// Direction is not a multiple of 32: emit it as two steps, using
// dir + round1 and then dir + round2.
set_step(destindex++, dir + round1);
prevpos += step(destindex - 1);
if (destindex < 2
|| ((dirdiff =
step_dir (destindex - 1) - step_dir (destindex - 2)) !=
-64 && dirdiff != 64)) {
set_step(destindex++, dir + round2);
prevpos += step(destindex - 1);
} else {
// The first new step reversed its predecessor: undo it, then replace
// the predecessor with dir + round2.
prevpos -= step(destindex - 1);
destindex--;
prevpos -= step(destindex - 1);
set_step(destindex - 1, dir + round2);
prevpos += step(destindex - 1);
}
}
else {
// Direction is a multiple of 32: a single step suffices.
set_step(destindex++, dir);
prevpos += step(destindex - 1);
}
// Remove any U-turn formed by the two most recent steps, repeatedly.
while (destindex >= 2 &&
((dirdiff =
step_dir (destindex - 1) - step_dir (destindex - 2)) == -64 ||
dirdiff == 64)) {
prevpos -= step(destindex - 1);
prevpos -= step(destindex - 2);
destindex -= 2; // Forget u turn
}
//ASSERT_HOST(prevpos.x() == destpos.x() && prevpos.y() == destpos.y());
new_box = TBOX (destpos, destpos);
box += new_box;
}
}
// The rotated path must be closed.
ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ());
// Remove U-turns that straddle the start point: advance start along step 0,
// drop the first and last steps and shift the rest down by one slot.
dirdiff = step_dir (destindex - 1) - step_dir (0);
while ((dirdiff == 64 || dirdiff == -64) && destindex > 1) {
start += step (0);
destindex -= 2;
for (int i = 0; i < destindex; ++i)
set_step(i, step_dir(i + 1));
dirdiff = step_dir (destindex - 1) - step_dir (0);
}
// Accept this pass if it produced at least 4 steps.
if (destindex >= 4)
break;
}
// Checked after the steps were written: the allocation must not have been
// exceeded.
ASSERT_HOST(destindex <= stepcount);
stepcount = destindex;
// Re-walk the final steps to verify that the outline is still closed.
destpos = start;
for (stepindex = 0; stepindex < stepcount; stepindex++) {
destpos += step (stepindex);
}
ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ());
}
// Build a fake outline, given just a bounding box and append to the list.
void C_OUTLINE::FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines) {
C_OUTLINE_IT ol_it(outlines);
// Make a C_OUTLINE from the bounds. This is a bit of a hack,
// as there is no outline, just a bounding box, but it works nicely.
CRACKEDGE start;
start.pos = box.topleft();
C_OUTLINE* outline = new C_OUTLINE(&start, box.topleft(), box.botright(), 0);
ol_it.add_to_end(outline);
}
/**********************************************************************
 * C_OUTLINE::area
 *
 * Returns the signed area enclosed by this outline plus the area() of
 * every child outline. Each leftward step adds the current y and each
 * rightward step subtracts it; vertical steps contribute nothing.
 **********************************************************************/
inT32 C_OUTLINE::area() {
  ICOORD point = start_pos();
  inT32 num_steps = pathlength();
  inT32 sum = 0;

  for (int index = 0; index < num_steps; index++) {
    ICOORD delta = step(index);
    if (delta.x() < 0) {
      sum += point.y();
    } else if (delta.x() > 0) {
      sum -= point.y();
    }
    point += delta;
  }

  // Add the areas of the children.
  C_OUTLINE_IT child_it = child();
  for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) {
    sum += child_it.data()->area();
  }
  return sum;
}
/**********************************************************************
 * C_OUTLINE::perimeter
 *
 * Returns the path length of this outline plus the path lengths of its
 * immediate children (grandchildren are not included).
 **********************************************************************/
inT32 C_OUTLINE::perimeter() {
  inT32 result = pathlength();
  C_OUTLINE_IT child_it = child();
  for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) {
    result += child_it.data()->pathlength();
  }
  return result;
}
/**********************************************************************
 * C_OUTLINE::outer_area
 *
 * Returns the signed area enclosed by this outline alone, ignoring any
 * children. An outline with no steps reports the area of its bounding box.
 **********************************************************************/
inT32 C_OUTLINE::outer_area() {
  inT32 num_steps = pathlength();
  if (num_steps == 0) {
    return box.area();
  }

  ICOORD point = start_pos();
  inT32 sum = 0;
  for (int index = 0; index < num_steps; index++) {
    ICOORD delta = step(index);
    // Leftward steps add the current y, rightward steps subtract it.
    if (delta.x() < 0) {
      sum += point.y();
    } else if (delta.x() > 0) {
      sum -= point.y();
    }
    point += delta;
  }
  return sum;
}
/**********************************************************************
* C_OUTLINE::count_transitions
*
* Compute the number of x and y maxes and mins in the outline.
*
* The outline is walked one step at a time while running extremes are
* tracked separately for x and y. A transition is counted whenever the
* position has moved more than `threshold` away from the running extreme
* in the opposite direction. After the walk, the count is adjusted by +1
* or -1 per axis depending on how the final state compares with the first
* extreme recorded (initial_x / initial_y).
* Returns the total number of transitions counted over both axes.
**********************************************************************/
inT32 C_OUTLINE::count_transitions( //winding number
inT32 threshold //on size
) {
BOOL8 first_was_max_x; //what was first
BOOL8 first_was_max_y;
BOOL8 looking_for_max_x; //what is next
BOOL8 looking_for_min_x;
BOOL8 looking_for_max_y; //what is next
BOOL8 looking_for_min_y;
int stepindex; //current step
inT32 total_steps; //steps to do
//current limits
inT32 max_x, min_x, max_y, min_y;
inT32 initial_x, initial_y; //initial limits
inT32 total; //total changes
ICOORD pos; //position of point
ICOORD next_step; //step to next pix
pos = start_pos ();
total_steps = pathlength ();
total = 0;
max_x = min_x = pos.x ();
max_y = min_y = pos.y ();
// Both flags start TRUE on each axis; having both still TRUE marks the
// state before the first transition on that axis has been counted.
looking_for_max_x = TRUE;
looking_for_min_x = TRUE;
looking_for_max_y = TRUE;
looking_for_min_y = TRUE;
first_was_max_x = FALSE;
first_was_max_y = FALSE;
initial_x = pos.x ();
initial_y = pos.y (); //stop uninit warning
for (stepindex = 0; stepindex < total_steps; stepindex++) {
//all intersected
next_step = step (stepindex);
pos += next_step;
if (next_step.x () < 0) {
// Step in -x: extend the running min, and count a transition once the
// position is more than threshold below the running max.
if (looking_for_max_x && pos.x () < min_x)
min_x = pos.x ();
if (looking_for_min_x && max_x - pos.x () > threshold) {
if (looking_for_max_x) {
// First transition on this axis: remember the extreme it came from.
initial_x = max_x;
first_was_max_x = FALSE;
}
total++;
looking_for_max_x = TRUE;
looking_for_min_x = FALSE;
min_x = pos.x (); //reset min
}
}
else if (next_step.x () > 0) {
// Step in +x: mirror image of the case above.
if (looking_for_min_x && pos.x () > max_x)
max_x = pos.x ();
if (looking_for_max_x && pos.x () - min_x > threshold) {
if (looking_for_min_x) {
initial_x = min_x; //remember first min
first_was_max_x = TRUE;
}
total++;
looking_for_max_x = FALSE;
looking_for_min_x = TRUE;
max_x = pos.x ();
}
}
else if (next_step.y () < 0) {
// Step in -y: same logic applied to the y axis.
if (looking_for_max_y && pos.y () < min_y)
min_y = pos.y ();
if (looking_for_min_y && max_y - pos.y () > threshold) {
if (looking_for_max_y) {
initial_y = max_y; //remember first max
first_was_max_y = FALSE;
}
total++;
looking_for_max_y = TRUE;
looking_for_min_y = FALSE;
min_y = pos.y (); //reset min
}
}
else {
// Remaining case (no x movement and y not negative) is handled as a
// step in +y.
if (looking_for_min_y && pos.y () > max_y)
max_y = pos.y ();
if (looking_for_max_y && pos.y () - min_y > threshold) {
if (looking_for_min_y) {
initial_y = min_y; //remember first min
first_was_max_y = TRUE;
}
total++;
looking_for_max_y = FALSE;
looking_for_min_y = TRUE;
max_y = pos.y ();
}
}
}
// End-of-walk adjustment for x: compare the final running extreme with the
// first one recorded and add or remove one transition accordingly.
if (first_was_max_x && looking_for_min_x) {
if (max_x - initial_x > threshold)
total++;
else
total--;
}
else if (!first_was_max_x && looking_for_max_x) {
if (initial_x - min_x > threshold)
total++;
else
total--;
}
// Same end-of-walk adjustment for y.
if (first_was_max_y && looking_for_min_y) {
if (max_y - initial_y > threshold)
total++;
else
total--;
}
else if (!first_was_max_y && looking_for_max_y) {
if (initial_y - min_y > threshold)
total++;
else
total--;
}
return total;
}
/**********************************************************************
 * C_OUTLINE::operator<
 *
 * Returns TRUE if the left operand (this) is inside the right one (other).
 * The test takes the winding number, around other, of the first point of
 * this outline that other does not pass through. If other passes through
 * every point, the roles are swapped and the points of other are tested
 * against this outline instead.
 **********************************************************************/
BOOL8 C_OUTLINE::operator<(const C_OUTLINE& other) const {
  // Boxes that do not overlap cannot contain one another.
  if (!box.overlap(other.box)) {
    return FALSE;
  }
  // With no steps there is only the bounding box to go on.
  if (stepcount == 0) {
    return other.box.contains(this->box);
  }

  inT16 winding = 0;
  inT32 index = 0;
  ICOORD point = start;
  while (index < stepcount) {
    winding = other.winding_number(point);
    if (winding != INTERSECTING) {
      break;
    }
    point += step(index);
    index++;
  }
  if (winding != INTERSECTING) {
    return winding != 0;
  }

  // Every point tried lay on the other outline: try the other way round.
  point = other.start;
  index = 0;
  while (index < other.stepcount) {
    winding = winding_number(point);
    if (winding != INTERSECTING) {
      break;
    }
    point += other.step(index);
    index++;
  }
  return winding == INTERSECTING || winding == 0;
}
/**********************************************************************
 * C_OUTLINE::winding_number
 *
 * Returns the winding number of the outline around the given point, or
 * INTERSECTING if the outline is found to pass through the point.
 * Only steps that cross the horizontal line through the point are
 * examined; the sign of the cross product of the offset vector with the
 * step decides whether the crossing is counted.
 **********************************************************************/
inT16 C_OUTLINE::winding_number(ICOORD point) const {
  inT16 winding = 0;
  ICOORD offset = start - point;  // From the point to the current position.

  for (inT16 index = 0; index < stepcount; index++) {
    ICOORD delta = step(index);
    if (offset.y() <= 0 && offset.y() + delta.y() > 0) {
      // Crossing the line in the +y direction.
      inT32 cross = offset * delta;
      if (cross > 0) {
        winding++;
      } else if (cross == 0) {
        return INTERSECTING;  // Step goes through the point.
      }
    } else if (offset.y() > 0 && offset.y() + delta.y() <= 0) {
      // Crossing back in the -y direction.
      inT32 cross = offset * delta;
      if (cross < 0) {
        winding--;
      } else if (cross == 0) {
        return INTERSECTING;
      }
    }
    offset += delta;
  }
  return winding;
}
/**********************************************************************
 * C_OUTLINE::turn_direction
 *
 * Returns the sum of the direction changes between consecutive steps
 * around the whole loop. Every change must be 0 or +/-32 and the sum must
 * be +/-128, otherwise ASSERT_HOST fires. An outline with no steps
 * returns 128.
 **********************************************************************/
inT16 C_OUTLINE::turn_direction() const {
  if (stepcount == 0) {
    return 128;
  }

  inT16 count = 0;
  // The loop is closed, so the first step is preceded by the last one.
  DIR128 previous = step_dir(stepcount - 1);
  for (inT16 index = 0; index < stepcount; index++) {
    DIR128 current = step_dir(index);
    inT8 dirdiff = current - previous;
    ASSERT_HOST (dirdiff == 0 || dirdiff == 32 || dirdiff == -32);
    count += dirdiff;
    previous = current;
  }
  ASSERT_HOST (count == 128 || count == -128);
  return count;
}
/**********************************************************************
* C_OUTLINE::reverse
*
* Reverse the direction of an outline.
**********************************************************************/
void C_OUTLINE::reverse() { //reverse drection
DIR128 halfturn = MODULUS / 2; //amount to shift
DIR128 stepdir; //direction of step
inT16 stepindex; //index to cstep
inT16 farindex; //index to other side
inT16 halfsteps; //half of stepcount
halfsteps = (stepcount + 1) / 2;
for (stepindex = 0; stepindex < halfsteps; stepindex++) {
farindex = stepcount - stepindex - 1;
stepdir = step_dir (stepindex);
set_step (stepindex, step_dir (farindex) + halfturn);
set_step (farindex, stepdir + halfturn);
}
}
/**********************************************************************
* C_OUTLINE::move
*
* Move C_OUTLINE by vector
**********************************************************************/
void C_OUTLINE::move( // reposition OUTLINE
const ICOORD vec // by vector
) {
C_OUTLINE_IT it(&children); // iterator
box.move (vec);
start += vec;
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
it.data ()->move (vec); // move child outlines
}
// Deletes this outline if its bounding box is narrower or shorter than
// min_size, extracting it from its list through *it. Otherwise applies the
// same test to each child in turn.
// On entry *it must point at this outline; that is asserted before any
// deletion. Because removal goes through it->extract(), the caller's
// iteration can carry on afterwards.
void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) {
  bool too_small = box.width() < min_size || box.height() < min_size;
  if (too_small) {
    ASSERT_HOST(this == it->data());
    delete it->extract();  // Takes the children with it; `this` is gone now.
    return;
  }
  if (children.empty()) {
    return;
  }
  // Let every child decide for itself whether it is too small.
  C_OUTLINE_IT child_it(&children);
  for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) {
    child_it.data()->RemoveSmallRecursive(min_size, &child_it);
  }
}
/**********************************************************************
 * C_OUTLINE::plot
 *
 * Draws the outline in the given colour.
 *   window  window to draw in
 *   colour  pen colour to use
 * An outline with no steps is drawn as its bounding rectangle. Otherwise
 * consecutive steps in the same direction are merged into one line.
 **********************************************************************/
#ifndef GRAPHICS_DISABLED
void C_OUTLINE::plot(ScrollView* window, ScrollView::Color colour) const {
  window->Pen(colour);
  if (stepcount == 0) {
    window->Rectangle(box.left(), box.top(), box.right(), box.bottom());
    return;
  }

  ICOORD pos = start;  // current position
  window->SetCursor(pos.x(), pos.y());

  inT16 stepindex = 0;
  while (stepindex < stepcount) {
    // Start a new straight run with the step at stepindex.
    DIR128 stepdir = step_dir(stepindex);
    pos += step(stepindex);
    stepindex++;
    // Extend the run while the following steps keep the same direction.
    // The bound is tested before step_dir() is called, so the step array is
    // never read at index stepcount (one byte past the allocation when
    // stepcount is a multiple of 4).
    while (stepindex < stepcount &&
           stepdir.get_dir() == step_dir(stepindex).get_dir()) {
      pos += step(stepindex);
      stepindex++;
    }
    window->DrawTo(pos.x(), pos.y());
  }
}
#endif
/**********************************************************************
 * C_OUTLINE::operator=
 *
 * Assignment - deep copy of the box, start point, step array and children
 * of source. Self-assignment is a no-op. A source with no step array
 * yields a destination with no step array and a step count of zero.
 * Returns *this.
 * NOTE(review): the flags member is not copied here - confirm whether
 * that is intentional.
 **********************************************************************/
C_OUTLINE& C_OUTLINE::operator=(const C_OUTLINE& source) {
  // Without this guard the step array would be freed and then copied from
  // the freed memory.
  if (this == &source)
    return *this;

  box = source.box;
  start = source.start;

  if (steps != NULL) {
    free_mem(steps);
    steps = NULL;
  }
  if (source.steps != NULL && source.stepcount > 0) {
    stepcount = source.stepcount;
    steps = (uinT8 *) alloc_mem(step_mem());
    // Separate allocations, so the regions cannot overlap.
    memcpy(steps, source.steps, step_mem());
  } else {
    // Nothing to copy: avoid a zero-byte allocation and a copy from NULL.
    stepcount = 0;
  }

  if (!children.empty())
    children.clear();
  children.deep_copy(&source.children, &deep_copy);
  return *this;
}
// Returns the unit step vector for the given chain code. Only the low two
// bits of chaindir are used, so any int - including a negative one - maps
// onto one of the four table entries (chaindir % 4 would be negative for
// negative input and index outside the table).
ICOORD C_OUTLINE::chain_step(int chaindir) {
  return step_coords[chaindir & 3];
}

197
ccmain/coutln.h Normal file
View File

@ -0,0 +1,197 @@
/**********************************************************************
* File: coutln.h (Formerly: coutline.c)
* Description: Declaration of the C_OUTLINE class.
* Author: Ray Smith
* Created: Mon Oct 07 16:01:57 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef COUTLN_H
#define COUTLN_H
#include "crakedge.h"
#include "mod128.h"
#include "bits16.h"
#include "rect.h"
#include "blckerr.h"
#include "scrollview.h"
// Value returned by C_OUTLINE::winding_number in place of a winding number
// when the outline passes through the test point.
#define INTERSECTING MAX_INT16//no winding number
//mask to get step
// Each step is stored in 2 bits, so 3 isolates a single step.
#define STEP_MASK 3
// Bit indices for C_OUTLINE::flag / C_OUTLINE::set_flag.
enum C_OUTLINE_FLAGS
{
COUT_INVERSE //White on black blob
};
class DLLSYM C_OUTLINE; //forward declaration
ELISTIZEH_S (C_OUTLINE)
// A closed outline stored as a start point plus a chain of unit steps, each
// packed into 2 bits (4 steps per byte), together with its bounding box,
// flags and a list of child outlines.
class DLLSYM C_OUTLINE:public ELIST_LINK
{
 public:
  C_OUTLINE() {  //empty constructor
    steps = NULL;
    // Keep pathlength() and step_mem() well defined on an empty outline.
    stepcount = 0;
  }
  C_OUTLINE(                      //constructor
            CRACKEDGE *startpt,   //from edge detector
            ICOORD bot_left,      //bounding box
            ICOORD top_right,
            inT16 length);        //length of loop
  C_OUTLINE(ICOORD startpt,       //start of loop
            DIR128 *new_steps,    //steps in loop
            inT16 length);        //length of loop
  //outline to copy and rotate
  C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation);

  // Build a fake outline, given just a bounding box and append to the list.
  static void FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines);

  ~C_OUTLINE () {  //destructor
    if (steps != NULL)
      free_mem(steps);
    steps = NULL;
  }

  BOOL8 flag(                              //test flag
             C_OUTLINE_FLAGS mask) const {  //flag to test
    return flags.bit (mask);
  }
  void set_flag(                       //set flag value
                C_OUTLINE_FLAGS mask,  //flag to set
                BOOL8 value) {         //value to set
    flags.set_bit (mask, value);
  }

  C_OUTLINE_LIST *child() {  //get child list
    return &children;
  }

  //access function
  const TBOX &bounding_box() const {
    return box;
  }

  // Stores a 2-bit chain code at the given step index. Only the low two
  // bits of stepdir are kept.
  void set_step(                  //set a step
                inT16 stepindex,  //index of step
                inT8 stepdir) {   //chain code
    int shift = stepindex%4 * 2;
    uinT8 mask = 3 << shift;
    // Mask before shifting so that a negative stepdir is never left-shifted.
    steps[stepindex/4] = ((stepdir & STEP_MASK) << shift) |
                         (steps[stepindex/4] & ~mask);
    //squeeze 4 into byte
  }
  // Stores a step given as a DIR128, keeping only its top two direction
  // bits as the chain code.
  void set_step(                   //set a step
                inT16 stepindex,   //index of step
                DIR128 stepdir) {  //direction
    //clean it
    inT8 chaindir = stepdir.get_dir() >> (DIRBITS - 2);
    set_step(stepindex, chaindir);
  }

  //get start position
  const ICOORD &start_pos() const {
    return start;
  }
  inT32 pathlength() const {  //get path length
    return stepcount;
  }
  // Return step at a given index as a DIR128.
  DIR128 step_dir(inT16 index) const {
    return DIR128((inT16)(((steps[index/4] >> (index%4 * 2)) & STEP_MASK) <<
                          (DIRBITS - 2)));
  }
  // Return the step vector for the given outline position.
  ICOORD step(inT16 index) const {  //index of step
    return step_coords[(steps[index/4] >> (index%4 * 2)) & STEP_MASK];
  }

  inT32 area();        // Area including children.
  inT32 perimeter();   // Total perimeter of self and 1st level children.
  inT32 outer_area();  // Area of this outline alone.
  inT32 count_transitions(                   //count maxima
                          inT32 threshold);  //size threshold

  BOOL8 operator< (  //containment test
                   const C_OUTLINE & other) const;
  BOOL8 operator> (  //containment test
                   C_OUTLINE & other) const
  {
    return other < *this;  //use the < to do it
  }

  inT16 winding_number(                       //get winding number
                       ICOORD testpt) const;  //around this point
  //get direction
  inT16 turn_direction() const;
  void reverse();  //reverse direction

  void move(                    // reposition outline
            const ICOORD vec);  // by vector

  // If this outline is smaller than the given min_size, delete this and
  // remove from its list, via *it, after checking that *it points to this.
  // Otherwise, if any children of this are too small, delete them.
  // On entry, *it must be an iterator pointing to this. If this gets deleted
  // then this is extracted from *it, so an iteration can continue.
  void RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it);

  void plot(                                  //draw one
            ScrollView* window,               //window to draw in
            ScrollView::Color colour) const;  //colour to draw it

  void prep_serialise() {  //set ptrs to counts
    children.prep_serialise ();
  }

  void dump(  //write external bits
            FILE *f) {
    // Writes step_mem() bytes of packed steps, then the children.
    serialise_bytes (f, (void *) steps, step_mem());
    children.dump (f);
  }

  void de_dump(  //read external bits
               FILE *f) {
    // Reads step_mem() bytes of packed steps, then the children.
    steps = (uinT8 *) de_serialise_bytes (f, step_mem());
    children.de_dump (f);
  }

  make_serialise (C_OUTLINE)

  //assignment
  C_OUTLINE& operator=(const C_OUTLINE& source);

  // Returns a newly allocated deep copy of src, made with operator=.
  static C_OUTLINE* deep_copy(const C_OUTLINE* src) {
    C_OUTLINE* outline = new C_OUTLINE;
    *outline = *src;
    return outline;
  }

  static ICOORD chain_step(int chaindir);

 private:
  // Number of bytes needed to hold stepcount 2-bit steps.
  int step_mem() const { return (stepcount+3) / 4; }

  TBOX box;                     //bounding box
  ICOORD start;                 //start coord
  uinT8 *steps;                 //step array
  inT16 stepcount;              //no of steps
  BITS16 flags;                 //flags about outline
  C_OUTLINE_LIST children;      //child elements
  static ICOORD step_coords[4];
};
#endif

39
ccmain/crakedge.h Normal file
View File

@ -0,0 +1,39 @@
/**********************************************************************
* File: crakedge.h (Formerly: crkedge.h)
* Description: Structures for the crack-following edge detector.
* Author: Ray Smith
* Created: Fri Mar 22 16:06:38 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef CRAKEDGE_H
#define CRAKEDGE_H
#include "points.h"
#include "mod128.h"
// One edge in a doubly linked chain of crack edges.
class CRACKEDGE {
 public:
  ICOORD pos;       // position of crack
  inT8 stepx;       // edge step
  inT8 stepy;
  inT8 stepdir;     // chaincode
  CRACKEDGE *prev;  // previous point
  CRACKEDGE *next;  // next point

  NEWDELETE2(CRACKEDGE)

  // Empty constructor: the step fields and the link pointers are not set.
  CRACKEDGE() {}
};
#endif

144
ccmain/detlinefit.cpp Normal file
View File

@ -0,0 +1,144 @@
///////////////////////////////////////////////////////////////////////
// File: detlinefit.cpp
// Description: Deterministic least upper-quartile squares line fitting.
// Author: Ray Smith
// Created: Thu Feb 28 14:45:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "detlinefit.h"
#include "statistc.h"
#include "ndminx.h"
namespace tesseract {
// The number of points to consider at each end.
// Fit() pairs each of the first kNumEndPoints points with each of the last
// kNumEndPoints points to form its candidate lines.
const int kNumEndPoints = 3;
// Nothing to set up explicitly: pt_list_ is default-constructed.
DetLineFit::DetLineFit() {}

// No explicit cleanup is done here; pt_list_ is destroyed as a member.
DetLineFit::~DetLineFit() {}
// Discards every point previously passed to Add() by clearing pt_list_.
void DetLineFit::Clear() { pt_list_.clear(); }
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
void DetLineFit::Add(const ICOORD& pt) {
ICOORDELT_IT it = &pt_list_;
ICOORDELT* new_pt = new ICOORDELT(pt);
it.add_to_end(new_pt);
}
// Fit a line to the points, returning the fitted line as a pair of
// points, and the upper quartile error.
//   pt1, pt2: outputs, set to the two end points of the chosen line.
//   returns:  the upper quartile distance of the points from the line.
// Special cases:
//   - no points: both outputs are set to (0,0) and 0.0 is returned.
//   - 1 or 2 points: the points themselves are returned with 0.0 error.
//   - every candidate start/end pair has identical coordinates: no line
//     direction can be derived, so the first start and last end candidates
//     are returned with 0.0 error instead of dividing by a zero length.
double DetLineFit::Fit(ICOORD* pt1, ICOORD* pt2) {
  ICOORDELT_IT it(&pt_list_);
  // Do something sensible with no points.
  if (pt_list_.empty()) {
    pt1->set_x(0);
    pt1->set_y(0);
    *pt2 = *pt1;
    return 0.0;
  }
  // Count the points and find the first and last kNumEndPoints.
  ICOORD* starts[kNumEndPoints];
  ICOORD* ends[kNumEndPoints];
  int pt_count = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    if (pt_count < kNumEndPoints) {
      starts[pt_count] = it.data();
      ends[pt_count] = starts[pt_count];
    } else {
      // Slide the window of trailing points along by one.
      for (int i = 1; i < kNumEndPoints; ++i)
        ends[i - 1] = ends[i];
      ends[kNumEndPoints - 1] = it.data();
    }
    ++pt_count;
  }
  // 1 or 2 points need special treatment.
  if (pt_count <= 2) {
    *pt1 = *starts[0];
    if (pt_count > 1)
      *pt2 = *starts[1];
    else
      *pt2 = *pt1;
    return 0.0;
  }
  int end_count = MIN(pt_count, kNumEndPoints);
  int* distances = new int[pt_count];
  double best_uq = -1.0;
  // Iterate each pair of points and find the best fitting line.
  for (int i = 0; i < end_count; ++i) {
    ICOORD* start = starts[i];
    for (int j = 0; j < end_count; ++j) {
      ICOORD* end = ends[j];
      // Skip pairs with no extent. This covers start == end (same element)
      // as well as distinct elements with equal coordinates; either would
      // make ComputeErrors divide by a zero squared length.
      ICOORD line_vector = *end;
      line_vector -= *start;
      if ((line_vector % line_vector) == 0)
        continue;
      // Compute the upper quartile error from the line.
      double dist = ComputeErrors(*start, *end, distances);
      if (dist < best_uq || best_uq < 0.0) {
        best_uq = dist;
        *pt1 = *start;
        *pt2 = *end;
      }
    }
  }
  delete [] distances;
  if (best_uq < 0.0) {
    // No usable pair was found: all candidate end points coincide.
    *pt1 = *starts[0];
    *pt2 = *ends[end_count - 1];
    return 0.0;
  }
  // Finally compute the square root to return the true distance.
  return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
}
// Comparator function used by the nth_item function.
// p1 and p2 each point at an int. Returns a negative, zero or positive value
// when *p1 is less than, equal to or greater than *p2.
// The result is built from two comparisons rather than a subtraction, so it
// cannot overflow for operands of opposite sign and large magnitude.
static int CompareInts(const void *p1, const void *p2) {
  const int lhs = *reinterpret_cast<const int*>(p1);
  const int rhs = *reinterpret_cast<const int*>(p2);
  return (lhs > rhs) - (lhs < rhs);
}
// Compute all the cross product distances of the points from the line
// and return the true squared upper quartile distance.
double DetLineFit::ComputeErrors(const ICOORD start, const ICOORD end,
int* distances) {
ICOORDELT_IT it(&pt_list_);
ICOORD line_vector = end;
line_vector -= start;
// Compute the distance of each point from the line.
int pt_index = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ICOORD pt_vector = *it.data();
pt_vector -= start;
// Compute |line_vector||pt_vector|sin(angle between)
int dist = line_vector * pt_vector;
if (dist < 0)
dist = -dist;
distances[pt_index++] = dist;
}
// Now get the upper quartile distance.
int index = choose_nth_item(3 * pt_index / 4, distances, pt_index,
sizeof(distances[0]), CompareInts);
double dist = distances[index];
// The true distance is the square root of the dist squared / the
// squared length of line_vector (which is the dot product with itself)
// Don't bother with the square root. Just return the square distance.
return dist * dist / (line_vector % line_vector);
}
} // namespace tesseract.

80
ccmain/detlinefit.h Normal file
View File

@ -0,0 +1,80 @@
///////////////////////////////////////////////////////////////////////
// File: detlinefit.h
// Description: Deterministic least upper-quartile squares line fitting.
// Author: Ray Smith
// Created: Thu Feb 28 14:35:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
#include "points.h"
namespace tesseract {
// This class fits a line to a set of ICOORD points.
// There is no restriction on the direction of the line, as it
// uses a vector method, ie no concern over infinite gradients.
// The fitted line has the least upper quartile of squares of perpendicular
// distances of all source points from the line, subject to the constraint
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
// i.e. the 9 combinations of one of the first 3 and last 3 points.
// A fundamental assumption of this algorithm is that one of the first 3 and
// one of the last 3 points are near the best line fit.
// The points must be Added in line order for the algorithm to work properly.
// No floating point calculations are needed* to make an accurate fit,
// and no random numbers are needed** so the algorithm is deterministic,
// architecture-stable, and compiler-stable as well as stable to minor
// changes in the input.
// *A single floating point division is used to compute each line's distance.
// This is unlikely to result in choice of a different line, but if it does,
// it would be easy to replace with a 64 bit integer calculation.
// **Random numbers are used in the nth_item function, but the worst
// non-determinism that can result is picking a different result among equals,
// and that wouldn't make any difference to the end-result distance, so the
// randomness does not affect the determinism of the algorithm. The random
// numbers are only there to guarantee average linear time.
// Fitting time is linear, but with a high constant, as it tries 9 different
// lines and computes the distance of all points each time.
// This class is aimed at replacing the LLSQ (linear least squares) and
// LMS (least median of squares) classes that are currently used for most
// of the line fitting in Tesseract.
// Accumulates points via Add() and fits a line through them with Fit().
class DetLineFit {
public:
DetLineFit();
~DetLineFit();
// Delete all Added points.
void Clear();
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
// Add must be called on points in sequence along the line.
void Add(const ICOORD& pt);
// Fit a line to the points, returning the fitted line as a pair of
// points, and the upper quartile error.
// pt1 and pt2 are outputs. With no points both are set to (0,0); with one
// or two points the points themselves are returned. In both of those cases
// the returned error is 0.0.
double Fit(ICOORD* pt1, ICOORD* pt2);
private:
// Writes one cross-product distance per added point into distances and
// returns the squared upper quartile distance from the line start->end.
// distances must have room for every added point.
double ComputeErrors(const ICOORD start, const ICOORD end, int* distances);
ICOORDELT_LIST pt_list_; // All the added points.
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_

133
ccmain/genblob.cpp Normal file
View File

@ -0,0 +1,133 @@
/**********************************************************************
* File: genblob.cpp (Formerly gblob.c)
* Description: Generic Blob processing routines
* Author: Phil Cheatle
* Created: Mon Nov 25 10:53:26 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "stepblob.h"
#include "polyblob.h"
#include "genblob.h"
/**********************************************************************
* blob_comparator()
*
* Blob comparator used to sort a blob list so that blobs are in increasing
* order of left edge.
**********************************************************************/
int blob_comparator( //sort blobs
const void *blob1p, //ptr to ptr to blob1
const void *blob2p //ptr to ptr to blob2
) {
PBLOB *blob1 = *(PBLOB **) blob1p;
PBLOB *blob2 = *(PBLOB **) blob2p;
return blob1->bounding_box ().left () - blob2->bounding_box ().left ();
}
/**********************************************************************
* c_blob_comparator()
*
* Blob comparator used to sort a blob list so that blobs are in increasing
* order of left edge.
**********************************************************************/
int c_blob_comparator( //sort blobs
const void *blob1p, //ptr to ptr to blob1
const void *blob2p //ptr to ptr to blob2
) {
C_BLOB *blob1 = *(C_BLOB **) blob1p;
C_BLOB *blob2 = *(C_BLOB **) blob2p;
return blob1->bounding_box ().left () - blob2->bounding_box ().left ();
}
/**********************************************************************
* gblob_bounding_box()
*
* Return the bounding box of a generic blob.
**********************************************************************/
// Returns the bounding box of a generic blob. When polygonal is false the
// pointer is reinterpreted as a C_BLOB before asking for its box.
TBOX gblob_bounding_box(PBLOB *blob, BOOL8 polygonal) {
  if (!polygonal) {
    C_BLOB *c_blob = (C_BLOB *) blob;
    return c_blob->bounding_box ();
  }
  return blob->bounding_box ();
}
/**********************************************************************
* gblob_sort_list()
*
* Sort a generic blob list into order of bounding box left edge
**********************************************************************/
// Sorts a generic blob list by bounding-box left edge. The list is treated
// as a PBLOB list when polygonal is set and as a C_BLOB list otherwise, each
// with its matching comparator.
void gblob_sort_list(PBLOB_LIST *blob_list, BOOL8 polygonal) {
  if (polygonal) {
    PBLOB_IT poly_it;
    poly_it.set_to_list (blob_list);
    poly_it.sort (blob_comparator);
    return;
  }
  C_BLOB_IT chain_it;
  chain_it.set_to_list ((C_BLOB_LIST *) blob_list);
  chain_it.sort (c_blob_comparator);
}
/**********************************************************************
* gblob_out_list()
*
* Return the generic outline list of a generic blob.
**********************************************************************/
// Returns the outline list of a generic blob. For a non-polygonal blob the
// C_BLOB's outline list is cast to OUTLINE_LIST* before being returned.
OUTLINE_LIST *gblob_out_list(PBLOB *blob, BOOL8 polygonal) {
  if (!polygonal) {
    C_BLOB *c_blob = (C_BLOB *) blob;
    return (OUTLINE_LIST *) c_blob->out_list ();
  }
  return blob->out_list ();
}
/**********************************************************************
* goutline_bounding_box()
*
* Return the bounding box of a generic outline.
**********************************************************************/
// Returns the bounding box of a generic outline. When polygonal is false the
// pointer is reinterpreted as a C_OUTLINE before asking for its box.
TBOX goutline_bounding_box(OUTLINE *outline, BOOL8 polygonal) {
  if (!polygonal) {
    C_OUTLINE *c_outline = (C_OUTLINE *) outline;
    return c_outline->bounding_box ();
  }
  return outline->bounding_box ();
}

52
ccmain/genblob.h Normal file
View File

@ -0,0 +1,52 @@
/**********************************************************************
* File: genblob.h (Formerly gblob.h)
* Description: Generic Blob processing routines
* Author: Phil Cheatle
* Created: Mon Nov 25 10:53:26 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef GENBLOB_H
#define GENBLOB_H
#include "polyblob.h"
#include "hosthplb.h"
#include "rect.h"
#include "notdll.h"
// Comparator ordering PBLOBs by increasing bounding-box left edge.
int blob_comparator( //sort blobs
const void *blob1p, //ptr to ptr to blob1
const void *blob2p //ptr to ptr to blob2
);
// Comparator ordering C_BLOBs by increasing bounding-box left edge.
int c_blob_comparator( //sort blobs
const void *blob1p, //ptr to ptr to blob1
const void *blob2p //ptr to ptr to blob2
);
// Bounding box of a generic blob; the blob is treated as a C_BLOB when
// polygonal is false.
TBOX gblob_bounding_box( //Get bounding box
PBLOB *blob, //generic blob
BOOL8 polygonal //is blob polygonal?
);
// Sorts a generic blob list into order of bounding-box left edge; the list
// is treated as a C_BLOB_LIST when polygonal is false.
void gblob_sort_list( //Sort a gblob list
PBLOB_LIST *blob_list, //generic blob list
BOOL8 polygonal //is list polygonal?
);
// Outline list of a generic blob; for a non-polygonal blob the C_BLOB's
// list is returned cast to OUTLINE_LIST*.
OUTLINE_LIST *gblob_out_list( //Get outline list
PBLOB *blob, //generic blob
BOOL8 polygonal //is blob polygonal?
);
// Bounding box of a generic outline; the outline is treated as a C_OUTLINE
// when polygonal is false.
TBOX goutline_bounding_box( //Get bounding box
OUTLINE *outline, //generic outline
BOOL8 polygonal //is outline polygonal?
);
#endif

39
ccmain/hpddef.h Normal file
View File

@ -0,0 +1,39 @@
/**********************************************************************
* File: hpddef.h
* Description: Defines for dll symbols for handpd.dll.
* Author: Ray Smith
* Created: Tue Apr 30 17:15:01 MDT 1996
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
//This file does NOT use the usual single inclusion code as it
//is necessary to allow it to be executed every time it is included.
//#ifndef HPDDEF_H
//#define HPDDEF_H
// DLLSYM is redefined on every inclusion (this header deliberately has no
// include guard), so drop any previous definition first.
#undef DLLSYM
// Without __IPEDLL, DLLSYM expands to nothing.
#ifndef __IPEDLL
# define DLLSYM
#else
// With __IPEDLL, symbols are exported while building handpd itself
// (__BUILDING_HANDPD__) and imported otherwise. DLLEXPORT and DLLIMPORT are
// expected to be defined elsewhere.
# ifdef __BUILDING_HANDPD__
# define DLLSYM DLLEXPORT
# else
# define DLLSYM DLLIMPORT
# endif
#endif
// NOTE(review): compiler-specific pragma, enabled only for __CFM68K__ builds
// that do not use static libraries; exact effect to be confirmed against the
// compiler documentation.
#if defined(__CFM68K__) && !defined(__USING_STATIC_LIBS__)
# pragma import on
#endif
//#endif

8
ccmain/hpdsizes.h Normal file
View File

@ -0,0 +1,8 @@
// Array dimensions for the attribute label tables declared in labls.h.
#ifndef HPDSIZES_H
#define HPDSIZES_H
// Number of text attributes: first dimension of tn, tvar and tlabel.
#define NUM_TEXT_ATTR 10
// Number of block attributes: first dimension of bn, bvar and blabel.
#define NUM_BLOCK_ATTR 7
// Storage size in chars of each label string in tlabel, blabel, backlabel.
#define MAXLENGTH 128
// Number of background types: first dimension of backlabel.
#define NUM_BACKGROUNDS 8
#endif

479
ccmain/ipoints.h Normal file
View File

@ -0,0 +1,479 @@
/**********************************************************************
* File: ipoints.h (Formerly icoords.h)
* Description: Inline functions for coords.h.
* Author: Ray Smith
* Created: Fri Jun 21 15:14:21 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef IPOINTS_H
#define IPOINTS_H
#include <math.h>
/**********************************************************************
* operator!
*
* Rotate an ICOORD 90 degrees anticlockwise.
**********************************************************************/
// Returns src rotated 90 degrees anticlockwise: (x, y) becomes (-y, x).
inline ICOORD operator!(const ICOORD& src) {
  ICOORD rotated;
  rotated.ycoord = src.xcoord;
  rotated.xcoord = -src.ycoord;
  return rotated;
}
/**********************************************************************
* operator-
*
* Unary minus of an ICOORD.
**********************************************************************/
// Unary minus: returns an ICOORD with both components of src negated.
inline ICOORD operator-(const ICOORD& src) {
  ICOORD negated;
  negated.ycoord = -src.ycoord;
  negated.xcoord = -src.xcoord;
  return negated;
}
/**********************************************************************
* operator+
*
* Add 2 ICOORDS.
**********************************************************************/
// Component-wise sum of two ICOORDs, returned as a new value.
inline ICOORD operator+(const ICOORD& op1, const ICOORD& op2) {
  ICOORD total;
  total.ycoord = op1.ycoord + op2.ycoord;
  total.xcoord = op1.xcoord + op2.xcoord;
  return total;
}
/**********************************************************************
* operator+=
*
* Add 2 ICOORDS.
**********************************************************************/
// Adds op2 to op1 component-wise, in place, and returns op1.
inline ICOORD& operator+=(ICOORD& op1, const ICOORD& op2) {
  op1.xcoord = op1.xcoord + op2.xcoord;
  op1.ycoord = op1.ycoord + op2.ycoord;
  return op1;
}
/**********************************************************************
* operator-
*
* Subtract 2 ICOORDS.
**********************************************************************/
// Component-wise difference op1 - op2, returned as a new value.
inline ICOORD operator-(const ICOORD& op1, const ICOORD& op2) {
  ICOORD difference;
  difference.ycoord = op1.ycoord - op2.ycoord;
  difference.xcoord = op1.xcoord - op2.xcoord;
  return difference;
}
/**********************************************************************
* operator-=
*
* Subtract 2 ICOORDS.
**********************************************************************/
// Subtracts op2 from op1 component-wise, in place, and returns op1.
inline ICOORD& operator-=(ICOORD& op1, const ICOORD& op2) {
  op1.xcoord = op1.xcoord - op2.xcoord;
  op1.ycoord = op1.ycoord - op2.ycoord;
  return op1;
}
/**********************************************************************
* operator%
*
* Scalar product of 2 ICOORDS.
**********************************************************************/
// Scalar (dot) product of two ICOORDs: x1*x2 + y1*y2.
inline inT32 operator%(const ICOORD& op1, const ICOORD& op2) {
  return op1.xcoord * op2.xcoord
       + op1.ycoord * op2.ycoord;
}
/**********************************************************************
* operator*
*
* Cross product of 2 ICOORDS.
**********************************************************************/
// Cross product of two ICOORDs: x1*y2 - y1*x2.
inline inT32 operator*(const ICOORD& op1, const ICOORD& op2) {
  return op1.xcoord * op2.ycoord
       - op1.ycoord * op2.xcoord;
}
/**********************************************************************
* operator*
*
* Scalar multiply of an ICOORD.
**********************************************************************/
// Scalar multiply (vector * scale): both components of op1 times scale,
// returned as a new ICOORD.
inline ICOORD operator*(const ICOORD& op1, inT16 scale) {
  ICOORD scaled;
  scaled.ycoord = op1.ycoord * scale;
  scaled.xcoord = op1.xcoord * scale;
  return scaled;
}
// Scalar multiply (scale * vector): both components of op1 times scale,
// returned as a new ICOORD.
inline ICOORD operator*(inT16 scale, const ICOORD& op1) {
  ICOORD scaled;
  scaled.ycoord = op1.ycoord * scale;
  scaled.xcoord = op1.xcoord * scale;
  return scaled;
}
/**********************************************************************
* operator*=
*
* Scalar multiply of an ICOORD.
**********************************************************************/
// Multiplies both components of op1 by scale, in place, and returns op1.
inline ICOORD& operator*=(ICOORD& op1, inT16 scale) {
  op1.xcoord = op1.xcoord * scale;
  op1.ycoord = op1.ycoord * scale;
  return op1;
}
/**********************************************************************
* operator/
*
* Scalar divide of an ICOORD.
**********************************************************************/
// Scalar divide of an ICOORD: returns op1 with both components divided by
// scale (integer division).
// A zero scale is not divided by, since integer division by zero is
// undefined; in that case an unchanged copy of op1 is returned. This matches
// the zero guard used by the FCOORD divide operators and the in-place
// ICOORD operator/=, which leaves its operand untouched.
inline ICOORD operator/(const ICOORD& op1, inT16 scale) {
  ICOORD result = op1;
  if (scale != 0) {
    result.xcoord = op1.xcoord / scale;
    result.ycoord = op1.ycoord / scale;
  }
  return result;
}
/**********************************************************************
* operator/=
*
* Scalar divide of an ICOORD.
**********************************************************************/
// In-place scalar divide of an ICOORD: divides both components of op1 by
// scale (integer division) and returns op1.
// A zero scale leaves op1 unchanged rather than dividing by zero, which is
// undefined for integers; this mirrors the guard in the FCOORD operator/=.
inline ICOORD& operator/=(ICOORD& op1, inT16 scale) {
  if (scale != 0) {
    op1.xcoord /= scale;
    op1.ycoord /= scale;
  }
  return op1;
}
/**********************************************************************
* ICOORD::rotate
*
* Rotate an ICOORD by the given (normalized) (cos,sin) vector.
**********************************************************************/
inline void ICOORD::rotate( //rotate by vector
const FCOORD& vec) {
inT16 tmp;
tmp = (inT16) floor (xcoord * vec.x () - ycoord * vec.y () + 0.5);
ycoord = (inT16) floor (ycoord * vec.x () + xcoord * vec.y () + 0.5);
xcoord = tmp;
}
/**********************************************************************
* operator!
*
* Rotate an FCOORD 90 degrees anticlockwise.
**********************************************************************/
// Returns src rotated 90 degrees anticlockwise: (x, y) becomes (-y, x).
inline FCOORD operator!(const FCOORD& src) {
  FCOORD rotated;
  rotated.ycoord = src.xcoord;
  rotated.xcoord = -src.ycoord;
  return rotated;
}
/**********************************************************************
* operator-
*
* Unary minus of an FCOORD.
**********************************************************************/
// Unary minus: returns an FCOORD with both components of src negated.
inline FCOORD operator-(const FCOORD& src) {
  FCOORD negated;
  negated.ycoord = -src.ycoord;
  negated.xcoord = -src.xcoord;
  return negated;
}
/**********************************************************************
* operator+
*
* Add 2 FCOORDS.
**********************************************************************/
// Component-wise sum of two FCOORDs, returned as a new value.
inline FCOORD operator+(const FCOORD& op1, const FCOORD& op2) {
  FCOORD total;
  total.ycoord = op1.ycoord + op2.ycoord;
  total.xcoord = op1.xcoord + op2.xcoord;
  return total;
}
/**********************************************************************
* operator+=
*
* Add 2 FCOORDS.
**********************************************************************/
// Adds op2 to op1 component-wise, in place, and returns op1.
inline FCOORD& operator+=(FCOORD& op1, const FCOORD& op2) {
  op1.xcoord += op2.xcoord;  // x component
  op1.ycoord += op2.ycoord;  // y component
  return op1;
}
/**********************************************************************
* operator-
*
* Subtract 2 FCOORDS.
**********************************************************************/
// Component-wise difference op1 - op2, returned as a new value.
inline FCOORD operator-(const FCOORD& op1, const FCOORD& op2) {
  FCOORD difference;
  difference.ycoord = op1.ycoord - op2.ycoord;
  difference.xcoord = op1.xcoord - op2.xcoord;
  return difference;
}
/**********************************************************************
* operator-=
*
* Subtract 2 FCOORDS.
**********************************************************************/
// Subtracts op2 from op1 component-wise, in place, and returns op1.
inline FCOORD& operator-=(FCOORD& op1, const FCOORD& op2) {
  op1.xcoord -= op2.xcoord;  // x component
  op1.ycoord -= op2.ycoord;  // y component
  return op1;
}
/**********************************************************************
* operator%
*
* Scalar product of 2 FCOORDS.
**********************************************************************/
// Scalar (dot) product of two FCOORDs: x1*x2 + y1*y2.
inline float operator%(const FCOORD& op1, const FCOORD& op2) {
  return op1.xcoord * op2.xcoord
       + op1.ycoord * op2.ycoord;
}
/**********************************************************************
* operator*
*
* Cross product of 2 FCOORDS.
**********************************************************************/
// Cross product of two FCOORDs: x1*y2 - y1*x2.
inline float operator*(const FCOORD& op1, const FCOORD& op2) {
  return op1.xcoord * op2.ycoord
       - op1.ycoord * op2.xcoord;
}
/**********************************************************************
* operator*
*
* Scalar multiply of an FCOORD.
**********************************************************************/
// Scalar multiply (vector * scale): both components of op1 times scale,
// returned as a new FCOORD.
inline FCOORD operator*(const FCOORD& op1, float scale) {
  FCOORD scaled;
  scaled.ycoord = op1.ycoord * scale;
  scaled.xcoord = op1.xcoord * scale;
  return scaled;
}
// Scalar multiply (scale * vector): both components of op1 times scale,
// returned as a new FCOORD.
inline FCOORD operator*(float scale, const FCOORD& op1) {
  FCOORD scaled;
  scaled.ycoord = op1.ycoord * scale;
  scaled.xcoord = op1.xcoord * scale;
  return scaled;
}
/**********************************************************************
* operator*=
*
* Scalar multiply of an FCOORD.
**********************************************************************/
// Multiplies both components of op1 by scale, in place, and returns op1.
inline FCOORD& operator*=(FCOORD& op1, float scale) {
  op1.xcoord *= scale;  // x component
  op1.ycoord *= scale;  // y component
  return op1;
}
/**********************************************************************
* operator/
*
* Scalar divide of an FCOORD.
**********************************************************************/
// Scalar divide: returns op1 with both components divided by scale.
// When scale is zero no division takes place and the default-constructed
// result is returned as is.
inline FCOORD operator/(const FCOORD& op1, float scale) {
  FCOORD quotient;
  if (scale == 0)
    return quotient;
  quotient.xcoord = op1.xcoord / scale;
  quotient.ycoord = op1.ycoord / scale;
  return quotient;
}
/**********************************************************************
* operator/=
*
* Scalar divide of an FCOORD.
**********************************************************************/
// Divides both components of op1 by scale, in place, and returns op1.
// A zero scale leaves op1 unchanged.
inline FCOORD& operator/=(FCOORD& op1, float scale) {
  if (scale == 0)
    return op1;
  op1.xcoord /= scale;
  op1.ycoord /= scale;
  return op1;
}
/**********************************************************************
* rotate
*
* Rotate an FCOORD by the given (normalized) (cos,sin) vector.
**********************************************************************/
// Rotates this FCOORD in place by vec, taken as a (cos, sin) pair.
inline void FCOORD::rotate(const FCOORD vec) {
  // Both results are computed from the old coordinates before either member
  // is overwritten.
  const float new_x = xcoord * vec.x () - ycoord * vec.y ();
  const float new_y = ycoord * vec.x () + xcoord * vec.y ();
  ycoord = new_y;
  xcoord = new_x;
}
#endif

188
ccmain/labls.cpp Normal file
View File

@ -0,0 +1,188 @@
/**********************************************************************
* File: labls.c (Formerly labels.c)
* Description: Attribute definition tables
* Author: Sheelagh Lloyd?
* Created:
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "hpdsizes.h"
#include "labls.h"
/******************************************************************************
* TEXT REGIONS
*****************************************************************************/
// One count per text attribute. Each value equals the number of non-empty
// label strings in the matching row of tlabel.
DLLSYM inT32 tn[NUM_TEXT_ATTR] = {
3, //T_HORIZONTAL
4, //T_TEXT
2, //T_SERIF
2, //T_PROPORTIONAL
2, //T_NORMAL
2, //T_UPRIGHT
2, //T_SOLID
3, //T_BLACK
2, //T_NOTUNDER
2, //T_NOTDROP
};
// Label strings for each text attribute: one row per attribute with up to
// four alternatives. Unused trailing slots are empty strings.
DLLSYM char tlabel[NUM_TEXT_ATTR][4][MAXLENGTH] = { {
//T_HORIZONTAL
"Horizontal",
"Vertical",
"Skew",
""
},
{ //T_TEXT
"Text",
"Table",
"Form",
"Mixed"
},
{ //T_SERIF
"Serif",
"Sans-serif",
"",
""
},
{ //T_PROPORTIONAL
"Proportional",
"Fixed pitch",
"",
""
},
{ //T_NORMAL
"Normal",
"Bold",
"",
""
},
{ //T_UPRIGHT
"Upright",
"Italic",
"",
""
},
{ //T_SOLID
"Solid",
"Outline",
"",
""
},
{ //T_BLACK
"Black",
"White",
"Coloured",
""
},
{ //T_NOTUNDER
"Not underlined",
"Underlined",
"",
""
},
{ //T_NOTDROP
"Not drop caps",
"Drop Caps",
"",
""
}
};
// One count per block attribute. Each value equals the number of non-empty
// label strings in the matching row of blabel.
DLLSYM inT32 bn[NUM_BLOCK_ATTR] = {
4, //G_MONOCHROME
2, //I_MONOCHROME
2, //I_SMOOTH
3, //R_SINGLE
3, //R_BLACK
3, //S_BLACK
2 //W_TEXT
};
// Per-attribute integer slots for text and block attributes, defined here
// without explicit initializers.
// NOTE(review): presumably hold the currently selected label index for each
// attribute - confirm against the code that reads and writes them.
DLLSYM inT32 tvar[NUM_TEXT_ATTR];
DLLSYM inT32 bvar[NUM_BLOCK_ATTR];
// Label strings for each block attribute: one row per attribute with up to
// four alternatives. Unused trailing slots are empty strings. The banner
// comments group the rows by region type (graphics, image, rules, scribble,
// weird).
DLLSYM char blabel[NUM_BLOCK_ATTR][4][MAXLENGTH] = { {
//G_MONOCHROME
/****************************************************************************
* GRAPHICS
***************************************************************************/
"Monochrome ",
"Two colour ",
"Spot colour",
"Multicolour"
},
/****************************************************************************
* IMAGE
***************************************************************************/
{ //I_MONOCHROME
"Monochrome ",
"Colour ",
"",
""
},
{ //I_SMOOTH
"Smooth ",
"Grainy ",
"",
""
},
/****************************************************************************
* RULES
***************************************************************************/
{ //R_SINGLE
"Single ",
"Double ",
"Multiple",
""
},
{ //R_BLACK
"Black ",
"White ",
"Coloured",
""
},
/****************************************************************************
* SCRIBBLE
***************************************************************************/
{ //S_BLACK
"Black ",
"White ",
"Coloured",
""
},
/****************************************************************************
* WEIRD
***************************************************************************/
{ //W_TEXT
"No text ",
"Contains text",
"",
""
}
};
// Label string for each background type, one entry per B_* value named in
// the trailing comments.
DLLSYM char backlabel[NUM_BACKGROUNDS][MAXLENGTH] = {
"White", //B_WHITE
"Black", //B_BLACK
"Coloured", //B_COLOURED
"Textured", //B_TEXTURED
"Patterned", //B_PATTERNED
"Gradient fill", //B_GRADIENTFILL
"Image", //B_IMAGE
"Text" //B_TEXT
};

38
ccmain/labls.h Normal file
View File

@ -0,0 +1,38 @@
/**********************************************************************
* File: labls.h (Formerly labels.h)
* Description: Attribute definition tables
* Author: Sheelagh Lloyd?
* Created:
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef LABLS_H
#define LABLS_H
#include "host.h"
#include "hpdsizes.h"
#include "hpddef.h" //must be last (handpd.dll)
// Attribute tables defined in labls.cpp. Sizes come from hpdsizes.h.
// Label count for each text attribute.
extern DLLSYM inT32 tn[NUM_TEXT_ATTR];
// Label strings for each text attribute (up to 4 per attribute).
extern DLLSYM char tlabel[NUM_TEXT_ATTR][4][MAXLENGTH];
// Label count for each block attribute.
extern DLLSYM inT32 bn[NUM_BLOCK_ATTR];
// Per-attribute integer slots for text and block attributes.
extern DLLSYM inT32 tvar[NUM_TEXT_ATTR];
extern DLLSYM inT32 bvar[NUM_BLOCK_ATTR];
// Label strings for each block attribute (up to 4 per attribute).
extern DLLSYM char blabel[NUM_BLOCK_ATTR][4][MAXLENGTH];
// Label string for each background type.
extern DLLSYM char backlabel[NUM_BACKGROUNDS][MAXLENGTH];
#endif

249
ccmain/linlsq.cpp Normal file
View File

@ -0,0 +1,249 @@
/**********************************************************************
* File: linlsq.cpp (Formerly llsq.c)
* Description: Linear Least squares fitting code.
* Author: Ray Smith
* Created: Thu Sep 12 08:44:51 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <stdio.h>
#include <math.h>
#include "errcode.h"
#include "linlsq.h"
// Fallback definition of pi for <math.h> implementations that do not supply
// M_PI. The guard tests M_PI itself rather than a platform macro, so an
// existing definition is never redefined and a missing one is always filled
// in, whatever the platform.
#ifndef M_PI
#define M_PI 3.14159265359
#endif
// Error reported by LLSQ::remove when called on an accumulator with no
// elements.
const ERRCODE EMPTY_LLSQ = "Can't delete from an empty LLSQ";
// EXTERN expands to nothing in this file.
#define EXTERN
// NOTE(review): double_VAR is a project macro defined elsewhere; presumably
// it declares a tunable double with the given default and description -
// confirm against its definition.
EXTERN double_VAR (pdlsq_posdir_ratio, 4e-6, "Mult of dir to cf pos");
EXTERN double_VAR (pdlsq_threshold_angleavg, 0.1666666,
"Frac of pi for simple fit");
/**********************************************************************
* LLSQ::clear
*
* Function to initialize a LLSQ.
**********************************************************************/
void LLSQ::clear() {
  // Zero the five running sums, then the element count, so the
  // accumulator is back in its freshly-constructed state.
  sigyy = sigxy = sigxx = 0;
  sigy = sigx = 0;
  n = 0;
}
/**********************************************************************
* LLSQ::add
*
* Add an element to the accumulator.
**********************************************************************/
void LLSQ::add(double x, double y) {
  // Fold the point (x, y) into every running sum.
  // Second-order sums first, then first-order sums, then the count.
  sigxx += x * x;
  sigxy += x * y;
  sigyy += y * y;
  sigx += x;
  sigy += y;
  ++n;
}
/**********************************************************************
* LLSQ::remove
*
* Delete an element from the accumulator.
**********************************************************************/
void LLSQ::remove(double x, double y) {
  // Taking a point out of an empty accumulator is reported as an error.
  if (n <= 0) {
    EMPTY_LLSQ.error ("LLSQ::remove", ABORT, NULL);
  }
  // Undo exactly what add() did for the point (x, y).
  sigxx -= x * x;
  sigxy -= x * y;
  sigyy -= y * y;
  sigx -= x;
  sigy -= y;
  --n;
}
/**********************************************************************
* LLSQ::m
*
* Return the gradient of the line fit.
**********************************************************************/
double LLSQ::m() {
  // Gradient of the least-squares line y = m*x + c through the accumulated
  // points. Returns 0 when there are fewer than two points, or when the
  // x values have no spread (the denominator below is zero), instead of
  // dividing by zero and returning inf/NaN.
  if (n <= 1)
    return 0;                    // too little
  // n times the variance of x.
  const double x_spread = sigxx - sigx * sigx / n;
  if (x_spread == 0)
    return 0;                    // all x equal: gradient undefined
  // n times the covariance of x and y, divided by the x spread.
  return (sigxy - sigx * sigy / n) / x_spread;
}
/**********************************************************************
* LLSQ::c
*
* Return the constant of the line fit.
**********************************************************************/
double LLSQ::c(double m) {
  // Intercept that makes a line of gradient m pass through the mean of the
  // accumulated points; 0 when nothing has been accumulated.
  return (n > 0) ? (sigy - m * sigx) / n : 0;
}
/**********************************************************************
* LLSQ::rms
*
* Return the rms error of the fit.
**********************************************************************/
double LLSQ::rms(double m, double c) {
  // Root-mean-square vertical distance of the accumulated points from the
  // line y = m*x + c. Returns 0 for an empty accumulator.
  if (n <= 0)
    return 0;
  // Sum over all points of (m*x + c - y)^2, expanded in terms of the sums.
  const double sum_sq =
      sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c * (n * c - 2 * sigy);
  // Rounding can push the sum slightly below zero; report that as 0.
  return (sum_sq >= 0) ? sqrt (sum_sq / n) : 0;
}
/**********************************************************************
* LLSQ::spearman
*
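* Return the correlation coefficient of the accumulated points. Despite the
* function name, the formula used is the product-moment (Pearson) one.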
**********************************************************************/
double LLSQ::spearman() {
  // Correlation coefficient of the accumulated points, computed as
  // covariance / sqrt(var_x * var_y) from the running sums (i.e. the
  // product-moment form). Returns 1 when there are fewer than two points
  // or when the variance product is not positive.
  if (n <= 1)
    return 1;                    // too little
  const double var_product =
      (sigxx - sigx * sigx / n) * (sigyy - sigy * sigy / n);
  if (var_product > 0)
    return (sigxy - sigx * sigy / n) / sqrt (var_product);
  return 1;
}
/**********************************************************************
* PDLSQ::fit
*
* Return all the parameters of the fit to pos/dir.
* The return value is the rms error.
**********************************************************************/
// Fit a line, in (r, theta) form, to the accumulated positions and
// directions.
//   ang      output: fitted angle quantised to MODULUS steps per full turn
//   sin_ang  output: sine of the fitted angle
//   cos_ang  output: cosine of the fitted angle
//   r        output: sin*mean(x) - cos*mean(y) of the positions
// Returns the rms error of the fit. With no accumulated points every output
// (including r) is set to zero and 0 is returned.
float PDLSQ::fit(                //get fit
                 DIR128 &ang,    //output angle
                 float &sin_ang, //r,theta parameterisation
                 float &cos_ang,
                 float &r) {
  double a, b;                   //intermediates
  double angle;                  //resulting angle
  double avg_angle;              //simple average
  double error;                  //total error
  double sinx, cosx;             //return values

  if (pos.n > 0) {
    // a and b combine the position scatter with the direction sums, the
    // latter weighted by pdlsq_posdir_ratio.
    a = pos.sigxy - pos.sigx * pos.sigy / pos.n
      + pdlsq_posdir_ratio * dir.sigxy;
    b =
      pos.sigxx - pos.sigyy + (pos.sigy * pos.sigy -
      pos.sigx * pos.sigx) / pos.n +
      pdlsq_posdir_ratio * (dir.sigxx - dir.sigyy);
    // Angle of the summed direction vectors.
    if (dir.sigy != 0 || dir.sigx != 0)
      avg_angle = atan2 (dir.sigy, dir.sigx);
    else
      avg_angle = 0;
    // Fitted angle; needs at least two points and a non-degenerate a, b.
    if ((a != 0 || b != 0) && pos.n > 1)
      angle = atan2 (2 * a, b) / 2;
    else
      angle = avg_angle;
    // The fitted angle is only known modulo pi: flip it so that it lies
    // within pi/2 of the average direction.
    error = avg_angle - angle;
    if (error > M_PI / 2) {
      error -= M_PI;
      angle += M_PI;
    }
    if (error < -M_PI / 2) {
      error += M_PI;
      angle -= M_PI;
    }
    // Too far from the average direction: distrust the fit.
    if (error > M_PI * pdlsq_threshold_angleavg
      || error < -M_PI * pdlsq_threshold_angleavg)
      angle = avg_angle;         //go simple
                                 //convert direction
    ang = (inT16) (angle * MODULUS / (2 * M_PI));
    sinx = sin (angle);
    cosx = cos (angle);
    r = (sinx * pos.sigx - cosx * pos.sigy) / pos.n;
    //              tprintf("x=%g, y=%g, xx=%g, xy=%g, yy=%g, a=%g, b=%g, ang=%g, r=%g\n",
    //                      pos.sigx,pos.sigy,pos.sigxx,pos.sigxy,pos.sigyy,
    //                      a,b,angle,r);
    // Direction contribution to the squared error, weighted as above.
    error = dir.sigxx * sinx * sinx + dir.sigyy * cosx * cosx
      - 2 * dir.sigxy * sinx * cosx;
    error *= pdlsq_posdir_ratio;
    // Plus the sum over positions of (sin*x - cos*y - r)^2.
    error += sinx * sinx * pos.sigxx + cosx * cosx * pos.sigyy
      - 2 * sinx * cosx * pos.sigxy
      - 2 * r * (sinx * pos.sigx - cosx * pos.sigy) + r * r * pos.n;
    if (error >= 0)
                                 //rms value
      error = sqrt (error / pos.n);
    else
      error = 0;                 //-0
    sin_ang = sinx;
    cos_ang = cosx;
  }
  else {
    sin_ang = 0.0f;
    cos_ang = 0.0f;
    ang = 0;
    r = 0.0f;                    //was left unassigned on this path
    error = 0;                   //too little
  }
  return error;
}

102
ccmain/linlsq.h Normal file
View File

@ -0,0 +1,102 @@
/**********************************************************************
* File: linlsq.h (Formerly llsq.h)
* Description: Linear Least squares fitting code.
* Author: Ray Smith
* Created: Thu Sep 12 08:44:51 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef LINLSQ_H
#define LINLSQ_H
#include "points.h"
#include "mod128.h"
#include "varable.h"
// Linear least-squares accumulator. Points are added (or removed) one at a
// time; only the element count and the running sums of x, y, x*x, x*y and
// y*y are stored, and the fit results are derived from those sums on demand.
class LLSQ
{
friend class PDLSQ; //pos & direction
public:
LLSQ() { //constructor
clear(); //set to zeros
}
void clear(); //initialize
void add( //add element
double x, //coords to add
double y);
// Reports EMPTY_LLSQ if called when no elements are held.
void remove( //delete element
double x, //coords to delete
double y);
inT32 count() { //no of elements
return n;
}
// Gradient of the fitted line; 0 with fewer than two elements.
double m(); //get gradient
// Intercept for the given gradient; 0 with no elements.
double c( //get constant
double m); //gradient
// Rms error of the points about the line y = m*x + c.
double rms( //get error
double m, //gradient
double c); //constant
// Correlation coefficient of the accumulated points.
double spearman(); //get correlation
private:
inT32 n; //no of elements
double sigx; //sum of x
double sigy; //sum of y
double sigxx; //sum x squared
double sigxy; //sum of xy
double sigyy; //sum y squared
};
// Position-and-direction least-squares accumulator: a pair of LLSQ
// accumulators, one fed with point positions and one with point directions,
// always updated together so that both hold the same number of elements.
class PDLSQ
{
public:
PDLSQ() { //constructor
clear(); //set to zeros
}
void clear() { //initialize
pos.clear (); //clear both
dir.clear ();
}
void add( //add element
const ICOORD &addpos, //position of pt
const ICOORD &adddir) { //dir of pt
pos.add (addpos.x (), addpos.y ());
dir.add (adddir.x (), adddir.y ());
}
void remove( //remove element
const ICOORD &removepos, //position of pt
const ICOORD &removedir) { //dir of pt
pos.remove (removepos.x (), removepos.y ());
dir.remove (removedir.x (), removedir.y ());
}
inT32 count() { //no of elements
return pos.count ();
}
// Fits a line in (r, theta) form; the return value is the rms error.
float fit( //get fit parameters
DIR128 &ang, //output angle
float &sin_ang, //output components
float &cos_ang,
float &r);
private:
LLSQ pos; //position
LLSQ dir; //directions
};
// Declaration of the variable defined in linlsq.cpp.
// NOTE(review): the value written here (0.4e-6) differs from the 4e-6 given
// at the definition in linlsq.cpp; confirm which is intended and whether the
// double_VAR_H macro uses this argument at all.
extern double_VAR_H (pdlsq_posdir_ratio, 0.4e-6, "Mult of dir to cf pos");
#endif

453
ccmain/lmedsq.cpp Normal file
View File

@ -0,0 +1,453 @@
/**********************************************************************
* File: lmedsq.cpp (Formerly lms.c)
* Description: Code for the LMS class.
* Author: Ray Smith
* Created: Fri Aug 7 09:30:53 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <stdlib.h>
#include "statistc.h"
#include "memry.h"
#include "statistc.h"
#include "lmedsq.h"
// EXTERN expands to nothing here, so the INT_VAR below is a definition.
#define EXTERN
// Number of candidate lines tried by LMS::fit and LMS::constrained_fit;
// LMS::fit_quadratic tries twice as many candidate curves.
EXTERN INT_VAR (lms_line_trials, 12, "Number of linew fits to do");
// Initial contents of the three-element seed arrays passed to nrand48.
#define SEED1 0x1234 //default seeds
#define SEED2 0x5678
#define SEED3 0x9abc
// Number of attempts pick_line/pick_quadratic make at finding usable points.
#define LMS_MAX_FAILURES 3
#ifndef __UNIX__
// Stand-in for nrand48 on builds where __UNIX__ is not defined, built on
// srand/rand.
//   seeds  three 16-bit seed words; read only while the internal seed is
//          still zero, to seed rand() via srand().
// Returns a pseudo-random value in the range [0, 2^31). The top bit is
// always clear because callers cast the result to inT32 and use
// "% samplecount" as an array index, which must not go negative.
uinT32 nrand48(                  //get random number
               uinT16 *seeds     //seeds to use
              ) {
  static uinT32 seed = 0;        //only seed

  if (seed == 0) {
    // Widen before shifting: a uinT16 promotes to signed int, and
    // 0x9abc << 16 does not fit in a 32-bit int.
    seed = (uinT32) seeds[0] ^ ((uinT32) seeds[1] << 8)
      ^ ((uinT32) seeds[2] << 16);
    srand(seed);
  }
  // Draw the halves in separate statements so their order is defined, and
  // shift as unsigned so a large RAND_MAX cannot overflow a signed int.
  uinT32 low_part = (uinT32) rand ();
  uinT32 high_part = (uinT32) rand ();
                                 //make 31 bit one
  return (low_part | (high_part << 16)) & 0x7fffffff;
}
#endif
/**********************************************************************
* LMS::LMS
*
* Construct a LMS class, given the max no of samples to be given
**********************************************************************/
// Build an LMS able to hold up to `size` samples. Both work arrays are
// allocated here and released by the destructor.
LMS::LMS(inT32 size)
  : samplesize(size) {
  // No samples and no fit yet.
  fitted = FALSE;
  samplecount = 0;
  line_error = 0.0f;
  // Curve coefficients start at zero.
  a = 0;
  m = 0.0f;
  c = 0.0f;
  // One slot per potential sample in each array.
  samples = (FCOORD *) alloc_mem (size * sizeof (FCOORD));
  errors = (float *) alloc_mem (size * sizeof (float));
}
/**********************************************************************
* LMS::~LMS
*
* Destruct a LMS class.
**********************************************************************/
// Destructor: hand back the two arrays obtained in the constructor.
LMS::~LMS() {
  free_mem(samples);             // sample coordinates
  free_mem(errors);              // per-sample squared errors
}
/**********************************************************************
* LMS::clear
*
* Clear samples from array.
**********************************************************************/
// Forget all samples. The arrays are kept; any previous fit is invalidated.
void LMS::clear() {
  fitted = FALSE;
  samplecount = 0;
}
/**********************************************************************
* LMS::add
*
* Add another sample. More than the constructed number will be ignored.
**********************************************************************/
// Append one sample. Any existing fit is invalidated even when the sample
// itself is dropped because the array is already full.
void LMS::add(FCOORD sample) {
  fitted = FALSE;
  if (samplecount >= samplesize)
    return;                      // full: ignore the extra sample
  samples[samplecount] = sample;
  samplecount++;
}
/**********************************************************************
* LMS::fit
*
* Fit a line to the given sample points.
**********************************************************************/
void LMS::fit( //fit sample
float &out_m, //output line
float &out_c) {
inT32 index; //of median
inT32 trials; //no of medians
float test_m, test_c; //candidate line
float test_error; //error of test line
switch (samplecount) {
case 0:
m = 0.0f; //no info
c = 0.0f;
line_error = 0.0f;
break;
case 1:
m = 0.0f;
c = samples[0].y (); //horiz thru pt
line_error = 0.0f;
break;
case 2:
if (samples[0].x () != samples[1].x ()) {
m = (samples[1].y () - samples[0].y ())
/ (samples[1].x () - samples[0].x ());
c = samples[0].y () - m * samples[0].x ();
}
else {
m = 0.0f;
c = (samples[0].y () + samples[1].y ()) / 2;
}
line_error = 0.0f;
break;
default:
pick_line(m, c); //use pts at random
compute_errors(m, c); //from given line
index = choose_nth_item (samplecount / 2, errors, samplecount);
line_error = errors[index];
for (trials = 1; trials < lms_line_trials; trials++) {
//random again
pick_line(test_m, test_c);
compute_errors(test_m, test_c);
index = choose_nth_item (samplecount / 2, errors, samplecount);
test_error = errors[index];
if (test_error < line_error) {
//find least median
line_error = test_error;
m = test_m;
c = test_c;
}
}
}
fitted = TRUE;
out_m = m;
out_c = c;
a = 0;
}
/**********************************************************************
* LMS::fit_quadratic
*
* Fit a quadratic to the given sample points.
**********************************************************************/
void LMS::fit_quadratic( //fit sample
float outlier_threshold, //min outlier size
double &out_a, //x squared
float &out_b, //output line
float &out_c) {
inT32 trials; //no of medians
double test_a;
float test_b, test_c; //candidate line
float test_error; //error of test line
if (samplecount < 3) {
out_a = 0;
fit(out_b, out_c);
return;
}
pick_quadratic(a, m, c);
line_error = compute_quadratic_errors (outlier_threshold, a, m, c);
for (trials = 1; trials < lms_line_trials * 2; trials++) {
pick_quadratic(test_a, test_b, test_c);
test_error = compute_quadratic_errors (outlier_threshold,
test_a, test_b, test_c);
if (test_error < line_error) {
line_error = test_error; //find least median
a = test_a;
m = test_b;
c = test_c;
}
}
fitted = TRUE;
out_a = a;
out_b = m;
out_c = c;
}
/**********************************************************************
* LMS::constrained_fit
*
* Fit a line to the given sample points.
* The line must have the given gradient.
**********************************************************************/
void LMS::constrained_fit( //fit sample
float fixed_m, //forced gradient
float &out_c) {
inT32 index; //of median
inT32 trials; //no of medians
float test_c; //candidate line
static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
//for nrand
float test_error; //error of test line
m = fixed_m;
switch (samplecount) {
case 0:
c = 0.0f;
line_error = 0.0f;
break;
case 1:
//horiz thru pt
c = samples[0].y () - m * samples[0].x ();
line_error = 0.0f;
break;
case 2:
c = (samples[0].y () + samples[1].y ()
- m * (samples[0].x () + samples[1].x ())) / 2;
line_error = m * samples[0].x () + c - samples[0].y ();
line_error *= line_error;
break;
default:
index = (inT32) nrand48 (seeds) % samplecount;
//compute line
c = samples[index].y () - m * samples[index].x ();
compute_errors(m, c); //from given line
index = choose_nth_item (samplecount / 2, errors, samplecount);
line_error = errors[index];
for (trials = 1; trials < lms_line_trials; trials++) {
index = (inT32) nrand48 (seeds) % samplecount;
test_c = samples[index].y () - m * samples[index].x ();
//compute line
compute_errors(m, test_c);
index = choose_nth_item (samplecount / 2, errors, samplecount);
test_error = errors[index];
if (test_error < line_error) {
//find least median
line_error = test_error;
c = test_c;
}
}
}
fitted = TRUE;
out_c = c;
a = 0;
}
/**********************************************************************
* LMS::pick_line
*
* Fit a line to a random pair of sample points.
**********************************************************************/
void LMS::pick_line( //fit sample
float &line_m, //output gradient
float &line_c) {
inT16 trial_count; //no of attempts
static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
//for nrand
inT32 index1; //picked point
inT32 index2; //picked point
trial_count = 0;
do {
index1 = (inT32) nrand48 (seeds) % samplecount;
index2 = (inT32) nrand48 (seeds) % samplecount;
line_m = samples[index2].x () - samples[index1].x ();
trial_count++;
}
while (line_m == 0 && trial_count < LMS_MAX_FAILURES);
if (line_m == 0) {
line_c = (samples[index2].y () + samples[index1].y ()) / 2;
}
else {
line_m = (samples[index2].y () - samples[index1].y ()) / line_m;
line_c = samples[index1].y () - samples[index1].x () * line_m;
}
}
/**********************************************************************
* LMS::pick_quadratic
*
* Fit a quadratic to a random triplet of sample points.
**********************************************************************/
void LMS::pick_quadratic( //fit sample
double &line_a, //x suaread
float &line_m, //output gradient
float &line_c) {
inT16 trial_count; //no of attempts
static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
//for nrand
inT32 index1; //picked point
inT32 index2; //picked point
inT32 index3;
FCOORD x1x2; //vector
FCOORD x1x3;
FCOORD x3x2;
double bottom; //of a
trial_count = 0;
do {
if (trial_count >= LMS_MAX_FAILURES - 1) {
index1 = 0;
index2 = samplecount / 2;
index3 = samplecount - 1;
}
else {
index1 = (inT32) nrand48 (seeds) % samplecount;
index2 = (inT32) nrand48 (seeds) % samplecount;
index3 = (inT32) nrand48 (seeds) % samplecount;
}
x1x2 = samples[index2] - samples[index1];
x1x3 = samples[index3] - samples[index1];
x3x2 = samples[index2] - samples[index3];
bottom = x1x2.x () * x1x3.x () * x3x2.x ();
trial_count++;
}
while (bottom == 0 && trial_count < LMS_MAX_FAILURES);
if (bottom == 0) {
line_a = 0;
pick_line(line_m, line_c);
}
else {
line_a = x1x3 * x1x2 / bottom;
line_m = x1x2.y () - line_a * x1x2.x ()
* (samples[index2].x () + samples[index1].x ());
line_m /= x1x2.x ();
line_c = samples[index1].y () - samples[index1].x ()
* (samples[index1].x () * line_a + line_m);
}
}
/**********************************************************************
* LMS::compute_errors
*
* Compute the squared error from all the points.
**********************************************************************/
void LMS::compute_errors( //fit sample
float line_m, //input gradient
float line_c) {
inT32 index; //picked point
for (index = 0; index < samplecount; index++) {
errors[index] =
line_m * samples[index].x () + line_c - samples[index].y ();
errors[index] *= errors[index];
}
}
/**********************************************************************
* LMS::compute_quadratic_errors
*
* Compute the squared error from all the points.
**********************************************************************/
// Score the curve y = line_a*x*x + line_m*x + line_c against the samples.
// Each sample's squared error is classed as an outlier when it exceeds
// outlier_threshold. Returns the mean squared error of the non-outliers
// when fewer than a quarter of the samples are outliers (outliers*3 <
// inliers), otherwise the median squared error of the outliers.
// The errors array is used as scratch: inlier errors are packed from the
// front and outlier errors from the back.
float LMS::compute_quadratic_errors( //fit sample
float outlier_threshold, //min outlier
double line_a,
float line_m, //input gradient
float line_c) {
inT32 outlier_count; //total outliers
inT32 index; //picked point
inT32 error_count; //no in total
double total_error; //summed squares
total_error = 0;
outlier_count = 0;
error_count = 0;
for (index = 0; index < samplecount; index++) {
// Squared error of this sample, written to the next free front slot.
errors[error_count] = line_c + samples[index].x ()
* (line_m + samples[index].x () * line_a) - samples[index].y ();
errors[error_count] *= errors[error_count];
if (errors[error_count] > outlier_threshold) {
// Outlier: move it to the back; the front slot is reused next time round.
outlier_count++;
errors[samplecount - outlier_count] = errors[error_count];
}
else {
// Inlier: keep it in place and advance the front slot.
total_error += errors[error_count++];
}
}
if (outlier_count * 3 < error_count)
return total_error / error_count;
else {
// Select the median among the outlier errors stored at the back.
index = choose_nth_item (outlier_count / 2,
errors + samplecount - outlier_count,
outlier_count);
//median outlier
return errors[samplecount - outlier_count + index];
}
}
/**********************************************************************
* LMS::plot
*
* Plot the fitted line of a LMS.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
// Draw the fitted curve as a single segment from the first sample's x to the
// last sample's x, evaluating y = c + x*(m + x*a) at each end.
//   win     window to draw in
//   colour  pen colour
// Nothing is drawn unless a fit exists and there is at least one sample:
// fit() marks the object as fitted even with zero samples, and indexing
// samples[0] / samples[samplecount - 1] would then read outside the array.
void LMS::plot(                          //plot fit
               ScrollView* win,          //window
               ScrollView::Color colour  //colour to draw in
              ) {
  if (!fitted || samplecount <= 0)
    return;
  win->Pen(colour);
  win->SetCursor(samples[0].x (),
    c + samples[0].x () * (m + samples[0].x () * a));
  win->DrawTo(samples[samplecount - 1].x (),
    c + samples[samplecount - 1].x () * (m +
    samples[samplecount -
    1].x () * a));
}
#endif

84
ccmain/lmedsq.h Normal file
View File

@ -0,0 +1,84 @@
/**********************************************************************
* File: lmedsq.h (Formerly lms.h)
* Description: Code for the LMS class.
* Author: Ray Smith
* Created: Fri Aug 7 09:30:53 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef LMEDSQ_H
#define LMEDSQ_H
#include "points.h"
#include "varable.h"
#include "scrollview.h"
#include "notdll.h"
// Least-median-of-squares fitter. Holds up to a fixed number of sample
// points and fits a line (optionally with a forced gradient) or a quadratic
// by trying randomly chosen candidate curves.
// NOTE(review): the class owns two raw arrays and declares no copy
// constructor or assignment operator, so copying an LMS would presumably
// free the arrays twice - confirm that no caller copies it.
class LMS
{
public:
LMS( //constructor
inT32 size); //no of samples
~LMS (); //destructor
void clear(); //clear samples
// Samples beyond the constructed size are ignored.
void add( //add sample
FCOORD sample); //sample coords
void fit( //generate fit
float &m, //output line
float &c);
void constrained_fit( //fixed gradient
float fixed_m, //forced gradient
float &out_c); //output line
void fit_quadratic( //easy quadratic
float outlier_threshold, //min outlier
double &a, //x squared
float &b, //x
float &c); //constant
// Defined in lmedsq.cpp only when GRAPHICS_DISABLED is not defined.
void plot( //plot fit
ScrollView* win, //window
ScrollView::Color colour); //colour to draw in
// Error of the last fit, or -1 when no fit is current.
float error() { //get error
return fitted ? line_error : -1;
}
private:
void pick_line( //random choice
float &m, //output line
float &c);
void pick_quadratic( //random choice
double &a, //output curve
float &b,
float &c);
void compute_errors( //find errors
float m, //from line
float c);
//find errors
float compute_quadratic_errors(float outlier_threshold, //min outlier
double a, //from curve
float m,
float c);
BOOL8 fitted; //line parts valid
inT32 samplesize; //max samples
inT32 samplecount; //current sample size
FCOORD *samples; //array of samples
float *errors; //error distances
double a; //x squared
float m; //line gradient
float c; //line constant
float line_error; //error of fit
};
// Declaration of the trial-count variable defined in lmedsq.cpp.
extern INT_VAR_H (lms_line_trials, 12, "Number of linew fits to do");
#endif

100
ccmain/mod128.cpp Normal file
View File

@ -0,0 +1,100 @@
/**********************************************************************
* File: mod128.cpp (Formerly dir128.c)
* Description: Code to convert a DIR128 to an ICOORD.
* Author: Ray Smith
* Created: Tue Oct 22 11:56:09 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h" //precompiled headers
#include "mod128.h"
// Direction lookup table: 128 (x, y) pairs stored as a flat array of 256
// inT16 values, four pairs per row. The first pair is (1000, 0) and the
// entries step round a full turn; the magnitudes match DIRSCALE (1000) from
// mod128.h.
// NOTE(review): values look like 1000*cos and 1000*sin at steps of 1/128 of
// a turn, truncated - confirm before regenerating.
static inT16 idirtab[] = {
1000, 0, 998, 49, 995, 98, 989, 146,
980, 195, 970, 242, 956, 290, 941, 336,
923, 382, 903, 427, 881, 471, 857, 514,
831, 555, 803, 595, 773, 634, 740, 671,
707, 707, 671, 740, 634, 773, 595, 803,
555, 831, 514, 857, 471, 881, 427, 903,
382, 923, 336, 941, 290, 956, 242, 970,
195, 980, 146, 989, 98, 995, 49, 998,
0, 1000, -49, 998, -98, 995, -146, 989,
-195, 980, -242, 970, -290, 956, -336, 941,
-382, 923, -427, 903, -471, 881, -514, 857,
-555, 831, -595, 803, -634, 773, -671, 740,
-707, 707, -740, 671, -773, 634, -803, 595,
-831, 555, -857, 514, -881, 471, -903, 427,
-923, 382, -941, 336, -956, 290, -970, 242,
-980, 195, -989, 146, -995, 98, -998, 49,
-1000, 0, -998, -49, -995, -98, -989, -146,
-980, -195, -970, -242, -956, -290, -941, -336,
-923, -382, -903, -427, -881, -471, -857, -514,
-831, -555, -803, -595, -773, -634, -740, -671,
-707, -707, -671, -740, -634, -773, -595, -803,
-555, -831, -514, -857, -471, -881, -427, -903,
-382, -923, -336, -941, -290, -956, -242, -970,
-195, -980, -146, -989, -98, -995, -49, -998,
0, -1000, 49, -998, 98, -995, 146, -989,
195, -980, 242, -970, 290, -956, 336, -941,
382, -923, 427, -903, 471, -881, 514, -857,
555, -831, 595, -803, 634, -773, 671, -740,
707, -707, 740, -671, 773, -634, 803, -595,
831, -555, 857, -514, 881, -471, 903, -427,
923, -382, 941, -336, 956, -290, 970, -242,
980, -195, 989, -146, 995, -98, 998, -49
};
// The same storage viewed as an array of ICOORD, one element per direction.
// NOTE(review): this cast assumes an ICOORD is laid out as exactly two
// consecutive inT16 values (x then y) - confirm against points.h.
static ICOORD *dirtab = (ICOORD *) idirtab;
/**********************************************************************
* DIR128::DIR128
*
* Quantize the direction of an FCOORD to make a DIR128.
**********************************************************************/
// Quantise the direction of the vector fc into one of MODULUS directions.
// A vector with zero y maps straight to direction 0 (x >= 0) or MODULUS/2
// (x < 0). Otherwise dirtab is bisected using the sign of
// dirtab[i] * fc, and the lower end of the final interval becomes the
// direction.
// NOTE(review): operator* between ICOORD and FCOORD is defined elsewhere,
// presumably as the cross product - confirm in points.h.
DIR128::DIR128(const FCOORD fc) {
  if (fc.y () == 0) {
    // Along the x axis: no search needed.
    dir = (fc.x () >= 0) ? 0 : MODULUS / 2;
    return;
  }
  int lower = 0;
  int upper = MODULUS;
  do {
    int probe = (upper + lower) / 2;
    if (dirtab[probe] * fc >= 0)
      lower = probe;
    else
      upper = probe;
  }
  while (upper - lower > 1);
  dir = lower;
}
/**********************************************************************
* DIR128::vector
*
* Convert a direction to a vector.
**********************************************************************/
// Return the table vector for this direction: a straight lookup in dirtab
// indexed by dir.
ICOORD DIR128::vector() const {
  const ICOORD *entry = dirtab + dir;
  return *entry;
}

85
ccmain/mod128.h Normal file
View File

@ -0,0 +1,85 @@
/**********************************************************************
* File: mod128.h (Formerly dir128.h)
* Description: Header for class which implements modulo arithmetic.
* Author: Ray Smith
* Created: Tue Mar 26 17:48:13 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef MOD128_H
#define MOD128_H
#include "points.h"
#define MODULUS 128 /*range of directions */
#define DIRBITS 7 //no of bits used
#define DIRSCALE 1000 //length of vector
// A direction held as an integer modulo MODULUS (128). Construction and
// assignment from an inT16 reduce the value into the range 0..MODULUS-1.
class DLLSYM DIR128
{
public:
// Leaves dir uninitialised.
DIR128() {
} //empty constructor
DIR128( //constructor
inT16 value) { //value to assign
value %= MODULUS; //modulo arithmetic
// % can leave a negative remainder for negative input; shift it up.
if (value < 0)
value += MODULUS; //done properly
dir = (inT8) value;
}
DIR128(const FCOORD fc); //quantize vector
DIR128 & operator= ( //assign of inT16
inT16 value) { //value to assign
value %= MODULUS; //modulo arithmetic
// % can leave a negative remainder for negative input; shift it up.
if (value < 0)
value += MODULUS; //done properly
dir = (inT8) value;
return *this;
}
// Signed difference of two directions, folded into -MODULUS/2..MODULUS/2.
inT8 operator- ( //subtraction
const DIR128 & minus) const//for signed result
{
//result
inT16 result = dir - minus.dir;
if (result > MODULUS / 2)
result -= MODULUS; //get in range
else if (result < -MODULUS / 2)
result += MODULUS;
return (inT8) result;
}
// Sum of two directions, wrapped by the inT16 assignment operator.
DIR128 operator+ ( //addition
const DIR128 & add) const //of itself
{
DIR128 result; //sum
result = dir + add.dir; //let = do the work
return result;
}
DIR128 & operator+= ( //same as +
const DIR128 & add) {
*this = dir + add.dir; //let = do the work
return *this;
}
inT8 get_dir() const { //access function
return dir;
}
ICOORD vector() const; //turn to vector
private:
inT8 dir; //a direction
};
#endif

178
ccmain/normalis.cpp Normal file
View File

@ -0,0 +1,178 @@
/**********************************************************************
* File: normalis.cpp (Formerly denorm.c)
* Description: Code for the DENORM class.
* Author: Ray Smith
* Created: Thu Apr 23 09:22:43 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "werd.h"
#include "normalis.h"
/**********************************************************************
* DENORM::binary_search_segment
*
* Find the segment to use for the given x.
**********************************************************************/
// Bisect segs[0..segments) on xstart and return the segment at the lower
// end of the final interval: the last one whose xstart does not exceed
// src_x, or segs[0] when every xstart exceeds it. The array is indexed
// unconditionally, so the caller must ensure segments is non-zero.
const DENORM_SEG *DENORM::binary_search_segment(float src_x) const {
  int lo = 0;
  int hi = segments;
  for (;;) {
    int mid = (lo + hi) / 2;
    if (segs[mid].xstart > src_x)
      hi = mid;
    else
      lo = mid;
    if (hi - lo <= 1)
      break;
  }
  return &segs[lo];
}
/**********************************************************************
* DENORM::scale_at_x
*
* Return scaling at a given (normalized) x coord.
**********************************************************************/
// Scale factor applying at normalised x coordinate src_x: the matching
// segment's own factor when segments exist and that factor is positive,
// otherwise the overall scale_factor.
float DENORM::scale_at_x(float src_x) const {
  if (segments == 0)
    return scale_factor;
  const DENORM_SEG* seg = binary_search_segment(src_x);
  return (seg->scale_factor > 0.0) ? seg->scale_factor : scale_factor;
}
/**********************************************************************
* DENORM::yshift_at_x
*
* Return yshift at a given (normalized) x coord.
**********************************************************************/
// Y shift applying at normalised x coordinate src_x.
//  - Segments present and the matching segment has a real ycoord (anything
//    but -MAX_INT32): that ycoord.
//  - Segments present, no ycoord, and base_is_row is false: the default
//    line m*x + c evaluated at the denormalised x.
//  - Otherwise: the source row's baseline.
// NOTE(review): the row branch passes x(src_x)/scale + x_centre, i.e. it
// scales and offsets a value that x() has already denormalised - confirm
// this is intended.
float DENORM::yshift_at_x(float src_x) const {
  if (segments != 0) {
    const DENORM_SEG* seg = binary_search_segment(src_x);
    if (seg->ycoord != -MAX_INT32)
      return seg->ycoord;
    if (!base_is_row)
      return m * x(src_x) + c;
  }
  return source_row->base_line(x(src_x)/scale_at_x(src_x) + x_centre);
}
/**********************************************************************
* DENORM::x
*
* Denormalise an x coordinate.
**********************************************************************/
// Denormalise an x coordinate: divide by the local scale and add x_centre.
float DENORM::x(float src_x) const {
  const float local_scale = scale_at_x(src_x);
  return src_x / local_scale + x_centre;
}
/**********************************************************************
* DENORM::y
*
* Denormalise a y coordinate.
**********************************************************************/
// Denormalise a y coordinate.
//   src_y       coordinate to convert
//   src_centre  normalised x at which the scale and y shift are looked up
// The baseline offset is removed, the result divided by the local scale,
// and the local y shift added.
float DENORM::y(float src_y, float src_centre) const {
  const float local_scale = scale_at_x(src_centre);
  const float local_shift = yshift_at_x(src_centre);
  return (src_y - bln_baseline_offset) / local_scale + local_shift;
}
// Construct from individual pieces.
//   x          x centre
//   scaling    overall scale factor
//   line_m, line_c  default baseline y = m*x + c
//   seg_count  number of entries in seg_pts
//   seg_pts    segments to copy
//   using_row  whether the row supplies the baseline
//   src        source row
// The segments are copied into a newly allocated array and kept sorted by
// xstart, because binary_search_segment relies on that order.
DENORM::DENORM(float x,
               float scaling,
               double line_m,
               double line_c,
               inT16 seg_count,
               DENORM_SEG *seg_pts,
               BOOL8 using_row,
               ROW *src) {
  x_centre = x;
  scale_factor = scaling;
  source_row = src;
  base_is_row = using_row;
  m = line_m;
  c = line_c;
  block_ = NULL;

  if (seg_count <= 0) {
    // No segments supplied.
    segments = 0;
    segs = NULL;
    return;
  }

  segs = new DENORM_SEG[seg_count];
  for (segments = 0; segments < seg_count; segments++) {
    // Insertion step: slide larger entries up one place until the slot for
    // the incoming segment is found. Input is usually already in order, in
    // which case nothing moves.
    int slot = segments;
    while (slot > 0 && segs[slot - 1].xstart > seg_pts[segments].xstart) {
      segs[slot] = segs[slot - 1];
      --slot;
    }
    segs[slot] = seg_pts[segments];
  }
}
// Copy constructor: start with no segment array so that the assignment
// operator has nothing to delete, then let it do the copy.
DENORM::DENORM(const DENORM &src) {
  segs = NULL;
  segments = 0;
  *this = src;
}
// Assignment: copy every field of src and take a private copy of its
// segment array, releasing any array this object already held.
// Self-assignment returns immediately; without that check the array would
// be deleted and then read back as the copy source.
DENORM & DENORM::operator= (const DENORM & src) {
  if (this == &src)
    return *this;
  x_centre = src.x_centre;
  scale_factor = src.scale_factor;
  source_row = src.source_row;
  // Drop the old array before taking the new one.
  if (segments > 0)
    delete[]segs;
  if (src.segments > 0) {
    segs = new DENORM_SEG[src.segments];
    for (segments = 0; segments < src.segments; segments++)
      segs[segments] = src.segs[segments];
  }
  else {
    segments = 0;
    segs = NULL;
  }
  base_is_row = src.base_is_row;
  m = src.m;
  c = src.c;
  block_ = src.block_;
  return *this;
}

118
ccmain/normalis.h Normal file
View File

@ -0,0 +1,118 @@
/**********************************************************************
* File: normalis.h (Formerly denorm.h)
* Description: Code for the DENORM class.
* Author: Ray Smith
* Created: Thu Apr 23 09:22:43 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef NORMALIS_H
#define NORMALIS_H
#include <stdio.h>
class ROW; //forward decl
class BLOCK;
// One segment of a segmented denormalisation: holds the x position at
// which the segment starts plus the y coordinate and scale factor stored
// for it.
class DENORM_SEG
{
public:
// The constructor deliberately does nothing, so all three members are
// indeterminate until they are assigned.
DENORM_SEG() {
} //empty
inT32 xstart; //start of segment
inT32 ycoord; //y at segment
float scale_factor; //for this segment
};
class DENORM
{
public:
DENORM() { //constructor
source_row = NULL;
x_centre = 0.0f;
scale_factor = 1.0f;
segments = 0;
segs = NULL;
base_is_row = TRUE;
m = c = 0;
block_ = NULL;
}
DENORM( //constructor
float x, //from same pieces
float scaling,
ROW *src) {
x_centre = x; //just copy
scale_factor = scaling;
source_row = src;
segments = 0;
segs = NULL;
base_is_row = TRUE;
m = c = 0;
block_ = NULL;
}
DENORM( //constructor
float x, //from same pieces
float scaling,
double line_m, //default line //no of segments
double line_c,
inT16 seg_count,
DENORM_SEG *seg_pts, //actual segments
BOOL8 using_row, //as baseline
ROW *src);
DENORM(const DENORM &);
DENORM & operator= (const DENORM &);
~DENORM () {
if (segments > 0)
delete[]segs;
}
float origin() const { //get x centre
return x_centre;
}
float scale() const { //get scale
return scale_factor;
}
ROW *row() const { //get row
return source_row;
}
const BLOCK* block() const {
return block_;
}
void set_block(const BLOCK* block) {
block_ = block;
}
float x( //convert an xcoord
float src_x) const;
float y( //convert a ycoord
float src_y, //coord to convert
float src_centre) const; //normed x centre
float scale_at_x( // Return scaling at this coord.
float src_x) const;
float yshift_at_x( // Return yshift at this coord.
float src_x) const;
private:
const DENORM_SEG *binary_search_segment(float src_x) const;
BOOL8 base_is_row; //using row baseline?
inT16 segments; //no of segments
double c, m; //baseline
float x_centre; //middle of word
float scale_factor; //scaling
ROW *source_row; //row it came from
DENORM_SEG *segs; //array of segments
const BLOCK* block_; // Block the word came from.
};
#endif

219
ccmain/ocrblock.cpp Normal file
View File

@ -0,0 +1,219 @@
/**********************************************************************
* File: ocrblock.cpp (Formerly block.c)
* Description: BLOCK member functions and iterator functions.
* Author: Ray Smith
* Created: Fri Mar 15 09:41:28 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <stdlib.h>
#include "blckerr.h"
#include "ocrblock.h"
#include "tprintf.h"
#define BLOCK_LABEL_HEIGHT 150 //char height of block id
ELISTIZE_S (BLOCK)
/**********************************************************************
 * BLOCK::BLOCK
 *
 * Constructor for a simple rectangular block.
 * Stores the name and spacing statistics, then adds the two corners of
 * each vertical edge of the rectangle to the left and right side lists.
 * pitch and xheight are not supplied by the caller and start at 0 so
 * that print(), fixed_pitch() and x_height() never read indeterminate
 * values.
 **********************************************************************/
BLOCK::BLOCK(const char *name,  //filename
             BOOL8 prop,        //proportional
             inT16 kern,        //kerning
             inT16 space,       //spacing
             inT16 xmin,        //bottom left
             inT16 ymin,
             inT16 xmax,        //top right
             inT16 ymax)
  : PDBLK (xmin, ymin, xmax, ymax),
    filename(name),
    re_rotation_(1.0f, 0.0f),
    classify_rotation_(1.0f, 0.0f),
    skew_(1.0f, 0.0f) {
  // Constructing the iterators from the lists already attaches them, so
  // no separate set_to_list call is needed.
  ICOORDELT_IT left_it(&leftside);
  ICOORDELT_IT right_it(&rightside);

  proportional = prop;
  kerning = kern;               // stored as inT8, so wide values narrow
  spacing = space;
  pitch = 0;                    // unknown until check_pitch/set_stats
  font_class = -1;              //not assigned
  xheight = 0;                  // unknown until set_xheight
  cell_over_xheight_ = 2.0f;
  hand_poly = NULL;

  //make default box
  left_it.add_to_end (new ICOORDELT (xmin, ymin));
  left_it.add_to_end (new ICOORDELT (xmin, ymax));
  right_it.add_to_end (new ICOORDELT (xmax, ymin));
  right_it.add_to_end (new ICOORDELT (xmax, ymax));
}
/**********************************************************************
* decreasing_top_order
*
* Sort Comparator: Return <0 if row1 top < row2 top
**********************************************************************/
int decreasing_top_order( //
const void *row1,
const void *row2) {
return (*(ROW **) row2)->bounding_box ().top () -
(*(ROW **) row1)->bounding_box ().top ();
}
/**********************************************************************
 * BLOCK::rotate
 *
 * Rotate the polygon by the given rotation and recompute the bounding_box.
 *
 * NOTE(review): poly_block() is dereferenced without a NULL check, while
 * the rectangular-block constructor sets hand_poly to NULL. Presumably
 * this must only be called on blocks that have a polygon - confirm.
 **********************************************************************/
void BLOCK::rotate(const FCOORD& rotation) {
poly_block()->rotate(rotation);
// Replace the block's box with the rotated polygon's bounding box.
box = *poly_block()->bounding_box();
}
/**********************************************************************
 * BLOCK::sort_rows
 *
 * Order rows so that they are in order of decreasing Y coordinate,
 * using decreasing_top_order (top of each row's bounding box) as the
 * comparator.
 **********************************************************************/
void BLOCK::sort_rows() { // order on "top"
ROW_IT row_it(&rows);
row_it.sort (decreasing_top_order);
}
/**********************************************************************
 * BLOCK::compress
 *
 * Delete space between the rows. (And maybe one day, compress the rows)
 * Fill space of block from top down, left aligning rows.
 * The block's box is collapsed to its top-left corner and regrown around
 * the rows as each is moved into place; the left and right side lists
 * are then rebuilt as the vertical edges of the resulting box.
 **********************************************************************/
void BLOCK::compress() {  // squash it up
#define ROW_SPACING 5
  ICOORD row_spacing (0, ROW_SPACING);

  // Sort first and only then attach the iterator: an iterator created
  // before the sort would still hold the pre-sort links and is not
  // guaranteed to walk the rows in their new order.
  sort_rows();
  ROW_IT row_it(&rows);

  box = TBOX (box.topleft (), box.topleft ());
  box.move_bottom_edge (ROW_SPACING);
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    ROW *row = row_it.data ();
    // Put the row's top-left ROW_SPACING below the current bottom-left
    // of the box, then grow the box to take in the moved row.
    row->move (box.botleft () - row_spacing -
               row->bounding_box ().topleft ());
    box += row->bounding_box ();
  }

  ICOORDELT_IT icoordelt_it;
  leftside.clear ();
  icoordelt_it.set_to_list (&leftside);
  icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.bottom ()));
  icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.top ()));
  rightside.clear ();
  icoordelt_it.set_to_list (&rightside);
  icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.bottom ()));
  icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.top ()));
}
/**********************************************************************
 * BLOCK::check_pitch
 *
 * Intended to check whether the block is fixed or prop, set the flag,
 * and set the pitch if it is fixed.
 * The current body performs no analysis: it only sets pitch to -1 and
 * leaves the proportional flag untouched.
 **********************************************************************/
void BLOCK::check_pitch() { // check prop
// tprintf("Missing FFT fixed pitch stuff!\n");
pitch = -1;
}
/**********************************************************************
 * BLOCK::compress (with move)
 *
 * Compress and move in a single operation: the block's box is shifted
 * by vec first, then compress() packs the rows relative to the moved box.
 **********************************************************************/
void BLOCK::compress( // squash it up
const ICOORD vec // and move
) {
box.move (vec);
compress();
}
/**********************************************************************
* BLOCK::print
*
* Print the info on a block
**********************************************************************/
void BLOCK::print( //print list of sides
FILE *, //file to print on
BOOL8 dump //print full detail
) {
ICOORDELT_IT it = &leftside; //iterator
box.print ();
tprintf ("Proportional= %s\n", proportional ? "TRUE" : "FALSE");
tprintf ("Kerning= %d\n", kerning);
tprintf ("Spacing= %d\n", spacing);
tprintf ("Fixed_pitch=%d\n", pitch);
tprintf ("Filename= %s\n", filename.string ());
if (dump) {
tprintf ("Left side coords are:\n");
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ());
tprintf ("\n");
tprintf ("Right side coords are:\n");
it.set_to_list (&rightside);
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ());
tprintf ("\n");
}
}
/**********************************************************************
 * BLOCK::operator=
 *
 * Assignment - duplicate the block structure, but with an EMPTY row list.
 * The link, the PDBLK part, the name, the spacing/size statistics and
 * the rotation/skew vectors are copied from source; the destination's
 * own rows are discarded. The blob lists are not touched.
 * Self-assignment is a no-op, so b = b does not throw away b's rows.
 **********************************************************************/
BLOCK & BLOCK::operator= (      //assignment
const BLOCK & source            //from this
) {
  if (this == &source)
    return *this;

  this->ELIST_LINK::operator= (source);
  this->PDBLK::operator= (source);
  proportional = source.proportional;
  kerning = source.kerning;
  spacing = source.spacing;
  // Copy the remaining statistics too, so that the copy does not keep
  // stale or indeterminate values for them.
  pitch = source.pitch;
  font_class = source.font_class;
  xheight = source.xheight;
  cell_over_xheight_ = source.cell_over_xheight_;
  filename = source.filename;   //STRINGs assign ok
  if (!rows.empty ())
    rows.clear ();
  re_rotation_ = source.re_rotation_;
  classify_rotation_ = source.classify_rotation_;
  skew_ = source.skew_;
  median_size_ = source.median_size_;
  return *this;
}

195
ccmain/ocrblock.h Normal file
View File

@ -0,0 +1,195 @@
/**********************************************************************
* File: ocrblock.h (Formerly block.h)
* Description: Page block class definition.
* Author: Ray Smith
* Created: Thu Mar 14 17:32:01 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef OCRBLOCK_H
#define OCRBLOCK_H
#include "img.h"
#include "ocrrow.h"
#include "pdblock.h"
class BLOCK; //forward decl
ELISTIZEH_S (BLOCK)
class BLOCK:public ELIST_LINK, public PDBLK
//page block
{
friend class BLOCK_RECT_IT; //block iterator
public:
BLOCK()
: re_rotation_(1.0f, 0.0f),
classify_rotation_(1.0f, 0.0f),
skew_(1.0f, 0.0f) {
hand_poly = NULL;
}
BLOCK(const char *name, // filename
BOOL8 prop, // proportional
inT16 kern, // kerning
inT16 space, // spacing
inT16 xmin, // bottom left
inT16 ymin,
inT16 xmax, // top right
inT16 ymax);
~BLOCK () {
}
// set space size etc.
void set_stats(BOOL8 prop, // proportional
inT16 kern, // inter char size
inT16 space, // inter word size
inT16 ch_pitch) { // pitch if fixed
proportional = prop;
kerning = (inT8) kern;
spacing = space;
pitch = ch_pitch;
}
void set_xheight(inT32 height) { // set char size
xheight = height;
}
void set_font_class(inT16 font) { // set font class
font_class = font;
}
BOOL8 prop() const { // return proportional
return proportional;
}
inT32 fixed_pitch() const { // return pitch
return pitch;
}
inT16 kern() const { // return kerning
return kerning;
}
inT16 font() const { // return font class
return font_class;
}
inT16 space() const { // return spacing
return spacing;
}
const char *name() const { // return filename
return filename.string ();
}
inT32 x_height() const { // return xheight
return xheight;
}
float cell_over_xheight() const {
return cell_over_xheight_;
}
void set_cell_over_xheight(float ratio) {
cell_over_xheight_ = ratio;
}
ROW_LIST *row_list() { // get rows
return &rows;
}
C_BLOB_LIST *blob_list() { // get blobs
return &c_blobs;
}
C_BLOB_LIST *reject_blobs() {
return &rej_blobs;
}
FCOORD re_rotation() const {
return re_rotation_; // How to transform coords back to image.
}
void set_re_rotation(const FCOORD& rotation) {
re_rotation_ = rotation;
}
FCOORD classify_rotation() const {
return classify_rotation_; // Apply this before classifying.
}
void set_classify_rotation(const FCOORD& rotation) {
classify_rotation_ = rotation;
}
FCOORD skew() const {
return skew_; // Direction of true horizontal.
}
void set_skew(const FCOORD& skew) {
skew_ = skew;
}
const ICOORD& median_size() const {
return median_size_;
}
void set_median_size(int x, int y) {
median_size_.set_x(x);
median_size_.set_y(y);
}
void rotate(const FCOORD& rotation);
void sort_rows(); // decreasing y order
void compress(); // shrink white space
void check_pitch(); // check proportional
void compress(const ICOORD vec); // shrink white spaceand move by vector
void print(FILE *fp, BOOL8 dump); // dump whole table
void prep_serialise() { // set ptrs to counts
filename.prep_serialise();
rows.prep_serialise();
c_blobs.prep_serialise();
rej_blobs.prep_serialise();
leftside.prep_serialise();
rightside.prep_serialise();
}
void dump(FILE *f) {
filename.dump(f);
rows.dump(f);
c_blobs.dump(f);
rej_blobs.dump(f);
leftside.dump(f);
rightside.dump(f);
}
void de_dump(FILE *f) { // read external bits
filename.de_dump(f);
rows.de_dump(f);
c_blobs.de_dump(f);
rej_blobs.de_dump(f);
leftside.de_dump(f);
rightside.de_dump(f);
}
make_serialise(BLOCK)
BLOCK& operator=(const BLOCK & source);
private:
BOOL8 proportional; // proportional
inT8 kerning; // inter blob gap
inT16 spacing; // inter word gap
inT16 pitch; // pitch of non-props
inT16 font_class; // correct font class
inT32 xheight; // height of chars
float cell_over_xheight_; // Ratio of cell height to xheight.
STRING filename; // name of block
ROW_LIST rows; // rows in block
C_BLOB_LIST c_blobs; // before textord
C_BLOB_LIST rej_blobs; // duff stuff
FCOORD re_rotation_; // How to transform coords back to image.
FCOORD classify_rotation_; // Apply this before classifying.
FCOORD skew_; // Direction of true horizontal.
ICOORD median_size_; // Median size of blobs.
};
int decreasing_top_order(const void *row1, const void *row2);
#endif

216
ccmain/ocrrow.cpp Normal file
View File

@ -0,0 +1,216 @@
/**********************************************************************
* File: ocrrow.cpp (Formerly row.c)
* Description: Code for the ROW class.
* Author: Ray Smith
* Created: Tue Oct 08 15:58:04 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "ocrrow.h"
#include "blobbox.h"
ELISTIZE_S (ROW)
/**********************************************************************
 * ROW::ROW
 *
 * Constructor to build a ROW from explicit statistics and the pieces of
 * a baseline spline. Only the stats stuff are given here.
 * The words are added directly.
 **********************************************************************/
ROW::ROW(inT32 spline_size,     //no of segments
         inT32 * xstarts,       //segment boundaries
         double *coeffs,        //coefficients
         float x_height,        //line height
         float ascenders,       //ascender size
         float descenders,      //descender drop
         inT16 kern,            //char gap
         inT16 space            //word gap
        )
  : kerning(kern),
    spacing(space),
    xheight(x_height),
    ascrise(ascenders),
    descdrop(descenders),
    baseline(spline_size, xstarts, coeffs) {
}
/**********************************************************************
 * ROW::ROW
 *
 * Constructor to build a ROW from a TO_ROW: the x-height, ascender
 * rise, descender drop and baseline are taken from to_row, the gaps
 * from the arguments. The words are added directly.
 **********************************************************************/
ROW::ROW(TO_ROW *to_row,        //source row
         inT16 kern,            //char gap
         inT16 space            //word gap
        )
  : kerning(kern),
    spacing(space),
    xheight(to_row->xheight),
    ascrise(to_row->ascrise),
    descdrop(to_row->descdrop) {
  // Assigned rather than copy-constructed, exactly as before.
  baseline = to_row->baseline;
}
/**********************************************************************
 * ROW::recalc_bounding_box
 *
 * Set the bounding box correctly.
 * First makes sure the words are ordered by the left edge of their
 * bounding boxes (sorting with word_comparator if any word is found out
 * of order), then marks the first/last words with W_BOL/W_EOL and
 * extends bound_box by every word's bounding box.
 * NOTE(review): bound_box is only ever extended here, never reset -
 * confirm callers do not expect it to shrink.
 **********************************************************************/
void ROW::recalc_bounding_box() { //recalculate BB
WERD *word; //current word
WERD_IT it = &words; //words of ROW
inT16 left; //of word
inT16 prev_left; //old left
if (!it.empty ()) {
word = it.data ();
prev_left = word->bounding_box ().left ();
it.forward ();
// Walk once round the list looking for a word that starts to the left
// of its predecessor.
while (!it.at_first ()) {
word = it.data ();
left = word->bounding_box ().left ();
if (left < prev_left) {
it.move_to_first ();
//words in BB order
it.sort (word_comparator);
break;
}
prev_left = left;
it.forward ();
}
}
// Second pass: set the line-position flags and accumulate the box.
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
word = it.data ();
if (it.at_first ())
word->set_flag (W_BOL, TRUE);
else
//not start of line
word->set_flag (W_BOL, FALSE);
if (it.at_last ())
word->set_flag (W_EOL, TRUE);
else
//not end of line
word->set_flag (W_EOL, FALSE);
//extend BB as reqd
bound_box += word->bounding_box ();
}
}
/**********************************************************************
* ROW::move
*
* Reposition row by vector
**********************************************************************/
void ROW::move( // reposition row
const ICOORD vec // by vector
) {
WERD_IT it(&words); // word iterator
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
it.data ()->move (vec);
bound_box.move (vec);
baseline.move (vec);
}
/**********************************************************************
 * ROW::print
 *
 * Display members: kerning, spacing, the bounding box, x-height,
 * ascender rise and descender drop.
 * All output goes through tprintf and bound_box.print(); the fp
 * argument is not used.
 **********************************************************************/
void ROW::print( //print
FILE *fp //file to print on
) {
tprintf ("Kerning= %d\n", kerning);
tprintf ("Spacing= %d\n", spacing);
bound_box.print ();
tprintf ("Xheight= %f\n", xheight);
tprintf ("Ascrise= %f\n", ascrise);
tprintf ("Descdrop= %f\n", descdrop);
}
/**********************************************************************
 * ROW::plot
 *
 * Draw every word of the ROW in the given colour.
 * Only compiled when GRAPHICS_DISABLED is not defined.
 **********************************************************************/
#ifndef GRAPHICS_DISABLED
void ROW::plot(ScrollView* window,        //window to draw in
               ScrollView::Color colour   //colour to draw in
              ) {
  WERD_IT word_it(&words);
  for (word_it.mark_cycle_pt (); !word_it.cycled_list ();
       word_it.forward ()) {
    word_it.data ()->plot (window, colour);  //all in one colour
  }
}
#endif
/**********************************************************************
 * ROW::plot
 *
 * Draw every word of the ROW using each word's own rainbow colouring.
 * Only compiled when GRAPHICS_DISABLED is not defined.
 **********************************************************************/
#ifndef GRAPHICS_DISABLED
void ROW::plot(ScrollView* window   //window to draw in
              ) {
  WERD_IT word_it(&words);
  for (word_it.mark_cycle_pt (); !word_it.cycled_list ();
       word_it.forward ()) {
    word_it.data ()->plot (window);  //in rainbow colours
  }
}
#endif
/**********************************************************************
 * ROW::operator=
 *
 * Assign rows by duplicating the row structure but NOT the WERDLIST:
 * the statistics, baseline and bounding box are copied from source and
 * the destination's own word list is emptied.
 * Self-assignment is a no-op, so r = r does not throw away r's words.
 **********************************************************************/
ROW & ROW::operator= (          //assignment
const ROW & source              //from this
) {
  if (this == &source)
    return *this;

  this->ELIST_LINK::operator= (source);
  kerning = source.kerning;
  spacing = source.spacing;
  xheight = source.xheight;
  ascrise = source.ascrise;
  descdrop = source.descdrop;
  if (!words.empty ())
    words.clear ();
  baseline = source.baseline;   //QSPLINES must do =
  bound_box = source.bound_box;
  return *this;
}

136
ccmain/ocrrow.h Normal file
View File

@ -0,0 +1,136 @@
/**********************************************************************
* File: ocrrow.h (Formerly row.h)
* Description: Code for the ROW class.
* Author: Ray Smith
* Created: Tue Oct 08 15:58:04 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef OCRROW_H
#define OCRROW_H
#include <stdio.h>
#include "quspline.h"
#include "werd.h"
class TO_ROW;
class ROW:public ELIST_LINK
{
friend void tweak_row_baseline(ROW *);
public:
ROW() {
} //empty constructor
ROW( //constructor
inT32 spline_size, //no of segments
inT32 *xstarts, //segment boundaries
double *coeffs, //coefficients //ascender size
float x_height,
float ascenders,
float descenders, //descender size
inT16 kern, //char gap
inT16 space); //word gap
ROW( //constructor
TO_ROW *row, //textord row
inT16 kern, //char gap
inT16 space); //word gap
WERD_LIST *word_list() { //get words
return &words;
}
float base_line( //compute baseline
float xpos) const { //at the position
//get spline value
return (float) baseline.y (xpos);
}
float x_height() const { //return x height
return xheight;
}
void set_x_height(float new_xheight) { // set x height
xheight = new_xheight;
}
inT32 kern() const { //return kerning
return kerning;
}
inT32 space() const { //return spacing
return spacing;
}
float ascenders() const { //return size
return ascrise;
}
float descenders() const { //return size
return descdrop;
}
TBOX bounding_box() const { //return bounding box
return bound_box;
}
void recalc_bounding_box(); //recalculate BB
void move( // reposition row
const ICOORD vec); // by vector
void print( //print
FILE *fp); //file to print on
void plot( //draw one
ScrollView* window, //window to draw in
ScrollView::Color colour); //uniform colour
void plot( //draw one
ScrollView* window); //in rainbow colours
#ifndef GRAPHICS_DISABLED
void plot_baseline( //draw the baseline
ScrollView* window, //window to draw in
ScrollView::Color colour) { //colour to draw
//draw it
baseline.plot (window, colour);
}
#endif
void prep_serialise() { //set ptrs to counts
words.prep_serialise ();
baseline.prep_serialise ();
}
void dump( //write external bits
FILE *f) {
words.dump (f);
baseline.dump (f);
}
void de_dump( //read external bits
FILE *f) {
words.de_dump (f);
baseline.de_dump (f);
}
//assignment
make_serialise (ROW) ROW & operator= (
const ROW & source); //from this
private:
inT32 kerning; //inter char gap
inT32 spacing; //inter word gap
TBOX bound_box; //bounding box
float xheight; //height of line
float ascrise; //size of ascenders
float descdrop; //-size of descenders
WERD_LIST words; //words
QSPLINE baseline; //baseline spline
};
ELISTIZEH_S (ROW)
#endif

427
ccmain/osdetect.cpp Normal file
View File

@ -0,0 +1,427 @@
///////////////////////////////////////////////////////////////////////
// File: osdetect.cpp
// Description: Orientation and script detection.
// Author: Samuel Charron
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "osdetect.h"
#include "strngs.h"
#include "blobbox.h"
#include "blread.h"
#include "tordmain.h"
#include "ratngs.h"
#include "oldlist.h"
#include "adaptmatch.h"
#include "tstruct.h"
#include "expandblob.h"
#include "tesseractclass.h"
#include "qrsequence.h"
// Page image defined elsewhere; its dimensions are used when the blocks
// for the page are set up.
extern IMAGE page_image;
// Fewer than half this many usable blobs makes os_detect give up on the
// page; early stopping is only allowed after more than this many blobs
// have been tried.
const int kMinCharactersToTry = 50;
// Upper bound on the number of blobs os_detect classifies.
const int kMaxCharactersToTry = 5 * kMinCharactersToTry;
// Blobs whose longer/shorter side ratio exceeds this are skipped.
const float kSizeRatioToReject = 2.0;
// Ratio of best to second-best orientation score needed to be "sure".
const float kOrientationAcceptRatio = 1.3;
// NOTE(review): the next five constants are not used in the part of the
// file reviewed here; their meanings are presumed from their names.
const float kScriptAcceptRatio = 1.3;
const float kHanRatioInKorean = 0.7;
const float kHanRatioInJapanese = 0.3;
const float kLatinRationInFraktur = 0.7;
// A second script whose (negated) certainty is within this margin of the
// best one makes the character count as ambiguous.
const float kNonAmbiguousMargin = 1.0;
// General scripts
static const char* han_script = "Han";
static const char* latin_script = "Latin";
static const char* katakana_script = "Katakana";
static const char* hiragana_script = "Hiragana";
static const char* hangul_script = "Hangul";
// Pseudo-scripts Name
const char* ScriptDetector::korean_script_ = "Korean";
const char* ScriptDetector::japanese_script_ = "Japanese";
const char* ScriptDetector::fraktur_script_ = "Fraktur";
// Presumably these generate the BLOBNBOX_CLIST / BLOBNBOX_C_IT types used
// by os_detect - confirm against the CLISTIZE macro definitions.
CLISTIZEH(BLOBNBOX);
CLISTIZE(BLOBNBOX);
// Find connected components in the page and process a subset until finished or
// a stopping criterion is met.
// filename: image file name; everything from its last '.' onwards is cut
//           off before it is passed to read_unlv_file.
// osr:      receives the results (passed straight on to os_detect).
// tess:     classifier instance passed on to os_detect.
// Returns true if the page was successfully processed.
bool orientation_and_script_detection(STRING& filename,
OSResults* osr,
tesseract::Tesseract* tess) {
STRING name = filename; //truncated name
const char *lastdot; //of name
TO_BLOCK_LIST land_blocks, port_blocks;
BLOCK_LIST blocks;
TBOX page_box;
// NOTE(review): strrchr finds the last '.' anywhere in the string, so a
// dot in a directory component of an extension-less path would also
// truncate the name - confirm inputs always carry an extension.
lastdot = strrchr (name.string (), '.');
if (lastdot != NULL)
name[lastdot-name.string()] = '\0';
// Fall back to a single block covering the whole page when
// read_unlv_file reports failure.
if (!read_unlv_file(name, page_image.get_xsize(), page_image.get_ysize(),
&blocks))
FullPageBlock(page_image.get_xsize(), page_image.get_ysize(), &blocks);
find_components(&blocks, &land_blocks, &port_blocks, &page_box);
// Only the port_blocks list is examined.
return os_detect(&port_blocks, osr, tess);
}
// Filter and sample the blobs.
// Blobs that are too elongated (side ratio above kSizeRatioToReject) or
// less than 10 high are dropped; up to kMaxCharactersToTry of the rest
// are classified in the order produced by a QRSequenceGenerator, stopping
// early once os_detect_blob reports that the stopping criteria are met
// and more than kMinCharactersToTry blobs have been tried.
// If osr is NULL a local OSResults is used and the results are discarded.
// Returns true if the page was successfully processed, or false if the page had
// too few characters to be reliable
bool os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
               tesseract::Tesseract* tess) {
  OSResults fallback_results;
  if (osr == NULL)
    osr = &fallback_results;
  osr->unicharset = &tess->unicharset;
  OrientationDetector o(osr);
  ScriptDetector s(osr, tess);

  // Collect the blobs worth classifying.
  BLOBNBOX_CLIST filtered_list;
  BLOBNBOX_C_IT filtered_it(&filtered_list);
  int blobs_total = 0;

  TO_BLOCK_IT block_it;
  block_it.set_to_list(port_blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
       block_it.forward ()) {
    BLOBNBOX_IT bbox_it;
    bbox_it.set_to_list(&block_it.data()->blobs);
    for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list ();
         bbox_it.forward ()) {
      BLOBNBOX* bbox = bbox_it.data();
      TBOX box = bbox->cblob()->bounding_box();
      ++blobs_total;

      float y_x = fabs((box.height() * 1.0) / box.width());
      float x_y = 1.0f / y_x;
      // Select a >= 1.0 ratio
      float ratio = x_y > y_x ? x_y : y_x;

      const bool too_elongated = ratio > kSizeRatioToReject;  // ambiguous
      const bool too_short = box.height() < 10;
      if (too_elongated || too_short)
        continue;
      filtered_it.add_to_end(bbox);
    }
  }

  const int filtered_count = filtered_it.length();
  if (filtered_count > 0)
    filtered_it.move_to_first();

  int real_max = MIN(filtered_count, kMaxCharactersToTry);
  printf("Total blobs found = %d\n", blobs_total);
  printf("Number of blobs post-filtering = %d\n", filtered_count);
  printf("Number of blobs to try = %d\n", real_max);

  // If there are too few characters, skip this page entirely.
  if (real_max < kMinCharactersToTry / 2) {
    printf("Too few characters. Skipping this page\n");
    return false;
  }

  // Flatten the list into an array so blobs can be picked by index.
  BLOBNBOX** blobs = new BLOBNBOX*[filtered_count];
  int number_of_blobs = 0;
  for (filtered_it.mark_cycle_pt (); !filtered_it.cycled_list ();
       filtered_it.forward ()) {
    blobs[number_of_blobs++] = (BLOBNBOX*)filtered_it.data();
  }

  QRSequenceGenerator sequence(number_of_blobs);
  for (int tried = 0; tried < real_max; ++tried) {
    BLOBNBOX* candidate = blobs[sequence.GetVal()];
    const bool confident = os_detect_blob(candidate, &o, &s, osr, tess);
    if (confident && tried > kMinCharactersToTry)
      break;
  }
  delete [] blobs;

  // Make sure the best_result is up-to-date
  s.update_best_script(o.get_orientation());
  return true;
}
// Processes a single blob to estimate script and orientation.
// The blob is classified four times; after each classification it is
// rotated once with pblob.rotate(), and the four result lists are handed
// to the orientation and script detectors.
// Return true if estimate of orientation and script satisfies stopping
// criteria.
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
ScriptDetector* s, OSResults* osr,
tesseract::Tesseract* tess) {
C_BLOB* blob = bbox->cblob();
TBOX box = blob->bounding_box();
// The midpoints are computed in floating point and truncated to int.
int x_mid = (box.left() + box.right()) / 2.0f;
int y_mid = (box.bottom() + box.top()) / 2.0f;
PBLOB pblob(blob, box.height());
BLOB_CHOICE_LIST ratings[4];
// Test the 4 orientations
for (int i = 0; i < 4; ++i) {
// normalize the blob
pblob.move(FCOORD(-x_mid, -box.bottom()));
pblob.scale(static_cast<float>(bln_x_height) / box.height());
pblob.move(FCOORD(0.0f, bln_baseline_offset));
{
// List of choices given by the classifier
TBLOB *tessblob; //converted blob
TEXTROW tessrow; //dummy row
// These two settings are rewritten on every pass.
tess_cn_matching.set_value(true); // turn it on
tess_bn_matching.set_value(false);
//convert blob
tessblob = make_tess_blob (&pblob, TRUE);
//make dummy row
make_tess_row(NULL, &tessrow);
//classify
tess->AdaptiveClassifier (tessblob, NULL, &tessrow, ratings + i, NULL);
free_blob(tessblob);
}
// undo normalize
pblob.move(FCOORD(0.0f, -bln_baseline_offset));
pblob.scale(1.0f / (static_cast<float>(bln_x_height) / box.height()));
pblob.move(FCOORD(x_mid, box.bottom()));
// center the blob
pblob.move(FCOORD(-x_mid, -y_mid));
// Rotate it
pblob.rotate();
// Re-compute the mid
// NOTE(review): box is refreshed here, before the move below, so the
// next pass normalises with the pre-move box - confirm this is intended.
box = pblob.bounding_box();
x_mid = (box.left() + box.right()) / 2;
y_mid = (box.top() + box.bottom()) / 2;
// re-center in the new mid
pblob.move(FCOORD(x_mid, y_mid));
}
bool stop = o->detect_blob(ratings);
s->detect_blob(ratings);
int orientation = o->get_orientation();
// Stop only when both detectors agree that they are done.
stop = s->must_stop(orientation) && stop;
return stop;
}
// Remembers the results structure that the detector accumulates into.
// The pointer is stored, not copied.
OrientationDetector::OrientationDetector(OSResults* osr)
  : osr_(osr) {
}
// Score the given blob and return true if it is now sure of the orientation
// after adding this blob.
// scores holds one choice list per orientation; for every non-empty list
// 100 plus the certainty of the choice at the list's current position is
// added to that orientation's running total. The detector is "sure" when
// the best total exceeds the second best by more than
// kOrientationAcceptRatio.
bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
  for (int i = 0; i < 4; ++i) {
    BLOB_CHOICE_IT choice_it;
    choice_it.set_to_list(scores + i);
    if (!choice_it.empty()) {
      osr_->orientations[i] += (100 + choice_it.data()->certainty());
    }
  }

  // Find the two largest totals.
  float first = -1;
  float second = -1;
  for (int i = 0; i < 4; ++i) {
    if (osr_->orientations[i] > first) {
      second = first;
      first = osr_->orientations[i];
    } else if (osr_->orientations[i] > second) {
      second = osr_->orientations[i];
    }
  }

  // NOTE(review): second can be 0 here, in which case the ratio is
  // infinite (or NaN when first is 0 too) - confirm that is acceptable.
  return first / second > kOrientationAcceptRatio;
}
// Recomputes best_result.orientation (index of the largest orientation
// total; the lowest index wins ties) and best_result.oconfidence, which
// is how far the best/second-best ratio exceeds 1, scaled so that a
// ratio of kOrientationAcceptRatio gives 1.0.
void OrientationDetector::update_best_orientation() {
  // Seed the top two from entries 0 and 1, remembering which one leads.
  int best = 0;
  float first = osr_->orientations[0];
  float second = osr_->orientations[1];
  if (first < second) {
    second = first;
    first = osr_->orientations[1];
    best = 1;
  }

  // Only the remaining entries need examining. Rescanning from 0 would
  // compare the leader with itself, never report index 1 as best, and
  // overwrite second with the leader's own score.
  for (int i = 2; i < 4; ++i) {
    if (osr_->orientations[i] > first) {
      second = first;
      first = osr_->orientations[i];
      best = i;
    } else if (osr_->orientations[i] > second) {
      second = osr_->orientations[i];
    }
  }

  osr_->best_result.orientation = best;
  osr_->best_result.oconfidence =
    (first / second - 1.0) / (kOrientationAcceptRatio - 1.0);
}
// Refreshes best_result from the current orientation totals and returns
// the index stored in best_result.orientation.
int OrientationDetector::get_orientation() {
update_best_orientation();
return osr_->best_result.orientation;
}
// Stores the results structure and classifier, then looks up the ids of
// the scripts and pseudo-scripts this detector refers to by passing each
// name to unicharset.add_script and keeping the returned id.
// NOTE(review): presumably add_script returns the existing id when the
// script is already known - confirm against UNICHARSET.
ScriptDetector::ScriptDetector(OSResults* osr, tesseract::Tesseract* tess) {
osr_ = osr;
tess_ = tess;
katakana_id_ = tess_->unicharset.add_script(katakana_script);
hiragana_id_ = tess_->unicharset.add_script(hiragana_script);
han_id_ = tess_->unicharset.add_script(han_script);
hangul_id_ = tess_->unicharset.add_script(hangul_script);
japanese_id_ = tess_->unicharset.add_script(japanese_script_);
korean_id_ = tess_->unicharset.add_script(korean_script_);
latin_id_ = tess_->unicharset.add_script(latin_script);
fraktur_id_ = tess_->unicharset.add_script(fraktur_script_);
}
// Score the given blob: update the per-orientation script tallies in
// osr_->scripts_na from the classifier choices for one blob. Nothing is
// returned; use must_stop() to ask whether the script is now certain.
// scores points to an array of 4 choice lists, one per orientation.
// For each orientation only the first choice of each script is considered.
// The blob votes only if it is unambiguous, i.e. no choice from a second
// script scores within kNonAmbiguousMargin of the first choice.
// NOTE(review): script ids index done[] and scripts_na[][] without a range
// check, so they are assumed to be in [0, kMaxNumberOfScripts) - confirm.
void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
  bool done[kMaxNumberOfScripts];
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < kMaxNumberOfScripts; ++j)
      done[j] = false;
    BLOB_CHOICE_IT choice_it;
    choice_it.set_to_list(scores + i);
    float prev_score = -1;    // Negated certainty of the first choice.
    int script_count = 0;     // Number of scripts competing for this blob.
    int prev_id = -1;         // Script id of the first choice.
    int prev_class_id = -1;   // Unichar id of the first choice.
    int prev_config = -1;     // Config of the first choice.
    const char* prev_unichar = "";
    const char* unichar = "";
    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      BLOB_CHOICE* choice = choice_it.data();
      int id = choice->script_id();
      // Script already processed before.
      if (done[id]) continue;
      done[id] = true;
      unichar = tess_->unicharset.id_to_unichar(choice->unichar_id());
      if (prev_score < 0) {
        // Save data from the first match.
        prev_score = -choice->certainty();
        script_count = 1;
        prev_id = id;
        prev_unichar = unichar;
        prev_class_id = choice->unichar_id();
        prev_config = choice->config();
      } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) {
        // Another script scores almost as well: the blob is ambiguous.
        script_count++;
      }
      // Stop at a digit when the first match is a single-byte character.
      if (strlen(prev_unichar) == 1 &&
          unichar[0] >= '0' && unichar[0] <= '9')
        break;
      // if script_count is >= 2, character is ambiguous, skip other matches
      // since they are useless.
      if (script_count >= 2)
        break;
    }
    // Character is non ambiguous
    if (script_count == 1) {
      // Update the score of the winning script
      osr_->scripts_na[i][prev_id] += 1;
      // Workaround for Fraktur: a Latin match whose font is Fraktur is moved
      // from the Latin tally to the Fraktur tally.
      if (prev_id == latin_id_) {
        int font_set_id = tess_->PreTrainedTemplates->
            Class[prev_class_id]->font_set_id;
        if (font_set_id >= 0 && prev_config >= 0) {
          FontInfo fi = tess_->get_fontinfo_table().get(
              tess_->get_fontset_table().get(font_set_id).configs[prev_config]);
          if (fi.is_fraktur()) {
            osr_->scripts_na[i][prev_id] -= 1;
            osr_->scripts_na[i][fraktur_id_] += 1;
          }
        }
      }
      // Update Japanese / Korean pseudo-scripts
      if (prev_id == katakana_id_)
        osr_->scripts_na[i][japanese_id_] += 1;
      if (prev_id == hiragana_id_)
        osr_->scripts_na[i][japanese_id_] += 1;
      if (prev_id == hangul_id_)
        osr_->scripts_na[i][korean_id_] += 1;
      // Han characters count partially towards both pseudo-scripts.
      if (prev_id == han_id_) {
        osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
        osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
      }
    }
  }  // iterate over each orientation
}
bool ScriptDetector::must_stop(int orientation) {
update_best_script(orientation);
return osr_->best_result.sconfidence > 1;
}
// Scans the script tallies collected for the given orientation, stores the
// name of the highest-scoring script in osr_->best_result.script and derives
// osr_->best_result.sconfidence from the ratio of the two highest tallies.
void ScriptDetector::update_best_script(int orientation) {
  const float* tallies = osr_->scripts_na[orientation];
  float leader = -1;
  float runner_up = -1;
  // Script id 0 is skipped: start at 1 -> ignore Common scripts.
  for (int script_id = 1; script_id < kMaxNumberOfScripts; ++script_id) {
    const float tally = tallies[script_id];
    if (tally > leader) {
      osr_->best_result.script =
          tess_->unicharset.get_script_from_script_id(script_id);
      runner_up = leader;
      leader = tally;
    } else if (tally > runner_up) {
      runner_up = tally;
    }
  }
  // The same acceptance ratio as for orientations scales the confidence.
  osr_->best_result.sconfidence =
      (leader / runner_up - 1.0) / (kOrientationAcceptRatio - 1.0);
}

102
ccmain/osdetect.h Normal file
View File

@ -0,0 +1,102 @@
///////////////////////////////////////////////////////////////////////
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_OSDETECT_H__
#define TESSERACT_CCMAIN_OSDETECT_H__
#include "strngs.h"
#include "unicharset.h"
class TO_BLOCK_LIST;
class BLOBNBOX;
class BLOB_CHOICE_LIST;
namespace tesseract {
class Tesseract;
}
// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
// This is the second dimension of OSResults::scripts_na, so any script id
// used as an index into it must be smaller than this value.
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
// The current best guess of orientation and script, with a confidence
// value for each.
struct OSBestResult {
// Index (0..3) into OSResults::orientations of the best orientation.
int orientation;
// Name of the best script.
// NOTE(review): the string is not copied or freed here; presumably it is
// owned by the unicharset that supplied it - confirm.
const char* script;
// Confidence in script; ScriptDetector::must_stop tests it against 1.
float sconfidence;
// Confidence in orientation.
float oconfidence;
};
struct OSResults {
OSResults() {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < kMaxNumberOfScripts; ++j)
scripts_na[i][j] = 0;
orientations[i] = 0;
}
}
float orientations[4];
float scripts_na[4][kMaxNumberOfScripts];
UNICHARSET* unicharset;
OSBestResult best_result;
};
// Accumulates per-blob orientation evidence into an OSResults and picks the
// best of the 4 orientations.
class OrientationDetector {
public:
// Stores the pointer to the results to update; no copy is made.
OrientationDetector(OSResults*);
// Adds the scores of one blob (an array of 4 choice lists, one per
// orientation) and returns true once the orientation is considered certain.
bool detect_blob(BLOB_CHOICE_LIST* scores);
// Recomputes the best orientation and its confidence in the results.
void update_best_orientation();
// Calls update_best_orientation() and returns the best orientation index.
int get_orientation();
private:
// Results being accumulated; not owned by this class as far as this header
// shows (there is no destructor).
OSResults* osr_;
};
// Accumulates per-blob script evidence into an OSResults and picks the most
// likely script for a given orientation.
class ScriptDetector {
public:
// Stores both pointers (no copies are made) and looks up the script ids
// cached in the *_id_ members below.
ScriptDetector(OSResults*, tesseract::Tesseract* tess);
// Adds the scores of one blob (an array of 4 choice lists, one per
// orientation) to the script tallies.
void detect_blob(BLOB_CHOICE_LIST* scores);
// Recomputes the best script and its confidence for the given orientation.
void update_best_script(int);
// NOTE(review): no definition of get_script() was found next to the other
// members - confirm that it is implemented before calling it.
void get_script() ;
// Updates the best script for orientation and returns true when its
// confidence is high enough to stop looking at more blobs.
bool must_stop(int orientation);
private:
OSResults* osr_;
// Names of the pseudo-scripts added by this detector.
static const char* korean_script_;
static const char* japanese_script_;
static const char* fraktur_script_;
// Script ids obtained from the unicharset in the constructor.
int korean_id_;
int japanese_id_;
int katakana_id_;
int hiragana_id_;
int han_id_;
int hangul_id_;
int latin_id_;
int fraktur_id_;
// Recognizer whose unicharset and font tables are consulted.
tesseract::Tesseract* tess_;
};
// Entry points for orientation and script detection. Their definitions are
// not part of this header; the notes below are read off the signatures only.
// NOTE(review): presumably runs detection on the image file named by
// filename and fills in the given OSResults - confirm the meaning of the
// bool result against the implementation.
bool orientation_and_script_detection(STRING& filename,
OSResults*,
tesseract::Tesseract*);
// NOTE(review): presumably runs detection over the blobs of the given blocks,
// accumulating into osr - confirm.
bool os_detect(TO_BLOCK_LIST* port_blocks,
OSResults* osr,
tesseract::Tesseract* tess);
// NOTE(review): presumably classifies a single blob and feeds the scores to
// the two detectors o and s - confirm.
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
ScriptDetector* s, OSResults*,
tesseract::Tesseract* tess);
#endif // TESSERACT_CCMAIN_OSDETECT_H__

153
ccmain/otsuthr.cpp Normal file
View File

@ -0,0 +1,153 @@
/**********************************************************************
* File: otsuthr.cpp
* Description: Simple Otsu thresholding for binarizing images.
* Author: Ray Smith
* Created: Fri Mar 07 12:31:01 PST 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string.h>
#include "otsuthr.h"
namespace tesseract {
// Computes one Otsu threshold per channel for the given image rectangle.
// Each channel is one byte per pixel, so there are bytes_per_pixel channels.
// On return *thresholds and *hi_values are new[]-allocated arrays with one
// entry per channel: a pixel value > (*thresholds)[ch] is foreground when
// (*hi_values)[ch] is 0 and background when it is 1. A hi_value of -1 means
// the channel shows no apparent foreground; an empty channel also keeps a
// threshold of -1. At least one hi_value will not be -1.
// Delete thresholds and hi_values with delete [] after use.
void OtsuThreshold(const unsigned char* imagedata,
                   int bytes_per_pixel, int bytes_per_line,
                   int left, int top, int width, int height,
                   int** thresholds, int** hi_values) {
  // Fallback choice, used only if no channel gives a convincing answer.
  bool found_convincing_channel = false;
  int fallback_channel = 0;
  int fallback_hi_value = 1;
  double fallback_dist = 0.0;

  *thresholds = new int[bytes_per_pixel];
  *hi_values = new int[bytes_per_pixel];

  for (int channel = 0; channel < bytes_per_pixel; ++channel) {
    (*thresholds)[channel] = -1;
    (*hi_values)[channel] = -1;

    // Histogram of this channel over the rectangle, then its Otsu split.
    int histogram[kHistogramSize];
    HistogramRect(imagedata + channel, bytes_per_pixel, bytes_per_line,
                  left, top, width, height, histogram);
    int total;
    int low_count;
    const int threshold = OtsuStats(histogram, &total, &low_count);
    if (low_count == 0 || low_count == total)
      continue;  // This channel is empty.

    (*thresholds)[channel] = threshold;

    // A convincing foreground is a small fraction of the pixels and a
    // convincing background a large one. Anything in between is taken to
    // carry no thresholding information.
    if (low_count > total * 0.75) {
      found_convincing_channel = true;
      (*hi_values)[channel] = 0;
      continue;
    }
    if (low_count < total * 0.25) {
      found_convincing_channel = true;
      (*hi_values)[channel] = 1;
      continue;
    }

    // Unconvincing channel: remember the best of the bad lot.
    const int hi_value = low_count < total * 0.5;
    const double dist = hi_value ? (total - low_count) : low_count;
    if (dist > fallback_dist) {
      fallback_dist = dist;
      fallback_hi_value = hi_value;
      fallback_channel = channel;
    }
  }

  if (!found_convincing_channel) {
    // Use the best of the ones that were not good enough.
    (*hi_values)[fallback_channel] = fallback_hi_value;
  }
}
// Builds the histogram of one channel over the given image rectangle.
// imagedata points at the first byte of the wanted channel in the top-left
// pixel of the whole image; bytes_per_pixel is the distance between
// successive pixels of that channel (which skips the other channels of a
// pixel-major image) and bytes_per_line the distance between rows.
// histogram must have room for kHistogramSize(256) counts, one per possible
// pixel value; it is cleared before counting.
void HistogramRect(const unsigned char* imagedata,
                   int bytes_per_pixel, int bytes_per_line,
                   int left, int top, int width, int height,
                   int* histogram) {
  for (int bin = 0; bin < kHistogramSize; ++bin)
    histogram[bin] = 0;

  // First pixel of the rectangle.
  const unsigned char* row = imagedata +
                             top * bytes_per_line +
                             left * bytes_per_pixel;
  for (int rows_left = height; rows_left > 0; --rows_left) {
    int offset = 0;
    for (int col = 0; col < width; ++col, offset += bytes_per_pixel) {
      ++histogram[row[offset]];
    }
    row += bytes_per_line;
  }
}
// Compute the Otsu threshold for the given histogram, which must have
// kHistogramSize entries.
// Returns the threshold t that maximizes the between-class variance of the
// two classes {values <= t} and {values > t}, or -1 if no split with two
// non-empty classes exists.
// If non-NULL, *H_out receives H = total count in histogram, and
// *omega0_out receives omega0 = count of histogram at or below the returned
// threshold (0 when the result is -1).
int OtsuStats(const int* histogram, int* H_out, int* omega0_out) {
  int H = 0;
  double mu_T = 0.0;
  for (int i = 0; i < kHistogramSize; ++i) {
    H += histogram[i];
    // Multiply as double: i * histogram[i] overflows int for large uniform
    // regions (e.g. several million pixels of value 255).
    mu_T += i * static_cast<double>(histogram[i]);
  }
  // Now maximize sig_sq_B over t.
  // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
  int best_t = -1;
  int best_omega_0 = 0;
  double best_sig_sq_B = 0.0;
  int omega_0 = 0;      // Count of pixels with value <= t.
  double mu_t = 0.0;    // Sum of the values of those pixels.
  for (int t = 0; t < kHistogramSize - 1; ++t) {
    omega_0 += histogram[t];
    mu_t += t * static_cast<double>(histogram[t]);
    if (omega_0 == 0)
      continue;  // Nothing at or below t yet.
    int omega_1 = H - omega_0;
    if (omega_1 == 0)
      break;     // Nothing above t any more.
    double mu_0 = mu_t / omega_0;
    double mu_1 = (mu_T - mu_t) / omega_1;
    // Between-class variance (unnormalized):
    // omega_0 * omega_1 * (mu_1 - mu_0)^2.
    double sig_sq_B = mu_1 - mu_0;
    sig_sq_B *= sig_sq_B * omega_0 * omega_1;
    if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
      best_sig_sq_B = sig_sq_B;
      best_t = t;
      best_omega_0 = omega_0;
    }
  }
  if (H_out != NULL) *H_out = H;
  if (omega0_out != NULL) *omega0_out = best_omega_0;
  return best_t;
}
} // namespace tesseract.

58
ccmain/otsuthr.h Normal file
View File

@ -0,0 +1,58 @@
///////////////////////////////////////////////////////////////////////
// File: otsuthr.h
// Description: Simple Otsu thresholding for binarizing images.
// Author: Ray Smith
// Created: Fri Mar 07 12:14:01 PST 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_OTSUTHR_H__
#define TESSERACT_CCMAIN_OTSUTHR_H__
namespace tesseract {
const int kHistogramSize = 256; // The size of a histogram of pixel values.
// Compute the Otsu threshold(s) for the given image rectangle, making one
// for each channel. Each channel is always one byte per pixel, so the number
// of channels (and of entries in each returned array) is bytes_per_pixel.
// left, top, width and height are in pixels; bytes_per_line is the distance
// in bytes between the starts of successive image rows.
// Returns an array of threshold values and an array of hi_values, such
// that a pixel value >threshold[channel] is considered foreground if
// hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
// that there is no apparent foreground. At least one hi_value will not be -1.
// Delete thresholds and hi_values with delete [] after use.
void OtsuThreshold(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height,
int** thresholds, int** hi_values);
// Compute the histogram for the given image rectangle, and the given
// channel. (Channel pointed to by imagedata.) Each channel is always
// one byte per pixel.
// Bytes per pixel is used to skip channels not being
// counted with this call in a multi-channel (pixel-major) image.
// Histogram is always a kHistogramSize(256) element array to count
// occurrences of each pixel value. It is cleared before counting.
void HistogramRect(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height,
int* histogram);
// Compute the Otsu threshold for the given histogram.
// Also returns H = total count in histogram, and
// omega0 = count of histogram at or below the threshold.
// Either output pointer may be NULL if that value is not wanted.
int OtsuStats(const int* histogram, int* H_out, int* omega0_out);
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_OTSUTHR_H__

330
ccmain/pageres.cpp Normal file
View File

@ -0,0 +1,330 @@
/**********************************************************************
* File: pageres.cpp (Formerly page_res.c)
* Description: Results classes used by control.c
* Author: Phil Cheatle
* Created: Tue Sep 22 08:42:49 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <stdlib.h>
#ifdef __UNIX__
#include <assert.h>
#endif
#include "pageres.h"
#include "notdll.h"
// List-class macro instantiations for the result classes.
// NOTE(review): presumably these generate the implementations matching the
// ELISTIZEH / CLISTIZEH declarations in pageres.h - confirm in the list
// headers.
ELISTIZE (BLOCK_RES)
CLISTIZE (BLOCK_RES) ELISTIZE (ROW_RES) ELISTIZE (WERD_RES)
/*************************************************************************
* PAGE_RES::PAGE_RES
*
* Build the results for a whole page: zero the page counters and append
* one new BLOCK_RES for every block in the given list, in list order.
*************************************************************************/
PAGE_RES::PAGE_RES(BLOCK_LIST *the_block_list) {
  char_count = 0;
  rej_count = 0;
  rejected = FALSE;

  BLOCK_IT blocks(the_block_list);             //real page
  BLOCK_RES_IT results(&block_res_list);       //results being built
  blocks.mark_cycle_pt ();
  while (!blocks.cycled_list ()) {
    BLOCK_RES *block_result = new BLOCK_RES (blocks.data ());
    results.add_to_end (block_result);
    blocks.forward ();
  }
}
/*************************************************************************
* BLOCK_RES::BLOCK_RES
*
* Build the results for one block: reset all statistics and font
* information to "not assigned" and append one new ROW_RES for every row
* of the block, in list order.
*************************************************************************/
BLOCK_RES::BLOCK_RES(BLOCK *the_block) {
  block = the_block;                           //real BLOCK
  char_count = 0;
  rej_count = 0;
  row_count = 0;
  x_height = -1.0;
  font_class = -1;                             //not assigned
  font_assigned = FALSE;
  bold = FALSE;
  italic = FALSE;

  ROW_IT rows (the_block->row_list ());
  ROW_RES_IT results(&row_res_list);
  rows.mark_cycle_pt ();
  while (!rows.cycled_list ()) {
    ROW_RES *row_result = new ROW_RES (rows.data ());
    results.add_to_end (row_result);
    rows.forward ();
  }
}
/*************************************************************************
* ROW_RES::ROW_RES
*
* Constructor for ROW results.
* Resets all statistics and font information, then appends one WERD_RES
* for every word of the row. Words joined by fuzzy non-spaces
* (W_FUZZY_NON) are additionally merged into a "combination" WERD_RES
* that is inserted BEFORE its parts; the parts get part_of_combo set.
*************************************************************************/
ROW_RES::ROW_RES(                //recursive construct
                 ROW *the_row    //real ROW
                ) {
  WERD_IT word_it (the_row->word_list ());
  WERD_RES_IT word_res_it(&word_res_list);
  WERD_RES *combo = NULL;        //current combination of fuzzies

  char_count = 0;
  rej_count = 0;
  whole_word_rej_count = 0;
  font_class = -1;
  font_class_score = -1.0;
  bold = FALSE;
  italic = FALSE;
  // Same "no font yet" defaults as the WERD_RES constructor uses, so that
  // these members are never read uninitialized.
  font1 = -1;
  font1_count = 0;
  font2 = -1;
  font2_count = 0;
  row = the_row;

  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    WERD_RES *word_res = new WERD_RES (word_it.data ());
    word_res->x_height = the_row->x_height();

    // This word continues a combination: copy its blobs onto the combo.
    if (word_res->word->flag (W_FUZZY_NON)) {
      ASSERT_HOST (combo != NULL);
      word_res->part_of_combo = TRUE;
      combo->copy_on (word_res);
    }

    if (word_it.data_relative (1)->flag (W_FUZZY_NON)) {
      // The following word joins onto this one: start a combination if
      // there is none yet.
      if (combo == NULL) {
        WERD *copy_word = new WERD;
        //deep copy
        *copy_word = *(word_it.data ());
        combo = new WERD_RES (copy_word);
        combo->x_height = the_row->x_height();
        combo->combination = TRUE;
        word_res_it.add_to_end (combo);
      }
      word_res->part_of_combo = TRUE;
    }
    else {
      combo = NULL;              //combination (if any) ends here
    }
    word_res_it.add_to_end (word_res);
  }
}
/*************************************************************************
* WERD_RES::operator=
*
* Assign from source, deep copying everything this object would delete in
* its destructor: outword and the choices always, word only when source is
* a combination (otherwise the word pointer is shared).
* Pointers previously held by this object are NOT freed, because the copy
* constructor calls this operator on a still uninitialized object.
*************************************************************************/
WERD_RES & WERD_RES::operator= (   //assign word_res
                                const WERD_RES & source  //from this
                               ) {
  // Self-assignment would otherwise replace the owned objects by copies of
  // freshly allocated empty ones.
  if (this == &source)
    return *this;

  this->ELIST_LINK::operator= (source);
  if (source.combination) {
    word = new WERD;
    *word = *(source.word);      //deep copy
  }
  else
    word = source.word;          //pt to same word

  if (source.outword != NULL) {
    outword = new WERD;
    *outword = *(source.outword);//deep copy
  }
  else
    outword = NULL;

  denorm = source.denorm;

  // raw_choice is only carried over together with best_choice, matching the
  // destructor, which deletes the two as a pair.
  if (source.best_choice != NULL) {
    best_choice = new WERD_CHOICE;
    *best_choice = *(source.best_choice);
    if (source.raw_choice != NULL) {
      raw_choice = new WERD_CHOICE;
      *raw_choice = *(source.raw_choice);
    }
    else
      raw_choice = NULL;         //nothing to copy: do not dereference NULL
  }
  else {
    best_choice = NULL;
    raw_choice = NULL;
  }

  if (source.ep_choice != NULL) {
    ep_choice = new WERD_CHOICE;
    *ep_choice = *(source.ep_choice);
  }
  else
    ep_choice = NULL;

  reject_map = source.reject_map;
  tess_failed = source.tess_failed;
  tess_accepted = source.tess_accepted;
  tess_would_adapt = source.tess_would_adapt;
  done = source.done;
  unlv_crunch_mode = source.unlv_crunch_mode;
  italic = source.italic;
  bold = source.bold;
  font1 = source.font1;
  font1_count = source.font1_count;
  font2 = source.font2;
  font2_count = source.font2_count;
  x_height = source.x_height;
  caps_height = source.caps_height;
  guessed_x_ht = source.guessed_x_ht;
  guessed_caps_ht = source.guessed_caps_ht;
  combination = source.combination;
  part_of_combo = source.part_of_combo;
  reject_spaces = source.reject_spaces;
  return *this;
}
/*************************************************************************
* WERD_RES::~WERD_RES
*
* Free everything this result owns. The word itself is only owned (and so
* only deleted) by combination results; raw_choice is released together
* with best_choice.
*************************************************************************/
WERD_RES::~WERD_RES () {
  if (combination) {
    delete word;
  }
  delete outword;                //deleting NULL is a no-op
  if (best_choice != NULL) {
    delete best_choice;
    delete raw_choice;
  }
  delete ep_choice;              //deleting NULL is a no-op
}
/*************************************************************************
* PAGE_RES_IT::restart_page
*
* Rewind to the start of the page: re-seat the block iterator, forget the
* previous/current/next positions and step forward twice so that both the
* "next" and then the "current" word get filled in.
* Returns the first word of the page.
*************************************************************************/
WERD_RES *PAGE_RES_IT::restart_page() {
  block_res_it.set_to_list (&page_res->block_res_list);
  block_res_it.mark_cycle_pt ();

  prev_block_res = block_res = next_block_res = NULL;
  prev_row_res = row_res = next_row_res = NULL;
  prev_word_res = word_res = next_word_res = NULL;

  internal_forward(TRUE);        //loads "next" from the first block
  return internal_forward (FALSE);  //moves it to "current"
}
/*************************************************************************
* PAGE_RES_IT::internal_forward
*
* Find the next word on the page. Empty blocks and rows are skipped.
* The iterator maintains pointers to block, row and word for the previous,
* current and next words. These are correct, regardless of block/row
* boundaries. NULL values denote start and end of the page.
*
* new_block is TRUE when block_res_it has just been moved to a block whose
* rows have not been visited yet, so the row iterator must be re-seated.
* Returns the new current word, i.e. the word that was "next" before the
* call (NULL if there was none).
*************************************************************************/
WERD_RES *PAGE_RES_IT::internal_forward(BOOL8 new_block) {
BOOL8 found_next_word = FALSE;
BOOL8 new_row = FALSE;
// Shift the window: current becomes previous, next becomes current.
prev_block_res = block_res;
prev_row_res = row_res;
prev_word_res = word_res;
block_res = next_block_res;
row_res = next_row_res;
word_res = next_word_res;
// Search for the new "next" word from where the three iterators stand.
while (!found_next_word && !block_res_it.cycled_list ()) {
if (new_block) {
new_block = FALSE;
// Start on the rows of the block now under the block iterator.
row_res_it.set_to_list (&block_res_it.data ()->row_res_list);
row_res_it.mark_cycle_pt ();
new_row = TRUE;
}
while (!found_next_word && !row_res_it.cycled_list ()) {
if (new_row) {
new_row = FALSE;
// Start on the words of the row now under the row iterator.
word_res_it.set_to_list (&row_res_it.data ()->word_res_list);
word_res_it.mark_cycle_pt ();
}
while (!found_next_word && !word_res_it.cycled_list ()) {
next_block_res = block_res_it.data ();
next_row_res = row_res_it.data ();
next_word_res = word_res_it.data ();
found_next_word = TRUE;
// Step past the word just taken and past any following words that are
// flagged part_of_combo, ready for the following call.
do {
word_res_it.forward ();
}
while (word_res_it.data ()->part_of_combo);
}
if (!found_next_word) { //end of row reached
row_res_it.forward ();
new_row = TRUE;
}
}
if (!found_next_word) { //end of block reached
block_res_it.forward ();
new_block = TRUE;
}
}
if (!found_next_word) { //end of page reached
next_block_res = NULL;
next_row_res = NULL;
next_word_res = NULL;
}
return word_res;
}
/*************************************************************************
* PAGE_RES_IT::forward_block
*
* Move to the first word of the next block.
* If the next word already lies in a different block a single step is
* enough. Otherwise the block iterator is advanced, the current and next
* positions are forgotten and the window is reloaded from the new block.
* Can be followed by subsequent calls to forward() BUT at the first word in
* the block, the prev block, row and word are all NULL.
*************************************************************************/
WERD_RES *PAGE_RES_IT::forward_block() {
  if (block_res != next_block_res) {
    //next word is already in another block
    return internal_forward (FALSE);
  }

  block_res_it.forward ();
  block_res = next_block_res = NULL;
  row_res = next_row_res = NULL;
  word_res = next_word_res = NULL;
  internal_forward(TRUE);        //loads "next" from the new block
  return internal_forward (FALSE);  //moves it to "current"
}
void PAGE_RES_IT::rej_stat_word() {
inT16 chars_in_word;
inT16 rejects_in_word = 0;
chars_in_word = word_res->reject_map.length ();
page_res->char_count += chars_in_word;
block_res->char_count += chars_in_word;
row_res->char_count += chars_in_word;
rejects_in_word = word_res->reject_map.reject_count ();
page_res->rej_count += rejects_in_word;
block_res->rej_count += rejects_in_word;
row_res->rej_count += rejects_in_word;
if (chars_in_word == rejects_in_word)
row_res->whole_word_rej_count += rejects_in_word;
}

313
ccmain/pageres.h Normal file
View File

@ -0,0 +1,313 @@
/**********************************************************************
* File: pageres.h (Formerly page_res.h)
* Description: Results classes used by control.c
* Author: Phil Cheatle
* Created: Tue Sep 22 08:42:49 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef PAGERES_H
#define PAGERES_H
#include "elst.h"
#include "ocrblock.h"
#include "ocrrow.h"
#include "werd.h"
#include "ratngs.h"
#include "rejctmap.h"
#include "notdll.h"
#include "notdll.h"
/* Forward declarations */
/* Each result class is declared before its list macros so that the list
   types can refer to it before the class itself is defined further down.
   NOTE(review): ELISTIZEH / CLISTIZEH presumably declare the matching
   *_LIST and *_IT classes used below - confirm in the list headers. */
class BLOCK_RES;
ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES)
class
ROW_RES;
ELISTIZEH (ROW_RES)
class WERD_RES;
ELISTIZEH (WERD_RES)
/*************************************************************************
* PAGE_RES - Page results
*
* Top of the results hierarchy: holds the list of block results and the
* character/reject totals for the whole page.
*************************************************************************/
class PAGE_RES //page result
{
public:
inT32 char_count; //chars in page
inT32 rej_count; //rejected chars in page
BLOCK_RES_LIST block_res_list; //one result per block
BOOL8 rejected;
PAGE_RES() {
} //empty constructor: leaves the counters and flag unset
PAGE_RES( //simple constructor
BLOCK_LIST *block_list); //real blocks
~PAGE_RES () { //destructor
}
};
/*************************************************************************
* BLOCK_RES - Block results
*
* Results for one block: a pointer to the real block, the list of row
* results and block-level statistics and font information.
*************************************************************************/
class BLOCK_RES:public ELIST_LINK
//page block result
{
public:
BLOCK * block; //real block
inT32 char_count; //chars in block
inT32 rej_count; //rejected chars
inT16 font_class; //-1 = not assigned
inT16 row_count;
float x_height; //-1.0 until set
BOOL8 font_assigned; // block already
// processed
BOOL8 bold; // all bold
BOOL8 italic; // all italic
ROW_RES_LIST row_res_list; //one result per row
BLOCK_RES() {
} //empty constructor: leaves all data members unset
BLOCK_RES( //simple constructor
BLOCK *the_block); //real block
~BLOCK_RES () { //destructor
}
};
/*************************************************************************
* ROW_RES - Row results
*
* Results for one row: a pointer to the real row, the list of word results
* (including combination words) and row-level statistics and font
* information.
*************************************************************************/
class ROW_RES:public ELIST_LINK //row result
{
public:
ROW * row; //real row
inT32 char_count; //chars in row
inT32 rej_count; //rejected chars
inT32 whole_word_rej_count; //rejs in total rej wds
WERD_RES_LIST word_res_list; //one result per word
float font_class_score;
inT16 font_class; //
inT32 italic;
inT32 bold;
inT8 font1; //primary font
inT8 font1_count; //no of voters
inT8 font2; //secondary font
inT8 font2_count; //no of voters
ROW_RES() {
} //empty constructor: leaves all data members unset
ROW_RES( //simple constructor
ROW *the_row); //real row
~ROW_RES () { //destructor
}
};
/*************************************************************************
* WERD_RES - Word results
*************************************************************************/
// Values for WERD_RES::unlv_crunch_mode. CR_NONE is the value a newly
// constructed WERD_RES starts with.
// NOTE(review): the other values presumably say how a crunched (discarded)
// word and the space around it are treated on output - confirm against the
// code that sets them.
enum CRUNCH_MODE
{
CR_NONE,
CR_KEEP_SPACE,
CR_LOOSE_SPACE,
CR_DELETE
};
// Results for one word: the real word, the recognition output for it and
// the per-word flags, font votes and height estimates.
// Ownership as implemented by the destructor: outword and the three choices
// are deleted with the result; word is deleted only when combination is set.
class WERD_RES:public ELIST_LINK //word result
{
public:
WERD * word; //non-bln real word
WERD *outword; //bln best choice
//segmentation
DENORM denorm; //for use on outword
WERD_CHOICE *best_choice; //tess output
WERD_CHOICE *raw_choice; //top choice permuter
WERD_CHOICE *ep_choice; //ep text
REJMAP reject_map; //best_choice rejects
BOOL8 tess_failed;
/*
If tess_failed is TRUE, one of the following tests failed when Tess
returned:
- The outword blob list was not the same length as the best_choice string;
- The best_choice string contained ALL blanks;
- The best_choice string was zero length
*/
BOOL8 tess_accepted; //Tess thinks its ok?
BOOL8 tess_would_adapt; //Tess would adapt?
BOOL8 done; //ready for output?
inT8 italic;
inT8 bold;
inT8 font1; //primary font
inT8 font1_count; //no of voters
inT8 font2; //secondary font
inT8 font2_count; //no of voters
CRUNCH_MODE unlv_crunch_mode;
float x_height; //Post match estimate
float caps_height; //Post match estimate
BOOL8 guessed_x_ht;
BOOL8 guessed_caps_ht;
/*
To deal with fuzzy spaces we need to be able to combine "words" to form
combinations when we suspect that the gap is a non-space. The (new) text
ord code generates separate words for EVERY fuzzy gap - flags in the word
indicate whether the gap is below the threshold (fuzzy kern) and is thus
NOT a real word break by default, or above the threshold (fuzzy space) and
this is a real word break by default.
The WERD_RES list contains all these words PLUS "combination" words built
out of (copies of) the words split by fuzzy kerns. The separate parts have
their "part_of_combo" flag set true and should be IGNORED on a default
reading of the list.
Combination words are FOLLOWED by the sequence of part_of_combo words
which they combine.
*/
BOOL8 combination; //of two fuzzy gap wds
BOOL8 part_of_combo; //part of a combo
BOOL8 reject_spaces; //Reject spacing?
// The empty constructor leaves every member unset, including the pointers
// that the destructor deletes.
WERD_RES() {
} //empty constructor
// Wraps the_word without copying it: no output yet, all flags FALSE, no
// font votes, heights 0 and marked as guessed.
WERD_RES( //simple constructor
WERD *the_word) { //real word
word = the_word;
outword = NULL;
best_choice = NULL;
raw_choice = NULL;
ep_choice = NULL;
tess_failed = FALSE;
tess_accepted = FALSE;
tess_would_adapt = FALSE;
done = FALSE;
unlv_crunch_mode = CR_NONE;
italic = FALSE;
bold = FALSE;
font1 = -1;
font1_count = 0;
font2 = -1;
font2_count = 0;
x_height = 0.0;
caps_height = 0.0;
guessed_x_ht = TRUE;
guessed_caps_ht = TRUE;
combination = FALSE;
part_of_combo = FALSE;
reject_spaces = FALSE;
}
// Copy construction is delegated to operator=, which therefore runs on an
// object whose members have not been initialized yet.
WERD_RES(const WERD_RES &source) {
*this = source; //see operator=
}
~WERD_RES (); //destructor
WERD_RES& operator=(const WERD_RES& source); //from this
// Returns a new heap-allocated copy of *src; the caller owns the result.
static WERD_RES* deep_copy(const WERD_RES* src) {
return new WERD_RES(*src);
}
// Takes the W_EOL flag of word_res's word and then copies that word onto
// this result's word (used when building combination words).
void copy_on( //copy blobs onto word
WERD_RES *word_res) { //from this word
word->set_flag (W_EOL, word_res->word->flag (W_EOL));
word->copy_on (word_res->word);
}
};
/*************************************************************************
* PAGE_RES_IT - Page results iterator
*
* Walks all the words of a PAGE_RES in order, keeping the block, row and
* word of the previous, current and next positions available at once.
* NULL pointers denote positions before the start or after the end of the
* page.
*************************************************************************/
class PAGE_RES_IT
{
public:
PAGE_RES * page_res; //page being iterated
PAGE_RES_IT() {
} //empty constructor: page_res must be set and restart_page() called before use
PAGE_RES_IT( //construct on a page
PAGE_RES *the_page_res) { //page result
page_res = the_page_res;
restart_page(); //ready to scan
}
WERD_RES *restart_page(); //get ready
WERD_RES *internal_forward( //get next word
BOOL8 new_block);
WERD_RES *forward() { //get next word
return internal_forward (FALSE);
}
WERD_RES *forward_block(); //get first word in
//next non-empty block
WERD_RES *prev_word() { //previous word
return prev_word_res;
}
ROW_RES *prev_row() { //row of prev word
return prev_row_res;
}
BLOCK_RES *prev_block() { //block of prev word
return prev_block_res;
}
WERD_RES *word() { //current word
return word_res;
}
ROW_RES *row() { //row of current word
return row_res;
}
BLOCK_RES *block() { //block of cur. word
return block_res;
}
WERD_RES *next_word() { //next word
return next_word_res;
}
ROW_RES *next_row() { //row of next word
return next_row_res;
}
BLOCK_RES *next_block() { //block of next word
return next_block_res;
}
// Adds the char and reject counts of the current word to the totals of
// its page, block and row.
void rej_stat_word(); //for page/block/row
private:
WERD_RES * prev_word_res; //previous word
ROW_RES *prev_row_res; //row of prev word
BLOCK_RES *prev_block_res; //block of prev word
WERD_RES *word_res; //current word
ROW_RES *row_res; //row of current word
BLOCK_RES *block_res; //block of cur. word
WERD_RES *next_word_res; //next word
ROW_RES *next_row_res; //row of next word
BLOCK_RES *next_block_res; //block of next word
BLOCK_RES_IT block_res_it; //iterators
ROW_RES_IT row_res_it;
WERD_RES_IT word_res_it;
};
#endif

361
ccmain/pdblock.cpp Normal file
View File

@ -0,0 +1,361 @@
/**********************************************************************
* File: pdblock.c (Formerly pdblk.c)
* Description: PDBLK member functions and iterator functions.
* Author: Ray Smith
* Created: Fri Mar 15 09:41:28 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <stdlib.h>
#include "blckerr.h"
#include "pdblock.h"
#include "svshowim.h"
#include "hpddef.h" //must be last (handpd.dll)
// NOTE(review): presumably the text height used when a block's id is drawn
// for debugging - the unit is not stated here, confirm at the point of use.
#define BLOCK_LABEL_HEIGHT 150 //char height of block id
// List-class macro instantiation for PDBLK.
CLISTIZE (PDBLK)
/**********************************************************************
 * PDBLK::PDBLK
 *
 * Build a simple rectangular block. The bounding box is set from the two
 * corners and each side list receives the two vertices of that side:
 * (xmin,ymin),(xmin,ymax) on the left and (xmax,ymin),(xmax,ymax) on the
 * right. There is no hand-drawn polygon and the index starts at 0.
 **********************************************************************/
PDBLK::PDBLK (inT16 xmin, inT16 ymin,     //bottom left
              inT16 xmax, inT16 ymax)     //top right
    : box (ICOORD (xmin, ymin), ICOORD (xmax, ymax)) {
  ICOORDELT_IT left_it(&leftside);        //boundaries
  ICOORDELT_IT right_it(&rightside);

  hand_poly = NULL;
  left_it.set_to_list (&leftside);
  right_it.set_to_list (&rightside);

  //left edge of the default box
  left_it.add_to_end (new ICOORDELT (xmin, ymin));
  left_it.add_to_end (new ICOORDELT (xmin, ymax));
  //right edge of the default box
  right_it.add_to_end (new ICOORDELT (xmax, ymin));
  right_it.add_to_end (new ICOORDELT (xmax, ymax));

  index_ = 0;
}
/**********************************************************************
* PDBLK::set_sides
*
* Sets left and right vertex lists
**********************************************************************/
// Replaces both boundary vertex lists of the block.
//   left  - list whose elements become the new left side
//   right - list whose elements become the new right side
// Each existing side is cleared first, then the supplied list is spliced in
// through add_list_before.
void PDBLK::set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right) {
  ICOORDELT_IT left_edge(&leftside);
  ICOORDELT_IT right_edge(&rightside);

  // Left boundary.
  leftside.clear();
  left_edge.move_to_first();
  left_edge.add_list_before(left);

  // Right boundary.
  rightside.clear();
  right_edge.move_to_first();
  right_edge.add_list_before(right);
}
/**********************************************************************
* PDBLK::contains
*
* Return TRUE if the given point is within the block.
**********************************************************************/
// Tests whether a point lies inside the block.
//   pt - point to test
// Returns TRUE if pt falls within (edges inclusive) any of the rectangles
// produced by a BLOCK_RECT_IT over this block, FALSE otherwise.
BOOL8 PDBLK::contains(ICOORD pt) {
  BLOCK_RECT_IT rect_it = this;      // enumerates the block's rectangles
  ICOORD lower_left, upper_right;    // corners of the current rectangle

  rect_it.start_block();
  while (!rect_it.cycled_rects()) {
    rect_it.bounding_box(lower_left, upper_right);
    if (pt.x() >= lower_left.x() && pt.x() <= upper_right.x()
        && pt.y() >= lower_left.y() && pt.y() <= upper_right.y()) {
      return TRUE;                   // point is inside this rectangle
    }
    rect_it.forward();
  }
  return FALSE;                      // no rectangle contains the point
}
/**********************************************************************
* PDBLK::move
*
* Reposition block
**********************************************************************/
void PDBLK::move( // reposition block
const ICOORD vec // by vector
) {
ICOORDELT_IT it(&leftside);
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
*(it.data ()) += vec;
it.set_to_list (&rightside);
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
*(it.data ()) += vec;
box.move (vec);
}
/**********************************************************************
* PDBLK::plot
*
* Plot the outline of a block in the given colour.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
// Draws the outline of the block.
//   window - ScrollView window to draw in
//   serial - serial number; drawn as a text label at the first left vertex
//   colour - pen colour for the outline
// If a POLY_BLOCK is attached, drawing is delegated to it. Otherwise the left
// side is traced as a staircase from its first vertex, then the right side is
// traced from the same starting point, and the two ends are joined.
void PDBLK::plot(ScrollView* window, inT32 serial,
                 ScrollView::Color colour) {
  ICOORD origin;                        // first vertex of the left side
  ICOORD left_end;                      // last vertex of the left side
  ICOORD previous;                      // vertex drawn from
  ICOORDELT_IT vertex_it = &leftside;

  window->Pen(colour);
  window->TextAttributes("Times", BLOCK_LABEL_HEIGHT, false, false, false);

  if (hand_poly != NULL) {
    hand_poly->plot(window, serial);    // polygon knows how to draw itself
    return;
  }
  if (leftside.empty())
    return;                             // nothing to draw

  origin = *(vertex_it.data());         // bottom left corner

  // Label the block with its serial number.
  char label[34];
#ifdef __UNIX__
  sprintf(label, INT32FORMAT, serial);
#else
  ultoa (serial, label, 10);
#endif
  window->Text(origin.x(), origin.y(), label);

  // Left boundary: vertical move first, then horizontal, for each vertex.
  window->SetCursor(origin.x(), origin.y());
  do {
    previous = *(vertex_it.data());
    vertex_it.forward();
    window->DrawTo(previous.x(), vertex_it.data()->y());
    window->DrawTo(vertex_it.data()->x(), vertex_it.data()->y());
  } while (!vertex_it.at_last());
  left_end = *(vertex_it.data());

  // Right boundary, starting again from the origin.
  window->SetCursor(origin.x(), origin.y());
  vertex_it.set_to_list(&rightside);
  previous = origin;
  vertex_it.mark_cycle_pt();
  while (!vertex_it.cycled_list()) {
    window->DrawTo(previous.x(), vertex_it.data()->y());
    window->DrawTo(vertex_it.data()->x(), vertex_it.data()->y());
    previous = *(vertex_it.data());
    vertex_it.forward();
  }

  // Join the end of the right side to the end of the left side.
  window->DrawTo(left_end.x(), left_end.y());
}
#endif
/**********************************************************************
* PDBLK::show
*
* Show the image corresponding to a block as its set of rectangles.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
// Displays the part of an image covered by the block.
//   image  - image to take pixels from
//   window - ScrollView window to show them in
// Every rectangle of the block is passed to sv_show_sub_image, using the
// rectangle's bottom-left corner both as source origin and as destination.
void PDBLK::show(IMAGE *image, ScrollView* window) {
  BLOCK_RECT_IT rect_it = this;
  ICOORD lower_left, upper_right;

  rect_it.start_block();
  while (!rect_it.cycled_rects()) {
    rect_it.bounding_box(lower_left, upper_right);
    sv_show_sub_image (image,
                       lower_left.x (), lower_left.y (),
                       upper_right.x () - lower_left.x (),
                       upper_right.y () - lower_left.y (),
                       window,
                       lower_left.x (), lower_left.y ());
    rect_it.forward();
  }
}
#endif
/**********************************************************************
* PDBLK::operator=
*
* Assignment - duplicate the block structure, but with an EMPTY row list.
**********************************************************************/
// Assignment: replaces this block's outline with a deep copy of source's.
//   source - block to copy from
// Returns *this.
// Copies the left and right vertex lists (element by element through
// ICOORDELT::deep_copy) and the bounding box. hand_poly and index_ are left
// untouched, exactly as before.
// Self-assignment is a no-op: without the guard the lists would be cleared
// before being copied from themselves, leaving the block with no outline.
PDBLK & PDBLK::operator= (const PDBLK & source) {
  if (this == &source)
    return *this;                 // nothing to do, and clearing would lose data

  // this->ELIST_LINK::operator=(source);
  if (!leftside.empty ())
    leftside.clear ();
  if (!rightside.empty ())
    rightside.clear ();
  leftside.deep_copy(&source.leftside, &ICOORDELT::deep_copy);
  rightside.deep_copy(&source.rightside, &ICOORDELT::deep_copy);
  box = source.box;
  return *this;
}
/**********************************************************************
* BLOCK_RECT_IT::BLOCK_RECT_IT
*
* Construct a block rectangle iterator.
**********************************************************************/
// Constructs a rectangle iterator over a block.
//   blkptr - block whose rectangles will be enumerated
// The side iterators are attached to the block's vertex lists; iteration is
// primed with start_block() only when the left side is non-empty.
BLOCK_RECT_IT::BLOCK_RECT_IT(PDBLK * blkptr)
  : block(blkptr),
    left_it(&blkptr->leftside),
    right_it(&blkptr->rightside) {
  if (blkptr->leftside.empty ())
    return;                       // nothing to iterate yet
  start_block();
}
/**********************************************************************
* BLOCK_RECT_IT::set_to_block
*
* Start a new block.
**********************************************************************/
// Re-targets the iterator at another block.
//   blkptr - block to iterate from now on
// Both side iterators are moved to the new block's vertex lists, and
// start_block() is called when the left side is non-empty.
void BLOCK_RECT_IT::set_to_block(PDBLK *blkptr) {
  block = blkptr;
  left_it.set_to_list (&blkptr->leftside);
  right_it.set_to_list (&blkptr->rightside);
  if (blkptr->leftside.empty ())
    return;                       // empty block: leave ymin/ymax alone
  start_block();
}
/**********************************************************************
* BLOCK_RECT_IT::start_block
*
* Restart a block.
**********************************************************************/
void BLOCK_RECT_IT::start_block() { //start (new) block
left_it.move_to_first ();
right_it.move_to_first ();
left_it.mark_cycle_pt ();
right_it.mark_cycle_pt ();
ymin = left_it.data ()->y (); //bottom of first box
ymax = left_it.data_relative (1)->y ();
if (right_it.data_relative (1)->y () < ymax)
//smallest step
ymax = right_it.data_relative (1)->y ();
}
/**********************************************************************
* BLOCK_RECT_IT::forward
*
* Move to the next rectangle in the block.
**********************************************************************/
void BLOCK_RECT_IT::forward() { //next rectangle
if (!left_it.empty ()) { //non-empty list
if (left_it.data_relative (1)->y () == ymax)
left_it.forward (); //move to meet top
if (right_it.data_relative (1)->y () == ymax)
right_it.forward ();
//last is special
if (left_it.at_last () || right_it.at_last ()) {
left_it.move_to_first (); //restart
right_it.move_to_first ();
//now at bottom
ymin = left_it.data ()->y ();
}
else {
ymin = ymax; //new bottom
}
//next point
ymax = left_it.data_relative (1)->y ();
if (right_it.data_relative (1)->y () < ymax)
//least step forward
ymax = right_it.data_relative (1)->y ();
}
}
/**********************************************************************
* BLOCK_LINE_IT::get_line
*
* Get the start and width of a line in the block.
**********************************************************************/
// Finds the horizontal extent of the block on a given scan line.
//   y    - line to look up
//   xext - output: width of the block on that line
// Returns the x coordinate at which the line starts.
// Reports BADBLOCKLINE if y is outside the block's bounding box
// (bottom inclusive, top exclusive). The rectangle the iterator is currently
// on is tried first; only on a miss is the block rescanned from the start.
// Reports LOSTBLOCKLINE if no rectangle covers y.
inT16 BLOCK_LINE_IT::get_line(inT16 y, inT16 &xext) {
  ICOORD lower_left;              // corners of the box being tested
  ICOORD upper_right;

  // Sanity check against the whole block.
  block->bounding_box (lower_left, upper_right);
  if (y < lower_left.y () || y >= upper_right.y ()) {
    // block->print(stderr,FALSE);
    BADBLOCKLINE.error ("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y);
  }

  // Fast path: the current rectangle.
  rect_it.bounding_box (lower_left, upper_right);
  bool found = y >= lower_left.y () && y < upper_right.y ();

  // Slow path: search every rectangle from the beginning.
  if (!found) {
    for (rect_it.start_block (); !rect_it.cycled_rects (); rect_it.forward ()) {
      rect_it.bounding_box (lower_left, upper_right);
      if (y >= lower_left.y () && y < upper_right.y ()) {
        found = true;
        break;                    // leave the iterator on the matching rect
      }
    }
  }

  if (found) {
    xext = upper_right.x () - lower_left.x ();
    return lower_left.x ();
  }

  LOSTBLOCKLINE.error ("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y);
  return 0;                       // dummy to stop warning
}

170
ccmain/pdblock.h Normal file
View File

@ -0,0 +1,170 @@
/**********************************************************************
* File: pdblock.h (Formerly pdblk.h)
* Description: Page block class definition.
* Author: Ray Smith
* Created: Thu Mar 14 17:32:01 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef PDBLOCK_H
#define PDBLOCK_H
#include "img.h"
#include "strngs.h"
#include "polyblk.h"
#include "hpddef.h" //must be last (handpd.dll)
class DLLSYM PDBLK; //forward decl
CLISTIZEH (PDBLK)
// Page block: a region of the page described by a left and a right vertex
// list, a bounding box, an optional POLY_BLOCK and a serial index.
class DLLSYM PDBLK
{
  friend class BLOCK_RECT_IT;     // iterates leftside/rightside directly

 public:
  // Creates a block with no polygon and index 0.
  PDBLK() : hand_poly(NULL), index_(0) {
  }

  // Creates a rectangular block from its bottom-left (xmin, ymin) and
  // top-right (xmax, ymax) corners.
  PDBLK(inT16 xmin, inT16 ymin, inT16 xmax, inT16 ymax);

  // Replaces the left and right vertex lists.
  void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right);

  // Deletes the attached polygon, if there is one.
  ~PDBLK () {
    if (hand_poly != NULL)
      delete hand_poly;
  }

  // Returns the attached polygon (may be NULL).
  POLY_BLOCK *poly_block() {
    return hand_poly;
  }

  // Attaches a polygon; the destructor will delete it.
  void set_poly_block(POLY_BLOCK *blk) {
    hand_poly = blk;
  }

  // Writes the corners of the bounding box to the two output arguments.
  void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const {
    bottom_left = box.botleft ();
    top_right = box.topright ();
  }

  // Returns the bounding box itself.
  const TBOX &bounding_box() const {
    return box;
  }

  // Serial number accessors.
  int index() const {
    return index_;
  }
  void set_index(int value) {
    index_ = value;
  }

  // Returns TRUE if pt is inside the block.
  BOOL8 contains(ICOORD pt);

  // Translates the block by vec.
  void move(const ICOORD vec);

  // Draws the block outline in the given window and colour, labelled with
  // the serial number.
  void plot(ScrollView* window, inT32 serial, ScrollView::Color colour);

  // Shows the part of image covered by the block in the given window.
  void show(IMAGE *image, ScrollView* window);

  // Assignment; see pdblock.cpp for what is copied.
  PDBLK & operator= (const PDBLK & source);

 protected:
  POLY_BLOCK *hand_poly;          // optional polygon, deleted by ~PDBLK
  ICOORDELT_LIST leftside;        // left side vertices
  ICOORDELT_LIST rightside;       // right side vertices
  TBOX box;                       // bounding box
  int index_;                     // Serial number of this block.
};
// Rectangle iterator: walks a PDBLK as a sequence of rectangles, each
// spanning from the current left vertex's x to the current right vertex's x
// between ymin and ymax.
class DLLSYM BLOCK_RECT_IT
{
 public:
  // Attaches the iterator to blkptr.
  BLOCK_RECT_IT(PDBLK *blkptr);

  NEWDELETE2 (BLOCK_RECT_IT)

  // Switches the iterator to another block.
  void set_to_block (PDBLK * blkptr);

  // Restarts iteration at the first rectangle.
  void start_block();

  // Moves to the next rectangle.
  void forward();

  // True once both side iterators report having cycled their lists.
  BOOL8 cycled_rects() {
    return left_it.cycled_list () && right_it.cycled_list ();
  }

  // Writes the corners of the current rectangle to bleft and tright.
  void bounding_box(ICOORD &bleft, ICOORD &tright) {
    const inT16 left_x = left_it.data ()->x ();
    const inT16 right_x = right_it.data ()->x ();
    bleft = ICOORD (left_x, ymin);      // bottom left
    tright = ICOORD (right_x, ymax);    // top right
  }

 private:
  inT16 ymin;                     // bottom of rectangle
  inT16 ymax;                     // top of rectangle
  PDBLK *block;                   // block to iterate
  ICOORDELT_IT left_it;           // boundary iterators
  ICOORDELT_IT right_it;
};
// Line iterator: answers, for a given y, where the block starts on that line
// and how wide it is, using an embedded BLOCK_RECT_IT.
class DLLSYM BLOCK_LINE_IT
{
 public:
  // Attaches the iterator to blkptr.
  BLOCK_LINE_IT (PDBLK * blkptr)
    : block (blkptr), rect_it (blkptr) {
  }

  NEWDELETE2 (BLOCK_LINE_IT)

  // Switches the iterator (and its rectangle iterator) to another block.
  void set_to_block (PDBLK * blkptr) {
    block = blkptr;
    rect_it.set_to_block (blkptr);
  }

  // Returns the start x of line y and stores the line's width in xext.
  inT16 get_line(inT16 y, inT16 &xext);

 private:
  PDBLK * block;                  // block to iterate
  BLOCK_RECT_IT rect_it;          // rectangle iterator
};
// Declaration only: takes two untyped pointers and returns an int.
// NOTE(review): the signature and name suggest a qsort-style comparator that
// orders rows by decreasing top edge; the definition is not in this file
// section - confirm before relying on that.
int decreasing_top_order( //
const void *row1,
const void *row2);
#endif

115
ccmain/points.cpp Normal file
View File

@ -0,0 +1,115 @@
/**********************************************************************
* File: points.c (Formerly coords.c)
* Description: Member functions for coordinate classes.
* Author: Ray Smith
* Created: Fri Mar 15 08:58:17 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h" //precompiled headers
#include <stdlib.h>
#include "ndminx.h"
#include "serialis.h"
#include "points.h"
ELISTIZE_S (ICOORDELT) //turn to list
// Scales the vector to unit length in place.
// Returns false, leaving the vector unchanged, when its length is below
// 0.0000000001; returns true after dividing both coordinates by the length.
bool FCOORD::normalise() {
  const float magnitude = length ();
  if (!(magnitude < 0.0000000001)) {
    xcoord /= magnitude;
    ycoord /= magnitude;
    return true;
  }
  return false;                   // too short to normalise
}
// Set from the given x,y, shrinking the vector to fit if needed.
// Sets the coordinates from (x, y), dividing both by a common integer factor
// when the larger absolute value exceeds MAX_INT16 so that the result fits
// the 16-bit fields.
void ICOORD::set_with_shrink(int x, int y) {
  const int max_extent = MAX(abs(x), abs(y));
  // Smallest divisor that brings max_extent to MAX_INT16 or below; 1 when
  // no shrinking is needed.
  const int factor =
      (max_extent > MAX_INT16) ? max_extent / MAX_INT16 + 1 : 1;
  xcoord = x / factor;
  ycoord = y / factor;
}
// The fortran/basic sgn function returns -1, 0, 1 if x < 0, x == 0, x > 0
// respectively.
// Returns -1, 0 or 1 according to whether x is negative, zero or positive.
static int sign(int x) {
  // Each comparison yields 0 or 1, so the difference is the signum.
  return (x > 0) - (x < 0);
}
// Setup for iterating over the pixels in a vector by the well-known
// Bresenham rendering algorithm.
// Starting with major/2 in the accumulator, on each step add major_step,
// and then add minor to the accumulator. When the accumulator >= major
// subtract major and step a minor step.
// Prepares the quantities needed to step along this vector one pixel at a
// time.
//   major_step - output: unit step along the axis with the larger extent
//   minor_step - output: unit step along the other axis
//   major      - output: absolute extent along the major axis
//   minor      - output: absolute extent along the minor axis
// X is chosen as the major axis when |x| >= |y|.
void ICOORD::setup_render(ICOORD* major_step, ICOORD* minor_step,
                          int* major, int* minor) const {
  const int abs_x = abs(xcoord);
  const int abs_y = abs(ycoord);
  const bool x_is_major = abs_x >= abs_y;

  major_step->xcoord = x_is_major ? sign(xcoord) : 0;
  major_step->ycoord = x_is_major ? 0 : sign(ycoord);
  minor_step->xcoord = x_is_major ? 0 : sign(xcoord);
  minor_step->ycoord = x_is_major ? sign(ycoord) : 0;
  *major = x_is_major ? abs_x : abs_y;
  *minor = x_is_major ? abs_y : abs_x;
}
void ICOORD::serialise_asc( //convert to ascii
FILE *f //file to write
) {
serialise_INT32(f, xcoord);
serialise_INT32(f, ycoord);
}
void ICOORD::de_serialise_asc( //convert from ascii
FILE *f //file to write
) {
xcoord = (inT16) de_serialise_INT32 (f);
ycoord = (inT16) de_serialise_INT32 (f);
}
void ICOORDELT::serialise_asc( //convert to ascii
FILE *f //file to write
) {
((ICOORD *) this)->serialise_asc (f);
}
void ICOORDELT::de_serialise_asc( //convert from ascii
FILE *f //file to write
) {
((ICOORD *) this)->de_serialise_asc (f);
}

302
ccmain/points.h Normal file
View File

@ -0,0 +1,302 @@
/**********************************************************************
* File: points.h (Formerly coords.h)
* Description: Coordinate class definitions.
* Author: Ray Smith
* Created: Fri Mar 15 08:32:45 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef POINTS_H
#define POINTS_H
#include <stdio.h>
#include <math.h>
#include "elst.h"
//#include "ipeerr.h"
class FCOORD;
class DLLSYM ICOORD //integer coordinate
{
friend class FCOORD;
public:
ICOORD() { //empty constructor
xcoord = ycoord = 0; //default zero
}
ICOORD( //constructor
inT16 xin, //x value
inT16 yin) { //y value
xcoord = xin;
ycoord = yin;
}
~ICOORD () { //destructor
}
//access function
NEWDELETE2 (ICOORD) inT16 x () const
{
return xcoord;
}
inT16 y() const { //access_function
return ycoord;
}
void set_x( //rewrite function
inT16 xin) {
xcoord = xin; //write new value
}
void set_y( //rewrite function
inT16 yin) { //value to set
ycoord = yin;
}
// Set from the given x,y, shrinking the vector to fit if needed.
void set_with_shrink(int x, int y);
float sqlength() const { //find sq length
return (float) (xcoord * xcoord + ycoord * ycoord);
}
float length() const { //find length
return (float) sqrt (sqlength ());
}
float pt_to_pt_sqdist( //sq dist between pts
const ICOORD &pt) const {
ICOORD gap;
gap.xcoord = xcoord - pt.xcoord;
gap.ycoord = ycoord - pt.ycoord;
return gap.sqlength ();
}
float pt_to_pt_dist( //Distance between pts
const ICOORD &pt) const {
return (float) sqrt (pt_to_pt_sqdist (pt));
}
float angle() const { //find angle
return (float) atan2 ((double) ycoord, (double) xcoord);
}
BOOL8 operator== ( //test equality
const ICOORD & other) {
return xcoord == other.xcoord && ycoord == other.ycoord;
}
BOOL8 operator!= ( //test inequality
const ICOORD & other) {
return xcoord != other.xcoord || ycoord != other.ycoord;
}
friend ICOORD operator! ( //rotate 90 deg anti
const ICOORD &);
friend ICOORD operator- ( //unary minus
const ICOORD &);
friend ICOORD operator+ ( //add
const ICOORD &, const ICOORD &);
friend ICOORD & operator+= ( //add
ICOORD &, const ICOORD &);
friend ICOORD operator- ( //subtract
const ICOORD &, const ICOORD &);
friend ICOORD & operator-= ( //subtract
ICOORD &, const ICOORD &);
friend inT32 operator% ( //scalar product
const ICOORD &, const ICOORD &);
friend inT32 operator *( //cross product
const ICOORD &,
const ICOORD &);
friend ICOORD operator *( //multiply
const ICOORD &,
inT16);
friend ICOORD operator *( //multiply
inT16,
const ICOORD &);
friend ICOORD & operator*= ( //multiply
ICOORD &, inT16);
friend ICOORD operator/ ( //divide
const ICOORD &, inT16);
//divide
friend ICOORD & operator/= (ICOORD &, inT16);
void rotate( //rotate
const FCOORD& vec); //by vector
// Setup for iterating over the pixels in a vector by the well-known
// Bresenham rendering algorithm.
// Starting with major/2 in the accumulator, on each step move by
// major_step, and then add minor to the accumulator. When
// accumulator >= major subtract major and also move by minor_step.
void setup_render(ICOORD* major_step, ICOORD* minor_step,
int* major, int* minor) const;
void serialise_asc( //serialise to ascii
FILE *f);
void de_serialise_asc( //serialise from ascii
FILE *f);
protected:
inT16 xcoord; //x value
inT16 ycoord; //y value
};
// An ICOORD that can live in an ELIST: combines ELIST_LINK with ICOORD.
class DLLSYM ICOORDELT:public ELIST_LINK, public ICOORD
{
 public:
  // Default constructor; the ICOORD part is default-constructed.
  ICOORDELT() {
  }
  // Constructs from an existing ICOORD.
  ICOORDELT (ICOORD icoord) : ICOORD (icoord) {
  }
  // Constructs from explicit x and y values.
  ICOORDELT(inT16 xin, inT16 yin) : ICOORD (xin, yin) {
  }

  /* Note that prep_serialise() dump() and de_dump() dont need to do anything
  more than terminate recursion. */
  void prep_serialise() const {
  }
  void dump(FILE *) const {
  }
  void de_dump(FILE *) {
  }

  //serialise to ascii
  make_serialise(ICOORDELT)

  // Returns a newly allocated element assigned from *src.
  static ICOORDELT* deep_copy(const ICOORDELT* src) {
    ICOORDELT* duplicate = new ICOORDELT;
    *duplicate = *src;
    return duplicate;
  }

  // Writes the element to a file.
  void serialise_asc(FILE * f);
  // Reads the element from a file.
  void de_serialise_asc(FILE *f);
};
ELISTIZEH_S (ICOORDELT)
// Floating-point coordinate / 2-D vector.
class DLLSYM FCOORD
{
 public:
  // Default constructor: leaves both coordinates uninitialised.
  FCOORD() {
  }
  // Constructs from explicit x and y values.
  FCOORD(float xvalue, float yvalue) {
    xcoord = xvalue;
    ycoord = yvalue;
  }
  // Constructs from an integer coordinate.
  FCOORD(ICOORD icoord) {
    xcoord = icoord.xcoord;
    ycoord = icoord.ycoord;
  }

  // Component accessors.
  float x() const {
    return xcoord;
  }
  float y() const {
    return ycoord;
  }

  // Component setters.
  void set_x(float xin) {
    xcoord = xin;
  }
  void set_y(float yin) {
    ycoord = yin;
  }

  // Squared length of the vector.
  float sqlength() const {
    return xcoord * xcoord + ycoord * ycoord;
  }
  // Length of the vector.
  float length() const {
    return (float) sqrt (sqlength ());
  }

  // Squared distance between this point and pt.
  float pt_to_pt_sqdist(const FCOORD &pt) const {
    FCOORD gap;
    gap.xcoord = xcoord - pt.xcoord;
    gap.ycoord = ycoord - pt.ycoord;
    return gap.sqlength ();
  }
  // Distance between this point and pt.
  float pt_to_pt_dist(const FCOORD &pt) const {
    return (float) sqrt (pt_to_pt_sqdist (pt));
  }

  // Angle of the vector as given by atan2(y, x).
  float angle() const {
    return (float) atan2 (ycoord, xcoord);
  }

  // Converts to a unit vector; returns false if the vector is too short.
  bool normalise();

  // Exact equality / inequality on both components. Const-qualified so that
  // const FCOORDs can be compared.
  BOOL8 operator== (const FCOORD & other) const {
    return xcoord == other.xcoord && ycoord == other.ycoord;
  }
  BOOL8 operator!= (const FCOORD & other) const {
    return xcoord != other.xcoord || ycoord != other.ycoord;
  }

  //rotate 90 deg anti
  friend FCOORD operator! (const FCOORD &);
  //unary minus
  friend FCOORD operator- (const FCOORD &);
  //add
  friend FCOORD operator+ (const FCOORD &, const FCOORD &);
  //add
  friend FCOORD & operator+= (FCOORD &, const FCOORD &);
  //subtract
  friend FCOORD operator- (const FCOORD &, const FCOORD &);
  //subtract
  friend FCOORD & operator-= (FCOORD &, const FCOORD &);
  //scalar product
  friend float operator% (const FCOORD &, const FCOORD &);
  //cross product
  friend float operator *(const FCOORD &, const FCOORD &);
  //multiply
  friend FCOORD operator *(const FCOORD &, float);
  //multiply
  friend FCOORD operator *(float, const FCOORD &);
  //multiply
  friend FCOORD & operator*= (FCOORD &, float);
  //divide
  friend FCOORD operator/ (const FCOORD &, float);
  //rotate by vector
  void rotate(const FCOORD vec);
  //divide
  friend FCOORD & operator/= (FCOORD &, float);

 private:
  float xcoord;                   //2 floating coords
  float ycoord;
};
#include "ipoints.h" /*do inline funcs */
#endif

588
ccmain/polyaprx.cpp Normal file
View File

@ -0,0 +1,588 @@
/**********************************************************************
* File: polyaprx.cpp (Formerly polygon.c)
* Description: Code for polygonal approximation from old edgeprog.
* Author: Ray Smith
* Created: Thu Nov 25 11:42:04 GMT 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <stdio.h>
#ifdef __UNIX__
#include <assert.h>
#endif
#define FASTEDGELENGTH 256
#include "polyaprx.h"
#include "varable.h"
#include "tprintf.h"
// EXTERN expands to nothing here, so the BOOL_VAR lines below are written
// with an empty prefix.
#define EXTERN
EXTERN BOOL_VAR (poly_debug, FALSE, "Debug old poly");
// When FALSE, tesspoly_outline uses the larger of the outline's height and
// width for its area value; when TRUE only the height is used.
EXTERN BOOL_VAR (poly_wide_objects_better, TRUE,
"More accurate approx on wide things");
// NOTE(review): par1/par2 are not referenced in the visible part of this
// file; presumably tuning parameters set and read by later functions.
static int par1, par2;
// Bit values; FIXED is OR-ed into EDGEPT::flags[FLAGS] by fix2.
#define CONVEX 1 /*OUTLINE point is convex */
#define CONCAVE 2 /*used and set only in edges */
#define FIXED 4 /*OUTLINE point is fixed */
#define ONHULL 8 /*on convex hull */
// Indices into the EDGEPT::flags array (FLAGS is index 0, defined below).
#define RUNLENGTH 1 /*length of run */
#define DIR 2 /*direction of run */
#define CORRECTION 3 /*correction of run */
//#define MAXSHORT 32767 /*max value of short*/
#define FLAGS 0
#define fixed_dist 20 //really an int_variable
#define approx_dist 15 //really an int_variable
// Stores p1 - p2 (component-wise) in p.
// NOTE: expands to TWO statements separated by ';' - do not use it as the
// sole body of an unbraced if/else/loop, only the first assignment would be
// controlled.
#define point_diff(p,p1,p2) (p).x = (p1).x - (p2).x ; (p).y = (p1).y - (p2).y
// 2-D cross product of a and b.
#define CROSS(a,b) ((a).x * (b).y - (a).y * (b).x)
// Squared length of a (no square root is taken).
#define LENGTH(a) ((a).x * (a).x + (a).y * (a).y)
// Squared distance between points a and b (no square root is taken).
#define DISTANCE(a,b) (((b).x-(a).x) * ((b).x-(a).x) \
+ ((b).y-(a).y) * ((b).y-(a).y))
/**********************************************************************
* tesspoly_outline
*
* Approximate an outline from c form using the old tess algorithm.
**********************************************************************/
// Approximates a C_OUTLINE by a polygon using the old tess algorithm.
//   c_outline - outline to approximate
//   (float)   - unnamed and unused (x-height in the original comment)
// Returns a newly allocated OUTLINE built from the approximated points, or
// NULL when the approximation has two points or fewer.
// The intermediate EDGEPT array lives on the stack for paths of up to
// FASTEDGELENGTH steps and on the heap for longer ones.
OUTLINE *tesspoly_outline(C_OUTLINE *c_outline, float) {
  POLYPT_LIST polypts;                    // output polygon
  POLYPT_IT poly_it = &polypts;           // appends to polypts

  EDGEPT stack_edgepts[FASTEDGELENGTH];   // converted path
  EDGEPT* edgepts = stack_edgepts;
  // Use heap memory if the stack buffer is not big enough.
  if (c_outline->pathlength() > FASTEDGELENGTH)
    edgepts = new EDGEPT[c_outline->pathlength()];

  // "area" is the squared height of the bounding box, or the squared larger
  // dimension when poly_wide_objects_better is off.
  TBOX loop_box;
  loop_box = c_outline->bounding_box ();
  inT32 area = loop_box.height ();
  if (!poly_wide_objects_better && loop_box.width () > area)
    area = loop_box.width ();
  area *= area;

  edgesteps_to_edgepts (c_outline, edgepts);
  fix2(edgepts, area);
  EDGEPT *const first_pt = poly2 (edgepts, area);   /*2nd approximation */

  // Copy the circular EDGEPT chain into the POLYPT list.
  EDGEPT *current = first_pt;
  do {
    FCOORD pos;                           // vertex
    FCOORD vec;                           // vector to next vertex
    pos = FCOORD (current->pos.x, current->pos.y);
    vec = FCOORD (current->vec.x, current->vec.y);
    poly_it.add_after_then_move (new POLYPT (pos, vec));
    current = current->next;
  } while (current != first_pt);

  if (edgepts != stack_edgepts)
    delete [] edgepts;

  if (poly_it.length() <= 2)
    return NULL;                          // degenerate polygon
  return new OUTLINE(&poly_it);
}
/**********************************************************************
* edgesteps_to_edgepts
*
* Convert a C_OUTLINE to EDGEPTs.
**********************************************************************/
// Converts the step chain of a C_OUTLINE into a circular, doubly linked
// list of EDGEPTs stored in the caller-supplied array.
//   c_outline - outline to convert
//   edgepts   - output array; the caller (tesspoly_outline) sizes it to at
//               least c_outline->pathlength() entries
// Returns &edgepts[0].
// Consecutive steps with the same direction are merged into one EDGEPT whose
// vec is the step vector times the run length; flags[RUNLENGTH] holds the
// run length and flags[DIR] a 3-bit direction code.
EDGEPT *
edgesteps_to_edgepts ( //convert outline
C_OUTLINE * c_outline, //input
EDGEPT edgepts[] //output is array
) {
inT32 length; //steps in path
ICOORD pos; //current coords
inT32 stepindex; //current step
inT32 stepinc; //increment
inT32 epindex; //current EDGEPT
inT32 count; //repeated steps
ICOORD vec; //for this 8 step
ICOORD prev_vec;
inT8 epdir; //of this step
DIR128 prevdir; //previous dir
DIR128 dir; //of this step
pos = c_outline->start_pos (); //start of loop
length = c_outline->pathlength ();
stepindex = 0;
epindex = 0;
prevdir = -1;
count = 0;
do {
dir = c_outline->step_dir (stepindex);
vec = c_outline->step (stepindex);
// If the next step's direction differs from this one by exactly -32, the
// two steps are consumed together: their vectors are summed and the
// direction is adjusted by 128 - 16.
// NOTE(review): presumably DIR128 arithmetic wraps modulo 128, making this
// a combined diagonal step - confirm against DIR128.
if (stepindex < length - 1
&& c_outline->step_dir (stepindex + 1) - dir == -32) {
dir += 128 - 16;
vec += c_outline->step (stepindex + 1);
stepinc = 2;
}
else
stepinc = 1;
// First step of the outline: start the first run.
if (count == 0) {
prevdir = dir;
prev_vec = vec;
}
// Direction changed: emit an EDGEPT for the run that just ended.
if (prevdir.get_dir () != dir.get_dir ()) {
edgepts[epindex].pos.x = pos.x ();
edgepts[epindex].pos.y = pos.y ();
prev_vec *= count;
edgepts[epindex].vec.x = prev_vec.x ();
edgepts[epindex].vec.y = prev_vec.y ();
pos += prev_vec;
edgepts[epindex].flags[RUNLENGTH] = count;
// For epindex == 0 this forms &edgepts[-1]; edgepts[0].prev is overwritten
// with the real predecessor after the loop.
edgepts[epindex].prev = &edgepts[epindex - 1];
edgepts[epindex].flags[FLAGS] = 0;
edgepts[epindex].next = &edgepts[epindex + 1];
// Derive the 3-bit direction code stored in flags[DIR] from prevdir.
prevdir += 64;
epdir = (DIR128) 0 - prevdir;
epdir >>= 4;
epdir &= 7;
edgepts[epindex].flags[DIR] = epdir;
epindex++;
// Start a new run with the current step.
prevdir = dir;
prev_vec = vec;
count = 1;
}
else
count++;
stepindex += stepinc;
}
while (stepindex < length);
// Emit the final run and close the loop back to edgepts[0].
edgepts[epindex].pos.x = pos.x ();
edgepts[epindex].pos.y = pos.y ();
prev_vec *= count;
edgepts[epindex].vec.x = prev_vec.x ();
edgepts[epindex].vec.y = prev_vec.y ();
pos += prev_vec;
edgepts[epindex].flags[RUNLENGTH] = count;
edgepts[epindex].flags[FLAGS] = 0;
edgepts[epindex].prev = &edgepts[epindex - 1];
edgepts[epindex].next = &edgepts[0];
prevdir += 64;
epdir = (DIR128) 0 - prevdir;
epdir >>= 4;
epdir &= 7;
edgepts[epindex].flags[DIR] = epdir;
edgepts[0].prev = &edgepts[epindex];
// After walking every run, the position must be back at the outline start.
ASSERT_HOST (pos.x () == c_outline->start_pos ().x ()
&& pos.y () == c_outline->start_pos ().y ());
return &edgepts[0];
}
/**********************************************************************
 * fix2(start,area)
 *
 * Decides which points of the closed EDGEPT loop are marked FIXED in
 * flags[FLAGS]. The work is done in four passes round the loop:
 *  1. follow runs of steps that alternate between two directions at
 *     most one step (out of 8) apart and fix the point where each such
 *     run ends;
 *  2. fix both ends of every step whose run length is at least 8;
 *  3. unfix a pair of fixed points around a single-length step when it
 *     sits inside an otherwise regular alternation of two directions;
 *  4. while more than 3 fixed points remain, unfix one of any two
 *     consecutive fixed points whose squared separation is <= gapmin.
 *
 * start - any point on the loop.
 * area  - raised to at least 450, then used to scale gapmin.
 *
 * Only flags[FLAGS] is written; links, positions and vectors are not
 * changed here.
 **********************************************************************/
//#pragma OPT_LEVEL 1 /*stop compiler bugs*/
void fix2( //polygonal approx
EDGEPT *start, /*loop to approximate */
int area) {
register EDGEPT *edgept; /*current point */
register EDGEPT *edgept1; /*successor of edgept in pass 2 */
register EDGEPT *loopstart; /*modified start of loop */
register EDGEPT *linestart; /*start of line segment */
register int dir1, dir2; /*directions of line */
register int sum1, sum2; /*lengths in dir1,dir2 */
int stopped; /*completed flag */
int fixed_count; //no of fixed points
int d01, d12, d23, gapmin; //squared gaps between fixed points, and limit
TPOINT d01vec, d12vec, d23vec; //difference vectors for those gaps
register EDGEPT *edgefix, *startfix;
//window of four consecutive fixed points used by pass 4
register EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;
edgept = start; /*start of loop */
// Skip forward while the turn at this point is at most one direction
// step and the directions either side of it do not differ by 2 or 6.
// NOTE(review): there is no test against start here, so the search
// relies on the loop containing a point that fails this condition.
while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3
&& (dir1 =
(edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2
&& dir1 != 6)
edgept = edgept->next; /*find suitable start */
loopstart = edgept; /*remember start */
stopped = 0; /*not finished yet */
edgept->flags[FLAGS] |= FIXED; /*fix it */
/* Pass 1: trace two-direction runs and fix their end points. */
do {
linestart = edgept; /*possible start of line */
dir1 = edgept->flags[DIR]; /*first direction */
/*length of dir1 */
sum1 = edgept->flags[RUNLENGTH];
edgept = edgept->next;
dir2 = edgept->flags[DIR]; /*2nd direction */
/*length in dir2 */
sum2 = edgept->flags[RUNLENGTH];
// True when dir1 and dir2 differ by -1, 0 or +1 modulo 8.
if (((dir1 - dir2 + 1) & 7) < 3) {
// Keep going while the steps either side of edgept share a direction,
// i.e. the outline keeps alternating between the same two directions.
while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) {
edgept = edgept->next; /*look at next */
if (edgept->flags[DIR] == dir1)
/*sum lengths */
sum1 += edgept->flags[RUNLENGTH];
else
sum2 += edgept->flags[RUNLENGTH];
}
if (edgept == loopstart)
stopped = 1; /*finished */
// If the step before linestart already ran in dir2, the line may
// really begin one point earlier: move back and fix that point.
if (sum2 + sum1 > 2
&& linestart->prev->flags[DIR] == dir2
&& (linestart->prev->flags[RUNLENGTH] >
linestart->flags[RUNLENGTH] || sum2 > sum1)) {
/*start is back one */
linestart = linestart->prev;
linestart->flags[FLAGS] |= FIXED;
}
// Decide whether the line ends at edgept or one point further on.
if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3
|| (edgept->flags[DIR] == dir1 && sum1 >= sum2)
|| ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH]
|| (edgept->flags[DIR] == dir2 && sum2 >= sum1))
&& linestart->next != edgept))
edgept = edgept->next;
}
/*sharp bend */
edgept->flags[FLAGS] |= FIXED;
}
/*do whole loop */
while (edgept != loopstart && !stopped);
/* Pass 2: fix both ends of long steps. */
edgept = start;
do {
// Both alternatives below require RUNLENGTH >= 8 and between them
// cover every direction, so the test reduces to RUNLENGTH >= 8.
if (((edgept->flags[RUNLENGTH] >= 8) &&
(edgept->flags[DIR] != 2) && (edgept->flags[DIR] != 6)) ||
((edgept->flags[RUNLENGTH] >= 8) &&
((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) {
edgept->flags[FLAGS] |= FIXED;
edgept1 = edgept->next;
edgept1->flags[FLAGS] |= FIXED;
}
edgept = edgept->next;
}
while (edgept != start);
/* Pass 3: unfix isolated single-step pairs. */
edgept = start;
do {
/*this point is fixed and is a single step */
if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1
/*next is fixed too, but the previous point is free */
&& edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0
/*point after next is free; same pair of dirs on both sides */
&& (edgept->next->next->flags[FLAGS] & FIXED) == 0 && edgept->prev->flags[DIR] == edgept->next->flags[DIR] && edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR]
&& ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) {
/*unfix it */
edgept->flags[FLAGS] &= ~FIXED;
edgept->next->flags[FLAGS] &= ~FIXED;
}
edgept = edgept->next; /*do all points */
}
while (edgept != start); /*until finished */
/* Pass 4: thin out fixed points that are too close together. */
stopped = 0;
if (area < 450)
area = 450;
// Compared against LENGTH(), i.e. a squared distance.
gapmin = area * fixed_dist * fixed_dist / 44000;
edgept = start;
fixed_count = 0;
do {
if (edgept->flags[FLAGS] & FIXED)
fixed_count++;
edgept = edgept->next;
}
while (edgept != start);
// Locate four consecutive fixed points, starting from start.
while ((edgept->flags[FLAGS] & FIXED) == 0)
edgept = edgept->next;
edgefix0 = edgept;
edgept = edgept->next;
while ((edgept->flags[FLAGS] & FIXED) == 0)
edgept = edgept->next;
edgefix1 = edgept;
edgept = edgept->next;
while ((edgept->flags[FLAGS] & FIXED) == 0)
edgept = edgept->next;
edgefix2 = edgept;
edgept = edgept->next;
while ((edgept->flags[FLAGS] & FIXED) == 0)
edgept = edgept->next;
edgefix3 = edgept;
startfix = edgefix2; // the sweep ends when edgefix2 gets back here
do {
if (fixed_count <= 3)
break; //already too few
point_diff (d12vec, edgefix1->pos, edgefix2->pos);
d12 = LENGTH (d12vec);
if (d12 <= gapmin) {
// Middle gap is too short: drop the end of it that has the shorter
// neighbouring gap (edgefix2 if d01 > d23, otherwise edgefix1).
point_diff (d01vec, edgefix0->pos, edgefix1->pos);
d01 = LENGTH (d01vec);
point_diff (d23vec, edgefix2->pos, edgefix3->pos);
d23 = LENGTH (d23vec);
if (d01 > d23) {
edgefix2->flags[FLAGS] &= ~FIXED;
fixed_count--;
/* if ( plots[EDGE] & PATHS )
mark(edgefd,edgefix2->pos.x,edgefix2->pos.y,PLUS);
*/
}
else {
edgefix1->flags[FLAGS] &= ~FIXED;
fixed_count--;
/* if ( plots[EDGE] & PATHS )
mark(edgefd,edgefix1->pos.x,edgefix1->pos.y,PLUS);
*/
edgefix1 = edgefix2;
}
}
else {
// Gap is wide enough: slide the window on by one fixed point.
edgefix0 = edgefix1;
edgefix1 = edgefix2;
}
edgefix2 = edgefix3;
// Advance edgept to the next fixed point, which becomes edgefix3.
edgept = edgept->next;
while ((edgept->flags[FLAGS] & FIXED) == 0) {
// startfix itself may have been unfixed; passing it ends the sweep.
if (edgept == startfix)
stopped = 1;
edgept = edgept->next;
}
edgefix3 = edgept;
edgefix = edgefix2;
}
while ((edgefix != startfix) && (!stopped));
}
//#pragma OPT_LEVEL 2 /*stop compiler bugs*/
/**********************************************************************
 * poly2(startpt,area)
 *
 * Applies a second approximation to the outline using the points which
 * have been fixed by the first approximation. Each stretch between
 * fixed points is handed to cutline(), which may fix further points;
 * the loop is then relinked so that only FIXED points remain on it.
 *
 * startpt - any point on the closed EDGEPT loop.
 * area    - raised to at least 1200; halved and the refinement repeated
 *           while fewer than 3 fixed points result.
 * Returns the point at which the relinked loop starts, or startpt when
 * no refinement was done.
 *
 * Side effect: sets par1 and par2 from approx_dist (all three are
 * declared outside this function).
 **********************************************************************/
EDGEPT *poly2( //second poly
EDGEPT *startpt, /*start of loop */
int area /*area of blob box */
) {
register EDGEPT *edgept; /*current outline point */
EDGEPT *loopstart; /*starting point */
register EDGEPT *linestart; /*start of line */
register int edgesum; /*correction count */
if (area < 1200)
area = 1200; /*minimum value */
/*1200(4) */
par1 = 4500 / (approx_dist * approx_dist);
/*1200(6) */
par2 = 6750 / (approx_dist * approx_dist);
loopstart = NULL; /*not found it yet */
edgept = startpt; /*start of loop */
// Look for a fixed point that is followed by an unfixed one.
do {
/*current point fixed */
if (edgept->flags[FLAGS] & FIXED
/*and next not */
&& (edgept->next->flags[FLAGS] & FIXED) == 0) {
loopstart = edgept; /*start of repoly */
break;
}
edgept = edgept->next; /*next point */
}
while (edgept != startpt); /*until found or finished */
// None found: if startpt is unfixed, fix it and start from there.
// If startpt is fixed, loopstart stays NULL and nothing is refined.
if (loopstart == NULL && (startpt->flags[FLAGS] & FIXED) == 0) {
/*fixed start of loop */
startpt->flags[FLAGS] |= FIXED;
loopstart = startpt; /*or start of loop */
}
if (loopstart) {
do {
edgept = loopstart; /*first to do */
do {
linestart = edgept;
edgesum = 0; /*sum of lengths */
// Extend the line to the next fixed point, but stop once the
// summed run lengths reach 126 or the loop start is reached.
do {
/*sum lengths */
edgesum += edgept->flags[RUNLENGTH];
edgept = edgept->next; /*move on */
}
while ((edgept->flags[FLAGS] & FIXED) == 0
&& edgept != loopstart && edgesum < 126);
if (poly_debug)
tprintf
("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n",
linestart->pos.x, linestart->pos.y, linestart->flags[DIR],
linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x,
edgept->pos.y);
/*reapproximate */
cutline(linestart, edgept, area);
// Step over points whose successor is already fixed.
while ((edgept->next->flags[FLAGS] & FIXED)
&& edgept != loopstart)
edgept = edgept->next; /*look for next non-fixed */
}
/*do all the loop */
while (edgept != loopstart);
edgesum = 0; // reused here as a count of fixed points
do {
if (edgept->flags[FLAGS] & FIXED)
edgesum++;
edgept = edgept->next;
}
//count fixed pts
while (edgept != loopstart);
if (edgesum < 3)
area /= 2; //must have 3 pts
}
while (edgesum < 3);
// Relink the loop through the fixed points only and recompute each
// vec as the difference between consecutive fixed positions.
do {
linestart = edgept;
do {
edgept = edgept->next;
}
while ((edgept->flags[FLAGS] & FIXED) == 0);
linestart->next = edgept;
edgept->prev = linestart;
linestart->vec.x = edgept->pos.x - linestart->pos.x;
linestart->vec.y = edgept->pos.y - linestart->pos.y;
}
while (edgept != loopstart);
}
else
edgept = startpt; /*start of loop */
loopstart = edgept; /*new start */
return loopstart; /*correct exit */
}
/**********************************************************************
 * cutline(first,last,area)
 *
 * Tests how well the chord from first to last represents the outline
 * points between them. If the largest or the mean squared deviation is
 * too big relative to area (scaled by par1 / par2), or the chord spans
 * 126 or more in x or y, the point of greatest deviation is marked
 * FIXED and both halves are refined recursively.
 *
 * first, last - ends of the stretch; nothing is done when last is the
 *               immediate successor of first.
 * area        - reference area used in the acceptance thresholds.
 **********************************************************************/
void cutline( //recursive refine
EDGEPT *first, /*ends of line */
EDGEPT *last,
int area /*area of object */
) {
register EDGEPT *edge; /*current edge */
TPOINT vecsum; /*vector sum */
int vlen; /*approx length of vecsum */
TPOINT vec; /*accumulated vector */
EDGEPT *maxpoint; /*worst point */
int maxperp; /*max deviation */
register int perp; /*perp distance */
int ptcount; /*no of points */
int squaresum; /*sum of perps */
edge = first; /*start of line */
if (edge->next == last)
return; /*simple line */
/*vector sum */
vecsum.x = last->pos.x - edge->pos.x;
vecsum.y = last->pos.y - edge->pos.y;
if (vecsum.x == 0 && vecsum.y == 0) {
/*special case: first and last coincide, so use the reversed */
/*vector of the step leading into first as the direction */
vecsum.x = -edge->prev->vec.x;
vecsum.y = -edge->prev->vec.y;
}
/*vlen = max(|vecsum.x|, |vecsum.y|) */
vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
if (vecsum.y > vlen)
vlen = vecsum.y; /*maximum */
else if (-vecsum.y > vlen)
vlen = -vecsum.y; /*absolute value */
vec.x = edge->vec.x; /*accumulated vector */
vec.y = edge->vec.y;
maxperp = 0; /*none yet */
squaresum = ptcount = 0;
edge = edge->next; /*move to actual point */
maxpoint = edge; /*in case there isn't one */
do {
/*cross product of the offset from first with the chord */
perp = CROSS (vec, vecsum); /*get perp distance */
if (perp != 0) {
perp *= perp; /*squared deviation */
}
squaresum += perp; /*sum squares */
ptcount++; /*count points */
if (poly_debug)
tprintf ("Cutline:Final perp=%d\n", perp);
if (perp > maxperp) {
maxperp = perp;
maxpoint = edge; /*find greatest deviation */
}
vec.x += edge->vec.x; /*accumulate vectors */
vec.y += edge->vec.y;
edge = edge->next;
}
while (edge != last); /*test all line */
/*perp is reused from here on as the squared length of vecsum */
perp = LENGTH (vecsum);
ASSERT_HOST (perp != 0);
/*scale by 256 and divide by the squared chord length; the order of */
/*the shift and the divide is chosen to limit overflow */
if (maxperp < 256 * MAX_INT16) {
maxperp <<= 8;
maxperp /= perp; /*true max perp */
}
else {
maxperp /= perp;
maxperp <<= 8; /*avoid overflow */
}
if (squaresum < 256 * MAX_INT16)
/*mean squared perp */
perp = (squaresum << 8) / (perp * ptcount);
else
/*avoid overflow */
perp = (squaresum / perp << 8) / ptcount;
if (poly_debug)
tprintf ("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n",
area, maxperp / 256.0, maxperp * 200.0 / area,
perp / 256.0, perp * 300.0 / area);
/*split at the worst point if either deviation measure is too large */
/*or the chord is too long */
if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
maxpoint->flags[FLAGS] |= FIXED;
/*partitions */
cutline(first, maxpoint, area);
cutline(maxpoint, last, area);
}
}

51
ccmain/polyaprx.h Normal file
View File

@ -0,0 +1,51 @@
/**********************************************************************
* File: polyaprx.h (Formerly polygon.h)
* Description: Code for polygonal approximation from old edgeprog.
* Author: Ray Smith
* Created: Thu Nov 25 11:42:04 GMT 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef POLYAPRX_H
#define POLYAPRX_H
#include "tessclas.h"
#include "poutline.h"
#include "coutln.h"
// Polygonal approximation of one C_OUTLINE. Callers in polyblob.cpp
// test the result against NULL before using it.
OUTLINE *tesspoly_outline( //old approximation
C_OUTLINE *c_outline, //input
float //xheight
);
// Converts the steps of c_outline into a circular list of EDGEPTs
// stored in the caller-supplied array, and returns &edgepts[0].
EDGEPT *edgesteps_to_edgepts ( //convert outline
C_OUTLINE * c_outline, //input
EDGEPT edgepts[] //output is array
);
// First approximation pass: sets or clears the FIXED flag on points of
// the loop. area scales the minimum gap allowed between fixed points.
void fix2( //polygonal approx
EDGEPT *start, /*loop to approximate */
int area);
// Second approximation pass: refines between fixed points with
// cutline() and relinks the loop through the fixed points only.
EDGEPT *poly2( //second poly
EDGEPT *startpt, /*start of loop */
int area /*area of blob box */
);
// Recursively fixes the point of greatest deviation between first and
// last until the chord is an acceptable fit.
void cutline( //recursive refine
EDGEPT *first, /*ends of line */
EDGEPT *last,
int area /*area of object */
);
// Distance parameter used by fix2() when computing the minimum gap
// between fixed points.
#define fixed_dist 20            //really an int_variable
// Stores the component-wise difference p1 - p2 in p. The body is wrapped
// in do { } while (0) so that the macro behaves as ONE statement: it can
// be the unbraced body of an if/else or a loop without the second
// assignment escaping the condition. Use it as a statement, followed by
// a semicolon.
#define point_diff(p,p1,p2) \
  do { (p).x = (p1).x - (p2).x; (p).y = (p1).y - (p2).y; } while (0)
// z component of the cross product of the 2D vectors a and b.
#define CROSS(a,b) ((a).x * (b).y - (a).y * (b).x)
// Squared magnitude of the 2D vector a (no square root is taken).
#define LENGTH(a) ((a).x * (a).x + (a).y * (a).y)
#endif

416
ccmain/polyblk.cpp Normal file
View File

@ -0,0 +1,416 @@
/**********************************************************************
* File: polyblk.c (Formerly poly_block.c)
* Description: Polygonal blocks
* Author: Sheelagh Lloyd?
* Created:
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include "elst.h"
#include "polyblk.h"
#include "hpddef.h" // must be last (handpd.dll)
// NOTE(review): PBLOCK_LABEL_SIZE is not referenced by any of the code
// in this file; presumably a label buffer size - confirm before removing.
#define PBLOCK_LABEL_SIZE 150
// Value returned by POLY_BLOCK::winding_number when the test point lies
// on the outline, so that no winding count can be given.
#define INTERSECTING MAX_INT16
// Comparison function ordering ICOORDELTs by x; passed to sort() in
// PB_LINE_IT::get_line and defined later in this file.
int lessthan(const void *first, const void *second);
// Builds a block of type t from the given vertex list: the points are
// spliced into this block's own vertex list with add_list_before() and
// the bounding box is then computed from them.
POLY_BLOCK::POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType t) {
  ICOORDELT_IT vertex_it(&vertices);

  vertices.clear();
  vertex_it.move_to_first();
  vertex_it.add_list_before(points);

  type = t;
  compute_bb();
}
/**********************************************************************
* POLY_BLOCK::compute_bb
*
* Compute the bounding box from the outline points.
**********************************************************************/
void POLY_BLOCK::compute_bb() { //constructor
ICOORD ibl, itr; //integer bb
ICOORD botleft; //bounding box
ICOORD topright;
ICOORD pos; //current pos;
ICOORDELT_IT pts = &vertices; //iterator
botleft = *pts.data ();
topright = botleft;
do {
pos = *pts.data ();
if (pos.x () < botleft.x ())
//get bounding box
botleft = ICOORD (pos.x (), botleft.y ());
if (pos.y () < botleft.y ())
botleft = ICOORD (botleft.x (), pos.y ());
if (pos.x () > topright.x ())
topright = ICOORD (pos.x (), topright.y ());
if (pos.y () > topright.y ())
topright = ICOORD (topright.x (), pos.y ());
pts.forward ();
}
while (!pts.at_first ());
ibl = ICOORD (botleft.x (), botleft.y ());
itr = ICOORD (topright.x (), topright.y ());
box = TBOX (ibl, itr);
}
/**********************************************************************
 * POLY_BLOCK::winding_number
 *
 * Count how many times the outline winds around the given point.
 * Every edge that crosses the horizontal line through the point is
 * examined; the sign of the cross product tells which side of the
 * point it passes. INTERSECTING is returned as soon as an edge or a
 * vertex is found to pass through the point itself.
 **********************************************************************/
inT16 POLY_BLOCK::winding_number(const ICOORD &point) {
  ICOORDELT_IT edge_it(&vertices);
  ICOORD vertex;                 // start of the current edge
  ICOORD to_vertex;              // from the test point to that vertex
  ICOORD edge;                   // from that vertex to the next one
  inT16 winding = 0;

  do {
    vertex = *edge_it.data();
    to_vertex = vertex - point;
    edge = *edge_it.data_relative(1) - vertex;

    // y of both edge ends, measured relative to the test point.
    const int start_y = to_vertex.y();
    const int end_y = to_vertex.y() + edge.y();

    if (start_y <= 0 && end_y > 0) {
      // Edge crosses the line going upwards.
      const inT32 cross = to_vertex * edge;
      if (cross == 0)
        return INTERSECTING;     // edge goes through the point
      if (cross > 0)
        winding++;
    } else if (start_y > 0 && end_y <= 0) {
      // Edge crosses the line going downwards.
      const inT32 cross = to_vertex * edge;
      if (cross == 0)
        return INTERSECTING;
      if (cross < 0)
        winding--;
    } else if (start_y == 0 && to_vertex.x() == 0) {
      return INTERSECTING;       // vertex coincides with the point
    }
    edge_it.forward();
  } while (!edge_it.at_first());

  return winding;
}
// Returns true if other is inside this.
// The answer is false when the bounding boxes do not overlap, when any
// vertex of this has a non-zero winding number about other, or when any
// vertex of other has a zero winding number about this. Vertices that
// lie exactly on the other outline (INTERSECTING) are not counted
// either way.
bool POLY_BLOCK::contains(POLY_BLOCK *other) {
  ICOORDELT_IT vertex_it(&vertices);
  ICOORD vertex;
  inT16 winding;

  if (!box.overlap(*(other->bounding_box())))
    return false;

  // No vertex of this may be inside other.
  do {
    vertex = *vertex_it.data();
    winding = other->winding_number(vertex);
    if (winding != INTERSECTING && winding != 0)
      return false;
    vertex_it.forward();
  } while (!vertex_it.at_first());

  // Every vertex of other must be inside this.
  vertex_it.set_to_list(other->points());
  do {
    vertex = *vertex_it.data();
    winding = winding_number(vertex);
    if (winding != INTERSECTING && winding == 0)
      return false;
    vertex_it.forward();
  } while (!vertex_it.at_first());

  return true;
}
/**********************************************************************
 * POLY_BLOCK::rotate
 *
 * Rotate every vertex by the given rotation vector, rounding the
 * result to the nearest integer coordinate, then recompute the
 * bounding box.
 **********************************************************************/
void POLY_BLOCK::rotate(FCOORD rotation) {
  ICOORDELT_IT vertex_it(&vertices);
  FCOORD rotated;                // floating point copy of the vertex

  do {
    ICOORDELT *vertex = vertex_it.data();
    rotated.set_x(vertex->x());
    rotated.set_y(vertex->y());
    rotated.rotate(rotation);
    // floor(v + 0.5) rounds to nearest before narrowing to inT16.
    vertex->set_x(static_cast<inT16>(floor(rotated.x() + 0.5)));
    vertex->set_y(static_cast<inT16>(floor(rotated.y() + 0.5)));
    vertex_it.forward();
  } while (!vertex_it.at_first());

  compute_bb();
}
/**********************************************************************
 * POLY_BLOCK::move
 *
 * Translate every vertex by shift and recompute the bounding box.
 **********************************************************************/
void POLY_BLOCK::move(ICOORD shift) {
  ICOORDELT_IT vertex_it(&vertices);

  do {
    *vertex_it.data() += shift;
    vertex_it.forward();
  } while (!vertex_it.at_first());

  compute_bb();
}
#ifndef GRAPHICS_DISABLED
// Draws the outline of the block in the colour that belongs to its
// type. When num is positive it is also written, as a decimal label, at
// the first vertex.
void POLY_BLOCK::plot(ScrollView* window, inT32 num) {
  ICOORDELT_IT vertex_it(&vertices);

  window->Pen(ColorForPolyBlockType(type));
  vertex_it.move_to_first();

  if (num > 0) {
    char label[34];              // text form of num
    window->TextAttributes("Times", 80, false, false, false);
#ifdef __UNIX__
    sprintf(label, INT32FORMAT, num);
#else
    ltoa(num, label, 10);
#endif
    window->Text(vertex_it.data()->x(), vertex_it.data()->y(), label);
  }

  // Trace round every vertex, then close the outline back at the first.
  window->SetCursor(vertex_it.data()->x(), vertex_it.data()->y());
  vertex_it.mark_cycle_pt();
  while (!vertex_it.cycled_list()) {
    window->DrawTo(vertex_it.data()->x(), vertex_it.data()->y());
    vertex_it.forward();
  }
  vertex_it.move_to_first();
  window->DrawTo(vertex_it.data()->x(), vertex_it.data()->y());
}
// Fills the interior of the block in the given colour by drawing, for
// every scanline of the bounding box, the horizontal runs reported by
// PB_LINE_IT::get_line.
//
// The scanline iterator lives on the stack and each run list is deleted
// as soon as it has been drawn: get_line() hands ownership of the list
// to the caller, and previously both the iterator and every list were
// leaked.
void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) {
  PB_LINE_IT lines(this);        // scanline generator for this block
  ICOORDELT_IT s_it;             // iterator over the runs of one line

  window->Pen(colour);
  for (inT16 y = this->bounding_box()->bottom();
       y <= this->bounding_box()->top(); y++) {
    ICOORDELT_LIST *segments = lines.get_line(y);
    if (!segments->empty()) {
      s_it.set_to_list(segments);
      for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) {
        // Note different use of ICOORDELT, x coord is x coord of pixel
        // at the start of line segment, y coord is length of line segment
        // Last pixel is start pixel + length.
        const inT16 width = s_it.data()->y();
        window->SetCursor(s_it.data()->x(), y);
        window->DrawTo(s_it.data()->x() + (float) width, y);
      }
    }
    // s_it is re-pointed by set_to_list() before it is used again, so
    // releasing the list here is safe.
    delete segments;
  }
}
#endif
// Returns true if the polygons of other and this overlap.
// Overlap is reported when the bounding boxes overlap and some vertex
// of one polygon has a non-zero winding number about the other.
// Vertices lying exactly on the other outline (INTERSECTING) do not
// count.
bool POLY_BLOCK::overlap(POLY_BLOCK *other) {
  ICOORDELT_IT vertex_it(&vertices);
  ICOORD vertex;
  inT16 winding;

  if (!box.overlap(*(other->bounding_box())))
    return false;

  // Is any vertex of this inside other?
  do {
    vertex = *vertex_it.data();
    winding = other->winding_number(vertex);
    if (winding != INTERSECTING && winding != 0)
      return true;
    vertex_it.forward();
  } while (!vertex_it.at_first());

  // Is any vertex of other inside this?
  vertex_it.set_to_list(other->points());
  do {
    vertex = *vertex_it.data();
    winding = winding_number(vertex);
    if (winding != INTERSECTING && winding != 0)
      return true;
    vertex_it.forward();
  } while (!vertex_it.at_first());

  return false;
}
// Returns a newly allocated list of the runs of the block that lie on
// scanline y. Each element holds the start of a run in x() and its
// length in y(). The list is created here with new and is never freed
// in this function, so the caller is responsible for deleting it.
ICOORDELT_LIST *PB_LINE_IT::get_line(inT16 y) {
ICOORDELT_IT v, r;
ICOORDELT_LIST *result;
ICOORDELT *x, *current, *previous;
float fy, fx;
// Intersections are taken half a unit above y.
fy = (float) (y + 0.5);
result = new ICOORDELT_LIST ();
r.set_to_list (result);
v.set_to_list (block->points ());
// Collect the x position of every polygon edge (previous vertex to
// current vertex) that has one end above y and the other at or below.
for (v.mark_cycle_pt (); !v.cycled_list (); v.forward ()) {
if (((v.data_relative (-1)->y () > y) && (v.data ()->y () <= y))
|| ((v.data_relative (-1)->y () <= y) && (v.data ()->y () > y))) {
previous = v.data_relative (-1);
current = v.data ();
// Linear interpolation along the edge; the 0.5 rounds on truncation.
// The divisor cannot be zero because the two ends differ in y.
fx = (float) (0.5 + previous->x () +
(current->x () - previous->x ()) * (fy -
previous->y ()) /
(current->y () - previous->y ()));
x = new ICOORDELT ((inT16) fx, 0);
r.add_to_end (x);
}
}
if (!r.empty ()) {
r.sort (lessthan);
// NOTE(review): this loop only assigns x and has no other visible
// effect; presumably left over from debugging.
for (r.mark_cycle_pt (); !r.cycled_list (); r.forward ())
x = r.data ();
// Crossings are taken in pairs: the first of each pair keeps its x
// and receives the distance to the second as its y; the second is
// then removed from the list and deleted.
for (r.mark_cycle_pt (); !r.cycled_list (); r.forward ()) {
r.data ()->set_y (r.data_relative (1)->x () - r.data ()->x ());
r.forward ();
delete (r.extract ());
}
}
return result;
}
int lessthan(const void *first, const void *second) {
ICOORDELT *p1 = (*(ICOORDELT **) first);
ICOORDELT *p2 = (*(ICOORDELT **) second);
if (p1->x () < p2->x ())
return (-1);
else if (p1->x () > p2->x ())
return (1);
else
return (0);
}
/**********************************************************************
* POLY_BLOCK::serialise_asc
*
* Converto to ascii file.
**********************************************************************/
void POLY_BLOCK::serialise_asc( //convert to ascii
FILE *f //file to use
) {
vertices.serialise_asc (f);
box.serialise_asc (f);
serialise_INT32(f, type);
}
/**********************************************************************
* POLY_BLOCK::de_serialise_asc
*
* Converto from ascii file.
**********************************************************************/
void POLY_BLOCK::de_serialise_asc( //convert from ascii
FILE *f //file to use
) {
vertices.de_serialise_asc (f);
box.de_serialise_asc (f);
type = (PolyBlockType) de_serialise_INT32 (f);
}
// Returns a color to draw the given type.
// The table is indexed by PolyBlockType, so it has to list one colour
// per enum value in declaration order. Values outside the table are
// drawn in white.
ScrollView::Color POLY_BLOCK::ColorForPolyBlockType(PolyBlockType type) {
  const ScrollView::Color kPBColors[PT_COUNT] = {
    ScrollView::WHITE,
    ScrollView::BLUE,
    ScrollView::CYAN,
    ScrollView::MEDIUM_BLUE,
    ScrollView::MAGENTA,
    ScrollView::YELLOW,
    ScrollView::RED,
    ScrollView::MAROON,
    ScrollView::ORANGE,
    ScrollView::GREEN,
    ScrollView::LIME_GREEN,
    ScrollView::DARK_GREEN,
    ScrollView::GREY
  };

  const bool in_table = (type >= 0 && type < PT_COUNT);
  return in_table ? kPBColors[type] : ScrollView::WHITE;
}

160
ccmain/polyblk.h Normal file
View File

@ -0,0 +1,160 @@
/**********************************************************************
* File: polyblk.h (Formerly poly_block.h)
* Description: Polygonal blocks
* Author: Sheelagh Lloyd?
* Created:
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef POLYBLK_H
#define POLYBLK_H
#include "rect.h"
#include "points.h"
#include "scrollview.h"
#include "elst.h"
#include "hpddef.h" // must be last (handpd.dll)
// Possible types for a POLY_BLOCK or ColPartition. Must be kept in sync with
// kPBColors (the colour table in POLY_BLOCK::ColorForPolyBlockType, which is
// indexed by these values). Used extensively by ColPartition, but polyblk is
// a lower-level file.
enum PolyBlockType {
PT_UNKNOWN, // Type is not yet known. Keep as the first element.
PT_FLOWING_TEXT, // Text that lives inside a column.
PT_HEADING_TEXT, // Text that spans more than one column.
PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region.
PT_TABLE, // Partition belonging to a table region.
PT_VERTICAL_TEXT, // Text-line runs vertically.
PT_FLOWING_IMAGE, // Image that lives inside a column.
PT_HEADING_IMAGE, // Image that spans more than one column.
PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region.
PT_FLOWING_LINE, // H-Line that lives inside a column.
PT_HEADING_LINE, // H-Line that spans more than one column.
PT_PULLOUT_LINE, // H-Line that is in a cross-column pull-out region.
PT_NOISE, // Lies outside of any column.
PT_COUNT // Number of types above; used as an array size. Keep last.
};
// A polygonal region described by a list of integer vertices, together
// with its bounding box and a PolyBlockType tag.
class DLLSYM POLY_BLOCK {
 public:
  // Creates an empty block. The type starts out as PT_UNKNOWN so that
  // isA(), IsText() and plot() never read an uninitialised value.
  POLY_BLOCK() : type(PT_UNKNOWN) {
  }
  // Creates a block of the given type from the given vertices.
  POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType type);
  ~POLY_BLOCK () {
  }

  // Returns a pointer to the stored bounding box.
  TBOX *bounding_box() {  // access function
    return &box;
  }

  // Returns a pointer to the stored vertex list.
  ICOORDELT_LIST *points() {  // access function
    return &vertices;
  }

  // Recomputes the bounding box from the vertices.
  void compute_bb();

  // Returns the type of this block.
  PolyBlockType isA() const {
    return type;
  }

  // Returns true if this block has one of the text types.
  bool IsText() const {
    return IsTextType(type);
  }

  // Rotate about the origin by the given rotation. (Analogous to
  // multiplying by a complex number.)
  void rotate(FCOORD rotation);
  // Move by adding shift to all coordinates.
  void move(ICOORD shift);

  // Draws the outline; a positive num is also drawn as a label.
  void plot(ScrollView* window, inT32 num);

  // Fills the interior in the given colour.
  void fill(ScrollView* window, ScrollView::Color colour);

  // Returns true if other is inside this.
  bool contains(POLY_BLOCK *other);

  // Returns true if the polygons of other and this overlap.
  bool overlap(POLY_BLOCK *other);

  // Returns the winding number of this around the test_pt.
  // Positive for anticlockwise, negative for clockwise, and zero for
  // test_pt outside this.
  inT16 winding_number(const ICOORD &test_pt);

  // Serialization.
  void prep_serialise() {
    vertices.prep_serialise();
  }
  void dump(FILE *f) {
    vertices.dump(f);
  }
  void de_dump(FILE *f) {
    vertices.de_dump(f);
  }
  make_serialise(POLY_BLOCK)
  void serialise_asc(FILE * f);
  void de_serialise_asc(FILE *f);

  // Static utility functions to handle the PolyBlockType.

  // Returns a color to draw the given type.
  static ScrollView::Color ColorForPolyBlockType(PolyBlockType type);

  // Returns true if PolyBlockType is of horizontal line type
  static bool IsLineType(PolyBlockType type) {
    return (type == PT_FLOWING_LINE) || (type == PT_HEADING_LINE) ||
           (type == PT_PULLOUT_LINE);
  }
  // Returns true if PolyBlockType is of image type
  static bool IsImageType(PolyBlockType type) {
    return (type == PT_FLOWING_IMAGE) || (type == PT_HEADING_IMAGE) ||
           (type == PT_PULLOUT_IMAGE);
  }
  // Returns true if PolyBlockType is of text type
  static bool IsTextType(PolyBlockType type) {
    return (type == PT_FLOWING_TEXT) || (type == PT_HEADING_TEXT) ||
           (type == PT_PULLOUT_TEXT) || (type == PT_TABLE) ||
           (type == PT_VERTICAL_TEXT);
  }

 private:
  ICOORDELT_LIST vertices;  // vertices
  TBOX box;                 // bounding box
  PolyBlockType type;       // Type of this region.
};
// Class to iterate the scanlines of a polygon.
// It only keeps a pointer to the block; the block itself is not copied.
class DLLSYM PB_LINE_IT {
 public:
  // Starts iterating over the given block.
  PB_LINE_IT(POLY_BLOCK *blkptr) : block(blkptr) {
  }

  NEWDELETE2(PB_LINE_IT)

  // Switches the iterator to another block.
  void set_to_block(POLY_BLOCK * blkptr) {
    this->block = blkptr;
  }

  // Returns a list of runs of pixels for the given y coord.
  // Each element of the returned list is the start (x) and extent(y) of
  // a run inside the region.
  // Delete the returned list after use.
  ICOORDELT_LIST *get_line(inT16 y);

 private:
  POLY_BLOCK * block;            // block whose scanlines are produced
};
#endif

370
ccmain/polyblob.cpp Normal file
View File

@ -0,0 +1,370 @@
/**********************************************************************
* File: polyblob.cpp (Formerly blob.c)
* Description: Code for PBLOB class.
* Author: Ray Smith
* Created: Wed Oct 23 15:17:41 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "varable.h"
#include "ocrrow.h"
#include "polyblob.h"
//#include "lapoly.h"
#include "polyaprx.h"
// EXTERN expands to nothing here.
// NOTE(review): presumably so that the declaration below acts as the
// defining instance of the variable - confirm against varable.h.
#define EXTERN
// The only use of this flag in this file, in approximate_outline_list,
// is commented out: tesspoly_outline is always called.
EXTERN BOOL_VAR (polygon_tess_approximation, TRUE,
"Do tess poly instead of greyscale");
// Instantiates the list-class code for PBLOB.
ELISTIZE_S (PBLOB)
/**********************************************************************
 * position_outline
 *
 * Position the outline in the given list at the relevant place
 * according to its nesting.
 *
 * The destination list is scanned once. As soon as an existing entry
 * compares as nested inside the new outline, the new outline takes its
 * place and that entry, together with any later entries that are also
 * nested inside it, becomes one of its children. If instead the new
 * outline is nested inside an existing entry, it is placed recursively
 * in that entry's child list. If neither happens for any entry, the
 * outline is appended at this level.
 * NOTE(review): the meaning of operator< on OUTLINE ("is enclosed by")
 * is taken from the original comments; it is defined elsewhere.
 **********************************************************************/
static void position_outline( //put in place
OUTLINE *outline, //thing to place
OUTLINE_LIST *destlist //destination list
) {
OUTLINE *dest_outline; //outline from dest list
OUTLINE_IT it = destlist; //iterator
//iterator on children
OUTLINE_IT child_it = outline->child ();
if (!it.empty ()) {
do {
dest_outline = it.data (); //get destination
//encloses dest
if (*dest_outline < *outline) {
//take off list
dest_outline = it.extract ();
//put this in place
it.add_after_then_move (outline);
//make it a child
child_it.add_to_end (dest_outline);
//sweep the rest of the list for further children of outline
while (!it.at_last ()) {
it.forward (); //do rest of list
//check for other children
dest_outline = it.data ();
if (*dest_outline < *outline) {
//take off list
dest_outline = it.extract ();
child_it.add_to_end (dest_outline);
//make it a child
if (it.empty ())
break;
}
}
return; //finished
}
//enclosed by dest
else if (*outline < *dest_outline) {
position_outline (outline, dest_outline->child ());
//place in child list
return; //finished
}
it.forward ();
}
while (!it.at_first ());
}
it.add_to_end (outline); //at outer level
}
/**********************************************************************
 * plot_outline_list
 *
 * Draw every outline of the list in colour. Children, and all deeper
 * levels of nesting, are drawn in child_colour.
 **********************************************************************/
#ifndef GRAPHICS_DISABLED
static void plot_outline_list(OUTLINE_LIST *list,
                              ScrollView* window,
                              ScrollView::Color colour,
                              ScrollView::Color child_colour) {
  OUTLINE_IT outline_it(list);

  outline_it.mark_cycle_pt();
  while (!outline_it.cycled_list()) {
    OUTLINE *current = outline_it.data();
    current->plot(window, colour);
    // Recurse with child_colour as both colours, so every level below
    // this one is drawn alike.
    if (!current->child()->empty()) {
      plot_outline_list(current->child(), window,
                        child_colour, child_colour);
    }
    outline_it.forward();
  }
}
#endif
/**********************************************************************
 * PBLOB::PBLOB
 *
 * Build a PBLOB from a list of OUTLINEs given in any order.
 * Each outline is extracted from the source list, which therefore ends
 * up empty, and placed in this blob according to its nesting.
 **********************************************************************/
PBLOB::PBLOB(OUTLINE_LIST *outline_list) {
  OUTLINE_IT src_it(outline_list);

  while (!src_it.empty()) {
    OUTLINE *taken = src_it.extract();
    position_outline(taken, &outlines);
    // Step on to the next outline only if one is left.
    if (!src_it.empty())
      src_it.forward();
  }
}
/**********************************************************************
 * approximate_outline_list
 *
 * Convert a list of C_OUTLINEs, and recursively their children, to
 * polygonal OUTLINEs appended to destlist. Source outlines for which
 * tesspoly_outline returns NULL are dropped, along with their children.
 * The source list must not be empty.
 **********************************************************************/
static void approximate_outline_list(C_OUTLINE_LIST *srclist,
                                     OUTLINE_LIST *destlist,
                                     float xheight) {
  C_OUTLINE_IT src_it(srclist);
  OUTLINE_IT dest_it(destlist);

  do {
    C_OUTLINE *source = src_it.data();
    // tesspoly_outline is always used; the polygon_tess_approximation
    // flag is not consulted.
    OUTLINE *approx = tesspoly_outline(source, xheight);
    if (approx != NULL) {
      dest_it.add_after_then_move(approx);
      if (!source->child()->empty()) {
        approximate_outline_list(source->child(), approx->child(),
                                 xheight);
      }
    }
    src_it.forward();
  } while (!src_it.at_first());
}
/**********************************************************************
 * PBLOB::PBLOB
 *
 * Build a PBLOB from a C_BLOB by polygonal approximation. The height
 * passed on to the approximation is the larger of xheight and the
 * height of the blob's bounding box. A blob without outlines gives an
 * empty PBLOB.
 **********************************************************************/
PBLOB::PBLOB(C_BLOB *cblob, float xheight) {
  TBOX bbox;

  if (cblob->out_list()->empty())
    return;

  bbox = cblob->bounding_box();
  if (bbox.height() > xheight)
    xheight = bbox.height();
  approximate_outline_list(cblob->out_list(), &outlines, xheight);
}
/**********************************************************************
 * PBLOB::bounding_box
 *
 * Return the union of the bounding boxes of the top-level outlines.
 **********************************************************************/
TBOX PBLOB::bounding_box() {
  TBOX result;
  OUTLINE_IT outline_it(&outlines);

  outline_it.mark_cycle_pt();
  while (!outline_it.cycled_list()) {
    result += outline_it.data()->bounding_box();
    outline_it.forward();
  }
  return result;
}
/**********************************************************************
* PBLOB::area
*
* Return the area of the blob.
**********************************************************************/
float PBLOB::area() { //area
OUTLINE *outline; //current outline
OUTLINE_IT it = &outlines; //outlines of blob
float total; //total area
total = 0.0f;
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
outline = it.data ();
total += outline->area ();
}
return total;
}
/**********************************************************************
* PBLOB::baseline_normalise
*
* Baseline normalize a blob
**********************************************************************/
// Produces a baseline-normalised deep copy of this blob; this blob itself
// is left untouched.
//   row    - text row the blob came from (supplies x-height and baseline)
//   denorm - output: overwritten with the DENORM describing the mapping
// Returns a heap-allocated PBLOB (created with new).
PBLOB *PBLOB::baseline_normalise(ROW *row, DENORM *denorm) {
  TBOX source_box = bounding_box();
  // Horizontal centre of the blob: origin of the normalisation.
  float mid_x = (source_box.left() + source_box.right()) / 2.0;

  // Record the mapping before it is used below.
  *denorm = DENORM(mid_x, bln_x_height / row->x_height(), row);

  // Deep copy, then shift to the origin/baseline, scale, and finally
  // lift by the normalised baseline offset.
  PBLOB *normalised = PBLOB::deep_copy(this);
  normalised->move(FCOORD(-denorm->origin(), -row->base_line(mid_x)));
  normalised->scale(denorm->scale());
  normalised->move(FCOORD(0.0, bln_baseline_offset));
  return normalised;
}
/**********************************************************************
* PBLOB::baseline_denormalise
*
* DeBaseline Normalise the blob properly with the given denorm.
**********************************************************************/
// Reverses baseline normalisation in place using the given DENORM.
//   denorm - supplies the scale, origin and y shift to undo
void PBLOB::baseline_denormalise(const DENORM *denorm) {
  // Step 1: remove the normalised baseline offset.
  move(FCOORD(0.0f, 0.0f - bln_baseline_offset));

  // Step 2: the left edge (measured after step 1) selects the scale and
  // y shift that apply to this blob.
  float left_edge = bounding_box().left();
  scale(1.0 / denorm->scale_at_x(left_edge));

  // Step 3: translate back to the original origin and baseline.
  move(FCOORD(denorm->origin(), denorm->yshift_at_x(left_edge)));
}
/**********************************************************************
* PBLOB::move
*
* Move PBLOB by vector
**********************************************************************/
void PBLOB::move( // reposition blob
const FCOORD vec // by vector
) {
OUTLINE_IT it(&outlines); // iterator
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
it.data ()->move (vec); // move each outline
}
/**********************************************************************
* PBLOB::scale
*
* Scale PBLOB by float multiplier
**********************************************************************/
void PBLOB::scale( // scale blob
const float f // by multiplier
) {
OUTLINE_IT it(&outlines); // iterator
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
it.data ()->scale (f); // scale each outline
}
/**********************************************************************
* PBLOB::scale
*
* Scale PBLOB by an FCOORD vector (separate x and y multipliers)
**********************************************************************/
void PBLOB::scale( // scale blob
const FCOORD vec // by multiplier
) {
OUTLINE_IT it(&outlines); // iterator
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
it.data ()->scale (vec); // scale each outline
}
/**********************************************************************
* PBLOB::rotate
*
* Rotate PBLOB 90 deg anticlockwise about the origin.
**********************************************************************/
// Rotates the blob 90 degrees anticlockwise about the origin by
// delegating to rotate(const FCOORD&) with (cos, sin) = (0, 1).
void PBLOB::rotate() {
  rotate(FCOORD(0.0f, 1.0f));
}
/**********************************************************************
* PBLOB::rotate
*
* Rotate PBLOB by the given rotation about the origin.
* The rotation is defined to be (cos a, sin a) where a is the anticlockwise
* rotation angle (in units appropriate for cos, sin).
* Alternatively think of multiplication by the complex number
* rotation = z = (x + iy), with |z| = 1.
**********************************************************************/
void PBLOB::rotate(const FCOORD& rotation) { // Rotate by given rotation.
OUTLINE_IT it(&outlines);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->rotate(rotation); // Rotate each outline.
}
}
/**********************************************************************
* PBLOB::plot
*
* Draw the PBLOB in the given colour.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
// Draws the blob by handing its outline list to plot_outline_list.
//   window       - window to draw in
//   blob_colour  - main colour
//   child_colour - colour for holes
void PBLOB::plot(ScrollView* window,
                 ScrollView::Color blob_colour,
                 ScrollView::Color child_colour) {
  plot_outline_list(&outlines, window, blob_colour, child_colour);
}
#endif

103
ccmain/polyblob.h Normal file
View File

@ -0,0 +1,103 @@
/**********************************************************************
* File: polyblob.h (Formerly blob.h)
* Description: Code for PBLOB class.
* Author: Ray Smith
* Created: Wed Oct 23 15:17:41 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef POLYBLOB_H
#define POLYBLOB_H
#include "poutline.h"
#include "rect.h"
#include "normalis.h"
#include "stepblob.h"
// A blob represented as a list of polygonal OUTLINEs. PBLOBs are list
// elements (ELIST_LINK) and own their outlines.
class PBLOB:public ELIST_LINK
{
  public:
    PBLOB() {
    }                            //empty constructor
    PBLOB(                       //constructor
          OUTLINE_LIST *outline_list);  //in random order
    PBLOB(                       //constructor
          C_BLOB *cblob,         //polygonal approx
          float xheight);

    OUTLINE_LIST *out_list() {   //get outline list
      return &outlines;
    }

    TBOX bounding_box();         //compute bounding box
    float area();                //get area of blob

    PBLOB *baseline_normalise(   //normalise single blob
                              ROW *row,        //row it came from
                              DENORM *denorm); //inverse mapping out
    void baseline_denormalise(   //denormalise
                              const DENORM *denorm);  //antidote

    void plot(                   //draw one
              ScrollView* window,              //window to draw in
              ScrollView::Color blob_colour,   //for outer bits
              ScrollView::Color child_colour); //for holes

    void move(                   // reposition blob
              const FCOORD vec); // by FLOAT vector

    void scale(                  // scale blob
               const float f);   // by multiplier
    void scale(                  // scale blob
               const FCOORD vec);  // by FLOAT vector
    void rotate();               // Rotate 90 deg anti
    void rotate(const FCOORD& rotation);  // Rotate by given rotation.

    void prep_serialise() {      //set ptrs to counts
      outlines.prep_serialise ();
    }

    void dump(                   //write external bits
              FILE *f) {
      outlines.dump (f);
    }

    void de_dump(                //read external bits
                 FILE *f) {
      outlines.de_dump (f);
    }

    make_serialise(PBLOB)

    // Assignment: replaces this blob's outlines with deep copies of the
    // source's outlines.
    PBLOB& operator=(const PBLOB & source) {
      // Self-assignment must be a no-op: clearing first would destroy
      // the very outlines that are about to be copied.
      if (this == &source)
        return *this;
      if (!outlines.empty ())
        outlines.clear ();
      outlines.deep_copy(&source.outlines, &OUTLINE::deep_copy);
      return *this;
    }

    // Returns a newly allocated deep copy of *src.
    static PBLOB* deep_copy(const PBLOB* src) {
      PBLOB* blob = new PBLOB;
      *blob = *src;
      return blob;
    }

  private:
    OUTLINE_LIST outlines;       //master elements
};
ELISTIZEH_S (PBLOB)
#endif

23
ccmain/polyvert.cpp Normal file
View File

@ -0,0 +1,23 @@
/**********************************************************************
* File: polyvert.cpp (Formerly polypt.c)
* Description: Code for the POLYPT class.
* Author: Ray Smith
* Created: Wed Oct 23 11:02:56 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "polyvert.h"
ELIST2IZE_S (POLYPT)

58
ccmain/polyvert.h Normal file
View File

@ -0,0 +1,58 @@
/**********************************************************************
* File: polyvert.h (Formerly polypt.h)
* Description: Code for the POLYPT class.
* Author: Ray Smith
* Created: Wed Oct 23 11:02:56 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef POLYVERT_H
#define POLYVERT_H
#include "elst2.h"
#include "rect.h"
// One vertex of a polygonal outline: a position plus the vector (step) to
// the next vertex. POLYPTs are elements of a doubly-linked list
// (ELIST2_LINK).
class POLYPT:public ELIST2_LINK
{
public:
POLYPT() { //empty constructor: pos and vec are not set here
}
POLYPT( //constructor
const FCOORD &position, //coords of this vertex
const FCOORD &vector) { //step to next vertex
pos = position;
vec = vector; //just copy
}
// The three serialisation hooks below are intentionally empty: a POLYPT
// has no external parts to convert, write or read.
void prep_serialise() { //set ptrs to counts
}
void dump( //write external bits
FILE *) {
}
void de_dump( //read external bits
FILE *) {
}
// Returns a newly allocated copy of *src, made with the copy constructor.
static POLYPT* deep_copy(const POLYPT* src) {
return new POLYPT(*src);
}
//really simple
make_serialise (POLYPT)
FCOORD pos; //position of this vertex
FCOORD vec; //vector to next
};
ELIST2IZEH_S (POLYPT)
#endif

441
ccmain/poutline.cpp Normal file
View File

@ -0,0 +1,441 @@
/**********************************************************************
* File: poutline.cpp (Formerly outline.c)
* Description: Code for OUTLINE class.
* Author: Ray Smith
* Created: Wed Oct 23 10:52:04 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "poutline.h"
ELISTIZE_S (OUTLINE)
/**********************************************************************
* OUTLINE::OUTLINE
*
* Constructor to build a OUTLINE from a compact LOOP.
**********************************************************************/
// Builds an OUTLINE from a compact loop of step vectors.
//   startpt     - start position of the loop
//   compactloop - consecutive (dx, dy) inT8 pairs, one pair per step
//   invert      - if set, the finished outline is reversed
//   bot_left, top_right - corners of the bounding box to store
// Steps are consumed until the running position returns to startpt; at
// least one step is always read.
OUTLINE::OUTLINE(const ICOORD & startpt,
                 inT8 * compactloop,
                 BOOL8 invert,
                 ICOORD bot_left,
                 ICOORD top_right)
    : box(bot_left, top_right),
      start(startpt) {
  POLYPT_IT it(&outline);                // appends to the point list
  ICOORD pos = startpt;                  // current position on the loop
  inT8 *step = compactloop;              // next (dx, dy) pair

  do {
    ICOORD vec = ICOORD(step[0], step[1]);   // vector to next point
    // Store the vertex together with its outgoing step.
    it.add_after_then_move(new POLYPT(FCOORD(pos), FCOORD(vec)));
    pos += vec;                          // advance along the loop
    step += 2;
  } while (pos != startpt);

  if (invert)
    reverse();                           // flip the direction
}
/**********************************************************************
* OUTLINE::OUTLINE
*
* Constructor to build an OUTLINE from a list of POLYPTs.
**********************************************************************/
// Builds an OUTLINE from an existing list of POLYPTs.
//   polypts - iterator on the input list; it is moved to the first element
// The sublist from the first to the last element is assigned to this
// outline, and the bounding box is then computed from the points.
OUTLINE::OUTLINE(POLYPT_IT *polypts) {
  POLYPT_IT last_it(*polypts);           // second iterator on the same list

  polypts->move_to_first();              // sublist start
  last_it.move_to_last();                // sublist end
  outline.assign_to_sublist(polypts, &last_it);
  compute_bb();
}
/**********************************************************************
* OUTLINE::compute_bb
*
* Compute the bounding box from the outline points.
**********************************************************************/
void OUTLINE::compute_bb() { //constructor
ICOORD ibl, itr; //integer bb
FCOORD botleft; //bounding box
FCOORD topright;
FCOORD pos; //current pos;
POLYPT_IT polypts = &outline; //iterator
botleft = polypts.data ()->pos;
topright = botleft;
start = ICOORD ((inT16) botleft.x (), (inT16) botleft.y ());
do {
pos = polypts.data ()->pos;
if (pos.x () < botleft.x ())
//get bounding box
botleft = FCOORD (pos.x (), botleft.y ());
if (pos.y () < botleft.y ())
botleft = FCOORD (botleft.x (), pos.y ());
if (pos.x () > topright.x ())
topright = FCOORD (pos.x (), topright.y ());
if (pos.y () > topright.y ())
topright = FCOORD (topright.x (), pos.y ());
polypts.forward ();
}
while (!polypts.at_first ());
ibl = ICOORD ((inT16) botleft.x (), (inT16) botleft.y ());
itr = ICOORD ((inT16) topright.x () + 1, (inT16) topright.y () + 1);
box = TBOX (ibl, itr);
}
/**********************************************************************
* OUTLINE::area
*
* Compute the area from the outline points.
**********************************************************************/
// Computes the area of the outline from its points and adds the areas of
// all child outlines.
// The polygon part is half the sum of cross products of successive
// vectors taken from the first point to each later point, so the result
// is signed.
float OUTLINE::area() {
  POLYPT_IT poly_it = polypts();         // outline points
  OUTLINE_IT child_it(&children);        // child outlines

  // The first point is the common origin of all vectors.
  FCOORD origin = poly_it.data()->pos;
  poly_it.forward();
  FCOORD vec = poly_it.data()->pos - origin;   // origin -> second point

  float total = 0.0f;
  for (poly_it.forward(); !poly_it.at_first(); poly_it.forward()) {
    FCOORD prev_vec = vec;               // origin -> previous point
    vec = poly_it.data()->pos - origin;  // origin -> current point
    total += prev_vec * vec;             // cross product
  }
  total /= 2;

  // Add the areas of the children.
  child_it.mark_cycle_pt();
  while (!child_it.cycled_list()) {
    total += child_it.data()->area();
    child_it.forward();
  }
  return total;
}
/**********************************************************************
* OUTLINE::operator<
*
* Return TRUE if the left operand is inside the right one.
**********************************************************************/
// Containment test: returns TRUE if this outline is inside `other`.
// Points whose winding number comes back as INTERSECTING are skipped;
// the first usable point decides the answer.
BOOL8 OUTLINE::operator<(OUTLINE & other) {
  inT16 winding;                         // winding number of a test point
  POLYPT_IT it = &outline;               // starts on our own points

  if (!box.overlap(other.box))
    return FALSE;                        // disjoint boxes: can't be contained

  // Pass 1: test our points against the other outline. A non-zero
  // winding number means the point is inside `other`.
  for (;;) {
    winding = other.winding_number(FCOORD(it.data()->pos));
    if (winding != INTERSECTING)
      return winding != 0;
    it.forward();
    if (it.at_first())
      break;
  }

  // Pass 2: every one of our points was INTERSECTING, so try the other
  // way round. If a point of `other` is outside us (winding number 0),
  // we are taken to be inside it.
  it.set_to_list(&other.outline);
  for (;;) {
    winding = winding_number(FCOORD(it.data()->pos));
    if (winding != INTERSECTING)
      return winding == 0;
    it.forward();
    if (it.at_first())
      break;
  }
  return TRUE;                           // all points INTERSECTING both ways
}
/**********************************************************************
* OUTLINE::winding_number
*
* Return the winding number of the outline around the given point.
**********************************************************************/
// Returns the winding number of this outline around `point`, or
// INTERSECTING if an edge that crosses the point's horizontal level has a
// zero cross product with the vector to the point.
// Each edge runs from polypt->pos to polypt->pos + polypt->vec. Only edges
// that cross the horizontal line through `point` are counted: upward
// crossings add one, downward crossings subtract one, and the sign of the
// cross product selects which side of the point is counted.
inT16 OUTLINE::winding_number( //winding number
const FCOORD &point //point to wind around
) {
inT16 count; //winding count
POLYPT *polypt; //current point
FCOORD vec; //from test point to current vertex
float cross; //cross product
POLYPT_IT it = &outline; //iterator
count = 0;
do {
polypt = it.data ();
vec = polypt->pos - point;
//upward crossing: edge starts at or below the point's level, ends above it
if (vec.y () <= 0 && vec.y () + polypt->vec.y () > 0) {
cross = vec * polypt->vec; //cross product
if (cross > 0)
count++; //crossing right half
else if (cross == 0)
return INTERSECTING; //going through point
}
//downward crossing: edge starts above the point's level, ends at or below it
else if (vec.y () > 0 && vec.y () + polypt->vec.y () <= 0) {
cross = vec * polypt->vec;
if (cross < 0)
count--; //crossing back
else if (cross == 0)
return INTERSECTING; //illegal
}
it.forward ();
}
while (!it.at_first ());
return count; //winding number
}
/**********************************************************************
* OUTLINE::reverse
*
* Reverse the direction of an outline.
**********************************************************************/
// Reverses the direction of the outline in place.
// The points are moved one at a time into a temporary list while walking
// the source backwards, each point's vec is recomputed as the step to its
// new successor, and the temporary list is then handed back to `outline`.
void OUTLINE::reverse() { //reverse direction
POLYPT_LIST back_list; //reversed list
POLYPT_IT dest_it = &back_list;//destination
POLYPT_IT src_it = &outline; //source list
POLYPT *polypt; //current point
// Pass 1: drain the source list into back_list, stepping backwards.
do {
polypt = src_it.extract ();
//copy in reverse
dest_it.add_after_then_move (polypt);
src_it.backward ();
}
while (!src_it.empty ());
// Pass 2: positions are unchanged but each point's successor is new,
// so recompute every step vector.
dest_it.move_to_first ();
do {
polypt = dest_it.data ();
polypt->vec = dest_it.data_relative (1)->pos - polypt->pos;
//vector to next
dest_it.forward ();
}
while (!dest_it.at_first ());
// dest_it is back at the first element; step back once so that src_it
// and dest_it are the two ends passed to assign_to_sublist.
dest_it.backward ();
src_it.set_to_list (&back_list);
//put it back
outline.assign_to_sublist (&src_it, &dest_it);
}
/**********************************************************************
* OUTLINE::move
*
* Move OUTLINE by vector
**********************************************************************/
void OUTLINE::move( // reposition OUTLINE
const FCOORD vec // by vector
) {
//child outline itertr
OUTLINE_IT child_it(&children);
POLYPT_IT poly_it(&outline); //outline point itertr
box.move (vec);
start.set_x ((inT16) floor (start.x () + vec.x () + 0.5));
// ?? Why ICOORD?
start.set_y ((inT16) floor (start.y () + vec.y () + 0.5));
// ?? Why ICOORD?
for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ())
poly_it.data ()->pos += vec;
for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
child_it.forward ())
child_it.data ()->move (vec); // move child outlines
}
/**********************************************************************
* OUTLINE::scale
*
* Scale OUTLINE by float multiplier
**********************************************************************/
void OUTLINE::scale( // scale OUTLINE
const float f // by multiplier
) {
//child outline itertr
OUTLINE_IT child_it(&children);
POLYPT_IT poly_it(&outline); //outline point itertr
POLYPT *pt;
box.scale (f);
// ?? Why ICOORD?
start.set_x ((inT16) floor (start.x () * f + 0.5));
// ?? Why ICOORD?
start.set_y ((inT16) floor (start.y () * f + 0.5));
for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ()) {
pt = poly_it.data ();
pt->pos *= f;
pt->vec *= f;
}
for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
child_it.forward ())
child_it.data ()->scale (f); //scale child outlines
}
/**********************************************************************
* OUTLINE::scale
*
* Scale OUTLINE by vector
**********************************************************************/
void OUTLINE::scale( // scale OUTLINE
const FCOORD vector //by fcoord
) {
//child outline itertr
OUTLINE_IT child_it(&children);
POLYPT_IT poly_it(&outline); //outline point itertr
POLYPT *pt;
box.scale (vector);
start.set_x ((inT16) floor (start.x () * vector.x () + 0.5));
// ?? Why ICOORD?
start.set_y ((inT16) floor (start.y () * vector.y () + 0.5));
// ?? Why ICOORD?
for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ()) {
pt = poly_it.data ();
pt->pos =
FCOORD (pt->pos.x () * vector.x (), pt->pos.y () * vector.y ());
pt->vec =
FCOORD (pt->vec.x () * vector.x (), pt->vec.y () * vector.y ());
}
for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
child_it.forward ())
//scale child outlines
child_it.data ()->scale (vector);
}
/**********************************************************************
* OUTLINE::rotate
*
* Rotate OUTLINE by the given vector
**********************************************************************/
void OUTLINE::rotate(
const FCOORD vector //by fcoord
) {
//child outline itertr
OUTLINE_IT child_it(&children);
POLYPT_IT poly_it(&outline); //outline point itertr
POLYPT *pt;
box.rotate(vector);
start.rotate(vector);
for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ()) {
pt = poly_it.data ();
pt->pos.rotate(vector);
pt->vec.rotate(vector);
}
for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
child_it.forward ())
//scale child outlines
child_it.data ()->rotate(vector);
}
/**********************************************************************
* OUTLINE::plot
*
* Draw the outline in the given colour.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
// Draws the outline as a closed polyline in the given colour.
//   window - window to draw in
//   colour - pen colour
// Child outlines are not drawn here.
void OUTLINE::plot(ScrollView* window, ScrollView::Color colour) {
  POLYPT_IT it = &outline;               // outline points
  window->Pen(colour);

  // Begin at the first vertex; coordinates are held as ints.
  POLYPT *vertex = it.data();
  int startx = vertex->pos.x();
  int starty = vertex->pos.y();

  // Draw a segment to each successive vertex. The final step returns to
  // the first vertex, closing the loop.
  do {
    it.forward();
    vertex = it.data();
    window->Line(startx, starty, vertex->pos.x(), vertex->pos.y());
    startx = vertex->pos.x();
    starty = vertex->pos.y();
  } while (!it.at_first());
}
#endif
/**********************************************************************
* OUTLINE::operator=
*
* Assignment - deep copy data
**********************************************************************/
// Assignment: copies the bounding box and start point, and replaces the
// point list and child list with deep copies of the source's lists.
// Returns *this.
OUTLINE & OUTLINE::operator= (const OUTLINE & source) {
  // Self-assignment must be a no-op: clearing our lists first would
  // destroy the very points and children that are about to be copied.
  if (this == &source)
    return *this;

  box = source.box;
  start = source.start;

  if (!outline.empty())
    outline.clear();
  outline.deep_copy(&source.outline, &POLYPT::deep_copy);

  if (!children.empty())
    children.clear();
  children.deep_copy(&source.children, &OUTLINE::deep_copy);
  return *this;
}

125
ccmain/poutline.h Normal file
View File

@ -0,0 +1,125 @@
/**********************************************************************
* File: poutline.h (Formerly outline.h)
* Description: OUTLINE class definition.
* Author: Ray Smith
* Created: Wed Oct 23 10:42:40 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef POUTLINE_H
#define POUTLINE_H
#include "scrollview.h"
#include "polyvert.h"
#include "rect.h"
#include "blckerr.h"
#define INTERSECTING MAX_INT16//no winding number
class OUTLINE; //forward declaration
ELISTIZEH_S (OUTLINE)
// A closed polygonal outline: a list of POLYPT vertices, a bounding box,
// an integer start position, and a list of child OUTLINEs. OUTLINEs are
// list elements (ELIST_LINK).
class OUTLINE:public ELIST_LINK
{
public:
OUTLINE() { //empty constructor
}
OUTLINE( //constructor
const ICOORD &startpt, //start point
inT8 *compactloop, //from Tess format
BOOL8 reverse, //reverse it
ICOORD bot_left, //bounding box
ICOORD top_right);
OUTLINE( //constructor
POLYPT_IT *poly_it); //from list of pts
OUTLINE_LIST *child() { //get child list
return &children;
}
//access function: returns the stored box (not recomputed)
const TBOX &bounding_box() const {
return box;
}
void compute_bb(); //set bounding box
//get start position
const ICOORD &start_pos() const {
return start;
}
float area(); //return area
POLYPT_LIST *polypts() { //get poly
return &outline;
}
BOOL8 operator< ( //containment test
OUTLINE & other);
BOOL8 operator> ( //containment test
OUTLINE & other) {
return other < *this; //use the < to do it
}
inT16 winding_number( //get winding number
const FCOORD &testpt); //around this point
void reverse(); //reverse it
void move( // reposition outline
const FCOORD vec); // by FLOAT vector
void scale( // scale outline
const float f); // by multiplier
void scale( // scale outline
const FCOORD vec); // by FLOAT vector
void rotate( // rotate outline
const FCOORD vector); // by fcoord
void plot( //draw one
ScrollView* window, //window to draw in
ScrollView::Color colour); //colour to draw it
// The serialisation hooks below forward to both owned lists, points first.
void prep_serialise() { //set ptrs to counts
outline.prep_serialise ();
children.prep_serialise ();
}
void dump( //write external bits
FILE *f) {
outline.dump (f);
children.dump (f);
}
void de_dump( //read external bits
FILE *f) {
outline.de_dump (f);
children.de_dump (f);
}
make_serialise(OUTLINE)
//assignment (defined out of line)
OUTLINE& operator=(const OUTLINE& source);
// Returns a newly allocated OUTLINE assigned from *src.
static OUTLINE* deep_copy(const OUTLINE* src) {
OUTLINE* outline = new OUTLINE;
*outline = *src;
return outline;
}
private:
TBOX box; //bounding box
ICOORD start; //start coord
POLYPT_LIST outline; //outline points
OUTLINE_LIST children; //child elements
};
#endif

147
ccmain/quadlsq.cpp Normal file
View File

@ -0,0 +1,147 @@
/**********************************************************************
* File: quadlsq.cpp (Formerly qlsq.c)
* Description: Code for least squares approximation of quadratics.
* Author: Ray Smith
* Created: Wed Oct 6 15:14:23 BST 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include <stdio.h>
#include <math.h>
#include "errcode.h"
#include "quadlsq.h"
const ERRCODE EMPTY_QLSQ = "Can't delete from an empty QLSQ";
#define EXTERN
/**********************************************************************
* QLSQ::clear
*
* Function to initialize a QLSQ.
**********************************************************************/
// Resets the fitted coefficients, the element count and every running
// sum to zero.
void QLSQ::clear() {
  n = 0;                                 // no elements
  a = b = c = 0;                         // fitted coefficients
  sigx = sigy = 0;                       // first-order sums
  sigxx = sigxy = sigyy = 0;             // second-order sums
  sigxxx = sigxxy = sigxxxx = 0;         // higher-order sums
}
/**********************************************************************
* QLSQ::add
*
* Add an element to the accumulator.
**********************************************************************/
// Adds the sample (x, y) to the accumulators. The fitted coefficients are
// not updated here.
void QLSQ::add(double x, double y) {
  // x squared in long double, shared by the higher-order sums.
  const long double x_squared = (long double) x * x;

  n++;                                   // count elements
  sigx += x;
  sigy += y;
  sigxx += x * x;
  sigxy += x * y;
  sigyy += y * y;
  sigxxx += x_squared * x;
  sigxxy += x_squared * y;
  sigxxxx += x_squared * x * x;
}
/**********************************************************************
* QLSQ::remove
*
* Delete an element from the accumulator.
**********************************************************************/
void QLSQ::remove( //delete an element
double x, //xcoord
double y //ycoord
) {
if (n <= 0)
//illegal
EMPTY_QLSQ.error ("QLSQ::remove", ABORT, NULL);
n--; //count elements
sigx -= x; //update accumulators
sigy -= y;
sigxx -= x * x;
sigxy -= x * y;
sigyy -= y * y;
sigxxx -= (long double) x *x * x;
sigxxy -= (long double) x *x * y;
sigxxxx -= (long double) x *x * x * x;
}
/**********************************************************************
* QLSQ::fit
*
* Fit the given degree of polynomial and store the result.
**********************************************************************/
// Fits y = a*x^2 + b*x + c to the accumulated samples and stores a, b, c.
//   degree - highest degree to fit
// What is actually fitted:
//   * quadratic  when n >= 4 and degree >= 2
//   * all zeros  when n == 0 or degree < 0
//   * linear     when n > 1 and degree > 0 (a = 0)
//   * constant   otherwise (a = b = 0, c = mean of y)
// If a denominator is exactly zero (no spread in x, for example all x
// equal) the fit drops to the next lower degree instead of dividing by
// zero and storing inf/NaN coefficients.
void QLSQ::fit(int degree) {
  long double cubetemp;                  // intermediates
  long double squaretemp;
  long double top96, bottom96;           // accurate top & bottom

  if (n >= 4 && degree >= 2) {
    cubetemp = sigxxx * n - (long double) sigxx * sigx;
    squaretemp = (long double) sigxx * n - (long double) sigx * sigx;
    bottom96 = cubetemp * cubetemp;
    bottom96 -= squaretemp * (sigxxxx * n - (long double) sigxx * sigxx);
    // Both divisors must be non-zero for a quadratic solution.
    if (bottom96 != 0 && squaretemp != 0) {
      top96 =
        cubetemp * ((long double) sigxy * n - (long double) sigx * sigy);
      top96 += squaretemp * ((long double) sigxx * sigy - sigxxy * n);
      a = top96 / bottom96;
      top96 = ((long double) sigxx * sigx - sigxxx * n) * a
        + (long double) sigxy * n - (long double) sigx * sigy;
      b = top96 / squaretemp;
      c = (sigy - a * sigxx - b * sigx) / n;
      return;
    }
    // Degenerate data: fall through to the lower-degree fit below.
  }

  if (n == 0 || degree < 0) {
    a = b = c = 0;
    return;
  }

  a = 0;
  b = 0;
  if (n > 1 && degree > 0) {
    const double denominator = sigxx * n - sigx * sigx;
    // Zero when x has no spread; keep b = 0 in that case.
    if (denominator != 0)
      b = (sigxy * n - sigx * sigy) / denominator;
  }
  c = (sigy - b * sigx) / n;
}

67
ccmain/quadlsq.h Normal file
View File

@ -0,0 +1,67 @@
/**********************************************************************
* File: quadlsq.h (Formerly qlsq.h)
* Description: Code for least squares approximation of quadratics.
* Author: Ray Smith
* Created: Wed Oct 6 15:14:23 BST 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef QUADLSQ_H
#define QUADLSQ_H
#include "points.h"
// Accumulator for least-squares fitting of y = a*x^2 + b*x + c.
// Samples are added or removed one at a time; fit() then computes the
// coefficients from the running sums.
class QLSQ
{
public:
QLSQ() { //constructor
clear(); //set to zeros
}
void clear(); //initialize
void add( //add element
double x, //coords to add
double y);
void remove( //delete element
double x, //coords to delete
double y);
inT32 count() { //no of elements
return n;
}
void fit( //fit a polynomial
int degree); //degree to fit
// The getters return the coefficients stored by the last fit() (or zero
// after clear()).
double get_a() { //get x squared coefficient
return a;
}
double get_b() { //get x coefficient
return b;
}
double get_c() { //get constant term
return c;
}
private:
inT32 n; //no of elements
double a, b, c; //result
double sigx; //sum of x
double sigy; //sum of y
double sigxx; //sum x squared
double sigxy; //sum of xy
double sigyy; //sum y squared
long double sigxxx; //sum x cubed
long double sigxxy; //sum xsquared y
long double sigxxxx; //sum x fourth
};
#endif

21
ccmain/quadratc.cpp Normal file
View File

@ -0,0 +1,21 @@
/**********************************************************************
* File: quadratc.cpp (Formerly quadrtic.c)
* Description: Code for the QUAD_COEFFS class.
* Author: Ray Smith
* Created: Tue Oct 08 17:24:40 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "quadratc.h"

63
ccmain/quadratc.h Normal file
View File

@ -0,0 +1,63 @@
/**********************************************************************
* File: quadratc.h (Formerly quadrtic.h)
* Description: Code for the QUAD_COEFFS class.
* Author: Ray Smith
* Created: Tue Oct 08 17:24:40 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef QUADRATC_H
#define QUADRATC_H
#include "points.h"
// Coefficients of the quadratic y = a*x^2 + b*x + c.
class QUAD_COEFFS
{
  public:
    // Empty constructor: coefficients are left unset.
    QUAD_COEFFS() {
    }

    // Builds the quadratic from its three coefficients.
    QUAD_COEFFS(double xsq,        // x squared coefficient
                float x,           // x coefficient
                float constant)    // constant term
      : a(xsq), b(x), c(constant) {
    }

    // Evaluates the quadratic at x.
    float y(float x) const {
      return (float) ((a * x + b) * x + c);
    }

    // Shifts the curve by vec = (p, q). Substituting (x - p, y - q):
    //   y - q = a (x - p)^2 + b (x - p) + c
    //   y     = a x^2 + (b - 2ap) x + (c - bp + ap^2 + q)
    // c is updated first because its new value needs the old b.
    void move(ICOORD vec) {
      inT16 p = vec.x ();
      inT16 q = vec.y ();

      c = (float) (c - b * p + a * p * p + q);
      b = (float) (b - 2 * a * p);
    }

    double a;                      // x squared
    float b;                       // x
    float c;                       // constant

  private:
};
#endif

382
ccmain/quspline.cpp Normal file
View File

@ -0,0 +1,382 @@
/**********************************************************************
* File: quspline.cpp (Formerly qspline.c)
* Description: Code for the QSPLINE class.
* Author: Ray Smith
* Created: Tue Oct 08 17:16:12 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "memry.h"
#include "quadlsq.h"
#include "quspline.h"
#define QSPLINE_PRECISION 16 //no of steps to draw
/**********************************************************************
* QSPLINE::QSPLINE
*
* Constructor to build a QSPLINE given the components used in the old code.
**********************************************************************/
QSPLINE::QSPLINE(inT32 count,      // number of segments
                 inT32 *xstarts,   // count + 1 segment boundaries
                 double *coeffs) { // 3 coefficients (a, b, c) per segment
  // One more boundary than there are segments.
  xcoords = (inT32 *) alloc_mem ((count + 1) * sizeof (inT32));
  quadratics = (QUAD_COEFFS *) alloc_mem (count * sizeof (QUAD_COEFFS));
  segments = count;

  inT32 index = 0;
  const double *triple = coeffs;   // a, b, c of the current segment
  while (index < segments) {
    xcoords[index] = xstarts[index];
    quadratics[index] = QUAD_COEFFS (triple[0], triple[1], triple[2]);
    triple += 3;
    index++;
  }
  // Right edge of the last segment.
  xcoords[index] = xstarts[index];
}
/**********************************************************************
* QSPLINE::QSPLINE
*
* Constructor to build a QSPLINE by approximation of points.
**********************************************************************/
QSPLINE::QSPLINE (               //constructor
int xstarts[],                   //spline boundaries
int segcount,                    //no of segments
int xpts[],                      //points to fit
int ypts[], int pointcount,      //no of pts
int degree                       //fit required
) {
  // Fits one quadratic per segment to the points (xpts[i], ypts[i]).
  // xstarts holds segcount + 1 boundaries. The bucketing below walks the
  // points once, left to right -- assumes xpts is in ascending order
  // (TODO confirm against callers).
  int pointindex;                /*no along text line */
  int segment;                   /*segment no */
  inT32 *ptcounts;               //cumulative point counts
  QLSQ qlsq;                     /*accumulator */

  segments = segcount;
  xcoords = (inT32 *) alloc_mem ((segcount + 1) * sizeof (inT32));
  ptcounts = (inT32 *) alloc_mem ((segcount + 1) * sizeof (inT32));
  quadratics = (QUAD_COEFFS *) alloc_mem (segcount * sizeof (QUAD_COEFFS));
  memmove (xcoords, xstarts, (segcount + 1) * sizeof (inT32));

  // Build cumulative counts: ptcounts[s] becomes the index of the first
  // point that was counted after boundary s had been passed.
  ptcounts[0] = 0;               /*none in any yet */
  for (segment = 0, pointindex = 0; pointindex < pointcount; pointindex++) {
    while (segment < segcount && xpts[pointindex] >= xstarts[segment]) {
      segment++;                 /*try next segment */
                                 /*cumulative counts */
      ptcounts[segment] = ptcounts[segment - 1];
    }
    ptcounts[segment]++;         /*no in previous partition */
  }
  while (segment < segcount) {
    segment++;
                                 /*zero the rest */
    ptcounts[segment] = ptcounts[segment - 1];
  }

  for (segment = 0; segment < segcount; segment++) {
    qlsq.clear ();
                                 /*first blob */
    pointindex = ptcounts[segment];
    // Add a point interpolated onto the left boundary of the segment.
    // pointindex may equal pointcount when no point lies at or beyond this
    // boundary, so it must be range-checked before xpts/ypts are read.
    if (pointindex > 0 && pointindex < pointcount
      && xpts[pointindex] != xpts[pointindex - 1]
      && xpts[pointindex] != xstarts[segment])
      qlsq.add (xstarts[segment],
        ypts[pointindex - 1]
        + (ypts[pointindex] - ypts[pointindex - 1])
        * (xstarts[segment] - xpts[pointindex - 1])
        / (xpts[pointindex] - xpts[pointindex - 1]));
    // Add the points that belong to this segment.
    for (; pointindex < ptcounts[segment + 1]; pointindex++) {
      qlsq.add (xpts[pointindex], ypts[pointindex]);
    }
    // Add a point interpolated onto the right boundary of the segment.
    // Equal neighbouring x values are skipped to avoid dividing by zero.
    if (pointindex > 0 && pointindex < pointcount
      && xpts[pointindex] != xpts[pointindex - 1]
      && xpts[pointindex] != xstarts[segment + 1])
      qlsq.add (xstarts[segment + 1],
        ypts[pointindex - 1]
        + (ypts[pointindex] - ypts[pointindex - 1])
        * (xstarts[segment + 1] - xpts[pointindex - 1])
        / (xpts[pointindex] - xpts[pointindex - 1]));
    qlsq.fit (degree);
    quadratics[segment].a = qlsq.get_a ();
    quadratics[segment].b = qlsq.get_b ();
    quadratics[segment].c = qlsq.get_c ();
  }
  free_mem(ptcounts);
}
/**********************************************************************
* QSPLINE::QSPLINE
*
* Constructor to build a QSPLINE from another.
**********************************************************************/
QSPLINE::QSPLINE(const QSPLINE &src)
  : segments(0),
    xcoords(NULL),
    quadratics(NULL) {
  // Start out empty so the assignment operator has nothing to free,
  // then let it perform the copy.
  *this = src;
}
/**********************************************************************
* QSPLINE::~QSPLINE
*
* Destroy a QSPLINE.
**********************************************************************/
QSPLINE::~QSPLINE () {           //destructor
  // Release the boundary array, if there is one.
  if (xcoords != NULL) {
    free_mem(xcoords);
    xcoords = NULL;
  }
  // Release the quadratic pieces, if there are any.
  if (quadratics != NULL) {
    free_mem(quadratics);
    quadratics = NULL;
  }
}
/**********************************************************************
* QSPLINE::operator=
*
* Copy a QSPLINE
**********************************************************************/
QSPLINE & QSPLINE::operator= (   //assignment
const QSPLINE & source) {
  // Self-assignment must be a no-op: freeing our arrays first would
  // destroy the very data we are about to copy.
  if (this == &source)
    return *this;

  if (xcoords != NULL)
    free_mem(xcoords);
  if (quadratics != NULL)
    free_mem(quadratics);
  xcoords = NULL;
  quadratics = NULL;

  segments = source.segments;
  // An empty source (e.g. default constructed) has no arrays; copying from
  // its NULL pointers would be invalid, so only existing arrays are cloned.
  if (source.xcoords != NULL) {
    xcoords = (inT32 *) alloc_mem ((segments + 1) * sizeof (inT32));
    memmove (xcoords, source.xcoords, (segments + 1) * sizeof (inT32));
  }
  if (source.quadratics != NULL) {
    quadratics = (QUAD_COEFFS *) alloc_mem (segments * sizeof (QUAD_COEFFS));
    memmove (quadratics, source.quadratics, segments * sizeof (QUAD_COEFFS));
  }
  return *this;
}
/**********************************************************************
* QSPLINE::step
*
* Return the total of the step functions between the given coords.
**********************************************************************/
double QSPLINE::step(double x1,    // start of range
                     double x2) {  // end of range
  const int first = spline_index (x1);
  const int last = spline_index (x2);
  double total = 0;
  // At every boundary between the two segments, add the jump from the
  // left-hand quadratic to the right-hand quadratic.
  for (int seg = first; seg < last; seg++) {
    total += (double) quadratics[seg + 1].y ((float) xcoords[seg + 1]);
    total -= (double) quadratics[seg].y ((float) xcoords[seg + 1]);
  }
  return total;
}
/**********************************************************************
* QSPLINE::y
*
* Return the y value at the given x value.
**********************************************************************/
double QSPLINE::y( //evaluate
double x //coord to evaluate at
) const {
inT32 index; //segment index
index = spline_index (x);
return quadratics[index].y (x);//in correct segment
}
/**********************************************************************
* QSPLINE::spline_index
*
* Return the index to the largest xcoord not greater than x.
**********************************************************************/
inT32 QSPLINE::spline_index(double x) const {
  // Binary search over the segment boundaries. The answer always stays
  // within [low, high), starting from [0, segments).
  inT32 low = 0;
  inT32 high = segments;
  while (high - low > 1) {
    const inT32 middle = (high + low) / 2;
    if (x >= xcoords[middle]) {
      low = middle;              // x is at or right of this boundary
    } else {
      high = middle;             // x is left of this boundary
    }
  }
  return low;
}
/**********************************************************************
* QSPLINE::move
*
* Reposition spline by vector
**********************************************************************/
void QSPLINE::move(ICOORD vec) {
  const inT16 x_shift = vec.x ();
  inT32 seg = 0;
  // Shift the left boundary and the quadratic of every segment.
  while (seg < segments) {
    xcoords[seg] += x_shift;
    quadratics[seg].move (vec);
    ++seg;
  }
  // The final boundary has no quadratic of its own.
  xcoords[seg] += x_shift;
}
/**********************************************************************
* QSPLINE::overlap
*
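* Return TRUE if spline2 has at least 3 segments and its interior extent
* (second boundary to last-but-one boundary) falls short of the interior
* extent of this spline by no more than fraction of that extent at each end.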
**********************************************************************/
BOOL8 QSPLINE::overlap(QSPLINE *spline2,   // spline to compare against
                       double fraction) {  // permitted shortfall
  // Interior limits of this spline: second and last-but-one boundaries.
  const int leftlimit = xcoords[1];
  const int rightlimit = xcoords[segments - 1];

  // Too few segments in spline2 to have interior limits worth comparing.
  if (spline2->segments < 3)
    return FALSE;
  // spline2 starts too far to the right.
  if (spline2->xcoords[1] > leftlimit + fraction * (rightlimit - leftlimit))
    return FALSE;
  // spline2 ends too far to the left.
  if (spline2->xcoords[spline2->segments - 1] < rightlimit
    - fraction * (rightlimit - leftlimit))
    return FALSE;
  return TRUE;
}
/**********************************************************************
* QSPLINE::extrapolate
*
* Extrapolates the spline linearly using the same gradient as the
* quadratic has at either end.
**********************************************************************/
void QSPLINE::extrapolate( //linear extrapolation
double gradient, //gradient to use
int xmin, //new left edge
int xmax //new right edge
) {
register int segment; /*current segment of spline */
int dest_segment; //dest index
int *xstarts; //new boundaries
QUAD_COEFFS *quads; //new ones
int increment; //in size
increment = xmin < xcoords[0] ? 1 : 0;
if (xmax > xcoords[segments])
increment++;
if (increment == 0)
return;
xstarts = (int *) alloc_mem ((segments + 1 + increment) * sizeof (int));
quads =
(QUAD_COEFFS *) alloc_mem ((segments + increment) * sizeof (QUAD_COEFFS));
if (xmin < xcoords[0]) {
xstarts[0] = xmin;
quads[0].a = 0;
quads[0].b = gradient;
quads[0].c = y (xcoords[0]) - quads[0].b * xcoords[0];
dest_segment = 1;
}
else
dest_segment = 0;
for (segment = 0; segment < segments; segment++) {
xstarts[dest_segment] = xcoords[segment];
quads[dest_segment] = quadratics[segment];
dest_segment++;
}
xstarts[dest_segment] = xcoords[segment];
if (xmax > xcoords[segments]) {
quads[dest_segment].a = 0;
quads[dest_segment].b = gradient;
quads[dest_segment].c = y (xcoords[segments])
- quads[dest_segment].b * xcoords[segments];
dest_segment++;
xstarts[dest_segment] = xmax + 1;
}
segments = dest_segment;
free_mem(xcoords);
free_mem(quadratics);
xcoords = (inT32 *) xstarts;
quadratics = quads;
}
/**********************************************************************
* QSPLINE::plot
*
* Draw the QSPLINE in the given colour.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
void QSPLINE::plot(ScrollView* window,              // window to draw in
                   ScrollView::Color colour) const { // colour to draw in
  window->Pen(colour);
  for (inT32 segment = 0; segment < segments; segment++) {
    // Each segment is drawn as QSPLINE_PRECISION straight pieces.
    const double increment =
      (double) (xcoords[segment + 1] - xcoords[segment]) / QSPLINE_PRECISION;
    double x = xcoords[segment];
    for (inT16 piece = 0; piece <= QSPLINE_PRECISION; piece++, x += increment) {
      const bool very_first_point = (segment == 0 && piece == 0);
      if (very_first_point)
        window->SetCursor(x, quadratics[segment].y (x));
      else
        window->DrawTo(x, quadratics[segment].y (x));
    }
  }
}
#endif

113
ccmain/quspline.h Normal file
View File

@ -0,0 +1,113 @@
/**********************************************************************
* File: quspline.h (Formerly qspline.h)
* Description: Code for the QSPLINE class.
* Author: Ray Smith
* Created: Tue Oct 08 17:16:12 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef QUSPLINE_H
#define QUSPLINE_H
#include "quadratc.h"
#include "serialis.h"
#include "memry.h"
#include "rect.h"
class ROW;
class QSPLINE
{
friend void make_first_baseline(TBOX *,
int,
int *,
int *,
QSPLINE *,
QSPLINE *,
float);
friend void make_holed_baseline(TBOX *, int, QSPLINE *, QSPLINE *, float);
friend void tweak_row_baseline(ROW *);
public:
QSPLINE() { //empty constructor
segments = 0;
xcoords = NULL; //everything empty
quadratics = NULL;
}
QSPLINE( //copy constructor
const QSPLINE &src);
QSPLINE( //constructor
inT32 count, //number of segments
inT32 *xstarts, //segment starts
double *coeffs); //coefficients
~QSPLINE (); //destructor
QSPLINE ( //least squares fit
int xstarts[], //spline boundaries
int segcount, //no of segments
int xcoords[], //points to fit
int ycoords[], int blobcount,//no of coords
int degree); //function
double step( //step change
double x1, //between coords
double x2);
double y( //evaluate
double x) const; //at x
void move( // reposition spline
ICOORD vec); // by vector
BOOL8 overlap( //test overlap
QSPLINE *spline2, //2 cannot be smaller
double fraction); //by more than this
void extrapolate( //linear extrapolation
double gradient, //gradient to use
int left, //new left edge
int right); //new right edge
#ifndef GRAPHICS_DISABLED
void plot( //draw it
ScrollView* window, //in window
ScrollView::Color colour) const; //in colour
#endif
void prep_serialise() { //set ptrs to counts
} //not required
void dump( //write external bits
FILE *f) {
serialise_bytes (f, (void *) xcoords, (segments + 1) * sizeof (inT32));
serialise_bytes (f, (void *) quadratics, segments * sizeof (QUAD_COEFFS));
}
void de_dump( //read external bits
FILE *f) {
xcoords = (inT32 *) de_serialise_bytes (f,
(segments + 1) * sizeof (inT32));
quadratics = (QUAD_COEFFS *) de_serialise_bytes (f,
segments *
sizeof (QUAD_COEFFS));
}
//assign copy
make_serialise (QSPLINE) QSPLINE & operator= (
const QSPLINE & source); //from this
private:
inT32 spline_index( //binary search
double x) const; //for x
inT32 segments; //no of segments
inT32 *xcoords; //no of coords
QUAD_COEFFS *quadratics; //spline pieces
};
#endif

517
ccmain/ratngs.cpp Normal file
View File

@ -0,0 +1,517 @@
/**********************************************************************
* File: ratngs.cpp (Formerly ratings.c)
* Description: Code to manipulate the BLOB_CHOICE and WERD_CHOICE classes.
* Author: Ray Smith
* Created: Thu Apr 23 13:23:29 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "ratngs.h"
#include "callcpp.h"
#include "genericvector.h"
#include "unicharset.h"
// File that print_ratings_info() writes its output to; defined elsewhere.
extern FILE *matcher_fp;
// List macro invocations for BLOB_CHOICE, BLOB_CHOICE_LIST and WERD_CHOICE
// (presumably the implementations matching the ELISTIZEH/CLISTIZEH
// declarations in ratngs.h -- TODO confirm against the list headers).
ELISTIZE (BLOB_CHOICE) CLISTIZE (BLOB_CHOICE_LIST) CLISTIZE (WERD_CHOICE)
/**********************************************************************
* BLOB_CHOICE::BLOB_CHOICE
*
* Constructor to build a BLOB_CHOICE from a char, rating and certainty.
**********************************************************************/
BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
                         float src_rating,          // rating
                         float src_cert,            // certainty
                         inT8 src_config,           // config (font)
                         int src_script_id          // script
                        )
  : unichar_id_(src_unichar_id),
    config_(src_config),
    rating_(src_rating),
    certainty_(src_cert),
    script_id_(src_script_id) {
  // Everything is set in the initialiser list, in member declaration order.
}
/**********************************************************************
* BLOB_CHOICE::BLOB_CHOICE
*
* Constructor to build a BLOB_CHOICE from another BLOB_CHOICE.
**********************************************************************/
BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other)
  : unichar_id_(other.unichar_id_),
    config_(other.config_),
    rating_(other.rating_),
    certainty_(other.certainty_),
    script_id_(other.script_id_) {
  // Copies the choice data only; the list-link base is not copied from
  // other.
}
/**********************************************************************
* WERD_CHOICE::WERD_CHOICE
*
* Constructor to build a WERD_CHOICE from the given string.
* The function assumes that src_string is not NULL.
**********************************************************************/
WERD_CHOICE::WERD_CHOICE(const char *src_string,
                         const UNICHARSET &unicharset) {
  STRING src_lengths;            // one length entry per unichar found
  int len = strlen(src_string);
  const char *const end = src_string + len;
  const char *ptr = src_string;

  // Walk the string one unichar at a time, recording each unichar's byte
  // length, until the end is reached or unicharset.step() reports 0.
  int step = unicharset.step(ptr);
  while (ptr < end && step > 0) {
    step = unicharset.step(ptr);
    src_lengths += step;
    ptr += step;
  }

  const bool consumed_everything = (step != 0 && ptr == end);
  if (!consumed_everything) {
    // There must have been an invalid unichar in the string.
    this->init(8);
    this->make_bad();
    return;
  }
  this->init(src_string, src_lengths.string(),
             0.0, 0.0, NO_PERM, unicharset);
}
/**********************************************************************
* WERD_CHOICE::init
*
* Helper function to build a WERD_CHOICE from the given string,
* fragment lengths, rating, certainty and permuter.
*
* The function assumes that src_string is not NULL.
* src_lengths argument could be NULL, in which case the unichars
* in src_string are assumed to all be of length 1.
**********************************************************************/
void WERD_CHOICE::init(const char *src_string,
                       const char *src_lengths,
                       float src_rating,
                       float src_certainty,
                       uinT8 src_permuter,
                       const UNICHARSET &unicharset) {
  const int src_string_len = strlen(src_string);
  if (src_string_len == 0) {
    // Empty word: just reserve the default amount of space.
    this->init(8);
  } else {
    // One unichar per entry of src_lengths, or per byte when it is NULL.
    const int unichar_count =
      src_lengths ? strlen(src_lengths) : src_string_len;
    this->init(unichar_count);
    length_ = reserved_;
    const char *unichar_start = src_string;
    for (int i = 0; i < length_; ++i) {
      const int unichar_length = src_lengths ? src_lengths[i] : 1;
      unichar_ids_[i] =
        unicharset.unichar_to_id(unichar_start, unichar_length);
      fragment_lengths_[i] = 1;
      unichar_start += unichar_length;
    }
  }
  rating_ = src_rating;
  certainty_ = src_certainty;
  permuter_ = src_permuter;
}
/**********************************************************************
* WERD_CHOICE::~WERD_CHOICE
**********************************************************************/
WERD_CHOICE::~WERD_CHOICE() {
  // Release the blob choices list, then the two per-unichar arrays.
  delete_blob_choices();
  delete[] fragment_lengths_;
  delete[] unichar_ids_;
}
/**********************************************************************
* WERD_CHOICE::set_blob_choices
*
* Delete current blob_choices. Set the blob_choices to the given new
* list.
**********************************************************************/
void WERD_CHOICE::set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices) {
  // Being handed the list we already hold must not delete it.
  if (blob_choices_ == blob_choices)
    return;
  delete_blob_choices();
  blob_choices_ = blob_choices;
}
/**********************************************************************
* contains_unichar_id
*
* Returns true if unichar_ids_ contain the given unichar_id, false otherwise.
************************************************************************/
bool WERD_CHOICE::contains_unichar_id(UNICHAR_ID unichar_id) const {
  // Linear scan; stops at the first match.
  int pos = 0;
  while (pos < length_ && unichar_ids_[pos] != unichar_id) {
    ++pos;
  }
  return pos < length_;
}
/**********************************************************************
* remove_unichar_ids
*
* Removes num unichar ids starting from index start from unichar_ids_
* and updates length_ and fragment_lengths_ to reflect this change.
* Note: this function does not modify rating_ and certainty_.
***********************************************************************/
void WERD_CHOICE::remove_unichar_ids(int start, int num) {
  ASSERT_HOST(start >= 0 && start + num <= length_);
  // Slide everything after the removed range down by num places.
  int dest = start;
  for (int src = start + num; src < length_; ++src, ++dest) {
    unichar_ids_[dest] = unichar_ids_[src];
    fragment_lengths_[dest] = fragment_lengths_[src];
  }
  length_ -= num;
}
/**********************************************************************
* string_and_lengths
*
* Populates the given word_str with unichars from unichar_ids and
* word_lengths_str with the corresponding unichar lengths.
* Uses current_unicharset to make unichar id -> unichar conversions.
**********************************************************************/
void WERD_CHOICE::string_and_lengths(const UNICHARSET &current_unicharset,
                                     STRING *word_str,
                                     STRING *word_lengths_str) const {
  // The lengths string is optional; callers may pass NULL for it.
  const bool want_lengths = (word_lengths_str != NULL);
  *word_str = "";
  if (want_lengths) {
    *word_lengths_str = "";
  }
  for (int i = 0; i < length_; ++i) {
    const char *ch = current_unicharset.id_to_unichar(unichar_ids_[i]);
    *word_str += ch;
    if (want_lengths) {
      *word_lengths_str += strlen(ch);
    }
  }
}
/**********************************************************************
* append_unichar_id
*
* Make sure there is enough space in the word for the new unichar id
* and call append_unichar_id_space_allocated().
**********************************************************************/
void WERD_CHOICE::append_unichar_id(
    UNICHAR_ID unichar_id, char fragment_length,
    float rating, float certainty) {
  // Grow the storage first if every reserved slot is already in use.
  const bool storage_full = (length_ == reserved_);
  if (storage_full) {
    this->double_the_size();
  }
  this->append_unichar_id_space_allocated(unichar_id, fragment_length,
                                          rating, certainty);
}
/**********************************************************************
* WERD_CHOICE::operator+=
*
* Cat a second word rating on the end of this current one.
* The ratings are added and the confidence is the min.
* If the permuters are NOT the same the permuter is set to COMPOUND_PERM
**********************************************************************/
WERD_CHOICE & WERD_CHOICE::operator+= (const WERD_CHOICE & second) {
  // TODO(daria): an earlier version emptied the whole choice whenever either
  // piece was empty. Find out why, and document that behaviour if it is
  // still wanted.

  // Make room for the unichars of second.
  while (reserved_ < length_ + second.length()) {
    this->double_the_size();
  }

  // Append the unichar ids and fragment lengths of second.
  const UNICHAR_ID *other_unichar_ids = second.unichar_ids();
  const char *other_fragment_lengths = second.fragment_lengths();
  UNICHAR_ID *ids_tail = unichar_ids_ + length_;
  char *lengths_tail = fragment_lengths_ + length_;
  for (int i = 0; i < second.length(); ++i) {
    ids_tail[i] = other_unichar_ids[i];
    lengths_tail[i] = other_fragment_lengths[i];
  }
  length_ += second.length();

  // Ratings add up; the certainty is the lower of the two.
  rating_ += second.rating();
  if (second.certainty() < certainty_) {
    certainty_ = second.certainty();
  }

  // Two different real permuters combine to COMPOUND_PERM.
  if (permuter_ == NO_PERM) {
    permuter_ = second.permuter();
  } else if (second.permuter() != NO_PERM &&
             second.permuter() != permuter_) {
    permuter_ = COMPOUND_PERM;
  }

  unichar_string_ += second.unichar_string();
  unichar_lengths_ += second.unichar_lengths();

  // Nothing more to do unless second carries blob choices.
  if (second.blob_choices_ == NULL) {
    return *this;
  }

  // Append a deep copy of each of second's blob choice lists.
  if (this->blob_choices_ == NULL) {
    this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST;
  }
  BLOB_CHOICE_LIST_C_IT this_blob_choices_it;
  BLOB_CHOICE_LIST_C_IT second_blob_choices_it;
  this_blob_choices_it.set_to_list(this->blob_choices_);
  this_blob_choices_it.move_to_last();
  second_blob_choices_it.set_to_list(second.blob_choices_);
  for (second_blob_choices_it.mark_cycle_pt();
       !second_blob_choices_it.cycled_list();
       second_blob_choices_it.forward()) {
    BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST();
    blob_choices_copy->deep_copy(second_blob_choices_it.data(),
                                 &BLOB_CHOICE::deep_copy);
    this_blob_choices_it.add_after_then_move(blob_choices_copy);
  }
  return *this;
}
/**********************************************************************
* WERD_CHOICE::operator=
*
* Allocate enough memory to hold a copy of source and copy over
* all the information from source to this WERD_CHOICE.
**********************************************************************/
WERD_CHOICE& WERD_CHOICE::operator=(const WERD_CHOICE& source) {
  // Self-assignment must be a no-op. Without this check the call to
  // delete_blob_choices() below would destroy source's (our own) blob
  // choices before they could be copied.
  if (this == &source) {
    return *this;
  }

  // Make sure there is room for all of source's unichars.
  while (reserved_ < source.length()) {
    this->double_the_size();
  }

  // Copy the per-unichar data.
  const UNICHAR_ID *other_unichar_ids = source.unichar_ids();
  const char *other_fragment_lengths = source.fragment_lengths();
  for (int i = 0; i < source.length(); ++i) {
    unichar_ids_[i] = other_unichar_ids[i];
    fragment_lengths_[i] = other_fragment_lengths[i];
  }

  // Copy the scalar members and cached strings.
  length_ = source.length();
  rating_ = source.rating();
  certainty_ = source.certainty();
  permuter_ = source.permuter();
  fragment_mark_ = source.fragment_mark();
  unichar_string_ = source.unichar_string();
  unichar_lengths_ = source.unichar_lengths();

  // Delete existing blob_choices
  this->delete_blob_choices();

  // Deep copy blob_choices of source
  if (source.blob_choices_ != NULL) {
    BLOB_CHOICE_LIST_C_IT this_blob_choices_it;
    BLOB_CHOICE_LIST_C_IT source_blob_choices_it;

    this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST();
    this_blob_choices_it.set_to_list(this->blob_choices_);
    source_blob_choices_it.set_to_list(source.blob_choices_);

    for (source_blob_choices_it.mark_cycle_pt();
         !source_blob_choices_it.cycled_list();
         source_blob_choices_it.forward()) {
      BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST();
      blob_choices_copy->deep_copy(source_blob_choices_it.data(),
                                   &BLOB_CHOICE::deep_copy);
      this_blob_choices_it.add_after_then_move(blob_choices_copy);
    }
  }
  return *this;
}
/**********************************************************************
* WERD_CHOICE::delete_blob_choices
*
* Clear the blob_choices list, delete it and set it to NULL.
**********************************************************************/
void WERD_CHOICE::delete_blob_choices() {
  if (blob_choices_ == NULL)
    return;                      // nothing to release
  // Empty the list with deep_clear(), then delete the list object itself.
  blob_choices_->deep_clear();
  delete blob_choices_;
  blob_choices_ = NULL;
}
/**********************************************************************
* WERD_CHOICE::print
*
* Print WERD_CHOICE to stdout.
**********************************************************************/
const void WERD_CHOICE::print(const char *msg) const {
  // Summary lines.
  tprintf("%s WERD_CHOICE:\n", msg);
  tprintf("length_ %d reserved_ %d permuter_ %d\n",
          length_, reserved_, permuter_);
  tprintf("rating_ %.4f certainty_ %.4f", rating_, certainty_);
  if (fragment_mark_) {
    tprintf(" fragment_mark_ true");
  }
  tprintf("\n");

  // The cached strings are only shown when there is something in them.
  if (unichar_string_.length() > 0) {
    tprintf("unichar_string_ %s unichar_lengths_ %s\n",
            unichar_string_.string(), unichar_lengths_.string());
  }

  // Per-unichar arrays, one line each.
  tprintf("unichar_ids: ");
  for (int id_index = 0; id_index < length_; ++id_index) {
    tprintf("%d ", unichar_ids_[id_index]);
  }
  tprintf("\nfragment_lengths_: ");
  for (int frag_index = 0; frag_index < length_; ++frag_index) {
    tprintf("%d ", fragment_lengths_[frag_index]);
  }
  tprintf("\n");
  fflush(stdout);
}
/**********************************************************************
* print_ratings_list
*
* Send all the ratings out to the logfile.
**********************************************************************/
void print_ratings_list(
const char *msg, // intro message
BLOB_CHOICE_LIST *ratings, // list of results
const UNICHARSET &current_unicharset // unicharset that can be used
// for id-to-unichar conversion
) {
if (ratings->length() == 0) {
tprintf("%s:<none>\n", msg);
return;
}
if (*msg != '\0') {
tprintf("%s\n", msg);
}
BLOB_CHOICE_IT c_it;
c_it.set_to_list(ratings);
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
tprintf("r%.2f c%.2f : %d %s",
c_it.data()->rating(), c_it.data()->certainty(),
c_it.data()->unichar_id(),
current_unicharset.debug_str(c_it.data()->unichar_id()).string());
if (!c_it.at_last()) {
tprintf("\n");
}
}
tprintf("\n");
fflush(stdout);
}
/**********************************************************************
* print_ratings_list
*
* Print ratings list (unichar ids only).
**********************************************************************/
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings) {
if (ratings->length() == 0) {
tprintf("%s:<none>\n", msg);
return;
}
if (*msg != '\0') {
tprintf("%s\n", msg);
}
BLOB_CHOICE_IT c_it;
c_it.set_to_list(ratings);
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
tprintf("r%.2f c%.2f : %d", c_it.data()->rating(),
c_it.data()->certainty(), c_it.data()->unichar_id());
if (!c_it.at_last()) {
tprintf("\n");
}
}
tprintf("\n");
fflush(stdout);
}
/**********************************************************************
* print_ratings_info
*
* Send all the ratings out to the logfile.
**********************************************************************/
void print_ratings_info(
FILE *fp, // file to use
BLOB_CHOICE_LIST *ratings, // list of results
const UNICHARSET &current_unicharset // unicharset that can be used
// for id-to-unichar conversion
) {
inT32 index; // to list
inT32 best_index; // to list
FLOAT32 best_rat; // rating
FLOAT32 best_cert; // certainty
const char* first_char = NULL; // character
FLOAT32 first_rat; // rating
FLOAT32 first_cert; // certainty
const char* sec_char = NULL; // character
FLOAT32 sec_rat = 0.0f; // rating
FLOAT32 sec_cert = 0.0f; // certainty
BLOB_CHOICE_IT c_it = ratings; // iterator
index = ratings->length();
if (index > 0) {
first_char = current_unicharset.id_to_unichar(c_it.data()->unichar_id());
first_rat = c_it.data()->rating();
first_cert = -c_it.data()->certainty();
if (index > 1) {
sec_char = current_unicharset.id_to_unichar(
c_it.data_relative(1)->unichar_id());
sec_rat = c_it.data_relative(1)->rating();
sec_cert = -c_it.data_relative(1)->certainty();
} else {
sec_char = NULL;
sec_rat = -1;
sec_cert = -1;
}
} else {
first_char = NULL;
first_rat = -1;
first_cert = -1;
}
best_index = -1;
best_rat = -1;
best_cert = -1;
for (index = 0, c_it.mark_cycle_pt(); !c_it.cycled_list();
c_it.forward(), index++) {
if (strcmp(current_unicharset.id_to_unichar(c_it.data()->unichar_id()),
blob_answer) == 0) {
best_index = index;
best_rat = c_it.data()->rating();
best_cert = -c_it.data()->certainty();
}
}
if (first_char != NULL && (*first_char == '\0' || *first_char == ' '))
first_char = NULL;
if (sec_char != NULL && (*sec_char == '\0' || *sec_char == ' '))
sec_char = NULL;
fprintf(matcher_fp,
" " INT32FORMAT " " INT32FORMAT " %g %g %s %g %g %s %g %g\n",
ratings->length(), best_index, best_rat, best_cert,
first_char != NULL ? first_char : "~",
first_rat, first_cert, sec_char != NULL ? sec_char : "~",
sec_rat, sec_cert);
}
/**********************************************************************
* print_char_choices_list
**********************************************************************/
void print_char_choices_list(const char *msg,
const BLOB_CHOICE_LIST_VECTOR &char_choices,
const UNICHARSET &current_unicharset,
BOOL8 detailed) {
if (*msg != '\0') tprintf("%s\n", msg);
for (int x = 0; x < char_choices.length(); ++x) {
BLOB_CHOICE_IT c_it;
c_it.set_to_list(char_choices.get(x));
tprintf("char[%d]: %s\n", x,
current_unicharset.debug_str( c_it.data()->unichar_id()).string());
if (detailed)
print_ratings_list(" ", char_choices.get(x), current_unicharset);
}
}

349
ccmain/ratngs.h Normal file
View File

@ -0,0 +1,349 @@
/**********************************************************************
* File: ratngs.h (Formerly ratings.h)
* Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
* Author: Ray Smith
* Created: Thu Apr 23 11:40:38 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef RATNGS_H
#define RATNGS_H
#include <assert.h>
#include "clst.h"
#include "genericvector.h"
#include "notdll.h"
#include "unichar.h"
#include "unicharset.h"
#include "werd.h"
class BLOB_CHOICE: public ELIST_LINK
{
public:
BLOB_CHOICE() {
unichar_id_ = INVALID_UNICHAR_ID;
config_ = '\0';
rating_ = MAX_FLOAT32;
certainty_ = -MAX_FLOAT32;
script_id_ = -1;
}
BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
float src_rating, // rating
float src_cert, // certainty
inT8 src_config, // config (font)
int script_id); // script
BLOB_CHOICE(const BLOB_CHOICE &other);
~BLOB_CHOICE() {}
UNICHAR_ID unichar_id() const {
return unichar_id_;
}
float rating() const {
return rating_;
}
float certainty() const {
return certainty_;
}
inT8 config() const {
return config_;
}
int script_id() const {
return script_id_;
}
void set_unichar_id(UNICHAR_ID newunichar_id) {
unichar_id_ = newunichar_id;
}
void set_rating(float newrat) {
rating_ = newrat;
}
void set_certainty(float newrat) {
certainty_ = newrat;
}
void set_config(inT8 newfont) {
config_ = newfont;
}
void set_script(int newscript_id) {
script_id_ = newscript_id;
}
static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) {
BLOB_CHOICE* choice = new BLOB_CHOICE;
*choice = *src;
return choice;
}
NEWDELETE
private:
UNICHAR_ID unichar_id_; // unichar id
char config_; // char config (font)
inT16 junk2_;
float rating_; // size related
float certainty_; // absolute
int script_id_;
};
// Make BLOB_CHOICE listable.
// NOTE(review): the macros are defined outside this header; presumably
// ELISTIZEH supplies the BLOB_CHOICE_LIST / BLOB_CHOICE_IT types and
// CLISTIZEH the BLOB_CHOICE_LIST_CLIST type used by WERD_CHOICE - confirm
// against the macro definitions.
ELISTIZEH (BLOB_CHOICE) CLISTIZEH (BLOB_CHOICE_LIST)
// Permuter codes used in WERD_CHOICEs.  The numeric values are spelled out
// because WERD_CHOICE stores the code in a uinT8 rather than in the enum type.
enum PermuterType {
  NO_PERM = 0,
  PUNC_PERM = 1,
  TOP_CHOICE_PERM = 2,
  LOWER_CASE_PERM = 3,
  UPPER_CASE_PERM = 4,
  NUMBER_PERM = 5,
  SYSTEM_DAWG_PERM = 6,
  DOC_DAWG_PERM = 7,
  USER_DAWG_PERM = 8,
  FREQ_DAWG_PERM = 9,
  COMPOUND_PERM = 10
};
class WERD_CHOICE {
public:
WERD_CHOICE() { this->init(8); }
WERD_CHOICE(int reserved) { this->init(reserved); }
WERD_CHOICE(const char *src_string,
const char *src_lengths,
float src_rating,
float src_certainty,
uinT8 src_permuter,
const UNICHARSET &unicharset) {
this->init(src_string, src_lengths, src_rating,
src_certainty, src_permuter, unicharset);
}
WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset);
WERD_CHOICE(const WERD_CHOICE &word) {
this->init(word.length());
this->operator=(word);
}
~WERD_CHOICE();
inline int length() const {
return length_;
}
inline const UNICHAR_ID *unichar_ids() const {
return unichar_ids_;
}
inline const UNICHAR_ID unichar_id(int index) const {
assert(index < length_);
return unichar_ids_[index];
}
inline const char *fragment_lengths() const {
return fragment_lengths_;
}
inline const char fragment_length(int index) const {
assert(index < length_);
return fragment_lengths_[index];
}
inline float rating() const {
return rating_;
}
inline float certainty() const {
return certainty_;
}
inline uinT8 permuter() const {
return permuter_;
}
inline bool fragment_mark() const {
return fragment_mark_;
}
inline BLOB_CHOICE_LIST_CLIST* blob_choices() {
return blob_choices_;
}
inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
assert(index < length_);
unichar_ids_[index] = unichar_id;
}
inline void set_rating(float new_val) {
rating_ = new_val;
}
inline void set_certainty(float new_val) {
certainty_ = new_val;
}
inline void set_permuter(uinT8 perm) {
permuter_ = perm;
}
inline void set_fragment_mark(bool new_fragment_mark) {
fragment_mark_ = new_fragment_mark;
}
void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices);
// Make more space in unichar_id_ and fragment_lengths_ arrays.
inline void double_the_size() {
unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
reserved_, unichar_ids_);
fragment_lengths_ = GenericVector<char>::double_the_size_memcpy(
reserved_, fragment_lengths_);
reserved_ *= 2;
}
// Initializes WERD_CHOICE - reseves length slots in unichar_ids_ and
// fragment_length_ arrays. Sets other values to default (blank) values.
inline void init(int reserved) {
reserved_ = reserved;
unichar_ids_ = new UNICHAR_ID[reserved];
fragment_lengths_ = new char[reserved];
length_ = 0;
rating_ = 0.0;
certainty_ = MAX_FLOAT32;
permuter_ = NO_PERM;
fragment_mark_ = false;
blob_choices_ = NULL;
unichar_string_ = "";
unichar_lengths_ = "";
}
// Helper function to build a WERD_CHOICE from the given string,
// fragment lengths, rating, certainty and permuter.
// The function assumes that src_string is not NULL.
// src_lengths argument could be NULL, in which case the unichars
// in src_string are assumed to all be of length 1.
void init(const char *src_string, const char *src_lengths,
float src_rating, float src_certainty,
uinT8 src_permuter, const UNICHARSET &current_unicharset);
// Set the fields in this choice to be default (bad) values.
inline void make_bad() {
length_ = 0;
rating_ = MAX_FLOAT32;
certainty_ = -MAX_FLOAT32;
fragment_mark_ = false;
unichar_string_ = "";
unichar_lengths_ = "";
}
// This function assumes that there is enough space reserved
// in the WERD_CHOICE for adding another unichar.
// This is an efficient alternative to append_unichar_id().
inline void append_unichar_id_space_allocated(
UNICHAR_ID unichar_id, char fragment_length,
float rating, float certainty) {
assert(reserved_ > length_);
length_++;
this->set_unichar_id(unichar_id, fragment_length,
rating, certainty, length_-1);
}
void append_unichar_id(UNICHAR_ID unichar_id, char fragment_length,
float rating, float certainty);
inline void set_unichar_id(UNICHAR_ID unichar_id, char fragment_length,
float rating, float certainty, int index) {
assert(index < length_);
unichar_ids_[index] = unichar_id;
fragment_lengths_[index] = fragment_length;
rating_ += rating;
if (certainty < certainty_) {
certainty_ = certainty;
}
}
bool contains_unichar_id(UNICHAR_ID unichar_id) const;
void remove_unichar_ids(int index, int num);
inline void remove_last_unichar_id() { --length_; }
inline void remove_unichar_id(int index) { this->remove_unichar_ids(index, 1); }
void string_and_lengths(const UNICHARSET &current_unicharset,
STRING *word_str, STRING *word_lengths_str) const;
const STRING debug_string(const UNICHARSET &current_unicharset) const {
STRING word_str;
for (int i = 0; i < length_; ++i) {
word_str += current_unicharset.debug_str(unichar_ids_[i]);
word_str += " ";
}
return word_str;
}
// Since this function walks over the whole word to convert unichar ids
// to unichars, it is best to call it once, e.g. after all changes to
// unichar_ids_ in WERD_CHOICE are finished.
void populate_unichars(const UNICHARSET &current_unicharset) {
this->string_and_lengths(current_unicharset, &unichar_string_,
&unichar_lengths_);
}
// This function should only be called if populate_unichars()
// was called and WERD_CHOICE did not change since then.
const STRING &unichar_string() const {
assert(unichar_string_.length() <= 0 ||
unichar_string_.length() >= length_); // sanity check
return unichar_string_;
}
// This function should only be called if populate_unichars()
// was called and WERD_CHOICE did not change since then.
const STRING &unichar_lengths() const {
assert(unichar_lengths_.length() <= 0 ||
unichar_lengths_.length() == length_); // sanity check
return unichar_lengths_;
}
const void print() const { this->print(""); }
const void print(const char *msg) const;
WERD_CHOICE& operator+= ( // concatanate
const WERD_CHOICE & second);// second on first
WERD_CHOICE& operator= (const WERD_CHOICE& source);
NEWDELETE private:
UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word
char *fragment_lengths_; // number of fragments in each unichar
int reserved_; // size of the above arrays
int length_; // word length
float rating_; // size related
float certainty_; // absolute
uinT8 permuter_; // permuter code
bool fragment_mark_; // if true, indicates that this choice
// was chosen over a better one that
// contained a fragment
BLOB_CHOICE_LIST_CLIST *blob_choices_; // best choices for each blob
// The following variables are only populated by calling populate_unichars().
// They are not synchronized with the values in unichar_ids otherwise.
STRING unichar_string_;
STRING unichar_lengths_;
bool unichar_info_present;
private:
void delete_blob_choices();
};
// Make WERD_CHOICE listable.
ELISTIZEH (WERD_CHOICE)
// Vectors holding one list pointer per position (e.g. per character).
typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR;
typedef GenericVector<WERD_CHOICE_LIST *> WERD_CHOICE_LIST_VECTOR;
// Signature of a classifier callback that writes its results into a
// BLOB_CHOICE_LIST.  NOTE(review): the meaning of the other arguments is
// not visible in this header - confirm against the callers.
typedef void (*POLY_TESTER) (const STRING&, PBLOB *, DENORM *, BOOL8,
char *, inT32, BLOB_CHOICE_LIST *);
// Print a ratings list; this overload takes no unicharset.
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings);
// Print a ratings list, using current_unicharset to turn ids into unichars.
void print_ratings_list(
const char *msg, // intro message
BLOB_CHOICE_LIST *ratings, // list of results
const UNICHARSET &current_unicharset // unicharset that can be used
// for id-to-unichar conversion
);
// Write information about a ratings list to the given file.
void print_ratings_info(
FILE *fp, // file to use
BLOB_CHOICE_LIST *ratings, // list of results
const UNICHARSET &current_unicharset // unicharset that can be used
// for id-to-unichar conversion
);
// Print msg (if non-empty) followed by the first choice of every list in
// char_choices; with detailed set, each full ratings list is printed too.
void print_char_choices_list(
const char *msg,
const BLOB_CHOICE_LIST_VECTOR &char_choices,
const UNICHARSET &current_unicharset,
BOOL8 detailed
);
#endif

253
ccmain/rect.cpp Normal file
View File

@ -0,0 +1,253 @@
/**********************************************************************
 * File:        rect.cpp  (Formerly box.c)
* Description: Bounding box class definition.
* Author: Phil Cheatle
* Created: Wed Oct 16 15:18:45 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h" //precompiled headers
#include "rect.h"
/**********************************************************************
 * TBOX::TBOX()  Constructor from 2 ICOORDS
 *
 * The two points may be any pair of opposite corners, in either order;
 * they are sorted so that bot_left holds the smaller x and y and
 * top_right the larger x and y.
 **********************************************************************/
TBOX::TBOX(              // constructor
    const ICOORD pt1,    // one corner
    const ICOORD pt2) {  // the other corner
  const bool x_in_order = pt1.x() <= pt2.x();
  const bool y_in_order = pt1.y() <= pt2.y();

  if (x_in_order && y_in_order) {
    // pt1 is already bottom-left, pt2 top-right.
    bot_left = pt1;
    top_right = pt2;
  } else if (x_in_order) {
    // pt1 is top-left, pt2 is bottom-right.
    bot_left = ICOORD(pt1.x(), pt2.y());
    top_right = ICOORD(pt2.x(), pt1.y());
  } else if (y_in_order) {
    // pt1 is bottom-right, pt2 is top-left.
    bot_left = ICOORD(pt2.x(), pt1.y());
    top_right = ICOORD(pt1.x(), pt2.y());
  } else {
    // pt1 is top-right, pt2 bottom-left.
    bot_left = pt2;
    top_right = pt1;
  }
}
/**********************************************************************
 * TBOX::TBOX()  Constructor from 4 integer values.
 *
 * The values are stored exactly as given, without sorting: the caller
 * must make sure that left <= right and bottom <= top.
 **********************************************************************/
TBOX::TBOX(inT16 left,
           inT16 bottom,
           inT16 right,
           inT16 top)
    : bot_left(left, bottom),
      top_right(right, top) {
}
// rotate_large constructs the containing bounding box of all 4
// corners after rotating them. It therefore guarantees that all
// original content is contained within, but also slightly enlarges the box.
void TBOX::rotate_large(const FCOORD& vec) {
ICOORD top_left(bot_left.x(), top_right.y());
ICOORD bottom_right(top_right.x(), bot_left.y());
top_left.rotate(vec);
bottom_right.rotate(vec);
rotate(vec);
TBOX box2(top_left, bottom_right);
*this += box2;
}
/**********************************************************************
 * TBOX::intersection()  Build the largest box contained in both boxes
 *
 * Returns a null box (the same corners as the default constructor uses)
 * when the boxes do not overlap.
 **********************************************************************/
TBOX TBOX::intersection(  // shared area box
    const TBOX &box) const {
  if (!overlap(box)) {
    return TBOX(MAX_INT16, MAX_INT16, -MAX_INT16, -MAX_INT16);
  }
  // Innermost of each pair of edges.
  const inT16 left = box.bot_left.x() > bot_left.x()
      ? box.bot_left.x() : bot_left.x();
  const inT16 right = box.top_right.x() < top_right.x()
      ? box.top_right.x() : top_right.x();
  const inT16 bottom = box.bot_left.y() > bot_left.y()
      ? box.bot_left.y() : bot_left.y();
  const inT16 top = box.top_right.y() < top_right.y()
      ? box.top_right.y() : top_right.y();
  return TBOX(left, bottom, right, top);
}
/**********************************************************************
 * TBOX::bounding_union()  Build the smallest box containing both boxes
 *
 * Neither box is modified; the result is returned by value.
 **********************************************************************/
TBOX TBOX::bounding_union(  // box enclosing both
    const TBOX &box) const {
  // Outermost of each pair of edges.
  const inT16 min_x = box.bot_left.x() < bot_left.x()
      ? box.bot_left.x() : bot_left.x();
  const inT16 max_x = box.top_right.x() > top_right.x()
      ? box.top_right.x() : top_right.x();
  const inT16 min_y = box.bot_left.y() < bot_left.y()
      ? box.bot_left.y() : bot_left.y();
  const inT16 max_y = box.top_right.y() > top_right.y()
      ? box.top_right.y() : top_right.y();
  return TBOX(ICOORD(min_x, min_y), ICOORD(max_x, max_y));
}
/**********************************************************************
 * TBOX::plot()  Paint a box using specified settings
 *
 * Selects the given brush and pen on the window and then draws the box
 * with the single-argument plot().  Compiled out when graphics are
 * disabled.
 **********************************************************************/
#ifndef GRAPHICS_DISABLED
void TBOX::plot(ScrollView* fd,                   // where to paint
                ScrollView::Color fill_colour,    // colour for inside
                ScrollView::Color border_colour   // colour for border
                ) const {
  fd->Brush(fill_colour);
  fd->Pen(border_colour);
  this->plot(fd);
}
#endif
/**********************************************************************
 * operator+=
 *
 * Extend op1 in place so that it also covers op2 (in-place union).
 * Returns op1.
 **********************************************************************/
DLLSYM TBOX &
operator+= (          // in-place bounding box
TBOX & op1,           // box to grow
const TBOX & op2) {   // box to include
  const ICOORD &other_bl = op2.bot_left;
  const ICOORD &other_tr = op2.top_right;

  // Push the bottom-left corner outwards where op2 reaches further.
  if (other_bl.x() < op1.bot_left.x()) {
    op1.bot_left.set_x(other_bl.x());
  }
  if (other_bl.y() < op1.bot_left.y()) {
    op1.bot_left.set_y(other_bl.y());
  }
  // Likewise for the top-right corner.
  if (other_tr.x() > op1.top_right.x()) {
    op1.top_right.set_x(other_tr.x());
  }
  if (other_tr.y() > op1.top_right.y()) {
    op1.top_right.set_y(other_tr.y());
  }
  return op1;
}
/**********************************************************************
 * operator-=
 *
 * Reduce op1 in place to its intersection with op2.  When the boxes do
 * not overlap, op1 becomes a null box.  Returns op1.
 **********************************************************************/
DLLSYM TBOX &
operator-= (          // in-place intersection
TBOX & op1,           // box to shrink
const TBOX & op2) {   // box to clip against
  if (!op1.overlap(op2)) {
    // Nothing in common: leave op1 as a null box.
    op1.bot_left.set_x(MAX_INT16);
    op1.bot_left.set_y(MAX_INT16);
    op1.top_right.set_x(-MAX_INT16);
    op1.top_right.set_y(-MAX_INT16);
    return op1;
  }

  // Pull each edge of op1 inwards wherever op2 is the tighter bound.
  if (op2.bot_left.x() > op1.bot_left.x()) {
    op1.bot_left.set_x(op2.bot_left.x());
  }
  if (op2.top_right.x() < op1.top_right.x()) {
    op1.top_right.set_x(op2.top_right.x());
  }
  if (op2.bot_left.y() > op1.bot_left.y()) {
    op1.bot_left.set_y(op2.bot_left.y());
  }
  if (op2.top_right.y() < op1.top_right.y()) {
    op1.top_right.set_y(op2.top_right.y());
  }
  return op1;
}
/**********************************************************************
* TBOX::serialise_asc() Convert to ascii file.
*
**********************************************************************/
void TBOX::serialise_asc( //convert to ascii
FILE *f //file to use
) {
bot_left.serialise_asc (f);
top_right.serialise_asc (f);
}
/**********************************************************************
* TBOX::de_serialise_asc() Convert from ascii file.
*
**********************************************************************/
void TBOX::de_serialise_asc( //convert from ascii
FILE *f //file to use
) {
bot_left.de_serialise_asc (f);
top_right.de_serialise_asc (f);
}

382
ccmain/rect.h Normal file
View File

@ -0,0 +1,382 @@
/**********************************************************************
* File: rect.h (Formerly box.h)
* Description: Bounding box class definition.
* Author: Phil Cheatle
* Created: Wed Oct 16 15:18:45 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef RECT_H
#define RECT_H
#include <math.h>
#include "points.h"
#include "ndminx.h"
#include "tprintf.h"
#include "scrollview.h"
class DLLSYM TBOX { // bounding box
public:
TBOX (): // empty constructor making a null box
bot_left (MAX_INT16, MAX_INT16), top_right (-MAX_INT16, -MAX_INT16) {
}
TBOX( // constructor
const ICOORD pt1, // one corner
const ICOORD pt2); // the other corner
TBOX( // constructor
inT16 left, inT16 bottom, inT16 right, inT16 top);
TBOX( // box around FCOORD
const FCOORD pt);
bool null_box() const { // Is box null
return ((left () >= right ()) || (top () <= bottom ()));
}
inT16 top() const { // coord of top
return top_right.y ();
}
void set_top(int y) {
top_right.set_y(y);
}
inT16 bottom() const { // coord of bottom
return bot_left.y ();
}
void set_bottom(int y) {
bot_left.set_y(y);
}
inT16 left() const { // coord of left
return bot_left.x ();
}
void set_left(int x) {
bot_left.set_x(x);
}
inT16 right() const { // coord of right
return top_right.x ();
}
void set_right(int x) {
top_right.set_x(x);
}
const ICOORD &botleft() const { // access function
return bot_left;
}
ICOORD botright() const { // ~ access function
return ICOORD (top_right.x (), bot_left.y ());
}
ICOORD topleft() const { // ~ access function
return ICOORD (bot_left.x (), top_right.y ());
}
const ICOORD &topright() const { // access function
return top_right;
}
inT16 height() const { // how high is it?
if (!null_box ())
return top_right.y () - bot_left.y ();
else
return 0;
}
inT16 width() const { // how high is it?
if (!null_box ())
return top_right.x () - bot_left.x ();
else
return 0;
}
inT32 area() const { // what is the area?
if (!null_box ())
return width () * height ();
else
return 0;
}
void move_bottom_edge( // move one edge
const inT16 y) { // by +/- y
bot_left += ICOORD (0, y);
}
void move_left_edge( // move one edge
const inT16 x) { // by +/- x
bot_left += ICOORD (x, 0);
}
void move_right_edge( // move one edge
const inT16 x) { // by +/- x
top_right += ICOORD (x, 0);
}
void move_top_edge( // move one edge
const inT16 y) { // by +/- y
top_right += ICOORD (0, y);
}
void move( // move box
const ICOORD vec) { // by vector
bot_left += vec;
top_right += vec;
}
void move( // move box
const FCOORD vec) { // by float vector
bot_left.set_x ((inT16) floor (bot_left.x () + vec.x ()));
// round left
bot_left.set_y ((inT16) floor (bot_left.y () + vec.y ()));
// round down
top_right.set_x ((inT16) ceil (top_right.x () + vec.x ()));
// round right
top_right.set_y ((inT16) ceil (top_right.y () + vec.y ()));
// round up
}
void scale( // scale box
const float f) { // by multiplier
bot_left.set_x ((inT16) floor (bot_left.x () * f)); // round left
bot_left.set_y ((inT16) floor (bot_left.y () * f)); // round down
top_right.set_x ((inT16) ceil (top_right.x () * f)); // round right
top_right.set_y ((inT16) ceil (top_right.y () * f)); // round up
}
void scale( // scale box
const FCOORD vec) { // by float vector
bot_left.set_x ((inT16) floor (bot_left.x () * vec.x ()));
bot_left.set_y ((inT16) floor (bot_left.y () * vec.y ()));
top_right.set_x ((inT16) ceil (top_right.x () * vec.x ()));
top_right.set_y ((inT16) ceil (top_right.y () * vec.y ()));
}
// rotate doesn't enlarge the box - it just rotates the bottom-left
// and top-right corners. Use rotate_large if you want to guarantee
// that all content is contained within the rotated box.
void rotate(const FCOORD& vec) { // by vector
bot_left.rotate (vec);
top_right.rotate (vec);
*this = TBOX (bot_left, top_right);
}
// rotate_large constructs the containing bounding box of all 4
// corners after rotating them. It therefore guarantees that all
// original content is contained within, but also slightly enlarges the box.
void rotate_large(const FCOORD& vec);
bool contains( // is pt inside box
const FCOORD pt) const;
bool contains( // is box inside box
const TBOX &box) const;
bool overlap( // do boxes overlap
const TBOX &box) const;
bool major_overlap( // do boxes overlap more than half
const TBOX &box) const;
// Do boxes overlap on x axis.
bool x_overlap(const TBOX &box) const;
// Do boxes overlap on x axis by more than
// half of the width of the narrower box.
bool major_x_overlap(const TBOX &box) const;
// Do boxes overlap on y axis.
bool y_overlap(const TBOX &box) const;
// Do boxes overlap on y axis by more than
// half of the height of the shorter box.
bool major_y_overlap(const TBOX &box) const;
// fraction of current box's area covered by other
double overlap_fraction(const TBOX &box) const;
TBOX intersection( // shared area box
const TBOX &box) const;
TBOX bounding_union( // box enclosing both
const TBOX &box) const;
void print() const { // print
tprintf("Bounding box=(%d,%d)->(%d,%d)\n",
left(), bottom(), right(), top());
}
#ifndef GRAPHICS_DISABLED
void plot( // use current settings
ScrollView* fd) const { // where to paint
fd->Rectangle(bot_left.x (), bot_left.y (), top_right.x (),
top_right.y ());
}
void plot( // paint box
ScrollView* fd, // where to paint
ScrollView::Color fill_colour, // colour for inside
ScrollView::Color border_colour) const; // colour for border
#endif
friend DLLSYM TBOX & operator+= (TBOX &, const TBOX &);
// in place union
friend DLLSYM TBOX & operator-= (TBOX &, const TBOX &);
// in place intrsection
void serialise_asc( // convert to ascii
FILE *f);
void de_serialise_asc( // convert from ascii
FILE *f);
private:
ICOORD bot_left; // bottom left corner
ICOORD top_right; // top right corner
};
/**********************************************************************
 * TBOX::TBOX()  Constructor from 1 FCOORD
 *
 * Builds the integer box around a floating point position: coordinates
 * are rounded down for the bottom-left and up for the top-right corner.
 **********************************************************************/
inline TBOX::TBOX(      // constructor
    const FCOORD pt) {  // floating centre
  const inT16 low_x = (inT16) floor(pt.x());
  const inT16 low_y = (inT16) floor(pt.y());
  const inT16 high_x = (inT16) ceil(pt.x());
  const inT16 high_y = (inT16) ceil(pt.y());
  bot_left = ICOORD(low_x, low_y);
  top_right = ICOORD(high_x, high_y);
}
/**********************************************************************
 * TBOX::contains()  Is point within box
 *
 * Points lying exactly on an edge count as inside.
 **********************************************************************/
inline bool TBOX::contains(const FCOORD pt) const {
  const bool inside_x = (pt.x() >= bot_left.x()) &&
                        (pt.x() <= top_right.x());
  const bool inside_y = (pt.y() >= bot_left.y()) &&
                        (pt.y() <= top_right.y());
  return inside_x && inside_y;
}
/**********************************************************************
 * TBOX::contains()  Is box within box
 *
 * True when both stored corners of box lie inside this box.
 **********************************************************************/
inline bool TBOX::contains(const TBOX &box) const {
  if (!contains(box.bot_left)) {
    return false;
  }
  return contains(box.top_right);
}
/**********************************************************************
* TBOX::overlap() Do two boxes overlap?
*
**********************************************************************/
inline bool TBOX::overlap( // do boxes overlap
const TBOX &box) const {
return ((box.bot_left.x () <= top_right.x ()) &&
(box.top_right.x () >= bot_left.x ()) &&
(box.bot_left.y () <= top_right.y ()) &&
(box.top_right.y () >= bot_left.y ()));
}
/**********************************************************************
 * TBOX::major_overlap()  Do two boxes overlap by at least half of the
 *                        smallest?
 *
 * On each axis, twice the shared extent must be at least the smaller of
 * the two boxes' sizes on that axis.
 **********************************************************************/
inline bool TBOX::major_overlap(  // Do boxes overlap more than half.
    const TBOX &box) const {
  // Shared extent on the x axis (negative when the boxes are apart).
  int shared_x = MIN(box.top_right.x(), top_right.x());
  shared_x -= MAX(box.bot_left.x(), bot_left.x());
  const int twice_shared_x = shared_x + shared_x;
  if (twice_shared_x < MIN(box.width(), width()))
    return false;

  // Same test on the y axis.
  int shared_y = MIN(box.top_right.y(), top_right.y());
  shared_y -= MAX(box.bot_left.y(), bot_left.y());
  const int twice_shared_y = shared_y + shared_y;
  if (twice_shared_y < MIN(box.height(), height()))
    return false;

  return true;
}
/**********************************************************************
* TBOX::overlap_fraction() Fraction of area covered by the other box
*
**********************************************************************/
inline double TBOX::overlap_fraction(const TBOX &box) const {
double fraction = 0.0;
if (this->area()) {
fraction = this->intersection(box).area() * 1.0 / this->area();
}
return fraction;
}
/**********************************************************************
 * TBOX::x_overlap()  Do two boxes overlap on x-axis
 *
 * Touching extents count as overlapping.
 **********************************************************************/
inline bool TBOX::x_overlap(const TBOX &box) const {
  if (box.bot_left.x() > top_right.x()) {
    return false;  // box lies entirely to the right
  }
  return box.top_right.x() >= bot_left.x();
}
/**********************************************************************
* TBOX::major_x_overlap() Do two boxes overlap by more than half the
* width of the narrower box on the x-axis
*
**********************************************************************/
inline bool TBOX::major_x_overlap(const TBOX &box) const {
inT16 overlap = box.width();
if (this->left() > box.left()) {
overlap -= this->left() - box.left();
}
if (this->right() < box.right()) {
overlap -= box.right() - this->right();
}
return (overlap >= box.width() / 2 || overlap >= this->width() / 2);
}
/**********************************************************************
 * TBOX::y_overlap()  Do two boxes overlap on y-axis
 *
 * Touching extents count as overlapping.
 **********************************************************************/
inline bool TBOX::y_overlap(const TBOX &box) const {
  if (box.bot_left.y() > top_right.y()) {
    return false;  // box lies entirely above
  }
  return box.top_right.y() >= bot_left.y();
}
/**********************************************************************
* TBOX::major_y_overlap() Do two boxes overlap by more than half the
* height of the shorter box on the y-axis
*
**********************************************************************/
inline bool TBOX::major_y_overlap(const TBOX &box) const {
inT16 overlap = box.height();
if (this->bottom() > box.bottom()) {
overlap -= this->bottom() - box.bottom();
}
if (this->top() < box.top()) {
overlap -= box.top() - this->top();
}
return (overlap >= box.height() / 2 || overlap >= this->height() / 2);
}
#endif

545
ccmain/rejctmap.cpp Normal file
View File

@ -0,0 +1,545 @@
/**********************************************************************
* File: rejctmap.cpp (Formerly rejmap.c)
* Description: REJ and REJMAP class functions.
* Author: Phil Cheatle
* Created: Thu Jun 9 13:46:38 BST 1994
*
* (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "hosthplb.h"
//#include "basefile.h"
#include "rejctmap.h"
#include "secname.h"
// EXTERN expands to nothing here, so the BOOL_VARs below are definitions.
#define EXTERN
// Not referenced by the REJ functions in this part of the file.
EXTERN BOOL_VAR (rejword_only_set_if_accepted, TRUE, "Mimic old reject_word");
// When set, REJ::accept_if_good_quality() skips its checks of the later
// rejection stages.
EXTERN BOOL_VAR (rejmap_allow_more_good_qual, FALSE,
"Use initial good qual setting");
// When cleared, REJ::setrej_1Il_conflict() does nothing.
EXTERN BOOL_VAR (rej_use_1Il_rej, TRUE, "1Il rejection enabled");
// Is the char permanently rejected?  True when any of the eight flags
// tested here is set; rejected() checks these before any accept flag other
// than R_MINIMAL_REJ_ACCEPT.
BOOL8 REJ::perm_rejected() {
  if (flag(R_TESS_FAILURE) || flag(R_SMALL_XHT) ||
      flag(R_EDGE_CHAR) || flag(R_1IL_CONFLICT)) {
    return TRUE;
  }
  return flag(R_POSTNN_1IL) || flag(R_REJ_CBLOB) ||
         flag(R_BAD_REPETITION) || flag(R_MM_REJECT);
}
// True when any of the rejection flags that R_NN_ACCEPT / R_HYPHEN_ACCEPT
// can override (see rej_before_mm_accept()) is set.
BOOL8 REJ::rej_before_nn_accept() {
  if (flag(R_POOR_MATCH) || flag(R_NOT_TESS_ACCEPTED)) {
    return TRUE;
  }
  return flag(R_CONTAINS_BLANKS) || flag(R_BAD_PERMUTER);
}
// True when any of the rejection flags that only R_MM_ACCEPT or a later
// accept can override (see rej_before_quality_accept()) is set.
BOOL8 REJ::rej_between_nn_and_mm() {
  if (flag(R_HYPHEN) || flag(R_DUBIOUS) || flag(R_NO_ALPHANUMS)) {
    return TRUE;
  }
  return flag(R_MOSTLY_REJ) || flag(R_XHT_FIXUP);
}
// Returns the state of R_BAD_QUALITY, the only rejection flag in this
// group; rej_before_quality_accept() tests it regardless of R_MM_ACCEPT.
BOOL8 REJ::rej_between_mm_and_quality_accept() {
  const BOOL8 bad_quality = flag(R_BAD_QUALITY);
  return bad_quality;
}
// True when a document, block, row or UNLV rejection flag is set.  In
// rejected() these are overridden only by R_MINIMAL_REJ_ACCEPT.
BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() {
  if (flag(R_DOC_REJ) || flag(R_BLOCK_REJ)) {
    return TRUE;
  }
  return flag(R_ROW_REJ) || flag(R_UNLV_REJ);
}
// True when the char counts as rejected just before R_MM_ACCEPT is taken
// into account: either a rej_between_nn_and_mm() flag is set, or a
// rej_before_nn_accept() flag is set and neither R_NN_ACCEPT nor
// R_HYPHEN_ACCEPT overrides it.
BOOL8 REJ::rej_before_mm_accept() {
  if (rej_between_nn_and_mm()) {
    return TRUE;
  }
  if (!rej_before_nn_accept()) {
    return FALSE;
  }
  return !flag(R_NN_ACCEPT) && !flag(R_HYPHEN_ACCEPT);
}
// True when the char counts as rejected just before R_QUALITY_ACCEPT is
// taken into account: R_BAD_QUALITY is set, or R_MM_ACCEPT is not set and
// rej_before_mm_accept() holds.
BOOL8 REJ::rej_before_quality_accept() {
  if (rej_between_mm_and_quality_accept()) {
    return TRUE;
  }
  return !flag(R_MM_ACCEPT) && rej_before_mm_accept();
}
// Is the char rejected?  R_MINIMAL_REJ_ACCEPT overrides everything.
// Otherwise the char is rejected if it is permanently rejected, carries a
// doc/block/row/UNLV rejection, or was rejected before the quality stage
// without R_QUALITY_ACCEPT being set.
BOOL8 REJ::rejected() {
  if (flag(R_MINIMAL_REJ_ACCEPT)) {
    return FALSE;
  }
  if (perm_rejected()) {
    return TRUE;
  }
  if (rej_between_quality_and_minimal_rej_accept()) {
    return TRUE;
  }
  return !flag(R_QUALITY_ACCEPT) && rej_before_quality_accept();
}
// Is this a potential reject that good quality could accept?  The char
// must be rejected but not permanently, have R_BAD_PERMUTER set, and have
// none of R_POOR_MATCH, R_NOT_TESS_ACCEPTED or R_CONTAINS_BLANKS set.
// Unless rejmap_allow_more_good_qual is on, it must also be free of every
// later-stage rejection.
BOOL8 REJ::accept_if_good_quality() {
  if (!rejected() || perm_rejected()) {
    return FALSE;
  }
  if (!flag(R_BAD_PERMUTER)) {
    return FALSE;
  }
  if (flag(R_POOR_MATCH) ||
      flag(R_NOT_TESS_ACCEPTED) ||
      flag(R_CONTAINS_BLANKS)) {
    return FALSE;
  }
  if (rejmap_allow_more_good_qual) {
    return TRUE;
  }
  return !rej_between_nn_and_mm() &&
         !rej_between_mm_and_quality_accept() &&
         !rej_between_quality_and_minimal_rej_accept();
}
void REJ::setrej_tess_failure() { //Tess generated blank
set_flag(R_TESS_FAILURE);
}
void REJ::setrej_small_xht() { //Small xht char/wd
set_flag(R_SMALL_XHT);
}
void REJ::setrej_edge_char() { //Close to image edge
set_flag(R_EDGE_CHAR);
}
void REJ::setrej_1Il_conflict() { //Initial reject map
if (rej_use_1Il_rej)
set_flag(R_1IL_CONFLICT);
}
void REJ::setrej_postNN_1Il() { //1Il after NN
set_flag(R_POSTNN_1IL);
}
void REJ::setrej_rej_cblob() { //Insert duff blob
set_flag(R_REJ_CBLOB);
}
void REJ::setrej_mm_reject() { //Matrix matcher
set_flag(R_MM_REJECT);
}
void REJ::setrej_bad_repetition() { //Odd repeated char
set_flag(R_BAD_REPETITION);
}
void REJ::setrej_poor_match() { //Failed Rays heuristic
set_flag(R_POOR_MATCH);
}
void REJ::setrej_not_tess_accepted() {
//TEMP reject_word
set_flag(R_NOT_TESS_ACCEPTED);
}
void REJ::setrej_contains_blanks() {
//TEMP reject_word
set_flag(R_CONTAINS_BLANKS);
}
void REJ::setrej_bad_permuter() { //POTENTIAL reject_word
set_flag(R_BAD_PERMUTER);
}
void REJ::setrej_hyphen() { //PostNN dubious hyphen or .
set_flag(R_HYPHEN);
}
void REJ::setrej_dubious() { //PostNN dubious limit
set_flag(R_DUBIOUS);
}
void REJ::setrej_no_alphanums() { //TEMP reject_word
set_flag(R_NO_ALPHANUMS);
}
void REJ::setrej_mostly_rej() { //TEMP reject_word
set_flag(R_MOSTLY_REJ);
}
void REJ::setrej_xht_fixup() { //xht fixup
set_flag(R_XHT_FIXUP);
}
void REJ::setrej_bad_quality() { //TEMP reject_word
set_flag(R_BAD_QUALITY);
}
void REJ::setrej_doc_rej() { //TEMP reject_word
set_flag(R_DOC_REJ);
}
void REJ::setrej_block_rej() { //TEMP reject_word
set_flag(R_BLOCK_REJ);
}
void REJ::setrej_row_rej() { //TEMP reject_word
set_flag(R_ROW_REJ);
}
void REJ::setrej_unlv_rej() { //TEMP reject_word
set_flag(R_UNLV_REJ);
}
void REJ::setrej_hyphen_accept() { //NN Flipped a char
set_flag(R_HYPHEN_ACCEPT);
}
void REJ::setrej_nn_accept() { //NN Flipped a char
set_flag(R_NN_ACCEPT);
}
void REJ::setrej_mm_accept() { //Matrix matcher
set_flag(R_MM_ACCEPT);
}
void REJ::setrej_quality_accept() { //Quality flip a char
set_flag(R_QUALITY_ACCEPT);
}
void REJ::setrej_minimal_rej_accept() {
//Accept all except blank
set_flag(R_MINIMAL_REJ_ACCEPT);
}
// Writes one "<FLAG_NAME>: T" or "<FLAG_NAME>: F" line to fp for each of
// the flags listed below.  Prints nothing when SECURE_NAMES is defined.
void REJ::full_print(FILE *fp) {
#ifndef SECURE_NAMES
  const char *const set_str = "T";
  const char *const clear_str = "F";

  fprintf(fp, "R_TESS_FAILURE: %s\n",
          flag(R_TESS_FAILURE) ? set_str : clear_str);
  fprintf(fp, "R_SMALL_XHT: %s\n",
          flag(R_SMALL_XHT) ? set_str : clear_str);
  fprintf(fp, "R_EDGE_CHAR: %s\n",
          flag(R_EDGE_CHAR) ? set_str : clear_str);
  fprintf(fp, "R_1IL_CONFLICT: %s\n",
          flag(R_1IL_CONFLICT) ? set_str : clear_str);
  fprintf(fp, "R_POSTNN_1IL: %s\n",
          flag(R_POSTNN_1IL) ? set_str : clear_str);
  fprintf(fp, "R_REJ_CBLOB: %s\n",
          flag(R_REJ_CBLOB) ? set_str : clear_str);
  fprintf(fp, "R_MM_REJECT: %s\n",
          flag(R_MM_REJECT) ? set_str : clear_str);
  fprintf(fp, "R_BAD_REPETITION: %s\n",
          flag(R_BAD_REPETITION) ? set_str : clear_str);
  fprintf(fp, "R_POOR_MATCH: %s\n",
          flag(R_POOR_MATCH) ? set_str : clear_str);
  fprintf(fp, "R_NOT_TESS_ACCEPTED: %s\n",
          flag(R_NOT_TESS_ACCEPTED) ? set_str : clear_str);
  fprintf(fp, "R_CONTAINS_BLANKS: %s\n",
          flag(R_CONTAINS_BLANKS) ? set_str : clear_str);
  fprintf(fp, "R_BAD_PERMUTER: %s\n",
          flag(R_BAD_PERMUTER) ? set_str : clear_str);
  fprintf(fp, "R_HYPHEN: %s\n",
          flag(R_HYPHEN) ? set_str : clear_str);
  fprintf(fp, "R_DUBIOUS: %s\n",
          flag(R_DUBIOUS) ? set_str : clear_str);
  fprintf(fp, "R_NO_ALPHANUMS: %s\n",
          flag(R_NO_ALPHANUMS) ? set_str : clear_str);
  fprintf(fp, "R_MOSTLY_REJ: %s\n",
          flag(R_MOSTLY_REJ) ? set_str : clear_str);
  fprintf(fp, "R_XHT_FIXUP: %s\n",
          flag(R_XHT_FIXUP) ? set_str : clear_str);
  fprintf(fp, "R_BAD_QUALITY: %s\n",
          flag(R_BAD_QUALITY) ? set_str : clear_str);
  fprintf(fp, "R_DOC_REJ: %s\n",
          flag(R_DOC_REJ) ? set_str : clear_str);
  fprintf(fp, "R_BLOCK_REJ: %s\n",
          flag(R_BLOCK_REJ) ? set_str : clear_str);
  fprintf(fp, "R_ROW_REJ: %s\n",
          flag(R_ROW_REJ) ? set_str : clear_str);
  fprintf(fp, "R_UNLV_REJ: %s\n",
          flag(R_UNLV_REJ) ? set_str : clear_str);
  fprintf(fp, "R_HYPHEN_ACCEPT: %s\n",
          flag(R_HYPHEN_ACCEPT) ? set_str : clear_str);
  fprintf(fp, "R_NN_ACCEPT: %s\n",
          flag(R_NN_ACCEPT) ? set_str : clear_str);
  fprintf(fp, "R_MM_ACCEPT: %s\n",
          flag(R_MM_ACCEPT) ? set_str : clear_str);
  fprintf(fp, "R_QUALITY_ACCEPT: %s\n",
          flag(R_QUALITY_ACCEPT) ? set_str : clear_str);
  fprintf(fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
          flag(R_MINIMAL_REJ_ACCEPT) ? set_str : clear_str);
#endif
}
//The REJMAP class has been hacked to use alloc_struct instead of new [].
//This is only to reduce memory fragmentation, and it is rather kludgy.
//alloc_struct bypasses the call to the constructor of REJ on each
//array element. Although the constructor is empty, the BITS16 members
//do have a constructor which sets all the flags to 0. The memset
//replaces this functionality.
// Copy constructor: builds a map of the same length as source and copies
// each REJ element across. A source of length <= 0 yields a NULL ptr.
// The storage comes from alloc_struct and is not zeroed here because
// every element is overwritten by the copy loop.
REJMAP::REJMAP(const REJMAP &source) {
  len = source.length();
  if (len <= 0) {
    ptr = NULL;                  //nothing to copy
    return;
  }
  ptr = (REJ *) alloc_struct(len * sizeof(REJ), "REJ");
  for (int index = 0; index < len; ++index)
    ptr[index] = source.ptr[index];
}
// Assignment: resizes this map to source's length (via initialise, which
// frees the old storage and allocates zeroed storage) and then copies
// every REJ element from source. Returns *this.
//
// Self-assignment is detected and treated as a no-op: initialise() frees
// this->ptr, so without the guard "map = map" would copy elements out of
// storage that has just been released.
REJMAP &REJMAP::operator=(const REJMAP &source) {
  if (this == &source)
    return *this;                //nothing to do, and unsafe to proceed

  initialise(source.len);
  for (int index = 0; index < len; index++)
    ptr[index] = source.ptr[index];
  return *this;
}
// Redefines the map to hold `length` elements. Any existing storage is
// released first; the new storage (if length > 0) is zero-filled with
// memset, which stands in for the REJ/BITS16 constructors that
// alloc_struct does not run. A length <= 0 leaves ptr NULL.
void REJMAP::initialise(inT16 length) {
  if (ptr != NULL)
    free_struct(ptr, len * sizeof(REJ), "REJ");

  len = length;
  if (len <= 0) {
    ptr = NULL;
    return;
  }
  void *raw = alloc_struct(len * sizeof(REJ), "REJ");
  ptr = (REJ *) memset(raw, 0, len * sizeof(REJ));
}
// Returns the number of elements of the map for which accepted() is true.
inT16 REJMAP::accept_count() {
  inT16 accepted_total = 0;
  int index = 0;
  while (index < len) {
    if (ptr[index].accepted())
      accepted_total++;
    index++;
  }
  return accepted_total;
}
// Returns TRUE as soon as any element reports recoverable(), i.e. it is
// rejected but not permanently rejected; FALSE if none does.
BOOL8 REJMAP::recoverable_rejects() {
  int index = 0;
  while (index < len) {
    if (ptr[index].recoverable())
      return TRUE;
    ++index;
  }
  return FALSE;
}
// Returns TRUE as soon as any element reports accept_if_good_quality()
// (a "potential" reject); FALSE if none does.
BOOL8 REJMAP::quality_recoverable_rejects() {
  int index = 0;
  while (index < len) {
    if (ptr[index].accept_if_good_quality())
      return TRUE;
    ++index;
  }
  return FALSE;
}
// Removes the element at index pos, shrinking the map by one.
// pos must satisfy 0 <= pos < len (enforced with ASSERT_HOST). A new,
// zeroed array one element shorter is allocated, every element except
// the one at pos is copied into it in order, and the old array is freed.
// Removing the only element leaves ptr NULL and len 0.
void REJMAP::remove_pos(inT16 pos) {
  ASSERT_HOST(pos >= 0);
  ASSERT_HOST(pos < len);
  ASSERT_HOST(len > 0);

  const int old_len = len;       //size of the map being replaced
  len--;

  REJ *shrunk = NULL;            //new, smaller map
  if (len > 0) {
    shrunk = (REJ *) memset(alloc_struct(len * sizeof(REJ), "REJ"),
                            0, len * sizeof(REJ));
  }

  int dest = 0;
  for (int src = 0; src < old_len; ++src) {
    if (src == pos)
      continue;                  //this is the element being cut out
    shrunk[dest] = ptr[src];
    ++dest;
  }

  free_struct(ptr, old_len * sizeof(REJ), "REJ");
  ptr = shrunk;
}
// Writes the map to fp as a double-quoted string containing one
// display_char() per element (no trailing newline).
//
// Characters are emitted directly instead of being staged in a fixed
// 512-byte stack buffer: len is an inT16, so a map of 512 or more
// elements used to write past the end of that buffer.
void REJMAP::print(FILE *fp) {
  fputc('"', fp);
  for (int index = 0; index < len; index++)
    fputc(ptr[index].display_char(), fp);
  fputc('"', fp);
}
// Writes the full flag dump of every element to fp (REJ::full_print),
// following each element's dump with a blank line.
void REJMAP::full_print(FILE *fp) {
  int index = 0;
  while (index < len) {
    ptr[index].full_print(fp);
    fprintf(fp, "\n");
    ++index;
  }
}
// Rejects the whole word: unconditionally calls setrej_small_xht() on
// every element of the map.
void REJMAP::rej_word_small_xht() {
  int index = 0;
  while (index < len) {
    ptr[index].setrej_small_xht();
    ++index;
  }
}
// Rejects the whole word: unconditionally calls setrej_tess_failure() on
// every element of the map.
void REJMAP::rej_word_tess_failure() {
  int index = 0;
  while (index < len) {
    ptr[index].setrej_tess_failure();
    ++index;
  }
}
// Rejects the whole word with setrej_not_tess_accepted(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_not_tess_accepted() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_not_tess_accepted();
  }
}
// Rejects the whole word with setrej_contains_blanks(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_contains_blanks() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_contains_blanks();
  }
}
// Rejects the whole word with setrej_bad_permuter(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_bad_permuter() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_bad_permuter();
  }
}
// Rejects the whole word with setrej_xht_fixup(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_xht_fixup() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_xht_fixup();
  }
}
// Rejects the whole word with setrej_no_alphanums(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_no_alphanums() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_no_alphanums();
  }
}
// Rejects the whole word with setrej_mostly_rej(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_mostly_rej() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_mostly_rej();
  }
}
// Rejects the whole word with setrej_bad_quality(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_bad_quality() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_bad_quality();
  }
}
// Rejects the whole word with setrej_doc_rej(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_doc_rej() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_doc_rej();
  }
}
// Rejects the whole word with setrej_block_rej(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_block_rej() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_block_rej();
  }
}
// Rejects the whole word with setrej_row_rej(). While
// rejword_only_set_if_accepted is enabled, elements that are already
// rejected are left untouched.
void REJMAP::rej_word_row_rej() {
  for (int index = 0; index < len; ++index) {
    if (rejword_only_set_if_accepted && ptr[index].rejected())
      continue;                  //already rejected; leave history alone
    ptr[index].setrej_row_rej();
  }
}

284
ccmain/rejctmap.h Normal file
View File

@ -0,0 +1,284 @@
/**********************************************************************
* File: rejctmap.h (Formerly rejmap.h)
* Description: REJ and REJMAP class functions.
* Author: Phil Cheatle
* Created: Thu Jun 9 13:46:38 BST 1994
*
* (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
This module may look unnecessarily verbose, but here's the philosophy...
ALL processing of the reject map is done in this module. There are lots of
separate calls to set reject/accept flags. These have DELIBERATELY been kept
distinct so that this module can decide what to do.
Basically, there is a flag for each sort of rejection or acceptance. This
provides a history of what has happened to EACH character.
Determining whether a character is CURRENTLY rejected depends on implicit
understanding of the SEQUENCE of possible calls. The flags are defined and
grouped in the REJ_FLAGS enum. These groupings are used in determining a
character's CURRENT rejection status. Basically, a character is ACCEPTED if
none of the permanent rej flags are set
AND ( the character has never been rejected
OR an accept flag is set which is LATER than the latest reject flag )
IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
**********************************************************************/
#ifndef REJCTMAP_H
#define REJCTMAP_H
#ifdef __UNIX__
#include <assert.h>
#endif
#include "memry.h"
#include "bits16.h"
#include "varable.h"
#include "notdll.h"
extern BOOL_VAR_H (rejword_only_set_if_accepted, TRUE,
"Mimic old reject_word");
extern BOOL_VAR_H (rejmap_allow_more_good_qual, FALSE,
"Use initial good qual setting");
extern BOOL_VAR_H (rej_use_1Il_rej, TRUE, "1Il rejection enabled");
// One enumerator per kind of rejection or acceptance event that can be
// recorded against a character. The enumerators take consecutive values
// starting at 0 and are used as bit numbers by REJ::set_flag / REJ::flag:
// values below 16 live in REJ::flags1, the rest in REJ::flags2 at
// (value - 16). The grouping below is significant to the accept/reject
// logic, so do not reorder entries.
enum REJ_FLAGS
{
/* Reject modes which are NEVER overridden */
R_TESS_FAILURE, // PERM Tess didn't classify
R_SMALL_XHT, // PERM Xht too small
R_EDGE_CHAR, // PERM Too close to edge of image
R_1IL_CONFLICT, // PERM 1Il confusion
R_POSTNN_1IL, // PERM 1Il unrejected by NN
R_REJ_CBLOB, // PERM Odd blob
R_MM_REJECT, // PERM Matrix match rejection (m's)
// NOTE(review): the next entry is tagged TEMP although it sits in the
// "NEVER overridden" group - confirm which is intended.
R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend
/* Initial reject modes (pre NN_ACCEPT) */
R_POOR_MATCH, // TEMP Ray's original heuristic (Not used)
R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD
R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD
R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD
/* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop
R_DUBIOUS, // TEMP Post NN dodgy chars
R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN
R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest
R_XHT_FIXUP, // TEMP Xht tests unsure
/* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
R_BAD_QUALITY, // TEMP Quality metrics bad for WERD
/* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ_ACCEPT */
R_DOC_REJ, // TEMP Document rejection
R_BLOCK_REJ, // TEMP Block rejection
R_ROW_REJ, // TEMP Row rejection
R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space
/* Accept modes which occur in between the above rejection groups */
R_NN_ACCEPT, //NN acceptance
R_HYPHEN_ACCEPT, //Hyphen acceptance
R_MM_ACCEPT, //Matrix match acceptance
R_QUALITY_ACCEPT, //Accept word in good quality doc
R_MINIMAL_REJ_ACCEPT //Accept EVERYTHING except tess failures
};
/* REJECT MAP VALUES */
#define MAP_ACCEPT '1'
#define MAP_REJECT_PERM '0'
#define MAP_REJECT_TEMP '2'
#define MAP_REJECT_POTENTIAL '3'
// Rejection history of a single character: one bit per REJ_FLAGS value,
// split across two 16-bit sets. Flags can only be turned on through the
// setrej_* members; the current accept/reject status is derived from the
// combination of flags by perm_rejected(), rejected() and friends, which
// are defined out of line.
class REJ
{
  BITS16 flags1;                 //REJ_FLAGS values 0..15
  BITS16 flags2;                 //REJ_FLAGS values 16 and up, at (value - 16)

  //Turn on the bit that corresponds to rej_flag.
  void set_flag(REJ_FLAGS rej_flag) {
    if (rej_flag >= 16) {
      flags2.turn_on_bit (rej_flag - 16);
      return;
    }
    flags1.turn_on_bit (rej_flag);
  }

  BOOL8 rej_before_nn_accept();
  BOOL8 rej_between_nn_and_mm();
  BOOL8 rej_between_mm_and_quality_accept();
  BOOL8 rej_between_quality_and_minimal_rej_accept();
  BOOL8 rej_before_mm_accept();
  BOOL8 rej_before_quality_accept();

  public:
    //Default constructor: does nothing itself; the flag members are
    //set up by their own constructors.
    REJ() {
    }

    //Classwise copy of both flag sets.
    REJ(const REJ &source) {
      flags1 = source.flags1;
      flags2 = source.flags2;
    }

    //Assignment: copies both flag sets and returns *this.
    REJ & operator= (const REJ & source) {
      flags1 = source.flags1;
      flags2 = source.flags2;
      return *this;
    }

    //Report whether the bit that corresponds to rej_flag is set.
    BOOL8 flag(REJ_FLAGS rej_flag) {
      return (rej_flag < 16) ? flags1.bit (rej_flag)
                             : flags2.bit (rej_flag - 16);
    }

    //Map the current status onto one of the MAP_* characters. The tests
    //are made in priority order: permanent reject, potential reject,
    //temporary reject, and finally accept.
    char display_char() {
      if (perm_rejected ())
        return MAP_REJECT_PERM;
      if (accept_if_good_quality ())
        return MAP_REJECT_POTENTIAL;
      if (rejected ())
        return MAP_REJECT_TEMP;
      return MAP_ACCEPT;
    }

    BOOL8 perm_rejected();       //Is char perm reject?

    BOOL8 rejected();            //Is char rejected?

    //Is char accepted? Simply the inverse of rejected().
    BOOL8 accepted() {
      return !rejected ();
    }

    //potential rej?
    BOOL8 accept_if_good_quality();

    //Rejected, but not permanently so.
    BOOL8 recoverable() {
      if (!rejected ())
        return FALSE;
      return !perm_rejected ();
    }

    void setrej_tess_failure();  //Tess generated blank
    void setrej_small_xht();     //Small xht char/wd
    void setrej_edge_char();     //Close to image edge
    void setrej_1Il_conflict();  //Initial reject map
    void setrej_postNN_1Il();    //1Il after NN
    void setrej_rej_cblob();     //Insert duff blob
    void setrej_mm_reject();     //Matrix matcher
    //Odd repeated char
    void setrej_bad_repetition();
    void setrej_poor_match();    //Failed Rays heuristic
    //TEMP reject_word
    void setrej_not_tess_accepted();
    //TEMP reject_word
    void setrej_contains_blanks();
    void setrej_bad_permuter();  //POTENTIAL reject_word
    void setrej_hyphen();        //PostNN dubious hyph or .
    void setrej_dubious();       //PostNN dubious limit
    void setrej_no_alphanums();  //TEMP reject_word
    void setrej_mostly_rej();    //TEMP reject_word
    void setrej_xht_fixup();     //xht fixup
    void setrej_bad_quality();   //TEMP reject_word
    void setrej_doc_rej();       //TEMP reject_word
    void setrej_block_rej();     //TEMP reject_word
    void setrej_row_rej();       //TEMP reject_word
    void setrej_unlv_rej();      //TEMP reject_word
    void setrej_nn_accept();     //NN Flipped a char
    void setrej_hyphen_accept(); //Good aspect ratio
    void setrej_mm_accept();     //Matrix matcher
    //Quality flip a char
    void setrej_quality_accept();
    //Accept all except blank
    void setrej_minimal_rej_accept();

    //Dump every flag, one per line, to fp.
    void full_print(FILE *fp);
};
// A reject map: an owned array of REJ elements, one per character of a
// word, plus whole-word operations on it. Storage is obtained with
// alloc_struct / free_struct rather than new[] / delete[].
class REJMAP
{
  REJ *ptr;                      //ptr to the chars
  inT16 len;                     //Number of chars

  public:
    //Default constructor: an empty map with no storage.
    REJMAP() {
      ptr = NULL;
      len = 0;
    }

    REJMAP(                      //classwise copy
           const REJMAP &rejmap);

    REJMAP & operator= (         //assign REJMAP
      const REJMAP & source);    //from this

    //Destructor: releases the element array, if there is one.
    ~REJMAP () {
      if (ptr != NULL)
        free_struct (ptr, len * sizeof (REJ), "REJ");
    }

    void initialise(             //Redefine map
                    inT16 length);

    //Element access. Both ends of the range are asserted (the lower
    //bound used to go unchecked, so a negative index read before the
    //start of the array), in line with the checks made by remove_pos.
    REJ & operator[](
                     inT16 index) const  //map index
    {
      ASSERT_HOST (index >= 0);
      ASSERT_HOST (index < len);
      return ptr[index];
    }

    inT32 length() const {       //map length
      return len;
    }

    inT16 accept_count();        //How many accepted?

    inT16 reject_count() {       //How many rejects?
      return len - accept_count ();
    }

    void remove_pos(             //Cut out an element
                    inT16 pos);  //element to remove

    void print(FILE *fp);

    void full_print(FILE *fp);

    BOOL8 recoverable_rejects(); //Any non perm rejs?

    //Any potential rejs?
    BOOL8 quality_recoverable_rejects();

    void rej_word_small_xht();   //Reject whole word
    void rej_word_tess_failure();        //Reject whole word
    void rej_word_not_tess_accepted();   //Reject whole word
    void rej_word_contains_blanks();     //Reject whole word
    void rej_word_bad_permuter();        //Reject whole word
    void rej_word_xht_fixup();   //Reject whole word
    void rej_word_no_alphanums();        //Reject whole word
    void rej_word_mostly_rej();  //Reject whole word
    void rej_word_bad_quality(); //Reject whole word
    void rej_word_doc_rej();     //Reject whole word
    void rej_word_block_rej();   //Reject whole word
    void rej_word_row_rej();     //Reject whole word
};
#endif

905
ccmain/statistc.cpp Normal file
View File

@ -0,0 +1,905 @@
/**********************************************************************
* File: statistc.c (Formerly stats.c)
* Description: Simple statistical package for integer values.
* Author: Ray Smith
* Created: Mon Feb 04 16:56:05 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h" //precompiled headers
#include <string.h>
#include <math.h>
#include <stdlib.h>
#include "memry.h"
//#include "ipeerr.h"
#include "tprintf.h"
#include "statistc.h"
#define SEED1 0x1234 //default seeds
#define SEED2 0x5678
#define SEED3 0x9abc
/**********************************************************************
 * STATS::STATS
 *
 * Construct a new stats element covering the half-open range [min, max)
 * by allocating and zeroing one bucket per integer value.
 * An illegal range (max <= min) is silently replaced by [0, 1).
 * No error is reported if the allocation fails; buckets is then NULL,
 * which every other member treats as "empty stats".
 **********************************************************************/
STATS::STATS(                    //constructor
             inT32 min,          //min of range
             inT32 max           //max of range
            ) {
  if (max <= min) {
    min = 0;                     //fall back to a one-bucket range
    max = 1;
  }
  rangemin = min;                //setup
  rangemax = max;
  buckets = (inT32 *) alloc_mem ((max - min) * sizeof (inT32));
  // clear() copes with a NULL buckets pointer, so call it unconditionally:
  // that way total_count is always initialised, even when the allocation
  // failed (it used to be left indeterminate in that case).
  this->clear ();
}
// Default constructor: an empty stats element with no range and no
// buckets. total_count is zeroed as well; it was previously left
// uninitialised until set_range() or clear() happened to be called.
STATS::STATS() {                 //constructor
  rangemax = 0;                  //empty
  rangemin = 0;
  buckets = NULL;
  total_count = 0;
}
/**********************************************************************
 * STATS::set_range
 *
 * Alter the range on an existing stats element to [min, max).
 * Any existing buckets are freed and a new, zeroed set is allocated, so
 * all previously added samples are discarded.
 *
 * Returns false, leaving the element untouched, if max <= min.
 * Returns false if the new buckets could not be allocated; the element
 * then has the new range but no buckets and behaves as empty stats.
 * Returns true otherwise.
 **********************************************************************/
bool STATS::set_range(           //change range
                      inT32 min, //min of range
                      inT32 max  //max of range
                     ) {
  if (max <= min) {
    return false;
  }
  rangemin = min;                //setup
  rangemax = max;
  if (buckets != NULL)
    free_mem(buckets);           //no longer want it
  buckets = (inT32 *) alloc_mem ((max - min) * sizeof (inT32));
  this->clear ();                //zero it (safe when buckets is NULL)
  // Report an allocation failure to the caller instead of claiming
  // success with no buckets behind the range.
  return buckets != NULL;
}
/**********************************************************************
 * STATS::clear
 *
 * Reset the element to "no samples": total_count becomes 0 and, if
 * buckets have been allocated, every bucket in the range is zeroed.
 **********************************************************************/
void STATS::clear() {            //clear out buckets
  total_count = 0;
  if (buckets == NULL)
    return;                      //nothing allocated to zero
  memset (buckets, 0, (rangemax - rangemin) * sizeof (inT32));
}
/**********************************************************************
 * STATS::~STATS
 *
 * Destructor for a stats class: frees the buckets, if any, and resets
 * the pointer to NULL.
 **********************************************************************/
STATS::~STATS () {               //destructor
  if (buckets == NULL)
    return;
  free_mem(buckets);
  buckets = NULL;
}
/**********************************************************************
 * STATS::add
 *
 * Add count samples of the given value to the pile (a negative count
 * removes samples). Values outside the range are silently clipped into
 * the first or last bucket. Does nothing if there are no buckets.
 **********************************************************************/
void STATS::add(                 //add sample
                inT32 value,     //bucket
                inT32 count      //no to add
               ) {
  if (buckets == NULL)
    return;                      //empty stats

  inT32 cell;                    //bucket that receives the samples
  if (value <= rangemin)
    cell = 0;                    //clip to bottom of range
  else if (value >= rangemax)
    cell = rangemax - rangemin - 1;      //clip to top of range
  else
    cell = value - rangemin;

  buckets[cell] += count;
  total_count += count;          //keep count of total
}
/**********************************************************************
 * STATS::mode
 *
 * Find the mode of a stats class: the value whose bucket holds the
 * largest count. The scan runs from the top of the range downwards and
 * only a strictly larger count replaces the current best, so of several
 * equally full buckets the highest value wins. Returns rangemin if
 * there are no buckets or no bucket has a positive count.
 **********************************************************************/
inT32 STATS::mode() {            //get mode of samples
  if (buckets == NULL)
    return rangemin;             //empty stats

  inT32 best_count = 0;          //max cell count
  inT32 best_cell = 0;           //index of max
  inT32 cell = rangemax - rangemin;
  while (--cell >= 0) {
    if (buckets[cell] > best_count) {
      best_count = buckets[cell];
      best_cell = cell;
    }
  }
  return best_cell + rangemin;   //value of biggest
}
/**********************************************************************
 * STATS::mean
 *
 * Find the mean of a stats class.
 * Returns rangemin if there are no buckets or total_count is not
 * positive.
 *
 * The weighted sum is accumulated in a double: with a 32 bit integer
 * accumulator, index * count could overflow for wide ranges or large
 * sample counts. A double holds every integer up to 2^53 exactly, so
 * the result is unchanged wherever the old code did not overflow.
 **********************************************************************/
float STATS::mean() {            //get mean of samples
  if (buckets == NULL)
    return (float) rangemin;     //empty stats

  double sum = 0.0;              //sum of index * count
  for (inT32 index = rangemax - rangemin - 1; index >= 0; index--)
    sum += (double) index * buckets[index];

  if (total_count > 0)
    //mean offset within the range, shifted back to real values
    return (float) sum / total_count + rangemin;
  return (float) rangemin;       //no mean
}
/**********************************************************************
 * STATS::sd
 *
 * Find the standard deviation of a stats class, computed as
 * sqrt(E[x^2] - E[x]^2) over bucket indices (the offset by rangemin
 * does not affect the spread).
 * Returns 0 if there are no buckets or total_count is not positive.
 *
 * Two numerical hazards are handled:
 *  - the sums are accumulated in doubles, because index * index * count
 *    overflows a 32 bit integer very easily;
 *  - float rounding can make the computed variance of a tightly packed
 *    distribution come out slightly negative, and sqrt of that is NaN,
 *    so a negative variance is clamped to zero.
 **********************************************************************/
float STATS::sd() {              //standard deviation
  if (buckets == NULL)
    return (float) 0.0;          //empty stats

  double sum = 0.0;              //sum of index * count
  double sqsum = 0.0;            //sum of index^2 * count
  for (inT32 index = rangemax - rangemin - 1; index >= 0; index--) {
    const double weighted = (double) index * buckets[index];
    sum += weighted;
    sqsum += weighted * index;
  }

  if (total_count <= 0)
    return (float) 0.0;

  float variance = (float) sum / ((float) total_count);      //the mean
  variance = (float) sqsum / ((float) total_count) - variance * variance;
  if (variance < 0)
    variance = 0;                //rounding noise only; avoid sqrt(<0)
  return (float) sqrt (variance);
}
/**********************************************************************
* STATS::ile
*
* Find an arbitrary %ile of a stats class.
**********************************************************************/
float STATS::ile( //percentile
float frac //fraction to find
) {
inT32 index; //current index
inT32 sum; //sum of cells
float target; //target value
if (buckets == NULL) {
/* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
"Empty stats"); */
return (float) rangemin;
}
target = frac * total_count;
if (target <= 0)
target = (float) 1;
if (target > total_count)
target = (float) total_count;
for (sum = 0, index = 0; index < rangemax - rangemin
&& sum < target; sum += buckets[index], index++);
if (index > 0)
return rangemin + index - (sum - target) / buckets[index - 1];
//better than just ints
else
return (float) rangemin;
}
/**********************************************************************
* STATS::median
*
* Finds a more usefule estimate of median than ile(0.5).
*
* Overcomes a problem with ile() - if the samples are, for example,
* 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
* between 6 and 13 = 9.5
**********************************************************************/
float STATS::median() { //get median
float median;
inT32 min_pile;
inT32 median_pile;
inT32 max_pile;
if (buckets == NULL) {
/* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
"Empty stats");*/
return (float) rangemin;
}
median = (float) ile ((float) 0.5);
median_pile = (inT32) floor (median);
if ((total_count > 1) && (pile_count (median_pile) == 0)) {
/* Find preceeding non zero pile */
for (min_pile = median_pile; pile_count (min_pile) == 0; min_pile--);
/* Find following non zero pile */
for (max_pile = median_pile; pile_count (max_pile) == 0; max_pile++);
median = (float) ((min_pile + max_pile) / 2.0);
}
return median;
}
/**********************************************************************
 * STATS::smooth
 *
 * Apply a triangular smoothing filter to the stats.
 * This makes the modes a bit more useful.
 * The factor gives the height of the triangle, i.e. the weight of the
 * centre: each new bucket is factor * its old count plus, for every
 * offset 1..factor-1, (factor - offset) * the old counts at that
 * distance on either side (neighbours outside the range are ignored).
 * total_count is replaced by the total of the smoothed buckets.
 *
 * A factor below 2, or an element with no buckets, is a no-op. The
 * scratch copy is only allocated once it is known to be needed, and if
 * that allocation fails the stats are left unsmoothed rather than
 * copying from a NULL pointer.
 **********************************************************************/
void STATS::smooth(              //smooth samples
                   inT32 factor  //size of triangle
                  ) {
  if (buckets == NULL)
    return;                      //empty stats
  if (factor < 2)
    return;                      //is a no-op

  STATS result(rangemin, rangemax);      //output stats
  if (result.buckets == NULL)
    return;                      //no memory for the scratch copy

  const inT32 entrycount = rangemax - rangemin;   //no of entries
  for (inT32 entry = 0; entry < entrycount; entry++) {
    //centre weight
    inT32 bucket = buckets[entry] * factor;
    for (inT32 offset = 1; offset < factor; offset++) {
      if (entry - offset >= 0)
        bucket += buckets[entry - offset] * (factor - offset);
      if (entry + offset < entrycount)
        bucket += buckets[entry + offset] * (factor - offset);
    }
    result.add (entry + rangemin, bucket);
  }
  total_count = result.total_count;
  memcpy (buckets, result.buckets, entrycount * sizeof (inT32));
}
/**********************************************************************
 * STATS::cluster
 *
 * Cluster the samples into max_cluster clusters.
 * Each call runs one iteration. The array of clusters must be
 * max_clusters+1 in size as cluster 0 is used to indicate which samples
 * have been used.
 * The return value is the current number of clusters.
 *
 * Outline:
 *  1. Existing clusters (entries 1.. that have buckets and samples) are
 *     re-centred on their ile(0.5) and extended on both sides of their
 *     mode, while within `lower` of the centre and the histogram is
 *     non-increasing, with samples not yet recorded in clusters[0].
 *  2. Repeatedly, the fullest remaining pile that is further than
 *     `upper` from every centre, and outside the centre*multiple ..
 *     centre/multiple band of its nearest one, seeds a new cluster,
 *     which is extended in the same way.
 *
 * Returns 0 if max_clusters < 1, if there are no buckets, or if memory
 * or a range for a new cluster cannot be obtained. The temporary
 * centres array is released on every exit path; it used to leak when
 * set_range failed for a new cluster.
 **********************************************************************/
inT32 STATS::cluster(            //cluster samples
                     float lower,        //thresholds
                     float upper,
                     float multiple,     //distance threshold
                     inT32 max_clusters, //max no to make
                     STATS *clusters     //array of clusters
                    ) {
  BOOL8 new_cluster;             //added one
  float *centres;                //cluster centres
  inT32 entry;                   //bucket index
  inT32 cluster;                 //cluster index
  inT32 best_cluster;            //one to assign to
  inT32 new_centre = 0;          //residual mode
  inT32 new_mode;                //pile count of new_centre
  inT32 count;                   //pile to place
  float dist;                    //from cluster
  float min_dist;                //from best_cluster
  inT32 cluster_count;           //no of clusters

  if (max_clusters < 1)
    return 0;
  if (buckets == NULL)
    return 0;                    //empty stats
  centres = (float *) alloc_mem ((max_clusters + 1) * sizeof (float));
  if (centres == NULL)
    return 0;                    //no memory for centres

  // Step 1: refresh the clusters that already exist.
  for (cluster_count = 1; cluster_count <= max_clusters
    && clusters[cluster_count].buckets != NULL
  && clusters[cluster_count].total_count > 0; cluster_count++) {
    centres[cluster_count] =
      (float) clusters[cluster_count].ile ((float) 0.5);
    new_centre = clusters[cluster_count].mode ();
    // extend downwards from the mode
    for (entry = new_centre - 1; centres[cluster_count] - entry < lower
      && entry >= rangemin
    && pile_count (entry) <= pile_count (entry + 1); entry--) {
      count = pile_count (entry) - clusters[0].pile_count (entry);
      if (count > 0) {
        clusters[cluster_count].add (entry, count);
        clusters[0].add (entry, count);
      }
    }
    // extend upwards from the mode
    for (entry = new_centre + 1; entry - centres[cluster_count] < lower
      && entry < rangemax
    && pile_count (entry) <= pile_count (entry - 1); entry++) {
      count = pile_count (entry) - clusters[0].pile_count (entry);
      if (count > 0) {
        clusters[cluster_count].add (entry, count);
        clusters[0].add (entry, count);
      }
    }
  }
  cluster_count--;

  if (cluster_count == 0) {
    // first use: cluster 0 tracks which samples have been claimed
    clusters[0].set_range (rangemin, rangemax);
  }

  // Step 2: seed new clusters from the unclaimed samples.
  do {
    new_cluster = FALSE;
    new_mode = 0;
    for (entry = 0; entry < rangemax - rangemin; entry++) {
      count = buckets[entry] - clusters[0].buckets[entry];
      //remaining pile
      if (count > 0) {           //any to handle
        min_dist = (float) MAX_INT32;
        best_cluster = 0;
        for (cluster = 1; cluster <= cluster_count; cluster++) {
          dist = entry + rangemin - centres[cluster];
          //find distance
          if (dist < 0)
            dist = -dist;
          if (dist < min_dist) {
            min_dist = dist;     //find least
            best_cluster = cluster;
          }
        }
        if (min_dist > upper     //far enough for new
          && (best_cluster == 0
          || entry + rangemin > centres[best_cluster] * multiple
        || entry + rangemin < centres[best_cluster] / multiple)) {
          if (count > new_mode) {
            new_mode = count;
            new_centre = entry + rangemin;
          }
        }
      }
    }
    //need new and room
    if (new_mode > 0 && cluster_count < max_clusters) {
      cluster_count++;
      new_cluster = TRUE;
      if (!clusters[cluster_count].set_range (rangemin, rangemax)) {
        free_mem(centres);       //do not leak on the failure path
        return 0;
      }
      centres[cluster_count] = (float) new_centre;
      clusters[cluster_count].add (new_centre, new_mode);
      clusters[0].add (new_centre, new_mode);
      // extend downwards from the new centre
      for (entry = new_centre - 1; centres[cluster_count] - entry < lower
        && entry >= rangemin
      && pile_count (entry) <= pile_count (entry + 1); entry--) {
        count = pile_count (entry) - clusters[0].pile_count (entry);
        if (count > 0) {
          clusters[cluster_count].add (entry, count);
          clusters[0].add (entry, count);
        }
      }
      // extend upwards from the new centre
      for (entry = new_centre + 1; entry - centres[cluster_count] < lower
        && entry < rangemax
      && pile_count (entry) <= pile_count (entry - 1); entry++) {
        count = pile_count (entry) - clusters[0].pile_count (entry);
        if (count > 0) {
          clusters[cluster_count].add (entry, count);
          clusters[0].add (entry, count);
        }
      }
      centres[cluster_count] =
        (float) clusters[cluster_count].ile ((float) 0.5);
    }
  }
  while (new_cluster && cluster_count < max_clusters);

  free_mem(centres);
  return cluster_count;
}
/**********************************************************************
 * STATS::local_min
 *
 * Return TRUE if this point is a local min.
 * x is clipped into the range first. An empty bucket is always a local
 * min. Otherwise the nearest bucket with a different count is found on
 * each side (skipping over equal counts); if either of them is lower,
 * the point is not a local min. Returns FALSE if there are no buckets.
 **********************************************************************/
BOOL8 STATS::local_min(          //test minness
                       inT32 x   //of x
                      ) {
  if (buckets == NULL)
    return FALSE;                //empty stats

  if (x < rangemin)
    x = rangemin;
  if (x >= rangemax)
    x = rangemax - 1;
  x -= rangemin;                 //now a table index

  const inT32 here = buckets[x];
  if (here == 0)
    return TRUE;

  const inT32 cells = rangemax - rangemin;
  inT32 probe = x - 1;           //look left past any plateau
  while (probe >= 0 && buckets[probe] == here)
    probe--;
  if (probe >= 0 && buckets[probe] < here)
    return FALSE;

  probe = x + 1;                 //look right past any plateau
  while (probe < cells && buckets[probe] == here)
    probe++;
  if (probe < cells && buckets[probe] < here)
    return FALSE;

  return TRUE;
}
/**********************************************************************
* STATS::print
*
* Print a summary of the stats and optionally a dump of the table.
**********************************************************************/
void STATS::print( //print stats table
FILE *, //Now uses tprintf instead
BOOL8 dump //dump full table
) {
inT32 index; //table index
if (buckets == NULL) {
/* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
"Empty stats"); */
return;
}
if (dump) {
for (index = 0; index < rangemax - rangemin; index++) {
tprintf ("%4d:%-3d ", rangemin + index, buckets[index]);
if (index % 8 == 7)
tprintf ("\n");
}
tprintf ("\n");
}
tprintf ("Total count=%d\n", total_count);
tprintf ("Min=%d\n", (inT32) (ile ((float) 0.0)));
tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25));
tprintf ("Median=%.2f\n", ile ((float) 0.5));
tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75));
tprintf ("Max=%d\n", (inT32) (ile ((float) 0.99999)));
tprintf ("Mean= %.2f\n", mean ());
tprintf ("SD= %.2f\n", sd ());
}
/**********************************************************************
 * STATS::min_bucket
 *
 * Find REAL minimum bucket - ile(0.0) isn't necessarily correct.
 * Returns the value of the lowest bucket with a non-zero count, or
 * rangemax if every bucket is zero, or rangemin if there are no
 * buckets.
 **********************************************************************/
inT32 STATS::min_bucket() {      //Find min
  if (buckets == NULL)
    return rangemin;             //empty stats

  const inT32 cells = rangemax - rangemin;
  inT32 cell = 0;
  while ((cell < cells) && (buckets[cell] == 0))
    cell++;
  return rangemin + cell;
}
/**********************************************************************
 * STATS::max_bucket
 *
 * Find REAL maximum bucket - ile(1.0) isn't necessarily correct.
 * Returns the value of the highest bucket with a non-zero count. The
 * downward scan stops at the first bucket, so rangemin is returned if
 * every bucket is zero or if there are no buckets.
 **********************************************************************/
inT32 STATS::max_bucket() {      //Find max
  if (buckets == NULL)
    return rangemin;             //empty stats

  inT32 cell = rangemax - rangemin - 1;
  while ((cell > 0) && (buckets[cell] == 0))
    cell--;
  return rangemin + cell;
}
/**********************************************************************
* STATS::short_print
*
* Print a summary of the stats and optionally a dump of the table.
* ( BUT ONLY THE PART OF THE TABLE BETWEEN MIN AND MAX)
**********************************************************************/
void STATS::short_print( //print stats table
FILE *, //Now uses tprintf instead
BOOL8 dump //dump full table
) {
inT32 index; //table index
inT32 min = min_bucket ();
inT32 max = max_bucket ();
if (buckets == NULL) {
/* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,
ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,
"Empty stats"); */
return;
}
if (dump) {
for (index = min; index <= max; index++) {
tprintf ("%4d:%-3d ", rangemin + index, buckets[index]);
if ((index - min) % 8 == 7)
tprintf ("\n");
}
tprintf ("\n");
}
tprintf ("Total count=%d\n", total_count);
tprintf ("Min=%d Really=%d\n", (inT32) (ile ((float) 0.0)), min);
tprintf ("Max=%d Really=%d\n", (inT32) (ile ((float) 1.1)), max);
tprintf ("Range=%d\n", max + 1 - min);
tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25));
tprintf ("Median=%.2f\n", ile ((float) 0.5));
tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75));
tprintf ("Mean= %.2f\n", mean ());
tprintf ("SD= %.2f\n", sd ());
}
/**********************************************************************
* STATS::plot
*
* Draw the stats table as a histogram of rectangles: bucket i spans
* x from xorigin + xscale * i to xorigin + xscale * (i + 1) and rises
* from yorigin to yorigin + yscale * buckets[i].
* Does nothing when there is no bucket array.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
void STATS::plot(                 //plot stats table
    ScrollView* window,           //to draw in
    float xorigin,                //bottom left
    float yorigin,
    float xscale,                 //one x unit
    float yscale,                 //one y unit
    ScrollView::Color colour      //colour to draw in
    ) {
  if (buckets == NULL) {
    return;
  }
  window->Pen(colour);
  const inT32 bucket_count = rangemax - rangemin;
  for (inT32 bucket = 0; bucket < bucket_count; ++bucket) {
    window->Rectangle(xorigin + xscale * bucket, yorigin,
                      xorigin + xscale * (bucket + 1),
                      yorigin + yscale * buckets[bucket]);
  }
}
#endif
/**********************************************************************
* STATS::plotline
*
* Draw the stats table as a single polyline (no rectangles): the pen
* starts at the height of bucket 0 and is drawn to
* (xorigin + xscale * i, yorigin + yscale * buckets[i]) for each bucket.
* Does nothing when there is no bucket array.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
void STATS::plotline(             //plot stats table
    ScrollView* window,           //to draw in
    float xorigin,                //bottom left
    float yorigin,
    float xscale,                 //one x unit
    float yscale,                 //one y unit
    ScrollView::Color colour      //colour to draw in
    ) {
  if (buckets == NULL) {
    return;
  }
  window->Pen(colour);
  //Park the cursor on the first point so the first DrawTo is zero-length.
  window->SetCursor(xorigin, yorigin + yscale * buckets[0]);
  const inT32 bucket_count = rangemax - rangemin;
  for (inT32 bucket = 0; bucket < bucket_count; ++bucket) {
    window->DrawTo(xorigin + xscale * bucket,
                   yorigin + yscale * buckets[bucket]);
  }
}
#endif
/**********************************************************************
* choose_nth_item
*
* Returns the index of what would be the nth item in the array
* if the members were sorted, without actually sorting.
* The array is partially reordered in place around randomly chosen
* pivots. For more than two items an out-of-range index is clamped
* to [0, count - 1].
**********************************************************************/
DLLSYM inT32 choose_nth_item(     //fast median
    inT32 index,                  //index to choose
    float *array,                 //array of items
    inT32 count                   //no of items
    ) {
  static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
                                  //for nrand

  if (count <= 1)
    return 0;
  if (count == 2) {
    const bool ascending = array[0] < array[1];
    if (index >= 1)
      return ascending ? 1 : 0;   //want the larger of the two
    return ascending ? 0 : 1;     //want the smaller of the two
  }

  if (index < 0)
    index = 0;                    //ensure legal
  else if (index >= count)
    index = count - 1;

  #ifdef __UNIX__
  const inT32 pivot_pos = (inT32) (nrand48 (seeds) % count);
  #else
  const inT32 pivot_pos = (inT32) (rand () % count);
  #endif
  const float pivot = array[pivot_pos];
  array[pivot_pos] = array[0];    //slot 0 becomes the free gap

  //Three-way partition: [0, lesser_end) ends up below the pivot,
  //[greater_begin, count) above it; the middle is refilled afterwards.
  inT32 lesser_end = 0;
  inT32 greater_begin = count;
  inT32 scan = 1;
  while (scan < greater_begin) {
    const float sample = array[scan];
    if (sample < pivot) {
      array[lesser_end++] = sample;
      ++scan;
    }
    else if (sample > pivot) {
      --greater_begin;
      array[scan] = array[greater_begin];  //re-examine the swapped-in item
      array[greater_begin] = sample;
    }
    else {
      ++scan;                     //equal to pivot: leave it for the refill
    }
  }
  for (inT32 fill = lesser_end; fill < greater_begin; ++fill)
    array[fill] = pivot;

  if (index < lesser_end)
    return choose_nth_item (index, array, lesser_end);
  if (index < greater_begin)
    return lesser_end;            //in equal bracket
  return choose_nth_item (index - greater_begin,
                          array + greater_begin,
                          count - greater_begin) + greater_begin;
}
/**********************************************************************
* choose_nth_item
*
* Generic version: returns the index of what would be the nth item in
* the array if the members were sorted, without actually sorting.
* Elements are size bytes each and are ordered by compar (negative,
* zero or positive, as for qsort). The array is partially reordered
* in place. For more than two items an out-of-range index is clamped
* to [0, count - 1].
**********************************************************************/
DLLSYM inT32
choose_nth_item (                 //fast median
    inT32 index,                  //index to choose
    void *array,                  //array of items
    inT32 count,                  //no of items
    size_t size,                  //element size
                                  //comparator
    int (*compar) (const void *, const void *)
    ) {
  static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
                                  //for nrand

  if (count <= 1)
    return 0;
  if (count == 2) {
    const bool ascending = compar (array, (char *) array + size) < 0;
    if (index >= 1)
      return ascending ? 1 : 0;   //want the larger of the two
    return ascending ? 0 : 1;     //want the smaller of the two
  }

  if (index < 0)
    index = 0;                    //ensure legal
  else if (index >= count)
    index = count - 1;

  #ifdef __UNIX__
  const inT32 pivot_pos = (inT32) (nrand48 (seeds) % count);
  #else
  const inT32 pivot_pos = (inT32) (rand () % count);
  #endif
  swap_entries (array, size, pivot_pos, 0);   //pivot now lives in slot 0

  //Three-way partition. The entry at lesser_end is always equal to the
  //pivot, so every comparison is made against that slot.
  inT32 lesser_end = 0;
  inT32 greater_begin = count;
  inT32 scan = 1;
  while (scan < greater_begin) {
    const int order = compar ((char *) array + size * scan,
                              (char *) array + size * lesser_end);
    if (order < 0) {
      swap_entries (array, size, lesser_end, scan);
      ++lesser_end;
      ++scan;
    }
    else if (order > 0) {
      --greater_begin;
      swap_entries(array, size, greater_begin, scan);
    }
    else {
      ++scan;                     //equal to pivot: stays in the middle
    }
  }

  if (index < lesser_end)
    return choose_nth_item (index, array, lesser_end, size, compar);
  if (index < greater_begin)
    return lesser_end;            //in equal bracket
  return choose_nth_item (index - greater_begin,
                          (char *) array + size * greater_begin,
                          count - greater_begin, size,
                          compar) + greater_begin;
}
/**********************************************************************
* swap_entries
*
* Swap 2 entries of arbitrary size in-place in a table, one byte at a
* time. Swapping an entry with itself leaves it unchanged.
**********************************************************************/
void swap_entries(                //swap in place
    void *array,                  //array of entries
    size_t size,                  //size of entry
    inT32 index1,                 //entries to swap
    inT32 index2) {
  char *first = (char *) array + index1 * size;
  char *second = (char *) array + index2 * size;
  for (size_t remaining = size; remaining > 0; --remaining) {
    const char held = *first;
    *first++ = *second;
    *second++ = held;
  }
}

135
ccmain/statistc.h Normal file
View File

@ -0,0 +1,135 @@
/**********************************************************************
* File: statistc.h (Formerly stats.h)
* Description: Class description for STATS class.
* Author: Ray Smith
* Created: Mon Feb 04 16:19:07 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef STATISTC_H
#define STATISTC_H
#include <stdio.h>
#include "scrollview.h"
#include "host.h"
class DLLSYM STATS                //statistics package
{
  inT32 rangemin;                 //value represented by buckets[0]
  inT32 rangemax;                 //one past the value of the last bucket
  inT32 total_count;              //no of samples
  inT32 *buckets;                 //array of cells, NULL when empty

  public:
    //Construct with the given range.
    STATS(inT32 min, inT32 max);
    //Empty constructor, for use in arrays.
    STATS();
    ~STATS ();

    //Change the range.
    bool set_range(inT32 min, inT32 max);
    //Empty the buckets.
    void clear();
    //Add count samples to the bucket for value.
    void add(inT32 value, inT32 count);

    inT32 mode();                 //get mode of samples
    float mean();                 //get mean of samples
    float sd();                   //standard deviation
    //Percentile; frac is in [0,1].
    float ile(float frac);
    //Value of the lowest / highest bucket that holds any samples.
    inT32 min_bucket();
    inT32 max_bucket();
    float median();               //get median of samples
    //Apply a blurring filter of the given factor to the stats.
    void smooth(inT32 factor);
    //Cluster the samples into at most max_clusters entries of clusters,
    //using lower/upper as thresholds and multiple as distance threshold.
    inT32 cluster(float lower,
                  float upper,
                  float multiple,
                  inT32 max_clusters,
                  STATS *clusters);

    //Count held in the pile for value. Values outside the range read
    //the first or last bucket rather than failing.
    inT32 pile_count(inT32 value) {
      if (value <= rangemin)
        return buckets[0];
      if (value >= rangemax)
        return buckets[rangemax - rangemin - 1];
      return buckets[value - rangemin];
    }
    //Total of all piles.
    inT32 get_total() {
      return total_count;
    }

    //Test x for local minness.
    BOOL8 local_min(inT32 x);
    //Print summary, and the whole table if dump is set.
    void print(FILE *fp, BOOL8 dump);
    //Print summary, and the occupied part of the table if dump is set.
    void short_print(FILE *fp, BOOL8 dump);

    //Draw the histogram as rectangles.
    void plot(ScrollView* window,         //window to draw in
              float xorigin,              //origin of histogram
              float yorigin,
              float xscale,               //size of one x unit
              float yscale,               //size of one y unit
              ScrollView::Color colour);  //colour to draw in
    //Draw the histogram as a line.
    void plotline(ScrollView* window,         //window to draw in
                  float xorigin,              //origin of histogram
                  float yorigin,
                  float xscale,               //size of one x unit
                  float yscale,               //size of one y unit
                  ScrollView::Color colour);  //colour to draw in
};
// Selection without a full sort: returns the index of the element that
// would sit at position index if array were sorted. The array is
// reordered in place as a side effect.
DLLSYM inT32 choose_nth_item( //fast median
inT32 index, //index to choose
float *array, //array of items (modified)
inT32 count //no of items
);
// Generic variant of the above for elements of size bytes each, ordered
// by a qsort-style comparator.
DLLSYM inT32 choose_nth_item ( //fast median
inT32 index, //index to choose
void *array, //array of items (modified)
inT32 count, //no of items
size_t size, //element size in bytes
//comparator: <0, 0 or >0
int (*compar) (const void *, const void *)
);
// Exchanges entries index1 and index2 of array byte by byte.
void swap_entries( //swap in place
void *array, //array of entries
size_t size, //size of entry in bytes
inT32 index1, //entries to swap
inT32 index2);
#endif

333
ccmain/stepblob.cpp Normal file
View File

@ -0,0 +1,333 @@
/**********************************************************************
* File: stepblob.cpp (Formerly cblob.c)
* Description: Code for C_BLOB class.
* Author: Ray Smith
* Created: Tue Oct 08 10:41:13 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "stepblob.h"
ELISTIZE_S (C_BLOB)
/**********************************************************************
* position_outline
*
* Position the outline in the given list at the relevant place
* according to its nesting. Three outcomes are possible:
*  - the outline encloses one or more list entries: it replaces the
*    first such entry in the list and every enclosed entry becomes
*    one of its children;
*  - the outline is enclosed by a list entry: it is placed, by
*    recursion, in that entry's child list;
*  - neither: it is appended to the list at this level.
* Per the original comments, operator< on C_OUTLINEs is the
* "is enclosed by" test (its definition is not in this file).
**********************************************************************/
static void position_outline( //put in place
C_OUTLINE *outline, //thing to place
C_OUTLINE_LIST *destlist //destination list
) {
C_OUTLINE *dest_outline; //outline from dest list
C_OUTLINE_IT it = destlist; //iterator over destination list
//iterator over the new outline's own children
C_OUTLINE_IT child_it = outline->child ();
if (!it.empty ()) {
do {
dest_outline = it.data (); //get destination
//outline encloses dest
if (*dest_outline < *outline) {
//take dest off the list
dest_outline = it.extract ();
//put the new outline where dest was
it.add_after_then_move (outline);
//dest becomes a child of the new outline
child_it.add_to_end (dest_outline);
while (!it.at_last ()) {
it.forward (); //do rest of list
//check for other entries the outline encloses
dest_outline = it.data ();
if (*dest_outline < *outline) {
//take off list
dest_outline = it.extract ();
child_it.add_to_end (dest_outline);
//make it a child; stop if that emptied the list
if (it.empty ())
break;
}
}
return; //finished
}
//outline is enclosed by dest
else if (*outline < *dest_outline) {
position_outline (outline, dest_outline->child ());
//place in dest's child list
return; //finished
}
it.forward ();
}
while (!it.at_first ()); //one full circuit of the list
}
it.add_to_end (outline); //unrelated to every entry: at outer level
}
/**********************************************************************
* plot_outline_list
*
* Draw each outline of the list in the given colour and, recursively,
* every nested outline in the child colour.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
static void plot_outline_list(        //draw outlines
    C_OUTLINE_LIST *list,             //outlines to draw
    ScrollView* window,               //window to draw in
    ScrollView::Color colour,         //colour to use
    ScrollView::Color child_colour    //colour of children
    ) {
  C_OUTLINE_IT outline_it = list;
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    C_OUTLINE *current = outline_it.data ();
    current->plot (window, colour);
    C_OUTLINE_LIST *children = current->child ();
    if (children->empty ())
      continue;
    //Below the top level everything is drawn in the child colour.
    plot_outline_list (children, window, child_colour, child_colour);
  }
}
#endif
/**********************************************************************
* reverse_outline_list
*
* Call reverse() on every outline of the list and, recursively, on
* all of their children.
**********************************************************************/
static void reverse_outline_list(     //reverse outlines
    C_OUTLINE_LIST *list              //outlines to reverse
    ) {
  C_OUTLINE_IT outline_it = list;
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    C_OUTLINE *current = outline_it.data ();
    current->reverse ();
    C_OUTLINE_LIST *children = current->child ();
    if (children->empty ())
      continue;
    reverse_outline_list (children);
  }
}
/**********************************************************************
* C_BLOB::C_BLOB
*
* Constructor to build a C_BLOB from a list of C_OUTLINEs.
* The C_OUTLINEs are moved, not copied, so the source list is left
* empty. Each one is nested correctly in the blob by position_outline.
* Top-level outlines with a negative turn direction are reversed
* (children included) and flagged COUT_INVERSE.
**********************************************************************/
C_BLOB::C_BLOB(                       //constructor
    C_OUTLINE_LIST *outline_list      //in random order
    ) {
  C_OUTLINE_IT it = outline_list;

  //Drain the source list, nesting each outline as it arrives.
  while (!it.empty ()) {
    C_OUTLINE *taken = it.extract ();
    position_outline(taken, &outlines);
    if (!it.empty ())
      it.forward ();
  }

  //Normalise the direction of the top-level outlines.
  it.set_to_list (&outlines);
  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    C_OUTLINE *top = it.data ();
    const bool inverse = top->turn_direction () < 0;
    if (inverse) {
      top->reverse ();
      reverse_outline_list (top->child ());
    }
    top->set_flag (COUT_INVERSE, inverse ? TRUE : FALSE);
  }
}
// Build and return a heap-allocated fake blob for the given box. Its
// outlines are whatever C_OUTLINE::FakeOutline adds to a temporary list
// (a single fake outline with no steps, per the original comment).
// The result is created with new.
C_BLOB* C_BLOB::FakeBlob(const TBOX& box) {
  C_OUTLINE_LIST fake_outlines;
  C_OUTLINE::FakeOutline(box, &fake_outlines);
  C_BLOB* fake_blob = new C_BLOB(&fake_outlines);
  return fake_blob;
}
/**********************************************************************
* C_BLOB::bounding_box
*
* Return the bounding box of the blob: the accumulation (TBOX +=) of
* the bounding boxes of its top-level outlines.
**********************************************************************/
TBOX C_BLOB::bounding_box() {         //bounding box
  TBOX blob_box;                      //accumulated box
  C_OUTLINE_IT it = &outlines;
  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
    blob_box += it.data ()->bounding_box ();
  return blob_box;
}
/**********************************************************************
* C_BLOB::area
*
* Return the area of the blob: the sum of area() over its top-level
* outlines.
**********************************************************************/
inT32 C_BLOB::area() {                //area
  inT32 sum = 0;                      //running total
  C_OUTLINE_IT it = &outlines;
  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
    sum += it.data ()->area ();
  return sum;
}
/**********************************************************************
* C_BLOB::perimeter
*
* Return the sum of perimeter() over the blob's top-level outlines.
* Per the original comment this covers the top and 2nd level outlines;
* presumably C_OUTLINE::perimeter adds in the children - not visible
* from this file.
**********************************************************************/
inT32 C_BLOB::perimeter() {
  inT32 sum = 0;                      // running total
  C_OUTLINE_IT it = &outlines;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
    sum += it.data()->perimeter();
  return sum;
}
/**********************************************************************
* C_BLOB::outer_area
*
* Return the sum of outer_area() over the blob's top-level outlines.
**********************************************************************/
inT32 C_BLOB::outer_area() {          //area
  inT32 sum = 0;                      //running total
  C_OUTLINE_IT it = &outlines;
  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
    sum += it.data ()->outer_area ();
  return sum;
}
/**********************************************************************
* C_BLOB::count_transitions
*
* Return the total x and y maxes and mins in the blob: the sum of
* count_transitions(threshold) over the top-level outlines.
* Child outlines are not visited here.
**********************************************************************/
inT32 C_BLOB::count_transitions(      //count maxima
    inT32 threshold                   //on size
    ) {
  inT32 sum = 0;                      //running total
  C_OUTLINE_IT it = &outlines;
  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
    sum += it.data ()->count_transitions (threshold);
  return sum;
}
/**********************************************************************
* C_BLOB::move
*
* Move C_BLOB by vector
**********************************************************************/
void C_BLOB::move( // reposition blob
const ICOORD vec // by vector
) {
C_OUTLINE_IT it(&outlines); // iterator
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
it.data ()->move (vec); // move each outline
}
// Helper for C_BLOB::rotate to allow recursion of child outlines.
// Replaces every outline in the list with a new C_OUTLINE built from the
// old one and the rotation; the old outlines are deleted. Children are
// rotated recursively and re-attached to the replacement outline.
// NOTE(review): described as static but declared with external linkage.
void RotateOutlineList(const FCOORD& rotation, C_OUTLINE_LIST* outlines) {
C_OUTLINE_LIST new_outlines;  // collects the rebuilt outlines
C_OUTLINE_IT src_it(outlines);
C_OUTLINE_IT dest_it(&new_outlines);
while (!src_it.empty()) {
// Detach the old outline and step on before it is destroyed.
C_OUTLINE* old_outline = src_it.extract();
src_it.forward();
C_OUTLINE* new_outline = new C_OUTLINE(old_outline, rotation);
if (!old_outline->child()->empty()) {
// Rotate the children in place, then move them under the new outline
// so they are no longer in the old outline's list when it is deleted.
RotateOutlineList(rotation, old_outline->child());
C_OUTLINE_IT child_it(new_outline->child());
child_it.add_list_after(old_outline->child());
}
delete old_outline;
dest_it.add_to_end(new_outline);
}
// The source list is empty now; hand the rebuilt outlines back to it.
src_it.add_list_after(&new_outlines);
}
/**********************************************************************
* C_BLOB::rotate
*
* Rotate the C_BLOB by the given rotation vector.
* Warning! This rebuilds all the C_OUTLINEs (RotateOutlineList deletes
* each outline and replaces it with a new one), so any pointers to the
* blob's previous outlines are left dangling.
**********************************************************************/
void C_BLOB::rotate(const FCOORD& rotation) {
RotateOutlineList(rotation, &outlines);
}
/**********************************************************************
* C_BLOB::plot
*
* Draw the C_BLOB: top-level outlines in blob_colour and every nested
* outline in child_colour (see plot_outline_list).
**********************************************************************/
#ifndef GRAPHICS_DISABLED
void C_BLOB::plot( //draw it
ScrollView* window, //window to draw in
ScrollView::Color blob_colour, //colour for top-level outlines
ScrollView::Color child_colour //colour for holes and deeper nesting
) {
plot_outline_list(&outlines, window, blob_colour, child_colour);
}
#endif

91
ccmain/stepblob.h Normal file
View File

@ -0,0 +1,91 @@
/**********************************************************************
* File: stepblob.h (Formerly cblob.h)
* Description: Code for C_BLOB class.
* Author: Ray Smith
* Created: Tue Oct 08 10:41:13 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef STEPBLOB_H
#define STEPBLOB_H
#include "coutln.h"
#include "rect.h"
// A blob held as a list of top-level C_OUTLINEs, each of which owns its
// nested outlines through its child list. The class is an ELIST_LINK so
// blobs can themselves live in a C_BLOB_LIST.
class C_BLOB:public ELIST_LINK
{
  public:
    // Construct an empty blob.
    C_BLOB() {
    }
    // Construct from a list of outlines in any order. The outlines are
    // moved into the blob (the source list is left empty) and nested.
    explicit C_BLOB(C_OUTLINE_LIST *outline_list);

    // Build and return a fake blob containing a single fake outline with no
    // steps.
    static C_BLOB* FakeBlob(const TBOX& box);

    C_OUTLINE_LIST *out_list() {  //get outline list
      return &outlines;
    }

    TBOX bounding_box();          //compute bounding box
    inT32 area();                 //compute area
    inT32 perimeter();            // Total perimeter of outlines and 1st level children.
    inT32 outer_area();           //compute outer area
    inT32 count_transitions(      //count maxima
        inT32 threshold);         //size threshold

    void move(const ICOORD vec);  // reposition blob by vector
    void rotate(const FCOORD& rotation);  // Rotate by given vector.

    void plot(                            //draw one
        ScrollView* window,               //window to draw in
        ScrollView::Color blob_colour,    //for outer bits
        ScrollView::Color child_colour);  //for holes

    // Serialisation hooks: each forwards to the outline list.
    void prep_serialise() {       //set ptrs to counts
      outlines.prep_serialise ();
    }

    void dump(                    //write external bits
        FILE *f) {
      outlines.dump (f);
    }

    void de_dump(                 //read external bits
        FILE *f) {
      outlines.de_dump (f);
    }

    make_serialise(C_BLOB)

    // Assignment: replaces this blob's outlines with deep copies of the
    // source's outlines. Self-assignment is a no-op; without the guard
    // the list would be cleared and then "copied" from its own emptied
    // self, silently destroying the blob.
    C_BLOB& operator= (const C_BLOB & source) {
      if (this != &source) {
        if (!outlines.empty ())
          outlines.clear();
        outlines.deep_copy(&source.outlines, &C_OUTLINE::deep_copy);
      }
      return *this;
    }

    // Return a heap-allocated deep copy of src; created with new.
    static C_BLOB* deep_copy(const C_BLOB* src) {
      C_BLOB* blob = new C_BLOB;
      *blob = *src;
      return blob;
    }

  private:
    C_OUTLINE_LIST outlines;      //master elements
};
ELISTIZEH_S (C_BLOB)
#endif

84
ccmain/tesseractclass.cpp Normal file
View File

@ -0,0 +1,84 @@
///////////////////////////////////////////////////////////////////////
// File: tesseractclass.cpp
// Description: An instance of Tesseract. For thread safety, *every*
// global variable goes in here, directly, or indirectly.
// Author: Ray Smith
// Created: Fri Mar 07 08:17:01 PST 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "tesseractclass.h"
#include "globals.h"
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#ifdef HAVE_LIBLEPT
// Include leptonica library only if autoconf (or makefile etc) tell us to.
#include "allheaders.h"
#endif
namespace tesseract {
// Constructor. Initializes the ccmain-level parameter members through the
// BOOL_MEMBER / INT_MEMBER / STRING_MEMBER macros (defined elsewhere), each
// given the member name, its default value and a help string, then sets
// the remaining members: no binary image, deskew/reskew of (1, 0) and
// hindi_image_ false.
Tesseract::Tesseract()
: BOOL_MEMBER(tessedit_resegment_from_boxes, false,
"Take segmentation and labeling from box file"),
BOOL_MEMBER(tessedit_train_from_boxes, false,
"Generate training data from boxed chars"),
BOOL_MEMBER(tessedit_dump_pageseg_images, false,
"Dump itermediate images made during page segmentation"),
// The default for pageseg_mode is the old behaviour, so as not to
// upset anything that relies on that.
INT_MEMBER(tessedit_pageseg_mode, 2,
"Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char"
" (Values from PageSegMode enum in baseapi.h)"),
INT_MEMBER(tessedit_accuracyvspeed, 0,
"Accuracy V Speed tradeoff: 0 fastest, 100 most accurate"
" (Values from AccuracyVSpeed enum in baseapi.h)"),
BOOL_MEMBER(tessedit_train_from_boxes_word_level, false,
"Generate training data from boxed chars at word level."),
STRING_MEMBER(tessedit_char_blacklist, "",
"Blacklist of chars not to recognize"),
STRING_MEMBER(tessedit_char_whitelist, "",
"Whitelist of chars to recognize"),
BOOL_MEMBER(global_tessedit_ambigs_training, false,
"Perform training for ambiguities"),
// Plain (non-parameter) members. These are the same values Clear()
// restores for pix_binary_, deskew_ and reskew_.
pix_binary_(NULL),
deskew_(1.0f, 0.0f),
reskew_(1.0f, 0.0f),
hindi_image_(false) {
}
// Destructor. Delegates to Clear(), which destroys the binary image (when
// built with HAVE_LIBLEPT).
Tesseract::~Tesseract() {
Clear();
}
void Tesseract::Clear() {
#ifdef HAVE_LIBLEPT
if (pix_binary_ != NULL)
pixDestroy(&pix_binary_);
#endif
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
}
// Pushes the current values of tessedit_char_blacklist and
// tessedit_char_whitelist into the unicharset.
void Tesseract::SetBlackAndWhitelist() {
  const char* blacklist = tessedit_char_blacklist.string();
  const char* whitelist = tessedit_char_whitelist.string();
  unicharset.set_black_and_whitelist(blacklist, whitelist);
}
} // namespace tesseract

541
ccmain/tesseractclass.h Normal file
View File

@ -0,0 +1,541 @@
///////////////////////////////////////////////////////////////////////
// File: tesseractclass.h
// Description: An instance of Tesseract. For thread safety, *every*
// global variable goes in here, directly, or indirectly.
// Author: Ray Smith
// Created: Fri Mar 07 08:17:01 PST 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__
#define TESSERACT_CCMAIN_TESSERACTCLASS_H__
#include "varable.h"
#include "wordrec.h"
#include "ocrclass.h"
#include "control.h"
#include "docqual.h"
class CHAR_SAMPLES_LIST;
class CHAR_SAMPLE_LIST;
class PAGE_RES;
class PAGE_RES_IT;
class BLOCK_LIST;
class TO_BLOCK_LIST;
class IMAGE;
class WERD_RES;
class ROW;
class TBOX;
class SVMenuNode;
struct Pix;
class WERD_CHOICE;
class WERD;
class BLOB_CHOICE_LIST_CLIST;
// Top-level class for all tesseract global instance data.
// This class either holds or points to all data used by an instance
// of Tesseract, including the memory allocator. When this is
// complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT!
//
// NOTE to developers: Do not create cyclic dependencies through this class!
// The directory dependency tree must remain a tree! To keep this clean,
// lower-level code (eg in ccutil, the bottom level) must never need to
// know about the content of a higher-level directory.
// The following scheme will grant the easiest access to lower-level
// global members without creating a cyclic dependency:
// ccmain inherits wordrec, includes textord as a member
// wordrec inherits classify
// classify inherits ccstruct, includes dict as a member
// ccstruct inherits c_util, includes image as a member
// c_util inherits cc_util
// textord has a pointer to ccstruct, but doesn't own it.
// dict has a pointer to ccstruct, but doesn't own it.
//
// NOTE: that each level contains members that correspond to global
// data that is defined (and used) at that level, not necessarily where
// the type is defined so for instance:
// BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs");
// goes inside the Textord class, not the cc_util class.
namespace tesseract {
class Tesseract : public Wordrec {
public:
Tesseract();
~Tesseract();
void Clear();
// Simple accessors.
// Returns a read-only reference to the reskew_ member.
const FCOORD& reskew() const {
return reskew_;
}
// Destroy any existing pix and return a pointer to the pointer, so the
// caller can store a new image in it.
// Note: this goes through Clear(), which (as defined in
// tesseractclass.cpp) also resets deskew_ and reskew_ to (1, 0).
Pix** mutable_pix_binary() {
Clear();
return &pix_binary_;
}
// Returns the pix_binary_ pointer as stored, without copying; NULL when
// no image is held.
Pix* pix_binary() const {
return pix_binary_;
}
void SetBlackAndWhitelist();
int SegmentPage(const STRING* input_file,
IMAGE* image, BLOCK_LIST* blocks);
int AutoPageSeg(int width, int height, int resolution,
bool single_column, IMAGE* image,
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
//// control.h /////////////////////////////////////////////////////////
void recog_all_words( //process words
PAGE_RES *page_res, //page structure
//progress monitor
volatile ETEXT_DESC *monitor,
TBOX *target_word_box=0L,
inT16 dopasses=0
);
void classify_word_pass1( //recog one word
WERD_RES *word, //word to do
ROW *row,
BLOCK* block,
BOOL8 cluster_adapt,
CHAR_SAMPLES_LIST *char_clusters,
CHAR_SAMPLE_LIST *chars_waiting);
void recog_pseudo_word( //recognize blobs
BLOCK_LIST *block_list, //blocks to check
TBOX &selection_box);
// This method returns all the blobs in the specified blocks.
// It's the caller's responsibility to destroy the returned list.
C_BLOB_LIST* get_blobs_from_blocks(BLOCK_LIST* blocks // blocks to look at.
);
// This method can be used to perform word-level training using box files.
// TODO: this can be modified to perform training in general case too.
void train_word_level_with_boxes(
const STRING& box_file, // File with boxes.
const STRING& out_file, // Output file.
BLOCK_LIST* blocks // Blocks to use.
);
void fix_rep_char(WERD_RES *word);
void fix_quotes( //make double quotes
WERD_CHOICE *choice, //choice to fix
WERD *word, //word to do //char choices
BLOB_CHOICE_LIST_CLIST *blob_choices);
ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s,
const char *lengths);
void match_word_pass2( //recog one word
WERD_RES *word, //word to do
ROW *row,
BLOCK* block,
float x_height);
void classify_word_pass2( //word to do
WERD_RES *word,
BLOCK* block,
ROW *row);
BOOL8 recog_interactive( //recognize blobs
BLOCK *block, //block
ROW *row, //row of word
WERD *word //word to recognize
);
void fix_hyphens( //crunch double hyphens
WERD_CHOICE *choice, //choice to fix
WERD *word, //word to do //char choices
BLOB_CHOICE_LIST_CLIST *blob_choices);
void set_word_fonts(
WERD_RES *word, // word to adapt to
BLOB_CHOICE_LIST_CLIST *blob_choices); // detailed results
void font_recognition_pass( //good chars in word
PAGE_RES_IT &page_res_it);
//// output.h //////////////////////////////////////////////////////////
void output_pass( //Tess output pass //send to api
PAGE_RES_IT &page_res_it,
BOOL8 write_to_shm,
TBOX *target_word_box);
FILE *open_outfile( //open .map & .unlv file
const char *extension);
void write_results( //output a word
PAGE_RES_IT &page_res_it, //full info
char newline_type, //type of newline
BOOL8 force_eol, //override tilde crunch?
BOOL8 write_to_shm //send to api
);
void set_unlv_suspects(WERD_RES *word);
UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated?
BOOL8 acceptable_number_string(const char *s,
const char *lengths);
inT16 count_alphanums(const WERD_CHOICE &word);
inT16 count_alphas(const WERD_CHOICE &word);
//// tessedit.h ////////////////////////////////////////////////////////
// Engine start-up / shut-down declarations grouped under the tessedit.h
// banner. The init_* variants share the (arg0, textbase, language) prefix;
// three of them additionally take a config-file list described by
// (configs, configs_size, configs_global_only).
void read_config_file(const char *filename, bool global_only);
// Returns int; NOTE(review): success/failure convention is not visible here.
int init_tesseract(const char *arg0,
const char *textbase,
const char *language,
char **configs,
int configs_size,
bool configs_global_only);
// Variant without a config list (name suggests language-model setup --
// confirm in tessedit.cpp).
int init_tesseract_lm(const char *arg0,
const char *textbase,
const char *language);
// Initializes the tesseract classifier without loading language models.
int init_tesseract_classifier(const char *arg0,
const char *textbase,
const char *language,
char **configs,
int configs_size,
bool configs_global_only);
void recognize_page(STRING& image_name);
void end_tesseract();
// Same parameter list as init_tesseract, but returns bool rather than int.
bool init_tesseract_lang_data(const char *arg0,
const char *textbase,
const char *language,
char **configs,
int configs_size,
bool configs_global_only);
//// pgedit.h //////////////////////////////////////////////////////////
// Page-editor (interactive viewer) declarations grouped under the pgedit.h
// banner; they use the SVMenuNode / SVEvent types.
// Returns a newly built menu tree; ownership is not stated here.
SVMenuNode *build_menu_new();
void pgeditor_main(BLOCK_LIST *blocks);
void process_image_event( // action in image win
const SVEvent &event);
void pgeditor_read_file( // of serialised file
STRING &filename,
BLOCK_LIST *blocks // block list to add to
);
void do_new_source( // serialise
);
BOOL8 process_cmd_win_event( // UI command semantics
inT32 cmd_event, // which menu item?
char *new_value // any prompt data
);
//// reject.h //////////////////////////////////////////////////////////
// Rejection-related declarations grouped under the reject.h banner.
// Several helpers take a (word, word_lengths) pair of parallel C strings;
// presumably word_lengths gives the byte length of each character in word --
// TODO confirm against the definitions.
const char *char_ambiguities(char c);
void make_reject_map( //make rej map for wd
WERD_RES *word,
BLOB_CHOICE_LIST_CLIST *blob_choices, //detailed results
ROW *row,
inT16 pass //1st or 2nd?
);
BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map);
inT16 first_alphanum_index(const char *word,
const char *word_lengths);
inT16 first_alphanum_offset(const char *word,
const char *word_lengths);
inT16 alpha_count(const char *word,
const char *word_lengths);
BOOL8 word_contains_non_1_digit(const char *word,
const char *word_lengths);
void dont_allow_1Il(WERD_RES *word);
// Overload of the count_alphanums(const WERD_CHOICE&) declared in the
// output.h group.
inT16 count_alphanums( //how many alphanums
WERD_RES *word);
BOOL8 repeated_ch_string(const char *rep_ch_str,
const char *lengths);
void flip_0O(WERD_RES *word);
BOOL8 non_0_digit(UNICHAR_ID unichar_id);
BOOL8 non_O_upper(UNICHAR_ID unichar_id);
BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row);
void nn_match_word( //Match a word
WERD_RES *word,
ROW *row);
void nn_recover_rejects(WERD_RES *word, ROW *row);
BOOL8 test_ambig_word( //test for ambiguity
WERD_RES *word);
void set_done( //set done flag
WERD_RES *word,
inT16 pass);
inT16 safe_dict_word(const WERD_CHOICE &word);
void flip_hyphens(WERD_RES *word);
//// adaptions.h ///////////////////////////////////////////////////////
// Adaption declarations grouped under the adaptions.h banner. Most of them
// share the same trio of arguments: the word, a list of character clusters
// and a list of samples still waiting (per the parameter names).
void adapt_to_good_ems(WERD_RES *word,
CHAR_SAMPLES_LIST *char_clusters,
CHAR_SAMPLE_LIST *chars_waiting);
void adapt_to_good_samples(WERD_RES *word,
CHAR_SAMPLES_LIST *char_clusters,
CHAR_SAMPLE_LIST *chars_waiting);
// NOTE(review): the meaning of the mode bits is not visible in this chunk.
BOOL8 word_adaptable( //should we adapt?
WERD_RES *word,
uinT16 mode);
void reject_suspect_ems(WERD_RES *word);
void collect_ems_for_adaption(WERD_RES *word,
CHAR_SAMPLES_LIST *char_clusters,
CHAR_SAMPLE_LIST *chars_waiting);
void collect_characters_for_adaption(WERD_RES *word,
CHAR_SAMPLES_LIST *char_clusters,
CHAR_SAMPLE_LIST *chars_waiting);
void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting,
CHAR_SAMPLE *sample,
CHAR_SAMPLES *best_cluster);
void cluster_sample(CHAR_SAMPLE *sample,
CHAR_SAMPLES_LIST *char_clusters,
CHAR_SAMPLE_LIST *chars_waiting);
void complete_clustering(CHAR_SAMPLES_LIST *char_clusters,
CHAR_SAMPLE_LIST *chars_waiting);
//// tfacepp.cpp ///////////////////////////////////////////////////////
// Three word recognizers with identical parameter lists. Each returns a
// WERD_CHOICE* and also reports a raw choice and a bln output word through
// the reference-to-pointer parameters raw_choice and outword.
// NOTE(review): ownership of the returned/out pointers is not stated here.
WERD_CHOICE *recog_word_recursive( //recog one word
WERD *word, //word to do
DENORM *denorm, //de-normaliser
//matcher function
POLY_MATCHER matcher,
//tester function
POLY_TESTER tester,
//trainer function
POLY_TESTER trainer,
BOOL8 testing, //true if answer driven
//raw result
WERD_CHOICE *&raw_choice,
//list of blob lists
BLOB_CHOICE_LIST_CLIST *blob_choices,
WERD *&outword //bln word output
);
WERD_CHOICE *recog_word( //recog one word
WERD *word, //word to do
DENORM *denorm, //de-normaliser
POLY_MATCHER matcher, //matcher function
POLY_TESTER tester, //tester function
POLY_TESTER trainer, //trainer function
BOOL8 testing, //true if answer driven
WERD_CHOICE *&raw_choice, //raw result
//list of blob lists
BLOB_CHOICE_LIST_CLIST *blob_choices,
WERD *&outword //bln word output
);
WERD_CHOICE *split_and_recog_word( //recog one word
WERD *word, //word to do
DENORM *denorm, //de-normaliser
//matcher function
POLY_MATCHER matcher,
//tester function
POLY_TESTER tester,
//trainer function
POLY_TESTER trainer,
BOOL8 testing, //true if answer driven
//raw result
WERD_CHOICE *&raw_choice,
//list of blob lists
BLOB_CHOICE_LIST_CLIST *blob_choices,
WERD *&outword //bln word output
);
//// fixspace.cpp ///////////////////////////////////////////////////////
// Word-spacing repair declarations grouped under the fixspace.cpp banner.
BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position);
// Returns an inT16 score for a candidate word list; the scale is not visible
// here.
inT16 eval_word_spacing(WERD_RES_LIST &word_res_list);
void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block);
inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list);
void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);
void fix_fuzzy_space_list( //space explorer
WERD_RES_LIST &best_perm,
ROW *row,
BLOCK* block);
void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block);
void fix_fuzzy_spaces( //find fuzzy words
volatile ETEXT_DESC *monitor, //progress monitor
inT32 word_count, //count of words in doc
PAGE_RES *page_res);
//// docqual.cpp ////////////////////////////////////////////////////////
// Document-quality and crunching declarations grouped under the docqual.cpp
// banner. Most walk the page through a PAGE_RES_IT passed by reference.
GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word);
BOOL8 potential_word_crunch(WERD_RES *word,
GARBAGE_LEVEL garbage_level,
BOOL8 ok_dict_word);
void tilde_crunch(PAGE_RES_IT &page_res_it);
void unrej_good_quality_words( //unreject potential
PAGE_RES_IT &page_res_it);
void doc_and_block_rejection( //reject big chunks
PAGE_RES_IT &page_res_it,
BOOL8 good_quality_doc);
void quality_based_rejection(PAGE_RES_IT &page_res_it,
BOOL8 good_quality_doc);
void convert_bad_unlv_chs(WERD_RES *word_res);
void merge_tess_fails(WERD_RES *word_res);
void tilde_delete(PAGE_RES_IT &page_res_it);
void insert_rej_cblobs(WERD_RES *word);
//// pagewalk.cpp ///////////////////////////////////////////////////////
// Takes a block list, a selection box and a pointer to a Tesseract member
// function with signature BOOL8 (BLOCK*, ROW*, WERD*) -- for example
// recog_interactive declared earlier in this class.
// NOTE(review): how the BOOL8 returned by the callback is used is not
// visible in this chunk.
void
process_selected_words (
BLOCK_LIST * block_list, //blocks to check
TBOX & selection_box,
//function to call
BOOL8 (tesseract::Tesseract::*word_processor) (
BLOCK *,
ROW *,
WERD *));
//// tessbox.cpp ///////////////////////////////////////////////////////
// Thin interface to the underlying recogniser ("tess"), per the inline
// comments. Definitions are outside this chunk.
void tess_add_doc_word( //test acceptability
WERD_CHOICE *word_choice //after context
);
// Adapts the classifier to a word, given both the final (context-corrected)
// choice and the raw (pre-context) choice plus a reject map string.
void tess_adapter( //adapt to word
WERD *word, //bln word
DENORM *denorm, //de-normalise
const WERD_CHOICE& choice, //string for word
const WERD_CHOICE& raw_choice, //before context
const char *rejmap //reject map
);
// Segment-and-recognise entry points. All four return a WERD_CHOICE* and
// write a raw choice and a bln output word through raw_choice / outword.
// test_segment_pass2 and correct_segment_pass2 additionally take a
// POLY_TESTER; the tess_segment_pass1/2 pair does not.
WERD_CHOICE *test_segment_pass2( //recog one word
WERD *word, //bln word to do
DENORM *denorm, //de-normaliser
POLY_MATCHER matcher, //matcher function
POLY_TESTER tester, //tester function
//raw result
WERD_CHOICE *&raw_choice,
//list of blob lists
BLOB_CHOICE_LIST_CLIST *blob_choices,
WERD *&outword //bln word output
);
WERD_CHOICE *tess_segment_pass1( //recog one word
WERD *word, //bln word to do
DENORM *denorm, //de-normaliser
POLY_MATCHER matcher, //matcher function
//raw result
WERD_CHOICE *&raw_choice,
//list of blob lists
BLOB_CHOICE_LIST_CLIST *blob_choices,
WERD *&outword //bln word output
);
WERD_CHOICE *tess_segment_pass2( //recog one word
WERD *word, //bln word to do
DENORM *denorm, //de-normaliser
POLY_MATCHER matcher, //matcher function
//raw result
WERD_CHOICE *&raw_choice,
//list of blob lists
BLOB_CHOICE_LIST_CLIST *blob_choices,
WERD *&outword //bln word output
);
WERD_CHOICE *correct_segment_pass2( //recog one word
WERD *word, //bln word to do
DENORM *denorm, //de-normaliser
POLY_MATCHER matcher, //matcher function
POLY_TESTER tester, //tester function
//raw result
WERD_CHOICE *&raw_choice,
//list of blob lists
BLOB_CHOICE_LIST_CLIST *blob_choices,
WERD *&outword //bln word output
);
// Blob matchers. Each receives the blob to classify together with its
// previous and next neighbours, the word and its denorm, and fills `ratings`.
// The three differ only in their last parameter: a script name, nothing, or
// class-pruner results.
void tess_default_matcher( //call tess
PBLOB *pblob, //previous blob
PBLOB *blob, //blob to match
PBLOB *nblob, //next blob
WERD *word, //word it came from
DENORM *denorm, //de-normaliser
BLOB_CHOICE_LIST *ratings, //list of results
const char* script
);
void tess_bn_matcher( //call tess
PBLOB *pblob, //previous blob
PBLOB *blob, //blob to match
PBLOB *nblob, //next blob
WERD *word, //word it came from
DENORM *denorm, //de-normaliser
BLOB_CHOICE_LIST *ratings //list of results
);
void tess_cn_matcher( //call tess
PBLOB *pblob, //previous blob
PBLOB *blob, //blob to match
PBLOB *nblob, //next blob
WERD *word, //word it came from
DENORM *denorm, //de-normaliser
BLOB_CHOICE_LIST *ratings, //list of results
// Sorted array of CP_RESULT_STRUCT from class pruner.
CLASS_PRUNER_RESULTS cpresults
);
// Predicates over a recognised word; both compare the context-corrected
// choice with the raw (pre-context) choice.
BOOL8 tess_adaptable_word( //test adaptability
WERD *word, //word to test
WERD_CHOICE *word_choice, //after context
WERD_CHOICE *raw_choice //before context
);
BOOL8 tess_acceptable_word( //test acceptability
WERD_CHOICE *word_choice, //after context
WERD_CHOICE *raw_choice //before context
);
//// applybox.cpp //////////////////////////////////////////////////////
// Box-file handling declarations grouped under the applybox.cpp banner.
void apply_box_testing(BLOCK_LIST *block_list);
void apply_boxes(const STRING& fname,
BLOCK_LIST *block_list //real blocks
);
// converts an array of boxes to a block list
// box_cnt is the number of entries in boxes.
// NOTE(review): the meaning of the int return value is not visible here.
int Boxes2BlockList(int box_cnt, TBOX *boxes, BLOCK_LIST *block_list,
bool right2left);
//// blobcmp.cpp ///////////////////////////////////////////////////////
// Compares two TBLOBs, each supplied with the TEXTROW it belongs to, and
// returns a float. NOTE(review): whether larger means more or less similar
// is not visible in this chunk.
float compare_tess_blobs(TBLOB *blob1,
TEXTROW *row1,
TBLOB *blob2,
TEXTROW *row2);
//// paircmp.cpp ///////////////////////////////////////////////////////
// Pairwise blob comparison. compare_bln_blobs pairs each blob with a DENORM,
// compare_blobs pairs each blob with the ROW it came from; both return a
// float whose scale is not documented here.
float compare_bln_blobs( //match 2 blobs
PBLOB *blob1, //first blob
DENORM *denorm1,
PBLOB *blob2, //other blob
DENORM *denorm2);
float compare_blobs( //match 2 blobs
PBLOB *blob1, //first blob
ROW *row1, //row it came from
PBLOB *blob2, //other blob
ROW *row2);
// The BLOCK* and WERD* parameters are unnamed in this declaration.
BOOL8 compare_blob_pairs( //blob processor
BLOCK *,
ROW *row, //row it came from
WERD *,
PBLOB *blob //blob to compare
);
//// fixxht.cpp ///////////////////////////////////////////////////////
// Operates on a single word result. NOTE(review): "occ" is not expanded
// anywhere in this chunk -- see fixxht.cpp for what is actually checked.
void check_block_occ(WERD_RES *word_res);
//// Data members ///////////////////////////////////////////////////////
// Tunable variables declared through the BOOL_/INT_/STRING_VAR_H macros.
// Each entry is (name, value, description string); the second argument looks
// like the default value -- confirm against the macro definitions, which are
// not part of this chunk.
BOOL_VAR_H(tessedit_resegment_from_boxes, false,
"Take segmentation and labeling from box file");
BOOL_VAR_H(tessedit_train_from_boxes, false,
"Generate training data from boxed chars");
BOOL_VAR_H(tessedit_dump_pageseg_images, false,
"Dump itermediate images made during page segmentation");
// Integer code; the description string ties the values to the PageSegMode
// enum in baseapi.h.
INT_VAR_H(tessedit_pageseg_mode, 2,
"Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char"
" (Values from PageSegMode enum in baseapi.h)");
// Integer code; the description string ties the values to the AccuracyVSpeed
// enum in baseapi.h.
INT_VAR_H(tessedit_accuracyvspeed, 0,
"Accuracy V Speed tradeoff: 0 fastest, 100 most accurate"
" (Values from AccuracyVSpeed enum in baseapi.h)");
BOOL_VAR_H(tessedit_train_from_boxes_word_level, false,
"Generate training data from boxed chars at word level.");
STRING_VAR_H(tessedit_char_blacklist, "",
"Blacklist of chars not to recognize");
STRING_VAR_H(tessedit_char_whitelist, "",
"Whitelist of chars to recognize");
BOOL_VAR_H(global_tessedit_ambigs_training, false,
"Perform training for ambiguities");
//// ambigsrecog.cpp /////////////////////////////////////////////////////////
// Ambiguity-training declarations grouped under the ambigsrecog.cpp banner.
// init_ambigs_training returns a FILE*; the other two take a FILE* named
// output_file. NOTE(review): who closes that FILE* is not visible here.
FILE *init_ambigs_training(const STRING &fname);
void ambigs_training_segmented(const STRING &fname,
PAGE_RES *page_res,
volatile ETEXT_DESC *monitor,
FILE *output_file);
void ambigs_classify_and_output(PAGE_RES_IT *page_res_it,
const char *label,
FILE *output_file);
private:
// Private state of the class. The notes below are inferred from the member
// names only and need confirming against the code that uses them.
Pix* pix_binary_; // presumably the binarized page image -- TODO confirm
FCOORD deskew_; // presumably the deskew vector -- TODO confirm
FCOORD reskew_; // presumably the inverse of deskew_ -- TODO confirm
bool hindi_image_; // NOTE(review): set/used outside this chunk
};
} // namespace tesseract
#endif // TESSERACT_CCMAIN_TESSERACTCLASS_H__

441
ccmain/thresholder.cpp Normal file
View File

@ -0,0 +1,441 @@
///////////////////////////////////////////////////////////////////////
// File: thresholder.cpp
// Description: Base API for thresholding images in tesseract.
// Author: Ray Smith
// Created: Mon May 12 11:28:15 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#ifdef HAVE_LIBLEPT
// Include leptonica library only if autoconf (or makefile etc) tell us to.
#include "allheaders.h"
#endif
#include "thresholder.h"
#include <string.h>
#include "img.h"
#include "otsuthr.h"
namespace tesseract {
// Builds an empty thresholder: no source image attached, all image metrics
// zero and a zero-sized processing rectangle.
ImageThresholder::ImageThresholder()
#ifdef HAVE_LIBLEPT
    : pix_(NULL),
      image_data_(NULL),
#else
    : image_data_(NULL),
#endif
      image_width_(0),
      image_height_(0),
      image_bytespp_(0),
      image_bytespl_(0),
      rect_left_(0),
      rect_top_(0),
      rect_width_(0),
      rect_height_(0) {
}
// Releases whatever source image the thresholder still references.
ImageThresholder::~ImageThresholder() {
  // Inside a destructor the call always resolves to this class's Clear(),
  // so the qualification only makes that explicit.
  ImageThresholder::Clear();
}
// Detaches the thresholder from its source image. A Pix held by the object
// is destroyed; a raw buffer pointer is simply forgotten, not freed.
void ImageThresholder::Clear() {
  image_data_ = NULL;
#ifdef HAVE_LIBLEPT
  if (pix_ == NULL)
    return;
  pixDestroy(&pix_);
  pix_ = NULL;
#endif
}
// Reports whether the thresholder currently has no source image, i.e.
// neither a Pix (when built with Leptonica) nor a raw buffer.
bool ImageThresholder::IsEmpty() const {
#ifdef HAVE_LIBLEPT
  return pix_ == NULL && image_data_ == NULL;
#else
  return image_data_ == NULL;
#endif
}
// Attaches a raw image buffer as the source. Only the pointer and the
// geometry are recorded; the pixels are not copied and are never written,
// so the caller must keep the buffer alive for as long as this object uses
// it.
// Accepted formats:
//   - 8 bit greyscale, or 24/32 bit color (bytes_per_pixel = 1, 3 or 4).
//     Palette images are not supported and must be converted to 24 bit.
//   - 1 bit binary with bytes_per_pixel = 0: byte packed, the MSB of the
//     first byte is the first pixel, and a one bit means WHITE.
// The processing rectangle is reset to the whole image via Init().
void ImageThresholder::SetImage(const unsigned char* imagedata,
                                int width, int height,
                                int bytes_per_pixel, int bytes_per_line) {
#ifdef HAVE_LIBLEPT
  // A raw buffer replaces any Pix that was set earlier.
  if (pix_ != NULL) {
    pixDestroy(&pix_);
  }
  pix_ = NULL;
#endif
  image_bytespl_ = bytes_per_line;
  image_bytespp_ = bytes_per_pixel;
  image_height_ = height;
  image_width_ = width;
  image_data_ = imagedata;
  Init();
}
// Records the sub-rectangle of the source image that later ThresholdTo*
// calls should process. No pixels are touched and no validation is done.
void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
  rect_height_ = height;
  rect_width_ = width;
  rect_top_ = top;
  rect_left_ = left;
}
// Reports the processing rectangle together with the full image size, which
// is what a caller needs to map boxes found inside the rectangle back to
// original-image coordinates. With top-down coordinates left/top suffice;
// bottom-up coordinates also need the rectangle and image heights.
// All six output pointers are written unconditionally and must be non-NULL.
void ImageThresholder::GetImageSizes(int* left, int* top,
                                     int* width, int* height,
                                     int* imagewidth, int* imageheight) {
  *imageheight = image_height_;
  *imagewidth = image_width_;
  *height = rect_height_;
  *width = rect_width_;
  *top = rect_top_;
  *left = rect_left_;
}
// Tells the caller whether ThresholdToPix is available: true exactly when
// the library was compiled with HAVE_LIBLEPT.
bool ImageThresholder::HasThresholdToPix() const {
#ifdef HAVE_LIBLEPT
  const bool pix_interface_built = true;
#else
  const bool pix_interface_built = false;
#endif
  return pix_interface_built;
}
// Produces the binarized processing rectangle as a tesseract IMAGE.
// Binary sources are copied; grey and color sources are Otsu thresholded.
// A Pix source (Leptonica builds only) takes precedence over a raw buffer.
void ImageThresholder::ThresholdToIMAGE(IMAGE* image) {
#ifdef HAVE_LIBLEPT
  if (pix_ != NULL) {
    if (image_bytespp_ == 0) {
      // Already binary: only a format conversion is required.
      CopyBinaryRectPixToIMAGE(image);
      return;
    }
    if (image_bytespp_ == 4) {
      // 32 bit data is handed to the raw thresholder as it is.
      const uinT32* pix_words = pixGetData(pix_);
      OtsuThresholdRectToIMAGE(reinterpret_cast<const uinT8*>(pix_words),
                               image_bytespp_, image_bytespl_, image);
      return;
    }
    // Remaining depths go through a temporary IMAGE whose buffer is then fed
    // to the raw thresholder, using that IMAGE's own line stride.
    IMAGE grey_image;
    grey_image.FromPix(pix_);
    OtsuThresholdRectToIMAGE(grey_image.get_buffer(),
                             image_bytespp_,
                             COMPUTE_IMAGE_XDIM(grey_image.get_xsize(),
                                                grey_image.get_bpp()),
                             image);
    return;
  }
#endif
  if (image_bytespp_ <= 0) {
    // Raw binary input.
    CopyBinaryRectRawToIMAGE(image);
    return;
  }
  // Raw grey or color input.
  OtsuThresholdRectToIMAGE(image_data_, image_bytespp_, image_bytespl_,
                           image);
}
#ifdef HAVE_LIBLEPT
// Attaches a Pix as the source image. Unlike the raw-buffer overload this
// keeps its own clone (or a converted copy), so the caller may pixDestroy
// the input immediately afterwards.
// The stored Pix is normalised as follows:
//   - depth 2..7 bits  -> converted to 8 bit
//   - has a colormap   -> colormap removed, based on the source
//   - anything else    -> cloned unchanged
// The processing rectangle is reset to the whole image via Init().
void ImageThresholder::SetImage(const Pix* pix) {
  if (pix_ != NULL) {
    pixDestroy(&pix_);
  }
  image_data_ = NULL;
  // The Leptonica calls used here take non-const Pix*.
  Pix* src = const_cast<Pix*>(pix);
  int src_depth;
  pixGetDimensions(src, &image_width_, &image_height_, &src_depth);
  const bool sub_byte_depth = src_depth > 1 && src_depth < 8;
  if (sub_byte_depth) {
    pix_ = pixConvertTo8(src, false);
  } else if (pixGetColormap(src)) {
    pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
  } else {
    pix_ = pixClone(src);
  }
  // Metrics come from the stored Pix, whose depth may differ from the input.
  const int stored_depth = pixGetDepth(pix_);
  image_bytespp_ = stored_depth / 8;
  image_bytespl_ = pixGetWpl(pix_) * sizeof(l_uint32);
  Init();
}
// Produces the binarized processing rectangle as a newly created Pix stored
// in *pix. The caller owns the result and must pixDestroy it.
void ImageThresholder::ThresholdToPix(Pix** pix) {
  if (pix_ == NULL) {
    // Raw buffer source.
    if (image_bytespp_ > 0) {
      // Grey or color: Otsu threshold.
      OtsuThresholdRectToPix(image_data_, image_bytespp_, image_bytespl_, pix);
    } else {
      RawRectToPix(pix);
    }
    return;
  }
  switch (image_bytespp_) {
    case 0:
      // Binary Pix: a clone/crop of the rectangle is the answer.
      *pix = GetPixRect();
      break;
    case 4: {
      // 32 bit data is handed to the raw thresholder as it is.
      const uinT32* pix_words = pixGetData(pix_);
      OtsuThresholdRectToPix(reinterpret_cast<const uinT8*>(pix_words),
                             image_bytespp_, image_bytespl_, pix);
      break;
    }
    default: {
      // Remaining depths go through a temporary IMAGE whose buffer is then
      // fed to the raw thresholder, using that IMAGE's own line stride.
      IMAGE grey_image;
      grey_image.FromPix(pix_);
      OtsuThresholdRectToPix(grey_image.get_buffer(),
                             image_bytespp_,
                             COMPUTE_IMAGE_XDIM(grey_image.get_xsize(),
                                                grey_image.get_bpp()),
                             pix);
      break;
    }
  }
}
// Returns the source image restricted to the processing rectangle as a Pix
// that the caller must pixDestroy. No thresholding is applied.
// Intended for the Leptonica-only page layout analysis, which is why there
// is no raw-buffer counterpart.
Pix* ImageThresholder::GetPixRect() {
  if (pix_ == NULL) {
    // Raw source: the rectangle has to be copied into a fresh Pix.
    Pix* raw_pix;
    RawRectToPix(&raw_pix);
    return raw_pix;
  }
  if (IsFullImage()) {
    // Whole image wanted: a clone is enough.
    return pixClone(pix_);
  }
  // Otherwise clip out the rectangle.
  Box* clip_box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
  Pix* clipped = pixClipRectangle(pix_, clip_box, NULL);
  boxDestroy(&clip_box);
  return clipped;
}
#endif
// Shared tail of both SetImage overloads: makes the processing rectangle
// cover the entire image that was just set.
void ImageThresholder::Init() {
  const int full_width = image_width_;
  const int full_height = image_height_;
  SetRectangle(0, 0, full_width, full_height);
}
// Otsu threshold the rectangle, taking everything except the image buffer
// pointer from the class, to the output IMAGE.
void ImageThresholder::OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
int bytes_per_pixel,
int bytes_per_line,
IMAGE* image) const {
int* thresholds;
int* hi_values;
OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
rect_left_, rect_top_, rect_width_, rect_height_,
&thresholds, &hi_values);
// Threshold the image to the given IMAGE.
ThresholdRectToIMAGE(imagedata, bytes_per_pixel, bytes_per_line,
thresholds, hi_values, image);
delete [] thresholds;
delete [] hi_values;
}
// Applies per-channel thresholds to the processing rectangle of a grey or
// color buffer and writes the 1 bit result into `image`.
// thresholds/hi_values hold one entry per channel (byte of a pixel) and are
// expected to come from OtsuThreshold in otsuthr.cpp. A channel with a
// negative hi_value is skipped. A pixel becomes 0 as soon as one channel
// satisfies (value > threshold) == (hi_value == 0); otherwise it is 1.
void ImageThresholder::ThresholdRectToIMAGE(const unsigned char* imagedata,
                                            int bytes_per_pixel,
                                            int bytes_per_line,
                                            const int* thresholds,
                                            const int* hi_values,
                                            IMAGE* image) const {
  image->create(rect_width_, rect_height_, 1);
  IMAGELINE line;
  line.init(rect_width_);
  const unsigned char* rect_origin = imagedata + rect_top_* bytes_per_line +
                                     rect_left_ * bytes_per_pixel;
  // The source is walked top-down while the IMAGE is filled bottom-up,
  // because tesseract images keep y = 0 at the bottom (and 0 means black).
  for (int row = 0; row < rect_height_; ++row) {
    const unsigned char* src_pixel = rect_origin + row * bytes_per_line;
    for (int x = 0; x < rect_width_; ++x) {
      bool keep_one = true;
      for (int ch = 0; keep_one && ch < bytes_per_pixel; ++ch) {
        if (hi_values[ch] < 0)
          continue;  // Channel takes no part in the decision.
        const bool above = src_pixel[ch] > thresholds[ch];
        const bool hi_is_zero = hi_values[ch] == 0;
        if (above == hi_is_zero)
          keep_one = false;
      }
      line.pixels[x] = keep_one ? 1 : 0;
      src_pixel += bytes_per_pixel;
    }
    const int dest_y = rect_height_ - 1 - row;
    image->put_line(0, dest_y, rect_width_, &line, 0);
  }
}
// Copies the processing rectangle of a raw binary source into `image`.
void ImageThresholder::CopyBinaryRectRawToIMAGE(IMAGE* image) const {
  // View the first rect_top_ + rect_height_ rows of the raw buffer as an
  // IMAGE, so that the wanted rectangle sits at the bottom of that view.
  const int captured_rows = rect_top_ + rect_height_;
  unsigned char* raw_bits = const_cast<unsigned char*>(image_data_);
  IMAGE source_view;
  source_view.capture(raw_bits, image_width_, captured_rows, 1);
  image->create(rect_width_, rect_height_, 1);
  // copy_sub_image counts y from the bottom, hence the source y of 0.
  copy_sub_image(&source_view, rect_left_, 0, rect_width_, rect_height_,
                 image, 0, 0, false);
}
#ifdef HAVE_LIBLEPT
// Otsu threshold the rectangle, taking everything except the image buffer
// pointer from the class, to the output Pix.
void ImageThresholder::OtsuThresholdRectToPix(const unsigned char* imagedata,
int bytes_per_pixel,
int bytes_per_line,
Pix** pix) const {
int* thresholds;
int* hi_values;
OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
rect_left_, rect_top_, rect_width_, rect_height_,
&thresholds, &hi_values);
// Threshold the image to the given IMAGE.
ThresholdRectToPix(imagedata, bytes_per_pixel, bytes_per_line,
thresholds, hi_values, pix);
delete [] thresholds;
delete [] hi_values;
}
// Threshold the rectangle, taking everything except the image buffer pointer
// from the class, using thresholds/hi_values to the output IMAGE.
void ImageThresholder::ThresholdRectToPix(const unsigned char* imagedata,
int bytes_per_pixel,
int bytes_per_line,
const int* thresholds,
const int* hi_values,
Pix** pix) const {
*pix = pixCreate(rect_width_, rect_height_, 1);
uinT32* pixdata = pixGetData(*pix);
int wpl = pixGetWpl(*pix);
const unsigned char* srcdata = imagedata + rect_top_* bytes_per_line +
rect_left_ * bytes_per_pixel;
for (int y = 0; y < rect_height_; ++y) {
const uinT8* linedata = srcdata;
uinT32* pixline = pixdata + y * wpl;
for (int x = 0; x < rect_width_; ++x, linedata += bytes_per_pixel) {
bool white_result = true;
for (int ch = 0; ch < bytes_per_pixel; ++ch) {
if (hi_values[ch] >= 0 &&
(linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
white_result = false;
break;
}
}
if (white_result)
CLEAR_DATA_BIT(pixline, x);
else
SET_DATA_BIT(pixline, x);
}
srcdata += bytes_per_line;
}
}
// Copies the processing rectangle of the raw source buffer into a newly
// created Pix returned through `pix`. No thresholding is applied; the caller
// owns the result.
//   - Fewer than 4 bytes per pixel (binary, grey, 24 bit): routed through a
//     tesseract IMAGE and its ToPix conversion.
//   - 4 bytes per pixel: each pixel's four source bytes are packed into one
//     32 bit Pix word, first byte most significant.
void ImageThresholder::RawRectToPix(Pix** pix) const {
  if (image_bytespp_ < 4) {
    // Go via a tesseract image structure (doesn't copy the data)
    // and use ToPix.
    IMAGE image;
    int bits_per_pixel = image_bytespp_ * 8;
    if (image_bytespp_ == 0)
      bits_per_pixel = 1;
    // NOTE(review): capture is given no line stride, so this path appears to
    // rely on image_bytespl_ matching IMAGE's own row size -- confirm.
    image.capture(const_cast<uinT8*>(image_data_),
                  image_width_, rect_top_ + rect_height_, bits_per_pixel);
    if (IsFullImage()) {
      *pix = image.ToPix();
    } else {
      IMAGE rect;
      rect.create(rect_width_, rect_height_, bits_per_pixel);
      // The capture chopped the image off at top+height, so copy
      // the rectangle with y = 0 to get a rectangle of height
      // starting at the bottom, since copy_sub_image uses bottom-up coords.
      copy_sub_image(&image, rect_left_, 0, rect_width_, rect_height_,
                     &rect, 0, 0, true);
      *pix = rect.ToPix();
    }
  } else {
    *pix = pixCreate(rect_width_, rect_height_, 32);
    uinT32* data = pixGetData(*pix);
    int wpl = pixGetWpl(*pix);
    const uinT8* imagedata = image_data_ + rect_top_ * image_bytespl_ +
                             rect_left_ * image_bytespp_;
    for (int y = 0; y < rect_height_; ++y) {
      const uinT8* linedata = imagedata;
      uinT32* line = data + y * wpl;
      for (int x = 0; x < rect_width_; ++x) {
        // Widen each byte to uinT32 before shifting. Without the casts the
        // bytes are promoted to signed int, and a first byte >= 0x80 shifted
        // by 24 would land in the sign bit.
        line[x] = (static_cast<uinT32>(linedata[0]) << 24) |
                  (static_cast<uinT32>(linedata[1]) << 16) |
                  (static_cast<uinT32>(linedata[2]) << 8) |
                  static_cast<uinT32>(linedata[3]);
        linedata += 4;
      }
      imagedata += image_bytespl_;
    }
  }
}
// Converts the processing rectangle of a binary Pix source into `image`.
void ImageThresholder::CopyBinaryRectPixToIMAGE(IMAGE* image) const {
  if (!IsFullImage()) {
    // Clip the rectangle out first, convert the clipped copy, then release
    // the temporaries.
    Box* clip_box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
    Pix* clipped = pixClipRectangle(pix_, clip_box, NULL);
    image->FromPix(clipped);
    pixDestroy(&clipped);
    boxDestroy(&clip_box);
    return;
  }
  // Whole image wanted: convert the stored Pix directly.
  image->FromPix(pix_);
}
#endif
} // namespace tesseract.

180
ccmain/thresholder.h Normal file
View File

@ -0,0 +1,180 @@
///////////////////////////////////////////////////////////////////////
// File: thresholder.h
// Description: Base API for thresholding images in tesseract.
// Author: Ray Smith
// Created: Mon May 12 11:00:15 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H__
#define TESSERACT_CCMAIN_THRESHOLDER_H__
class IMAGE;
struct Pix;
namespace tesseract {
// Base class for all tesseract image thresholding classes.
// Specific classes can add new thresholding methods by
// overriding ThresholdToIMAGE and/or ThresholdToPix.
// Each instance deals with a single image, but the design is intended to
// be useful for multiple calls to SetRectangle and ThresholdTo* if
// desired.
// NOTE(review): the member list depends on HAVE_LIBLEPT, so every
// translation unit including this header must agree on that macro.
class ImageThresholder {
public:
// Creates an empty thresholder (IsEmpty() returns true).
ImageThresholder();
virtual ~ImageThresholder();
// Destroy the Pix if there is one, freeing memory.
// A raw image buffer is only forgotten; it is never freed by this class.
virtual void Clear();
// Return true if no image has been set.
bool IsEmpty() const;
// SetImage makes a copy of only the metadata, not the underlying
// image buffer. It promises to treat the source as read-only in either case,
// but in return assumes that the Pix or image buffer remain valid
// throughout the life of the ImageThresholder.
// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
// Palette color images will not work properly and must be converted to
// 24 bit.
// Binary images of 1 bit per pixel may also be given but they must be
// byte packed with the MSB of the first byte being the first pixel, and a
// one pixel is WHITE. For binary images set bytes_per_pixel=0.
// Also resets the processing rectangle to the whole image (via Init).
void SetImage(const unsigned char* imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
// Store the coordinates of the rectangle to process for later use.
// Doesn't actually do any thresholding.
void SetRectangle(int left, int top, int width, int height);
// Get enough parameters to be able to rebuild bounding boxes in the
// original image (not just within the rectangle).
// Left and top are enough with top-down coordinates, but
// the height of the rectangle and the image are needed for bottom-up.
// All six output pointers are written and must be non-NULL.
virtual void GetImageSizes(int* left, int* top, int* width, int* height,
int* imagewidth, int* imageheight);
// Return true if HAVE_LIBLEPT and this thresholder implements the Pix
// interface.
virtual bool HasThresholdToPix() const;
// Return true if the source image is color, i.e. it has at least
// 3 bytes per pixel.
bool IsColor() const {
return image_bytespp_ >= 3;
}
// Threshold the source image as efficiently as possible to the output
// tesseract IMAGE class.
virtual void ThresholdToIMAGE(IMAGE* image);
#ifdef HAVE_LIBLEPT
// Pix vs raw, which to use?
// Implementations should provide the ability to source and target Pix
// where possible. A future version of Tesseract may choose to use Pix
// as its internal representation and discard IMAGE altogether.
// Because of that, an implementation that sources and targets Pix may end up
// with fewer copies than an implementation that does not.
// NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its
// input, so the source pix may be pixDestroyed immediately after.
void SetImage(const Pix* pix);
// Threshold the source image as efficiently as possible to the output Pix.
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
virtual void ThresholdToPix(Pix** pix);
// Get a clone/copy of the source image rectangle.
// The returned Pix must be pixDestroyed.
// This function will be used in the future by the page layout analysis, and
// the layout analysis that uses it will only be available with Leptonica,
// so there is no raw equivalent.
Pix* GetPixRect();
#endif
protected:
// ----------------------------------------------------------------------
// Utility functions that may be useful components for other thresholders.
// Common initialization shared between SetImage methods.
// The base implementation sets the rectangle to the full image.
virtual void Init();
// Return true if we are processing the full image.
bool IsFullImage() const {
return rect_left_ == 0 && rect_top_ == 0 &&
rect_width_ == image_width_ && rect_height_ == image_height_;
}
// Otsu threshold the rectangle, taking everything except the image buffer
// pointer from the class, to the output IMAGE.
void OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
IMAGE* image) const;
// Threshold the rectangle, taking everything except the image buffer pointer
// from the class, using thresholds/hi_values to the output IMAGE.
void ThresholdRectToIMAGE(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
const int* thresholds, const int* hi_values,
IMAGE* image) const;
// Cut out the requested rectangle of the source raw binary image to the
// output IMAGE.
void CopyBinaryRectRawToIMAGE(IMAGE* image) const;
#ifdef HAVE_LIBLEPT
// Otsu threshold the rectangle, taking everything except the image buffer
// pointer from the class, to the output Pix.
void OtsuThresholdRectToPix(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
Pix** pix) const;
// Threshold the rectangle, taking everything except the image buffer pointer
// from the class, using thresholds/hi_values to the output Pix.
void ThresholdRectToPix(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
const int* thresholds, const int* hi_values,
Pix** pix) const;
// Copy the raw image rectangle, taking all data from the class, to the Pix.
void RawRectToPix(Pix** pix) const;
// Cut out the requested rectangle of the binary Pix to the output IMAGE.
void CopyBinaryRectPixToIMAGE(IMAGE* image) const;
#endif
protected:
#ifdef HAVE_LIBLEPT
// Clone or other copy of the source Pix.
// The pix will always be pixDestroy()ed on destruction of the class.
Pix* pix_;
#endif
// At most one of pix_ and image_data_ is not NULL; both are NULL while the
// thresholder is empty.
const unsigned char* image_data_; // Raw source image.
int image_width_; // Width of source image/pix.
int image_height_; // Height of source image/pix.
int image_bytespp_; // Bytes per pixel of source image/pix (0 = binary).
int image_bytespl_; // Bytes per line of source image/pix.
// Limits of image rectangle to be processed.
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_THRESHOLDER_H__

63
ccmain/vecfuncs.cpp Normal file
View File

@ -0,0 +1,63 @@
/* -*-C-*-
********************************************************************************
*
* File: vecfuncs.c (Formerly vecfuncs.c)
* Description: Vector calculations
* Author: Mark Seaman, OCR Technology
* Created: Fri Oct 27 15:39:52 1989
* Modified: Tue Jul 9 17:44:12 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Experimental (Do Not Distribute)
*
* (c) Copyright 1989, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
********************************************************************************
* Revision 5.1 89/07/27 11:47:50 11:47:50 ray ()
* Added ratings access methods.
* This version ready for independent development.
*/
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "mfcpch.h"
#include "vecfuncs.h"
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
/**********************************************************************
 * direction
 *
 * Classify the horizontal trend of the outline at an edge point by
 * comparing its x coordinate with those of its two neighbours.
 *
 * Returns  1 when x never decreases over prev -> point -> next and
 *            strictly increases on at least one of the two steps,
 *         -1 for the mirror-image (decreasing) case,
 *          0 otherwise (point is a local extreme in x, or all three
 *            x values are equal).
 **********************************************************************/
int direction(EDGEPT *point) {
  EDGEPT *before = point->prev;  /** neighbour preceding point **/
  EDGEPT *after = point->next;   /** neighbour following point **/

  /* The increasing and decreasing tests can never both hold, so the
     order in which they are checked does not matter. */
  if ((before->pos.x >= point->pos.x && point->pos.x > after->pos.x) ||
      (before->pos.x > point->pos.x && point->pos.x >= after->pos.x))
    return -1;

  if ((before->pos.x <= point->pos.x && point->pos.x < after->pos.x) ||
      (before->pos.x < point->pos.x && point->pos.x <= after->pos.x))
    return 1;

  return 0;
}

91
ccmain/vecfuncs.h Normal file
View File

@ -0,0 +1,91 @@
/* -*-C-*-
********************************************************************************
*
* File: vecfuncs.h (Formerly vecfuncs.h)
* Description: Vector calculations
* Author: Mark Seaman, OCR Technology
* Created: Wed Dec 20 09:37:18 1989
* Modified: Tue Jul 9 17:44:37 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Experimental (Do Not Distribute)
*
* (c) Copyright 1989, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*********************************************************************************/
#ifndef VECFUNCS_H
#define VECFUNCS_H
#include "tessclas.h"
#include <math.h>
/*----------------------------------------------------------------------
M a c r o s
----------------------------------------------------------------------*/
/**********************************************************************
* point_diff
*
* Compute the component-wise difference (p1) - (p2) and store it in
* point (p). The whole expression evaluates to (p).
* NOTE: (p) is expanded three times and (p1), (p2) twice each, so do not
* pass arguments that have side effects.
**********************************************************************/
#define point_diff(p,p1,p2) \
((p).x = (p1).x - (p2).x, \
(p).y = (p1).y - (p2).y, \
(p))
/**********************************************************************
* CROSS
*
* 2-D cross product of vectors (a) and (b): a.x * b.y - a.y * b.x.
* NOTE: each argument is expanded twice.
**********************************************************************/
#define CROSS(a,b) \
((a).x * (b).y - (a).y * (b).x)
/**********************************************************************
* SCALAR
*
* Scalar (dot) product of vectors (a) and (b): a.x * b.x + a.y * b.y.
* NOTE: each argument is expanded twice.
**********************************************************************/
#define SCALAR(a,b) \
((a).x * (b).x + (a).y * (b).y)
/**********************************************************************
* LENGTH
*
* SQUARED length of vector (a): a.x * a.x + a.y * a.y.
* No square root is taken, despite the name.
* NOTE: the argument is expanded four times.
**********************************************************************/
#define LENGTH(a) \
((a).x * (a).x + (a).y * (a).y)
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
int direction(EDGEPT *point);
/*
#if defined(__STDC__) || defined(__cplusplus) || MAC_OR_DOS
# define _ARGS(s) s
#else
# define _ARGS(s) ()
#endif*/
/* vecfuncs.c
int direction
_ARGS((EDGEPT *point));
#undef _ARGS
*/
#endif

967
ccmain/werd.cpp Normal file
View File

@ -0,0 +1,967 @@
/**********************************************************************
* File: werd.cpp (Formerly word.c)
* Description: Code for the WERD class.
* Author: Ray Smith
* Created: Tue Oct 08 14:32:12 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "blckerr.h"
#include "linlsq.h"
#include "werd.h"
// Colours used by the rainbow version of WERD::plot. The colour is advanced
// by one per blob and reset to FIRST_COLOUR as soon as it equals LAST_COLOUR.
#define FIRST_COLOUR ScrollView::RED //first rainbow colour
//last rainbow colour
#define LAST_COLOUR ScrollView::AQUAMARINE
#define CHILD_COLOUR ScrollView::BROWN //colour of children
// Error raised by WERD::scale when the word is not in polygon format.
const ERRCODE CANT_SCALE_EDGESTEPS =
"Attempted to scale an edgestep format word";
// EXTERN is defined empty here, so it adds nothing to the declarations below.
#define EXTERN
// Selects the per-blob "numeric" normalisation path in baseline_normalise_x.
EXTERN BOOL_VAR (bln_numericmode, 0, "Optimize for numbers");
// Target x-height and baseline position of a baseline-normalised word.
EXTERN INT_VAR (bln_x_height, 128, "Baseline Normalisation X-height");
EXTERN INT_VAR (bln_baseline_offset, 64, "Baseline Norm. offset of baseline");
// A negative value makes baseline_normalise_x use the plain row baseline;
// otherwise it is a tolerance, as a fraction of the x-height, on blob offsets.
EXTERN double_VAR (bln_blshift_maxshift, -1.0,
"Fraction of xh before shifting");
// Blobs no taller than this fraction of the x-height are not used for the
// per-blob baseline/x-height estimates in baseline_normalise_x.
EXTERN double_VAR (bln_blshift_xfraction, 0.75,
"Size fraction of xh before shifting");
// Expands the list support code for WERD (macro defined elsewhere).
ELISTIZE_S (WERD)
/**********************************************************************
 * WERD::WERD
 *
 * Build a WERD from a list of C_BLOBs.
 * The C_BLOBs are moved, not copied, so the source list is emptied.
 *
 * The white-on-black (W_INVERSE) flag of the word is then decided:
 *  1. Each blob whose outlines disagree with each other about
 *     COUT_INVERSE is moved to the reject list; every other blob casts
 *     one vote for inverse or non-inverse.
 *  2. W_INVERSE is set when the inverse votes strictly outnumber the
 *     non-inverse votes.
 *  3. Every remaining blob whose first outline disagrees with that
 *     consensus is moved to the reject list as well.
 **********************************************************************/
WERD::WERD(C_BLOB_LIST *blob_list,  // blobs in word order
           uinT8 blank_count,       // blanks in front
           const char *text)        // correct text
  : flags(0),
    correct(text) {
  C_BLOB_IT blob_it = blob_list;      // walks the blobs
  C_BLOB_IT last_it = blob_list;      // advanced to the final blob
  C_BLOB_IT reject_it = &rej_cblobs;  // receives rejected blobs
  C_OUTLINE_IT outline_it;            // outlines of the current blob
  inT16 votes_inverse = 0;
  inT16 votes_normal = 0;

  // Take over the complete source list.
  for (; !last_it.at_last(); last_it.forward()) {
  }
  cblobs.assign_to_sublist(&blob_it, &last_it);
  blanks = blank_count;

  // Pass 1: reject internally inconsistent blobs, count votes for the rest.
  blob_it.set_to_list(&cblobs);
  if (blob_it.empty())
    return;
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    outline_it.set_to_list(blob_it.data()->out_list());
    BOOL8 first_inverse = outline_it.data()->flag(COUT_INVERSE);
    BOOL8 mixed = FALSE;
    outline_it.mark_cycle_pt();
    while (!outline_it.cycled_list() && !mixed) {
      mixed = outline_it.data()->flag(COUT_INVERSE) != first_inverse;
      outline_it.forward();
    }
    if (mixed) {
      reject_it.add_after_then_move(blob_it.extract());
    } else if (first_inverse) {
      votes_inverse++;
    } else {
      votes_normal++;
    }
  }

  flags.set_bit(W_INVERSE, (votes_inverse > votes_normal));

  // Pass 2: reject blobs that disagree with the consensus.
  blob_it.set_to_list(&cblobs);
  if (blob_it.empty())
    return;
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    outline_it.set_to_list(blob_it.data()->out_list());
    if (outline_it.data()->flag(COUT_INVERSE) != flags.bit(W_INVERSE))
      reject_it.add_after_then_move(blob_it.extract());
  }
}
/**********************************************************************
 * WERD::WERD
 *
 * Build a polygon-format WERD from a list of PBLOBs.
 * The PBLOBs are moved, not copied, so the source list is emptied.
 * The blobs are stored in the cblobs member (viewed as a PBLOB_LIST)
 * and W_POLYGON is set to record that.
 **********************************************************************/
WERD::WERD(PBLOB_LIST *blob_list,  // blobs in word order
           uinT8 blank_count,      // blanks in front
           const char *text)       // correct text
  : flags(0),
    correct(text) {
  PBLOB_IT first_it = blob_list;  // start of the range to take
  PBLOB_IT last_it = blob_list;   // advanced to the final blob

  for (; !last_it.at_last(); last_it.forward()) {
  }
  // Move the whole range into our own list.
  reinterpret_cast<PBLOB_LIST *>(&cblobs)->assign_to_sublist(&first_it,
                                                             &last_it);
  flags.set_bit(W_POLYGON, TRUE);  // contents are polygons
  blanks = blank_count;
}
/**********************************************************************
 * WERD::WERD
 *
 * Build a WERD from a list of PBLOBs, taking flags, correct text and
 * blank count from an existing word.
 * The PBLOBs are moved, not copied, so the source list is emptied.
 **********************************************************************/
WERD::WERD(PBLOB_LIST *blob_list,  // blobs in word order
           WERD *clone)            // source of flags, text and blanks
  : flags(clone->flags),
    correct(clone->correct) {
  PBLOB_IT first_it = blob_list;  // start of the range to take
  PBLOB_IT last_it = blob_list;   // advanced to the final blob

  for (; !last_it.at_last(); last_it.forward()) {
  }
  // Move the whole range into our own list.
  reinterpret_cast<PBLOB_LIST *>(&cblobs)->assign_to_sublist(&first_it,
                                                             &last_it);
  blanks = clone->blanks;
}
/**********************************************************************
 * WERD::WERD
 *
 * Build a WERD from a list of C_BLOBs, taking flags, correct text and
 * blank count from an existing word.
 * The C_BLOBs are moved, not copied, so the source list is emptied.
 **********************************************************************/
WERD::WERD(C_BLOB_LIST *blob_list,  // blobs in word order
           WERD *clone)             // source of flags, text and blanks
  : flags(clone->flags),
    correct(clone->correct) {
  C_BLOB_IT first_it = blob_list;  // start of the range to take
  C_BLOB_IT last_it = blob_list;   // advanced to the final blob

  for (; !last_it.at_last(); last_it.forward()) {
  }
  // Move the whole range into our own list.
  cblobs.assign_to_sublist(&first_it, &last_it);
  blanks = clone->blanks;
}
/**********************************************************************
* WERD::poly_copy
*
* Make a copy of a WERD in polygon format.
* The source WERD is untouched.
**********************************************************************/
WERD *WERD::poly_copy( //make a poly copy
float xheight //row height
) {
PBLOB *blob; //new blob
WERD *result = new WERD; //output word
C_BLOB_IT src_it = &cblobs; //iterator
// LARC_BLOB_IT larc_it=(LARC_BLOB_LIST*)(&cblobs);
PBLOB_IT dest_it = (PBLOB_LIST *) (&result->cblobs);
//another
if (flags.bit (W_POLYGON)) {
*result = *this; //just copy it
}
else {
result->flags = flags;
result->correct = correct; //copy info
result->dummy = dummy;
if (!src_it.empty ()) {
// if (flags.bit(W_LINEARC))
// {
// do
// {
// blob=new PBLOB;
// poly_linearc_outlines(larc_it.data()->out_list(),
// blob->out_list()); //convert outlines
// dest_it.add_after_then_move(blob); //add to dest list
// larc_it.forward();
// }
// while (!larc_it.at_first());
// }
// else
// {
do {
blob = new PBLOB (src_it.data (), xheight);
//convert blob
//add to dest list
dest_it.add_after_then_move (blob);
src_it.forward ();
}
while (!src_it.at_first ());
// }
}
if (!rej_cblobs.empty ()) {
/* Polygonal approx of reject blobs */
src_it.set_to_list (&rej_cblobs);
dest_it = (PBLOB_LIST *) (&result->rej_cblobs);
do {
//convert blob
blob = new PBLOB (src_it.data (), xheight);
//add to dest list
dest_it.add_after_then_move (blob);
src_it.forward ();
}
while (!src_it.at_first ());
}
//polygon now
result->flags.set_bit (W_POLYGON, TRUE);
result->blanks = blanks;
}
return result;
}
/**********************************************************************
 * WERD::bounding_box
 *
 * Return the bounding box of the WERD: the union of the boxes of all
 * its blobs, INCLUDING the rejected blobs.
 * ORIGINALLY, REJECT CBLOBS WERE EXCLUDED, however, this led to bugs when the
 * words on the row were re-sorted. The original words were built with reject
 * blobs included. The FUZZY SPACE flags were set accordingly. If ALL the
 * blobs in a word are rejected the BB for the word is NULL, causing the sort
 * to screw up, leading to the erroneous possibility of the first word in a
 * row being marked as FUZZY space.
 *
 * For a polygon word both lists are walked as PBLOBs, because that is
 * what WERD::poly_copy stores in the reject list of such a word;
 * reading those objects through a C_BLOB pointer would be undefined.
 **********************************************************************/
TBOX WERD::bounding_box() {
  TBOX box;  // box being built

  if (flags.bit(W_POLYGON)) {
    // Polygons: rejected blobs first, then the blobs of the word.
    PBLOB_IT rej_it = (PBLOB_LIST *) (&rej_cblobs);
    for (rej_it.mark_cycle_pt(); !rej_it.cycled_list(); rej_it.forward()) {
      box += rej_it.data()->bounding_box();
    }
    PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      box += it.data()->bounding_box();
    }
  } else {
    // Edgestep blobs: rejected blobs first, then the blobs of the word.
    C_BLOB_IT rej_it = &rej_cblobs;
    for (rej_it.mark_cycle_pt(); !rej_it.cycled_list(); rej_it.forward()) {
      box += rej_it.data()->bounding_box();
    }
    C_BLOB_IT it = &cblobs;
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      box += it.data()->bounding_box();
    }
  }
  return box;
}
/**********************************************************************
* WERD::move
*
* Reposition WERD by vector
* NOTE!! REJECT CBLOBS ARE NOT MOVED
**********************************************************************/
void WERD::move( // reposition WERD
const ICOORD vec // by vector
) {
PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
// blob iterator
// LARC_BLOB_IT lblob_it((LARC_BLOB_LIST*)&cblobs);
C_BLOB_IT cblob_it(&cblobs); // cblob iterator
if (flags.bit (W_POLYGON))
for (blob_it.mark_cycle_pt ();
!blob_it.cycled_list (); blob_it.forward ())
blob_it.data ()->move (vec);
// else if (flags.bit(W_LINEARC))
// for( lblob_it.mark_cycle_pt();
// !lblob_it.cycled_list();
// lblob_it.forward() )
// lblob_it.data()->move( vec );
else
for (cblob_it.mark_cycle_pt ();
!cblob_it.cycled_list (); cblob_it.forward ())
cblob_it.data ()->move (vec);
}
/**********************************************************************
* WERD::scale
*
* Scale WERD by multiplier
**********************************************************************/
void WERD::scale( // scale WERD
const float f // by multiplier
) {
PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
// blob iterator
// LARC_BLOB_IT lblob_it((LARC_BLOB_LIST*)&cblobs);
if (flags.bit (W_POLYGON))
for (blob_it.mark_cycle_pt ();
!blob_it.cycled_list (); blob_it.forward ())
blob_it.data ()->scale (f);
// else if (flags.bit(W_LINEARC))
// for (lblob_it.mark_cycle_pt();
// !lblob_it.cycled_list();
// lblob_it.forward() )
// lblob_it.data()->scale( f );
else
CANT_SCALE_EDGESTEPS.error ("WERD::scale", ABORT, NULL);
}
/**********************************************************************
 * WERD::join_on
 *
 * Move all blobs, and all rejected blobs, of the other word onto the
 * end of this word's corresponding lists. The other word is left with
 * empty lists; this function does NOT delete it and does not modify
 * the pointer passed in, so disposing of it is up to the caller.
 **********************************************************************/
void WERD::join_on(WERD *&other) {  // word to take blobs from
  // Only list links are moved, so the main blob lists are handled as
  // PBLOB lists whatever the format of the words.
  PBLOB_IT dest_it(reinterpret_cast<PBLOB_LIST *>(&cblobs));
  PBLOB_IT from_it(reinterpret_cast<PBLOB_LIST *>(&other->cblobs));
  C_BLOB_IT dest_rej_it(&rej_cblobs);
  C_BLOB_IT from_rej_it(&other->rej_cblobs);

  for (; !from_it.empty(); from_it.forward()) {
    dest_it.add_to_end(from_it.extract());
  }
  for (; !from_rej_it.empty(); from_rej_it.forward()) {
    dest_rej_it.add_to_end(from_rej_it.extract());
  }
}
/**********************************************************************
 * WERD::copy_on
 *
 * Append deep copies of the other word's blobs, and of its rejected
 * blobs, to this word. The other word is not modified.
 * This word's W_POLYGON flag selects the blob type used for BOTH lists;
 * the other word is assumed to be in the same format as this one.
 * (Polygon words keep PBLOBs in their reject list as well - see
 * WERD::poly_copy - so copying those with C_BLOB::deep_copy would
 * misinterpret the objects.)
 **********************************************************************/
void WERD::copy_on(WERD *&other) {  // word to copy blobs from
  if (flags.bit(W_POLYGON)) {
    // Polygon blobs of the word.
    PBLOB_IT blob_it(reinterpret_cast<PBLOB_LIST *>(&cblobs));
    PBLOB_LIST blobs;
    blobs.deep_copy(reinterpret_cast<PBLOB_LIST *>(&other->cblobs),
                    &PBLOB::deep_copy);
    blob_it.move_to_last();
    blob_it.add_list_after(&blobs);

    // Polygon rejected blobs.
    if (!other->rej_cblobs.empty()) {
      PBLOB_IT rej_blob_it(reinterpret_cast<PBLOB_LIST *>(&rej_cblobs));
      PBLOB_LIST new_rej_blobs;
      new_rej_blobs.deep_copy(
          reinterpret_cast<PBLOB_LIST *>(&other->rej_cblobs),
          &PBLOB::deep_copy);
      rej_blob_it.move_to_last();
      rej_blob_it.add_list_after(&new_rej_blobs);
    }
  } else {
    // Edgestep blobs of the word.
    C_BLOB_IT c_blob_it(&cblobs);
    C_BLOB_LIST c_blobs;
    c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy);
    c_blob_it.move_to_last();
    c_blob_it.add_list_after(&c_blobs);

    // Edgestep rejected blobs.
    if (!other->rej_cblobs.empty()) {
      C_BLOB_IT rej_c_blob_it(&rej_cblobs);
      C_BLOB_LIST new_rej_c_blobs;
      new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy);
      rej_c_blob_it.move_to_last();
      rej_c_blob_it.add_list_after(&new_rej_c_blobs);
    }
  }
}
/**********************************************************************
 * WERD::baseline_normalise
 *
 * Baseline Normalise the word in Tesseract style, using the x-height
 * stored in the row. (I.e origin at centre of word at bottom. x-height
 * region scaled to region y =
 * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
 * - usually 64..192)
 * Simply forwards to baseline_normalise_x.
 **********************************************************************/
void WERD::baseline_normalise(ROW *row,          // row the word is on
                              DENORM *denorm) {  // antidote
  const float standard_x_height = row->x_height();
  baseline_normalise_x(row, standard_x_height, denorm);
}
/**********************************************************************
* WERD::baseline_normalise_x
*
* Baseline Normalise the word in Tesseract style. (I.e origin at centre of
* word at bottom. x-height region scaled to region y =
* (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
* - usually 64..192)
* USE A SPECIFIED X-HEIGHT - NOT NECESSARILY THE ONE IN row
**********************************************************************/
// Normalise, in place, every blob (and rejected blob) of a polygon word.
// Each blob is translated so that the horizontal centre of the word is at
// x = 0 and a baseline is at y = 0, then scaled, then raised by
// bln_baseline_offset. One of three strategies is used:
//   - bln_numericmode:          per-blob baseline (its own bottom) and scale;
//   - bln_blshift_maxshift < 0: row baseline and one scale for the word;
//   - otherwise:                per-blob baseline and x-height estimates.
// The transform applied is recorded in a DENORM, which is copied to *denorm
// when denorm is not NULL. W_NORMALIZED is set on exit.
// WRONG_WORD is raised (ABORT) if the word is not polygonal or is already
// flagged W_NORMALIZED.
void WERD::baseline_normalise_x( // Tess style BL Norm
ROW *row,
float x_height, //non standard value
DENORM *denorm //antidote
) {
BOOL8 using_row; //as baseline
float blob_x_centre; //middle of blob
float blob_offset; //bottom miss
float top_offset; //top miss
float blob_x_height; //xh for this blob
inT16 segments; //no of segments
inT16 segment; //current segment
DENORM_SEG *segs; //array of segments
float mean_x; //mean xheight
inT32 x_count; //no of xs
TBOX word_box = bounding_box ();//word bounding box
TBOX blob_box; //blob bounding box
PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
// blob iterator
PBLOB *blob;
LLSQ line; //fitted line
double line_m, line_c; //fitted line
//inverse norm
// Default transform: x origin at the centre of the word box, uniform scale
// mapping x_height onto bln_x_height.
DENORM antidote (word_box.left () +
(word_box.right () - word_box.left ()) / 2.0,
bln_x_height / x_height, row);
if (!flags.bit (W_POLYGON)) {
WRONG_WORD.error ("WERD::baseline_normalise", ABORT,
"Need to poly approx");
}
if (flags.bit (W_NORMALIZED)) {
WRONG_WORD.error ("WERD::baseline_normalise", ABORT,
"Baseline unnormalised");
}
if (bln_numericmode) {
// Numeric mode: one DENORM_SEG per blob; every blob sits on its own bottom.
segs = new DENORM_SEG[blob_it.length ()];
segments = 0;
float factor; // For scaling to baseline normalised size.
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
blob_it.forward ()) {
blob = blob_it.data ();
blob_box = blob->bounding_box ();
blob->move (FCOORD (-antidote.origin (),
-blob_box.bottom ()));
// NOTE(review): a blob box of height 0 makes the divisor 0 - confirm that
// cannot occur. Here the result is clamped below; the reject loop is not.
factor = bln_x_height * 4.0f / (3 * blob_box.height ());
// Constrain the scale factor as target numbers should be either
// cap height already or xheight.
if (factor < antidote.scale())
factor = antidote.scale();
else if (factor > antidote.scale() * 1.5f)
factor = antidote.scale() * 1.5f;
blob->scale (factor);
blob->move (FCOORD (0.0, bln_baseline_offset));
// xstart is taken from the blob AFTER it has been moved and scaled;
// ycoord is the bottom of the original (pre-normalisation) box.
segs[segments].xstart = blob->bounding_box().left();
segs[segments].ycoord = blob_box.bottom();
segs[segments++].scale_factor = factor;
}
antidote = DENORM (antidote.origin (), antidote.scale (),
0.0f, 0.0f, segments, segs, true, row);
// NOTE(review): segs is freed right after being passed to DENORM, so DENORM
// is presumably expected to take its own copy - confirm.
delete [] segs;
//Repeat for rej blobs
// Unlike the loop above, the scale factor used here is not clamped.
blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
blob_it.forward ()) {
blob = blob_it.data ();
blob_box = blob->bounding_box ();
blob->move (FCOORD (-antidote.origin (),
-blob_box.bottom ()));
blob->scale (bln_x_height * 4.0f / (3 * blob_box.height ()));
blob->move (FCOORD (0.0, bln_baseline_offset));
}
}
else if (bln_blshift_maxshift < 0) {
// Baseline shifting disabled: use the row baseline at the x centre of each
// blob and the single word scale held in antidote.
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
blob_it.forward ()) {
blob = blob_it.data ();
blob_box = blob->bounding_box ();
blob_x_centre = blob_box.left () +
(blob_box.right () - blob_box.left ()) / 2.0;
blob->move (FCOORD (-antidote.origin (),
-(row->base_line (blob_x_centre))));
blob->scale (antidote.scale ());
blob->move (FCOORD (0.0, bln_baseline_offset));
}
//Repeat for rej blobs
blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
blob_it.forward ()) {
blob = blob_it.data ();
blob_box = blob->bounding_box ();
blob_x_centre = blob_box.left () +
(blob_box.right () - blob_box.left ()) / 2.0;
blob->move (FCOORD (-antidote.origin (),
-(row->base_line (blob_x_centre))));
blob->scale (antidote.scale ());
blob->move (FCOORD (0.0, bln_baseline_offset));
}
}
else {
// Baseline shifting enabled. Several passes over the blobs:
//   1. classify each blob and record a baseline and x-height estimate;
//   2. refine the estimates against a fitted line and total them;
//   3. convert the estimates to scale factors and transform the blobs;
//   4. transform the rejected blobs using the segment they fall in.
// The mean x-height starts from the supplied x_height, counted once.
mean_x = x_height;
x_count = 1;
segs = new DENORM_SEG[blob_it.length ()];
segments = 0;
// Pass 1. In segs[], ycoord == -MAX_INT32 means "no baseline of its own"
// and scale_factor == 0 means "no x-height estimate yet". Until pass 3,
// scale_factor holds an x-height estimate, not a scale.
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
blob_it.forward ()) {
blob = blob_it.data ();
blob_box = blob->bounding_box ();
if (blob_box.height () > bln_blshift_xfraction * x_height) {
blob_x_centre = blob_box.left () +
(blob_box.right () - blob_box.left ()) / 2.0;
// Signed distance of the blob bottom from the row baseline, and of the
// blob top (less 1) from the row baseline plus x_height.
blob_offset =
blob_box.bottom () - row->base_line (blob_x_centre);
top_offset = blob_offset + blob_box.height () - x_height - 1;
blob_x_height = top_offset + x_height;
// From here on only the magnitudes of the two offsets are used.
if (top_offset < 0)
top_offset = -top_offset;
if (blob_offset < 0)
blob_offset = -blob_offset;
if (blob_offset < bln_blshift_maxshift * x_height) {
// Bottom is close to the row baseline: the blob's own bottom becomes its
// baseline and also contributes a point to the fitted line.
segs[segments].ycoord = blob_box.bottom ();
line.add (blob_x_centre, blob_box.bottom ());
if (top_offset < bln_blshift_maxshift * x_height) {
segs[segments].scale_factor = blob_box.height () - 1.0f;
x_count++;
}
else
segs[segments].scale_factor = 0.0f;
//fix it later
}
else {
//not a goer
segs[segments].ycoord = -MAX_INT32;
if (top_offset < bln_blshift_maxshift * x_height) {
segs[segments].scale_factor = blob_x_height;
x_count++;
}
else
segs[segments].scale_factor = 0.0f;
//fix it later
}
}
else {
// Blob too short to be used for either estimate.
segs[segments].scale_factor = 0.0f;
segs[segments].ycoord = -MAX_INT32;
}
segs[segments].xstart = blob_box.left ();
segments++;
}
// With fewer than two fitted points the row baseline is used instead of
// the fitted line.
using_row = line.count () <= 1;
if (!using_row) {
line_m = line.m ();
line_c = line.c (line_m);
}
else
line_m = line_c = 0;
segments = 0;
// Pass 2: where a blob has an x-height estimate but no baseline of its own
// and the fitted line is in use, re-measure the estimate from the line to
// the blob top; then add every non-zero estimate to mean_x.
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
blob_it.forward ()) {
blob = blob_it.data ();
blob_box = blob->bounding_box ();
blob_x_centre = blob_box.left () +
(blob_box.right () - blob_box.left ()) / 2.0;
if (segs[segments].ycoord == -MAX_INT32
&& segs[segments].scale_factor != 0 && !using_row) {
blob_offset = line_m * blob_x_centre + line_c;
segs[segments].scale_factor = blob_box.top () - blob_offset;
}
if (segs[segments].scale_factor != 0)
mean_x += segs[segments].scale_factor;
segments++;
}
// NOTE(review): x_count was fixed in pass 1, but the re-measurement above
// could in principle produce 0, which would then not be summed - confirm
// whether that mismatch matters.
mean_x /= x_count;
// printf("mean x=%g, count=%d, line_m=%g, line_c=%g\n",
// mean_x,x_count,line_m,line_c);
segments = 0;
// Pass 3: choose the baseline for each blob (own bottom, row baseline or
// fitted line), turn its x-height estimate (mean_x when it has none) into
// a y scale factor, and transform the blob. x is scaled by the word scale,
// y by the per-blob factor.
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
blob_it.forward ()) {
blob = blob_it.data ();
blob_box = blob->bounding_box ();
blob_x_centre = blob_box.left () +
(blob_box.right () - blob_box.left ()) / 2.0;
if (segs[segments].ycoord != -MAX_INT32)
blob_offset = (float) segs[segments].ycoord;
else if (using_row)
blob_offset = row->base_line (blob_x_centre);
else
blob_offset = line_m * blob_x_centre + line_c;
if (segs[segments].scale_factor == 0)
segs[segments].scale_factor = mean_x;
segs[segments].scale_factor =
bln_x_height / segs[segments].scale_factor;
// printf("Blob sf=%g, top=%d, bot=%d, base=%g\n",
// segs[segments].scale_factor,blob_box.top(),
// blob_box.bottom(),blob_offset);
blob->move (FCOORD (-antidote.origin (), -blob_offset));
blob->
scale (FCOORD (antidote.scale (), segs[segments].scale_factor));
blob->move (FCOORD (0.0, bln_baseline_offset));
segments++;
}
//Repeat for rej blobs
// Pass 4: each rejected blob uses the last segment whose xstart is not
// beyond the blob's x centre; segment only ever moves forward.
// NOTE(review): if cblobs is empty (segments == 0) while rej_cblobs is
// not, segs[segment] below reads element 0 of a zero-length array -
// confirm whether that situation can arise.
blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
segment = 0;
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
blob_it.forward ()) {
blob = blob_it.data ();
blob_box = blob->bounding_box ();
blob_x_centre = blob_box.left () +
(blob_box.right () - blob_box.left ()) / 2.0;
while (segment < segments - 1
&& segs[segment + 1].xstart <= blob_x_centre)
segment++;
if (segs[segment].ycoord != -MAX_INT32)
blob_offset = (float) segs[segment].ycoord;
else if (using_row)
blob_offset = row->base_line (blob_x_centre);
else
blob_offset = line_m * blob_x_centre + line_c;
blob->move (FCOORD (-antidote.origin (), -blob_offset));
blob->
scale (FCOORD (antidote.scale (), segs[segment].scale_factor));
blob->move (FCOORD (0.0, bln_baseline_offset));
}
// Replace the default transform by the segmented one only when at least
// one blob supplied a baseline point or an x-height estimate.
if (line.count () > 0 || x_count > 1)
antidote = DENORM (antidote.origin (), antidote.scale (),
line_m, line_c, segments, segs, using_row, row);
delete[]segs;
}
if (denorm != NULL)
*denorm = antidote;
//it's normalised
flags.set_bit (W_NORMALIZED, TRUE);
}
/**********************************************************************
* WERD::baseline_denormalise
*
* Baseline DeNormalise the word in Tesseract style. (I.e origin at centre of
* word at bottom. x-height region scaled to region y =
* (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
* - usually 64..192)
**********************************************************************/
void WERD::baseline_denormalise( // Tess style BL Norm
const DENORM *denorm //antidote
) {
PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
// blob iterator
PBLOB *blob;
if (!flags.bit (W_NORMALIZED)) {
WRONG_WORD.error ("WERD::baseline_denormalise", ABORT,
"Baseline normalised");
}
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
blob = blob_it.data ();
//denormalise it
blob->baseline_denormalise (denorm);
}
//Repeat for rej blobs
blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
blob = blob_it.data ();
//denormalise it
blob->baseline_denormalise (denorm);
}
//it's not normalised
flags.set_bit (W_NORMALIZED, FALSE);
}
/**********************************************************************
 * WERD::print
 *
 * Report the members of the word through tprintf: blank count,
 * bounding box, the flag word (decimal and octal) with each flag
 * spelled out, the correct text and the number of rejected blobs.
 * The FILE argument is not used.
 **********************************************************************/
void WERD::print(FILE *) {  // file to print on (ignored)
  // Text shown for a set / clear flag.
  const char *const yes = "TRUE";
  const char *const no = "FALSE ";

  tprintf ("Blanks= %d\n", blanks);
  bounding_box().print();
  tprintf ("Flags = %d = 0%o\n", flags.val, flags.val);
  tprintf (" W_SEGMENTED = %s\n", flags.bit(W_SEGMENTED) ? yes : no);
  tprintf (" W_ITALIC = %s\n", flags.bit(W_ITALIC) ? yes : no);
  tprintf (" W_BOL = %s\n", flags.bit(W_BOL) ? yes : no);
  tprintf (" W_EOL = %s\n", flags.bit(W_EOL) ? yes : no);
  tprintf (" W_NORMALIZED = %s\n", flags.bit(W_NORMALIZED) ? yes : no);
  tprintf (" W_POLYGON = %s\n", flags.bit(W_POLYGON) ? yes : no);
  tprintf (" W_LINEARC = %s\n", flags.bit(W_LINEARC) ? yes : no);
  tprintf (" W_DONT_CHOP = %s\n", flags.bit(W_DONT_CHOP) ? yes : no);
  tprintf (" W_REP_CHAR = %s\n", flags.bit(W_REP_CHAR) ? yes : no);
  tprintf (" W_FUZZY_SP = %s\n", flags.bit(W_FUZZY_SP) ? yes : no);
  tprintf (" W_FUZZY_NON = %s\n", flags.bit(W_FUZZY_NON) ? yes : no);
  tprintf ("Correct= %s\n", correct.string());
  tprintf ("Rejected cblob count = %d\n", rej_cblobs.length());
}
/**********************************************************************
 * WERD::plot
 *
 * Draw every blob of the WERD in the given colour (the same colour is
 * passed for both colour arguments of the blob plot call), then draw
 * the rejected blobs via plot_rej_blobs.
 * solid is only passed on to plot_rej_blobs.
 **********************************************************************/
#ifndef GRAPHICS_DISABLED
void WERD::plot(ScrollView* window,        // window to draw in
                ScrollView::Color colour,  // colour to draw in
                BOOL8 solid) {             // draw larcs solid
  if (flags.bit(W_POLYGON)) {
    // Polygon blobs.
    PBLOB_IT poly_it = reinterpret_cast<PBLOB_LIST *>(&cblobs);
    for (poly_it.mark_cycle_pt(); !poly_it.cycled_list();
         poly_it.forward()) {
      poly_it.data()->plot(window, colour, colour);
    }
  } else {
    // Edgestep blobs.
    C_BLOB_IT edge_it = &cblobs;
    for (edge_it.mark_cycle_pt(); !edge_it.cycled_list();
         edge_it.forward()) {
      edge_it.data()->plot(window, colour, colour);
    }
  }
  plot_rej_blobs(window, solid);
}
#endif
/**********************************************************************
* WERD::plot
*
* Draw the WERD in rainbow colours.
**********************************************************************/
#ifndef GRAPHICS_DISABLED
void WERD::plot( //draw it
ScrollView* window, //window to draw in
BOOL8 solid //draw larcs solid
) {
ScrollView::Color colour = FIRST_COLOUR; //current colour
if (flags.bit (W_POLYGON)) {
//polygons
PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
it.data ()->plot (window, colour, CHILD_COLOUR);
colour = (ScrollView::Color) (colour + 1);
if (colour == LAST_COLOUR)
colour = FIRST_COLOUR; //cycle round
}
}
// else if (flags.bit(W_LINEARC))
// {
// LARC_BLOB_IT it=(LARC_BLOB_LIST*)(&cblobs);
// for ( it.mark_cycle_pt(); !it.cycled_list(); it.forward() )
// {
// it.data()->plot(window,solid,colour,solid ? BLACK : CHILD_COLOUR);
// colour=(COLOUR)(colour+1);
// if (colour==LAST_COLOUR)
// colour=FIRST_COLOUR;
// }
// }
else {
C_BLOB_IT it = &cblobs; //blobs of WERD
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
it.data ()->plot (window, colour, CHILD_COLOUR);
colour = (ScrollView::Color) (colour + 1);
if (colour == LAST_COLOUR)
colour = FIRST_COLOUR; //cycle round
}
}
plot_rej_blobs(window, solid);
}
#endif
/**********************************************************************
 * WERD::plot_rej_blobs
 *
 * Draw the rejected blobs of the WERD - ALWAYS GREY.
 * The reject list is walked as polygon blobs when W_POLYGON is set and
 * as edgestep blobs otherwise. The solid argument is not used.
 **********************************************************************/
#ifndef GRAPHICS_DISABLED
void WERD::plot_rej_blobs(ScrollView* window,  // window to draw in
                          BOOL8 solid) {       // draw larcs solid
  const ScrollView::Color reject_colour = ScrollView::GREY;

  if (!flags.bit(W_POLYGON)) {
    // Edgestep rejects.
    C_BLOB_IT edge_it = &rej_cblobs;
    for (edge_it.mark_cycle_pt(); !edge_it.cycled_list();
         edge_it.forward()) {
      edge_it.data()->plot(window, reject_colour, reject_colour);
    }
    return;
  }

  // Polygon rejects.
  PBLOB_IT poly_it = reinterpret_cast<PBLOB_LIST *>(&rej_cblobs);
  for (poly_it.mark_cycle_pt(); !poly_it.cycled_list(); poly_it.forward()) {
    poly_it.data()->plot(window, reject_colour, reject_colour);
  }
}
#endif
/**********************************************************************
 * WERD::shallow_copy()
 *
 * Return a newly allocated word carrying this word's blank count,
 * flags, dummy field and correct text. No blobs are copied: both blob
 * lists of the new word are left as its constructor made them.
 **********************************************************************/
WERD *WERD::shallow_copy() {
  WERD *duplicate = new WERD;

  duplicate->correct = correct;
  duplicate->dummy = dummy;
  duplicate->flags = flags;
  duplicate->blanks = blanks;
  return duplicate;
}
/**********************************************************************
 * WERD::operator=
 *
 * Assign a word, DEEP copying both the blob list and the rejected blob
 * list. Any blobs previously held by this word are deleted.
 *
 * Assigning a word to itself is a no-op. (Without this check the lists
 * would be cleared and then "copied" from the now empty source,
 * silently destroying the word.)
 *
 * The blobs already held are cleared according to THIS word's own
 * W_POLYGON flag as it was before the assignment, because that flag is
 * what describes the objects currently in the lists; the new contents
 * are copied according to the source's flag.
 **********************************************************************/
WERD & WERD::operator= (const WERD & source) {  // word to copy from
  if (this == &source)
    return *this;

  // Format of the blobs we hold now - must be read before flags changes.
  const BOOL8 held_polygons = flags.bit(W_POLYGON);

  this->ELIST_LINK::operator= (source);
  blanks = source.blanks;
  flags = source.flags;
  dummy = source.dummy;
  correct = source.correct;

  // Dispose of the old contents using their real type.
  if (held_polygons) {
    if (!cblobs.empty())
      reinterpret_cast<PBLOB_LIST*>(&cblobs)->clear();
    if (!rej_cblobs.empty())
      reinterpret_cast<PBLOB_LIST*>(&rej_cblobs)->clear();
  } else {
    if (!cblobs.empty())
      cblobs.clear();
    if (!rej_cblobs.empty())
      rej_cblobs.clear();
  }

  // Deep copy the new contents using the source's type.
  if (flags.bit(W_POLYGON)) {
    reinterpret_cast<PBLOB_LIST*>(&cblobs)->deep_copy(
        reinterpret_cast<const PBLOB_LIST*>(&source.cblobs),
        &PBLOB::deep_copy);
    reinterpret_cast<PBLOB_LIST*>(&rej_cblobs)->deep_copy(
        reinterpret_cast<const PBLOB_LIST*>(&source.rej_cblobs),
        &PBLOB::deep_copy);
  } else {
    cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
    rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
  }
  return *this;
}
/**********************************************************************
* word_comparator()
*
* word comparator used to sort a word list so that words are in increasing
* order of left edge.
**********************************************************************/
int word_comparator( //sort blobs
const void *word1p, //ptr to ptr to word1
const void *word2p //ptr to ptr to word2
) {
WERD *
word1 = *(WERD **) word1p;
WERD *
word2 = *(WERD **) word2p;
return word1->bounding_box ().left () - word2->bounding_box ().left ();
}

277
ccmain/werd.h Normal file
View File

@ -0,0 +1,277 @@
/**********************************************************************
* File: werd.h
* Description: Declaration of the WERD class.
* Author: Ray Smith
* Created: Tue Oct 08 14:32:12 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef WERD_H
#define WERD_H
#include "varable.h"
#include "bits16.h"
#include "strngs.h"
#include "blckerr.h"
#include "stepblob.h"
#include "polyblob.h"
//#include "larcblob.h"
// Bit numbers for the per-word flags. The values are consecutive from 0 and
// are passed as the `mask` argument of WERD::flag() / WERD::set_flag().
enum WERD_FLAGS
{
  W_SEGMENTED = 0,   // correctly segmented
  W_ITALIC = 1,      // italic text
  W_BOLD = 2,        // bold text
  W_BOL = 3,         // start of line
  W_EOL = 4,         // end of line
  W_NORMALIZED = 5,  // word has been normalised (presumably baseline
                     // normalisation - TODO confirm)
  W_POLYGON = 6,     // polygonal approximation
  W_LINEARC = 7,     // linearc approximation
  W_DONT_CHOP = 8,   // fixed pitch chopped
  W_REP_CHAR = 9,    // repeated character
  W_FUZZY_SP = 10,   // fuzzy space
  W_FUZZY_NON = 11,  // fuzzy nonspace
  W_INVERSE = 12     // white on black
};
// Display flags bit number allocations. The values are consecutive from 0.
enum DISPLAY_FLAGS
{
  DF_BOX = 0,          // bounding box
  DF_TEXT = 1,         // correct ascii text
  DF_POLYGONAL = 2,    // polygonal approximation
  DF_EDGE_STEP = 3,    // edge steps
  DF_BN_POLYGONAL = 4  // baseline-normalised polygonal approximation
};
class ROW; //forward decl
class WERD:public ELIST_LINK
{
public:
WERD() {
} //empty constructor
WERD( //constructor
C_BLOB_LIST *blob_list, //blobs in word
uinT8 blanks, //blanks in front
const char *text); //correct text
WERD( //constructor
PBLOB_LIST *blob_list, //blobs in word
uinT8 blanks, //blanks in front
const char *text); //correct text
WERD( //constructor
PBLOB_LIST *blob_list, //blobs in word
WERD *clone); //use these flags etc.
WERD( //constructor
C_BLOB_LIST *blob_list, //blobs in word
WERD *clone); //use these flags etc.
~WERD () { //destructor
if (flags.bit (W_POLYGON)) {
//use right destructor
((PBLOB_LIST *) & cblobs)->clear ();
//use right destructor
((PBLOB_LIST *) & rej_cblobs)->clear ();
}
// else if (flags.bit(W_LINEARC))
// ((LARC_BLOB_LIST*)&cblobs)->clear(); //use right destructor
}
WERD *poly_copy( //make copy as poly
float xheight); //row xheight
WERD *larc_copy( //make copy as larc
float xheight); //row xheight
//get DUFF compact blobs
C_BLOB_LIST *rej_cblob_list() {
if (flags.bit (W_POLYGON))
WRONG_WORD.error ("WERD::rej_cblob_list", ABORT, NULL);
return &rej_cblobs;
}
//get DUFF poly blobs
PBLOB_LIST *rej_blob_list() {
if (!flags.bit (W_POLYGON))
WRONG_WORD.error ("WERD::rej_blob_list", ABORT, NULL);
return (PBLOB_LIST *) (&rej_cblobs);
}
C_BLOB_LIST *cblob_list() { //get compact blobs
if (flags.bit (W_POLYGON) || flags.bit (W_LINEARC))
WRONG_WORD.error ("WERD::cblob_list", ABORT, NULL);
return &cblobs;
}
PBLOB_LIST *blob_list() { //get poly blobs
if (!flags.bit (W_POLYGON))
WRONG_WORD.error ("WERD::blob_list", ABORT, NULL);
//make it right type
return (PBLOB_LIST *) (&cblobs);
}
// LARC_BLOB_LIST *larc_blob_list() //get poly blobs
// {
// if (!flags.bit(W_LINEARC))
// WRONG_WORD.error("WERD::larc_blob_list",ABORT,NULL);
// return (LARC_BLOB_LIST*)(&cblobs); //make it right type
// }
PBLOB_LIST *gblob_list() { //get generic blobs
//make it right type
return (PBLOB_LIST *) (&cblobs);
}
const char *text() const { //correct text
return correct.string ();
}
uinT8 space() { //access function
return blanks;
}
void set_blanks( //set blanks
uinT8 new_blanks) {
blanks = new_blanks;
}
void set_text( //replace correct text
const char *new_text) { //with this
correct = new_text;
}
TBOX bounding_box(); //compute bounding box
BOOL8 flag( //test flag
WERD_FLAGS mask) const { //flag to test
return flags.bit (mask);
}
void set_flag( //set flag value
WERD_FLAGS mask, //flag to test
BOOL8 value) { //value to set
flags.set_bit (mask, value);
}
BOOL8 display_flag( //test display flag
uinT8 flag) const { //flag to test
return disp_flags.bit (flag);
}
void set_display_flag( //set display flag
uinT8 flag, //flag to set
BOOL8 value) { //value to set
disp_flags.set_bit (flag, value);
}
WERD *shallow_copy(); //shallow copy word
void move( // reposition word
const ICOORD vec); // by vector
void scale( // scale word
const float vec); // by multiplier
void join_on( //append word
WERD *&other); //Deleting other
void copy_on( //copy blobs
WERD *&other); //from other
void baseline_normalise ( // Tess style BL Norm
//optional antidote
ROW * row, DENORM * denorm = NULL);
void baseline_normalise_x ( //Use non standard xht
ROW * row, float x_height, //Weird value to use
DENORM * denorm = NULL); //optional antidote
void baseline_denormalise( //un-normalise
const DENORM *denorm);
void print( //print
FILE *fp); //file to print on
void plot ( //draw one
ScrollView* window, //window to draw in
//uniform colour
ScrollView::Color colour, BOOL8 solid = FALSE);
void plot ( //draw one
//in rainbow colours
ScrollView* window, BOOL8 solid = FALSE);
void plot_rej_blobs ( //draw one
//in rainbow colours
ScrollView* window, BOOL8 solid = FALSE);
WERD & operator= ( //assign words
const WERD & source); //from this
void prep_serialise() { //set ptrs to counts
correct.prep_serialise ();
if (flags.bit (W_POLYGON))
((PBLOB_LIST *) (&cblobs))->prep_serialise ();
// else if (flags.bit(W_LINEARC))
// ((LARC_BLOB_LIST*)(&cblobs))->prep_serialise();
else
cblobs.prep_serialise ();
rej_cblobs.prep_serialise ();
}
void dump( //write external bits
FILE *f) {
correct.dump (f);
if (flags.bit (W_POLYGON))
((PBLOB_LIST *) (&cblobs))->dump (f);
// else if (flags.bit(W_LINEARC))
// ((LARC_BLOB_LIST*)(&cblobs))->dump( f );
else
cblobs.dump (f);
rej_cblobs.dump (f);
}
void de_dump( //read external bits
FILE *f) {
correct.de_dump (f);
if (flags.bit (W_POLYGON))
((PBLOB_LIST *) (&cblobs))->de_dump (f);
// else if (flags.bit(W_LINEARC))
// ((LARC_BLOB_LIST*)(&cblobs))->de_dump( f );
else
cblobs.de_dump (f);
rej_cblobs.de_dump (f);
}
make_serialise (WERD) private:
uinT8 blanks; //no of blanks
uinT8 dummy; //padding
BITS16 flags; //flags about word
BITS16 disp_flags; //display flags
inT16 dummy2; //padding
STRING correct; //correct text
C_BLOB_LIST cblobs; //compacted blobs
C_BLOB_LIST rej_cblobs; //DUFF blobs
};
// List support for WERD, generated by the ELISTIZEH_S macro (defined
// elsewhere; presumably declares WERD_LIST and WERD_IT - TODO confirm).
ELISTIZEH_S (WERD)
// NOTE(review): the original comment on this include was truncated
// ("placed here due to"). The include sits after the WERD declaration rather
// than with the other includes, presumably because ocrrow.h itself needs
// WERD to be declared first - confirm before moving it.
#include "ocrrow.h" //placed here, after WERD, deliberately (see note above)
// Externally defined variables declared through the *_VAR_H macros; the
// arguments are the variable name, a value (presumably its default - TODO
// confirm) and a description string.
extern BOOL_VAR_H (bln_numericmode, 0, "Optimize for numbers");
extern INT_VAR_H (bln_x_height, 128, "Baseline Normalisation X-height");
extern INT_VAR_H (bln_baseline_offset, 64,
"Baseline Norm. offset of baseline");
//void poly_linearc_outlines( //do list of outlines
//LARC_OUTLINE_LIST *srclist, //list to convert
//OUTLINE_LIST *destlist //desstination list
//);
//OUTLINE *poly_larcline( //draw it
//LARC_OUTLINE *srcline //one to approximate
//);
// Comparison function for sorting words by the left edge of their bounding
// boxes. Each argument is a pointer to a pointer to a WERD.
int word_comparator(const void *word1p,   // ptr to ptr to word1
                    const void *word2p);  // ptr to ptr to word2
#endif