diff --git a/ccmain/Makefile b/ccmain/Makefile new file mode 100644 index 000000000..fee505fdc --- /dev/null +++ b/ccmain/Makefile @@ -0,0 +1,650 @@ +# Makefile.in generated by automake 1.10.1 from Makefile.am. +# ccmain/Makefile. Generated from Makefile.in by configure. + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + + + + + +pkgdatadir = $(datadir)/tesseract +pkglibdir = $(libdir)/tesseract +pkgincludedir = $(includedir)/tesseract +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = x86_64-unknown-linux-gnu +host_triplet = x86_64-unknown-linux-gnu +subdir = ccmain +DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(SHELL) $(top_srcdir)/config/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config_auto.h +CONFIG_CLEAN_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed 
"s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" +libLIBRARIES_INSTALL = $(INSTALL_DATA) +LIBRARIES = $(lib_LIBRARIES) +AR = ar +ARFLAGS = cru +libtesseract_main_a_AR = $(AR) $(ARFLAGS) +libtesseract_main_a_LIBADD = +am_libtesseract_main_a_OBJECTS = adaptions.$(OBJEXT) \ + ambigsrecog.$(OBJEXT) applybox.$(OBJEXT) blobcmp.$(OBJEXT) \ + callnet.$(OBJEXT) charcut.$(OBJEXT) charsample.$(OBJEXT) \ + control.$(OBJEXT) docqual.$(OBJEXT) expandblob.$(OBJEXT) \ + fixspace.$(OBJEXT) fixxht.$(OBJEXT) imgscale.$(OBJEXT) \ + matmatch.$(OBJEXT) osdetect.$(OBJEXT) output.$(OBJEXT) \ + pagewalk.$(OBJEXT) paircmp.$(OBJEXT) pgedit.$(OBJEXT) \ + reject.$(OBJEXT) scaleimg.$(OBJEXT) tessbox.$(OBJEXT) \ + tessedit.$(OBJEXT) tesseractclass.$(OBJEXT) tessvars.$(OBJEXT) \ + tfacepp.$(OBJEXT) thresholder.$(OBJEXT) tstruct.$(OBJEXT) \ + varabled.$(OBJEXT) werdit.$(OBJEXT) +libtesseract_main_a_OBJECTS = $(am_libtesseract_main_a_OBJECTS) +DEFAULT_INCLUDES = -I. 
-I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/config/depcomp +am__depfiles_maybe = depfiles +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +SOURCES = $(libtesseract_main_a_SOURCES) +DIST_SOURCES = $(libtesseract_main_a_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +includeHEADERS_INSTALL = $(INSTALL_HEADER) +HEADERS = $(include_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = aclocal-1.10 +AMTAR = tar +AUTOCONF = autoconf +AUTOHEADER = autoheader +AUTOMAKE = automake-1.10 +AWK = gawk +CC = gcc +CCDEPMODE = depmode=gcc3 +CFLAGS = -g -O2 +CPP = gcc -E +CPPFLAGS = -I/usr/local/include/liblept +CXX = g++ +CXXCPP = g++ -E +CXXDEPMODE = depmode=gcc3 +CXXFLAGS = -g -O2 +CYGPATH_W = echo +DEFS = -DHAVE_CONFIG_H +DEPDIR = .deps +ECHO_C = +ECHO_N = -n +ECHO_T = +EGREP = /bin/grep -E +EXEEXT = +GREP = /bin/grep +INSTALL = /usr/bin/install -c +INSTALL_DATA = ${INSTALL} -m 644 +INSTALL_PROGRAM = ${INSTALL} +INSTALL_SCRIPT = ${INSTALL} +INSTALL_STRIP_PROGRAM = $(install_sh) -c -s +LDFLAGS = +LIBOBJS = +LIBS = -llept -ltiff -lpthread -ljpeg -lpng -lz -lm +LIBTIFF_CFLAGS = +LIBTIFF_LIBS = -ltiff +LTLIBOBJS = +MAINT = # +MAKEINFO = makeinfo +MKDIR_P = /bin/mkdir -p +OBJEXT = o +PACKAGE = tesseract +PACKAGE_BUGREPORT = theraysmith@gmail.com +PACKAGE_DATE = 
05/29 +PACKAGE_NAME = tesseract +PACKAGE_STRING = tesseract 3.00 +PACKAGE_TARNAME = tesseract +PACKAGE_VERSION = 3.00 +PACKAGE_YEAR = 2009 +PATH_SEPARATOR = : +RANLIB = ranlib +SET_MAKE = +SHELL = /bin/sh +STRIP = +VERSION = 3.00 +abs_builddir = /home/rays/src/opensrc/tesseract-3.00.src/ccmain +abs_srcdir = /home/rays/src/opensrc/tesseract-3.00.src/ccmain +abs_top_builddir = /home/rays/src/opensrc/tesseract-3.00.src +abs_top_srcdir = /home/rays/src/opensrc/tesseract-3.00.src +ac_ct_CC = gcc +ac_ct_CXX = g++ +am__include = include +am__leading_dot = . +am__quote = +am__tar = ${AMTAR} chof - "$$tardir" +am__untar = ${AMTAR} xf - +bindir = ${exec_prefix}/bin +build = x86_64-unknown-linux-gnu +build_alias = +build_cpu = x86_64 +build_os = linux-gnu +build_vendor = unknown +builddir = . +datadir = ${datarootdir} +datarootdir = ${prefix}/share +docdir = ${datarootdir}/doc/${PACKAGE_TARNAME} +dvidir = ${docdir} +exec_prefix = ${prefix} +host = x86_64-unknown-linux-gnu +host_alias = +host_cpu = x86_64 +host_os = linux-gnu +host_vendor = unknown +htmldir = ${docdir} +includedir = ${prefix}/include/tesseract +infodir = ${datarootdir}/info +install_sh = $(SHELL) /home/rays/src/opensrc/tesseract-3.00.src/config/install-sh +libdir = ${exec_prefix}/lib +libexecdir = ${exec_prefix}/libexec +localedir = ${datarootdir}/locale +localstatedir = ${prefix}/var +mandir = ${datarootdir}/man +mkdir_p = /bin/mkdir -p +oldincludedir = /usr/include +pdfdir = ${docdir} +prefix = /usr/local +program_transform_name = s,x,x, +psdir = ${docdir} +sbindir = ${exec_prefix}/sbin +sharedstatedir = ${prefix}/com +srcdir = . +sysconfdir = ${prefix}/etc +target_alias = +top_builddir = .. +top_srcdir = .. 
+SUBDIRS = +AM_CPPFLAGS = \ + -I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \ + -I$(top_srcdir)/image -I$(top_srcdir)/viewer \ + -I$(top_srcdir)/ccops -I$(top_srcdir)/dict \ + -I$(top_srcdir)/classify \ + -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \ + -I$(top_srcdir)/textord + +EXTRA_DIST = tessembedded.cpp ccmain.vcproj +include_HEADERS = \ + adaptions.h applybox.h blobcmp.h \ + callnet.h charcut.h charsample.h control.h \ + docqual.h expandblob.h fixspace.h fixxht.h \ + imgscale.h matmatch.h osdetect.h output.h \ + pagewalk.h paircmp.h pgedit.h reject.h scaleimg.h \ + tessbox.h tessedit.h tessembedded.h tesseractclass.h \ + tessio.h tessvars.h tfacep.h tfacepp.h thresholder.h tstruct.h \ + varabled.h werdit.h + +lib_LIBRARIES = libtesseract_main.a +libtesseract_main_a_SOURCES = \ + adaptions.cpp ambigsrecog.cpp applybox.cpp \ + blobcmp.cpp \ + callnet.cpp charcut.cpp charsample.cpp control.cpp \ + docqual.cpp expandblob.cpp fixspace.cpp fixxht.cpp \ + imgscale.cpp matmatch.cpp osdetect.cpp output.cpp \ + pagewalk.cpp paircmp.cpp pgedit.cpp reject.cpp scaleimg.cpp \ + tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \ + tfacepp.cpp thresholder.cpp tstruct.cpp \ + varabled.cpp werdit.cpp + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .cpp .o .obj +$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu ccmain/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu ccmain/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: # $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): # $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +install-libLIBRARIES: $(lib_LIBRARIES) + @$(NORMAL_INSTALL) + test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" + @list='$(lib_LIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + f=$(am__strip_dir) \ + echo " $(libLIBRARIES_INSTALL) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \ + $(libLIBRARIES_INSTALL) "$$p" "$(DESTDIR)$(libdir)/$$f"; \ + else :; fi; \ + done + @$(POST_INSTALL) + @list='$(lib_LIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + p=$(am__strip_dir) \ + echo " $(RANLIB) '$(DESTDIR)$(libdir)/$$p'"; \ + $(RANLIB) "$(DESTDIR)$(libdir)/$$p"; \ + else :; fi; \ + done + +uninstall-libLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LIBRARIES)'; for p in $$list; do \ + p=$(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(libdir)/$$p'"; \ + rm -f "$(DESTDIR)$(libdir)/$$p"; \ + done + +clean-libLIBRARIES: + -test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES) +libtesseract_main.a: $(libtesseract_main_a_OBJECTS) $(libtesseract_main_a_DEPENDENCIES) + -rm -f libtesseract_main.a + $(libtesseract_main_a_AR) libtesseract_main.a $(libtesseract_main_a_OBJECTS) $(libtesseract_main_a_LIBADD) + $(RANLIB) libtesseract_main.a + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +include ./$(DEPDIR)/adaptions.Po +include ./$(DEPDIR)/ambigsrecog.Po +include ./$(DEPDIR)/applybox.Po +include 
./$(DEPDIR)/blobcmp.Po +include ./$(DEPDIR)/callnet.Po +include ./$(DEPDIR)/charcut.Po +include ./$(DEPDIR)/charsample.Po +include ./$(DEPDIR)/control.Po +include ./$(DEPDIR)/docqual.Po +include ./$(DEPDIR)/expandblob.Po +include ./$(DEPDIR)/fixspace.Po +include ./$(DEPDIR)/fixxht.Po +include ./$(DEPDIR)/imgscale.Po +include ./$(DEPDIR)/matmatch.Po +include ./$(DEPDIR)/osdetect.Po +include ./$(DEPDIR)/output.Po +include ./$(DEPDIR)/pagewalk.Po +include ./$(DEPDIR)/paircmp.Po +include ./$(DEPDIR)/pgedit.Po +include ./$(DEPDIR)/reject.Po +include ./$(DEPDIR)/scaleimg.Po +include ./$(DEPDIR)/tessbox.Po +include ./$(DEPDIR)/tessedit.Po +include ./$(DEPDIR)/tesseractclass.Po +include ./$(DEPDIR)/tessvars.Po +include ./$(DEPDIR)/tfacepp.Po +include ./$(DEPDIR)/thresholder.Po +include ./$(DEPDIR)/tstruct.Po +include ./$(DEPDIR)/varabled.Po +include ./$(DEPDIR)/werdit.Po + +.cpp.o: + $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< + mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +# source='$<' object='$@' libtool=no \ +# DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \ +# $(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: + $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` + mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +# source='$<' object='$@' libtool=no \ +# DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) \ +# $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" + @list='$(include_HEADERS)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + f=$(am__strip_dir) \ + echo " $(includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(includedir)/$$f'"; \ + $(includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(includedir)/$$f"; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; for p in $$list; do \ + f=$(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(includedir)/$$f'"; \ + rm -f 
"$(DESTDIR)$(includedir)/$$f"; \ + done + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. +$(RECURSIVE_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +# Build the mkid database from the de-duplicated list of sources and headers. +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) 
$(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + distdir=`$(am__cd) $(distdir) && pwd`; \ + top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ + (cd $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$top_distdir" \ + distdir="$$distdir/$$subdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LIBRARIES) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: 
install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libLIBRARIES mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +info: info-recursive + +info-am: + +install-data-am: install-includeHEADERS + +install-dvi: install-dvi-recursive + +install-exec-am: install-libLIBRARIES + +install-html: install-html-recursive + +install-info: install-info-recursive + +install-man: + +install-pdf: install-pdf-recursive + +install-ps: install-ps-recursive + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-libLIBRARIES + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \ + install-strip + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic \ + clean-libLIBRARIES ctags ctags-recursive distclean \ + 
distclean-compile distclean-generic distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags tags-recursive \ + uninstall uninstall-am uninstall-includeHEADERS \ + uninstall-libLIBRARIES + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/ccmain/ambigsrecog.cpp b/ccmain/ambigsrecog.cpp new file mode 100644 index 000000000..9ffa2555d --- /dev/null +++ b/ccmain/ambigsrecog.cpp @@ -0,0 +1,179 @@ +/////////////////////////////////////////////////////////////////////// +// File: ambigsrecog.cpp +// Description: Functions for producing classifications +// for the input to ambigstraining. +// Author: Daria Antonova +// Created: Mon Jun 23 11:26:43 PDT 2008 +// +// (C) Copyright 2007, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#include "ambigs.h" + +#include "applybox.h" +#include "boxread.h" +#include "control.h" +#include "permute.h" +#include "ratngs.h" +#include "reject.h" +#include "stopper.h" +#include "tesseractclass.h" + +namespace tesseract { + +// Sets flags necessary for ambigs training mode. +// Opens and returns the pointer to the output file. +FILE *Tesseract::init_ambigs_training(const STRING &fname) { + permute_only_top = 1; // use only top choice permuter + tessedit_tess_adaption_mode.set_value(0); // turn off adaption + tessedit_ok_mode.set_value(0); // turn off context checking + tessedit_enable_doc_dict.set_value(0); // turn off document dictionary + save_best_choices.set_value(1); // save individual char choices + stopper_no_acceptable_choices.set_value(1); // explore all segmentations + save_raw_choices.set_value(1); // save raw choices + + // Open ambigs output file. + STRING output_fname = fname; + const char *lastdot = strrchr(output_fname.string(), '.'); + if (lastdot != NULL) { + output_fname[lastdot - output_fname.string()] = '\0'; + } + output_fname += ".txt"; + FILE *output_file; + if (!(output_file = fopen(output_fname.string(), "a+"))) { + CANTOPENFILE.error("ambigs_training", EXIT, + "Can't open box file %s\n", output_fname.string()); + } + return output_file; +} + +// This function takes tif/box pair of files and runs recognition on the image, +// while making sure that the word bounds that tesseract identified roughly +// match to those specified by the input box file. For each word (ngram in a +// single bounding box from the input box file) it outputs the ocred result, +// the correct label, rating and certainty. 
+void Tesseract::ambigs_training_segmented(const STRING &fname, + PAGE_RES *page_res, + volatile ETEXT_DESC *monitor, + FILE *output_file) { + STRING box_fname = fname; + const char *lastdot = strrchr(box_fname.string(), '.'); + if (lastdot != NULL) { + box_fname[lastdot - box_fname.string()] = '\0'; + } + box_fname += ".box"; + FILE *box_file; + if (!(box_file = fopen(box_fname.string(), "r"))) { + CANTOPENFILE.error("ambigs_training", EXIT, + "Can't open box file %s\n", box_fname.string()); + } + + static PAGE_RES_IT page_res_it; + page_res_it.page_res = page_res; + page_res_it.restart_page(); + int x_min, y_min, x_max, y_max; + char label[UNICHAR_LEN * 10]; + + // Process all the words on this page. + while (page_res_it.word() != NULL && + read_next_box(applybox_page, box_file, label, + &x_min, &y_min, &x_max, &y_max)) { + // Init bounding box of the current word bounding box and from box file. + TBOX box = TBOX(ICOORD(x_min, y_min), ICOORD(x_max, y_max)); + TBOX word_box(page_res_it.word()->word->bounding_box()); + bool one_word = true; + // Check whether the bounding box of the next word overlaps with the + // current box from box file. + while (page_res_it.next_word() != NULL && + box.x_overlap(page_res_it.next_word()->word->bounding_box())) { + word_box = word_box.bounding_union( + page_res_it.next_word()->word->bounding_box()); + page_res_it.forward(); + one_word = false; + } + if (!word_box.major_overlap(box)) { + if (!word_box.x_overlap(box)) { + // We must be looking at the word that belongs in the "next" bounding + // box from the box file. The ngram that was supposed to appear in + // the current box read from the box file must have been dropped by + // tesseract as noise. 
+ tprintf("Word %s was dropped as noise.\n", label); + continue; // stay on this blob, but read next box from box file + } else { + tprintf("Error: Insufficient overlap for word box" + " and box from file for %s\n", label); + word_box.print(); + box.print(); + exit(1); + } + } + // Skip recognizing the ngram if tesseract is sure it's not + // one word, otherwise run one recognition pass on this word. + if (!one_word) { + tprintf("Tesseract segmented %s as multiple words\n", label); + } else { + ambigs_classify_and_output(&page_res_it, label, output_file); + } + page_res_it.forward(); + } + fclose(box_file); +} + +// Run classify_word_pass1() on the current word. Output tesseract's raw choice +// as a result of the classification. For words labeled with a single unichar +// also output all alternatives from blob_choices of the best choice. +void Tesseract::ambigs_classify_and_output(PAGE_RES_IT *page_res_it, + const char *label, + FILE *output_file) { + int offset; + // Classify word. + classify_word_pass1(page_res_it->word(), page_res_it->row()->row, + page_res_it->block()->block, + FALSE, NULL, NULL); + WERD_CHOICE *best_choice = page_res_it->word()->best_choice; + ASSERT_HOST(best_choice != NULL); + ASSERT_HOST(best_choice->blob_choices() != NULL); + + // Compute the number of unichars in the label. + int label_num_unichars = 0; + int step = 1; // should be non-zero on the first iteration + for (offset = 0; label[offset] != '\0' && step > 0; + step = getDict().getUnicharset().step(label + offset), + offset += step, ++label_num_unichars); + if (step == 0) { + tprintf("Not outputting illegal unichar %s\n", label); + return; + } + + // Output all classifier choices for the unigrams (1-1 classifications). 
+ if (label_num_unichars == 1 && best_choice->blob_choices()->length() == 1) { + BLOB_CHOICE_LIST_C_IT outer_blob_choice_it; + outer_blob_choice_it.set_to_list(best_choice->blob_choices()); + BLOB_CHOICE_IT blob_choice_it; + blob_choice_it.set_to_list(outer_blob_choice_it.data()); + for (blob_choice_it.mark_cycle_pt(); + !blob_choice_it.cycled_list(); + blob_choice_it.forward()) { + BLOB_CHOICE *blob_choice = blob_choice_it.data(); + if (blob_choice->unichar_id() != INVALID_UNICHAR_ID) { + fprintf(output_file, "%s\t%s\t%.4f\t%.4f\n", + unicharset.id_to_unichar(blob_choice->unichar_id()), + label, blob_choice->rating(), blob_choice->certainty()); + } + } + } + // Output the raw choice for succesful non 1-1 classifications. + getDict().PrintAmbigAlternatives(output_file, label, label_num_unichars); +} + +} // namespace tesseract diff --git a/ccmain/blckerr.h b/ccmain/blckerr.h new file mode 100644 index 000000000..e30616398 --- /dev/null +++ b/ccmain/blckerr.h @@ -0,0 +1,29 @@ +/********************************************************************** + * File: blckerr.h (Formerly blockerr.h) + * Description: Error codes for the page block classes. + * Author: Ray Smith + * Created: Tue Mar 19 17:43:30 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
 *
 **********************************************************************/

#ifndef BLCKERR_H
#define BLCKERR_H

#include "errcode.h"

// Error codes for the page block classes. Each ERRCODE is initialised from
// the message text it carries.

// A y coordinate lies outside the block.
const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
// No rectangle could be found for a line.
const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
// A gradient is on the wrong side of an edge step.
const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
// A word does not hold blobs of the requested type.
const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
#endif
diff --git a/ccmain/blobbox.cpp b/ccmain/blobbox.cpp new file mode 100644 index 000000000..13de82741 --- /dev/null +++ b/ccmain/blobbox.cpp @@ -0,0 +1,805 @@
/**********************************************************************
 * File: blobbox.cpp (Formerly blobnbox.c)
 * Description: Code for the textord blob class.
 * Author: Ray Smith
 * Created: Thu Jul 30 09:08:51 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include "blobbox.h"

#define PROJECTION_MARGIN 10     //arbitrary
// EXTERN is defined empty here, so it contributes nothing to the variable
// declarations below.
#define EXTERN

// Tunable variables: name, default value, description.
EXTERN double_VAR (textord_error_weight, 3,
"Weighting for error in believability");
EXTERN BOOL_VAR (pitsync_projection_fix, TRUE,
"Fix bug in projection profile");

// List-support macro expansions for the blob, row and block classes.
ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK)
/**********************************************************************
 * BLOBNBOX::merge
 *
 * Merge this blob with the given blob, which should be after this.
 *
 * This blob's bounding box is extended (box += nextblob->box) to take in
 * nextblob's box, and nextblob is flagged as joined. nextblob is neither
 * unlinked nor deleted here. nextblob is dereferenced unconditionally,
 * so it must not be NULL.
 **********************************************************************/
void BLOBNBOX::merge(                    //merge blobs
                     BLOBNBOX *nextblob  //blob to join with
                    ) {
  box += nextblob->box;          //merge boxes
  nextblob->joined = TRUE;       //mark the absorbed blob
}


/**********************************************************************
 * BLOBNBOX::chop
 *
 * Chop this blob into equal sized pieces using the x height as a guide.
 * The blob is not actually chopped. Instead, fake blobs are inserted
 * with the relevant bounding boxes.
+ **********************************************************************/ + +void BLOBNBOX::chop( //chop blobs + BLOBNBOX_IT *start_it, //location of this + BLOBNBOX_IT *end_it, //iterator + FCOORD rotation, //for landscape + float xheight //of line + ) { + inT16 blobcount; //no of blobs + BLOBNBOX *newblob; //fake blob + BLOBNBOX *blob; //current blob + inT16 blobindex; //number of chop + inT16 leftx; //left edge of blob + float blobwidth; //width of each + float rightx; //right edge to scan + float ymin, ymax; //limits of new blob + float test_ymin, test_ymax; //limits of part blob + ICOORD bl, tr; //corners of box + BLOBNBOX_IT blob_it; //blob iterator + + //get no of chops + blobcount = (inT16) floor (box.width () / xheight); + if (blobcount > 1 && (blob_ptr != NULL || cblob_ptr != NULL)) { + //width of each + blobwidth = (float) (box.width () + 1) / blobcount; + for (blobindex = blobcount - 1, rightx = box.right (); + blobindex >= 0; blobindex--, rightx -= blobwidth) { + ymin = (float) MAX_INT32; + ymax = (float) -MAX_INT32; + blob_it = *start_it; + do { + blob = blob_it.data (); + if (blob->blob_ptr != NULL) + find_blob_limits (blob->blob_ptr, rightx - blobwidth, rightx, + rotation, test_ymin, test_ymax); + else + find_cblob_vlimits (blob->cblob_ptr, rightx - blobwidth, + rightx, + /*rotation, */ test_ymin, test_ymax); + blob_it.forward (); + if (test_ymin < ymin) + ymin = test_ymin; + if (test_ymax > ymax) + ymax = test_ymax; + } + while (blob != end_it->data ()); + if (ymin < ymax) { + leftx = (inT16) floor (rightx - blobwidth); + if (leftx < box.left ()) + leftx = box.left (); //clip to real box + bl = ICOORD (leftx, (inT16) floor (ymin)); + tr = ICOORD ((inT16) ceil (rightx), (inT16) ceil (ymax)); + if (blobindex == 0) + box = TBOX (bl, tr); //change box + else { + newblob = new BLOBNBOX; + //box is all it has + newblob->box = TBOX (bl, tr); + //stay on current + end_it->add_after_stay_put (newblob); + } + } + } + } +} + + 
+/**********************************************************************
+ * find_blob_limits
+ *
+ * Scan the polygon outlines of the blob, after rotating each point and
+ * edge vector by the given rotation, and report the smallest and largest
+ * y found between the given x limits. Edges that cross either limit
+ * contribute their y intercept at that limit.
+ * If nothing lies in range, ymin is left at MAX_INT32 and ymax at
+ * -MAX_INT32.
+ **********************************************************************/
+
+void find_blob_limits(                  //get y limits
+                      PBLOB *blob,      //blob to search
+                      float leftx,      //x limits
+                      float rightx,
+                      FCOORD rotation,  //for landscape
+                      float &ymin,      //output y limits
+                      float &ymax) {
+  OUTLINE_IT outline_it = blob->out_list ();
+  POLYPT_IT point_it;
+
+  ymin = (float) MAX_INT32;
+  ymax = (float) -MAX_INT32;
+  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
+       outline_it.forward ()) {
+    point_it.set_to_list (outline_it.data ()->polypts ());
+    for (point_it.mark_cycle_pt (); !point_it.cycled_list ();
+         point_it.forward ()) {
+      POLYPT *point = point_it.data ();
+      FCOORD start = point->pos;   //rotated start of edge
+      FCOORD edge = point->vec;    //rotated edge vector
+      start.rotate (rotation);
+      edge.rotate (rotation);
+
+      // Edge crosses the left limit (in either direction): use the
+      // y intercept there. A crossing implies edge.x() is non-zero.
+      const bool starts_left_of_left = start.x () < leftx;
+      const bool starts_right_of_left = start.x () > leftx;
+      if ((starts_left_of_left && start.x () + edge.x () > leftx)
+          || (starts_right_of_left && start.x () + edge.x () < leftx)) {
+        const float left_y =
+          start.y () + edge.y () * (leftx - start.x ()) / edge.x ();
+        if (left_y > ymax)
+          ymax = left_y;
+        if (left_y < ymin)
+          ymin = left_y;
+      }
+
+      // The vertex itself lies inside the strip.
+      if (start.x () >= leftx && start.x () <= rightx) {
+        if (start.y () < ymin)
+          ymin = start.y ();
+        if (start.y () > ymax)
+          ymax = start.y ();
+      }
+
+      // Edge crosses the right limit (in either direction).
+      const bool starts_right_of_right = start.x () > rightx;
+      const bool starts_left_of_right = start.x () < rightx;
+      if ((starts_right_of_right && start.x () + edge.x () < rightx)
+          || (starts_left_of_right && start.x () + edge.x () > rightx)) {
+        const float right_y =
+          start.y () + edge.y () * (rightx - start.x ()) / edge.x ();
+        if (right_y > ymax)
+          ymax = right_y;
+        if (right_y < ymin)
+          ymin = right_y;
+      }
+    }
+  }
+}
+
+
+/**********************************************************************
+ * find_cblob_limits
+ *
+ * Scan the outlines of the cblob to locate the y min and max
+ * between the given x limits.
+ * The start position and every step are rotated by the given rotation
+ * before being used. If no outline point lies in range, ymin is left at
+ * MAX_INT32 and ymax at -MAX_INT32.
+ **********************************************************************/
+
+void find_cblob_limits(                  //get y limits
+                       C_BLOB *blob,     //blob to search
+                       float leftx,      //x limits
+                       float rightx,
+                       FCOORD rotation,  //for landscape
+                       float &ymin,      //output y limits
+                       float &ymax) {
+  // The step counter is 32 bit, matching the inT32 in which
+  // vertical_coutline_projection holds pathlength(); a 16 bit counter
+  // would overflow on an outline of more than 32767 steps.
+  inT32 stepindex;               //current point
+  ICOORD pos;                    //current coords
+  ICOORD vec;                    //rotated step
+  C_OUTLINE *outline;            //current outline
+                                 //outlines
+  C_OUTLINE_IT out_it = blob->out_list ();
+
+  ymin = (float) MAX_INT32;
+  ymax = (float) -MAX_INT32;
+  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
+    outline = out_it.data ();
+    pos = outline->start_pos (); //get coords
+    pos.rotate (rotation);
+    for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
+                                 //inside
+      if (pos.x () >= leftx && pos.x () <= rightx) {
+        if (pos.y () > ymax)
+          ymax = pos.y ();
+        if (pos.y () < ymin)
+          ymin = pos.y ();
+      }
+      vec = outline->step (stepindex);
+      vec.rotate (rotation);
+      pos += vec;                //move to next
+    }
+  }
+}
+
+
+/**********************************************************************
+ * find_cblob_vlimits
+ *
+ * Scan the outlines of the cblob to locate the y min and max
+ * between the given x limits.
+ * No rotation is applied. If no outline point lies in range, ymin is
+ * left at MAX_INT32 and ymax at -MAX_INT32.
+ **********************************************************************/
+
+void find_cblob_vlimits(               //get y limits
+                        C_BLOB *blob,  //blob to search
+                        float leftx,   //x limits
+                        float rightx,
+                        float &ymin,   //output y limits
+                        float &ymax) {
+  // The step counter is 32 bit, matching the inT32 in which
+  // vertical_coutline_projection holds pathlength(); a 16 bit counter
+  // would overflow on an outline of more than 32767 steps.
+  inT32 stepindex;               //current point
+  ICOORD pos;                    //current coords
+  ICOORD vec;                    //current step
+  C_OUTLINE *outline;            //current outline
+                                 //outlines
+  C_OUTLINE_IT out_it = blob->out_list ();
+
+  ymin = (float) MAX_INT32;
+  ymax = (float) -MAX_INT32;
+  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
+    outline = out_it.data ();
+    pos = outline->start_pos (); //get coords
+    for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
+                                 //inside
+      if (pos.x () >= leftx && pos.x () <= rightx) {
+        if (pos.y () > ymax)
+          ymax = pos.y ();
+        if (pos.y () < ymin)
+          ymin = pos.y ();
+      }
+      vec = outline->step (stepindex);
+      pos += vec;                //move to next
+    }
+  }
+}
+
+
+/**********************************************************************
+ * find_cblob_hlimits
+ *
+ * Scan the outlines of the cblob to locate the x min and max
+ * between the given y limits.
+ * No rotation is applied. If no outline point lies in range, xmin is
+ * left at MAX_INT32 and xmax at -MAX_INT32.
+ **********************************************************************/
+
+void find_cblob_hlimits(                //get x limits
+                        C_BLOB *blob,   //blob to search
+                        float bottomy,  //y limits
+                        float topy,
+                        float &xmin,    //output x limits
+                        float &xmax) {
+  // The step counter is 32 bit, matching the inT32 in which
+  // vertical_coutline_projection holds pathlength(); a 16 bit counter
+  // would overflow on an outline of more than 32767 steps.
+  inT32 stepindex;               //current point
+  ICOORD pos;                    //current coords
+  ICOORD vec;                    //current step
+  C_OUTLINE *outline;            //current outline
+                                 //outlines
+  C_OUTLINE_IT out_it = blob->out_list ();
+
+  xmin = (float) MAX_INT32;
+  xmax = (float) -MAX_INT32;
+  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
+    outline = out_it.data ();
+    pos = outline->start_pos (); //get coords
+    for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
+                                 //inside
+      if (pos.y () >= bottomy && pos.y () <= topy) {
+        if (pos.x () > xmax)
+          xmax = pos.x ();
+        if (pos.x () < xmin)
+          xmin = pos.x ();
+      }
+      vec = outline->step (stepindex);
+      pos += vec;                //move to next
+    }
+  }
+}
+
+
+/**********************************************************************
+ * rotate_blob
+ *
+ * Poly copy the blob and rotate the copy by the given vector.
+ * The caller receives a newly allocated PBLOB; the input is not modified.
+ **********************************************************************/
+
+PBLOB *rotate_blob(                 //copy and rotate
+                   PBLOB *blob,     //blob to copy
+                   FCOORD rotation  //vector to rotate by
+                  ) {
+  OUTLINE_IT outline_it;         //outlines of the copy
+  POLYPT_IT point_it;            //points of one outline
+
+  PBLOB *rotated = new PBLOB;
+  *rotated = *blob;              //deep copy
+  outline_it.set_to_list (rotated->out_list ());
+  outline_it.mark_cycle_pt ();
+  while (!outline_it.cycled_list ()) {
+    OUTLINE *outline = outline_it.data ();
+    point_it.set_to_list (outline->polypts ());
+    point_it.mark_cycle_pt ();
+    while (!point_it.cycled_list ()) {
+      // Both the vertex and its edge vector are rotated.
+      POLYPT *point = point_it.data ();
+      point->pos.rotate (rotation);
+      point->vec.rotate (rotation);
+      point_it.forward ();
+    }
+    // The outline's bounding box is recomputed after its points moved.
+    outline->compute_bb ();
+    outline_it.forward ();
+  }
+  return rotated;
+}
+
+
+/**********************************************************************
+ * rotate_cblob
+ *
+ * Build a PBLOB from the C_BLOB (using xheight) and rotate that new
+ * blob by the given vector. The caller receives the newly allocated PBLOB.
+ **********************************************************************/
+
+PBLOB *rotate_cblob(                 //rotate it
+                    C_BLOB *blob,    //blob to convert
+                    float xheight,   //for poly approx
+                    FCOORD rotation  //for landscape
+                   ) {
+  OUTLINE_IT outline_it;         //outlines of the new blob
+  POLYPT_IT point_it;            //points of one outline
+
+  PBLOB *rotated = new PBLOB (blob, xheight);
+  outline_it.set_to_list (rotated->out_list ());
+  outline_it.mark_cycle_pt ();
+  while (!outline_it.cycled_list ()) {
+    OUTLINE *outline = outline_it.data ();
+    point_it.set_to_list (outline->polypts ());
+    point_it.mark_cycle_pt ();
+    while (!point_it.cycled_list ()) {
+      // Both the vertex and its edge vector are rotated.
+      POLYPT *point = point_it.data ();
+      point->pos.rotate (rotation);
+      point->vec.rotate (rotation);
+      point_it.forward ();
+    }
+    // The outline's bounding box is recomputed after its points moved.
+    outline->compute_bb ();
+    outline_it.forward ();
+  }
+  return rotated;
+}
+
+
+/**********************************************************************
+ * crotate_cblob
+ *
+ * Rotate the copy by the given vector and return a C_BLOB.
+ * Each outline of the input is re-created through the
+ * C_OUTLINE (outline, rotation) constructor; the input is not modified.
+ **********************************************************************/
+
+C_BLOB *crotate_cblob(                 //rotate it
+                      C_BLOB *blob,    //blob to rotate
+                      FCOORD rotation  //for landscape
+                     ) {
+  C_OUTLINE_LIST rotated_list;   //rotated outlines
+  C_OUTLINE_IT src_it = blob->out_list ();
+  C_OUTLINE_IT dest_it = &rotated_list;
+
+  src_it.mark_cycle_pt ();
+  while (!src_it.cycled_list ()) {
+    C_OUTLINE *rotated_outline = new C_OUTLINE (src_it.data (), rotation);
+    dest_it.add_after_then_move (rotated_outline);
+    src_it.forward ();
+  }
+  return new C_BLOB (&rotated_list);
+}
+
+
+/**********************************************************************
+ * box_next
+ *
+ * Return the bounding box of the blob at the iterator, extended by the
+ * boxes of any following blobs that have no outline (pre-chopped
+ * pieces), i.e. with no pre-chopping.
+ * The iterator is left on the next blob that has an outline and is not
+ * joined to its predecessor.
+ **********************************************************************/
+
+TBOX box_next(                 //get bounding box
+              BLOBNBOX_IT *it  //iterator to blobs
+             ) {
+  TBOX total = it->data ()->bounding_box ();
+
+  for (;;) {
+    it->forward ();
+    BLOBNBOX *candidate = it->data ();
+    const bool has_no_outline =
+      candidate->blob () == NULL && candidate->cblob () == NULL;
+    if (has_no_outline) {
+      //was pre-chopped
+      total += candidate->bounding_box ();
+      continue;
+    }
+    // A real blob ends the run unless it was merged into the previous one.
+    if (!candidate->joined_to_prev ())
+      break;
+  }
+  return total;
+}
+
+
+/**********************************************************************
+ * box_next_pre_chopped
+ *
+ * Compute the bounding box of this blob with merging of x overlaps
+ * but WITH pre-chopping.
+ * Then move the iterator on to the start of the next pre-chopped blob.
+ **********************************************************************/
+
+TBOX box_next_pre_chopped(                 //get bounding box
+                          BLOBNBOX_IT *it  //iterator to blobs
+                         ) {
+  // The box of the blob the iterator starts on is what gets returned.
+  TBOX first_box = it->data ()->bounding_box ();
+
+  // Step past every following blob that is joined to its predecessor.
+  do {
+    it->forward ();
+  }
+  while (it->data ()->joined_to_prev ());
+  return first_box;
+}
+
+
+/**********************************************************************
+ * TO_ROW::TO_ROW
+ *
+ * Constructor to make a row from a blob.
+ * The row starts out spanning [bottom, top]. If that is taller than
+ * row_size it is shrunk symmetrically to row_size; if it is shorter than
+ * a third of row_size it is grown symmetrically to row_size / 3.
+ **********************************************************************/
+
+TO_ROW::TO_ROW (                 //constructor
+BLOBNBOX * blob,                 //first blob
+float top,                       //corrected top
+float bottom,                    //of row
+float row_size                   //ideal
+): y_min(bottom), y_max(top), initial_y_min(bottom), num_repeated_sets_(-1) {
+  BLOBNBOX_IT row_it = &blobs;   //list of blobs
+  row_it.add_to_end (blob);
+
+  const float height = top - bottom;
+  const float excess = height - row_size;
+  if (excess > 0) {
+    // Too tall: trim half the excess from each side.
+    y_max -= excess / 2;
+    y_min += excess / 2;
+  }
+  else if (height * 3 < row_size) {
+    // Very small object: pad half the shortfall onto each side.
+    const float shortfall = row_size / 3 + bottom - top;
+    y_max += shortfall / 2;
+    y_min -= shortfall / 2;
+  }
+}
+
+
+/**********************************************************************
+ * TO_ROW::add_blob
+ *
+ * Add the blob to the end of the row, and let the row limits move
+ * part of the way towards the blob's top/bottom while the row is still
+ * shorter than row_size.
+ **********************************************************************/
+
+void TO_ROW::add_blob(                 //add to row
+                      BLOBNBOX *blob,  //blob to add
+                      float top,       //corrected top
+                      float bottom,    //of blob
+                      float row_size   //ideal
+                     ) {
+  BLOBNBOX_IT row_it = &blobs;   //list of blobs
+  row_it.add_to_end (blob);
+
+  // Room left before the row reaches its ideal size.
+  const float allowed = row_size + y_min - y_max;
+  if (!(allowed > 0))
+    return;
+
+  const bool sticks_out_above = top > y_max;
+  const bool sticks_out_below = bottom < y_min;
+
+  // Total amount by which the blob sticks out of the row.
+  float available = 0;
+  if (sticks_out_above)
+    available = top - y_max;
+  if (sticks_out_below)
+    available += y_min - bottom;
+  if (!(available > 0))
+    return;
+
+  available += available;        //do it gradually
+  if (available < allowed)
+    available = allowed;
+  if (sticks_out_below)
+    y_min -= (y_min - bottom) * allowed / available;
+  if (sticks_out_above)
+    y_max += (top - y_max) * allowed / available;
+}
+
+
+/**********************************************************************
+ * TO_ROW::insert_blob
+ *
+ * Add the blob to the row in the correct position: before the first
+ * blob whose left edge is further right, or at the end if there is none.
+ **********************************************************************/
+
+void TO_ROW::insert_blob(                //insert in order
+                         BLOBNBOX *blob  //blob to insert
+                        ) {
+  BLOBNBOX_IT row_it = &blobs;   //list of blobs
+
+  if (row_it.empty ()) {
+    row_it.add_before_then_move (blob);
+    return;
+  }
+
+  // Stop on the first blob that starts to the right of the new one.
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    if (row_it.data ()->bounding_box ().left () >
+        blob->bounding_box ().left ())
+      break;
+  }
+
+  if (row_it.cycled_list ())
+    row_it.add_to_end (blob);
+  else
+    row_it.add_before_stay_put (blob);
+}
+
+
+/**********************************************************************
+ * TO_ROW::compute_vertical_projection
+ *
+ * Compute the vertical projection of a TO_ROW from its blobs.
+ * The projection range is the union of the blob boxes widened by
+ * PROJECTION_MARGIN on each side. Nothing is changed for an empty row.
+ **********************************************************************/
+
+void TO_ROW::compute_vertical_projection() {  //project whole row
+  TBOX row_box;                  //bound of row
+  BLOBNBOX *blob;                //current blob
+  BLOBNBOX_IT blob_it = blob_list ();
+
+  if (blob_it.empty ())
+    return;
+  // Union of all the blob bounding boxes.
+  row_box = blob_it.data ()->bounding_box ();
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ())
+    row_box += blob_it.data ()->bounding_box ();
+
+  projection.set_range (row_box.left () - PROJECTION_MARGIN,
+                        row_box.right () + PROJECTION_MARGIN);
+  projection_left = row_box.left () - PROJECTION_MARGIN;
+  projection_right = row_box.right () + PROJECTION_MARGIN;
+  // Blobs with neither a poly nor an edge-step outline add nothing.
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+    if (blob->blob () != NULL)
+      vertical_blob_projection (blob->blob (), &projection);
+    else if (blob->cblob () != NULL)
+      vertical_cblob_projection (blob->cblob (), &projection);
+  }
+}
+
+
+/**********************************************************************
+ * vertical_blob_projection
+ *
+ * Compute the vertical projection of a blob from its outlines
+ * and add to the given STATS.
+ **********************************************************************/
+
+void vertical_blob_projection(              //project outlines
+                              PBLOB *blob,  //blob to project
+                              STATS *stats  //output
+                             ) {
+                                 //outlines of blob
+  OUTLINE_IT out_it = blob->out_list ();
+
+  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
+    vertical_outline_projection (out_it.data (), stats);
+  }
+}
+
+
+/**********************************************************************
+ * vertical_outline_projection
+ *
+ * Compute the vertical projection of an outline from its points
+ * and add to the given STATS.
+ * Child outlines are projected recursively.
+ **********************************************************************/
+
+void vertical_outline_projection(                   //project outlines
+                                 OUTLINE *outline,  //outline to project
+                                 STATS *stats       //output
+                                ) {
+  POLYPT *polypt;                //current point
+  inT32 xcoord;                  //current pixel coord
+  float end_x;                   //end of vec
+  POLYPT_IT poly_it = outline->polypts ();
+  OUTLINE_IT out_it = outline->child ();
+  float ymean;                   //amount to add
+  float width;                   //amount of x
+
+  for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ()) {
+    polypt = poly_it.data ();
+    end_x = polypt->pos.x () + polypt->vec.x ();
+    // Edges with no x extent (vec.x() == 0) add nothing.
+    if (polypt->vec.x () > 0) {
+      // Left-to-right edge: visit each pixel column it touches.
+      for (xcoord = (inT32) floor (polypt->pos.x ());
+           xcoord < end_x; xcoord++) {
+        // Left end of the part of the edge in this column.
+        if (polypt->pos.x () < xcoord) {
+          width = (float) xcoord;
+          ymean =
+            polypt->vec.y () * (xcoord -
+                                polypt->pos.x ()) / polypt->vec.x () +
+            polypt->pos.y ();
+        }
+        else {
+          width = polypt->pos.x ();
+          ymean = polypt->pos.y ();
+        }
+        // Right end of the part of the edge in this column.
+        if (end_x > xcoord + 1) {
+          width -= xcoord + 1;
+          ymean +=
+            polypt->vec.y () * (xcoord + 1 -
+                                polypt->pos.x ()) / polypt->vec.x () +
+            polypt->pos.y ();
+        }
+        else {
+          width -= end_x;
+          ymean += polypt->pos.y () + polypt->vec.y ();
+        }
+        // Sum of the end heights times signed width over two.
+        ymean = ymean * width / 2;
+        stats->add (xcoord, (inT32) floor (ymean + 0.5));
+      }
+    }
+    else if (polypt->vec.x () < 0) {
+      // Right-to-left edge: the same walk, from end_x up to pos.x().
+      for (xcoord = (inT32) floor (end_x);
+           xcoord < polypt->pos.x (); xcoord++) {
+        if (polypt->pos.x () > xcoord + 1) {
+          width = xcoord + 1.0f;
+          ymean =
+            polypt->vec.y () * (xcoord + 1 -
+                                polypt->pos.x ()) / polypt->vec.x () +
+            polypt->pos.y ();
+        }
+        else {
+          width = polypt->pos.x ();
+          ymean = polypt->pos.y ();
+        }
+        if (end_x < xcoord) {
+          width -= xcoord;
+          ymean +=
+            polypt->vec.y () * (xcoord -
+                                polypt->pos.x ()) / polypt->vec.x () +
+            polypt->pos.y ();
+        }
+        else {
+          width -= end_x;
+          ymean += polypt->pos.y () + polypt->vec.y ();
+        }
+        ymean = ymean * width / 2;
+        stats->add (xcoord, (inT32) floor (ymean + 0.5));
+      }
+    }
+  }
+
+  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
+    vertical_outline_projection (out_it.data (), stats);
+  }
+}
+
+
+/**********************************************************************
+ * vertical_cblob_projection
+ *
+ * Compute the vertical projection of a cblob from its outlines
+ * and add to the given STATS.
+ **********************************************************************/
+
+void vertical_cblob_projection(               //project outlines
+                               C_BLOB *blob,  //blob to project
+                               STATS *stats   //output
+                              ) {
+                                 //outlines of blob
+  C_OUTLINE_IT out_it = blob->out_list ();
+
+  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
+    vertical_coutline_projection (out_it.data (), stats);
+  }
+}
+
+
+/**********************************************************************
+ * vertical_coutline_projection
+ *
+ * Compute the vertical projection of an outline from its edge steps
+ * and add to the given STATS. Child outlines are projected recursively.
+ * Only horizontal steps contribute; pitsync_projection_fix selects the
+ * sign with which the y coordinate is added for each step direction.
+ **********************************************************************/
+
+void vertical_coutline_projection(                     //project outlines
+                                  C_OUTLINE *outline,  //outline to project
+                                  STATS *stats         //output
+                                 ) {
+  ICOORD pos;                    //current point
+  ICOORD step;                   //edge step
+  inT32 length;                  //of outline
+  // Same width as length: a 16 bit counter would overflow before
+  // reaching a length above 32767.
+  inT32 stepindex;               //current step
+  C_OUTLINE_IT out_it = outline->child ();
+
+  pos = outline->start_pos ();
+  length = outline->pathlength ();
+  for (stepindex = 0; stepindex < length; stepindex++) {
+    step = outline->step (stepindex);
+    if (step.x () > 0) {
+      // Rightward step: counted in the column at pos.x().
+      if (pitsync_projection_fix)
+        stats->add (pos.x (), -pos.y ());
+      else
+        stats->add (pos.x (), pos.y ());
+    }
+    else if (step.x () < 0) {
+      // Leftward step: counted in the column at pos.x() - 1.
+      if (pitsync_projection_fix)
+        stats->add (pos.x () - 1, pos.y ());
+      else
+        stats->add (pos.x () - 1, -pos.y ());
+    }
+    pos += step;
+  }
+
+  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
+    vertical_coutline_projection (out_it.data (), stats);
+  }
+}
+
+
+/**********************************************************************
+ * TO_BLOCK::TO_BLOCK
+ *
+ * Constructor to make a TO_BLOCK from a real block.
+ * Only the block pointer is stored here.
+ **********************************************************************/
+
+TO_BLOCK::TO_BLOCK(                  //make a block
+                   BLOCK *src_block  //real block
+                  ) {
+  block = src_block;
+}
+
+// Deletes the poly blob and the edge-step blob held by every BLOBNBOX on
+// the list. The BLOBNBOXes themselves are not removed from the list here.
+static void clear_blobnboxes(BLOBNBOX_LIST* boxes) {
+  BLOBNBOX_IT it = boxes;
+  // A BLOBNBOX generally doesn't own its blobs, so when it does, they
+  // have to be deleted explicitly.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* box = it.data();
+    if (box->blob() != NULL)
+      delete box->blob();
+    if (box->cblob() != NULL)
+      delete box->cblob();
+  }
+}
+
+TO_BLOCK::~TO_BLOCK() {
+  // Any residual BLOBNBOXes at this stage own their blobs, so delete them.
+  clear_blobnboxes(&blobs);
+  clear_blobnboxes(&underlines);
+  clear_blobnboxes(&noise_blobs);
+  clear_blobnboxes(&small_blobs);
+  clear_blobnboxes(&large_blobs);
+}
+
+// Draw the blobs on the various lists in the block in different colors.
+void TO_BLOCK::plot_graded_blobs(ScrollView* to_win) {
+  plot_blob_list(to_win, &noise_blobs, ScrollView::CORAL, ScrollView::BLUE);
+  plot_blob_list(to_win, &small_blobs,
+                 ScrollView::GOLDENROD, ScrollView::YELLOW);
+  plot_blob_list(to_win, &large_blobs,
+                 ScrollView::DARK_GREEN, ScrollView::YELLOW);
+  plot_blob_list(to_win, &blobs, ScrollView::WHITE, ScrollView::BROWN);
+}
+
+/**********************************************************************
+ * plot_blob_list
+ *
+ * Draw a list of blobs.
+ * Each blob on the list is drawn with BLOBNBOX::plot. That member only
+ * exists when GRAPHICS_DISABLED is not defined, so in a graphics-disabled
+ * build this function draws nothing.
+ **********************************************************************/
+
+void plot_blob_list(ScrollView* win,                   // window to draw in
+                    BLOBNBOX_LIST *list,               // blob list
+                    ScrollView::Color body_colour,     // colour to draw
+                    ScrollView::Color child_colour) {  // colour of child
+#ifndef GRAPHICS_DISABLED
+  BLOBNBOX_IT it = list;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    it.data()->plot(win, body_colour, child_colour);
+  }
+#endif
+}
+
+
diff --git a/ccmain/blobbox.h b/ccmain/blobbox.h
new file mode 100644
index 000000000..8df5dff36
--- /dev/null
+++ b/ccmain/blobbox.h
@@ -0,0 +1,557 @@
+/**********************************************************************
+ * File: blobbox.h (Formerly blobnbox.h)
+ * Description: Code for the textord blob class.
+ * Author: Ray Smith
+ * Created: Thu Jul 30 09:08:51 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ * + **********************************************************************/ + +#ifndef BLOBBOX_H +#define BLOBBOX_H + +#include "varable.h" +#include "clst.h" +#include "elst2.h" +#include "werd.h" +#include "ocrblock.h" +#include "statistc.h" + +extern double_VAR_H (textord_error_weight, 3, +"Weighting for error in believability"); + +enum PITCH_TYPE +{ + PITCH_DUNNO, //insufficient data + PITCH_DEF_FIXED, //definitely fixed + PITCH_MAYBE_FIXED, //could be + PITCH_DEF_PROP, + PITCH_MAYBE_PROP, + PITCH_CORR_FIXED, + PITCH_CORR_PROP +}; + +// The possible tab-stop types of each side of a BLOBNBOX. +enum TabType { + TT_NONE, // Not a tab. + TT_DELETED, // Not a tab after detailed analysis. + TT_UNCONFIRMED, // Initial designation of a tab-stop candidate. + TT_FAKE, // Added by interpolation. + TT_CONFIRMED, // Aligned with neighbours. + TT_VLINE // Detected as a vertical line. +}; + +// The possible region types of a BLOBNBOX. +// Note: keep all the text types > BRT_UNKNOWN and all the image types less. +// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor below. +enum BlobRegionType { + BRT_NOISE, // Neither text nor image. + BRT_HLINE, // Horizontal separator line. + BRT_RECTIMAGE, // Rectangular image. + BRT_POLYIMAGE, // Non-rectangular image. + BRT_UNKNOWN, // Not determined yet. + BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented. + BRT_TEXT, // Convincing text. + + BRT_COUNT // Number of possibilities. 
+}; + +namespace tesseract { +class ColPartition; +} + +class BLOBNBOX; +ELISTIZEH (BLOBNBOX) +class BLOBNBOX:public ELIST_LINK +{ + public: + BLOBNBOX() { + blob_ptr = NULL; + cblob_ptr = NULL; + area = 0; + Init(); + } + explicit BLOBNBOX(PBLOB *srcblob) { + blob_ptr = srcblob; + cblob_ptr = NULL; + box = srcblob->bounding_box (); + area = (int) srcblob->area (); + Init(); + } + explicit BLOBNBOX(C_BLOB *srcblob) { + blob_ptr = NULL; + cblob_ptr = srcblob; + box = srcblob->bounding_box (); + area = (int) srcblob->area (); + Init(); + } + + void rotate_box(FCOORD vec) { + box.rotate(vec); + } + void translate_box(ICOORD v) { + box.move(v); + } + void merge(BLOBNBOX *nextblob); + void chop( // fake chop blob + BLOBNBOX_IT *start_it, // location of this + BLOBNBOX_IT *blob_it, // iterator + FCOORD rotation, // for landscape + float xheight); // line height + + // Simple accessors. + const TBOX& bounding_box() const { + return box; + } + void compute_bounding_box() { + box = cblob_ptr != NULL ? 
cblob_ptr->bounding_box() + : blob_ptr->bounding_box(); + } + const TBOX& reduced_box() const { + return red_box; + } + void set_reduced_box(TBOX new_box) { + red_box = new_box; + reduced = TRUE; + } + inT32 enclosed_area() const { + return area; + } + bool joined_to_prev() const { + return joined != 0; + } + bool red_box_set() const { + return reduced != 0; + } + int repeated_set() const { + return repeated_set_; + } + void set_repeated_set(int set_id) { + repeated_set_ = set_id; + } + PBLOB *blob() const { + return blob_ptr; + } + C_BLOB *cblob() const { + return cblob_ptr; + } + TabType left_tab_type() const { + return left_tab_type_; + } + void set_left_tab_type(TabType new_type) { + left_tab_type_ = new_type; + } + TabType right_tab_type() const { + return right_tab_type_; + } + void set_right_tab_type(TabType new_type) { + right_tab_type_ = new_type; + } + BlobRegionType region_type() const { + return region_type_; + } + void set_region_type(BlobRegionType new_type) { + region_type_ = new_type; + } + int left_rule() const { + return left_rule_; + } + void set_left_rule(int new_left) { + left_rule_ = new_left; + } + int right_rule() const { + return right_rule_; + } + void set_right_rule(int new_right) { + right_rule_ = new_right; + } + int left_crossing_rule() const { + return left_crossing_rule_; + } + void set_left_crossing_rule(int new_left) { + left_crossing_rule_ = new_left; + } + int right_crossing_rule() const { + return right_crossing_rule_; + } + void set_right_crossing_rule(int new_right) { + right_crossing_rule_ = new_right; + } + float horz_stroke_width() const { + return horz_stroke_width_; + } + void set_horz_stroke_width(float width) { + horz_stroke_width_ = width; + } + float vert_stroke_width() const { + return vert_stroke_width_; + } + void set_vert_stroke_width(float width) { + vert_stroke_width_ = width; + } + tesseract::ColPartition* owner() const { + return owner_; + } + void set_owner(tesseract::ColPartition* new_owner) { + owner_ = 
new_owner; + } + void set_noise_flag(bool flag) { + noise_flag_ = flag; + } + bool noise_flag() const { + return noise_flag_; + } + +#ifndef GRAPHICS_DISABLED + // Keep in sync with BlobRegionType. + ScrollView::Color BoxColor() const { + switch (region_type_) { + case BRT_HLINE: + return ScrollView::YELLOW; + case BRT_RECTIMAGE: + return ScrollView::RED; + case BRT_POLYIMAGE: + return ScrollView::ORANGE; + case BRT_UNKNOWN: + return ScrollView::CYAN; + case BRT_VERT_TEXT: + return ScrollView::GREEN; + case BRT_TEXT: + return ScrollView::BLUE; + case BRT_NOISE: + default: + return ScrollView::GREY; + } + } + + void plot(ScrollView* window, // window to draw in + ScrollView::Color blob_colour, // for outer bits + ScrollView::Color child_colour) { // for holes + if (blob_ptr != NULL) + blob_ptr->plot(window, blob_colour, child_colour); + if (cblob_ptr != NULL) + cblob_ptr->plot(window, blob_colour, child_colour); + } +#endif + + NEWDELETE2(BLOBNBOX) + + private: + // Initializes the bulk of the members to default values. 
// A text row being assembled from BLOBNBOXes.
// Privately it holds the blob list, the row's y limits, a free line fit
// (m, c, error) and a fixed-gradient "parallel" fit (para_c, para_error)
// with the derived credibility and y_origin values.
// Pitch, spacing and x-height estimates are exposed as public data members.
// Derives from ELIST2_LINK; ELIST2IZEH (TO_ROW) below the class generates the
// matching list type.
class TO_ROW:public ELIST2_LINK
{
 public:
  // Default constructor: only marks the row as "not yet searched for
  // repeated characters" (num_repeated_sets_ == -1). No other scalar member
  // is assigned here.
  TO_ROW() {
    num_repeated_sets_ = -1;
  }
  TO_ROW(                        //constructor
         BLOBNBOX *blob,         //from first blob
         float top,              //of row //target height
         float bottom,
         float row_size);

  // Upper y limit of the row, as last stored by set_limits().
  float max_y() const {          //access function
    return y_max;
  }
  // Lower y limit of the row, as last stored by set_limits().
  float min_y() const {
    return y_min;
  }
  // Midpoint of the two y limits.
  float mean_y() const {
    return (y_min + y_max) / 2.0f;
  }
  // Returns initial_y_min. It is not written anywhere in this class body;
  // presumably set by the out-of-line constructor/add_blob -- confirm.
  float initial_min_y() const {
    return initial_y_min;
  }
  // Gradient of the free line fit stored by set_line().
  float line_m() const {         //access to line fit
    return m;
  }
  // Intercept of the free line fit stored by set_line().
  float line_c() const {
    return c;
  }
  // Error of the free line fit stored by set_line().
  float line_error() const {
    return error;
  }
  // Intercept of the fixed-gradient fit stored by set_parallel_line().
  float parallel_c() const {
    return para_c;
  }
  // Error of the fixed-gradient fit stored by set_parallel_line().
  float parallel_error() const {
    return para_error;
  }
  // Credibility score computed in set_parallel_line().
  float believability() const {  //baseline goodness
    return credibility;
  }
  // para_c divided by sqrt(1 + gradient^2), computed in set_parallel_line().
  float intercept() const {      //real parallel_c
    return y_origin;
  }
  void add_blob(                 //put in row
                BLOBNBOX *blob,  //blob to add
                float top,       //of row //target height
                float bottom,
                float row_size);
  void insert_blob(              //put in row in order
                   BLOBNBOX *blob);

  // Direct (mutable) access to the row's blob list.
  BLOBNBOX_LIST *blob_list() {   //get list
    return &blobs;
  }

  // Stores the free line fit: gradient, intercept and fit error.
  void set_line(                 //set line spec
                float new_m,     //line to set
                float new_c,
                float new_error) {
    m = new_m;
    c = new_c;
    error = new_error;
  }
  // Stores the fixed-gradient fit and derives two values from it:
  //   credibility = blob count - textord_error_weight * new_error
  //   y_origin    = new_c / sqrt(1 + gradient^2)
  void set_parallel_line(                 //set fixed gradient line
                         float gradient,  //page gradient
                         float new_c,
                         float new_error) {
    para_c = new_c;
    para_error = new_error;
    credibility =
      (float) (blobs.length () - textord_error_weight * new_error);
    y_origin = (float) (new_c / sqrt (1 + gradient * gradient));
    //real intercept
  }
  // Stores the row's lower and upper y limits.
  void set_limits(                //set min,max
                  float new_min,  //bottom and
                  float new_max) {  //top of row
    y_min = new_min;
    y_max = new_max;
  }
  void compute_vertical_projection();
  //get projection

  // True once a repeated-character count has been stored, i.e. the count is
  // no longer the -1 sentinel.
  bool rep_chars_marked() const {
    return num_repeated_sets_ != -1;
  }
  // Resets the count to the -1 "not searched yet" sentinel.
  void clear_rep_chars_marked() {
    num_repeated_sets_ = -1;
  }
  // Number of repeated-blob sets, or -1 if not searched yet.
  int num_repeated_sets() const {
    return num_repeated_sets_;
  }
  // Stores the number of repeated-blob sets found in this row.
  void set_num_repeated_sets(int num_sets) {
    num_repeated_sets_ = num_sets;
  }

  // NOTE: the original "true when dead" comment belongs to `merged`, which
  // shares a source line with the NEWDELETE2 macro invocation below.
  NEWDELETE2 (TO_ROW) BOOL8 merged;
  BOOL8 all_caps;                // had no ascenders
  BOOL8 used_dm_model;           // in guessing pitch
  inT16 projection_left;         // start of projection
  // NOTE(review): the original comment here also said "start of projection";
  // presumably this is the right-hand end -- confirm.
  inT16 projection_right;
  PITCH_TYPE pitch_decision;     // how strong is decision
  float fixed_pitch;             // pitch or 0
  float fp_space;                // sp if fixed pitch
  float fp_nonsp;                // nonsp if fixed pitch
  float pr_space;                // sp if prop
  float pr_nonsp;                // non sp if prop
  float spacing;                 // to "next" row
  float xheight;                 // of line
  int xheight_evidence;          // number of blobs of height xheight
  float ascrise;                 // ascenders
  float descdrop;                // descenders
  inT32 min_space;               // min size for real space
  inT32 max_nonspace;            // max size of non-space
  inT32 space_threshold;         // space vs nonspace
  float kern_size;               // average non-space
  float space_size;              // average space
  WERD_LIST rep_words;           // repeated chars
  ICOORDELT_LIST char_cells;     // fixed pitch cells
  QSPLINE baseline;              // curved baseline
  STATS projection;              // vertical projection

 private:
  BLOBNBOX_LIST blobs;           //blobs in row
  float y_min;                   //coords
  float y_max;
  float initial_y_min;
  float m, c;                    //line spec
  float error;                   //line error
  float para_c;                  //constrained fit
  float para_error;
  float y_origin;                //rotated para_c;
  float credibility;             //baseline believability
  int num_repeated_sets_;        // number of sets of repeated blobs
  // set to -1 if we have not searched
  // for repeated blobs in this row yet
};
+ void plot_graded_blobs(ScrollView* to_win); + + BLOBNBOX_LIST blobs; //medium size + BLOBNBOX_LIST underlines; //underline blobs + BLOBNBOX_LIST noise_blobs; //very small + BLOBNBOX_LIST small_blobs; //fairly small + BLOBNBOX_LIST large_blobs; //big blobs + BLOCK *block; //real block + PITCH_TYPE pitch_decision; //how strong is decision + float line_spacing; //estimate + float line_size; //estimate + float max_blob_size; //line assignment limit + float baseline_offset; //phase shift + float xheight; //median blob size + float fixed_pitch; //pitch or 0 + float kern_size; //average non-space + float space_size; //average space + inT32 min_space; //min definite space + inT32 max_nonspace; //max definite + float fp_space; //sp if fixed pitch + float fp_nonsp; //nonsp if fixed pitch + float pr_space; //sp if prop + float pr_nonsp; //non sp if prop + TO_ROW *key_row; //starting row + + NEWDELETE2 (TO_BLOCK) private: + TO_ROW_LIST row_list; //temporary rows +}; + +ELISTIZEH (TO_BLOCK) +extern double_VAR_H (textord_error_weight, 3, +"Weighting for error in believability"); +void find_blob_limits( //get y limits + PBLOB *blob, //blob to search + float leftx, //x limits + float rightx, + FCOORD rotation, //for landscape + float &ymin, //output y limits + float &ymax); +void find_cblob_limits( //get y limits + C_BLOB *blob, //blob to search + float leftx, //x limits + float rightx, + FCOORD rotation, //for landscape + float &ymin, //output y limits + float &ymax); +void find_cblob_vlimits( //get y limits + C_BLOB *blob, //blob to search + float leftx, //x limits + float rightx, + float &ymin, //output y limits + float &ymax); +void find_cblob_hlimits( //get x limits + C_BLOB *blob, //blob to search + float bottomy, //y limits + float topy, + float &xmin, //output x limits + float &xymax); +PBLOB *rotate_blob( //get y limits + PBLOB *blob, //blob to search + FCOORD rotation //vector to rotate by + ); +PBLOB *rotate_cblob( //rotate it + C_BLOB *blob, //blob to search + float 
xheight, //for poly approx + FCOORD rotation //for landscape + ); +C_BLOB *crotate_cblob( //rotate it + C_BLOB *blob, //blob to search + FCOORD rotation //for landscape + ); +TBOX box_next( //get bounding box + BLOBNBOX_IT *it //iterator to blobds + ); +TBOX box_next_pre_chopped( //get bounding box + BLOBNBOX_IT *it //iterator to blobds + ); +void vertical_blob_projection( //project outlines + PBLOB *blob, //blob to project + STATS *stats //output + ); + //project outlines +void vertical_outline_projection(OUTLINE *outline, //outline to project + STATS *stats //output + ); +void vertical_cblob_projection( //project outlines + C_BLOB *blob, //blob to project + STATS *stats //output + ); +void vertical_coutline_projection( //project outlines + C_OUTLINE *outline, //outline to project + STATS *stats //output + ); +void plot_blob_list(ScrollView* win, // window to draw in + BLOBNBOX_LIST *list, // blob list + ScrollView::Color body_colour, // colour to draw + ScrollView::Color child_colour); // colour of child + +#endif diff --git a/ccmain/blobs.cpp b/ccmain/blobs.cpp new file mode 100644 index 000000000..73fd09e11 --- /dev/null +++ b/ccmain/blobs.cpp @@ -0,0 +1,247 @@ +/* -*-C-*- + ******************************************************************************** + * + * File: blobs.c (Formerly blobs.c) + * Description: Blob definition + * Author: Mark Seaman, OCR Technology + * Created: Fri Oct 27 15:39:52 1989 + * Modified: Thu Mar 28 15:33:26 1991 (Mark Seaman) marks@hpgrlt + * Language: C + * Package: N/A + * Status: Experimental (Do Not Distribute) + * + * (c) Copyright 1989, Hewlett-Packard Company. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + *********************************************************************************/ + +/*---------------------------------------------------------------------- + I n c l u d e s +----------------------------------------------------------------------*/ +#include "mfcpch.h" +#include "blobs.h" +#include "cutil.h" +#include "emalloc.h" +#include "structures.h" + +/*---------------------------------------------------------------------- + F u n c t i o n s +----------------------------------------------------------------------*/ +/********************************************************************** + * blob_origin + * + * Compute the origin of a compound blob, define to be the centre + * of the bounding box. + **********************************************************************/ +void blob_origin(TBLOB *blob, /*blob to compute on */ + TPOINT *origin) { /*return value */ + TPOINT topleft; /*bounding box */ + TPOINT botright; + + /*find bounding box */ + blob_bounding_box(blob, &topleft, &botright); + /*centre of box */ + origin->x = (topleft.x + botright.x) / 2; + origin->y = (topleft.y + botright.y) / 2; +} + + +/********************************************************************** + * blob_bounding_box + * + * Compute the bounding_box of a compound blob, define to be the + * max coordinate value of the bounding boxes of all the top-level + * outlines in the box. 
+ **********************************************************************/ +void blob_bounding_box(TBLOB *blob, /*blob to compute on */ + register TPOINT *topleft, /*bounding box */ + register TPOINT *botright) { + register TESSLINE *outline; /*current outline */ + + if (blob == NULL || blob->outlines == NULL) { + topleft->x = topleft->y = 0; + *botright = *topleft; /*default value */ + } + else { + outline = blob->outlines; + *topleft = outline->topleft; + *botright = outline->botright; + for (outline = outline->next; outline != NULL; outline = outline->next) { + if (outline->topleft.x < topleft->x) + /*find extremes */ + topleft->x = outline->topleft.x; + if (outline->botright.x > botright->x) + /*find extremes */ + botright->x = outline->botright.x; + if (outline->topleft.y > topleft->y) + /*find extremes */ + topleft->y = outline->topleft.y; + if (outline->botright.y < botright->y) + /*find extremes */ + botright->y = outline->botright.y; + } + } +} + + +/********************************************************************** + * blobs_bounding_box + * + * Return the smallest extreme point that contain this word. + **********************************************************************/ +void blobs_bounding_box(TBLOB *blobs, TPOINT *topleft, TPOINT *botright) { + TPOINT tl; + TPOINT br; + TBLOB *blob; + /* Start with first blob */ + blob_bounding_box(blobs, topleft, botright); + + iterate_blobs(blob, blobs) { + blob_bounding_box(blob, &tl, &br); + + if (tl.x < topleft->x) + topleft->x = tl.x; + if (tl.y > topleft->y) + topleft->y = tl.y; + if (br.x > botright->x) + botright->x = br.x; + if (br.y < botright->y) + botright->y = br.y; + } +} + + +/********************************************************************** + * blobs_origin + * + * Compute the origin of a compound blob, define to be the centre + * of the bounding box. 
+ **********************************************************************/ +void blobs_origin(TBLOB *blobs, /*blob to compute on */ + TPOINT *origin) { /*return value */ + TPOINT topleft; /*bounding box */ + TPOINT botright; + + /*find bounding box */ + blobs_bounding_box(blobs, &topleft, &botright); + /*center of box */ + origin->x = (topleft.x + botright.x) / 2; + origin->y = (topleft.y + botright.y) / 2; +} + + +/********************************************************************** + * blobs_widths + * + * Compute the widths of a list of blobs. Return an array of the widths + * and gaps. + **********************************************************************/ +WIDTH_RECORD *blobs_widths(TBLOB *blobs) { /*blob to compute on */ + WIDTH_RECORD *width_record; + TPOINT topleft; /*bounding box */ + TPOINT botright; + TBLOB *blob; /*blob to compute on */ + int i = 0; + int blob_end; + int num_blobs = count_blobs (blobs); + + /* Get memory */ + width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2); + width_record->num_chars = num_blobs; + + blob_bounding_box(blobs, &topleft, &botright); + width_record->widths[i++] = botright.x - topleft.x; + /* First width */ + blob_end = botright.x; + + iterate_blobs (blob, blobs->next) { + blob_bounding_box(blob, &topleft, &botright); + width_record->widths[i++] = topleft.x - blob_end; + width_record->widths[i++] = botright.x - topleft.x; + blob_end = botright.x; + } + return (width_record); +} + + +/********************************************************************** + * count_blobs + * + * Return a count of the number of blobs attached to this one. 
+ **********************************************************************/ +int count_blobs(TBLOB *blobs) { + TBLOB *b; + int x = 0; + + iterate_blobs (b, blobs) x++; + return (x); +} + + +/********************************************************************** + * delete_word + * + * Reclaim the memory taken by this word structure and all of its + * lower level structures. + **********************************************************************/ +void delete_word(TWERD *word) { + TBLOB *blob; + TBLOB *nextblob; + TESSLINE *outline; + TESSLINE *nextoutline; + TESSLINE *child; + TESSLINE *nextchild; + + for (blob = word->blobs; blob; blob = nextblob) { + nextblob = blob->next; + + for (outline = blob->outlines; outline; outline = nextoutline) { + nextoutline = outline->next; + + delete_edgepts (outline->loop); + + for (child = outline->child; child; child = nextchild) { + nextchild = child->next; + + delete_edgepts (child->loop); + + oldoutline(child); + } + oldoutline(outline); + } + oldblob(blob); + } + if (word->correct != NULL) + strfree (word->correct); /* Reclaim memory */ + oldword(word); +} + + +/********************************************************************** + * delete_edgepts + * + * Delete a list of EDGEPT structures. 
+ **********************************************************************/ +void delete_edgepts(register EDGEPT *edgepts) { + register EDGEPT *this_edge; + register EDGEPT *next_edge; + + if (edgepts == NULL) + return; + + this_edge = edgepts; + do { + next_edge = this_edge->next; + oldedgept(this_edge); + this_edge = next_edge; + } + while (this_edge != edgepts); +} diff --git a/ccmain/blobs.h b/ccmain/blobs.h new file mode 100644 index 000000000..16c64b423 --- /dev/null +++ b/ccmain/blobs.h @@ -0,0 +1,119 @@ +/* -*-C-*- + ******************************************************************************** + * + * File: blobs.h (Formerly blobs.h) + * Description: Blob definition + * Author: Mark Seaman, OCR Technology + * Created: Fri Oct 27 15:39:52 1989 + * Modified: Thu Mar 28 15:33:38 1991 (Mark Seaman) marks@hpgrlt + * Language: C + * Package: N/A + * Status: Experimental (Do Not Distribute) + * + * (c) Copyright 1989, Hewlett-Packard Company. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
/**********************************************************************
 * free_widths
 *
 * Free the memory taken up by a width array; a null pointer is
 * ignored. The body is wrapped in do { } while (0) so the macro acts
 * as a single statement: the previous bare `if (w) memfree (w)` would
 * capture an `else` that followed `if (cond) free_widths (w);`.
 * Note that the argument is evaluated twice.
 **********************************************************************/
#define free_widths(w)  \
do {                    \
  if (w) memfree (w);   \
} while (0)
_ARGS((BLOB *blob, + TPOINT *topleft, + TPOINT *botright)); + +void blobs_bounding_box + _ARGS((BLOB *blobs, + TPOINT *topleft, + TPOINT *botright)); + +void blobs_origin + _ARGS((BLOB *blobs, + TPOINT *origin)); + +WIDTH_RECORD *blobs_widths + _ARGS((BLOB *blobs)); + +int count_blobs + _ARGS((BLOB *blobs)); + +void delete_word + _ARGS((TWERD *word)); + +void delete_edgepts + _ARGS((EDGEPT *edgepts)); +#undef _ARGS +*/ +#endif diff --git a/ccmain/blread.cpp b/ccmain/blread.cpp new file mode 100644 index 000000000..92446027c --- /dev/null +++ b/ccmain/blread.cpp @@ -0,0 +1,73 @@ +/********************************************************************** + * File: blread.cpp (Formerly pdread.c) + * Description: Friend function of BLOCK to read the uscan pd file. + * Author: Ray Smith + * Created: Mon Mar 18 14:39:00 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" +#include +#ifdef __UNIX__ +#include +#endif +#include "scanutils.h" +#include "fileerr.h" +#include "blread.h" + +#define UNLV_EXT ".uzn" // unlv zone file + +/********************************************************************** + * read_unlv_file + * + * Read a whole unlv zone file to make a list of blocks. 
+ **********************************************************************/ + +bool read_unlv_file( //print list of sides + STRING name, //basename of file + inT32 xsize, //image size + inT32 ysize, //image size + BLOCK_LIST *blocks //output list + ) { + FILE *pdfp; //file pointer + BLOCK *block; //current block + int x; //current top-down coords + int y; + int width; //of current block + int height; + BLOCK_IT block_it = blocks; //block iterator + + name += UNLV_EXT; //add extension + if ((pdfp = fopen (name.string (), "r")) == NULL) { + return false; //didn't read one + } + else { + while (fscanf (pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) { + //make rect block + block = new BLOCK (name.string (), TRUE, 0, 0, + (inT16) x, (inT16) (ysize - y - height), + (inT16) (x + width), (inT16) (ysize - y)); + //on end of list + block_it.add_to_end (block); + } + fclose(pdfp); + } + return true; +} + +void FullPageBlock(int width, int height, BLOCK_LIST *blocks) { + BLOCK_IT block_it(blocks); + BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); + block_it.add_to_end(block); +} diff --git a/ccmain/blread.h b/ccmain/blread.h new file mode 100644 index 000000000..5500a76ff --- /dev/null +++ b/ccmain/blread.h @@ -0,0 +1,33 @@ +/********************************************************************** + * File: blread.h (Formerly pdread.h) + * Description: Friend function of BLOCK to read the uscan pd file. + * Author: Ray Smith + * Created: Mon Mar 18 14:39:00 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef BLREAD_H +#define BLREAD_H + +#include "varable.h" +#include "ocrblock.h" + +bool read_unlv_file( //print list of sides + STRING name, //basename of file + inT32 xsize, //image size + inT32 ysize, //image size + BLOCK_LIST *blocks //output list + ); +void FullPageBlock(int width, int height, BLOCK_LIST *blocks); +#endif diff --git a/ccmain/callcpp.cpp b/ccmain/callcpp.cpp new file mode 100644 index 000000000..966befd90 --- /dev/null +++ b/ccmain/callcpp.cpp @@ -0,0 +1,172 @@ +/********************************************************************** + * File: callcpp.cpp + * Description: extern C interface calling C++ from C. + * Author: Ray Smith + * Created: Sun Feb 04 20:39:23 MST 1996 + * + * (C) Copyright 1996, Hewlett-Packard Co. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "mfcpch.h" +#include "errcode.h" +#ifdef __UNIX__ +#include +#include +#endif +#include +#include "memry.h" +#include "scrollview.h" +//#include "evnts.h" +#include "varable.h" +#include "callcpp.h" +#include "tprintf.h" +//#include "strace.h" +#include "host.h" +#include "unichar.h" + +//extern "C" { + +INT_VAR (tess_cp_mapping0, 0, "Mappings for class pruner distance"); +INT_VAR (tess_cp_mapping1, 1, "Mappings for class pruner distance"); +INT_VAR (tess_cp_mapping2, 2, "Mappings for class pruner distance"); +INT_VAR (tess_cp_mapping3, 3, "Mappings for class pruner distance"); +INT_VAR (record_matcher_output, 0, "Record detailed matcher info"); +INT_VAR (il1_adaption_test, 0, "Dont adapt to i/I at beginning of word"); +double_VAR (permuter_pending_threshold, 0.0, +"Worst conf for using pending dictionary"); +//Global matcher info from the class pruner. +inT32 cp_maps[4]; +//Global info to control writes of matcher info +char blob_answer[UNICHAR_LEN + 1]; //correct char +char *word_answer; //correct word +inT32 bits_in_states; //no of bits in states + +void setup_cp_maps() { + cp_maps[0] = tess_cp_mapping0; + cp_maps[1] = tess_cp_mapping1; + cp_maps[2] = tess_cp_mapping2; + cp_maps[3] = tess_cp_mapping3; +} + +void +cprintf ( //Trace printf +const char *format, ... 
//special message +) { + va_list args; //variable args + char msg[1000]; + + va_start(args, format); //variable list + vsprintf(msg, format, args); //Format into msg + va_end(args); + + tprintf ("%s", msg); +} + + +#ifndef GRAPHICS_DISABLED +ScrollView *c_create_window( /*create a window */ + const char *name, /*name/title of window */ + inT16 xpos, /*coords of window */ + inT16 ypos, /*coords of window */ + inT16 xsize, /*size of window */ + inT16 ysize, /*size of window */ + double xmin, /*scrolling limits */ + double xmax, /*to stop users */ + double ymin, /*getting lost in */ + double ymax /*empty space */ + ) { + return new ScrollView(name, xpos, ypos, xsize, ysize, xmax - xmin, ymax - ymin, true); +} + + +void c_line_color_index( /*set color */ + void *win, + C_COL index) { + // The colors are the same as the SV ones except that SV has COLOR:NONE --> offset of 1 + ScrollView* window = (ScrollView*) win; + window->Pen((ScrollView::Color) (index + 1)); +} + + +void c_move( /*move pen */ + void *win, + double x, + double y) { + ScrollView* window = (ScrollView*) win; + window->SetCursor((int) x, (int) y); +} + + +void c_draw( /*move pen */ + void *win, + double x, + double y) { + ScrollView* window = (ScrollView*) win; + window->DrawTo((int) x, (int) y); +} + + +void c_make_current( /*move pen */ + void *win) { + ScrollView* window = (ScrollView*) win; + window->Update(); +} + + +void c_clear_window( /*move pen */ + void *win) { + ScrollView* window = (ScrollView*) win; + window->Clear(); +} + + +char window_wait(ScrollView* win) { + SVEvent* ev; + // Wait till an input or click event (all others are thrown away) + char ret = '\0'; + SVEventType ev_type = SVET_ANY; + do { + ev = win->AwaitEvent(SVET_ANY); + ev_type = ev->type; + if (ev_type == SVET_INPUT) + ret = ev->parameter[0]; + delete ev; + } while (ev_type != SVET_INPUT && ev_type != SVET_CLICK); + return ret; +} +#endif + +void reverse32(void *ptr) { + char tmp; + char *cptr = (char *) ptr; + + tmp = 
// Reverses, in place, the order of the four bytes starting at ptr.
void reverse32(void *ptr) {
  char *bytes = (char *) ptr;
  const char outer = bytes[0];
  const char inner = bytes[1];

  bytes[0] = bytes[3];
  bytes[1] = bytes[2];
  bytes[2] = inner;
  bytes[3] = outer;
}


// Exchanges, in place, the two bytes starting at ptr.
void reverse16(void *ptr) {
  char *bytes = (char *) ptr;
  const char first = bytes[0];

  bytes[0] = bytes[1];
  bytes[1] = first;
}
namespace tesseract {
// Layer of the class stack that sits on top of CUtil and adds an Image
// member for derived classes to use.
class CCStruct : public CUtil {
 public:
  // Defined in ccstruct.cpp; constructs image_ with a pointer to this object.
  CCStruct();
  // Defined in ccstruct.cpp with an empty body.
  // NOTE(review): not declared virtual here; whether CUtil's destructor is
  // virtual is not visible from this header -- confirm before deleting
  // through a base pointer.
  ~CCStruct();

 protected:
  Image image_;  // constructed as image_(this) in the CCStruct constructor
};

class Tesseract;
}  // namespace tesseract
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef CHARSAMPLE_H +#define CHARSAMPLE_H + +#include "elst.h" +#include "pageres.h" +#include "memry.h" +#include "notdll.h" + +#define BAD_SCORE MAX_INT32 +#define FIRST_CHAR '!' +#define LAST_CHAR '~' + +namespace tesseract { + class Tesseract; // Fwd decl. +} + +enum ClusterType +{ UNKNOWN, BLOB_CLUSTER, IMAGE_CLUSTER }; + +class CHAR_SAMPLE; //forward decl + +ELISTIZEH (CHAR_SAMPLE) +class CHAR_SAMPLES; //forward decl + +ELISTIZEH (CHAR_SAMPLES) +class CHAR_PROTO; //forward decl + +class CHAR_SAMPLE:public ELIST_LINK +{ + public: + CHAR_SAMPLE(); // empty constructor + + CHAR_SAMPLE( // simple constructor + PBLOB *blob, + DENORM *denorm, + char c + ); + + CHAR_SAMPLE( // simple constructor + IMAGE *image, + char c + ); + + ~CHAR_SAMPLE () { + // We own the image, so it has to be deleted. 
// A collection of CHAR_SAMPLEs kept together with an optional prototype
// (CHAR_PROTO), a "best" sample pointer, a cluster type and a character
// label. Derives from ELIST_LINK; the matching list type is generated by the
// ELISTIZEH (CHAR_SAMPLES) invocation earlier in this header.
// Only the inline members below are visible here; the behaviour of the
// out-of-line members is defined in the implementation file.
class CHAR_SAMPLES:public ELIST_LINK
{
  public:
    CHAR_SAMPLES();  //empty constructor

    CHAR_SAMPLES(CHAR_SAMPLE *sample);

    // Empty body: nothing is released explicitly here.
    // NOTE(review): ownership of proto/best_sample is not visible from this
    // header -- confirm against the implementation.
    ~CHAR_SAMPLES () {  //destructor
    }

    // Number of entries currently in the samples list.
    inT32 n_samples() {
      return samples.length ();
    }

    void add_sample(CHAR_SAMPLE *sample, tesseract::Tesseract*);

    void build_prototype();

    void rebuild_prototype(inT32 new_xsize, inT32 new_ysize);

    void add_sample_to_prototype(CHAR_SAMPLE *sample);

    // Returns the stored prototype pointer (may be whatever the constructors
    // or build_prototype left there).
    CHAR_PROTO *prototype() {
      return proto;
    }

    void find_best_sample();

    float match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess);

    float nn_match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess);

    // Returns the character label stored in ch.
    char character() {
      return ch;
    }

    void assign_to_char();

    void print(FILE *f);

    NEWDELETE2 (CHAR_SAMPLES) private:
    ClusterType type;          // UNKNOWN, BLOB_CLUSTER or IMAGE_CLUSTER
    char ch;                   // character label returned by character()
    CHAR_PROTO *proto;         // prototype returned by prototype()
    CHAR_SAMPLE *best_sample;  // presumably chosen by find_best_sample() -- confirm
    CHAR_SAMPLE_LIST samples;  // the samples counted by n_samples()
};
} + + inT32 x_size() { + return xsize; + } + + inT32 y_size() { + return ysize; + } + + float **data() { + return proto; + } + char character() { + return ch; + } + + void enlarge_prototype(inT32 new_xsize, inT32 new_ysize); + + void add_sample(CHAR_SAMPLE *sample); + + IMAGE *make_image(); + + void print(FILE *f); + + NEWDELETE2 (CHAR_PROTO) private: + inT32 xsize; + inT32 ysize; + float *proto_data; + float **proto; + inT32 nsamples; + char ch; +}; +#endif diff --git a/ccmain/coutln.cpp b/ccmain/coutln.cpp new file mode 100644 index 000000000..504f316e4 --- /dev/null +++ b/ccmain/coutln.cpp @@ -0,0 +1,686 @@ +/********************************************************************** + * File: coutln.c (Formerly coutline.c) + * Description: Code for the C_OUTLINE class. + * Author: Ray Smith + * Created: Mon Oct 07 16:01:57 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" +#include +#ifdef __UNIX__ +#include +#endif +#include "coutln.h" + +ELISTIZE_S (C_OUTLINE) +ICOORD C_OUTLINE::step_coords[4] = { + ICOORD (-1, 0), ICOORD (0, -1), ICOORD (1, 0), ICOORD (0, 1) +}; + +/********************************************************************** + * C_OUTLINE::C_OUTLINE + * + * Constructor to build a C_OUTLINE from a CRACKEDGE LOOP. 
+ **********************************************************************/
+
+// Copies `length` chain codes from the CRACKEDGE list beginning at startpt
+// (following the next pointers) into the packed step array. The bounding
+// box and start position come straight from the arguments.
+C_OUTLINE::C_OUTLINE (
+//constructor
+CRACKEDGE * startpt,             //outline to convert
+ICOORD bot_left,                 //bounding box
+ICOORD top_right, inT16 length   //length of loop
+):box (bot_left, top_right), start (startpt->pos) {
+  inT16 stepindex;               //index to step
+  CRACKEDGE *edgept;             //current point
+
+  stepcount = length;            //no of steps
+  if (length == 0) {
+    // No steps to store: the outline is described by its box alone.
+    steps = NULL;
+    return;
+  }
+                                 //get memory
+  steps = (uinT8 *) alloc_mem (step_mem());
+  memset(steps, 0, step_mem());
+  edgept = startpt;
+
+  for (stepindex = 0; stepindex < length; stepindex++) {
+                                 //set compact step
+    set_step (stepindex, edgept->stepdir);
+    edgept = edgept->next;
+  }
+}
+
+
+/**********************************************************************
+ * C_OUTLINE::C_OUTLINE
+ *
+ * Constructor to build a C_OUTLINE from a C_OUTLINE_FRAG.
+ *
+ * Copies new_steps[0..length-1], dropping any step that differs from its
+ * predecessor by +/-64 (a "there-and-back" pair), then drops such pairs
+ * across the wrap-around between the last and first steps. Asserts that
+ * the path is closed and that at least 4 steps remain.
+ **********************************************************************/
+C_OUTLINE::C_OUTLINE (
+//constructor
+                                 //steps to copy
+ICOORD startpt, DIR128 * new_steps,
+inT16 length                     //length of loop
+):start (startpt) {
+  inT8 dirdiff;                  //direction difference
+  DIR128 prevdir;                //previous direction
+  DIR128 dir;                    //current direction
+  DIR128 lastdir;                //dir of last step
+  TBOX new_box;                  //easy bounding
+  inT16 stepindex;               //index to step
+  inT16 srcindex;                //source steps
+  ICOORD pos;                    //current position
+
+  pos = startpt;
+  stepcount = length;            //no of steps
+                                 //get memory
+  steps = (uinT8 *) alloc_mem (step_mem());
+  memset(steps, 0, step_mem());
+
+  // NOTE(review): reads new_steps[length - 1], so length is assumed > 0.
+  lastdir = new_steps[length - 1];
+  prevdir = lastdir;
+  // stepindex is the write position and may move backwards when a pair
+  // cancels; srcindex always advances through the source steps.
+  for (stepindex = 0, srcindex = 0; srcindex < length;
+  stepindex++, srcindex++) {
+    new_box = TBOX (pos, pos);
+    box += new_box;              //grow box to include current position
+                                 //copy steps
+    dir = new_steps[srcindex];
+    set_step(stepindex, dir);
+    dirdiff = dir - prevdir;
+    pos += step (stepindex);
+    if ((dirdiff == 64 || dirdiff == -64) && stepindex > 0) {
+      stepindex -= 2;            //cancel there-and-back
+      // After the loop increment the next write overwrites the cancelled
+      // pair; prevdir becomes the step before it (or lastdir if none).
+      prevdir = stepindex >= 0 ? step_dir (stepindex) : lastdir;
+    }
+    else
+      prevdir = dir;
+  }
+  ASSERT_HOST (pos.x () == startpt.x () && pos.y () == startpt.y ());
+  // Cancel there-and-back pairs formed by the last step and step 0:
+  // advance start past step 0 and shift the remaining steps down by one.
+  do {
+    dirdiff = step_dir (stepindex - 1) - step_dir (0);
+    if (dirdiff == 64 || dirdiff == -64) {
+      start += step (0);
+      stepindex -= 2;            //cancel there-and-back
+      for (int i = 0; i < stepindex; ++i)
+        set_step(i, step_dir(i + 1));
+    }
+  }
+  while (stepindex > 1 && (dirdiff == 64 || dirdiff == -64));
+  stepcount = stepindex;
+  ASSERT_HOST (stepcount >= 4);
+}
+
+/**********************************************************************
+ * C_OUTLINE::C_OUTLINE
+ *
+ * Constructor to build a C_OUTLINE from a rotation of a C_OUTLINE.
+ *
+ * Rotates each source point and re-traces the path between consecutive
+ * rotated points with unit steps. Storage is allocated for twice the
+ * source step count; stepcount is trimmed to the number actually written.
+ **********************************************************************/
+
+C_OUTLINE::C_OUTLINE(            //constructor
+  C_OUTLINE *srcline,            //outline to
+  FCOORD rotation                //rotate
+) {
+  TBOX new_box;                  //easy bounding
+  inT16 stepindex;               //index to step
+  inT16 dirdiff;                 //direction change
+  ICOORD pos;                    //current position
+  ICOORD prevpos;                //previous dest point
+
+  ICOORD destpos;                //destination point
+  inT16 destindex;               //index to step
+  DIR128 dir;                    //coded direction
+  uinT8 new_step;
+
+  stepcount = srcline->stepcount * 2;
+  if (stepcount == 0) {
+    // Step-less source: only the rotated bounding box is produced.
+    steps = NULL;
+    box = srcline->box;
+    box.rotate(rotation);
+    return;
+  }
+                                 //get memory
+  steps = (uinT8 *) alloc_mem (step_mem());
+  memset(steps, 0, step_mem());
+
+  // Up to two attempts. The second swaps which of the two emitted steps
+  // receives the +32 rounding offset; the loop stops as soon as an attempt
+  // leaves at least 4 steps.
+  for (int iteration = 0; iteration < 2; ++iteration) {
+    DIR128 round1 = iteration == 0 ? 32 : 0;
+    DIR128 round2 = iteration != 0 ? 32 : 0;
+    pos = srcline->start;
+    prevpos = pos;
+    prevpos.rotate (rotation);
+    start = prevpos;
+    box = TBOX (start, start);
+    destindex = 0;
+    for (stepindex = 0; stepindex < srcline->stepcount; stepindex++) {
+      pos += srcline->step (stepindex);
+      destpos = pos;
+      destpos.rotate (rotation);
+      //  printf("%i %i %i %i ", destpos.x(), destpos.y(), pos.x(), pos.y());
+      // Emit steps until the traced position reaches the rotated point.
+      while (destpos.x () != prevpos.x () || destpos.y () != prevpos.y ()) {
+        dir = DIR128 (FCOORD (destpos - prevpos));
+        dir += 64;               //turn to step style
+        new_step = dir.get_dir ();
+        //  printf(" %i\n", new_step);
+        if (new_step & 31) {
+          // dir is not a multiple of 32 (presumably not one of the four
+          // chain directions - confirm against DIR128), so it is written
+          // as two steps rounded with round1 and round2.
+          set_step(destindex++, dir + round1);
+          prevpos += step(destindex - 1);
+          if (destindex < 2
+            || ((dirdiff =
+            step_dir (destindex - 1) - step_dir (destindex - 2)) !=
+            -64 && dirdiff != 64)) {
+            set_step(destindex++, dir + round2);
+            prevpos += step(destindex - 1);
+          } else {
+            // The first rounded step reversed the previous one: undo both
+            // and overwrite the previous step with the second rounding.
+            prevpos -= step(destindex - 1);
+            destindex--;
+            prevpos -= step(destindex - 1);
+            set_step(destindex - 1, dir + round2);
+            prevpos += step(destindex - 1);
+          }
+        }
+        else {
+          set_step(destindex++, dir);
+          prevpos += step(destindex - 1);
+        }
+        while (destindex >= 2 &&
+          ((dirdiff =
+          step_dir (destindex - 1) - step_dir (destindex - 2)) == -64 ||
+          dirdiff == 64)) {
+          prevpos -= step(destindex - 1);
+          prevpos -= step(destindex - 2);
+          destindex -= 2;        // Forget u turn
+        }
+        //ASSERT_HOST(prevpos.x() == destpos.x() && prevpos.y() == destpos.y());
+        new_box = TBOX (destpos, destpos);
+        box += new_box;
+      }
+    }
+    ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ());
+    // Remove u-turns spanning the join between the last step and step 0.
+    dirdiff = step_dir (destindex - 1) - step_dir (0);
+    while ((dirdiff == 64 || dirdiff == -64) && destindex > 1) {
+      start += step (0);
+      destindex -= 2;
+      for (int i = 0; i < destindex; ++i)
+        set_step(i, step_dir(i + 1));
+      dirdiff = step_dir (destindex - 1) - step_dir (0);
+    }
+    if (destindex >= 4)
+      break;
+  }
+  ASSERT_HOST(destindex <= stepcount);
+  stepcount = destindex;
+  // Sanity check: following every stored step must return to start.
+  destpos = start;
+  for (stepindex = 0; stepindex < stepcount; stepindex++) {
+    destpos += step (stepindex);
+  }
+  ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ());
+}
+
+// Build a fake outline, given just a bounding box and append to the list.
+// The new outline has zero steps; the caller's list takes the pointer.
+void C_OUTLINE::FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines) {
+  C_OUTLINE_IT ol_it(outlines);
+  // Make a C_OUTLINE from the bounds. This is a bit of a hack,
+  // as there is no outline, just a bounding box, but it works nicely.
+  CRACKEDGE start;
+  start.pos = box.topleft();
+  // length == 0, so only start.pos is read from the CRACKEDGE.
+  C_OUTLINE* outline = new C_OUTLINE(&start, box.topleft(), box.botright(), 0);
+  ol_it.add_to_end(outline);
+}
+
+/**********************************************************************
+ * C_OUTLINE::area
+ *
+ * Compute the area of the outline, including the area() of every child.
+ **********************************************************************/
+
+inT32 C_OUTLINE::area() {        //area incl. children
+  int stepindex;                 //current step
+  inT32 total_steps;             //steps to do
+  inT32 total;                   //total area
+  ICOORD pos;                    //position of point
+  ICOORD next_step;              //step to next pix
+  C_OUTLINE_IT it = child ();
+
+  pos = start_pos ();
+  total_steps = pathlength ();
+  total = 0;
+  for (stepindex = 0; stepindex < total_steps; stepindex++) {
+                                 //accumulate y on horizontal steps
+    next_step = step (stepindex);
+    if (next_step.x () < 0)
+      total += pos.y ();
+    else if (next_step.x () > 0)
+      total -= pos.y ();
+    pos += next_step;
+  }
+  // NOTE(review): children presumably wind the opposite way so that their
+  // contribution subtracts from the total - confirm with the callers.
+  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
+    total += it.data ()->area ();//add areas of children
+
+  return total;
+}
+
+/**********************************************************************
+ * C_OUTLINE::perimeter
+ *
+ * Compute the perimeter of the outline and its first level children.
+ **********************************************************************/
+
+inT32 C_OUTLINE::perimeter() {
+  inT32 total_steps;             // Return value.
+  C_OUTLINE_IT it = child();
+
+  total_steps = pathlength();
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
+    total_steps += it.data()->pathlength();  // Add perimeters of children.
+
+  return total_steps;
+}
+
+
+/**********************************************************************
+ * C_OUTLINE::outer_area
+ *
+ * Compute the area of the outline, ignoring any children. An outline
+ * with no steps reports the area of its bounding box.
+ **********************************************************************/
+
+inT32 C_OUTLINE::outer_area() {  //area excl. children
+  int stepindex;                 //current step
+  inT32 total_steps;             //steps to do
+  inT32 total;                   //total area
+  ICOORD pos;                    //position of point
+  ICOORD next_step;              //step to next pix
+
+  pos = start_pos ();
+  total_steps = pathlength ();
+  if (total_steps == 0)
+    return box.area();
+  total = 0;
+  for (stepindex = 0; stepindex < total_steps; stepindex++) {
+                                 //accumulate y on horizontal steps
+    next_step = step (stepindex);
+    if (next_step.x () < 0)
+      total += pos.y ();
+    else if (next_step.x () > 0)
+      total -= pos.y ();
+    pos += next_step;
+  }
+
+  return total;
+}
+
+
+/**********************************************************************
+ * C_OUTLINE::count_transitions
+ *
+ * Compute the number of x and y maxes and mins in the outline.
+ **********************************************************************/
+
+// Walks the outline once, counting an extremum in x or y each time the
+// position has moved more than `threshold` away from the running max/min
+// in the opposite direction. The count is then adjusted for the extremum
+// that is still open when the walk returns to its starting point.
+inT32 C_OUTLINE::count_transitions(  //count extrema
+  inT32 threshold                    //on size
+) {
+  BOOL8 first_was_max_x;         //what was first
+  BOOL8 first_was_max_y;
+  BOOL8 looking_for_max_x;       //what is next
+  BOOL8 looking_for_min_x;
+  BOOL8 looking_for_max_y;       //what is next
+  BOOL8 looking_for_min_y;
+  int stepindex;                 //current step
+  inT32 total_steps;             //steps to do
+                                 //current limits
+  inT32 max_x, min_x, max_y, min_y;
+  inT32 initial_x, initial_y;    //initial limits
+  inT32 total;                   //total changes
+  ICOORD pos;                    //position of point
+  ICOORD next_step;              //step to next pix
+
+  pos = start_pos ();
+  total_steps = pathlength ();
+  total = 0;
+  max_x = min_x = pos.x ();
+  max_y = min_y = pos.y ();
+  // Both flags start TRUE on each axis: until the first extremum is
+  // confirmed, either a max or a min may come first.
+  looking_for_max_x = TRUE;
+  looking_for_min_x = TRUE;
+  looking_for_max_y = TRUE;
+  looking_for_min_y = TRUE;
+  first_was_max_x = FALSE;
+  first_was_max_y = FALSE;
+  initial_x = pos.x ();
+  initial_y = pos.y ();          //stop uninit warning
+  for (stepindex = 0; stepindex < total_steps; stepindex++) {
+                                 //classify each step by direction
+    next_step = step (stepindex);
+    pos += next_step;
+    if (next_step.x () < 0) {
+      // Moving left: track the running min, and confirm a max once we
+      // have dropped more than threshold below it.
+      if (looking_for_max_x && pos.x () < min_x)
+        min_x = pos.x ();
+      if (looking_for_min_x && max_x - pos.x () > threshold) {
+        if (looking_for_max_x) {
+          // Both flags still set: this is the first extremum in x.
+          initial_x = max_x;
+          first_was_max_x = FALSE;
+        }
+        total++;
+        looking_for_max_x = TRUE;
+        looking_for_min_x = FALSE;
+        min_x = pos.x ();        //reset min
+      }
+    }
+    else if (next_step.x () > 0) {
+      // Moving right: mirror image of the branch above.
+      if (looking_for_min_x && pos.x () > max_x)
+        max_x = pos.x ();
+      if (looking_for_max_x && pos.x () - min_x > threshold) {
+        if (looking_for_min_x) {
+          initial_x = min_x;     //remember first min
+          first_was_max_x = TRUE;
+        }
+        total++;
+        looking_for_max_x = FALSE;
+        looking_for_min_x = TRUE;
+        max_x = pos.x ();
+      }
+    }
+    else if (next_step.y () < 0) {
+      // Moving down: same scheme on the y axis.
+      if (looking_for_max_y && pos.y () < min_y)
+        min_y = pos.y ();
+      if (looking_for_min_y && max_y - pos.y () > threshold) {
+        if (looking_for_max_y) {
+          initial_y = max_y;     //remember first max
+          first_was_max_y = FALSE;
+        }
+        total++;
+        looking_for_max_y = TRUE;
+        looking_for_min_y = FALSE;
+        min_y = pos.y ();        //reset min
+      }
+    }
+    else {
+      // Remaining case: a step with y > 0 (moving up).
+      if (looking_for_min_y && pos.y () > max_y)
+        max_y = pos.y ();
+      if (looking_for_max_y && pos.y () - min_y > threshold) {
+        if (looking_for_min_y) {
+          initial_y = min_y;     //remember first min
+          first_was_max_y = TRUE;
+        }
+        total++;
+        looking_for_max_y = FALSE;
+        looking_for_min_y = TRUE;
+        max_y = pos.y ();
+      }
+    }
+
+  }
+  // Close the loop: compare the extremum still being tracked at the end
+  // of the walk with the first one recorded on that axis, and add or
+  // remove one transition accordingly.
+  if (first_was_max_x && looking_for_min_x) {
+    if (max_x - initial_x > threshold)
+      total++;
+    else
+      total--;
+  }
+  else if (!first_was_max_x && looking_for_max_x) {
+    if (initial_x - min_x > threshold)
+      total++;
+    else
+      total--;
+  }
+  if (first_was_max_y && looking_for_min_y) {
+    if (max_y - initial_y > threshold)
+      total++;
+    else
+      total--;
+  }
+  else if (!first_was_max_y && looking_for_max_y) {
+    if (initial_y - min_y > threshold)
+      total++;
+    else
+      total--;
+  }
+
+  return total;
+}
+
+
+/**********************************************************************
+ * C_OUTLINE::operator<
+ *
+ * Return TRUE if the left operand is inside the right one.
+ **********************************************************************/
+
+// Containment test. Probes points of this outline against other's winding
+// number until one is not on other's boundary; if every point of this lies
+// on the boundary, probes other's points against this instead.
+BOOL8
+C_OUTLINE::operator< (           //containment test
+const C_OUTLINE & other          //other outline
+) const
+{
+  // Disjoint boxes can never nest.
+  if (!box.overlap (other.box))
+    return FALSE;
+  // A step-less outline is judged by its bounding box alone.
+  if (stepcount == 0)
+    return other.box.contains(this->box);
+
+  inT16 winding = 0;             //latest winding number
+  ICOORD probe = start;          //point being tested
+  inT32 idx;                     //index to cstep
+
+  // Test our points against other, stopping at the first decisive one.
+  for (idx = 0; idx < stepcount; idx++) {
+    winding = other.winding_number (probe);
+    if (winding != INTERSECTING)
+      break;
+    probe += step (idx);
+  }
+  if (winding != INTERSECTING)
+    return winding != 0;         //inside iff other winds around the point
+
+  // Every point of this touched other: try the other way round.
+  probe = other.start;
+  for (idx = 0; idx < other.stepcount; idx++) {
+    winding = winding_number (probe);
+    if (winding != INTERSECTING)
+      break;
+    probe += other.step (idx);
+  }
+  // Still all intersecting, or other's point lies outside this.
+  return winding == INTERSECTING || winding == 0;
+}
+
+
+/**********************************************************************
+ * C_OUTLINE::winding_number
+ *
+ * Return the winding number of the outline around the given point.
+ **********************************************************************/
+
+// Counts signed crossings of the outline over the horizontal line through
+// `point`. Returns INTERSECTING as soon as a crossing step has a zero
+// cross product with the vector to the point.
+inT16 C_OUTLINE::winding_number( //winding number
+  ICOORD point                   //point to wind around
+) const {
+  inT16 crossings = 0;           //winding count
+  ICOORD to_outline = start - point;  //vector from point to current vertex
+
+  for (inT16 idx = 0; idx < stepcount; idx++) {
+    ICOORD delta = step (idx);   //get the step
+    // Does this step take y from <= 0 to > 0, or from > 0 to <= 0?
+    bool rising = to_outline.y () <= 0 && to_outline.y () + delta.y () > 0;
+    bool falling = to_outline.y () > 0 && to_outline.y () + delta.y () <= 0;
+
+    if (rising || falling) {
+      inT32 cross = to_outline * delta;  //cross product
+      if (cross == 0)
+        return INTERSECTING;     //going through point
+      if (rising && cross > 0)
+        crossings++;             //crossing right half
+      else if (falling && cross < 0)
+        crossings--;             //crossing back
+    }
+    to_outline += delta;         //sum vectors
+  }
+  return crossings;              //winding number
+}
+
+
+/**********************************************************************
+ * C_OUTLINE::turn_direction
+ *
+ * Return the sum direction delta of the outline.
+ **********************************************************************/
+
+// Sums the direction change between every pair of consecutive steps,
+// starting with the change from the last step to the first. Asserts that
+// each change is 0 or +/-32 and that the total is +/-128. A step-less
+// outline reports 128.
+inT16 C_OUTLINE::turn_direction() const {  //total turn
+  if (stepcount == 0)
+    return 128;
+
+  inT16 total_turn = 0;                      //accumulated turn
+  DIR128 prevdir = step_dir (stepcount - 1); //previous direction
+  for (inT16 idx = 0; idx < stepcount; idx++) {
+    DIR128 dir = step_dir (idx);             //current direction
+    inT8 dirdiff = dir - prevdir;            //direction difference
+    ASSERT_HOST (dirdiff == 0 || dirdiff == 32 || dirdiff == -32);
+    total_turn += dirdiff;
+    prevdir = dir;
+  }
+  ASSERT_HOST (total_turn == 128 || total_turn == -128);
+  return total_turn;
+}
+
+
+/**********************************************************************
+ * C_OUTLINE::reverse
+ *
+ * Reverse the direction of an outline.
+ **********************************************************************/
+
+// Swaps step i with step (stepcount - 1 - i) and adds half a turn to each,
+// so the same path is traversed in the opposite sense.
+void C_OUTLINE::reverse() {      //reverse direction
+  DIR128 halfturn = MODULUS / 2; //amount to shift
+  inT16 halfsteps = (stepcount + 1) / 2;  //half of stepcount, rounded up
+
+  for (inT16 front = 0; front < halfsteps; front++) {
+    inT16 back = stepcount - front - 1;   //index to other side
+    DIR128 front_dir = step_dir (front);  //saved before it is overwritten
+    set_step (front, step_dir (back) + halfturn);
+    set_step (back, front_dir + halfturn);
+  }
+}
+
+
+/**********************************************************************
+ * C_OUTLINE::move
+ *
+ * Move C_OUTLINE by vector
+ **********************************************************************/
+
+// Translates the bounding box and start point, then every child outline.
+void C_OUTLINE::move(            // reposition OUTLINE
+  const ICOORD vec               // by vector
+) {
+  box.move (vec);
+  start += vec;
+
+  C_OUTLINE_IT child_it(&children);
+  for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
+       child_it.forward ())
+    child_it.data ()->move (vec);  // move child outlines
+}
+
+// If this outline is smaller than the given min_size, delete this and
+// remove from its list, via *it, after checking that *it points to this.
+// Otherwise, if any children of this are too small, delete them.
+// On entry, *it must be an iterator pointing to this. If this gets deleted
+// then this is extracted from *it, so an iteration can continue.
+void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) {
+  const bool too_small = box.width() < min_size || box.height() < min_size;
+  if (too_small) {
+    ASSERT_HOST(this == it->data());
+    delete it->extract();  // Too small so get rid of it and any children.
+    return;                // this no longer exists: touch nothing else.
+  }
+  if (children.empty())
+    return;
+
+  // Search the children of this, deleting any that are too small.
+  C_OUTLINE_IT child_it(&children);
+  for (child_it.mark_cycle_pt(); !child_it.cycled_list();
+       child_it.forward()) {
+    child_it.data()->RemoveSmallRecursive(min_size, &child_it);
+  }
+}
+
+/**********************************************************************
+ * C_OUTLINE::plot
+ *
+ * Draw the outline in the given colour.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+// Draws the outline into window with the given pen colour. Runs of steps
+// sharing a direction are merged into a single DrawTo call. An outline
+// with no steps is drawn as its bounding rectangle.
+void C_OUTLINE::plot(            //draw it
+  ScrollView* window,            //window to draw in
+  ScrollView::Color colour       //colour to draw in
+) const {
+  inT16 stepindex;               //index to cstep
+  ICOORD pos;                    //current position
+  DIR128 stepdir;                //direction of step
+  DIR128 oldstepdir;             //previous stepdir
+
+  pos = start;                   //current position
+  window->Pen(colour);
+  if (stepcount == 0) {
+    window->Rectangle(box.left(), box.top(), box.right(), box.bottom());
+    return;
+  }
+  window->SetCursor(pos.x(), pos.y());
+
+  stepindex = 0;
+  stepdir = step_dir (0);        //get direction
+  while (stepindex < stepcount) {
+    do {
+      pos += step (stepindex);   //step to next
+      stepindex++;               //count steps
+      oldstepdir = stepdir;
+      // Fetch the next direction only while one exists. Reading
+      // step_dir(stepcount) would index one byte past the step array
+      // whenever stepcount is a multiple of 4.
+      if (stepindex < stepcount)
+        stepdir = step_dir (stepindex);
+    }
+    while (stepindex < stepcount
+      && oldstepdir.get_dir () == stepdir.get_dir ());
+                                 //merge straight lines
+    window->DrawTo(pos.x(), pos.y());
+  }
+}
+#endif
+
+
+/**********************************************************************
+ * C_OUTLINE::operator=
+ *
+ * Assignment - deep copy data
+ *
+ * Copies box, start, the packed steps and (deeply) the children.
+ * Self-assignment is a no-op. A source with no step storage yields
+ * steps == NULL, matching the zero-length constructor.
+ * NOTE(review): flags are not copied here - confirm that is intended.
+ **********************************************************************/
+
+                                 //assignment
+C_OUTLINE & C_OUTLINE::operator= (
+const C_OUTLINE & source         //from this
+) {
+  // Guard: without it the steps would be freed and then copied from.
+  if (this == &source)
+    return *this;
+
+  box = source.box;
+  start = source.start;
+  if (steps != NULL)
+    free_mem(steps);
+  steps = NULL;
+  stepcount = source.stepcount;
+  // Allocate and copy only when there is something to copy; memmove must
+  // not be handed a NULL source.
+  if (source.steps != NULL && step_mem() > 0) {
+    steps = (uinT8 *) alloc_mem (step_mem());
+    memmove (steps, source.steps, step_mem());
+  }
+  if (!children.empty ())
+    children.clear ();
+  children.deep_copy(&source.children, &deep_copy);
+  return *this;
+}
+
+// Returns the unit step vector for a chain code. Only the low two bits of
+// chaindir are used, so negative values wrap instead of producing a
+// negative array index (as chaindir % 4 would).
+ICOORD C_OUTLINE::chain_step(int chaindir) {
+  return step_coords[chaindir & STEP_MASK];
+}
diff --git a/ccmain/coutln.h b/ccmain/coutln.h
new file mode 100644
index 000000000..1174e6ae0
--- /dev/null
+++ b/ccmain/coutln.h
@@ -0,0 +1,197 @@
+/**********************************************************************
+ * File:        coutln.c  (Formerly: coutline.c)
+ * 
Description: Code for the C_OUTLINE class.
+ * Author:      Ray Smith
+ * Created:     Mon Oct 07 16:01:57 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef COUTLN_H
+#define COUTLN_H
+
+#include "crakedge.h"
+#include "mod128.h"
+#include "bits16.h"
+#include "rect.h"
+#include "blckerr.h"
+#include "scrollview.h"
+
+// Value returned by winding_number() when the outline passes through the
+// test point.
+#define INTERSECTING    MAX_INT16//no winding number
+
+                                 //mask to get step
+#define STEP_MASK       3
+
+enum C_OUTLINE_FLAGS
+{
+  COUT_INVERSE                   //White on black blob
+};
+
+class DLLSYM C_OUTLINE;          //forward declaration
+
+ELISTIZEH_S (C_OUTLINE)
+// A closed outline stored as a start point plus a chain of unit steps.
+// Each step is one of four directions packed into 2 bits, four steps per
+// byte. Outlines may own child outlines and live on C_OUTLINE_LISTs.
+class DLLSYM C_OUTLINE:public ELIST_LINK
+{
+ public:
+  // Creates an empty outline: no step storage and zero steps, so that
+  // pathlength() and step_mem() are well defined before assignment.
+  C_OUTLINE() {                  //empty constructor
+    steps = NULL;
+    stepcount = 0;
+  }
+  C_OUTLINE(                     //constructor
+    CRACKEDGE *startpt,          //from edge detector
+    ICOORD bot_left,             //bounding box
+    ICOORD top_right,
+    inT16 length);               //length of loop
+  C_OUTLINE(ICOORD startpt,      //start of loop
+    DIR128 *new_steps,           //steps in loop
+    inT16 length);               //length of loop
+                                 //outline to copy
+  C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation);  //and rotate
+
+  // Build a fake outline, given just a bounding box and append to the list.
+  static void FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines);
+
+  ~C_OUTLINE () {                //destructor
+    if (steps != NULL)
+      free_mem(steps);
+    steps = NULL;
+  }
+
+  BOOL8 flag(                    //test flag
+    C_OUTLINE_FLAGS mask) const {  //flag to test
+    return flags.bit (mask);
+  }
+  void set_flag(                 //set flag value
+    C_OUTLINE_FLAGS mask,        //flag to set
+    BOOL8 value) {               //value to set
+    flags.set_bit (mask, value);
+  }
+
+  C_OUTLINE_LIST *child() {      //get child list
+    return &children;
+  }
+
+                                 //access function
+  const TBOX &bounding_box() const {
+    return box;
+  }
+  // Stores a 2-bit chain code at stepindex, leaving the other three codes
+  // sharing that byte untouched. No bounds check is made on stepindex.
+  void set_step(                 //set a step
+    inT16 stepindex,             //index of step
+    inT8 stepdir) {              //chain code
+    int shift = stepindex%4 * 2;
+    uinT8 mask = 3 << shift;
+    steps[stepindex/4] = ((stepdir << shift) & mask) |
+      (steps[stepindex/4] & ~mask);
+                                 //squeeze 4 into byte
+  }
+  // As above, but takes a DIR128 and keeps only its top two direction bits.
+  void set_step(                 //set a step
+    inT16 stepindex,             //index of step
+    DIR128 stepdir) {            //direction
+                                 //reduce to 2-bit chain code
+    inT8 chaindir = stepdir.get_dir() >> (DIRBITS - 2);
+    set_step(stepindex, chaindir);
+  }
+
+                                 //get start position
+  const ICOORD &start_pos() const {
+    return start;
+  }
+  inT32 pathlength() const {     //get path length
+    return stepcount;
+  }
+  // Return step at a given index as a DIR128.
+  // No bounds check is made on index.
+  DIR128 step_dir(inT16 index) const {
+    return DIR128((inT16)(((steps[index/4] >> (index%4 * 2)) & STEP_MASK) <<
+      (DIRBITS - 2)));
+  }
+  // Return the step vector for the given outline position.
+  // No bounds check is made on index.
+  ICOORD step(inT16 index) const {  //index of step
+    return step_coords[(steps[index/4] >> (index%4 * 2)) & STEP_MASK];
+  }
+
+  inT32 area();                  //area including children
+  inT32 perimeter();  // Total perimeter of self and 1st level children.
+  inT32 outer_area();            //area ignoring children
+  inT32 count_transitions(       //count maxima
+    inT32 threshold);            //size threshold
+
+  BOOL8 operator< (              //containment test
+    const C_OUTLINE & other) const;
+  BOOL8 operator> (              //containment test
+    C_OUTLINE & other) const
+  {
+    return other < *this;        //use the < to do it
+  }
+  inT16 winding_number(          //get winding number
+    ICOORD testpt) const;        //around this point
+                                 //get direction
+  inT16 turn_direction() const;
+  void reverse();                //reverse direction
+
+  void move(                     // reposition outline
+    const ICOORD vec);           // by vector
+
+  // If this outline is smaller than the given min_size, delete this and
+  // remove from its list, via *it, after checking that *it points to this.
+  // Otherwise, if any children of this are too small, delete them.
+  // On entry, *it must be an iterator pointing to this. If this gets deleted
+  // then this is extracted from *it, so an iteration can continue.
+  void RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it);
+
+  void plot(                     //draw one
+    ScrollView* window,          //window to draw in
+    ScrollView::Color colour) const;  //colour to draw it
+
+  void prep_serialise() {        //set ptrs to counts
+    children.prep_serialise ();
+  }
+
+  void dump(                     //write external bits
+    FILE *f) {
+                                 //step_mem() = # bytes of packed steps
+    serialise_bytes (f, (void *) steps, step_mem());
+    children.dump (f);
+  }
+
+  void de_dump(                  //read external bits
+    FILE *f) {
+    // Relies on stepcount already holding the serialised value.
+    steps = (uinT8 *) de_serialise_bytes (f, step_mem());
+    children.de_dump (f);
+  }
+
+  make_serialise (C_OUTLINE)
+
+                                 //assignment - deep copy
+  C_OUTLINE& operator=(const C_OUTLINE& source);
+
+  // Returns a newly allocated deep copy of src; the caller owns it.
+  static C_OUTLINE* deep_copy(const C_OUTLINE* src) {
+    C_OUTLINE* outline = new C_OUTLINE;
+    *outline = *src;
+    return outline;
+  }
+
+  static ICOORD chain_step(int chaindir);
+
+ private:
+  // Bytes needed to hold stepcount 2-bit steps, rounded up.
+  int step_mem() const { return (stepcount+3) / 4; }
+
+  TBOX box;                      //bounding box
+  ICOORD start;                  //start coord
+  uinT8 *steps;                  //step array
+  inT16 stepcount;               //no of steps
+  BITS16 flags;                  //flags about outline
+  C_OUTLINE_LIST children;       //child elements
+  static ICOORD step_coords[4];
+};
+#endif
diff --git a/ccmain/crakedge.h b/ccmain/crakedge.h
new file mode 100644
index 000000000..e6d58a9f0
--- /dev/null
+++ b/ccmain/crakedge.h
@@ -0,0 +1,39 @@
+/**********************************************************************
+ * File:        crakedge.h  (Formerly: crkedge.h)
+ * Description: Structures for the Crack following edge detector.
+ * Author:      Ray Smith
+ * Created:     Fri Mar 22 16:06:38 GMT 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef CRAKEDGE_H
+#define CRAKEDGE_H
+
+#include "points.h"
+#include "mod128.h"
+
+// One node of a doubly linked list of crack edges.
+// The empty constructor initialises none of the members.
+class CRACKEDGE
+{
+  public:
+    ICOORD pos;                  /*position of crack */
+    inT8 stepx;                  //edge step
+    inT8 stepy;
+    inT8 stepdir;                //chaincode
+    CRACKEDGE *prev;             /*previous point */
+    CRACKEDGE *next;             /*next point */
+
+    NEWDELETE2 (CRACKEDGE) CRACKEDGE () {
+    }                            //empty constructor
+};
+#endif
diff --git a/ccmain/detlinefit.cpp b/ccmain/detlinefit.cpp
new file mode 100644
index 000000000..2fed66220
--- /dev/null
+++ b/ccmain/detlinefit.cpp
@@ -0,0 +1,144 @@
+///////////////////////////////////////////////////////////////////////
+// File:        detlinefit.cpp
+// Description: Deterministic least median squares line fitting.
+// Author:      Ray Smith
+// Created:     Thu Feb 28 14:45:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "detlinefit.h"
+#include "statistc.h"
+#include "ndminx.h"
+
+namespace tesseract {
+
+// The number of points to consider at each end.
+const int kNumEndPoints = 3;
+
+DetLineFit::DetLineFit() {
+}
+
+DetLineFit::~DetLineFit() {
+}
+
+// Delete all Added points.
+void DetLineFit::Clear() {
+  pt_list_.clear();
+}
+
+// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
+void DetLineFit::Add(const ICOORD& pt) {
+  ICOORDELT_IT it = &pt_list_;
+  ICOORDELT* new_pt = new ICOORDELT(pt);
+  it.add_to_end(new_pt);
+}
+
+// Fit a line to the points, returning the fitted line as a pair of
+// points, and the upper quartile error.
+// With no points both outputs are (0, 0) and the result is 0.0. With one
+// or two points those points are returned and the result is 0.0.
+// Otherwise every pairing of one of the first kNumEndPoints points with
+// one of the last kNumEndPoints points is scored by ComputeErrors and the
+// pair with the smallest upper quartile error wins.
+double DetLineFit::Fit(ICOORD* pt1, ICOORD* pt2) {
+  ICOORDELT_IT it(&pt_list_);
+  // Do something sensible with no points.
+  if (pt_list_.empty()) {
+    pt1->set_x(0);
+    pt1->set_y(0);
+    *pt2 = *pt1;
+    return 0.0;
+  }
+  // Count the points and find the first and last kNumEndPoints.
+  ICOORD* starts[kNumEndPoints];
+  ICOORD* ends[kNumEndPoints];
+  int pt_count = 0;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    if (pt_count < kNumEndPoints) {
+      starts[pt_count] = it.data();
+      ends[pt_count] = starts[pt_count];
+    } else {
+      // Slide the window of trailing points along by one.
+      for (int i = 1; i < kNumEndPoints; ++i)
+        ends[i - 1] = ends[i];
+      ends[kNumEndPoints - 1] = it.data();
+    }
+    ++pt_count;
+  }
+  // 1 or 2 points need special treatment.
+  if (pt_count <= 2) {
+    *pt1 = *starts[0];
+    if (pt_count > 1)
+      *pt2 = *starts[1];
+    else
+      *pt2 = *pt1;
+    return 0.0;
+  }
+  int end_count = MIN(pt_count, kNumEndPoints);
+  int* distances = new int[pt_count];
+  double best_uq = -1.0;  // Negative means "no candidate scored yet".
+  // Iterate each pair of points and find the best fitting line.
+  for (int i = 0; i < end_count; ++i) {
+    ICOORD* start = starts[i];
+    for (int j = 0; j < end_count; ++j) {
+      ICOORD* end = ends[j];
+      // Pointer comparison: skips pairing a list element with itself.
+      if (start != end) {
+        // Compute the upper quartile error from the line.
+        double dist = ComputeErrors(*start, *end, distances);
+        if (dist < best_uq || best_uq < 0.0) {
+          best_uq = dist;
+          *pt1 = *start;
+          *pt2 = *end;
+        }
+      }
+    }
+  }
+  delete [] distances;
+  // Finally compute the square root to return the true distance.
+  return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
+}
+
+// Comparator function used by the nth_item function.
+static int CompareInts(const void *p1, const void *p2) {
+  const int* i1 = reinterpret_cast<const int*>(p1);
+  const int* i2 = reinterpret_cast<const int*>(p2);
+
+  return *i1 - *i2;
+}
+
+// Compute all the cross product distances of the points from the line
+// and return the true squared upper quartile distance.
+// distances must have room for one int per added point; it is overwritten.
+// If start and end coincide the "line" is a single point, so the squared
+// distance of each point from start is used instead. This avoids the
+// division by zero that would otherwise hand a NaN back to Fit.
+double DetLineFit::ComputeErrors(const ICOORD start, const ICOORD end,
+                                 int* distances) {
+  ICOORDELT_IT it(&pt_list_);
+  ICOORD line_vector = end;
+  line_vector -= start;
+  // Squared length of the line (the dot product with itself).
+  const int line_sq_length = line_vector % line_vector;
+  // Compute the distance of each point from the line.
+  int pt_index = 0;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ICOORD pt_vector = *it.data();
+    pt_vector -= start;
+    // Compute |line_vector||pt_vector|sin(angle between), or for a
+    // zero-length line the squared distance from start.
+    int dist = line_sq_length > 0 ? line_vector * pt_vector
+                                  : pt_vector % pt_vector;
+    if (dist < 0)
+      dist = -dist;
+    distances[pt_index++] = dist;
+  }
+  // Now get the upper quartile distance.
+  int index = choose_nth_item(3 * pt_index / 4, distances, pt_index,
+                              sizeof(distances[0]), CompareInts);
+  double dist = distances[index];
+  if (line_sq_length == 0)
+    return dist;  // Already a squared distance.
+  // The true distance is the square root of the dist squared / the
+  // squared length of line_vector (which is the dot product with itself)
+  // Don't bother with the square root. Just return the square distance.
+  return dist * dist / line_sq_length;
+}
+
+}  // namespace tesseract.
+
+
diff --git a/ccmain/detlinefit.h b/ccmain/detlinefit.h
new file mode 100644
index 000000000..6a2279b4c
--- /dev/null
+++ b/ccmain/detlinefit.h
@@ -0,0 +1,80 @@
+///////////////////////////////////////////////////////////////////////
+// File:        detlinefit.h
+// Description: Deterministic least upper-quartile squares line fitting.
+// Author:      Ray Smith
+// Created:     Thu Feb 28 14:35:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_

#include "points.h"

namespace tesseract {

// This class fits a line to a set of ICOORD points.
// There is no restriction on the direction of the line, as it
// uses a vector method, ie no concern over infinite gradients.
// The fitted line has the least upper quartile of squares of perpendicular
// distances of all source points from the line, subject to the constraint
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
// i.e. the 9 combinations of one of the first 3 and last 3 points.
// A fundamental assumption of this algorithm is that one of the first 3 and
// one of the last 3 points are near the best line fit.
// The points must be Added in line order for the algorithm to work properly.
// No floating point calculations are needed* to make an accurate fit,
// and no random numbers are needed** so the algorithm is deterministic,
// architecture-stable, and compiler-stable as well as stable to minor
// changes in the input.
// *A single floating point division is used to compute each line's distance.
// This is unlikely to result in choice of a different line, but if it does,
// it would be easy to replace with a 64 bit integer calculation.
// **Random numbers are used in the nth_item function, but the worst
// non-determinism that can result is picking a different result among equals,
// and that wouldn't make any difference to the end-result distance, so the
// randomness does not affect the determinism of the algorithm. The random
// numbers are only there to guarantee average linear time.
// Fitting time is linear, but with a high constant, as it tries 9 different
// lines and computes the distance of all points each time.
// This class is aimed at replacing the LLSQ (linear least squares) and
// LMS (least median of squares) classes that are currently used for most
// of the line fitting in Tesseract.
class DetLineFit {
 public:
  DetLineFit();
  ~DetLineFit();

  // Delete all Added points.
  void Clear();

  // Add a new point. Takes a copy - the pt doesn't need to stay in scope.
  // Add must be called on points in sequence along the line.
  void Add(const ICOORD& pt);

  // Fit a line to the points, returning the fitted line as a pair of
  // points, and the upper quartile error.
  // pt1 and pt2 are outputs and receive the two points that define the line.
  double Fit(ICOORD* pt1, ICOORD* pt2);

 private:
  // Helper for Fit: measures how far the added points lie from the line
  // through start and end. distances is a caller-supplied work buffer.
  double ComputeErrors(const ICOORD start, const ICOORD end, int* distances);

  ICOORDELT_LIST pt_list_;  // All the added points.
};

}  // namespace tesseract.

#endif  // TESSERACT_CCSTRUCT_DETLINEFIT_H_

diff --git a/ccmain/genblob.cpp b/ccmain/genblob.cpp new file mode 100644 index 000000000..8d635e970 --- /dev/null +++ b/ccmain/genblob.cpp @@ -0,0 +1,133 @@ +/********************************************************************** + * File: genblob.cpp (Formerly gblob.c) + * Description: Generic Blob processing routines + * Author: Phil Cheatle + * Created: Mon Nov 25 10:53:26 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License.
+ * + **********************************************************************/ + +#include "mfcpch.h" +#include "stepblob.h" +#include "polyblob.h" +#include "genblob.h" + +/********************************************************************** + * blob_comparator() + * + * Blob comparator used to sort a blob list so that blobs are in increasing + * order of left edge. + **********************************************************************/ + +int blob_comparator( //sort blobs + const void *blob1p, //ptr to ptr to blob1 + const void *blob2p //ptr to ptr to blob2 + ) { + PBLOB *blob1 = *(PBLOB **) blob1p; + PBLOB *blob2 = *(PBLOB **) blob2p; + + return blob1->bounding_box ().left () - blob2->bounding_box ().left (); +} + + +/********************************************************************** + * c_blob_comparator() + * + * Blob comparator used to sort a blob list so that blobs are in increasing + * order of left edge. + **********************************************************************/ + +int c_blob_comparator( //sort blobs + const void *blob1p, //ptr to ptr to blob1 + const void *blob2p //ptr to ptr to blob2 + ) { + C_BLOB *blob1 = *(C_BLOB **) blob1p; + C_BLOB *blob2 = *(C_BLOB **) blob2p; + + return blob1->bounding_box ().left () - blob2->bounding_box ().left (); +} + + +/********************************************************************** + * gblob_bounding_box() + * + * Return the bounding box of a generic blob. + **********************************************************************/ + +TBOX gblob_bounding_box( //Get bounding box + PBLOB *blob, //generic blob + BOOL8 polygonal //is blob polygonal? 
+ ) { + if (polygonal) + return blob->bounding_box (); + else + return ((C_BLOB *) blob)->bounding_box (); +} + + +/********************************************************************** + * gblob_sort_list() + * + * Sort a generic blob list into order of bounding box left edge + **********************************************************************/ + +void gblob_sort_list( //Sort a gblob list + PBLOB_LIST *blob_list, //generic blob list + BOOL8 polygonal //is list polygonal? + ) { + PBLOB_IT b_it; + C_BLOB_IT c_it; + + if (polygonal) { + b_it.set_to_list (blob_list); + b_it.sort (blob_comparator); + } + else { + c_it.set_to_list ((C_BLOB_LIST *) blob_list); + c_it.sort (c_blob_comparator); + } +} + + +/********************************************************************** + * gblob_out_list() + * + * Return the generic outline list of a generic blob. + **********************************************************************/ + +OUTLINE_LIST *gblob_out_list( //Get outline list + PBLOB *blob, //generic blob + BOOL8 polygonal //is blob polygonal? + ) { + if (polygonal) + return blob->out_list (); + else + return (OUTLINE_LIST *) ((C_BLOB *) blob)->out_list (); +} + + +/********************************************************************** + * goutline_bounding_box() + * + * Return the bounding box of a generic outline. + **********************************************************************/ + +TBOX goutline_bounding_box( //Get bounding box + OUTLINE *outline, //generic outline + BOOL8 polygonal //is outline polygonal? 
+ ) { + if (polygonal) + return outline->bounding_box (); + else + return ((C_OUTLINE *) outline)->bounding_box (); +} diff --git a/ccmain/genblob.h b/ccmain/genblob.h new file mode 100644 index 000000000..445f290e4 --- /dev/null +++ b/ccmain/genblob.h @@ -0,0 +1,52 @@ +/********************************************************************** + * File: genblob.h (Formerly gblob.h) + * Description: Generic Blob processing routines + * Author: Phil Cheatle + * Created: Mon Nov 25 10:53:26 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef GENBLOB_H +#define GENBLOB_H + +#include "polyblob.h" +#include "hosthplb.h" +#include "rect.h" +#include "notdll.h" + +int blob_comparator( //sort blobs + const void *blob1p, //ptr to ptr to blob1 + const void *blob2p //ptr to ptr to blob2 + ); +int c_blob_comparator( //sort blobs + const void *blob1p, //ptr to ptr to blob1 + const void *blob2p //ptr to ptr to blob2 + ); +TBOX gblob_bounding_box( //Get bounding box + PBLOB *blob, //generic blob + BOOL8 polygonal //is blob polygonal? + ); +void gblob_sort_list( //Sort a gblob list + PBLOB_LIST *blob_list, //generic blob list + BOOL8 polygonal //is list polygonal? + ); +OUTLINE_LIST *gblob_out_list( //Get outline list + PBLOB *blob, //generic blob + BOOL8 polygonal //is blob polygonal? 
+ ); +TBOX goutline_bounding_box( //Get bounding box + OUTLINE *outline, //generic outline + BOOL8 polygonal //is outline polygonal? + ); +#endif diff --git a/ccmain/hpddef.h b/ccmain/hpddef.h new file mode 100644 index 000000000..bb90c4283 --- /dev/null +++ b/ccmain/hpddef.h @@ -0,0 +1,39 @@ +/********************************************************************** + * File: hpddef.h + * Description: Defines for dll symbols for handpd.dll. + * Author: Ray Smith + * Created: Tue Apr 30 17:15:01 MDT 1996 + * + * (C) Copyright 1996, Hewlett-Packard Co. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +//This file does NOT use the usual single inclusion code as it +//is necessary to allow it to be executed every time it is included. 
+//#ifndef HPDDEF_H +//#define HPDDEF_H + +#undef DLLSYM +#ifndef __IPEDLL +# define DLLSYM +#else +# ifdef __BUILDING_HANDPD__ +# define DLLSYM DLLEXPORT +# else +# define DLLSYM DLLIMPORT +# endif +#endif +#if defined(__CFM68K__) && !defined(__USING_STATIC_LIBS__) +# pragma import on +#endif + +//#endif diff --git a/ccmain/hpdsizes.h b/ccmain/hpdsizes.h new file mode 100644 index 000000000..2670e21b0 --- /dev/null +++ b/ccmain/hpdsizes.h @@ -0,0 +1,8 @@ +#ifndef HPDSIZES_H +#define HPDSIZES_H + +#define NUM_TEXT_ATTR 10 +#define NUM_BLOCK_ATTR 7 +#define MAXLENGTH 128 +#define NUM_BACKGROUNDS 8 +#endif diff --git a/ccmain/ipoints.h b/ccmain/ipoints.h new file mode 100644 index 000000000..d32359aa5 --- /dev/null +++ b/ccmain/ipoints.h @@ -0,0 +1,479 @@ +/********************************************************************** + * File: ipoints.h (Formerly icoords.h) + * Description: Inline functions for coords.h. + * Author: Ray Smith + * Created: Fri Jun 21 15:14:21 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef IPOINTS_H +#define IPOINTS_H + +#include + +/********************************************************************** + * operator! + * + * Rotate an ICOORD 90 degrees anticlockwise. + **********************************************************************/ + +inline ICOORD +operator! 
( //rotate 90 deg anti +const ICOORD & src //thing to rotate +) { + ICOORD result; //output + + result.xcoord = -src.ycoord; + result.ycoord = src.xcoord; + return result; +} + + +/********************************************************************** + * operator- + * + * Unary minus of an ICOORD. + **********************************************************************/ + +inline ICOORD +operator- ( //unary minus +const ICOORD & src //thing to minus +) { + ICOORD result; //output + + result.xcoord = -src.xcoord; + result.ycoord = -src.ycoord; + return result; +} + + +/********************************************************************** + * operator+ + * + * Add 2 ICOORDS. + **********************************************************************/ + +inline ICOORD +operator+ ( //sum vectors +const ICOORD & op1, //operands +const ICOORD & op2) { + ICOORD sum; //result + + sum.xcoord = op1.xcoord + op2.xcoord; + sum.ycoord = op1.ycoord + op2.ycoord; + return sum; +} + + +/********************************************************************** + * operator+= + * + * Add 2 ICOORDS. + **********************************************************************/ + +inline ICOORD & +operator+= ( //sum vectors +ICOORD & op1, //operands +const ICOORD & op2) { + op1.xcoord += op2.xcoord; + op1.ycoord += op2.ycoord; + return op1; +} + + +/********************************************************************** + * operator- + * + * Subtract 2 ICOORDS. + **********************************************************************/ + +inline ICOORD +operator- ( //subtract vectors +const ICOORD & op1, //operands +const ICOORD & op2) { + ICOORD sum; //result + + sum.xcoord = op1.xcoord - op2.xcoord; + sum.ycoord = op1.ycoord - op2.ycoord; + return sum; +} + + +/********************************************************************** + * operator-= + * + * Subtract 2 ICOORDS. 
+ **********************************************************************/ + +inline ICOORD & +operator-= ( //sum vectors +ICOORD & op1, //operands +const ICOORD & op2) { + op1.xcoord -= op2.xcoord; + op1.ycoord -= op2.ycoord; + return op1; +} + + +/********************************************************************** + * operator% + * + * Scalar product of 2 ICOORDS. + **********************************************************************/ + +inline inT32 +operator% ( //scalar product +const ICOORD & op1, //operands +const ICOORD & op2) { + return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord; +} + + +/********************************************************************** + * operator* + * + * Cross product of 2 ICOORDS. + **********************************************************************/ + +inline inT32 operator *( //cross product + const ICOORD &op1, //operands + const ICOORD &op2) { + return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord; +} + + +/********************************************************************** + * operator* + * + * Scalar multiply of an ICOORD. + **********************************************************************/ + +inline ICOORD operator *( //scalar multiply + const ICOORD &op1, //operands + inT16 scale) { + ICOORD result; //output + + result.xcoord = op1.xcoord * scale; + result.ycoord = op1.ycoord * scale; + return result; +} + + +inline ICOORD operator *( //scalar multiply + inT16 scale, + const ICOORD &op1 //operands + ) { + ICOORD result; //output + + result.xcoord = op1.xcoord * scale; + result.ycoord = op1.ycoord * scale; + return result; +} + + +/********************************************************************** + * operator*= + * + * Scalar multiply of an ICOORD. 
+ **********************************************************************/ + +inline ICOORD & +operator*= ( //scalar multiply +ICOORD & op1, //operands +inT16 scale) { + op1.xcoord *= scale; + op1.ycoord *= scale; + return op1; +} + + +/********************************************************************** + * operator/ + * + * Scalar divide of an ICOORD. + **********************************************************************/ + +inline ICOORD +operator/ ( //scalar divide +const ICOORD & op1, //operands +inT16 scale) { + ICOORD result; //output + + result.xcoord = op1.xcoord / scale; + result.ycoord = op1.ycoord / scale; + return result; +} + + +/********************************************************************** + * operator/= + * + * Scalar divide of an ICOORD. + **********************************************************************/ + +inline ICOORD & +operator/= ( //scalar divide +ICOORD & op1, //operands +inT16 scale) { + op1.xcoord /= scale; + op1.ycoord /= scale; + return op1; +} + + +/********************************************************************** + * ICOORD::rotate + * + * Rotate an ICOORD by the given (normalized) (cos,sin) vector. + **********************************************************************/ + +inline void ICOORD::rotate( //rotate by vector + const FCOORD& vec) { + inT16 tmp; + + tmp = (inT16) floor (xcoord * vec.x () - ycoord * vec.y () + 0.5); + ycoord = (inT16) floor (ycoord * vec.x () + xcoord * vec.y () + 0.5); + xcoord = tmp; +} + + +/********************************************************************** + * operator! + * + * Rotate an FCOORD 90 degrees anticlockwise. + **********************************************************************/ + +inline FCOORD +operator! 
( //rotate 90 deg anti +const FCOORD & src //thing to rotate +) { + FCOORD result; //output + + result.xcoord = -src.ycoord; + result.ycoord = src.xcoord; + return result; +} + + +/********************************************************************** + * operator- + * + * Unary minus of an FCOORD. + **********************************************************************/ + +inline FCOORD +operator- ( //unary minus +const FCOORD & src //thing to minus +) { + FCOORD result; //output + + result.xcoord = -src.xcoord; + result.ycoord = -src.ycoord; + return result; +} + + +/********************************************************************** + * operator+ + * + * Add 2 FCOORDS. + **********************************************************************/ + +inline FCOORD +operator+ ( //sum vectors +const FCOORD & op1, //operands +const FCOORD & op2) { + FCOORD sum; //result + + sum.xcoord = op1.xcoord + op2.xcoord; + sum.ycoord = op1.ycoord + op2.ycoord; + return sum; +} + + +/********************************************************************** + * operator+= + * + * Add 2 FCOORDS. + **********************************************************************/ + +inline FCOORD & +operator+= ( //sum vectors +FCOORD & op1, //operands +const FCOORD & op2) { + op1.xcoord += op2.xcoord; + op1.ycoord += op2.ycoord; + return op1; +} + + +/********************************************************************** + * operator- + * + * Subtract 2 FCOORDS. + **********************************************************************/ + +inline FCOORD +operator- ( //subtract vectors +const FCOORD & op1, //operands +const FCOORD & op2) { + FCOORD sum; //result + + sum.xcoord = op1.xcoord - op2.xcoord; + sum.ycoord = op1.ycoord - op2.ycoord; + return sum; +} + + +/********************************************************************** + * operator-= + * + * Subtract 2 FCOORDS. 
+ **********************************************************************/ + +inline FCOORD & +operator-= ( //sum vectors +FCOORD & op1, //operands +const FCOORD & op2) { + op1.xcoord -= op2.xcoord; + op1.ycoord -= op2.ycoord; + return op1; +} + + +/********************************************************************** + * operator% + * + * Scalar product of 2 FCOORDS. + **********************************************************************/ + +inline float +operator% ( //scalar product +const FCOORD & op1, //operands +const FCOORD & op2) { + return op1.xcoord * op2.xcoord + op1.ycoord * op2.ycoord; +} + + +/********************************************************************** + * operator* + * + * Cross product of 2 FCOORDS. + **********************************************************************/ + +inline float operator *( //cross product + const FCOORD &op1, //operands + const FCOORD &op2) { + return op1.xcoord * op2.ycoord - op1.ycoord * op2.xcoord; +} + + +/********************************************************************** + * operator* + * + * Scalar multiply of an FCOORD. + **********************************************************************/ + +inline FCOORD operator *( //scalar multiply + const FCOORD &op1, //operands + float scale) { + FCOORD result; //output + + result.xcoord = op1.xcoord * scale; + result.ycoord = op1.ycoord * scale; + return result; +} + + +inline FCOORD operator *( //scalar multiply + float scale, + const FCOORD &op1 //operands + ) { + FCOORD result; //output + + result.xcoord = op1.xcoord * scale; + result.ycoord = op1.ycoord * scale; + return result; +} + + +/********************************************************************** + * operator*= + * + * Scalar multiply of an FCOORD. 
+ **********************************************************************/ + +inline FCOORD & +operator*= ( //scalar multiply +FCOORD & op1, //operands +float scale) { + op1.xcoord *= scale; + op1.ycoord *= scale; + return op1; +} + + +/********************************************************************** + * operator/ + * + * Scalar divide of an FCOORD. + **********************************************************************/ + +inline FCOORD +operator/ ( //scalar divide +const FCOORD & op1, //operands +float scale) { + FCOORD result; //output + + if (scale != 0) { + result.xcoord = op1.xcoord / scale; + result.ycoord = op1.ycoord / scale; + } + return result; +} + + +/********************************************************************** + * operator/= + * + * Scalar divide of an FCOORD. + **********************************************************************/ + +inline FCOORD & +operator/= ( //scalar divide +FCOORD & op1, //operands +float scale) { + if (scale != 0) { + op1.xcoord /= scale; + op1.ycoord /= scale; + } + return op1; +} + + +/********************************************************************** + * rotate + * + * Rotate an FCOORD by the given (normalized) (cos,sin) vector. + **********************************************************************/ + +inline void FCOORD::rotate( //rotate by vector + const FCOORD vec) { + float tmp; + + tmp = xcoord * vec.x () - ycoord * vec.y (); + ycoord = ycoord * vec.x () + xcoord * vec.y (); + xcoord = tmp; +} +#endif diff --git a/ccmain/labls.cpp b/ccmain/labls.cpp new file mode 100644 index 000000000..31d31c62b --- /dev/null +++ b/ccmain/labls.cpp @@ -0,0 +1,188 @@ +/********************************************************************** + * File: labls.c (Formerly labels.c) + * Description: Attribute definition tables + * Author: Sheelagh Lloyd? + * Created: + * + * (C) Copyright 1993, Hewlett-Packard Ltd. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/

#include "mfcpch.h"
#include "hpdsizes.h"
#include "labls.h"

/******************************************************************************
 * TEXT REGIONS
 *****************************************************************************/
// Per text attribute: the number of non-empty labels in the matching
// row of tlabel below.
DLLSYM inT32 tn[NUM_TEXT_ATTR] = {
  3,                             //T_HORIZONTAL
  4,                             //T_TEXT
  2,                             //T_SERIF
  2,                             //T_PROPORTIONAL
  2,                             //T_NORMAL
  2,                             //T_UPRIGHT
  2,                             //T_SOLID
  3,                             //T_BLACK
  2,                             //T_NOTUNDER
  2,                             //T_NOTDROP
};

// Display names for the values of each text attribute. Every row has four
// slots; unused slots hold the empty string.
DLLSYM char tlabel[NUM_TEXT_ATTR][4][MAXLENGTH] = { {
                                 //T_HORIZONTAL
    "Horizontal",
    "Vertical",
    "Skew",
    ""
  },
  {                              //T_TEXT
    "Text",
    "Table",
    "Form",
    "Mixed"
  },
  {                              //T_SERIF
    "Serif",
    "Sans-serif",
    "",
    ""
  },
  {                              //T_PROPORTIONAL
    "Proportional",
    "Fixed pitch",
    "",
    ""
  },
  {                              //T_NORMAL
    "Normal",
    "Bold",
    "",
    ""
  },
  {                              //T_UPRIGHT
    "Upright",
    "Italic",
    "",
    ""
  },
  {                              //T_SOLID
    "Solid",
    "Outline",
    "",
    ""
  },
  {                              //T_BLACK
    "Black",
    "White",
    "Coloured",
    ""
  },
  {                              //T_NOTUNDER
    "Not underlined",
    "Underlined",
    "",
    ""
  },
  {                              //T_NOTDROP
    "Not drop caps",
    "Drop Caps",
    "",
    ""
  }
};

// Per block attribute: the number of non-empty labels in the matching
// row of blabel below.
DLLSYM inT32 bn[NUM_BLOCK_ATTR] = {
  4,                             //G_MONOCHROME
  2,                             //I_MONOCHROME
  2,                             //I_SMOOTH
  3,                             //R_SINGLE
  3,                             //R_BLACK
  3,                             //S_BLACK
  2                              //W_TEXT
};

// One slot per text / block attribute; no initial values are given here.
DLLSYM inT32 tvar[NUM_TEXT_ATTR];
DLLSYM inT32 bvar[NUM_BLOCK_ATTR];
// Display names for the values of each block attribute, grouped by block
// type. Every row has four slots; unused slots hold the empty string.
// NOTE(review): several labels end in a single trailing space; wider
// padding may have been lost when this text was flattened - confirm
// against the original file before relying on label widths.
DLLSYM char blabel[NUM_BLOCK_ATTR][4][MAXLENGTH] = { {
                                 //G_MONOCHROME

    /****************************************************************************
     * GRAPHICS
     ***************************************************************************/
    "Monochrome ",
    "Two colour ",
    "Spot colour",
    "Multicolour"
  },

  /****************************************************************************
   * IMAGE
   ***************************************************************************/
  {                              //I_MONOCHROME
    "Monochrome ",
    "Colour ",
    "",
    ""
  },
  {                              //I_SMOOTH
    "Smooth ",
    "Grainy ",
    "",
    ""
  },

  /****************************************************************************
   * RULES
   ***************************************************************************/
  {                              //R_SINGLE
    "Single ",
    "Double ",
    "Multiple",
    ""
  },
  {                              //R_BLACK
    "Black ",
    "White ",
    "Coloured",
    ""
  },

  /****************************************************************************
   * SCRIBBLE
   ***************************************************************************/
  {                              //S_BLACK
    "Black ",
    "White ",
    "Coloured",
    ""
  },
  /****************************************************************************
   * WEIRD
   ***************************************************************************/
  {                              //W_TEXT
    "No text ",
    "Contains text",
    "",
    ""
  }
};

// Display names for the background kinds, one per B_* value.
DLLSYM char backlabel[NUM_BACKGROUNDS][MAXLENGTH] = {
  "White",                       //B_WHITE
  "Black",                       //B_BLACK
  "Coloured",                    //B_COLOURED
  "Textured",                    //B_TEXTURED
  "Patterned",                   //B_PATTERNED
  "Gradient fill",               //B_GRADIENTFILL
  "Image",                       //B_IMAGE
  "Text"                         //B_TEXT
};
diff --git a/ccmain/labls.h b/ccmain/labls.h new file mode 100644 index 000000000..ece7190a4 --- /dev/null +++ b/ccmain/labls.h @@ -0,0 +1,38 @@ +/********************************************************************** + * File: labls.h (Formerly labels.h) + * Description: Attribute definition tables + * Author: Sheelagh Lloyd?
+ * Created: + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/
#ifndef LABLS_H
#define LABLS_H

#include "host.h"
#include "hpdsizes.h"

#include "hpddef.h"              //must be last (handpd.dll)

// Text attributes: label count per attribute and the label strings.
extern DLLSYM inT32 tn[NUM_TEXT_ATTR];

extern DLLSYM char tlabel[NUM_TEXT_ATTR][4][MAXLENGTH];

// Block attributes: label count per attribute.
extern DLLSYM inT32 bn[NUM_BLOCK_ATTR];

// One value slot per text / block attribute, plus the block label strings.
extern DLLSYM inT32 tvar[NUM_TEXT_ATTR];
extern DLLSYM inT32 bvar[NUM_BLOCK_ATTR];
extern DLLSYM char blabel[NUM_BLOCK_ATTR][4][MAXLENGTH];

// Background kind label strings.
extern DLLSYM char backlabel[NUM_BACKGROUNDS][MAXLENGTH];
#endif
diff --git a/ccmain/linlsq.cpp b/ccmain/linlsq.cpp new file mode 100644 index 000000000..1f7bbd0c3 --- /dev/null +++ b/ccmain/linlsq.cpp @@ -0,0 +1,249 @@ +/********************************************************************** + * File: linlsq.cpp (Formerly llsq.c) + * Description: Linear Least squares fitting code. + * Author: Ray Smith + * Created: Thu Sep 12 08:44:51 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" +#include +#include +#include "errcode.h" +#include "linlsq.h" + +#ifndef __UNIX__ +#define M_PI 3.14159265359 +#endif + +const ERRCODE EMPTY_LLSQ = "Can't delete from an empty LLSQ"; + +#define EXTERN + +EXTERN double_VAR (pdlsq_posdir_ratio, 4e-6, "Mult of dir to cf pos"); +EXTERN double_VAR (pdlsq_threshold_angleavg, 0.1666666, +"Frac of pi for simple fit"); + +/********************************************************************** + * LLSQ::clear + * + * Function to initialize a LLSQ. + **********************************************************************/ + +void LLSQ::clear() { //initialize + n = 0; //no elements + sigx = 0; //update accumulators + sigy = 0; + sigxx = 0; + sigxy = 0; + sigyy = 0; +} + + +/********************************************************************** + * LLSQ::add + * + * Add an element to the accumulator. + **********************************************************************/ + +void LLSQ::add( //add an element + double x, //xcoord + double y //ycoord + ) { + n++; //count elements + sigx += x; //update accumulators + sigy += y; + sigxx += x * x; + sigxy += x * y; + sigyy += y * y; +} + + +/********************************************************************** + * LLSQ::remove + * + * Delete an element from the acculuator. 
+ **********************************************************************/ + +void LLSQ::remove( //delete an element + double x, //xcoord + double y //ycoord + ) { + if (n <= 0) + //illegal + EMPTY_LLSQ.error ("LLSQ::remove", ABORT, NULL); + n--; //count elements + sigx -= x; //update accumulators + sigy -= y; + sigxx -= x * x; + sigxy -= x * y; + sigyy -= y * y; +} + + +/********************************************************************** + * LLSQ::m + * + * Return the gradient of the line fit. + **********************************************************************/ + +double LLSQ::m() { //get gradient + if (n > 1) + return (sigxy - sigx * sigy / n) / (sigxx - sigx * sigx / n); + else + return 0; //too little +} + + +/********************************************************************** + * LLSQ::c + * + * Return the constant of the line fit. + **********************************************************************/ + +double LLSQ::c( //get constant + double m //gradient to fit with + ) { + if (n > 0) + return (sigy - m * sigx) / n; + else + return 0; //too little +} + + +/********************************************************************** + * LLSQ::rms + * + * Return the rms error of the fit. + **********************************************************************/ + +double LLSQ::rms( //get error + double m, //gradient to fit with + double c //constant to fit with + ) { + double error; //total error + + if (n > 0) { + error = + sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c * (n * c - + 2 * sigy); + if (error >= 0) + error = sqrt (error / n); //sqrt of mean + else + error = 0; + } + else + error = 0; //too little + return error; +} + + +/********************************************************************** + * LLSQ::spearman + * + * Return the spearman correlation coefficient. 
+ **********************************************************************/
+
+// NOTE(review): the expression below is covariance / sqrt(var_x * var_y)
+// computed from the raw sums, not from ranks.
+double LLSQ::spearman() {  //get error
+  double error;  //total error
+
+  if (n > 1) {
+    error = (sigxx - sigx * sigx / n) * (sigyy - sigy * sigy / n);
+    if (error > 0) {
+      error = (sigxy - sigx * sigy / n) / sqrt (error);
+    }
+    else
+      error = 1;
+  }
+  else
+    error = 1;  //too little
+  return error;
+}
+
+
+/**********************************************************************
+ * PDLSQ::fit
+ *
+ * Return all the parameters of the fit to pos/dir.
+ * The return value is the rms error.
+ * Outputs: ang is the quantized angle, sin_ang/cos_ang its sine and
+ * cosine, and r the offset (sin * mean x - cos * mean y).
+ * With no points, every output, including r, is set to zero and the
+ * returned error is 0.
+ **********************************************************************/
+
+float PDLSQ::fit(  //get fit
+                 DIR128 &ang,     //output angle
+                 float &sin_ang,  //r,theta parameterisation
+                 float &cos_ang,
+                 float &r) {
+  double a, b;        //intermediates
+  double angle;       //resulting angle
+  double avg_angle;   //simple average
+  double error;       //total error
+  double sinx, cosx;  //return values
+
+  if (pos.n > 0) {
+    a = pos.sigxy - pos.sigx * pos.sigy / pos.n
+      + pdlsq_posdir_ratio * dir.sigxy;
+    b =
+      pos.sigxx - pos.sigyy + (pos.sigy * pos.sigy -
+      pos.sigx * pos.sigx) / pos.n
+      + pdlsq_posdir_ratio * (dir.sigxx - dir.sigyy);
+    // Angle of the summed direction vectors.
+    if (dir.sigy != 0 || dir.sigx != 0)
+      avg_angle = atan2 (dir.sigy, dir.sigx);
+    else
+      avg_angle = 0;
+    if ((a != 0 || b != 0) && pos.n > 1)
+      angle = atan2 (2 * a, b) / 2;
+    else
+      angle = avg_angle;
+    // Shift the fitted angle by pi when it differs from the average
+    // direction by more than pi/2.
+    error = avg_angle - angle;
+    if (error > M_PI / 2) {
+      error -= M_PI;
+      angle += M_PI;
+    }
+    if (error < -M_PI / 2) {
+      error += M_PI;
+      angle -= M_PI;
+    }
+    if (error > M_PI * pdlsq_threshold_angleavg
+      || error < -M_PI * pdlsq_threshold_angleavg)
+      angle = avg_angle;  //go simple
+    //convert direction
+    ang = (inT16) (angle * MODULUS / (2 * M_PI));
+    sinx = sin (angle);
+    cosx = cos (angle);
+    r = (sinx * pos.sigx - cosx * pos.sigy) / pos.n;
+    // tprintf("x=%g, y=%g, xx=%g, xy=%g, yy=%g, a=%g, b=%g, ang=%g, r=%g\n",
+    // pos.sigx,pos.sigy,pos.sigxx,pos.sigxy,pos.sigyy,
+    // a,b,angle,r);
+    error = dir.sigxx * sinx * sinx + dir.sigyy * cosx * cosx
+      - 2 * dir.sigxy * sinx * cosx;
+    error *= pdlsq_posdir_ratio;
+    error += sinx * sinx * pos.sigxx + cosx * cosx * pos.sigyy
+      - 2 * sinx * cosx * pos.sigxy
+      - 2 * r * (sinx * pos.sigx - cosx * pos.sigy) + r * r * pos.n;
+    if (error >= 0)
+      //rms value
+      error = sqrt (error / pos.n);
+    else
+      error = 0;  //-0
+    sin_ang = sinx;
+    cos_ang = cosx;
+  }
+  else {
+    sin_ang = 0.0f;
+    cos_ang = 0.0f;
+    ang = 0;
+    r = 0.0f;   //previously left unassigned for the caller
+    error = 0;  //too little
+  }
+  return error;
+}
diff --git a/ccmain/linlsq.h b/ccmain/linlsq.h
new file mode 100644
index 000000000..13a5db693
--- /dev/null
+++ b/ccmain/linlsq.h
@@ -0,0 +1,102 @@
+/**********************************************************************
+ * File: linlsq.h (Formerly llsq.h)
+ * Description: Linear Least squares fitting code.
+ * Author: Ray Smith
+ * Created: Thu Sep 12 08:44:51 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef LINLSQ_H
+#define LINLSQ_H
+
+#include "points.h"
+#include "mod128.h"
+#include "varable.h"
+
+// Accumulator of the sums needed for a least-squares line fit:
+// the point count plus the sums of x, y, x*x, x*y and y*y.
+class LLSQ
+{
+  friend class PDLSQ;  //pos & direction
+
+  public:
+    LLSQ() {  //constructor
+      clear();  //set to zeros
+    }
+    void clear();  //initialize
+
+    void add(  //add element
+             double x,  //coords to add
+             double y);
+    void remove(  //delete element
+                double x,  //coords to delete
+                double y);
+    inT32 count() {  //no of elements
+      return n;
+    }
+
+    double m();  //get gradient
+    double c(  //get constant
+             double m);  //gradient
+    double rms(  //get error
+               double m,   //gradient
+               double c);  //constant
+    double spearman();  //get correlation
+
+  private:
+    inT32 n;       //no of elements
+    double sigx;   //sum of x
+    double sigy;   //sum of y
+    double sigxx;  //sum x squared
+    double sigxy;  //sum of xy
+    double sigyy;  //sum y squared
+};
+
+// Pairs two LLSQ accumulators, one fed with point positions and one with
+// point directions, for a combined position/direction fit.
+class PDLSQ
+{
+  public:
+    PDLSQ() {  //constructor
+      clear();  //set to zeros
+    }
+    void clear() {  //initialize
+      pos.clear ();  //clear both
+      dir.clear ();
+    }
+
+    void add(  //add element
+             const ICOORD &addpos,    //position of pt
+             const ICOORD &adddir) {  //dir of pt
+      pos.add (addpos.x (), addpos.y ());
+      dir.add (adddir.x (), adddir.y ());
+    }
+    void remove(  //remove element
+                const ICOORD &removepos,    //position of pt
+                const ICOORD &removedir) {  //dir of pt
+      pos.remove (removepos.x (), removepos.y ());
+      dir.remove (removedir.x (), removedir.y ());
+    }
+    inT32 count() {  //no of elements
+      return pos.count ();
+    }
+
+    float fit(  //get fit parameters
+              DIR128 &ang,     //output angle
+              float &sin_ang,  //output components
+              float &cos_ang,
+              float &r);
+
+  private:
+    LLSQ pos;  //position
+    LLSQ dir;  //directions
+};
+// Default value matches the definition in linlsq.cpp (4e-6).
+extern double_VAR_H (pdlsq_posdir_ratio, 4e-6, "Mult of dir to cf pos");
+#endif
diff --git a/ccmain/lmedsq.cpp b/ccmain/lmedsq.cpp
new file mode 100644
index 000000000..6d230f672
--- /dev/null
+++ b/ccmain/lmedsq.cpp
@@ -0,0 +1,453 @@
+/**********************************************************************
+ * File: lmedsq.cpp (Formerly lms.c)
+ * Description: Code for the LMS class.
+ * Author: Ray Smith
+ * Created: Fri Aug 7 09:30:53 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "mfcpch.h"
+#include <stdlib.h>
+#include "statistc.h"
+#include "memry.h"
+#include "statistc.h"
+#include "lmedsq.h"
+
+#define EXTERN
+
+EXTERN INT_VAR (lms_line_trials, 12, "Number of linew fits to do");
+#define SEED1 0x1234  //default seeds
+#define SEED2 0x5678
+#define SEED3 0x9abc
+#define LMS_MAX_FAILURES 3
+
+#ifndef __UNIX__
+// Stand-in for nrand48 on platforms that lack it.
+// The three seeds are only consulted on the first call, to seed rand().
+// The result is built from 15 bits of each of two rand() calls and is
+// always below 2^31, so the (inT32) casts used by callers in this file can
+// never produce a negative sample index, whatever RAND_MAX is.
+uinT32 nrand48(  //get random number
+               uinT16 *seeds  //seeds to use
+              ) {
+  static uinT32 seed = 0;  //only seed
+
+  if (seed == 0) {
+    // Widen before shifting so the shifts cannot overflow a signed int.
+    seed = (uinT32) seeds[0] ^ ((uinT32) seeds[1] << 8)
+      ^ ((uinT32) seeds[2] << 16);
+    srand(seed);
+  }
+  //make 32 bit one
+  uinT32 low_bits = (uinT32) rand () & 0x7fff;
+  uinT32 high_bits = (uinT32) rand () & 0x7fff;
+  return low_bits | (high_bits << 16);
+}
+#endif
+
+/**********************************************************************
+ * LMS::LMS
+ *
+ * Construct a LMS class, given the max no of samples to be given.
+ * Allocates the sample and error buffers; the destructor frees them.
+ **********************************************************************/
+
+LMS::LMS (  //constructor
+inT32 size  //samplesize
+):samplesize (size) {
+  samplecount = 0;
+  a = 0;
+  m = 0.0f;
+  c = 0.0f;
+  samples = (FCOORD *) alloc_mem (size * sizeof (FCOORD));
+  errors = (float *) alloc_mem (size * sizeof (float));
+  line_error = 0.0f;
+  fitted = FALSE;
+}
+
+
+/**********************************************************************
+ * LMS::~LMS
+ *
+ * Destruct a LMS class, releasing both buffers.
+ **********************************************************************/
+
+LMS::~LMS (  //destructor
+) {
+  free_mem(samples);
+  free_mem(errors);
+}
+
+
+/**********************************************************************
+ * LMS::clear
+ *
+ * Clear samples from array. The buffers are kept for reuse.
+ **********************************************************************/
+
+void LMS::clear() {  //clear sample
+  samplecount = 0;
+  fitted = FALSE;
+}
+
+
+/**********************************************************************
+ * LMS::add
+ *
+ * Add another sample. More than the constructed number will be ignored.
+ * Any previous fit is invalidated either way.
+ **********************************************************************/
+
+void LMS::add(  //add sample
+              FCOORD sample  //sample coords
+             ) {
+  if (samplecount < samplesize)
+    //save it
+    samples[samplecount++] = sample;
+  fitted = FALSE;
+}
+
+
+/**********************************************************************
+ * LMS::fit
+ *
+ * Fit a line to the given sample points.
+ **********************************************************************/
+
+// Fits y = m * x + c to the stored samples and returns m and c through
+// the output references. Zero, one and two samples are solved directly;
+// otherwise lms_line_trials random candidate lines are tried and the one
+// with the smallest median squared error is kept.
+void LMS::fit(  //fit sample
+              float &out_m,  //output line
+              float &out_c) {
+  if (samplecount == 0) {
+    m = 0.0f;  //no info
+    c = 0.0f;
+    line_error = 0.0f;
+  }
+  else if (samplecount == 1) {
+    m = 0.0f;
+    c = samples[0].y ();  //horiz thru pt
+    line_error = 0.0f;
+  }
+  else if (samplecount == 2) {
+    if (samples[0].x () != samples[1].x ()) {
+      //line through both points
+      m = (samples[1].y () - samples[0].y ())
+        / (samples[1].x () - samples[0].x ());
+      c = samples[0].y () - m * samples[0].x ();
+    }
+    else {
+      //same x: horizontal line through the mean y
+      m = 0.0f;
+      c = (samples[0].y () + samples[1].y ()) / 2;
+    }
+    line_error = 0.0f;
+  }
+  else {
+    inT32 median_index;  //of median error
+    inT32 trial;         //candidate counter
+    float cand_m, cand_c;  //candidate line
+    float cand_error;      //its median error
+
+    pick_line(m, c);  //use pts at random
+    compute_errors(m, c);  //from given line
+    median_index = choose_nth_item (samplecount / 2, errors, samplecount);
+    line_error = errors[median_index];
+    for (trial = 1; trial < lms_line_trials; trial++) {
+      //random again
+      pick_line(cand_m, cand_c);
+      compute_errors(cand_m, cand_c);
+      median_index = choose_nth_item (samplecount / 2, errors, samplecount);
+      cand_error = errors[median_index];
+      if (!(cand_error < line_error))
+        continue;  //no improvement
+      //find least median
+      line_error = cand_error;
+      m = cand_m;
+      c = cand_c;
+    }
+  }
+  fitted = TRUE;
+  out_m = m;
+  out_c = c;
+  a = 0;  //a line has no x squared term
+}
+
+
+/**********************************************************************
+ * LMS::fit_quadratic
+ *
+ * Fit a quadratic to the given sample points.
+ **********************************************************************/
+
+// Fits y = a * x * x + b * x + c to the stored samples.
+// With fewer than three samples it defers to fit() and reports a = 0.
+// Otherwise 2 * lms_line_trials random candidate curves are scored with
+// compute_quadratic_errors and the lowest-scoring one is kept.
+void LMS::fit_quadratic(  //fit sample
+                        float outlier_threshold,  //min outlier size
+                        double &out_a,  //x squared
+                        float &out_b,   //output line
+                        float &out_c) {
+  if (samplecount < 3) {
+    out_a = 0;
+    fit(out_b, out_c);
+    return;
+  }
+
+  //first candidate goes straight into the members
+  pick_quadratic(a, m, c);
+  line_error = compute_quadratic_errors (outlier_threshold, a, m, c);
+
+  inT32 trial;  //candidate counter
+  for (trial = 1; trial < lms_line_trials * 2; trial++) {
+    double cand_a;         //candidate curve
+    float cand_b, cand_c;
+    float cand_error;      //its score
+
+    pick_quadratic(cand_a, cand_b, cand_c);
+    cand_error = compute_quadratic_errors (outlier_threshold,
+      cand_a, cand_b, cand_c);
+    if (!(cand_error < line_error))
+      continue;  //no improvement
+    line_error = cand_error;  //find least error
+    a = cand_a;
+    m = cand_b;
+    c = cand_c;
+  }
+  fitted = TRUE;
+  out_a = a;
+  out_b = m;
+  out_c = c;
+}
+
+
+/**********************************************************************
+ * LMS::constrained_fit
+ *
+ * Fit a line to the given sample points.
+ * The line must have the given gradient.
+ **********************************************************************/
+
+// Finds the constant c for a line of fixed gradient fixed_m.
+// Zero, one and two samples are solved directly; otherwise the line is
+// passed through lms_line_trials randomly chosen samples and the constant
+// giving the smallest median squared error is kept.
+void LMS::constrained_fit(  //fit sample
+                          float fixed_m,  //forced gradient
+                          float &out_c) {
+  static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
+  //for nrand
+
+  m = fixed_m;
+  if (samplecount == 0) {
+    c = 0.0f;
+    line_error = 0.0f;
+  }
+  else if (samplecount == 1) {
+    //line of the forced gradient through the single point
+    c = samples[0].y () - m * samples[0].x ();
+    line_error = 0.0f;
+  }
+  else if (samplecount == 2) {
+    //line of the forced gradient through the midpoint of the pair
+    c = (samples[0].y () + samples[1].y ()
+      - m * (samples[0].x () + samples[1].x ())) / 2;
+    line_error = m * samples[0].x () + c - samples[0].y ();
+    line_error *= line_error;
+  }
+  else {
+    inT32 pick;        //random sample, then median index
+    inT32 trial;       //candidate counter
+    float cand_c;      //candidate constant
+    float cand_error;  //its median error
+
+    pick = (inT32) nrand48 (seeds) % samplecount;
+    //compute line
+    c = samples[pick].y () - m * samples[pick].x ();
+    compute_errors(m, c);  //from given line
+    pick = choose_nth_item (samplecount / 2, errors, samplecount);
+    line_error = errors[pick];
+    for (trial = 1; trial < lms_line_trials; trial++) {
+      pick = (inT32) nrand48 (seeds) % samplecount;
+      cand_c = samples[pick].y () - m * samples[pick].x ();
+      //compute line
+      compute_errors(m, cand_c);
+      pick = choose_nth_item (samplecount / 2, errors, samplecount);
+      cand_error = errors[pick];
+      if (!(cand_error < line_error))
+        continue;  //no improvement
+      //find least median
+      line_error = cand_error;
+      c = cand_c;
+    }
+  }
+  fitted = TRUE;
+  out_c = c;
+  a = 0;  //a line has no x squared term
+}
+
+
+/**********************************************************************
+ * LMS::pick_line
+ *
+ * Fit a line to a random pair of sample points.
+ **********************************************************************/
+
+// Draws up to LMS_MAX_FAILURES random pairs until the two samples differ
+// in x, then returns the line through them. If every draw had equal x,
+// the gradient is 0 and the constant is the mean y of the last pair.
+void LMS::pick_line(  //fit sample
+                    float &line_m,  //output gradient
+                    float &line_c) {
+  static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
+  //for nrand
+  inT16 attempts = 0;  //no of draws so far
+  inT32 first;         //picked point
+  inT32 second;        //picked point
+
+  for (;;) {
+    first = (inT32) nrand48 (seeds) % samplecount;
+    second = (inT32) nrand48 (seeds) % samplecount;
+    line_m = samples[second].x () - samples[first].x ();
+    attempts++;
+    if (line_m != 0 || attempts >= LMS_MAX_FAILURES)
+      break;
+  }
+  if (line_m == 0) {
+    line_c = (samples[second].y () + samples[first].y ()) / 2;
+    return;
+  }
+  line_m = (samples[second].y () - samples[first].y ()) / line_m;
+  line_c = samples[first].y () - samples[first].x () * line_m;
+}
+
+
+/**********************************************************************
+ * LMS::pick_quadratic
+ *
+ * Fit a quadratic to a random triplet of sample points.
+ * The last permitted attempt uses the first, middle and last samples
+ * instead of random ones. If no triplet with three distinct x values is
+ * found, falls back to pick_line with line_a = 0.
+ **********************************************************************/
+
+void LMS::pick_quadratic(  //fit sample
+                         double &line_a,  //x squared
+                         float &line_m,   //output gradient
+                         float &line_c) {
+  static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
+  //for nrand
+  inT16 attempts = 0;  //no of draws so far
+  inT32 first;         //picked points
+  inT32 second;
+  inT32 third;
+  FCOORD first_to_second;  //difference vectors
+  FCOORD first_to_third;
+  FCOORD third_to_second;
+  double bottom;  //denominator of a
+
+  for (;;) {
+    if (attempts >= LMS_MAX_FAILURES - 1) {
+      first = 0;
+      second = samplecount / 2;
+      third = samplecount - 1;
+    }
+    else {
+      first = (inT32) nrand48 (seeds) % samplecount;
+      second = (inT32) nrand48 (seeds) % samplecount;
+      third = (inT32) nrand48 (seeds) % samplecount;
+    }
+    first_to_second = samples[second] - samples[first];
+    first_to_third = samples[third] - samples[first];
+    third_to_second = samples[second] - samples[third];
+    //zero whenever any two picks share an x
+    bottom = first_to_second.x () * first_to_third.x ()
+      * third_to_second.x ();
+    attempts++;
+    if (bottom != 0 || attempts >= LMS_MAX_FAILURES)
+      break;
+  }
+  if (bottom == 0) {
+    line_a = 0;
+    pick_line(line_m, line_c);
+    return;
+  }
+  line_a = first_to_third * first_to_second / bottom;
+  line_m = first_to_second.y () - line_a * first_to_second.x ()
+    * (samples[second].x () + samples[first].x ());
+  line_m /= first_to_second.x ();
+  line_c = samples[first].y () - samples[first].x ()
+    * (samples[first].x () * line_a + line_m);
+}
+
+
+/**********************************************************************
+ * LMS::compute_errors
+ *
+ * Compute the squared error from all the points.
+ * errors[i] receives the squared vertical distance of sample i from the
+ * line y = line_m * x + line_c.
+ **********************************************************************/
+
+void LMS::compute_errors(  //fit sample
+                         float line_m,  //input gradient
+                         float line_c) {
+  for (inT32 i = 0; i < samplecount; i++) {
+    errors[i] = line_m * samples[i].x () + line_c - samples[i].y ();
+    errors[i] *= errors[i];
+  }
+}
+
+
+/**********************************************************************
+ * LMS::compute_quadratic_errors
+ *
+ * Compute the squared error from all the points.
+ **********************************************************************/
+
+// Scores the curve y = line_a * x * x + line_m * x + line_c.
+// Squared errors up to outlier_threshold are packed at the front of
+// errors[] and summed; larger ones are packed at the back as outliers.
+// If inliers outnumber outliers by more than three to one the mean inlier
+// error is returned, otherwise the median outlier error.
+float LMS::compute_quadratic_errors(  //fit sample
+                                    float outlier_threshold,  //min outlier
+                                    double line_a,
+                                    float line_m,  //input gradient
+                                    float line_c) {
+  inT32 outliers = 0;  //total outliers
+  inT32 inliers = 0;   //total non-outliers
+  double inlier_sum = 0;  //summed squares
+  inT32 i;  //sample index
+
+  for (i = 0; i < samplecount; i++) {
+    errors[inliers] = line_c + samples[i].x ()
+      * (line_m + samples[i].x () * line_a) - samples[i].y ();
+    errors[inliers] *= errors[inliers];
+    if (errors[inliers] > outlier_threshold) {
+      //move to the tail; the slot is reused by the next sample
+      outliers++;
+      errors[samplecount - outliers] = errors[inliers];
+    }
+    else {
+      inlier_sum += errors[inliers++];
+    }
+  }
+  if (outliers * 3 < inliers)
+    return inlier_sum / inliers;
+
+  i = choose_nth_item (outliers / 2,
+    errors + samplecount - outliers,
+    outliers);
+  //median outlier
+  return errors[samplecount - outliers + i];
+}
+
+
+/**********************************************************************
+ * LMS::plot
+ *
+ * Plot the fitted line of a LMS.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+// Draws a segment from the fitted curve evaluated at the first sample's x
+// to the curve evaluated at the last sample's x.
+// Nothing is drawn without a valid fit or with no samples: fit() marks an
+// empty set as fitted, and samples[0] / samples[samplecount - 1] would
+// then be read outside the filled part of the buffer.
+void LMS::plot(  //plot fit
+               ScrollView* win,  //window
+               ScrollView::Color colour  //colour to draw in
+              ) {
+  if (fitted && samplecount > 0) {
+    win->Pen(colour);
+    win->SetCursor(samples[0].x (),
+      c + samples[0].x () * (m + samples[0].x () * a));
+    win->DrawTo(samples[samplecount - 1].x (),
+      c + samples[samplecount - 1].x () * (m +
+      samples[samplecount -
+      1].x () * a));
+  }
+}
+#endif
diff --git a/ccmain/lmedsq.h b/ccmain/lmedsq.h
new file mode 100644
index 000000000..cf12f9766
--- /dev/null
+++ b/ccmain/lmedsq.h
@@ -0,0 +1,84 @@
+/**********************************************************************
+ * File: lmedsq.h (Formerly lms.h)
+ * Description: Code for the LMS class.
+ * Author: Ray Smith
+ * Created: Fri Aug 7 09:30:53 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef LMEDSQ_H
+#define LMEDSQ_H
+
+#include "points.h"
+#include "varable.h"
+#include "scrollview.h"
+#include "notdll.h"
+
+// Line / quadratic fitter over a fixed-capacity buffer of samples.
+// The object owns the samples and errors arrays, so copying is disabled:
+// a member-wise copy would leave two objects holding the same buffers.
+class LMS
+{
+  public:
+    LMS(  //constructor
+        inT32 size);  //no of samples
+    ~LMS ();  //destructor
+    void clear();  //clear samples
+    void add(  //add sample
+             FCOORD sample);  //sample coords
+    void fit(  //generate fit
+             float &m,  //output line
+             float &c);
+    void constrained_fit(  //fixed gradient
+                         float fixed_m,  //forced gradient
+                         float &out_c);  //output line
+    void fit_quadratic(  //easy quadratic
+                       float outlier_threshold,  //min outlier
+                       double &a,  //x squared
+                       float &b,   //x
+                       float &c);  //constant
+    void plot(  //plot fit
+              ScrollView* win,  //window
+              ScrollView::Color colour);  //colour to draw in
+    float error() {  //get error, or -1 when there is no valid fit
+      return fitted ? line_error : -1;
+    }
+
+  private:
+    // Declared but never defined: copying is not supported.
+    LMS(const LMS &);
+    LMS & operator= (const LMS &);
+
+    void pick_line(  //random choice
+                   float &m,  //output line
+                   float &c);
+    void pick_quadratic(  //random choice
+                        double &a,  //output curve
+                        float &b,
+                        float &c);
+    void compute_errors(  //find errors
+                        float m,  //from line
+                        float c);
+    //find errors
+    float compute_quadratic_errors(float outlier_threshold,  //min outlier
+                                   double a,  //from curve
+                                   float m,
+                                   float c);
+
+    BOOL8 fitted;       //line parts valid
+    inT32 samplesize;   //max samples
+    inT32 samplecount;  //current sample size
+    FCOORD *samples;    //array of samples
+    float *errors;      //error distances
+    double a;           //x squared
+    float m;            //line gradient
+    float c;            //line constant
+    float line_error;   //error of fit
+};
+extern INT_VAR_H (lms_line_trials, 12, "Number of linew fits to do");
+#endif
diff --git a/ccmain/mod128.cpp b/ccmain/mod128.cpp
new file mode 100644
index 000000000..72fd91781
--- /dev/null
+++ b/ccmain/mod128.cpp
@@ -0,0 +1,100 @@
+/**********************************************************************
+ * File: mod128.c (Formerly dir128.c)
+ * Description: Code to convert a DIR128 to an ICOORD.
+ * Author: Ray Smith
+ * Created: Tue Oct 22 11:56:09 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "mfcpch.h"  //precompiled headers
+#include "mod128.h"
+
+// 128 (x, y) pairs, one per direction, starting at (1000, 0).
+static inT16 idirtab[] = {
+  1000, 0, 998, 49, 995, 98, 989, 146,
+  980, 195, 970, 242, 956, 290, 941, 336,
+  923, 382, 903, 427, 881, 471, 857, 514,
+  831, 555, 803, 595, 773, 634, 740, 671,
+  707, 707, 671, 740, 634, 773, 595, 803,
+  555, 831, 514, 857, 471, 881, 427, 903,
+  382, 923, 336, 941, 290, 956, 242, 970,
+  195, 980, 146, 989, 98, 995, 49, 998,
+  0, 1000, -49, 998, -98, 995, -146, 989,
+  -195, 980, -242, 970, -290, 956, -336, 941,
+  -382, 923, -427, 903, -471, 881, -514, 857,
+  -555, 831, -595, 803, -634, 773, -671, 740,
+  -707, 707, -740, 671, -773, 634, -803, 595,
+  -831, 555, -857, 514, -881, 471, -903, 427,
+  -923, 382, -941, 336, -956, 290, -970, 242,
+  -980, 195, -989, 146, -995, 98, -998, 49,
+  -1000, 0, -998, -49, -995, -98, -989, -146,
+  -980, -195, -970, -242, -956, -290, -941, -336,
+  -923, -382, -903, -427, -881, -471, -857, -514,
+  -831, -555, -803, -595, -773, -634, -740, -671,
+  -707, -707, -671, -740, -634, -773, -595, -803,
+  -555, -831, -514, -857, -471, -881, -427, -903,
+  -382, -923, -336, -941, -290, -956, -242, -970,
+  -195, -980, -146, -989, -98, -995, -49, -998,
+  0, -1000, 49, -998, 98, -995, 146, -989,
+  195, -980, 242, -970, 290, -956, 336, -941,
+  382, -923, 427, -903, 471, -881, 514, -857,
+  555, -831, 595, -803, 634, -773, 671, -740,
+  707, -707, 740, -671, 773, -634, 803, -595,
+  831, -555, 857, -514, 881, -471, 903, -427,
+  923, -382, 941, -336, 956, -290, 970, -242,
+  980, -195, 989, -146, 995, -98, 998, -49
+};
+
+// The same table viewed as ICOORDs.
+// NOTE(review): presumably relies on ICOORD being exactly two inT16
+// fields - confirm against points.h.
+static ICOORD *dirtab = (ICOORD *) idirtab;
+
+/**********************************************************************
+ * DIR128::DIR128
+ *
+ * Quantize the direction of an FCOORD to make a DIR128.
+ * Vectors with y == 0 map to 0 or MODULUS / 2 by the sign of x; all
+ * others are located by bisecting the direction table.
+ **********************************************************************/
+
+DIR128::DIR128(  //from fcoord
+               const FCOORD fc  //vector to quantize
+              ) {
+  if (fc.y () == 0) {
+    if (fc.x () >= 0)
+      dir = 0;
+    else
+      dir = MODULUS / 2;
+    return;
+  }
+
+  int lower = 0;        //binary search bounds
+  int upper = MODULUS;
+  while (upper - lower > 1) {
+    int probe = (upper + lower) / 2;
+    if (dirtab[probe] * fc >= 0)
+      lower = probe;
+    else
+      upper = probe;
+  }
+  dir = lower;
+}
+
+
+/**********************************************************************
+ * DIR128::vector
+ *
+ * Convert a direction to a vector by table lookup.
+ **********************************************************************/
+
+ICOORD DIR128::vector() const {  //convert to vector
+  return dirtab[dir];  //easy really
+}
diff --git a/ccmain/mod128.h b/ccmain/mod128.h
new file mode 100644
index 000000000..592264ba6
--- /dev/null
+++ b/ccmain/mod128.h
@@ -0,0 +1,85 @@
+/**********************************************************************
+ * File: mod128.h (Formerly dir128.h)
+ * Description: Header for class which implements modulo arithmetic.
+ * Author: Ray Smith
+ * Created: Tue Mar 26 17:48:13 GMT 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef MOD128_H
+#define MOD128_H
+
+#include "points.h"
+
+#define MODULUS 128   /*range of directions */
+#define DIRBITS 7     //no of bits used
+#define DIRSCALE 1000 //length of vector
+
+// A direction stored as an integer in [0, MODULUS), with arithmetic
+// performed modulo MODULUS.
+class DLLSYM DIR128
+{
+  public:
+    // Starts at direction 0 so that a default-constructed value can be
+    // read safely.
+    DIR128() : dir(0) {
+    }
+
+    DIR128(  //constructor
+           inT16 value) {  //value to assign
+      value %= MODULUS;  //modulo arithmetic
+      if (value < 0)
+        value += MODULUS;  //done properly
+      dir = (inT8) value;
+    }
+    DIR128(const FCOORD fc);  //quantize vector
+
+    DIR128 & operator= (  //assign of inT16
+                        inT16 value) {  //value to assign
+      value %= MODULUS;  //modulo arithmetic
+      if (value < 0)
+        value += MODULUS;  //done properly
+      dir = (inT8) value;
+      return *this;
+    }
+    // Signed difference, wrapped into [-MODULUS / 2, MODULUS / 2].
+    inT8 operator- (  //subtraction
+                    const DIR128 & minus) const  //for signed result
+    {
+      //result
+      inT16 result = dir - minus.dir;
+
+      if (result > MODULUS / 2)
+        result -= MODULUS;  //get in range
+      else if (result < -MODULUS / 2)
+        result += MODULUS;
+      return (inT8) result;
+    }
+    DIR128 operator+ (  //addition
+                      const DIR128 & add) const  //of itself
+    {
+      DIR128 result;  //sum
+
+      result = dir + add.dir;  //let = do the work
+      return result;
+    }
+    DIR128 & operator+= (  //same as +
+                         const DIR128 & add) {
+      *this = dir + add.dir;  //let = do the work
+      return *this;
+    }
+    inT8 get_dir() const {  //access function
+      return dir;
+    }
+    ICOORD vector() const;  //turn to vector
+
+  private:
+    inT8 dir;  //a direction
+};
+#endif
diff --git a/ccmain/normalis.cpp b/ccmain/normalis.cpp
new file mode
100644
index 000000000..89faf9349
--- /dev/null
+++ b/ccmain/normalis.cpp
@@ -0,0 +1,178 @@
+/**********************************************************************
+ * File: normalis.cpp (Formerly denorm.c)
+ * Description: Code for the DENORM class.
+ * Author: Ray Smith
+ * Created: Thu Apr 23 09:22:43 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "mfcpch.h"
+#include "werd.h"
+#include "normalis.h"
+
+/**********************************************************************
+ * DENORM::binary_search_segment
+ *
+ * Find the segment to use for the given x: the last segment whose
+ * xstart is not greater than src_x, or the first segment if none is.
+ * Callers must ensure segments > 0.
+ **********************************************************************/
+
+const DENORM_SEG *DENORM::binary_search_segment(float src_x) const {
+  int bottom, top, middle;  //binary search
+
+  bottom = 0;
+  top = segments;
+  do {
+    middle = (bottom + top) / 2;
+    if (segs[middle].xstart > src_x)
+      top = middle;
+    else
+      bottom = middle;
+  }
+  while (top - bottom > 1);
+  return &segs[bottom];
+}
+
+/**********************************************************************
+ * DENORM::scale_at_x
+ *
+ * Return scaling at a given (normalized) x coord.
+ * A segment's own positive scale_factor takes precedence; otherwise the
+ * word-level scale_factor is returned.
+ **********************************************************************/
+
+float DENORM::scale_at_x(float src_x) const {  // In normalized coords.
+  if (segments != 0) {
+    const DENORM_SEG* seg = binary_search_segment(src_x);
+    if (seg->scale_factor > 0.0)
+      return seg->scale_factor;
+  }
+  return scale_factor;
+}
+
+/**********************************************************************
+ * DENORM::yshift_at_x
+ *
+ * Return yshift at a given (normalized) x coord.
+ * A segment with an explicit ycoord (anything but -MAX_INT32) supplies
+ * the shift directly. Otherwise the baseline is evaluated at the
+ * denormalised x: the row baseline when base_is_row is set, else the
+ * line m * x + c.
+ * x(src_x) is already denormalised, so it is passed to the baseline
+ * as is, exactly as the m * x + c form uses it.
+ **********************************************************************/
+
+float DENORM::yshift_at_x(float src_x) const {  // In normalized coords.
+  if (segments != 0) {
+    const DENORM_SEG* seg = binary_search_segment(src_x);
+    if (seg->ycoord != -MAX_INT32)
+      return seg->ycoord;
+  }
+  if (base_is_row)
+    return source_row->base_line(x(src_x));
+  return m * x(src_x) + c;
+}
+
+/**********************************************************************
+ * DENORM::x
+ *
+ * Denormalise an x coordinate: undo the scaling, then add x_centre.
+ **********************************************************************/
+
+float DENORM::x(  //convert x coord
+                float src_x  //coord to convert
+               ) const {
+  return src_x / scale_at_x(src_x) + x_centre;
+}
+
+
+/**********************************************************************
+ * DENORM::y
+ *
+ * Denormalise a y coordinate.
+ **********************************************************************/
+
+float DENORM::y(  //convert y coord
+                float src_y,      //coord to convert
+                float src_centre  //x location for base
+               ) const {
+  return (src_y - bln_baseline_offset) / scale_at_x(src_centre)
+    + yshift_at_x(src_centre);
+}
+
+
+// Full constructor. Copies the seg_count entries of seg_pts into a newly
+// allocated array owned by this object, inserting each one so that the
+// array stays ordered by xstart, as binary_search_segment requires.
+DENORM::DENORM(float x,  //from same pieces
+               float scaling,
+               double line_m,  //default line
+               double line_c,
+               inT16 seg_count,      //no of segments
+               DENORM_SEG *seg_pts,  //actual segments
+               BOOL8 using_row,      //as baseline
+               ROW *src) {
+  x_centre = x;  //just copy
+  scale_factor = scaling;
+  source_row = src;
+  if (seg_count > 0) {
+    segs = new DENORM_SEG[seg_count];
+    for (segments = 0; segments < seg_count; segments++) {
+      // It is possible, if infrequent that the segments may be out of order.
+      // since we are searching with a binary search, keep them in order.
+      if (segments == 0 || segs[segments - 1].xstart <=
+        seg_pts[segments].xstart) {
+        segs[segments] = seg_pts[segments];
+      } else {
+        // Shift larger entries up one slot, then drop the new one in.
+        int i;
+        for (i = 0; i < segments
+          && segs[segments - 1 - i].xstart > seg_pts[segments].xstart;
+          ++i) {
+          segs[segments - i ] = segs[segments - 1 - i];
+        }
+        segs[segments - i] = seg_pts[segments];
+      }
+    }
+  }
+  else {
+    segments = 0;
+    segs = NULL;
+  }
+  base_is_row = using_row;
+  m = line_m;
+  c = line_c;
+  block_ = NULL;
+}
+
+
+// Copy constructor: start with no segments, then reuse the assignment.
+DENORM::DENORM(const DENORM &src) {
+  segments = 0;
+  segs = NULL;
+  *this = src;
+}
+
+
+// Assignment with a deep copy of the segment array.
+// Self-assignment returns immediately: the old array is freed before the
+// copy, so without the check the copy would read freed memory.
+DENORM & DENORM::operator= (const DENORM & src) {
+  if (this == &src)
+    return *this;
+  x_centre = src.x_centre;
+  scale_factor = src.scale_factor;
+  source_row = src.source_row;
+  if (segments > 0)
+    delete[]segs;
+  if (src.segments > 0) {
+    segs = new DENORM_SEG[src.segments];
+    for (segments = 0; segments < src.segments; segments++)
+      segs[segments] = src.segs[segments];
+  }
+  else {
+    segments = 0;
+    segs = NULL;
+  }
+  base_is_row = src.base_is_row;
+  m = src.m;
+  c = src.c;
+  block_ = src.block_;
+  return *this;
+}
diff --git a/ccmain/normalis.h b/ccmain/normalis.h
new file mode 100644
index
000000000..055c01cc7
--- /dev/null
+++ b/ccmain/normalis.h
@@ -0,0 +1,118 @@
+/**********************************************************************
+ * File: normalis.h (Formerly denorm.h)
+ * Description: Code for the DENORM class.
+ * Author: Ray Smith
+ * Created: Thu Apr 23 09:22:43 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef NORMALIS_H
+#define NORMALIS_H
+
+#include <stdio.h>
+
+class ROW;  //forward decl
+class BLOCK;
+
+// One piece of a piecewise denormalisation. The constructor leaves every
+// field unset; callers fill them in.
+class DENORM_SEG
+{
+  public:
+    DENORM_SEG() {
+    }  //empty
+
+    inT32 xstart;        //start of segment
+    inT32 ycoord;        //y at segment
+    float scale_factor;  //for this segment
+};
+
+// Maps normalised word coordinates back to image coordinates, optionally
+// piecewise through an owned array of DENORM_SEG.
+class DENORM
+{
+  public:
+    DENORM() {  //constructor
+      source_row = NULL;
+      x_centre = 0.0f;
+      scale_factor = 1.0f;
+      segments = 0;
+      segs = NULL;
+      base_is_row = TRUE;
+      m = c = 0;
+      block_ = NULL;
+    }
+    DENORM(  //constructor
+           float x,  //from same pieces
+           float scaling,
+           ROW *src) {
+      x_centre = x;  //just copy
+      scale_factor = scaling;
+      source_row = src;
+      segments = 0;
+      segs = NULL;
+      base_is_row = TRUE;
+      m = c = 0;
+      block_ = NULL;
+    }
+    DENORM(  //constructor
+           float x,  //from same pieces
+           float scaling,
+           double line_m,  //default line
+           double line_c,
+           inT16 seg_count,      //no of segments
+           DENORM_SEG *seg_pts,  //actual segments
+           BOOL8 using_row,      //as baseline
+           ROW *src);
+    DENORM(const DENORM &);
+    DENORM & operator= (const DENORM &);
+    ~DENORM () {
+      if (segments > 0)
+        delete[]segs;
+    }
+
+    float origin() const {  //get x centre
+      return x_centre;
+    }
+    float scale() const {  //get scale
+      return scale_factor;
+    }
+    ROW *row() const {  //get row
+      return source_row;
+    }
+    const BLOCK* block() const {
+      return block_;
+    }
+    void set_block(const BLOCK* block) {
+      block_ = block;
+    }
+    float x(  //convert an xcoord
+            float src_x) const;
+    float y(  //convert a ycoord
+            float src_y,  //coord to convert
+            float src_centre) const;  //normed x centre
+    float scale_at_x(  // Return scaling at this coord.
+                     float src_x) const;
+    float yshift_at_x(  // Return yshift at this coord.
+                      float src_x) const;
+
+  private:
+    const DENORM_SEG *binary_search_segment(float src_x) const;
+
+    BOOL8 base_is_row;    //using row baseline?
+    inT16 segments;       //no of segments
+    double c, m;          //baseline
+    float x_centre;       //middle of word
+    float scale_factor;   //scaling
+    ROW *source_row;      //row it came from
+    DENORM_SEG *segs;     //array of segments
+    const BLOCK* block_;  // Block the word came from.
+};
+#endif
diff --git a/ccmain/ocrblock.cpp b/ccmain/ocrblock.cpp
new file mode 100644
index 000000000..69136f37e
--- /dev/null
+++ b/ccmain/ocrblock.cpp
@@ -0,0 +1,219 @@
+/**********************************************************************
+ * File: ocrblock.cpp (Formerly block.c)
+ * Description: BLOCK member functions and iterator functions.
+ * Author: Ray Smith
+ * Created: Fri Mar 15 09:41:28 GMT 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" +#include +#include "blckerr.h" +#include "ocrblock.h" +#include "tprintf.h" + +#define BLOCK_LABEL_HEIGHT 150 //char height of block id + +ELISTIZE_S (BLOCK) +/********************************************************************** + * BLOCK::BLOCK + * + * Constructor for a simple rectangular block. + **********************************************************************/ +BLOCK::BLOCK(const char *name, //filename + BOOL8 prop, //proportional + inT16 kern, //kerning + inT16 space, //spacing + inT16 xmin, //bottom left + inT16 ymin, inT16 xmax, //top right + inT16 ymax) + : PDBLK (xmin, ymin, xmax, ymax), + filename(name), + re_rotation_(1.0f, 0.0f), + classify_rotation_(1.0f, 0.0f), + skew_(1.0f, 0.0f) { + ICOORDELT_IT left_it = &leftside; + ICOORDELT_IT right_it = &rightside; + + proportional = prop; + kerning = kern; + spacing = space; + font_class = -1; //not assigned + cell_over_xheight_ = 2.0f; + hand_poly = NULL; + left_it.set_to_list (&leftside); + right_it.set_to_list (&rightside); + //make default box + left_it.add_to_end (new ICOORDELT (xmin, ymin)); + left_it.add_to_end (new ICOORDELT (xmin, ymax)); + right_it.add_to_end (new ICOORDELT (xmax, ymin)); + right_it.add_to_end (new ICOORDELT (xmax, ymax)); +} + +/********************************************************************** + * decreasing_top_order + * + * Sort Comparator: Return <0 if row1 top < row2 top + **********************************************************************/ + +int decreasing_top_order( // + const void *row1, + const void *row2) { + return (*(ROW **) row2)->bounding_box ().top () - + (*(ROW **) row1)->bounding_box ().top (); +} + + +/********************************************************************** + * BLOCK::rotate + * + * Rotate the polygon by the given rotation and 
recompute the bounding_box. + **********************************************************************/ +void BLOCK::rotate(const FCOORD& rotation) { + poly_block()->rotate(rotation); + box = *poly_block()->bounding_box(); +} + +/********************************************************************** + * BLOCK::sort_rows + * + * Order rows so that they are in order of decreasing Y coordinate + **********************************************************************/ + +void BLOCK::sort_rows() { // order on "top" + ROW_IT row_it(&rows); + + row_it.sort (decreasing_top_order); +} + + +/********************************************************************** + * BLOCK::compress + * + * Delete space between the rows. (And maybe one day, compress the rows) + * Fill space of block from top down, left aligning rows. + **********************************************************************/ + +void BLOCK::compress() { // squash it up + #define ROW_SPACING 5 + + ROW_IT row_it(&rows); + ROW *row; + ICOORD row_spacing (0, ROW_SPACING); + + ICOORDELT_IT icoordelt_it; + + sort_rows(); + + box = TBOX (box.topleft (), box.topleft ()); + box.move_bottom_edge (ROW_SPACING); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + row->move (box.botleft () - row_spacing - + row->bounding_box ().topleft ()); + box += row->bounding_box (); + } + + leftside.clear (); + icoordelt_it.set_to_list (&leftside); + icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.bottom ())); + icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.top ())); + rightside.clear (); + icoordelt_it.set_to_list (&rightside); + icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.bottom ())); + icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.top ())); +} + + +/********************************************************************** + * BLOCK::check_pitch + * + * Check whether the block is fixed or prop, set the flag, and set + * the pitch if it is fixed. 
+ **********************************************************************/ + +void BLOCK::check_pitch() { // check prop + // tprintf("Missing FFT fixed pitch stuff!\n"); + pitch = -1; +} + + +/********************************************************************** + * BLOCK::compress + * + * Compress and move in a single operation. + **********************************************************************/ + +void BLOCK::compress( // squash it up + const ICOORD vec // and move + ) { + box.move (vec); + compress(); +} + + +/********************************************************************** + * BLOCK::print + * + * Print the info on a block + **********************************************************************/ + +void BLOCK::print( //print list of sides + FILE *, //file to print on + BOOL8 dump //print full detail + ) { + ICOORDELT_IT it = &leftside; //iterator + + box.print (); + tprintf ("Proportional= %s\n", proportional ? "TRUE" : "FALSE"); + tprintf ("Kerning= %d\n", kerning); + tprintf ("Spacing= %d\n", spacing); + tprintf ("Fixed_pitch=%d\n", pitch); + tprintf ("Filename= %s\n", filename.string ()); + + if (dump) { + tprintf ("Left side coords are:\n"); + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) + tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); + tprintf ("\n"); + tprintf ("Right side coords are:\n"); + it.set_to_list (&rightside); + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) + tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); + tprintf ("\n"); + } +} + +/********************************************************************** + * BLOCK::operator= + * + * Assignment - duplicate the block structure, but with an EMPTY row list. 
+ **********************************************************************/ + +BLOCK & BLOCK::operator= ( //assignment +const BLOCK & source //from this +) { + this->ELIST_LINK::operator= (source); + this->PDBLK::operator= (source); + proportional = source.proportional; + kerning = source.kerning; + spacing = source.spacing; + filename = source.filename; //STRINGs assign ok + if (!rows.empty ()) + rows.clear (); + re_rotation_ = source.re_rotation_; + classify_rotation_ = source.classify_rotation_; + skew_ = source.skew_; + return *this; +} diff --git a/ccmain/ocrblock.h b/ccmain/ocrblock.h new file mode 100644 index 000000000..459f5d3dd --- /dev/null +++ b/ccmain/ocrblock.h @@ -0,0 +1,195 @@ +/********************************************************************** + * File: ocrblock.h (Formerly block.h) + * Description: Page block class definition. + * Author: Ray Smith + * Created: Thu Mar 14 17:32:01 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef OCRBLOCK_H +#define OCRBLOCK_H + +#include "img.h" +#include "ocrrow.h" +#include "pdblock.h" + +class BLOCK; //forward decl + +ELISTIZEH_S (BLOCK) +class BLOCK:public ELIST_LINK, public PDBLK +//page block +{ + friend class BLOCK_RECT_IT; //block iterator + + public: + BLOCK() + : re_rotation_(1.0f, 0.0f), + classify_rotation_(1.0f, 0.0f), + skew_(1.0f, 0.0f) { + hand_poly = NULL; + } + BLOCK(const char *name, // filename + BOOL8 prop, // proportional + inT16 kern, // kerning + inT16 space, // spacing + inT16 xmin, // bottom left + inT16 ymin, + inT16 xmax, // top right + inT16 ymax); + + ~BLOCK () { + } + + // set space size etc. + void set_stats(BOOL8 prop, // proportional + inT16 kern, // inter char size + inT16 space, // inter word size + inT16 ch_pitch) { // pitch if fixed + proportional = prop; + kerning = (inT8) kern; + spacing = space; + pitch = ch_pitch; + } + void set_xheight(inT32 height) { // set char size + xheight = height; + } + void set_font_class(inT16 font) { // set font class + font_class = font; + } + BOOL8 prop() const { // return proportional + return proportional; + } + inT32 fixed_pitch() const { // return pitch + return pitch; + } + inT16 kern() const { // return kerning + return kerning; + } + inT16 font() const { // return font class + return font_class; + } + inT16 space() const { // return spacing + return spacing; + } + const char *name() const { // return filename + return filename.string (); + } + inT32 x_height() const { // return xheight + return xheight; + } + float cell_over_xheight() const { + return cell_over_xheight_; + } + void set_cell_over_xheight(float ratio) { + cell_over_xheight_ = ratio; + } + ROW_LIST *row_list() { // get rows + return &rows; + } + C_BLOB_LIST *blob_list() { // get blobs + return &c_blobs; + } + C_BLOB_LIST *reject_blobs() { + return &rej_blobs; + } + FCOORD re_rotation() const { + return re_rotation_; // How to 
transform coords back to image. + } + void set_re_rotation(const FCOORD& rotation) { + re_rotation_ = rotation; + } + FCOORD classify_rotation() const { + return classify_rotation_; // Apply this before classifying. + } + void set_classify_rotation(const FCOORD& rotation) { + classify_rotation_ = rotation; + } + FCOORD skew() const { + return skew_; // Direction of true horizontal. + } + void set_skew(const FCOORD& skew) { + skew_ = skew; + } + const ICOORD& median_size() const { + return median_size_; + } + void set_median_size(int x, int y) { + median_size_.set_x(x); + median_size_.set_y(y); + } + + void rotate(const FCOORD& rotation); + + void sort_rows(); // decreasing y order + + void compress(); // shrink white space + + void check_pitch(); // check proportional + + void compress(const ICOORD vec); // shrink white spaceand move by vector + + void print(FILE *fp, BOOL8 dump); // dump whole table + + void prep_serialise() { // set ptrs to counts + filename.prep_serialise(); + rows.prep_serialise(); + c_blobs.prep_serialise(); + rej_blobs.prep_serialise(); + leftside.prep_serialise(); + rightside.prep_serialise(); + } + + void dump(FILE *f) { + filename.dump(f); + rows.dump(f); + c_blobs.dump(f); + rej_blobs.dump(f); + leftside.dump(f); + rightside.dump(f); + } + + void de_dump(FILE *f) { // read external bits + filename.de_dump(f); + rows.de_dump(f); + c_blobs.de_dump(f); + rej_blobs.de_dump(f); + leftside.de_dump(f); + rightside.de_dump(f); + } + + make_serialise(BLOCK) + + BLOCK& operator=(const BLOCK & source); + + private: + BOOL8 proportional; // proportional + inT8 kerning; // inter blob gap + inT16 spacing; // inter word gap + inT16 pitch; // pitch of non-props + inT16 font_class; // correct font class + inT32 xheight; // height of chars + float cell_over_xheight_; // Ratio of cell height to xheight. 
+ STRING filename; // name of block + ROW_LIST rows; // rows in block + C_BLOB_LIST c_blobs; // before textord + C_BLOB_LIST rej_blobs; // duff stuff + FCOORD re_rotation_; // How to transform coords back to image. + FCOORD classify_rotation_; // Apply this before classifying. + FCOORD skew_; // Direction of true horizontal. + ICOORD median_size_; // Median size of blobs. +}; + +int decreasing_top_order(const void *row1, const void *row2); + +#endif diff --git a/ccmain/ocrrow.cpp b/ccmain/ocrrow.cpp new file mode 100644 index 000000000..b6c66dec3 --- /dev/null +++ b/ccmain/ocrrow.cpp @@ -0,0 +1,216 @@ +/********************************************************************** + * File: ocrrow.cpp (Formerly row.c) + * Description: Code for the ROW class. + * Author: Ray Smith + * Created: Tue Oct 08 15:58:04 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" +#include "ocrrow.h" +#include "blobbox.h" + +ELISTIZE_S (ROW) +/********************************************************************** + * ROW::ROW + * + * Constructor to build a ROW. Only the stats stuff are given here. + * The words are added directly. 
+ **********************************************************************/ +ROW::ROW ( //constructor +inT32 spline_size, //no of segments +inT32 * xstarts, //segment boundaries +double *coeffs, //coefficients +float x_height, //line height +float ascenders, //ascender size +float descenders, //descender drop +inT16 kern, //char gap +inT16 space //word gap +): +baseline(spline_size, xstarts, coeffs) { + kerning = kern; //just store stuff + spacing = space; + xheight = x_height; + ascrise = ascenders; + descdrop = descenders; +} + + +/********************************************************************** + * ROW::ROW + * + * Constructor to build a ROW. Only the stats stuff are given here. + * The words are added directly. + **********************************************************************/ + +ROW::ROW( //constructor + TO_ROW *to_row, //source row + inT16 kern, //char gap + inT16 space //word gap + ) { + kerning = kern; //just store stuff + spacing = space; + xheight = to_row->xheight; + ascrise = to_row->ascrise; + descdrop = to_row->descdrop; + baseline = to_row->baseline; +} + + +/********************************************************************** + * ROW::recalc_bounding_box + * + * Set the bounding box correctly + **********************************************************************/ + +void ROW::recalc_bounding_box() { //recalculate BB + WERD *word; //current word + WERD_IT it = &words; //words of ROW + inT16 left; //of word + inT16 prev_left; //old left + + if (!it.empty ()) { + word = it.data (); + prev_left = word->bounding_box ().left (); + it.forward (); + while (!it.at_first ()) { + word = it.data (); + left = word->bounding_box ().left (); + if (left < prev_left) { + it.move_to_first (); + //words in BB order + it.sort (word_comparator); + break; + } + prev_left = left; + it.forward (); + } + } + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + word = it.data (); + if (it.at_first ()) + word->set_flag (W_BOL, TRUE); + else + //not 
start of line + word->set_flag (W_BOL, FALSE); + if (it.at_last ()) + word->set_flag (W_EOL, TRUE); + else + //not end of line + word->set_flag (W_EOL, FALSE); + //extend BB as reqd + bound_box += word->bounding_box (); + } +} + + +/********************************************************************** + * ROW::move + * + * Reposition row by vector + **********************************************************************/ + +void ROW::move( // reposition row + const ICOORD vec // by vector + ) { + WERD_IT it(&words); // word iterator + + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) + it.data ()->move (vec); + + bound_box.move (vec); + baseline.move (vec); +} + + +/********************************************************************** + * ROW::print + * + * Display members + **********************************************************************/ + +void ROW::print( //print + FILE *fp //file to print on + ) { + tprintf ("Kerning= %d\n", kerning); + tprintf ("Spacing= %d\n", spacing); + bound_box.print (); + tprintf ("Xheight= %f\n", xheight); + tprintf ("Ascrise= %f\n", ascrise); + tprintf ("Descdrop= %f\n", descdrop); +} + + +/********************************************************************** + * ROW::plot + * + * Draw the ROW in the given colour. + **********************************************************************/ + +#ifndef GRAPHICS_DISABLED +void ROW::plot( //draw it + ScrollView* window, //window to draw in + ScrollView::Color colour //colour to draw in + ) { + WERD *word; //current word + WERD_IT it = &words; //words of ROW + + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + word = it.data (); + word->plot (window, colour); //all in one colour + } +} +#endif + +/********************************************************************** + * ROW::plot + * + * Draw the ROW in rainbow colours. 
+ **********************************************************************/ + +#ifndef GRAPHICS_DISABLED +void ROW::plot( //draw it + ScrollView* window //window to draw in + ) { + WERD *word; //current word + WERD_IT it = &words; //words of ROW + + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + word = it.data (); + word->plot (window); //in rainbow colours + } +} +#endif + +/********************************************************************** + * ROW::operator= + * + * Assign rows by duplicating the row structure but NOT the WERDLIST + **********************************************************************/ + +ROW & ROW::operator= ( //assignment +const ROW & source //from this +) { + this->ELIST_LINK::operator= (source); + kerning = source.kerning; + spacing = source.spacing; + xheight = source.xheight; + ascrise = source.ascrise; + descdrop = source.descdrop; + if (!words.empty ()) + words.clear (); + baseline = source.baseline; //QSPLINES must do = + bound_box = source.bound_box; + return *this; +} diff --git a/ccmain/ocrrow.h b/ccmain/ocrrow.h new file mode 100644 index 000000000..db045e77c --- /dev/null +++ b/ccmain/ocrrow.h @@ -0,0 +1,136 @@ +/********************************************************************** + * File: ocrrow.h (Formerly row.h) + * Description: Code for the ROW class. + * Author: Ray Smith + * Created: Tue Oct 08 15:58:04 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef OCRROW_H +#define OCRROW_H + +#include +#include "quspline.h" +#include "werd.h" + +class TO_ROW; + +class ROW:public ELIST_LINK +{ + friend void tweak_row_baseline(ROW *); + public: + ROW() { + } //empty constructor + ROW( //constructor + inT32 spline_size, //no of segments + inT32 *xstarts, //segment boundaries + double *coeffs, //coefficients //ascender size + float x_height, + float ascenders, + float descenders, //descender size + inT16 kern, //char gap + inT16 space); //word gap + ROW( //constructor + TO_ROW *row, //textord row + inT16 kern, //char gap + inT16 space); //word gap + + WERD_LIST *word_list() { //get words + return &words; + } + + float base_line( //compute baseline + float xpos) const { //at the position + //get spline value + return (float) baseline.y (xpos); + } + float x_height() const { //return x height + return xheight; + } + void set_x_height(float new_xheight) { // set x height + xheight = new_xheight; + } + inT32 kern() const { //return kerning + return kerning; + } + inT32 space() const { //return spacing + return spacing; + } + float ascenders() const { //return size + return ascrise; + } + float descenders() const { //return size + return descdrop; + } + TBOX bounding_box() const { //return bounding box + return bound_box; + } + + void recalc_bounding_box(); //recalculate BB + + void move( // reposition row + const ICOORD vec); // by vector + + void print( //print + FILE *fp); //file to print on + + void plot( //draw one + ScrollView* window, //window to draw in + ScrollView::Color colour); //uniform colour + void plot( //draw one + ScrollView* window); //in rainbow colours + +#ifndef GRAPHICS_DISABLED + void plot_baseline( //draw the baseline + ScrollView* window, //window to draw in + ScrollView::Color colour) { //colour to draw + //draw 
it + baseline.plot (window, colour); + } +#endif + + void prep_serialise() { //set ptrs to counts + words.prep_serialise (); + baseline.prep_serialise (); + } + + void dump( //write external bits + FILE *f) { + words.dump (f); + baseline.dump (f); + } + + void de_dump( //read external bits + FILE *f) { + words.de_dump (f); + baseline.de_dump (f); + } + + //assignment + make_serialise (ROW) ROW & operator= ( + const ROW & source); //from this + + private: + inT32 kerning; //inter char gap + inT32 spacing; //inter word gap + TBOX bound_box; //bounding box + float xheight; //height of line + float ascrise; //size of ascenders + float descdrop; //-size of descenders + WERD_LIST words; //words + QSPLINE baseline; //baseline spline +}; + +ELISTIZEH_S (ROW) +#endif diff --git a/ccmain/osdetect.cpp b/ccmain/osdetect.cpp new file mode 100644 index 000000000..be7d5d6c7 --- /dev/null +++ b/ccmain/osdetect.cpp @@ -0,0 +1,427 @@ +/////////////////////////////////////////////////////////////////////// +// File: osdetect.cpp +// Description: Orientation and script detection. +// Author: Samuel Charron +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#include "osdetect.h" + +#include "strngs.h" +#include "blobbox.h" +#include "blread.h" +#include "tordmain.h" +#include "ratngs.h" +#include "oldlist.h" +#include "adaptmatch.h" +#include "tstruct.h" +#include "expandblob.h" +#include "tesseractclass.h" +#include "qrsequence.h" + +extern IMAGE page_image; + +const int kMinCharactersToTry = 50; +const int kMaxCharactersToTry = 5 * kMinCharactersToTry; + +const float kSizeRatioToReject = 2.0; + +const float kOrientationAcceptRatio = 1.3; +const float kScriptAcceptRatio = 1.3; + +const float kHanRatioInKorean = 0.7; +const float kHanRatioInJapanese = 0.3; + +const float kLatinRationInFraktur = 0.7; + +const float kNonAmbiguousMargin = 1.0; + +// General scripts +static const char* han_script = "Han"; +static const char* latin_script = "Latin"; +static const char* katakana_script = "Katakana"; +static const char* hiragana_script = "Hiragana"; +static const char* hangul_script = "Hangul"; + +// Pseudo-scripts Name +const char* ScriptDetector::korean_script_ = "Korean"; +const char* ScriptDetector::japanese_script_ = "Japanese"; +const char* ScriptDetector::fraktur_script_ = "Fraktur"; + +CLISTIZEH(BLOBNBOX); +CLISTIZE(BLOBNBOX); + +// Find connected components in the page and process a subset until finished or +// a stopping criterion is met. +// Returns true if the page was successfully processed. 
+bool orientation_and_script_detection(STRING& filename, + OSResults* osr, + tesseract::Tesseract* tess) { + STRING name = filename; //truncated name + const char *lastdot; //of name + TO_BLOCK_LIST land_blocks, port_blocks; + BLOCK_LIST blocks; + TBOX page_box; + + lastdot = strrchr (name.string (), '.'); + if (lastdot != NULL) + name[lastdot-name.string()] = '\0'; + if (!read_unlv_file(name, page_image.get_xsize(), page_image.get_ysize(), + &blocks)) + FullPageBlock(page_image.get_xsize(), page_image.get_ysize(), &blocks); + find_components(&blocks, &land_blocks, &port_blocks, &page_box); + return os_detect(&port_blocks, osr, tess); +} + +// Filter and sample the blobs. +// Returns true if the page was successfully processed, or false if the page had +// too few characters to be reliable +bool os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, + tesseract::Tesseract* tess) { + int blobs_total = 0; + OSResults osr_; + if (osr == NULL) + osr = &osr_; + + osr->unicharset = &tess->unicharset; + OrientationDetector o(osr); + ScriptDetector s(osr, tess); + + TO_BLOCK_IT block_it; + block_it.set_to_list(port_blocks); + + BLOBNBOX_CLIST filtered_list; + BLOBNBOX_C_IT filtered_it(&filtered_list); + + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward ()) { + TO_BLOCK* block = block_it.data(); + BLOBNBOX_IT bbox_it; + bbox_it.set_to_list(&block->blobs); + for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list (); + bbox_it.forward ()) { + BLOBNBOX* bbox = bbox_it.data(); + C_BLOB* blob = bbox->cblob(); + TBOX box = blob->bounding_box(); + ++blobs_total; + + float y_x = fabs((box.height() * 1.0) / box.width()); + float x_y = 1.0f / y_x; + // Select a >= 1.0 ratio + float ratio = x_y > y_x ? 
x_y : y_x; + // Blob is ambiguous + if (ratio > kSizeRatioToReject) continue; + if (box.height() < 10) continue; + filtered_it.add_to_end(bbox); + } + } + if (filtered_it.length() > 0) + filtered_it.move_to_first(); + + int real_max = MIN(filtered_it.length(), kMaxCharactersToTry); + printf("Total blobs found = %d\n", blobs_total); + printf("Number of blobs post-filtering = %d\n", filtered_it.length()); + printf("Number of blobs to try = %d\n", real_max); + + // If there are too few characters, skip this page entirely. + if (real_max < kMinCharactersToTry / 2) { + printf("Too few characters. Skipping this page\n"); + return false; + } + + BLOBNBOX** blobs = new BLOBNBOX*[filtered_it.length()]; + int number_of_blobs = 0; + for (filtered_it.mark_cycle_pt (); !filtered_it.cycled_list (); + filtered_it.forward ()) { + blobs[number_of_blobs++] = (BLOBNBOX*)filtered_it.data(); + } + QRSequenceGenerator sequence(number_of_blobs); + for (int i = 0; i < real_max; ++i) { + if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) + && i > kMinCharactersToTry) { + break; + } + } + delete [] blobs; + + // Make sure the best_result is up-to-date + int orientation = o.get_orientation(); + s.update_best_script(orientation); + return true; +} + +// Processes a single blob to estimate script and orientation. +// Return true if estimate of orientation and script satisfies stopping +// criteria. 
+bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, + ScriptDetector* s, OSResults* osr, + tesseract::Tesseract* tess) { + C_BLOB* blob = bbox->cblob(); + TBOX box = blob->bounding_box(); + + int x_mid = (box.left() + box.right()) / 2.0f; + int y_mid = (box.bottom() + box.top()) / 2.0f; + + PBLOB pblob(blob, box.height()); + + BLOB_CHOICE_LIST ratings[4]; + // Test the 4 orientations + for (int i = 0; i < 4; ++i) { + // normalize the blob + pblob.move(FCOORD(-x_mid, -box.bottom())); + pblob.scale(static_cast(bln_x_height) / box.height()); + pblob.move(FCOORD(0.0f, bln_baseline_offset)); + + { + // List of choices given by the classifier + TBLOB *tessblob; //converted blob + TEXTROW tessrow; //dummy row + + tess_cn_matching.set_value(true); // turn it on + tess_bn_matching.set_value(false); + //convert blob + tessblob = make_tess_blob (&pblob, TRUE); + //make dummy row + make_tess_row(NULL, &tessrow); + //classify + tess->AdaptiveClassifier (tessblob, NULL, &tessrow, ratings + i, NULL); + free_blob(tessblob); + } + // undo normalize + pblob.move(FCOORD(0.0f, -bln_baseline_offset)); + pblob.scale(1.0f / (static_cast(bln_x_height) / box.height())); + pblob.move(FCOORD(x_mid, box.bottom())); + + // center the blob + pblob.move(FCOORD(-x_mid, -y_mid)); + + // Rotate it + pblob.rotate(); + + // Re-compute the mid + box = pblob.bounding_box(); + x_mid = (box.left() + box.right()) / 2; + y_mid = (box.top() + box.bottom()) / 2; + + // re-center in the new mid + pblob.move(FCOORD(x_mid, y_mid)); + } + + bool stop = o->detect_blob(ratings); + s->detect_blob(ratings); + int orientation = o->get_orientation(); + stop = s->must_stop(orientation) && stop; + return stop; +} + + +OrientationDetector::OrientationDetector(OSResults* osr) { + osr_ = osr; +} + +// Score the given blob and return true if it is now sure of the orientation +// after adding this block. 
+bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) { + for (int i = 0; i < 4; ++i) { + BLOB_CHOICE_IT choice_it; + choice_it.set_to_list(scores + i); + + if (!choice_it.empty()) { + osr_->orientations[i] += (100 + choice_it.data()->certainty()); + } + } + + float first = -1; + float second = -1; + + int idx = -1; + for (int i = 0; i < 4; ++i) { + if (osr_->orientations[i] > first) { + idx = i; + second = first; + first = osr_->orientations[i]; + } else if (osr_->orientations[i] > second) { + second = osr_->orientations[i]; + } + } + + return first / second > kOrientationAcceptRatio; +} + +void OrientationDetector::update_best_orientation() { + float first = osr_->orientations[0]; + float second = osr_->orientations[1]; + + if (first < second) { + second = first; + first = osr_->orientations[1]; + } + + osr_->best_result.orientation = 0; + osr_->best_result.oconfidence = 0; + + for (int i = 0; i < 4; ++i) { + if (osr_->orientations[i] > first) { + second = first; + first = osr_->orientations[i]; + osr_->best_result.orientation = i; + } else if (osr_->orientations[i] > second) { + second = osr_->orientations[i]; + } + } + + osr_->best_result.oconfidence = + (first / second - 1.0) / (kOrientationAcceptRatio - 1.0); +} + +int OrientationDetector::get_orientation() { + update_best_orientation(); + return osr_->best_result.orientation; +} + + +ScriptDetector::ScriptDetector(OSResults* osr, tesseract::Tesseract* tess) { + osr_ = osr; + tess_ = tess; + katakana_id_ = tess_->unicharset.add_script(katakana_script); + hiragana_id_ = tess_->unicharset.add_script(hiragana_script); + han_id_ = tess_->unicharset.add_script(han_script); + hangul_id_ = tess_->unicharset.add_script(hangul_script); + japanese_id_ = tess_->unicharset.add_script(japanese_script_); + korean_id_ = tess_->unicharset.add_script(korean_script_); + latin_id_ = tess_->unicharset.add_script(latin_script); + fraktur_id_ = tess_->unicharset.add_script(fraktur_script_); +} + + +// Score the given 
blob and return true if it is now sure of the script after +// adding this blob. +void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { + bool done[kMaxNumberOfScripts]; + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < kMaxNumberOfScripts; ++j) + done[j] = false; + + BLOB_CHOICE_IT choice_it; + choice_it.set_to_list(scores + i); + + float prev_score = -1; + int script_count = 0; + int prev_id = -1; + int prev_script; + int prev_class_id = -1; + int prev_config = -1; + const char* prev_unichar = ""; + const char* unichar = ""; + float next_best_score = -1.0; + int next_best_script_id = -1; + const char* next_best_unichar = ""; + + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); + choice_it.forward()) { + BLOB_CHOICE* choice = choice_it.data(); + int id = choice->script_id(); + // Script already processed before. + if (done[id]) continue; + done[id] = true; + + unichar = tess_->unicharset.id_to_unichar(choice->unichar_id()); + // Save data from the first match + if (prev_score < 0) { + prev_score = -choice->certainty(); + script_count = 1; + prev_id = id; + prev_script = choice->script_id(); + prev_unichar = unichar; + prev_class_id = choice->unichar_id(); + prev_config = choice->config(); + } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) { + script_count++; + next_best_score = -choice->certainty(); + next_best_script_id = choice->script_id(); + next_best_unichar = tess_->unicharset.id_to_unichar(choice->unichar_id()); + } + + if (strlen(prev_unichar) == 1) + if (unichar[0] >= '0' && unichar[0] <= '9') + break; + + // if script_count is >= 2, character is ambiguous, skip other matches + // since they are useless. 
+ if (script_count >= 2) + break; + } + // Character is non ambiguous + if (script_count == 1) { + // Update the score of the winning script + osr_->scripts_na[i][prev_id] += 1; + + // Workaround for Fraktur + if (prev_id == latin_id_) { + int font_set_id = tess_->PreTrainedTemplates-> + Class[prev_class_id]->font_set_id; + if (font_set_id >= 0 && prev_config >= 0) { + FontInfo fi = tess_->get_fontinfo_table().get( + tess_->get_fontset_table().get(font_set_id).configs[prev_config]); + //printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name, + // fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(), + // fi.is_serif(), fi.is_fraktur(), + // prev_unichar); + if (fi.is_fraktur()) { + osr_->scripts_na[i][prev_id] -= 1; + osr_->scripts_na[i][fraktur_id_] += 1; + } + } + } + + // Update Japanese / Korean pseudo-scripts + if (prev_id == katakana_id_) + osr_->scripts_na[i][japanese_id_] += 1; + if (prev_id == hiragana_id_) + osr_->scripts_na[i][japanese_id_] += 1; + if (prev_id == hangul_id_) + osr_->scripts_na[i][korean_id_] += 1; + if (prev_id == han_id_) + osr_->scripts_na[i][korean_id_] += kHanRatioInKorean; + if (prev_id == han_id_) + osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese; + } + } // iterate over each orientation +} + +bool ScriptDetector::must_stop(int orientation) { + update_best_script(orientation); + return osr_->best_result.sconfidence > 1; +} + + +void ScriptDetector::update_best_script(int orientation) { + float first = -1; + float second = -1; + + // i = 1 -> ignore Common scripts + for (int i = 1; i < kMaxNumberOfScripts; ++i) { + if (osr_->scripts_na[orientation][i] > first) { + osr_->best_result.script = + tess_->unicharset.get_script_from_script_id(i); + second = first; + first = osr_->scripts_na[orientation][i]; + } else if (osr_->scripts_na[orientation][i] > second) { + second = osr_->scripts_na[orientation][i]; + } + } + + osr_->best_result.sconfidence = + (first / second - 1.0) / (kOrientationAcceptRatio - 1.0); +} diff --git 
a/ccmain/osdetect.h b/ccmain/osdetect.h new file mode 100644 index 000000000..364ac00ca --- /dev/null +++ b/ccmain/osdetect.h @@ -0,0 +1,102 @@ +/////////////////////////////////////////////////////////////////////// +// File: osdetect.h +// Description: Orientation and script detection. +// Author: Samuel Charron +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CCMAIN_OSDETECT_H__ +#define TESSERACT_CCMAIN_OSDETECT_H__ + +#include "strngs.h" +#include "unicharset.h" + +class TO_BLOCK_LIST; +class BLOBNBOX; +class BLOB_CHOICE_LIST; + +namespace tesseract { +class Tesseract; +} + +// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur +const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; + +struct OSBestResult { + int orientation; + const char* script; + float sconfidence; + float oconfidence; +}; + +struct OSResults { + OSResults() { + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < kMaxNumberOfScripts; ++j) + scripts_na[i][j] = 0; + orientations[i] = 0; + } + } + float orientations[4]; + float scripts_na[4][kMaxNumberOfScripts]; + + UNICHARSET* unicharset; + OSBestResult best_result; +}; + +class OrientationDetector { + public: + OrientationDetector(OSResults*); + bool detect_blob(BLOB_CHOICE_LIST* scores); + void update_best_orientation(); + int get_orientation(); + private: + OSResults* osr_; +}; + +class 
ScriptDetector { + public: + ScriptDetector(OSResults*, tesseract::Tesseract* tess); + void detect_blob(BLOB_CHOICE_LIST* scores); + void update_best_script(int); + void get_script() ; + bool must_stop(int orientation); + private: + OSResults* osr_; + static const char* korean_script_; + static const char* japanese_script_; + static const char* fraktur_script_; + int korean_id_; + int japanese_id_; + int katakana_id_; + int hiragana_id_; + int han_id_; + int hangul_id_; + int latin_id_; + int fraktur_id_; + tesseract::Tesseract* tess_; +}; + +bool orientation_and_script_detection(STRING& filename, + OSResults*, + tesseract::Tesseract*); + +bool os_detect(TO_BLOCK_LIST* port_blocks, + OSResults* osr, + tesseract::Tesseract* tess); + +bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, + ScriptDetector* s, OSResults*, + tesseract::Tesseract* tess); +#endif // TESSERACT_CCMAIN_OSDETECT_H__ diff --git a/ccmain/otsuthr.cpp b/ccmain/otsuthr.cpp new file mode 100644 index 000000000..adce7ce2a --- /dev/null +++ b/ccmain/otsuthr.cpp @@ -0,0 +1,153 @@ +/********************************************************************** + * File: otsuthr.cpp + * Description: Simple Otsu thresholding for binarizing images. + * Author: Ray Smith + * Created: Fri Mar 07 12:31:01 PST 2008 + * + * (C) Copyright 2008, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include +#include "otsuthr.h" + +namespace tesseract { + +// Compute the Otsu threshold(s) for the given image rectangle, making one +// for each channel. Each channel is always one byte per pixel. +// Returns an array of threshold values and an array of hi_values, such +// that a pixel value >threshold[channel] is considered foreground if +// hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates +// that there is no apparent foreground. At least one hi_value will not be -1. +// Delete thresholds and hi_values with delete [] after use. +void OtsuThreshold(const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height, + int** thresholds, int** hi_values) { + // Of all channels with no good hi_value, keep the best so we can always + // produce at least one answer. + int best_hi_value = 1; + int best_hi_index = 0; + bool any_good_hivalue = false; + double best_hi_dist = 0.0; + *thresholds = new int[bytes_per_pixel]; + *hi_values = new int[bytes_per_pixel]; + + for (int ch = 0; ch < bytes_per_pixel; ++ch) { + (*thresholds)[ch] = -1; + (*hi_values)[ch] = -1; + // Compute the histogram of the image rectangle. + int histogram[kHistogramSize]; + HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line, + left, top, width, height, histogram); + int H; + int best_omega_0; + int best_t = OtsuStats(histogram, &H, &best_omega_0); + if (best_omega_0 == 0 || best_omega_0 == H) { + // This channel is empty. + continue; + } + // To be a convincing foreground we must have a small fraction of H + // or to be a convincing background we must have a large fraction of H. + // In between we assume this channel contains no thresholding information. 
+ int hi_value = best_omega_0 < H * 0.5; + (*thresholds)[ch] = best_t; + if (best_omega_0 > H * 0.75) { + any_good_hivalue = true; + (*hi_values)[ch] = 0; + } else if (best_omega_0 < H * 0.25) { + any_good_hivalue = true; + (*hi_values)[ch] = 1; + } else { + // In case all channels are like this, keep the best of the bad lot. + double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; + if (hi_dist > best_hi_dist) { + best_hi_dist = hi_dist; + best_hi_value = hi_value; + best_hi_index = ch; + } + } + } + if (!any_good_hivalue) { + // Use the best of the ones that were not good enough. + (*hi_values)[best_hi_index] = best_hi_value; + } +} + +// Compute the histogram for the given image rectangle, and the given +// channel. (Channel pointed to by imagedata.) Each channel is always +// one byte per pixel. +// Bytes per pixel is used to skip channels not being +// counted with this call in a multi-channel (pixel-major) image. +// Histogram is always a kHistogramSize(256) element array to count +// occurrences of each pixel value. +void HistogramRect(const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height, + int* histogram) { + int bottom = top + height; + memset(histogram, 0, sizeof(*histogram) * kHistogramSize); + const unsigned char* pixels = imagedata + + top * bytes_per_line + + left * bytes_per_pixel; + for (int y = top; y < bottom; ++y) { + for (int x = 0; x < width; ++x) { + ++histogram[pixels[x * bytes_per_pixel]]; + } + pixels += bytes_per_line; + } +} + +// Compute the Otsu threshold(s) for the given histogram. +// Also returns H = total count in histogram, and +// omega0 = count of histogram below threshold. +int OtsuStats(const int* histogram, int* H_out, int* omega0_out) { + int H = 0; + double mu_T = 0.0; + for (int i = 0; i < kHistogramSize; ++i) { + H += histogram[i]; + mu_T += i * histogram[i]; + } + + // Now maximize sig_sq_B over t. 
+ // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf + int best_t = -1; + int omega_0, omega_1; + int best_omega_0 = 0; + double best_sig_sq_B = 0.0; + double mu_0, mu_1, mu_t; + omega_0 = 0; + mu_t = 0.0; + for (int t = 0; t < kHistogramSize - 1; ++t) { + omega_0 += histogram[t]; + mu_t += t * static_cast(histogram[t]); + if (omega_0 == 0) + continue; + omega_1 = H - omega_0; + if (omega_1 == 0) + break; + mu_0 = mu_t / omega_0; + mu_1 = (mu_T - mu_t) / omega_1; + double sig_sq_B = mu_1 - mu_0; + sig_sq_B *= sig_sq_B * omega_0 * omega_1; + if (best_t < 0 || sig_sq_B > best_sig_sq_B) { + best_sig_sq_B = sig_sq_B; + best_t = t; + best_omega_0 = omega_0; + } + } + if (H_out != NULL) *H_out = H; + if (omega0_out != NULL) *omega0_out = best_omega_0; + return best_t; +} + +} // namespace tesseract. diff --git a/ccmain/otsuthr.h b/ccmain/otsuthr.h new file mode 100644 index 000000000..59eaf8060 --- /dev/null +++ b/ccmain/otsuthr.h @@ -0,0 +1,58 @@ +/////////////////////////////////////////////////////////////////////// +// File: otsuthr.h +// Description: Simple Otsu thresholding for binarizing images. +// Author: Ray Smith +// Created: Fri Mar 07 12:14:01 PST 2008 +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CCMAIN_OTSUTHR_H__ +#define TESSERACT_CCMAIN_OTSUTHR_H__ + +namespace tesseract { + +const int kHistogramSize = 256; // The size of a histogram of pixel values. + +// Compute the Otsu threshold(s) for the given image rectangle, making one +// for each channel. Each channel is always one byte per pixel. +// Returns an array of threshold values and an array of hi_values, such +// that a pixel value >threshold[channel] is considered foreground if +// hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates +// that there is no apparent foreground. At least one hi_value will not be -1. +// Delete thresholds and hi_values with delete [] after use. +void OtsuThreshold(const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height, + int** thresholds, int** hi_values); + +// Compute the histogram for the given image rectangle, and the given +// channel. (Channel pointed to by imagedata.) Each channel is always +// one byte per pixel. +// Bytes per pixel is used to skip channels not being +// counted with this call in a multi-channel (pixel-major) image. +// Histogram is always a 256 element array to count occurrences of +// each pixel value. +void HistogramRect(const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height, + int* histogram); + +// Compute the Otsu threshold(s) for the given histogram. +// Also returns H = total count in histogram, and +// omega0 = count of histogram below threshold. +int OtsuStats(const int* histogram, int* H_out, int* omega0_out); + +} // namespace tesseract. 
+ +#endif // TESSERACT_CCMAIN_OTSUTHR_H__ diff --git a/ccmain/pageres.cpp b/ccmain/pageres.cpp new file mode 100644 index 000000000..d67830126 --- /dev/null +++ b/ccmain/pageres.cpp @@ -0,0 +1,330 @@ +/********************************************************************** + * File: pageres.cpp (Formerly page_res.c) + * Description: Results classes used by control.c + * Author: Phil Cheatle + * Created: Tue Sep 22 08:42:49 BST 1992 + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ +#include "mfcpch.h" +#include +#ifdef __UNIX__ +#include +#endif +#include "pageres.h" +#include "notdll.h" + +ELISTIZE (BLOCK_RES) +CLISTIZE (BLOCK_RES) ELISTIZE (ROW_RES) ELISTIZE (WERD_RES) +/************************************************************************* + * PAGE_RES::PAGE_RES + * + * Constructor for page results + *************************************************************************/ +PAGE_RES::PAGE_RES( //recursive construct + BLOCK_LIST *the_block_list //real page + ) { + BLOCK_IT block_it(the_block_list); + BLOCK_RES_IT block_res_it(&block_res_list); + + char_count = 0; + rej_count = 0; + rejected = FALSE; + + for (block_it.mark_cycle_pt (); + !block_it.cycled_list (); block_it.forward ()) { + block_res_it.add_to_end (new BLOCK_RES (block_it.data ())); + } +} + + +/************************************************************************* + * BLOCK_RES::BLOCK_RES + * + * Constructor for BLOCK results + *************************************************************************/ + +BLOCK_RES::BLOCK_RES( //recursive construct + BLOCK *the_block //real BLOCK + ) { + ROW_IT row_it (the_block->row_list ()); + ROW_RES_IT row_res_it(&row_res_list); + + char_count = 0; + rej_count = 0; + font_class = -1; //not assigned + x_height = -1.0; + font_assigned = FALSE; + bold = FALSE; + italic = FALSE; + row_count = 0; + + block = the_block; + + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row_res_it.add_to_end (new ROW_RES (row_it.data ())); + } +} + + +/************************************************************************* + * ROW_RES::ROW_RES + * + * Constructor for ROW results + *************************************************************************/ + +ROW_RES::ROW_RES( //recursive construct + ROW *the_row //real ROW + ) { + WERD_IT word_it (the_row->word_list ()); + WERD_RES_IT word_res_it(&word_res_list); + WERD_RES *combo = NULL; //current 
combination of fuzzies + WERD_RES *word_res; //current word + WERD *copy_word; + + char_count = 0; + rej_count = 0; + whole_word_rej_count = 0; + font_class = -1; + font_class_score = -1.0; + bold = FALSE; + italic = FALSE; + + row = the_row; + + for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { + word_res = new WERD_RES (word_it.data ()); + word_res->x_height = the_row->x_height(); + + if (word_res->word->flag (W_FUZZY_NON)) { + ASSERT_HOST (combo != NULL); + word_res->part_of_combo = TRUE; + combo->copy_on (word_res); + } + if (word_it.data_relative (1)->flag (W_FUZZY_NON)) { + if (combo == NULL) { + copy_word = new WERD; + //deep copy + *copy_word = *(word_it.data ()); + combo = new WERD_RES (copy_word); + combo->x_height = the_row->x_height(); + combo->combination = TRUE; + word_res_it.add_to_end (combo); + } + word_res->part_of_combo = TRUE; + } + else + combo = NULL; + word_res_it.add_to_end (word_res); + } +} + + +WERD_RES & WERD_RES::operator= ( //assign word_res +const WERD_RES & source //from this +) { + this->ELIST_LINK::operator= (source); + if (source.combination) { + word = new WERD; + *word = *(source.word); //deep copy + } + else + word = source.word; //pt to same word + + if (source.outword != NULL) { + outword = new WERD; + *outword = *(source.outword);//deep copy + } + else + outword = NULL; + + denorm = source.denorm; + if (source.best_choice != NULL) { + best_choice = new WERD_CHOICE; + *best_choice = *(source.best_choice); + raw_choice = new WERD_CHOICE; + *raw_choice = *(source.raw_choice); + } + else { + best_choice = NULL; + raw_choice = NULL; + } + if (source.ep_choice != NULL) { + ep_choice = new WERD_CHOICE; + *ep_choice = *(source.ep_choice); + } + else + ep_choice = NULL; + reject_map = source.reject_map; + tess_failed = source.tess_failed; + tess_accepted = source.tess_accepted; + tess_would_adapt = source.tess_would_adapt; + done = source.done; + unlv_crunch_mode = source.unlv_crunch_mode; + italic = 
source.italic; + bold = source.bold; + font1 = source.font1; + font1_count = source.font1_count; + font2 = source.font2; + font2_count = source.font2_count; + x_height = source.x_height; + caps_height = source.caps_height; + guessed_x_ht = source.guessed_x_ht; + guessed_caps_ht = source.guessed_caps_ht; + combination = source.combination; + part_of_combo = source.part_of_combo; + reject_spaces = source.reject_spaces; + return *this; +} + + +WERD_RES::~WERD_RES () { + if (combination) + delete word; + if (outword != NULL) + delete outword; + if (best_choice != NULL) { + delete best_choice; + delete raw_choice; + } + if (ep_choice != NULL) { + delete ep_choice; + } +} + + +/************************************************************************* + * PAGE_RES_IT::restart_page + * + * Set things up at the start of the page + *************************************************************************/ + +WERD_RES *PAGE_RES_IT::restart_page() { + block_res_it.set_to_list (&page_res->block_res_list); + block_res_it.mark_cycle_pt (); + prev_block_res = NULL; + prev_row_res = NULL; + prev_word_res = NULL; + block_res = NULL; + row_res = NULL; + word_res = NULL; + next_block_res = NULL; + next_row_res = NULL; + next_word_res = NULL; + internal_forward(TRUE); + return internal_forward (FALSE); +} + + +/************************************************************************* + * PAGE_RES_IT::internal_forward + * + * Find the next word on the page. Empty blocks and rows are skipped. + * The iterator maintains pointers to block, row and word for the previous, + * current and next words. These are correct, regardless of block/row + * boundaries. NULL values denote start and end of the page. 
+ *************************************************************************/ + +WERD_RES *PAGE_RES_IT::internal_forward(BOOL8 new_block) { + BOOL8 found_next_word = FALSE; + BOOL8 new_row = FALSE; + + prev_block_res = block_res; + prev_row_res = row_res; + prev_word_res = word_res; + block_res = next_block_res; + row_res = next_row_res; + word_res = next_word_res; + + while (!found_next_word && !block_res_it.cycled_list ()) { + if (new_block) { + new_block = FALSE; + row_res_it.set_to_list (&block_res_it.data ()->row_res_list); + row_res_it.mark_cycle_pt (); + new_row = TRUE; + } + while (!found_next_word && !row_res_it.cycled_list ()) { + if (new_row) { + new_row = FALSE; + word_res_it.set_to_list (&row_res_it.data ()->word_res_list); + word_res_it.mark_cycle_pt (); + } + while (!found_next_word && !word_res_it.cycled_list ()) { + next_block_res = block_res_it.data (); + next_row_res = row_res_it.data (); + next_word_res = word_res_it.data (); + found_next_word = TRUE; + do { + word_res_it.forward (); + } + while (word_res_it.data ()->part_of_combo); + } + if (!found_next_word) { //end of row reached + row_res_it.forward (); + new_row = TRUE; + } + } + if (!found_next_word) { //end of block reached + block_res_it.forward (); + new_block = TRUE; + } + } + if (!found_next_word) { //end of page reached + next_block_res = NULL; + next_row_res = NULL; + next_word_res = NULL; + } + return word_res; +} + + +/************************************************************************* + * PAGE_RES_IT::forward_block + * + * Move to the first word of the next block + * Can be followed by subsequent calls to forward() BUT at the first word in + * the block, the prev block, row and word are all NULL. 
+ *************************************************************************/ + +WERD_RES *PAGE_RES_IT::forward_block() { + if (block_res == next_block_res) { + block_res_it.forward ();; + block_res = NULL; + row_res = NULL; + word_res = NULL; + next_block_res = NULL; + next_row_res = NULL; + next_word_res = NULL; + internal_forward(TRUE); + } + return internal_forward (FALSE); +} + + +void PAGE_RES_IT::rej_stat_word() { + inT16 chars_in_word; + inT16 rejects_in_word = 0; + + chars_in_word = word_res->reject_map.length (); + page_res->char_count += chars_in_word; + block_res->char_count += chars_in_word; + row_res->char_count += chars_in_word; + + rejects_in_word = word_res->reject_map.reject_count (); + + page_res->rej_count += rejects_in_word; + block_res->rej_count += rejects_in_word; + row_res->rej_count += rejects_in_word; + if (chars_in_word == rejects_in_word) + row_res->whole_word_rej_count += rejects_in_word; +} diff --git a/ccmain/pageres.h b/ccmain/pageres.h new file mode 100644 index 000000000..d1cf4b17a --- /dev/null +++ b/ccmain/pageres.h @@ -0,0 +1,313 @@ +/********************************************************************** + * File: pageres.h (Formerly page_res.h) + * Description: Results classes used by control.c + * Author: Phil Cheatle + * Created: Tue Sep 22 08:42:49 BST 1992 + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ +#ifndef PAGERES_H +#define PAGERES_H + +#include "elst.h" +#include "ocrblock.h" +#include "ocrrow.h" +#include "werd.h" +#include "ratngs.h" +#include "rejctmap.h" +#include "notdll.h" +#include "notdll.h" + +/* Forward declarations */ + +class BLOCK_RES; + +ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES) +class +ROW_RES; + +ELISTIZEH (ROW_RES) +class WERD_RES; + +ELISTIZEH (WERD_RES) +/************************************************************************* + * PAGE_RES - Page results + *************************************************************************/ +class PAGE_RES //page result +{ + public: + inT32 char_count; + inT32 rej_count; + BLOCK_RES_LIST block_res_list; + BOOL8 rejected; + + PAGE_RES() { + } //empty constructor + + PAGE_RES( //simple constructor + BLOCK_LIST *block_list); //real blocks + + ~PAGE_RES () { //destructor + } +}; + +/************************************************************************* + * BLOCK_RES - Block results + *************************************************************************/ + +class BLOCK_RES:public ELIST_LINK + //page block result +{ + public: + BLOCK * block; //real block + inT32 char_count; //chars in block + inT32 rej_count; //rejected chars + inT16 font_class; // + inT16 row_count; + float x_height; + BOOL8 font_assigned; // block already + // processed + BOOL8 bold; // all bold + BOOL8 italic; // all italic + + ROW_RES_LIST row_res_list; + + BLOCK_RES() { + } //empty constructor + + BLOCK_RES( //simple constructor + BLOCK *the_block); //real block + + ~BLOCK_RES () { //destructor + } +}; + +/************************************************************************* + * ROW_RES - Row results + *************************************************************************/ + +class ROW_RES:public ELIST_LINK //row result +{ + public: + ROW * row; //real row + inT32 char_count; //chars in block + inT32 rej_count; //rejected chars + inT32 
whole_word_rej_count; //rejs in total rej wds + WERD_RES_LIST word_res_list; + float font_class_score; + inT16 font_class; // + inT32 italic; + inT32 bold; + inT8 font1; //primary font + inT8 font1_count; //no of voters + inT8 font2; //secondary font + inT8 font2_count; //no of voters + + ROW_RES() { + } //empty constructor + + ROW_RES( //simple constructor + ROW *the_row); //real row + + ~ROW_RES () { //destructor + } +}; + +/************************************************************************* + * WERD_RES - Word results + *************************************************************************/ +enum CRUNCH_MODE +{ + CR_NONE, + CR_KEEP_SPACE, + CR_LOOSE_SPACE, + CR_DELETE +}; + +class WERD_RES:public ELIST_LINK //word result +{ + public: + WERD * word; //non-bln real word + WERD *outword; //bln best choice + //segmentation + DENORM denorm; //for use on outword + WERD_CHOICE *best_choice; //tess output + WERD_CHOICE *raw_choice; //top choice permuter + WERD_CHOICE *ep_choice; //ep text + REJMAP reject_map; //best_choice rejects + BOOL8 tess_failed; + /* + If tess_failed is TRUE, one of the following tests failed when Tess + returned: + - The outword blob list was not the same length as the best_choice string; + - The best_choice string contained ALL blanks; + - The best_choice string was zero length + */ + BOOL8 tess_accepted; //Tess thinks its ok? + BOOL8 tess_would_adapt; //Tess would adapt? + BOOL8 done; //ready for output? + inT8 italic; + inT8 bold; + inT8 font1; //primary font + inT8 font1_count; //no of voters + inT8 font2; //secondary font + inT8 font2_count; //no of voters + CRUNCH_MODE unlv_crunch_mode; + float x_height; //Post match estimate + float caps_height; //Post match estimate + BOOL8 guessed_x_ht; + BOOL8 guessed_caps_ht; + /* + To deal with fuzzy spaces we need to be able to combine "words" to form + combinations when we suspect that the gap is a non-space. 
The (new) text + ord code generates separate words for EVERY fuzzy gap - flags in the word + indicate whether the gap is below the threshold (fuzzy kern) and is thus + NOT a real word break by default, or above the threshold (fuzzy space) and + this is a real word break by default. + + The WERD_RES list contains all these words PLUS "combination" words built + out of (copies of) the words split by fuzzy kerns. The separate parts have + their "part_of_combo" flag set true and should be IGNORED on a default + reading of the list. + + Combination words are FOLLOWED by the sequence of part_of_combo words + which they combine. + */ + BOOL8 combination; //of two fuzzy gap wds + BOOL8 part_of_combo; //part of a combo + BOOL8 reject_spaces; //Reject spacing? + + WERD_RES() { + } //empty constructor + + WERD_RES( //simple constructor + WERD *the_word) { //real word + word = the_word; + outword = NULL; + best_choice = NULL; + raw_choice = NULL; + ep_choice = NULL; + tess_failed = FALSE; + tess_accepted = FALSE; + tess_would_adapt = FALSE; + done = FALSE; + unlv_crunch_mode = CR_NONE; + italic = FALSE; + bold = FALSE; + font1 = -1; + font1_count = 0; + font2 = -1; + font2_count = 0; + x_height = 0.0; + caps_height = 0.0; + guessed_x_ht = TRUE; + guessed_caps_ht = TRUE; + combination = FALSE; + part_of_combo = FALSE; + reject_spaces = FALSE; + } + WERD_RES(const WERD_RES &source) { + *this = source; //see operator= + } + + ~WERD_RES (); //destructor + + WERD_RES& operator=(const WERD_RES& source); //from this + + static WERD_RES* deep_copy(const WERD_RES* src) { + return new WERD_RES(*src); + } + + void copy_on( //copy blobs onto word + WERD_RES *word_res) { //from this word + word->set_flag (W_EOL, word_res->word->flag (W_EOL)); + word->copy_on (word_res->word); + } +}; + +/************************************************************************* + * PAGE_RES_IT - Page results iterator + *************************************************************************/ + +class 
PAGE_RES_IT +{ + public: + PAGE_RES * page_res; //page being iterated + + PAGE_RES_IT() { + } //empty contructor + + PAGE_RES_IT( //empty contructor + PAGE_RES *the_page_res) { //page result + page_res = the_page_res; + restart_page(); //ready to scan + } + + WERD_RES *restart_page(); //get ready + + WERD_RES *internal_forward( //get next word + BOOL8 new_block); + + WERD_RES *forward() { //get next word + return internal_forward (FALSE); + } + + WERD_RES *forward_block(); //get first word in + //next non-empty block + WERD_RES *prev_word() { //previous word + return prev_word_res; + } + ROW_RES *prev_row() { //row of prev word + return prev_row_res; + } + BLOCK_RES *prev_block() { //block of prev word + return prev_block_res; + } + WERD_RES *word() { //current word + return word_res; + } + ROW_RES *row() { //row of current word + return row_res; + } + BLOCK_RES *block() { //block of cur. word + return block_res; + } + WERD_RES *next_word() { //next word + return next_word_res; + } + ROW_RES *next_row() { //row of next word + return next_row_res; + } + BLOCK_RES *next_block() { //block of next word + return next_block_res; + } + void rej_stat_word(); //for page/block/row + + private: + WERD_RES * prev_word_res; //previous word + ROW_RES *prev_row_res; //row of prev word + BLOCK_RES *prev_block_res; //block of prev word + + WERD_RES *word_res; //current word + ROW_RES *row_res; //row of current word + BLOCK_RES *block_res; //block of cur. 
word + + WERD_RES *next_word_res; //next word + ROW_RES *next_row_res; //row of next word + BLOCK_RES *next_block_res; //block of next word + + BLOCK_RES_IT block_res_it; //iterators + ROW_RES_IT row_res_it; + WERD_RES_IT word_res_it; +}; +#endif diff --git a/ccmain/pdblock.cpp b/ccmain/pdblock.cpp new file mode 100644 index 000000000..3004f6eb3 --- /dev/null +++ b/ccmain/pdblock.cpp @@ -0,0 +1,361 @@ +/********************************************************************** + * File: pdblock.c (Formerly pdblk.c) + * Description: PDBLK member functions and iterator functions. + * Author: Ray Smith + * Created: Fri Mar 15 09:41:28 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" +#include +#include "blckerr.h" +#include "pdblock.h" +#include "svshowim.h" + +#include "hpddef.h" //must be last (handpd.dll) + +#define BLOCK_LABEL_HEIGHT 150 //char height of block id + +CLISTIZE (PDBLK) +/********************************************************************** + * PDBLK::PDBLK + * + * Constructor for a simple rectangular block. 
+ **********************************************************************/ +PDBLK::PDBLK ( //rectangular block +inT16 xmin, //bottom left +inT16 ymin, inT16 xmax, //top right +inT16 ymax): box (ICOORD (xmin, ymin), ICOORD (xmax, ymax)) { + //boundaries + ICOORDELT_IT left_it = &leftside; + ICOORDELT_IT right_it = &rightside; + + hand_poly = NULL; + left_it.set_to_list (&leftside); + right_it.set_to_list (&rightside); + //make default box + left_it.add_to_end (new ICOORDELT (xmin, ymin)); + left_it.add_to_end (new ICOORDELT (xmin, ymax)); + right_it.add_to_end (new ICOORDELT (xmax, ymin)); + right_it.add_to_end (new ICOORDELT (xmax, ymax)); + index_ = 0; +} + + +/********************************************************************** + * PDBLK::set_sides + * + * Sets left and right vertex lists + **********************************************************************/ + +void PDBLK::set_sides( //set vertex lists + ICOORDELT_LIST *left, //left vertices + ICOORDELT_LIST *right //right vertices + ) { + //boundaries + ICOORDELT_IT left_it = &leftside; + ICOORDELT_IT right_it = &rightside; + + leftside.clear (); + left_it.move_to_first (); + left_it.add_list_before (left); + rightside.clear (); + right_it.move_to_first (); + right_it.add_list_before (right); +} + + +/********************************************************************** + * PDBLK::contains + * + * Return TRUE if the given point is within the block. 
+ **********************************************************************/ + +BOOL8 PDBLK::contains( //test containment + ICOORD pt //point to test + ) { + BLOCK_RECT_IT it = this; //rectangle iterator + ICOORD bleft, tright; //corners of rectangle + + for (it.start_block (); !it.cycled_rects (); it.forward ()) { + //get rectangle + it.bounding_box (bleft, tright); + //inside rect + if (pt.x () >= bleft.x () && pt.x () <= tright.x () + && pt.y () >= bleft.y () && pt.y () <= tright.y ()) + return TRUE; //is inside + } + return FALSE; //not inside +} + + +/********************************************************************** + * PDBLK::move + * + * Reposition block + **********************************************************************/ + +void PDBLK::move( // reposition block + const ICOORD vec // by vector + ) { + ICOORDELT_IT it(&leftside); + + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) + *(it.data ()) += vec; + + it.set_to_list (&rightside); + + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) + *(it.data ()) += vec; + + box.move (vec); +} + + +/********************************************************************** + * PDBLK::plot + * + * Plot the outline of a block in the given colour. 
+ **********************************************************************/ + +#ifndef GRAPHICS_DISABLED +void PDBLK::plot( //draw outline + ScrollView* window, //window to draw in + inT32 serial, //serial number + ScrollView::Color colour //colour to draw in + ) { + ICOORD startpt; //start of outline + ICOORD endpt; //end of outline + ICOORD prevpt; //previous point + ICOORDELT_IT it = &leftside; //iterator + + //set the colour + window->Pen(colour); + window->TextAttributes("Times", BLOCK_LABEL_HEIGHT, false, false, false); + + if (hand_poly != NULL) { + hand_poly->plot(window, serial); + } else if (!leftside.empty ()) { + startpt = *(it.data ()); //bottom left corner + // tprintf("Block %d bottom left is (%d,%d)\n", + // serial,startpt.x(),startpt.y()); + char temp_buff[34]; + #ifdef __UNIX__ + sprintf(temp_buff, INT32FORMAT, serial); + #else + ultoa (serial, temp_buff, 10); + #endif + window->Text(startpt.x (), startpt.y (), temp_buff); + + window->SetCursor(startpt.x (), startpt.y ()); + do { + prevpt = *(it.data ()); //previous point + it.forward (); //move to next point + //draw round corner + window->DrawTo(prevpt.x (), it.data ()->y ()); + window->DrawTo(it.data ()->x (), it.data ()->y ()); + } + while (!it.at_last ()); //until end of list + endpt = *(it.data ()); //end point + + //other side of boundary + window->SetCursor(startpt.x (), startpt.y ()); + it.set_to_list (&rightside); + prevpt = startpt; + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + //draw round corner + window->DrawTo(prevpt.x (), it.data ()->y ()); + window->DrawTo(it.data ()->x (), it.data ()->y ()); + prevpt = *(it.data ()); //previous point + } + //close boundary + window->DrawTo(endpt.x(), endpt.y()); + } +} +#endif + + +/********************************************************************** + * PDBLK::show + * + * Show the image corresponding to a block as its set of rectangles. 
+ **********************************************************************/ + +#ifndef GRAPHICS_DISABLED +void PDBLK::show( //show image block + IMAGE *image, //image to show + ScrollView* window //window to show in + ) { + BLOCK_RECT_IT it = this; //rectangle iterator + ICOORD bleft, tright; //corners of rectangle + + for (it.start_block (); !it.cycled_rects (); it.forward ()) { + //get rectangle + it.bounding_box (bleft, tright); + // tprintf("Drawing a block with a bottom left of (%d,%d)\n", + // bleft.x(),bleft.y()); + //show it + sv_show_sub_image (image, bleft.x (), bleft.y (), tright.x () - bleft.x (), tright.y () - bleft.y (), window, bleft.x (), bleft.y ()); + } +} +#endif + + +/********************************************************************** + * PDBLK::operator= + * + * Assignment - duplicate the block structure, but with an EMPTY row list. + **********************************************************************/ + +PDBLK & PDBLK::operator= ( //assignment +const PDBLK & source //from this +) { + // this->ELIST_LINK::operator=(source); + if (!leftside.empty ()) + leftside.clear (); + if (!rightside.empty ()) + rightside.clear (); + leftside.deep_copy(&source.leftside, &ICOORDELT::deep_copy); + rightside.deep_copy(&source.rightside, &ICOORDELT::deep_copy); + box = source.box; + return *this; +} + + +/********************************************************************** + * BLOCK_RECT_IT::BLOCK_RECT_IT + * + * Construct a block rectangle iterator. + **********************************************************************/ + +BLOCK_RECT_IT::BLOCK_RECT_IT ( +//iterate rectangles +PDBLK * blkptr //from block +):left_it (&blkptr->leftside), right_it (&blkptr->rightside) { + block = blkptr; //remember block + //non empty list + if (!blkptr->leftside.empty ()) { + start_block(); //ready for iteration + } +} + + +/********************************************************************** + * BLOCK_RECT_IT::set_to_block + * + * Start a new block. 
+ **********************************************************************/ + +void BLOCK_RECT_IT::set_to_block( //start (new) block + PDBLK *blkptr) { //block to start + block = blkptr; //remember block + //set iterators + left_it.set_to_list (&blkptr->leftside); + right_it.set_to_list (&blkptr->rightside); + if (!blkptr->leftside.empty ()) + start_block(); //ready for iteration +} + + +/********************************************************************** + * BLOCK_RECT_IT::start_block + * + * Restart a block. + **********************************************************************/ + +void BLOCK_RECT_IT::start_block() { //start (new) block + left_it.move_to_first (); + right_it.move_to_first (); + left_it.mark_cycle_pt (); + right_it.mark_cycle_pt (); + ymin = left_it.data ()->y (); //bottom of first box + ymax = left_it.data_relative (1)->y (); + if (right_it.data_relative (1)->y () < ymax) + //smallest step + ymax = right_it.data_relative (1)->y (); +} + + +/********************************************************************** + * BLOCK_RECT_IT::forward + * + * Move to the next rectangle in the block. 
+ **********************************************************************/ + +void BLOCK_RECT_IT::forward() { //next rectangle + if (!left_it.empty ()) { //non-empty list + if (left_it.data_relative (1)->y () == ymax) + left_it.forward (); //move to meet top + if (right_it.data_relative (1)->y () == ymax) + right_it.forward (); + //last is special + if (left_it.at_last () || right_it.at_last ()) { + left_it.move_to_first (); //restart + right_it.move_to_first (); + //now at bottom + ymin = left_it.data ()->y (); + } + else { + ymin = ymax; //new bottom + } + //next point + ymax = left_it.data_relative (1)->y (); + if (right_it.data_relative (1)->y () < ymax) + //least step forward + ymax = right_it.data_relative (1)->y (); + } +} + + +/********************************************************************** + * BLOCK_LINE_IT::get_line + * + * Get the the start and width of a line in the block. + **********************************************************************/ + +inT16 BLOCK_LINE_IT::get_line( //get a line + inT16 y, //line to get + inT16 &xext //output extent + ) { + ICOORD bleft; //bounding box + ICOORD tright; //of block & rect + + //get block box + block->bounding_box (bleft, tright); + if (y < bleft.y () || y >= tright.y ()) { + // block->print(stderr,FALSE); + BADBLOCKLINE.error ("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y); + } + + //get rectangle box + rect_it.bounding_box (bleft, tright); + //inside rectangle + if (y >= bleft.y () && y < tright.y ()) { + //width of line + xext = tright.x () - bleft.x (); + return bleft.x (); //start of line + } + for (rect_it.start_block (); !rect_it.cycled_rects (); rect_it.forward ()) { + //get rectangle box + rect_it.bounding_box (bleft, tright); + //inside rectangle + if (y >= bleft.y () && y < tright.y ()) { + //width of line + xext = tright.x () - bleft.x (); + return bleft.x (); //start of line + } + } + LOSTBLOCKLINE.error ("BLOCK_LINE_IT::get_line", ABORT, "Y=%d", y); + return 0; //dummy to stop warning +} diff 
--git a/ccmain/pdblock.h b/ccmain/pdblock.h new file mode 100644 index 000000000..fa56ceb60 --- /dev/null +++ b/ccmain/pdblock.h @@ -0,0 +1,170 @@ +/********************************************************************** + * File: pdblock.h (Formerly pdblk.h) + * Description: Page block class definition. + * Author: Ray Smith + * Created: Thu Mar 14 17:32:01 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef PDBLOCK_H +#define PDBLOCK_H + +#include "img.h" +#include "strngs.h" +#include "polyblk.h" + +#include "hpddef.h" //must be last (handpd.dll) + +class DLLSYM PDBLK; //forward decl + +CLISTIZEH (PDBLK) +class DLLSYM PDBLK //page block +{ + friend class BLOCK_RECT_IT; //block iterator + + public: + PDBLK() { //empty constructor + hand_poly = NULL; + index_ = 0; + } + PDBLK( //simple constructor + inT16 xmin, //bottom left + inT16 ymin, + inT16 xmax, //top right + inT16 ymax); + + void set_sides( //set vertex lists + ICOORDELT_LIST *left, //list of left vertices + ICOORDELT_LIST *right); //list of right vertices + + ~PDBLK () { //destructor + if (hand_poly) delete hand_poly; + } + + POLY_BLOCK *poly_block() { + return hand_poly; + } + void set_poly_block( //set the poly block + POLY_BLOCK *blk) { + hand_poly = blk; + } + void bounding_box( //get box + ICOORD &bottom_left, //bottom left + ICOORD &top_right) const { 
//topright + bottom_left = box.botleft (); + top_right = box.topright (); + } + //get real box + const TBOX &bounding_box() const { + return box; + } + + int index() const { + return index_; + } + void set_index(int value) { + index_ = value; + } + + BOOL8 contains( //is pt inside block + ICOORD pt); + + void move( // reposition block + const ICOORD vec); // by vector + + void plot( //draw histogram + ScrollView* window, //window to draw in + inT32 serial, //serial number + ScrollView::Color colour); //colour to draw in + + void show( //show image + IMAGE *image, //image to show + ScrollView* window); //window to show in + + PDBLK & operator= ( //assignment + const PDBLK & source); //from this + + protected: + POLY_BLOCK *hand_poly; //wierd as well + ICOORDELT_LIST leftside; //left side vertices + ICOORDELT_LIST rightside; //right side vertices + TBOX box; //bounding box + int index_; // Serial number of this block. +}; + +class DLLSYM BLOCK_RECT_IT //rectangle iterator +{ + public: + BLOCK_RECT_IT( //constructor + PDBLK *blkptr); //block to iterate + + //start (new) block + NEWDELETE2 (BLOCK_RECT_IT) void set_to_block ( + PDBLK * blkptr); //block to iterate + + void start_block(); //start iteration + + void forward(); //next rectangle + + BOOL8 cycled_rects() { //test end + return left_it.cycled_list () && right_it.cycled_list (); + } + + void bounding_box( //current rectangle + ICOORD &bleft, //bottom left + ICOORD &tright) { //top right + //bottom left + bleft = ICOORD (left_it.data ()->x (), ymin); + //top right + tright = ICOORD (right_it.data ()->x (), ymax); + } + + private: + inT16 ymin; //bottom of rectangle + inT16 ymax; //top of rectangle + PDBLK *block; //block to iterate + ICOORDELT_IT left_it; //boundary iterators + ICOORDELT_IT right_it; +}; + +class DLLSYM BLOCK_LINE_IT //rectangle iterator +{ + public: + BLOCK_LINE_IT ( //constructor + PDBLK * blkptr) //from block + :rect_it (blkptr) { + block = blkptr; //remember block + } + + //start (new) block 
+ NEWDELETE2 (BLOCK_LINE_IT) void set_to_block ( + PDBLK * blkptr) { //block to start + block = blkptr; //remember block + //set iterator + rect_it.set_to_block (blkptr); + } + + inT16 get_line( //get a line + inT16 y, //line to get + inT16 &xext); //output extent + + private: + PDBLK * block; //block to iterate + BLOCK_RECT_IT rect_it; //rectangle iterator +}; + +int decreasing_top_order( // + const void *row1, + const void *row2); +#endif diff --git a/ccmain/points.cpp b/ccmain/points.cpp new file mode 100644 index 000000000..88bea400f --- /dev/null +++ b/ccmain/points.cpp @@ -0,0 +1,115 @@ +/********************************************************************** + * File: points.c (Formerly coords.c) + * Description: Member functions for coordinate classes. + * Author: Ray Smith + * Created: Fri Mar 15 08:58:17 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" //precompiled headers +#include +#include "ndminx.h" +#include "serialis.h" +#include "points.h" + +ELISTIZE_S (ICOORDELT) //turn to list +bool FCOORD::normalise() { //Convert to unit vec + float len = length (); + + if (len < 0.0000000001) { + return false; + } + xcoord /= len; + ycoord /= len; + return true; +} + +// Set from the given x,y, shrinking the vector to fit if needed. 
+void ICOORD::set_with_shrink(int x, int y) { + // Fit the vector into an ICOORD, which is 16 bit. + int factor = 1; + int max_extent = MAX(abs(x), abs(y)); + if (max_extent > MAX_INT16) + factor = max_extent / MAX_INT16 + 1; + xcoord = x / factor; + ycoord = y / factor; +} + +// The fortran/basic sgn function returns -1, 0, 1 if x < 0, x == 0, x > 0 +// respectively. +static int sign(int x) { + if (x < 0) + return -1; + else + return x > 0 ? 1 : 0; +} + +// Setup for iterating over the pixels in a vector by the well-known +// Bresenham rendering algorithm. +// Starting with major/2 in the accumulator, on each step add major_step, +// and then add minor to the accumulator. When the accumulator >= major +// subtract major and step a minor step. + +void ICOORD::setup_render(ICOORD* major_step, ICOORD* minor_step, + int* major, int* minor) const { + int abs_x = abs(xcoord); + int abs_y = abs(ycoord); + if (abs_x >= abs_y) { + // X-direction is major. + major_step->xcoord = sign(xcoord); + major_step->ycoord = 0; + minor_step->xcoord = 0; + minor_step->ycoord = sign(ycoord); + *major = abs_x; + *minor = abs_y; + } else { + // Y-direction is major. 
+ major_step->xcoord = 0; + major_step->ycoord = sign(ycoord); + minor_step->xcoord = sign(xcoord); + minor_step->ycoord = 0; + *major = abs_y; + *minor = abs_x; + } +} + + +void ICOORD::serialise_asc( //convert to ascii + FILE *f //file to write + ) { + serialise_INT32(f, xcoord); + serialise_INT32(f, ycoord); +} + + +void ICOORD::de_serialise_asc( //convert from ascii + FILE *f //file to write + ) { + xcoord = (inT16) de_serialise_INT32 (f); + ycoord = (inT16) de_serialise_INT32 (f); +} + + +void ICOORDELT::serialise_asc( //convert to ascii + FILE *f //file to write + ) { + ((ICOORD *) this)->serialise_asc (f); +} + + +void ICOORDELT::de_serialise_asc( //convert from ascii + FILE *f //file to write + ) { + ((ICOORD *) this)->de_serialise_asc (f); +} diff --git a/ccmain/points.h b/ccmain/points.h new file mode 100644 index 000000000..5eaf43828 --- /dev/null +++ b/ccmain/points.h @@ -0,0 +1,302 @@ +/********************************************************************** + * File: points.h (Formerly coords.h) + * Description: Coordinate class definitions. + * Author: Ray Smith + * Created: Fri Mar 15 08:32:45 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef POINTS_H +#define POINTS_H + +#include +#include +#include "elst.h" +//#include "ipeerr.h" + +class FCOORD; + +class DLLSYM ICOORD //integer coordinate +{ + friend class FCOORD; + + public: + ICOORD() { //empty constructor + xcoord = ycoord = 0; //default zero + } + ICOORD( //constructor + inT16 xin, //x value + inT16 yin) { //y value + xcoord = xin; + ycoord = yin; + } + ~ICOORD () { //destructor + } + + //access function + NEWDELETE2 (ICOORD) inT16 x () const + { + return xcoord; + } + inT16 y() const { //access_function + return ycoord; + } + + void set_x( //rewrite function + inT16 xin) { + xcoord = xin; //write new value + } + void set_y( //rewrite function + inT16 yin) { //value to set + ycoord = yin; + } + + // Set from the given x,y, shrinking the vector to fit if needed. + void set_with_shrink(int x, int y); + + float sqlength() const { //find sq length + return (float) (xcoord * xcoord + ycoord * ycoord); + } + + float length() const { //find length + return (float) sqrt (sqlength ()); + } + + float pt_to_pt_sqdist( //sq dist between pts + const ICOORD &pt) const { + ICOORD gap; + + gap.xcoord = xcoord - pt.xcoord; + gap.ycoord = ycoord - pt.ycoord; + return gap.sqlength (); + } + + float pt_to_pt_dist( //Distance between pts + const ICOORD &pt) const { + return (float) sqrt (pt_to_pt_sqdist (pt)); + } + + float angle() const { //find angle + return (float) atan2 ((double) ycoord, (double) xcoord); + } + + BOOL8 operator== ( //test equality + const ICOORD & other) { + return xcoord == other.xcoord && ycoord == other.ycoord; + } + BOOL8 operator!= ( //test inequality + const ICOORD & other) { + return xcoord != other.xcoord || ycoord != other.ycoord; + } + friend ICOORD operator! 
( //rotate 90 deg anti + const ICOORD &); + friend ICOORD operator- ( //unary minus + const ICOORD &); + friend ICOORD operator+ ( //add + const ICOORD &, const ICOORD &); + friend ICOORD & operator+= ( //add + ICOORD &, const ICOORD &); + friend ICOORD operator- ( //subtract + const ICOORD &, const ICOORD &); + friend ICOORD & operator-= ( //subtract + ICOORD &, const ICOORD &); + friend inT32 operator% ( //scalar product + const ICOORD &, const ICOORD &); + friend inT32 operator *( //cross product + const ICOORD &, + const ICOORD &); + friend ICOORD operator *( //multiply + const ICOORD &, + inT16); + friend ICOORD operator *( //multiply + inT16, + const ICOORD &); + friend ICOORD & operator*= ( //multiply + ICOORD &, inT16); + friend ICOORD operator/ ( //divide + const ICOORD &, inT16); + //divide + friend ICOORD & operator/= (ICOORD &, inT16); + void rotate( //rotate + const FCOORD& vec); //by vector + + // Setup for iterating over the pixels in a vector by the well-known + // Bresenham rendering algorithm. + // Starting with major/2 in the accumulator, on each step move by + // major_step, and then add minor to the accumulator. When + // accumulator >= major subtract major and also move by minor_step. + void setup_render(ICOORD* major_step, ICOORD* minor_step, + int* major, int* minor) const; + + void serialise_asc( //serialise to ascii + FILE *f); + void de_serialise_asc( //serialise from ascii + FILE *f); + + protected: + inT16 xcoord; //x value + inT16 ycoord; //y value +}; + +class DLLSYM ICOORDELT:public ELIST_LINK, public ICOORD + //embedded coord list +{ + public: + ICOORDELT() { //empty constructor + } + ICOORDELT ( //constructor + //from ICOORD + ICOORD icoord):ICOORD (icoord) { + } + ICOORDELT( //constructor + inT16 xin, //x value + inT16 yin) { //y value + xcoord = xin; + ycoord = yin; + } + + /* Note that prep_serialise() dump() and de_dump() dont need to do anything + more than terminate recursion. 
*/ + + void prep_serialise() const { //set ptrs to counts + } + + void dump( //write external bits + FILE *) const { + } + + void de_dump( //read external bits + FILE *) { + } + + //serialise to ascii + make_serialise(ICOORDELT) + + static ICOORDELT* deep_copy(const ICOORDELT* src) { + ICOORDELT* elt = new ICOORDELT; + *elt = *src; + return elt; + } + + void serialise_asc(FILE * f); + void de_serialise_asc( //serialise from ascii + FILE *f); + +}; + +ELISTIZEH_S (ICOORDELT) +class DLLSYM FCOORD +{ + public: + FCOORD() { + } //empty constructor + FCOORD( //constructor + float xvalue, //coords to set + float yvalue) { + xcoord = xvalue; //set coords + ycoord = yvalue; + } + FCOORD( //make from ICOORD + ICOORD icoord) { //coords to set + xcoord = icoord.xcoord; + ycoord = icoord.ycoord; + } + + float x() const { //get coords + return xcoord; + } + float y() const { + return ycoord; + } + void set_x( //rewrite function + float xin) { + xcoord = xin; //write new value + } + void set_y( //rewrite function + float yin) { //value to set + ycoord = yin; + } + + float sqlength() const { //find sq length + return xcoord * xcoord + ycoord * ycoord; + } + + float length() const { //find length + return (float) sqrt (sqlength ()); + } + + float pt_to_pt_sqdist( //sq dist between pts + const FCOORD &pt) const { + FCOORD gap; + + gap.xcoord = xcoord - pt.xcoord; + gap.ycoord = ycoord - pt.ycoord; + return gap.sqlength (); + } + + float pt_to_pt_dist( //Distance between pts + const FCOORD &pt) const { + return (float) sqrt (pt_to_pt_sqdist (pt)); + } + + float angle() const { //find angle + return (float) atan2 (ycoord, xcoord); + } + + bool normalise(); //Convert to unit vec + + BOOL8 operator== ( //test equality + const FCOORD & other) { + return xcoord == other.xcoord && ycoord == other.ycoord; + } + BOOL8 operator!= ( //test inequality + const FCOORD & other) { + return xcoord != other.xcoord || ycoord != other.ycoord; + } + //rotate 90 deg anti + friend FCOORD operator! 
(const FCOORD &); + //unary minus + friend FCOORD operator- (const FCOORD &); + //add + friend FCOORD operator+ (const FCOORD &, const FCOORD &); + //add + friend FCOORD & operator+= (FCOORD &, const FCOORD &); + //subtract + friend FCOORD operator- (const FCOORD &, const FCOORD &); + //subtract + friend FCOORD & operator-= (FCOORD &, const FCOORD &); + //scalar product + friend float operator% (const FCOORD &, const FCOORD &); + //cross product + friend float operator *(const FCOORD &, const FCOORD &); + friend FCOORD operator *(const FCOORD &, float); + //multiply + friend FCOORD operator *(float, const FCOORD &); + //multiply + //multiply + friend FCOORD & operator*= (FCOORD &, float); + friend FCOORD operator/ (const FCOORD &, float); + //divide + void rotate( //rotate + const FCOORD vec); //by vector + //divide + friend FCOORD & operator/= (FCOORD &, float); + + private: + float xcoord; //2 floating coords + float ycoord; +}; + +#include "ipoints.h" /*do inline funcs */ +#endif diff --git a/ccmain/polyaprx.cpp b/ccmain/polyaprx.cpp new file mode 100644 index 000000000..67d0a0b5c --- /dev/null +++ b/ccmain/polyaprx.cpp @@ -0,0 +1,588 @@ +/********************************************************************** + * File: polyaprx.cpp (Formerly polygon.c) + * Description: Code for polygonal approximation from old edgeprog. + * Author: Ray Smith + * Created: Thu Nov 25 11:42:04 GMT 1993 + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" +#include +#ifdef __UNIX__ +#include +#endif +#define FASTEDGELENGTH 256 +#include "polyaprx.h" +#include "varable.h" +#include "tprintf.h" + +#define EXTERN + +EXTERN BOOL_VAR (poly_debug, FALSE, "Debug old poly"); +EXTERN BOOL_VAR (poly_wide_objects_better, TRUE, +"More accurate approx on wide things"); + +static int par1, par2; + +#define CONVEX 1 /*OUTLINE point is convex */ +#define CONCAVE 2 /*used and set only in edges */ +#define FIXED 4 /*OUTLINE point is fixed */ +#define ONHULL 8 /*on convex hull */ + +#define RUNLENGTH 1 /*length of run */ + +#define DIR 2 /*direction of run */ + +#define CORRECTION 3 /*correction of run */ +//#define MAXSHORT 32767 /*max value of short*/ +#define FLAGS 0 + +#define fixed_dist 20 //really an int_variable +#define approx_dist 15 //really an int_variable + +#define point_diff(p,p1,p2) (p).x = (p1).x - (p2).x ; (p).y = (p1).y - (p2).y +#define CROSS(a,b) ((a).x * (b).y - (a).y * (b).x) +#define LENGTH(a) ((a).x * (a).x + (a).y * (a).y) + +#define DISTANCE(a,b) (((b).x-(a).x) * ((b).x-(a).x) \ + + ((b).y-(a).y) * ((b).y-(a).y)) + +/********************************************************************** + * tesspoly_outline + * + * Approximate an outline from c form using the old tess algorithm. 
+ **********************************************************************/ + +OUTLINE *tesspoly_outline( //old approximation + C_OUTLINE *c_outline, //input + float //xheight + ) { + EDGEPT *edgept; //converted steps + EDGEPT *startpt; //start of outline + TBOX loop_box; //bounding box + inT32 area; //loop area + FCOORD pos; //vertex + FCOORD vec; //vector + POLYPT_LIST polypts; //output polygon + POLYPT *polypt; //converted point + POLYPT_IT poly_it = &polypts; //iterator + EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path + EDGEPT* edgepts = stack_edgepts; + + // Use heap memory if the stack buffer is not big enough. + if (c_outline->pathlength() > FASTEDGELENGTH) + edgepts = new EDGEPT[c_outline->pathlength()]; + + loop_box = c_outline->bounding_box (); + area = loop_box.height (); + if (!poly_wide_objects_better && loop_box.width () > area) + area = loop_box.width (); + area *= area; + edgept = edgesteps_to_edgepts (c_outline, edgepts); + fix2(edgepts, area); + edgept = poly2 (edgepts, area);/*2nd approximation */ + startpt = edgept; + do { + pos = FCOORD (edgept->pos.x, edgept->pos.y); + vec = FCOORD (edgept->vec.x, edgept->vec.y); + polypt = new POLYPT (pos, vec); + //add to list + poly_it.add_after_then_move (polypt); + edgept = edgept->next; + } + while (edgept != startpt); + if (edgepts != stack_edgepts) + delete [] edgepts; + if (poly_it.length() <= 2) + return NULL; + else + return new OUTLINE(&poly_it); +} + + +/********************************************************************** + * edgesteps_to_edgepts + * + * Convert a C_OUTLINE to EDGEPTs. 
**********************************************************************/

// Run-length encodes the steps of c_outline into the caller-supplied
// edgepts array and links the entries into a closed doubly-linked ring.
// Consecutive steps with the same direction are merged into one EDGEPT
// whose vec is the step vector multiplied by the run length.
// Returns &edgepts[0].
// NOTE(review): the array is indexed up to the number of direction runs;
// the only caller visible here sizes it from c_outline->pathlength().
EDGEPT *
edgesteps_to_edgepts (           //convert outline
C_OUTLINE * c_outline,           //input
EDGEPT edgepts[]                 //output is array
) {
  inT32 length;                  //steps in path
  ICOORD pos;                    //current coords
  inT32 stepindex;               //current step
  inT32 stepinc;                 //increment
  inT32 epindex;                 //current EDGEPT
  inT32 count;                   //repeated steps
  ICOORD vec;                    //for this 8 step
  ICOORD prev_vec;
  inT8 epdir;                    //of this step
  DIR128 prevdir;                //previous dir
  DIR128 dir;                    //of this step

  pos = c_outline->start_pos (); //start of loop
  length = c_outline->pathlength ();
  stepindex = 0;
  epindex = 0;
  prevdir = -1;
  count = 0;
  do {
    dir = c_outline->step_dir (stepindex);
    vec = c_outline->step (stepindex);
    // When the next step's direction differs from this one by exactly -32,
    // the two steps are consumed together as a single combined step.
    if (stepindex < length - 1
    && c_outline->step_dir (stepindex + 1) - dir == -32) {
      dir += 128 - 16;
      vec += c_outline->step (stepindex + 1);
      stepinc = 2;
    }
    else
      stepinc = 1;
    if (count == 0) {
      // very first step: start the first run
      prevdir = dir;
      prev_vec = vec;
    }
    if (prevdir.get_dir () != dir.get_dir ()) {
      // Direction changed: emit an EDGEPT for the run that just ended.
      edgepts[epindex].pos.x = pos.x ();
      edgepts[epindex].pos.y = pos.y ();
      prev_vec *= count;
      edgepts[epindex].vec.x = prev_vec.x ();
      edgepts[epindex].vec.y = prev_vec.y ();
      pos += prev_vec;
      edgepts[epindex].flags[RUNLENGTH] = count;
      // For epindex == 0 this forms &edgepts[-1]; edgepts[0].prev is
      // overwritten with the real predecessor just before returning.
      edgepts[epindex].prev = &edgepts[epindex - 1];
      edgepts[epindex].flags[FLAGS] = 0;
      edgepts[epindex].next = &edgepts[epindex + 1];
      // Reduce the 128-step direction to a 3-bit code (0..7).
      prevdir += 64;
      epdir = (DIR128) 0 - prevdir;
      epdir >>= 4;
      epdir &= 7;
      edgepts[epindex].flags[DIR] = epdir;
      epindex++;
      prevdir = dir;
      prev_vec = vec;
      count = 1;
    }
    else
      count++;
    stepindex += stepinc;
  }
  while (stepindex < length);
  // Emit the final run and close the ring back onto edgepts[0].
  edgepts[epindex].pos.x = pos.x ();
  edgepts[epindex].pos.y = pos.y ();
  prev_vec *= count;
  edgepts[epindex].vec.x = prev_vec.x ();
  edgepts[epindex].vec.y = prev_vec.y ();
  pos += prev_vec;
  edgepts[epindex].flags[RUNLENGTH] = count;
  edgepts[epindex].flags[FLAGS] = 0;
  edgepts[epindex].prev = &edgepts[epindex - 1];
  edgepts[epindex].next = &edgepts[0];
  prevdir += 64;
  epdir = (DIR128) 0 - prevdir;
  epdir >>= 4;
  epdir &= 7;
  edgepts[epindex].flags[DIR] = epdir;
  edgepts[0].prev = &edgepts[epindex];
  // The accumulated position must have returned to the starting point.
  ASSERT_HOST (pos.x () == c_outline->start_pos ().x ()
    && pos.y () == c_outline->start_pos ().y ());
  return &edgepts[0];
}


/**********************************************************************
 *fix2(start,area) fixes points on the outline according to a trial method*
 **********************************************************************/

//#pragma OPT_LEVEL 1  /*stop compiler bugs*/

// First approximation pass: sets the FIXED bit in flags[FLAGS] on selected
// points of the ring that starts at `start`. Four stages:
//   1. walk the ring, fixing points where the direction pattern changes;
//   2. fix both ends of every run whose RUNLENGTH is at least 8;
//   3. unfix isolated single-step fixed pairs;
//   4. while more than 3 fixed points remain, unfix one of any pair of
//      consecutive fixed points whose squared distance is <= gapmin.
// `area` is clamped to at least 450 before gapmin is derived from it.
// NOTE(review): `register` is deprecated and was removed in C++17; this
// file needs an older language mode or the keyword dropped.
void fix2(                //polygonal approx
          EDGEPT *start,  /*loop to approximate */
          int area) {
  register EDGEPT *edgept;       /*current point */
  register EDGEPT *edgept1;
  register EDGEPT *loopstart;    /*modified start of loop */
  register EDGEPT *linestart;    /*start of line segment */
  register int dir1, dir2;       /*directions of line */
  register int sum1, sum2;       /*lengths in dir1,dir2 */
  int stopped;                   /*completed flag */
  int fixed_count;               //no of fixed points
  int d01, d12, d23, gapmin;
  TPOINT d01vec, d12vec, d23vec;
  register EDGEPT *edgefix, *startfix;
  register EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;

  edgept = start;                /*start of loop */
  // Note: dir1 is assigned inside the loop condition.
  while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3
    && (dir1 =
    (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2
    && dir1 != 6)
    edgept = edgept->next;       /*find suitable start */
  loopstart = edgept;            /*remember start */

  stopped = 0;                   /*not finished yet */
  edgept->flags[FLAGS] |= FIXED; /*fix it */
  do {
    linestart = edgept;          /*possible start of line */
    dir1 = edgept->flags[DIR];   /*first direction */
                                 /*length of dir1 */
    sum1 = edgept->flags[RUNLENGTH];
    edgept = edgept->next;
    dir2 = edgept->flags[DIR];   /*2nd direction */
                                 /*length in dir2 */
    sum2 = edgept->flags[RUNLENGTH];
    // dir1 and dir2 differ by at most one 3-bit direction step
    if (((dir1 - dir2 + 1) & 7) < 3) {
      // Follow the run while the directions keep alternating between the
      // same two values, summing the length travelled in each.
      while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) {
        edgept = edgept->next;   /*look at next */
        if (edgept->flags[DIR] == dir1)
                                 /*sum lengths */
          sum1 += edgept->flags[RUNLENGTH];
        else
          sum2 += edgept->flags[RUNLENGTH];
      }

      if (edgept == loopstart)
        stopped = 1;             /*finished */
      if (sum2 + sum1 > 2
        && linestart->prev->flags[DIR] == dir2
        && (linestart->prev->flags[RUNLENGTH] >
        linestart->flags[RUNLENGTH] || sum2 > sum1)) {
                                 /*start is back one */
        linestart = linestart->prev;
        linestart->flags[FLAGS] |= FIXED;
      }

      if (((edgept->next->flags[DIR] - edgept->flags[DIR] + 1) & 7) >= 3
        || (edgept->flags[DIR] == dir1 && sum1 >= sum2)
        || ((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH]
        || (edgept->flags[DIR] == dir2 && sum2 >= sum1))
        && linestart->next != edgept))
        edgept = edgept->next;
    }
                                 /*sharp bend */
    edgept->flags[FLAGS] |= FIXED;
  }
                                 /*do whole loop */
  while (edgept != loopstart && !stopped);

  edgept = start;
  do {
    // Both alternatives require RUNLENGTH >= 8 and between them cover every
    // DIR value, so the test reduces to "run length is at least 8".
    if (((edgept->flags[RUNLENGTH] >= 8) &&
      (edgept->flags[DIR] != 2) && (edgept->flags[DIR] != 6)) ||
      ((edgept->flags[RUNLENGTH] >= 8) &&
      ((edgept->flags[DIR] == 2) || (edgept->flags[DIR] == 6)))) {
      edgept->flags[FLAGS] |= FIXED;
      edgept1 = edgept->next;
      edgept1->flags[FLAGS] |= FIXED;
    }
    edgept = edgept->next;
  }
  while (edgept != start);

  edgept = start;
  do {
                                 /*single fixed step */
    if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1
                                 /*next is fixed too, previous is free */
      && edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0
                                 /*same pair of dirs */
      && (edgept->next->next->flags[FLAGS] & FIXED) == 0 && edgept->prev->flags[DIR] == edgept->next->flags[DIR] && edgept->prev->prev->flags[DIR] == edgept->next->next->flags[DIR]
      && ((edgept->prev->flags[DIR] - edgept->flags[DIR] + 1) & 7) < 3) {
                                 /*unfix it */
      edgept->flags[FLAGS] &= ~FIXED;
      edgept->next->flags[FLAGS] &= ~FIXED;
    }
    edgept = edgept->next;       /*do all points */
  }
  while (edgept != start);       /*until finished */

  stopped = 0;
  if (area < 450)
    area = 450;

  // LENGTH() yields a squared length, so gapmin is a squared-distance limit.
  gapmin = area * fixed_dist * fixed_dist / 44000;

  edgept = start;
  fixed_count = 0;
  do {
    if (edgept->flags[FLAGS] & FIXED)
      fixed_count++;
    edgept = edgept->next;
  }
  while (edgept != start);
  // Locate four successive fixed points edgefix0..edgefix3. With fewer than
  // four fixed points on the ring these walks wrap around and revisit points.
  // NOTE(review): the walks do not terminate if no point is FIXED — confirm
  // the earlier stages always leave at least one.
  while ((edgept->flags[FLAGS] & FIXED) == 0)
    edgept = edgept->next;
  edgefix0 = edgept;

  edgept = edgept->next;
  while ((edgept->flags[FLAGS] & FIXED) == 0)
    edgept = edgept->next;
  edgefix1 = edgept;

  edgept = edgept->next;
  while ((edgept->flags[FLAGS] & FIXED) == 0)
    edgept = edgept->next;
  edgefix2 = edgept;

  edgept = edgept->next;
  while ((edgept->flags[FLAGS] & FIXED) == 0)
    edgept = edgept->next;
  edgefix3 = edgept;

  startfix = edgefix2;

  do {
    if (fixed_count <= 3)
      break;                     //already too few
    point_diff (d12vec, edgefix1->pos, edgefix2->pos);
    d12 = LENGTH (d12vec);
    if (d12 <= gapmin) {
      // edgefix1 and edgefix2 are too close: drop the one whose other
      // neighbouring gap is the shorter.
      point_diff (d01vec, edgefix0->pos, edgefix1->pos);
      d01 = LENGTH (d01vec);
      point_diff (d23vec, edgefix2->pos, edgefix3->pos);
      d23 = LENGTH (d23vec);
      if (d01 > d23) {
        edgefix2->flags[FLAGS] &= ~FIXED;
        fixed_count--;
        /* if ( plots[EDGE] & PATHS )
           mark(edgefd,edgefix2->pos.x,edgefix2->pos.y,PLUS);
         */
      }
      else {
        edgefix1->flags[FLAGS] &= ~FIXED;
        fixed_count--;
        /* if ( plots[EDGE] & PATHS )
           mark(edgefd,edgefix1->pos.x,edgefix1->pos.y,PLUS);
         */
        edgefix1 = edgefix2;
      }
    }
    else {
      edgefix0 = edgefix1;
      edgefix1 = edgefix2;
    }
    edgefix2 = edgefix3;
    // advance edgefix3 to the next fixed point, noting a full lap
    edgept = edgept->next;
    while ((edgept->flags[FLAGS] & FIXED) == 0) {
      if (edgept == startfix)
        stopped = 1;
      edgept = edgept->next;
    }
    edgefix3 = edgept;
    edgefix = edgefix2;
  }
  while ((edgefix != startfix) && (!stopped));
}


//#pragma OPT_LEVEL 2  /*stop compiler bugs*/

/**********************************************************************
 *poly2(startpt,area,path) applies a second approximation to the outline
 *using the points which have been fixed
by the first approximation*
 **********************************************************************/

// Second approximation pass over the ring starting at startpt.
// Each stretch between consecutive FIXED points (cut short once its summed
// run length reaches 126) is handed to cutline(), which may fix further
// points. This repeats with `area` halved until at least 3 points are
// fixed. The ring is then relinked so that it contains only the fixed
// points, and each vec is recomputed from the new neighbours.
// `area` is clamped to at least 1200. The globals par1 and par2 are set
// here from approx_dist and are read by cutline().
// Returns a point on the relinked ring, or startpt if no starting point
// for the re-approximation was found.
EDGEPT *poly2(                  //second poly
              EDGEPT *startpt,  /*start of loop */
              int area          /*area of blob box */
             ) {
  register EDGEPT *edgept;       /*current outline point */
  EDGEPT *loopstart;             /*starting point */
  register EDGEPT *linestart;    /*start of line */
  register int edgesum;          /*correction count */

  if (area < 1200)
    area = 1200;                 /*minimum value */

                                 /*1200(4) */
  par1 = 4500 / (approx_dist * approx_dist);
                                 /*1200(6) */
  par2 = 6750 / (approx_dist * approx_dist);

  loopstart = NULL;              /*not found it yet */
  edgept = startpt;              /*start of loop */

  do {
                                 /*current point fixed */
    if (edgept->flags[FLAGS] & FIXED
                                 /*and next not */
    && (edgept->next->flags[FLAGS] & FIXED) == 0) {
      loopstart = edgept;        /*start of repoly */
      break;
    }
    edgept = edgept->next;       /*next point */
  }
  while (edgept != startpt);     /*until found or finished */

  // No fixed point is followed by a free one. If startpt itself is free,
  // fix it and start there; otherwise loopstart stays NULL and the
  // re-approximation below is skipped.
  if (loopstart == NULL && (startpt->flags[FLAGS] & FIXED) == 0) {
                                 /*fixed start of loop */
    startpt->flags[FLAGS] |= FIXED;
    loopstart = startpt;         /*or start of loop */
  }
  if (loopstart) {
    do {
      edgept = loopstart;        /*first to do */
      do {
        linestart = edgept;
        edgesum = 0;             /*sum of lengths */
        do {
                                 /*sum lengths */
          edgesum += edgept->flags[RUNLENGTH];
          edgept = edgept->next; /*move on */
        }
        while ((edgept->flags[FLAGS] & FIXED) == 0
          && edgept != loopstart && edgesum < 126);
        if (poly_debug)
          tprintf
            ("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n",
            linestart->pos.x, linestart->pos.y, linestart->flags[DIR],
            linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x,
            edgept->pos.y);
                                 /*reapproximate */
        cutline(linestart, edgept, area);

        // skip over points whose successor is already fixed
        while ((edgept->next->flags[FLAGS] & FIXED)
          && edgept != loopstart)
          edgept = edgept->next; /*look for next non-fixed */
      }
                                 /*do all the loop */
      while (edgept != loopstart);
      edgesum = 0;               // reused here as the fixed-point counter
      do {
        if (edgept->flags[FLAGS] & FIXED)
          edgesum++;
        edgept = edgept->next;
      }
                                 //count fixed pts
      while (edgept != loopstart);
      if (edgesum < 3)
        area /= 2;               //must have 3 pts
    }
    while (edgesum < 3);
    // Relink the ring through the fixed points only and recompute vec.
    do {
      linestart = edgept;
      do {
        edgept = edgept->next;
      }
      while ((edgept->flags[FLAGS] & FIXED) == 0);
      linestart->next = edgept;
      edgept->prev = linestart;
      linestart->vec.x = edgept->pos.x - linestart->pos.x;
      linestart->vec.y = edgept->pos.y - linestart->pos.y;
    }
    while (edgept != loopstart);
  }
  else
    edgept = startpt;            /*start of loop */

  loopstart = edgept;            /*new start */
  return loopstart;              /*correct exit */
}


/**********************************************************************
 *cutline(first,last,area) straightens out a line by partitioning
 *and joining the ends by a straight line*
 **********************************************************************/

// Recursively refines the stretch of ring from `first` to `last`.
// For each intermediate point the squared cross product of its offset from
// `first` with the chord first->last is taken. If the largest deviation or
// the mean squared deviation is too large relative to `area` (scaled by the
// globals par1 / par2), or the chord's larger component is 126 or more, the
// worst point is marked FIXED and both halves are refined in turn.
// A stretch with no intermediate point is left untouched.
void cutline(                //recursive refine
             EDGEPT *first,  /*ends of line */
             EDGEPT *last,
             int area        /*area of object */
            ) {
  register EDGEPT *edge;         /*current edge */
  TPOINT vecsum;                 /*vector sum */
  int vlen;                      /*approx length of vecsum */
  TPOINT vec;                    /*accumulated vector */
  EDGEPT *maxpoint;              /*worst point */
  int maxperp;                   /*max deviation */
  register int perp;             /*perp distance */
  int ptcount;                   /*no of points */
  int squaresum;                 /*sum of perps */

  edge = first;                  /*start of line */
  if (edge->next == last)
    return;                      /*simple line */

                                 /*vector sum */
  vecsum.x = last->pos.x - edge->pos.x;
  vecsum.y = last->pos.y - edge->pos.y;
  if (vecsum.x == 0 && vecsum.y == 0) {
                                 /*special case */
    // first and last coincide: use the reversed incoming vector instead
    vecsum.x = -edge->prev->vec.x;
    vecsum.y = -edge->prev->vec.y;
  }
                                 /*absolute value */
  // vlen = max(|vecsum.x|, |vecsum.y|)
  vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
  if (vecsum.y > vlen)
    vlen = vecsum.y;             /*maximum */
  else if (-vecsum.y > vlen)
    vlen = -vecsum.y;            /*absolute value */

  vec.x = edge->vec.x;           /*accumulated vector */
  vec.y = edge->vec.y;
  maxperp = 0;                   /*none yet */
  squaresum = ptcount = 0;
  edge = edge->next;             /*move to actual point */
  maxpoint = edge;               /*in case there isn't one */
  do {
    perp = CROSS (vec, vecsum);  /*get perp distance */
    if (perp != 0) {
      perp *= perp;              /*squared deviation */
    }
    squaresum += perp;           /*sum squares */
    ptcount++;                   /*count points */
    if (poly_debug)
      tprintf ("Cutline:Final perp=%d\n", perp);
    if (perp > maxperp) {
      maxperp = perp;
      maxpoint = edge;           /*find greatest deviation */
    }
    vec.x += edge->vec.x;        /*accumulate vectors */
    vec.y += edge->vec.y;
    edge = edge->next;
  }
  while (edge != last);          /*test all line */

  // LENGTH() is the squared chord length. Dividing the squared cross
  // products by it gives squared perpendicular distances, kept here as
  // fixed-point values scaled by 256.
  perp = LENGTH (vecsum);
  ASSERT_HOST (perp != 0);

  if (maxperp < 256 * MAX_INT16) {
    maxperp <<= 8;
    maxperp /= perp;             /*true max perp */
  }
  else {
    maxperp /= perp;
    maxperp <<= 8;               /*avoid overflow */
  }
  if (squaresum < 256 * MAX_INT16)
                                 /*mean squared perp */
    perp = (squaresum << 8) / (perp * ptcount);
  else
                                 /*avoid overflow */
    perp = (squaresum / perp << 8) / ptcount;

  if (poly_debug)
    tprintf ("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n",
      area, maxperp / 256.0, maxperp * 200.0 / area,
      perp / 256.0, perp * 300.0 / area);
  if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
    maxpoint->flags[FLAGS] |= FIXED;
                                 /*partitions */
    cutline(first, maxpoint, area);
    cutline(maxpoint, last, area);
  }
}
diff --git a/ccmain/polyaprx.h b/ccmain/polyaprx.h
new file mode 100644
index 000000000..6e6feaef5
--- /dev/null
+++ b/ccmain/polyaprx.h
@@ -0,0 +1,51 @@
/**********************************************************************
 * File:        polyaprx.h  (Formerly polygon.h)
 * Description: Code for polygonal approximation from old edgeprog.
+ * Author: Ray Smith + * Created: Thu Nov 25 11:42:04 GMT 1993 + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef POLYAPRX_H +#define POLYAPRX_H + +#include "tessclas.h" +#include "poutline.h" +#include "coutln.h" + +OUTLINE *tesspoly_outline( //old approximation + C_OUTLINE *c_outline, //input + float //xheight + ); +EDGEPT *edgesteps_to_edgepts ( //convert outline +C_OUTLINE * c_outline, //input +EDGEPT edgepts[] //output is array +); +void fix2( //polygonal approx + EDGEPT *start, /*loop to approimate */ + int area); +EDGEPT *poly2( //second poly + EDGEPT *startpt, /*start of loop */ + int area /*area of blob box */ + ); +void cutline( //recursive refine + EDGEPT *first, /*ends of line */ + EDGEPT *last, + int area /*area of object */ + ); +#define fixed_dist 20 //really an int_variable +#define point_diff(p,p1,p2) (p).x = (p1).x - (p2).x ; (p).y = (p1).y - (p2).y +#define CROSS(a,b) ((a).x * (b).y - (a).y * (b).x) +#define LENGTH(a) ((a).x * (a).x + (a).y * (a).y) +#endif diff --git a/ccmain/polyblk.cpp b/ccmain/polyblk.cpp new file mode 100644 index 000000000..c2a51a2e4 --- /dev/null +++ b/ccmain/polyblk.cpp @@ -0,0 +1,416 @@ +/********************************************************************** + * File: polyblk.c (Formerly poly_block.c) + * Description: Polygonal blocks + * Author: Sheelagh Lloyd? 
+ * Created: + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" +#include +#include +#include +#include "elst.h" +#include "polyblk.h" + +#include "hpddef.h" // must be last (handpd.dll) + +#define PBLOCK_LABEL_SIZE 150 +#define INTERSECTING MAX_INT16 + +int lessthan(const void *first, const void *second); + +POLY_BLOCK::POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType t) { + ICOORDELT_IT v = &vertices; + + vertices.clear(); + v.move_to_first(); + v.add_list_before(points); + compute_bb(); + type = t; +} + + +/********************************************************************** + * POLY_BLOCK::compute_bb + * + * Compute the bounding box from the outline points. 
+ **********************************************************************/ + +void POLY_BLOCK::compute_bb() { //constructor + ICOORD ibl, itr; //integer bb + ICOORD botleft; //bounding box + ICOORD topright; + ICOORD pos; //current pos; + ICOORDELT_IT pts = &vertices; //iterator + + botleft = *pts.data (); + topright = botleft; + do { + pos = *pts.data (); + if (pos.x () < botleft.x ()) + //get bounding box + botleft = ICOORD (pos.x (), botleft.y ()); + if (pos.y () < botleft.y ()) + botleft = ICOORD (botleft.x (), pos.y ()); + if (pos.x () > topright.x ()) + topright = ICOORD (pos.x (), topright.y ()); + if (pos.y () > topright.y ()) + topright = ICOORD (topright.x (), pos.y ()); + pts.forward (); + } + while (!pts.at_first ()); + ibl = ICOORD (botleft.x (), botleft.y ()); + itr = ICOORD (topright.x (), topright.y ()); + box = TBOX (ibl, itr); +} + + +/********************************************************************** + * POLY_BLOCK::winding_number + * + * Return the winding number of the outline around the given point. 
+ **********************************************************************/ + +inT16 POLY_BLOCK::winding_number( //winding number + const ICOORD &point //point to wind around + ) { + inT16 count; //winding count + ICOORD pt; //current point + ICOORD vec; //point to current point + ICOORD vvec; //current point to next point + inT32 cross; //cross product + ICOORDELT_IT it = &vertices; //iterator + + count = 0; + do { + pt = *it.data (); + vec = pt - point; + vvec = *it.data_relative (1) - pt; + //crossing the line + if (vec.y () <= 0 && vec.y () + vvec.y () > 0) { + cross = vec * vvec; //cross product + if (cross > 0) + count++; //crossing right half + else if (cross == 0) + return INTERSECTING; //going through point + } + else if (vec.y () > 0 && vec.y () + vvec.y () <= 0) { + cross = vec * vvec; + if (cross < 0) + count--; //crossing back + else if (cross == 0) + return INTERSECTING; //illegal + } + else if (vec.y () == 0 && vec.x () == 0) + return INTERSECTING; + it.forward (); + } + while (!it.at_first ()); + return count; //winding number +} + + +// Returns true if other is inside this. 
+bool POLY_BLOCK::contains(POLY_BLOCK *other) { + inT16 count; // winding count + ICOORDELT_IT it = &vertices; // iterator + ICOORD vertex; + + if (!box.overlap (*(other->bounding_box ()))) + return false; // can't be contained + + /* check that no vertex of this is inside other */ + + do { + vertex = *it.data (); + // get winding number + count = other->winding_number (vertex); + if (count != INTERSECTING) + if (count != 0) + return false; + it.forward (); + } + while (!it.at_first ()); + + /* check that all vertices of other are inside this */ + + //switch lists + it.set_to_list (other->points ()); + do { + vertex = *it.data (); + //try other way round + count = winding_number (vertex); + if (count != INTERSECTING) + if (count == 0) + return false; + it.forward (); + } + while (!it.at_first ()); + return true; +} + + +/********************************************************************** + * POLY_BLOCK::rotate + * + * Rotate the POLY_BLOCK. + **********************************************************************/ + +void POLY_BLOCK::rotate( //constructor + FCOORD rotation //cos,sin of angle + ) { + FCOORD pos; //current pos; + ICOORDELT *pt; //current point + ICOORDELT_IT pts = &vertices; //iterator + + do { + pt = pts.data (); + pos.set_x (pt->x ()); + pos.set_y (pt->y ()); + pos.rotate (rotation); + pt->set_x ((inT16) (floor (pos.x () + 0.5))); + pt->set_y ((inT16) (floor (pos.y () + 0.5))); + pts.forward (); + } + while (!pts.at_first ()); + compute_bb(); +} + + +/********************************************************************** + * POLY_BLOCK::move + * + * Move the POLY_BLOCK. 
+ **********************************************************************/ + +void POLY_BLOCK::move( //constructor + ICOORD shift //cos,sin of angle + ) { + ICOORDELT *pt; //current point + ICOORDELT_IT pts = &vertices; //iterator + + do { + pt = pts.data (); + *pt += shift; + pts.forward (); + } + while (!pts.at_first ()); + compute_bb(); +} + + +#ifndef GRAPHICS_DISABLED +void POLY_BLOCK::plot(ScrollView* window, inT32 num) { + ICOORDELT_IT v = &vertices; + + window->Pen(ColorForPolyBlockType(type)); + + v.move_to_first (); + + if (num > 0) { + window->TextAttributes("Times", 80, false, false, false); + char temp_buff[34]; + #ifdef __UNIX__ + sprintf(temp_buff, INT32FORMAT, num); + #else + ltoa (num, temp_buff, 10); + #endif + window->Text(v.data ()->x (), v.data ()->y (), temp_buff); + } + + window->SetCursor(v.data ()->x (), v.data ()->y ()); + for (v.mark_cycle_pt (); !v.cycled_list (); v.forward ()) { + window->DrawTo(v.data ()->x (), v.data ()->y ()); + } + v.move_to_first (); + window->DrawTo(v.data ()->x (), v.data ()->y ()); +} + + +void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) { + inT16 y; + inT16 width; + PB_LINE_IT *lines; + ICOORDELT_LIST *segments; + ICOORDELT_IT s_it; + + lines = new PB_LINE_IT (this); + window->Pen(colour); + + for (y = this->bounding_box ()->bottom (); + y <= this->bounding_box ()->top (); y++) { + segments = lines->get_line (y); + if (!segments->empty ()) { + s_it.set_to_list (segments); + for (s_it.mark_cycle_pt (); !s_it.cycled_list (); s_it.forward ()) { + // Note different use of ICOORDELT, x coord is x coord of pixel + // at the start of line segment, y coord is length of line segment + // Last pixel is start pixel + length. + width = s_it.data ()->y (); + window->SetCursor(s_it.data ()->x (), y); + window->DrawTo(s_it.data ()->x () + (float) width, y); + } + } + } +} +#endif + + +// Returns true if the polygons of other and this overlap. 
+bool POLY_BLOCK::overlap(POLY_BLOCK *other) { + inT16 count; // winding count + ICOORDELT_IT it = &vertices; // iterator + ICOORD vertex; + + if (!box.overlap(*(other->bounding_box()))) + return false; // can't be any overlap. + + /* see if a vertex of this is inside other */ + + do { + vertex = *it.data (); + // get winding number + count = other->winding_number (vertex); + if (count != INTERSECTING) + if (count != 0) + return true; + it.forward (); + } + while (!it.at_first ()); + + /* see if a vertex of other is inside this */ + + // switch lists + it.set_to_list (other->points ()); + do { + vertex = *it.data(); + // try other way round + count = winding_number (vertex); + if (count != INTERSECTING) + if (count != 0) + return true; + it.forward (); + } + while (!it.at_first ()); + return false; +} + + +ICOORDELT_LIST *PB_LINE_IT::get_line(inT16 y) { + ICOORDELT_IT v, r; + ICOORDELT_LIST *result; + ICOORDELT *x, *current, *previous; + float fy, fx; + + fy = (float) (y + 0.5); + result = new ICOORDELT_LIST (); + r.set_to_list (result); + v.set_to_list (block->points ()); + + for (v.mark_cycle_pt (); !v.cycled_list (); v.forward ()) { + if (((v.data_relative (-1)->y () > y) && (v.data ()->y () <= y)) + || ((v.data_relative (-1)->y () <= y) && (v.data ()->y () > y))) { + previous = v.data_relative (-1); + current = v.data (); + fx = (float) (0.5 + previous->x () + + (current->x () - previous->x ()) * (fy - + previous->y ()) / + (current->y () - previous->y ())); + x = new ICOORDELT ((inT16) fx, 0); + r.add_to_end (x); + } + } + + if (!r.empty ()) { + r.sort (lessthan); + for (r.mark_cycle_pt (); !r.cycled_list (); r.forward ()) + x = r.data (); + for (r.mark_cycle_pt (); !r.cycled_list (); r.forward ()) { + r.data ()->set_y (r.data_relative (1)->x () - r.data ()->x ()); + r.forward (); + delete (r.extract ()); + } + } + + return result; +} + + +int lessthan(const void *first, const void *second) { + ICOORDELT *p1 = (*(ICOORDELT **) first); + ICOORDELT *p2 = 
(*(ICOORDELT **) second); + + if (p1->x () < p2->x ()) + return (-1); + else if (p1->x () > p2->x ()) + return (1); + else + return (0); +} + + +/********************************************************************** + * POLY_BLOCK::serialise_asc + * + * Converto to ascii file. + **********************************************************************/ + +void POLY_BLOCK::serialise_asc( //convert to ascii + FILE *f //file to use + ) { + vertices.serialise_asc (f); + box.serialise_asc (f); + serialise_INT32(f, type); +} + + +/********************************************************************** + * POLY_BLOCK::de_serialise_asc + * + * Converto from ascii file. + **********************************************************************/ + +void POLY_BLOCK::de_serialise_asc( //convert from ascii + FILE *f //file to use + ) { + vertices.de_serialise_asc (f); + box.de_serialise_asc (f); + type = (PolyBlockType) de_serialise_INT32 (f); +} + + +// Returns a color to draw the given type. +ScrollView::Color POLY_BLOCK::ColorForPolyBlockType(PolyBlockType type) { + const ScrollView::Color kPBColors[PT_COUNT] = { + ScrollView::WHITE, + ScrollView::BLUE, + ScrollView::CYAN, + ScrollView::MEDIUM_BLUE, + ScrollView::MAGENTA, + ScrollView::YELLOW, + ScrollView::RED, + ScrollView::MAROON, + ScrollView::ORANGE, + ScrollView::GREEN, + ScrollView::LIME_GREEN, + ScrollView::DARK_GREEN, + ScrollView::GREY + }; + if (type >= 0 && type < PT_COUNT) { + return kPBColors[type]; + } + return ScrollView::WHITE; +} + diff --git a/ccmain/polyblk.h b/ccmain/polyblk.h new file mode 100644 index 000000000..aeb6ddb49 --- /dev/null +++ b/ccmain/polyblk.h @@ -0,0 +1,160 @@ +/********************************************************************** + * File: polyblk.h (Formerly poly_block.h) + * Description: Polygonal blocks + * Author: Sheelagh Lloyd? + * Created: + * + * (C) Copyright 1993, Hewlett-Packard Ltd. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ +#ifndef POLYBLK_H +#define POLYBLK_H + +#include "rect.h" +#include "points.h" +#include "scrollview.h" +#include "elst.h" + +#include "hpddef.h" // must be last (handpd.dll) + +// Possible types for a POLY_BLOCK or ColPartition. Must be kept in sync with +// kPBColors. Used extensively by ColPartition, but polyblk is a lower-level +// file. +enum PolyBlockType { + PT_UNKNOWN, // Type is not yet known. Keep as the first element. + PT_FLOWING_TEXT, // Text that lives inside a column. + PT_HEADING_TEXT, // Text that spans more than one column. + PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region. + PT_TABLE, // Partition belonging to a table region. + PT_VERTICAL_TEXT, // Text-line runs vertically. + PT_FLOWING_IMAGE, // Image that lives inside a column. + PT_HEADING_IMAGE, // Image that spans more than one column. + PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region. + PT_FLOWING_LINE, // H-Line that lives inside a column. + PT_HEADING_LINE, // H-Line that spans more than one column. + PT_PULLOUT_LINE, // H-Line that is in a cross-column pull-out region. + PT_NOISE, // Lies outside of any column. 
+ PT_COUNT +}; + +class DLLSYM POLY_BLOCK { + public: + POLY_BLOCK() { + } + POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType type); + ~POLY_BLOCK () { + } + + TBOX *bounding_box() { // access function + return &box; + } + + ICOORDELT_LIST *points() { // access function + return &vertices; + } + + void compute_bb(); + + PolyBlockType isA() const { + return type; + } + + bool IsText() const { + return IsTextType(type); + } + + // Rotate about the origin by the given rotation. (Analogous to + // multiplying by a complex number. + void rotate(FCOORD rotation); + // Move by adding shift to all coordinates. + void move(ICOORD shift); + + void plot(ScrollView* window, inT32 num); + + void fill(ScrollView* window, ScrollView::Color colour); + + // Returns true if other is inside this. + bool contains(POLY_BLOCK *other); + + // Returns true if the polygons of other and this overlap. + bool overlap(POLY_BLOCK *other); + + // Returns the winding number of this around the test_pt. + // Positive for anticlockwise, negative for clockwise, and zero for + // test_pt outside this. + inT16 winding_number(const ICOORD &test_pt); + + // Serialization. + void prep_serialise() { + vertices.prep_serialise(); + } + void dump(FILE *f) { + vertices.dump(f); + } + void de_dump(FILE *f) { + vertices.de_dump(f); + } + make_serialise(POLY_BLOCK) + void serialise_asc(FILE * f); + void de_serialise_asc(FILE *f); + + // Static utility functions to handle the PolyBlockType. + + // Returns a color to draw the given type. 
+ static ScrollView::Color ColorForPolyBlockType(PolyBlockType type); + + // Returns true if PolyBlockType is of horizontal line type + static bool IsLineType(PolyBlockType type) { + return (type == PT_FLOWING_LINE) || (type == PT_HEADING_LINE) || + (type == PT_PULLOUT_LINE); + } + // Returns true if PolyBlockType is of image type + static bool IsImageType(PolyBlockType type) { + return (type == PT_FLOWING_IMAGE) || (type == PT_HEADING_IMAGE) || + (type == PT_PULLOUT_IMAGE); + } + // Returns true if PolyBlockType is of text type + static bool IsTextType(PolyBlockType type) { + return (type == PT_FLOWING_TEXT) || (type == PT_HEADING_TEXT) || + (type == PT_PULLOUT_TEXT) || (type == PT_TABLE) || + (type == PT_VERTICAL_TEXT); + } + + private: + ICOORDELT_LIST vertices; // vertices + TBOX box; // bounding box + PolyBlockType type; // Type of this region. +}; + +// Class to iterate the scanlines of a polygon. +class DLLSYM PB_LINE_IT { + public: + PB_LINE_IT(POLY_BLOCK *blkptr) { + block = blkptr; + } + + NEWDELETE2(PB_LINE_IT) + + void set_to_block(POLY_BLOCK * blkptr) { + block = blkptr; + } + + // Returns a list of runs of pixels for the given y coord. + // Each element of the returned list is the start (x) and extent(y) of + // a run inside the region. + // Delete the returned list after use. + ICOORDELT_LIST *get_line(inT16 y); + + private: + POLY_BLOCK * block; +}; +#endif diff --git a/ccmain/polyblob.cpp b/ccmain/polyblob.cpp new file mode 100644 index 000000000..d4a568eb6 --- /dev/null +++ b/ccmain/polyblob.cpp @@ -0,0 +1,370 @@ +/********************************************************************** + * File: polyblob.cpp (Formerly blob.c) + * Description: Code for PBLOB class. + * Author: Ray Smith + * Created: Wed Oct 23 15:17:41 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include          "varable.h"
#include          "ocrrow.h"
#include          "polyblob.h"
//#include                                                      "lapoly.h"
#include          "polyaprx.h"

#define EXTERN

EXTERN BOOL_VAR (polygon_tess_approximation, TRUE,
"Do tess poly instead of greyscale");

ELISTIZE_S (PBLOB)
/**********************************************************************
 * position_outline
 *
 * Position the outline in the given list at the relevant place
 * according to its nesting.
 *
 * Walks destlist once. If `outline` encloses an existing entry, it takes
 * that entry's place and every enclosed entry from there to the end of the
 * list becomes its child. If an existing entry encloses `outline`, the
 * function recurses into that entry's child list. Otherwise `outline` is
 * appended at this level.
 **********************************************************************/
static void position_outline(                        //put in place
                             OUTLINE *outline,       //thing to place
                             OUTLINE_LIST *destlist  //destination list
                            ) {
  OUTLINE *dest_outline;         //outline from dest list
  OUTLINE_IT it = destlist;      //iterator
                                 //iterator on children
  OUTLINE_IT child_it = outline->child ();

  if (!it.empty ()) {
    do {
      dest_outline = it.data (); //get destination
                                 //encloses dest
      if (*dest_outline < *outline) {
                                 //take off list
        dest_outline = it.extract ();
                                 //put this in place
        it.add_after_then_move (outline);
                                 //make it a child
        child_it.add_to_end (dest_outline);
        // sweep the rest of the list for further outlines enclosed by it
        while (!it.at_last ()) {
          it.forward ();         //do rest of list
                                 //check for other children
          dest_outline = it.data ();
          if (*dest_outline < *outline) {
                                 //take off list
            dest_outline = it.extract ();
            child_it.add_to_end (dest_outline);
            //make it a child
            if (it.empty ())
              break;
          }
        }
        return;                  //finished
      }
                                 //enclosed by dest
      else if (*outline < *dest_outline) {
        position_outline (outline, dest_outline->child ());
        //place in child list
        return;                  //finished
      }
      it.forward ();
    }
    while (!it.at_first ());
  }
  it.add_to_end (outline);       //at outer level
}


/**********************************************************************
 * plot_outline_list
 *
 * Draw a list of outlines in the given colour and their children
 * in the child colour.
 **********************************************************************/

#ifndef GRAPHICS_DISABLED
// Recurses into each non-empty child list; from the first level of
// children downwards everything is drawn in child_colour.
static void plot_outline_list(                     //draw outlines
                              OUTLINE_LIST *list,  //outline to draw
                              ScrollView* window,       //window to draw in
                              ScrollView::Color colour,       //colour to use
                              ScrollView::Color child_colour  //colour of children
                             ) {
  OUTLINE *outline;              //current outline
  OUTLINE_IT it = list;          //iterator

  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
                                 //draw it
    outline->plot (window, colour);
    if (!outline->child ()->empty ())
      plot_outline_list (outline->child (), window,
        child_colour, child_colour);
  }
}
#endif


/**********************************************************************
 * PBLOB::PBLOB
 *
 * Constructor to build a PBLOB from a list of OUTLINEs.
 * The OUTLINEs are not copied so the source list is emptied.
 * The OUTLINEs are nested correctly in the blob.
 **********************************************************************/

PBLOB::PBLOB(                            //constructor
             OUTLINE_LIST *outline_list  //in random order
            ) {
  OUTLINE *outline;              //current outline
  OUTLINE_IT it = outline_list;  //iterator

  // Move each outline out of the source list and into its nesting position
  // in this blob's own outline list.
  while (!it.empty ()) {         //grab the list
    outline = it.extract ();     //get off the list
                                 //put it in place
    position_outline(outline, &outlines);
    if (!it.empty ())
      it.forward ();
  }
}


/**********************************************************************
 * approximate_outline_list
 *
 * Convert a list of outlines to polygonal form.
+ **********************************************************************/
+
+static void approximate_outline_list(   // polygonise a list of outlines
+    C_OUTLINE_LIST *srclist,            // outlines to convert
+    OUTLINE_LIST *destlist,             // receives the polygonal outlines
+    float xheight                       // height handed to tesspoly_outline
+    ) {
+  C_OUTLINE_IT source_it = srclist;
+  OUTLINE_IT result_it = destlist;
+
+  // The body runs once before the list is tested, so srclist must hold at
+  // least one outline. The PBLOB constructor below and the recursive call
+  // both check for an empty list before calling.
+  do {
+    C_OUTLINE *source = source_it.data ();
+    OUTLINE *polygon = tesspoly_outline (source, xheight);
+    // A NULL result adds nothing to destlist, and the children of that
+    // source outline are then not converted either.
+    if (polygon != NULL) {
+      result_it.add_after_then_move (polygon);
+      C_OUTLINE_LIST *source_children = source->child ();
+      if (!source_children->empty ())
+        approximate_outline_list (source_children, polygon->child (), xheight);
+    }
+    source_it.forward ();
+  }
+  while (!source_it.at_first ());
+}
+
+
+/**********************************************************************
+ * PBLOB::PBLOB
+ *
+ * Build a PBLOB from a C_BLOB by polygonal approximation of each of its
+ * outlines. A C_BLOB with no outlines yields an empty PBLOB.
+ **********************************************************************/
+
+PBLOB::PBLOB(                           // constructor
+    C_BLOB *cblob,                      // blob to approximate
+    float xheight                       // height of the text line
+    ) {
+  C_OUTLINE_LIST *source_outlines = cblob->out_list ();
+  if (source_outlines->empty ())
+    return;                             // nothing to convert
+
+  // Work with the larger of the line height and the blob's own height.
+  TBOX blob_box = cblob->bounding_box ();
+  if (blob_box.height () > xheight)
+    xheight = blob_box.height ();
+  approximate_outline_list (source_outlines, &outlines, xheight);
+}
+
+
+/**********************************************************************
+ * PBLOB::bounding_box
+ *
+ * Return the bounding box of the blob.
+ **********************************************************************/
+
+TBOX PBLOB::bounding_box() {            // union of the outline boxes
+  TBOX blob_box;                        // grows as outlines are added
+  OUTLINE_IT outline_it(&outlines);
+
+  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
+       outline_it.forward ())
+    blob_box += outline_it.data ()->bounding_box ();
+  return blob_box;
+}
+
+
+/**********************************************************************
+ * PBLOB::area
+ *
+ * Return the sum of OUTLINE::area() over the blob's top-level outlines.
+ * An empty blob has area 0.
+ **********************************************************************/
+
+float PBLOB::area() {                   // total area
+  float area_sum = 0.0f;
+  OUTLINE_IT outline_it(&outlines);
+
+  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
+       outline_it.forward ())
+    area_sum += outline_it.data ()->area ();
+  return area_sum;
+}
+
+
+/**********************************************************************
+ * PBLOB::baseline_normalise
+ *
+ * Return a baseline-normalised deep copy of this blob; this blob is not
+ * modified. The copy is allocated with new and handed to the caller.
+ * *denorm is overwritten with the DENORM built for this blob, which is
+ * what baseline_denormalise needs to undo the mapping.
+ **********************************************************************/
+
+PBLOB *PBLOB::baseline_normalise(       // normalise a copy of the blob
+    ROW *row,                           // row the blob came from
+    DENORM *denorm                      // out: inverse mapping
+    ) {
+  TBOX blob_box = bounding_box ();
+  // Horizontal centre of the blob; used as the x origin and as the place
+  // where the row's baseline is sampled.
+  float x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
+
+  *denorm = DENORM (x_centre, bln_x_height / row->x_height (), row);
+
+  PBLOB *normalised = deep_copy (this);
+  // 1. Shift so the denorm origin and the baseline at x_centre map to 0.
+  normalised->move (FCOORD (-denorm->origin (), -row->base_line (x_centre)));
+  // 2. Scale by the denorm's scale factor.
+  normalised->scale (denorm->scale ());
+  // 3. Shift vertically by the normalised baseline offset.
+  normalised->move (FCOORD (0.0, bln_baseline_offset));
+  return normalised;
+}
+
+
+/**********************************************************************
+ * PBLOB::baseline_denormalise
+ *
+ * DeBaseline Normalise the blob properly with the given denorm.
+ **********************************************************************/
+
+void PBLOB::baseline_denormalise(       // undo baseline normalisation
+    const DENORM *denorm                // mapping to invert
+    ) {
+  // Remove the normalised baseline offset first.
+  move (FCOORD (0.0f, 0.0f - bln_baseline_offset));
+
+  // The scale and the vertical shift are both looked up at the blob's
+  // left edge, measured after the offset has been removed.
+  float left_edge = bounding_box ().left ();
+  scale (1.0 / denorm->scale_at_x (left_edge));
+  move (FCOORD (denorm->origin (), denorm->yshift_at_x (left_edge)));
+}
+
+
+/**********************************************************************
+ * PBLOB::move
+ *
+ * Translate the blob: every top-level outline is moved by the vector
+ * (OUTLINE::move also moves that outline's children).
+ **********************************************************************/
+
+void PBLOB::move(                       // reposition blob
+    const FCOORD vec                    // translation vector
+    ) {
+  OUTLINE_IT outline_it(&outlines);
+
+  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
+       outline_it.forward ()) {
+    outline_it.data ()->move (vec);
+  }
+}
+
+
+/**********************************************************************
+ * PBLOB::scale
+ *
+ * Scale the blob by a single float multiplier applied to every
+ * top-level outline.
+ **********************************************************************/
+
+void PBLOB::scale(                      // scale blob
+    const float f                       // multiplier
+    ) {
+  OUTLINE_IT outline_it(&outlines);
+
+  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
+       outline_it.forward ()) {
+    outline_it.data ()->scale (f);
+  }
+}
+
+
+/**********************************************************************
+ * PBLOB::scale
+ *
+ * Scale the blob by an FCOORD, passed on to every top-level outline
+ * (OUTLINE::scale applies its x and y components separately).
+ **********************************************************************/
+
+void PBLOB::scale(                      // scale blob
+    const FCOORD vec                    // per-axis multipliers
+    ) {
+  OUTLINE_IT outline_it(&outlines);
+
+  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
+       outline_it.forward ()) {
+    outline_it.data ()->scale (vec);
+  }
+}
+
+/**********************************************************************
+ * PBLOB::rotate
+ *
+ * Rotate PBLOB 90 deg anticlockwise about the origin.
+ **********************************************************************/
+
+void PBLOB::rotate() {                  // quarter turn anticlockwise
+  // (cos a, sin a) = (0, 1) is the 90 degree anticlockwise rotation.
+  FCOORD quarter_turn(0.0f, 1.0f);
+  rotate(quarter_turn);
+}
+
+/**********************************************************************
+ * PBLOB::rotate
+ *
+ * Rotate the blob about the origin by applying the rotation to every
+ * top-level outline.
+ * The rotation is given as (cos a, sin a), a being the anticlockwise
+ * angle; equivalently it is multiplication by the complex number
+ * z = x + iy with |z| = 1.
+ **********************************************************************/
+void PBLOB::rotate(const FCOORD& rotation) {
+  OUTLINE_IT outline_it(&outlines);
+
+  for (outline_it.mark_cycle_pt(); !outline_it.cycled_list();
+       outline_it.forward())
+    outline_it.data()->rotate(rotation);
+}
+
+/**********************************************************************
+ * PBLOB::plot
+ *
+ * Draw the blob: top-level outlines in blob_colour, everything nested
+ * inside them in child_colour. Compiled out when GRAPHICS_DISABLED is
+ * defined.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void PBLOB::plot(                       // draw the blob
+    ScrollView* window,                 // window to draw in
+    ScrollView::Color blob_colour,      // colour of top-level outlines
+    ScrollView::Color child_colour      // colour of nested outlines
+    ) {
+  // All the work, including the recursion, is in plot_outline_list.
+  plot_outline_list(&outlines, window, blob_colour, child_colour);
+}
+#endif
diff --git a/ccmain/polyblob.h b/ccmain/polyblob.h
new file mode 100644
index 000000000..52a1d6f6b
--- /dev/null
+++ b/ccmain/polyblob.h
@@ -0,0 +1,103 @@
+/**********************************************************************
+ * File: polyblob.h (Formerly blob.h)
+ * Description: Code for PBLOB class.
+ * Author: Ray Smith
+ * Created: Wed Oct 23 15:17:41 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef POLYBLOB_H
+#define POLYBLOB_H
+
+#include "poutline.h"
+#include "rect.h"
+#include "normalis.h"
+#include "stepblob.h"
+
+// A blob in polygonal form: a list element (ELIST_LINK) that owns a list
+// of top-level OUTLINEs, each of which carries its own nested children.
+class PBLOB:public ELIST_LINK
+{
+ public:
+  PBLOB() {
+  }  //empty constructor
+  // Build from loose outlines. The outlines are moved, not copied, so
+  // outline_list is left empty.
+  PBLOB(  //constructor
+      OUTLINE_LIST *outline_list);  //in random order
+  // Build by polygonal approximation of a C_BLOB.
+  PBLOB(  //constructor
+      C_BLOB *cblob,  //polygonal approx
+      float xheight);
+
+  // Direct access to the owned top-level outline list.
+  OUTLINE_LIST *out_list() {  //get outline list
+    return &outlines;
+  }
+
+  TBOX bounding_box();  //compute bounding box
+  float area();  //get area of blob
+
+  // Returns a newly allocated, normalised copy and fills in *denorm.
+  PBLOB *baseline_normalise(  //normalise single blob
+      ROW *row,  //row it came from
+      DENORM *denorm);  //inverse mapping out
+  // Undoes baseline_normalise in place, using the DENORM it produced.
+  void baseline_denormalise(  //denormalise
+      const DENORM *denorm);  //antidote
+
+  void plot(  //draw one
+      ScrollView* window,  //window to draw in
+      ScrollView::Color blob_colour,  //for outer bits
+      ScrollView::Color child_colour);  //for holes
+
+  void move(  // reposition blob
+      const FCOORD vec);  // by FLOAT vector
+
+  void scale(  // scale blob
+      const float f);  // by multiplier
+  void scale(  // scale blob
+      const FCOORD vec);  // by FLOAT vector
+  void rotate();  // Rotate 90 deg anti
+  void rotate(const FCOORD& rotation);  // Rotate by given rotation.
+
+  // The three serialisation hooks simply forward to the outline list.
+  void prep_serialise() {  //set ptrs to counts
+    outlines.prep_serialise ();
+  }
+
+  void dump(  //write external bits
+      FILE *f) {
+    outlines.dump (f);
+  }
+
+  void de_dump(  //read external bits
+      FILE *f) {
+    outlines.de_dump (f);
+  }
+
+  //assignment
+  make_serialise(PBLOB)
+
+  // Deep-copy assignment: discards the current outlines and replaces
+  // them with copies of the source's outlines.
+  PBLOB& operator=(const PBLOB & source) {
+    // Self-assignment must be a no-op. Without this check the list would
+    // be cleared and then copied from itself, leaving the blob empty.
+    if (this != &source) {
+      if (!outlines.empty ())
+        outlines.clear ();
+
+      outlines.deep_copy(&source.outlines, &OUTLINE::deep_copy);
+    }
+    return *this;
+  }
+
+  // Returns a newly allocated deep copy of *src, built with operator=.
+  static PBLOB* deep_copy(const PBLOB* src) {
+    PBLOB* blob = new PBLOB;
+    *blob = *src;
+    return blob;
+  }
+
+ private:
+  OUTLINE_LIST outlines;  //master elements
+};
+
+ELISTIZEH_S (PBLOB)
+#endif
diff --git a/ccmain/polyvert.cpp b/ccmain/polyvert.cpp
new file mode 100644
index 000000000..cad11add9
--- /dev/null
+++ b/ccmain/polyvert.cpp
@@ -0,0 +1,23 @@
+/**********************************************************************
+ * File: polyvert.cpp (Formerly polypt.c)
+ * Description: Code for the POLYPT class.
+ * Author: Ray Smith
+ * Created: Wed Oct 23 11:02:56 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ * + **********************************************************************/ + +#include "mfcpch.h" +#include "polyvert.h" + +ELIST2IZE_S (POLYPT) diff --git a/ccmain/polyvert.h b/ccmain/polyvert.h new file mode 100644 index 000000000..db66d0401 --- /dev/null +++ b/ccmain/polyvert.h @@ -0,0 +1,58 @@ +/********************************************************************** + * File: polyvert.h (Formerly polypt.h) + * Description: Code for the POLYPT class. + * Author: Ray Smith + * Created: Wed Oct 23 11:02:56 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ *
+ **********************************************************************/
+
+#ifndef POLYVERT_H
+#define POLYVERT_H
+
+#include "elst2.h"
+#include "rect.h"
+
+// One vertex of a polygonal outline: a position plus the step that leads
+// to the next vertex. It is an element of an ELIST2 list.
+class POLYPT:public ELIST2_LINK
+{
+ public:
+  // Leaves pos and vec to FCOORD's default construction.
+  POLYPT() {
+  }
+  // Stores copies of the given position and step.
+  POLYPT(
+      const FCOORD &position,           // vertex coordinates
+      const FCOORD &vector)             // step to the next vertex
+      : pos(position),
+        vec(vector) {
+  }
+
+  // A POLYPT holds no pointers of its own, so all three serialisation
+  // hooks are deliberately empty.
+  void prep_serialise() {
+  }
+  void dump(
+      FILE *) {
+  }
+  void de_dump(
+      FILE *) {
+  }
+
+  // Returns a newly allocated copy of *src made with the copy constructor.
+  static POLYPT* deep_copy(const POLYPT* src) {
+    return new POLYPT(*src);
+  }
+  // Serialisation boilerplate.
+  make_serialise (POLYPT)
+
+  FCOORD pos;                           // vertex position
+  FCOORD vec;                           // vector to the next vertex
+};
+
+ELIST2IZEH_S (POLYPT)
+#endif
diff --git a/ccmain/poutline.cpp b/ccmain/poutline.cpp
new file mode 100644
index 000000000..b433ec473
--- /dev/null
+++ b/ccmain/poutline.cpp
@@ -0,0 +1,441 @@
+/**********************************************************************
+ * File: poutline.cpp (Formerly outline.c)
+ * Description: Code for OUTLINE class.
+ * Author: Ray Smith
+ * Created: Wed Oct 23 10:52:04 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "mfcpch.h"
+#include "poutline.h"
+
+ELISTIZE_S (OUTLINE)
+/**********************************************************************
+ * OUTLINE::OUTLINE
+ *
+ * Build an OUTLINE from a compact loop: consecutive (dx, dy) byte pairs
+ * that are followed from startpt until the running position returns to
+ * startpt. The bounding box is taken from the arguments, not computed.
+ **********************************************************************/
+OUTLINE::OUTLINE (                      // constructor
+const ICOORD & startpt,                 // start position
+inT8 * compactloop,                     // (dx, dy) pairs, Tess format
+BOOL8 invert,                           // reverse the direction afterwards
+ICOORD bot_left,                        // bounding box corners
+ICOORD top_right):
+box (bot_left, top_right),
+start(startpt) {
+  POLYPT_IT point_it = &outline;        // appends to the point list
+  const inT8 *step = compactloop;       // next (dx, dy) pair to read
+  ICOORD cursor = startpt;              // position of the current vertex
+
+  // There is no length argument: the loop ends only when the steps lead
+  // back to the start point.
+  do {
+    ICOORD delta (step[0], step[1]);
+    point_it.add_after_then_move (
+      new POLYPT (FCOORD (cursor), FCOORD (delta)));
+    cursor += delta;
+    step += 2;
+  }
+  while (cursor != startpt);
+
+  if (invert)
+    reverse ();
+}
+
+
+/**********************************************************************
+ * OUTLINE::OUTLINE
+ *
+ * Build an OUTLINE from an existing list of POLYPTs. The points from the
+ * first to the last element of the iterator's list are handed to
+ * assign_to_sublist, and the bounding box is then computed from them.
+ **********************************************************************/
+
+OUTLINE::OUTLINE(                       // constructor
+    POLYPT_IT *polypts                  // iterator on the input list
+    ) {
+  POLYPT_IT last_it = *polypts;         // second iterator on the same list
+
+  polypts->move_to_first ();
+  last_it.move_to_last ();
+  outline.assign_to_sublist (polypts, &last_it);
+  compute_bb ();
+}
+
+
+/**********************************************************************
+ * OUTLINE::compute_bb
+ *
+ * Compute the bounding box from the outline points.
+ **********************************************************************/
+
+void OUTLINE::compute_bb() {            // recompute box and start
+  POLYPT_IT point_it = &outline;
+
+  // Seed the extremes with the first point. That point also becomes the
+  // start position, truncated to integer coordinates.
+  FCOORD first = point_it.data ()->pos;
+  float min_x = first.x ();
+  float min_y = first.y ();
+  float max_x = first.x ();
+  float max_y = first.y ();
+  start = ICOORD ((inT16) first.x (), (inT16) first.y ());
+
+  // One full cycle over the points; the list must not be empty.
+  do {
+    FCOORD point = point_it.data ()->pos;
+    if (point.x () < min_x)
+      min_x = point.x ();
+    if (point.y () < min_y)
+      min_y = point.y ();
+    if (point.x () > max_x)
+      max_x = point.x ();
+    if (point.y () > max_y)
+      max_y = point.y ();
+    point_it.forward ();
+  }
+  while (!point_it.at_first ());
+
+  // Both corners are truncated to integers; the top right corner is then
+  // pushed out by one.
+  ICOORD bottom_left ((inT16) min_x, (inT16) min_y);
+  ICOORD top_right ((inT16) max_x + 1, (inT16) max_y + 1);
+  box = TBOX (bottom_left, top_right);
+}
+
+
+/**********************************************************************
+ * OUTLINE::area
+ *
+ * Compute the area from the outline points.
+ **********************************************************************/
+
+float OUTLINE::area() {                 // area of outline and children
+  POLYPT_IT point_it(&outline);
+  OUTLINE_IT child_it(&children);
+
+  // Fan out from the first point: sum the cross products of successive
+  // vectors from that point to each later point, then halve the sum.
+  FCOORD origin = point_it.data ()->pos;
+  point_it.forward ();
+  FCOORD current = point_it.data ()->pos - origin;
+  float area_sum = 0.0f;
+  for (point_it.forward (); !point_it.at_first (); point_it.forward ()) {
+    FCOORD previous = current;
+    current = point_it.data ()->pos - origin;
+    area_sum += previous * current;
+  }
+  area_sum /= 2;
+
+  // Add whatever each child outline reports as its area.
+  for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
+       child_it.forward ())
+    area_sum += child_it.data ()->area ();
+  return area_sum;
+}
+
+
+/**********************************************************************
+ * OUTLINE::operator<
+ *
+ * Return TRUE if the left operand is inside the right one.
+ * The decision is made by the first vertex that gives a definite
+ * winding number, i.e. one that is not INTERSECTING.
+ **********************************************************************/
+
+BOOL8
+OUTLINE::operator< (                    // containment test
+OUTLINE & other                         // candidate enclosing outline
+) {
+  // Boxes that do not overlap rule out containment straight away.
+  if (!box.overlap (other.box))
+    return FALSE;
+
+  // First pass: test this outline's vertices against the other outline.
+  // A non-zero winding number means the vertex is inside it.
+  POLYPT_IT own_it = &outline;
+  do {
+    inT16 winding = other.winding_number (FCOORD (own_it.data ()->pos));
+    if (winding != INTERSECTING)
+      return winding != 0;
+    own_it.forward ();
+  }
+  while (!own_it.at_first ());
+
+  // Every vertex was INTERSECTING, so try the other way round: a vertex
+  // of the other outline that is outside this one gives TRUE.
+  POLYPT_IT other_it = &other.outline;
+  do {
+    inT16 winding = winding_number (FCOORD (other_it.data ()->pos));
+    if (winding != INTERSECTING)
+      return winding == 0;
+    other_it.forward ();
+  }
+  while (!other_it.at_first ());
+
+  // Still undecided after both passes: report TRUE.
+  return TRUE;
+}
+
+
+/**********************************************************************
+ * OUTLINE::winding_number
+ *
+ * Return the winding number of the outline around the given point.
+ **********************************************************************/
+
+inT16 OUTLINE::winding_number(          // winding number
+    const FCOORD &point                 // point to wind around
+    ) {
+  inT16 winding = 0;
+  POLYPT_IT point_it = &outline;
+
+  do {
+    POLYPT *vertex = point_it.data ();
+    FCOORD to_vertex = vertex->pos - point;   // from point to edge start
+
+    // An edge matters only if it crosses the horizontal line through the
+    // point: upward means it starts at or below and ends above, downward
+    // means it starts above and ends at or below.
+    bool upward =
+      to_vertex.y () <= 0 && to_vertex.y () + vertex->vec.y () > 0;
+    bool downward =
+      to_vertex.y () > 0 && to_vertex.y () + vertex->vec.y () <= 0;
+
+    if (upward || downward) {
+      // The sign of the cross product decides whether the crossing counts.
+      // Exactly zero means the edge passes through the point itself.
+      float cross = to_vertex * vertex->vec;
+      if (cross == 0)
+        return INTERSECTING;
+      if (upward && cross > 0)
+        winding++;
+      else if (downward && cross < 0)
+        winding--;
+    }
+    point_it.forward ();
+  }
+  while (!point_it.at_first ());
+  return winding;
+}
+
+
+/**********************************************************************
+ * OUTLINE::reverse
+ *
+ * Reverse the direction of an outline.
+ **********************************************************************/
+
+void OUTLINE::reverse() {               // reverse direction
+  POLYPT_LIST back_list;                // collects the points in new order
+  POLYPT_IT dest_it = &back_list;       // appends to back_list
+  POLYPT_IT src_it = &outline;          // walks the outline backwards
+  POLYPT *polypt;                       // current point
+
+  // Phase 1: move every point into back_list. The source is walked
+  // backwards while the destination grows forwards, which is what
+  // reverses the order. The outline must not be empty on entry.
+  do {
+    polypt = src_it.extract ();
+    dest_it.add_after_then_move (polypt);
+    src_it.backward ();
+  }
+  while (!src_it.empty ());
+  // Phase 2: the stored steps still point the old way, so recompute each
+  // one as the difference to the next point in the new order.
+  dest_it.move_to_first ();
+  do {
+    polypt = dest_it.data ();
+    polypt->vec = dest_it.data_relative (1)->pos - polypt->pos;
+    dest_it.forward ();
+  }
+  while (!dest_it.at_first ());
+  // Phase 3: hand back_list back to the outline. dest_it steps back from
+  // the first element (presumably onto the last one of the circular
+  // list - TODO confirm) and src_it is reset onto back_list.
+  dest_it.backward ();
+  src_it.set_to_list (&back_list);
+  outline.assign_to_sublist (&src_it, &dest_it);
+}
+
+
+/**********************************************************************
+ * OUTLINE::move
+ *
+ * Translate the outline by a vector: the bounding box, the start
+ * position, every point, and, recursively, every child outline.
+ **********************************************************************/
+
+void OUTLINE::move(                     // reposition OUTLINE
+    const FCOORD vec                    // by vector
+    ) {
+  OUTLINE_IT child_it(&children);       // child outline iterator
+  POLYPT_IT poly_it(&outline);          // outline point iterator
+
+  box.move (vec);
+
+  // start is an ICOORD, so the moved value is rounded to the nearest
+  // integer with floor (v + 0.5). The original author also queried why
+  // start is integer while the points are float.
+  start.set_x ((inT16) floor (start.x () + vec.x () + 0.5));
+  start.set_y ((inT16) floor (start.y () + vec.y () + 0.5));
+
+  // Only positions move; the steps between points are unaffected by a
+  // translation.
+  for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ())
+    poly_it.data ()->pos += vec;
+
+  for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
+    child_it.forward ())
+  child_it.data ()->move (vec);         // move child outlines
+}
+
+
+/**********************************************************************
+ * OUTLINE::scale
+ *
+ * Scale the outline by a single float multiplier: the bounding box, the
+ * start position, every point and step, and, recursively, every child
+ * outline.
+ **********************************************************************/
+
+void OUTLINE::scale(                    // scale OUTLINE
+    const float f                       // by multiplier
+    ) {
+  OUTLINE_IT child_it(&children);       // child outline iterator
+  POLYPT_IT poly_it(&outline);          // outline point iterator
+  POLYPT *pt;                           // current point
+
+  box.scale (f);
+
+  // start is an ICOORD: round the scaled value to the nearest integer.
+  start.set_x ((inT16) floor (start.x () * f + 0.5));
+  start.set_y ((inT16) floor (start.y () * f + 0.5));
+
+  // Unlike a translation, scaling changes the steps as well as the
+  // positions.
+  for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ()) {
+    pt = poly_it.data ();
+    pt->pos *= f;
+    pt->vec *= f;
+  }
+
+  for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
+    child_it.forward ())
+  child_it.data ()->scale (f);          // scale child outlines
+}
+
+
+/**********************************************************************
+ * OUTLINE::scale
+ *
+ * Scale the outline by an FCOORD whose x and y components are separate
+ * multipliers for the two axes. Applies to the bounding box, the start
+ * position, every point and step, and, recursively, every child outline.
+ **********************************************************************/
+
+void OUTLINE::scale(                    // scale OUTLINE
+    const FCOORD vector                 // per-axis multipliers
+    ) {
+  OUTLINE_IT child_it(&children);       // child outline iterator
+  POLYPT_IT poly_it(&outline);          // outline point iterator
+  POLYPT *pt;                           // current point
+
+  box.scale (vector);
+
+  // start is an ICOORD: round each scaled component to the nearest
+  // integer.
+  start.set_x ((inT16) floor (start.x () * vector.x () + 0.5));
+  start.set_y ((inT16) floor (start.y () * vector.y () + 0.5));
+
+  // Component-wise multiply of both the position and the step.
+  for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ()) {
+    pt = poly_it.data ();
+    pt->pos =
+      FCOORD (pt->pos.x () * vector.x (), pt->pos.y () * vector.y ());
+    pt->vec =
+      FCOORD (pt->vec.x () * vector.x (), pt->vec.y () * vector.y ());
+  }
+
+  for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
+    child_it.forward ())
+  // scale child outlines
+  child_it.data ()->scale (vector);
+}
+
+/**********************************************************************
+ * OUTLINE::rotate
+ *
+ * Rotate the outline by the given vector: the bounding box, the start
+ * position, every point and step, and, recursively, every child outline.
+ **********************************************************************/
+
+void OUTLINE::rotate(
+    const FCOORD vector                 // rotation to apply
+    ) {
+  OUTLINE_IT child_it(&children);       // child outline iterator
+  POLYPT_IT poly_it(&outline);          // outline point iterator
+  POLYPT *pt;                           // current point
+  box.rotate(vector);
+
+  start.rotate(vector);
+
+  for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ()) {
+    pt = poly_it.data ();
+    pt->pos.rotate(vector);
+    pt->vec.rotate(vector);
+  }
+
+  for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
+    child_it.forward ())
+  // rotate child outlines
+  child_it.data ()->rotate(vector);
+}
+
+
+/**********************************************************************
+ * OUTLINE::plot
+ *
+ * Draw the outline in the given colour.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void OUTLINE::plot(                     // draw it
+    ScrollView* window,                 // window to draw in
+    ScrollView::Color colour            // colour to draw in
+    ) {
+  POLYPT *polypt;                       // current point
+  POLYPT_IT it = &outline;              // iterator over the points
+
+  window->Pen(colour);
+  // Coordinates are float and are converted to int for drawing.
+  polypt = it.data ();
+  int startx = polypt->pos.x ();
+  int starty = polypt->pos.y ();
+  // Draw one segment per point. The final pass of the loop draws the
+  // segment that returns to the first point and closes the outline.
+  do {
+    it.forward ();
+    polypt = it.data ();
+    window->Line(startx,starty,polypt->pos.x (),polypt->pos.y ());
+    startx = polypt->pos.x ();
+    starty = polypt->pos.y ();
+  }
+  while (!it.at_first ());
+}
+#endif
+
+
+/**********************************************************************
+ * OUTLINE::operator=
+ *
+ * Assignment - deep copy data.
+ * The box and start position are copied; the point list and the child
+ * list are emptied and refilled with deep copies of the source's.
+ * Assigning an outline to itself leaves it unchanged.
+ **********************************************************************/
+
+OUTLINE & OUTLINE::operator= (          // assignment
+const OUTLINE & source                  // from this
+) {
+  // Self-assignment must be a no-op. Without this check both lists would
+  // be cleared and then copied from themselves, destroying the outline.
+  if (this == &source)
+    return *this;
+
+  box = source.box;
+  start = source.start;
+  if (!outline.empty())
+    outline.clear();
+  outline.deep_copy(&source.outline, &POLYPT::deep_copy);
+  if (!children.empty())
+    children.clear();
+  children.deep_copy(&source.children, &OUTLINE::deep_copy);
+  return *this;
+}
diff --git a/ccmain/poutline.h b/ccmain/poutline.h
new file mode 100644
index 000000000..156830f40
--- /dev/null
+++ b/ccmain/poutline.h
@@ -0,0 +1,125 @@
+/**********************************************************************
+ * File: poutline.h (Formerly outline.h)
+ * Description: OUTLINE class definition.
+ * Author: Ray Smith
+ * Created: Wed Oct 23 10:42:40 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef POUTLINE_H
+#define POUTLINE_H
+
+#include "scrollview.h"
+#include "polyvert.h"
+#include "rect.h"
+#include "blckerr.h"
+
+// Sentinel returned by OUTLINE::winding_number when an edge passes
+// through the test point, so that no winding number can be given.
+#define INTERSECTING MAX_INT16//no winding number
+
+class OUTLINE;  //forward declaration
+
+ELISTIZEH_S (OUTLINE)
+// A polygonal outline: a list of POLYPT vertices together with its
+// bounding box, an integer start position, and a list of child outlines
+// nested inside it. It is itself an element of an ELIST list.
+class OUTLINE:public ELIST_LINK
+{
+ public:
+  OUTLINE() {  //empty constructor
+  }
+  // Build from a compact loop of (dx, dy) steps beginning at startpt.
+  OUTLINE(  //constructor
+      const ICOORD &startpt,  //start point
+      inT8 *compactloop,  //from Tess format
+      BOOL8 reverse,  //reverse it
+      ICOORD bot_left,  //bounding box
+      ICOORD top_right);
+  // Build from an existing list of points; the box is computed.
+  OUTLINE(  //constructor
+      POLYPT_IT *poly_it);  //from list of pts
+
+  // Direct access to the owned list of child outlines.
+  OUTLINE_LIST *child() {  //get child list
+    return &children;
+  }
+
+  // Returns the stored box; it is not recomputed here.
+  const TBOX &bounding_box() const {
+    return box;
+  }
+  void compute_bb();  //set bounding box
+
+  //get start position
+  const ICOORD &start_pos() const {
+    return start;
+  }
+  float area();  //return area
+  // Direct access to the owned list of vertices.
+  POLYPT_LIST *polypts() {  //get poly
+    return &outline;
+  }
+
+  // a < b is TRUE when a lies inside b.
+  BOOL8 operator< (  //containment test
+      OUTLINE & other);
+  // a > b is TRUE when b lies inside a.
+  BOOL8 operator> (  //containment test
+      OUTLINE & other) {
+    return other < *this;  //use the < to do it
+  }
+  // Returns INTERSECTING when an edge passes through testpt.
+  inT16 winding_number(  //get winding number
+      const FCOORD &testpt);  //around this point
+  void reverse();  //reverse it
+
+  void move(  // reposition outline
+      const FCOORD vec);  // by FLOAT vector
+
+  void scale(  // scale outline
+      const float f);  // by multiplier
+  void scale(  // scale outline
+      const FCOORD vec);  // by FLOAT vector
+
+  void rotate(  // rotate outline
+      const FCOORD vector);  // by fcoord
+
+  void plot(  //draw one
+      ScrollView* window,  //window to draw in
+      ScrollView::Color colour);  //colour to draw it
+
+  // The three serialisation hooks forward to the point list first and
+  // then to the child list.
+  void prep_serialise() {  //set ptrs to counts
+    outline.prep_serialise ();
+    children.prep_serialise ();
+  }
+
+  void dump(  //write external bits
+      FILE *f) {
+    outline.dump (f);
+    children.dump (f);
+  }
+
+  void de_dump(  //read external bits
+      FILE *f) {
+    outline.de_dump (f);
+    children.de_dump (f);
+  }
+
+  //assignment
+  make_serialise(OUTLINE)
+
+  // Deep-copy assignment; defined in poutline.cpp.
+  OUTLINE& operator=(const OUTLINE& source);
+
+  // Returns a newly allocated deep copy of *src, built with operator=.
+  static OUTLINE* deep_copy(const OUTLINE* src) {
+    OUTLINE* outline = new OUTLINE;
+    *outline = *src;
+    return outline;
+  }
+
+ private:
+  TBOX box;  //bounding box
+  ICOORD start;  //start coord
+  POLYPT_LIST outline;  //outline points
+  OUTLINE_LIST children;  //child elements
+};
+#endif
diff --git a/ccmain/quadlsq.cpp b/ccmain/quadlsq.cpp
new file mode 100644
index 000000000..8f744c546
--- /dev/null
+++ b/ccmain/quadlsq.cpp
@@ -0,0 +1,147 @@
+/**********************************************************************
+ * File: quadlsq.cpp (Formerly qlsq.c)
+ * Description: Code for least squares approximation of quadratics.
+ * Author: Ray Smith
+ * Created: Wed Oct 6 15:14:23 BST 1993
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include          "mfcpch.h"
+#include          <stdio.h>
+#include          <math.h>
+#include          "errcode.h"
+#include          "quadlsq.h"
+
+const ERRCODE EMPTY_QLSQ = "Can't delete from an empty QLSQ";
+
+#define EXTERN
+
+/**********************************************************************
+ * QLSQ::clear
+ *
+ * Function to initialize a QLSQ: zero the fitted coefficients, the
+ * element count and every running sum.
+ **********************************************************************/
+
+void QLSQ::clear() {  //initialize
+  a = b = c = 0;                 //no fit yet
+  n = 0;                         //no elements
+  sigx = sigy = 0;               //reset accumulators
+  sigxx = sigxy = sigyy = 0;
+  sigxxx = sigxxy = sigxxxx = 0;
+}
+
+
+/**********************************************************************
+ * QLSQ::add
+ *
+ * Add an element to the accumulator.
+ * The second-order sums are kept in double; the x^2-based higher sums
+ * are formed in long double.
+ **********************************************************************/
+
+void QLSQ::add(           //add an element
+               double x,  //xcoord
+               double y   //ycoord
+              ) {
+  const long double x_squared = (long double) x * x;
+
+  n++;                           //count elements
+  sigx += x;                     //update accumulators
+  sigy += y;
+  sigxx += x * x;
+  sigxy += x * y;
+  sigyy += y * y;
+  sigxxx += x_squared * x;
+  sigxxy += x_squared * y;
+  sigxxxx += x_squared * x * x;
+}
+
+
+/**********************************************************************
+ * QLSQ::remove
+ *
+ * Delete an element from the accumulator.
+ * Reports EMPTY_QLSQ (with ABORT) if there is nothing to delete.
+ **********************************************************************/
+
+void QLSQ::remove(           //delete an element
+                  double x,  //xcoord
+                  double y   //ycoord
+                 ) {
+  if (n <= 0)
+                                 //illegal
+    EMPTY_QLSQ.error ("QLSQ::remove", ABORT, NULL);
+
+  const long double x_squared = (long double) x * x;
+
+  n--;                           //count elements
+  sigx -= x;                     //update accumulators
+  sigy -= y;
+  sigxx -= x * x;
+  sigxy -= x * y;
+  sigyy -= y * y;
+  sigxxx -= x_squared * x;
+  sigxxy -= x_squared * y;
+  sigxxxx -= x_squared * x * x;
+}
+
+
+/**********************************************************************
+ * QLSQ::fit
+ *
+ * Fit the given degree of polynomial and store the result.
+ **********************************************************************/
+
+// Fits y = a*x*x + b*x + c to the accumulated sums.
+//  - quadratic when degree >= 2 and n >= 4,
+//  - straight line (a = 0) when degree > 0 and n > 1,
+//  - constant (a = b = 0, c = mean of y) otherwise,
+//  - all zero when there are no elements or degree < 0.
+// If the denominator of a requested fit is exactly zero (for example all
+// x values equal, or too few distinct x values for a quadratic), the next
+// lower degree is used instead of dividing by zero.
+void QLSQ::fit(            //fit polynomial
+               int degree  //degree to fit
+              ) {
+  long double cubetemp;          //intermediates
+  long double squaretemp;
+  long double top96, bottom96;   /*accurate top & bottom */
+
+  if (n >= 4 && degree >= 2) {
+    cubetemp = sigxxx * n - (long double) sigxx *sigx;
+
+    top96 =
+      cubetemp * ((long double) sigxy * n - (long double) sigx * sigy);
+
+    squaretemp = (long double) sigxx *n - (long double) sigx *sigx;
+
+    top96 += squaretemp * ((long double) sigxx * sigy - sigxxy * n);
+
+    bottom96 = cubetemp * cubetemp;
+
+    bottom96 -= squaretemp * (sigxxxx * n - (long double) sigxx * sigxx);
+
+    // Both divisions below must be safe before committing to a quadratic.
+    if (bottom96 != 0 && squaretemp != 0) {
+      a = top96 / bottom96;
+
+      top96 = ((long double) sigxx * sigx - sigxxx * n) * a
+        + (long double) sigxy *n - (long double) sigx *sigy;
+      b = top96 / squaretemp;
+
+      c = (sigy - a * sigxx - b * sigx) / n;
+      return;
+    }
+    // Degenerate data: fall through and try a lower degree.
+  }
+
+  if (n == 0 || degree < 0) {
+    a = b = c = 0;
+    return;
+  }
+
+  a = 0;
+  const double x_spread = sigxx * n - sigx * sigx;
+  if (n > 1 && degree > 0 && x_spread != 0) {
+    b = (sigxy * n - sigx * sigy) / x_spread;
+  }
+  else
+    b = 0;                       //constant fit only
+  c = (sigy - b * sigx) / n;
+}
diff --git a/ccmain/quadlsq.h b/ccmain/quadlsq.h
new file mode 100644
index 000000000..337fd6156
--- /dev/null
+++ b/ccmain/quadlsq.h
@@ -0,0 +1,67 @@
+/**********************************************************************
+ * File:        quadlsq.h  (Formerly qlsq.h)
+ * Description: Code for least squares approximation of quadratics.
+ * Author:      Ray Smith
+ * Created:     Wed Oct  6 15:14:23 BST 1993
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           QUADLSQ_H
+#define           QUADLSQ_H
+
+#include          "points.h"
+
+// Running-sum accumulator for least squares fitting of
+// y = a*x*x + b*x + c. Points are added/removed one at a time; fit()
+// stores the coefficients, which are then read with get_a/get_b/get_c.
+class QLSQ
+{
+  public:
+    QLSQ() {  //constructor
+      clear();  //set to zeros
+    }
+    void clear();  //initialize
+
+    void add(            //add element
+             double x,   //coords to add
+             double y);
+    void remove(           //delete element
+                double x,  //coords to delete
+                double y);
+    inT32 count() const {  //no of elements
+      return n;
+    }
+
+    void fit(              //fit the given
+             int degree);  //degree of polynomial
+    double get_a() const {  //get x squared coefficient
+      return a;
+    }
+    double get_b() const {  //get x coefficient
+      return b;
+    }
+    double get_c() const {  //get constant term
+      return c;
+    }
+
+  private:
+    inT32 n;                     //no of elements
+    double a, b, c;              //result
+    double sigx;                 //sum of x
+    double sigy;                 //sum of y
+    double sigxx;                //sum x squared
+    double sigxy;                //sum of xy
+    double sigyy;                //sum y squared
+    long double sigxxx;          //sum x cubed
+    long double sigxxy;          //sum xsquared y
+    long double sigxxxx;         //sum x fourth
+};
+#endif
diff --git a/ccmain/quadratc.cpp b/ccmain/quadratc.cpp
new file mode 100644
index 000000000..2f10ab226
--- /dev/null
+++ b/ccmain/quadratc.cpp
@@ -0,0 +1,21 @@
+/**********************************************************************
+ * File:        quadratc.cpp  (Formerly quadrtic.c)
+ * Description: Code for the QUAD_COEFFS class.
+ * Author:      Ray Smith
+ * Created:     Tue Oct 08 17:24:40 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" +#include "quadratc.h" diff --git a/ccmain/quadratc.h b/ccmain/quadratc.h new file mode 100644 index 000000000..88047b29a --- /dev/null +++ b/ccmain/quadratc.h @@ -0,0 +1,63 @@ +/********************************************************************** + * File: quadratc.h (Formerly quadrtic.h) + * Description: Code for the QUAD_COEFFS class. + * Author: Ray Smith + * Created: Tue Oct 08 17:24:40 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ *
+ **********************************************************************/
+
+#ifndef           QUADRATC_H
+#define           QUADRATC_H
+
+#include          "points.h"
+
+// Coefficients of one quadratic piece, y = a*x*x + b*x + c.
+class QUAD_COEFFS
+{
+  public:
+    // Leaves the coefficients unset.
+    QUAD_COEFFS() {
+    }
+    // Builds the quadratic from its three coefficients.
+    QUAD_COEFFS(double xsq,       // x squared coefficient
+                float x,          // x coefficient
+                float constant)   // constant term
+      : a(xsq), b(x), c(constant) {
+    }
+
+    // Evaluates the quadratic at x (Horner form), narrowed to float.
+    float y(float x) const {
+      return (float) ((a * x + b) * x + c);
+    }
+
+    // Translates the curve by vec = (p, q):
+    //   y - q = a (x - p)^2 + b (x - p) + c
+    //   y     = ax^2 + (b - 2ap)x + (c - bp + ap^2 + q)
+    // c is updated first because its new value needs the old b.
+    void move(ICOORD vec) {
+      const inT16 p = vec.x ();
+      const inT16 q = vec.y ();
+
+      c = (float) (c - b * p + a * p * p + q);
+      b = (float) (b - 2 * a * p);
+    }
+
+    double a;                    //x squared
+    float b;                     //x
+    float c;                     //constant
+  private:
+};
+#endif
diff --git a/ccmain/quspline.cpp b/ccmain/quspline.cpp
new file mode 100644
index 000000000..2ae8880b1
--- /dev/null
+++ b/ccmain/quspline.cpp
@@ -0,0 +1,382 @@
+/**********************************************************************
+ * File:        quspline.cpp  (Formerly qspline.c)
+ * Description: Code for the QSPLINE class.
+ * Author:      Ray Smith
+ * Created:     Tue Oct 08 17:16:12 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include          "mfcpch.h"
+#include          "memry.h"
+#include          "quadlsq.h"
+#include          "quspline.h"
+
+#define QSPLINE_PRECISION 16     //no of steps to draw
+
+/**********************************************************************
+ * QSPLINE::QSPLINE
+ *
+ * Build a QSPLINE from the components used in the old code:
+ * count + 1 segment boundaries in xstarts and three coefficients
+ * (x squared, x, constant) per segment, packed consecutively in coeffs.
+ **********************************************************************/
+
+QSPLINE::QSPLINE(                  //constructor
+                 inT32 count,      //no of segments
+                 inT32 *xstarts,   //start coords
+                 double *coeffs    //coefficients
+                ) {
+  inT32 seg = 0;                 //segment index
+  const double *triple = coeffs; //coefficients of current segment
+
+  segments = count;
+                                 //get memory
+  xcoords = (inT32 *) alloc_mem ((count + 1) * sizeof (inT32));
+  quadratics = (QUAD_COEFFS *) alloc_mem (count * sizeof (QUAD_COEFFS));
+
+  while (seg < segments) {
+                                 //copy them
+    xcoords[seg] = xstarts[seg];
+    quadratics[seg] = QUAD_COEFFS (triple[0], triple[1], triple[2]);
+    triple += 3;
+    seg++;
+  }
+                                 //right edge
+  xcoords[seg] = xstarts[seg];
+}
+
+
+/**********************************************************************
+ * QSPLINE::QSPLINE
+ *
+ * Constructor to build a QSPLINE by approximation of points.
+ **********************************************************************/
+
+QSPLINE::QSPLINE (                //constructor
+int xstarts[],                    //spline boundaries
+int segcount,                     //no of segments
+int xpts[],                       //points to fit
+int ypts[], int pointcount,       //no of pts
+int degree                        //fit required
+) {
+  int pointindex;                /*no along text line */
+  int segment;                   /*segment no */
+  inT32 *ptcounts;               //no in each segment
+  QLSQ qlsq;                     /*accumulator */
+
+  segments = segcount;
+  xcoords = (inT32 *) alloc_mem ((segcount + 1) * sizeof (inT32));
+  ptcounts = (inT32 *) alloc_mem ((segcount + 1) * sizeof (inT32));
+  quadratics = (QUAD_COEFFS *) alloc_mem (segcount * sizeof (QUAD_COEFFS));
+  // NOTE(review): copies int boundaries as inT32, so this relies on the
+  // two types having the same size.
+  memmove (xcoords, xstarts, (segcount + 1) * sizeof (inT32));
+
+  // Build cumulative point counts per boundary: after this pass the points
+  // belonging to segment s are those with index in
+  // [ptcounts[s], ptcounts[s + 1]). Presumably xpts is in ascending order.
+  ptcounts[0] = 0;               /*none in any yet */
+  for (segment = 0, pointindex = 0; pointindex < pointcount; pointindex++) {
+    while (segment < segcount && xpts[pointindex] >= xstarts[segment]) {
+      segment++;                 /*try next segment */
+                                 /*cumulative counts */
+      ptcounts[segment] = ptcounts[segment - 1];
+    }
+    ptcounts[segment]++;         /*no in previous partition */
+  }
+  while (segment < segcount) {
+    segment++;
+                                 /*zero the rest */
+    ptcounts[segment] = ptcounts[segment - 1];
+  }
+
+  // Fit each segment independently. Where a neighbouring point exists, an
+  // extra point linearly interpolated at the segment boundary is added at
+  // each end (integer arithmetic).
+  for (segment = 0; segment < segcount; segment++) {
+    qlsq.clear ();
+                                 /*first blob */
+    pointindex = ptcounts[segment];
+    if (pointindex > 0
+      && xpts[pointindex] != xpts[pointindex - 1]
+      && xpts[pointindex] != xstarts[segment])
+      qlsq.add (xstarts[segment],
+        ypts[pointindex - 1]
+        + (ypts[pointindex] - ypts[pointindex - 1])
+        * (xstarts[segment] - xpts[pointindex - 1])
+        / (xpts[pointindex] - xpts[pointindex - 1]));
+    for (; pointindex < ptcounts[segment + 1]; pointindex++) {
+      qlsq.add (xpts[pointindex], ypts[pointindex]);
+    }
+    if (pointindex > 0 && pointindex < pointcount
+      && xpts[pointindex] != xstarts[segment + 1])
+      qlsq.add (xstarts[segment + 1],
+        ypts[pointindex - 1]
+        + (ypts[pointindex] - ypts[pointindex - 1])
+        * (xstarts[segment + 1] - xpts[pointindex - 1])
+        / (xpts[pointindex] - xpts[pointindex - 1]));
+    qlsq.fit (degree);
+    quadratics[segment].a = qlsq.get_a ();
+    quadratics[segment].b = qlsq.get_b ();
+    quadratics[segment].c = qlsq.get_c ();
+  }
+  free_mem(ptcounts);
+}
+
+
+/**********************************************************************
+ * QSPLINE::QSPLINE
+ *
+ * Constructor to build a QSPLINE from another.
+ **********************************************************************/
+
+QSPLINE::QSPLINE(  //constructor
+                 const QSPLINE &src) {
+  // Start empty so that operator= has nothing to free.
+  segments = 0;
+  xcoords = NULL;
+  quadratics = NULL;
+  *this = src;
+}
+
+
+/**********************************************************************
+ * QSPLINE::~QSPLINE
+ *
+ * Destroy a QSPLINE, releasing both arrays.
+ **********************************************************************/
+
+QSPLINE::~QSPLINE (              //destructor
+) {
+  if (xcoords != NULL) {
+    free_mem(xcoords);
+    xcoords = NULL;
+  }
+  if (quadratics != NULL) {
+    free_mem(quadratics);
+    quadratics = NULL;
+  }
+}
+
+
+/**********************************************************************
+ * QSPLINE::operator=
+ *
+ * Copy a QSPLINE.
+ * Self-assignment is a no-op. An array that is NULL in the source
+ * (as in a default-constructed QSPLINE) is left NULL in the copy.
+ **********************************************************************/
+
+QSPLINE & QSPLINE::operator= (   //assignment
+const QSPLINE & source) {
+  // Freeing our arrays and then copying from them would read freed memory.
+  if (this == &source)
+    return *this;
+
+  if (xcoords != NULL)
+    free_mem(xcoords);
+  if (quadratics != NULL)
+    free_mem(quadratics);
+  xcoords = NULL;
+  quadratics = NULL;
+
+  segments = source.segments;
+  // Only copy arrays the source actually has; memmove from NULL is invalid.
+  if (source.xcoords != NULL) {
+    xcoords = (inT32 *) alloc_mem ((segments + 1) * sizeof (inT32));
+    memmove (xcoords, source.xcoords, (segments + 1) * sizeof (inT32));
+  }
+  if (source.quadratics != NULL) {
+    quadratics = (QUAD_COEFFS *) alloc_mem (segments * sizeof (QUAD_COEFFS));
+    memmove (quadratics, source.quadratics, segments * sizeof (QUAD_COEFFS));
+  }
+  return *this;
+}
+
+
+/**********************************************************************
+ * QSPLINE::step
+ *
+ * Return the total of the step functions between the given coords.
+ **********************************************************************/
+
+// Sums, over every segment boundary lying between the segments that
+// contain x1 and x2, the jump in y from the left piece to the right piece
+// evaluated at that boundary.
+double QSPLINE::step(            //find step functions
+                     double x1,  //between coords
+                     double x2) {
+  int seg = spline_index (x1);            //segment holding x1
+  const int last_seg = spline_index (x2); //segment holding x2
+  double total = 0;                       /*total steps */
+
+  for (; seg < last_seg; seg++) {
+    const float boundary = (float) xcoords[seg + 1];
+    total += (double) quadratics[seg + 1].y (boundary);
+    total -= (double) quadratics[seg].y (boundary);
+  }
+  return total;                  /*total steps */
+}
+
+
+/**********************************************************************
+ * QSPLINE::y
+ *
+ * Return the y value at the given x value, using the quadratic of the
+ * segment selected by spline_index(x).
+ **********************************************************************/
+
+double QSPLINE::y(          //evaluate
+                  double x  //coord to evaluate at
+                 ) const {
+  const inT32 seg = spline_index (x);  //in correct segment
+  return quadratics[seg].y (x);
+}
+
+
+/**********************************************************************
+ * QSPLINE::spline_index
+ *
+ * Return the index to the largest xcoord not greater than x.
+ **********************************************************************/
+
+// Binary search over xcoords[0..segments]; the result is always in
+// [0, segments - 1] when segments > 0, and 0 otherwise.
+inT32 QSPLINE::spline_index(          //binary search
+                            double x  //coord to locate
+                           ) const {
+  inT32 low = 0;                 //bottom of range
+  inT32 high = segments;         //top of range
+
+  while (high - low > 1) {
+    const inT32 mid = (high + low) / 2;  //centre of range
+    if (x >= xcoords[mid])
+      low = mid;                 //new min
+    else
+      high = mid;                //new max
+  }
+  return low;
+}
+
+
+/**********************************************************************
+ * QSPLINE::move
+ *
+ * Reposition spline by vector: every boundary is shifted by vec.x()
+ * and every quadratic piece is moved by vec.
+ **********************************************************************/
+
+void QSPLINE::move(            // reposition spline
+                   ICOORD vec  // by vector
+                  ) {
+  const inT16 x_shift = vec.x ();
+  inT32 seg = 0;                 //index of segment
+
+  while (seg < segments) {
+    xcoords[seg] += x_shift;
+    quadratics[seg].move (vec);
+    seg++;
+  }
+  xcoords[seg] += x_shift;       //right edge has no quadratic
+}
+
+
+/**********************************************************************
+ * QSPLINE::overlap
+ *
+ * Return TRUE if spline2 overlaps this by no more than fraction less
+ * than the bounds of this.
+ **********************************************************************/
+
+// The limits compared are the second and the last-but-one boundary of
+// each spline. spline2 must have at least 3 segments and may fall short
+// of this spline's limits by at most fraction of their distance.
+BOOL8 QSPLINE::overlap(                   //test overlap
+                       QSPLINE *spline2,  //2 cannot be smaller
+                       double fraction    //by more than this
+                      ) {
+  const int leftlimit = xcoords[1];               /*common left limit */
+  const int rightlimit = xcoords[segments - 1];   /*common right limit */
+
+  if (spline2->segments < 3)
+    return FALSE;
+  if (spline2->xcoords[1] > leftlimit + fraction * (rightlimit - leftlimit))
+    return FALSE;                /*starts too far right */
+  if (spline2->xcoords[spline2->segments - 1] < rightlimit
+    - fraction * (rightlimit - leftlimit))
+    return FALSE;                /*ends too far left */
+  return TRUE;
+}
+
+
+/**********************************************************************
+ * QSPLINE::extrapolate
+ *
+ * Extends the spline to cover [xmin, xmax] by adding a straight-line
+ * segment of the given gradient at whichever end falls short. Each new
+ * segment is chosen to meet the existing spline's value at the old edge.
+ * Does nothing if the spline already covers the range.
+ **********************************************************************/
+
+void QSPLINE::extrapolate(                  //linear extrapolation
+                          double gradient,  //gradient to use
+                          int xmin,         //new left edge
+                          int xmax          //new right edge
+                         ) {
+  const bool grow_left = xmin < xcoords[0];
+  const bool grow_right = xmax > xcoords[segments];
+  const int increment = (grow_left ? 1 : 0) + (grow_right ? 1 : 0);
+
+  if (increment == 0)
+    return;                      //already wide enough
+
+  int *xstarts =                 //new boundaries
+    (int *) alloc_mem ((segments + 1 + increment) * sizeof (int));
+  QUAD_COEFFS *quads =           //new pieces
+    (QUAD_COEFFS *) alloc_mem ((segments + increment) * sizeof (QUAD_COEFFS));
+  int dest_segment = 0;          //dest index
+
+  if (grow_left) {
+    xstarts[0] = xmin;
+    quads[0].a = 0;
+    quads[0].b = gradient;
+    quads[0].c = y (xcoords[0]) - quads[0].b * xcoords[0];
+    dest_segment = 1;
+  }
+
+  // Copy the existing pieces after the optional left extension.
+  int segment = 0;               /*current segment of spline */
+  while (segment < segments) {
+    xstarts[dest_segment] = xcoords[segment];
+    quads[dest_segment] = quadratics[segment];
+    dest_segment++;
+    segment++;
+  }
+  xstarts[dest_segment] = xcoords[segment];
+
+  if (grow_right) {
+    quads[dest_segment].a = 0;
+    quads[dest_segment].b = gradient;
+    quads[dest_segment].c = y (xcoords[segments])
+      - quads[dest_segment].b * xcoords[segments];
+    dest_segment++;
+    xstarts[dest_segment] = xmax + 1;
+  }
+
+  // Swap in the new arrays only after y() has finished using the old ones.
+  segments = dest_segment;
+  free_mem(xcoords);
+  free_mem(quadratics);
+  xcoords = (inT32 *) xstarts;
+  quadratics = quads;
+}
+
+
+/**********************************************************************
+ * QSPLINE::plot
+ *
+ * Draw the QSPLINE in the given colour.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+// Each segment is drawn as QSPLINE_PRECISION straight pieces; the very
+// first point only positions the cursor, every later point draws to it.
+void QSPLINE::plot(                          //draw it
+                   ScrollView* window,       //window to draw in
+                   ScrollView::Color colour  //colour to draw in
+                  ) const {
+  window->Pen(colour);
+  for (inT32 segment = 0; segment < segments; segment++) {
+    const double increment =     //x increment
+      (double) (xcoords[segment + 1] -
+      xcoords[segment]) / QSPLINE_PRECISION;
+    double x = xcoords[segment]; //x coord
+
+    for (inT16 step = 0; step <= QSPLINE_PRECISION; step++, x += increment) {
+      const bool first_point = (segment == 0 && step == 0);
+      if (first_point)
+        window->SetCursor(x, quadratics[segment].y (x));
+      else
+        window->DrawTo(x, quadratics[segment].y (x));
+    }
+  }
+}
+#endif
diff --git a/ccmain/quspline.h b/ccmain/quspline.h
new file mode 100644
index 000000000..7a1604625
--- /dev/null
+++ b/ccmain/quspline.h
@@ -0,0 +1,113 @@
+/**********************************************************************
+ * File:        quspline.h  (Formerly qspline.h)
+ * Description: Code for the QSPLINE class.
+ * Author:      Ray Smith
+ * Created:     Tue Oct 08 17:16:12 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           QUSPLINE_H
+#define           QUSPLINE_H
+
+#include          "quadratc.h"
+#include          "serialis.h"
+#include          "memry.h"
+#include          "rect.h"
+
+class ROW;
+
+// A piecewise quadratic curve: `segments` QUAD_COEFFS pieces separated by
+// segments + 1 x boundaries held in xcoords. Owns both arrays.
+class QSPLINE
+{
+  // Baseline-fitting routines that are granted access to the private data.
+  friend void make_first_baseline(TBOX *,
+                                  int,
+                                  int *,
+                                  int *,
+                                  QSPLINE *,
+                                  QSPLINE *,
+                                  float);
+  friend void make_holed_baseline(TBOX *, int, QSPLINE *, QSPLINE *, float);
+  friend void tweak_row_baseline(ROW *);
+  public:
+    QSPLINE() {  //empty constructor
+      segments = 0;
+      xcoords = NULL;            //everything empty
+      quadratics = NULL;
+    }
+    QSPLINE(  //copy constructor
+            const QSPLINE &src);
+    QSPLINE(                  //constructor
+            inT32 count,      //number of segments
+            inT32 *xstarts,   //segment starts
+            double *coeffs);  //coefficients
+    ~QSPLINE ();                 //destructor
+    QSPLINE (                    //least squares fit
+      int xstarts[],             //spline boundaries
+      int segcount,              //no of segments
+      int xcoords[],             //points to fit
+      int ycoords[], int blobcount,//no of coords
+      int degree);               //function
+
+    double step(            //step change
+                double x1,  //between coords
+                double x2);
+    double y(                  //evaluate
+             double x) const;  //at x
+
+    void move(              // reposition spline
+              ICOORD vec);  // by vector
+    BOOL8 overlap(                   //test overlap
+                  QSPLINE *spline2,  //2 cannot be smaller
+                  double fraction);  //by more than this
+    void extrapolate(                  //linear extrapolation
+                     double gradient,  //gradient to use
+                     int left,         //new left edge
+                     int right);       //new right edge
+
+#ifndef GRAPHICS_DISABLED
+    void plot(                                 //draw it
+              ScrollView* window,              //in window
+              ScrollView::Color colour) const; //in colour
+#endif
+
+    void prep_serialise() {  //set ptrs to counts
+    }                            //not required
+
+    // Writes the boundary array followed by the quadratic pieces as raw
+    // bytes, sized from the current segment count.
+    void dump(  //write external bits
+              FILE *f) {
+      serialise_bytes (f, (void *) xcoords, (segments + 1) * sizeof (inT32));
+      serialise_bytes (f, (void *) quadratics, segments * sizeof (QUAD_COEFFS));
+    }
+
+    // Reads both arrays back in the order dump() wrote them. The sizes are
+    // taken from `segments`, so it must already hold the right count —
+    // presumably restored by the make_serialise machinery; TODO confirm.
+    void de_dump(  //read external bits
+                 FILE *f) {
+      xcoords = (inT32 *) de_serialise_bytes (f,
+        (segments + 1) * sizeof (inT32));
+      quadratics = (QUAD_COEFFS *) de_serialise_bytes (f,
+        segments *
+        sizeof (QUAD_COEFFS));
+    }
+
+                                 //assign copy
+    make_serialise (QSPLINE) QSPLINE & operator= (
+      const QSPLINE & source);   //from this
+
+  private:
+
+    inT32 spline_index(                  //binary search
+                       double x) const;  //for x
+    inT32 segments;              //no of segments
+    inT32 *xcoords;              //segment boundaries (segments + 1 entries)
+    QUAD_COEFFS *quadratics;     //spline pieces
+};
+#endif
diff --git a/ccmain/ratngs.cpp b/ccmain/ratngs.cpp
new file mode 100644
index 000000000..72f4dfb63
--- /dev/null
+++ b/ccmain/ratngs.cpp
@@ -0,0 +1,517 @@
+/**********************************************************************
+ * File:        ratngs.cpp  (Formerly ratings.c)
+ * Description: Code to manipulate the BLOB_CHOICE and WERD_CHOICE classes.
+ * Author:      Ray Smith
+ * Created:     Thu Apr 23 13:23:29 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "mfcpch.h"
+
+#include "ratngs.h"
+#include "callcpp.h"
+#include "genericvector.h"
+#include "unicharset.h"
+
+extern FILE *matcher_fp;
+
+ELISTIZE (BLOB_CHOICE) CLISTIZE (BLOB_CHOICE_LIST) CLISTIZE (WERD_CHOICE)
+//extern FILE* matcher_fp;
+
+/**********************************************************************
+ * BLOB_CHOICE::BLOB_CHOICE
+ *
+ * Constructor to build a BLOB_CHOICE from a char, rating and certainty.
+ **********************************************************************/
+BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
+                         float src_rating,          // rating
+                         float src_cert,            // certainty
+                         inT8 src_config,           // config (font)
+                         int src_script_id          // script
+                        ) {
+  // Plain field-by-field initialisation from the arguments.
+  this->unichar_id_ = src_unichar_id;
+  this->rating_ = src_rating;
+  this->certainty_ = src_cert;
+  this->config_ = src_config;
+  this->script_id_ = src_script_id;
+}
+
+/**********************************************************************
+ * BLOB_CHOICE::BLOB_CHOICE
+ *
+ * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE.
+ * Every field is read through the other object's accessor.
+ **********************************************************************/
+BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
+  this->unichar_id_ = other.unichar_id();
+  this->rating_ = other.rating();
+  this->certainty_ = other.certainty();
+  this->config_ = other.config();
+  this->script_id_ = other.script_id();
+}
+
+/**********************************************************************
+ * WERD_CHOICE::WERD_CHOICE
+ *
+ * Constructor to build a WERD_CHOICE from the given string.
+ * The string is walked one unichar at a time with unicharset.step();
+ * if the walk does not end exactly at the end of the string the word
+ * is initialised empty and marked bad.
+ * The function assumes that src_string is not NULL.
+ **********************************************************************/
+WERD_CHOICE::WERD_CHOICE(const char *src_string,
+                         const UNICHARSET &unicharset) {
+  STRING src_lengths;            // one length per unichar
+  const int len = strlen(src_string);
+  const char *const end = src_string + len;
+  const char *cursor = src_string;
+
+  int step = unicharset.step(cursor);
+  while (cursor < end && step > 0) {
+    step = unicharset.step(cursor);
+    src_lengths += step;
+    cursor += step;
+  }
+
+  const bool whole_string_valid = (step != 0 && cursor == end);
+  if (whole_string_valid) {
+    this->init(src_string, src_lengths.string(),
+               0.0, 0.0, NO_PERM, unicharset);
+  } else {  // there must have been an invalid unichar in the string
+    this->init(8);
+    this->make_bad();
+  }
+}
+
+/**********************************************************************
+ * WERD_CHOICE::init
+ *
+ * Helper function to build a WERD_CHOICE from the given string,
+ * fragment lengths, rating, certainty and permuter.
+ *
+ * The function assumes that src_string is not NULL.
+ * src_lengths argument could be NULL, in which case the unichars
+ * in src_string are assumed to all be of length 1.
+ **********************************************************************/
+void WERD_CHOICE::init(const char *src_string,
+                       const char *src_lengths,
+                       float src_rating,
+                       float src_certainty,
+                       uinT8 src_permuter,
+                       const UNICHARSET &unicharset) {
+  const int src_string_len = strlen(src_string);
+  if (src_string_len != 0) {
+    // Reserve one slot per unichar and fill every slot.
+    this->init(src_lengths ? strlen(src_lengths): src_string_len);
+    length_ = reserved_;
+    const char *cursor = src_string;  // start of the current unichar
+    for (int i = 0; i < length_; ++i) {
+      const int unichar_length = src_lengths ? src_lengths[i] : 1;
+      unichar_ids_[i] = unicharset.unichar_to_id(cursor, unichar_length);
+      fragment_lengths_[i] = 1;
+      cursor += unichar_length;
+    }
+  } else {
+    // Empty string: just set up the default reservation.
+    this->init(8);
+  }
+  rating_ = src_rating;
+  certainty_ = src_certainty;
+  permuter_ = src_permuter;
+}
+
+/**********************************************************************
+ * WERD_CHOICE::~WERD_CHOICE
+ *
+ * Releases the id and fragment-length arrays and the blob choices.
+ **********************************************************************/
+WERD_CHOICE::~WERD_CHOICE() {
+  delete[] unichar_ids_;
+  delete[] fragment_lengths_;
+  delete_blob_choices();
+}
+
+
+/**********************************************************************
+ * WERD_CHOICE::set_blob_choices
+ *
+ * Delete current blob_choices. Set the blob_choices to the given new
+ * list. Passing the list that is already held changes nothing.
+ **********************************************************************/
+void WERD_CHOICE::set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices) {
+  if (blob_choices_ == blob_choices)
+    return;                      // same list: keep it
+  delete_blob_choices();
+  blob_choices_ = blob_choices;
+}
+
+
+/**********************************************************************
+ * contains_unichar_id
+ *
+ * Returns true if unichar_ids_ contain the given unichar_id, false otherwise.
+ ************************************************************************/
+bool WERD_CHOICE::contains_unichar_id(UNICHAR_ID unichar_id) const {
+  // Linear scan over the ids currently in use.
+  int i = 0;
+  while (i < length_ && unichar_ids_[i] != unichar_id) {
+    ++i;
+  }
+  return i < length_;
+}
+
+/**********************************************************************
+ * remove_unichar_ids
+ *
+ * Removes num unichar ids starting from index start from unichar_ids_
+ * and updates length_ and fragment_lengths_ to reflect this change.
+ * Note: this function does not modify rating_ and certainty_.
+ ***********************************************************************/
+void WERD_CHOICE::remove_unichar_ids(int start, int num) {
+  ASSERT_HOST(start >= 0 && start + num <= length_);
+  // Slide everything after the removed range down by num places.
+  for (int src = start + num; src < length_; ++src) {
+    const int dest = src - num;
+    unichar_ids_[dest] = unichar_ids_[src];
+    fragment_lengths_[dest] = fragment_lengths_[src];
+  }
+  length_ -= num;
+}
+
+/**********************************************************************
+ * string_and_lengths
+ *
+ * Populates the given word_str with unichars from unichar_ids and
+ * and word_lengths_str with the corresponding unichar lengths.
+ * Uses current_unicharset to make unichar id -> unichar conversions.
+ * word_lengths_str may be NULL, in which case only word_str is filled.
+ **********************************************************************/
+void WERD_CHOICE::string_and_lengths(const UNICHARSET &current_unicharset,
+                                     STRING *word_str,
+                                     STRING *word_lengths_str) const {
+  const bool want_lengths = (word_lengths_str != NULL);
+
+  *word_str = "";
+  if (want_lengths) *word_lengths_str = "";
+  for (int i = 0; i < length_; ++i) {
+    const char *ch = current_unicharset.id_to_unichar(unichar_ids_[i]);
+    *word_str += ch;
+    if (want_lengths) {
+      *word_lengths_str += strlen(ch);
+    }
+  }
+}
+
+/**********************************************************************
+ * append_unichar_id
+ *
+ * Make sure there is enough space in the word for the new unichar id
+ * and call append_unichar_id_space_allocated().
+ **********************************************************************/ +void WERD_CHOICE::append_unichar_id( + UNICHAR_ID unichar_id, char fragment_length, + float rating, float certainty) { + if (length_ == reserved_) { + this->double_the_size(); + } + this->append_unichar_id_space_allocated(unichar_id, fragment_length, + rating, certainty); +} + +/********************************************************************** + * WERD_CHOICE::operator+= + * + * Cat a second word rating on the end of this current one. + * The ratings are added and the confidence is the min. + * If the permuters are NOT the same the permuter is set to COMPOUND_PERM + **********************************************************************/ +WERD_CHOICE & WERD_CHOICE::operator+= (const WERD_CHOICE & second) { + // TODO(daria): find out why the choice was cleared this way if any + // of the pieces are empty. Add the description of this behavior + // to the comments. + // if (word_string.length () == 0 || second.word_string.length () == 0) { + // word_string = NULL; //make it empty + // word_lengths = NULL; + // delete_blob_choices(); + // } else { + while (reserved_ < length_ + second.length()) { + this->double_the_size(); + } + const UNICHAR_ID *other_unichar_ids = second.unichar_ids(); + const char *other_fragment_lengths = second.fragment_lengths(); + for (int i = 0; i < second.length(); ++i) { + unichar_ids_[length_ + i] = other_unichar_ids[i]; + fragment_lengths_[length_ + i] = other_fragment_lengths[i]; + } + length_ += second.length(); + rating_ += second.rating(); // add ratings + if (second.certainty() < certainty_) // take min + certainty_ = second.certainty(); + if (permuter_ == NO_PERM) { + permuter_ = second.permuter(); + } else if (second.permuter() != NO_PERM && + second.permuter() != permuter_) { + permuter_ = COMPOUND_PERM; + } + unichar_string_ += second.unichar_string(); + unichar_lengths_ += second.unichar_lengths(); + + // Append a deep copy of second blob_choices if it 
exists. + if (second.blob_choices_ != NULL) { + if (this->blob_choices_ == NULL) + this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST; + + BLOB_CHOICE_LIST_C_IT this_blob_choices_it; + BLOB_CHOICE_LIST_C_IT second_blob_choices_it; + + this_blob_choices_it.set_to_list(this->blob_choices_); + this_blob_choices_it.move_to_last(); + + second_blob_choices_it.set_to_list(second.blob_choices_); + + for (second_blob_choices_it.mark_cycle_pt(); + !second_blob_choices_it.cycled_list(); + second_blob_choices_it.forward()) { + + BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST(); + blob_choices_copy->deep_copy(second_blob_choices_it.data(), + &BLOB_CHOICE::deep_copy); + + this_blob_choices_it.add_after_then_move(blob_choices_copy); + } + } + return *this; +} + + +/********************************************************************** + * WERD_CHOICE::operator= + * + * Allocate enough memory to hold a copy of source and copy over + * all the information from source to this WERD_CHOICE. + **********************************************************************/ +WERD_CHOICE& WERD_CHOICE::operator=(const WERD_CHOICE& source) { + while (reserved_ < source.length()) { + this->double_the_size(); + } + + const UNICHAR_ID *other_unichar_ids = source.unichar_ids(); + const char *other_fragment_lengths = source.fragment_lengths(); + for (int i = 0; i < source.length(); ++i) { + unichar_ids_[i] = other_unichar_ids[i]; + fragment_lengths_[i] = other_fragment_lengths[i]; + } + length_ = source.length(); + rating_ = source.rating(); + certainty_ = source.certainty(); + permuter_ = source.permuter(); + fragment_mark_ = source.fragment_mark(); + unichar_string_ = source.unichar_string(); + unichar_lengths_ = source.unichar_lengths(); + + // Delete existing blob_choices + this->delete_blob_choices(); + + // Deep copy blob_choices of source + if (source.blob_choices_ != NULL) { + BLOB_CHOICE_LIST_C_IT this_blob_choices_it; + BLOB_CHOICE_LIST_C_IT source_blob_choices_it; + + 
this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST(); + + this_blob_choices_it.set_to_list(this->blob_choices_); + source_blob_choices_it.set_to_list(source.blob_choices_); + + for (source_blob_choices_it.mark_cycle_pt(); + !source_blob_choices_it.cycled_list(); + source_blob_choices_it.forward()) { + + BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST(); + blob_choices_copy->deep_copy(source_blob_choices_it.data(), + &BLOB_CHOICE::deep_copy); + + this_blob_choices_it.add_after_then_move(blob_choices_copy); + } + } + return *this; +} + +/********************************************************************** + * WERD_CHOICE::delete_blob_choices + * + * Clear the blob_choices list, delete it and set it to NULL. + **********************************************************************/ +void WERD_CHOICE::delete_blob_choices() { + if (blob_choices_ != NULL) { + blob_choices_->deep_clear(); + delete blob_choices_; + blob_choices_ = NULL; + } +} + +/********************************************************************** + * WERD_CHOICE::print + * + * Print WERD_CHOICE to stdout. 
+ **********************************************************************/ +const void WERD_CHOICE::print(const char *msg) const { + tprintf("%s WERD_CHOICE:\n", msg); + tprintf("length_ %d reserved_ %d permuter_ %d\n", + length_, reserved_, permuter_); + tprintf("rating_ %.4f certainty_ %.4f", rating_, certainty_); + if (fragment_mark_) { + tprintf(" fragment_mark_ true"); + } + tprintf("\n"); + if (unichar_string_.length() > 0) { + tprintf("unichar_string_ %s unichar_lengths_ %s\n", + unichar_string_.string(), unichar_lengths_.string()); + } + tprintf("unichar_ids: "); + int i; + for (i = 0; i < length_; ++i) { + tprintf("%d ", unichar_ids_[i]); + } + tprintf("\nfragment_lengths_: "); + for (i = 0; i < length_; ++i) { + tprintf("%d ", fragment_lengths_[i]); + } + tprintf("\n"); + fflush(stdout); +} + +/********************************************************************** + * print_ratings_list + * + * Send all the ratings out to the logfile. + **********************************************************************/ +void print_ratings_list( + const char *msg, // intro message + BLOB_CHOICE_LIST *ratings, // list of results + const UNICHARSET ¤t_unicharset // unicharset that can be used + // for id-to-unichar conversion + ) { + if (ratings->length() == 0) { + tprintf("%s:\n", msg); + return; + } + if (*msg != '\0') { + tprintf("%s\n", msg); + } + BLOB_CHOICE_IT c_it; + c_it.set_to_list(ratings); + for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { + tprintf("r%.2f c%.2f : %d %s", + c_it.data()->rating(), c_it.data()->certainty(), + c_it.data()->unichar_id(), + current_unicharset.debug_str(c_it.data()->unichar_id()).string()); + if (!c_it.at_last()) { + tprintf("\n"); + } + } + tprintf("\n"); + fflush(stdout); +} + +/********************************************************************** + * print_ratings_list + * + * Print ratings list (unichar ids only). 
+ **********************************************************************/ +void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings) { + if (ratings->length() == 0) { + tprintf("%s:\n", msg); + return; + } + if (*msg != '\0') { + tprintf("%s\n", msg); + } + BLOB_CHOICE_IT c_it; + c_it.set_to_list(ratings); + for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { + tprintf("r%.2f c%.2f : %d", c_it.data()->rating(), + c_it.data()->certainty(), c_it.data()->unichar_id()); + if (!c_it.at_last()) { + tprintf("\n"); + } + } + tprintf("\n"); + fflush(stdout); +} + +/********************************************************************** + * print_ratings_info + * + * Send all the ratings out to the logfile. + **********************************************************************/ +void print_ratings_info( + FILE *fp, // file to use + BLOB_CHOICE_LIST *ratings, // list of results + const UNICHARSET ¤t_unicharset // unicharset that can be used + // for id-to-unichar conversion + ) { + inT32 index; // to list + inT32 best_index; // to list + FLOAT32 best_rat; // rating + FLOAT32 best_cert; // certainty + const char* first_char = NULL; // character + FLOAT32 first_rat; // rating + FLOAT32 first_cert; // certainty + const char* sec_char = NULL; // character + FLOAT32 sec_rat = 0.0f; // rating + FLOAT32 sec_cert = 0.0f; // certainty + BLOB_CHOICE_IT c_it = ratings; // iterator + + index = ratings->length(); + if (index > 0) { + first_char = current_unicharset.id_to_unichar(c_it.data()->unichar_id()); + first_rat = c_it.data()->rating(); + first_cert = -c_it.data()->certainty(); + if (index > 1) { + sec_char = current_unicharset.id_to_unichar( + c_it.data_relative(1)->unichar_id()); + sec_rat = c_it.data_relative(1)->rating(); + sec_cert = -c_it.data_relative(1)->certainty(); + } else { + sec_char = NULL; + sec_rat = -1; + sec_cert = -1; + } + } else { + first_char = NULL; + first_rat = -1; + first_cert = -1; + } + best_index = -1; + best_rat = -1; + 
best_cert = -1; + for (index = 0, c_it.mark_cycle_pt(); !c_it.cycled_list(); + c_it.forward(), index++) { + if (strcmp(current_unicharset.id_to_unichar(c_it.data()->unichar_id()), + blob_answer) == 0) { + best_index = index; + best_rat = c_it.data()->rating(); + best_cert = -c_it.data()->certainty(); + } + } + if (first_char != NULL && (*first_char == '\0' || *first_char == ' ')) + first_char = NULL; + if (sec_char != NULL && (*sec_char == '\0' || *sec_char == ' ')) + sec_char = NULL; + fprintf(matcher_fp, + " " INT32FORMAT " " INT32FORMAT " %g %g %s %g %g %s %g %g\n", + ratings->length(), best_index, best_rat, best_cert, + first_char != NULL ? first_char : "~", + first_rat, first_cert, sec_char != NULL ? sec_char : "~", + sec_rat, sec_cert); +} + +/********************************************************************** + * print_char_choices_list + **********************************************************************/ +void print_char_choices_list(const char *msg, + const BLOB_CHOICE_LIST_VECTOR &char_choices, + const UNICHARSET ¤t_unicharset, + BOOL8 detailed) { + if (*msg != '\0') tprintf("%s\n", msg); + for (int x = 0; x < char_choices.length(); ++x) { + BLOB_CHOICE_IT c_it; + c_it.set_to_list(char_choices.get(x)); + tprintf("char[%d]: %s\n", x, + current_unicharset.debug_str( c_it.data()->unichar_id()).string()); + if (detailed) + print_ratings_list(" ", char_choices.get(x), current_unicharset); + } +} diff --git a/ccmain/ratngs.h b/ccmain/ratngs.h new file mode 100644 index 000000000..9efc786ef --- /dev/null +++ b/ccmain/ratngs.h @@ -0,0 +1,349 @@ +/********************************************************************** + * File: ratngs.h (Formerly ratings.h) + * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes. + * Author: Ray Smith + * Created: Thu Apr 23 11:40:38 BST 1992 + * + * (C) Copyright 1992, Hewlett-Packard Ltd. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef RATNGS_H +#define RATNGS_H + +#include + +#include "clst.h" +#include "genericvector.h" +#include "notdll.h" +#include "unichar.h" +#include "unicharset.h" +#include "werd.h" + +class BLOB_CHOICE: public ELIST_LINK +{ + public: + BLOB_CHOICE() { + unichar_id_ = INVALID_UNICHAR_ID; + config_ = '\0'; + rating_ = MAX_FLOAT32; + certainty_ = -MAX_FLOAT32; + script_id_ = -1; + } + BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id + float src_rating, // rating + float src_cert, // certainty + inT8 src_config, // config (font) + int script_id); // script + BLOB_CHOICE(const BLOB_CHOICE &other); + ~BLOB_CHOICE() {} + UNICHAR_ID unichar_id() const { + return unichar_id_; + } + float rating() const { + return rating_; + } + float certainty() const { + return certainty_; + } + inT8 config() const { + return config_; + } + int script_id() const { + return script_id_; + } + + void set_unichar_id(UNICHAR_ID newunichar_id) { + unichar_id_ = newunichar_id; + } + void set_rating(float newrat) { + rating_ = newrat; + } + void set_certainty(float newrat) { + certainty_ = newrat; + } + void set_config(inT8 newfont) { + config_ = newfont; + } + void set_script(int newscript_id) { + script_id_ = newscript_id; + } + + static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) { + BLOB_CHOICE* choice = new BLOB_CHOICE; + *choice = *src; + return 
choice; + } + + NEWDELETE + private: + UNICHAR_ID unichar_id_; // unichar id + char config_; // char config (font) + inT16 junk2_; + float rating_; // size related + float certainty_; // absolute + int script_id_; +}; + +// Make BLOB_CHOICE listable. +ELISTIZEH (BLOB_CHOICE) CLISTIZEH (BLOB_CHOICE_LIST) + +// Permuter codes used in WERD_CHOICEs. +enum PermuterType { + NO_PERM, // 0 + PUNC_PERM, // 1 + TOP_CHOICE_PERM, // 2 + LOWER_CASE_PERM, // 3 + UPPER_CASE_PERM, // 4 + NUMBER_PERM, // 5 + SYSTEM_DAWG_PERM, // 6 + DOC_DAWG_PERM, // 7 + USER_DAWG_PERM, // 8 + FREQ_DAWG_PERM, // 9 + COMPOUND_PERM, // 10 +}; + +class WERD_CHOICE { + public: + WERD_CHOICE() { this->init(8); } + WERD_CHOICE(int reserved) { this->init(reserved); } + WERD_CHOICE(const char *src_string, + const char *src_lengths, + float src_rating, + float src_certainty, + uinT8 src_permuter, + const UNICHARSET &unicharset) { + this->init(src_string, src_lengths, src_rating, + src_certainty, src_permuter, unicharset); + } + WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset); + WERD_CHOICE(const WERD_CHOICE &word) { + this->init(word.length()); + this->operator=(word); + } + ~WERD_CHOICE(); + + inline int length() const { + return length_; + } + inline const UNICHAR_ID *unichar_ids() const { + return unichar_ids_; + } + inline const UNICHAR_ID unichar_id(int index) const { + assert(index < length_); + return unichar_ids_[index]; + } + inline const char *fragment_lengths() const { + return fragment_lengths_; + } + inline const char fragment_length(int index) const { + assert(index < length_); + return fragment_lengths_[index]; + } + inline float rating() const { + return rating_; + } + inline float certainty() const { + return certainty_; + } + inline uinT8 permuter() const { + return permuter_; + } + inline bool fragment_mark() const { + return fragment_mark_; + } + inline BLOB_CHOICE_LIST_CLIST* blob_choices() { + return blob_choices_; + } + inline void set_unichar_id(UNICHAR_ID 
unichar_id, int index) { + assert(index < length_); + unichar_ids_[index] = unichar_id; + } + inline void set_rating(float new_val) { + rating_ = new_val; + } + inline void set_certainty(float new_val) { + certainty_ = new_val; + } + inline void set_permuter(uinT8 perm) { + permuter_ = perm; + } + inline void set_fragment_mark(bool new_fragment_mark) { + fragment_mark_ = new_fragment_mark; + } + void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices); + + // Make more space in unichar_id_ and fragment_lengths_ arrays. + inline void double_the_size() { + unichar_ids_ = GenericVector::double_the_size_memcpy( + reserved_, unichar_ids_); + fragment_lengths_ = GenericVector::double_the_size_memcpy( + reserved_, fragment_lengths_); + reserved_ *= 2; + } + + // Initializes WERD_CHOICE - reseves length slots in unichar_ids_ and + // fragment_length_ arrays. Sets other values to default (blank) values. + inline void init(int reserved) { + reserved_ = reserved; + unichar_ids_ = new UNICHAR_ID[reserved]; + fragment_lengths_ = new char[reserved]; + length_ = 0; + rating_ = 0.0; + certainty_ = MAX_FLOAT32; + permuter_ = NO_PERM; + fragment_mark_ = false; + blob_choices_ = NULL; + unichar_string_ = ""; + unichar_lengths_ = ""; + } + + // Helper function to build a WERD_CHOICE from the given string, + // fragment lengths, rating, certainty and permuter. + // The function assumes that src_string is not NULL. + // src_lengths argument could be NULL, in which case the unichars + // in src_string are assumed to all be of length 1. + void init(const char *src_string, const char *src_lengths, + float src_rating, float src_certainty, + uinT8 src_permuter, const UNICHARSET ¤t_unicharset); + + // Set the fields in this choice to be default (bad) values. 
+ inline void make_bad() { + length_ = 0; + rating_ = MAX_FLOAT32; + certainty_ = -MAX_FLOAT32; + fragment_mark_ = false; + unichar_string_ = ""; + unichar_lengths_ = ""; + } + + // This function assumes that there is enough space reserved + // in the WERD_CHOICE for adding another unichar. + // This is an efficient alternative to append_unichar_id(). + inline void append_unichar_id_space_allocated( + UNICHAR_ID unichar_id, char fragment_length, + float rating, float certainty) { + assert(reserved_ > length_); + length_++; + this->set_unichar_id(unichar_id, fragment_length, + rating, certainty, length_-1); + } + + void append_unichar_id(UNICHAR_ID unichar_id, char fragment_length, + float rating, float certainty); + + inline void set_unichar_id(UNICHAR_ID unichar_id, char fragment_length, + float rating, float certainty, int index) { + assert(index < length_); + unichar_ids_[index] = unichar_id; + fragment_lengths_[index] = fragment_length; + rating_ += rating; + if (certainty < certainty_) { + certainty_ = certainty; + } + } + + bool contains_unichar_id(UNICHAR_ID unichar_id) const; + void remove_unichar_ids(int index, int num); + inline void remove_last_unichar_id() { --length_; } + inline void remove_unichar_id(int index) { this->remove_unichar_ids(index, 1); } + void string_and_lengths(const UNICHARSET ¤t_unicharset, + STRING *word_str, STRING *word_lengths_str) const; + const STRING debug_string(const UNICHARSET ¤t_unicharset) const { + STRING word_str; + for (int i = 0; i < length_; ++i) { + word_str += current_unicharset.debug_str(unichar_ids_[i]); + word_str += " "; + } + return word_str; + } + // Since this function walks over the whole word to convert unichar ids + // to unichars, it is best to call it once, e.g. after all changes to + // unichar_ids_ in WERD_CHOICE are finished. 
+ void populate_unichars(const UNICHARSET ¤t_unicharset) { + this->string_and_lengths(current_unicharset, &unichar_string_, + &unichar_lengths_); + } + // This function should only be called if populate_unichars() + // was called and WERD_CHOICE did not change since then. + const STRING &unichar_string() const { + assert(unichar_string_.length() <= 0 || + unichar_string_.length() >= length_); // sanity check + return unichar_string_; + } + // This function should only be called if populate_unichars() + // was called and WERD_CHOICE did not change since then. + const STRING &unichar_lengths() const { + assert(unichar_lengths_.length() <= 0 || + unichar_lengths_.length() == length_); // sanity check + return unichar_lengths_; + } + const void print() const { this->print(""); } + const void print(const char *msg) const; + + WERD_CHOICE& operator+= ( // concatanate + const WERD_CHOICE & second);// second on first + + WERD_CHOICE& operator= (const WERD_CHOICE& source); + + NEWDELETE private: + UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word + char *fragment_lengths_; // number of fragments in each unichar + int reserved_; // size of the above arrays + int length_; // word length + float rating_; // size related + float certainty_; // absolute + uinT8 permuter_; // permuter code + bool fragment_mark_; // if true, indicates that this choice + // was chosen over a better one that + // contained a fragment + BLOB_CHOICE_LIST_CLIST *blob_choices_; // best choices for each blob + + // The following variables are only populated by calling populate_unichars(). + // They are not synchronized with the values in unichar_ids otherwise. + STRING unichar_string_; + STRING unichar_lengths_; + bool unichar_info_present; + + private: + void delete_blob_choices(); +}; + +// Make WERD_CHOICE listable. 
+ELISTIZEH (WERD_CHOICE) +typedef GenericVector BLOB_CHOICE_LIST_VECTOR; +typedef GenericVector WERD_CHOICE_LIST_VECTOR; + +typedef void (*POLY_TESTER) (const STRING&, PBLOB *, DENORM *, BOOL8, + char *, inT32, BLOB_CHOICE_LIST *); + +void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings); +void print_ratings_list( + const char *msg, // intro message + BLOB_CHOICE_LIST *ratings, // list of results + const UNICHARSET ¤t_unicharset // unicharset that can be used + // for id-to-unichar conversion + ); +void print_ratings_info( + FILE *fp, // file to use + BLOB_CHOICE_LIST *ratings, // list of results + const UNICHARSET ¤t_unicharset // unicharset that can be used + // for id-to-unichar conversion + ); +void print_char_choices_list( + const char *msg, + const BLOB_CHOICE_LIST_VECTOR &char_choices, + const UNICHARSET ¤t_unicharset, + BOOL8 detailed + ); + +#endif diff --git a/ccmain/rect.cpp b/ccmain/rect.cpp new file mode 100644 index 000000000..1c922928e --- /dev/null +++ b/ccmain/rect.cpp @@ -0,0 +1,253 @@ +/********************************************************************** + * File: rect.c (Formerly box.c) + * Description: Bounding box class definition. + * Author: Phil Cheatle + * Created: Wed Oct 16 15:18:45 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "mfcpch.h" //precompiled headers +#include "rect.h" + +/********************************************************************** + * TBOX::TBOX() Constructor from 2 ICOORDS + * + **********************************************************************/ + +TBOX::TBOX( //construtor + const ICOORD pt1, //one corner + const ICOORD pt2 //the other corner + ) { + if (pt1.x () <= pt2.x ()) { + if (pt1.y () <= pt2.y ()) { + bot_left = pt1; + top_right = pt2; + } + else { + bot_left = ICOORD (pt1.x (), pt2.y ()); + top_right = ICOORD (pt2.x (), pt1.y ()); + } + } + else { + if (pt1.y () <= pt2.y ()) { + bot_left = ICOORD (pt2.x (), pt1.y ()); + top_right = ICOORD (pt1.x (), pt2.y ()); + } + else { + bot_left = pt2; + top_right = pt1; + } + } +} + +/********************************************************************** + * TBOX::TBOX() Constructor from 4 integer values. + * Note: It is caller's responsibility to provide values in the right + * order. + **********************************************************************/ + +TBOX::TBOX( //constructor + inT16 left, inT16 bottom, inT16 right, inT16 top) + : bot_left(left, bottom), top_right(right, top) { +} + +// rotate_large constructs the containing bounding box of all 4 +// corners after rotating them. It therefore guarantees that all +// original content is contained within, but also slightly enlarges the box. 
+void TBOX::rotate_large(const FCOORD& vec) { + ICOORD top_left(bot_left.x(), top_right.y()); + ICOORD bottom_right(top_right.x(), bot_left.y()); + top_left.rotate(vec); + bottom_right.rotate(vec); + rotate(vec); + TBOX box2(top_left, bottom_right); + *this += box2; +} + +/********************************************************************** + * TBOX::intersection() Build the largest box contained in both boxes + * + **********************************************************************/ + +TBOX TBOX::intersection( //shared area box + const TBOX &box) const { + inT16 left; + inT16 bottom; + inT16 right; + inT16 top; + if (overlap (box)) { + if (box.bot_left.x () > bot_left.x ()) + left = box.bot_left.x (); + else + left = bot_left.x (); + + if (box.top_right.x () < top_right.x ()) + right = box.top_right.x (); + else + right = top_right.x (); + + if (box.bot_left.y () > bot_left.y ()) + bottom = box.bot_left.y (); + else + bottom = bot_left.y (); + + if (box.top_right.y () < top_right.y ()) + top = box.top_right.y (); + else + top = top_right.y (); + } + else { + left = MAX_INT16; + bottom = MAX_INT16; + top = -MAX_INT16; + right = -MAX_INT16; + } + return TBOX (left, bottom, right, top); +} + + +/********************************************************************** + * TBOX::bounding_union() Build the smallest box containing both boxes + * + **********************************************************************/ + +TBOX TBOX::bounding_union( //box enclosing both + const TBOX &box) const { + ICOORD bl; //bottom left + ICOORD tr; //top right + + if (box.bot_left.x () < bot_left.x ()) + bl.set_x (box.bot_left.x ()); + else + bl.set_x (bot_left.x ()); + + if (box.top_right.x () > top_right.x ()) + tr.set_x (box.top_right.x ()); + else + tr.set_x (top_right.x ()); + + if (box.bot_left.y () < bot_left.y ()) + bl.set_y (box.bot_left.y ()); + else + bl.set_y (bot_left.y ()); + + if (box.top_right.y () > top_right.y ()) + tr.set_y (box.top_right.y ()); + else + tr.set_y 
(top_right.y ()); + return TBOX (bl, tr); +} + + +/********************************************************************** + * TBOX::plot() Paint a box using specified settings + * + **********************************************************************/ + +#ifndef GRAPHICS_DISABLED +void TBOX::plot( //paint box + ScrollView* fd, //where to paint + ScrollView::Color fill_colour, //colour for inside + ScrollView::Color border_colour //colour for border + ) const { + fd->Brush(fill_colour); + fd->Pen(border_colour); + plot(fd); +} +#endif + + +/********************************************************************** + * operator+= + * + * Extend one box to include the other (In place union) + **********************************************************************/ + +DLLSYM TBOX & +operator+= ( //bounding bounding bx +TBOX & op1, //operands +const TBOX & op2) { + if (op2.bot_left.x () < op1.bot_left.x ()) + op1.bot_left.set_x (op2.bot_left.x ()); + + if (op2.top_right.x () > op1.top_right.x ()) + op1.top_right.set_x (op2.top_right.x ()); + + if (op2.bot_left.y () < op1.bot_left.y ()) + op1.bot_left.set_y (op2.bot_left.y ()); + + if (op2.top_right.y () > op1.top_right.y ()) + op1.top_right.set_y (op2.top_right.y ()); + + return op1; +} + + +/********************************************************************** + * operator-= + * + * Reduce one box to intersection with the other (In place intersection) + **********************************************************************/ + +DLLSYM TBOX & +operator-= ( //inplace intersection +TBOX & op1, //operands +const TBOX & op2) { + if (op1.overlap (op2)) { + if (op2.bot_left.x () > op1.bot_left.x ()) + op1.bot_left.set_x (op2.bot_left.x ()); + + if (op2.top_right.x () < op1.top_right.x ()) + op1.top_right.set_x (op2.top_right.x ()); + + if (op2.bot_left.y () > op1.bot_left.y ()) + op1.bot_left.set_y (op2.bot_left.y ()); + + if (op2.top_right.y () < op1.top_right.y ()) + op1.top_right.set_y (op2.top_right.y ()); + } + else { + 
op1.bot_left.set_x (MAX_INT16); + op1.bot_left.set_y (MAX_INT16); + op1.top_right.set_x (-MAX_INT16); + op1.top_right.set_y (-MAX_INT16); + } + return op1; +} + + +/********************************************************************** + * TBOX::serialise_asc() Convert to ascii file. + * + **********************************************************************/ + +void TBOX::serialise_asc( //convert to ascii + FILE *f //file to use + ) { + bot_left.serialise_asc (f); + top_right.serialise_asc (f); +} + + +/********************************************************************** + * TBOX::de_serialise_asc() Convert from ascii file. + * + **********************************************************************/ + +void TBOX::de_serialise_asc( //convert from ascii + FILE *f //file to use + ) { + bot_left.de_serialise_asc (f); + top_right.de_serialise_asc (f); +} diff --git a/ccmain/rect.h b/ccmain/rect.h new file mode 100644 index 000000000..a2fc40a11 --- /dev/null +++ b/ccmain/rect.h @@ -0,0 +1,382 @@ +/********************************************************************** + * File: rect.h (Formerly box.h) + * Description: Bounding box class definition. + * Author: Phil Cheatle + * Created: Wed Oct 16 15:18:45 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef RECT_H +#define RECT_H + +#include +#include "points.h" +#include "ndminx.h" +#include "tprintf.h" +#include "scrollview.h" + +class DLLSYM TBOX { // bounding box + public: + TBOX (): // empty constructor making a null box + bot_left (MAX_INT16, MAX_INT16), top_right (-MAX_INT16, -MAX_INT16) { + } + + TBOX( // constructor + const ICOORD pt1, // one corner + const ICOORD pt2); // the other corner + + TBOX( // constructor + inT16 left, inT16 bottom, inT16 right, inT16 top); + + TBOX( // box around FCOORD + const FCOORD pt); + + bool null_box() const { // Is box null + return ((left () >= right ()) || (top () <= bottom ())); + } + + inT16 top() const { // coord of top + return top_right.y (); + } + void set_top(int y) { + top_right.set_y(y); + } + + inT16 bottom() const { // coord of bottom + return bot_left.y (); + } + void set_bottom(int y) { + bot_left.set_y(y); + } + + inT16 left() const { // coord of left + return bot_left.x (); + } + void set_left(int x) { + bot_left.set_x(x); + } + + inT16 right() const { // coord of right + return top_right.x (); + } + void set_right(int x) { + top_right.set_x(x); + } + + const ICOORD &botleft() const { // access function + return bot_left; + } + + ICOORD botright() const { // ~ access function + return ICOORD (top_right.x (), bot_left.y ()); + } + + ICOORD topleft() const { // ~ access function + return ICOORD (bot_left.x (), top_right.y ()); + } + + const ICOORD &topright() const { // access function + return top_right; + } + + inT16 height() const { // how high is it? + if (!null_box ()) + return top_right.y () - bot_left.y (); + else + return 0; + } + + inT16 width() const { // how high is it? + if (!null_box ()) + return top_right.x () - bot_left.x (); + else + return 0; + } + + inT32 area() const { // what is the area? 
+ if (!null_box ()) + return width () * height (); + else + return 0; + } + + void move_bottom_edge( // move one edge + const inT16 y) { // by +/- y + bot_left += ICOORD (0, y); + } + + void move_left_edge( // move one edge + const inT16 x) { // by +/- x + bot_left += ICOORD (x, 0); + } + + void move_right_edge( // move one edge + const inT16 x) { // by +/- x + top_right += ICOORD (x, 0); + } + + void move_top_edge( // move one edge + const inT16 y) { // by +/- y + top_right += ICOORD (0, y); + } + + void move( // move box + const ICOORD vec) { // by vector + bot_left += vec; + top_right += vec; + } + + void move( // move box + const FCOORD vec) { // by float vector + bot_left.set_x ((inT16) floor (bot_left.x () + vec.x ())); + // round left + bot_left.set_y ((inT16) floor (bot_left.y () + vec.y ())); + // round down + top_right.set_x ((inT16) ceil (top_right.x () + vec.x ())); + // round right + top_right.set_y ((inT16) ceil (top_right.y () + vec.y ())); + // round up + } + + void scale( // scale box + const float f) { // by multiplier + bot_left.set_x ((inT16) floor (bot_left.x () * f)); // round left + bot_left.set_y ((inT16) floor (bot_left.y () * f)); // round down + top_right.set_x ((inT16) ceil (top_right.x () * f)); // round right + top_right.set_y ((inT16) ceil (top_right.y () * f)); // round up + } + void scale( // scale box + const FCOORD vec) { // by float vector + bot_left.set_x ((inT16) floor (bot_left.x () * vec.x ())); + bot_left.set_y ((inT16) floor (bot_left.y () * vec.y ())); + top_right.set_x ((inT16) ceil (top_right.x () * vec.x ())); + top_right.set_y ((inT16) ceil (top_right.y () * vec.y ())); + } + + // rotate doesn't enlarge the box - it just rotates the bottom-left + // and top-right corners. Use rotate_large if you want to guarantee + // that all content is contained within the rotated box. 
+ void rotate(const FCOORD& vec) { // by vector + bot_left.rotate (vec); + top_right.rotate (vec); + *this = TBOX (bot_left, top_right); + } + // rotate_large constructs the containing bounding box of all 4 + // corners after rotating them. It therefore guarantees that all + // original content is contained within, but also slightly enlarges the box. + void rotate_large(const FCOORD& vec); + + bool contains( // is pt inside box + const FCOORD pt) const; + + bool contains( // is box inside box + const TBOX &box) const; + + bool overlap( // do boxes overlap + const TBOX &box) const; + + bool major_overlap( // do boxes overlap more than half + const TBOX &box) const; + + // Do boxes overlap on x axis. + bool x_overlap(const TBOX &box) const; + + // Do boxes overlap on x axis by more than + // half of the width of the narrower box. + bool major_x_overlap(const TBOX &box) const; + + // Do boxes overlap on y axis. + bool y_overlap(const TBOX &box) const; + + // Do boxes overlap on y axis by more than + // half of the height of the shorter box. 
+ bool major_y_overlap(const TBOX &box) const; + + // fraction of current box's area covered by other + double overlap_fraction(const TBOX &box) const; + + TBOX intersection( // shared area box + const TBOX &box) const; + + TBOX bounding_union( // box enclosing both + const TBOX &box) const; + + void print() const { // print + tprintf("Bounding box=(%d,%d)->(%d,%d)\n", + left(), bottom(), right(), top()); + } + +#ifndef GRAPHICS_DISABLED + void plot( // use current settings + ScrollView* fd) const { // where to paint + fd->Rectangle(bot_left.x (), bot_left.y (), top_right.x (), + top_right.y ()); + } + + void plot( // paint box + ScrollView* fd, // where to paint + ScrollView::Color fill_colour, // colour for inside + ScrollView::Color border_colour) const; // colour for border +#endif + + friend DLLSYM TBOX & operator+= (TBOX &, const TBOX &); + // in place union + friend DLLSYM TBOX & operator-= (TBOX &, const TBOX &); + // in place intrsection + + void serialise_asc( // convert to ascii + FILE *f); + void de_serialise_asc( // convert from ascii + FILE *f); + + private: + ICOORD bot_left; // bottom left corner + ICOORD top_right; // top right corner +}; + +/********************************************************************** + * TBOX::TBOX() Constructor from 1 FCOORD + * + **********************************************************************/ + +inline TBOX::TBOX( // construtor + const FCOORD pt // floating centre + ) { + bot_left = ICOORD ((inT16) floor (pt.x ()), (inT16) floor (pt.y ())); + top_right = ICOORD ((inT16) ceil (pt.x ()), (inT16) ceil (pt.y ())); +} + + +/********************************************************************** + * TBOX::contains() Is point within box + * + **********************************************************************/ + +inline bool TBOX::contains(const FCOORD pt) const { + return ((pt.x () >= bot_left.x ()) && + (pt.x () <= top_right.x ()) && + (pt.y () >= bot_left.y ()) && (pt.y () <= top_right.y ())); +} + + 
+/********************************************************************** + * TBOX::contains() Is box within box + * + **********************************************************************/ + +inline bool TBOX::contains(const TBOX &box) const { + return (contains (box.bot_left) && contains (box.top_right)); +} + + +/********************************************************************** + * TBOX::overlap() Do two boxes overlap? + * + **********************************************************************/ + +inline bool TBOX::overlap( // do boxes overlap + const TBOX &box) const { + return ((box.bot_left.x () <= top_right.x ()) && + (box.top_right.x () >= bot_left.x ()) && + (box.bot_left.y () <= top_right.y ()) && + (box.top_right.y () >= bot_left.y ())); +} + +/********************************************************************** + * TBOX::major_overlap() Do two boxes overlap by at least half of the smallest? + * + **********************************************************************/ + +inline bool TBOX::major_overlap( // Do boxes overlap more that half. 
+ const TBOX &box) const { + int overlap = MIN(box.top_right.x(), top_right.x()); + overlap -= MAX(box.bot_left.x(), bot_left.x()); + overlap += overlap; + if (overlap < MIN(box.width(), width())) + return false; + overlap = MIN(box.top_right.y(), top_right.y()); + overlap -= MAX(box.bot_left.y(), bot_left.y()); + overlap += overlap; + if (overlap < MIN(box.height(), height())) + return false; + return true; +} + +/********************************************************************** + * TBOX::overlap_fraction() Fraction of area covered by the other box + * + **********************************************************************/ + +inline double TBOX::overlap_fraction(const TBOX &box) const { + double fraction = 0.0; + if (this->area()) { + fraction = this->intersection(box).area() * 1.0 / this->area(); + } + return fraction; +} + +/********************************************************************** + * TBOX::x_overlap() Do two boxes overlap on x-axis + * + **********************************************************************/ + +inline bool TBOX::x_overlap(const TBOX &box) const { + return ((box.bot_left.x() <= top_right.x()) && + (box.top_right.x() >= bot_left.x())); +} + +/********************************************************************** + * TBOX::major_x_overlap() Do two boxes overlap by more than half the + * width of the narrower box on the x-axis + * + **********************************************************************/ + +inline bool TBOX::major_x_overlap(const TBOX &box) const { + inT16 overlap = box.width(); + if (this->left() > box.left()) { + overlap -= this->left() - box.left(); + } + if (this->right() < box.right()) { + overlap -= box.right() - this->right(); + } + return (overlap >= box.width() / 2 || overlap >= this->width() / 2); +} + +/********************************************************************** + * TBOX::y_overlap() Do two boxes overlap on y-axis + * + **********************************************************************/ 
+ +inline bool TBOX::y_overlap(const TBOX &box) const { + return ((box.bot_left.y() <= top_right.y()) && + (box.top_right.y() >= bot_left.y())); +} + +/********************************************************************** + * TBOX::major_y_overlap() Do two boxes overlap by more than half the + * height of the shorter box on the y-axis + * + **********************************************************************/ + +inline bool TBOX::major_y_overlap(const TBOX &box) const { + inT16 overlap = box.height(); + if (this->bottom() > box.bottom()) { + overlap -= this->bottom() - box.bottom(); + } + if (this->top() < box.top()) { + overlap -= box.top() - this->top(); + } + return (overlap >= box.height() / 2 || overlap >= this->height() / 2); +} +#endif diff --git a/ccmain/rejctmap.cpp b/ccmain/rejctmap.cpp new file mode 100644 index 000000000..8402ffc35 --- /dev/null +++ b/ccmain/rejctmap.cpp @@ -0,0 +1,545 @@ +/********************************************************************** + * File: rejctmap.cpp (Formerly rejmap.c) + * Description: REJ and REJMAP class functions. + * Author: Phil Cheatle + * Created: Thu Jun 9 13:46:38 BST 1994 + * + * (C) Copyright 1994, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "mfcpch.h" +#include "hosthplb.h" +//#include "basefile.h" +#include "rejctmap.h" +#include "secname.h" + +#define EXTERN + +EXTERN BOOL_VAR (rejword_only_set_if_accepted, TRUE, "Mimic old reject_word"); +EXTERN BOOL_VAR (rejmap_allow_more_good_qual, FALSE, +"Use initial good qual setting"); +EXTERN BOOL_VAR (rej_use_1Il_rej, TRUE, "1Il rejection enabled"); + +BOOL8 REJ::perm_rejected() { //Is char perm reject? + return (flag (R_TESS_FAILURE) || + flag (R_SMALL_XHT) || + flag (R_EDGE_CHAR) || + flag (R_1IL_CONFLICT) || + flag (R_POSTNN_1IL) || + flag (R_REJ_CBLOB) || + flag (R_BAD_REPETITION) || flag (R_MM_REJECT)); +} + + +BOOL8 REJ::rej_before_nn_accept() { + return flag (R_POOR_MATCH) || + flag (R_NOT_TESS_ACCEPTED) || + flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER); +} + + +BOOL8 REJ::rej_between_nn_and_mm() { + return flag (R_HYPHEN) || + flag (R_DUBIOUS) || + flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP); +} + + +BOOL8 REJ::rej_between_mm_and_quality_accept() { + return flag (R_BAD_QUALITY); +} + + +BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() { + return flag (R_DOC_REJ) || + flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ); +} + + +BOOL8 REJ::rej_before_mm_accept() { + return rej_between_nn_and_mm () || + (rej_before_nn_accept () && + !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT)); +} + + +BOOL8 REJ::rej_before_quality_accept() { + return rej_between_mm_and_quality_accept () || + (!flag (R_MM_ACCEPT) && rej_before_mm_accept ()); +} + + +BOOL8 REJ::rejected() { //Is char rejected? + if (flag (R_MINIMAL_REJ_ACCEPT)) + return FALSE; + else + return (perm_rejected () || + rej_between_quality_and_minimal_rej_accept () || + (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ())); +} + + +BOOL8 REJ::accept_if_good_quality() { //potential rej? 
+ return (rejected () && + !perm_rejected () && + flag (R_BAD_PERMUTER) && + !flag (R_POOR_MATCH) && + !flag (R_NOT_TESS_ACCEPTED) && + !flag (R_CONTAINS_BLANKS) && + (rejmap_allow_more_good_qual || + (!rej_between_nn_and_mm () && + !rej_between_mm_and_quality_accept () && + !rej_between_quality_and_minimal_rej_accept ()))); +} + + +void REJ::setrej_tess_failure() { //Tess generated blank + set_flag(R_TESS_FAILURE); +} + + +void REJ::setrej_small_xht() { //Small xht char/wd + set_flag(R_SMALL_XHT); +} + + +void REJ::setrej_edge_char() { //Close to image edge + set_flag(R_EDGE_CHAR); +} + + +void REJ::setrej_1Il_conflict() { //Initial reject map + if (rej_use_1Il_rej) + set_flag(R_1IL_CONFLICT); +} + + +void REJ::setrej_postNN_1Il() { //1Il after NN + set_flag(R_POSTNN_1IL); +} + + +void REJ::setrej_rej_cblob() { //Insert duff blob + set_flag(R_REJ_CBLOB); +} + + +void REJ::setrej_mm_reject() { //Matrix matcher + set_flag(R_MM_REJECT); +} + + +void REJ::setrej_bad_repetition() { //Odd repeated char + set_flag(R_BAD_REPETITION); +} + + +void REJ::setrej_poor_match() { //Failed Rays heuristic + set_flag(R_POOR_MATCH); +} + + +void REJ::setrej_not_tess_accepted() { + //TEMP reject_word + set_flag(R_NOT_TESS_ACCEPTED); +} + + +void REJ::setrej_contains_blanks() { + //TEMP reject_word + set_flag(R_CONTAINS_BLANKS); +} + + +void REJ::setrej_bad_permuter() { //POTENTIAL reject_word + set_flag(R_BAD_PERMUTER); +} + + +void REJ::setrej_hyphen() { //PostNN dubious hyphen or . 
+ set_flag(R_HYPHEN); +} + + +void REJ::setrej_dubious() { //PostNN dubious limit + set_flag(R_DUBIOUS); +} + + +void REJ::setrej_no_alphanums() { //TEMP reject_word + set_flag(R_NO_ALPHANUMS); +} + + +void REJ::setrej_mostly_rej() { //TEMP reject_word + set_flag(R_MOSTLY_REJ); +} + + +void REJ::setrej_xht_fixup() { //xht fixup + set_flag(R_XHT_FIXUP); +} + + +void REJ::setrej_bad_quality() { //TEMP reject_word + set_flag(R_BAD_QUALITY); +} + + +void REJ::setrej_doc_rej() { //TEMP reject_word + set_flag(R_DOC_REJ); +} + + +void REJ::setrej_block_rej() { //TEMP reject_word + set_flag(R_BLOCK_REJ); +} + + +void REJ::setrej_row_rej() { //TEMP reject_word + set_flag(R_ROW_REJ); +} + + +void REJ::setrej_unlv_rej() { //TEMP reject_word + set_flag(R_UNLV_REJ); +} + + +void REJ::setrej_hyphen_accept() { //NN Flipped a char + set_flag(R_HYPHEN_ACCEPT); +} + + +void REJ::setrej_nn_accept() { //NN Flipped a char + set_flag(R_NN_ACCEPT); +} + + +void REJ::setrej_mm_accept() { //Matrix matcher + set_flag(R_MM_ACCEPT); +} + + +void REJ::setrej_quality_accept() { //Quality flip a char + set_flag(R_QUALITY_ACCEPT); +} + + +void REJ::setrej_minimal_rej_accept() { + //Accept all except blank + set_flag(R_MINIMAL_REJ_ACCEPT); +} + + +void REJ::full_print(FILE *fp) { + #ifndef SECURE_NAMES + + fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F"); + fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F"); + fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F"); + fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F"); + fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F"); + fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F"); + fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F"); + fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F"); + fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? 
"T" : "F"); + fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n", + flag (R_NOT_TESS_ACCEPTED) ? "T" : "F"); + fprintf (fp, "R_CONTAINS_BLANKS: %s\n", + flag (R_CONTAINS_BLANKS) ? "T" : "F"); + fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F"); + fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F"); + fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F"); + fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F"); + fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F"); + fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F"); + fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F"); + fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F"); + fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F"); + fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F"); + fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F"); + fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F"); + fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F"); + fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F"); + fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F"); + fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n", + flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F"); + #endif +} + + +//The REJMAP class has been hacked to use alloc_struct instead of new []. +//This is to reduce memory fragmentation only as it is rather kludgy. +//alloc_struct by-passes the call to the contsructor of REJ on each +//array element. Although the constructor is empty, the BITS16 members +//do have a constructor which sets all the flags to 0. The memset +//replaces this functionality. 
+ +REJMAP::REJMAP( //classwise copy + const REJMAP &source) { + REJ *to; + REJ *from = source.ptr; + int i; + + len = source.length (); + + if (len > 0) { + ptr = (REJ *) alloc_struct (len * sizeof (REJ), "REJ"); + to = ptr; + for (i = 0; i < len; i++) { + *to = *from; + to++; + from++; + } + } + else + ptr = NULL; +} + + +REJMAP & REJMAP::operator= ( //assign REJMAP +const REJMAP & source //from this +) { + REJ * + to; + REJ * + from = source.ptr; + int + i; + + initialise (source.len); + to = ptr; + for (i = 0; i < len; i++) { + *to = *from; + to++; + from++; + } + return *this; +} + + +void REJMAP::initialise( //Redefine map + inT16 length) { + if (ptr != NULL) + free_struct (ptr, len * sizeof (REJ), "REJ"); + len = length; + if (len > 0) + ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"), + 0, len * sizeof (REJ)); + else + ptr = NULL; +} + + +inT16 REJMAP::accept_count() { //How many accepted? + int i; + inT16 count = 0; + + for (i = 0; i < len; i++) { + if (ptr[i].accepted ()) + count++; + } + return count; +} + + +BOOL8 REJMAP::recoverable_rejects() { //Any non perm rejs? + int i; + + for (i = 0; i < len; i++) { + if (ptr[i].recoverable ()) + return TRUE; + } + return FALSE; +} + + +BOOL8 REJMAP::quality_recoverable_rejects() { //Any potential rejs? 
+ int i; + + for (i = 0; i < len; i++) { + if (ptr[i].accept_if_good_quality ()) + return TRUE; + } + return FALSE; +} + + +void REJMAP::remove_pos( //Cut out an element + inT16 pos //element to remove + ) { + REJ *new_ptr; //new, smaller map + int i; + + ASSERT_HOST (pos >= 0); + ASSERT_HOST (pos < len); + ASSERT_HOST (len > 0); + + len--; + if (len > 0) + new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"), + 0, len * sizeof (REJ)); + else + new_ptr = NULL; + + for (i = 0; i < pos; i++) + new_ptr[i] = ptr[i]; //copy pre pos + + for (; pos < len; pos++) + new_ptr[pos] = ptr[pos + 1]; //copy post pos + + //delete old map + free_struct (ptr, (len + 1) * sizeof (REJ), "REJ"); + ptr = new_ptr; +} + + +void REJMAP::print(FILE *fp) { + int i; + char buff[512]; + + for (i = 0; i < len; i++) { + buff[i] = ptr[i].display_char (); + } + buff[i] = '\0'; + fprintf (fp, "\"%s\"", buff); +} + + +void REJMAP::full_print(FILE *fp) { + int i; + + for (i = 0; i < len; i++) { + ptr[i].full_print (fp); + fprintf (fp, "\n"); + } +} + + +void REJMAP::rej_word_small_xht() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + ptr[i].setrej_small_xht (); + } +} + + +void REJMAP::rej_word_tess_failure() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + ptr[i].setrej_tess_failure (); + } +} + + +void REJMAP::rej_word_not_tess_accepted() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_not_tess_accepted (); + } +} + + +void REJMAP::rej_word_contains_blanks() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_contains_blanks (); + } +} + + +void REJMAP::rej_word_bad_permuter() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_bad_permuter (); + } +} + + +void REJMAP::rej_word_xht_fixup() { 
//Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_xht_fixup (); + } +} + + +void REJMAP::rej_word_no_alphanums() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_no_alphanums (); + } +} + + +void REJMAP::rej_word_mostly_rej() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_mostly_rej (); + } +} + + +void REJMAP::rej_word_bad_quality() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_bad_quality (); + } +} + + +void REJMAP::rej_word_doc_rej() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_doc_rej (); + } +} + + +void REJMAP::rej_word_block_rej() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_block_rej (); + } +} + + +void REJMAP::rej_word_row_rej() { //Reject whole word + int i; + + for (i = 0; i < len; i++) { + if (!rejword_only_set_if_accepted || ptr[i].accepted ()) + ptr[i].setrej_row_rej (); + } +} diff --git a/ccmain/rejctmap.h b/ccmain/rejctmap.h new file mode 100644 index 000000000..eff8eacfd --- /dev/null +++ b/ccmain/rejctmap.h @@ -0,0 +1,284 @@ +/********************************************************************** + * File: rejctmap.h (Formerly rejmap.h) + * Description: REJ and REJMAP class functions. + * Author: Phil Cheatle + * Created: Thu Jun 9 13:46:38 BST 1994 + * + * (C) Copyright 1994, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + +This module may look unneccessarily verbose, but here's the philosophy... + +ALL processing of the reject map is done in this module. There are lots of +separate calls to set reject/accept flags. These have DELIBERATELY been kept +distinct so that this module can decide what to do. + +Basically, there is a flag for each sort of rejection or acceptance. This +provides a history of what has happened to EACH character. + +Determining whether a character is CURRENTLY rejected depends on implicit +understanding of the SEQUENCE of possible calls. The flags are defined and +grouped in the REJ_FLAGS enum. These groupings are used in determining a +characters CURRENT rejection status. Basically, a character is ACCEPTED if + + none of the permanent rej flags are set + AND ( the character has never been rejected + OR an accept flag is set which is LATER than the latest reject flag ) + +IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE +OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!! 
+**********************************************************************/ + +#ifndef REJCTMAP_H +#define REJCTMAP_H + +#ifdef __UNIX__ +#include +#endif +#include "memry.h" +#include "bits16.h" +#include "varable.h" +#include "notdll.h" + +extern BOOL_VAR_H (rejword_only_set_if_accepted, TRUE, +"Mimic old reject_word"); +extern BOOL_VAR_H (rejmap_allow_more_good_qual, FALSE, +"Use initial good qual setting"); +extern BOOL_VAR_H (rej_use_1Il_rej, TRUE, "1Il rejection enabled"); + +enum REJ_FLAGS +{ + /* Reject modes which are NEVER overridden */ + R_TESS_FAILURE, // PERM Tess didnt classify + R_SMALL_XHT, // PERM Xht too small + R_EDGE_CHAR, // PERM Too close to edge of image + R_1IL_CONFLICT, // PERM 1Il confusion + R_POSTNN_1IL, // PERM 1Il unrejected by NN + R_REJ_CBLOB, // PERM Odd blob + R_MM_REJECT, // PERM Matrix match rejection (m's) + R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend + + /* Initial reject modes (pre NN_ACCEPT) */ + R_POOR_MATCH, // TEMP Ray's original heuristic (Not used) + R_NOT_TESS_ACCEPTED, // TEMP Tess didnt accept WERD + R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD + R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD + + /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */ + R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop + R_DUBIOUS, // TEMP Post NN dodgy chars + R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN + R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest + R_XHT_FIXUP, // TEMP Xht tests unsure + + /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */ + R_BAD_QUALITY, // TEMP Quality metrics bad for WERD + + /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accep*/ + R_DOC_REJ, // TEMP Document rejection + R_BLOCK_REJ, // TEMP Block rejection + R_ROW_REJ, // TEMP Row rejection + R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space + + /* Accept modes which occur inbetween the above rejection groups */ + R_NN_ACCEPT, //NN 
acceptance + R_HYPHEN_ACCEPT, //Hyphen acceptance + R_MM_ACCEPT, //Matrix match acceptance + R_QUALITY_ACCEPT, //Accept word in good quality doc + R_MINIMAL_REJ_ACCEPT //Accept EVERYTHING except tess failures +}; + +/* REJECT MAP VALUES */ + +#define MAP_ACCEPT '1' +#define MAP_REJECT_PERM '0' +#define MAP_REJECT_TEMP '2' +#define MAP_REJECT_POTENTIAL '3' + +class REJ +{ + BITS16 flags1; + BITS16 flags2; + + void set_flag(REJ_FLAGS rej_flag) { + if (rej_flag < 16) + flags1.turn_on_bit (rej_flag); + else + flags2.turn_on_bit (rej_flag - 16); + } + + BOOL8 rej_before_nn_accept(); + BOOL8 rej_between_nn_and_mm(); + BOOL8 rej_between_mm_and_quality_accept(); + BOOL8 rej_between_quality_and_minimal_rej_accept(); + BOOL8 rej_before_mm_accept(); + BOOL8 rej_before_quality_accept(); + + public: + REJ() { //constructor + } + + REJ( //classwise copy + const REJ &source) { + flags1 = source.flags1; + flags2 = source.flags2; + } + + REJ & operator= ( //assign REJ + const REJ & source) { //from this + flags1 = source.flags1; + flags2 = source.flags2; + return *this; + } + + BOOL8 flag(REJ_FLAGS rej_flag) { + if (rej_flag < 16) + return flags1.bit (rej_flag); + else + return flags2.bit (rej_flag - 16); + } + + char display_char() { + if (perm_rejected ()) + return MAP_REJECT_PERM; + else if (accept_if_good_quality ()) + return MAP_REJECT_POTENTIAL; + else if (rejected ()) + return MAP_REJECT_TEMP; + else + return MAP_ACCEPT; + } + + BOOL8 perm_rejected(); //Is char perm reject? + + BOOL8 rejected(); //Is char rejected? + + BOOL8 accepted() { //Is char accepted? + return !rejected (); + } + + //potential rej? 
+ BOOL8 accept_if_good_quality(); + + BOOL8 recoverable() { + return (rejected () && !perm_rejected ()); + } + + void setrej_tess_failure(); //Tess generated blank + void setrej_small_xht(); //Small xht char/wd + void setrej_edge_char(); //Close to image edge + void setrej_1Il_conflict(); //Initial reject map + void setrej_postNN_1Il(); //1Il after NN + void setrej_rej_cblob(); //Insert duff blob + void setrej_mm_reject(); //Matrix matcher + //Odd repeated char + void setrej_bad_repetition(); + void setrej_poor_match(); //Failed Rays heuristic + //TEMP reject_word + void setrej_not_tess_accepted(); + //TEMP reject_word + void setrej_contains_blanks(); + void setrej_bad_permuter(); //POTENTIAL reject_word + void setrej_hyphen(); //PostNN dubious hyph or . + void setrej_dubious(); //PostNN dubious limit + void setrej_no_alphanums(); //TEMP reject_word + void setrej_mostly_rej(); //TEMP reject_word + void setrej_xht_fixup(); //xht fixup + void setrej_bad_quality(); //TEMP reject_word + void setrej_doc_rej(); //TEMP reject_word + void setrej_block_rej(); //TEMP reject_word + void setrej_row_rej(); //TEMP reject_word + void setrej_unlv_rej(); //TEMP reject_word + void setrej_nn_accept(); //NN Flipped a char + void setrej_hyphen_accept(); //Good aspect ratio + void setrej_mm_accept(); //Matrix matcher + //Quality flip a char + void setrej_quality_accept(); + //Accept all except blank + void setrej_minimal_rej_accept(); + + void full_print(FILE *fp); +}; + +class REJMAP +{ + REJ *ptr; //ptr to the chars + inT16 len; //Number of chars + + public: + REJMAP() { //constructor + ptr = NULL; + len = 0; + } + + REJMAP( //classwise copy + const REJMAP &rejmap); + + REJMAP & operator= ( //assign REJMAP + const REJMAP & source); //from this + + ~REJMAP () { //destructor + if (ptr != NULL) + free_struct (ptr, len * sizeof (REJ), "REJ"); + } + + void initialise( //Redefine map + inT16 length); + + REJ & operator[]( //access function + inT16 index) const //map index + { + ASSERT_HOST 
(index < len); + return ptr[index]; //no bounds checks + } + + inT32 length() const { //map length + return len; + } + + inT16 accept_count(); //How many accepted? + + inT16 reject_count() { //How many rejects? + return len - accept_count (); + } + + void remove_pos( //Cut out an element + inT16 pos); //element to remove + + void print(FILE *fp); + + void full_print(FILE *fp); + + BOOL8 recoverable_rejects(); //Any non perm rejs? + + BOOL8 quality_recoverable_rejects(); + //Any potential rejs? + + void rej_word_small_xht(); //Reject whole word + //Reject whole word + void rej_word_tess_failure(); + void rej_word_not_tess_accepted(); + //Reject whole word + //Reject whole word + void rej_word_contains_blanks(); + //Reject whole word + void rej_word_bad_permuter(); + void rej_word_xht_fixup(); //Reject whole word + //Reject whole word + void rej_word_no_alphanums(); + void rej_word_mostly_rej(); //Reject whole word + void rej_word_bad_quality(); //Reject whole word + void rej_word_doc_rej(); //Reject whole word + void rej_word_block_rej(); //Reject whole word + void rej_word_row_rej(); //Reject whole word +}; +#endif diff --git a/ccmain/statistc.cpp b/ccmain/statistc.cpp new file mode 100644 index 000000000..1f4d75a63 --- /dev/null +++ b/ccmain/statistc.cpp @@ -0,0 +1,905 @@ +/********************************************************************** + * File: statistc.c (Formerly stats.c) + * Description: Simple statistical package for integer values. + * Author: Ray Smith + * Created: Mon Feb 04 16:56:05 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "mfcpch.h" //precompiled headers +#include +#include +#include +#include "memry.h" +//#include "ipeerr.h" +#include "tprintf.h" +#include "statistc.h" + +#define SEED1 0x1234 //default seeds +#define SEED2 0x5678 +#define SEED3 0x9abc + +/********************************************************************** + * STATS::STATS + * + * Construct a new stats element by allocating and zeroing the memory. + **********************************************************************/ + +STATS::STATS( //constructor + inT32 min, //min of range + inT32 max //max of range + ) { + + if (max <= min) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Illegal range for stats, Min=%d, Max=%d",min,max);*/ + min = 0; + max = 1; + } + rangemin = min; //setup + rangemax = max; + buckets = (inT32 *) alloc_mem ((max - min) * sizeof (inT32)); + if (buckets != NULL) + this->clear (); //zero it + /* else + err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "No memory for stats, Min=%d, Max=%d",min,max); */ +} + + +STATS::STATS() { //constructor + rangemax = 0; //empty + rangemin = 0; + buckets = NULL; +} + + +/********************************************************************** + * STATS::set_range + * + * Alter the range on an existing stats element. 
+ **********************************************************************/ + +bool STATS::set_range( //constructor + inT32 min, //min of range + inT32 max //max of range + ) { + + if (max <= min) { + return false; + } + rangemin = min; //setup + rangemax = max; + if (buckets != NULL) + free_mem(buckets); //no longer want it + buckets = (inT32 *) alloc_mem ((max - min) * sizeof (inT32)); + /* if (buckets==NULL) + return err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "No memory for stats, Min=%d, Max=%d",min,max);*/ + + this->clear (); //zero it + return true; +} + + +/********************************************************************** + * STATS::clear + * + * Clear out the STATS class by zeroing all the buckets. + **********************************************************************/ + +void STATS::clear() { //clear out buckets + total_count = 0; + if (buckets != NULL) + memset (buckets, 0, (rangemax - rangemin) * sizeof (inT32)); + //zero it +} + + +/********************************************************************** + * STATS::~STATS + * + * Destructor for a stats class. + **********************************************************************/ + +STATS::~STATS ( //destructor +) { + if (buckets != NULL) { + free_mem(buckets); + buckets = NULL; + } +} + + +/********************************************************************** + * STATS::add + * + * Add a set of samples to (or delete from) a pile. 
+ **********************************************************************/ + +void STATS::add( //add sample + inT32 value, //bucket + inT32 count //no to add + ) { + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats");*/ + return; + } + if (value <= rangemin) + buckets[0] += count; //silently clip to range + else if (value >= rangemax) + buckets[rangemax - rangemin - 1] += count; + else + //add count to cell + buckets[value - rangemin] += count; + total_count += count; //keep count of total +} + + +/********************************************************************** + * STATS::mode + * + * Find the mode of a stats class. + **********************************************************************/ + +inT32 STATS::mode() { //get mode of samples + inT32 index; //current index + inT32 max; //max cell count + inT32 maxindex; //index of max + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats");*/ + return rangemin; + } + for (max = 0, maxindex = 0, index = rangemax - rangemin - 1; index >= 0; + index--) { + if (buckets[index] > max) { + max = buckets[index]; //find biggest + maxindex = index; + } + } + return maxindex + rangemin; //index of biggest +} + + +/********************************************************************** + * STATS::mean + * + * Find the mean of a stats class. 
+ **********************************************************************/ + +float STATS::mean() { //get mean of samples + inT32 index; //current index + inT32 sum; //sum of cells + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats");*/ + return (float) rangemin; + } + for (sum = 0, index = rangemax - rangemin - 1; index >= 0; index--) { + //sum all buckets + sum += index * buckets[index]; + } + if (total_count > 0) + //mean value + return (float) sum / total_count + rangemin; + else + return (float) rangemin; //no mean +} + + +/********************************************************************** + * STATS::sd + * + * Find the standard deviation of a stats class. + **********************************************************************/ + +float STATS::sd() { //standard deviation + inT32 index; //current index + inT32 sum; //sum of cells + inT32 sqsum; //sum of squares + float variance; + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats"); */ + return (float) 0.0; + } + for (sum = 0, sqsum = 0, index = rangemax - rangemin - 1; index >= 0; + index--) { + //sum all buckets + sum += index * buckets[index]; + //and squares + sqsum += index * index * buckets[index]; + } + if (total_count > 0) { + variance = sum / ((float) total_count); + variance = sqsum / ((float) total_count) - variance * variance; + return (float) sqrt (variance); + } + else + return (float) 0.0; +} + + +/********************************************************************** + * STATS::ile + * + * Find an arbitrary %ile of a stats class. 
+ **********************************************************************/ + +float STATS::ile( //percentile + float frac //fraction to find + ) { + inT32 index; //current index + inT32 sum; //sum of cells + float target; //target value + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats"); */ + return (float) rangemin; + } + target = frac * total_count; + if (target <= 0) + target = (float) 1; + if (target > total_count) + target = (float) total_count; + for (sum = 0, index = 0; index < rangemax - rangemin + && sum < target; sum += buckets[index], index++); + if (index > 0) + return rangemin + index - (sum - target) / buckets[index - 1]; + //better than just ints + else + return (float) rangemin; +} + + +/********************************************************************** + * STATS::median + * + * Finds a more usefule estimate of median than ile(0.5). + * + * Overcomes a problem with ile() - if the samples are, for example, + * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway + * between 6 and 13 = 9.5 + **********************************************************************/ + +float STATS::median() { //get median + float median; + inT32 min_pile; + inT32 median_pile; + inT32 max_pile; + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats");*/ + return (float) rangemin; + } + median = (float) ile ((float) 0.5); + median_pile = (inT32) floor (median); + if ((total_count > 1) && (pile_count (median_pile) == 0)) { + /* Find preceeding non zero pile */ + for (min_pile = median_pile; pile_count (min_pile) == 0; min_pile--); + /* Find following non zero pile */ + for (max_pile = median_pile; pile_count (max_pile) == 0; max_pile++); + median = (float) ((min_pile + max_pile) / 2.0); + } + return median; +} + + 
+/********************************************************************** + * STATS::smooth + * + * Apply a triangular smoothing filter to the stats. + * This makes the modes a bit more useful. + * The factor gives the height of the triangle, i.e. the weight of the + * centre. + **********************************************************************/ + +void STATS::smooth( //smooth samples + inT32 factor //size of triangle + ) { + inT32 entry; //bucket index + inT32 offset; //from entry + inT32 entrycount; //no of entries + inT32 bucket; //new smoothed pile + //output stats + STATS result(rangemin, rangemax); + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats"); */ + return; + } + if (factor < 2) + return; //is a no-op + entrycount = rangemax - rangemin; + for (entry = 0; entry < entrycount; entry++) { + //centre weight + bucket = buckets[entry] * factor; + for (offset = 1; offset < factor; offset++) { + if (entry - offset >= 0) + bucket += buckets[entry - offset] * (factor - offset); + if (entry + offset < entrycount) + bucket += buckets[entry + offset] * (factor - offset); + } + result.add (entry + rangemin, bucket); + } + total_count = result.total_count; + memcpy (buckets, result.buckets, entrycount * sizeof (inT32)); +} + + +/********************************************************************** + * STATS::cluster + * + * Cluster the samples into max_cluster clusters. + * Each call runs one iteration. The array of clusters must be + * max_clusters+1 in size as cluster 0 is used to indicate which samples + * have been used. + * The return value is the current number of clusters. 
+ **********************************************************************/ + +inT32 STATS::cluster( //cluster samples + float lower, //thresholds + float upper, + float multiple, //distance threshold + inT32 max_clusters, //max no to make + STATS *clusters //array of clusters + ) { + BOOL8 new_cluster; //added one + float *centres; //cluster centres + inT32 entry; //bucket index + inT32 cluster; //cluster index + inT32 best_cluster; //one to assign to + inT32 new_centre = 0; //residual mode + inT32 new_mode; //pile count of new_centre + inT32 count; //pile to place + float dist; //from cluster + float min_dist; //from best_cluster + inT32 cluster_count; //no of clusters + + if (max_clusters < 1) + return 0; + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats");*/ + return 0; + } + centres = (float *) alloc_mem ((max_clusters + 1) * sizeof (float)); + if (centres == NULL) { + /* err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "No memory for centres"); */ + return 0; + } + for (cluster_count = 1; cluster_count <= max_clusters + && clusters[cluster_count].buckets != NULL + && clusters[cluster_count].total_count > 0; cluster_count++) { + centres[cluster_count] = + (float) clusters[cluster_count].ile ((float) 0.5); + new_centre = clusters[cluster_count].mode (); + for (entry = new_centre - 1; centres[cluster_count] - entry < lower + && entry >= rangemin + && pile_count (entry) <= pile_count (entry + 1); entry--) { + count = pile_count (entry) - clusters[0].pile_count (entry); + if (count > 0) { + clusters[cluster_count].add (entry, count); + clusters[0].add (entry, count); + } + } + for (entry = new_centre + 1; entry - centres[cluster_count] < lower + && entry < rangemax + && pile_count (entry) <= pile_count (entry - 1); entry++) { + count = pile_count (entry) - clusters[0].pile_count (entry); + if (count > 0) { + clusters[cluster_count].add 
(entry, count); + clusters[0].add (entry, count); + } + } + } + cluster_count--; + + if (cluster_count == 0) { + clusters[0].set_range (rangemin, rangemax); + } + do { + new_cluster = FALSE; + new_mode = 0; + for (entry = 0; entry < rangemax - rangemin; entry++) { + count = buckets[entry] - clusters[0].buckets[entry]; + //remaining pile + if (count > 0) { //any to handle + min_dist = (float) MAX_INT32; + best_cluster = 0; + for (cluster = 1; cluster <= cluster_count; cluster++) { + dist = entry + rangemin - centres[cluster]; + //find distance + if (dist < 0) + dist = -dist; + if (dist < min_dist) { + min_dist = dist; //find least + best_cluster = cluster; + } + } + if (min_dist > upper //far enough for new + && (best_cluster == 0 + || entry + rangemin > centres[best_cluster] * multiple + || entry + rangemin < centres[best_cluster] / multiple)) { + if (count > new_mode) { + new_mode = count; + new_centre = entry + rangemin; + } + } + } + } + //need new and room + if (new_mode > 0 && cluster_count < max_clusters) { + cluster_count++; + new_cluster = TRUE; + if (!clusters[cluster_count].set_range (rangemin, rangemax)) + return 0; + centres[cluster_count] = (float) new_centre; + clusters[cluster_count].add (new_centre, new_mode); + clusters[0].add (new_centre, new_mode); + for (entry = new_centre - 1; centres[cluster_count] - entry < lower + && entry >= rangemin + && pile_count (entry) <= pile_count (entry + 1); entry--) { + count = pile_count (entry) - clusters[0].pile_count (entry); + if (count > 0) { + clusters[cluster_count].add (entry, count); + clusters[0].add (entry, count); + } + } + for (entry = new_centre + 1; entry - centres[cluster_count] < lower + && entry < rangemax + && pile_count (entry) <= pile_count (entry - 1); entry++) { + count = pile_count (entry) - clusters[0].pile_count (entry); + if (count > 0) { + clusters[cluster_count].add (entry, count); + clusters[0].add (entry, count); + } + } + centres[cluster_count] = + (float) 
clusters[cluster_count].ile ((float) 0.5); + } + } + while (new_cluster && cluster_count < max_clusters); + free_mem(centres); + return cluster_count; +} + + +/********************************************************************** + * STATS::local_min + * + * Return TRUE if this point is a local min. + **********************************************************************/ + +BOOL8 STATS::local_min( //test minness + inT32 x //of x + ) { + inT32 index; //table index + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats");*/ + return FALSE; + } + if (x < rangemin) + x = rangemin; + if (x >= rangemax) + x = rangemax - 1; + x -= rangemin; + if (buckets[x] == 0) + return TRUE; + for (index = x - 1; index >= 0 && buckets[index] == buckets[x]; index--); + if (index >= 0 && buckets[index] < buckets[x]) + return FALSE; + for (index = x + 1; index < rangemax - rangemin + && buckets[index] == buckets[x]; index++); + if (index < rangemax - rangemin && buckets[index] < buckets[x]) + return FALSE; + else + return TRUE; +} + + +/********************************************************************** + * STATS::print + * + * Print a summary of the stats and optionally a dump of the table. 
+ **********************************************************************/ + +void STATS::print( //print stats table + FILE *, //Now uses tprintf instead + BOOL8 dump //dump full table + ) { + inT32 index; //table index + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats"); */ + return; + } + if (dump) { + for (index = 0; index < rangemax - rangemin; index++) { + tprintf ("%4d:%-3d ", rangemin + index, buckets[index]); + if (index % 8 == 7) + tprintf ("\n"); + } + tprintf ("\n"); + } + + tprintf ("Total count=%d\n", total_count); + tprintf ("Min=%d\n", (inT32) (ile ((float) 0.0))); + tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25)); + tprintf ("Median=%.2f\n", ile ((float) 0.5)); + tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75)); + tprintf ("Max=%d\n", (inT32) (ile ((float) 0.99999))); + tprintf ("Mean= %.2f\n", mean ()); + tprintf ("SD= %.2f\n", sd ()); +} + + +/********************************************************************** + * STATS::min_bucket + * + * Find REAL minimum bucket - ile(0.0) isnt necessarily correct + **********************************************************************/ + +inT32 STATS::min_bucket() { //Find min + inT32 min; + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats");*/ + return rangemin; + } + + for (min = 0; (min < rangemax - rangemin) && (buckets[min] == 0); min++); + return rangemin + min; +} + + +/********************************************************************** + * STATS::max_bucket + * + * Find REAL maximum bucket - ile(1.0) isnt necessarily correct + **********************************************************************/ + +inT32 STATS::max_bucket() { //Find max + inT32 max; + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats");*/ + return rangemin; + } + 
+ for (max = rangemax - rangemin - 1; + (max > 0) && (buckets[max] == 0); max--); + return rangemin + max; +} + + +/********************************************************************** + * STATS::short_print + * + * Print a summary of the stats and optionally a dump of the table. + * ( BUT ONLY THE PART OF THE TABLE BETWEEN MIN AND MAX) + **********************************************************************/ + +void STATS::short_print( //print stats table + FILE *, //Now uses tprintf instead + BOOL8 dump //dump full table + ) { + inT32 index; //table index + inT32 min = min_bucket (); + inT32 max = max_bucket (); + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats"); */ + return; + } + if (dump) { + for (index = min; index <= max; index++) { + tprintf ("%4d:%-3d ", rangemin + index, buckets[index]); + if ((index - min) % 8 == 7) + tprintf ("\n"); + } + tprintf ("\n"); + } + + tprintf ("Total count=%d\n", total_count); + tprintf ("Min=%d Really=%d\n", (inT32) (ile ((float) 0.0)), min); + tprintf ("Max=%d Really=%d\n", (inT32) (ile ((float) 1.1)), max); + tprintf ("Range=%d\n", max + 1 - min); + tprintf ("Lower quartile=%.2f\n", ile ((float) 0.25)); + tprintf ("Median=%.2f\n", ile ((float) 0.5)); + tprintf ("Upper quartile=%.2f\n", ile ((float) 0.75)); + tprintf ("Mean= %.2f\n", mean ()); + tprintf ("SD= %.2f\n", sd ()); +} + + +/********************************************************************** + * STATS::plot + * + * Draw a histogram of the stats table. 
+ **********************************************************************/ + +#ifndef GRAPHICS_DISABLED +void STATS::plot( //plot stats table + ScrollView* window, //to draw in + float xorigin, //bottom left + float yorigin, + float xscale, //one x unit + float yscale, //one y unit + ScrollView::Color colour //colour to draw in + ) { + inT32 index; //table index + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats");*/ + return; + } + window->Pen(colour); + + for (index = 0; index < rangemax - rangemin; index++) { + window->Rectangle( xorigin + xscale * index, yorigin, + xorigin + xscale * (index + 1), + yorigin + yscale * buckets[index]); + } +} +#endif + + +/********************************************************************** + * STATS::plotline + * + * Draw a histogram of the stats table. (Line only + **********************************************************************/ + +#ifndef GRAPHICS_DISABLED +void STATS::plotline( //plot stats table + ScrollView* window, //to draw in + float xorigin, //bottom left + float yorigin, + float xscale, //one x unit + float yscale, //one y unit + ScrollView::Color colour //colour to draw in + ) { + inT32 index; //table index + + if (buckets == NULL) { + /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, + ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, + "Empty stats"); */ + return; + } + window->Pen(colour); + + window->SetCursor(xorigin, yorigin + yscale * buckets[0]); + for (index = 0; index < rangemax - rangemin; index++) { + window->DrawTo(xorigin + xscale * index, yorigin + yscale * buckets[index]); + } +} +#endif + + +/********************************************************************** + * choose_nth_item + * + * Returns the index of what would b the nth item in the array + * if the members were sorted, without actually sorting. 
 * The array is partially reordered in place (partitioned around randomly
 * chosen pivots), so its contents change even though it is not fully
 * sorted.
 **********************************************************************/

DLLSYM inT32 choose_nth_item(    //fast median
                             inT32 index,   //index to choose
                             float *array,  //array of items
                             inT32 count    //no of items
                            ) {
  static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
  //for nrand
  inT32 next_sample;             //next one to do
  inT32 next_lesser;             //space for new
  inT32 prev_greater;            //last one saved
  inT32 equal_count;             //no of equal ones
  float pivot;                   //proposed median
  float sample;                  //current sample

  if (count <= 1)
    return 0;
  if (count == 2) {
    //two items: answer directly without reordering
    if (array[0] < array[1]) {
      return index >= 1 ? 1 : 0;
    }
    else {
      return index >= 1 ? 0 : 1;
    }
  }
  else {
    if (index < 0)
      index = 0;                 //ensure legal
    else if (index >= count)
      index = count - 1;
    //equal_count is borrowed here to hold the random pivot position
    #ifdef __UNIX__
    equal_count = (inT32) (nrand48 (seeds) % count);
    #else
    equal_count = (inT32) (rand () % count);
    #endif
    pivot = array[equal_count];
    //fill gap
    array[equal_count] = array[0];
    next_lesser = 0;
    prev_greater = count;
    equal_count = 1;
    //Three-way partition: [0, next_lesser) holds items below the pivot,
    //[prev_greater, count) holds items above it.
    for (next_sample = 1; next_sample < prev_greater;) {
      sample = array[next_sample];
      if (sample < pivot) {
        //shuffle
        array[next_lesser++] = sample;
        next_sample++;
      }
      else if (sample > pivot) {
        prev_greater--;
        //juggle
        array[next_sample] = array[prev_greater];
        array[prev_greater] = sample;
      }
      else {
        equal_count++;
        next_sample++;
      }
    }
    //the middle section is all equal to the pivot: write it back
    for (next_sample = next_lesser; next_sample < prev_greater;)
      array[next_sample++] = pivot;
    if (index < next_lesser)
      return choose_nth_item (index, array, next_lesser);
    else if (index < prev_greater)
      return next_lesser;        //in equal bracket
    else
      //recurse on the upper section and shift the answer back
      return choose_nth_item (index - prev_greater,
        array + prev_greater,
        count - prev_greater) + prev_greater;
  }
}


/**********************************************************************
 * choose_nth_item
 *
 * Returns the index of what would be the nth item in the array
 * if the members were sorted, without actually sorting.
 * This version works on elements of any size, ordered by compar, and
 * reorders the array in place via swap_entries.
 **********************************************************************/

DLLSYM inT32
choose_nth_item (                //fast median
inT32 index,                     //index to choose
void *array,                     //array of items
inT32 count,                     //no of items
size_t size,                     //element size
                                 //comparator
int (*compar) (const void *, const void *)
) {
  static uinT16 seeds[3] = { SEED1, SEED2, SEED3 };
  //for nrand
  int result;                    //of compar
  inT32 next_sample;             //next one to do
  inT32 next_lesser;             //space for new
  inT32 prev_greater;            //last one saved
  inT32 equal_count;             //no of equal ones
  inT32 pivot;                   //proposed median

  if (count <= 1)
    return 0;
  if (count == 2) {
    //two items: answer directly without reordering
    if (compar (array, (char *) array + size) < 0) {
      return index >= 1 ? 1 : 0;
    }
    else {
      return index >= 1 ? 0 : 1;
    }
  }
  if (index < 0)
    index = 0;                   //ensure legal
  else if (index >= count)
    index = count - 1;
  #ifdef __UNIX__
  pivot = (inT32) (nrand48 (seeds) % count);
  #else
  pivot = (inT32) (rand () % count);
  #endif
  //move the pivot element to the front
  swap_entries (array, size, pivot, 0);
  next_lesser = 0;
  prev_greater = count;
  equal_count = 1;
  //Each sample is compared with the element at next_lesser, which is
  //where the pivot (or an element equal to it) sits.
  for (next_sample = 1; next_sample < prev_greater;) {
    result =
      compar ((char *) array + size * next_sample,
      (char *) array + size * next_lesser);
    if (result < 0) {
      swap_entries (array, size, next_lesser++, next_sample++);
      //shuffle
    }
    else if (result > 0) {
      prev_greater--;
      swap_entries(array, size, prev_greater, next_sample);
    }
    else {
      equal_count++;
      next_sample++;
    }
  }
  if (index < next_lesser)
    return choose_nth_item (index, array, next_lesser, size, compar);
  else if (index < prev_greater)
    return next_lesser;          //in equal bracket
  else
    //recurse on the upper section and shift the answer back
    return choose_nth_item (index - prev_greater,
      (char *) array + size * prev_greater,
      count - prev_greater, size,
      compar) + prev_greater;
}


/**********************************************************************
 * swap_entries
 *
 * Swap 2 entries of arbitrary size in-place in a table.
+ **********************************************************************/ + +void swap_entries( //swap in place + void *array, //array of entries + size_t size, //size of entry + inT32 index1, //entries to swap + inT32 index2) { + char tmp; + char *ptr1; //to entries + char *ptr2; + size_t count; //of bytes + + ptr1 = (char *) array + index1 * size; + ptr2 = (char *) array + index2 * size; + for (count = 0; count < size; count++) { + tmp = *ptr1; + *ptr1++ = *ptr2; + *ptr2++ = tmp; //tedious! + } +} diff --git a/ccmain/statistc.h b/ccmain/statistc.h new file mode 100644 index 000000000..ea765e7c1 --- /dev/null +++ b/ccmain/statistc.h @@ -0,0 +1,135 @@ +/********************************************************************** + * File: statistc.h (Formerly stats.h) + * Description: Class description for STATS class. + * Author: Ray Smith + * Created: Mon Feb 04 16:19:07 GMT 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef STATISTC_H +#define STATISTC_H + +#include +#include "scrollview.h" +#include "host.h" + +class DLLSYM STATS //statistics package +{ + inT32 rangemin; //min of range + inT32 rangemax; //max of range + inT32 total_count; //no of samples + inT32 *buckets; //array of cells + + public: + STATS( //constructor + inT32 min, //min of range + inT32 max); //max of range + STATS(); //empty for arrays + + ~STATS (); //destructor + + bool set_range( //change range + inT32 min, //min of range + inT32 max); //max of range + + void clear(); //empty buckets + + void add( //add sample + inT32 value, //bucket + inT32 count); //no to add + + inT32 mode(); //get mode of samples + + float mean(); //get mean of samples + + float sd(); //standard deviation + + float ile( //percentile + float frac); //[0,1] for percentil + + inT32 min_bucket(); //Find min + + inT32 max_bucket(); //Find max + + float median(); //get median of samples + + void smooth( //apply blurring + inT32 factor); //filter to stats + inT32 cluster( //cluster samples + float lower, //thresholds + float upper, + float multiple, //distance threshold + inT32 max_clusters, //max no to make + STATS *clusters); //array of clusters + + inT32 pile_count( //access function + inT32 value //pile to count + ) { + return value > rangemin ? (value < rangemax + ? 
buckets[value - + rangemin] : buckets[rangemax - + rangemin - + 1]) : buckets[0]; + } + + inT32 get_total() { //access function + return total_count; //total of all piles + } + + BOOL8 local_min( //test local minness + inT32 x); + + void print( //print summary/table + FILE *fp, //file to print on + BOOL8 dump); //dump whole table + + void short_print( //print summary/table + FILE *fp, //file to print on + BOOL8 dump); //dump whole table + + void plot( //draw histogram rect + ScrollView* window, //window to draw in + float xorigin, //origin of histo + float yorigin, //gram + float xscale, //size of one unit + float yscale, //size of one uint + ScrollView::Color colour); //colour to draw in + + void plotline( //draw histogram line + ScrollView* window, //window to draw in + float xorigin, //origin of histo + float yorigin, //gram + float xscale, //size of one unit + float yscale, //size of one uint + ScrollView::Color colour); //colour to draw in +}; +DLLSYM inT32 choose_nth_item( //fast median + inT32 index, //index to choose + float *array, //array of items + inT32 count //no of items + ); +DLLSYM inT32 choose_nth_item ( //fast median +inT32 index, //index to choose +void *array, //array of items +inT32 count, //no of items +size_t size, //element size + //comparator +int (*compar) (const void *, const void *) +); +void swap_entries( //swap in place + void *array, //array of entries + size_t size, //size of entry + inT32 index1, //entries to swap + inT32 index2); +#endif diff --git a/ccmain/stepblob.cpp b/ccmain/stepblob.cpp new file mode 100644 index 000000000..408102f05 --- /dev/null +++ b/ccmain/stepblob.cpp @@ -0,0 +1,333 @@ +/********************************************************************** + * File: stepblob.cpp (Formerly cblob.c) + * Description: Code for C_BLOB class. + * Author: Ray Smith + * Created: Tue Oct 08 10:41:13 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. 
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include "stepblob.h"

ELISTIZE_S (C_BLOB)
/**********************************************************************
 * position_outline
 *
 * Position the outline in the given list at the relevant place
 * according to its nesting.
 * If the outline encloses entries already in the list they become its
 * children; if an entry encloses the outline, the outline is placed
 * (recursively) in that entry's child list; otherwise it is appended
 * at this level.
 **********************************************************************/
static void position_outline(    //put in place
                             C_OUTLINE *outline,       //thing to place
                             C_OUTLINE_LIST *destlist  //destination list
                            ) {
  C_OUTLINE *dest_outline;       //outline from dest list
  C_OUTLINE_IT it = destlist;    //iterator
                                 //iterator on children
  C_OUTLINE_IT child_it = outline->child ();

  if (!it.empty ()) {
    do {
      dest_outline = it.data (); //get destination
                                 //encloses dest
      if (*dest_outline < *outline) {
                                 //take off list
        dest_outline = it.extract ();
                                 //put this in place
        it.add_after_then_move (outline);
                                 //make it a child
        child_it.add_to_end (dest_outline);
        while (!it.at_last ()) {
          it.forward ();         //do rest of list
                                 //check for other children
          dest_outline = it.data ();
          if (*dest_outline < *outline) {
                                 //take off list
            dest_outline = it.extract ();
            child_it.add_to_end (dest_outline);
                                 //make it a child
            if (it.empty ())
              break;
          }
        }
        return;                  //finished
      }
                                 //enclosed by dest
      else if (*outline < *dest_outline) {
        position_outline (outline, dest_outline->child ());
                                 //place in child list
        return;                  //finished
      }
      it.forward ();
    }
    while (!it.at_first ());
  }
  it.add_to_end (outline);       //at outer level
}


/**********************************************************************
 * plot_outline_list
 *
 * Draw a list of outlines in the given colour and their children
 * in the child colour.
 **********************************************************************/

#ifndef GRAPHICS_DISABLED
static void plot_outline_list(   //draw outlines
                              C_OUTLINE_LIST *list,           //outline to draw
                              ScrollView* window,             //window to draw in
                              ScrollView::Color colour,       //colour to use
                              ScrollView::Color child_colour  //colour of children
                             ) {
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = list;        //iterator

  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
                                 //draw it
    outline->plot (window, colour);
    //all deeper levels are drawn in the child colour
    if (!outline->child ()->empty ())
      plot_outline_list (outline->child (), window,
        child_colour, child_colour);
  }
}
#endif


/**********************************************************************
 * reverse_outline_list
 *
 * Reverse a list of outlines and their children.
 **********************************************************************/

static void reverse_outline_list(  //reverse outlines
                                 C_OUTLINE_LIST *list  //outline to reverse
                                ) {
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = list;        //iterator

  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
    outline->reverse ();         //reverse it
    if (!outline->child ()->empty ())
      reverse_outline_list (outline->child ());
  }
}


/**********************************************************************
 * C_BLOB::C_BLOB
 *
 * Constructor to build a C_BLOB from a list of C_OUTLINEs.
 * The C_OUTLINEs are not copied so the source list is emptied.
 * The C_OUTLINEs are nested correctly in the blob.
+ **********************************************************************/ + +C_BLOB::C_BLOB( //constructor + C_OUTLINE_LIST *outline_list //in random order + ) { + C_OUTLINE *outline; //current outline + C_OUTLINE_IT it = outline_list;//iterator + + while (!it.empty ()) { //grab the list + outline = it.extract (); //get off the list + //put it in place + position_outline(outline, &outlines); + if (!it.empty ()) + it.forward (); + } + it.set_to_list (&outlines); + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + outline = it.data (); + if (outline->turn_direction () < 0) { + outline->reverse (); + reverse_outline_list (outline->child ()); + outline->set_flag (COUT_INVERSE, TRUE); + } + else { + outline->set_flag (COUT_INVERSE, FALSE); + } + } +} + +// Build and return a fake blob containing a single fake outline with no +// steps. +C_BLOB* C_BLOB::FakeBlob(const TBOX& box) { + C_OUTLINE_LIST outlines; + C_OUTLINE::FakeOutline(box, &outlines); + return new C_BLOB(&outlines); +} + +/********************************************************************** + * C_BLOB::bounding_box + * + * Return the bounding box of the blob. + **********************************************************************/ + +TBOX C_BLOB::bounding_box() { //bounding box + C_OUTLINE *outline; //current outline + C_OUTLINE_IT it = &outlines; //outlines of blob + TBOX box; //bounding box + + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + outline = it.data (); + box += outline->bounding_box (); + } + return box; +} + + +/********************************************************************** + * C_BLOB::area + * + * Return the area of the blob. 
+ **********************************************************************/ + +inT32 C_BLOB::area() { //area + C_OUTLINE *outline; //current outline + C_OUTLINE_IT it = &outlines; //outlines of blob + inT32 total; //total area + + total = 0; + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + outline = it.data (); + total += outline->area (); + } + return total; +} + +/********************************************************************** + * C_BLOB::perimeter + * + * Return the perimeter of the top and 2nd level outlines. + **********************************************************************/ + +inT32 C_BLOB::perimeter() { + C_OUTLINE *outline; // current outline + C_OUTLINE_IT it = &outlines; // outlines of blob + inT32 total; // total perimeter + + total = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + outline = it.data(); + total += outline->perimeter(); + } + return total; +} + + +/********************************************************************** + * C_BLOB::outer_area + * + * Return the area of the blob. + **********************************************************************/ + +inT32 C_BLOB::outer_area() { //area + C_OUTLINE *outline; //current outline + C_OUTLINE_IT it = &outlines; //outlines of blob + inT32 total; //total area + + total = 0; + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + outline = it.data (); + total += outline->outer_area (); + } + return total; +} + + +/********************************************************************** + * C_BLOB::count_transitions + * + * Return the total x and y maxes and mins in the blob. + * Chlid outlines are not counted. 
+ **********************************************************************/ + +inT32 C_BLOB::count_transitions( //area + inT32 threshold //on size + ) { + C_OUTLINE *outline; //current outline + C_OUTLINE_IT it = &outlines; //outlines of blob + inT32 total; //total area + + total = 0; + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + outline = it.data (); + total += outline->count_transitions (threshold); + } + return total; +} + + +/********************************************************************** + * C_BLOB::move + * + * Move C_BLOB by vector + **********************************************************************/ + +void C_BLOB::move( // reposition blob + const ICOORD vec // by vector + ) { + C_OUTLINE_IT it(&outlines); // iterator + + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) + it.data ()->move (vec); // move each outline +} + +// Static helper for C_BLOB::rotate to allow recursion of child outlines. +void RotateOutlineList(const FCOORD& rotation, C_OUTLINE_LIST* outlines) { + C_OUTLINE_LIST new_outlines; + C_OUTLINE_IT src_it(outlines); + C_OUTLINE_IT dest_it(&new_outlines); + while (!src_it.empty()) { + C_OUTLINE* old_outline = src_it.extract(); + src_it.forward(); + C_OUTLINE* new_outline = new C_OUTLINE(old_outline, rotation); + if (!old_outline->child()->empty()) { + RotateOutlineList(rotation, old_outline->child()); + C_OUTLINE_IT child_it(new_outline->child()); + child_it.add_list_after(old_outline->child()); + } + delete old_outline; + dest_it.add_to_end(new_outline); + } + src_it.add_list_after(&new_outlines); +} + +/********************************************************************** + * C_BLOB::rotate + * + * Rotate C_BLOB by rotation. + * Warning! has to rebuild all the C_OUTLINEs. 
+ **********************************************************************/ +void C_BLOB::rotate(const FCOORD& rotation) { + RotateOutlineList(rotation, &outlines); +} + + +/********************************************************************** + * C_BLOB::plot + * + * Draw the C_BLOB in the given colour. + **********************************************************************/ + +#ifndef GRAPHICS_DISABLED +void C_BLOB::plot( //draw it + ScrollView* window, //window to draw in + ScrollView::Color blob_colour, //main colour + ScrollView::Color child_colour //for holes + ) { + plot_outline_list(&outlines, window, blob_colour, child_colour); +} +#endif diff --git a/ccmain/stepblob.h b/ccmain/stepblob.h new file mode 100644 index 000000000..a4dcaece0 --- /dev/null +++ b/ccmain/stepblob.h @@ -0,0 +1,91 @@ +/********************************************************************** + * File: stepblob.h (Formerly cblob.h) + * Description: Code for C_BLOB class. + * Author: Ray Smith + * Created: Tue Oct 08 10:41:13 BST 1991 + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef STEPBLOB_H +#define STEPBLOB_H + +#include "coutln.h" +#include "rect.h" + +class C_BLOB:public ELIST_LINK +{ + public: + C_BLOB() { + } + explicit C_BLOB(C_OUTLINE_LIST *outline_list); + + // Build and return a fake blob containing a single fake outline with no + // steps. + static C_BLOB* FakeBlob(const TBOX& box); + + C_OUTLINE_LIST *out_list() { //get outline list + return &outlines; + } + + TBOX bounding_box(); //compute bounding box + inT32 area(); //compute area + inT32 perimeter(); // Total perimeter of outlines and 1st level children. + inT32 outer_area(); //compute area + inT32 count_transitions( //count maxima + inT32 threshold); //size threshold + + void move(const ICOORD vec); // repostion blob by vector + void rotate(const FCOORD& rotation); // Rotate by given vector. + + void plot( //draw one + ScrollView* window, //window to draw in + ScrollView::Color blob_colour, //for outer bits + ScrollView::Color child_colour); //for holes + + void prep_serialise() { //set ptrs to counts + outlines.prep_serialise (); + } + + void dump( //write external bits + FILE *f) { + outlines.dump (f); + } + + void de_dump( //read external bits + FILE *f) { + outlines.de_dump (f); + } + + //assignment + make_serialise(C_BLOB) + + C_BLOB& operator= (const C_BLOB & source) { + if (!outlines.empty ()) + outlines.clear(); + outlines.deep_copy(&source.outlines, &C_OUTLINE::deep_copy); + return *this; + } + + static C_BLOB* deep_copy(const C_BLOB* src) { + C_BLOB* blob = new C_BLOB; + *blob = *src; + return blob; + } + + private: + C_OUTLINE_LIST outlines; //master elements +}; + +ELISTIZEH_S (C_BLOB) +#endif diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp new file mode 100644 index 000000000..1c70ac4dd --- /dev/null +++ b/ccmain/tesseractclass.cpp @@ -0,0 +1,84 @@ +/////////////////////////////////////////////////////////////////////// +// File: tesseractclass.cpp +// 
Description: An instance of Tesseract. For thread safety, *every* +// global variable goes in here, directly, or indirectly. +// Author: Ray Smith +// Created: Fri Mar 07 08:17:01 PST 2008 +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "tesseractclass.h" +#include "globals.h" + +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#ifdef HAVE_LIBLEPT +// Include leptonica library only if autoconf (or makefile etc) tell us to. +#include "allheaders.h" +#endif + +namespace tesseract { + +Tesseract::Tesseract() + : BOOL_MEMBER(tessedit_resegment_from_boxes, false, + "Take segmentation and labeling from box file"), + BOOL_MEMBER(tessedit_train_from_boxes, false, + "Generate training data from boxed chars"), + BOOL_MEMBER(tessedit_dump_pageseg_images, false, + "Dump itermediate images made during page segmentation"), + // The default for pageseg_mode is the old behaviour, so as not to + // upset anything that relies on that. 
+ INT_MEMBER(tessedit_pageseg_mode, 2, + "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char" + " (Values from PageSegMode enum in baseapi.h)"), + INT_MEMBER(tessedit_accuracyvspeed, 0, + "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate" + " (Values from AccuracyVSpeed enum in baseapi.h)"), + BOOL_MEMBER(tessedit_train_from_boxes_word_level, false, + "Generate training data from boxed chars at word level."), + STRING_MEMBER(tessedit_char_blacklist, "", + "Blacklist of chars not to recognize"), + STRING_MEMBER(tessedit_char_whitelist, "", + "Whitelist of chars to recognize"), + BOOL_MEMBER(global_tessedit_ambigs_training, false, + "Perform training for ambiguities"), + pix_binary_(NULL), + deskew_(1.0f, 0.0f), + reskew_(1.0f, 0.0f), + hindi_image_(false) { +} + +Tesseract::~Tesseract() { + Clear(); +} + +void Tesseract::Clear() { +#ifdef HAVE_LIBLEPT + if (pix_binary_ != NULL) + pixDestroy(&pix_binary_); +#endif + deskew_ = FCOORD(1.0f, 0.0f); + reskew_ = FCOORD(1.0f, 0.0f); +} + +void Tesseract::SetBlackAndWhitelist() { + // Set the white and blacklists (if any) + unicharset.set_black_and_whitelist(tessedit_char_blacklist.string(), + tessedit_char_whitelist.string()); +} + +} // namespace tesseract diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h new file mode 100644 index 000000000..8203a3675 --- /dev/null +++ b/ccmain/tesseractclass.h @@ -0,0 +1,541 @@ +/////////////////////////////////////////////////////////////////////// +// File: tesseractclass.h +// Description: An instance of Tesseract. For thread safety, *every* +// global variable goes in here, directly, or indirectly. +// Author: Ray Smith +// Created: Fri Mar 07 08:17:01 PST 2008 +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__ +#define TESSERACT_CCMAIN_TESSERACTCLASS_H__ + +#include "varable.h" +#include "wordrec.h" +#include "ocrclass.h" +#include "control.h" +#include "docqual.h" + +class CHAR_SAMPLES_LIST; +class CHAR_SAMPLE_LIST; +class PAGE_RES; +class PAGE_RES_IT; +class BLOCK_LIST; +class TO_BLOCK_LIST; +class IMAGE; +class WERD_RES; +class ROW; +class TBOX; +class SVMenuNode; +struct Pix; +class WERD_CHOICE; +class WERD; +class BLOB_CHOICE_LIST_CLIST; + + +// Top-level class for all tesseract global instance data. +// This class either holds or points to all data used by an instance +// of Tesseract, including the memory allocator. When this is +// complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT! +// +// NOTE to developers: Do not create cyclic dependencies through this class! +// The directory dependency tree must remain a tree! The keep this clean, +// lower-level code (eg in ccutil, the bottom level) must never need to +// know about the content of a higher-level directory. +// The following scheme will grant the easiest access to lower-level +// global members without creating a cyclic dependency: +// ccmain inherits wordrec, includes textord as a member +// wordrec inherits classify +// classify inherits ccstruct, includes dict as a member +// ccstruct inherits c_util, includes image as a member +// c_util inherits cc_util +// textord has a pointer to ccstruct, but doesn't own it. 
+// dict has a pointer to ccstruct, but doesn't own it. +// +// NOTE: that each level contains members that correspond to global +// data that is defined (and used) at that level, not necessarily where +// the type is defined so for instance: +// BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs"); +// goes inside the Textord class, not the cc_util class. + +namespace tesseract { + +class Tesseract : public Wordrec { + public: + Tesseract(); + ~Tesseract(); + + void Clear(); + + // Simple accessors. + const FCOORD& reskew() const { + return reskew_; + } + // Destroy any existing pix and return a pointer to the pointer. + Pix** mutable_pix_binary() { + Clear(); + return &pix_binary_; + } + Pix* pix_binary() const { + return pix_binary_; + } + + void SetBlackAndWhitelist(); + int SegmentPage(const STRING* input_file, + IMAGE* image, BLOCK_LIST* blocks); + int AutoPageSeg(int width, int height, int resolution, + bool single_column, IMAGE* image, + BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); + + //// control.h ///////////////////////////////////////////////////////// + void recog_all_words( //process words + PAGE_RES *page_res, //page structure + //progress monitor + volatile ETEXT_DESC *monitor, + TBOX *target_word_box=0L, + inT16 dopasses=0 + ); + void classify_word_pass1( //recog one word + WERD_RES *word, //word to do + ROW *row, + BLOCK* block, + BOOL8 cluster_adapt, + CHAR_SAMPLES_LIST *char_clusters, + CHAR_SAMPLE_LIST *chars_waiting); + void recog_pseudo_word( //recognize blobs + BLOCK_LIST *block_list, //blocks to check + TBOX &selection_box); + + // This method returns all the blobs in the specified blocks. + // It's the caller's responsibility to destroy the returned list. + C_BLOB_LIST* get_blobs_from_blocks(BLOCK_LIST* blocks // blocks to look at. + ); + + // This method can be used to perform word-level training using box files. + // TODO: this can be modified to perform training in general case too. 
+ void train_word_level_with_boxes( + const STRING& box_file, // File with boxes. + const STRING& out_file, // Output file. + BLOCK_LIST* blocks // Blocks to use. + ); + void fix_rep_char(WERD_RES *word); + void fix_quotes( //make double quotes + WERD_CHOICE *choice, //choice to fix + WERD *word, //word to do //char choices + BLOB_CHOICE_LIST_CLIST *blob_choices); + ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s, + const char *lengths); + void match_word_pass2( //recog one word + WERD_RES *word, //word to do + ROW *row, + BLOCK* block, + float x_height); + void classify_word_pass2( //word to do + WERD_RES *word, + BLOCK* block, + ROW *row); + BOOL8 recog_interactive( //recognize blobs + BLOCK *block, //block + ROW *row, //row of word + WERD *word //word to recognize + ); + void fix_hyphens( //crunch double hyphens + WERD_CHOICE *choice, //choice to fix + WERD *word, //word to do //char choices + BLOB_CHOICE_LIST_CLIST *blob_choices); + void set_word_fonts( + WERD_RES *word, // word to adapt to + BLOB_CHOICE_LIST_CLIST *blob_choices); // detailed results + void font_recognition_pass( //good chars in word + PAGE_RES_IT &page_res_it); + + //// output.h ////////////////////////////////////////////////////////// + + void output_pass( //Tess output pass //send to api + PAGE_RES_IT &page_res_it, + BOOL8 write_to_shm, + TBOX *target_word_box); + FILE *open_outfile( //open .map & .unlv file + const char *extension); + void write_results( //output a word + PAGE_RES_IT &page_res_it, //full info + char newline_type, //type of newline + BOOL8 force_eol, //override tilde crunch? + BOOL8 write_to_shm //send to api + ); + void set_unlv_suspects(WERD_RES *word); + UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated? 
+ BOOL8 acceptable_number_string(const char *s, + const char *lengths); + inT16 count_alphanums(const WERD_CHOICE &word); + inT16 count_alphas(const WERD_CHOICE &word); + //// tessedit.h //////////////////////////////////////////////////////// + void read_config_file(const char *filename, bool global_only); + int init_tesseract(const char *arg0, + const char *textbase, + const char *language, + char **configs, + int configs_size, + bool configs_global_only); + + int init_tesseract_lm(const char *arg0, + const char *textbase, + const char *language); + + // Initializes the tesseract classifier without loading language models. + int init_tesseract_classifier(const char *arg0, + const char *textbase, + const char *language, + char **configs, + int configs_size, + bool configs_global_only); + + void recognize_page(STRING& image_name); + void end_tesseract(); + + bool init_tesseract_lang_data(const char *arg0, + const char *textbase, + const char *language, + char **configs, + int configs_size, + bool configs_global_only); + + //// pgedit.h ////////////////////////////////////////////////////////// + SVMenuNode *build_menu_new(); + void pgeditor_main(BLOCK_LIST *blocks); + void process_image_event( // action in image win + const SVEvent &event); + void pgeditor_read_file( // of serialised file + STRING &filename, + BLOCK_LIST *blocks // block list to add to + ); + void do_new_source( // serialise + ); + BOOL8 process_cmd_win_event( // UI command semantics + inT32 cmd_event, // which menu item? + char *new_value // any prompt data + ); + //// reject.h ////////////////////////////////////////////////////////// + const char *char_ambiguities(char c); + void make_reject_map( //make rej map for wd //detailed results + WERD_RES *word, + BLOB_CHOICE_LIST_CLIST *blob_choices, + ROW *row, + inT16 pass //1st or 2nd? 
+ ); + BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map); + inT16 first_alphanum_index(const char *word, + const char *word_lengths); + inT16 first_alphanum_offset(const char *word, + const char *word_lengths); + inT16 alpha_count(const char *word, + const char *word_lengths); + BOOL8 word_contains_non_1_digit(const char *word, + const char *word_lengths); + void dont_allow_1Il(WERD_RES *word); + inT16 count_alphanums( //how many alphanums + WERD_RES *word); + BOOL8 repeated_ch_string(const char *rep_ch_str, + const char *lengths); + void flip_0O(WERD_RES *word); + BOOL8 non_0_digit(UNICHAR_ID unichar_id); + BOOL8 non_O_upper(UNICHAR_ID unichar_id); + BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row); + void nn_match_word( //Match a word + WERD_RES *word, + ROW *row); + void nn_recover_rejects(WERD_RES *word, ROW *row); + BOOL8 test_ambig_word( //test for ambiguity + WERD_RES *word); + void set_done( //set done flag + WERD_RES *word, + inT16 pass); + inT16 safe_dict_word(const WERD_CHOICE &word); + void flip_hyphens(WERD_RES *word); + //// adaptions.h /////////////////////////////////////////////////////// + void adapt_to_good_ems(WERD_RES *word, + CHAR_SAMPLES_LIST *char_clusters, + CHAR_SAMPLE_LIST *chars_waiting); + void adapt_to_good_samples(WERD_RES *word, + CHAR_SAMPLES_LIST *char_clusters, + CHAR_SAMPLE_LIST *chars_waiting); + BOOL8 word_adaptable( //should we adapt? 
+ WERD_RES *word, + uinT16 mode); + void reject_suspect_ems(WERD_RES *word); + void collect_ems_for_adaption(WERD_RES *word, + CHAR_SAMPLES_LIST *char_clusters, + CHAR_SAMPLE_LIST *chars_waiting); + void collect_characters_for_adaption(WERD_RES *word, + CHAR_SAMPLES_LIST *char_clusters, + CHAR_SAMPLE_LIST *chars_waiting); + void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting, + CHAR_SAMPLE *sample, + CHAR_SAMPLES *best_cluster); + void cluster_sample(CHAR_SAMPLE *sample, + CHAR_SAMPLES_LIST *char_clusters, + CHAR_SAMPLE_LIST *chars_waiting); + void complete_clustering(CHAR_SAMPLES_LIST *char_clusters, + CHAR_SAMPLE_LIST *chars_waiting); + + //// tfacepp.cpp /////////////////////////////////////////////////////// + WERD_CHOICE *recog_word_recursive( //recog one owrd + WERD *word, //word to do + DENORM *denorm, //de-normaliser + //matcher function + POLY_MATCHER matcher, + //tester function + POLY_TESTER tester, + //trainer function + POLY_TESTER trainer, + BOOL8 testing, //true if answer driven + //raw result + WERD_CHOICE *&raw_choice, + //list of blob lists + BLOB_CHOICE_LIST_CLIST *blob_choices, + WERD *&outword //bln word output + ); + WERD_CHOICE *recog_word( //recog one owrd + WERD *word, //word to do + DENORM *denorm, //de-normaliser + POLY_MATCHER matcher, //matcher function + POLY_TESTER tester, //tester function + POLY_TESTER trainer, //trainer function + BOOL8 testing, //true if answer driven + WERD_CHOICE *&raw_choice, //raw result + //list of blob lists + BLOB_CHOICE_LIST_CLIST *blob_choices, + WERD *&outword //bln word output + ); + WERD_CHOICE *split_and_recog_word( //recog one owrd + WERD *word, //word to do + DENORM *denorm, //de-normaliser + //matcher function + POLY_MATCHER matcher, + //tester function + POLY_TESTER tester, + //trainer function + POLY_TESTER trainer, + BOOL8 testing, //true if answer driven + //raw result + WERD_CHOICE *&raw_choice, + //list of blob lists + BLOB_CHOICE_LIST_CLIST *blob_choices, + WERD *&outword //bln word output 
+ ); + //// fixspace.cpp /////////////////////////////////////////////////////// + BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position); + inT16 eval_word_spacing(WERD_RES_LIST &word_res_list); + void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block); + inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list); + void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block); + void fix_fuzzy_space_list( //space explorer + WERD_RES_LIST &best_perm, + ROW *row, + BLOCK* block); + void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block); + void fix_fuzzy_spaces( //find fuzzy words + volatile ETEXT_DESC *monitor, //progress monitor + inT32 word_count, //count of words in doc + PAGE_RES *page_res); + //// docqual.cpp //////////////////////////////////////////////////////// + GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word); + BOOL8 potential_word_crunch(WERD_RES *word, + GARBAGE_LEVEL garbage_level, + BOOL8 ok_dict_word); + void tilde_crunch(PAGE_RES_IT &page_res_it); + void unrej_good_quality_words( //unreject potential + PAGE_RES_IT &page_res_it); + void doc_and_block_rejection( //reject big chunks + PAGE_RES_IT &page_res_it, + BOOL8 good_quality_doc); + void quality_based_rejection(PAGE_RES_IT &page_res_it, + BOOL8 good_quality_doc); + void convert_bad_unlv_chs(WERD_RES *word_res); + void merge_tess_fails(WERD_RES *word_res); + void tilde_delete(PAGE_RES_IT &page_res_it); + void insert_rej_cblobs(WERD_RES *word); + //// pagewalk.cpp /////////////////////////////////////////////////////// + void + process_selected_words ( + BLOCK_LIST * block_list, //blocks to check + //function to call + TBOX & selection_box, + BOOL8 (tesseract::Tesseract::*word_processor) ( + BLOCK *, + ROW *, + WERD *)); + //// tessbox.cpp /////////////////////////////////////////////////////// + void tess_add_doc_word( //test acceptability + WERD_CHOICE *word_choice //after context + ); + void tess_adapter( //adapt to word + WERD 
*word, //bln word + DENORM *denorm, //de-normalise + const WERD_CHOICE& choice, //string for word + const WERD_CHOICE& raw_choice, //before context + const char *rejmap //reject map + ); + WERD_CHOICE *test_segment_pass2( //recog one word + WERD *word, //bln word to do + DENORM *denorm, //de-normaliser + POLY_MATCHER matcher, //matcher function + POLY_TESTER tester, //tester function + //raw result + WERD_CHOICE *&raw_choice, + //list of blob lists + BLOB_CHOICE_LIST_CLIST *blob_choices, + WERD *&outword //bln word output + ); + WERD_CHOICE *tess_segment_pass1( //recog one word + WERD *word, //bln word to do + DENORM *denorm, //de-normaliser + POLY_MATCHER matcher, //matcher function + //raw result + WERD_CHOICE *&raw_choice, + //list of blob lists + BLOB_CHOICE_LIST_CLIST *blob_choices, + WERD *&outword //bln word output + ); + WERD_CHOICE *tess_segment_pass2( //recog one word + WERD *word, //bln word to do + DENORM *denorm, //de-normaliser + POLY_MATCHER matcher, //matcher function + //raw result + WERD_CHOICE *&raw_choice, + //list of blob lists + BLOB_CHOICE_LIST_CLIST *blob_choices, + WERD *&outword //bln word output + ); + WERD_CHOICE *correct_segment_pass2( //recog one word + WERD *word, //bln word to do + DENORM *denorm, //de-normaliser + POLY_MATCHER matcher, //matcher function + POLY_TESTER tester, //tester function + //raw result + WERD_CHOICE *&raw_choice, + //list of blob lists + BLOB_CHOICE_LIST_CLIST *blob_choices, + WERD *&outword //bln word output + ); + void tess_default_matcher( //call tess + PBLOB *pblob, //previous blob + PBLOB *blob, //blob to match + PBLOB *nblob, //next blob + WERD *word, //word it came from + DENORM *denorm, //de-normaliser + BLOB_CHOICE_LIST *ratings, //list of results + const char* script + ); + void tess_bn_matcher( //call tess + PBLOB *pblob, //previous blob + PBLOB *blob, //blob to match + PBLOB *nblob, //next blob + WERD *word, //word it came from + DENORM *denorm, //de-normaliser + BLOB_CHOICE_LIST *ratings //list of 
results + ); + void tess_cn_matcher( //call tess + PBLOB *pblob, //previous blob + PBLOB *blob, //blob to match + PBLOB *nblob, //next blob + WERD *word, //word it came from + DENORM *denorm, //de-normaliser + BLOB_CHOICE_LIST *ratings, //list of results + // Sorted array of CP_RESULT_STRUCT from class pruner. + CLASS_PRUNER_RESULTS cpresults + ); + BOOL8 tess_adaptable_word( //test adaptability + WERD *word, //word to test + WERD_CHOICE *word_choice, //after context + WERD_CHOICE *raw_choice //before context + ); + BOOL8 tess_acceptable_word( //test acceptability + WERD_CHOICE *word_choice, //after context + WERD_CHOICE *raw_choice //before context + ); + //// applybox.cpp ////////////////////////////////////////////////////// + void apply_box_testing(BLOCK_LIST *block_list); + void apply_boxes(const STRING& fname, + BLOCK_LIST *block_list //real blocks + ); + // converts an array of boxes to a block list + int Boxes2BlockList(int box_cnt, TBOX *boxes, BLOCK_LIST *block_list, + bool right2left); + //// blobcmp.cpp /////////////////////////////////////////////////////// + float compare_tess_blobs(TBLOB *blob1, + TEXTROW *row1, + TBLOB *blob2, + TEXTROW *row2); + //// paircmp.cpp /////////////////////////////////////////////////////// + float compare_bln_blobs( //match 2 blobs + PBLOB *blob1, //first blob + DENORM *denorm1, + PBLOB *blob2, //other blob + DENORM *denorm2); + float compare_blobs( //match 2 blobs + PBLOB *blob1, //first blob + ROW *row1, //row it came from + PBLOB *blob2, //other blob + ROW *row2); + BOOL8 compare_blob_pairs( //blob processor + BLOCK *, + ROW *row, //row it came from + WERD *, + PBLOB *blob //blob to compare + ); + //// fixxht.cpp /////////////////////////////////////////////////////// + void check_block_occ(WERD_RES *word_res); + + //// Data members /////////////////////////////////////////////////////// + BOOL_VAR_H(tessedit_resegment_from_boxes, false, + "Take segmentation and labeling from box file"); + 
BOOL_VAR_H(tessedit_train_from_boxes, false, + "Generate training data from boxed chars"); + BOOL_VAR_H(tessedit_dump_pageseg_images, false, + "Dump itermediate images made during page segmentation"); + INT_VAR_H(tessedit_pageseg_mode, 2, + "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char" + " (Values from PageSegMode enum in baseapi.h)"); + INT_VAR_H(tessedit_accuracyvspeed, 0, + "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate" + " (Values from AccuracyVSpeed enum in baseapi.h)"); + BOOL_VAR_H(tessedit_train_from_boxes_word_level, false, + "Generate training data from boxed chars at word level."); + STRING_VAR_H(tessedit_char_blacklist, "", + "Blacklist of chars not to recognize"); + STRING_VAR_H(tessedit_char_whitelist, "", + "Whitelist of chars to recognize"); + BOOL_VAR_H(global_tessedit_ambigs_training, false, + "Perform training for ambiguities"); + //// ambigsrecog.cpp ///////////////////////////////////////////////////////// + FILE *init_ambigs_training(const STRING &fname); + void ambigs_training_segmented(const STRING &fname, + PAGE_RES *page_res, + volatile ETEXT_DESC *monitor, + FILE *output_file); + void ambigs_classify_and_output(PAGE_RES_IT *page_res_it, + const char *label, + FILE *output_file); + private: + Pix* pix_binary_; + FCOORD deskew_; + FCOORD reskew_; + bool hindi_image_; +}; + +} // namespace tesseract + + +#endif // TESSERACT_CCMAIN_TESSERACTCLASS_H__ diff --git a/ccmain/thresholder.cpp b/ccmain/thresholder.cpp new file mode 100644 index 000000000..f647d2294 --- /dev/null +++ b/ccmain/thresholder.cpp @@ -0,0 +1,441 @@ +/////////////////////////////////////////////////////////////////////// +// File: thresholder.cpp +// Description: Base API for thresolding images in tesseract. +// Author: Ray Smith +// Created: Mon May 12 11:28:15 PDT 2008 +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#ifdef HAVE_LIBLEPT +// Include leptonica library only if autoconf (or makefile etc) tell us to. +#include "allheaders.h" +#endif + +#include "thresholder.h" + +#include + +#include "img.h" +#include "otsuthr.h" + +namespace tesseract { + +ImageThresholder::ImageThresholder() + : +#ifdef HAVE_LIBLEPT + pix_(NULL), +#endif + image_data_(NULL), + image_width_(0), image_height_(0), + image_bytespp_(0), image_bytespl_(0) { + SetRectangle(0, 0, 0, 0); +} + +ImageThresholder::~ImageThresholder() { + Clear(); +} + +// Destroy the Pix if there is one, freeing memory. +void ImageThresholder::Clear() { +#ifdef HAVE_LIBLEPT + if (pix_ != NULL) { + pixDestroy(&pix_); + pix_ = NULL; + } +#endif + image_data_ = NULL; +} + +// Return true if no image has been set. +bool ImageThresholder::IsEmpty() const { +#ifdef HAVE_LIBLEPT + if (pix_ != NULL) + return false; +#endif + return image_data_ == NULL; +} + +// SetImage makes a copy of only the metadata, not the underlying +// image buffer. It promises to treat the source as read-only in either case, +// but in return assumes that the Pix or image buffer remain valid +// throughout the life of the ImageThresholder. +// Greyscale of 8 and color of 24 or 32 bits per pixel may be given. +// Palette color images will not work properly and must be converted to +// 24 bit. 
+// Binary images of 1 bit per pixel may also be given but they must be +// byte packed with the MSB of the first byte being the first pixel, and a +// one pixel is WHITE. For binary images set bytes_per_pixel=0. +void ImageThresholder::SetImage(const unsigned char* imagedata, + int width, int height, + int bytes_per_pixel, int bytes_per_line) { +#ifdef HAVE_LIBLEPT + if (pix_ != NULL) + pixDestroy(&pix_); + pix_ = NULL; +#endif + image_data_ = imagedata; + image_width_ = width; + image_height_ = height; + image_bytespp_ = bytes_per_pixel; + image_bytespl_ = bytes_per_line; + Init(); +} + +// Store the coordinates of the rectangle to process for later use. +// Doesn't actually do any thresholding. +void ImageThresholder::SetRectangle(int left, int top, int width, int height) { + rect_left_ = left; + rect_top_ = top; + rect_width_ = width; + rect_height_ = height; +} + +// Get enough parameters to be able to rebuild bounding boxes in the +// original image (not just within the rectangle). +// Left and top are enough with top-down coordinates, but +// the height of the rectangle and the image are needed for bottom-up. +void ImageThresholder::GetImageSizes(int* left, int* top, + int* width, int* height, + int* imagewidth, int* imageheight) { + *left = rect_left_; + *top = rect_top_; + *width = rect_width_; + *height = rect_height_; + *imagewidth = image_width_; + *imageheight = image_height_; +} + +// Return true if HAVE_LIBLEPT and this thresholder implements the Pix +// interface. +bool ImageThresholder::HasThresholdToPix() const { +#ifdef HAVE_LIBLEPT + return true; +#else + return false; +#endif +} + +// Threshold the source image as efficiently as possible to the output +// tesseract IMAGE class. +void ImageThresholder::ThresholdToIMAGE(IMAGE* image) { +#ifdef HAVE_LIBLEPT + if (pix_ != NULL) { + if (image_bytespp_ == 0) { + // We have a binary image, so it just has to be converted. 
+ CopyBinaryRectPixToIMAGE(image); + } else { + if (image_bytespp_ == 4) { + // Color data can just be passed direct. + const uinT32* data = pixGetData(pix_); + OtsuThresholdRectToIMAGE(reinterpret_cast(data), + image_bytespp_, image_bytespl_, image); + } else { + // Convert 8-bit to IMAGE and then pass its + // buffer to the raw interface to complete the conversion. + IMAGE temp_image; + temp_image.FromPix(pix_); + OtsuThresholdRectToIMAGE(temp_image.get_buffer(), + image_bytespp_, + COMPUTE_IMAGE_XDIM(temp_image.get_xsize(), + temp_image.get_bpp()), + image); + } + } + return; + } +#endif + if (image_bytespp_ > 0) { + // Threshold grey or color. + OtsuThresholdRectToIMAGE(image_data_, image_bytespp_, image_bytespl_, + image); + } else { + CopyBinaryRectRawToIMAGE(image); + } +} + +#ifdef HAVE_LIBLEPT +// NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its +// input, so the source pix may be pixDestroyed immediately after. +void ImageThresholder::SetImage(const Pix* pix) { + image_data_ = NULL; + if (pix_ != NULL) + pixDestroy(&pix_); + Pix* src = const_cast(pix); + int depth; + pixGetDimensions(src, &image_width_, &image_height_, &depth); + // Convert the image as necessary so it is one of binary, plain RGB, or + // 8 bit with no colormap. + if (depth > 1 && depth < 8) { + pix_ = pixConvertTo8(src, false); + } else if (pixGetColormap(src)) { + pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC); + } else { + pix_ = pixClone(src); + } + depth = pixGetDepth(pix_); + image_bytespp_ = depth / 8; + image_bytespl_ = pixGetWpl(pix_) * sizeof(l_uint32); + Init(); +} + +// Threshold the source image as efficiently as possible to the output Pix. +// Creates a Pix and sets pix to point to the resulting pointer. +// Caller must use pixDestroy to free the created Pix. +void ImageThresholder::ThresholdToPix(Pix** pix) { + if (pix_ != NULL) { + if (image_bytespp_ == 0) { + // We have a binary image, so it just has to be cloned. 
+ *pix = GetPixRect(); + } else { + if (image_bytespp_ == 4) { + // Color data can just be passed direct. + const uinT32* data = pixGetData(pix_); + OtsuThresholdRectToPix(reinterpret_cast(data), + image_bytespp_, image_bytespl_, pix); + } else { + // Convert 8-bit to IMAGE and then pass its + // buffer to the raw interface to complete the conversion. + IMAGE temp_image; + temp_image.FromPix(pix_); + OtsuThresholdRectToPix(temp_image.get_buffer(), + image_bytespp_, + COMPUTE_IMAGE_XDIM(temp_image.get_xsize(), + temp_image.get_bpp()), + pix); + } + } + return; + } + if (image_bytespp_ > 0) { + // Threshold grey or color. + OtsuThresholdRectToPix(image_data_, image_bytespp_, image_bytespl_, pix); + } else { + RawRectToPix(pix); + } +} + +// Get a clone/copy of the source image rectangle. +// The returned Pix must be pixDestroyed. +// This function will be used in the future by the page layout analysis, and +// the layout analysis that uses it will only be available with Leptonica, +// so there is no raw equivalent. +Pix* ImageThresholder::GetPixRect() { + if (pix_ != NULL) { + if (IsFullImage()) { + // Just clone the whole thing. + return pixClone(pix_); + } else { + // Crop to the given rectangle. + Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_); + Pix* cropped = pixClipRectangle(pix_, box, NULL); + boxDestroy(&box); + return cropped; + } + } + // The input is raw, so we have to make a copy of it. + Pix* raw_pix; + RawRectToPix(&raw_pix); + return raw_pix; +} +#endif + +// Common initialization shared between SetImage methods. +void ImageThresholder::Init() { + SetRectangle(0, 0, image_width_, image_height_); +} + +// Otsu threshold the rectangle, taking everything except the image buffer +// pointer from the class, to the output IMAGE. 
+void ImageThresholder::OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
+                                                int bytes_per_pixel,
+                                                int bytes_per_line,
+                                                IMAGE* image) const {
+  // Per-channel results filled in by OtsuThreshold; both arrays are
+  // released with delete [] once the thresholding pass has used them.
+  int* channel_thresholds;
+  int* channel_hi_values;
+  OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
+                rect_left_, rect_top_, rect_width_, rect_height_,
+                &channel_thresholds, &channel_hi_values);
+
+  // Apply the computed thresholds to produce the output IMAGE.
+  ThresholdRectToIMAGE(imagedata, bytes_per_pixel, bytes_per_line,
+                       channel_thresholds, channel_hi_values, image);
+
+  delete [] channel_thresholds;
+  delete [] channel_hi_values;
+}
+
+// Thresholds the grey or color rectangle of imagedata into image, which is
+// (re)created as a 1 bit per pixel IMAGE of the rectangle's size.
+// thresholds and hi_values hold one entry per channel, as produced by
+// OtsuThreshold in otsuthr.cpp. A channel whose hi_value is negative is
+// ignored.
+void ImageThresholder::ThresholdRectToIMAGE(const unsigned char* imagedata,
+                                            int bytes_per_pixel,
+                                            int bytes_per_line,
+                                            const int* thresholds,
+                                            const int* hi_values,
+                                            IMAGE* image) const {
+  IMAGELINE out_line;
+  image->create(rect_width_, rect_height_, 1);
+  out_line.init(rect_width_);
+  // Tesseract stores images with the bottom at y=0 and 0 is black, so the
+  // source rows are walked forwards while the output row index counts down,
+  // and a pixel that trips any channel test is written as 0.
+  const unsigned char* src_row = imagedata + rect_top_* bytes_per_line +
+                                 rect_left_ * bytes_per_pixel;
+  int out_y = rect_height_ - 1;
+  while (out_y >= 0) {
+    const unsigned char* px = src_row;
+    for (int col = 0; col < rect_width_; ++col) {
+      // Stop at the first channel that marks the pixel as black.
+      bool tripped = false;
+      for (int ch = 0; ch < bytes_per_pixel && !tripped; ++ch) {
+        tripped = hi_values[ch] >= 0 &&
+                  (px[ch] > thresholds[ch]) == (hi_values[ch] == 0);
+      }
+      out_line.pixels[col] = tripped ? 0 : 1;
+      px += bytes_per_pixel;
+    }
+    image->put_line(0, out_y, rect_width_, &out_line, 0);
+    src_row += bytes_per_line;
+    --out_y;
+  }
+}
+
+// Cut out the requested rectangle of the binary image to the output IMAGE.
+void ImageThresholder::CopyBinaryRectRawToIMAGE(IMAGE* image) const { + IMAGE rect_image; + rect_image.capture(const_cast(image_data_), + image_width_, rect_top_ + rect_height_, 1); + image->create(rect_width_, rect_height_, 1); + // copy_sub_image uses coords starting at the bottom, so the y coord of the + // copy is the bottom of the rect_image. + copy_sub_image(&rect_image, rect_left_, 0, rect_width_, rect_height_, + image, 0, 0, false); +} + +#ifdef HAVE_LIBLEPT +// Otsu threshold the rectangle, taking everything except the image buffer +// pointer from the class, to the output Pix. +void ImageThresholder::OtsuThresholdRectToPix(const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, + Pix** pix) const { + int* thresholds; + int* hi_values; + OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line, + rect_left_, rect_top_, rect_width_, rect_height_, + &thresholds, &hi_values); + + // Threshold the image to the given IMAGE. + ThresholdRectToPix(imagedata, bytes_per_pixel, bytes_per_line, + thresholds, hi_values, pix); + delete [] thresholds; + delete [] hi_values; +} + +// Threshold the rectangle, taking everything except the image buffer pointer +// from the class, using thresholds/hi_values to the output IMAGE. 
+void ImageThresholder::ThresholdRectToPix(const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, + const int* thresholds, + const int* hi_values, + Pix** pix) const { + *pix = pixCreate(rect_width_, rect_height_, 1); + uinT32* pixdata = pixGetData(*pix); + int wpl = pixGetWpl(*pix); + const unsigned char* srcdata = imagedata + rect_top_* bytes_per_line + + rect_left_ * bytes_per_pixel; + for (int y = 0; y < rect_height_; ++y) { + const uinT8* linedata = srcdata; + uinT32* pixline = pixdata + y * wpl; + for (int x = 0; x < rect_width_; ++x, linedata += bytes_per_pixel) { + bool white_result = true; + for (int ch = 0; ch < bytes_per_pixel; ++ch) { + if (hi_values[ch] >= 0 && + (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) { + white_result = false; + break; + } + } + if (white_result) + CLEAR_DATA_BIT(pixline, x); + else + SET_DATA_BIT(pixline, x); + } + srcdata += bytes_per_line; + } +} + +// Copy the raw image rectangle, taking all data from the class, to the Pix. +void ImageThresholder::RawRectToPix(Pix** pix) const { + if (image_bytespp_ < 4) { + // Go via a tesseract image structure (doesn't copy the data) + // and use ToPix. + IMAGE image; + int bits_per_pixel = image_bytespp_ * 8; + if (image_bytespp_ == 0) + bits_per_pixel = 1; + image.capture(const_cast(image_data_), + image_width_, rect_top_ + rect_height_, bits_per_pixel); + if (IsFullImage()) { + *pix = image.ToPix(); + } else { + IMAGE rect; + rect.create(rect_width_, rect_height_, bits_per_pixel); + // The capture chopped the image off at top+height, so copy + // the rectangle with y = 0 to get a rectangle of height + // starting at the bottom, since copy_sub_image uses bottom-up coords. 
+ copy_sub_image(&image, rect_left_, 0, rect_width_, rect_height_, + &rect, 0, 0, true); + *pix = rect.ToPix(); + } + } else { + *pix = pixCreate(rect_width_, rect_height_, 32); + uinT32* data = pixGetData(*pix); + int wpl = pixGetWpl(*pix); + const uinT8* imagedata = image_data_ + rect_top_ * image_bytespl_ + + rect_left_ * image_bytespp_; + for (int y = 0; y < rect_height_; ++y) { + const uinT8* linedata = imagedata; + uinT32* line = data + y * wpl; + for (int x = 0; x < rect_width_; ++x) { + line[x] = (linedata[0] << 24) | (linedata[1] << 16) | + (linedata[2] << 8) | linedata[3]; + linedata += 4; + } + imagedata += image_bytespl_; + } + } +} + +// Cut out the requested rectangle of the binary image to the output IMAGE. +void ImageThresholder::CopyBinaryRectPixToIMAGE(IMAGE* image) const { + if (IsFullImage()) { + // Just poke it directly into the tess image. + image->FromPix(pix_); + } else { + // Crop to the given rectangle. + Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_); + Pix* cropped = pixClipRectangle(pix_, box, NULL); + image->FromPix(cropped); + pixDestroy(&cropped); + boxDestroy(&box); + } +} +#endif + +} // namespace tesseract. + diff --git a/ccmain/thresholder.h b/ccmain/thresholder.h new file mode 100644 index 000000000..69635df60 --- /dev/null +++ b/ccmain/thresholder.h @@ -0,0 +1,180 @@ +/////////////////////////////////////////////////////////////////////// +// File: thresholder.h +// Description: Base API for thresolding images in tesseract. +// Author: Ray Smith +// Created: Mon May 12 11:00:15 PDT 2008 +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCMAIN_THRESHOLDER_H__
+#define TESSERACT_CCMAIN_THRESHOLDER_H__
+
+class IMAGE;
+struct Pix;
+
+namespace tesseract {
+
+// Base class for all tesseract image thresholding classes.
+// Specific classes can add new thresholding methods by
+// overriding ThresholdToIMAGE and/or ThresholdToPix.
+// Each instance deals with a single image, but the design is intended to
+// be useful for multiple calls to SetRectangle and ThresholdTo* if
+// desired.
+class ImageThresholder {
+ public:
+  ImageThresholder();
+  virtual ~ImageThresholder();
+
+  // Destroy the Pix if there is one, freeing memory.
+  virtual void Clear();
+
+  // Return true if no image has been set.
+  bool IsEmpty() const;
+
+  // SetImage makes a copy of only the metadata, not the underlying
+  // image buffer. It promises to treat the source as read-only in either case,
+  // but in return assumes that the Pix or image buffer remain valid
+  // throughout the life of the ImageThresholder.
+  // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
+  // Palette color images will not work properly and must be converted to
+  // 24 bit.
+  // Binary images of 1 bit per pixel may also be given but they must be
+  // byte packed with the MSB of the first byte being the first pixel, and a
+  // one pixel is WHITE. For binary images set bytes_per_pixel=0.
+  void SetImage(const unsigned char* imagedata, int width, int height,
+                int bytes_per_pixel, int bytes_per_line);
+
+  // Store the coordinates of the rectangle to process for later use.
+  // Doesn't actually do any thresholding.
+  void SetRectangle(int left, int top, int width, int height);
+
+  // Get enough parameters to be able to rebuild bounding boxes in the
+  // original image (not just within the rectangle).
+  // Left and top are enough with top-down coordinates, but
+  // the height of the rectangle and the image are needed for bottom-up.
+  virtual void GetImageSizes(int* left, int* top, int* width, int* height,
+                             int* imagewidth, int* imageheight);
+
+  // Return true if HAVE_LIBLEPT and this thresholder implements the Pix
+  // interface.
+  virtual bool HasThresholdToPix() const;
+
+  // Return true if the source image is color, i.e. it has at least
+  // 3 bytes per pixel.
+  bool IsColor() const {
+    return image_bytespp_ >= 3;
+  }
+
+  // Threshold the source image as efficiently as possible to the output
+  // tesseract IMAGE class.
+  virtual void ThresholdToIMAGE(IMAGE* image);
+
+#ifdef HAVE_LIBLEPT
+  // Pix vs raw, which to use?
+  // Implementations should provide the ability to source and target Pix
+  // where possible. A future version of Tesseract may choose to use Pix
+  // as its internal representation and discard IMAGE altogether.
+  // Because of that, an implementation that sources and targets Pix may end up
+  // with fewer copies than an implementation that does not.
+  // NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its
+  // input, so the source pix may be pixDestroyed immediately after.
+  void SetImage(const Pix* pix);
+
+  // Threshold the source image as efficiently as possible to the output Pix.
+  // Creates a Pix and sets pix to point to the resulting pointer.
+  // Caller must use pixDestroy to free the created Pix.
+  virtual void ThresholdToPix(Pix** pix);
+
+  // Get a clone/copy of the source image rectangle.
+  // The returned Pix must be pixDestroyed.
+  // This function will be used in the future by the page layout analysis, and
+  // the layout analysis that uses it will only be available with Leptonica,
+  // so there is no raw equivalent.
+  Pix* GetPixRect();
+#endif
+
+ protected:
+  // ----------------------------------------------------------------------
+  // Utility functions that may be useful components for other thresholders.
+
+  // Common initialization shared between SetImage methods.
+  virtual void Init();
+
+  // Return true if we are processing the full image, i.e. the rectangle
+  // starts at (0, 0) and matches the image width and height.
+  bool IsFullImage() const {
+    return rect_left_ == 0 && rect_top_ == 0 &&
+           rect_width_ == image_width_ && rect_height_ == image_height_;
+  }
+
+  // Otsu threshold the rectangle, taking everything except the image buffer
+  // pointer from the class, to the output IMAGE.
+  void OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
+                                int bytes_per_pixel, int bytes_per_line,
+                                IMAGE* image) const;
+
+  // Threshold the rectangle, taking everything except the image buffer pointer
+  // from the class, using thresholds/hi_values to the output IMAGE.
+  void ThresholdRectToIMAGE(const unsigned char* imagedata,
+                            int bytes_per_pixel, int bytes_per_line,
+                            const int* thresholds, const int* hi_values,
+                            IMAGE* image) const;
+
+  // Cut out the requested rectangle of the source raw binary image to the
+  // output IMAGE.
+  void CopyBinaryRectRawToIMAGE(IMAGE* image) const;
+
+#ifdef HAVE_LIBLEPT
+  // Otsu threshold the rectangle, taking everything except the image buffer
+  // pointer from the class, to the output Pix.
+  void OtsuThresholdRectToPix(const unsigned char* imagedata,
+                              int bytes_per_pixel, int bytes_per_line,
+                              Pix** pix) const;
+
+  // Threshold the rectangle, taking everything except the image buffer pointer
+  // from the class, using thresholds/hi_values to the output Pix.
+  void ThresholdRectToPix(const unsigned char* imagedata,
+                          int bytes_per_pixel, int bytes_per_line,
+                          const int* thresholds, const int* hi_values,
+                          Pix** pix) const;
+
+  // Copy the raw image rectangle, taking all data from the class, to the Pix.
+  void RawRectToPix(Pix** pix) const;
+
+  // Cut out the requested rectangle of the source binary Pix to the output
+  // IMAGE.
+  void CopyBinaryRectPixToIMAGE(IMAGE* image) const;
+#endif
+
+ protected:
+#ifdef HAVE_LIBLEPT
+  // Clone or other copy of the source Pix.
+  // The pix will always be PixDestroy()ed on destruction of the class.
+  Pix* pix_;
+#endif
+  // Exactly one of pix_ and image_data_ is not NULL.
+  const unsigned char* image_data_;  // Raw source image.
+
+  int image_width_;    // Width of source image/pix.
+  int image_height_;   // Height of source image/pix.
+  int image_bytespp_;  // Bytes per pixel of source image/pix (0 = binary).
+  int image_bytespl_;  // Bytes per line of source image/pix.
+  // Limits of image rectangle to be processed.
+  int rect_left_;
+  int rect_top_;
+  int rect_width_;
+  int rect_height_;
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_CCMAIN_THRESHOLDER_H__
+
diff --git a/ccmain/vecfuncs.cpp b/ccmain/vecfuncs.cpp
new file mode 100644
index 000000000..33992e1c2
--- /dev/null
+++ b/ccmain/vecfuncs.cpp
@@ -0,0 +1,63 @@
+/* -*-C-*-
+ ********************************************************************************
+ *
+ * File: vecfuncs.c (Formerly vecfuncs.c)
+ * Description: Blob definition
+ * Author: Mark Seaman, OCR Technology
+ * Created: Fri Oct 27 15:39:52 1989
+ * Modified: Tue Jul 9 17:44:12 1991 (Mark Seaman) marks@hpgrlt
+ * Language: C
+ * Package: N/A
+ * Status: Experimental (Do Not Distribute)
+ *
+ * (c) Copyright 1989, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + ******************************************************************************** + * Revision 5.1 89/07/27 11:47:50 11:47:50 ray () + * Added ratings acces methods. + * This version ready for independent development. + */ +/*---------------------------------------------------------------------- + I n c l u d e s +----------------------------------------------------------------------*/ +#include "mfcpch.h" +#include "vecfuncs.h" + +/*---------------------------------------------------------------------- + F u n c t i o n s +----------------------------------------------------------------------*/ +/********************************************************************** + * direction + * + * Show if the line is going in the positive or negative X direction. 
+ **********************************************************************/ +int direction(EDGEPT *point) { + int dir; /** direction to return **/ + EDGEPT *prev; /** prev point **/ + EDGEPT *next; /** next point **/ + + dir = 0; + prev = point->prev; + next = point->next; + + if (((prev->pos.x <= point->pos.x) && + (point->pos.x < next->pos.x)) || + ((prev->pos.x < point->pos.x) && (point->pos.x <= next->pos.x))) + dir = 1; + + if (((prev->pos.x >= point->pos.x) && + (point->pos.x > next->pos.x)) || + ((prev->pos.x > point->pos.x) && (point->pos.x >= next->pos.x))) + dir = -1; + + return dir; +} diff --git a/ccmain/vecfuncs.h b/ccmain/vecfuncs.h new file mode 100644 index 000000000..844d036f2 --- /dev/null +++ b/ccmain/vecfuncs.h @@ -0,0 +1,91 @@ +/* -*-C-*- + ******************************************************************************** + * + * File: vecfuncs.h (Formerly vecfuncs.h) + * Description: Vector calculations + * Author: Mark Seaman, OCR Technology + * Created: Wed Dec 20 09:37:18 1989 + * Modified: Tue Jul 9 17:44:37 1991 (Mark Seaman) marks@hpgrlt + * Language: C + * Package: N/A + * Status: Experimental (Do Not Distribute) + * + * (c) Copyright 1989, Hewlett-Packard Company. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + *********************************************************************************/ +#ifndef VECFUNCS_H +#define VECFUNCS_H + +#include "tessclas.h" +#include + +/*---------------------------------------------------------------------- + M a c r o s +----------------------------------------------------------------------*/ +/********************************************************************** + * point_diff + * + * Return the difference from point (p1) to point (p2). Put the value + * into point (p). + **********************************************************************/ + +#define point_diff(p,p1,p2) \ +((p).x = (p1).x - (p2).x, \ + (p).y = (p1).y - (p2).y, \ + (p)) + +/********************************************************************** + * CROSS + * + * cross product + **********************************************************************/ + +#define CROSS(a,b) \ +((a).x * (b).y - (a).y * (b).x) + +/********************************************************************** + * SCALAR + * + * scalar vector product + **********************************************************************/ + +#define SCALAR(a,b) \ +((a).x * (b).x + (a).y * (b).y) + +/********************************************************************** + * LENGTH + * + * length of vector + **********************************************************************/ + +#define LENGTH(a) \ +((a).x * (a).x + (a).y * (a).y) + +/*---------------------------------------------------------------------- + F u n c t i o n s +----------------------------------------------------------------------*/ +int direction(EDGEPT *point); + +/* +#if defined(__STDC__) || defined(__cplusplus) || MAC_OR_DOS +# define _ARGS(s) s +#else +# define _ARGS(s) () +#endif*/ + +/* vecfuncs.c +int direction + _ARGS((EDGEPT *point)); + +#undef _ARGS +*/ +#endif diff --git a/ccmain/werd.cpp b/ccmain/werd.cpp new file mode 100644 index 000000000..fde39974e --- /dev/null +++ b/ccmain/werd.cpp @@ -0,0 +1,967 @@ 
+/**********************************************************************
+ * File: werd.cpp (Formerly word.c)
+ * Description: Code for the WERD class.
+ * Author: Ray Smith
+ * Created: Tue Oct 08 14:32:12 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "mfcpch.h"
+#include "blckerr.h"
+#include "linlsq.h"
+#include "werd.h"
+
+#define FIRST_COLOUR ScrollView::RED        //first rainbow colour
+                                            //last rainbow colour
+#define LAST_COLOUR ScrollView::AQUAMARINE
+#define CHILD_COLOUR ScrollView::BROWN      //colour of children
+
+//error reported by WERD::scale when the word is not in polygon format
+const ERRCODE CANT_SCALE_EDGESTEPS =
+"Attempted to scale an edgestep format word";
+
+//EXTERN expands to nothing here, so it adds nothing to the lines below
+#define EXTERN
+
+//baseline normalisation variables; each takes a name, a default value
+//and a description string
+EXTERN BOOL_VAR (bln_numericmode, 0, "Optimize for numbers");
+EXTERN INT_VAR (bln_x_height, 128, "Baseline Normalisation X-height");
+EXTERN INT_VAR (bln_baseline_offset, 64, "Baseline Norm. offset of baseline");
+EXTERN double_VAR (bln_blshift_maxshift, -1.0,
+"Fraction of xh before shifting");
+EXTERN double_VAR (bln_blshift_xfraction, 0.75,
+"Size fraction of xh before shifting");
+
+ELISTIZE_S (WERD)
+/**********************************************************************
+ * WERD::WERD
+ *
+ * Constructor to build a WERD from a list of C_BLOBs.
+ * The C_BLOBs are not copied so the source list is emptied.
+ **********************************************************************/
+WERD::WERD (                     //constructor
+C_BLOB_LIST * blob_list,         //in word order
+uinT8 blank_count,               //blanks in front
+const char *text                 //correct text
+):
+flags (0),
+correct(text) {
+  C_BLOB_IT start_it = blob_list;//iterator
+  C_BLOB_IT end_it = blob_list;  //another
+                                 //rejected blobs in wd
+  C_BLOB_IT rej_cblob_it = &rej_cblobs;
+  C_OUTLINE_IT c_outline_it;     //coutline iterator
+  BOOL8 blob_inverted;           //inverse flag of blob's first outline
+  BOOL8 reject_blob;             //blob has mixed inverse flags
+  inT16 inverted_vote = 0;       //blobs voting white-on-black
+  inT16 non_inverted_vote = 0;   //blobs voting black-on-white
+
+  while (!end_it.at_last ())
+    end_it.forward ();           //move to last
+                                 //move to our list
+  cblobs.assign_to_sublist (&start_it, &end_it);
+  blanks = blank_count;
+  /*
+    Set white on black flag for the WERD, moving any duff blobs onto the
+    rej_cblobs list.
+    First, walk the cblobs checking the inverse flag for each outline of each
+    cblob. If a cblob has inconsistent flag settings for its different
+    outlines, move the blob to the reject list. Otherwise, increment the
+    appropriate w-on-b or b-on-w vote for the word.
+
+    Now set the inversion flag for the WERD by maximum vote.
+
+    Walk the blobs again, moving any blob whose inversion flag does not agree
+    with the consensus onto the reject list.
+  */
+  start_it.set_to_list (&cblobs);
+  if (start_it.empty ())
+    return;                      //no blobs, nothing to vote on
+  //first pass: reject blobs with mixed flags, count votes from the rest
+  for (start_it.mark_cycle_pt ();
+  !start_it.cycled_list (); start_it.forward ()) {
+    c_outline_it.set_to_list (start_it.data ()->out_list ());
+                                 //first outline sets the reference flag
+    blob_inverted = c_outline_it.data ()->flag (COUT_INVERSE);
+    reject_blob = FALSE;
+    //stop at the first outline that disagrees with the reference flag
+    for (c_outline_it.mark_cycle_pt ();
+      !c_outline_it.cycled_list () && !reject_blob;
+    c_outline_it.forward ()) {
+      reject_blob =
+        c_outline_it.data ()->flag (COUT_INVERSE) != blob_inverted;
+    }
+    if (reject_blob)
+                                 //take blob out of cblobs, add to rejects
+      rej_cblob_it.add_after_then_move (start_it.extract ());
+    else {
+      if (blob_inverted)
+        inverted_vote++;
+      else
+        non_inverted_vote++;
+    }
+  }
+
+                                 //a tie leaves the word non-inverted
+  flags.set_bit (W_INVERSE, (inverted_vote > non_inverted_vote));
+
+  start_it.set_to_list (&cblobs);
+  if (start_it.empty ())
+    return;                      //every blob was rejected in the first pass
+  //second pass: reject blobs that disagree with the word's flag; only the
+  //first outline is tested as the survivors have uniform flags
+  for (start_it.mark_cycle_pt ();
+  !start_it.cycled_list (); start_it.forward ()) {
+    c_outline_it.set_to_list (start_it.data ()->out_list ());
+    if (c_outline_it.data ()->flag (COUT_INVERSE) != flags.bit (W_INVERSE))
+      rej_cblob_it.add_after_then_move (start_it.extract ());
+  }
+}
+
+
+/**********************************************************************
+ * WERD::WERD
+ *
+ * Constructor to build a WERD from a list of BLOBs.
+ * The BLOBs are not copied so the source list is emptied.
+ **********************************************************************/ + +WERD::WERD ( //constructor +PBLOB_LIST * blob_list, //in word order +uinT8 blank_count, //blanks in front +const char *text //correct text +): +flags (0), +correct(text) { + PBLOB_IT start_it = blob_list; //iterator + PBLOB_IT end_it = blob_list; //another + + while (!end_it.at_last ()) + end_it.forward (); //move to last + ((PBLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it); + //move to our list + //it's a polygon + flags.set_bit (W_POLYGON, TRUE); + blanks = blank_count; + // fprintf(stderr,"Wrong constructor!!!!\n"); +} + + +/********************************************************************** + * WERD::WERD + * + * Constructor to build a WERD from a list of BLOBs. + * The BLOBs are not copied so the source list is emptied. + **********************************************************************/ + +WERD::WERD ( //constructor +PBLOB_LIST * blob_list, //in word order +WERD * clone //sorce of flags +):flags (clone->flags), correct (clone->correct) { + PBLOB_IT start_it = blob_list; //iterator + PBLOB_IT end_it = blob_list; //another + + while (!end_it.at_last ()) + end_it.forward (); //move to last + ((PBLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it); + //move to our list + blanks = clone->blanks; + // fprintf(stderr,"Wrong constructor!!!!\n"); +} + + +/********************************************************************** + * WERD::WERD + * + * Constructor to build a WERD from a list of C_BLOBs. + * The C_BLOBs are not copied so the source list is emptied. 
+ **********************************************************************/
+
+WERD::WERD (                     //constructor
+C_BLOB_LIST * blob_list,         //in word order
+WERD * clone                     //source of flags
+):flags (clone->flags), correct (clone->correct) {
+  C_BLOB_IT start_it = blob_list;//iterator
+  C_BLOB_IT end_it = blob_list;  //another
+
+  while (!end_it.at_last ())
+    end_it.forward ();           //move to last
+  ((C_BLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
+                                 //move to our list
+  blanks = clone->blanks;        //flags, text and blanks come from clone
+  // fprintf(stderr,"Wrong constructor!!!!\n");
+}
+
+
+/**********************************************************************
+ * WERD::poly_copy
+ *
+ * Make a copy of a WERD in polygon format.
+ * The source WERD is untouched.
+ * The result is allocated with new and returned to the caller.
+ * A WERD that is already in polygon format is copied by assignment;
+ * otherwise each blob, including the reject blobs, is converted to a
+ * PBLOB using xheight.
+ **********************************************************************/
+
+WERD *WERD::poly_copy(           //make a poly copy
+                      float xheight  //row height
+                     ) {
+  PBLOB *blob;                   //new blob
+  WERD *result = new WERD;       //output word
+  C_BLOB_IT src_it = &cblobs;    //iterator
+  //      LARC_BLOB_IT                            larc_it=(LARC_BLOB_LIST*)(&cblobs);
+                                 //result's list viewed as polygon blobs
+  PBLOB_IT dest_it = (PBLOB_LIST *) (&result->cblobs);
+  //another
+
+  if (flags.bit (W_POLYGON)) {
+    *result = *this;             //just copy it
+  }
+  else {
+    result->flags = flags;
+    result->correct = correct;   //copy info
+    result->dummy = dummy;
+    if (!src_it.empty ()) {
+      //                      if (flags.bit(W_LINEARC))
+      //                      {
+      //                              do
+      //                              {
+      //                                      blob=new PBLOB;
+      //                                      poly_linearc_outlines(larc_it.data()->out_list(),
+      //                                              blob->out_list());      //convert outlines
+      //                                      dest_it.add_after_then_move(blob);      //add to dest list
+      //                                      larc_it.forward();
+      //                              }
+      //                              while (!larc_it.at_first());
+      //                      }
+      //                      else
+      //                      {
+      //go once round the list, stopping when back at the first blob
+      do {
+        blob = new PBLOB (src_it.data (), xheight);
+        //convert blob
+        //add to dest list
+        dest_it.add_after_then_move (blob);
+        src_it.forward ();
+      }
+      while (!src_it.at_first ());
+      //                      }
+    }
+    if (!rej_cblobs.empty ()) {
+      /* Polygonal approx of reject blobs */
+      src_it.set_to_list (&rej_cblobs);
+      dest_it = (PBLOB_LIST *) (&result->rej_cblobs);
+      do {
+        //convert blob
+        blob = new PBLOB (src_it.data (), xheight);
+        //add to dest list
+        dest_it.add_after_then_move (blob);
+        src_it.forward ();
+      }
+      while (!src_it.at_first ());
+    }
+    //polygon now
+    result->flags.set_bit (W_POLYGON, TRUE);
+    result->blanks = blanks;
+  }
+  return result;
+}
+
+
+/**********************************************************************
+ * WERD::bounding_box
+ *
+ * Return the bounding box of the WERD.
+ * This is quite a mess to compute!
+ * ORIGINALLY, REJECT CBLOBS WERE EXCLUDED, however, this led to bugs when the
+ * words on the row were re-sorted. The original words were built with reject
+ * blobs included. The FUZZY SPACE flags were set accordingly. If ALL the
+ * blobs in a word are rejected the BB for the word is NULL, causing the sort
+ * to screw up, leading to the erroneous possibility of the first word in a
+ * row being marked as FUZZY space.
+ **********************************************************************/
+
+TBOX WERD::bounding_box() {      //bounding box
+  TBOX box;                      //box being built
+                                 //rejected blobs in wd
+  C_BLOB_IT rej_cblob_it = &rej_cblobs;
+
+  //NOTE(review): the reject list is always read as C_BLOBs here, but
+  //poly_copy fills the reject list of a polygon word through a PBLOB_IT.
+  //Confirm that this is safe for W_POLYGON words.
+  for (rej_cblob_it.mark_cycle_pt ();
+  !rej_cblob_it.cycled_list (); rej_cblob_it.forward ()) {
+    box += rej_cblob_it.data ()->bounding_box ();
+  }
+
+  if (flags.bit (W_POLYGON)) {
+                                 //polygons
+    PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
+
+    for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
+      box += it.data ()->bounding_box ();
+    }
+  }
+  else {
+    C_BLOB_IT it = &cblobs;      //blobs of WERD
+
+    for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
+      box += it.data ()->bounding_box ();
+    }
+  }
+  return box;
+}
+
+
+/**********************************************************************
+ * WERD::move
+ *
+ * Reposition WERD by vector
+ * NOTE!!
+ * REJECT CBLOBS ARE NOT MOVED
+ **********************************************************************/
+
+void WERD::move(                  // reposition WERD
+                const ICOORD vec  // by vector
+               ) {
+  if (flags.bit (W_POLYGON)) {
+    // Polygon word: cblobs really holds PBLOBs.
+    PBLOB_IT poly_it ((PBLOB_LIST *) & cblobs);
+    for (poly_it.mark_cycle_pt (); !poly_it.cycled_list ();
+         poly_it.forward ()) {
+      poly_it.data ()->move (vec);
+    }
+    return;
+  }
+
+  // Edge-step word: shift each C_BLOB.
+  C_BLOB_IT step_it (&cblobs);
+  for (step_it.mark_cycle_pt (); !step_it.cycled_list ();
+       step_it.forward ()) {
+    step_it.data ()->move (vec);
+  }
+}
+
+
+/**********************************************************************
+ * WERD::scale
+ *
+ * Scale WERD by multiplier.
+ * Only polygon words can be scaled; for any other word the
+ * CANT_SCALE_EDGESTEPS error is raised with ABORT.
+ **********************************************************************/
+
+void WERD::scale(                // scale WERD
+                 const float f   // by multiplier
+                ) {
+  if (!flags.bit (W_POLYGON)) {
+    CANT_SCALE_EDGESTEPS.error ("WERD::scale", ABORT, NULL);
+  }
+  else {
+    PBLOB_IT poly_it ((PBLOB_LIST *) & cblobs);
+    for (poly_it.mark_cycle_pt (); !poly_it.cycled_list ();
+         poly_it.forward ()) {
+      poly_it.data ()->scale (f);
+    }
+  }
+}
+
+
+/**********************************************************************
+ * WERD::join_on
+ *
+ * Join other word onto this one. Delete the old word.
+ **********************************************************************/
+
+// NOTE: this function only moves the blobs across.  `other` is left with
+// empty blob lists and is NOT deleted here; disposing of the emptied word
+// is up to the caller.
+void WERD::join_on(              // join WERD
+                   WERD *&other  //other word
+                  ) {
+  // The blobs are only relinked, never dereferenced, so a PBLOB iterator
+  // is used for cblobs whatever the list actually holds.
+  PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
+  PBLOB_IT src_it ((PBLOB_LIST *) & other->cblobs);
+  C_BLOB_IT rej_cblob_it(&rej_cblobs);
+  C_BLOB_IT src_rej_it (&other->rej_cblobs);
+
+  while (!src_it.empty ()) {
+    blob_it.add_to_end (src_it.extract ());
+    src_it.forward ();
+  }
+  while (!src_rej_it.empty ()) {
+    rej_cblob_it.add_to_end (src_rej_it.extract ());
+    src_rej_it.forward ();
+  }
+}
+
+
+/**********************************************************************
+ * WERD::copy_on
+ *
+ * Copy blobs from other word onto this one.
+ *
+ * Deep copies of other's blobs and reject blobs are appended to this
+ * word's lists; other is not modified.  The element type is chosen from
+ * THIS word's W_POLYGON flag, so other is assumed to use the same
+ * representation.  Reject blobs follow the same rule as ordinary blobs:
+ * in a polygon word rej_cblobs holds PBLOBs (see poly_copy / operator=),
+ * so they must not be copied with C_BLOB::deep_copy.
+ **********************************************************************/
+
+void WERD::copy_on(              //copy blobs
+                   WERD *&other  //from other
+                  ) {
+  const bool polygonal = flags.bit (W_POLYGON);
+
+  if (polygonal) {
+    PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
+    PBLOB_LIST blobs;            //temporary holding the copies
+
+    blobs.deep_copy(reinterpret_cast<PBLOB_LIST*>(&other->cblobs),
+                    &PBLOB::deep_copy);
+    blob_it.move_to_last();
+    blob_it.add_list_after(&blobs);
+  } else {
+    C_BLOB_IT c_blob_it(&cblobs);
+    C_BLOB_LIST c_blobs;         //temporary holding the copies
+
+    c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy);
+    c_blob_it.move_to_last ();
+    c_blob_it.add_list_after (&c_blobs);
+  }
+
+  if (!other->rej_cblobs.empty ()) {
+    if (polygonal) {
+      PBLOB_IT rej_blob_it ((PBLOB_LIST *) & rej_cblobs);
+      PBLOB_LIST new_rej_blobs;
+
+      new_rej_blobs.deep_copy(
+        reinterpret_cast<PBLOB_LIST*>(&other->rej_cblobs),
+        &PBLOB::deep_copy);
+      rej_blob_it.move_to_last ();
+      rej_blob_it.add_list_after (&new_rej_blobs);
+    } else {
+      C_BLOB_IT rej_c_blob_it(&rej_cblobs);
+      C_BLOB_LIST new_rej_c_blobs;
+
+      new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy);
+      rej_c_blob_it.move_to_last ();
+      rej_c_blob_it.add_list_after (&new_rej_c_blobs);
+    }
+  }
+}
+
+
+/**********************************************************************
+ * WERD::baseline_normalise
+ *
+ * Baseline Normalise the word in Tesseract style.  (I.e origin at centre of
+ * word at bottom.
+ * x-height region scaled to region y =
+ * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
+ * - usually 64..192)
+ **********************************************************************/
+
+void WERD::baseline_normalise(   // Tess style BL Norm
+                              ROW *row,
+                              DENORM *denorm  //antidote
+                             ) {
+                                 //Use standard x ht
+  baseline_normalise_x (row, row->x_height (), denorm);
+}
+
+
+/**********************************************************************
+ * WERD::baseline_normalise_x
+ *
+ * Baseline Normalise the word in Tesseract style.  (I.e origin at centre of
+ * word at bottom. x-height region scaled to region y =
+ * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
+ * - usually 64..192)
+ * USE A SPECIFIED X-HEIGHT - NOT NECESSARILY THE ONE IN row
+ *
+ * The word must be polygonal (W_POLYGON) and not yet normalised; otherwise
+ * WRONG_WORD is raised with ABORT.  One of three strategies is used:
+ *   - bln_numericmode: every blob is dropped onto its own bottom edge and
+ *     scaled by a per-blob factor clamped to [scale, 1.5 * scale];
+ *   - bln_blshift_maxshift < 0: every blob is placed on the row baseline
+ *     under its centre and scaled uniformly;
+ *   - otherwise: per-blob baselines and x-heights are estimated, falling
+ *     back to a line fitted through the blob bottoms or to the row
+ *     baseline, and each blob gets its own vertical scale.
+ * Reject blobs are transformed as well.  If denorm is not NULL it receives
+ * the DENORM describing the transform.  W_NORMALIZED is set on exit.
+ **********************************************************************/
+
+void WERD::baseline_normalise_x(  // Tess style BL Norm
+                                ROW *row,
+                                float x_height,  //non standard value
+                                DENORM *denorm   //antidote
+                               ) {
+  BOOL8 using_row;               //as baseline
+  float blob_x_centre;           //middle of blob
+  float blob_offset;             //bottom miss
+  float top_offset;              //top miss
+  float blob_x_height;           //xh for this blob
+  inT16 segments;                //no of segments
+  inT16 segment;                 //current segment
+  DENORM_SEG *segs;              //array of segments
+  float mean_x;                  //mean xheight
+  inT32 x_count;                 //no of xs
+  TBOX word_box = bounding_box ();//word bounding box
+  TBOX blob_box;                 //blob bounding box
+  PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
+  // blob iterator
+  PBLOB *blob;
+  LLSQ line;                     //fitted line
+  double line_m, line_c;         //fitted line
+                                 //inverse norm: origin is the x centre of
+                                 //the word, scale maps x_height onto
+                                 //bln_x_height
+  DENORM antidote (word_box.left () +
+    (word_box.right () - word_box.left ()) / 2.0,
+    bln_x_height / x_height, row);
+
+  if (!flags.bit (W_POLYGON)) {
+    WRONG_WORD.error ("WERD::baseline_normalise", ABORT,
+      "Need to poly approx");
+  }
+
+  if (flags.bit (W_NORMALIZED)) {
+    WRONG_WORD.error ("WERD::baseline_normalise", ABORT,
+      "Baseline unnormalised");
+  }
+
+  if (bln_numericmode) {
+    segs = new DENORM_SEG[blob_it.length ()];
+    segments = 0;
+    float factor;  // For scaling to baseline normalised size.
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      blob->move (FCOORD (-antidote.origin (),
+        -blob_box.bottom ()));
+      factor = bln_x_height * 4.0f / (3 * blob_box.height ());
+      // Constrain the scale factor as target numbers should be either
+      // cap height already or xheight.
+      if (factor < antidote.scale())
+        factor = antidote.scale();
+      else if (factor > antidote.scale() * 1.5f)
+        factor = antidote.scale() * 1.5f;
+      blob->scale (factor);
+      blob->move (FCOORD (0.0, bln_baseline_offset));
+      // Record where the moved blob starts, its original bottom and the
+      // factor used, one segment per blob.
+      segs[segments].xstart = blob->bounding_box().left();
+      segs[segments].ycoord = blob_box.bottom();
+      segs[segments++].scale_factor = factor;
+    }
+    antidote = DENORM (antidote.origin (), antidote.scale (),
+      0.0f, 0.0f, segments, segs, true, row);
+    delete [] segs;
+
+    //Repeat for rej blobs (unclamped factor, no segments recorded)
+    blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      blob->move (FCOORD (-antidote.origin (),
+        -blob_box.bottom ()));
+      blob->scale (bln_x_height * 4.0f / (3 * blob_box.height ()));
+      blob->move (FCOORD (0.0, bln_baseline_offset));
+    }
+  }
+  else if (bln_blshift_maxshift < 0) {
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      blob_x_centre = blob_box.left () +
+        (blob_box.right () - blob_box.left ()) / 2.0;
+      blob->move (FCOORD (-antidote.origin (),
+        -(row->base_line (blob_x_centre))));
+      blob->scale (antidote.scale ());
+      blob->move (FCOORD (0.0, bln_baseline_offset));
+    }
+
+    //Repeat for rej blobs
+    blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      blob_x_centre = blob_box.left () +
+        (blob_box.right () - blob_box.left ()) / 2.0;
+      blob->move (FCOORD (-antidote.origin (),
+        -(row->base_line (blob_x_centre))));
+      blob->scale (antidote.scale ());
+      blob->move (FCOORD (0.0, bln_baseline_offset));
+    }
+
+  }
+  else {
+    mean_x = x_height;           //the given x-height counts as one sample
+    x_count = 1;
+    segs = new DENORM_SEG[blob_it.length ()];
+    segments = 0;
+    // Pass 1: classify each blob.  ycoord == -MAX_INT32 means "no usable
+    // bottom of its own"; scale_factor == 0 means "no usable x-height yet".
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      if (blob_box.height () > bln_blshift_xfraction * x_height) {
+        blob_x_centre = blob_box.left () +
+          (blob_box.right () - blob_box.left ()) / 2.0;
+        blob_offset =
+          blob_box.bottom () - row->base_line (blob_x_centre);
+        top_offset = blob_offset + blob_box.height () - x_height - 1;
+        blob_x_height = top_offset + x_height;
+        if (top_offset < 0)
+          top_offset = -top_offset;
+        if (blob_offset < 0)
+          blob_offset = -blob_offset;
+        if (blob_offset < bln_blshift_maxshift * x_height) {
+          // Bottom is close to the row baseline: use it and feed the fit.
+          segs[segments].ycoord = blob_box.bottom ();
+          line.add (blob_x_centre, blob_box.bottom ());
+          if (top_offset < bln_blshift_maxshift * x_height) {
+            segs[segments].scale_factor = blob_box.height () - 1.0f;
+            x_count++;
+          }
+          else
+            segs[segments].scale_factor = 0.0f;
+          //fix it later
+        }
+        else {
+          //not a goer
+          segs[segments].ycoord = -MAX_INT32;
+          if (top_offset < bln_blshift_maxshift * x_height) {
+            segs[segments].scale_factor = blob_x_height;
+            x_count++;
+          }
+          else
+            segs[segments].scale_factor = 0.0f;
+          //fix it later
+        }
+      }
+      else {
+        // Too short to say anything about baseline or x-height.
+        segs[segments].scale_factor = 0.0f;
+        segs[segments].ycoord = -MAX_INT32;
+      }
+      segs[segments].xstart = blob_box.left ();
+      segments++;
+    }
+    // With fewer than two points there is no line to fit: use the row.
+    using_row = line.count () <= 1;
+    if (!using_row) {
+      line_m = line.m ();
+      line_c = line.c (line_m);
+    }
+    else
+      line_m = line_c = 0;
+    // Pass 2: re-measure x-heights against the fitted line where the blob
+    // had no bottom of its own, and accumulate the mean x-height.
+    segments = 0;
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      blob_x_centre = blob_box.left () +
+        (blob_box.right () - blob_box.left ()) / 2.0;
+      if (segs[segments].ycoord == -MAX_INT32
+      && segs[segments].scale_factor != 0 && !using_row) {
+        blob_offset = line_m * blob_x_centre + line_c;
+        segs[segments].scale_factor = blob_box.top () - blob_offset;
+      }
+      if (segs[segments].scale_factor != 0)
+        mean_x += segs[segments].scale_factor;
+      segments++;
+    }
+    mean_x /= x_count;
+    // Pass 3: turn x-heights into scale factors and transform the blobs.
+    segments = 0;
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      blob_x_centre = blob_box.left () +
+        (blob_box.right () - blob_box.left ()) / 2.0;
+      if (segs[segments].ycoord != -MAX_INT32)
+        blob_offset = (float) segs[segments].ycoord;
+      else if (using_row)
+        blob_offset = row->base_line (blob_x_centre);
+      else
+        blob_offset = line_m * blob_x_centre + line_c;
+      if (segs[segments].scale_factor == 0)
+        segs[segments].scale_factor = mean_x;
+      segs[segments].scale_factor =
+        bln_x_height / segs[segments].scale_factor;
+      blob->move (FCOORD (-antidote.origin (), -blob_offset));
+      blob->
+        scale (FCOORD (antidote.scale (), segs[segments].scale_factor));
+      blob->move (FCOORD (0.0, bln_baseline_offset));
+      segments++;
+    }
+
+    //Repeat for rej blobs, borrowing the values of the segment each one
+    //falls in.
+    // A word whose blobs are ALL rejected has no segments (segs has zero
+    // elements), so segs[] must not be touched in that case.  Fall back to
+    // the row / fitted baseline and to the uniform scale, which is what an
+    // ordinary blob gets when mean_x is just x_height.
+    const bool have_segs = segments > 0;
+    blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
+    segment = 0;
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      blob_x_centre = blob_box.left () +
+        (blob_box.right () - blob_box.left ()) / 2.0;
+      while (segment < segments - 1
+        && segs[segment + 1].xstart <= blob_x_centre)
+      segment++;
+      if (have_segs && segs[segment].ycoord != -MAX_INT32)
+        blob_offset = (float) segs[segment].ycoord;
+      else if (using_row)
+        blob_offset = row->base_line (blob_x_centre);
+      else
+        blob_offset = line_m * blob_x_centre + line_c;
+      float rej_y_scale = antidote.scale ();
+      if (have_segs)
+        rej_y_scale = segs[segment].scale_factor;
+      blob->move (FCOORD (-antidote.origin (), -blob_offset));
+      blob->
+        scale (FCOORD (antidote.scale (), rej_y_scale));
+      blob->move (FCOORD (0.0, bln_baseline_offset));
+    }
+    // Only keep the segment table when something was actually measured.
+    if (line.count () > 0 || x_count > 1)
+      antidote = DENORM (antidote.origin (), antidote.scale (),
+        line_m, line_c, segments, segs, using_row, row);
+    delete[]segs;
+  }
+  if (denorm != NULL)
+    *denorm = antidote;
+                                 //it's normalised
+  flags.set_bit (W_NORMALIZED, TRUE);
+}
+
+
+/**********************************************************************
+ * WERD::baseline_denormalise
+ *
+ * Baseline DeNormalise the word in Tesseract style.  (I.e origin at centre of
+ * word at bottom. x-height region scaled to region y =
+ * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
+ * - usually 64..192)
+ *
+ * Applies denorm to every blob and every reject blob, then clears
+ * W_NORMALIZED.  Raises WRONG_WORD with ABORT if the word is not currently
+ * normalised.
+ **********************************************************************/
+
+void WERD::baseline_denormalise(  // Tess style BL Norm
+                                const DENORM *denorm  //antidote
+                               ) {
+  PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
+  // blob iterator
+  PBLOB *blob;
+
+  if (!flags.bit (W_NORMALIZED)) {
+    WRONG_WORD.error ("WERD::baseline_denormalise", ABORT,
+      "Baseline normalised");
+  }
+
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+                                 //denormalise it
+    blob->baseline_denormalise (denorm);
+  }
+
+  //Repeat for rej blobs
+  blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+                                 //denormalise it
+    blob->baseline_denormalise (denorm);
+  }
+
+                                 //it's not normalised
+  flags.set_bit (W_NORMALIZED, FALSE);
+}
+
+
+/**********************************************************************
+ * WERD::print
+ *
+ * Display members
+ **********************************************************************/
+
+// Writes the blank count, bounding box, every word flag, the correct text
+// and the reject-blob count through tprintf.  The FILE argument is ignored.
+void WERD::print(        //print
+                 FILE *  //file to print on
+                ) {
+  tprintf ("Blanks= %d\n", blanks);
+  bounding_box ().print ();
+  tprintf ("Flags = %d = 0%o\n", flags.val, flags.val);
+  tprintf (" W_SEGMENTED = %s\n",
+    flags.bit (W_SEGMENTED) ? "TRUE" : "FALSE ");
+  tprintf (" W_ITALIC = %s\n", flags.bit (W_ITALIC) ? "TRUE" : "FALSE ");
+  tprintf (" W_BOLD = %s\n", flags.bit (W_BOLD) ? "TRUE" : "FALSE ");
+  tprintf (" W_BOL = %s\n", flags.bit (W_BOL) ? "TRUE" : "FALSE ");
+  tprintf (" W_EOL = %s\n", flags.bit (W_EOL) ? "TRUE" : "FALSE ");
+  tprintf (" W_NORMALIZED = %s\n",
+    flags.bit (W_NORMALIZED) ? "TRUE" : "FALSE ");
+  tprintf (" W_POLYGON = %s\n", flags.bit (W_POLYGON) ? "TRUE" : "FALSE ");
+  tprintf (" W_LINEARC = %s\n", flags.bit (W_LINEARC) ? "TRUE" : "FALSE ");
+  tprintf (" W_DONT_CHOP = %s\n",
+    flags.bit (W_DONT_CHOP) ? "TRUE" : "FALSE ");
+  tprintf (" W_REP_CHAR = %s\n",
+    flags.bit (W_REP_CHAR) ? "TRUE" : "FALSE ");
+  tprintf (" W_FUZZY_SP = %s\n",
+    flags.bit (W_FUZZY_SP) ? "TRUE" : "FALSE ");
+  tprintf (" W_FUZZY_NON = %s\n",
+    flags.bit (W_FUZZY_NON) ? "TRUE" : "FALSE ");
+  tprintf (" W_INVERSE = %s\n", flags.bit (W_INVERSE) ? "TRUE" : "FALSE ");
+  tprintf ("Correct= %s\n", correct.string ());
+  tprintf ("Rejected cblob count = %d\n", rej_cblobs.length ());
+}
+
+
+/**********************************************************************
+ * WERD::plot
+ *
+ * Draw the WERD in the given colour.
+ * Every blob is drawn with the same colour for outline and children;
+ * reject blobs are then drawn by plot_rej_blobs.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void WERD::plot(                //draw it
+                ScrollView* window,  //window to draw in
+                ScrollView::Color colour,  //colour to draw in
+                BOOL8 solid     //draw larcs solid
+               ) {
+  if (flags.bit (W_POLYGON)) {
+                                 //polygons
+    PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
+
+    for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
+      it.data ()->plot (window, colour, colour);
+    }
+  }
+  else {
+    C_BLOB_IT it = &cblobs;      //blobs of WERD
+
+    for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
+      it.data ()->plot (window, colour, colour);
+    }
+  }
+  plot_rej_blobs(window, solid);
+}
+#endif
+
+
+/**********************************************************************
+ * WERD::plot
+ *
+ * Draw the WERD in rainbow colours.
+ * Successive blobs step through the colours from FIRST_COLOUR up to, but
+ * not including, LAST_COLOUR and then wrap round; children are drawn in
+ * CHILD_COLOUR.  Reject blobs are then drawn by plot_rej_blobs.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void WERD::plot(                //draw it
+                ScrollView* window,  //window to draw in
+                BOOL8 solid     //draw larcs solid
+               ) {
+  ScrollView::Color colour = FIRST_COLOUR;  //current colour
+  if (flags.bit (W_POLYGON)) {
+                                 //polygons
+    PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
+
+    for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
+      it.data ()->plot (window, colour, CHILD_COLOUR);
+      colour = (ScrollView::Color) (colour + 1);
+      if (colour == LAST_COLOUR)
+        colour = FIRST_COLOUR;   //cycle round
+    }
+  }
+  else {
+    C_BLOB_IT it = &cblobs;      //blobs of WERD
+
+    for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
+      it.data ()->plot (window, colour, CHILD_COLOUR);
+      colour = (ScrollView::Color) (colour + 1);
+      if (colour == LAST_COLOUR)
+        colour = FIRST_COLOUR;   //cycle round
+    }
+  }
+  plot_rej_blobs(window, solid);
+}
+#endif
+
+
+/**********************************************************************
+ * WERD::plot_rej_blobs
+ *
+ * Draw the WERD rejected blobs - ALWAYS GREY
+ * The solid argument is not used.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void WERD::plot_rej_blobs(                //draw it
+                          ScrollView* window,  //window to draw in
+                          BOOL8 solid     //draw larcs solid
+                         ) {
+  if (flags.bit (W_POLYGON)) {
+                                 //polygons
+    PBLOB_IT it = (PBLOB_LIST *) (&rej_cblobs);
+
+    for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
+      it.data ()->plot (window, ScrollView::GREY, ScrollView::GREY);
+    }
+  } else {
+    C_BLOB_IT it = &rej_cblobs;  //blobs of WERD
+
+    for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
+      it.data ()->plot (window, ScrollView::GREY, ScrollView::GREY);
+    }
+  }
+}
+#endif
+
+
+/**********************************************************************
+ * WERD::shallow_copy()
+ *
+ * Make a shallow copy of a word: blanks, flags, padding byte and correct
+ * text are copied, the blob lists of the new word are left empty.
+ **********************************************************************/
+
+WERD *WERD::shallow_copy() {  //shallow copy
+  WERD *new_word = new WERD;
+
+  new_word->blanks = blanks;
+  new_word->flags = flags;
+  new_word->dummy = dummy;
+  new_word->correct = correct;
+  return new_word;
+}
+
+
+/**********************************************************************
+ * WERD::operator=
+ *
+ * Assign a word, DEEP copying the blob list
+ *
+ * Blobs already held by this word are released first, using this word's
+ * OWN W_POLYGON flag, because that flag says what the lists really contain
+ * (PBLOBs or C_BLOBs).  Only then are the flags overwritten and the
+ * source's blobs copied in using the source's representation.
+ * Self-assignment is a no-op; without the guard the word would clear its
+ * own lists and then copy from the emptied lists.
+ **********************************************************************/
+
+WERD & WERD::operator= (         //assign words
+const WERD & source              //from this
+) {
+  if (this == &source)
+    return *this;
+
+  // Release the old contents while `flags` still describes them.
+  if (flags.bit (W_POLYGON)) {
+    if (!cblobs.empty())
+      reinterpret_cast<PBLOB_LIST*>(&cblobs)->clear();
+    if (!rej_cblobs.empty())
+      reinterpret_cast<PBLOB_LIST*>(&rej_cblobs)->clear();
+  } else {
+    if (!cblobs.empty ())
+      cblobs.clear ();
+    if (!rej_cblobs.empty ())
+      rej_cblobs.clear ();
+  }
+
+  this->ELIST_LINK::operator= (source);
+  blanks = source.blanks;
+  flags = source.flags;
+  dummy = source.dummy;
+  correct = source.correct;
+
+  // Both lists are empty now; fill them according to the new flags.
+  if (flags.bit (W_POLYGON)) {
+    reinterpret_cast<PBLOB_LIST*>(&cblobs)->deep_copy(
+      reinterpret_cast<const PBLOB_LIST*>(&source.cblobs), &PBLOB::deep_copy);
+    reinterpret_cast<PBLOB_LIST*>(&rej_cblobs)->deep_copy(
+      reinterpret_cast<const PBLOB_LIST*>(&source.rej_cblobs),
+      &PBLOB::deep_copy);
+  } else {
+    cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
+    rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
+  }
+  return *this;
+}
+
+
+/**********************************************************************
+ *  word_comparator()
+ *
+ *  word comparator used to sort a word list so that words are in increasing
+ *  order of left edge.
+ *  Both arguments point at WERD pointers; the result is the difference of
+ *  the left edges of the two bounding boxes.
+ **********************************************************************/
+
+int word_comparator(                     //sort blobs
+                    const void *word1p,  //ptr to ptr to word1
+                    const void *word2p   //ptr to ptr to word2
+                   ) {
+  WERD *
+    word1 = *(WERD **) word1p;
+  WERD *
+    word2 = *(WERD **) word2p;
+
+  return word1->bounding_box ().left () - word2->bounding_box ().left ();
+}
diff --git a/ccmain/werd.h b/ccmain/werd.h
new file mode 100644
index 000000000..04f33105e
--- /dev/null
+++ b/ccmain/werd.h
@@ -0,0 +1,277 @@
+/**********************************************************************
+ * File:        werd.h
+ * Description: Code for the WERD class.
+ * Author:      Ray Smith
+ * Created:     Tue Oct 08 14:32:12 BST 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           WERD_H
+#define           WERD_H
+
+#include          "varable.h"
+#include          "bits16.h"
+#include          "strngs.h"
+#include          "blckerr.h"
+#include          "stepblob.h"
+#include          "polyblob.h"
+//#include                                      "larcblob.h"
+
+// Bit numbers of the per-word flags kept in WERD::flags.
+enum WERD_FLAGS
+{
+  W_SEGMENTED,                   // correctly segmented
+  W_ITALIC,                      // italic text
+  W_BOLD,                        // bold text
+  W_BOL,                         // start of line
+  W_EOL,                         // end of line
+  W_NORMALIZED,                  // baseline normalised
+  W_POLYGON,                     // blob lists hold polygonal approximations
+  W_LINEARC,                     // linearc approx
+  W_DONT_CHOP,                   // fixed pitch chopped
+  W_REP_CHAR,                    // repeated character
+  W_FUZZY_SP,                    // fuzzy space
+  W_FUZZY_NON,                   // fuzzy nonspace
+  W_INVERSE                      // white on black
+};
+
+// Bit numbers of the display flags kept in WERD::disp_flags.
+enum DISPLAY_FLAGS
+{
+  DF_BOX,                        // bounding box
+  DF_TEXT,                       // correct ascii
+  DF_POLYGONAL,                  // polygonal approximation
+  DF_EDGE_STEP,                  // edge steps
+  DF_BN_POLYGONAL                // baseline-normalised polygonal approx
+};
+
+class ROW;                       //forward decl
+
+// A word: two lists of blobs (accepted and rejected) plus flags, the number
+// of blanks in front of it and its correct text.  Both lists are declared
+// as C_BLOB_LIST but are reinterpreted as PBLOB_LIST when W_POLYGON is set.
+class WERD:public ELIST_LINK
+{
+  public:
+    WERD() {
+    }                            // empty constructor
+
+    // Constructors taking over the blobs of blob_list.
+    WERD(C_BLOB_LIST *blob_list,  // blobs in word
+         uinT8 blanks,            // blanks in front
+         const char *text);       // correct text
+    WERD(PBLOB_LIST *blob_list,   // blobs in word
+         uinT8 blanks,            // blanks in front
+         const char *text);       // correct text
+    WERD(PBLOB_LIST *blob_list,   // blobs in word
+         WERD *clone);            // use these flags etc.
+    WERD(C_BLOB_LIST *blob_list,  // blobs in word
+         WERD *clone);            // use these flags etc.
+
+    // Polygon lists must be cleared through the PBLOB_LIST type; this is
+    // done here, and only for polygon words.
+    ~WERD () {
+      if (flags.bit (W_POLYGON)) {
+        PBLOB_LIST *poly_blobs = (PBLOB_LIST *) & cblobs;
+        PBLOB_LIST *poly_rejects = (PBLOB_LIST *) & rej_cblobs;
+        poly_blobs->clear ();
+        poly_rejects->clear ();
+      }
+    }
+
+    WERD *poly_copy(              // make copy as poly
+                    float xheight);  // row xheight
+    WERD *larc_copy(              // make copy as larc
+                    float xheight);  // row xheight
+
+    // Rejected blobs as edge-step blobs; an error for polygon words.
+    C_BLOB_LIST *rej_cblob_list() {
+      if (flags.bit (W_POLYGON))
+        WRONG_WORD.error ("WERD::rej_cblob_list", ABORT, NULL);
+      return &rej_cblobs;
+    }
+
+    // Rejected blobs as polygons; an error for non-polygon words.
+    PBLOB_LIST *rej_blob_list() {
+      if (!flags.bit (W_POLYGON))
+        WRONG_WORD.error ("WERD::rej_blob_list", ABORT, NULL);
+      return (PBLOB_LIST *) (&rej_cblobs);
+    }
+
+    // Blobs as edge-step blobs; an error for polygon or linearc words.
+    C_BLOB_LIST *cblob_list() {
+      const bool not_edge_steps =
+        flags.bit (W_POLYGON) || flags.bit (W_LINEARC);
+      if (not_edge_steps)
+        WRONG_WORD.error ("WERD::cblob_list", ABORT, NULL);
+      return &cblobs;
+    }
+
+    // Blobs as polygons; an error for non-polygon words.
+    PBLOB_LIST *blob_list() {
+      if (!flags.bit (W_POLYGON))
+        WRONG_WORD.error ("WERD::blob_list", ABORT, NULL);
+      return (PBLOB_LIST *) (&cblobs);
+    }
+
+    // Blobs viewed as a PBLOB_LIST with no check of the word type.
+    PBLOB_LIST *gblob_list() {
+      return (PBLOB_LIST *) (&cblobs);
+    }
+
+    const char *text() const {   // correct text
+      return correct.string ();
+    }
+    uinT8 space() {              // blanks in front of the word
+      return blanks;
+    }
+    void set_blanks(uinT8 new_blanks) {
+      blanks = new_blanks;
+    }
+    void set_text(const char *new_text) {  // replace correct text
+      correct = new_text;
+    }
+
+    TBOX bounding_box();         // compute bounding box
+
+    BOOL8 flag(WERD_FLAGS mask) const {  // test a word flag
+      return flags.bit (mask);
+    }
+    void set_flag(WERD_FLAGS mask,  // flag to change
+                  BOOL8 value) {    // value to set
+      flags.set_bit (mask, value);
+    }
+
+    BOOL8 display_flag(uinT8 flag) const {  // test a display flag
+      return disp_flags.bit (flag);
+    }
+    void set_display_flag(uinT8 flag,    // flag to change
+                          BOOL8 value) {  // value to set
+      disp_flags.set_bit (flag, value);
+    }
+
+    WERD *shallow_copy();        // copy everything except the blobs
+
+    void move(                   // reposition word
+              const ICOORD vec);  // by vector
+
+    void scale(                  // scale word
+               const float vec);  // by multiplier
+
+    void join_on(                // append word
+                 WERD *&other);  // word whose blobs are taken
+
+    void copy_on(                // copy blobs
+                 WERD *&other);  // from other
+
+    void baseline_normalise (    // Tess style BL Norm
+                                 // optional antidote
+      ROW * row, DENORM * denorm = NULL);
+
+    void baseline_normalise_x (  // use non standard xht
+      ROW * row, float x_height, // weird value to use
+      DENORM * denorm = NULL);   // optional antidote
+
+    void baseline_denormalise(   // un-normalise
+                              const DENORM *denorm);
+
+    void print(                  // print
+               FILE *fp);        // file to print on
+
+    void plot (                  // draw one
+      ScrollView* window,        // window to draw in
+                                 // uniform colour
+      ScrollView::Color colour, BOOL8 solid = FALSE);
+
+    void plot (                  // draw one
+                                 // in rainbow colours
+      ScrollView* window, BOOL8 solid = FALSE);
+
+    void plot_rej_blobs (        // draw the rejected blobs
+                                 // always in grey
+      ScrollView* window, BOOL8 solid = FALSE);
+
+    WERD & operator= (           // assign words
+      const WERD & source);      // from this
+
+    // The three serialisation helpers below handle the correct text, then
+    // cblobs (through the PBLOB_LIST view for polygon words), then
+    // rej_cblobs, which is always handled as a C_BLOB_LIST here.
+    void prep_serialise() {      // set ptrs to counts
+      correct.prep_serialise ();
+      if (!flags.bit (W_POLYGON))
+        cblobs.prep_serialise ();
+      else
+        ((PBLOB_LIST *) (&cblobs))->prep_serialise ();
+      rej_cblobs.prep_serialise ();
+    }
+
+    void dump(                   // write external bits
+              FILE *f) {
+      correct.dump (f);
+      if (!flags.bit (W_POLYGON))
+        cblobs.dump (f);
+      else
+        ((PBLOB_LIST *) (&cblobs))->dump (f);
+      rej_cblobs.dump (f);
+    }
+
+    void de_dump(                // read external bits
+                 FILE *f) {
+      correct.de_dump (f);
+      if (!flags.bit (W_POLYGON))
+        cblobs.de_dump (f);
+      else
+        ((PBLOB_LIST *) (&cblobs))->de_dump (f);
+      rej_cblobs.de_dump (f);
+    }
+
+    make_serialise (WERD)
+
+  private:
+    uinT8 blanks;                // no of blanks in front
+    uinT8 dummy;                 // padding
+    BITS16 flags;                // WERD_FLAGS bits
+    BITS16 disp_flags;           // DISPLAY_FLAGS bits
+    inT16 dummy2;                // padding
+    STRING correct;              // correct text
+    C_BLOB_LIST cblobs;          // blobs (PBLOBs if W_POLYGON)
+    C_BLOB_LIST rej_cblobs;      // rejected (DUFF) blobs
+};
+
+ELISTIZEH_S (WERD)
+#include          "ocrrow.h"     //placed here due to
+extern BOOL_VAR_H (bln_numericmode, 0, "Optimize for numbers");
+extern INT_VAR_H (bln_x_height, 128, "Baseline Normalisation X-height");
+extern INT_VAR_H (bln_baseline_offset, 64,
+"Baseline Norm. offset of baseline");
+
+// Comparator for sorting words into increasing order of left edge.
+int word_comparator(                     // sort words
+                    const void *word1p,  // ptr to ptr to word1
+                    const void *word2p   // ptr to ptr to word2
+                   );
+#endif