diff --git a/src/ccmain/paragraphs_internal.h b/src/ccmain/paragraphs_internal.h index f8fe136e..a7c08d51 100644 --- a/src/ccmain/paragraphs_internal.h +++ b/src/ccmain/paragraphs_internal.h @@ -2,7 +2,6 @@ * File: paragraphs_internal.h * Description: Paragraph Detection internal data structures. * Author: David Eger - * Created: 11 March 2011 * * (C) Copyright 2011, Google Inc. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,10 +20,12 @@ #define TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_ #include "paragraphs.h" +#include "publictypes.h" // for ParagraphJustification // NO CODE OUTSIDE OF paragraphs.cpp AND TESTS SHOULD NEED TO ACCESS // DATA STRUCTURES OR FUNCTIONS IN THIS FILE. +class UNICHARSET; class WERD_CHOICE; namespace tesseract { @@ -299,4 +300,5 @@ void CanonicalizeDetectionResults( PARA_LIST *paragraphs); } // namespace + #endif // TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_ diff --git a/unittest/Makefile.am b/unittest/Makefile.am index 5337a0a6..681b9512 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -106,6 +106,7 @@ check_PROGRAMS = \ matrix_test \ nthitem_test \ osd_test \ + paragraphs_test \ progress_test \ qrsequence_test \ rect_test \ @@ -183,6 +184,12 @@ matrix_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) nthitem_test_SOURCES = nthitem_test.cc nthitem_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) +pango_font_info_test_SOURCES = pango_font_info_test.cc +pango_font_info_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) + +paragraphs_test_SOURCES = paragraphs_test.cc +paragraphs_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS) + osd_test_SOURCES = osd_test.cc osd_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS) diff --git a/unittest/paragraphs_test.cc b/unittest/paragraphs_test.cc index 2d42e212..c1be3971 100644 --- a/unittest/paragraphs_test.cc +++ b/unittest/paragraphs_test.cc @@ -1,9 +1,29 @@ +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. -#include +#include // for std::string -#include "tesseract/ccmain/paragraphs.h" -#include "tesseract/ccmain/paragraphs_internal.h" -#include "tesseract/ccstruct/ocrpara.h" +#include "absl/strings/str_cat.h" // for absl::StrCat +#include "absl/strings/str_join.h" // for absl::StrJoin +#include "absl/strings/str_split.h" // for absl::StrSplit + +#include "include_gunit.h" // for TEST +#include "log.h" // for LOG + +#include "genericvector.h" +// ccmain +#include "paragraphs.h" +#include "paragraphs_internal.h" +// ccstruct +#include "ocrpara.h" namespace { // anonymous namespace @@ -47,7 +67,7 @@ void AsciiToRowInfo(const char* text, int row_number, info->lword_text = info->rword_text = ""; info->ltr = true; - std::vector words = absl::StrSplit(text, ' ', absl::SkipEmpty()); + std::vector words = absl::StrSplit(text, ' ', absl::SkipEmpty()); info->num_words = words.size(); if (info->num_words < 1) return; @@ -135,7 +155,7 @@ void EvaluateParagraphDetection(const TextAndModel* correct, int n, EXPECT_EQ(bad_crowns, 0); if (incorrect_breaks || missed_breaks || poorly_matched_models || bad_list_items || bad_crowns) { - std::vector dbg_lines; + std::vector dbg_lines; dbg_lines.push_back("# =========================="); dbg_lines.push_back("# Correct paragraph breaks:"); dbg_lines.push_back("# =========================="); @@ -154,7 +174,7 @@ void EvaluateParagraphDetection(const TextAndModel* correct, int n, dbg_lines.push_back("# Paragraph detector output:"); dbg_lines.push_back("# =========================="); for (int i = 0; i < n; i++) { - string annotation; + std::string annotation; if (i == 0 || (detector_output[i - 1] != detector_output[i])) { if (detector_output[i] && detector_output[i]->model) { annotation += absl::StrCat( @@ -211,15 +231,15 @@ TEST(ParagraphsTest, ListItemsIdentified) { typedef ParagraphModel PModel; const TextAndModel kTwoSimpleParagraphs[] = { - {" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"This paragraph starts at the top"}, - {"of the page and takes 3 lines. "}, - {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"which indicates that the first "}, - {"paragraph is not a continuation "}, - {"from a previous page, as it is "}, - {"indented just like this second "}, - {"paragraph. "}, + {" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"This paragraph starts at the top", PCONT, PModel(), false, false}, + {"of the page and takes 3 lines. ", PCONT, PModel(), false, false}, + {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"which indicates that the first ", PCONT, PModel(), false, false}, + {"paragraph is not a continuation ", PCONT, PModel(), false, false}, + {"from a previous page, as it is ", PCONT, PModel(), false, false}, + {"indented just like this second ", PCONT, PModel(), false, false}, + {"paragraph. ", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, TestSimpleParagraphDetection) { @@ -228,15 +248,15 @@ TEST(ParagraphsTest, TestSimpleParagraphDetection) { } const TextAndModel kFewCluesWithCrown[] = { - {"This paragraph starts at the top", PSTART, PModel(kLeft, 0, 20, 0, 0), - true}, - {"of the page and takes two lines."}, - {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"which indicates that the first "}, - {"paragraph is a continuation from"}, - {"a previous page, as it is "}, - {"indented just like this second "}, - {"paragraph. "}, + {"This paragraph starts at the top", PSTART, PModel(kLeft, 0, 20, 0, 0), + true, false}, + {"of the page and takes two lines.", PCONT, PModel(), false, false}, + {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"which indicates that the first ", PCONT, PModel(), false, false}, + {"paragraph is a continuation from", PCONT, PModel(), false, false}, + {"a previous page, as it is ", PCONT, PModel(), false, false}, + {"indented just like this second ", PCONT, PModel(), false, false}, + {"paragraph. ", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, TestFewCluesWithCrown) { @@ -245,17 +265,17 @@ TEST(ParagraphsTest, TestFewCluesWithCrown) { } const TextAndModel kCrownedParagraph[] = { - {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), - true}, - {"often not indented as the rest "}, - {"of the paragraphs are. Nonethe-"}, - {"less it should be counted as the"}, - {"same type of paragraph. "}, - {" The second and third para- ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"graphs are both indented two "}, - {"spaces. "}, - {" The first paragraph has what ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"fmt refers to as a 'crown.' "}, + {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), + true, false}, + {"often not indented as the rest ", PCONT, PModel(), false, false}, + {"of the paragraphs are. Nonethe-", PCONT, PModel(), false, false}, + {"less it should be counted as the", PCONT, PModel(), false, false}, + {"same type of paragraph. ", PCONT, PModel(), false, false}, + {" The second and third para- ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"graphs are both indented two ", PCONT, PModel(), false, false}, + {"spaces. ", PCONT, PModel(), false, false}, + {" The first paragraph has what ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"fmt refers to as a 'crown.' ", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, TestCrownParagraphDetection) { @@ -263,40 +283,40 @@ TEST(ParagraphsTest, TestCrownParagraphDetection) { } const TextAndModel kFlushLeftParagraphs[] = { - {"It is sometimes the case that", PSTART, PModel(kLeft, 0, 0, 0, 0)}, - {"flush left paragraphs (those"}, - {"with no body indent) are not"}, - {"actually crowns. "}, - {"Instead, further paragraphs are", PSTART, PModel(kLeft, 0, 0, 0, 0)}, - {"also flush left aligned. Usual-"}, - {"ly, these paragraphs are set"}, - {"apart vertically by some white-"}, - {"space, but you can also detect"}, - {"them by observing the big empty"}, - {"space at the ends of the para-"}, - {"graphs. "}, + {"It is sometimes the case that", PSTART, PModel(kLeft, 0, 0, 0, 0), false, false}, + {"flush left paragraphs (those", PCONT, PModel(), false, false}, + {"with no body indent) are not", PCONT, PModel(), false, false}, + {"actually crowns. ", PCONT, PModel(), false, false}, + {"Instead, further paragraphs are", PSTART, PModel(kLeft, 0, 0, 0, 0), false, false}, + {"also flush left aligned. Usual-", PCONT, PModel(), false, false}, + {"ly, these paragraphs are set", PCONT, PModel(), false, false}, + {"apart vertically by some white-", PCONT, PModel(), false, false}, + {"space, but you can also detect", PCONT, PModel(), false, false}, + {"them by observing the big empty", PCONT, PModel(), false, false}, + {"space at the ends of the para-", PCONT, PModel(), false, false}, + {"graphs. ", PCONT, PModel(), false, false}, }; TEST(ParagraphsText, TestRealFlushLeftParagraphs) { TestParagraphDetection(kFlushLeftParagraphs, ABSL_ARRAYSIZE(kFlushLeftParagraphs)); -}; +} const TextAndModel kSingleFullPageContinuation[] = { - {"sometimes a page is one giant", PSTART, PModel(kLeft, 0, 20, 0, 0), true}, - {"continuation. It flows from"}, - {"line to line, using the full"}, - {"column width with no clear"}, - {"paragraph break, because it"}, - {"actually doesn't have one. It"}, - {"is the middle of one monster"}, - {"paragraph continued from the"}, - {"previous page and continuing"}, - {"onto the next page. There-"}, - {"fore, it ends up getting"}, - {"marked as a crown and then"}, - {"getting re-marked as any ex-"}, - {"isting model. Not great, but"}, + {"sometimes a page is one giant", PSTART, PModel(kLeft, 0, 20, 0, 0), true, false}, + {"continuation. It flows from", PCONT, PModel(), false, false}, + {"line to line, using the full", PCONT, PModel(), false, false}, + {"column width with no clear", PCONT, PModel(), false, false}, + {"paragraph break, because it", PCONT, PModel(), false, false}, + {"actually doesn't have one. It", PCONT, PModel(), false, false}, + {"is the middle of one monster", PCONT, PModel(), false, false}, + {"paragraph continued from the", PCONT, PModel(), false, false}, + {"previous page and continuing", PCONT, PModel(), false, false}, + {"onto the next page. There-", PCONT, PModel(), false, false}, + {"fore, it ends up getting", PCONT, PModel(), false, false}, + {"marked as a crown and then", PCONT, PModel(), false, false}, + {"getting re-marked as any ex-", PCONT, PModel(), false, false}, + {"isting model. Not great, but", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, TestSingleFullPageContinuation) { @@ -314,14 +334,14 @@ TEST(ParagraphsTest, TestSingleFullPageContinuation) { } const TextAndModel kRightAligned[] = { - {"Right-aligned paragraphs are", PSTART, PModel(kRight, 0, 0, 0, 0)}, - {" uncommon in Left-to-Right"}, - {" languages, but they do"}, - {" exist."}, - {" Mostly, however, they're", PSTART, PModel(kRight, 0, 0, 0, 0)}, - {" horribly tiny paragraphs in"}, - {" tables on which we have no"}, - {" chance anyways."}, + {"Right-aligned paragraphs are", PSTART, PModel(kRight, 0, 0, 0, 0), false, false}, + {" uncommon in Left-to-Right", PCONT, PModel(), false, false}, + {" languages, but they do", PCONT, PModel(), false, false}, + {" exist.", PCONT, PModel(), false, false}, + {" Mostly, however, they're", PSTART, PModel(kRight, 0, 0, 0, 0), false, false}, + {" horribly tiny paragraphs in", PCONT, PModel(), false, false}, + {" tables on which we have no", PCONT, PModel(), false, false}, + {" chance anyways.", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, TestRightAlignedParagraph) { @@ -329,21 +349,21 @@ TEST(ParagraphsTest, TestRightAlignedParagraph) { } const TextAndModel kTinyParagraphs[] = { - {" Occasionally, interspersed with", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"obvious paragraph text, you might"}, - {"find short exchanges of dialogue "}, - {"between characters. "}, - {" 'Oh?' ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {" 'Don't be confused!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {" 'Not me!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {" One naive approach would be to ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"mark a new paragraph whenever one"}, - {"of the statistics (left, right or"}, - {"center) changes from one text-"}, - {"line to the next. Such an"}, - {"approach would misclassify the"}, - {"tiny paragraphs above as a single"}, - {"paragraph. "}, + {" Occasionally, interspersed with", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"obvious paragraph text, you might", PCONT, PModel(), false, false}, + {"find short exchanges of dialogue ", PCONT, PModel(), false, false}, + {"between characters. ", PCONT, PModel(), false, false}, + {" 'Oh?' ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {" 'Don't be confused!' ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {" 'Not me!' ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {" One naive approach would be to ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"mark a new paragraph whenever one", PCONT, PModel(), false, false}, + {"of the statistics (left, right or", PCONT, PModel(), false, false}, + {"center) changes from one text-", PCONT, PModel(), false, false}, + {"line to the next. Such an", PCONT, PModel(), false, false}, + {"approach would misclassify the", PCONT, PModel(), false, false}, + {"tiny paragraphs above as a single", PCONT, PModel(), false, false}, + {"paragraph. ", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, TestTinyParagraphs) { @@ -351,49 +371,49 @@ TEST(ParagraphsTest, TestTinyParagraphs) { } const TextAndModel kComplexPage1[] = { - {" Awesome ", PSTART, PModel(kCenter, 0, 0, 0, 0)}, - {" Centered Title "}, - {" Paragraph Detection "}, - {" OCR TEAM "}, - {" 10 November 2010 "}, - {" ", PNONE}, - {" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"This paragraph starts at the top"}, - {"of the page and takes 3 lines. "}, - {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"which indicates that the first "}, - {"paragraph is not a continuation "}, - {"from a previous page, as it is "}, - {"indented just like this second "}, - {"paragraph. "}, - {" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), - true}, - {" looks like the prior text "}, - {" but it is indented more "}, - {" and is fully justified. "}, - {" So how does one deal with ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"centered text, block quotes, "}, - {"normal paragraphs, and lists "}, - {"like what follows? "}, - {"1. Make a plan. ", PSTART, PModel(kLeft, 0, 0, 30, 0), - false, true}, - {"2. Use a heuristic, for example,", PSTART, PModel(kLeft, 0, 0, 30, 0), - false, true}, - {" looking for lines where the "}, - {" first word of the next line "}, - {" would fit on the previous "}, - {" line. "}, - {"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), - false, true}, - {" Python and try it out. "}, - {"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), - false, true}, - {" mistakes. "}, - {"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), - false, true}, - {" For extra painful penalty work", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"you can try to identify source "}, - {"code. Ouch! "}, + {" Awesome ", PSTART, PModel(kCenter, 0, 0, 0, 0), false, false}, + {" Centered Title ", PCONT, PModel(), false, false}, + {" Paragraph Detection ", PCONT, PModel(), false, false}, + {" OCR TEAM ", PCONT, PModel(), false, false}, + {" 10 November 2010 ", PCONT, PModel(), false, false}, + {" ", PNONE, PModel(), false, false}, + {" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"This paragraph starts at the top", PCONT, PModel(), false, false}, + {"of the page and takes 3 lines. ", PCONT, PModel(), false, false}, + {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"which indicates that the first ", PCONT, PModel(), false, false}, + {"paragraph is not a continuation ", PCONT, PModel(), false, false}, + {"from a previous page, as it is ", PCONT, PModel(), false, false}, + {"indented just like this second ", PCONT, PModel(), false, false}, + {"paragraph. ", PCONT, PModel(), false, false}, + {" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), + true, false}, + {" looks like the prior text ", PCONT, PModel(), false, false}, + {" but it is indented more ", PCONT, PModel(), false, false}, + {" and is fully justified. ", PCONT, PModel(), false, false}, + {" So how does one deal with ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"centered text, block quotes, ", PCONT, PModel(), false, false}, + {"normal paragraphs, and lists ", PCONT, PModel(), false, false}, + {"like what follows? ", PCONT, PModel(), false, false}, + {"1. Make a plan. ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {"2. Use a heuristic, for example,", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" looking for lines where the ", PCONT, PModel(), false, false}, + {" first word of the next line ", PCONT, PModel(), false, false}, + {" would fit on the previous ", PCONT, PModel(), false, false}, + {" line. ", PCONT, PModel(), false, false}, + {"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" Python and try it out. ", PCONT, PModel(), false, false}, + {"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" mistakes. ", PCONT, PModel(), false, false}, + {"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" For extra painful penalty work", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"you can try to identify source ", PCONT, PModel(), false, false}, + {"code. Ouch! ", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, TestComplexPage1) { @@ -402,47 +422,47 @@ TEST(ParagraphsTest, TestComplexPage1) { // The same as above, but wider. const TextAndModel kComplexPage2[] = { - {" Awesome ", PSTART, - PModel(kCenter, 0, 0, 0, 0)}, - {" Centered Title "}, - {" Paragraph Detection "}, - {" OCR TEAM "}, - {" 10 November 2010 "}, - {" ", PNONE}, - {" Look here, I have a paragraph. ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"This paragraph starts at the top of"}, - {"the page and takes 3 lines. "}, - {" Here I have a second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"which indicates that the first "}, - {"paragraph is not a continuation "}, - {"from a previous page, as it is in- "}, - {"dented just like this second para- "}, - {"graph. "}, - {" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), - true}, - {" looks like the prior text "}, - {" but it is indented more "}, - {" and is fully justified. "}, - {" So how does one deal with center-", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"ed text, block quotes, normal para-"}, - {"graphs, and lists like what follow?"}, - {"1. Make a plan. "}, // BUG!! - {"2. Use a heuristic, for example, ", PSTART, PModel(kLeft, 0, 0, 30, 0), - false, true}, - {" looking for lines where the "}, - {" first word of the next line "}, - {" would fit on the previous line. "}, - {"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), - false, true}, - {" Python and try it out. "}, - {"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), - false, true}, - {" mistakes. "}, - {"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), - false, true}, - {" For extra painful penalty work ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"you can try to identify source "}, - {"code. Ouch! "}, + {" Awesome ", PSTART, + PModel(kCenter, 0, 0, 0, 0), false, false}, + {" Centered Title ", PCONT, PModel(), false, false}, + {" Paragraph Detection ", PCONT, PModel(), false, false}, + {" OCR TEAM ", PCONT, PModel(), false, false}, + {" 10 November 2010 ", PCONT, PModel(), false, false}, + {" ", PNONE, PModel(), false, false}, + {" Look here, I have a paragraph. ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"This paragraph starts at the top of", PCONT, PModel(), false, false}, + {"the page and takes 3 lines. ", PCONT, PModel(), false, false}, + {" Here I have a second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"which indicates that the first ", PCONT, PModel(), false, false}, + {"paragraph is not a continuation ", PCONT, PModel(), false, false}, + {"from a previous page, as it is in- ", PCONT, PModel(), false, false}, + {"dented just like this second para- ", PCONT, PModel(), false, false}, + {"graph. ", PCONT, PModel(), false, false}, + {" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), + true, false}, + {" looks like the prior text ", PCONT, PModel(), false, false}, + {" but it is indented more ", PCONT, PModel(), false, false}, + {" and is fully justified. ", PCONT, PModel(), false, false}, + {" So how does one deal with center-", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"ed text, block quotes, normal para-", PCONT, PModel(), false, false}, + {"graphs, and lists like what follow?", PCONT, PModel(), false, false}, + {"1. Make a plan. ", PCONT, PModel(), false, false}, // BUG!! + {"2. Use a heuristic, for example, ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" looking for lines where the ", PCONT, PModel(), false, false}, + {" first word of the next line ", PCONT, PModel(), false, false}, + {" would fit on the previous line. ", PCONT, PModel(), false, false}, + {"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" Python and try it out. ", PCONT, PModel(), false, false}, + {"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" mistakes. ", PCONT, PModel(), false, false}, + {"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" For extra painful penalty work ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"you can try to identify source ", PCONT, PModel(), false, false}, + {"code. Ouch! ", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, TestComplexPage2) { @@ -450,15 +470,15 @@ TEST(ParagraphsTest, TestComplexPage2) { } const TextAndModel kSubtleCrown[] = { - {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), - true}, - {"often not indented as the rest "}, - {"of the paragraphs are. Nonethe-"}, - {"less it should be counted as the"}, - {"same type of paragraph. "}, - {" Even a short second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"should suffice. "}, - {" 1235 ", PNONE}, + {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), + true, false}, + {"often not indented as the rest ", PCONT, PModel(), false, false}, + {"of the paragraphs are. Nonethe-", PCONT, PModel(), false, false}, + {"less it should be counted as the", PCONT, PModel(), false, false}, + {"same type of paragraph. ", PCONT, PModel(), false, false}, + {" Even a short second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0), false, false}, + {"should suffice. ", PCONT, PModel(), false, false}, + {" 1235 ", PNONE, PModel(), false, false}, }; TEST(ParagraphsTest, TestSubtleCrown) { @@ -470,43 +490,43 @@ TEST(ParagraphsTest, TestStrayLineInBlock) { } const TextAndModel kUnlvRep3AO[] = { - {" Defined contribution plans cover employees in Australia, New", PSTART, - PModel(kLeft, 0, 50, 0, 0)}, - {"Zealand, Spain, the United Kingdom and some U.S. subsidiaries. "}, - {"In addition, employees in the U.S. are eligible to participate in "}, - {"defined contribution plans (Employee Savings Plans) by contribut-"}, - {"ing a portion of their compensation. The Company matches com- "}, - {"pensation, depending on Company profit levels. Contributions "}, - {"charged to income for defined contribution plans were $92 in "}, - {"1993, $98 in 1992 and $89 in 1991. "}, - {" In addition to providing pension benefits, the Company pro- ", PSTART, - PModel(kLeft, 0, 50, 0, 0)}, - {"vides certain health care and life insurance benefits to retired "}, - {"employees. As discussed in Note A, the Company adopted FASB "}, - {"Statement No. 106 effective January 1, 1992. Previously, the "}, - {"Company recognized the cost of providing these benefits as the "}, - {"benefits were paid. These pretax costs amounted to $53 in 1991. "}, - {"The Company continues to fund most of the cost of these medical "}, - {"and life insurance benefits in the year incurred. "}, - {" The U.S. plan covering the parent company is the largest plan.", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"It provides medical and life insurance benefits including hospital, "}, - {"physicians’ services and major medical expense benefits and life "}, - {"insurance benefits. The plan provides benefits supplemental to "}, - {"Medicare after retirees are eligible for these benefits. The cost of "}, - {"these benefits are shared by the Company and the retiree, with the "}, - {"Company portion increasing as the retiree has increased years of "}, - {"credited service. The Company has the ability to change these "}, - {"benefits at any time. "}, - {" Effective October 1993, the Company amended its health ", PSTART, - PModel(kLeft, 0, 50, 0, 0)}, - {"benefits plan in the U.S. to cap the cost absorbed by the Company "}, - {"at approximately twice the 1993 cost per person for employees who"}, - {"retire after December 31, 1993. The effect of this amendment was "}, - {"to reduce the December 31, 1993 accumulated postretirement "}, - {"benefit obligation by $327. It also reduced the net periodic postre- "}, - {"tirement cost by $21 for 1993 and is estimated to reduce this cost "}, - {"for 1994 by approximately $83. "}, + {" Defined contribution plans cover employees in Australia, New", PSTART, + PModel(kLeft, 0, 50, 0, 0), false, false}, + {"Zealand, Spain, the United Kingdom and some U.S. subsidiaries. ", PCONT, PModel(), false, false}, + {"In addition, employees in the U.S. are eligible to participate in ", PCONT, PModel(), false, false}, + {"defined contribution plans (Employee Savings Plans) by contribut-", PCONT, PModel(), false, false}, + {"ing a portion of their compensation. The Company matches com- ", PCONT, PModel(), false, false}, + {"pensation, depending on Company profit levels. Contributions ", PCONT, PModel(), false, false}, + {"charged to income for defined contribution plans were $92 in ", PCONT, PModel(), false, false}, + {"1993, $98 in 1992 and $89 in 1991. ", PCONT, PModel(), false, false}, + {" In addition to providing pension benefits, the Company pro- ", PSTART, + PModel(kLeft, 0, 50, 0, 0), false, false}, + {"vides certain health care and life insurance benefits to retired ", PCONT, PModel(), false, false}, + {"employees. As discussed in Note A, the Company adopted FASB ", PCONT, PModel(), false, false}, + {"Statement No. 106 effective January 1, 1992. Previously, the ", PCONT, PModel(), false, false}, + {"Company recognized the cost of providing these benefits as the ", PCONT, PModel(), false, false}, + {"benefits were paid. These pretax costs amounted to $53 in 1991. ", PCONT, PModel(), false, false}, + {"The Company continues to fund most of the cost of these medical ", PCONT, PModel(), false, false}, + {"and life insurance benefits in the year incurred. ", PCONT, PModel(), false, false}, + {" The U.S. plan covering the parent company is the largest plan.", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {"It provides medical and life insurance benefits including hospital, ", PCONT, PModel(), false, false}, + {"physicians’ services and major medical expense benefits and life ", PCONT, PModel(), false, false}, + {"insurance benefits. The plan provides benefits supplemental to ", PCONT, PModel(), false, false}, + {"Medicare after retirees are eligible for these benefits. The cost of ", PCONT, PModel(), false, false}, + {"these benefits are shared by the Company and the retiree, with the ", PCONT, PModel(), false, false}, + {"Company portion increasing as the retiree has increased years of ", PCONT, PModel(), false, false}, + {"credited service. The Company has the ability to change these ", PCONT, PModel(), false, false}, + {"benefits at any time. ", PCONT, PModel(), false, false}, + {" Effective October 1993, the Company amended its health ", PSTART, + PModel(kLeft, 0, 50, 0, 0), false, false}, + {"benefits plan in the U.S. to cap the cost absorbed by the Company ", PCONT, PModel(), false, false}, + {"at approximately twice the 1993 cost per person for employees who", PCONT, PModel(), false, false}, + {"retire after December 31, 1993. The effect of this amendment was ", PCONT, PModel(), false, false}, + {"to reduce the December 31, 1993 accumulated postretirement ", PCONT, PModel(), false, false}, + {"benefit obligation by $327. It also reduced the net periodic postre- ", PCONT, PModel(), false, false}, + {"tirement cost by $21 for 1993 and is estimated to reduce this cost ", PCONT, PModel(), false, false}, + {"for 1994 by approximately $83. ", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, TestUnlvInsurance) { @@ -519,19 +539,19 @@ TEST(ParagraphsTest, TestUnlvInsurance) { // paragraph or two. // This example comes from Volume 9886293, Page 5 const TextAndModel kTableOfContents[] = { - {"1 Hmong People ........... 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong Origins . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Language . . . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Proverbs . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Discussion . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Riddles . . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Discussion . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Appearance . . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong History . . . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong in SE Asia . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong in the West . . .5", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong in the USA . . . 5", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Discussion . . . . 6", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {"1 Hmong People ........... 1", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Hmong Origins . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Language . . . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Proverbs . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Discussion . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Riddles . . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Discussion . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Appearance . . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Hmong History . . . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Hmong in SE Asia . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Hmong in the West . . .5", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Hmong in the USA . . . 5", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, + {" Discussion . . . . 6", PSTART, PModel(kUnknown, 0, 0, 0, 0), false, false}, }; TEST(ParagraphsTest, TestSplitsOutLeaderLines) { @@ -539,34 +559,34 @@ TEST(ParagraphsTest, TestSplitsOutLeaderLines) { } const TextAndModel kTextWithSourceCode[] = { - {" A typical page of a programming book may contain", PSTART, - PModel(kLeft, 0, 20, 0, 0)}, - {"examples of source code to exemplify an algorithm "}, - {"being described in prose. Such examples should be"}, - {"rendered as lineated text, meaning text with "}, - {"explicit line breaks but without extra inter-line "}, - {"spacing. Accidentally finding stray paragraphs in"}, - {"source code would lead to a bad reading experience"}, - {"when the text is re-flowed. "}, - {" Let's show this by describing the function fact-", PSTART, - PModel(kLeft, 0, 20, 0, 0)}, - {"orial. Factorial is a simple recursive function "}, - {"which grows very quickly. So quickly, in fact, "}, - {"that the typical C implementation will only work "}, - {"for values less than about 12: "}, - {" ", PNONE}, - {" # Naive implementation in C "}, - {" int factorial(int n) { "}, - {" if (n < 2) "}, - {" return 1; "}, - {" return n * factorial(n - 1); "}, - {" } "}, - {" "}, - {" The C programming language does not have built- ", PSTART, - PModel(kLeft, 0, 20, 0, 0)}, - {"in support for detecting integer overflow, so this"}, - {"naive implementation simply returns random values "}, - {"if even a moderate sized n is provided. "}, + {" A typical page of a programming book may contain", PSTART, + PModel(kLeft, 0, 20, 0, 0), false, false}, + {"examples of source code to exemplify an algorithm ", PCONT, PModel(), false, false}, + {"being described in prose. Such examples should be", PCONT, PModel(), false, false}, + {"rendered as lineated text, meaning text with ", PCONT, PModel(), false, false}, + {"explicit line breaks but without extra inter-line ", PCONT, PModel(), false, false}, + {"spacing. Accidentally finding stray paragraphs in", PCONT, PModel(), false, false}, + {"source code would lead to a bad reading experience", PCONT, PModel(), false, false}, + {"when the text is re-flowed. ", PCONT, PModel(), false, false}, + {" Let's show this by describing the function fact-", PSTART, + PModel(kLeft, 0, 20, 0, 0), false, false}, + {"orial. Factorial is a simple recursive function ", PCONT, PModel(), false, false}, + {"which grows very quickly. So quickly, in fact, ", PCONT, PModel(), false, false}, + {"that the typical C implementation will only work ", PCONT, PModel(), false, false}, + {"for values less than about 12: ", PCONT, PModel(), false, false}, + {" ", PNONE, PModel(), false, false}, + {" # Naive implementation in C ", PCONT, PModel(), false, false}, + {" int factorial(int n) { ", PCONT, PModel(), false, false}, + {" if (n < 2) ", PCONT, PModel(), false, false}, + {" return 1; ", PCONT, PModel(), false, false}, + {" return n * factorial(n - 1); ", PCONT, PModel(), false, false}, + {" } ", PCONT, PModel(), false, false}, + {" ", PCONT, PModel(), false, false}, + {" The C programming language does not have built- ", PSTART, + PModel(kLeft, 0, 20, 0, 0), false, false}, + {"in support for detecting integer overflow, so this", PCONT, PModel(), false, false}, + {"naive implementation simply returns random values ", PCONT, PModel(), false, false}, + {"if even a moderate sized n is provided. ", PCONT, PModel(), false, false}, }; TEST(ParagraphsTest, NotDistractedBySourceCode) { @@ -575,103 +595,105 @@ TEST(ParagraphsTest, NotDistractedBySourceCode) { } const TextAndModel kOldManAndSea[] = { - {"royal palm which are called guano and in it there was a bed, a", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"table, one chair, and a place on the dirt floor to cook with charcoal."}, - {"On the brown walls of the flattened, overlapping leaves of the"}, - {"sturdy fibered guano there was a picture in color of the Sacred"}, - {"Heart of Jesus and another of the Virgin of Cobre. These were"}, - {"relics of his wife. Once there had been a tinted photograph of his"}, - {"wife on the wall but he had taken it down because it made him too"}, - {"lonely to see it and it was on the shelf in the corner under his clean"}, - {"shirt. "}, - {" \"What do you have to eat?\" the boy asked. ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"A pot of yellow rice with fish. Do you want some?\" ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"No. I will eat at home. Do you want me to make the fire?\" ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"No. I will make it later on. Or I may eat the rice cold.\" ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"May I take the cast net?\" ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"Of course.\" ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" There was no cast net and the boy remembered when they had", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"sold it. But they went through this fiction every day. There was no"}, - {"pot of yellow rice and fish and the boy knew this too. " - " "}, - {" \"Eighty-five is a lucky number,\" the old man said. \"How", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"would you like to see me bring one in that dressed out over a " - "thou-"}, - {"sand pounds? " - " "}, - {" \"I'll get the cast net and go for sardines. Will you sit in the " - "sun", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"in the doorway?\" " - " "}, - {" \"Yes. I have yesterday's paper and I will read the baseball.\" ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" The boy did not know whether yesterday's paper was a fiction", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"too. But the old man brought it out from under the bed. "}, - {" \"Pedrico gave it to me at the bodega,\" he explained. " - " ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"I'll be back when I have the sardines. I'll keep yours and mine", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"together on ice and we can share them in the morning. When I"}, - {"come back you can tell me about the baseball.\" "}, - {" \"The Yankees cannot lose.\" ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"But I fear the Indians of Cleveland.\" ", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"Have faith in the Yankees my son. Think of the great Di-", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"Maggio.\" "}, - {" \"I fear both the Tigers of Detroit and the Indians of Cleve-", - PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"land.\" "}}; + {"royal palm which are called guano and in it there was a bed, a", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {"table, one chair, and a place on the dirt floor to cook with charcoal.", PCONT, PModel(), false, false}, + {"On the brown walls of the flattened, overlapping leaves of the", PCONT, PModel(), false, false}, + {"sturdy fibered guano there was a picture in color of the Sacred", PCONT, PModel(), false, false}, + {"Heart of Jesus and another of the Virgin of Cobre. These were", PCONT, PModel(), false, false}, + {"relics of his wife. Once there had been a tinted photograph of his", PCONT, PModel(), false, false}, + {"wife on the wall but he had taken it down because it made him too", PCONT, PModel(), false, false}, + {"lonely to see it and it was on the shelf in the corner under his clean", PCONT, PModel(), false, false}, + {"shirt. ", PCONT, PModel(), false, false}, + {" \"What do you have to eat?\" the boy asked. ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" \"A pot of yellow rice with fish. Do you want some?\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" \"No. I will eat at home. Do you want me to make the fire?\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" \"No. I will make it later on. Or I may eat the rice cold.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" \"May I take the cast net?\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" \"Of course.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" There was no cast net and the boy remembered when they had", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {"sold it. But they went through this fiction every day. There was no", PCONT, PModel(), false, false}, + {"pot of yellow rice and fish and the boy knew this too. " + " ", PCONT, PModel(), false, false}, + {" \"Eighty-five is a lucky number,\" the old man said. \"How", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {"would you like to see me bring one in that dressed out over a " + "thou-", PCONT, PModel(), false, false}, + {"sand pounds? " + " ", PCONT, PModel(), false, false}, + {" \"I'll get the cast net and go for sardines. Will you sit in the " + "sun", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {"in the doorway?\" " + " ", PCONT, PModel(), false, false}, + {" \"Yes. I have yesterday's paper and I will read the baseball.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" The boy did not know whether yesterday's paper was a fiction", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {"too. But the old man brought it out from under the bed. ", PCONT, PModel(), false, false}, + {" \"Pedrico gave it to me at the bodega,\" he explained. " + " ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" \"I'll be back when I have the sardines. I'll keep yours and mine", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {"together on ice and we can share them in the morning. When I", PCONT, PModel(), false, false}, + {"come back you can tell me about the baseball.\" ", PCONT, PModel(), false, false}, + {" \"The Yankees cannot lose.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" \"But I fear the Indians of Cleveland.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {" \"Have faith in the Yankees my son. Think of the great Di-", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {"Maggio.\" ", PCONT, PModel(), false, false}, + {" \"I fear both the Tigers of Detroit and the Indians of Cleve-", + PSTART, PModel(kLeft, 0, 50, 0, 0), false, false}, + {"land.\" ", PCONT, PModel(), false, false} +}; TEST(ParagraphsTest, NotOverlyAggressiveWithBlockQuotes) { TestParagraphDetection(kOldManAndSea, ABSL_ARRAYSIZE(kOldManAndSea)); } const TextAndModel kNewZealandIndex[] = { - {"Oats, 51 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"O'Brien, Gregory, 175 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Occupational composition, 110,", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {" 138 "}, - {"OECD rankings, 155, 172 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Okiato (original capital), 47 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Oil shock: 1974, xxx, 143; 1979,", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {" 145 "}, - {"Old Age Pensions, xxii, 89-90 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Old World evils, 77 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Oliver, W. H., 39, 77, 89 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Olssen, Erik, 45, 64, 84 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Olympic Games, 1924, 111, 144 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Once on Chunuk Bair, 149 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Once Were Warriors, xxxiii, 170", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"On—shore whaling, xvi ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Opotiki, xix ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Orakau battle of, xviii, 57 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"O’Regan, Tipene, 170, 198-99 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Organic agriculture, 177 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Orwell, George, 151 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago, xvii, 45, 49-50, 70 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago block, xvii ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago Daily Times, 67 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago Girls’ High School, xix, 61,", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {" 85 "}, - {"Otago gold rushes, 61-63 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago Peninsula, xx ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago Provincial Council, 68 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otaki, 33 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Owls Do Cry, 139 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}}; + {"Oats, 51 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"O'Brien, Gregory, 175 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Occupational composition, 110,", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {" 138 ", PCONT, PModel(), false, false}, + {"OECD rankings, 155, 172 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Okiato (original capital), 47 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Oil shock: 1974, xxx, 143; 1979,", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {" 145 ", PCONT, PModel(), false, false}, + {"Old Age Pensions, xxii, 89-90 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Old World evils, 77 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Oliver, W. H., 39, 77, 89 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Olssen, Erik, 45, 64, 84 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Olympic Games, 1924, 111, 144 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Once on Chunuk Bair, 149 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Once Were Warriors, xxxiii, 170", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"On—shore whaling, xvi ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Opotiki, xix ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Orakau battle of, xviii, 57 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"O’Regan, Tipene, 170, 198-99 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Organic agriculture, 177 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Orwell, George, 151 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Otago, xvii, 45, 49-50, 70 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Otago block, xvii ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Otago Daily Times, 67 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Otago Girls’ High School, xix, 61,", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {" 85 ", PCONT, PModel(), false, false}, + {"Otago gold rushes, 61-63 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Otago Peninsula, xx ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Otago Provincial Council, 68 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Otaki, 33 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false}, + {"Owls Do Cry, 139 ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, false} +}; TEST(ParagraphsTest, IndexPageTest) { TestParagraphDetection(kNewZealandIndex, ABSL_ARRAYSIZE(kNewZealandIndex));