From 9798400cfba0dfd810df927ebe7afcd8193082b8 Mon Sep 17 00:00:00 2001
From: Bill Currie
Date: Fri, 30 Sep 2022 19:05:18 +0900
Subject: [PATCH] [ui] Add a sub-system for parsing text passages

A passage object has a list of all the text objects in the given string,
where the objects represent either white space or "words", as well as a
view_t object representing the entire passage, with paragraphs split
into child views of the passage view, and each paragraph has a child
view for every text/space object in the paragraph.

Paragraphs are split by '\n' (not included in any object).

White space is grouped into clumps such that multiple adjacent spaces
form a single object. The standard ASCII space (0x20) and all of the
Unicode characters marked "WS;<compat> 0020" are counted as white space.
Unless a white space object is the first in the paragraph, its view is
marked for suppression by the view flow code.

Contiguous non-white space characters are grouped into single objects,
and their views are not suppressed.

All text object views (both white space and "word") have their data
pointer set to the psg_text_t object representing the text for that
view. This should be suitable for simple text-mode unattributed display.
More advanced rendering would probably want to create suitable objects
and set the view data pointers to those objects.

No assumption is made about text direction.

Passage and paragraph views need to have their primary axis sizes set
appropriately, as well as their resize flags. Their xlen and ylen are
both set to 10, and xpos,ypos is 0,0. Paragraph views need their
setgeometry pointer set to the appropriate view_flow_* function.
However, they are set up to have their secondary axis set automatically
when flowed.

Text object views are set up for automatic flowing: grav_flow, 0,0 for
xpos,ypos. However, xlen and ylen are also both 0, so they need to be
set by the renderer before attempting to flow the text.
---
 include/QF/ui/passage.h     |  89 ++++++++++++++
 libs/ui/Makemodule.am       |   1 +
 libs/ui/passage.c           | 228 ++++++++++++++++++++++++++++++++++++
 libs/ui/test/Makemodule.am  |   5 +
 libs/ui/test/test-passage.c | 132 ++++++++++++++++++++++++
 5 files changed, 455 insertions(+)
 create mode 100644 include/QF/ui/passage.h
 create mode 100644 libs/ui/passage.c
 create mode 100644 libs/ui/test/test-passage.c

diff --git a/include/QF/ui/passage.h b/include/QF/ui/passage.h
new file mode 100644
index 000000000..76232bd51
--- /dev/null
+++ b/include/QF/ui/passage.h
@@ -0,0 +1,89 @@
+/*
+	passage.h
+
+	Text passage formatting.
+
+	Copyright (C) 2022 Bill Currie
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program; if not, write to:
+
+		Free Software Foundation, Inc.
+		59 Temple Place - Suite 330
+		Boston, MA  02111-1307, USA
+
+*/
+
+#ifndef __QF_ui_passage_h
+#define __QF_ui_passage_h
+
+#include <stdint.h>
+
+/** \defgroup passage Text passages
+	\ingroup utils
+*/
+///@{
+
+/** A contiguous run of white space or of non-white space in a passage.
+*/
+typedef struct psg_text_s {
+	/// beginning of text for this segment relative to passage_t.text
+	uint32_t    text;
+	/// length of text segment in bytes rather than chars as text may be utf-8
+	uint32_t    size;
+} psg_text_t;
+
+/** A parsed passage of text.
+*/
+typedef struct passage_s {
+	const char *text;			///< Not owned by passage
+	struct view_s *view;		///< hierarchy of views representing passage
+	psg_text_t *text_objects;	///< all text objects in passage
+	unsigned    num_text_objects;	///< number of text objects in passage
+} passage_t;
+
+/** Parse a string into a passage.
+
+	Paragraphs are separated by '\n'. Each paragraph gets a child view of
+	the passage view, and each run of white space or of non-white space in
+	a paragraph gets a text object and a child view of the paragraph view.
+
+	\param text		nul-terminated text to parse. Only the pointer is kept,
+					so the text must outlive the passage.
+	\return			the new passage, to be freed using Passage_Delete().
+*/
+passage_t *Passage_ParseText (const char *text);
+
+/** Free a passage along with its views and text objects.
+
+	The text given to Passage_ParseText() is not freed.
+
+	\param passage	the passage to free.
+*/
+void Passage_Delete (passage_t *passage);
+
+/** Check whether text begins with a white space character.
+
+	The ASCII space (0x20) and the UTF-8 encodings of U+2002-U+2006,
+	U+2008-U+200A and U+205F are recognized.
+
+	\param text		nul-terminated text to check.
+	\return			the size in bytes of the white space character (1 or 3),
+					or 0 if text does not begin with white space.
+*/
+int Passage_IsSpace (const char *text) __attribute__((pure));
+
+///@}
+
+#endif//__QF_ui_passage_h
diff --git a/libs/ui/Makemodule.am b/libs/ui/Makemodule.am
index b564ebc8a..e0b54c30b 100644
--- a/libs/ui/Makemodule.am
+++ b/libs/ui/Makemodule.am
@@ -9,6 +9,7 @@ libs_ui_libQFui_la_LIBADD= $(ui_deps)
 libs_ui_libQFui_la_DEPENDENCIES= $(ui_deps)
 libs_ui_libQFui_la_SOURCES= \
 	libs/ui/inputline.c \
+	libs/ui/passage.c \
 	libs/ui/txtbuffer.c \
 	libs/ui/view.c \
 	libs/ui/vrect.c
diff --git a/libs/ui/passage.c b/libs/ui/passage.c
new file mode 100644
index 000000000..ea289ad47
--- /dev/null
+++ b/libs/ui/passage.c
@@ -0,0 +1,228 @@
+/*
+	passage.c
+
+	Text passage formatting.
+
+	Copyright (C) 2022 Bill Currie
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License
+	as published by the Free Software Foundation; either version 2
+	of the License, or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program; if not, write to:
+
+		Free Software Foundation, Inc.
+		59 Temple Place - Suite 330
+		Boston, MA  02111-1307, USA
+
+*/
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+
+#include "QF/alloc.h"
+#include "QF/qtypes.h"
+#include "QF/sys.h"
+
+#include "QF/ui/view.h"
+#include "QF/ui/passage.h"
+
+/*	Report whether text begins with a white space character.
+
+	Returns the size in bytes of that character (1 for the ASCII space, 3 for
+	the UTF-8 encoded Unicode spaces listed below), or 0 if text does not
+	begin with a recognized white space character.
+*/
+VISIBLE int
+Passage_IsSpace (const char *text)
+{
+	if (text[0] == ' ') {
+		return 1;
+	}
+	// 2002;EN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	// 2003;EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	// 2004;THREE-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	// 2005;FOUR-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	// 2006;SIX-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	// 2008;PUNCTUATION SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	// 2009;THIN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	// 200A;HAIR SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	// UTF-8 e2 80 8n encodes U+200n; bit n of 0x077c is set for each of the
+	// code points listed above. The && chain stops at the first mismatch, so
+	// no byte past the string's nul terminator is read.
+	if ((byte)text[0] == 0xe2 && (byte)text[1] == 0x80
+		&& ((byte)text[2] >= 0x80 && (byte)text[2] < 0x90
+			&& ((1 << (text[2] & 0xf)) & 0x077c))) {
+		return 3;
+	}
+	// 205F;MEDIUM MATHEMATICAL SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
+	if ((byte)text[0] == 0xe2 && (byte)text[1] == 0x81
+		&& (byte)text[2] == 0x9f) {
+		return 3;
+	}
+	return 0;
+}
+
+/*	Create the view for text_object and add it to paragraph_view.
+
+	The view is created with grav_flow and zero position and size. Its data
+	pointer is set to text_object and its bol_suppress to suppress.
+*/
+static void
+add_text_view (view_t *paragraph_view, psg_text_t *text_object, int suppress)
+{
+	view_t *text_view = view_new (0, 0, 0, 0, grav_flow);
+	text_view->data = text_object;
+	text_view->bol_suppress = suppress;
+	view_add (paragraph_view, text_view);
+}
+
+/*	Parse text into a passage.
+
+	The text is scanned twice: once to count the paragraphs and text objects
+	so the text object array can be allocated in one piece (the views keep
+	pointers into it), and once to fill in the objects and create a view for
+	each of them. text is not copied, only referenced.
+*/
+VISIBLE passage_t *
+Passage_ParseText (const char *text)
+{
+	// NOTE(review): the results of malloc are used unchecked in this function
+	passage_t *passage = malloc (sizeof (passage_t));
+	passage->text = text;
+	passage->num_text_objects = 0;
+	passage->view = view_new (0, 0, 10, 10, grav_northwest);
+	passage->text_objects = 0;
+
+	if (!*text) {
+		return passage;
+	}
+
+	// Counting pass. This must start a new object under exactly the same
+	// conditions as the parsing pass below, otherwise the array is mis-sized.
+	unsigned num_paragraphs = 1;
+	int parsing_space = Passage_IsSpace (text);
+	passage->num_text_objects = 1;
+	for (const char *c = text; *c; c++) {
+		int size;
+		if ((size = Passage_IsSpace (c))) {
+			if (!parsing_space) {
+				passage->num_text_objects++;
+			}
+			parsing_space = 1;
+			c += size - 1;
+		} else if (*c == '\n') {
+			if (c[1]) {
+				// every paragraph begins with a new object, whatever came
+				// before the newline
+				num_paragraphs++;
+				passage->num_text_objects++;
+				parsing_space = Passage_IsSpace (c + 1);
+			}
+		} else {
+			if (parsing_space) {
+				passage->num_text_objects++;
+			}
+			parsing_space = 0;
+		}
+	}
+#if 0
+	printf ("num_paragraphs %d, num_text_objects %d\n", num_paragraphs,
+			passage->num_text_objects);
+#endif
+	passage->text_objects = malloc (passage->num_text_objects
+									* sizeof (psg_text_t));
+	for (unsigned i = 0; i < num_paragraphs; i++) {
+		view_t *view = view_new (0, 0, 10, 10, grav_northwest);
+		view->flow_size = 1;
+		view_add (passage->view, view);
+	}
+
+	// Parsing pass. An object's size is set when the next object begins, at
+	// the newline ending its paragraph, or at the end of the text.
+	num_paragraphs = 0;
+	parsing_space = Passage_IsSpace (text);
+	psg_text_t *text_object = passage->text_objects;
+	text_object->text = 0;
+	text_object->size = 0;
+	view_t *paragraph_view = passage->view->children[num_paragraphs++];
+	// NOTE(review): a leading white space object in the first paragraph is
+	// marked for suppression, unlike in later paragraphs - confirm intended
+	add_text_view (paragraph_view, text_object, parsing_space);
+	for (const char *c = text; *c; c++) {
+		int size;
+		if ((size = Passage_IsSpace (c))) {
+			if (!parsing_space) {
+				text_object->size = c - text - text_object->text;
+				(++text_object)->text = c - text;
+				add_text_view (paragraph_view, text_object, 1);
+			}
+			parsing_space = 1;
+			c += size - 1;
+			if (!c[1]) {
+				// text ends in white space: close the final object
+				text_object->size = c + 1 - text - text_object->text;
+			}
+		} else if (*c == '\n') {
+			text_object->size = c - text - text_object->text;
+			if (c[1]) {
+				(++text_object)->text = c + 1 - text;
+				paragraph_view = passage->view->children[num_paragraphs++];
+				// white space at the start of a paragraph is not suppressed
+				add_text_view (paragraph_view, text_object, 0);
+				parsing_space = Passage_IsSpace (c + 1);
+			}
+		} else {
+			if (parsing_space) {
+				text_object->size = c - text - text_object->text;
+				(++text_object)->text = c - text;
+				add_text_view (paragraph_view, text_object, 0);
+			}
+			parsing_space = 0;
+			if (!c[1]) {
+				text_object->size = c + 1 - text - text_object->text;
+			}
+		}
+	}
+#if 0
+	for (int i = 0; i < passage->view->num_children; i++) {
+		paragraph_view = passage->view->children[i];
+		for (int j = 0; j < paragraph_view->num_children; j++) {
+			view_t *text_view = paragraph_view->children[j];
+			psg_text_t *to = text_view->data;
+			printf ("%3d %3d %d %4d %4d '%.*s'\n", i, j,
+					text_view->bol_suppress,
+					to->text, to->size, to->size, text + to->text);
+		}
+	}
+#endif
+	return passage;
+}
+
+/*	Free a passage created by Passage_ParseText, along with its view
+	hierarchy and text objects. The passage's text is not freed.
+*/
+VISIBLE void
+Passage_Delete (passage_t *passage)
+{
+	if (passage->view) {
+		view_delete (passage->view);
+	}
+	free (passage->text_objects);
+	free (passage);
+}
diff --git a/libs/ui/test/Makemodule.am b/libs/ui/test/Makemodule.am
index d1475be63..5ac206552 100644
--- a/libs/ui/test/Makemodule.am
+++ b/libs/ui/test/Makemodule.am
@@ -1,6 +1,7 @@
 libs_ui_tests = \
 	libs/ui/test/test-flow \
 	libs/ui/test/test-flow-size \
+	libs/ui/test/test-passage \
 	libs/ui/test/test-txtbuffer \
 	libs/ui/test/test-vrect
 
@@ -16,6 +17,10 @@ libs_ui_test_test_flow_size_SOURCES=libs/ui/test/test-flow-size.c
 libs_ui_test_test_flow_size_LDADD=libs/ui/libQFui.la
 libs_ui_test_test_flow_size_DEPENDENCIES=libs/ui/libQFui.la
 
+libs_ui_test_test_passage_SOURCES=libs/ui/test/test-passage.c
+libs_ui_test_test_passage_LDADD=libs/ui/libQFui.la
+libs_ui_test_test_passage_DEPENDENCIES=libs/ui/libQFui.la
+
 libs_ui_test_test_txtbuffer_SOURCES=libs/ui/test/test-txtbuffer.c
 libs_ui_test_test_txtbuffer_LDADD=libs/ui/libQFui.la
 libs_ui_test_test_txtbuffer_DEPENDENCIES=libs/ui/libQFui.la
diff --git a/libs/ui/test/test-passage.c b/libs/ui/test/test-passage.c
new file mode 100644
index 000000000..4b3fddd98
--- /dev/null
+++ b/libs/ui/test/test-passage.c
@@ -0,0 +1,132 @@
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+
+#include "QF/ui/view.h"
+#include "QF/ui/passage.h"
+
+// Three paragraphs. The second begins with a two space indent and has an
+// EN SPACE (U+2002) between two of its words.
+static const char test_text[] = {
+	"Guarding the entrance to the Grendal "
+	"Gorge is the Shadow Gate, a small keep "
+	"and monastary which was once the home "
+	"of the Shadow cult.\n"
+	"  For years the Shadow Gate existed in "
+	"obscurity but after the cult discovered "
+	"the \u00c2\u00ec\u00e1\u00e3\u00eb\u2002\u00c7\u00e1\u00f4\u00e5 "
+	"in the caves below the empire took notice. "
+	"A batallion of Imperial Knights were "
+	"sent to the gate to destroy the cult "
+	"and claim the artifact for the King.\nasdf",
+};
+
+// Returns 1 if the text object contains anything other than white space.
+static int __attribute__((pure))
+check_non_space (const char *text, psg_text_t *to)
+{
+	int size;
+
+	for (uint32_t offs = 0; offs < to->size; offs += size) {
+		if (!(size = Passage_IsSpace (text + to->text + offs))) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+// Returns 1 if the text object contains a newline or any white space.
+static int __attribute__((pure))
+check_space_or_nl (const char *text, psg_text_t *to)
+{
+	for (uint32_t offs = 0; offs < to->size; offs++) {
+		if (text[to->text + offs] == '\n'
+			|| Passage_IsSpace (text + to->text + offs)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int
+main (void)
+{
+	int ret = 0;
+
+	passage_t *passage = Passage_ParseText (test_text);
+	if (passage->view->num_children != 3) {
+		ret = 1;
+		printf ("incorrect number of paragraphs: %d\n",
+				passage->view->num_children);
+	}
+	if (passage->num_text_objects != 140) {
+		ret = 1;
+		printf ("incorrect number of text objects: %d\n",
+				passage->num_text_objects);
+	}
+	if (passage->view->children[0]->num_children != 49) {
+		ret = 1;
+		printf ("incorrect number of text objects in first paragraph: %d\n",
+				passage->view->children[0]->num_children);
+	}
+	if (passage->view->children[1]->num_children != 90) {
+		ret = 1;
+		printf ("incorrect number of text objects in second paragraph: %d\n",
+				passage->view->children[1]->num_children);
+	}
+	if (passage->view->children[2]->num_children != 1) {
+		ret = 1;
+		printf ("incorrect number of text objects in third paragraph: %d\n",
+				passage->view->children[2]->num_children);
+	}
+	view_t *text_view = passage->view->children[1]->children[0];
+	psg_text_t *to = text_view->data;
+	// the indent must be exactly two ASCII spaces
+	if (to->size != 2 || passage->text[to->text] != ' '
+		|| passage->text[to->text + 1] != ' ') {
+		ret = 1;
+		printf ("second paragram does not begin with double space: %d '%.*s'\n",
+				to->size, to->size, passage->text + to->text);
+	}
+	if (text_view->bol_suppress) {
+		ret = 1;
+		printf ("second paragram indent suppressed\n");
+	}
+	for (int i = 0; i < passage->view->num_children; i++) {
+		view_t *paragraph_view = passage->view->children[i];
+		for (int j = 0; j < paragraph_view->num_children; j++) {
+			view_t *text_view = paragraph_view->children[j];
+			psg_text_t *to = text_view->data;
+			unsigned is_space = Passage_IsSpace (passage->text + to->text);
+			if (i == 1 && j == 0) {
+				// second paragraph indent, tested above
+				continue;
+			}
+			// every other white space object must be suppressed, and no
+			// non-white space object may be
+			if ((!!is_space) != text_view->bol_suppress) {
+				ret = 1;
+				printf ("text/suppress mismatch %d [%d '%.*s'] %d %d\n",
+						text_view->bol_suppress, to->size, to->size,
+						passage->text + to->text, i, j);
+			}
+			// an object must be entirely white space or entirely text
+			if (is_space) {
+				if (!check_non_space (passage->text, to)) {
+					continue;
+				}
+			} else {
+				if (!check_space_or_nl (passage->text, to)) {
+					continue;
+				}
+			}
+			ret = 1;
+			printf ("mixed space/text/\\n [%d '%.*s'] %d %d\n",
+					to->size, to->size, passage->text + to->text, i, j);
+		}
+	}
+	Passage_Delete (passage);
+
+	return ret;
+}