[ui] Add a sub-system for parsing text passages

A passage object has a list of all the text objects in the given string,
where the objects represent either white space or "words", as well as a
view_t object representing the entire passage, with paragraphs split
into child views of the passage view, and each paragraph has a child
view for every text/space object in the paragraph.

Paragraphs are split by '\n' (not included in any object).

White space is grouped into clumps such that multiple adjacent spaces
form a single object. The standard ASCII space (0x20) and all of the
Unicode characters marked "WS;<compat> 0020" are counted as white space.
Unless a white space object is the first in the paragraph, its view is
marked for suppression by the view flow code.

Contiguous non-white space characters are grouped into single objects,
and their views are not suppressed.

All text object views (both white space and "word") have their data
pointer set to the psg_text_t object representing the text for that
view. This should be suitable for simple text-mode unattributed display.
More advanced rendering would probably want to create suitable objects
and set the view data pointers to those objects.

No assumption is made about text direction.

Passage and paragraph views need to have their primary axis sizes set
appropriately, as well as their resize flags. Their xlen and ylen are
both set to 10, and xpos,ypos is 0,0. Paragraph views need their
setgeometry pointer set to the appropriate view_flow_* function.
However, they are set up to have their secondary axis set automatically
when flowed.

Text object views are set up for automatic flowing: grav_flow, 0,0 for
xpos,ypos. However, their xlen and ylen are also both 0, so they need to
be set by the renderer before attempting to flow the text.
This commit is contained in:
Bill Currie 2022-09-30 19:05:18 +09:00
parent a24fb0ff6a
commit 9798400cfb
5 changed files with 377 additions and 0 deletions

58
include/QF/ui/passage.h Normal file
View file

@ -0,0 +1,58 @@
/*
passage.h
Text passage formatting.
Copyright (C) 2022 Bill Currie <bill@taniwha.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifndef __QF_ui_passage_h
#define __QF_ui_passage_h
#include <stdint.h>
/** \defgroup passage Text passages
\ingroup utils
*/
///@{
/** One text object of a passage: either a clump of white space or a "word".

The object does not hold the text itself, only the location of its bytes
within passage_t.text.
*/
typedef struct psg_text_s {
/// beginning of text for this segment relative to passage_t.text
uint32_t text;
/// length of text segment in bytes rather than chars as text may be utf-8
uint32_t size;
} psg_text_t;
/** A parsed text passage.

Created by Passage_ParseText() and destroyed by Passage_Delete(), which
releases the view hierarchy and the text object array but not the text.
*/
typedef struct passage_s {
const char *text; ///< Not owned by passage
struct view_s *view; ///< hierarchy of views representing passage
psg_text_t *text_objects; ///< all text objects in passage
unsigned num_text_objects; ///< number of text objects in passage
} passage_t;
/** Parse a string into paragraphs and text objects.

Paragraphs are split at '\n'. Within a paragraph, adjacent white space
characters form one text object and adjacent non-white space characters
form another.

\param text		The nul-terminated (possibly UTF-8) text to parse. The
				pointer is stored in the passage rather than the text
				being copied, so the text must remain valid for as long
				as the passage is in use.
\return			A newly allocated passage. Free it with Passage_Delete().
*/
passage_t *Passage_ParseText (const char *text);
/** Free a passage along with its view hierarchy and text objects.

The text the passage was parsed from is not freed.

\param passage	The passage to delete.
*/
void Passage_Delete (passage_t *passage);
/** Check whether a white space character begins the given text.

\param text		Pointer to the character to test.
\return			The size in bytes of the white space character (1 for the
				ASCII space, 3 for the recognized UTF-8 encoded Unicode
				spaces), or 0 if the text does not begin with white space.
*/
int Passage_IsSpace (const char *text) __attribute__((pure));
///@}
#endif//__QF_ui_passage_h

View file

@ -9,6 +9,7 @@ libs_ui_libQFui_la_LIBADD= $(ui_deps)
libs_ui_libQFui_la_DEPENDENCIES= $(ui_deps)
libs_ui_libQFui_la_SOURCES= \
libs/ui/inputline.c \
libs/ui/passage.c \
libs/ui/txtbuffer.c \
libs/ui/view.c \
libs/ui/vrect.c

189
libs/ui/passage.c Normal file
View file

@ -0,0 +1,189 @@
/*
passage.c
Text passage formatting.
Copyright (C) 2022 Bill Currie <bill@taniwha.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#endif
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
#include "QF/alloc.h"
#include "QF/qtypes.h"
#include "QF/sys.h"
#include "QF/ui/view.h"
#include "QF/ui/passage.h"
VISIBLE int
Passage_IsSpace (const char *text)
{
if (text[0] == ' ') {
return 1;
}
// 2002;EN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
// 2003;EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
// 2004;THREE-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
// 2005;FOUR-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
// 2006;SIX-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
// 2008;PUNCTUATION SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
// 2009;THIN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
// 200A;HAIR SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
if ((byte)text[0] == 0xe2 && (byte)text[1] == 0x80
&& ((byte)text[2] >= 0x80 && (byte)text[2] < 0x90
&& ((1 << (text[2] & 0xf)) & 0x077c))) {
return 3;
}
// 205F;MEDIUM MATHEMATICAL SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
if ((byte)text[0] == 0xe2 && (byte)text[1] == 0x81
&& (byte)text[2] == 0x9f) {
return 3;
}
return 0;
}
/*
	Create the view for one text object and append it to its paragraph.

	The view is created with zero position and size and flow gravity. Its
	data pointer is set to the text object, and its bol_suppress flag to
	suppress.
*/
static void
add_text_view (view_t *paragraph_view, psg_text_t *text_object, int suppress)
{
	view_t     *view = view_new (0, 0, 0, 0, grav_flow);

	view->data = text_object;
	view->bol_suppress = suppress;
	view_add (paragraph_view, view);
}
/*
	Count the paragraphs and text objects in a non-empty text.

	The rules here must mirror, step for step, the rules Passage_ParseText
	uses to create text objects, because the result sizes the text object
	array: every point where the builder starts a new object must bump the
	count, and parsing_space must follow the same state transitions.
*/
static void
passage_count_objects (const char *text, unsigned *num_paragraphs,
					   unsigned *num_text_objects)
{
	unsigned    paragraphs = 1;
	unsigned    objects = 1;	// the first paragraph always gets an object
	int         parsing_space = Passage_IsSpace (text);

	for (const char *c = text; *c; c++) {
		int         size;

		if ((size = Passage_IsSpace (c))) {
			if (!parsing_space) {
				objects++;
			}
			parsing_space = 1;
			c += size - 1;	// skip the rest of a multi-byte space
		} else if (*c == '\n') {
			if (c[1]) {
				// A new paragraph always starts a new object, whatever
				// precedes or follows the newline, and the state restarts
				// from the first character of the new paragraph.
				paragraphs++;
				objects++;
				parsing_space = Passage_IsSpace (c + 1);
			}
		} else {
			if (parsing_space) {
				objects++;
			}
			parsing_space = 0;
		}
	}
	*num_paragraphs = paragraphs;
	*num_text_objects = objects;
}

/*
	Parse text into a passage.

	The passage view gets one child view per paragraph (paragraphs are split
	by '\n', which is not part of any text object), and each paragraph view
	gets one child view per text object, where a text object is either a
	clump of adjacent white space or a run of adjacent non-white space.
	Each text view's data pointer is set to its psg_text_t.

	text is not copied: the passage keeps the pointer, so the text must
	outlive the passage. NULL or empty text produces a passage with a view
	but no paragraphs or text objects.

	Returns the new passage (free with Passage_Delete), or NULL if memory
	could not be allocated.
*/
VISIBLE passage_t *
Passage_ParseText (const char *text)
{
	unsigned    num_paragraphs = 0;
	unsigned    num_text_objects = 0;
	psg_text_t *text_objects = 0;

	// Do all the allocation up front, before any views exist, so that
	// failure can be handled without tearing down a view hierarchy.
	if (text && *text) {
		passage_count_objects (text, &num_paragraphs, &num_text_objects);
		// zeroed so no object ever carries an indeterminate offset or size
		text_objects = calloc (num_text_objects, sizeof (*text_objects));
		if (!text_objects) {
			return 0;
		}
	}
	passage_t  *passage = malloc (sizeof (*passage));
	if (!passage) {
		free (text_objects);
		return 0;
	}
	passage->text = text;
	passage->num_text_objects = num_text_objects;
	passage->text_objects = text_objects;
	passage->view = view_new (0, 0, 10, 10, grav_northwest);
	if (!text_objects) {
		// nothing to parse
		return passage;
	}

	for (unsigned i = 0; i < num_paragraphs; i++) {
		view_t     *view = view_new (0, 0, 10, 10, grav_northwest);
		view->flow_size = 1;
		view_add (passage->view, view);
	}

	unsigned    paragraph = 0;
	int         parsing_space = Passage_IsSpace (text);
	psg_text_t *text_object = passage->text_objects;
	text_object->text = 0;
	text_object->size = 0;
	view_t     *paragraph_view = passage->view->children[paragraph++];
	// NOTE(review): white space leading the first paragraph is marked for
	// suppression, while white space leading later paragraphs is not (see
	// the '\n' case below). Confirm which is intended before changing.
	add_text_view (paragraph_view, text_object, parsing_space != 0);

	const char *c;
	for (c = text; *c; c++) {
		int         size;

		if ((size = Passage_IsSpace (c))) {
			if (!parsing_space) {
				// word -> space: close the word, open a space object
				text_object->size = c - text - text_object->text;
				(++text_object)->text = c - text;
				add_text_view (paragraph_view, text_object, 1);
			}
			parsing_space = 1;
			c += size - 1;	// skip the rest of a multi-byte space
		} else if (*c == '\n') {
			// the newline itself belongs to no object
			text_object->size = c - text - text_object->text;
			if (c[1]) {
				(++text_object)->text = c + 1 - text;
				paragraph_view = passage->view->children[paragraph++];
				add_text_view (paragraph_view, text_object, 0);
				parsing_space = Passage_IsSpace (c + 1);
			}
		} else {
			if (parsing_space) {
				// space -> word: close the space, open a word object
				text_object->size = c - text - text_object->text;
				(++text_object)->text = c - text;
				add_text_view (paragraph_view, text_object, 0);
			}
			parsing_space = 0;
		}
	}
	// Close the final object. c is at the terminating nul and the text is
	// not empty, so c[-1] is valid. When the text ends with a newline, the
	// object was already closed (excluding the newline) in the loop.
	if (c[-1] != '\n') {
		text_object->size = c - text - text_object->text;
	}
	return passage;
}
/*
	Free a passage created by Passage_ParseText.

	Deletes the passage's view (if any), then frees the text object array
	and the passage itself. The text the passage refers to is not owned by
	the passage and is left untouched. A NULL passage is ignored, as with
	free().
*/
VISIBLE void
Passage_Delete (passage_t *passage)
{
	if (!passage) {
		return;
	}
	if (passage->view) {
		view_delete (passage->view);
	}
	free (passage->text_objects);
	free (passage);
}

View file

@ -1,6 +1,7 @@
libs_ui_tests = \
libs/ui/test/test-flow \
libs/ui/test/test-flow-size \
libs/ui/test/test-passage \
libs/ui/test/test-txtbuffer \
libs/ui/test/test-vrect
@ -16,6 +17,10 @@ libs_ui_test_test_flow_size_SOURCES=libs/ui/test/test-flow-size.c
libs_ui_test_test_flow_size_LDADD=libs/ui/libQFui.la
libs_ui_test_test_flow_size_DEPENDENCIES=libs/ui/libQFui.la
libs_ui_test_test_passage_SOURCES=libs/ui/test/test-passage.c
libs_ui_test_test_passage_LDADD=libs/ui/libQFui.la
libs_ui_test_test_passage_DEPENDENCIES=libs/ui/libQFui.la
libs_ui_test_test_txtbuffer_SOURCES=libs/ui/test/test-txtbuffer.c
libs_ui_test_test_txtbuffer_LDADD=libs/ui/libQFui.la
libs_ui_test_test_txtbuffer_DEPENDENCIES=libs/ui/libQFui.la

124
libs/ui/test/test-passage.c Normal file
View file

@ -0,0 +1,124 @@
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#include "QF/ui/view.h"
#include "QF/ui/passage.h"
// Sample passage for the parser test. It has three paragraphs: the second
// begins with a two-space indent and contains a run of non-ASCII characters
// with a U+2002 EN SPACE in the middle, and the third is the single word
// after the final newline.
static const char test_text[] = {
"Guarding the entrance to the Grendal "
"Gorge is the Shadow Gate, a small keep "
"and monastary which was once the home "
"of the Shadow cult.\n"
" For years the Shadow Gate existed in "
"obscurity but after the cult discovered "
"the \u00c2\u00ec\u00e1\u00e3\u00eb\u2002\u00c7\u00e1\u00f4\u00e5 "
"in the caves below the empire took notice. "
"A batallion of Imperial Knights were "
"sent to the gate to destroy the cult "
"and claim the artifact for the King.\nasdf",
};
/*
	Scan a text object expected to hold only white space.

	Walks the object one white space character at a time (using the size
	reported by Passage_IsSpace) and returns 1 as soon as something that is
	not white space is found, or 0 if the whole object is white space.
*/
static int __attribute__((pure))
check_non_space (const char *text, psg_text_t *to)
{
	const char *cursor = text + to->text;
	const char *end = cursor + to->size;

	while (cursor < end) {
		int         len = Passage_IsSpace (cursor);

		if (!len) {
			return 1;
		}
		cursor += len;
	}
	return 0;
}
/*
	Scan a text object expected to hold only "word" characters.

	Returns 1 if any byte of the object is a newline or begins a white space
	character, 0 otherwise.
*/
static int __attribute__((pure))
check_space_or_nl (const char *text, psg_text_t *to)
{
	const char *segment = text + to->text;

	for (uint32_t i = 0; i < to->size; i++) {
		if (segment[i] == '\n') {
			return 1;
		}
		if (Passage_IsSpace (segment + i)) {
			return 1;
		}
	}
	return 0;
}
/*
	Parse test_text and check the resulting passage:
	  - the number of paragraphs and text objects,
	  - the number of text objects in each paragraph,
	  - that the second paragraph starts with an unsuppressed two-space
		indent,
	  - that every other text object is either all white space and marked
		for suppression, or free of white space/newlines and not marked.

	Returns 0 if all checks pass, 1 otherwise.
*/
int
main (void)
{
	int         ret = 0;
	passage_t  *passage = Passage_ParseText (test_text);

	if (!passage) {
		printf ("failed to parse passage\n");
		return 1;
	}
	if (passage->view->num_children != 3) {
		ret = 1;
		printf ("incorrect number of paragraphs: %d\n",
				passage->view->num_children);
	}
	if (passage->num_text_objects != 140) {
		ret = 1;
		printf ("incorrect number of text objects: %u\n",
				passage->num_text_objects);
	}
	if (passage->view->num_children < 3) {
		// the per-paragraph checks below index children[0] to children[2]
		Passage_Delete (passage);
		return ret;
	}
	if (passage->view->children[0]->num_children != 49) {
		ret = 1;
		printf ("incorrect number of text objects in first paragraph: %d\n",
				passage->view->children[0]->num_children);
	}
	if (passage->view->children[1]->num_children != 90) {
		ret = 1;
		printf ("incorrect number of text objects in second paragraph: %d\n",
				passage->view->children[1]->num_children);
	}
	if (passage->view->children[2]->num_children != 1) {
		ret = 1;
		printf ("incorrect number of text objects in third paragraph: %d\n",
				passage->view->children[2]->num_children);
	}

	if (passage->view->children[1]->num_children < 1) {
		ret = 1;
		printf ("second paragraph has no text objects\n");
	} else {
		view_t     *indent_view = passage->view->children[1]->children[0];
		psg_text_t *indent = indent_view->data;

		// the indent is wrong if ANY of the three conditions fails; the
		// characters are read only once the size is known to be 2
		if (indent->size != 2 || passage->text[indent->text] != ' '
			|| passage->text[indent->text + 1] != ' ') {
			ret = 1;
			printf ("second paragraph does not begin with double space: "
					"%u '%.*s'\n", (unsigned) indent->size,
					(int) indent->size, passage->text + indent->text);
		}
		if (indent_view->bol_suppress) {
			ret = 1;
			printf ("second paragraph indent suppressed\n");
		}
	}

	for (int i = 0; i < passage->view->num_children; i++) {
		view_t     *paragraph_view = passage->view->children[i];

		for (int j = 0; j < paragraph_view->num_children; j++) {
			view_t     *text_view = paragraph_view->children[j];
			psg_text_t *to = text_view->data;
			unsigned    is_space = Passage_IsSpace (passage->text + to->text);

			if (i == 1 && j == 0) {
				// second paragraph indent, tested above
				continue;
			}
			if ((!!is_space) != text_view->bol_suppress) {
				ret = 1;
				printf ("text/suppress mismatch %d [%u '%.*s'] %d %d\n",
						text_view->bol_suppress, (unsigned) to->size,
						(int) to->size, passage->text + to->text, i, j);
			}
			if (is_space) {
				if (!check_non_space (passage->text, to)) {
					continue;
				}
			} else {
				if (!check_space_or_nl (passage->text, to)) {
					continue;
				}
			}
			ret = 1;
			printf ("mixed space/text/\\n [%u '%.*s'] %d %d\n",
					(unsigned) to->size, (int) to->size,
					passage->text + to->text, i, j);
		}
	}

	Passage_Delete (passage);
	return ret;
}