/* rtcScanner Copyright (C) 1999 Free Software Foundation, Inc. Author: Stefan Bðhringer (stefan.boehringer@uni-bochum.de) Date: Dec 1999 This file is part of the GNUstep GUI Library. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include #include "rtfScanner.h" #include "rtfGrammer.tab.h" // <§> scanner types and helpers #define CArraySize(a) (sizeof(a)/sizeof((a)[0])-1) typedef struct { char *bf; int length, position, chunkSize; } DynamicString; typedef struct { const char *string; int token; } LexKeyword; GSLexError initDynamicString(DynamicString *string) { string->length = 0, string->position = 0, string->chunkSize = 128; string->bf = calloc(1, string->length = string->chunkSize); if (!string->bf) return LEXoutOfMemory; return NoError; } GSLexError appendChar(DynamicString *string, int c) { if (string->position == string->length) { if (!(string->bf = realloc(string->bf, string->length += string->chunkSize))) return LEXoutOfMemory; else string->chunkSize <<= 1; } string->bf[string->position++] = c; return NoError; } void lexInitContext(RTFscannerCtxt *lctxt, void *customContext, int (*getcharFunction)(void *)) { lctxt->streamLineNumber = 1; lctxt->streamPosition = lctxt->pushbackCount = 0; lctxt->lgetchar = getcharFunction; lctxt->customContext = customContext; } int lexGetchar(RTFscannerCtxt *lctxt) { int c; if (lctxt->pushbackCount) { lctxt->pushbackCount--; c = lctxt->pushbackBuffer[lctxt->pushbackCount]; } else { lctxt->streamPosition++; c = lctxt->lgetchar(lctxt->customContext); } if (c == '\n') lctxt->streamLineNumber++; return c; } void lexUngetchar(RTFscannerCtxt *lctxt, int c) { if (c == '\n') lctxt->streamLineNumber--; lctxt->pushbackBuffer[lctxt->pushbackCount++] = c; // no checking here } int lexStreamPosition(RTFscannerCtxt *lctxt) { return lctxt->streamPosition - lctxt->pushbackCount; } char *my_strdup(const char *str) { char *copy = str? malloc(strlen(str) + 1): 0; return !copy? 0: strcpy(copy, str); } int findStringFromKeywordArray(const char *string, const LexKeyword *array, int arrayCount) { int min, max, mid, cmp; const LexKeyword *currentKeyword; for (min=0, max=arrayCount; min<=max; ) { mid = (min+max)>>1; currentKeyword = array + mid; if (!(cmp = strcmp(string, currentKeyword->string))) { return currentKeyword->token; } else if (cmp>0) min=mid+1; else max=mid-1; } return 0; // couldn't find } // end <§> scanner types and helpers // <§> core scanner functions #define token(a) (a) // must be sorted LexKeyword RTFcommands[]={ "ansi", token(RTFansi), "b", token(RTFbold), "blue", token(RTFblue), "bullet", token(RTFbullet), "cb", token(RTFcolorbg), "cell", token(RTFcell), "cf", token(RTFcolorfg), "colortbl", token(RTFcolortable), "cpg", token(RTFcpg), "dn", token(RTFsubscript), "emdash", token(RTFemdash), "emspace", token(RTFemspace), "endash", token(RTFendash), "enspace", token(RTFenspace), "f", token(RTFfont), "fcharset", token(RTFfcharset), "fdecor", token(RTFfamilyDecor), "fi", token(RTFfirstLineIndent), "fmodern", token(RTFfamilyModern), "fnil", token(RTFfamilyNil), "fonttbl", token(RTFfontListStart), /* All footers are mapped on one entry */ "footer", token(RTFfooter), "footerf", token(RTFfooter), "footerl", token(RTFfooter), "footerr", token(RTFfooter), "footnote", token(RTFfootnote), "fprq", token(RTFfprq), "froman", token(RTFfamilyRoman), "fs", token(RTFfontSize), "fscript", token(RTFfamilyScript), "fswiss", token(RTFfamilySwiss), "ftech", token(RTFfamilyTech), "green", token(RTFgreen), /* All headers are mapped on one entry */ "header", token(RTFheader), "headerf", token(RTFheader), "headerl", token(RTFheader), "headerr", token(RTFheader), "i", token(RTFitalic), "info", token(RTFinfo), "ldblquote", token(RTFldblquote), "li", token(RTFleftIndent), "lquote", token(RTFlquote), "mac", token(RTFmac), "margb", token(RTFmarginButtom), "margl", token(RTFmarginLeft), "margr", token(RTFmarginRight), "margt", token(RTFmarginTop), "paperh", token(RTFpaperHeight), "paperw", token(RTFpaperWidth), "par", token(RTFparagraph), "pard", token(RTFdefaultParagraph), "pc", token(RTFpc), "pca", token(RTFpca), "pict", token(RTFpict), "plain", token(RTFplain), "qc", token(RTFalignCenter), "qj", token(RTFalignJustified), "ql", token(RTFalignLeft), "qr", token(RTFalignRight), "rdblquote", token(RTFrdblquote), "red", token(RTFred), "ri", token(RTFrightIndent), "row", token(RTFrow), "rquote", token(RTFrquote), "rtf", token(RTFstart), "s", token(RTFstyle), "sa", token(RTFspaceAbove), "sl", token(RTFlineSpace), "stylesheet", token(RTFstylesheet), "tab", token(RTFtabulator), "tx", token(RTFtabstop), /* All underline are mapped on one entry */ "ul", token(RTFunderline), "uld", token(RTFunderline), "uldb", token(RTFunderline), "ulnone", token(RTFunderlineStop), "ulw", token(RTFunderline), "up", token(RTFsuperscript) }; BOOL probeCommand(RTFscannerCtxt *lctxt) { int c = lexGetchar(lctxt); lexUngetchar(lctxt, c); if (isalpha(c)) return YES; return NO; } // According to spec a cmdLength of 32 is respected #define RTFMaxCmdLength 32 #define RTFMaxArgumentLength 64 GSLexError readCommand(RTFscannerCtxt *lctxt, YYSTYPE *lvalp, int *token) // the '\\' is already read { char cmdNameBf[RTFMaxCmdLength+1], *cmdName = cmdNameBf; char argumentBf[RTFMaxArgumentLength+1], *argument = argumentBf; int c, foundToken; lvalp->cmd.name = 0; // initialize while (isalpha( c = lexGetchar(lctxt) )) { *cmdName++ = c; if (cmdName >= cmdNameBf + RTFMaxCmdLength) return LEXsyntaxError; } *cmdName = 0; if (!(foundToken = findStringFromKeywordArray(cmdNameBf, RTFcommands, CArraySize(RTFcommands)))) { if (!(lvalp->cmd.name = my_strdup(cmdNameBf))) return LEXoutOfMemory; *token = RTFOtherStatement; } else { *token = foundToken; } if (c == ' ') // this is an empty argument { lvalp->cmd.isEmpty = YES; } else if (isdigit(c) || c == '-') // we've found a numerical argument { do { *argument++ = c; if (argument >= argumentBf + RTFMaxArgumentLength) return LEXsyntaxError; } while (isdigit(c = lexGetchar(lctxt))); *argument = 0; if (c != ' ') lexUngetchar(lctxt, c); // ungetc non-digit // the consumption of the space seems necessary on NeXT but // is not according to spec lvalp->cmd.isEmpty = NO; lvalp->cmd.parameter = atoi(argumentBf); } else { lvalp->cmd.isEmpty = YES; lexUngetchar(lctxt, c); // ungetc non-whitespace delimiter } return NoError; } GSLexError readText(RTFscannerCtxt *lctxt, YYSTYPE *lvalp) { int c; DynamicString text; GSLexError error; if ((error = initDynamicString(&text))) return error; for (;;) { c = lexGetchar(lctxt); if (c == EOF || c == '{' || c == '}' || c == '\\') { lexUngetchar(lctxt, c); break; } else { if (c != '\n' && c != '\r') // newline and cr are ignored if not quoted appendChar(&text, c); } } appendChar(&text, 0); lvalp->text = text.bf; // release is up to the consumer return NoError; } // read in a character as two hex digit static int gethex(RTFscannerCtxt *lctxt) { int c = 0; int i; for (i = 0; i < 2; i++) { int c1 = lexGetchar(lctxt); if (!isxdigit(c1)) { lexUngetchar(lctxt, c1); break; } else { c = c * 16; if (isdigit(c1)) c += c1 - '0'; else if (isupper(c1)) c += c1 - 'A' + 10; else c += c1 - 'a' + 10; } } return c; } int GSRTFlex(YYSTYPE *lvalp, YYLTYPE *llocp, RTFscannerCtxt *lctxt) /* provide value and position in the params */ { int c; int token = 0; char *cv; do c = lexGetchar(lctxt); while ( c == '\n' || c == '\r' ); // the listed characters are to be ignored switch (c) { case EOF: token = 0; break; case '{': token = '{'; break; case '}': token = '}'; break; case '\\': if (probeCommand(lctxt) == YES) { readCommand(lctxt, lvalp, &token); switch (token) { case RTFtabulator: c = '\t'; break; case RTFcell: c = '\t'; break; case RTFemdash: c = '-'; break; case RTFendash: c = '-'; break; case RTFbullet: c = '*'; break; case RTFlquote: c = '`'; break; case RTFrquote: c = '\''; break; case RTFldblquote: c = '"'; break; case RTFrdblquote: c = '"'; break; default: return token; } } else { c = lexGetchar(lctxt); switch (c) { case EOF: token = 0; return token; case '\'': // Convert the next two hex digits into a char c = gethex(lctxt); break; case '*': return RTFignore; case '|': case '-': case ':': // Ignore these characters c = lexGetchar(lctxt); break; case '_': c = '-'; break; case '~': c = ' '; break; case '\n': case '\r': return RTFparagraph; case '{': case '}': case '\\': // release is up to the consumer cv = calloc(1, 2); cv[0] = c; cv[1] = '\0'; lvalp->text = cv; token = RTFtext; return token; default: // fall through } } // else fall through to default: read text // no break default: lexUngetchar(lctxt, c); readText(lctxt, lvalp); token = RTFtext; break; } //*llocp = lctxt->position(); return token; }