fteqw/engine/common/translate.c
Shpoike ff1a2299f4 megacommit.
adds qtv relay support.
lots of other misc tweaks.
2024-07-14 19:58:26 +01:00

829 lines
18 KiB
C

#include "quakedef.h"
//#define COLOURMISSINGSTRINGS //for english people to more easily see what's not translatable (text still white)
//#define COLOURUNTRANSLATEDSTRINGS //show empty translations as alt-text versions of the original string
//client may remap messages from the server to a regional bit of text.
//server may remap progs messages
//basic language is english (cos that's what (my version of) Quake uses).
//translate is english->lang
//untranslate is lang->english for console commands.
//profanity-filter helpers, defined further down in this file.
static void FilterPurge(void);
static void FilterInit(const char *file);
//index into languages[] of the language currently in use (assigned by TL_LanguageChanged and TL_InitLanguages).
int com_language;
//language code chosen at startup by TL_InitLanguages; also the default value of the "lang" cvar.
char sys_language[64] = "";
//prefix prepended to "fteqw.<lang>.po" when TL_LoadLanguage looks for a translation file.
static char langpath[MAX_OSPATH] = "";
//table of known languages; a slot counts as used once its name is non-NULL.
struct language_s languages[MAX_LANGUAGES];
//cvar change callback for "lang": resolves the new language code to a slot in languages[] and makes it current.
static void QDECL TL_LanguageChanged(struct cvar_s *var, char *oldvalue)
{
	const char *code = var->string;

	(void)oldvalue;	//the previous value is of no interest here.
	com_language = TL_FindLanguage(code);
}
//the "lang" cvar (alternative name "prvm_language"). defaults to sys_language, and TL_LanguageChanged runs whenever it changes.
cvar_t language = CVARAFCD("lang", sys_language, "prvm_language", CVAR_USERINFO|CVAR_NORESET/*otherwise gamedir switches will be annoying*/, TL_LanguageChanged, "This cvar contains the language_dialect code of your language, used to find localisation strings.");
//handler for the "com_reloadfilter" console command.
//currently a stub: the FilterInit call below was never completed, so running the command does nothing.
static void Filter_Reload_f(void)
{
// FilterInit(
}
//registers this module's console command ("com_reloadfilter") and the "lang" cvar (in the "Internationalisation" group).
void TranslateInit(void)
{
Cmd_AddCommand("com_reloadfilter", Filter_Reload_f);
Cvar_Register(&language, "Internationalisation");
}
void TL_Shutdown(void)
{
int j;
for (j = 0; j < MAX_LANGUAGES; j++)
{
if (!languages[j].name)
continue;
free(languages[j].name);
languages[j].name = NULL;
PO_Close(languages[j].po);
languages[j].po = NULL;
PO_Close(languages[j].po_qex);
languages[j].po_qex = NULL;
}
FilterPurge();
}
//finds (or creates) the languages[] slot for the given language code and returns its index.
//lang is modified in place: when no "<langpath>fteqw.<lang>.po" file exists, the code is cut at its last '_'
//and the whole lookup is retried with the shorter name.
//returns 0 if the table is full.
static int TL_LoadLanguage(char *lang)
{
	vfsfile_t *pofile;
	int slot;
	char *sep;

	for (;;)
	{
		//an already-known language is simply reused.
		for (slot = 0; slot < MAX_LANGUAGES && languages[slot].name; slot++)
		{
			if (!stricmp(languages[slot].name, lang))
				return slot;
		}
		//err... oops, ran out of languages...
		if (slot == MAX_LANGUAGES)
			return 0;

		pofile = *lang ? FS_OpenVFS(va("%sfteqw.%s.po", langpath, lang), "rb", FS_SYSTEM) : NULL;
		if (pofile || !*lang)
			break;

		//keep truncating until we can find a name that works
		sep = strrchr(lang, '_');
		if (!sep)
			break;
		*sep = 0;
	}

	languages[slot].name = strdup(lang);
	languages[slot].po = NULL;
#if !defined(COLOURUNTRANSLATEDSTRINGS) && !defined(COLOURMISSINGSTRINGS)
	if (pofile)
#endif
	{
		languages[slot].po = PO_Create();
		PO_Merge(languages[slot].po, pofile);
	}
	return slot;
}
//public lookup: returns the languages[] index for a language code, loading it if needed.
int TL_FindLanguage(const char *lang)
{
	//TL_LoadLanguage truncates its argument in place, so give it a private (bounded) copy.
	char scratch[64];

	Q_strncpyz(scratch, lang, sizeof(scratch));
	return TL_LoadLanguage(scratch);
}
//need to set up default languages for any early prints before cvars are inited.
//need to set up default languages for any early prints before cvars are inited.
//newlangpath: prefix used when opening "fteqw.<lang>.po" files (NULL is treated as "").
//the language comes from "-lang <code>" if given, otherwise from the first non-empty of
//LANGUAGE, LC_ALL, LC_MESSAGES, LANG. if none is usable, sys_language keeps whatever it already held.
void TL_InitLanguages(const char *newlangpath)
{
	static const char *const envnames[] = {"LANGUAGE", "LC_ALL", "LC_MESSAGES", "LANG"};
	static const char cutmarks[] = ":.@";
	const char *envlang;
	char *cut;
	size_t n;
	int i;

	if (!newlangpath)
		newlangpath = "";
	Q_strncpyz(langpath, newlangpath, sizeof(langpath));

	//lang can override any environment or system settings.
	if ((i = COM_CheckParm("-lang")))
		Q_strncpyz(sys_language, com_argv[i+1], sizeof(sys_language));
	else
	{
		//an empty variable counts as unset, so it must not hide the lower-priority ones.
		envlang = NULL;
		for (n = 0; n < sizeof(envnames)/sizeof(envnames[0]); n++)
		{
			envlang = getenv(envnames[n]);
			if (envlang && *envlang)
				break;
			envlang = NULL;
		}

		//windows will have already set the locale from the windows settings, so only replace it if its actually valid.
		//"C" and "POSIX" mean 'no localisation'.
		if (envlang && strcmp(envlang, "C") && strcmp(envlang, "POSIX"))
			Q_strncpyz(sys_language, envlang, sizeof(sys_language));
	}

	//clean it up.
	//takes the form: [language[_territory][.codeset][@modifier]], and LANGUAGE may hold a ':'-separated priority list of those.
	//we only support the single primary locale (no fallbacks), and understand neither codesets nor modifiers,
	//so cut at the FIRST occurrence of each marker. cutting at the last ':' would keep all but the final list entry.
	for (n = 0; cutmarks[n]; n++)
	{
		cut = strchr(sys_language, cutmarks[n]);
		if (cut)
			*cut = 0;
	}

	//but we do support territories.
	com_language = TL_FindLanguage(sys_language);

	//make sure a fallback exists, but not as language 0
	TL_FindLanguage("");
}
#ifdef HEXEN2
//this stuff is for hexen2 translation strings.
//(hexen2 is uuuuggllyyyy...)
static char *strings_list;		//raw contents of strings.txt, split in place into separate strings.
static char **strings_table;	//one pointer into strings_list per line.
static int strings_count;		//number of entries in strings_table.
static qboolean strings_loaded;	//set once a load has been attempted (even if it failed).

//on map change, following gamedir change
//releases the string table so the next T_GetString reloads it.
void T_FreeStrings(void)
{
	if (strings_loaded)
	{
		if (strings_list)
			BZ_Free(strings_list);
		if (strings_table)
			BZ_Free(strings_table);
		//clear the pointers: a later load that fails to find the file would otherwise leave
		//the old table pointer in place, and the next free would release it a second time.
		strings_list = NULL;
		strings_table = NULL;
		strings_count = 0;
		strings_loaded = false;
	}
}
//loads "strings.txt" and splits it into one string per line.
//within a line, '\r' terminates the string and '^' or '@' become newlines.
//marks the table as loaded even when the file is missing, so a failed load is not retried on every lookup.
void T_LoadString(void)
{
	int i;
	char *s, *s2;

	strings_loaded = true;
	strings_count = 0;
	strings_table = NULL;	//don't keep a stale table around if the file turns out to be missing.
	strings_list = FS_LoadMallocFile("strings.txt", NULL);
	if (!strings_list)
		return;

	//count new lines
	for (s = strings_list; *s; s++)
	{
		if (*s == '\n')
			strings_count++;
	}
	//a final line without a trailing newline is still a string.
	if (s > strings_list && s[-1] != '\n')
		strings_count++;

	strings_table = BZ_Malloc(sizeof(char*)*strings_count);

	s = strings_list;
	for (i = 0; i < strings_count; i++)
	{
		strings_table[i] = s;

		s2 = strchr(s, '\n');
		if (!s2)
			s2 = s + strlen(s);	//unterminated last line: runs to the end of the file.

		for (; s < s2; s++)
		{
			if (*s == '\r')
				*s = '\0';
			else if (*s == '^' || *s == '@') //becomes new line
				*s = '\n';
		}

		if (!*s2)
			break;	//that was the end of the file.
		*s2 = '\0';
		s = s2+1;
	}
}
//returns string number num from strings.txt (loading the file on first use), or "BAD STRING" if num is out of range.
char *T_GetString(int num)
{
	if (!strings_loaded)
		T_LoadString();

	if (num >= 0 && num < strings_count)
		return strings_table[num];
	return "BAD STRING";
}
#ifdef HAVE_CLIENT
//for hexen2's objectives and stuff.
static char *info_strings_list;			//raw contents of infolist.txt, split in place into separate strings.
static char **info_strings_table;		//one pointer into info_strings_list per line.
static int info_strings_count;			//number of entries in info_strings_table.
static qboolean info_strings_loaded;	//set once a load has been attempted (even if it failed).

//on map change, following gamedir change
//releases the info-string table so the next T_GetInfoString reloads it.
void T_FreeInfoStrings(void)
{
	if (info_strings_loaded)
	{
		if (info_strings_list)
			BZ_Free(info_strings_list);
		if (info_strings_table)
			BZ_Free(info_strings_table);
		//clear the pointers: a later load that fails to find the file would otherwise leave
		//the old table pointer in place, and the next free would release it a second time.
		info_strings_list = NULL;
		info_strings_table = NULL;
		info_strings_count = 0;
		info_strings_loaded = false;
	}
}
//loads "infolist.txt" and splits it into one string per line.
//within a line, '\r' terminates the string and '^' or '@' become newlines.
//marks the table as loaded even when the file is missing, so a failed load is not retried on every lookup.
void T_LoadInfoString(void)
{
	int i;
	char *s, *s2;

	info_strings_loaded = true;
	info_strings_count = 0;
	info_strings_table = NULL;	//don't keep a stale table around if the file turns out to be missing.
	info_strings_list = FS_LoadMallocFile("infolist.txt", NULL);
	if (!info_strings_list)
		return;

	//count new lines
	for (s = info_strings_list; *s; s++)
	{
		if (*s == '\n')
			info_strings_count++;
	}
	//a final line without a trailing newline is still a string.
	if (s > info_strings_list && s[-1] != '\n')
		info_strings_count++;

	info_strings_table = BZ_Malloc(sizeof(char*)*info_strings_count);

	s = info_strings_list;
	for (i = 0; i < info_strings_count; i++)
	{
		info_strings_table[i] = s;

		s2 = strchr(s, '\n');
		if (!s2)
			s2 = s + strlen(s);	//unterminated last line: runs to the end of the file.

		for (; s < s2; s++)
		{
			if (*s == '\r')
				*s = '\0';
			else if (*s == '^' || *s == '@') //becomes new line
				*s = '\n';
		}

		if (!*s2)
			break;	//that was the end of the file.
		*s2 = '\0';
		s = s2+1;
	}
}
//returns string number num from infolist.txt (loading the file on first use), or "BAD STRING" if num is out of range.
char *T_GetInfoString(int num)
{
	if (!info_strings_loaded)
		T_LoadInfoString();

	if (num >= 0 && num < info_strings_count)
		return info_strings_table[num];
	return "BAD STRING";
}
#endif
#endif
//one translation entry. PO_AddText allocates it together with the text it refers to, in a single block.
struct poline_s
{
bucket_t buck; //hash-table node handed to Hash_Add.
struct poline_s *next; //next entry in the owning po_s's 'lines' list (walked by PO_Close).
char *orig; //presumably the untranslated text that serves as the lookup key - confirm against PO_AddText.
char *translated; //the replacement text returned by PO_GetText.
};
//a set of translations: a hash table for lookups plus a linked list of all entries for cleanup.
//PO_Create allocates the hash buckets directly after this struct.
struct po_s
{
hashtable_t hash;
struct poline_s *lines;
};
//adds one orig->trans mapping to po and returns the new entry.
//input is assumed to be utf-8, but that's not always what quake uses. on the plus side we do have our own silly markup to handle unicode (and colours etc).
//both strings are copied into the same allocation as the entry itself, so the caller's buffers may be reused afterwards.
//unless com_parseutf8 is 1, the translation is re-encoded through unicode_encode (with markup enabled).
static struct poline_s *PO_AddText(struct po_s *po, const char *orig, const char *trans)
{
	size_t olen = strlen(orig)+1;
	size_t tlen;
	struct poline_s *line;
	const char *s;
	char temp[64];
	int reencode = (com_parseutf8.ival != 1);

	//figure out the required length for the encoding we're actually going to use
	if (reencode)
	{
		for (s = trans, tlen = 0; *s; )
		{
			unsigned int err;
			unsigned int chr = utf8_decode(&err, s, &s);
			tlen += unicode_encode(temp, chr, sizeof(temp), true);
		}
		tlen++;	//null terminator
	}
	else
		tlen = strlen(trans)+1;

	//layout: [struct poline_s][orig + nul][translated + nul]
	line = Z_Malloc(sizeof(*line)+olen+tlen);
	line->orig = (char*)(line+1);	//previously never filled in, leaving the member unusable.
	memcpy(line->orig, orig, olen);
	line->translated = line->orig+olen;

	if (reencode)
	{
		//do the loop again now we know we've got enough space for it.
		//(each character was measured against sizeof(temp) above, so the same limit yields the same sizes here.)
		for (s = trans, tlen = 0; *s; )
		{
			unsigned int err;
			unsigned int chr = utf8_decode(&err, s, &s);
			tlen += unicode_encode(line->translated+tlen, chr, sizeof(temp), true);
		}
		line->translated[tlen] = 0;
	}
	else
		memcpy(line->translated, trans, tlen);

	//the hash key must be our own copy, not the caller's buffer.
	Hash_Add(&po->hash, line->orig, line, &line->buck);

	line->next = po->lines;
	po->lines = line;
	return line;
}
//reads a run of adjacent quoted C strings ("foo" "bar" ...) starting at *cursor, concatenating them into out.
//returns the number of bytes COM_ParseCString reported writing. *cursor is left at the first thing that isn't whitespace or a quoted string.
//out is emptied first, so a keyword with no string after it never inherits text from an earlier entry.
static size_t PO_ParseQuotedRun(char **cursor, char *end, char *out, size_t outsize)
{
	char *in = *cursor;
	size_t total = 0;
	size_t got;

	out[0] = 0;
	for (;;)
	{
		while (*in == ' ' || *in == '\n' || *in == '\r' || *in == '\t')
			in++;
		if (*in != '\"')
			break;

		got = 0;
		in = COM_ParseCString(in, out+total, outsize - total, &got);
		if (!in)
		{	//parser gave up; treat as end of input rather than dereferencing NULL.
			in = end;
			break;
		}
		total += got;
	}
	*cursor = in;
	return total;
}

//merges the translations found in file into po, then closes the file. a NULL file is ignored.
//two formats are accepted: gettext .mo data starting with the magic number 0x950412de, and textual msgid/msgstr pairs.
//textual entries with an empty msgid or msgstr are skipped unless -translatetoblank was given on the command line.
void PO_Merge(struct po_s *po, vfsfile_t *file)
{
	char *instart, *in, *end;
	int inlen;
	char msgid[32768];
	char msgstr[32768];
	struct {
		quint32_t magic;
		quint32_t revision;
		quint32_t numstrings;
		quint32_t offset_orig;
		quint32_t offset_trans;
//		quint32_t hashsize;
//		quint32_t offset_hash;
	} *moheader;
	qboolean allowblanks = !!COM_CheckParm("-translatetoblank");

	if (!file)
		return;

	//slurp the whole file, with a null terminator so the text parser can't run off the end.
	inlen = VFS_GETLEN(file);
	if (inlen < 0)
		inlen = 0;
	instart = in = BZ_Malloc(inlen+1);
	VFS_READ(file, in, inlen);
	in[inlen] = 0;
	VFS_CLOSE(file);

	moheader = (void*)in;
	if ((size_t)inlen >= sizeof(*moheader) && moheader->magic == 0x950412de)
	{
		struct
		{
			quint32_t length;
			quint32_t offset;
		} *src, *dst;
		size_t total = (size_t)inlen;
		quint32_t count = moheader->numstrings;
		quint32_t i;

		//the file is external data: both descriptor tables must lie entirely inside it.
		if (moheader->offset_orig <= total && moheader->offset_trans <= total &&
			count <= (total - moheader->offset_orig) / sizeof(*src) &&
			count <= (total - moheader->offset_trans) / sizeof(*dst))
		{
			src = (void*)(in+moheader->offset_orig);
			dst = (void*)(in+moheader->offset_trans);
			for (i = 0; i < count; i++, src++, dst++)
			{
				//an offset equal to 'total' lands on our own terminator (empty string); anything beyond is bogus.
				if (src->offset > total || dst->offset > total)
					continue;
				PO_AddText(po, in+src->offset, in+dst->offset);
			}
		}
	}
	else
	{
		//a msgstr may legitimately be seen before any msgid in a malformed file; don't read junk.
		msgid[0] = 0;
		msgstr[0] = 0;

		end = in + inlen;
		while(in < end)
		{
			while(*in == ' ' || *in == '\n' || *in == '\r' || *in == '\t')
				in++;
			if (in >= end)
				break;	//only trailing whitespace was left. stepping further would walk past the terminator.

			if (*in == '#')
			{	//comment line
				while (*in && *in != '\n')
					in++;
			}
			else if (!strncmp(in, "msgid", 5) && (in[5] == ' ' || in[5] == '\t' || in[5] == '\r' || in[5] == '\n'))
			{
				in += 5;
				PO_ParseQuotedRun(&in, end, msgid, sizeof(msgid));
			}
			else if (!strncmp(in, "msgstr", 6) && (in[6] == ' ' || in[6] == '\t' || in[6] == '\r' || in[6] == '\n'))
			{
				size_t start;
				in += 6;
				start = PO_ParseQuotedRun(&in, end, msgstr, sizeof(msgstr));

				if ((*msgid && start) || allowblanks)
					PO_AddText(po, msgid, msgstr);
#ifdef COLOURUNTRANSLATEDSTRINGS
				else if (!start)
				{
					char temp[1024];
					int i;
					Q_snprintfz(temp, sizeof(temp), "%s", *msgstr?msgstr:msgid);
					for (i = 0; temp[i]; i++)
					{
						if (temp[i] == '%')
						{	//leave format specifiers alone.
							while (temp[i] > ' ')
								i++;
							if (!temp[i])
								break;	//the specifier ran to the end of the string; don't step over the terminator.
						}
						else if (temp[i] >= ' ')
							temp[i] |= 0x80;
					}
					PO_AddText(po, msgid, temp);
				}
#endif
			}
			else
			{
				//some sort of junk?
				in++;
				while (*in && *in != '\n')
					in++;
			}
		}
	}

	BZ_Free(instart);
}
//allocates an empty translation set. the hash buckets live in the same allocation, directly after the struct.
struct po_s *PO_Create(void)
{
	const unsigned int numbuckets = 1024;
	struct po_s *table = Z_Malloc(sizeof(*table) + Hash_BytesForBuckets(numbuckets));

	Hash_InitTable(&table->hash, numbuckets, table+1);
	return table;
}
//frees a translation set along with every entry in it. NULL is accepted and ignored.
void PO_Close(struct po_s *po)
{
	struct poline_s *cur, *following;

	if (po)
	{
		for (cur = po->lines; cur; cur = following)
		{
			following = cur->next;	//grab the link before the entry goes away.
			Z_Free(cur);
		}
		Z_Free(po);
	}
}
//looks msg up in po and returns its translation, or msg itself when there is none (or when po or msg is NULL).
//with COLOURMISSINGSTRINGS defined, a missing entry is instead synthesised from msg with its ascii characters
//remapped, added to po, and returned - so untranslated text stands out on screen.
const char *PO_GetText(struct po_s *po, const char *msg)
{
	struct poline_s *line;
	if (!po || !msg)
		return msg;
	line = Hash_Get(&po->hash, msg);

#ifdef COLOURMISSINGSTRINGS
	if (!line)
	{
		char temp[1024];
		int i;
		const char *in = msg;
		for (i = 0; *in && i < sizeof(temp)-1; )
		{
			if (*in == '%')
			{	//don't mess up % formatting too much
				while (*in > ' ' && i < sizeof(temp)-1)
					temp[i++] = *in++;
			}
			else if (*in > ' ' && *in < 128)	//otherwise force any ascii chars to the 0xe0XX range so it doesn't use any freetype fonts so its instantly recognisable as bad.
				i += utf8_encode(temp+i, *in++|0xe080, sizeof(temp)-1-i);	//(this test used to compare the pointer itself against ' ', which is always true.)
			else
				temp[i++] = *in++;	//don't mess with any c0/extended codepoints
		}
		temp[i] = 0;
		line = PO_AddText(po, msg, temp);
	}
#endif

	if (line)
		return line->translated;
	return msg;
}
//loads a rerelease-style localisation file into po. each useful line looks like:
//	FOO = "CString"
//and is stored as the mapping "$FOO" -> CString. lines whose second token is not "=" are skipped.
//fmt is a printf-style path with one %s for the language name. tried in order:
//langname itself, a full language name guessed from a two-letter code, then "english".
static void PO_Merge_Rerelease(struct po_s *po, const char *langname, const char *fmt)
{
	//maps formal locale prefixes to the rerelease's arbitrary names (at least from the perspective of anyone who doesn't speak english).
	static const struct
	{
		const char *code;
		const char *name;
	} guesses[] =
	{
		{"fr", "french"},
		{"de", "german"},
		{"it", "italian"},
		{"ru", "russian"},
		{"es", "spanish"},
	};
	char line[32768];
	char key[256];
	char val[32768];
	char *s;
	const char *guess = NULL;
	size_t g;
	vfsfile_t *file = NULL;

	if (*langname) //use system locale names
		file = FS_OpenVFS(va(fmt, langname), "rb", FS_GAME);

	if (!file) //make a guess
	{
		//only for plain "xx", "xx-YY" or "xx_YY" style codes.
		int twoletter = langname[0] && langname[1] && (!langname[2] || langname[2] == '-' || langname[2] == '_');
		if (twoletter)
		{
			for (g = 0; g < sizeof(guesses)/sizeof(guesses[0]); g++)
			{
				if (!strncmp(langname, guesses[g].code, 2))
				{
					guess = guesses[g].name;
					break;
				}
			}
		}
		if (guess)
			file = FS_OpenVFS(va(fmt, guess), "rb", FS_GAME);
	}

	if (!file) //fall back on misnamed american, for lack of a better default.
		file = FS_OpenVFS(va(fmt, "english"), "rb", FS_GAME);

	if (!file)
		return;

	key[0] = '$';	//keys are looked up with their leading $ intact.
	while(VFS_GETS(file, line, sizeof(line)))
	{
		s = COM_ParseOut(line, key+1, sizeof(key)-1);
		s = COM_ParseOut(s, val, sizeof(val));
		if (!strcmp(val, "="))
		{
			s = COM_ParseCString(s, val, sizeof(val), NULL);
			if (s)
				PO_AddText(po, key, val);
		}
	}
	VFS_CLOSE(file);
}
const char *TL_Translate(int language, const char *src)
{
if (*src == '$')
{
if (!languages[language].po_qex)
{
char lang[64], *h;
vfsfile_t *f = NULL;
languages[language].po_qex = PO_Create();
PO_Merge_Rerelease(languages[language].po_qex, languages[language].name, "localization/loc_%s.txt");
Q_strncpyz(lang, languages[language].name, sizeof(lang));
while ((h = strchr(lang, '-')))
*h = '_'; //standardise it
if (*lang)
f = FS_OpenVFS(va("localisation/%s.po", lang), "rb", FS_GAME); //long/specific form
if (!f)
{
if ((h = strchr(lang, '_')))
{
*h = 0;
if (*lang)
f = FS_OpenVFS(va("localisation/%s.po", lang), "rb", FS_GAME); //short/general form
}
}
if (f)
PO_Merge(languages[language].po_qex, f);
}
src = PO_GetText(languages[language].po_qex, src);
}
return src;
}
//expands a "{N}"-style format string into out (always null terminated, never more than outsize bytes).
//arg[0] is the format (translated first); {0} refers to arg[1], {1} to arg[2], and so on. each argument is itself translated.
//"{{" and "}}" produce literal braces. "{N,W}" limits the argument to |W| characters - note that no padding is added,
//despite the alignment the width implies. "{N:fmt}" is accepted but the fmt part is ignored.
//missing or NULL arguments expand to nothing. a malformed placeholder stops the expansion at that point.
void TL_Reformat(int language, char *out, size_t outsize, size_t numargs, const char **arg)
{
	const char *fmt;
	const char *a;
	size_t alen;

	if (!outsize)
		return;	//no room for even the terminator. decrementing would wrap around and unleash an unbounded write.
	outsize--;	//reserve the terminator.

	fmt = (numargs>0&&arg[0])?arg[0]:"";
	fmt = TL_Translate(language, fmt);

	while (outsize > 0 && *fmt)
	{
		if (*fmt == '{' && fmt[1] == '{')
			*out++ = '{', fmt+=2, outsize--;
		else if (*fmt == '}' && fmt[1] == '}')
			*out++ = '}', fmt+=2, outsize--;
		else if (*fmt == '{')
		{
			//placeholder numbers are 0-based but arg[0] is the format itself, hence the +1.
			unsigned int index = strtoul(fmt+1, (char**)&fmt, 10)+1;
			int size = 0;

			if (*fmt == ',')
				size = strtol(fmt+1, (char**)&fmt, 10);
			if (*fmt == ':')
			{	//formatting, which we don't support because its all strings.
				fmt = fmt+1;
				while (*fmt && *fmt != '}')
					fmt++;
			}
			if (*fmt == '}')
				fmt++;
			else
				break;	//some formatting error

			if (index >= numargs || !arg[index])
				a = "";
			else
				a = TL_Translate(language, arg[index]);

			alen = strlen(a);
			if (alen > outsize)
				alen = outsize;
			if (size != 0)
			{	//positive=right aligned, negative=left aligned. either way only the length limit is applied.
				//unsigned negation avoids overflow for the most negative width.
				size_t limit = (size > 0) ? (size_t)size : (size_t)0 - (size_t)size;
				if (alen > limit)
					alen = limit;
			}
			memcpy(out, a, alen);
			out += alen;
			outsize -= alen;
		}
		else
			*out++ = *fmt++, outsize--;
	}
	*out = 0;
}
#include <ctype.h>
//each non-NULL filter[c] points at the packed word list for lead byte c: repeated {length byte, that many bytes of text
//(the word minus its first character)}, ended by a 0 byte. FilterInit builds these lists.
static qbyte *filter[256]; //one list per lead char, simple optimisation instead of some big decision tree.
//allocation holding all of the packed lists; released by FilterPurge.
static qbyte *filtermem;
//qsort callback over an array of string pointers: orders them in DESCENDING strcmp order,
//so words sharing a prefix come out longest first.
static int FilterCompareWords(const void *v1, const void *v2)
{
	const char *const *lhs = v1;
	const char *const *rhs = v2;

	//operands deliberately swapped to invert the order.
	return strcmp(*rhs, *lhs);
}
static void FilterPurge(void)
{
memset(filter, 0, sizeof(filter));
free(filtermem);
filtermem = NULL;
}
static void FilterInit(const char *file)
{
qbyte *tempmem = malloc(strlen(file)+1);
qbyte *tempmemstart = tempmem;
const char **words;
size_t count = 1, i, l;
size_t bytes;
const char *c;
FilterPurge();
for (c = file; *c; c++)
if (*c == '\n')
count++;
words = malloc(sizeof(qbyte*)*count);
count = 0;
for (c = file; *c; )
{
while (*c == '\n')
c++; //don't add 0-byte strings...
words[count] = tempmem;
for (; *c; c++)
{
if (*c == ' ')
continue; //block even if they omit the spaces.
if (*c == '\n')
break;
*tempmem++ = tolower(*c);
}
*tempmem++ = 0;
count++;
}
qsort(words, count, sizeof(words[0]), FilterCompareWords); //sort by lead byte... and longest first...
i = 0;
for (i = 0, bytes = 0; i < count; i++)
bytes += strlen(words[i]);
bytes += countof(filter);
filtermem = malloc(bytes);
for (l = countof(filter), i = 0; l-- > 0; )
{
if (i < count && words[i][0] == l)
{
filter[l] = filtermem;
while (i < count && *words[i] == l)
{ //second copy... urgh. can forget the first char and replace with a length.
*filtermem++ = strlen(words[i]+1);
memcpy(filtermem, words[i]+1, filtermem[-1]); //just the text, no null needed. tighly packed.
filtermem += filtermem[-1];
i++;
}
*filtermem++ = 0;
}
else
filter[l] = NULL;
}
free(tempmemstart);
free(words);
}
//characters treated as separators: skipped while comparing against a filtered word, and required (or end of string) after a match.
//note: the argument is evaluated several times and is not parenthesised, so only pass simple expressions.
#define whiteish(c) (c == ',' || c == '.' || c == ' ' || c == '\t' || c == '\r' || c == '\n')
//copies 'in' to 'outbuf', overwriting anything that matches a filtered word with characters from "#*@$".
//returns outbuf. the output has the same length as the input; calls Sys_Error if strlen(in) >= bufsize.
//words are only tested where the outer loop resumes: at the start of the string and just after each separator that was copied.
char *FilterObsceneString(const qbyte *in, char *outbuf, size_t bufsize)
{ //input must be utf-8... if there's any ^ crap in there then strip it first. no bypassing filters with colour codes.
char *ret = outbuf;
if (strlen(in) >= bufsize)
Sys_Error("output buffer too small!");
restart:
while (*in)
{
qbyte c = tolower(*in);
if (filter[c])
{
qbyte *m = filter[c];
while (*m)
{ //for each word starting with this letter...
const qbyte *test = in+1; //input cursor; the lead char already matched via filter[c].
qbyte len = *m; //bytes of this word still to match.
const qbyte *match = m+1;
m += 1+len; //step to the next packed word now, so a failed comparison can simply break out.
while (*test)
{ //don't let 'foo bar' through when 'foobar' is a bad word.
if (whiteish(*test))
{
test++;
continue;
}
if (tolower(*test) == *match)
{
test++, match++;
if (--len == 0)
{ //a match.
//...but only if the word ends here, so longer innocent words are left alone.
if (*test && !whiteish(*test))
break; //assassinate!
while (test > in)
{ //censor it.
//one replacement char per input byte, including any separators that were skipped over.
*outbuf = "#*@$"[(outbuf-ret)&3];
outbuf++;
in++;
}
goto restart; //double breaks suck
}
continue;
}
break;
}
}
}
//no filtered word here: copy up to and including the next separator, then test again.
while (*in)
{
if (whiteish(*in))
{
*outbuf++ = *in++;
break;
}
*outbuf++ = *in++;
}
}
*outbuf++ = 0; //make sure its null terminated.
return ret;
}