etlegacy-libs/tinygettext/.svn/text-base/language.cpp.svn-base

569 lines
21 KiB
Text
Raw Normal View History

2013-03-31 22:09:49 +00:00
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "language.hpp"
#include <map>
#include <assert.h>
#include <vector>
namespace tinygettext {
/**
 * One entry of the built-in language table: a (language, country,
 * modifier) triple together with its human readable name.
 */
struct LanguageSpec {
  /** Language code: "de", "en", ... */
  const char* language;

  /** Country code: "BR", "DE", ..., can be 0 */
  const char* country;

  /** Modifier/Variant: "Latn", "ije", "latin"..., can be 0 */
  const char* modifier;

  /** Language name: "German", "English", "French", ... */
  const char* name;
};

/**
 * Language Definitions
 *
 * The table is terminated by an entry whose language is NULL. All names
 * are kept in plain ASCII so they do not depend on the encoding of this
 * source file.
 */
//*{
LanguageSpec languages[] = {
  { "aa", 0, 0, "Afar" },
  { "af", 0, 0, "Afrikaans" },
  { "af", "ZA", 0, "Afrikaans (South Africa)" },
  { "am", 0, 0, "Amharic" },
  { "ar", 0, 0, "Arabic" },
  { "ar", "AR", 0, "Arabic (Argentina)" },
  { "ar", "OM", 0, "Arabic (Oman)" },
  { "ar", "SA", 0, "Arabic (Saudi Arabia)" },
  { "ar", "SY", 0, "Arabic (Syrian Arab Republic)" },
  { "ar", "TN", 0, "Arabic (Tunisia)" },
  { "as", 0, 0, "Assamese" },
  { "ast",0, 0, "Asturian" },
  { "ay", 0, 0, "Aymara" },
  { "az", 0, 0, "Azerbaijani" },
  { "az", "IR", 0, "Azerbaijani (Iran)" },
  { "be", 0, 0, "Belarusian" },
  { "be", 0, "latin", "Belarusian" },
  { "bg", 0, 0, "Bulgarian" },
  { "bg", "BG", 0, "Bulgarian (Bulgaria)" },
  { "bn", 0, 0, "Bengali" },
  { "bn", "BD", 0, "Bengali (Bangladesh)" },
  { "bn", "IN", 0, "Bengali (India)" },
  { "bo", 0, 0, "Tibetan" },
  { "br", 0, 0, "Breton" },
  { "bs", 0, 0, "Bosnian" },
  { "bs", "BA", 0, "Bosnian (Bosnia/Herzegovina)"},
  { "bs", "BS", 0, "Bosnian (Bahamas)" },
  { "ca", "ES", "valencia", "Catalan (valencia)" },
  { "ca", "ES", 0, "Catalan (Spain)" },
  { "ca", 0, "valencia", "Catalan (valencia)" },
  { "ca", 0, 0, "Catalan" },
  { "co", 0, 0, "Corsican" },
  { "cs", 0, 0, "Czech" },
  { "cs", "CZ", 0, "Czech (Czech Republic)" },
  { "cy", 0, 0, "Welsh" },
  { "cy", "GB", 0, "Welsh (Great Britain)" },
  { "cz", 0, 0, "Unknown language" },
  { "da", 0, 0, "Danish" },
  { "da", "DK", 0, "Danish (Denmark)" },
  { "de", 0, 0, "German" },
  { "de", "AT", 0, "German (Austria)" },
  { "de", "CH", 0, "German (Switzerland)" },
  { "de", "DE", 0, "German (Germany)" },
  { "dk", 0, 0, "Unknown language" },
  { "dz", 0, 0, "Dzongkha" },
  { "el", 0, 0, "Greek" },
  { "el", "GR", 0, "Greek (Greece)" },
  { "en", 0, 0, "English" },
  { "en", "AU", 0, "English (Australia)" },
  { "en", "CA", 0, "English (Canada)" },
  { "en", "GB", 0, "English (Great Britain)" },
  { "en", "US", 0, "English (United States)" },
  { "en", "ZA", 0, "English (South Africa)" },
  { "en", 0, "boldquot", "English" },
  { "en", 0, "quot", "English" },
  { "en", "US", "piglatin", "English" },
  { "eo", 0, 0, "Esperanto" },
  { "es", 0, 0, "Spanish" },
  { "es", "AR", 0, "Spanish (Argentina)" },
  { "es", "CL", 0, "Spanish (Chile)" },
  { "es", "CO", 0, "Spanish (Colombia)" },
  { "es", "CR", 0, "Spanish (Costa Rica)" },
  { "es", "DO", 0, "Spanish (Dominican Republic)"},
  { "es", "EC", 0, "Spanish (Ecuador)" },
  { "es", "ES", 0, "Spanish (Spain)" },
  { "es", "GT", 0, "Spanish (Guatemala)" },
  { "es", "HN", 0, "Spanish (Honduras)" },
  { "es", "LA", 0, "Spanish (Laos)" },
  { "es", "MX", 0, "Spanish (Mexico)" },
  { "es", "NI", 0, "Spanish (Nicaragua)" },
  { "es", "PA", 0, "Spanish (Panama)" },
  { "es", "PE", 0, "Spanish (Peru)" },
  { "es", "PR", 0, "Spanish (Puerto Rico)" },
  { "es", "SV", 0, "Spanish (El Salvador)" },
  { "es", "UY", 0, "Spanish (Uruguay)" },
  { "es", "VE", 0, "Spanish (Venezuela)" },
  { "et", 0, 0, "Estonian" },
  { "et", "EE", 0, "Estonian (Estonia)" },
  { "et", "ET", 0, "Estonian (Ethiopia)" },
  { "eu", 0, 0, "Basque" },
  { "eu", "ES", 0, "Basque (Spain)" },
  { "fa", 0, 0, "Persian" },
  { "fa", "AF", 0, "Persian (Afghanistan)" },
  { "fa", "IR", 0, "Persian (Iran)" },
  { "fi", 0, 0, "Finnish" },
  { "fi", "FI", 0, "Finnish (Finland)" },
  { "fo", 0, 0, "Faroese" },
  { "fo", "FO", 0, "Faeroese (Faroe Islands)" },
  { "fr", 0, 0, "French" },
  { "fr", "CA", 0, "French (Canada)" },
  { "fr", "CH", 0, "French (Switzerland)" },
  { "fr", "FR", 0, "French (France)" },
  { "fr", "LU", 0, "French (Luxembourg)" },
  { "fy", 0, 0, "Frisian" },
  { "ga", 0, 0, "Irish" },
  { "gd", 0, 0, "Gaelic Scots" },
  { "gl", 0, 0, "Galician" },
  { "gl", "ES", 0, "Galician (Spain)" },
  { "gn", 0, 0, "Guarani" },
  { "gu", 0, 0, "Gujarati" },
  { "gv", 0, 0, "Manx" },
  { "ha", 0, 0, "Hausa" },
  { "he", 0, 0, "Hebrew" },
  { "he", "IL", 0, "Hebrew (Israel)" },
  { "hi", 0, 0, "Hindi" },
  { "hr", 0, 0, "Croatian" },
  { "hr", "HR", 0, "Croatian (Croatia)" },
  { "hu", 0, 0, "Hungarian" },
  { "hu", "HU", 0, "Hungarian (Hungary)" },
  { "hy", 0, 0, "Armenian" },
  { "ia", 0, 0, "Interlingua" },
  { "id", 0, 0, "Indonesian" },
  { "id", "ID", 0, "Indonesian (Indonesia)" },
  { "is", 0, 0, "Icelandic" },
  { "is", "IS", 0, "Icelandic (Iceland)" },
  { "it", 0, 0, "Italian" },
  { "it", "CH", 0, "Italian (Switzerland)" },
  { "it", "IT", 0, "Italian (Italy)" },
  { "iu", 0, 0, "Inuktitut" },
  { "ja", 0, 0, "Japanese" },
  { "ja", "JP", 0, "Japanese (Japan)" },
  { "ka", 0, 0, "Georgian" },
  { "kk", 0, 0, "Kazakh" },
  { "kl", 0, 0, "Kalaallisut" },
  { "km", 0, 0, "Khmer" },
  { "km", "KH", 0, "Khmer (Cambodia)" },
  { "kn", 0, 0, "Kannada" },
  { "ko", 0, 0, "Korean" },
  { "ko", "KR", 0, "Korean (Korea)" },
  { "ku", 0, 0, "Kurdish" },
  { "kw", 0, 0, "Cornish" },
  { "ky", 0, 0, "Kirghiz" },
  { "la", 0, 0, "Latin" },
  { "lo", 0, 0, "Lao" },
  { "lt", 0, 0, "Lithuanian" },
  { "lt", "LT", 0, "Lithuanian (Lithuania)" },
  { "lv", 0, 0, "Latvian" },
  { "lv", "LV", 0, "Latvian (Latvia)" },
  { "mg", 0, 0, "Malagasy" },
  { "mi", 0, 0, "Maori" },
  { "mk", 0, 0, "Macedonian" },
  { "mk", "MK", 0, "Macedonian (Macedonia)" },
  { "ml", 0, 0, "Malayalam" },
  { "mn", 0, 0, "Mongolian" },
  { "mr", 0, 0, "Marathi" },
  { "ms", 0, 0, "Malay" },
  { "ms", "MY", 0, "Malay (Malaysia)" },
  { "mt", 0, 0, "Maltese" },
  { "my", 0, 0, "Burmese" },
  { "my", "MM", 0, "Burmese (Myanmar)" },
  { "nb", 0, 0, "Norwegian Bokmal" },
  { "nb", "NO", 0, "Norwegian Bokmal (Norway)" },
  { "ne", 0, 0, "Nepali" },
  { "nl", 0, 0, "Dutch" },
  { "nl", "BE", 0, "Dutch (Belgium)" },
  { "nl", "NL", 0, "Dutch (Netherlands)" },
  { "nn", 0, 0, "Norwegian Nynorsk" },
  { "nn", "NO", 0, "Norwegian Nynorsk (Norway)" },
  { "no", 0, 0, "Norwegian" },
  { "no", "NO", 0, "Norwegian (Norway)" },
  { "no", "NY", 0, "Norwegian (NY)" },
  { "nr", 0, 0, "Ndebele, South" },
  { "oc", 0, 0, "Occitan post 1500" },
  { "om", 0, 0, "Oromo" },
  { "or", 0, 0, "Oriya" },
  { "pa", 0, 0, "Punjabi" },
  { "pl", 0, 0, "Polish" },
  { "pl", "PL", 0, "Polish (Poland)" },
  { "ps", 0, 0, "Pashto" },
  { "pt", 0, 0, "Portuguese" },
  { "pt", "BR", 0, "Brazilian" },
  { "pt", "PT", 0, "Portuguese (Portugal)" },
  { "qu", 0, 0, "Quechua" },
  { "rm", 0, 0, "Rhaeto-Romance" },
  { "ro", 0, 0, "Romanian" },
  { "ro", "RO", 0, "Romanian (Romania)" },
  { "ru", 0, 0, "Russian" },
  { "ru", "RU", 0, "Russian (Russia)" },
  { "rw", 0, 0, "Kinyarwanda" },
  { "sa", 0, 0, "Sanskrit" },
  { "sd", 0, 0, "Sindhi" },
  { "se", 0, 0, "Sami" },
  { "se", "NO", 0, "Sami (Norway)" },
  { "si", 0, 0, "Sinhalese" },
  { "sk", 0, 0, "Slovak" },
  { "sk", "SK", 0, "Slovak (Slovakia)" },
  { "sl", 0, 0, "Slovenian" },
  { "sl", "SI", 0, "Slovenian (Slovenia)" },
  { "sl", "SL", 0, "Slovenian (Sierra Leone)" },
  { "sm", 0, 0, "Samoan" },
  { "so", 0, 0, "Somali" },
  { "sp", 0, 0, "Unknown language" },
  { "sq", 0, 0, "Albanian" },
  { "sq", "AL", 0, "Albanian (Albania)" },
  { "sr", 0, 0, "Serbian" },
  { "sr", "YU", 0, "Serbian (Yugoslavia)" },
  { "sr", 0,"ije", "Serbian" },
  { "sr", 0, "latin", "Serbian" },
  { "sr", 0, "Latn", "Serbian" },
  { "ss", 0, 0, "Swati" },
  { "st", 0, 0, "Sotho" },
  { "sv", 0, 0, "Swedish" },
  { "sv", "SE", 0, "Swedish (Sweden)" },
  { "sv", "SV", 0, "Swedish (El Salvador)" },
  { "sw", 0, 0, "Swahili" },
  { "ta", 0, 0, "Tamil" },
  { "te", 0, 0, "Telugu" },
  { "tg", 0, 0, "Tajik" },
  { "th", 0, 0, "Thai" },
  { "th", "TH", 0, "Thai (Thailand)" },
  { "ti", 0, 0, "Tigrinya" },
  { "tk", 0, 0, "Turkmen" },
  { "tl", 0, 0, "Tagalog" },
  { "to", 0, 0, "Tonga" },
  { "tr", 0, 0, "Turkish" },
  { "tr", "TR", 0, "Turkish (Turkey)" },
  { "ts", 0, 0, "Tsonga" },
  { "tt", 0, 0, "Tatar" },
  { "ug", 0, 0, "Uighur" },
  { "uk", 0, 0, "Ukrainian" },
  { "uk", "UA", 0, "Ukrainian (Ukraine)" },
  { "ur", 0, 0, "Urdu" },
  { "ur", "PK", 0, "Urdu (Pakistan)" },
  { "uz", 0, 0, "Uzbek" },
  { "uz", 0, "cyrillic", "Uzbek" },
  { "vi", 0, 0, "Vietnamese" },
  { "vi", "VN", 0, "Vietnamese (Vietnam)" },
  { "wa", 0, 0, "Walloon" },
  { "wo", 0, 0, "Wolof" },
  { "xh", 0, 0, "Xhosa" },
  { "yi", 0, 0, "Yiddish" },
  { "yo", 0, 0, "Yoruba" },
  { "zh", 0, 0, "Chinese" },
  { "zh", "CN", 0, "Chinese (simplified)" },
  { "zh", "HK", 0, "Chinese (Hong Kong)" },
  { "zh", "TW", 0, "Chinese (traditional)" },
  { "zu", 0, 0, "Zulu" },
  { NULL, 0, 0, NULL }
};
//*}
/**
 * Translate a locale alias ("german", "no_NO", ...) into the locale
 * string it stands for ("de_DE.ISO-8859-1", "nb_NO.ISO-8859-1", ...).
 *
 * The lookup is case-insensitive for ASCII letters: @p name is
 * lowercased before it is compared against the alias table, so every
 * key in the table has to be written in lowercase.
 *
 * @param name  alias or locale string to resolve
 * @return the mapped locale string, or @p name unchanged when it is
 *         not a known alias
 */
std::string
resolve_language_alias(const std::string& name)
{
  typedef std::map<std::string, std::string> Aliases;
  static Aliases language_aliases;
  if (language_aliases.empty())
  {
    // FIXME: Many of those are not useful for us, since we leave
    // encoding to the app, not to the language, we could/should
    // also match against all language names, not just aliases from
    // locale.alias

    // Aliases taken from /etc/locale.alias
    //
    // Keys are lowercase on purpose, an entry containing uppercase
    // letters could never match the lowercased lookup key.
    // Non-ASCII keys are spelled as byte escapes, once in ISO-8859-1
    // and once in UTF-8, so they do not depend on the encoding of this
    // source file.
    language_aliases["bokmal"]           = "nb_NO.ISO-8859-1";
    language_aliases["bokm\xe5l"]        = "nb_NO.ISO-8859-1"; // ISO-8859-1
    language_aliases["bokm\xc3\xa5l"]    = "nb_NO.ISO-8859-1"; // UTF-8
    language_aliases["catalan"]          = "ca_ES.ISO-8859-1";
    language_aliases["croatian"]         = "hr_HR.ISO-8859-2";
    language_aliases["czech"]            = "cs_CZ.ISO-8859-2";
    language_aliases["danish"]           = "da_DK.ISO-8859-1";
    language_aliases["dansk"]            = "da_DK.ISO-8859-1";
    language_aliases["deutsch"]          = "de_DE.ISO-8859-1";
    language_aliases["dutch"]            = "nl_NL.ISO-8859-1";
    language_aliases["eesti"]            = "et_EE.ISO-8859-1";
    language_aliases["estonian"]         = "et_EE.ISO-8859-1";
    language_aliases["finnish"]          = "fi_FI.ISO-8859-1";
    // The literal is split so that 'a' is not read as part of the hex escape
    language_aliases["fran\xe7" "ais"]     = "fr_FR.ISO-8859-1"; // ISO-8859-1
    language_aliases["fran\xc3\xa7" "ais"] = "fr_FR.ISO-8859-1"; // UTF-8
    language_aliases["french"]           = "fr_FR.ISO-8859-1";
    language_aliases["galego"]           = "gl_ES.ISO-8859-1";
    language_aliases["galician"]         = "gl_ES.ISO-8859-1";
    language_aliases["german"]           = "de_DE.ISO-8859-1";
    language_aliases["greek"]            = "el_GR.ISO-8859-7";
    language_aliases["hebrew"]           = "he_IL.ISO-8859-8";
    language_aliases["hrvatski"]         = "hr_HR.ISO-8859-2";
    language_aliases["hungarian"]        = "hu_HU.ISO-8859-2";
    language_aliases["icelandic"]        = "is_IS.ISO-8859-1";
    language_aliases["italian"]          = "it_IT.ISO-8859-1";
    language_aliases["japanese"]         = "ja_JP.eucJP";
    language_aliases["japanese.euc"]     = "ja_JP.eucJP";
    language_aliases["ja_jp"]            = "ja_JP.eucJP";
    language_aliases["ja_jp.ujis"]       = "ja_JP.eucJP";
    language_aliases["japanese.sjis"]    = "ja_JP.SJIS";
    language_aliases["korean"]           = "ko_KR.eucKR";
    language_aliases["korean.euc"]       = "ko_KR.eucKR";
    language_aliases["ko_kr"]            = "ko_KR.eucKR";
    language_aliases["lithuanian"]       = "lt_LT.ISO-8859-13";
    language_aliases["no_no"]            = "nb_NO.ISO-8859-1";
    language_aliases["no_no.iso-8859-1"] = "nb_NO.ISO-8859-1";
    language_aliases["norwegian"]        = "nb_NO.ISO-8859-1";
    language_aliases["nynorsk"]          = "nn_NO.ISO-8859-1";
    language_aliases["polish"]           = "pl_PL.ISO-8859-2";
    language_aliases["portuguese"]       = "pt_PT.ISO-8859-1";
    language_aliases["romanian"]         = "ro_RO.ISO-8859-2";
    language_aliases["russian"]          = "ru_RU.ISO-8859-5";
    language_aliases["slovak"]           = "sk_SK.ISO-8859-2";
    language_aliases["slovene"]          = "sl_SI.ISO-8859-2";
    language_aliases["slovenian"]        = "sl_SI.ISO-8859-2";
    language_aliases["spanish"]          = "es_ES.ISO-8859-1";
    language_aliases["swedish"]          = "sv_SE.ISO-8859-1";
    language_aliases["thai"]             = "th_TH.TIS-620";
    language_aliases["turkish"]          = "tr_TR.ISO-8859-9";
  }

  std::string name_lowercase(name);
  for(std::string::size_type i = 0; i < name_lowercase.size(); ++i)
  {
    // tolower() must not be handed a negative value, so bytes >= 0x80
    // are passed as unsigned char
    const unsigned char ch = static_cast<unsigned char>(name_lowercase[i]);
    name_lowercase[i] = static_cast<char>(tolower(ch));
  }

  Aliases::iterator it = language_aliases.find(name_lowercase);
  if (it != language_aliases.end())
  {
    return it->second;
  }
  else
  {
    return name;
  }
}
/**
 * Find the entry of the built-in language table that fits the given
 * language/country/modifier best.
 *
 * Candidates are all table entries with the same language code; they
 * are ranked with Language::match() and the first entry with the
 * highest score wins.
 *
 * @param language  language code, e.g. "de"
 * @param country   country code, e.g. "DE", may be empty
 * @param modifier  modifier, e.g. "latin", may be empty
 * @return the best matching Language, or a default constructed
 *         Language when the language code is not in the table
 */
Language
Language::from_spec(const std::string& language, const std::string& country, const std::string& modifier)
{
  typedef std::vector<LanguageSpec*> SpecList;
  typedef std::map<std::string, SpecList> SpecMap;

  static SpecMap language_map;
  if (language_map.empty())
  { // Build the language code -> candidates index on first use
    for(int i = 0; languages[i].language != NULL; ++i)
      language_map[languages[i].language].push_back(&languages[i]);
  }

  SpecMap::iterator found = language_map.find(language);
  if (found == language_map.end())
  {
    return Language();
  }

  // Temporary spec describing the request. It only borrows the buffers
  // of the arguments, which outlive it. Every field is initialised so
  // the wrapping Language never exposes an indeterminate pointer.
  LanguageSpec tmpspec;
  tmpspec.language = language.c_str();
  tmpspec.country  = country.c_str();
  tmpspec.modifier = modifier.c_str();
  tmpspec.name     = "";
  Language tmplang(&tmpspec);

  SpecList& candidates = found->second;
  LanguageSpec* best_match = 0;
  int best_match_score = 0;
  for(SpecList::iterator j = candidates.begin(); j != candidates.end(); ++j)
  { // Search for the language that best matches the given spec, value country more than modifier
    const int score = Language::match(Language(*j), tmplang);
    if (score > best_match_score)
    {
      best_match = *j;
      best_match_score = score;
    }
  }

  // All candidates share the requested language code, so match()
  // scored each of them above zero and one of them was picked
  assert(best_match);
  return Language(best_match);
}
/**
 * Create a Language from a name that may be a locale alias.
 *
 * The name is first passed through resolve_language_alias() and the
 * resulting locale string is then handed to from_env().
 */
Language
Language::from_name(const std::string& spec_str)
{
  const std::string resolved = resolve_language_alias(spec_str);
  return from_env(resolved);
}
/**
 * Create a Language from a locale string of the form
 * LANGUAGE[_COUNTRY][.CODESET][@MODIFIER].
 *
 * The codeset part is skipped, only language, country and modifier are
 * handed on to from_spec().
 *
 * Separators are only honoured in their proper position: a '.' counts
 * only in front of the '@', and a '_' counts only in front of the '.'
 * and the '@'. That way an underscore inside the codeset or modifier
 * (e.g. "de.ISO_8859-1") is not mistaken for the start of a country.
 *
 * @param env  locale string, e.g. "de_DE.UTF-8@euro"
 * @return the result of from_spec() for the extracted parts
 */
Language
Language::from_env(const std::string& env)
{
  const std::string::size_type npos = std::string::npos;

  // '@' introduces the modifier, everything behind it belongs to it
  const std::string::size_type at = env.find('@');

  // '.' introduces the codeset, but only in front of the modifier
  std::string::size_type dt = env.find('.');
  if (dt != npos && at != npos && dt > at)
  {
    dt = npos;
  }

  // End of the LANGUAGE[_COUNTRY] part, npos when neither codeset nor
  // modifier follows
  const std::string::size_type head_end = (dt != npos) ? dt : at;

  // '_' introduces the country, but only inside the LANGUAGE[_COUNTRY] part
  std::string::size_type ln = env.find('_');
  if (ln != npos && head_end != npos && ln > head_end)
  {
    ln = npos;
  }

  const std::string language = env.substr(0, (ln != npos) ? ln : head_end);

  std::string country;
  if (ln != npos)
  {
    // ln < head_end holds here, so the length cannot underflow
    country = env.substr(ln + 1, (head_end == npos) ? npos : head_end - (ln + 1));
  }

  std::string modifier;
  if (at != npos)
  {
    modifier = env.substr(at + 1);
  }

  return from_spec(language, country, modifier);
}
/**
 * Create a Language that refers to the given LanguageSpec.
 *
 * Only the pointer is stored, the spec itself is not copied.
 */
Language::Language(LanguageSpec* language_spec_)
: language_spec(language_spec_)
{
}
/**
 * Create a Language without a LanguageSpec (null pointer).
 *
 * The get_*() accessors and str() return an empty string for it.
 */
Language::Language()
: language_spec(0)
{
}
/**
 * Rate how well two languages fit each other.
 *
 * @return 0 when the language codes differ, otherwise a score from 1
 *         (country and modifier both differ) to 9 (country and
 *         modifier both equal). An empty country or modifier on either
 *         side acts as a wildcard and scores between a match and a
 *         miss; the country weighs more than the modifier.
 */
int
Language::match(const Language& lhs, const Language& rhs)
{
  if (lhs.get_language() != rhs.get_language())
  {
    return 0;
  }

  // Rows are indexed by the country result, columns by the modifier
  // result; index 0 = match, 1 = wildcard, 2 = miss
  static const int score_table[3][3] = {
    { 9, 8, 5 }, // country match
    { 7, 6, 3 }, // country wildcard
    { 4, 2, 1 }, // country miss
  };

  const std::string lhs_country  = lhs.get_country();
  const std::string rhs_country  = rhs.get_country();
  const std::string lhs_modifier = lhs.get_modifier();
  const std::string rhs_modifier = rhs.get_modifier();

  const int country_idx =
    (lhs_country == rhs_country) ? 0 :
    (lhs_country.empty() || rhs_country.empty()) ? 1 : 2;

  const int modifier_idx =
    (lhs_modifier == rhs_modifier) ? 0 :
    (lhs_modifier.empty() || rhs_modifier.empty()) ? 1 : 2;

  return score_table[country_idx][modifier_idx];
}
/** Language code of the spec, e.g. "de"; empty when no spec is set */
std::string
Language::get_language() const
{
  return language_spec ? language_spec->language : "";
}
/** Country code of the spec, e.g. "DE"; empty when no spec or no country is set */
std::string
Language::get_country() const
{
  const bool has_country = language_spec && language_spec->country;
  return has_country ? language_spec->country : "";
}
/** Modifier of the spec, e.g. "latin"; empty when no spec or no modifier is set */
std::string
Language::get_modifier() const
{
  const bool has_modifier = language_spec && language_spec->modifier;
  return has_modifier ? language_spec->modifier : "";
}
/** Human readable name of the spec, e.g. "German"; empty when no spec is set */
std::string
Language::get_name() const
{
  return language_spec ? language_spec->name : "";
}
/**
 * Build the locale style string "language[_country][@modifier]".
 *
 * The country and modifier parts are appended only when the
 * corresponding pointer of the spec is set.
 *
 * @return the composed string, or an empty string when no spec is set
 */
std::string
Language::str() const
{
  if (!language_spec)
  {
    return "";
  }

  std::string result(language_spec->language);

  if (language_spec->country)
  {
    result.append("_");
    result.append(language_spec->country);
  }

  if (language_spec->modifier)
  {
    result.append("@");
    result.append(language_spec->modifier);
  }

  return result;
}
/** Two languages are equal when they refer to the very same LanguageSpec */
bool
Language::operator==(const Language& rhs) const
{
  const bool same_spec = (language_spec == rhs.language_spec);
  return same_spec;
}
/** Two languages differ when they refer to different LanguageSpec objects */
bool
Language::operator!=(const Language& rhs) const
{
  return !(language_spec == rhs.language_spec);
}
} // namespace tinygettext
/* EOF */