From e230420f88fa4e9f219e2c7d96712b0ba7c8b869 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 11 Apr 2020 12:53:45 +0200 Subject: [PATCH] - moved all character set utilities to utf8.cpp. - moved matrix class to 'common'. --- src/CMakeLists.txt | 5 +- src/{ => common}/utility/matrix.cpp | 0 src/{ => common}/utility/matrix.h | 0 src/gamedata/fonts/fontinternals.h | 6 - src/gamedata/fonts/v_font.cpp | 850 --------------------------- src/scripting/vmthunks.cpp | 1 + src/utility/utf8.cpp | 873 +++++++++++++++++++++++++++- src/utility/utf8.h | 7 + 8 files changed, 884 insertions(+), 858 deletions(-) rename src/{ => common}/utility/matrix.cpp (100%) rename src/{ => common}/utility/matrix.h (100%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a2dfbeaa48..7be4ba78ae 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -610,6 +610,7 @@ file( GLOB HEADER_FILES win32/*.h r_data/*.h r_data/models/*.h + common/utility/*.h common/thirdparty/*.h common/thirdparty/rapidjson/*.h common/thirdparty/math./*h @@ -783,7 +784,7 @@ set( FASTMATH_SOURCES rendering/hwrenderer/scene/hw_walls_vertex.cpp rendering/hwrenderer/scene/hw_weapon.cpp r_data/models/models.cpp - utility/matrix.cpp + common/utility/matrix.cpp ) #Vulkan stuff must go into a separate list later because it needs to be disabled for some platforms @@ -1217,6 +1218,7 @@ target_link_libraries( zdoom ${ZDOOM_LIBS} gdtoa lzma ${ZMUSIC_LIBRARIES} ) include_directories( . common/thirdparty + common/utility g_statusbar console playsim @@ -1417,6 +1419,7 @@ source_group("Scripting\\Compiler backend" REGULAR_EXPRESSION "^${CMAKE_CURRENT_ source_group("Scripting\\VM" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/scripting/vm/.+") source_group("Scripting" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/scripting/.+") source_group("Common" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/common/.+") +source_group("Common\\Utility" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/utility/.+") source_group("Common\\Third Party" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/common/thirdparty/.+") source_group("Common\\Third Party\\Math" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/common/thirdparty/math/.+") source_group("Common\\Third Party\\RapidJSON" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/common/thirdparty/rapidjson/.+") diff --git a/src/utility/matrix.cpp b/src/common/utility/matrix.cpp similarity index 100% rename from src/utility/matrix.cpp rename to src/common/utility/matrix.cpp diff --git a/src/utility/matrix.h b/src/common/utility/matrix.h similarity index 100% rename from src/utility/matrix.h rename to src/common/utility/matrix.h diff --git a/src/gamedata/fonts/fontinternals.h b/src/gamedata/fonts/fontinternals.h index 6d68c6aa86..dc942319e6 100644 --- a/src/gamedata/fonts/fontinternals.h +++ b/src/gamedata/fonts/fontinternals.h @@ -34,13 +34,7 @@ struct TranslationMap extern TArray TranslationParms[2]; extern TArray TranslationLookup; extern TArray TranslationColors; -extern uint16_t lowerforupper[65536]; -extern uint16_t upperforlower[65536]; class FImageSource; void RecordTextureColors (FImageSource *pic, uint32_t *usedcolors); -bool myislower(int code); -bool myisupper(int code); -int stripaccent(int code); -int getAlternative(int code); diff --git a/src/gamedata/fonts/v_font.cpp b/src/gamedata/fonts/v_font.cpp index d961829a7a..0e30ab2481 100644 --- a/src/gamedata/fonts/v_font.cpp +++ b/src/gamedata/fonts/v_font.cpp @@ -90,855 +90,6 @@ TArray TranslationColors; // CODE -------------------------------------------------------------------- -uint16_t lowerforupper[65536]; -uint16_t upperforlower[65536]; -bool islowermap[65536]; -bool isuppermap[65536]; - -// This is a supposedly complete mapping of all lower <-> upper pairs. Most will most likely never be needed by Doom but this way there won't be any future surprises -static const uint16_t loweruppercase[] = { -0x0061,0x0041, -0x0062,0x0042, -0x0063,0x0043, -0x0064,0x0044, -0x0065,0x0045, -0x0066,0x0046, -0x0067,0x0047, -0x0068,0x0048, -0x0069,0x0049, -0x006A,0x004A, -0x006B,0x004B, -0x006C,0x004C, -0x006D,0x004D, -0x006E,0x004E, -0x006F,0x004F, -0x0070,0x0050, -0x0071,0x0051, -0x0072,0x0052, -0x0073,0x0053, -0x0074,0x0054, -0x0075,0x0055, -0x0076,0x0056, -0x0077,0x0057, -0x0078,0x0058, -0x0079,0x0059, -0x007A,0x005A, -0x00DF,0x1E9E, -0x00E0,0x00C0, -0x00E1,0x00C1, -0x00E2,0x00C2, -0x00E3,0x00C3, -0x00E4,0x00C4, -0x00E5,0x00C5, -0x00E6,0x00C6, -0x00E7,0x00C7, -0x00E8,0x00C8, -0x00E9,0x00C9, -0x00EA,0x00CA, -0x00EB,0x00CB, -0x00EC,0x00CC, -0x00ED,0x00CD, -0x00EE,0x00CE, -0x00EF,0x00CF, -0x00F0,0x00D0, -0x00F1,0x00D1, -0x00F2,0x00D2, -0x00F3,0x00D3, -0x00F4,0x00D4, -0x00F5,0x00D5, -0x00F6,0x00D6, -0x00F8,0x00D8, -0x00F9,0x00D9, -0x00FA,0x00DA, -0x00FB,0x00DB, -0x00FC,0x00DC, -0x00FD,0x00DD, -0x00FE,0x00DE, -0x00FF,0x0178, -0x0101,0x0100, -0x0103,0x0102, -0x0105,0x0104, -0x0107,0x0106, -0x0109,0x0108, -0x010B,0x010A, -0x010D,0x010C, -0x010F,0x010E, -0x0111,0x0110, -0x0113,0x0112, -0x0115,0x0114, -0x0117,0x0116, -0x0119,0x0118, -0x011B,0x011A, -0x011D,0x011C, -0x011F,0x011E, -0x0121,0x0120, -0x0123,0x0122, -0x0125,0x0124, -0x0127,0x0126, -0x0129,0x0128, -0x012B,0x012A, -0x012D,0x012C, -0x012F,0x012E, -0x0131,0x0049, -0x0133,0x0132, -0x0135,0x0134, -0x0137,0x0136, -0x013A,0x0139, -0x013C,0x013B, -0x013E,0x013D, -0x0140,0x013F, -0x0142,0x0141, -0x0144,0x0143, -0x0146,0x0145, -0x0148,0x0147, -0x014B,0x014A, -0x014D,0x014C, -0x014F,0x014E, -0x0151,0x0150, -0x0153,0x0152, -0x0155,0x0154, -0x0157,0x0156, -0x0159,0x0158, -0x015B,0x015A, -0x015D,0x015C, -0x015F,0x015E, -0x0161,0x0160, -0x0163,0x0162, -0x0165,0x0164, -0x0167,0x0166, -0x0169,0x0168, -0x016B,0x016A, -0x016D,0x016C, -0x016F,0x016E, -0x0171,0x0170, -0x0173,0x0172, -0x0175,0x0174, -0x0177,0x0176, -0x017A,0x0179, -0x017C,0x017B, -0x017E,0x017D, -0x0183,0x0182, -0x0185,0x0184, -0x0188,0x0187, -0x018C,0x018B, -0x0192,0x0191, -0x0199,0x0198, -0x01A1,0x01A0, -0x01A3,0x01A2, -0x01A5,0x01A4, -0x01A8,0x01A7, -0x01AD,0x01AC, -0x01B0,0x01AF, -0x01B4,0x01B3, -0x01B6,0x01B5, -0x01B9,0x01B8, -0x01BD,0x01BC, -0x01C6,0x01C4, -0x01C9,0x01C7, -0x01CC,0x01CA, -0x01CE,0x01CD, -0x01D0,0x01CF, -0x01D2,0x01D1, -0x01D4,0x01D3, -0x01D6,0x01D5, -0x01D8,0x01D7, -0x01DA,0x01D9, -0x01DC,0x01DB, -0x01DF,0x01DE, -0x01E1,0x01E0, -0x01E3,0x01E2, -0x01E5,0x01E4, -0x01E7,0x01E6, -0x01E9,0x01E8, -0x01EB,0x01EA, -0x01ED,0x01EC, -0x01EF,0x01EE, -0x01F3,0x01F1, -0x01F5,0x01F4, -0x01FB,0x01FA, -0x01FD,0x01FC, -0x01FF,0x01FE, -0x0201,0x0200, -0x0203,0x0202, -0x0205,0x0204, -0x0207,0x0206, -0x0209,0x0208, -0x020B,0x020A, -0x020D,0x020C, -0x020F,0x020E, -0x0211,0x0210, -0x0213,0x0212, -0x0215,0x0214, -0x0217,0x0216, -0x0253,0x0181, -0x0254,0x0186, -0x0257,0x018A, -0x0258,0x018E, -0x0259,0x018F, -0x025B,0x0190, -0x0260,0x0193, -0x0263,0x0194, -0x0268,0x0197, -0x0269,0x0196, -0x026F,0x019C, -0x0272,0x019D, -0x0275,0x019F, -0x0283,0x01A9, -0x0288,0x01AE, -0x028A,0x01B1, -0x028B,0x01B2, -0x0292,0x01B7, -0x03AC,0x0386, -0x03AD,0x0388, -0x03AE,0x0389, -0x03AF,0x038A, -0x03B1,0x0391, -0x03B2,0x0392, -0x03B3,0x0393, -0x03B4,0x0394, -0x03B5,0x0395, -0x03B6,0x0396, -0x03B7,0x0397, -0x03B8,0x0398, -0x03B9,0x0399, -0x03BA,0x039A, -0x03BB,0x039B, -0x03BC,0x039C, -0x03BD,0x039D, -0x03BE,0x039E, -0x03BF,0x039F, -0x03C0,0x03A0, -0x03C1,0x03A1, -0x03C3,0x03A3, -0x03C4,0x03A4, -0x03C5,0x03A5, -0x03C6,0x03A6, -0x03C7,0x03A7, -0x03C8,0x03A8, -0x03C9,0x03A9, -0x03CA,0x03AA, -0x03CB,0x03AB, -0x03CC,0x038C, -0x03CD,0x038E, -0x03CE,0x038F, -0x03E3,0x03E2, -0x03E5,0x03E4, -0x03E7,0x03E6, -0x03E9,0x03E8, -0x03EB,0x03EA, -0x03ED,0x03EC, -0x03EF,0x03EE, -0x0430,0x0410, -0x0431,0x0411, -0x0432,0x0412, -0x0433,0x0413, -0x0434,0x0414, -0x0435,0x0415, -0x0436,0x0416, -0x0437,0x0417, -0x0438,0x0418, -0x0439,0x0419, -0x043A,0x041A, -0x043B,0x041B, -0x043C,0x041C, -0x043D,0x041D, -0x043E,0x041E, -0x043F,0x041F, -0x0440,0x0420, -0x0441,0x0421, -0x0442,0x0422, -0x0443,0x0423, -0x0444,0x0424, -0x0445,0x0425, -0x0446,0x0426, -0x0447,0x0427, -0x0448,0x0428, -0x0449,0x0429, -0x044A,0x042A, -0x044B,0x042B, -0x044C,0x042C, -0x044D,0x042D, -0x044E,0x042E, -0x044F,0x042F, -0x0451,0x0401, -0x0452,0x0402, -0x0453,0x0403, -0x0454,0x0404, -0x0455,0x0405, -0x0456,0x0406, -0x0457,0x0407, -0x0458,0x0408, -0x0459,0x0409, -0x045A,0x040A, -0x045B,0x040B, -0x045C,0x040C, -0x045E,0x040E, -0x045F,0x040F, -0x0461,0x0460, -0x0463,0x0462, -0x0465,0x0464, -0x0467,0x0466, -0x0469,0x0468, -0x046B,0x046A, -0x046D,0x046C, -0x046F,0x046E, -0x0471,0x0470, -0x0473,0x0472, -0x0475,0x0474, -0x0477,0x0476, -0x0479,0x0478, -0x047B,0x047A, -0x047D,0x047C, -0x047F,0x047E, -0x0481,0x0480, -0x0491,0x0490, -0x0493,0x0492, -0x0495,0x0494, -0x0497,0x0496, -0x0499,0x0498, -0x049B,0x049A, -0x049D,0x049C, -0x049F,0x049E, -0x04A1,0x04A0, -0x04A3,0x04A2, -0x04A5,0x04A4, -0x04A7,0x04A6, -0x04A9,0x04A8, -0x04AB,0x04AA, -0x04AD,0x04AC, -0x04AF,0x04AE, -0x04B1,0x04B0, -0x04B3,0x04B2, -0x04B5,0x04B4, -0x04B7,0x04B6, -0x04B9,0x04B8, -0x04BB,0x04BA, -0x04BD,0x04BC, -0x04BF,0x04BE, -0x04C2,0x04C1, -0x04C4,0x04C3, -0x04C8,0x04C7, -0x04CC,0x04CB, -0x04D1,0x04D0, -0x04D3,0x04D2, -0x04D5,0x04D4, -0x04D7,0x04D6, -0x04D9,0x04D8, -0x04DB,0x04DA, -0x04DD,0x04DC, -0x04DF,0x04DE, -0x04E1,0x04E0, -0x04E3,0x04E2, -0x04E5,0x04E4, -0x04E7,0x04E6, -0x04E9,0x04E8, -0x04EB,0x04EA, -0x04EF,0x04EE, -0x04F1,0x04F0, -0x04F3,0x04F2, -0x04F5,0x04F4, -0x04F9,0x04F8, -0x0561,0x0531, -0x0562,0x0532, -0x0563,0x0533, -0x0564,0x0534, -0x0565,0x0535, -0x0566,0x0536, -0x0567,0x0537, -0x0568,0x0538, -0x0569,0x0539, -0x056A,0x053A, -0x056B,0x053B, -0x056C,0x053C, -0x056D,0x053D, -0x056E,0x053E, -0x056F,0x053F, -0x0570,0x0540, -0x0571,0x0541, -0x0572,0x0542, -0x0573,0x0543, -0x0574,0x0544, -0x0575,0x0545, -0x0576,0x0546, -0x0577,0x0547, -0x0578,0x0548, -0x0579,0x0549, -0x057A,0x054A, -0x057B,0x054B, -0x057C,0x054C, -0x057D,0x054D, -0x057E,0x054E, -0x057F,0x054F, -0x0580,0x0550, -0x0581,0x0551, -0x0582,0x0552, -0x0583,0x0553, -0x0584,0x0554, -0x0585,0x0555, -0x0586,0x0556, -0x10D0,0x10A0, -0x10D1,0x10A1, -0x10D2,0x10A2, -0x10D3,0x10A3, -0x10D4,0x10A4, -0x10D5,0x10A5, -0x10D6,0x10A6, -0x10D7,0x10A7, -0x10D8,0x10A8, -0x10D9,0x10A9, -0x10DA,0x10AA, -0x10DB,0x10AB, -0x10DC,0x10AC, -0x10DD,0x10AD, -0x10DE,0x10AE, -0x10DF,0x10AF, -0x10E0,0x10B0, -0x10E1,0x10B1, -0x10E2,0x10B2, -0x10E3,0x10B3, -0x10E4,0x10B4, -0x10E5,0x10B5, -0x10E6,0x10B6, -0x10E7,0x10B7, -0x10E8,0x10B8, -0x10E9,0x10B9, -0x10EA,0x10BA, -0x10EB,0x10BB, -0x10EC,0x10BC, -0x10ED,0x10BD, -0x10EE,0x10BE, -0x10EF,0x10BF, -0x10F0,0x10C0, -0x10F1,0x10C1, -0x10F2,0x10C2, -0x10F3,0x10C3, -0x10F4,0x10C4, -0x10F5,0x10C5, -0x1E01,0x1E00, -0x1E03,0x1E02, -0x1E05,0x1E04, -0x1E07,0x1E06, -0x1E09,0x1E08, -0x1E0B,0x1E0A, -0x1E0D,0x1E0C, -0x1E0F,0x1E0E, -0x1E11,0x1E10, -0x1E13,0x1E12, -0x1E15,0x1E14, -0x1E17,0x1E16, -0x1E19,0x1E18, -0x1E1B,0x1E1A, -0x1E1D,0x1E1C, -0x1E1F,0x1E1E, -0x1E21,0x1E20, -0x1E23,0x1E22, -0x1E25,0x1E24, -0x1E27,0x1E26, -0x1E29,0x1E28, -0x1E2B,0x1E2A, -0x1E2D,0x1E2C, -0x1E2F,0x1E2E, -0x1E31,0x1E30, -0x1E33,0x1E32, -0x1E35,0x1E34, -0x1E37,0x1E36, -0x1E39,0x1E38, -0x1E3B,0x1E3A, -0x1E3D,0x1E3C, -0x1E3F,0x1E3E, -0x1E41,0x1E40, -0x1E43,0x1E42, -0x1E45,0x1E44, -0x1E47,0x1E46, -0x1E49,0x1E48, -0x1E4B,0x1E4A, -0x1E4D,0x1E4C, -0x1E4F,0x1E4E, -0x1E51,0x1E50, -0x1E53,0x1E52, -0x1E55,0x1E54, -0x1E57,0x1E56, -0x1E59,0x1E58, -0x1E5B,0x1E5A, -0x1E5D,0x1E5C, -0x1E5F,0x1E5E, -0x1E61,0x1E60, -0x1E63,0x1E62, -0x1E65,0x1E64, -0x1E67,0x1E66, -0x1E69,0x1E68, -0x1E6B,0x1E6A, -0x1E6D,0x1E6C, -0x1E6F,0x1E6E, -0x1E71,0x1E70, -0x1E73,0x1E72, -0x1E75,0x1E74, -0x1E77,0x1E76, -0x1E79,0x1E78, -0x1E7B,0x1E7A, -0x1E7D,0x1E7C, -0x1E7F,0x1E7E, -0x1E81,0x1E80, -0x1E83,0x1E82, -0x1E85,0x1E84, -0x1E87,0x1E86, -0x1E89,0x1E88, -0x1E8B,0x1E8A, -0x1E8D,0x1E8C, -0x1E8F,0x1E8E, -0x1E91,0x1E90, -0x1E93,0x1E92, -0x1E95,0x1E94, -0x1EA1,0x1EA0, -0x1EA3,0x1EA2, -0x1EA5,0x1EA4, -0x1EA7,0x1EA6, -0x1EA9,0x1EA8, -0x1EAB,0x1EAA, -0x1EAD,0x1EAC, -0x1EAF,0x1EAE, -0x1EB1,0x1EB0, -0x1EB3,0x1EB2, -0x1EB5,0x1EB4, -0x1EB7,0x1EB6, -0x1EB9,0x1EB8, -0x1EBB,0x1EBA, -0x1EBD,0x1EBC, -0x1EBF,0x1EBE, -0x1EC1,0x1EC0, -0x1EC3,0x1EC2, -0x1EC5,0x1EC4, -0x1EC7,0x1EC6, -0x1EC9,0x1EC8, -0x1ECB,0x1ECA, -0x1ECD,0x1ECC, -0x1ECF,0x1ECE, -0x1ED1,0x1ED0, -0x1ED3,0x1ED2, -0x1ED5,0x1ED4, -0x1ED7,0x1ED6, -0x1ED9,0x1ED8, -0x1EDB,0x1EDA, -0x1EDD,0x1EDC, -0x1EDF,0x1EDE, -0x1EE1,0x1EE0, -0x1EE3,0x1EE2, -0x1EE5,0x1EE4, -0x1EE7,0x1EE6, -0x1EE9,0x1EE8, -0x1EEB,0x1EEA, -0x1EED,0x1EEC, -0x1EEF,0x1EEE, -0x1EF1,0x1EF0, -0x1EF3,0x1EF2, -0x1EF5,0x1EF4, -0x1EF7,0x1EF6, -0x1EF9,0x1EF8, -0x1F00,0x1F08, -0x1F01,0x1F09, -0x1F02,0x1F0A, -0x1F03,0x1F0B, -0x1F04,0x1F0C, -0x1F05,0x1F0D, -0x1F06,0x1F0E, -0x1F07,0x1F0F, -0x1F10,0x1F18, -0x1F11,0x1F19, -0x1F12,0x1F1A, -0x1F13,0x1F1B, -0x1F14,0x1F1C, -0x1F15,0x1F1D, -0x1F20,0x1F28, -0x1F21,0x1F29, -0x1F22,0x1F2A, -0x1F23,0x1F2B, -0x1F24,0x1F2C, -0x1F25,0x1F2D, -0x1F26,0x1F2E, -0x1F27,0x1F2F, -0x1F30,0x1F38, -0x1F31,0x1F39, -0x1F32,0x1F3A, -0x1F33,0x1F3B, -0x1F34,0x1F3C, -0x1F35,0x1F3D, -0x1F36,0x1F3E, -0x1F37,0x1F3F, -0x1F40,0x1F48, -0x1F41,0x1F49, -0x1F42,0x1F4A, -0x1F43,0x1F4B, -0x1F44,0x1F4C, -0x1F45,0x1F4D, -0x1F51,0x1F59, -0x1F53,0x1F5B, -0x1F55,0x1F5D, -0x1F57,0x1F5F, -0x1F60,0x1F68, -0x1F61, 0x1F69, -0x1F62, 0x1F6A, -0x1F63, 0x1F6B, -0x1F64, 0x1F6C, -0x1F65, 0x1F6D, -0x1F66, 0x1F6E, -0x1F67, 0x1F6F, -0x1F80, 0x1F88, -0x1F81, 0x1F89, -0x1F82, 0x1F8A, -0x1F83, 0x1F8B, -0x1F84, 0x1F8C, -0x1F85, 0x1F8D, -0x1F86, 0x1F8E, -0x1F87, 0x1F8F, -0x1F90, 0x1F98, -0x1F91, 0x1F99, -0x1F92, 0x1F9A, -0x1F93, 0x1F9B, -0x1F94, 0x1F9C, -0x1F95, 0x1F9D, -0x1F96, 0x1F9E, -0x1F97, 0x1F9F, -0x1FA0, 0x1FA8, -0x1FA1, 0x1FA9, -0x1FA2, 0x1FAA, -0x1FA3, 0x1FAB, -0x1FA4, 0x1FAC, -0x1FA5, 0x1FAD, -0x1FA6, 0x1FAE, -0x1FA7, 0x1FAF, -0x1FB0, 0x1FB8, -0x1FB1, 0x1FB9, -0x1FD0, 0x1FD8, -0x1FD1, 0x1FD9, -0x1FE0, 0x1FE8, -0x1FE1, 0x1FE9, -0x24D0, 0x24B6, -0x24D1, 0x24B7, -0x24D2, 0x24B8, -0x24D3, 0x24B9, -0x24D4, 0x24BA, -0x24D5, 0x24BB, -0x24D6, 0x24BC, -0x24D7, 0x24BD, -0x24D8, 0x24BE, -0x24D9, 0x24BF, -0x24DA, 0x24C0, -0x24DB, 0x24C1, -0x24DC, 0x24C2, -0x24DD, 0x24C3, -0x24DE, 0x24C4, -0x24DF, 0x24C5, -0x24E0, 0x24C6, -0x24E1, 0x24C7, -0x24E2, 0x24C8, -0x24E3, 0x24C9, -0x24E4, 0x24CA, -0x24E5, 0x24CB, -0x24E6, 0x24CC, -0x24E7, 0x24CD, -0x24E8, 0x24CE, -0x24E9, 0x24CF, -0xFF41, 0xFF21, -0xFF42, 0xFF22, -0xFF43, 0xFF23, -0xFF44, 0xFF24, -0xFF45, 0xFF25, -0xFF46, 0xFF26, -0xFF47, 0xFF27, -0xFF48, 0xFF28, -0xFF49, 0xFF29, -0xFF4A, 0xFF2A, -0xFF4B, 0xFF2B, -0xFF4C, 0xFF2C, -0xFF4D, 0xFF2D, -0xFF4E, 0xFF2E, -0xFF4F, 0xFF2F, -0xFF50, 0xFF30, -0xFF51, 0xFF31, -0xFF52, 0xFF32, -0xFF53, 0xFF33, -0xFF54, 0xFF34, -0xFF55, 0xFF35, -0xFF56, 0xFF36, -0xFF57, 0xFF37, -0xFF58, 0xFF38, -0xFF59, 0xFF39, -0xFF5A, 0xFF3A, -0, 0 -}; - -void InitLowerUpper() -{ - for (int i = 0; i < 65536; i++) - { - lowerforupper[i] = i; - upperforlower[i] = i; - } - for (int i = 0; loweruppercase[i]; i += 2) - { - auto lower = loweruppercase[i]; - auto upper = loweruppercase[i + 1]; - if (lowerforupper[upper] == upper) lowerforupper[upper] = lower; // This mapping is ambiguous (see 0x0131 -> 0x0049, (small Turkish 'i' without dot.) so only pick the first match. - if (upperforlower[lower] == lower) upperforlower[lower] = upper; - isuppermap[upper] = islowermap[lower] = true; - } - // Special treatment for the two variants of the small sigma in Greek. - islowermap[0x3c2] = true; - upperforlower[0x3c2] = 0x3a3; -} - - -bool myislower(int code) -{ - if (code >= 0 && code < 65536) return islowermap[code]; - return false; -} - -bool myisupper(int code) -{ - if (code >= 0 && code < 65536) return isuppermap[code]; - return false; -} - - -// Returns a character without an accent mark (or one with a similar looking accent in some cases where direct support is unlikely.) - -int stripaccent(int code) -{ - if (code < 0x8a) - return code; - if (code < 0x100) - { - if (code == 0x8a) // Latin capital letter S with caron - return 'S'; - if (code == 0x8e) // Latin capital letter Z with caron - return 'Z'; - if (code == 0x9a) // Latin small letter S with caron - return 's'; - if (code == 0x9e) // Latin small letter Z with caron - return 'z'; - if (code == 0x9f) // Latin capital letter Y with diaeresis - return 'Y'; - if (code == 0xab || code == 0xbb) return '"'; // typographic quotation marks. - if (code == 0xff) // Latin small letter Y with diaeresis - return 'y'; - // Every other accented character has the high two bits set. - if ((code & 0xC0) == 0) - return code; - // Make lowercase characters uppercase so there are half as many tests. - int acode = code & 0xDF; - if (acode >= 0xC0 && acode <= 0xC5) // A with accents - return 'A' + (code & 0x20); - if (acode == 0xC7) // Cedilla - return 'C' + (acode & 0x20); - if (acode >= 0xC8 && acode <= 0xCB) // E with accents - return 'E' + (code & 0x20); - if (acode >= 0xCC && acode <= 0xCF) // I with accents - return 'I' + (code & 0x20); - if (acode == 0xD0) // Eth - return 'D' + (code & 0x20); - if (acode == 0xD1) // N with tilde - return 'N' + (code & 0x20); - if ((acode >= 0xD2 && acode <= 0xD6) || // O with accents - acode == 0xD8) // O with stroke - return 'O' + (code & 0x20); - if (acode >= 0xD9 && acode <= 0xDC) // U with accents - return 'U' + (code & 0x20); - if (acode == 0xDD) // Y with accute - return 'Y' + (code & 0x20); - if (acode == 0xDE) // Thorn - return 'P' + (code & 0x20); // well, it sort of looks like a 'P' - } - else if (code >= 0x100 && code < 0x180) - { - // For the double-accented Hungarian letters it makes more sense to first map them to the very similar looking Umlauts. - // (And screw the crappy specs that do not allow UTF-8 multibyte character literals here.) - if (code == 0x150) code = 0xd6; - else if (code == 0x151) code = 0xf6; - else if (code == 0x170) code = 0xdc; - else if (code == 0x171) code = 0xfc; - else - { - static const char accentless[] = "AaAaAaCcCcCcCcDdDdEeEeEeEeEeGgGgGgGgHhHhIiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnnNnOoOoOoOoRrRrRrSsSsSsSsTtTtTtUuUuUuUuUuUuWwYyYZzZzZzs"; - return accentless[code - 0x100]; - } - } - else if (code >= 0x200 && code < 0x218) - { - // 0x200-0x217 are irrelevant but easy to map to other characters more likely to exist. - static const uint16_t u200map[] = {0xc4, 0xe4, 0xc2, 0xe2, 0xcb, 0xeb, 0xca, 0xea, 0xcf, 0xef, 0xce, 0xee, 0xd6, 0xf6, 0xd4, 0xe4, 'R', 'r', 'R', 'r', 0xdc, 0xfc, 0xdb, 0xfb}; - return u200map[code - 0x200]; - } - return getAlternative(code); -} - -int getAlternative(int code) -{ - // This is for determining replacements that do not make CanPrint fail. - switch (code) - { - default: - return code; - - case 0x17f: return 's'; // The 'long s' can be safely remapped to the regular variant, not that this gets used in any real text... - case 0x218: return 0x15e; // Romanian S with comma below may get remapped to S with cedilla. - case 0x219: return 0x15f; - case 0x21a: return 0x162; // Romanian T with comma below may get remapped to T with cedilla. - case 0x21b: return 0x163; - case 0x386: return 0x391; // Greek characters with accents must map to their base form due to the "no accents in allcaps " rule. - case 0x388: return 0x395; - case 0x389: return 0x397; - case 0x38a: return 0x399; - case 0x38c: return 0x39f; - case 0x3a0: return 0x41f; - case 0x38e: return 0x3a5; - case 0x38f: return 0x3a9; - case 0x391: return 'A';// Greek characters with equivalents in either Latin or Cyrillic. This is only suitable for uppercase fonts! - case 0x392: return 'B'; - case 0x393: return 0x413; - case 0x395: return 'E'; - case 0x396: return 'Z'; - case 0x397: return 'H'; - case 0x399: return 'I'; - case 0x39a: return 'K'; - case 0x39c: return 'M'; - case 0x39d: return 'N'; - case 0x39f: return 'O'; - case 0x3a1: return 'P'; - case 0x3a4: return 'T'; - case 0x3a5: return 'Y'; - case 0x3a6: return 0x424; - case 0x3a7: return 'X'; - case 0x3aa: return 0xcf; - case 0x3ab: return 0x178; - case 0x3bf: return 'o'; // the Omicron is the only small Greek character that's easily mappable to a Latin equivalent. :( - case 0x3c2: return 0x3c3; // Lowercase Sigma character in Greek, which changes depending on its positioning in a word; if the font is uppercase only or features a smallcaps style, the second variant of the letter will remain unused - case 0x390: return 0x3ca; // For smallcaps fonts the small accented Greek characters remap to the unaccented versions. - case 0x3ac: return 0x3b1; - case 0x3ad: return 0x3b5; - case 0x3ae: return 0x3b7; - case 0x3af: return 0x3b9; - case 0x3b0: return 0x3cb; - case 0x3cc: return 0x3bf; - case 0x3cd: return 0x3c5; - case 0x3ce: return 0x3c9; - case 0x400: return 0xc8; // Cyrillic characters with equivalents in the Latin alphabet. - case 0x401: return 0xcb; - case 0x405: return 'S'; - case 0x406: return 'I'; - case 0x407: return 0xcf; - case 0x408: return 'J'; - case 0x450: return 0xe8; - case 0x451: return 0xeb; - case 0x455: return 's'; - case 0x456: return 'i'; - case 0x457: return 0xef; - case 0x458: return 'j'; - } - return code; -} - - FFont *V_GetFont(const char *name, const char *fontlumpname) { if (!stricmp(name, "DBIGFONT")) name = "BigFont"; @@ -1507,7 +658,6 @@ EColorRange V_ParseFontColor (const uint8_t *&color_value, int normalcolor, int void V_InitFonts() { - InitLowerUpper(); V_InitCustomFonts(); FFont *CreateHexLumpFont(const char *fontname, int lump); diff --git a/src/scripting/vmthunks.cpp b/src/scripting/vmthunks.cpp index 719efae163..304c4e857f 100644 --- a/src/scripting/vmthunks.cpp +++ b/src/scripting/vmthunks.cpp @@ -49,6 +49,7 @@ #include "am_map.h" #include "v_video.h" #include "gi.h" +#include "utf8.h" #include "fontinternals.h" #include "intermission/intermission.h" diff --git a/src/utility/utf8.cpp b/src/utility/utf8.cpp index ca2a4733a4..1eec781301 100644 --- a/src/utility/utf8.cpp +++ b/src/utility/utf8.cpp @@ -33,6 +33,7 @@ */ #include #include "tarray.h" +#include "utf8.h" //========================================================================== @@ -231,7 +232,7 @@ int GetCharFromString(const uint8_t *&string) static TArray UTF8String; -const char *MakeUTF8(const char *outline, int *numchars = nullptr) +const char *MakeUTF8(const char *outline, int *numchars) { UTF8String.Clear(); const uint8_t *in = (const uint8_t*)outline; @@ -263,3 +264,873 @@ const char *MakeUTF8(int codepoint, int *psize) if (psize) *psize = size; return UTF8String.Data(); } + + +//========================================================================== +// +// Returns a character without an accent mark (or one with a similar looking accent in some cases where direct support is unlikely.) +// +//========================================================================== + +int stripaccent(int code) +{ + if (code < 0x8a) + return code; + if (code < 0x100) + { + if (code == 0x8a) // Latin capital letter S with caron + return 'S'; + if (code == 0x8e) // Latin capital letter Z with caron + return 'Z'; + if (code == 0x9a) // Latin small letter S with caron + return 's'; + if (code == 0x9e) // Latin small letter Z with caron + return 'z'; + if (code == 0x9f) // Latin capital letter Y with diaeresis + return 'Y'; + if (code == 0xab || code == 0xbb) return '"'; // typographic quotation marks. + if (code == 0xff) // Latin small letter Y with diaeresis + return 'y'; + // Every other accented character has the high two bits set. + if ((code & 0xC0) == 0) + return code; + // Make lowercase characters uppercase so there are half as many tests. + int acode = code & 0xDF; + if (acode >= 0xC0 && acode <= 0xC5) // A with accents + return 'A' + (code & 0x20); + if (acode == 0xC7) // Cedilla + return 'C' + (acode & 0x20); + if (acode >= 0xC8 && acode <= 0xCB) // E with accents + return 'E' + (code & 0x20); + if (acode >= 0xCC && acode <= 0xCF) // I with accents + return 'I' + (code & 0x20); + if (acode == 0xD0) // Eth + return 'D' + (code & 0x20); + if (acode == 0xD1) // N with tilde + return 'N' + (code & 0x20); + if ((acode >= 0xD2 && acode <= 0xD6) || // O with accents + acode == 0xD8) // O with stroke + return 'O' + (code & 0x20); + if (acode >= 0xD9 && acode <= 0xDC) // U with accents + return 'U' + (code & 0x20); + if (acode == 0xDD) // Y with accute + return 'Y' + (code & 0x20); + if (acode == 0xDE) // Thorn + return 'P' + (code & 0x20); // well, it sort of looks like a 'P' + } + else if (code >= 0x100 && code < 0x180) + { + // For the double-accented Hungarian letters it makes more sense to first map them to the very similar looking Umlauts. + // (And screw the crappy specs that do not allow UTF-8 multibyte character literals here.) + if (code == 0x150) code = 0xd6; + else if (code == 0x151) code = 0xf6; + else if (code == 0x170) code = 0xdc; + else if (code == 0x171) code = 0xfc; + else + { + static const char accentless[] = "AaAaAaCcCcCcCcDdDdEeEeEeEeEeGgGgGgGgHhHhIiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnnNnOoOoOoOoRrRrRrSsSsSsSsTtTtTtUuUuUuUuUuUuWwYyYZzZzZzs"; + return accentless[code - 0x100]; + } + } + else if (code >= 0x200 && code < 0x218) + { + // 0x200-0x217 are irrelevant but easy to map to other characters more likely to exist. + static const uint16_t u200map[] = {0xc4, 0xe4, 0xc2, 0xe2, 0xcb, 0xeb, 0xca, 0xea, 0xcf, 0xef, 0xce, 0xee, 0xd6, 0xf6, 0xd4, 0xe4, 'R', 'r', 'R', 'r', 0xdc, 0xfc, 0xdb, 0xfb}; + return u200map[code - 0x200]; + } + return getAlternative(code); +} + +//========================================================================== +// +// Return replacement characters that should not make font completeness tests fail. +// +//========================================================================== + +int getAlternative(int code) +{ + switch (code) + { + default: + return code; + + case 0x17f: return 's'; // The 'long s' can be safely remapped to the regular variant, not that this gets used in any real text... + case 0x218: return 0x15e; // Romanian S with comma below may get remapped to S with cedilla. + case 0x219: return 0x15f; + case 0x21a: return 0x162; // Romanian T with comma below may get remapped to T with cedilla. + case 0x21b: return 0x163; + case 0x386: return 0x391; // Greek characters with accents must map to their base form due to the "no accents in allcaps " rule. + case 0x388: return 0x395; + case 0x389: return 0x397; + case 0x38a: return 0x399; + case 0x38c: return 0x39f; + case 0x3a0: return 0x41f; + case 0x38e: return 0x3a5; + case 0x38f: return 0x3a9; + case 0x391: return 'A';// Greek characters with equivalents in either Latin or Cyrillic. This is only suitable for uppercase fonts! + case 0x392: return 'B'; + case 0x393: return 0x413; + case 0x395: return 'E'; + case 0x396: return 'Z'; + case 0x397: return 'H'; + case 0x399: return 'I'; + case 0x39a: return 'K'; + case 0x39c: return 'M'; + case 0x39d: return 'N'; + case 0x39f: return 'O'; + case 0x3a1: return 'P'; + case 0x3a4: return 'T'; + case 0x3a5: return 'Y'; + case 0x3a6: return 0x424; + case 0x3a7: return 'X'; + case 0x3aa: return 0xcf; + case 0x3ab: return 0x178; + case 0x3bf: return 'o'; // the Omicron is the only small Greek character that's easily mappable to a Latin equivalent. :( + case 0x3c2: return 0x3c3; // Lowercase Sigma character in Greek, which changes depending on its positioning in a word; if the font is uppercase only or features a smallcaps style, the second variant of the letter will remain unused + case 0x390: return 0x3ca; // For smallcaps fonts the small accented Greek characters remap to the unaccented versions. + case 0x3ac: return 0x3b1; + case 0x3ad: return 0x3b5; + case 0x3ae: return 0x3b7; + case 0x3af: return 0x3b9; + case 0x3b0: return 0x3cb; + case 0x3cc: return 0x3bf; + case 0x3cd: return 0x3c5; + case 0x3ce: return 0x3c9; + case 0x400: return 0xc8; // Cyrillic characters with equivalents in the Latin alphabet. + case 0x401: return 0xcb; + case 0x405: return 'S'; + case 0x406: return 'I'; + case 0x407: return 0xcf; + case 0x408: return 'J'; + case 0x450: return 0xe8; + case 0x451: return 0xeb; + case 0x455: return 's'; + case 0x456: return 'i'; + case 0x457: return 0xef; + case 0x458: return 'j'; + } + return code; +} + +//========================================================================== +// +// Unicode-aware upper/lowercase conversion +// The only characters not being handled by this are the Turkish I's +// because those are language specific. +// +//========================================================================== + +uint16_t lowerforupper[65536]; +uint16_t upperforlower[65536]; +bool islowermap[65536]; +bool isuppermap[65536]; + +// This is a supposedly complete mapping of all lower <-> upper pairs. Most will most likely never be needed by Doom but this way there won't be any future surprises +static const uint16_t loweruppercase[] = { +0x0061,0x0041, +0x0062,0x0042, +0x0063,0x0043, +0x0064,0x0044, +0x0065,0x0045, +0x0066,0x0046, +0x0067,0x0047, +0x0068,0x0048, +0x0069,0x0049, +0x006A,0x004A, +0x006B,0x004B, +0x006C,0x004C, +0x006D,0x004D, +0x006E,0x004E, +0x006F,0x004F, +0x0070,0x0050, +0x0071,0x0051, +0x0072,0x0052, +0x0073,0x0053, +0x0074,0x0054, +0x0075,0x0055, +0x0076,0x0056, +0x0077,0x0057, +0x0078,0x0058, +0x0079,0x0059, +0x007A,0x005A, +0x00DF,0x1E9E, +0x00E0,0x00C0, +0x00E1,0x00C1, +0x00E2,0x00C2, +0x00E3,0x00C3, +0x00E4,0x00C4, +0x00E5,0x00C5, +0x00E6,0x00C6, +0x00E7,0x00C7, +0x00E8,0x00C8, +0x00E9,0x00C9, +0x00EA,0x00CA, +0x00EB,0x00CB, +0x00EC,0x00CC, +0x00ED,0x00CD, +0x00EE,0x00CE, +0x00EF,0x00CF, +0x00F0,0x00D0, +0x00F1,0x00D1, +0x00F2,0x00D2, +0x00F3,0x00D3, +0x00F4,0x00D4, +0x00F5,0x00D5, +0x00F6,0x00D6, +0x00F8,0x00D8, +0x00F9,0x00D9, +0x00FA,0x00DA, +0x00FB,0x00DB, +0x00FC,0x00DC, +0x00FD,0x00DD, +0x00FE,0x00DE, +0x00FF,0x0178, +0x0101,0x0100, +0x0103,0x0102, +0x0105,0x0104, +0x0107,0x0106, +0x0109,0x0108, +0x010B,0x010A, +0x010D,0x010C, +0x010F,0x010E, +0x0111,0x0110, +0x0113,0x0112, +0x0115,0x0114, +0x0117,0x0116, +0x0119,0x0118, +0x011B,0x011A, +0x011D,0x011C, +0x011F,0x011E, +0x0121,0x0120, +0x0123,0x0122, +0x0125,0x0124, +0x0127,0x0126, +0x0129,0x0128, +0x012B,0x012A, +0x012D,0x012C, +0x012F,0x012E, +0x0131,0x0049, +0x0133,0x0132, +0x0135,0x0134, +0x0137,0x0136, +0x013A,0x0139, +0x013C,0x013B, +0x013E,0x013D, +0x0140,0x013F, +0x0142,0x0141, +0x0144,0x0143, +0x0146,0x0145, +0x0148,0x0147, +0x014B,0x014A, +0x014D,0x014C, +0x014F,0x014E, +0x0151,0x0150, +0x0153,0x0152, +0x0155,0x0154, +0x0157,0x0156, +0x0159,0x0158, +0x015B,0x015A, +0x015D,0x015C, +0x015F,0x015E, +0x0161,0x0160, +0x0163,0x0162, +0x0165,0x0164, +0x0167,0x0166, +0x0169,0x0168, +0x016B,0x016A, +0x016D,0x016C, +0x016F,0x016E, +0x0171,0x0170, +0x0173,0x0172, +0x0175,0x0174, +0x0177,0x0176, +0x017A,0x0179, +0x017C,0x017B, +0x017E,0x017D, +0x0183,0x0182, +0x0185,0x0184, +0x0188,0x0187, +0x018C,0x018B, +0x0192,0x0191, +0x0199,0x0198, +0x01A1,0x01A0, +0x01A3,0x01A2, +0x01A5,0x01A4, +0x01A8,0x01A7, +0x01AD,0x01AC, +0x01B0,0x01AF, +0x01B4,0x01B3, +0x01B6,0x01B5, +0x01B9,0x01B8, +0x01BD,0x01BC, +0x01C6,0x01C4, +0x01C9,0x01C7, +0x01CC,0x01CA, +0x01CE,0x01CD, +0x01D0,0x01CF, +0x01D2,0x01D1, +0x01D4,0x01D3, +0x01D6,0x01D5, +0x01D8,0x01D7, +0x01DA,0x01D9, +0x01DC,0x01DB, +0x01DF,0x01DE, +0x01E1,0x01E0, +0x01E3,0x01E2, +0x01E5,0x01E4, +0x01E7,0x01E6, +0x01E9,0x01E8, +0x01EB,0x01EA, +0x01ED,0x01EC, +0x01EF,0x01EE, +0x01F3,0x01F1, +0x01F5,0x01F4, +0x01FB,0x01FA, +0x01FD,0x01FC, +0x01FF,0x01FE, +0x0201,0x0200, +0x0203,0x0202, +0x0205,0x0204, +0x0207,0x0206, +0x0209,0x0208, +0x020B,0x020A, +0x020D,0x020C, +0x020F,0x020E, +0x0211,0x0210, +0x0213,0x0212, +0x0215,0x0214, +0x0217,0x0216, +0x0253,0x0181, +0x0254,0x0186, +0x0257,0x018A, +0x0258,0x018E, +0x0259,0x018F, +0x025B,0x0190, +0x0260,0x0193, +0x0263,0x0194, +0x0268,0x0197, +0x0269,0x0196, +0x026F,0x019C, +0x0272,0x019D, +0x0275,0x019F, +0x0283,0x01A9, +0x0288,0x01AE, +0x028A,0x01B1, +0x028B,0x01B2, +0x0292,0x01B7, +0x03AC,0x0386, +0x03AD,0x0388, +0x03AE,0x0389, +0x03AF,0x038A, +0x03B1,0x0391, +0x03B2,0x0392, +0x03B3,0x0393, +0x03B4,0x0394, +0x03B5,0x0395, +0x03B6,0x0396, +0x03B7,0x0397, +0x03B8,0x0398, +0x03B9,0x0399, +0x03BA,0x039A, +0x03BB,0x039B, +0x03BC,0x039C, +0x03BD,0x039D, +0x03BE,0x039E, +0x03BF,0x039F, +0x03C0,0x03A0, +0x03C1,0x03A1, +0x03C3,0x03A3, +0x03C4,0x03A4, +0x03C5,0x03A5, +0x03C6,0x03A6, +0x03C7,0x03A7, +0x03C8,0x03A8, +0x03C9,0x03A9, +0x03CA,0x03AA, +0x03CB,0x03AB, +0x03CC,0x038C, +0x03CD,0x038E, +0x03CE,0x038F, +0x03E3,0x03E2, +0x03E5,0x03E4, +0x03E7,0x03E6, +0x03E9,0x03E8, +0x03EB,0x03EA, +0x03ED,0x03EC, +0x03EF,0x03EE, +0x0430,0x0410, +0x0431,0x0411, +0x0432,0x0412, +0x0433,0x0413, +0x0434,0x0414, +0x0435,0x0415, +0x0436,0x0416, +0x0437,0x0417, +0x0438,0x0418, +0x0439,0x0419, +0x043A,0x041A, +0x043B,0x041B, +0x043C,0x041C, +0x043D,0x041D, +0x043E,0x041E, +0x043F,0x041F, +0x0440,0x0420, +0x0441,0x0421, +0x0442,0x0422, +0x0443,0x0423, +0x0444,0x0424, +0x0445,0x0425, +0x0446,0x0426, +0x0447,0x0427, +0x0448,0x0428, +0x0449,0x0429, +0x044A,0x042A, +0x044B,0x042B, +0x044C,0x042C, +0x044D,0x042D, +0x044E,0x042E, +0x044F,0x042F, +0x0451,0x0401, +0x0452,0x0402, +0x0453,0x0403, +0x0454,0x0404, +0x0455,0x0405, +0x0456,0x0406, +0x0457,0x0407, +0x0458,0x0408, +0x0459,0x0409, +0x045A,0x040A, +0x045B,0x040B, +0x045C,0x040C, +0x045E,0x040E, +0x045F,0x040F, +0x0461,0x0460, +0x0463,0x0462, +0x0465,0x0464, +0x0467,0x0466, +0x0469,0x0468, +0x046B,0x046A, +0x046D,0x046C, +0x046F,0x046E, +0x0471,0x0470, +0x0473,0x0472, +0x0475,0x0474, +0x0477,0x0476, +0x0479,0x0478, +0x047B,0x047A, +0x047D,0x047C, +0x047F,0x047E, +0x0481,0x0480, +0x0491,0x0490, +0x0493,0x0492, +0x0495,0x0494, +0x0497,0x0496, +0x0499,0x0498, +0x049B,0x049A, +0x049D,0x049C, +0x049F,0x049E, +0x04A1,0x04A0, +0x04A3,0x04A2, +0x04A5,0x04A4, +0x04A7,0x04A6, +0x04A9,0x04A8, +0x04AB,0x04AA, +0x04AD,0x04AC, +0x04AF,0x04AE, +0x04B1,0x04B0, +0x04B3,0x04B2, +0x04B5,0x04B4, +0x04B7,0x04B6, +0x04B9,0x04B8, +0x04BB,0x04BA, +0x04BD,0x04BC, +0x04BF,0x04BE, +0x04C2,0x04C1, +0x04C4,0x04C3, +0x04C8,0x04C7, +0x04CC,0x04CB, +0x04D1,0x04D0, +0x04D3,0x04D2, +0x04D5,0x04D4, +0x04D7,0x04D6, +0x04D9,0x04D8, +0x04DB,0x04DA, +0x04DD,0x04DC, +0x04DF,0x04DE, +0x04E1,0x04E0, +0x04E3,0x04E2, +0x04E5,0x04E4, +0x04E7,0x04E6, +0x04E9,0x04E8, +0x04EB,0x04EA, +0x04EF,0x04EE, +0x04F1,0x04F0, +0x04F3,0x04F2, +0x04F5,0x04F4, +0x04F9,0x04F8, +0x0561,0x0531, +0x0562,0x0532, +0x0563,0x0533, +0x0564,0x0534, +0x0565,0x0535, +0x0566,0x0536, +0x0567,0x0537, +0x0568,0x0538, +0x0569,0x0539, +0x056A,0x053A, +0x056B,0x053B, +0x056C,0x053C, +0x056D,0x053D, +0x056E,0x053E, +0x056F,0x053F, +0x0570,0x0540, +0x0571,0x0541, +0x0572,0x0542, +0x0573,0x0543, +0x0574,0x0544, +0x0575,0x0545, +0x0576,0x0546, +0x0577,0x0547, +0x0578,0x0548, +0x0579,0x0549, +0x057A,0x054A, +0x057B,0x054B, +0x057C,0x054C, +0x057D,0x054D, +0x057E,0x054E, +0x057F,0x054F, +0x0580,0x0550, +0x0581,0x0551, +0x0582,0x0552, +0x0583,0x0553, +0x0584,0x0554, +0x0585,0x0555, +0x0586,0x0556, +0x10D0,0x10A0, +0x10D1,0x10A1, +0x10D2,0x10A2, +0x10D3,0x10A3, +0x10D4,0x10A4, +0x10D5,0x10A5, +0x10D6,0x10A6, +0x10D7,0x10A7, +0x10D8,0x10A8, +0x10D9,0x10A9, +0x10DA,0x10AA, +0x10DB,0x10AB, +0x10DC,0x10AC, +0x10DD,0x10AD, +0x10DE,0x10AE, +0x10DF,0x10AF, +0x10E0,0x10B0, +0x10E1,0x10B1, +0x10E2,0x10B2, +0x10E3,0x10B3, +0x10E4,0x10B4, +0x10E5,0x10B5, +0x10E6,0x10B6, +0x10E7,0x10B7, +0x10E8,0x10B8, +0x10E9,0x10B9, +0x10EA,0x10BA, +0x10EB,0x10BB, +0x10EC,0x10BC, +0x10ED,0x10BD, +0x10EE,0x10BE, +0x10EF,0x10BF, +0x10F0,0x10C0, +0x10F1,0x10C1, +0x10F2,0x10C2, +0x10F3,0x10C3, +0x10F4,0x10C4, +0x10F5,0x10C5, +0x1E01,0x1E00, +0x1E03,0x1E02, +0x1E05,0x1E04, +0x1E07,0x1E06, +0x1E09,0x1E08, +0x1E0B,0x1E0A, +0x1E0D,0x1E0C, +0x1E0F,0x1E0E, +0x1E11,0x1E10, +0x1E13,0x1E12, +0x1E15,0x1E14, +0x1E17,0x1E16, +0x1E19,0x1E18, +0x1E1B,0x1E1A, +0x1E1D,0x1E1C, +0x1E1F,0x1E1E, +0x1E21,0x1E20, +0x1E23,0x1E22, +0x1E25,0x1E24, +0x1E27,0x1E26, +0x1E29,0x1E28, +0x1E2B,0x1E2A, +0x1E2D,0x1E2C, +0x1E2F,0x1E2E, +0x1E31,0x1E30, +0x1E33,0x1E32, +0x1E35,0x1E34, +0x1E37,0x1E36, +0x1E39,0x1E38, +0x1E3B,0x1E3A, +0x1E3D,0x1E3C, +0x1E3F,0x1E3E, +0x1E41,0x1E40, +0x1E43,0x1E42, +0x1E45,0x1E44, +0x1E47,0x1E46, +0x1E49,0x1E48, +0x1E4B,0x1E4A, +0x1E4D,0x1E4C, +0x1E4F,0x1E4E, +0x1E51,0x1E50, +0x1E53,0x1E52, +0x1E55,0x1E54, +0x1E57,0x1E56, +0x1E59,0x1E58, +0x1E5B,0x1E5A, +0x1E5D,0x1E5C, +0x1E5F,0x1E5E, +0x1E61,0x1E60, +0x1E63,0x1E62, +0x1E65,0x1E64, +0x1E67,0x1E66, +0x1E69,0x1E68, +0x1E6B,0x1E6A, +0x1E6D,0x1E6C, +0x1E6F,0x1E6E, +0x1E71,0x1E70, +0x1E73,0x1E72, +0x1E75,0x1E74, +0x1E77,0x1E76, +0x1E79,0x1E78, +0x1E7B,0x1E7A, +0x1E7D,0x1E7C, +0x1E7F,0x1E7E, +0x1E81,0x1E80, +0x1E83,0x1E82, +0x1E85,0x1E84, +0x1E87,0x1E86, +0x1E89,0x1E88, +0x1E8B,0x1E8A, +0x1E8D,0x1E8C, +0x1E8F,0x1E8E, +0x1E91,0x1E90, +0x1E93,0x1E92, +0x1E95,0x1E94, +0x1EA1,0x1EA0, +0x1EA3,0x1EA2, +0x1EA5,0x1EA4, +0x1EA7,0x1EA6, +0x1EA9,0x1EA8, +0x1EAB,0x1EAA, +0x1EAD,0x1EAC, +0x1EAF,0x1EAE, +0x1EB1,0x1EB0, +0x1EB3,0x1EB2, +0x1EB5,0x1EB4, +0x1EB7,0x1EB6, +0x1EB9,0x1EB8, +0x1EBB,0x1EBA, +0x1EBD,0x1EBC, +0x1EBF,0x1EBE, +0x1EC1,0x1EC0, +0x1EC3,0x1EC2, +0x1EC5,0x1EC4, +0x1EC7,0x1EC6, +0x1EC9,0x1EC8, +0x1ECB,0x1ECA, +0x1ECD,0x1ECC, +0x1ECF,0x1ECE, +0x1ED1,0x1ED0, +0x1ED3,0x1ED2, +0x1ED5,0x1ED4, +0x1ED7,0x1ED6, +0x1ED9,0x1ED8, +0x1EDB,0x1EDA, +0x1EDD,0x1EDC, +0x1EDF,0x1EDE, +0x1EE1,0x1EE0, +0x1EE3,0x1EE2, +0x1EE5,0x1EE4, +0x1EE7,0x1EE6, +0x1EE9,0x1EE8, +0x1EEB,0x1EEA, +0x1EED,0x1EEC, +0x1EEF,0x1EEE, +0x1EF1,0x1EF0, +0x1EF3,0x1EF2, +0x1EF5,0x1EF4, +0x1EF7,0x1EF6, +0x1EF9,0x1EF8, +0x1F00,0x1F08, +0x1F01,0x1F09, +0x1F02,0x1F0A, +0x1F03,0x1F0B, +0x1F04,0x1F0C, +0x1F05,0x1F0D, +0x1F06,0x1F0E, +0x1F07,0x1F0F, +0x1F10,0x1F18, +0x1F11,0x1F19, +0x1F12,0x1F1A, +0x1F13,0x1F1B, +0x1F14,0x1F1C, +0x1F15,0x1F1D, +0x1F20,0x1F28, +0x1F21,0x1F29, +0x1F22,0x1F2A, +0x1F23,0x1F2B, +0x1F24,0x1F2C, +0x1F25,0x1F2D, +0x1F26,0x1F2E, +0x1F27,0x1F2F, +0x1F30,0x1F38, +0x1F31,0x1F39, +0x1F32,0x1F3A, +0x1F33,0x1F3B, +0x1F34,0x1F3C, +0x1F35,0x1F3D, +0x1F36,0x1F3E, +0x1F37,0x1F3F, +0x1F40,0x1F48, +0x1F41,0x1F49, +0x1F42,0x1F4A, +0x1F43,0x1F4B, +0x1F44,0x1F4C, +0x1F45,0x1F4D, +0x1F51,0x1F59, +0x1F53,0x1F5B, +0x1F55,0x1F5D, +0x1F57,0x1F5F, +0x1F60,0x1F68, +0x1F61, 0x1F69, +0x1F62, 0x1F6A, +0x1F63, 0x1F6B, +0x1F64, 0x1F6C, +0x1F65, 0x1F6D, +0x1F66, 0x1F6E, +0x1F67, 0x1F6F, +0x1F80, 0x1F88, +0x1F81, 0x1F89, +0x1F82, 0x1F8A, +0x1F83, 0x1F8B, +0x1F84, 0x1F8C, +0x1F85, 0x1F8D, +0x1F86, 0x1F8E, +0x1F87, 0x1F8F, +0x1F90, 0x1F98, +0x1F91, 0x1F99, +0x1F92, 0x1F9A, +0x1F93, 0x1F9B, +0x1F94, 0x1F9C, +0x1F95, 0x1F9D, +0x1F96, 0x1F9E, +0x1F97, 0x1F9F, +0x1FA0, 0x1FA8, +0x1FA1, 0x1FA9, +0x1FA2, 0x1FAA, +0x1FA3, 0x1FAB, +0x1FA4, 0x1FAC, +0x1FA5, 0x1FAD, +0x1FA6, 0x1FAE, +0x1FA7, 0x1FAF, +0x1FB0, 0x1FB8, +0x1FB1, 0x1FB9, +0x1FD0, 0x1FD8, +0x1FD1, 0x1FD9, +0x1FE0, 0x1FE8, +0x1FE1, 0x1FE9, +0x24D0, 0x24B6, +0x24D1, 0x24B7, +0x24D2, 0x24B8, +0x24D3, 0x24B9, +0x24D4, 0x24BA, +0x24D5, 0x24BB, +0x24D6, 0x24BC, +0x24D7, 0x24BD, +0x24D8, 0x24BE, +0x24D9, 0x24BF, +0x24DA, 0x24C0, +0x24DB, 0x24C1, +0x24DC, 0x24C2, +0x24DD, 0x24C3, +0x24DE, 0x24C4, +0x24DF, 0x24C5, +0x24E0, 0x24C6, +0x24E1, 0x24C7, +0x24E2, 0x24C8, +0x24E3, 0x24C9, +0x24E4, 0x24CA, +0x24E5, 0x24CB, +0x24E6, 0x24CC, +0x24E7, 0x24CD, +0x24E8, 0x24CE, +0x24E9, 0x24CF, +0xFF41, 0xFF21, +0xFF42, 0xFF22, +0xFF43, 0xFF23, +0xFF44, 0xFF24, +0xFF45, 0xFF25, +0xFF46, 0xFF26, +0xFF47, 0xFF27, +0xFF48, 0xFF28, +0xFF49, 0xFF29, +0xFF4A, 0xFF2A, +0xFF4B, 0xFF2B, +0xFF4C, 0xFF2C, +0xFF4D, 0xFF2D, +0xFF4E, 0xFF2E, +0xFF4F, 0xFF2F, +0xFF50, 0xFF30, +0xFF51, 0xFF31, +0xFF52, 0xFF32, +0xFF53, 0xFF33, +0xFF54, 0xFF34, +0xFF55, 0xFF35, +0xFF56, 0xFF36, +0xFF57, 0xFF37, +0xFF58, 0xFF38, +0xFF59, 0xFF39, +0xFF5A, 0xFF3A, +0, 0 +}; + +struct InitLowerUpper +{ + InitLowerUpper() + { + for (int i = 0; i < 65536; i++) + { + lowerforupper[i] = i; + upperforlower[i] = i; + } + for (int i = 0; loweruppercase[i]; i += 2) + { + auto lower = loweruppercase[i]; + auto upper = loweruppercase[i + 1]; + if (lowerforupper[upper] == upper) lowerforupper[upper] = lower; // This mapping is ambiguous (see 0x0131 -> 0x0049, (small Turkish 'i' without dot.) so only pick the first match. + if (upperforlower[lower] == lower) upperforlower[lower] = upper; + isuppermap[upper] = islowermap[lower] = true; + } + // Special treatment for the two variants of the small sigma in Greek. + islowermap[0x3c2] = true; + upperforlower[0x3c2] = 0x3a3; + } +}; + +static InitLowerUpper initer; + +bool myislower(int code) +{ + if (code >= 0 && code < 65536) return islowermap[code]; + return false; +} + +bool myisupper(int code) +{ + if (code >= 0 && code < 65536) return isuppermap[code]; + return false; +} + diff --git a/src/utility/utf8.h b/src/utility/utf8.h index ab45775339..7ca8a5f324 100644 --- a/src/utility/utf8.h +++ b/src/utility/utf8.h @@ -10,4 +10,11 @@ inline int GetCharFromString(const char32_t *&string) const char *MakeUTF8(const char *outline, int *numchars = nullptr); // returns a pointer to a static buffer, assuming that its caller will immediately process the result. const char *MakeUTF8(int codepoint, int *psize = nullptr); +bool myislower(int code); +bool myisupper(int code); +int stripaccent(int code); +int getAlternative(int code); + extern uint16_t win1252map[]; +extern uint16_t lowerforupper[65536]; +extern uint16_t upperforlower[65536];