diff --git a/CMakeLists.txt b/CMakeLists.txt index da54ed65c..fbbffa8dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -392,6 +392,7 @@ endif() set( LZMA_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/libraries/lzma/C" ) set( ENET_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/libraries/enet" ) +set( GDTOA_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/libraries/gdtoa" ) if( NOT CMAKE_CROSSCOMPILING ) if( NOT CROSS_EXPORTS ) diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index f4a295a41..ead2c9bb7 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -506,9 +506,9 @@ endif() # Ugh... These precompiled dependencies need to go. if (WIN32) - include_directories( "${ZLIB_INCLUDE_DIR}" "${BZIP2_INCLUDE_DIR}" "${LZMA_INCLUDE_DIR}" "${JPEG_INCLUDE_DIR}" "${GME_INCLUDE_DIR}" "${ENET_INCLUDE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/../platform/windows/include" "${CMAKE_CURRENT_SOURCE_DIR}/../platform/windows/include/vpx" "${CMAKE_CURRENT_SOURCE_DIR}/../platform/windows/include/sdl2") + include_directories( "${ZLIB_INCLUDE_DIR}" "${BZIP2_INCLUDE_DIR}" "${LZMA_INCLUDE_DIR}" "${JPEG_INCLUDE_DIR}" "${GME_INCLUDE_DIR}" "${ENET_INCLUDE_DIR}" "${GDTOA_INCLUDE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/../platform/windows/include" "${CMAKE_CURRENT_SOURCE_DIR}/../platform/windows/include/vpx" "${CMAKE_CURRENT_SOURCE_DIR}/../platform/windows/include/sdl2") else () -include_directories( "${ZLIB_INCLUDE_DIR}" "${BZIP2_INCLUDE_DIR}" "${LZMA_INCLUDE_DIR}" "${JPEG_INCLUDE_DIR}" "${GME_INCLUDE_DIR}" "${ENET_INCLUDE_DIR}") +include_directories( "${ZLIB_INCLUDE_DIR}" "${BZIP2_INCLUDE_DIR}" "${LZMA_INCLUDE_DIR}" "${JPEG_INCLUDE_DIR}" "${GME_INCLUDE_DIR}" "${ENET_INCLUDE_DIR}" "${GDTOA_INCLUDE_DIR}") endif() @@ -965,7 +965,11 @@ set (PCH_SOURCES common/utility/m_argv.cpp common/utility/files.cpp common/utility/files_decompress.cpp - #common/utility/configfile.cpp + common/utility/zstring.cpp + common/utility/zstrformat.cpp + common/utility/utf8.cpp + common/utility/superfasthash.cpp 
+#common/utility/configfile.cpp ) if( MSVC ) diff --git a/source/common/utility/basics.h b/source/common/utility/basics.h new file mode 100644 index 000000000..ebbdd3f29 --- /dev/null +++ b/source/common/utility/basics.h @@ -0,0 +1,15 @@ +#ifndef __BASICS_H +#define __BASICS_H + +#ifdef __GNUC__ +#define GCCPRINTF(stri,firstargi) __attribute__((format(printf,stri,firstargi))) +#define GCCFORMAT(stri) __attribute__((format(printf,stri,0))) +#define GCCNOWARN __attribute__((unused)) +#else +#define GCCPRINTF(a,b) +#define GCCFORMAT(a) +#define GCCNOWARN +#endif + + +#endif diff --git a/source/common/utility/files.cpp b/source/common/utility/files.cpp index 8b50bd486..63cda2436 100644 --- a/source/common/utility/files.cpp +++ b/source/common/utility/files.cpp @@ -35,12 +35,13 @@ #include "files.h" #include "templates.h" // just for 'clamp' +#include "zstring.h" FILE *myfopen(const char *filename, const char *flags) { // fix this later -#if 1//ndef _WIN32 +#ifndef _WIN32 return fopen(filename, flags); #else auto widename = WideString(filename); diff --git a/source/common/utility/files_decompress.cpp b/source/common/utility/files_decompress.cpp index 132d5d4ff..4770c1440 100644 --- a/source/common/utility/files_decompress.cpp +++ b/source/common/utility/files_decompress.cpp @@ -41,6 +41,7 @@ #include "files.h" #include "templates.h" +#include "zstring.h" //========================================================================== // @@ -84,7 +85,6 @@ char *DecompressorBase::Gets(char *strbuf, int len) // // M_ZlibError // -#if 0 FString M_ZLibError(int zerr) { if (zerr >= 0) @@ -111,7 +111,6 @@ FString M_ZLibError(int zerr) return errs[-zerr - 1]; } } -#endif //========================================================================== // @@ -148,10 +147,7 @@ public: if (err != Z_OK) { - // Later, when FString is available. 
-#if 0 DecompressionError ("DecompressorZ: inflateInit failed: %s\n", M_ZLibError(err).GetChars()); -#endif } }
diff --git a/source/common/utility/superfasthash.cpp b/source/common/utility/superfasthash.cpp
new file mode 100644
index 000000000..9e0820639
--- /dev/null
+++ b/source/common/utility/superfasthash.cpp
@@ -0,0 +1,141 @@
+#include
+#include
+#include
+
+/* ======================================================================== */
+
+/* By Paul Hsieh (C) 2004, 2005. Covered under the Paul Hsieh derivative
+   license. See:
+   http://www.azillionmonkeys.com/qed/weblicense.html for license details.
+
+   http://www.azillionmonkeys.com/qed/hash.html */
+
+#undef get16bits	/* get16bits(d): the two bytes at d as one 16-bit value, first byte in the low 8 bits */
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif	/* the compilers listed above read the value with a single uint16_t load */
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif	/* everything else assembles the value from two separate byte reads */
+
+uint32_t SuperFastHash (const char *data, size_t len)	/* case-sensitive hash of len bytes at data; 0 for empty or NULL input */
+{
+	uint32_t hash = 0, tmp;
+	size_t rem;
+
+	if (len == 0 || data == NULL) return 0;
+
+	rem = len & 3;	/* bytes left over once the 4-byte chunks are consumed */
+	len >>= 2;	/* number of whole 4-byte chunks */
+
+	/* Main loop: mixes in one 4-byte chunk (two 16-bit reads) per iteration */
+	for (;len > 0; len--)
+	{
+		hash  += get16bits (data);
+		tmp    = (get16bits (data+2) << 11) ^ hash;
+		hash   = (hash << 16) ^ tmp;
+		data  += 2*sizeof (uint16_t);
+		hash  += hash >> 11;
+	}
+
+	/* Handle end cases: mix in the 1 to 3 leftover bytes; rem == 0 adds nothing */
+	switch (rem)
+	{
+		case 3:	hash += get16bits (data);
+				hash ^= hash << 16;
+				hash ^= data[sizeof (uint16_t)] << 18;
+				hash += hash >> 11;
+				break;
+		case 2:	hash += get16bits (data);
+				hash ^= hash << 11;
+				hash += hash >> 17;
+				break;
+		case 1: hash += *data;
+				hash ^= hash << 10;
+				hash += hash >> 1;
+	}
+
+	/* Force "avalanching" of final 127 bits */
+	hash ^= hash << 3;
+	hash += hash >> 5;
+	hash ^= hash << 4;
+	hash += hash >> 17;
+	hash ^= hash << 25;
+	hash += hash >> 6;
+
+	return hash;
+}
+
+/* A modified version to do a case-insensitive hash: every byte goes through tolower() before it is mixed in */
+
+#undef get16bits
+#define get16bits(d) ((((uint32_t)tolower(((const uint8_t *)(d))[1])) << 8)\
+                       +(uint32_t)tolower(((const uint8_t *)(d))[0]) )
+
+uint32_t SuperFastHashI (const char *data, size_t len)	/* same mixing steps as SuperFastHash, applied to lowercased bytes */
+{
+	uint32_t hash = 0, tmp;
+	size_t rem;
+
+	if (len <= 0 || data == NULL) return 0;	/* len is unsigned, so only len == 0 can match the first test */
+
+	rem = len & 3;	/* bytes left over once the 4-byte chunks are consumed */
+	len >>= 2;	/* number of whole 4-byte chunks */
+
+	/* Main loop: mixes in one 4-byte chunk (two 16-bit reads) per iteration */
+	for (;len > 0; len--)
+	{
+		hash  += get16bits (data);
+		tmp    = (get16bits (data+2) << 11) ^ hash;
+		hash   = (hash << 16) ^ tmp;
+		data  += 2*sizeof (uint16_t);
+		hash  += hash >> 11;
+	}
+
+	/* Handle end cases: mix in the 1 to 3 leftover bytes; rem == 0 adds nothing */
+	switch (rem)
+	{
+		case 3:	hash += get16bits (data);
+				hash ^= hash << 16;
+				hash ^= tolower(data[sizeof (uint16_t)]) << 18;	/* NOTE(review): data[] is plain char here, unlike the uint8_t reads in get16bits - confirm bytes >= 0x80 behave as intended */
+				hash += hash >> 11;
+				break;
+		case 2:	hash += get16bits (data);
+				hash ^= hash << 11;
+				hash += hash >> 17;
+				break;
+		case 1: hash += tolower(*data);	/* NOTE(review): same plain-char argument as in case 3 */
+				hash ^= hash << 10;
+				hash += hash >> 1;
+	}
+
+	/* Force "avalanching" of final 127 bits */
+	hash ^= hash << 3;
+	hash += hash >> 5;
+	hash ^= hash << 4;
+	hash += hash >> 17;
+	hash ^= hash << 25;
+	hash += hash >> 6;
+
+	return hash;
+}
+
+/* ======================================================================== */
+
+unsigned int MakeKey (const char *s)	/* case-insensitive key for a NUL-terminated string; NULL gives 0 */
+{
+	if (s == NULL)
+	{
+		return 0;
+	}
+	return SuperFastHashI (s, strlen (s));
+}
+
+unsigned int MakeKey (const char *s, size_t len)	/* case-insensitive key for exactly len bytes starting at s */
+{
+	return SuperFastHashI (s, len);
+}
+
diff --git a/source/common/utility/superfasthash.h b/source/common/utility/superfasthash.h
new file mode 100644
index 000000000..1b831dbfd
--- /dev/null
+++ b/source/common/utility/superfasthash.h
@@ -0,0 +1,7 @@
+#pragma once
+#include
+
+uint32_t SuperFastHash (const char *data, size_t len);	// case-sensitive hash of len bytes
+uint32_t SuperFastHashI (const char *data, size_t len);	// case-insensitive variant (bytes are lowercased first)
+unsigned int MakeKey (const char *s);	// SuperFastHashI over a NUL-terminated string, 0 for NULL
+unsigned int MakeKey (const char *s, size_t len);	// SuperFastHashI over len bytes
diff --git a/source/common/utility/utf8.cpp b/source/common/utility/utf8.cpp
new file mode 100644
index 000000000..31e4eff4c
--- /dev/null
+++
b/source/common/utility/utf8.cpp @@ -0,0 +1,989 @@ +/* +** utf8.cpp +** UTF-8 utilities +** +**--------------------------------------------------------------------------- +** Copyright 2019 Christoph Oelckers +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**---------------------------------------------------------------------------
+**
+*/
+#include
+#include "tarray.h"
+
+
+//==========================================================================
+//
+// Encodes a codepoint as UTF-8 into buffer (up to 4 bytes) and stores the byte count in *size.
+// Returns 0 on success, or -1 (without touching *size) for negative values and values above 0x10FFFF.
+//
+//==========================================================================
+
+int utf8_encode(int32_t codepoint, uint8_t *buffer, int *size)
+{
+	if (codepoint < 0)
+		return -1;
+	else if (codepoint < 0x80)
+	{
+		buffer[0] = (char)codepoint;
+		*size = 1;
+	}
+	else if (codepoint < 0x800)
+	{
+		buffer[0] = 0xC0 + ((codepoint & 0x7C0) >> 6);
+		buffer[1] = 0x80 + ((codepoint & 0x03F));
+		*size = 2;
+	}
+	else if (codepoint < 0x10000)
+	{
+		buffer[0] = 0xE0 + ((codepoint & 0xF000) >> 12);
+		buffer[1] = 0x80 + ((codepoint & 0x0FC0) >> 6);
+		buffer[2] = 0x80 + ((codepoint & 0x003F));
+		*size = 3;
+	}
+	else if (codepoint <= 0x10FFFF)
+	{
+		buffer[0] = 0xF0 + ((codepoint & 0x1C0000) >> 18);
+		buffer[1] = 0x80 + ((codepoint & 0x03F000) >> 12);
+		buffer[2] = 0x80 + ((codepoint & 0x000FC0) >> 6);
+		buffer[3] = 0x80 + ((codepoint & 0x00003F));
+		*size = 4;
+	}
+	else
+		return -1;
+
+	return 0;
+}
+
+//==========================================================================
+//
+// Decodes one UTF-8 sequence starting at src. Returns the codepoint and stores the bytes consumed in *size.
+// Returns -1 (with *size left at 1) for bad continuation bytes, overlong forms, surrogates and values above 0x10FFFF.
+//
+//==========================================================================
+
+int utf8_decode(const uint8_t *src, int *size)
+{
+	int c = src[0];
+	int r;
+
+	*size = 1;
+	if ((c & 0x80) == 0)
+	{
+		return c;
+	}
+
+	int c1 = src[1];
+	if (c1 < 0x80 || c1 >= 0xc0) return -1;	// every trailing byte must look like 10xxxxxx
+	c1 &= 0x3f;
+
+	if ((c & 0xE0) == 0xC0)
+	{
+		r = ((c & 0x1F) << 6) | c1;
+		if (r >= 128)	// anything smaller is an overlong 2-byte form
+		{
+			*size = 2;
+			return r;
+		}
+		return -1;
+	}
+
+	int c2 = src[2];
+	if (c2 < 0x80 || c2 >= 0xc0) return -1;
+	c2 &= 0x3f;
+
+	if ((c & 0xF0) == 0xE0)
+	{
+		r = ((c & 0x0F) << 12) | (c1 << 6) | c2;
+		if (r >= 2048 && (r < 55296 || r > 57343))	// no overlong forms, no surrogates (0xD800-0xDFFF)
+		{
+			*size = 3;
+			return r;
+		}
+		return -1;
+	}
+
+	int c3 = src[3];
+	if (c3 < 0x80 || c3 >= 0xc0) return -1;	// range-check c3 itself; c1 is already masked to 6 bits at this point
+	c3 &= 0x3f;
+
+	if ((c & 0xF8) == 0xF0)
+	{
+		r = ((c & 0x07) << 18) | (c1 << 12) | (c2 << 6) | c3;
+		if (r >= 65536 && r <= 1114111)	// no overlong forms, nothing above 0x10FFFF
+		{
+			*size = 4;
+			return r;
+		}
+	}
+	return -1;
+}
+
+//==========================================================================
+//
+// Unicode mapping for the 0x80-0x9f range of the Windows 1252 code page
+//
+//==========================================================================
+
+uint16_t win1252map[] = {
+	0x20AC,
+	0x81  ,
+	0x201A,
+	0x0192,
+	0x201E,
+	0x2026,
+	0x2020,
+	0x2021,
+	0x02C6,
+	0x2030,
+	0x0160,
+	0x2039,
+	0x0152,
+	0x8d  ,
+	0x017D,
+	0x8f  ,
+	0x90  ,
+	0x2018,
+	0x2019,
+	0x201C,
+	0x201D,
+	0x2022,
+	0x2013,
+	0x2014,
+	0x02DC,
+	0x2122,
+	0x0161,
+	0x203A,
+	0x0153,
+	0x9d  ,
+	0x017E,
+	0x0178,
+};
+
+//==========================================================================
+//
+// reads one character from the string.
+// This can handle both ISO 8859-1/Windows-1252 and UTF-8, as well as mixed strings
+// between both encodings, which may happen if inconsistent encoding is
+// used between different files in a mod.
+//
+//==========================================================================
+
+int GetCharFromString(const uint8_t *&string)
+{
+	int z;
+
+	z = *string;
+
+	if (z < 192)
+	{
+		string++;
+
+		// Handle Windows 1252 characters
+		if (z >= 128 && z < 160)
+		{
+			return win1252map[z - 128];
+		}
+		return z;
+	}
+	else
+	{
+		int size = 0;
+		auto chr = utf8_decode(string, &size);
+		if (chr >= 0)
+		{
+			string += size;
+			return chr;
+		}
+		string++;	// not valid UTF-8: consume one byte and return it unchanged
+		return z;
+	}
+}
+
+//==========================================================================
+//
+// convert a potentially mixed-encoded string to pure UTF-8
+// this returns a pointer to a static buffer,
+// assuming that its caller will immediately process the result.
+// If numchars is non-null it receives the number of characters read from the input.
+//==========================================================================
+
+static TArray UTF8String;
+
+const char *MakeUTF8(const char *outline, int *numchars = nullptr)
+{
+	UTF8String.Clear();
+	const uint8_t *in = (const uint8_t*)outline;
+
+	if (numchars) *numchars = 0;
+	while (int chr = GetCharFromString(in))	// a 0 character ends the input
+	{
+		int size = 0;
+		uint8_t encode[4];
+		if (!utf8_encode(chr, encode, &size))	// utf8_encode returns 0 on success
+		{
+			for (int i = 0; i < size; i++)
+			{
+				UTF8String.Push(encode[i]);
+			}
+		}
+		if (numchars) (*numchars)++;	// parentheses are required: *numchars++ would advance the pointer and leave the count at 0
+	}
+	UTF8String.Push(0);
+	return UTF8String.Data();
+}
+
+const char *MakeUTF8(int codepoint, int *psize)	// single-codepoint variant; writes into the same static buffer
+{
+	int size = 0;
+	UTF8String.Resize(5);	// room for 4 encoded bytes plus the terminator
+	utf8_encode(codepoint, (uint8_t*)UTF8String.Data(), &size);
+	UTF8String[size] = 0;	// size stays 0 if encoding failed, which yields an empty string
+	if (psize) *psize = size;
+	return UTF8String.Data();
+}
+
+//==========================================================================
+//
+// Unicode-aware upper/lowercase conversion
+// The only characters not being handled by this are the Turkish I's
+// because those are language specific.
+//
+//==========================================================================
+
+
+uint16_t lowerforupper[65536];
+uint16_t upperforlower[65536];
+bool islowermap[65536];
+bool isuppermap[65536];
+
+// This is a supposedly complete mapping of all lower <-> upper pairs.
Most will most likely never be needed by Doom but this way there won't be any future surprises +static const uint16_t loweruppercase[] = { +0x0061,0x0041, +0x0062,0x0042, +0x0063,0x0043, +0x0064,0x0044, +0x0065,0x0045, +0x0066,0x0046, +0x0067,0x0047, +0x0068,0x0048, +0x0069,0x0049, +0x006A,0x004A, +0x006B,0x004B, +0x006C,0x004C, +0x006D,0x004D, +0x006E,0x004E, +0x006F,0x004F, +0x0070,0x0050, +0x0071,0x0051, +0x0072,0x0052, +0x0073,0x0053, +0x0074,0x0054, +0x0075,0x0055, +0x0076,0x0056, +0x0077,0x0057, +0x0078,0x0058, +0x0079,0x0059, +0x007A,0x005A, +0x00DF,0x1E9E, +0x00E0,0x00C0, +0x00E1,0x00C1, +0x00E2,0x00C2, +0x00E3,0x00C3, +0x00E4,0x00C4, +0x00E5,0x00C5, +0x00E6,0x00C6, +0x00E7,0x00C7, +0x00E8,0x00C8, +0x00E9,0x00C9, +0x00EA,0x00CA, +0x00EB,0x00CB, +0x00EC,0x00CC, +0x00ED,0x00CD, +0x00EE,0x00CE, +0x00EF,0x00CF, +0x00F0,0x00D0, +0x00F1,0x00D1, +0x00F2,0x00D2, +0x00F3,0x00D3, +0x00F4,0x00D4, +0x00F5,0x00D5, +0x00F6,0x00D6, +0x00F8,0x00D8, +0x00F9,0x00D9, +0x00FA,0x00DA, +0x00FB,0x00DB, +0x00FC,0x00DC, +0x00FD,0x00DD, +0x00FE,0x00DE, +0x00FF,0x0178, +0x0101,0x0100, +0x0103,0x0102, +0x0105,0x0104, +0x0107,0x0106, +0x0109,0x0108, +0x010B,0x010A, +0x010D,0x010C, +0x010F,0x010E, +0x0111,0x0110, +0x0113,0x0112, +0x0115,0x0114, +0x0117,0x0116, +0x0119,0x0118, +0x011B,0x011A, +0x011D,0x011C, +0x011F,0x011E, +0x0121,0x0120, +0x0123,0x0122, +0x0125,0x0124, +0x0127,0x0126, +0x0129,0x0128, +0x012B,0x012A, +0x012D,0x012C, +0x012F,0x012E, +0x0131,0x0049, +0x0133,0x0132, +0x0135,0x0134, +0x0137,0x0136, +0x013A,0x0139, +0x013C,0x013B, +0x013E,0x013D, +0x0140,0x013F, +0x0142,0x0141, +0x0144,0x0143, +0x0146,0x0145, +0x0148,0x0147, +0x014B,0x014A, +0x014D,0x014C, +0x014F,0x014E, +0x0151,0x0150, +0x0153,0x0152, +0x0155,0x0154, +0x0157,0x0156, +0x0159,0x0158, +0x015B,0x015A, +0x015D,0x015C, +0x015F,0x015E, +0x0161,0x0160, +0x0163,0x0162, +0x0165,0x0164, +0x0167,0x0166, +0x0169,0x0168, +0x016B,0x016A, +0x016D,0x016C, +0x016F,0x016E, +0x0171,0x0170, +0x0173,0x0172, +0x0175,0x0174, 
+0x0177,0x0176, +0x017A,0x0179, +0x017C,0x017B, +0x017E,0x017D, +0x0183,0x0182, +0x0185,0x0184, +0x0188,0x0187, +0x018C,0x018B, +0x0192,0x0191, +0x0199,0x0198, +0x01A1,0x01A0, +0x01A3,0x01A2, +0x01A5,0x01A4, +0x01A8,0x01A7, +0x01AD,0x01AC, +0x01B0,0x01AF, +0x01B4,0x01B3, +0x01B6,0x01B5, +0x01B9,0x01B8, +0x01BD,0x01BC, +0x01C6,0x01C4, +0x01C9,0x01C7, +0x01CC,0x01CA, +0x01CE,0x01CD, +0x01D0,0x01CF, +0x01D2,0x01D1, +0x01D4,0x01D3, +0x01D6,0x01D5, +0x01D8,0x01D7, +0x01DA,0x01D9, +0x01DC,0x01DB, +0x01DF,0x01DE, +0x01E1,0x01E0, +0x01E3,0x01E2, +0x01E5,0x01E4, +0x01E7,0x01E6, +0x01E9,0x01E8, +0x01EB,0x01EA, +0x01ED,0x01EC, +0x01EF,0x01EE, +0x01F3,0x01F1, +0x01F5,0x01F4, +0x01FB,0x01FA, +0x01FD,0x01FC, +0x01FF,0x01FE, +0x0201,0x0200, +0x0203,0x0202, +0x0205,0x0204, +0x0207,0x0206, +0x0209,0x0208, +0x020B,0x020A, +0x020D,0x020C, +0x020F,0x020E, +0x0211,0x0210, +0x0213,0x0212, +0x0215,0x0214, +0x0217,0x0216, +0x0253,0x0181, +0x0254,0x0186, +0x0257,0x018A, +0x0258,0x018E, +0x0259,0x018F, +0x025B,0x0190, +0x0260,0x0193, +0x0263,0x0194, +0x0268,0x0197, +0x0269,0x0196, +0x026F,0x019C, +0x0272,0x019D, +0x0275,0x019F, +0x0283,0x01A9, +0x0288,0x01AE, +0x028A,0x01B1, +0x028B,0x01B2, +0x0292,0x01B7, +0x03AC,0x0386, +0x03AD,0x0388, +0x03AE,0x0389, +0x03AF,0x038A, +0x03B1,0x0391, +0x03B2,0x0392, +0x03B3,0x0393, +0x03B4,0x0394, +0x03B5,0x0395, +0x03B6,0x0396, +0x03B7,0x0397, +0x03B8,0x0398, +0x03B9,0x0399, +0x03BA,0x039A, +0x03BB,0x039B, +0x03BC,0x039C, +0x03BD,0x039D, +0x03BE,0x039E, +0x03BF,0x039F, +0x03C0,0x03A0, +0x03C1,0x03A1, +0x03C3,0x03A3, +0x03C4,0x03A4, +0x03C5,0x03A5, +0x03C6,0x03A6, +0x03C7,0x03A7, +0x03C8,0x03A8, +0x03C9,0x03A9, +0x03CA,0x03AA, +0x03CB,0x03AB, +0x03CC,0x038C, +0x03CD,0x038E, +0x03CE,0x038F, +0x03E3,0x03E2, +0x03E5,0x03E4, +0x03E7,0x03E6, +0x03E9,0x03E8, +0x03EB,0x03EA, +0x03ED,0x03EC, +0x03EF,0x03EE, +0x0430,0x0410, +0x0431,0x0411, +0x0432,0x0412, +0x0433,0x0413, +0x0434,0x0414, +0x0435,0x0415, +0x0436,0x0416, +0x0437,0x0417, +0x0438,0x0418, +0x0439,0x0419, 
+0x043A,0x041A, +0x043B,0x041B, +0x043C,0x041C, +0x043D,0x041D, +0x043E,0x041E, +0x043F,0x041F, +0x0440,0x0420, +0x0441,0x0421, +0x0442,0x0422, +0x0443,0x0423, +0x0444,0x0424, +0x0445,0x0425, +0x0446,0x0426, +0x0447,0x0427, +0x0448,0x0428, +0x0449,0x0429, +0x044A,0x042A, +0x044B,0x042B, +0x044C,0x042C, +0x044D,0x042D, +0x044E,0x042E, +0x044F,0x042F, +0x0451,0x0401, +0x0452,0x0402, +0x0453,0x0403, +0x0454,0x0404, +0x0455,0x0405, +0x0456,0x0406, +0x0457,0x0407, +0x0458,0x0408, +0x0459,0x0409, +0x045A,0x040A, +0x045B,0x040B, +0x045C,0x040C, +0x045E,0x040E, +0x045F,0x040F, +0x0461,0x0460, +0x0463,0x0462, +0x0465,0x0464, +0x0467,0x0466, +0x0469,0x0468, +0x046B,0x046A, +0x046D,0x046C, +0x046F,0x046E, +0x0471,0x0470, +0x0473,0x0472, +0x0475,0x0474, +0x0477,0x0476, +0x0479,0x0478, +0x047B,0x047A, +0x047D,0x047C, +0x047F,0x047E, +0x0481,0x0480, +0x0491,0x0490, +0x0493,0x0492, +0x0495,0x0494, +0x0497,0x0496, +0x0499,0x0498, +0x049B,0x049A, +0x049D,0x049C, +0x049F,0x049E, +0x04A1,0x04A0, +0x04A3,0x04A2, +0x04A5,0x04A4, +0x04A7,0x04A6, +0x04A9,0x04A8, +0x04AB,0x04AA, +0x04AD,0x04AC, +0x04AF,0x04AE, +0x04B1,0x04B0, +0x04B3,0x04B2, +0x04B5,0x04B4, +0x04B7,0x04B6, +0x04B9,0x04B8, +0x04BB,0x04BA, +0x04BD,0x04BC, +0x04BF,0x04BE, +0x04C2,0x04C1, +0x04C4,0x04C3, +0x04C8,0x04C7, +0x04CC,0x04CB, +0x04D1,0x04D0, +0x04D3,0x04D2, +0x04D5,0x04D4, +0x04D7,0x04D6, +0x04D9,0x04D8, +0x04DB,0x04DA, +0x04DD,0x04DC, +0x04DF,0x04DE, +0x04E1,0x04E0, +0x04E3,0x04E2, +0x04E5,0x04E4, +0x04E7,0x04E6, +0x04E9,0x04E8, +0x04EB,0x04EA, +0x04EF,0x04EE, +0x04F1,0x04F0, +0x04F3,0x04F2, +0x04F5,0x04F4, +0x04F9,0x04F8, +0x0561,0x0531, +0x0562,0x0532, +0x0563,0x0533, +0x0564,0x0534, +0x0565,0x0535, +0x0566,0x0536, +0x0567,0x0537, +0x0568,0x0538, +0x0569,0x0539, +0x056A,0x053A, +0x056B,0x053B, +0x056C,0x053C, +0x056D,0x053D, +0x056E,0x053E, +0x056F,0x053F, +0x0570,0x0540, +0x0571,0x0541, +0x0572,0x0542, +0x0573,0x0543, +0x0574,0x0544, +0x0575,0x0545, +0x0576,0x0546, +0x0577,0x0547, +0x0578,0x0548, +0x0579,0x0549, 
+0x057A,0x054A, +0x057B,0x054B, +0x057C,0x054C, +0x057D,0x054D, +0x057E,0x054E, +0x057F,0x054F, +0x0580,0x0550, +0x0581,0x0551, +0x0582,0x0552, +0x0583,0x0553, +0x0584,0x0554, +0x0585,0x0555, +0x0586,0x0556, +0x10D0,0x10A0, +0x10D1,0x10A1, +0x10D2,0x10A2, +0x10D3,0x10A3, +0x10D4,0x10A4, +0x10D5,0x10A5, +0x10D6,0x10A6, +0x10D7,0x10A7, +0x10D8,0x10A8, +0x10D9,0x10A9, +0x10DA,0x10AA, +0x10DB,0x10AB, +0x10DC,0x10AC, +0x10DD,0x10AD, +0x10DE,0x10AE, +0x10DF,0x10AF, +0x10E0,0x10B0, +0x10E1,0x10B1, +0x10E2,0x10B2, +0x10E3,0x10B3, +0x10E4,0x10B4, +0x10E5,0x10B5, +0x10E6,0x10B6, +0x10E7,0x10B7, +0x10E8,0x10B8, +0x10E9,0x10B9, +0x10EA,0x10BA, +0x10EB,0x10BB, +0x10EC,0x10BC, +0x10ED,0x10BD, +0x10EE,0x10BE, +0x10EF,0x10BF, +0x10F0,0x10C0, +0x10F1,0x10C1, +0x10F2,0x10C2, +0x10F3,0x10C3, +0x10F4,0x10C4, +0x10F5,0x10C5, +0x1E01,0x1E00, +0x1E03,0x1E02, +0x1E05,0x1E04, +0x1E07,0x1E06, +0x1E09,0x1E08, +0x1E0B,0x1E0A, +0x1E0D,0x1E0C, +0x1E0F,0x1E0E, +0x1E11,0x1E10, +0x1E13,0x1E12, +0x1E15,0x1E14, +0x1E17,0x1E16, +0x1E19,0x1E18, +0x1E1B,0x1E1A, +0x1E1D,0x1E1C, +0x1E1F,0x1E1E, +0x1E21,0x1E20, +0x1E23,0x1E22, +0x1E25,0x1E24, +0x1E27,0x1E26, +0x1E29,0x1E28, +0x1E2B,0x1E2A, +0x1E2D,0x1E2C, +0x1E2F,0x1E2E, +0x1E31,0x1E30, +0x1E33,0x1E32, +0x1E35,0x1E34, +0x1E37,0x1E36, +0x1E39,0x1E38, +0x1E3B,0x1E3A, +0x1E3D,0x1E3C, +0x1E3F,0x1E3E, +0x1E41,0x1E40, +0x1E43,0x1E42, +0x1E45,0x1E44, +0x1E47,0x1E46, +0x1E49,0x1E48, +0x1E4B,0x1E4A, +0x1E4D,0x1E4C, +0x1E4F,0x1E4E, +0x1E51,0x1E50, +0x1E53,0x1E52, +0x1E55,0x1E54, +0x1E57,0x1E56, +0x1E59,0x1E58, +0x1E5B,0x1E5A, +0x1E5D,0x1E5C, +0x1E5F,0x1E5E, +0x1E61,0x1E60, +0x1E63,0x1E62, +0x1E65,0x1E64, +0x1E67,0x1E66, +0x1E69,0x1E68, +0x1E6B,0x1E6A, +0x1E6D,0x1E6C, +0x1E6F,0x1E6E, +0x1E71,0x1E70, +0x1E73,0x1E72, +0x1E75,0x1E74, +0x1E77,0x1E76, +0x1E79,0x1E78, +0x1E7B,0x1E7A, +0x1E7D,0x1E7C, +0x1E7F,0x1E7E, +0x1E81,0x1E80, +0x1E83,0x1E82, +0x1E85,0x1E84, +0x1E87,0x1E86, +0x1E89,0x1E88, +0x1E8B,0x1E8A, +0x1E8D,0x1E8C, +0x1E8F,0x1E8E, +0x1E91,0x1E90, +0x1E93,0x1E92, 
+0x1E95,0x1E94, +0x1EA1,0x1EA0, +0x1EA3,0x1EA2, +0x1EA5,0x1EA4, +0x1EA7,0x1EA6, +0x1EA9,0x1EA8, +0x1EAB,0x1EAA, +0x1EAD,0x1EAC, +0x1EAF,0x1EAE, +0x1EB1,0x1EB0, +0x1EB3,0x1EB2, +0x1EB5,0x1EB4, +0x1EB7,0x1EB6, +0x1EB9,0x1EB8, +0x1EBB,0x1EBA, +0x1EBD,0x1EBC, +0x1EBF,0x1EBE, +0x1EC1,0x1EC0, +0x1EC3,0x1EC2, +0x1EC5,0x1EC4, +0x1EC7,0x1EC6, +0x1EC9,0x1EC8, +0x1ECB,0x1ECA, +0x1ECD,0x1ECC, +0x1ECF,0x1ECE, +0x1ED1,0x1ED0, +0x1ED3,0x1ED2, +0x1ED5,0x1ED4, +0x1ED7,0x1ED6, +0x1ED9,0x1ED8, +0x1EDB,0x1EDA, +0x1EDD,0x1EDC, +0x1EDF,0x1EDE, +0x1EE1,0x1EE0, +0x1EE3,0x1EE2, +0x1EE5,0x1EE4, +0x1EE7,0x1EE6, +0x1EE9,0x1EE8, +0x1EEB,0x1EEA, +0x1EED,0x1EEC, +0x1EEF,0x1EEE, +0x1EF1,0x1EF0, +0x1EF3,0x1EF2, +0x1EF5,0x1EF4, +0x1EF7,0x1EF6, +0x1EF9,0x1EF8, +0x1F00,0x1F08, +0x1F01,0x1F09, +0x1F02,0x1F0A, +0x1F03,0x1F0B, +0x1F04,0x1F0C, +0x1F05,0x1F0D, +0x1F06,0x1F0E, +0x1F07,0x1F0F, +0x1F10,0x1F18, +0x1F11,0x1F19, +0x1F12,0x1F1A, +0x1F13,0x1F1B, +0x1F14,0x1F1C, +0x1F15,0x1F1D, +0x1F20,0x1F28, +0x1F21,0x1F29, +0x1F22,0x1F2A, +0x1F23,0x1F2B, +0x1F24,0x1F2C, +0x1F25,0x1F2D, +0x1F26,0x1F2E, +0x1F27,0x1F2F, +0x1F30,0x1F38, +0x1F31,0x1F39, +0x1F32,0x1F3A, +0x1F33,0x1F3B, +0x1F34,0x1F3C, +0x1F35,0x1F3D, +0x1F36,0x1F3E, +0x1F37,0x1F3F, +0x1F40,0x1F48, +0x1F41,0x1F49, +0x1F42,0x1F4A, +0x1F43,0x1F4B, +0x1F44,0x1F4C, +0x1F45,0x1F4D, +0x1F51,0x1F59, +0x1F53,0x1F5B, +0x1F55,0x1F5D, +0x1F57,0x1F5F, +0x1F60,0x1F68, +0x1F61, 0x1F69, +0x1F62, 0x1F6A, +0x1F63, 0x1F6B, +0x1F64, 0x1F6C, +0x1F65, 0x1F6D, +0x1F66, 0x1F6E, +0x1F67, 0x1F6F, +0x1F80, 0x1F88, +0x1F81, 0x1F89, +0x1F82, 0x1F8A, +0x1F83, 0x1F8B, +0x1F84, 0x1F8C, +0x1F85, 0x1F8D, +0x1F86, 0x1F8E, +0x1F87, 0x1F8F, +0x1F90, 0x1F98, +0x1F91, 0x1F99, +0x1F92, 0x1F9A, +0x1F93, 0x1F9B, +0x1F94, 0x1F9C, +0x1F95, 0x1F9D, +0x1F96, 0x1F9E, +0x1F97, 0x1F9F, +0x1FA0, 0x1FA8, +0x1FA1, 0x1FA9, +0x1FA2, 0x1FAA, +0x1FA3, 0x1FAB, +0x1FA4, 0x1FAC, +0x1FA5, 0x1FAD, +0x1FA6, 0x1FAE, +0x1FA7, 0x1FAF, +0x1FB0, 0x1FB8, +0x1FB1, 0x1FB9, +0x1FD0, 0x1FD8, +0x1FD1, 0x1FD9, +0x1FE0, 
0x1FE8,
+0x1FE1, 0x1FE9,
+0x24D0, 0x24B6,
+0x24D1, 0x24B7,
+0x24D2, 0x24B8,
+0x24D3, 0x24B9,
+0x24D4, 0x24BA,
+0x24D5, 0x24BB,
+0x24D6, 0x24BC,
+0x24D7, 0x24BD,
+0x24D8, 0x24BE,
+0x24D9, 0x24BF,
+0x24DA, 0x24C0,
+0x24DB, 0x24C1,
+0x24DC, 0x24C2,
+0x24DD, 0x24C3,
+0x24DE, 0x24C4,
+0x24DF, 0x24C5,
+0x24E0, 0x24C6,
+0x24E1, 0x24C7,
+0x24E2, 0x24C8,
+0x24E3, 0x24C9,
+0x24E4, 0x24CA,
+0x24E5, 0x24CB,
+0x24E6, 0x24CC,
+0x24E7, 0x24CD,
+0x24E8, 0x24CE,
+0x24E9, 0x24CF,
+0xFF41, 0xFF21,
+0xFF42, 0xFF22,
+0xFF43, 0xFF23,
+0xFF44, 0xFF24,
+0xFF45, 0xFF25,
+0xFF46, 0xFF26,
+0xFF47, 0xFF27,
+0xFF48, 0xFF28,
+0xFF49, 0xFF29,
+0xFF4A, 0xFF2A,
+0xFF4B, 0xFF2B,
+0xFF4C, 0xFF2C,
+0xFF4D, 0xFF2D,
+0xFF4E, 0xFF2E,
+0xFF4F, 0xFF2F,
+0xFF50, 0xFF30,
+0xFF51, 0xFF31,
+0xFF52, 0xFF32,
+0xFF53, 0xFF33,
+0xFF54, 0xFF34,
+0xFF55, 0xFF35,
+0xFF56, 0xFF36,
+0xFF57, 0xFF37,
+0xFF58, 0xFF38,
+0xFF59, 0xFF39,
+0xFF5A, 0xFF3A,
+0, 0
+};
+
+struct InitLowerUpper	// its constructor fills lowerforupper, upperforlower, islowermap and isuppermap from loweruppercase[]
+{
+	InitLowerUpper()
+	{
+		for (int i = 0; i < 65536; i++)	// start with every code mapping to itself
+		{
+			lowerforupper[i] = i;
+			upperforlower[i] = i;
+		}
+		for (int i = 0; loweruppercase[i]; i += 2)	// entries come as {lower, upper} pairs; a 0 in the lower slot ends the table
+		{
+			auto lower = loweruppercase[i];
+			auto upper = loweruppercase[i + 1];
+			if (lowerforupper[upper] == upper) lowerforupper[upper] = lower; // This mapping is ambiguous (see 0x0131 -> 0x0049, (small Turkish 'i' without dot.) so only pick the first match.
+			if (upperforlower[lower] == lower) upperforlower[lower] = upper;
+			isuppermap[upper] = islowermap[lower] = true;
+		}
+		// Special treatment for the two variants of the small sigma in Greek.
+		islowermap[0x3c2] = true;
+		upperforlower[0x3c2] = 0x3a3;	// one-way entry: lowerforupper[0x3a3] is not changed here
+	}
+};
+
+static InitLowerUpper initer;	// file-scope object whose constructor performs the table setup
+
+bool myislower(int code)	// true if code is in 0..65535 and flagged in islowermap; false for anything outside that range
+{
+	if (code >= 0 && code < 65536) return islowermap[code];
+	return false;
+}
+
+bool myisupper(int code)	// true if code is in 0..65535 and flagged in isuppermap; false for anything outside that range
+{
+	if (code >= 0 && code < 65536) return isuppermap[code];
+	return false;
+}
+
diff --git a/source/common/utility/utf8.h b/source/common/utility/utf8.h
new file mode 100644
index 000000000..ab4577533
--- /dev/null
+++ b/source/common/utility/utf8.h
@@ -0,0 +1,13 @@
+#pragma once
+
+int utf8_encode(int32_t codepoint, uint8_t *buffer, int *size);	// encodes one codepoint into buffer and reports the byte count through size
+int utf8_decode(const uint8_t *src, int *size);	// decodes one codepoint from src and reports the bytes consumed through size
+int GetCharFromString(const uint8_t *&string);	// reads one character and advances the pointer past it
+inline int GetCharFromString(const char32_t *&string)	// char32_t overload: returns one element and advances by one
+{
+	return *string++;
+}
+const char *MakeUTF8(const char *outline, int *numchars = nullptr);	// returns a pointer to a static buffer, assuming that its caller will immediately process the result.
+const char *MakeUTF8(int codepoint, int *psize = nullptr);	// single-codepoint variant; psize, if given, receives the encoded length
+
+extern uint16_t win1252map[];	// table defined in utf8.cpp
diff --git a/source/common/utility/zstrformat.cpp b/source/common/utility/zstrformat.cpp
new file mode 100644
index 000000000..bf61ce282
--- /dev/null
+++ b/source/common/utility/zstrformat.cpp
@@ -0,0 +1,1061 @@
+/*
+** zstrformat.cpp
+** Routines for generic printf-style formatting.
+**
+**---------------------------------------------------------------------------
+** Copyright 2005-2008 Randy Heit
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions
+** are met:
+**
+** 1. Redistributions of source code must retain the above copyright
+**    notice, this list of conditions and the following disclaimer.
+** 2. Redistributions in binary form must reproduce the above copyright
+**    notice, this list of conditions and the following disclaimer in the
+**    documentation and/or other materials provided with the distribution.
+** 3.
The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +** Portions of this file relating to printing floating point numbers +** are covered by the following copyright: +** +**--------------------------------------------------------------------------- +** Copyright (c) 1990, 1993 +** The Regents of the University of California. All rights reserved. +** +** This code is derived from software contributed to Berkeley by +** Chris Torek. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 4. 
Neither the name of the University nor the names of its contributors +** may be used to endorse or promote products derived from this software +** without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +** ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +** SUCH DAMAGE. +** +**--------------------------------------------------------------------------- +** +** Even though the standard C library has a function to do printf-style +** formatting in a generic way, there is no standard interface to this +** function. So if you want to do some printf formatting that doesn't fit in +** the context of the provided functions, you need to roll your own. Why is +** that? +** +** Maybe Microsoft wants you to write a better one yourself? When used as +** part of a sprintf replacement, this function is significantly faster than +** Microsoft's offering. When used as part of a fprintf replacement, this +** function turns out to be slower, but that's probably because the CRT's +** fprintf can interact with the FILE object on a low level for better +** performance. If you sprintf into a buffer and then fwrite that buffer, this +** routine wins again, though the difference isn't great.
+*/ + +#include +#include +#include +#include +#include + +#include "zstring.h" +#include "gdtoa.h" +#include "utf8.h" + + +/* + * MAXEXPDIG is the maximum number of decimal digits needed to store a + * floating point exponent in the largest supported format. It should + * be ceil(log10(LDBL_MAX_10_EXP)) or, if hexadecimal floating point + * conversions are supported, ceil(log10(LDBL_MAX_EXP)). But since it + * is presently never greater than 5 in practice, we fudge it. + */ +#define MAXEXPDIG 6 +#if LDBL_MAX_EXP > 999999 +#error "floating point buffers too small" +#endif + +#define DEFPREC 6 + +static const char hexits[16] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; +static const char HEXits[16] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; +static const char spaces[16] = {' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' '}; +static const char zeroes[17] = {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','.'}; + +namespace StringFormat +{ + static int writepad (OutputFunc output, void *outputData, const char *pad, int padsize, int spaceToFill); + static int printandpad (OutputFunc output, void *outputData, const char *p, const char *ep, int len, const char *with, int padsize); + static int exponent (char *p0, int exp, int fmtch); + + int Worker (OutputFunc output, void *outputData, const char *fmt, ...) 
+ { + va_list arglist; + int len; + + va_start (arglist, fmt); + len = VWorker (output, outputData, fmt, arglist); + va_end (arglist); + return len; + } + + int VWorker (OutputFunc output, void *outputData, const char *fmt, va_list arglist) + { + const char *c; + const char *base; + int len = 0; + int width; + int precision; + int flags; + + base = c = fmt; + for (;;) + { + while (*c && *c != '%') + { + ++c; + } + if (*c == '\0') + { + return len + output (outputData, base, int(c - base)); + } + + if (c - base > 0) + { + len += output (outputData, base, int(c - base)); + } + c++; + + // Gather the flags, if any + for (flags = 0;; ++c) + { + if (*c == '-') + { + flags |= F_MINUS; // bit 0 + } + else if (*c == '+') + { + flags |= F_PLUS; // bit 1 + } + else if (*c == '0') + { + flags |= F_ZERO; // bit 2 + } + else if (*c == ' ') + { + flags |= F_BLANK; // bit 3 + } + else if (*c == '#') + { + flags |= F_HASH; // bit 4 + } + else + { + break; + } + } + + width = precision = -1; + + // Read the width, if any + if (*c == '*') + { + ++c; + width = va_arg (arglist, int); + if (width < 0) + { // Negative width means minus flag and positive width + flags |= F_MINUS; + width = -width; + } + } + else if (*c >= '0' && *c <= '9') + { + width = *c++ - '0'; + while (*c >= '0' && *c <= '9') + { + width = width * 10 + *c++ - '0'; + } + } + + // If 0 and - both appear, 0 is ignored. + // If the blank and + both appear, the blank is ignored. 
+ flags &= ~((flags & 3) << 2); + + // Read the precision, if any + if (*c == '.') + { + precision = 0; + if (*++c == '*') + { + ++c; + precision = va_arg (arglist, int); + } + else if (*c >= '0' && *c <= '9') + { + precision = *c++ - '0'; + while (*c >= '0' && *c <= '9') + { + precision = precision * 10 + *c++ - '0'; + } + } + } + + // Read the size prefix, if any + if (*c == 'h') + { + if (*++c == 'h') + { + flags |= F_HALFHALF; + ++c; + } + else + { + flags |= F_HALF; + } + } + else if (*c == 'l') + { + if (*++c == 'l') + { + flags |= F_LONGLONG; + ++c; + } + else + { + flags |= F_LONG; + } + } + else if (*c == 'I') + { + if (*++c == '6') + { + if (*++c == '4') + { + flags |= F_LONGLONG; + ++c; + } + } + else + { + flags |= F_BIGI; + } + } + else if (*c == 't') + { + flags |= F_PTRDIFF; + ++c; + } + else if (*c == 'z') + { + flags |= F_SIZE; + ++c; + } + + base = c+1; + + // Now that that's all out of the way, we should be pointing at the type specifier + { + char prefix[3]; + int prefixlen; + char hexprefix = '\0'; + char sign = '\0'; + int postprefixzeros = 0; + int size = flags & 0xF000; + char buffer[80], *ibuff; + const char *obuff = 0; + char type = *c++; + int bufflen = 0; + int outlen = 0; + unsigned int intarg = 0; + uint64_t int64arg = 0; + const void *voidparg; + const char *charparg; + double dblarg; + const char *xits = hexits; + int inlen = len; + /* + * We can decompose the printed representation of floating + * point numbers into several parts, some of which may be empty: + * + * [+|-| ] [0x|0X] MMM . NNN [e|E|p|P] [+|-] ZZ + * A B ---C--- D E F + * + * A: 'sign' holds this value if present; '\0' otherwise + * B: hexprefix holds the 'x' or 'X'; '\0' if not hexadecimal + * C: obuff points to the string MMMNNN. Leading and trailing + * zeros are not in the string and must be added. + * D: expchar holds this character; '\0' if no exponent, e.g. 
%f + * F: at least two digits for decimal, at least one digit for hex + */ + const char *decimal_point = ".";/* locale specific decimal point */ + int signflag; /* true if float is negative */ + int expt; /* integer value of exponent */ + char expchar = 'e'; /* exponent character: [eEpP\0] */ + char *dtoaend; /* pointer to end of converted digits */ + int expsize = 0; /* character count for expstr */ + int ndig = 0; /* actual number of digits returned by dtoa */ + char expstr[MAXEXPDIG+2]; /* buffer for exponent string: e+ZZZ */ + char *dtoaresult = NULL; /* buffer allocated by dtoa */ + + // Using a bunch of if/else if statements is faster than a switch, because a switch generates + // a jump table. A jump table means a possible data cache miss and a hefty penalty while the + // cache line is loaded. + + if (type == 'x' || type == 'X' || + type == 'p' || + type == 'd' || type == 'u' || type == 'i' || + type == 'o' || + type == 'B') + { + if (type == 'X' || type == 'p') + { + xits = HEXits; + } + if (type == 'p') + { + type = 'X'; + voidparg = va_arg (arglist, void *); + if (sizeof(void*) == sizeof(int)) + { + intarg = (unsigned int)(size_t)voidparg; + precision = 8; + size = 0; + } + else + { + int64arg = (uint64_t)(size_t)voidparg; + precision = 16; + size = F_LONGLONG; + } + } + else + { + if (size == 0) + { + intarg = va_arg (arglist, int); + } + else if (size == F_HALFHALF) + { + intarg = va_arg (arglist, int); + intarg = (signed char)intarg; + } + else if (size == F_HALF) + { + intarg = va_arg (arglist, int); + intarg = (short)intarg; + } + else if (size == F_LONG) + { + if (sizeof(long) == sizeof(int)) intarg = va_arg (arglist, int); + else { int64arg = va_arg (arglist, int64_t); size = F_LONGLONG; } + } + else if (size == F_BIGI) + { + if (sizeof(void*) == sizeof(int)) intarg = va_arg (arglist, int); + else { int64arg = va_arg (arglist, int64_t); size = F_LONGLONG; } + } + else if (size == F_LONGLONG) + { + int64arg = va_arg (arglist, int64_t); + } + else 
if (size == F_PTRDIFF) + { + if (sizeof(ptrdiff_t) == sizeof(int)) intarg = va_arg (arglist, int); + else { int64arg = va_arg (arglist, int64_t); size = F_LONGLONG; } + } + else if (size == F_SIZE) + { + if (sizeof(size_t) == sizeof(int)) intarg = va_arg (arglist, int); + else { int64arg = va_arg (arglist, int64_t); size = F_LONGLONG; } + } + else + { + intarg = va_arg (arglist, int); + } + } + + if (precision < 0) precision = 1; + + ibuff = &buffer[sizeof(buffer)]; + + if (size == F_LONGLONG) + { + if (int64arg == 0) + { + flags |= F_ZEROVALUE; + } + else + { + if (type == 'o') + { // Octal: Dump digits until it fits in an unsigned int + while (int64arg > UINT_MAX) + { + *--ibuff = char(int64arg & 7) + '0'; int64arg >>= 3; + } + intarg = int(int64arg); + } + else if (type == 'x' || type == 'X') + { // Hexadecimal: Dump digits until it fits in an unsigned int + while (int64arg > UINT_MAX) + { + *--ibuff = xits[int64arg & 15]; int64arg >>= 4; + } + intarg = int(int64arg); + } + else if (type == 'B') + { // Binary: Dump digits until it fits in an unsigned int + while (int64arg > UINT_MAX) + { + *--ibuff = char(int64arg & 1) + '0'; int64arg >>= 1; + } + intarg = int(int64arg); + } + else + { + if (type != 'u') + { + // If a signed number is negative, set the negative flag and make it positive. + int64_t sint64arg = (int64_t)int64arg; + if (sint64arg < 0) + { + flags |= F_NEGATIVE; + sint64arg = -sint64arg; + int64arg = sint64arg; + } + flags |= F_SIGNED; + type = 'u'; + } + // If an unsigned int64 is too big to fit in an unsigned int, dump out + // digits until it is sufficiently small. + while (int64arg > INT_MAX) + { + *--ibuff = char(int64arg % 10) + '0'; int64arg /= 10; + } + intarg = (unsigned int)(int64arg); + } + } + } + else + { + if (intarg == 0) + { + flags |= F_ZEROVALUE; + } + else if (type == 'i' || type == 'd') + { // If a signed int is negative, set the negative flag and make it positive. 
+ signed int sintarg = (signed int)intarg; + if (sintarg < 0) + { + flags |= F_NEGATIVE; + sintarg = -sintarg; + intarg = sintarg; + } + flags |= F_SIGNED; + type = 'u'; + } + } + if (flags & F_ZEROVALUE) + { + if (precision != 0) + { + *--ibuff = '0'; + } + } + else if (type == 'u') + { // Decimal + int i; + + // Unsigned division is typically slower than signed division. + // Do it at most once. + if (intarg > INT_MAX) + { + *--ibuff = char(intarg % 10) + '0'; intarg /= 10; + } + i = (int)intarg; + while (i != 0) + { + *--ibuff = char(i % 10) + '0'; i /= 10; + } + } + else if (type == 'o') + { // Octal + while (intarg != 0) + { + *--ibuff = char(intarg & 7) + '0'; intarg >>= 3; + } + } + else if (type == 'B') + { // Binary + while (intarg != 0) + { + *--ibuff = char(intarg & 1) + '0'; intarg >>= 1; + } + } + else + { // Hexadecimal + while (intarg != 0) + { + *--ibuff = xits[intarg & 15]; intarg >>= 4; + } + } + bufflen = (int)(ptrdiff_t)(&buffer[sizeof(buffer)] - ibuff); // digit count; must be known before the '#' octal test below + // Check for prefix (only for non-decimal, which are always unsigned) + if ((flags & (F_HASH|F_ZEROVALUE)) == F_HASH) + { + if (type == 'o') + { + if (bufflen >= precision) // precision padding will not already supply a leading zero + { + sign = '0'; + } + } + else if (type == 'x' || type == 'X') + { + hexprefix = type; + } + else if (type == 'B') + { + hexprefix = '!'; + } + } + obuff = ibuff; + if (precision >= 0) + { + postprefixzeros = precision - bufflen; + if (postprefixzeros < 0) postprefixzeros = 0; +// flags &= ~F_ZERO; + } + } + else if (type == 'c') + { + intarg = va_arg (arglist, int); + if (utf8_encode(intarg, (uint8_t*)buffer, &bufflen) != 0) + { + buffer[0] = '?'; + bufflen = 1; + } + obuff = buffer; + } + else if (type == 's') + { + charparg = va_arg (arglist, const char *); + if (charparg == NULL) + { + obuff = "(null)"; + bufflen = 6; + } + else + { + obuff = charparg; + if (precision < 0) + { + bufflen = (int)strlen (charparg); + } + else + { + for (bufflen = 0; bufflen < precision && charparg[bufflen] != '\0'; ++bufflen) + { /*
empty */ } + } + } + } + else if (type == '%') + { // Just print a '%': Output it with the next stage. + base--; + continue; + } + else if (type == 'n') + { + if (size == F_HALFHALF) + { + *va_arg (arglist, char *) = (char)inlen; + } + else if (size == F_HALF) + { + *va_arg (arglist, short *) = (short)inlen; + } + else if (size == F_LONG) + { + *va_arg (arglist, long *) = inlen; + } + else if (size == F_LONGLONG) + { + *va_arg (arglist, int64_t *) = inlen; + } + else if (size == F_BIGI) + { + *va_arg (arglist, ptrdiff_t *) = inlen; + } + else + { + *va_arg (arglist, int *) = inlen; + } + } + else if (type == 'f' || type == 'F') + { + expchar = '\0'; + goto fp_begin; + } + else if (type == 'g' || type == 'G') + { + expchar = type - ('g' - 'e'); + if (precision == 0) + { + precision = 1; + } + goto fp_begin; + } + else if (type == 'H') + { // %H is an extension that behaves similarly to %g, except it automatically + // selects precision based on whatever will produce the smallest string. + expchar = 'e'; + goto fp_begin; + } +#if 0 + // The hdtoa function provided with FreeBSD uses a hexadecimal FP constant. + // Microsoft's compiler does not support these, so I would need to hack it + // together with ints instead. It's very do-able, but until I actually have + // some reason to print hex FP numbers, I won't bother. 
+ else if (type == 'a' || type == 'A') + { + if (type == 'A') + { + xits = HEXits; + hexprefix = 'X'; + expchar = 'P'; + } + else + { + hexprefix = 'x'; + expchar = 'p'; + } + if (precision >= 0) + { + precision++; + } + dblarg = va_arg(arglist, double); + dtoaresult = obuff = hdtoa(dblarg, xits, precision, &expt, &signflag, &dtoaend); + if (precision < 0) + { + precision = (int)(dtoaend - obuff); + } + if (expt == INT_MAX) + { + hexprefix = '\0'; + } + goto fp_common; + } +#endif + else if (type == 'e' || type == 'E') + { + expchar = type; + if (precision < 0) // account for digit before decpt + { + precision = DEFPREC + 1; + } + else + { + precision++; + } +fp_begin: + if (precision < 0) + { + precision = DEFPREC; + } + dblarg = va_arg(arglist, double); + obuff = dtoaresult = dtoa(dblarg, type != 'H' ? (expchar ? 2 : 3) : 0, precision, &expt, &signflag, &dtoaend); +//fp_common: + decimal_point = localeconv()->decimal_point; + flags |= F_SIGNED; + if (signflag) + { + flags |= F_NEGATIVE; + } + if (expt == 9999) // inf or nan + { + if (*obuff == 'N') + { + obuff = (type >= 'a') ? "nan" : "NAN"; + flags &= ~F_SIGNED; + } + else + { + obuff = (type >= 'a') ? "inf" : "INF"; + } + bufflen = 3; + flags &= ~F_ZERO; + } + else + { + flags |= F_FPT; + ndig = (int)(dtoaend - obuff); + if (type == 'g' || type == 'G') + { + if (expt > -4 && expt <= precision) + { // Make %[gG] smell like %[fF]. + expchar = '\0'; + if (flags & F_HASH) + { + precision -= expt; + } + else + { + precision = ndig - expt; + } + if (precision < 0) + { + precision = 0; + } + } + else + { // Make %[gG] smell like %[eE], but trim trailing zeroes if no # flag. 
+ if (!(flags & F_HASH)) + { + precision = ndig; + } + } + } + else if (type == 'H') + { + if (expt > -(ndig + 2) && expt <= (ndig + 4)) + { // Make %H smell like %f + expchar = '\0'; + precision = ndig - expt; + if (precision < 0) + { + precision = 0; + } + } + else + {// Make %H smell like %e + precision = ndig; + } + } + if (expchar) + { + expsize = exponent(expstr, expt - 1, expchar); + bufflen = expsize + precision; + if (precision > 1 || (flags & F_HASH)) + { + ++bufflen; + } + } + else + { // space for digits before decimal point + if (expt > 0) + { + bufflen = expt; + } + else // "0" + { + bufflen = 1; + } + // space for decimal pt and following digits + if (precision != 0 || (flags & F_HASH)) + { + bufflen += precision + 1; + } + } + } + } + + // Check for sign prefix (only for signed numbers) + if (flags & F_SIGNED) + { + if (flags & F_NEGATIVE) + { + sign = '-'; + } + else if (flags & F_PLUS) + { + sign = '+'; + } + else if (flags & F_BLANK) + { + sign = ' '; + } + } + + // Construct complete prefix from sign and hex prefix character + prefixlen = 0; + if (sign != '\0') + { + prefix[0] = sign; + prefixlen = 1; + } + if (hexprefix != '\0') + { + prefix[prefixlen] = '0'; + prefix[prefixlen + 1] = hexprefix; + prefixlen += 2; + } + + // Pad the output to the field width, if needed + int fieldlen = prefixlen + postprefixzeros + bufflen; + const char *pad = (flags & F_ZERO) ? zeroes : spaces; + + // If the output is right aligned and zero-padded, then the prefix must come before the padding. 
+ if ((flags & (F_ZERO|F_MINUS)) == F_ZERO && prefixlen > 0) + { + outlen += output (outputData, prefix, prefixlen); + prefixlen = 0; + } + if (!(flags & F_MINUS) && fieldlen < width) + { // Field is right-justified, so padding comes first + outlen += writepad (output, outputData, pad, sizeof(spaces), width - fieldlen); + width = -1; + } + + // Output field: Prefix, post-prefix zeros, buffer text + if (prefixlen > 0) + { + outlen += output (outputData, prefix, prefixlen); + } + outlen += writepad (output, outputData, zeroes, sizeof(spaces), postprefixzeros); + if (!(flags & F_FPT)) + { + if (bufflen > 0) + { + outlen += output (outputData, obuff, bufflen); + } + } + else + { + if (expchar == '\0') // %[fF] or sufficiently short %[gG] + { + if (expt <= 0) + { + outlen += output (outputData, zeroes, 1); + if (precision != 0 || (flags & F_HASH)) + { + outlen += output (outputData, decimal_point, 1); + } + outlen += writepad (output, outputData, zeroes, sizeof(zeroes), -expt); + // already handled initial 0's + precision += expt; + } + else + { + outlen += printandpad (output, outputData, obuff, dtoaend, expt, zeroes, sizeof(zeroes)); + obuff += expt; + if (precision || (flags & F_HASH)) + { + outlen += output (outputData, decimal_point, 1); + } + } + outlen += printandpad (output, outputData, obuff, dtoaend, precision, zeroes, sizeof(zeroes)); + } + else // %[eE] or sufficiently long %[gG] + { + if (precision > 1 || (flags & F_HASH)) + { + buffer[0] = *obuff++; + buffer[1] = *decimal_point; + outlen += output (outputData, buffer, 2); + outlen += output (outputData, obuff, ndig - 1); + outlen += writepad (output, outputData, zeroes, sizeof(zeroes), precision - ndig); + } + else // XeYY + { + outlen += output (outputData, obuff, 1); + } + outlen += output (outputData, expstr, expsize); + } + } + + if ((flags & F_MINUS) && fieldlen < width) + { // Field is left-justified, so padding comes last + outlen += writepad (output, outputData, pad, sizeof(spaces), width - 
fieldlen); + } + len += outlen; + if (dtoaresult != NULL) + { + freedtoa(dtoaresult); + dtoaresult = NULL; + } + } + } + } + + static int writepad (OutputFunc output, void *outputData, const char *pad, int padsize, int spaceToFill) + { + int outlen = 0; + while (spaceToFill > 0) + { + int count = spaceToFill > padsize ? padsize : spaceToFill; + outlen += output (outputData, pad, count); + spaceToFill -= count; + } + return outlen; + } + + static int printandpad (OutputFunc output, void *outputData, const char *p, const char *ep, int len, const char *with, int padsize) + { + int outlen = 0; + int n2 = (int)(ep - p); + if (n2 > len) + { + n2 = len; + } + if (n2 > 0) + { + outlen = output (outputData, p, n2); + } + return outlen + writepad (output, outputData, with, padsize, len - (n2 > 0 ? n2 : 0)); + } + + static int exponent (char *p0, int exp, int fmtch) + { + char *p, *t; + char expbuf[MAXEXPDIG]; + + p = p0; + *p++ = fmtch; + if (exp < 0) + { + exp = -exp; + *p++ = '-'; + } + else + { + *p++ = '+'; + } + t = expbuf + MAXEXPDIG; + if (exp > 9) + { + do + { + *--t = '0' + (exp % 10); + } + while ((exp /= 10) > 9); + *--t = '0' + exp; + for(; t < expbuf + MAXEXPDIG; *p++ = *t++) + { } + } + else + { + // Exponents for decimal floating point conversions + // (%[eEgG]) must be at least two characters long, + // whereas exponents for hexadecimal conversions can + // be only one character long. 
+ if (fmtch == 'e' || fmtch == 'E') + { + *p++ = '0'; + } + *p++ = '0' + exp; + } + return (int)(p - p0); + } +}; + +//========================================================================// +// snprintf / vsnprintf imitations + +#ifdef __GNUC__ +#define GCCPRINTF(stri,firstargi) __attribute__((format(printf,stri,firstargi))) +#define GCCFORMAT(stri) __attribute__((format(printf,stri,0))) +#define GCCNOWARN __attribute__((unused)) +#else +#define GCCPRINTF(a,b) +#define GCCFORMAT(a) +#define GCCNOWARN +#endif + +struct snprintf_state +{ + char *buffer; + size_t maxlen; + size_t curlen; + int ideallen; +}; + +static int myvsnprintf_helper(void *data, const char *cstr, int cstr_len) +{ + snprintf_state *state = (snprintf_state *)data; + + if (INT_MAX - cstr_len < state->ideallen) + { + state->ideallen = INT_MAX; + } + else + { + state->ideallen += cstr_len; + } + if (state->curlen + cstr_len > state->maxlen) + { + cstr_len = (int)(state->maxlen - state->curlen); + } + if (cstr_len > 0) + { + memcpy(state->buffer + state->curlen, cstr, cstr_len); + state->curlen += cstr_len; + } + return cstr_len; +} + +extern "C" +{ + +// Unlike the MS CRT function snprintf, this one always writes a terminating +// null character to the buffer. It also returns the full length of the string +// that would have been output if the buffer had been large enough. In other +// words, it follows BSD/Linux rules and not MS rules. +int myvsnprintf(char *buffer, size_t count, const char *format, va_list argptr) +{ + size_t originalcount = count; + if (count != 0) + { + count--; + } + if (count > INT_MAX) + { // This is probably an error. Output nothing. + originalcount = 0; + count = 0; + } + snprintf_state state = { buffer, count, 0, 0 }; + StringFormat::VWorker(myvsnprintf_helper, &state, format, argptr); + if (originalcount > 0) + { + buffer[state.curlen] = '\0'; + } + return state.ideallen; +} + +int mysnprintf(char *buffer, size_t count, const char *format, ...) 
+{ + va_list argptr; + va_start(argptr, format); + int len = myvsnprintf(buffer, count, format, argptr); + va_end(argptr); + return len; +} + +} diff --git a/source/common/utility/zstring.cpp b/source/common/utility/zstring.cpp new file mode 100644 index 000000000..2cd308596 --- /dev/null +++ b/source/common/utility/zstring.cpp @@ -0,0 +1,1416 @@ +/* +** zstring.cpp +** A dynamically-allocated string class. +** +**--------------------------------------------------------------------------- +** Copyright 2005-2008 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**--------------------------------------------------------------------------- +** +*/ + +#include +#include +#include +#include // for bad_alloc + +#include "zstring.h" +#include "utf8.h" + +extern uint16_t lowerforupper[65536]; +extern uint16_t upperforlower[65536]; + +FNullStringData FString::NullString = +{ + 0, // Length of string + 2, // Size of character buffer + 2, // RefCount; it must never be modified, so keep it above 1 user at all times + "\0" +}; + +void FString::AttachToOther (const FString &other) +{ + assert (other.Chars != NULL); + + if (other.Data()->RefCount < 0) + { + AllocBuffer (other.Data()->Len); + StrCopy (Chars, other.Chars, other.Data()->Len); + } + else + { + Chars = const_cast<FString &>(other).Data()->AddRef(); + } +} + +FString::FString (const char *copyStr) +{ + if (copyStr == NULL || *copyStr == '\0') + { + ResetToNull(); + } + else + { + size_t len = strlen (copyStr); + AllocBuffer (len); + StrCopy (Chars, copyStr, len); + } +} + +FString::FString (const char *copyStr, size_t len) +{ + AllocBuffer (len); + StrCopy (Chars, copyStr, len); +} + +FString::FString (char oneChar) +{ + if (oneChar == '\0') + { + ResetToNull(); + } + else + { + AllocBuffer (1); + Chars[0] = oneChar; + Chars[1] = '\0'; + } +} + +FString::FString (const FString &head, const FString &tail) +{ + size_t len1 = head.Len(); + size_t len2 = tail.Len(); + AllocBuffer (len1 + len2); + StrCopy (Chars, head); + StrCopy (Chars + len1, tail); +} + +FString::FString (const FString &head, const char *tail) +{ + size_t len1 = head.Len(); + size_t len2 = strlen (tail); + AllocBuffer (len1 + len2); + StrCopy (Chars, head); + StrCopy (Chars + len1, tail, len2); +} + +FString::FString (const FString &head, char tail) +{ + size_t len1 = head.Len(); + AllocBuffer (len1 + 1); + StrCopy (Chars, head); + Chars[len1] = tail; + Chars[len1+1] = '\0'; +} + +FString::FString (const char *head, const FString &tail) +{ + size_t len1 = strlen (head); + size_t len2 = tail.Len(); + AllocBuffer (len1 +
len2); + StrCopy (Chars, head, len1); + StrCopy (Chars + len1, tail); +} + +FString::FString (const char *head, const char *tail) +{ + size_t len1 = strlen (head); + size_t len2 = strlen (tail); + AllocBuffer (len1 + len2); + StrCopy (Chars, head, len1); + StrCopy (Chars + len1, tail, len2); +} + +FString::FString (char head, const FString &tail) +{ + size_t len2 = tail.Len(); + AllocBuffer (1 + len2); + Chars[0] = head; + StrCopy (Chars + 1, tail); +} + +FString::~FString () +{ + Data()->Release(); +} + +char *FString::LockNewBuffer(size_t len) +{ + Data()->Release(); + AllocBuffer(len); + assert(Data()->RefCount == 1); + Data()->RefCount = -1; + return Chars; +} + +char *FString::LockBuffer() +{ + if (Data()->RefCount == 1) + { // We're the only user, so we can lock it straight away + Data()->RefCount = -1; + } + else if (Data()->RefCount < 0) + { // Already locked (a negative RefCount is the lock depth); just add to the lock count + Data()->RefCount--; + } + else + { // Somebody else is also using this character buffer, so create a copy + FStringData *old = Data(); + AllocBuffer (old->Len); + StrCopy (Chars, old->Chars(), old->Len); + old->Release(); + Data()->RefCount = -1; + } + return Chars; +} + +void FString::UnlockBuffer() +{ + assert (Data()->RefCount < 0); + + if (++Data()->RefCount == 0) + { + Data()->RefCount = 1; + } +} + +FString &FString::operator = (const FString &other) +{ + assert (Chars != NULL); + + if (&other != this) + { + int oldrefcount = Data()->RefCount; // negative while our buffer is locked; the lock depth is restored below + Data()->Release(); + AttachToOther(other); + if (oldrefcount < 0) + { + LockBuffer(); + Data()->RefCount = oldrefcount; + } + } + return *this; +} + +FString &FString::operator = (FString &&other) +{ + assert (Chars != NULL); + + if (&other != this) + { + Data()->Release(); + Chars = other.Chars; + other.ResetToNull(); + } + + return *this; +} + +FString &FString::operator = (const char *copyStr) +{ + if (copyStr != Chars) + { + if (copyStr == NULL || *copyStr == '\0') + { + Data()->Release(); + ResetToNull(); + } + else
+ { + // In case copyStr is inside us, we can't release it until + // we've finished the copy. + FStringData *old = Data(); + + if (copyStr < Chars || copyStr >= Chars + old->Len) + { + // We know the string isn't in our buffer, so release it now + // to reduce the potential for needless memory fragmentation. + old->Release(); + old = NULL; + } + size_t len = strlen (copyStr); + AllocBuffer (len); + StrCopy (Chars, copyStr, len); + if (old != NULL) + { + old->Release(); + } + } + } + return *this; +} + +void FString::Format (const char *fmt, ...) +{ + va_list arglist; + va_start (arglist, fmt); + VFormat (fmt, arglist); + va_end (arglist); +} + +void FString::AppendFormat (const char *fmt, ...) +{ + va_list arglist; + va_start (arglist, fmt); + StringFormat::VWorker (FormatHelper, this, fmt, arglist); + va_end (arglist); +} + +void FString::VFormat (const char *fmt, va_list arglist) +{ + Data()->Release(); + Chars = (char *)(FStringData::Alloc(128) + 1); + StringFormat::VWorker (FormatHelper, this, fmt, arglist); +} + +void FString::VAppendFormat (const char *fmt, va_list arglist) +{ + StringFormat::VWorker (FormatHelper, this, fmt, arglist); +} + +int FString::FormatHelper (void *data, const char *cstr, int len) +{ + FString *str = (FString *)data; + size_t len1 = str->Len(); + if (len1 + len > str->Data()->AllocLen || str->Chars == &NullString.Nothing[0]) + { + str->ReallocBuffer((len1 + len + 127) & ~127); + } + StrCopy (str->Chars + len1, cstr, len); + str->Data()->Len = (unsigned int)(len1 + len); + return len; +} + +FString FString::operator + (const FString &tail) const +{ + return FString (*this, tail); +} + +FString FString::operator + (const char *tail) const +{ + return FString (*this, tail); +} + +FString operator + (const char *head, const FString &tail) +{ + return FString (head, tail); +} + +FString FString::operator + (char tail) const +{ + return FString (*this, tail); +} + +FString operator + (char head, const FString &tail) +{ + return FString 
(head, tail); +} + +FString &FString::operator += (const FString &tail) +{ + size_t len1 = Len(); + size_t len2 = tail.Len(); + ReallocBuffer (len1 + len2); + StrCopy (Chars + len1, tail); + return *this; +} + +FString &FString::operator += (const char *tail) +{ + size_t len1 = Len(); + size_t len2 = strlen(tail); + ReallocBuffer (len1 + len2); + StrCopy (Chars + len1, tail, len2); + return *this; +} + +FString &FString::operator += (char tail) +{ + size_t len1 = Len(); + ReallocBuffer (len1 + 1); + Chars[len1] = tail; + Chars[len1+1] = '\0'; + return *this; +} + +FString &FString::AppendCStrPart (const char *tail, size_t tailLen) +{ + if (tailLen > 0) + { + size_t len1 = Len(); + ReallocBuffer(len1 + tailLen); + StrCopy(Chars + len1, tail, tailLen); + } + return *this; +} + +FString &FString::CopyCStrPart(const char *tail, size_t tailLen) +{ + if (tailLen > 0) + { + ReallocBuffer(tailLen); + StrCopy(Chars, tail, tailLen); + } + else + { + Data()->Release(); + ResetToNull(); + } + return *this; +} + +size_t FString::CharacterCount() const +{ + // Counts string length in Unicode code points. 
+ size_t len = 0; + const uint8_t *cp = (const uint8_t*)Chars; + while (GetCharFromString(cp)) len++; + return len; +} + + +int FString::GetNextCharacter(int &position) const +{ + const uint8_t *cp = (const uint8_t*)Chars + position; + const uint8_t *cpread = cp; + int chr = GetCharFromString(cpread); + position += int(cpread - cp); + return chr; +} + +void FString::Truncate(size_t newlen) +{ + if (newlen == 0) + { + Data()->Release(); + ResetToNull(); + } + else if (newlen < Len()) + { + ReallocBuffer (newlen); + Chars[newlen] = '\0'; + } +} + +void FString::Remove(size_t index, size_t remlen) +{ + if (index < Len()) + { + if (index + remlen >= Len()) + { + Truncate((long)index); + } + else + { + if (Data()->RefCount == 1) + { // Can do this in place + memmove(Chars + index, Chars + index + remlen, Len() - index - remlen); + memset(Chars + Len() - remlen, 0, remlen); + Data()->Len -= (unsigned)remlen; + } + else + { // Must do it in a copy + FStringData *old = Data(); + AllocBuffer(old->Len - remlen); + StrCopy(Chars, old->Chars(), index); + StrCopy(Chars + index, old->Chars() + index + remlen, old->Len - index - remlen); + old->Release(); + } + } + } +} + +FString FString::Left (size_t numChars) const +{ + size_t len = Len(); + if (len < numChars) + { + numChars = len; + } + return FString (Chars, numChars); +} + +FString FString::Right (size_t numChars) const +{ + size_t len = Len(); + if (len < numChars) + { + numChars = len; + } + return FString (Chars + len - numChars, numChars); +} + +FString FString::Mid (size_t pos, size_t numChars) const +{ + size_t len = Len(); + if (pos >= len) + { + return FString(); + } + if (pos + numChars > len || pos + numChars < pos) + { + numChars = len - pos; + } + return FString (Chars + pos, numChars); +} + +void FString::AppendCharacter(int codepoint) +{ + (*this) << MakeUTF8(codepoint); +} + +void FString::DeleteLastCharacter() +{ + if (Len() == 0) return; + auto pos = Len() - 1; + while (pos > 0 && uint8_t(Chars[pos]) >= 
0x80 && uint8_t(Chars[pos]) < 0xc0) pos--; + if (pos <= 0) + { + Data()->Release(); + ResetToNull(); + } + else + { + Truncate(pos); + } +} + + +long FString::IndexOf (const FString &substr, long startIndex) const +{ + return IndexOf (substr.Chars, startIndex); +} + +long FString::IndexOf (const char *substr, long startIndex) const +{ + if (startIndex > 0 && Len() <= (size_t)startIndex) + { + return -1; + } + char *str = strstr (Chars + startIndex, substr); + if (str == NULL) + { + return -1; + } + return long(str - Chars); +} + +long FString::IndexOf (char subchar, long startIndex) const +{ + if (startIndex > 0 && Len() <= (size_t)startIndex) + { + return -1; + } + char *str = strchr (Chars + startIndex, subchar); + if (str == NULL) + { + return -1; + } + return long(str - Chars); +} + +long FString::IndexOfAny (const FString &charset, long startIndex) const +{ + return IndexOfAny (charset.Chars, startIndex); +} + +long FString::IndexOfAny (const char *charset, long startIndex) const +{ + if (startIndex > 0 && Len() <= (size_t)startIndex) + { + return -1; + } + char *brk = strpbrk (Chars + startIndex, charset); + if (brk == NULL) + { + return -1; + } + return long(brk - Chars); +} + +long FString::LastIndexOf (char subchar) const +{ + return LastIndexOf (subchar, long(Len())); +} + +long FString::LastIndexOf (char subchar, long endIndex) const +{ + if ((size_t)endIndex > Len()) + { + endIndex = long(Len()); + } + while (--endIndex >= 0) + { + if (Chars[endIndex] == subchar) + { + return endIndex; + } + } + return -1; +} + +long FString::LastIndexOfBroken (const FString &_substr, long endIndex) const +{ + const char *substr = _substr.GetChars(); + size_t substrlen = _substr.Len(); + if ((size_t)endIndex > Len()) + { + endIndex = long(Len()); + } + substrlen--; + while (--endIndex >= long(substrlen)) + { + if (strncmp (substr, Chars + endIndex - substrlen, substrlen + 1) == 0) + { + return endIndex; + } + } + return -1; +} + +long FString::LastIndexOfAny (const 
FString &charset) const +{ + return LastIndexOfAny (charset.Chars, long(Len())); +} + +long FString::LastIndexOfAny (const char *charset) const +{ + return LastIndexOfAny (charset, long(Len())); +} + +long FString::LastIndexOfAny (const FString &charset, long endIndex) const +{ + return LastIndexOfAny (charset.Chars, endIndex); +} + +long FString::LastIndexOfAny (const char *charset, long endIndex) const +{ + if ((size_t)endIndex > Len()) + { + endIndex = long(Len()); + } + while (--endIndex >= 0) + { + if (strchr (charset, Chars[endIndex]) != NULL) + { + return endIndex; + } + } + return -1; +} + +long FString::LastIndexOf (const FString &substr) const +{ + return LastIndexOf(substr.Chars, long(Len() - substr.Len()), substr.Len()); +} + +long FString::LastIndexOf (const FString &substr, long endIndex) const +{ + return LastIndexOf(substr.Chars, endIndex, substr.Len()); +} + +long FString::LastIndexOf (const char *substr) const +{ + return LastIndexOf(substr, long(Len() - strlen(substr)), strlen(substr)); +} + +long FString::LastIndexOf (const char *substr, long endIndex) const +{ + return LastIndexOf(substr, endIndex, strlen(substr)); +} + +long FString::LastIndexOf (const char *substr, long endIndex, size_t substrlen) const +{ + if ((size_t)endIndex + substrlen > Len()) + { + endIndex = long(Len() - substrlen); + } + while (endIndex >= 0) + { + if (strncmp (substr, Chars + endIndex, substrlen) == 0) + { + return endIndex; + } + endIndex--; + } + return -1; +} + +void FString::ToUpper () +{ + LockBuffer(); + size_t max = Len(); + for (size_t i = 0; i < max; ++i) + { + Chars[i] = (char)toupper(Chars[i]); + } + UnlockBuffer(); +} + +void FString::ToLower () +{ + LockBuffer(); + size_t max = Len(); + for (size_t i = 0; i < max; ++i) + { + Chars[i] = (char)tolower(Chars[i]); + } + UnlockBuffer(); +} + +FString FString::MakeLower() const +{ + TArray builder(Len()); + int pos = 0; + while (int c = GetNextCharacter(pos)) + { + if (c < 65536) c = lowerforupper[c]; + auto 
cp = MakeUTF8(c); + while (auto uc = *cp++) builder.Push(uc); + } + return FString(builder); +} + +FString FString::MakeUpper() const +{ + TArray builder(Len()); + int pos = 0; + while (int c = GetNextCharacter(pos)) + { + if (c < 65536) c = upperforlower[c]; + auto cp = MakeUTF8(c); + while (auto uc = *cp++) builder.Push(uc); + } + return FString(builder); +} + +void FString::StripLeft () +{ + size_t max = Len(), i, j; + if (max == 0) return; + for (i = 0; i < max; ++i) + { + if (!isspace((unsigned char)Chars[i])) + break; + } + if (i == 0) + { // Nothing to strip. + return; + } + if (Data()->RefCount <= 1) + { + for (j = 0; i <= max; ++j, ++i) + { + Chars[j] = Chars[i]; + } + ReallocBuffer (j-1); + } + else + { + FStringData *old = Data(); + AllocBuffer (max - i); + StrCopy (Chars, old->Chars() + i, max - i); + old->Release(); + } +} + +void FString::StripLeft (const FString &charset) +{ + return StripLeft (charset.Chars); +} + +void FString::StripLeft (const char *charset) +{ + size_t max = Len(), i, j; + if (max == 0) return; + for (i = 0; i < max; ++i) + { + if (!strchr (charset, Chars[i])) + break; + } + if (i == 0) + { // Nothing to strip. + return; + } + if (Data()->RefCount <= 1) + { + for (j = 0; i <= max; ++j, ++i) + { + Chars[j] = Chars[i]; + } + ReallocBuffer (j-1); + } + else + { + FStringData *old = Data(); + AllocBuffer (max - i); + StrCopy (Chars, old->Chars() + i, max - i); + old->Release(); + } +} + +void FString::StripRight () +{ + size_t max = Len(), i; + if (max == 0) return; + for (i = --max; i > 0; i--) + { + if (!isspace((unsigned char)Chars[i])) + break; + } + if (i == max) + { // Nothing to strip. 
+ return; + } + if (Data()->RefCount <= 1) + { + Chars[i+1] = '\0'; + ReallocBuffer (i+1); + } + else + { + FStringData *old = Data(); + AllocBuffer (i+1); + StrCopy (Chars, old->Chars(), i+1); + old->Release(); + } +} + +void FString::StripRight (const FString &charset) +{ + return StripRight (charset.Chars); +} + +void FString::StripRight (const char *charset) +{ + size_t max = Len(), i; + if (max == 0) return; + for (i = --max; i > 0; i--) + { + if (!strchr (charset, Chars[i])) + break; + } + if (i == max) + { // Nothing to strip. + return; + } + if (Data()->RefCount <= 1) + { + Chars[i+1] = '\0'; + ReallocBuffer (i+1); + } + else + { + FStringData *old = Data(); + AllocBuffer (i+1); + StrCopy (Chars, old->Chars(), i+1); + old->Release(); + } +} + +void FString::StripLeftRight () +{ + size_t max = Len(), i, j, k; + if (max == 0) return; + for (i = 0; i < max; ++i) + { + if (Chars[i] < 0 || !isspace((unsigned char)Chars[i])) + break; + } + for (j = max - 1; j >= i; --j) + { + if (Chars[i] < 0 || !isspace((unsigned char)Chars[j])) + break; + } + if (i == 0 && j == max - 1) + { // Nothing to strip. 
+ return; + } + if (Data()->RefCount <= 1) + { + for (k = 0; i <= j; ++i, ++k) + { + Chars[k] = Chars[i]; + } + Chars[k] = '\0'; + ReallocBuffer (k); + } + else + { + FStringData *old = Data(); + AllocBuffer(j - i + 1); + StrCopy(Chars, old->Chars(), j - i + 1); + old->Release(); + } +} + +void FString::StripLeftRight (const FString &charset) +{ + return StripLeftRight (charset.Chars); +} + +void FString::StripLeftRight (const char *charset) +{ + size_t max = Len(), i, j, k; + if (max == 0) return; + for (i = 0; i < max; ++i) + { + if (!strchr (charset, Chars[i])) + break; + } + for (j = max - 1; j >= i; --j) + { + if (!strchr (charset, Chars[j])) + break; + } + if (Data()->RefCount <= 1) + { + for (k = 0; i <= j; ++i, ++k) + { + Chars[k] = Chars[i]; + } + Chars[k] = '\0'; + ReallocBuffer (k); + } + else + { + FStringData *old = Data(); + AllocBuffer (j - i); + StrCopy (Chars, old->Chars(), j - i); + old->Release(); + } +} + +void FString::Insert (size_t index, const FString &instr) +{ + Insert (index, instr.Chars, instr.Len()); +} + +void FString::Insert (size_t index, const char *instr) +{ + Insert (index, instr, strlen(instr)); +} + +void FString::Insert (size_t index, const char *instr, size_t instrlen) +{ + if (instrlen > 0) + { + size_t mylen = Len(); + if (index >= mylen) + { + AppendCStrPart(instr, instrlen); + } + else if (Data()->RefCount <= 1) + { + ReallocBuffer(mylen + instrlen); + memmove(Chars + index + instrlen, Chars + index, (mylen - index + 1) * sizeof(char)); + memcpy(Chars + index, instr, instrlen * sizeof(char)); + } + else + { + FStringData *old = Data(); + AllocBuffer(mylen + instrlen); + StrCopy(Chars, old->Chars(), index); + StrCopy(Chars + index, instr, instrlen); + StrCopy(Chars + index + instrlen, old->Chars() + index, mylen - index); + old->Release(); + } + } +} + +void FString::ReplaceChars (char oldchar, char newchar) +{ + if (oldchar == '\0') + return; + + ReplaceChars([&oldchar](char c){ return c == oldchar; }, newchar); +} + +void 
FString::ReplaceChars (const char *oldcharset, char newchar) +{ + if (oldcharset == NULL || oldcharset[0] == '\0') + return; + + ReplaceChars([&oldcharset](char c){ return strchr(oldcharset, c) != NULL; }, newchar); +} + +void FString::StripChars (char killchar) +{ + if (killchar == '\0') + return; + + StripChars([&killchar](char c){ return c == killchar; }); +} + +void FString::StripChars (const char *killcharset) +{ + if (killcharset == NULL || killcharset[0] == '\0') + return; + + StripChars([&killcharset](char c){ return strchr(killcharset, c) != NULL; }); +} + +void FString::MergeChars (char merger) +{ + MergeChars (merger, merger); +} + +void FString::MergeChars (char merger, char newchar) +{ + size_t read, write, mylen; + + LockBuffer(); + for (read = write = 0, mylen = Len(); read < mylen; ) + { + if (Chars[read] == merger) + { + while (Chars[++read] == merger) + { + } + Chars[write++] = newchar; + } + else + { + Chars[write++] = Chars[read++]; + } + } + Chars[write] = '\0'; + ReallocBuffer (write); + UnlockBuffer(); +} + +void FString::MergeChars (const char *charset, char newchar) +{ + size_t read, write, mylen; + + LockBuffer(); + for (read = write = 0, mylen = Len(); read < mylen; ) + { + if (strchr (charset, Chars[read]) != NULL) + { + while (strchr (charset, Chars[++read]) != NULL) + { + } + Chars[write++] = newchar; + } + else + { + Chars[write++] = Chars[read++]; + } + } + Chars[write] = '\0'; + ReallocBuffer (write); + UnlockBuffer(); +} + +void FString::Substitute (const FString &oldstr, const FString &newstr) +{ + return Substitute (oldstr.Chars, newstr.Chars, oldstr.Len(), newstr.Len()); +} + +void FString::Substitute (const char *oldstr, const FString &newstr) +{ + return Substitute (oldstr, newstr.Chars, strlen(oldstr), newstr.Len()); +} + +void FString::Substitute (const FString &oldstr, const char *newstr) +{ + return Substitute (oldstr.Chars, newstr, oldstr.Len(), strlen(newstr)); +} + +void FString::Substitute (const char *oldstr, const 
char *newstr) +{ + return Substitute (oldstr, newstr, strlen(oldstr), strlen(newstr)); +} + +void FString::Substitute (const char *oldstr, const char *newstr, size_t oldstrlen, size_t newstrlen) +{ + if (oldstr == nullptr || newstr == nullptr || *oldstr == 0) return; + LockBuffer(); + for (size_t checkpt = 0; checkpt < Len(); ) + { + char *match = strstr (Chars + checkpt, oldstr); + size_t len = Len(); + if (match != NULL) + { + size_t matchpt = match - Chars; + if (oldstrlen != newstrlen) + { + ReallocBuffer (len + newstrlen - oldstrlen); + memmove (Chars + matchpt + newstrlen, Chars + matchpt + oldstrlen, (len + 1 - matchpt - oldstrlen)*sizeof(char)); + } + memcpy (Chars + matchpt, newstr, newstrlen); + checkpt = matchpt + newstrlen; + } + else + { + break; + } + } + UnlockBuffer(); +} + +bool FString::IsInt () const +{ + // String must match: [whitespace] [{+ | �}] [0 [{ x | X }]] [digits] [whitespace] + +/* This state machine is based on a simplification of re2c's output for this input: +digits = [0-9]; +hexdigits = [0-9a-fA-F]; +octdigits = [0-7]; + +("0" octdigits+ | "0" [xX] hexdigits+ | (digits \ '0') digits*) { return true; } +[\000-\377] { return false; }*/ + const char *YYCURSOR = Chars; + char yych; + + yych = *YYCURSOR; + + // Skip preceding whitespace + while (yych != '\0' && isspace((unsigned char)yych)) { yych = *++YYCURSOR; } + + // Check for sign + if (yych == '+' || yych == '-') { yych = *++YYCURSOR; } + + if (yych == '0') + { + yych = *++YYCURSOR; + if (yych >= '0' && yych <= '7') + { + do { yych = *++YYCURSOR; } while (yych >= '0' && yych <= '7'); + } + else if (yych == 'X' || yych == 'x') + { + bool gothex = false; + yych = *++YYCURSOR; + while ((yych >= '0' && yych <= '9') || (yych >= 'A' && yych <= 'F') || (yych >= 'a' && yych <= 'f')) + { + gothex = true; + yych = *++YYCURSOR; + } + if (!gothex) return false; + } + else + { + return false; + } + } + else if (yych >= '1' && yych <= '9') + { + do { yych = *++YYCURSOR; } while (yych >= '0' && 
yych <= '9'); + } + else + { + return false; + } + + // The rest should all be whitespace + while (yych != '\0' && isspace((unsigned char)yych)) { yych = *++YYCURSOR; } + return yych == '\0'; +} + +bool FString::IsFloat () const +{ + // String must match: [whitespace] [sign] [digits] [.digits] [ {d | D | e | E}[sign]digits] [whitespace] +/* This state machine is based on a simplification of re2c's output for this input: +digits = [0-9]; + +(digits+ | digits* "." digits+) ([dDeE] [+-]? digits+)? { return true; } +[\000-\377] { return false; } +*/ + const char *YYCURSOR = Chars; + char yych; + bool gotdig = false; + + yych = *YYCURSOR; + + // Skip preceding whitespace + while (yych != '\0' && isspace((unsigned char)yych)) { yych = *++YYCURSOR; } + + // Check for sign + if (yych == '+' || yych == '-') { yych = *++YYCURSOR; } + + while (yych >= '0' && yych <= '9') + { + gotdig = true; + yych = *++YYCURSOR; + } + if (yych == '.') + { + yych = *++YYCURSOR; + if (yych >= '0' && yych <= '9') + { + gotdig = true; + do { yych = *++YYCURSOR; } while (yych >= '0' && yych <= '9'); + } + else return false; + } + if (gotdig) + { + if (yych == 'D' || yych == 'd' || yych == 'E' || yych == 'e') + { + yych = *++YYCURSOR; + if (yych == '+' || yych == '-') yych = *++YYCURSOR; + while (yych >= '0' && yych <= '9') { yych = *++YYCURSOR; } + } + } + + // The rest should all be whitespace + while (yych != '\0' && isspace((unsigned char)yych)) { yych = *++YYCURSOR; } + return yych == '\0'; +} + +int64_t FString::ToLong (int base) const +{ + return strtoll (Chars, NULL, base); +} + +uint64_t FString::ToULong (int base) const +{ + return strtoull (Chars, NULL, base); +} + +double FString::ToDouble () const +{ + return strtod (Chars, NULL); +} + +void FString::StrCopy (char *to, const char *from, size_t len) +{ + memcpy (to, from, len*sizeof(char)); + to[len] = 0; +} + +void FString::StrCopy (char *to, const FString &from) +{ + StrCopy (to, from.Chars, from.Len()); +} + +void 
FString::AllocBuffer (size_t len) +{ + Chars = (char *)(FStringData::Alloc(len) + 1); + Data()->Len = (unsigned int)len; +} + +void FString::ReallocBuffer (size_t newlen) +{ + if (Data()->RefCount > 1) + { // If more than one reference, we must use a new copy + FStringData *old = Data(); + AllocBuffer (newlen); + StrCopy (Chars, old->Chars(), newlen < old->Len ? newlen : old->Len); + old->Release(); + } + else + { + if (newlen > Data()->AllocLen) + { + Chars = (char *)(Data()->Realloc(newlen) + 1); + } + Data()->Len = (unsigned int)newlen; + } +} + +TArray FString::Split(const FString &delimiter, const EmptyTokenType keepEmpty) const +{ + return Split(delimiter.GetChars(), keepEmpty); +} + +TArray FString::Split(const char *const delimiter, const EmptyTokenType keepEmpty) const +{ + TArray tokens; + Split(tokens, delimiter, keepEmpty); + return tokens; +} + +void FString::Split(TArray& tokens, const FString &delimiter, EmptyTokenType keepEmpty) const +{ + Split(tokens, delimiter.GetChars(), keepEmpty); +} + +void FString::Split(TArray& tokens, const char *delimiter, EmptyTokenType keepEmpty) const +{ + assert(nullptr != delimiter); + + const long selfLen = static_cast(Len()); + const long delimLen = static_cast(strlen(delimiter)); + long lastPos = 0; + + if (selfLen == 0) return; // Empty strings do not contain tokens, even with TOK_KEEPEMPTY. + + while (lastPos <= selfLen) + { + long pos = IndexOf(delimiter, lastPos); + + if (-1 == pos) + { + pos = selfLen; + } + + if (pos != lastPos || TOK_KEEPEMPTY == keepEmpty) + { + tokens.Push(FString(GetChars() + lastPos, pos - lastPos)); + } + + lastPos = pos + delimLen; + } +} + +// Under Windows, use the system heap functions for managing string memory. +// Under other OSs, use ordinary memory management instead. + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include + +// Convert from and to Windows wide strings so that we can interface with the Unicode version of the Windows API. 
+FString::FString(const wchar_t *copyStr) +{ + if (copyStr == NULL || *copyStr == '\0') + { + ResetToNull(); + } + else + { + auto len = wcslen(copyStr); + int size_needed = WideCharToMultiByte(CP_UTF8, 0, copyStr, (int)len, nullptr, 0, nullptr, nullptr); + AllocBuffer(size_needed); + WideCharToMultiByte(CP_UTF8, 0, copyStr, (int)len, Chars, size_needed, nullptr, nullptr); + Chars[size_needed] = 0; + } +} + +FString &FString::operator=(const wchar_t *copyStr) +{ + if (copyStr == NULL || *copyStr == '\0') + { + Data()->Release(); + ResetToNull(); + } + else + { + auto len = wcslen(copyStr); + int size_needed = WideCharToMultiByte(CP_UTF8, 0, copyStr, (int)len, nullptr, 0, nullptr, nullptr); + ReallocBuffer(size_needed); + WideCharToMultiByte(CP_UTF8, 0, copyStr, (int)len, Chars, size_needed, nullptr, nullptr); + Chars[size_needed] = 0; + } + return *this; +} + +std::wstring WideString(const char *cin) +{ + if (!cin) return L""; + const uint8_t *in = (const uint8_t*)cin; + // This is a bit tricky because we need to support both UTF-8 and legacy content in ISO-8859-1 + // and thanks to user-side string manipulation it can be that a text mixes both. + // To convert the string this uses the same function as all text printing in the engine. 
+ TArray buildbuffer; + while (*in) buildbuffer.Push((wchar_t)GetCharFromString(in)); + buildbuffer.Push(0); + return std::wstring(buildbuffer.Data()); +} + +static HANDLE StringHeap; +const SIZE_T STRING_HEAP_SIZE = 64*1024; +#endif + +FStringData *FStringData::Alloc (size_t strlen) +{ + strlen += 1 + sizeof(FStringData); // Add space for header and terminating null + strlen = (strlen + 7) & ~7; // Pad length up + +#ifdef _WIN32 + if (StringHeap == NULL) + { + StringHeap = HeapCreate (0, STRING_HEAP_SIZE, 0); + if (StringHeap == NULL) + { + throw std::bad_alloc(); + } + } + + FStringData *block = (FStringData *)HeapAlloc (StringHeap, 0, strlen); +#else + FStringData *block = (FStringData *)malloc (strlen); +#endif + if (block == NULL) + { + throw std::bad_alloc(); + } + block->Len = 0; + block->AllocLen = (unsigned int)strlen - sizeof(FStringData) - 1; + block->RefCount = 1; + return block; +} + +FStringData *FStringData::Realloc (size_t newstrlen) +{ + assert (RefCount <= 1); + + newstrlen += 1 + sizeof(FStringData); // Add space for header and terminating null + newstrlen = (newstrlen + 7) & ~7; // Pad length up + +#ifdef _WIN32 + FStringData *block = (FStringData *)HeapReAlloc (StringHeap, 0, this, newstrlen); +#else + FStringData *block = (FStringData *)realloc (this, newstrlen); +#endif + if (block == NULL) + { + throw std::bad_alloc(); + } + block->AllocLen = (unsigned int)newstrlen - sizeof(FStringData) - 1; + return block; +} + +void FStringData::Dealloc () +{ + assert (RefCount <= 0); + +#ifdef _WIN32 + HeapFree (StringHeap, 0, this); +#else + free (this); +#endif +} + +FStringData *FStringData::MakeCopy () +{ + FStringData *copy = Alloc (Len); + copy->Len = Len; + FString::StrCopy (copy->Chars(), Chars(), Len); + return copy; +} + +FStringf::FStringf(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + VFormat(fmt, ap); + va_end(ap); +} diff --git a/source/common/utility/zstring.h b/source/common/utility/zstring.h new file mode 100644 index 000000000..f5b718fa8 --- /dev/null +++ b/source/common/utility/zstring.h @@ -0,0 +1,471 @@ +#pragma once +/* +** zstring.h +** +**--------------------------------------------------------------------------- +** Copyright 2005-2007 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**--------------------------------------------------------------------------- +** +*/ + + +#include +#include +#include +#include +#include +#include "tarray.h" + +#ifdef __GNUC__ +#define PRINTFISH(x) __attribute__((format(printf, 2, x))) +#else +#define PRINTFISH(x) +#endif + +#ifdef __GNUC__ +#define IGNORE_FORMAT_PRE \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wformat\"") \ + _Pragma("GCC diagnostic ignored \"-Wformat-extra-args\"") +#define IGNORE_FORMAT_POST _Pragma("GCC diagnostic pop") +#else +#define IGNORE_FORMAT_PRE +#define IGNORE_FORMAT_POST +#endif + +#ifdef _WIN32 +std::wstring WideString(const char *); +#endif + +struct FStringData +{ + unsigned int Len; // Length of string, excluding terminating null + unsigned int AllocLen; // Amount of memory allocated for string + int RefCount; // < 0 means it's locked + // char StrData[xxx]; + + char *Chars() + { + return (char *)(this + 1); + } + + const char *Chars() const + { + return (const char *)(this + 1); + } + + char *AddRef() + { + if (RefCount < 0) + { + return (char *)(MakeCopy() + 1); + } + else + { + RefCount++; + return (char *)(this + 1); + } + } + + void Release() + { + assert (RefCount != 0); + + if (--RefCount <= 0) + { + Dealloc(); + } + } + + FStringData *MakeCopy(); + + static FStringData *Alloc (size_t strlen); + FStringData *Realloc (size_t newstrlen); + void Dealloc (); +}; + +struct FNullStringData +{ + unsigned int Len; + unsigned int AllocLen; + int RefCount; + char Nothing[2]; +}; + +enum ELumpNum +{ +}; + +class FString +{ +public: + FString () { ResetToNull(); } + + // Copy constructors + FString (const FString &other) { AttachToOther (other); } + FString (FString &&other) : Chars(other.Chars) { other.ResetToNull(); } + FString (const char *copyStr); + FString (const char *copyStr, size_t copyLen); + FString (char oneChar); + FString(const TArray & source) : FString(source.Data(), source.Size()) {} + FString(const TArray & source) : 
FString((char*)source.Data(), source.Size()) {} + // This is intentionally #ifdef'd. The only code which needs this is parts of the Windows backend that receive Unicode text from the system. +#ifdef _WIN32 + explicit FString(const wchar_t *copyStr); + FString &operator = (const wchar_t *copyStr); + std::wstring WideString() const { return ::WideString(Chars); } +#endif + + // Concatenation constructors + FString (const FString &head, const FString &tail); + FString (const FString &head, const char *tail); + FString (const FString &head, char tail); + FString (const char *head, const FString &tail); + FString (const char *head, const char *tail); + FString (char head, const FString &tail); + + // Other constructors + FString (ELumpNum); // Create from a lump + + ~FString (); + + // Discard string's contents, create a new buffer, and lock it. + char *LockNewBuffer(size_t len); + + char *LockBuffer(); // Obtain write access to the character buffer + void UnlockBuffer(); // Allow shared access to the character buffer + + void Swap(FString &other) + { + std::swap(Chars, other.Chars); + } + + operator const char *() const { return Chars; } + + const char *GetChars() const { return Chars; } + + const char &operator[] (int index) const { return Chars[index]; } +#if defined(_WIN32) && !defined(_WIN64) && defined(_MSC_VER) + // Compiling 32-bit Windows source with MSVC: size_t is typedefed to an + // unsigned int with the 64-bit portability warning attribute, so the + // prototype cannot substitute unsigned int for size_t, or you get + // spurious warnings. 
+ const char &operator[] (size_t index) const { return Chars[index]; } +#else + const char &operator[] (unsigned int index) const { return Chars[index]; } +#endif + const char &operator[] (unsigned long index) const { return Chars[index]; } + const char &operator[] (unsigned long long index) const { return Chars[index]; } + + FString &operator = (const FString &other); + FString &operator = (FString &&other); + FString &operator = (const char *copyStr); + + FString operator + (const FString &tail) const; + FString operator + (const char *tail) const; + FString operator + (char tail) const; + friend FString operator + (const char *head, const FString &tail); + friend FString operator + (char head, const FString &tail); + + FString &operator += (const FString &tail); + FString &operator += (const char *tail); + FString &operator += (char tail); + FString &AppendCStrPart (const char *tail, size_t tailLen); + FString &CopyCStrPart(const char *tail, size_t tailLen); + + FString &operator << (const FString &tail) { return *this += tail; } + FString &operator << (const char *tail) { return *this += tail; } + FString &operator << (char tail) { return *this += tail; } + + const char &Front() const { assert(IsNotEmpty()); return Chars[0]; } + const char &Back() const { assert(IsNotEmpty()); return Chars[Len() - 1]; } + + FString Left (size_t numChars) const; + FString Right (size_t numChars) const; + FString Mid (size_t pos, size_t numChars = ~(size_t)0) const; + + void AppendCharacter(int codepoint); + void DeleteLastCharacter(); + + long IndexOf (const FString &substr, long startIndex=0) const; + long IndexOf (const char *substr, long startIndex=0) const; + long IndexOf (char subchar, long startIndex=0) const; + + long IndexOfAny (const FString &charset, long startIndex=0) const; + long IndexOfAny (const char *charset, long startIndex=0) const; + + // This is only kept for backwards compatibility with old ZScript versions that used this function and depend on its bug. 
+ long LastIndexOf (char subchar) const; + long LastIndexOfBroken (const FString &substr, long endIndex) const; + long LastIndexOf (char subchar, long endIndex) const; + + long LastIndexOfAny (const FString &charset) const; + long LastIndexOfAny (const char *charset) const; + long LastIndexOfAny (const FString &charset, long endIndex) const; + long LastIndexOfAny (const char *charset, long endIndex) const; + + long LastIndexOf (const FString &substr) const; + long LastIndexOf (const FString &substr, long endIndex) const; + long LastIndexOf (const char *substr) const; + long LastIndexOf (const char *substr, long endIndex) const; + long LastIndexOf (const char *substr, long endIndex, size_t substrlen) const; + + void ToUpper (); + void ToLower (); + FString MakeUpper() const; + FString MakeLower() const; + + void StripLeft (); + void StripLeft (const FString &charset); + void StripLeft (const char *charset); + + void StripRight (); + void StripRight (const FString &charset); + void StripRight (const char *charset); + + void StripLeftRight (); + void StripLeftRight (const FString &charset); + void StripLeftRight (const char *charset); + + void Insert (size_t index, const FString &instr); + void Insert (size_t index, const char *instr); + void Insert (size_t index, const char *instr, size_t instrlen); + + template + void ReplaceChars (Func IsOldChar, char newchar) + { + size_t i, j; + + LockBuffer(); + for (i = 0, j = Len(); i < j; ++i) + { + if (IsOldChar(Chars[i])) + { + Chars[i] = newchar; + } + } + UnlockBuffer(); + } + + void ReplaceChars (char oldchar, char newchar); + void ReplaceChars (const char *oldcharset, char newchar); + + template + void StripChars (Func IsKillChar) + { + size_t read, write, mylen; + + LockBuffer(); + for (read = write = 0, mylen = Len(); read < mylen; ++read) + { + if (!IsKillChar(Chars[read])) + { + Chars[write++] = Chars[read]; + } + } + Chars[write] = '\0'; + ReallocBuffer (write); + UnlockBuffer(); + } + + void StripChars (char 
killchar); + void StripChars (const char *killcharset); + + void MergeChars (char merger); + void MergeChars (char merger, char newchar); + void MergeChars (const char *charset, char newchar); + + void Substitute (const FString &oldstr, const FString &newstr); + void Substitute (const char *oldstr, const FString &newstr); + void Substitute (const FString &oldstr, const char *newstr); + void Substitute (const char *oldstr, const char *newstr); + void Substitute (const char *oldstr, const char *newstr, size_t oldstrlen, size_t newstrlen); + + void Format (const char *fmt, ...) PRINTFISH(3); + void AppendFormat (const char *fmt, ...) PRINTFISH(3); + void VFormat (const char *fmt, va_list arglist) PRINTFISH(0); + void VAppendFormat (const char *fmt, va_list arglist) PRINTFISH(0); + + bool IsInt () const; + bool IsFloat () const; + int64_t ToLong (int base=0) const; + uint64_t ToULong (int base=0) const; + double ToDouble () const; + + size_t Len() const { return Data()->Len; } + size_t CharacterCount() const; + int GetNextCharacter(int &position) const; + bool IsEmpty() const { return Len() == 0; } + bool IsNotEmpty() const { return Len() != 0; } + + void Truncate (size_t newlen); + void Remove(size_t index, size_t remlen); + + int Compare (const FString &other) const { return strcmp (Chars, other.Chars); } + int Compare (const char *other) const { return strcmp (Chars, other); } + int Compare(const FString &other, int len) const { return strncmp(Chars, other.Chars, len); } + int Compare(const char *other, int len) const { return strncmp(Chars, other, len); } + + int CompareNoCase (const FString &other) const { return stricmp (Chars, other.Chars); } + int CompareNoCase (const char *other) const { return stricmp (Chars, other); } + int CompareNoCase(const FString &other, int len) const { return strnicmp(Chars, other.Chars, len); } + int CompareNoCase(const char *other, int len) const { return strnicmp(Chars, other, len); } + + enum EmptyTokenType + { + TOK_SKIPEMPTY = 
0, + TOK_KEEPEMPTY = 1, + }; + + TArray Split(const FString &delimiter, EmptyTokenType keepEmpty = TOK_KEEPEMPTY) const; + TArray Split(const char *delimiter, EmptyTokenType keepEmpty = TOK_KEEPEMPTY) const; + void Split(TArray& tokens, const FString &delimiter, EmptyTokenType keepEmpty = TOK_KEEPEMPTY) const; + void Split(TArray& tokens, const char *delimiter, EmptyTokenType keepEmpty = TOK_KEEPEMPTY) const; + +protected: + const FStringData *Data() const { return (FStringData *)Chars - 1; } + FStringData *Data() { return (FStringData *)Chars - 1; } + + void ResetToNull() + { + NullString.RefCount++; + Chars = &NullString.Nothing[0]; + } + + void AttachToOther (const FString &other); + void AllocBuffer (size_t len); + void ReallocBuffer (size_t newlen); + + static int FormatHelper (void *data, const char *str, int len); + static void StrCopy (char *to, const char *from, size_t len); + static void StrCopy (char *to, const FString &from); + + char *Chars; + + static FNullStringData NullString; + + friend struct FStringData; + +public: + bool operator == (const FString &other) const + { + return Compare(other) == 0; + } + + bool operator != (const FString &other) const + { + return Compare(other) != 0; + } + + bool operator < (const FString &other) const + { + return Compare(other) < 0; + } + + bool operator > (const FString &other) const + { + return Compare(other) > 0; + } + + bool operator <= (const FString &other) const + { + return Compare(other) <= 0; + } + + bool operator >= (const FString &other) const + { + return Compare(other) >= 0; + } + + // These are needed to block the default char * conversion operator from making a mess. 
+ bool operator == (const char *) const = delete; + bool operator != (const char *) const = delete; + bool operator < (const char *) const = delete; + bool operator > (const char *) const = delete; + bool operator <= (const char *) const = delete; + bool operator >= (const char *) const = delete; + +private: +}; + +// These are also needed to block the default char * conversion operator from making a mess. +bool operator == (const char *, const FString &) = delete; +bool operator != (const char *, const FString &) = delete; +bool operator < (const char *, const FString &) = delete; +bool operator > (const char *, const FString &) = delete; +bool operator <= (const char *, const FString &) = delete; +bool operator >= (const char *, const FString &) = delete; + +class FStringf : public FString +{ +public: + FStringf(const char *fmt, ...); +}; + + +namespace StringFormat +{ + enum + { + // Format specification flags + F_MINUS = 1, + F_PLUS = 2, + F_ZERO = 4, + F_BLANK = 8, + F_HASH = 16, + + F_SIGNED = 32, + F_NEGATIVE = 64, + F_ZEROVALUE = 128, + F_FPT = 256, + + // Format specification size prefixes + F_HALFHALF = 0x1000, // hh + F_HALF = 0x2000, // h + F_LONG = 0x3000, // l + F_LONGLONG = 0x4000, // ll or I64 + F_BIGI = 0x5000, // I + F_PTRDIFF = 0x6000, // t + F_SIZE = 0x7000, // z + }; + typedef int (*OutputFunc)(void *data, const char *str, int len); + + int VWorker (OutputFunc output, void *outputData, const char *fmt, va_list arglist); + int Worker (OutputFunc output, void *outputData, const char *fmt, ...); +}; + +#undef PRINTFISH + +// Hash FStrings on their contents. (used by TMap) +#include "superfasthash.h" + +template<> struct THashTraits +{ + hash_t Hash(const FString &key) { return (hash_t)SuperFastHash(key.GetChars(), key.Len()); } + // Compares two keys, returning zero if they are the same. + int Compare(const FString &left, const FString &right) { return left.Compare(right); } +};