Bundle libOpenMPT as a dynamic framework, which should be safe once again, now that there is only one version to bundle. Also, now it is using the versions of libvorbisfile and libmpg123 that are bundled with the player, instead of compiling minimp3 and stbvorbis. Signed-off-by: Christopher Snowhill <kode54@gmail.com>
446 lines
14 KiB
C++
446 lines
14 KiB
C++
/*
|
|
* mptString.h
|
|
* ----------
|
|
* Purpose: Small string-related utilities, number and message formatting.
|
|
* Notes : Currently none.
|
|
* Authors: OpenMPT Devs
|
|
* The OpenMPT source code is released under the BSD license. Read LICENSE for more details.
|
|
*/
|
|
|
|
|
|
#pragma once
|
|
|
|
#include "openmpt/all/BuildSettings.hpp"
|
|
|
|
#include "mpt/base/alloc.hpp"
|
|
#include "mpt/base/span.hpp"
|
|
#include "mpt/string/types.hpp"
|
|
#include "mpt/string/utility.hpp"
|
|
|
|
#include "mptBaseTypes.h"
|
|
|
|
#include <algorithm>
|
|
#include <limits>
|
|
#include <string>
|
|
#include <string_view>
|
|
|
|
#include <cstring>
|
|
|
|
|
|
|
|
OPENMPT_NAMESPACE_BEGIN
|
|
|
|
|
|
namespace mpt
|
|
{
|
|
|
|
|
|
|
|
namespace String
|
|
{
|
|
|
|
|
|
template <typename Tstring, typename Tstring2, typename Tstring3>
|
|
inline Tstring Replace(Tstring str, const Tstring2 &oldStr, const Tstring3 &newStr)
|
|
{
|
|
return mpt::replace(str, oldStr, newStr);
|
|
}
|
|
|
|
|
|
} // namespace String
|
|
|
|
|
|
// Identifies the 8-bit character encoding used as source ('from') or
// destination ('to') by the conversion functions declared in this header.
// The enumerators carry no explicit values, so their numeric values follow
// declaration order; do not reorder them.
enum class Charset
{

	UTF8,

	ASCII,  // strictly 7-bit ASCII

	ISO8859_1,
	ISO8859_15,

	CP850,
	CP437,
	CP437AMS,
	CP437AMS2,

	Windows1252,

	Amiga,
	RISC_OS,

	ISO8859_1_no_C1,
	ISO8859_15_no_C1,
	Amiga_no_C1,

	// Only available when locale charset support is compiled in.
#if defined(MPT_ENABLE_CHARSET_LOCALE)
	Locale,  // CP_ACP on windows, current C locale otherwise
#endif  // MPT_ENABLE_CHARSET_LOCALE

};
|
|
|
|
|
|
|
|
// source code / preprocessor (i.e. # token)
|
|
inline constexpr Charset CharsetSource = Charset::ASCII;
|
|
|
|
// debug log files
|
|
inline constexpr Charset CharsetLogfile = Charset::UTF8;
|
|
|
|
// std::clog / std::cout / std::cerr
|
|
#if defined(MODPLUG_TRACKER) && MPT_OS_WINDOWS && defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
inline constexpr Charset CharsetStdIO = Charset::Locale;
|
|
#else
|
|
inline constexpr Charset CharsetStdIO = Charset::UTF8;
|
|
#endif
|
|
|
|
// getenv
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
inline constexpr Charset CharsetEnvironment = Charset::Locale;
|
|
#else
|
|
inline constexpr Charset CharsetEnvironment = Charset::UTF8;
|
|
#endif
|
|
|
|
// std::exception::what()
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
inline constexpr Charset CharsetException = Charset::Locale;
|
|
#else
|
|
inline constexpr Charset CharsetException = Charset::UTF8;
|
|
#endif
|
|
|
|
|
|
|
|
// Checks if the std::string represents a UTF8 string.
|
|
// This is currently implemented as converting to std::wstring and back assuming UTF8 both ways,
|
|
// and comparing the result to the original string.
|
|
// Caveats:
|
|
// - can give false negatives because of possible unicode normalization during conversion
|
|
// - can give false positives if the 8bit encoding contains high-ascii only in valid utf8 groups
|
|
// - slow because of double conversion
|
|
bool IsUTF8(const std::string &str);
|
|
|
|
|
|
|
|
#if MPT_WSTRING_CONVERT

// Convert to a wide character string.
// The wide encoding is UTF-16 or UTF-32, based on sizeof(wchar_t).
// If str does not contain any invalid characters, this conversion is lossless.
// Invalid source bytes will be replaced by some replacement character or string.

// Already wide: returns a copy.
inline std::wstring ToWide(const std::wstring &str)
{
	return str;
}

// Null-tolerant C string overload: a null pointer yields an empty string.
inline std::wstring ToWide(const wchar_t * str)
{
	if(str == nullptr)
	{
		return std::wstring();
	}
	return std::wstring(str);
}

std::wstring ToWide(Charset from, const std::string &str);

// Null-tolerant C string overload: a null pointer is converted like an empty string.
inline std::wstring ToWide(Charset from, const char * str)
{
	if(str == nullptr)
	{
		return ToWide(from, std::string());
	}
	return ToWide(from, std::string(str));
}

#if defined(MPT_ENABLE_CHARSET_LOCALE)
std::wstring ToWide(const mpt::lstring &str);
#endif // MPT_ENABLE_CHARSET_LOCALE

#endif
|
|
|
|
// Convert to a string encoded in the 'to'-specified character set.
|
|
// If str does not contain any invalid characters,
|
|
// this conversion will be lossless if, and only if,
|
|
// 'to' is UTF8.
|
|
// Invalid source bytes or characters that are not representable in the
|
|
// destination charset will be replaced by some replacement character or string.
|
|
#if MPT_WSTRING_CONVERT
|
|
std::string ToCharset(Charset to, const std::wstring &str);
|
|
inline std::string ToCharset(Charset to, const wchar_t * str) { return ToCharset(to, str ? std::wstring(str) : std::wstring()); }
|
|
#endif
|
|
std::string ToCharset(Charset to, Charset from, const std::string &str);
|
|
inline std::string ToCharset(Charset to, Charset from, const char * str) { return ToCharset(to, from, str ? std::string(str) : std::string()); }
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
std::string ToCharset(Charset to, const mpt::lstring &str);
|
|
#endif // MPT_ENABLE_CHARSET_LOCALE
|
|
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)

// Conversions producing mpt::lstring; only declared when locale charset support is enabled.

#if MPT_WSTRING_CONVERT

mpt::lstring ToLocale(const std::wstring &str);

// Null-tolerant C string overload: a null pointer is converted like an empty string.
inline mpt::lstring ToLocale(const wchar_t * str)
{
	if(str == nullptr)
	{
		return ToLocale(std::wstring());
	}
	return ToLocale(std::wstring(str));
}

#endif

mpt::lstring ToLocale(Charset from, const std::string &str);

// Null-tolerant C string overload: a null pointer is converted like an empty string.
inline mpt::lstring ToLocale(Charset from, const char * str)
{
	if(str == nullptr)
	{
		return ToLocale(from, std::string());
	}
	return ToLocale(from, std::string(str));
}

// Already an mpt::lstring: returns a copy.
inline mpt::lstring ToLocale(const mpt::lstring &str)
{
	return str;
}

#endif // MPT_ENABLE_CHARSET_LOCALE
|
|
|
|
#if MPT_OS_WINDOWS

// Conversions producing mpt::winstring; only declared on Windows builds.

#if MPT_WSTRING_CONVERT

mpt::winstring ToWin(const std::wstring &str);

// Null-tolerant C string overload: a null pointer is converted like an empty string.
inline mpt::winstring ToWin(const wchar_t * str)
{
	if(str == nullptr)
	{
		return ToWin(std::wstring());
	}
	return ToWin(std::wstring(str));
}

#endif

mpt::winstring ToWin(Charset from, const std::string &str);

// Null-tolerant C string overload: a null pointer is converted like an empty string.
inline mpt::winstring ToWin(Charset from, const char * str)
{
	if(str == nullptr)
	{
		return ToWin(from, std::string());
	}
	return ToWin(from, std::string(str));
}

#if defined(MPT_ENABLE_CHARSET_LOCALE)
mpt::winstring ToWin(const mpt::lstring &str);
#endif // MPT_ENABLE_CHARSET_LOCALE

#endif // MPT_OS_WINDOWS
|
|
|
|
|
|
#if defined(MPT_WITH_MFC)

// The CString conversions below are declared in terms of std::wstring,
// so wide string conversion support is a hard requirement for MFC builds.
#if !(MPT_WSTRING_CONVERT)
#error "MFC depends on MPT_WSTRING_CONVERT"
#endif

// Convert to a MFC CString. The CString encoding depends on UNICODE.
// This should also be used when converting to TCHAR strings.
// If UNICODE is defined, this is a completely lossless operation.

// Already a CString: returns a copy.
inline CString ToCString(const CString &str) { return str; }

CString ToCString(const std::wstring &str);

// Null-tolerant C string overload: a null pointer is converted like an empty string.
inline CString ToCString(const wchar_t * str) { return ToCString(str ? std::wstring(str) : std::wstring()); }

CString ToCString(Charset from, const std::string &str);

// Null-tolerant C string overload: a null pointer is converted like an empty string.
inline CString ToCString(Charset from, const char * str) { return ToCString(from, str ? std::string(str) : std::string()); }

#if defined(MPT_ENABLE_CHARSET_LOCALE)
CString ToCString(const mpt::lstring &str);
mpt::lstring ToLocale(const CString &str);
#endif // MPT_ENABLE_CHARSET_LOCALE

#if MPT_OS_WINDOWS
mpt::winstring ToWin(const CString &str);
#endif // MPT_OS_WINDOWS

// Convert from a MFC CString. The CString encoding depends on UNICODE.
// This should also be used when converting from TCHAR strings.
// If UNICODE is defined, this is a completely lossless operation.
std::wstring ToWide(const CString &str);
std::string ToCharset(Charset to, const CString &str);

#endif // MPT_WITH_MFC
|
|
|
|
|
|
|
|
#define UC_(x) MPT_UCHAR(x)
|
|
#define UL_(x) MPT_ULITERAL(x)
|
|
#define U_(x) MPT_USTRING(x)
|
|
|
|
|
|
|
|
#if MPT_USTRING_MODE_WIDE
|
|
#if !(MPT_WSTRING_CONVERT)
|
|
#error "MPT_USTRING_MODE_WIDE depends on MPT_WSTRING_CONVERT)"
|
|
#endif
|
|
inline mpt::ustring ToUnicode(const std::wstring &str) { return str; }
|
|
inline mpt::ustring ToUnicode(const wchar_t * str) { return (str ? std::wstring(str) : std::wstring()); }
|
|
inline mpt::ustring ToUnicode(Charset from, const std::string &str) { return ToWide(from, str); }
|
|
inline mpt::ustring ToUnicode(Charset from, const char * str) { return ToUnicode(from, str ? std::string(str) : std::string()); }
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
inline mpt::ustring ToUnicode(const mpt::lstring &str) { return ToWide(str); }
|
|
#endif // MPT_ENABLE_CHARSET_LOCALE
|
|
#if defined(MPT_WITH_MFC)
|
|
inline mpt::ustring ToUnicode(const CString &str) { return ToWide(str); }
|
|
#endif // MFC
|
|
#else // !MPT_USTRING_MODE_WIDE
|
|
inline mpt::ustring ToUnicode(const mpt::ustring &str) { return str; }
|
|
#if MPT_WSTRING_CONVERT
|
|
mpt::ustring ToUnicode(const std::wstring &str);
|
|
inline mpt::ustring ToUnicode(const wchar_t * str) { return ToUnicode(str ? std::wstring(str) : std::wstring()); }
|
|
#endif
|
|
mpt::ustring ToUnicode(Charset from, const std::string &str);
|
|
inline mpt::ustring ToUnicode(Charset from, const char * str) { return ToUnicode(from, str ? std::string(str) : std::string()); }
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
mpt::ustring ToUnicode(const mpt::lstring &str);
|
|
#endif // MPT_ENABLE_CHARSET_LOCALE
|
|
#if defined(MPT_WITH_MFC)
|
|
mpt::ustring ToUnicode(const CString &str);
|
|
#endif // MPT_WITH_MFC
|
|
#endif // MPT_USTRING_MODE_WIDE
|
|
|
|
#if MPT_USTRING_MODE_WIDE
|
|
#if !(MPT_WSTRING_CONVERT)
|
|
#error "MPT_USTRING_MODE_WIDE depends on MPT_WSTRING_CONVERT)"
|
|
#endif
|
|
// nothing, std::wstring overloads will catch all stuff
|
|
#else // !MPT_USTRING_MODE_WIDE
|
|
#if MPT_WSTRING_CONVERT
|
|
std::wstring ToWide(const mpt::ustring &str);
|
|
#endif
|
|
std::string ToCharset(Charset to, const mpt::ustring &str);
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
mpt::lstring ToLocale(const mpt::ustring &str);
|
|
#endif // MPT_ENABLE_CHARSET_LOCALE
|
|
#if MPT_OS_WINDOWS
|
|
mpt::winstring ToWin(const mpt::ustring &str);
|
|
#endif // MPT_OS_WINDOWS
|
|
#if defined(MPT_WITH_MFC)
|
|
CString ToCString(const mpt::ustring &str);
|
|
#endif // MPT_WITH_MFC
|
|
#endif // MPT_USTRING_MODE_WIDE
|
|
|
|
|
|
|
|
|
|
|
|
// The MPT_UTF8 macro allows specifying UTF8 char arrays.
|
|
// The resulting type is mpt::ustring and the construction might require runtime translation,
|
|
// i.e. it is NOT generally available at compile time.
|
|
// Use explicit UTF8 encoding,
|
|
// i.e. U+00FC (LATIN SMALL LETTER U WITH DIAERESIS) would be written as "\xC3\xBC".
|
|
#define MPT_UTF8(x) mpt::ToUnicode(mpt::Charset::UTF8, x)
|
|
|
|
|
|
|
|
|
|
|
|
mpt::ustring ToUnicode(uint16 codepage, mpt::Charset fallback, const std::string &str);
|
|
|
|
|
|
|
|
|
|
|
|
char ToLowerCaseAscii(char c);
|
|
char ToUpperCaseAscii(char c);
|
|
std::string ToLowerCaseAscii(std::string s);
|
|
std::string ToUpperCaseAscii(std::string s);
|
|
|
|
int CompareNoCaseAscii(const char *a, const char *b, std::size_t n);
|
|
int CompareNoCaseAscii(std::string_view a, std::string_view b);
|
|
int CompareNoCaseAscii(const std::string &a, const std::string &b);
|
|
|
|
|
|
#if defined(MODPLUG_TRACKER)
|
|
|
|
mpt::ustring ToLowerCase(const mpt::ustring &s);
|
|
mpt::ustring ToUpperCase(const mpt::ustring &s);
|
|
|
|
#endif // MODPLUG_TRACKER
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace mpt
|
|
|
|
|
|
|
|
|
|
|
|
// The AnyString types are meant to be used as function argument types only,
|
|
// and only during the transition phase to all-unicode strings in the whole codebase.
|
|
// Using an AnyString type as function argument avoids the need to overload a function for all the
|
|
// different string types that we currently have.
|
|
// Warning: These types will silently do charset conversions. Only use them when this can be tolerated.
|
|
|
|
// BasicAnyString is convertible to mpt::ustring and constructible from any string at all.
|
|
template <mpt::Charset charset = mpt::Charset::UTF8, bool tryUTF8 = true>
|
|
class BasicAnyString : public mpt::ustring
|
|
{
|
|
|
|
private:
|
|
|
|
static mpt::ustring From8bit(const std::string &str)
|
|
{
|
|
if constexpr(charset == mpt::Charset::UTF8)
|
|
{
|
|
return mpt::ToUnicode(mpt::Charset::UTF8, str);
|
|
} else
|
|
{
|
|
// auto utf8 detection
|
|
if constexpr(tryUTF8)
|
|
{
|
|
if(mpt::IsUTF8(str))
|
|
{
|
|
return mpt::ToUnicode(mpt::Charset::UTF8, str);
|
|
} else
|
|
{
|
|
return mpt::ToUnicode(charset, str);
|
|
}
|
|
} else
|
|
{
|
|
return mpt::ToUnicode(charset, str);
|
|
}
|
|
}
|
|
}
|
|
|
|
public:
|
|
|
|
// 8 bit
|
|
BasicAnyString(const char *str) : mpt::ustring(From8bit(str ? str : std::string())) { }
|
|
BasicAnyString(const std::string str) : mpt::ustring(From8bit(str)) { }
|
|
|
|
// locale
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
BasicAnyString(const mpt::lstring str) : mpt::ustring(mpt::ToUnicode(str)) { }
|
|
#endif // MPT_ENABLE_CHARSET_LOCALE
|
|
|
|
// unicode
|
|
BasicAnyString(const mpt::ustring &str) : mpt::ustring(str) { }
|
|
BasicAnyString(mpt::ustring &&str) : mpt::ustring(std::move(str)) { }
|
|
#if MPT_USTRING_MODE_UTF8 && MPT_WSTRING_CONVERT
|
|
BasicAnyString(const std::wstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
|
|
#endif
|
|
#if MPT_WSTRING_CONVERT
|
|
BasicAnyString(const wchar_t *str) : mpt::ustring(str ? mpt::ToUnicode(str) : mpt::ustring()) { }
|
|
#endif
|
|
|
|
// mfc
|
|
#if defined(MPT_WITH_MFC)
|
|
BasicAnyString(const CString &str) : mpt::ustring(mpt::ToUnicode(str)) { }
|
|
#endif // MPT_WITH_MFC
|
|
|
|
// fallback for custom string types
|
|
template <typename Tstring> BasicAnyString(const Tstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
|
|
template <typename Tstring> BasicAnyString(Tstring &&str) : mpt::ustring(mpt::ToUnicode(std::forward<Tstring>(str))) { }
|
|
|
|
};
|
|
|
|
// AnyUnicodeString is convertible to mpt::ustring and constructible from any unicode string.
|
|
class AnyUnicodeString : public mpt::ustring
|
|
{
|
|
|
|
public:
|
|
|
|
// locale
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
AnyUnicodeString(const mpt::lstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
|
|
#endif // MPT_ENABLE_CHARSET_LOCALE
|
|
|
|
// unicode
|
|
AnyUnicodeString(const mpt::ustring &str) : mpt::ustring(str) { }
|
|
AnyUnicodeString(mpt::ustring &&str) : mpt::ustring(std::move(str)) { }
|
|
#if MPT_USTRING_MODE_UTF8 && MPT_WSTRING_CONVERT
|
|
AnyUnicodeString(const std::wstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
|
|
#endif
|
|
#if MPT_WSTRING_CONVERT
|
|
AnyUnicodeString(const wchar_t *str) : mpt::ustring(str ? mpt::ToUnicode(str) : mpt::ustring()) { }
|
|
#endif
|
|
|
|
// mfc
|
|
#if defined(MPT_WITH_MFC)
|
|
AnyUnicodeString(const CString &str) : mpt::ustring(mpt::ToUnicode(str)) { }
|
|
#endif // MPT_WITH_MFC
|
|
|
|
// fallback for custom string types
|
|
template <typename Tstring> AnyUnicodeString(const Tstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
|
|
template <typename Tstring> AnyUnicodeString(Tstring &&str) : mpt::ustring(mpt::ToUnicode(std::forward<Tstring>(str))) { }
|
|
|
|
};
|
|
|
|
// AnyString
|
|
// Try to do the smartest auto-magic we can do.
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
using AnyString = BasicAnyString<mpt::Charset::Locale, true>;
|
|
#elif MPT_OS_WINDOWS
|
|
using AnyString = BasicAnyString<mpt::Charset::Windows1252, true>;
|
|
#else
|
|
using AnyString = BasicAnyString<mpt::Charset::ISO8859_1, true>;
|
|
#endif
|
|
|
|
// AnyStringLocale
|
|
// char-based strings are assumed to be in locale encoding.
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
using AnyStringLocale = BasicAnyString<mpt::Charset::Locale, false>;
|
|
#else
|
|
using AnyStringLocale = BasicAnyString<mpt::Charset::UTF8, false>;
|
|
#endif
|
|
|
|
// AnyStringUTF8orLocale
|
|
// char-based strings are tried in UTF8 first, if this fails, locale is used.
|
|
#if defined(MPT_ENABLE_CHARSET_LOCALE)
|
|
using AnyStringUTF8orLocale = BasicAnyString<mpt::Charset::Locale, true>;
|
|
#else
|
|
using AnyStringUTF8orLocale = BasicAnyString<mpt::Charset::UTF8, false>;
|
|
#endif
|
|
|
|
// AnyStringUTF8
|
|
// char-based strings are assumed to be in UTF8.
|
|
using AnyStringUTF8 = BasicAnyString<mpt::Charset::UTF8, false>;
|
|
|
|
|
|
|
|
OPENMPT_NAMESPACE_END
|