harfang3d/harfang/foundation/string.cpp
2022-12-07 09:51:01 +01:00

379 lines
9.6 KiB
C++

// HARFANG(R) Copyright (C) 2021 Emmanuel Julien, NWNC HARFANG. Released under GPL/LGPL/Commercial Licence, see licence.txt for details.
#include "foundation/string.h"
#include "foundation/assert.h"
#include "foundation/utf8-cpp/utf8.h"
#include "foundation/utf8.h"
#include <algorithm>
#include <cctype>
#include <locale>
#include <sstream>
#include <memory>
#include <codecvt>
namespace hg {
/*
* Copyright (c) 1999 by David I. Bell
* Permission is granted to use, distribute, or modify this source,
* provided that this copyright notice remains intact.
*
*/
/*
* Routine to see if a text string is matched by a wildcard pattern.
* Returns true if the text is matched, or false if it is not matched,
* if the pattern is invalid, or if either argument is a null pointer.
* * matches zero or more characters
* ? matches a single character
* [abc] matches 'a', 'b' or 'c'
* \c quotes character c
* When a literal or a [set] fails to match after a '*' was seen, matching
* restarts right after that '*' with the text advanced by one character.
* Adapted from code written by Ingo Wilken.
*/
bool match_wildcard(const char *text, const char *pattern) {
	if (text == nullptr || pattern == nullptr)
		return false;

	const char *retryPat = nullptr; // pattern position just after the most recent '*', null until one is seen
	const char *retryText = text; // text position the most recent '*' is currently anchored at

	while (*text || *pattern) {
		int ch = *pattern++;

		switch (ch) {
			case '*':
				retryPat = pattern;
				retryText = text;
				break;

			case '[': {
				bool found = false;
				while ((ch = *pattern++) != ']') {
					if (ch == '\\')
						ch = *pattern++;
					if (ch == '\0')
						return false; // unterminated set: invalid pattern
					if (*text == ch)
						found = true;
				}
				if (found) {
					++text; // found implies *text is a real character, never the terminator
					break;
				}
				if (*text == '\0')
					return false; // text exhausted: nothing left to retry on
				// Backtrack exactly like a literal mismatch does; the set itself is
				// re-evaluated on the next iteration, so no character is skipped here.
				pattern = retryPat;
				text = ++retryText;
				break;
			}

			case '?':
				if (*text++ == '\0')
					return false;
				break;

			case '\\':
				ch = *pattern++;
				if (ch == '\0')
					return false; // dangling escape: invalid pattern
				// fall into next case

			default:
				if (*text == ch) {
					if (*text)
						text++;
					break;
				}
				if (*text) {
					pattern = retryPat;
					text = ++retryText;
					break;
				}
				return false;
		}

		// A backtrack without any preceding '*' leaves pattern null: definitive mismatch.
		if (!pattern)
			return false;
	}
	return true;
}
// Extract a sub-string of `str`.
// - from: index of the first character; a negative value counts from the end of the
//   string and is clamped to the start if it reaches before it.
// - count: number of characters to keep; 0 keeps everything from `from` onwards, a
//   negative value keeps everything from `from` except the last -count characters.
// An empty string is returned when nothing remains to extract.
std::string slice(const std::string &str, ptrdiff_t from, ptrdiff_t count) {
	const ptrdiff_t length = static_cast<ptrdiff_t>(str.length());

	if (from < 0) {
		from += length; // index relative to the end of the string
		if (from < 0)
			from = 0; // still before the start: clamp
	}

	// Number of characters available from `from`; negative when `from` is past the end.
	const ptrdiff_t available = length - from;

	if (count == 0)
		count = available;
	else if (count < 0)
		count += available;

	if (count > available)
		count = available;

	if (count <= 0)
		return std::string();
	return str.substr(from, count);
}
// Return the leading part of `str` by forwarding to slice() with a start index of 0.
// `count` follows slice() semantics: a positive value keeps that many characters, 0 keeps
// the whole string and a negative value drops that many characters from the end.
std::string left(const std::string &str, ptrdiff_t count) { return slice(str, 0, count); }
// Return the trailing part of `str` by forwarding to slice() with a start index of -count
// and a count of 0 (everything from the start index to the end of the string).
// With slice() semantics, a count larger than the string length or equal to 0 yields the
// whole string, and a negative count skips that many leading characters.
std::string right(const std::string &str, ptrdiff_t count) { return slice(str, -count, 0); }
// Replace, in place, every non-overlapping occurrence of `what` in `value` with `by`.
// The search resumes right after each inserted replacement, so text introduced by `by`
// is never matched again. Returns the number of replacements performed.
// An empty `what` performs no replacement and returns 0.
size_t replace_all(std::string &value, const std::string &what, const std::string &by) {
	// An empty needle is found at every position and would make the loop below spin forever.
	if (what.empty())
		return 0;

	const std::string::size_type what_len = what.length();
	const std::string::size_type by_len = by.length();

	size_t count = 0;
	for (std::string::size_type at = value.find(what); at != std::string::npos; at = value.find(what, at + by_len)) {
		value.replace(at, what_len, by);
		++count;
	}
	return count;
}
// Tell whether `what` occurs anywhere inside `in`.
bool contains(const std::string &in, const std::string &what) {
	const std::string::size_type position = in.find(what);
	return position != std::string::npos;
}
// Split `value` on every occurrence of `separator`.
// - Elements located between two separators are always kept, even when empty.
// - The element following the last separator is kept only when it is not empty, so a
//   trailing separator does not produce a trailing empty element.
// - When `trim` is not empty, every occurrence of `trim` is removed from each element
//   (through replace_all), not only the leading/trailing ones.
// - An empty `separator` cannot split anything: the whole value is returned as a single
//   element (or no element at all if `value` is empty).
std::vector<std::string> split(const std::string &value, const std::string &separator, const std::string &trim) {
	std::vector<std::string> elements; // single named return object, keep here to help NRVO
	elements.reserve(8);

	const bool has_trim = !trim.empty();

	// find() reports an empty separator at every position without ever advancing, which
	// would loop forever appending empty elements.
	if (separator.empty()) {
		if (!value.empty()) {
			std::string element(value);
			if (has_trim)
				replace_all(element, trim, "");
			elements.push_back(std::move(element));
		}
		return elements;
	}

	const std::string::size_type value_length = value.length();
	const std::string::size_type separator_length = separator.length();

	std::string::size_type start = 0; // first character of the element being extracted
	for (;;) {
		const std::string::size_type hit = value.find(separator, start);
		if (hit == std::string::npos)
			break;

		std::string element(value.substr(start, hit - start));
		if (has_trim)
			replace_all(element, trim, "");
		elements.push_back(std::move(element));

		start = hit + separator_length;
		if (start >= value_length)
			return elements; // value ends with a separator: no trailing element
	}

	// Remainder after the last separator (or the whole value when none was found).
	std::string element = value.substr(start);
	if (!element.empty()) {
		if (has_trim)
			replace_all(element, trim, "");
		elements.push_back(std::move(element));
	}
	return elements;
}
// Remove from both ends of `str` every character that belongs to the `pattern` set.
// Returns an empty string when `str` contains nothing but characters from `pattern`.
std::string trim(const std::string &str, const std::string &pattern) {
	const std::string::size_type first = str.find_first_not_of(pattern);
	if (first == std::string::npos)
		return std::string(); // no content

	const std::string::size_type last = str.find_last_not_of(pattern);
	return std::string(str, first, last - first + 1);
}
// Trim `str` with trim(str, pattern), then replace each remaining run of characters
// belonging to the `pattern` set with a single copy of `fill`.
std::string reduce(const std::string &str, const std::string &fill, const std::string &pattern) {
	std::string result = trim(str, pattern);

	const std::string::size_type fill_length = fill.length();

	// The search for the next run resumes right after the `fill` that was just inserted.
	for (std::string::size_type run_begin = result.find_first_of(pattern); run_begin != std::string::npos;
		 run_begin = result.find_first_of(pattern, run_begin + fill_length)) {
		const std::string::size_type run_end = result.find_first_not_of(pattern, run_begin);
		result.replace(run_begin, run_end - run_begin, fill);
	}
	return result;
}
//
// Remove from the start of `str` every character that belongs to the `pattern` set.
// Returns an empty string when `str` contains nothing but characters from `pattern`.
std::string lstrip(const std::string &str, const std::string &pattern) {
	const std::string::size_type first_kept = str.find_first_not_of(pattern);
	return first_kept == std::string::npos ? std::string() : str.substr(first_kept);
}
// Remove from the end of `str` every character that belongs to the `pattern` set.
// Returns an empty string when `str` contains nothing but characters from `pattern`.
std::string rstrip(const std::string &str, const std::string &pattern) {
	const std::string::size_type last_kept = str.find_last_not_of(pattern);
	return last_kept == std::string::npos ? std::string() : std::string(str, 0, last_kept + 1);
}
// Remove the characters of the `pattern` set from both ends of `str`: rstrip() is applied
// first, then lstrip() on its result.
std::string strip(const std::string &str, const std::string &pattern) { return lstrip(rstrip(str, pattern), pattern); }
//
// Remove the leading whitespace of `str`, as classified by std::isspace.
// Returns an empty string when `str` is empty or made of whitespace only.
std::string lstrip_space(const std::string &str) {
	size_t i = 0;
	// std::isspace requires a value representable as unsigned char: a plain `char` holding
	// a non-ASCII byte may be negative, so convert before the call.
	while (i < str.size() && std::isspace(static_cast<unsigned char>(str[i])))
		++i;
	return str.substr(i);
}
// Remove the trailing whitespace of `str`, as classified by std::isspace.
// Returns an empty string when `str` is empty or made of whitespace only.
std::string rstrip_space(const std::string &str) {
	size_t kept = str.size(); // number of leading characters to keep
	// std::isspace requires a value representable as unsigned char: a plain `char` holding
	// a non-ASCII byte may be negative, so convert before the call.
	while (kept > 0 && std::isspace(static_cast<unsigned char>(str[kept - 1])))
		--kept;
	// substr() is used rather than slice(): slice() reads a count of 0 as "everything",
	// which would hand back a whitespace-only string untouched.
	return str.substr(0, kept);
}
// Remove whitespace from both ends of `str`: rstrip_space() is applied first, then
// lstrip_space() on its result.
std::string strip_space(const std::string &str) { return lstrip_space(rstrip_space(str)); }
//
// Return `str` without `prefix` when it starts with it, an unmodified copy otherwise.
std::string strip_prefix(const std::string &str, const std::string &prefix) {
	if (!starts_with(str, prefix))
		return str;
	return slice(str, prefix.length());
}
// Return `str` without `suffix` when it ends with it, an unmodified copy otherwise.
std::string strip_suffix(const std::string &str, const std::string &suffix) {
	if (!ends_with(str, suffix))
		return str;
	// A negative count asks slice() to drop that many characters from the end.
	return slice(str, 0, -int(suffix.length()));
}
//
// Encode the UTF-32 string `str` as UTF-8 with utf8::utf32to8.
// The bytes are appended straight to the returned string, the same way wchar_to_utf8()
// does, instead of going through a temporary byte vector and a pointer cast.
// NOTE(review): error behaviour on invalid code points is whatever the bundled utf8-cpp
// checked API does (expected to throw) - confirm against the vendored version.
std::string utf32_to_utf8(const std::u32string &str) {
	std::string out;
	utf8::utf32to8(str.begin(), str.end(), std::back_inserter(out));
	return out;
}
// Decode the UTF-8 string `str` to UTF-32 with utf8::utf8to32.
// The code points are appended straight to the returned string instead of going through a
// temporary vector that is then copied.
// NOTE(review): error behaviour on malformed UTF-8 is whatever the bundled utf8-cpp checked
// API does (expected to throw) - confirm against the vendored version.
std::u32string utf8_to_utf32(const std::string &str) {
	std::u32string out;
	utf8::utf8to32(str.begin(), str.end(), std::back_inserter(out));
	return out;
}
// Convert the wide string `str` to UTF-8, feeding its elements to utf8::utf16to8.
// NOTE(review): the input is handed to a UTF-16 decoder whatever the size of wchar_t is;
// verify the handling of characters outside the BMP on platforms where wchar_t is 32-bit.
std::string wchar_to_utf8(const std::wstring &str) {
	std::string utf8_bytes;
	auto sink = std::back_inserter(utf8_bytes);
	utf8::utf16to8(str.begin(), str.end(), sink);
	return utf8_bytes;
}
// Convert the UTF-8 string `str` to a wide string, storing the output of utf8::utf8to16.
// NOTE(review): the result holds UTF-16 code units whatever the size of wchar_t is; verify
// the handling of characters outside the BMP on platforms where wchar_t is 32-bit.
std::wstring utf8_to_wchar(const std::string &str) {
	std::wstring wide;
	auto sink = std::back_inserter(wide);
	utf8::utf8to16(str.begin(), str.end(), sink);
	return wide;
}
// Convert a narrow multibyte string to a wide string with std::mbsrtowcs.
// The multibyte encoding is the one mbsrtowcs itself uses (the current C locale).
// Returns an empty string when `str` is empty or cannot be converted.
// The conversion reads `str` as a C string, so it stops at the first NUL character.
std::wstring ansi_to_wchar(const std::string &str) {
	std::wstring ret;

	// First pass, without a destination buffer: only computes the converted length.
	std::mbstate_t state = {};
	const char *src = str.data();
	const size_t len = std::mbsrtowcs(nullptr, &src, 0, &state);
	if (len == static_cast<size_t>(-1) || len == 0)
		return ret;

	// Second pass: convert straight into the result storage, starting again from the
	// beginning of the input with a fresh conversion state.
	ret.resize(len);
	state = std::mbstate_t();
	src = str.data();
	const size_t written = std::mbsrtowcs(&ret[0], &src, len, &state);
	if (written == static_cast<size_t>(-1))
		ret.clear();
	else
		ret.resize(written);
	return ret;
}
// Convert a narrow multibyte string to UTF-8 by chaining ansi_to_wchar() and wchar_to_utf8().
std::string ansi_to_utf8(const std::string &string) { return wchar_to_utf8(ansi_to_wchar(string)); }
//
// Convert the ASCII upper-case letters ('A'-'Z') of `str` to lower case, in place.
// Only the characters in [start, end) are processed; an `end` of 0 means "up to the end of
// the string". All other characters, including non-ASCII bytes, are left untouched.
// Out-of-range bounds are clamped to the string size and an empty or inverted range is a
// no-op.
void tolower_inplace(std::string &str, size_t start, size_t end) {
	const size_t size = str.size();
	const size_t last = (end == 0 || end > size) ? size : end;
	if (start >= last)
		return; // nothing to convert, and never form an iterator outside of the string

	std::transform(str.begin() + start, str.begin() + last, str.begin() + start,
		[](char c) -> char { return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + ('a' - 'A')) : c; });
}
// Return a lower-cased copy of `str`.
// `str` is received by value, converted with tolower_inplace() using the same `start` and
// `end` arguments, then returned; the caller's string is not modified.
std::string tolower(std::string str, size_t start, size_t end) {
tolower_inplace(str, start, end);
return str;
}
// Convert the ASCII lower-case letters ('a'-'z') of `str` to upper case, in place.
// Only the characters in [start, end) are processed; an `end` of 0 means "up to the end of
// the string". All other characters, including non-ASCII bytes, are left untouched.
// Out-of-range bounds are clamped to the string size and an empty or inverted range is a
// no-op.
void toupper_inplace(std::string &str, size_t start, size_t end) {
	const size_t size = str.size();
	const size_t last = (end == 0 || end > size) ? size : end;
	if (start >= last)
		return; // nothing to convert, and never form an iterator outside of the string

	std::transform(str.begin() + start, str.begin() + last, str.begin() + start,
		[](char c) -> char { return (c >= 'a' && c <= 'z') ? static_cast<char>(c - ('a' - 'A')) : c; });
}
// Return an upper-cased copy of `str`.
// `str` is received by value, converted with toupper_inplace() using the same `start` and
// `end` arguments, then returned; the caller's string is not modified.
std::string toupper(std::string str, size_t start, size_t end) {
toupper_inplace(str, start, end);
return str;
}
// Rewrite, in place, the line endings of `inplace_normalize`.
// With EOLUnix every "\r\n" becomes "\n"; with any other convention every line ends up
// terminated by "\r\n". The operation is idempotent for both conventions.
void normalize_eol(std::string &inplace_normalize, EOLConvention eol_convention) {
	// Always collapse to bare "\n" first: expanding "\n" directly would turn an already
	// present "\r\n" into "\r\r\n".
	replace_all(inplace_normalize, "\r\n", "\n");

	if (eol_convention != EOLUnix)
		replace_all(inplace_normalize, "\n", "\r\n");
}
// Wrap `str` so that lines are broken at the first ' ' or ';' found once `width` characters
// have been written on the current line. Lines are never broken anywhere else, so a long
// run without a split character overflows `width`.
// - width: target line width, values below 1 are treated as 1.
// - lead: number of `lead_char` characters inserted after every line break, both the
//   inserted ones and the '\n' already present in `str`; negative values are treated as 0.
//   The first line receives no lead.
// A space used as a break point is dropped, a ';' is kept at the end of the line.
std::string word_wrap(const std::string &str, int width, int lead, char lead_char) {
	if (width < 1)
		width = 1;
	if (lead < 0)
		lead = 0; // a negative count would wrap around to a huge size_t and throw std::length_error

	const std::string lead_str(static_cast<size_t>(lead), lead_char);

	std::string o;
	o.reserve(str.length() + (str.length() / width) * (lead_str.length() + 1));

	int n = width; // characters left on the current line before a break is allowed
	for (const char c : str) {
		if (c == '\n') {
			o += c;
			o += lead_str;
			n = width;
			continue;
		}

		const bool is_split_possible = (c == ' ') || (c == ';');
		if (is_split_possible && n <= 0) {
			if (c != ' ')
				o += c;
			o += '\n';
			o += lead_str;
			n = width;
		} else {
			o += c;
			--n;
		}
	}
	return o;
}
// Turn `name` into a path-friendly string: the name is lower-cased with tolower(), then
// every ' ', '\\', '/', '!' and '@' character is replaced with '-'.
std::string name_to_path(std::string name) {
	name = tolower(name);

	for (char &c : name) {
		switch (c) {
			case ' ':
			case '\\':
			case '/':
			case '!':
			case '@':
				c = '-';
				break;
			default:
				break;
		}
	}
	return name;
}
// Return `str` preceded by as many `padding_char` as needed to reach `padded_width`
// characters. A string that is already at least that wide is returned unchanged.
std::string pad_left(const std::string &str, int padded_width, char padding_char) {
	const int missing = padded_width - int(str.size());
	if (missing > 0) {
		std::string padded(missing, padding_char);
		padded += str;
		return padded;
	}
	return str;
}
// Return `str` followed by as many `padding_char` as needed to reach `padded_width`
// characters. A string that is already at least that wide is returned unchanged.
std::string pad_right(const std::string &str, int padded_width, char padding_char) {
	const int missing = padded_width - int(str.size());
	if (missing > 0) {
		std::string padded(str);
		padded.append(missing, padding_char);
		return padded;
	}
	return str;
}
} // namespace hg