From afcad89f189568c507567efad3bbfe3197630f90 Mon Sep 17 00:00:00 2001
From: Sandu Liviu Catalin
Date: Sat, 10 Apr 2021 17:18:49 +0300
Subject: [PATCH] String utils.

---
 module/Library/Utils/String.cpp |  82 ++++++-
 module/Library/Utils/String.hpp | 394 +++++++++++++++++++++++++++++---
 2 files changed, 439 insertions(+), 37 deletions(-)

diff --git a/module/Library/Utils/String.cpp b/module/Library/Utils/String.cpp
index 6734d5dd..cfcf1b12 100644
--- a/module/Library/Utils/String.cpp
+++ b/module/Library/Utils/String.cpp
@@ -8,6 +8,34 @@ namespace SqMod {
 // ------------------------------------------------------------------------------------------------
 SQMOD_DECL_TYPENAME(Typename, _SC("SqString"))
 
+// ------------------------------------------------------------------------------------------------
+SQInteger SqString::GetLevenshtein(SqString & o) const
+{
+    if(mS.empty()) return static_cast< SQInteger >(o.mS.size()); // Distance to an empty string is all insertions
+    if(o.mS.empty()) return static_cast< SQInteger >(mS.size()); // Distance from an empty string is all deletions
+
+    std::vector< size_t > costs(o.mS.size() + 1); // Single rolling row of the edit-distance matrix
+    std::iota(costs.begin(), costs.end(), 0); // Row 0: cost of building each prefix of `o` from nothing
+    size_t i = 0; // Number of characters of this string processed so far
+
+    for (const auto & c1 : mS)
+    {
+        costs[0] = i + 1; // Column 0: cost of deleting the first i + 1 characters
+        size_t corner = i; // Previous row, previous column (diagonal) value
+        size_t j = 0;
+        for (const auto & c2 : o.mS)
+        {
+            size_t upper = costs[j + 1]; // Previous row, same column value (about to be overwritten)
+            costs[j + 1] = (c1 == c2) ? corner : 1 + std::min(std::min(upper, corner), costs[j]);
+            corner = upper;
+            ++j;
+        }
+        ++i;
+    }
+
+    return static_cast< SQInteger >(costs[o.mS.size()]); // Last cell holds the full distance
+}
+
 // ------------------------------------------------------------------------------------------------
 static SQInteger SqStringFormat(HSQUIRRELVM vm)
 {
@@ -46,6 +74,11 @@ void Register_Native_String(HSQUIRRELVM vm, Table & ns)
         .Prop(_SC("Size"), &SqString::Size, &SqString::Resize)
         .Prop(_SC("Capacity"), &SqString::Capacity, &SqString::Reserve)
         .Prop(_SC("Sorted"), &SqString::IsSorted)
+        .Prop(_SC("Trimmed"), &SqString::Trimmed)
+        .Prop(_SC("Lower"), &SqString::GetLower)
+        .Prop(_SC("Upper"), &SqString::GetUpper)
+        .Prop(_SC("Fnv1a32"), &SqString::GetFnv1a32)
+        .Prop(_SC("Fnv1a64"), &SqString::GetFnv1a64)
         // Member Methods
         .Func(_SC("Get"), &SqString::Get)
         .Func(_SC("Set"), &SqString::Set)
@@ -62,10 +95,6 @@ void Register_Native_String(HSQUIRRELVM vm, Table & ns)
         .Func(_SC("EraseValue"), &SqString::EraseValue)
         .Func(_SC("InsertAt"), &SqString::InsertAt)
         .Func(_SC("Insert"), &SqString::Insert)
-        .Func(_SC("Locate"), &SqString::Locate)
-        .Func(_SC("LocateFrom"), &SqString::LocateFrom)
-        .Func(_SC("Find"), &SqString::Find)
-        .Func(_SC("FindFrom"), &SqString::FindFrom)
         .Func(_SC("Count"), &SqString::Count)
         .Func(_SC("Equal"), &SqString::Equal)
         .Func(_SC("Slice"), &SqString::Slice)
@@ -81,6 +110,51 @@ void Register_Native_String(HSQUIRRELVM vm, Table & ns)
         .Func(_SC("Sort"), &SqString::Sort)
         .Func(_SC("Shuffle"), &SqString::Shuffle)
         .Func(_SC("Assign"), &SqString::SetString)
+        .Func(_SC("TrimLeft"), &SqString::TrimLeft)
+        .Func(_SC("TrimRight"), &SqString::TrimRight)
+        .Func(_SC("Trim"), &SqString::Trim)
+        .Func(_SC("ToLower"), &SqString::ToLower)
+        .Func(_SC("ToUpper"), &SqString::ToUpper)
+        .Func(_SC("SwapCase"), &SqString::SwapCase)
+        .Func(_SC("Compare"), &SqString::Compare)
+        .Func(_SC("CompareI"), &SqString::CompareI)
+        .FmtFunc(_SC("Contains"), &SqString::Contains)
+        .FmtFunc(_SC("StartsWith"), &SqString::StartsWith)
+        .FmtFunc(_SC("EndsWith"), &SqString::EndsWith)
+        .FmtFunc(_SC("Find"), &SqString::Find)
+        .FmtFunc(_SC("FindFrom"), &SqString::FindFrom)
+        .FmtFunc(_SC("RFind"), &SqString::RFind)
+        .FmtFunc(_SC("RFindFrom"), &SqString::RFindFrom)
+        .FmtFunc(_SC("FindFirstOf"), &SqString::FindFirstOf)
+        .FmtFunc(_SC("FindFirstOfFrom"), &SqString::FindFirstOfFrom)
+        .FmtFunc(_SC("FindFirstNotOf"), &SqString::FindFirstNotOf)
+        .FmtFunc(_SC("FindFirstNotOfFrom"), &SqString::FindFirstNotOfFrom)
+        .FmtFunc(_SC("FindLastOf"), &SqString::FindLastOf)
+        .FmtFunc(_SC("FindLastOfFrom"), &SqString::FindLastOfFrom)
+        .FmtFunc(_SC("FindLastNotOf"), &SqString::FindLastNotOf)
+        .FmtFunc(_SC("FindLastNotOfFrom"), &SqString::FindLastNotOfFrom)
+        .FmtFunc(_SC("Levenshtein"), &SqString::GetLevenshtein)
+        .FmtFunc(_SC("Remove"), &SqString::Remove)
+        .FmtFunc(_SC("Eliminate"), &SqString::Eliminate)
+        // Member Overloads
+        .Overload(_SC("Locate"), &SqString::Locate)
+        .Overload(_SC("Locate"), &SqString::LocateFrom)
+        .Overload(_SC("RLocate"), &SqString::RLocate)
+        .Overload(_SC("RLocate"), &SqString::RLocateFrom)
+        .Overload(_SC("LocateFirstOf"), &SqString::LocateFirstOf)
+        .Overload(_SC("LocateFirstOf"), &SqString::LocateFirstOfFrom)
+        .Overload(_SC("LocateFirstNotOf"), &SqString::LocateFirstNotOf)
+        .Overload(_SC("LocateFirstNotOf"), &SqString::LocateFirstNotOfFrom)
+        .Overload(_SC("LocateLastOf"), &SqString::LocateLastOf)
+        .Overload(_SC("LocateLastOf"), &SqString::LocateLastOfFrom)
+        .Overload(_SC("LocateLastNotOf"), &SqString::LocateLastNotOf)
+        .Overload(_SC("LocateLastNotOf"), &SqString::LocateLastNotOfFrom)
+        .Overload(_SC("Repeat"), &SqString::Repeat)
+        .Overload(_SC("Repeat"), &SqString::Repeat_)
+        .Overload(_SC("Replace"), &SqString::Replace)
+        .Overload(_SC("Replace"), &SqString::Replace_)
+        .Overload(_SC("Change"), &SqString::Change)
+        .Overload(_SC("Change"), &SqString::Change_)
     );
     // --------------------------------------------------------------------------------------------
     RootTable(vm).SquirrelFunc(_SC("SqStringF"), SqStringFormat);
diff --git a/module/Library/Utils/String.hpp b/module/Library/Utils/String.hpp
index 576ad55f..be6f58dd 100644
--- a/module/Library/Utils/String.hpp
+++ b/module/Library/Utils/String.hpp
@@ -3,6 +3,9 @@
 // ------------------------------------------------------------------------------------------------
 #include "Core/Utility.hpp"
 
+// ------------------------------------------------------------------------------------------------
+#include <Poco/String.h>
+
 // ------------------------------------------------------------------------------------------------
 #include
 #include
@@ -17,6 +20,8 @@ namespace SqMod {
 */
 struct SqString
 {
+    static constexpr const char WHITESPACE[] = " \n\t\v\b\r\f\a";
+
     /* --------------------------------------------------------------------------------------------
      * String instance.
     */
@@ -56,7 +61,7 @@ struct SqString
     /* --------------------------------------------------------------------------------------------
      * Construct with forwarded native arguments.
     */
-    template < class... Args > SqString(SqInPlace SQ_UNUSED_ARG(x), Args&&... args)
+    template < class... Args > explicit SqString(SqInPlace SQ_UNUSED_ARG(x), Args&&... args)
         : mS(std::forward< Args >(args)...)
     {
     }
@@ -370,38 +375,6 @@ struct SqString
         mS.insert(ValidIdx(i).begin() + static_cast< size_t >(i), ClampL< SQInteger, size_t >(n), v);
     }
 
-    /* --------------------------------------------------------------------------------------------
-     * Locate the position of a value.
-    */
-    SQMOD_NODISCARD SQInteger Locate(String::value_type v) const
-    {
-        return static_cast< SQInteger >(mS.find(v));
-    }
-
-    /* --------------------------------------------------------------------------------------------
-     * Locate the position of a value starting from an offset.
-    */
-    SQMOD_NODISCARD SQInteger LocateFrom(SQInteger p, String::value_type v) const
-    {
-        return static_cast< SQInteger >(mS.find(v, static_cast< size_t >(p)));
-    }
-
-    /* --------------------------------------------------------------------------------------------
-     * Find the position of a sub-string.
-    */
-    SQMOD_NODISCARD SQInteger Find(StackStrF & s) const
-    {
-        return static_cast< SQInteger >(mS.find(s.mPtr));
-    }
-
-    /* --------------------------------------------------------------------------------------------
-     * Find the position of a sub-string starting from an offset.
-    */
-    SQMOD_NODISCARD SQInteger FindFrom(SQInteger p, StackStrF & s) const
-    {
-        return static_cast< SQInteger >(mS.find(s.mPtr, static_cast< size_t >(p), s.GetSize()));
-    }
-
     /* --------------------------------------------------------------------------------------------
      * Count the occurrences of a value in the container.
     */
@@ -591,6 +564,361 @@ struct SqString
         std::shuffle(mS.begin(), mS.end(), g);
         return *this;
     }
+
+    /* --------------------------------------------------------------------------------------------
+     * Remove white-space (as classified by std::isspace) at the start of the string, in place.
+    */
+    SqString & TrimLeft()
+    {
+        mS.erase(mS.begin(), std::find_if(mS.begin(), mS.end(), [](auto c) { return !std::isspace(static_cast< unsigned char >(c)); })); // cast: isspace is undefined for negative char values
+        // Allow chaining
+        return *this;
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Remove white-space (as classified by std::isspace) at the end of the string, in place.
+    */
+    SqString & TrimRight()
+    {
+        mS.erase(std::find_if(mS.rbegin(), mS.rend(), [](auto c) { return !std::isspace(static_cast< unsigned char >(c)); }).base(), mS.end()); // base() points one past the last non-space character
+        // Allow chaining
+        return *this;
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Remove white-space either side of the string.
+    */
+    SqString & Trim()
+    {
+        return TrimLeft().TrimRight();
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Retrieve a copy of the string with the WHITESPACE characters trimmed from either side.
+    */
+    SQMOD_NODISCARD LightObj Trimmed()
+    {
+        const auto sb = mS.find_first_not_of(WHITESPACE);
+        // Is there anything other than white-space?
+        if (sb != String::npos)
+        {
+            const auto se = mS.find_last_not_of(WHITESPACE);
+            // Return the portion that isn't surrounded by white-space (se is inclusive, hence + 1)
+            return LightObj(mS.data() + sb, static_cast< SQInteger >(se - sb + 1));
+        }
+        // Return an empty string
+        return LightObj(_SC(""), SQInteger(0));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Convert the string to lower-case, in place.
+    */
+    SqString & ToLower()
+    {
+        std::transform(mS.begin(), mS.end(), mS.begin(), [](unsigned char c) { return static_cast< String::value_type >(std::tolower(c)); }); // unsigned char: tolower is undefined for negative values
+        // Allow chaining
+        return *this;
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Convert the string to upper-case, in place.
+    */
+    SqString & ToUpper()
+    {
+        std::transform(mS.begin(), mS.end(), mS.begin(), [](unsigned char c) { return static_cast< String::value_type >(std::toupper(c)); }); // unsigned char: toupper is undefined for negative values
+        // Allow chaining
+        return *this;
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Retrieve a lower-case copy of the string; this instance is left untouched.
+    */
+    SQMOD_NODISCARD LightObj GetLower() const
+    {
+        SqString s(mS);
+        std::transform(s.mS.begin(), s.mS.end(), s.mS.begin(), [](unsigned char c) { return static_cast< String::value_type >(std::tolower(c)); });
+        return LightObj(s.mS.data(), static_cast< SQInteger >(s.mS.size()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Retrieve the string as upper-case.
+    */
+    SQMOD_NODISCARD LightObj GetUpper() const
+    {
+        SqString s(mS);
+        std::transform(s.mS.begin(), s.mS.end(), s.mS.begin(), [](unsigned char c) { return static_cast< String::value_type >(std::toupper(c)); }); // unsigned char: toupper is undefined for negative values
+        return LightObj(s.mS.data(), static_cast< SQInteger >(s.mS.size()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Swap the letter case, in place: lower-case letters become upper-case and vice versa.
+    */
+    SqString & SwapCase()
+    {
+        std::transform(mS.begin(), mS.end(), mS.begin(), [](unsigned char c) {
+            return static_cast< String::value_type >(std::islower(c) ? std::toupper(c) : std::tolower(c));
+        });
+        // Allow chaining
+        return *this;
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Perform a case-sensitive comparison against another string.
+    */
+    SQMOD_NODISCARD SQInteger Compare(SqString & o) const
+    {
+        return mS.compare(o.mS);
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Perform a case-insensitive comparison against another string.
+    */
+    SQMOD_NODISCARD SQInteger CompareI(SqString & o) const
+    {
+        return Poco::icompare(mS, o.mS); // Compare against `o`, not against ourselves
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Check if string contains another sub-string.
+    */
+    SQMOD_NODISCARD bool Contains(StackStrF & s) const
+    {
+        return mS.find(s.mPtr, 0, s.GetSize()) != String::npos;
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Checks if the string begins with the given prefix.
+    */
+    SQMOD_NODISCARD bool StartsWith(StackStrF & s) const
+    {
+        return mS.size() >= s.GetSize() && mS.compare(0, s.GetSize(), s.mPtr) == 0;
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Checks if the string ends with the given suffix.
+    */
+    SQMOD_NODISCARD bool EndsWith(StackStrF & s) const
+    {
+        return mS.size() >= s.GetSize() && mS.compare(mS.size() - s.GetSize(), s.GetSize(), s.mPtr) == 0;
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the first substring equal to s. The `From` variant starts searching at position p.
+    */
+    SQMOD_NODISCARD SQInteger Find(StackStrF & s) const { return FindFrom(0, s); }
+    SQMOD_NODISCARD SQInteger FindFrom(SQInteger p, StackStrF & s) const
+    {
+        return static_cast< SQInteger >(mS.find(s.mPtr, static_cast< size_t >(p), s.GetSize()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the first character equal to c (treated as a single-character substring).
+    */
+    SQMOD_NODISCARD SQInteger Locate(String::value_type c) const { return LocateFrom(c, 0); }
+    SQMOD_NODISCARD SQInteger LocateFrom(String::value_type c, SQInteger p) const
+    {
+        return static_cast< SQInteger >(mS.find(c, static_cast< size_t >(p)));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the last substring equal to s. The `From` variant only considers matches that begin at or before p.
+    */
+    SQMOD_NODISCARD SQInteger RFind(StackStrF & s) const { return static_cast< SQInteger >(mS.rfind(s.mPtr, String::npos, s.GetSize())); } // npos: search the whole string (pos 0 would only match at index 0)
+    SQMOD_NODISCARD SQInteger RFindFrom(SQInteger p, StackStrF & s) const
+    {
+        return static_cast< SQInteger >(mS.rfind(s.mPtr, static_cast< size_t >(p), s.GetSize()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the last character equal to c (treated as a single-character substring).
+    */
+    SQMOD_NODISCARD SQInteger RLocate(String::value_type c) const { return static_cast< SQInteger >(mS.rfind(c, String::npos)); } // npos: search the whole string (pos 0 would only inspect index 0)
+    SQMOD_NODISCARD SQInteger RLocateFrom(String::value_type c, SQInteger p) const
+    {
+        return static_cast< SQInteger >(mS.rfind(c, static_cast< size_t >(p)));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the first character equal to one of the characters in s. The `From` variant starts at position p.
+    */
+    SQMOD_NODISCARD SQInteger FindFirstOf(StackStrF & s) const { return FindFirstOfFrom(0, s); }
+    SQMOD_NODISCARD SQInteger FindFirstOfFrom(SQInteger p, StackStrF & s) const
+    {
+        return static_cast< SQInteger >(mS.find_first_of(s.mPtr, static_cast< size_t >(p), s.GetSize()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the first character equal to c. The `From` variant starts at position p.
+    */
+    SQMOD_NODISCARD SQInteger LocateFirstOf(String::value_type c) const { return LocateFirstOfFrom(c, 0); }
+    SQMOD_NODISCARD SQInteger LocateFirstOfFrom(String::value_type c, SQInteger p) const
+    {
+        return static_cast< SQInteger >(mS.find_first_of(c, static_cast< size_t >(p)));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the first character equal to none of the characters in s. The `From` variant starts at position p.
+    */
+    SQMOD_NODISCARD SQInteger FindFirstNotOf(StackStrF & s) const { return FindFirstNotOfFrom(0, s); }
+    SQMOD_NODISCARD SQInteger FindFirstNotOfFrom(SQInteger p, StackStrF & s) const
+    {
+        return static_cast< SQInteger >(mS.find_first_not_of(s.mPtr, static_cast< size_t >(p), s.GetSize()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the first character not equal to c.
+    */
+    SQMOD_NODISCARD SQInteger LocateFirstNotOf(String::value_type c) const { return LocateFirstNotOfFrom(c, 0); }
+    SQMOD_NODISCARD SQInteger LocateFirstNotOfFrom(String::value_type c, SQInteger p) const
+    {
+        return static_cast< SQInteger >(mS.find_first_not_of(c, static_cast< size_t >(p)));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the last character equal to one of the characters in s. The `From` variant only looks at or before p.
+    */
+    SQMOD_NODISCARD SQInteger FindLastOf(StackStrF & s) const { return static_cast< SQInteger >(mS.find_last_of(s.mPtr, String::npos, s.GetSize())); } // npos: search the whole string (pos 0 would only inspect index 0)
+    SQMOD_NODISCARD SQInteger FindLastOfFrom(SQInteger p, StackStrF & s) const
+    {
+        return static_cast< SQInteger >(mS.find_last_of(s.mPtr, static_cast< size_t >(p), s.GetSize()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the last character equal to c. The `From` variant only looks at or before p.
+    */
+    SQMOD_NODISCARD SQInteger LocateLastOf(String::value_type c) const { return static_cast< SQInteger >(mS.find_last_of(c, String::npos)); } // npos: search the whole string (pos 0 would only inspect index 0)
+    SQMOD_NODISCARD SQInteger LocateLastOfFrom(String::value_type c, SQInteger p) const
+    {
+        return static_cast< SQInteger >(mS.find_last_of(c, static_cast< size_t >(p)));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the last character equal to none of the characters in s. The `From` variant only looks at or before p.
+    */
+    SQMOD_NODISCARD SQInteger FindLastNotOf(StackStrF & s) const { return static_cast< SQInteger >(mS.find_last_not_of(s.mPtr, String::npos, s.GetSize())); } // npos: search the whole string (pos 0 would only inspect index 0)
+    SQMOD_NODISCARD SQInteger FindLastNotOfFrom(SQInteger p, StackStrF & s) const
+    {
+        return static_cast< SQInteger >(mS.find_last_not_of(s.mPtr, static_cast< size_t >(p), s.GetSize()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Finds the last character not equal to c.
+    */
+    SQMOD_NODISCARD SQInteger LocateLastNotOf(String::value_type c) const { return static_cast< SQInteger >(mS.find_last_not_of(c, String::npos)); } // npos: search the whole string (pos 0 would only inspect index 0)
+    SQMOD_NODISCARD SQInteger LocateLastNotOfFrom(String::value_type c, SQInteger p) const
+    {
+        return static_cast< SQInteger >(mS.find_last_not_of(c, static_cast< size_t >(p)));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Append n extra copies of the current contents to the string. A non-positive n is a no-op.
+    */
+    SqString & Repeat(SQInteger n)
+    {
+        if (!mS.empty())
+        {
+            const size_t len = mS.size();
+            // Replicate the original `len` characters n times (guard against negative counts)
+            while (n-- > 0)
+            {
+                mS.append(mS.data(), len);
+            }
+        }
+        return *this; // Allow chaining
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Append n extra copies of the current contents, each preceded by delimiter d. A non-positive n is a no-op.
+    */
+    SqString & Repeat_(SQInteger n, StackStrF & d)
+    {
+        const size_t len = mS.size();
+        // Replicate the original `len` characters n times (guard against negative counts)
+        while (n-- > 0)
+        {
+            // Delimiter goes between copies. If there's no string then only the delimiter is repeated
+            mS.append(d.mPtr, static_cast< size_t >(d.mLen));
+            mS.append(mS.data(), len);
+        }
+        return *this; // Allow chaining
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Replace all occurrences of sub-string f with t, starting at position p. Raises an error if f is empty.
+    */
+    SqString & Replace(StackStrF & f, StackStrF & t) { return Replace_(0, f, t); }
+    SqString & Replace_(SQInteger p, StackStrF & f, StackStrF & t)
+    {
+        if (!f.mLen)
+        {
+            STHROWF("Cannot replace empty string");
+        }
+        else if (!mS.empty())
+        {
+            Poco::replaceInPlace(mS, f.mPtr, t.mPtr, static_cast< size_t >(p));
+        }
+        return *this; // Allow chaining
+    }
+
+
+    /* --------------------------------------------------------------------------------------------
+     * Replace all occurrences of `from` with `to`, starting at position p.
+    */
+    SqString & Change(String::value_type f, String::value_type t) { return Change_(0, f, t); }
+    SqString & Change_(SQInteger p, String::value_type f, String::value_type t)
+    {
+        Poco::replaceInPlace(mS, f, t, static_cast< size_t >(p));
+        return *this; // Allow chaining
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Remove every occurrence of each character found in s. Raises an error (STHROWF) if s is empty.
+    */
+    SqString & Remove(StackStrF & s)
+    {
+        if (!s.mLen)
+        {
+            STHROWF("Cannot remove empty string");
+        }
+        for (SQInteger i = 0; !mS.empty() && i < s.mLen; ++i)
+        {
+            mS.erase(std::remove(mS.begin(), mS.end(), s.mPtr[i]), mS.end());
+        }
+        return *this; // Allow chaining
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Remove all occurrences of character c.
+    */
+    SqString & Eliminate(String::value_type c)
+    {
+        if (!mS.empty() )
+        {
+            mS.erase(std::remove(mS.begin(), mS.end(), c), mS.end());
+        }
+        return *this; // Allow chaining
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Generate a hash of the string using the 32-bit Fnv1a algorithm.
+    */
+    SQMOD_NODISCARD SQInteger GetFnv1a32() const
+    {
+        return static_cast< SQInteger >(FnvHash32(reinterpret_cast< FnvHashData >(mS.data()), mS.size()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Generate a hash of the string using the 64-bit Fnv1a algorithm.
+    */
+    SQMOD_NODISCARD SQInteger GetFnv1a64() const
+    {
+        return static_cast< SQInteger >(FnvHash64(reinterpret_cast< FnvHashData >(mS.data()), mS.size()));
+    }
+
+    /* --------------------------------------------------------------------------------------------
+     * Compute the Levenshtein distance between this string and string o.
+    */
+    SQMOD_NODISCARD SQInteger GetLevenshtein(SqString & o) const;
 };
 
 } // Namespace:: SqMod