diff --git a/module/CMakeLists.txt b/module/CMakeLists.txt index dee6547e..410ca3f0 100644 --- a/module/CMakeLists.txt +++ b/module/CMakeLists.txt @@ -80,6 +80,7 @@ add_library(SqModule MODULE SqBase.hpp Main.cpp Library/Numeric.cpp Library/Numeric.hpp Library/Numeric/Math.cpp Library/Numeric/Math.hpp Library/Numeric/Random.cpp Library/Numeric/Random.hpp + Library/RegEx.cpp Library/RegEx.hpp Library/String.cpp Library/String.hpp Library/System.cpp Library/System.hpp Library/System/Dir.cpp Library/System/Dir.hpp @@ -203,6 +204,15 @@ endif(WIN32) target_include_directories(SqModule PRIVATE ${CMAKE_CURRENT_LIST_DIR}) target_include_directories(SqModule PRIVATE ${CMAKE_CURRENT_LIST_DIR}/VCMP) target_include_directories(SqModule PRIVATE ${CMAKE_CURRENT_LIST_DIR}/Sqrat) +# Include PCRE directory in the header search path +if (POCO_UNBUNDLED) + find_package(PCRE REQUIRED) + target_link_libraries(SqModule PRIVATE Pcre::Pcre) +else() + # Get the foundation source folder path + get_target_property(POCO_FOUNDATION_SOURCE_DIR Foundation SOURCE_DIR) + target_include_directories(SqModule PRIVATE "${POCO_FOUNDATION_SOURCE_DIR}/src") +endif() # Copy module into the plug-ins folder add_custom_command(TARGET SqModule POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ "${PROJECT_SOURCE_DIR}/bin/plugins") # Copy several dependent DLLs on windows to make distribution easier (used mainly by people that distribute builds) diff --git a/module/Library/RegEx.cpp b/module/Library/RegEx.cpp new file mode 100644 index 00000000..7643f5e8 --- /dev/null +++ b/module/Library/RegEx.cpp @@ -0,0 +1,191 @@ +// ------------------------------------------------------------------------------------------------ +#include "Library/RegEx.hpp" + +// ------------------------------------------------------------------------------------------------ +#include + +// ------------------------------------------------------------------------------------------------ +namespace SqMod { + +// 
/* ------------------------------------------------------------------------------------------------
 * Register the regular expression API with the script virtual machine `vm`.
 *
 * Binds three classes into the root table: `SqRxMatch` (RxMatch), `SqRxMatches` (RxMatches) and
 * `SqRx` (RxInstance, registered as non-copyable). Also publishes two constant enumerations,
 * `SqRxOption` and `SqRxError`, that mirror the PCRE_* option flags and PCRE_ERROR_* codes.
*/
void Register_RegEx(HSQUIRRELVM vm)
{
    RootTable(vm).Bind(SqRxMatchTypename::Str,
        Class< RxMatch >(vm, SqRxMatchTypename::Str)
        // Constructors
        .Ctor()
        .Ctor< SQInteger >()
        .Ctor< SQInteger, SQInteger >()
        // Meta-methods
        .SquirrelFunc(_SC("_typename"), &SqRxMatchTypename::Fn)
        // Properties
        .Prop(_SC("Offset"), &RxMatch::GetOffset, &RxMatch::SetOffset)
        .Prop(_SC("Length"), &RxMatch::GetLength, &RxMatch::SetLength)
        .Prop(_SC("End"), &RxMatch::GetEnd)
        // Member Methods
        .Func(_SC("SubStr"), &RxMatch::SubStr)
    );
    RootTable(vm).Bind(SqRxMatchesTypename::Str,
        Class< RxMatches >(vm, SqRxMatchesTypename::Str)
        // Constructors
        .Ctor()
        // Meta-methods
        .SquirrelFunc(_SC("_typename"), &SqRxMatchesTypename::Fn)
        // Properties
        .Prop(_SC("Front"), &RxMatches::Front)
        .Prop(_SC("Back"), &RxMatches::Back)
        .Prop(_SC("Empty"), &RxMatches::Empty)
        .Prop(_SC("Size"), &RxMatches::Size)
        // The `Capacity` setter is routed through Reserve()
        .Prop(_SC("Capacity"), &RxMatches::Capacity, &RxMatches::Reserve)
        // Member Methods
        .Func(_SC("Get"), &RxMatches::Get)
        .Func(_SC("Reserve"), &RxMatches::Reserve)
        .Func(_SC("Compact"), &RxMatches::Compact)
        .Func(_SC("Clear"), &RxMatches::Clear)
        .Func(_SC("Pop"), &RxMatches::Pop)
        .Func(_SC("EraseAt"), &RxMatches::EraseAt)
        .Func(_SC("EraseFrom"), &RxMatches::EraseFrom)
        .Func(_SC("Each"), &RxMatches::Each)
        .Func(_SC("EachRange"), &RxMatches::EachRange)
        .Func(_SC("While"), &RxMatches::While)
        .Func(_SC("WhileRange"), &RxMatches::WhileRange)
        .Func(_SC("SubStr"), &RxMatches::SubStr)
    );
    // The class is bound under the short name `SqRx`; its type name string is still `SqRxInstance`
    RootTable(vm).Bind(_SC("SqRx"),
        Class< RxInstance, NoCopy< RxInstance > >(vm, SqRxInstanceTypename::Str)
        // Constructors
        .Ctor()
        .Ctor< StackStrF & >()
        .Ctor< int, StackStrF & >()
        .Ctor< int, bool, StackStrF & >()
        // Meta-methods
        .SquirrelFunc(_SC("_typename"), &SqRxInstanceTypename::Fn)
        // NOTE(review): disabled leftover that refers to CPlayer; no `_tostring` is registered here
        //.Func(_SC("_tostring"), &CPlayer::ToString)
        // Static Values
        // NOTE(review): these look like snapshots of the C++ defaults taken at registration time;
        // confirm whether script-side changes are meant to propagate back to the C++ statics
        .SetStaticValue(_SC("STUDY"), RxInstance::STUDY)
        .SetStaticValue(_SC("OPTIONS"), RxInstance::OPTIONS)
        .SetStaticValue(_SC("STUDY_OPTIONS"), RxInstance::STUDY_OPTIONS)
        // Properties
        .Prop(_SC("Valid"), &RxInstance::IsValid)
        .Prop(_SC("Studied"), &RxInstance::IsStudied)
        // Member Methods
        .FmtFunc(_SC("CompileF"), &RxInstance::Compile1)
        .FmtFunc(_SC("CompileExF"), &RxInstance::Compile2)
        .FmtFunc(_SC("TryCompileF"), &RxInstance::TryCompile1)
        .FmtFunc(_SC("TryCompileExF"), &RxInstance::TryCompile2)
        .FmtFunc(_SC("MatchFirst"), &RxInstance::MatchFirst)
        .FmtFunc(_SC("MatchFirstEx"), &RxInstance::MatchFirst_)
        .FmtFunc(_SC("MatchFirstFrom"), &RxInstance::MatchFirstFrom)
        .FmtFunc(_SC("MatchFirstFromEx"), &RxInstance::MatchFirstFrom_)
        .FmtFunc(_SC("Match"), &RxInstance::Match)
        .FmtFunc(_SC("MatchEx"), &RxInstance::Match_)
        .FmtFunc(_SC("MatchFrom"), &RxInstance::MatchFrom)
        .FmtFunc(_SC("MatchFromEx"), &RxInstance::MatchFrom_)
        .FmtFunc(_SC("Matches"), &RxInstance::Matches)
        .FmtFunc(_SC("MatchesEx"), &RxInstance::Matches_)
        .FmtFunc(_SC("MatchesEx2"), &RxInstance::MatchesEx)
        // Member Overloads
        .Overload(_SC("Compile"), &RxInstance::Compile1)
        .Overload(_SC("Compile"), &RxInstance::Compile2)
        .Overload(_SC("TryCompile"), &RxInstance::TryCompile1)
        .Overload(_SC("TryCompile"), &RxInstance::TryCompile2)
        .Overload(_SC("Study"), &RxInstance::Study0)
        .Overload(_SC("Study"), &RxInstance::Study1)
    );
    // --------------------------------------------------------------------------------------------
    // Option flags, exposed to scripts with the numeric values of the PCRE_* macros
    ConstTable(vm).Enum(_SC("SqRxOption"), Enumeration(vm)
        .Const(_SC("Caseless"),             static_cast< SQInteger >(PCRE_CASELESS))
        .Const(_SC("Multiline"),            static_cast< SQInteger >(PCRE_MULTILINE))
        .Const(_SC("Dotall"),               static_cast< SQInteger >(PCRE_DOTALL))
        .Const(_SC("Extended"),             static_cast< SQInteger >(PCRE_EXTENDED))
        .Const(_SC("Anchored"),             static_cast< SQInteger >(PCRE_ANCHORED))
        .Const(_SC("DollarEndOnly"),        static_cast< SQInteger >(PCRE_DOLLAR_ENDONLY))
        .Const(_SC("Extra"),                static_cast< SQInteger >(PCRE_EXTRA))
        .Const(_SC("NotBOL"),               static_cast< SQInteger >(PCRE_NOTBOL))
        .Const(_SC("NotEOL"),               static_cast< SQInteger >(PCRE_NOTEOL))
        .Const(_SC("UnGreedy"),             static_cast< SQInteger >(PCRE_UNGREEDY))
        .Const(_SC("NotEmpty"),             static_cast< SQInteger >(PCRE_NOTEMPTY))
        .Const(_SC("UTF8"),                 static_cast< SQInteger >(PCRE_UTF8))
        .Const(_SC("UTF16"),                static_cast< SQInteger >(PCRE_UTF16))
        .Const(_SC("UTF32"),                static_cast< SQInteger >(PCRE_UTF32))
        .Const(_SC("NoAutoCapture"),        static_cast< SQInteger >(PCRE_NO_AUTO_CAPTURE))
        .Const(_SC("NoUTF8Check"),          static_cast< SQInteger >(PCRE_NO_UTF8_CHECK))
        .Const(_SC("NoUTF16Check"),         static_cast< SQInteger >(PCRE_NO_UTF16_CHECK))
        .Const(_SC("NoUTF32Check"),         static_cast< SQInteger >(PCRE_NO_UTF32_CHECK))
        .Const(_SC("AutoCallout"),          static_cast< SQInteger >(PCRE_AUTO_CALLOUT))
        .Const(_SC("PartialSoft"),          static_cast< SQInteger >(PCRE_PARTIAL_SOFT))
        .Const(_SC("Partial"),              static_cast< SQInteger >(PCRE_PARTIAL))
        .Const(_SC("NeverUTF"),             static_cast< SQInteger >(PCRE_NEVER_UTF))
        .Const(_SC("DfaShortest"),          static_cast< SQInteger >(PCRE_DFA_SHORTEST))
        .Const(_SC("NoAutoPossess"),        static_cast< SQInteger >(PCRE_NO_AUTO_POSSESS))
        .Const(_SC("DfaRestart"),           static_cast< SQInteger >(PCRE_DFA_RESTART))
        .Const(_SC("FirstLine"),            static_cast< SQInteger >(PCRE_FIRSTLINE))
        .Const(_SC("DupNames"),             static_cast< SQInteger >(PCRE_DUPNAMES))
        .Const(_SC("NewLineCR"),            static_cast< SQInteger >(PCRE_NEWLINE_CR))
        .Const(_SC("NewLineLF"),            static_cast< SQInteger >(PCRE_NEWLINE_LF))
        .Const(_SC("NewLineCRLF"),          static_cast< SQInteger >(PCRE_NEWLINE_CRLF))
        .Const(_SC("NewLineAny"),           static_cast< SQInteger >(PCRE_NEWLINE_ANY))
        .Const(_SC("NewLineAnyCRLF"),       static_cast< SQInteger >(PCRE_NEWLINE_ANYCRLF))
        .Const(_SC("BsrAnyCRLF"),           static_cast< SQInteger >(PCRE_BSR_ANYCRLF))
        .Const(_SC("BsrUnicode"),           static_cast< SQInteger >(PCRE_BSR_UNICODE))
        .Const(_SC("JavaScriptCompat"),     static_cast< SQInteger >(PCRE_JAVASCRIPT_COMPAT))
        .Const(_SC("NoStartOptimize"),      static_cast< SQInteger >(PCRE_NO_START_OPTIMIZE))
        .Const(_SC("NoStartOptimise"),      static_cast< SQInteger >(PCRE_NO_START_OPTIMISE))
        .Const(_SC("PartialHard"),          static_cast< SQInteger >(PCRE_PARTIAL_HARD))
        .Const(_SC("NotEmptyAtStart"),      static_cast< SQInteger >(PCRE_NOTEMPTY_ATSTART))
        .Const(_SC("UCP"),                  static_cast< SQInteger >(PCRE_UCP))
    );
    // --------------------------------------------------------------------------------------------
    // Error codes, exposed to scripts with the numeric values of the PCRE_ERROR_* macros
    ConstTable(vm).Enum(_SC("SqRxError"), Enumeration(vm)
        .Const(_SC("NoMatch"),              static_cast< SQInteger >(PCRE_ERROR_NOMATCH))
        .Const(_SC("Null"),                 static_cast< SQInteger >(PCRE_ERROR_NULL))
        .Const(_SC("BadOption"),            static_cast< SQInteger >(PCRE_ERROR_BADOPTION))
        .Const(_SC("BadMagic"),             static_cast< SQInteger >(PCRE_ERROR_BADMAGIC))
        .Const(_SC("UnknownOpCode"),        static_cast< SQInteger >(PCRE_ERROR_UNKNOWN_OPCODE))
        .Const(_SC("UnknownNode"),          static_cast< SQInteger >(PCRE_ERROR_UNKNOWN_NODE))
        .Const(_SC("NoMemory"),             static_cast< SQInteger >(PCRE_ERROR_NOMEMORY))
        .Const(_SC("NoSubstring"),          static_cast< SQInteger >(PCRE_ERROR_NOSUBSTRING))
        .Const(_SC("MatchLimit"),           static_cast< SQInteger >(PCRE_ERROR_MATCHLIMIT))
        .Const(_SC("Callout"),              static_cast< SQInteger >(PCRE_ERROR_CALLOUT))
        .Const(_SC("BadUTF8"),              static_cast< SQInteger >(PCRE_ERROR_BADUTF8))
        .Const(_SC("BadUTF16"),             static_cast< SQInteger >(PCRE_ERROR_BADUTF16))
        .Const(_SC("BadUTF32"),             static_cast< SQInteger >(PCRE_ERROR_BADUTF32))
        .Const(_SC("BadUTF8Offset"),        static_cast< SQInteger >(PCRE_ERROR_BADUTF8_OFFSET))
        .Const(_SC("BadUTF16Offset"),       static_cast< SQInteger >(PCRE_ERROR_BADUTF16_OFFSET))
        .Const(_SC("Partial"),              static_cast< SQInteger >(PCRE_ERROR_PARTIAL))
        .Const(_SC("BadPartial"),           static_cast< SQInteger >(PCRE_ERROR_BADPARTIAL))
        .Const(_SC("Internal"),             static_cast< SQInteger >(PCRE_ERROR_INTERNAL))
        .Const(_SC("BadCount"),             static_cast< SQInteger >(PCRE_ERROR_BADCOUNT))
        .Const(_SC("DfaUitem"),             static_cast< SQInteger >(PCRE_ERROR_DFA_UITEM))
        .Const(_SC("DfaUcond"),             static_cast< SQInteger >(PCRE_ERROR_DFA_UCOND))
        .Const(_SC("DfaUmLimit"),           static_cast< SQInteger >(PCRE_ERROR_DFA_UMLIMIT))
        .Const(_SC("DfaWsSize"),            static_cast< SQInteger >(PCRE_ERROR_DFA_WSSIZE))
        .Const(_SC("DfaRecurse"),           static_cast< SQInteger >(PCRE_ERROR_DFA_RECURSE))
        .Const(_SC("RecursionLimit"),       static_cast< SQInteger >(PCRE_ERROR_RECURSIONLIMIT))
        .Const(_SC("NullWsLimit"),          static_cast< SQInteger >(PCRE_ERROR_NULLWSLIMIT))
        .Const(_SC("BadNewLine"),           static_cast< SQInteger >(PCRE_ERROR_BADNEWLINE))
        .Const(_SC("BadOffset"),            static_cast< SQInteger >(PCRE_ERROR_BADOFFSET))
        .Const(_SC("ShortUTF8"),            static_cast< SQInteger >(PCRE_ERROR_SHORTUTF8))
        .Const(_SC("ShortUTF16"),           static_cast< SQInteger >(PCRE_ERROR_SHORTUTF16))
        .Const(_SC("RecurseLoop"),          static_cast< SQInteger >(PCRE_ERROR_RECURSELOOP))
        .Const(_SC("JitStackLimit"),        static_cast< SQInteger >(PCRE_ERROR_JIT_STACKLIMIT))
        .Const(_SC("BadMode"),              static_cast< SQInteger >(PCRE_ERROR_BADMODE))
        .Const(_SC("BadEndianness"),        static_cast< SQInteger >(PCRE_ERROR_BADENDIANNESS))
        .Const(_SC("DfaBadRestart"),        static_cast< SQInteger >(PCRE_ERROR_DFA_BADRESTART))
        .Const(_SC("JitBadOption"),         static_cast< SQInteger >(PCRE_ERROR_JIT_BADOPTION))
        .Const(_SC("BadLength"),            static_cast< SQInteger >(PCRE_ERROR_BADLENGTH))
        .Const(_SC("Unset"),                static_cast< SQInteger >(PCRE_ERROR_UNSET))
    );
}
+ */ + RxMatch(const RxMatch & o) = default; + + /* -------------------------------------------------------------------------------------------- + * Move constructor. + */ + RxMatch(RxMatch && o) noexcept = default; + + /* -------------------------------------------------------------------------------------------- + * Copy assignment operator. + */ + RxMatch & operator = (const RxMatch & o) = default; + + /* -------------------------------------------------------------------------------------------- + * Move assignment operator. + */ + RxMatch & operator = (RxMatch && o) noexcept = default; + + /* -------------------------------------------------------------------------------------------- + * Retrieve offset. + */ + SQMOD_NODISCARD SQInteger GetOffset() const noexcept + { + return mOffset; + } + + /* -------------------------------------------------------------------------------------------- + * Modify offset. + */ + void SetOffset(SQInteger value) noexcept + { + mOffset = value; + } + + /* -------------------------------------------------------------------------------------------- + * Retrieve length. + */ + SQMOD_NODISCARD SQInteger GetLength() const noexcept + { + return mLength; + } + + /* -------------------------------------------------------------------------------------------- + * Modify length. + */ + void SetLength(SQInteger value) noexcept + { + mLength = value; + } + + /* -------------------------------------------------------------------------------------------- + * Retrieve match end. + */ + SQMOD_NODISCARD SQInteger GetEnd() const noexcept + { + return mOffset + mLength; + } + + /* -------------------------------------------------------------------------------------------- + * Extract a sub-string. 
+ */ + [[nodiscard]] LightObj SubStr(StackStrF & str) const + { + return LightObj{str.mPtr + mOffset, mLength}; + } +}; + +/* ------------------------------------------------------------------------------------------------ + * +*/ +struct RxMatches +{ + using List = std::vector< RxMatch >; + + /* -------------------------------------------------------------------------------------------- + * Internal RegularExpression instance. + */ + List mList; + + /* -------------------------------------------------------------------------------------------- + * Default constructor. + */ + RxMatches() = default; + + /* -------------------------------------------------------------------------------------------- + * Copy list constructor. + */ + explicit RxMatches(const List & l) // NOLINT(modernize-pass-by-value) + : mList{l} + { + } + + /* -------------------------------------------------------------------------------------------- + * Move list constructor. + */ + explicit RxMatches(List && m) noexcept + : mList{std::move(m)} + { + } + + /* -------------------------------------------------------------------------------------------- + * Copy constructor. + */ + RxMatches(const RxMatches & o) = default; + + /* -------------------------------------------------------------------------------------------- + * Move constructor. + */ + RxMatches(RxMatches && o) noexcept = default; + + /* -------------------------------------------------------------------------------------------- + * Copy assignment operator. + */ + RxMatches & operator = (const RxMatches & o) = default; + + /* -------------------------------------------------------------------------------------------- + * Move assignment operator. + */ + RxMatches & operator = (RxMatches && o) noexcept = default; + + /* -------------------------------------------------------------------------------------------- + * Make sure an index is within range and return the container. Container must exist. 
+ */ + List & ValidIdx(SQInteger i) + { + if (static_cast< size_t >(i) >= mList.size()) + { + STHROWF("Invalid Regular Expression match list index({})", i); + } + return mList; + } + + /* -------------------------------------------------------------------------------------------- + * Make sure an index is within range and return the container. Container must exist. + */ + SQMOD_NODISCARD const List & ValidIdx(SQInteger i) const + { + if (static_cast< size_t >(i) >= mList.size()) + { + STHROWF("Invalid Regular Expression match list index({})", i); + } + return mList; + } + + /* -------------------------------------------------------------------------------------------- + * Make sure a container instance is populated, then return it. + */ + SQMOD_NODISCARD List & ValidPop() + { + if (mList.empty()) + { + STHROWF("Regular Expression match list container is empty"); + } + return mList; + } + + /* -------------------------------------------------------------------------------------------- + * Retrieve a value from the container. + */ + SQMOD_NODISCARD List::reference Get(SQInteger i) + { + return ValidIdx(i).at(ClampL< SQInteger, size_t >(i)); + } + + /* -------------------------------------------------------------------------------------------- + * Retrieve the first element in the container. + */ + SQMOD_NODISCARD List::reference Front() + { + return ValidPop().front(); + } + + /* -------------------------------------------------------------------------------------------- + * Retrieve the last element in the container. + */ + SQMOD_NODISCARD List::reference Back() + { + return mList.back(); + } + + /* -------------------------------------------------------------------------------------------- + * Check if the container has no elements. + */ + SQMOD_NODISCARD bool Empty() const + { + return mList.empty(); + } + + /* -------------------------------------------------------------------------------------------- + * Retrieve the number of elements in the container. 
+ */ + SQMOD_NODISCARD SQInteger Size() const + { + return static_cast< SQInteger >(mList.size()); + } + + /* -------------------------------------------------------------------------------------------- + * Retrieve the number of elements that the container has currently allocated space for. + */ + SQMOD_NODISCARD SQInteger Capacity() const + { + return static_cast< SQInteger >(mList.capacity()); + } + + /* -------------------------------------------------------------------------------------------- + * Increase the capacity of the container to a value that's greater or equal to the one specified. + */ + RxMatches & Reserve(SQInteger n) + { + mList.reserve(ClampL< SQInteger, size_t >(n)); + return *this; + } + + /* -------------------------------------------------------------------------------------------- + * Request the removal of unused capacity. + */ + void Compact() + { + mList.shrink_to_fit(); + } + + /* -------------------------------------------------------------------------------------------- + * Erase all elements from the container. + */ + void Clear() + { + mList.clear(); + } + + /* -------------------------------------------------------------------------------------------- + * Pop the last element in the container. + */ + void Pop() + { + ValidPop().pop_back(); + } + + /* -------------------------------------------------------------------------------------------- + * Erase the element at a certain position. + */ + void EraseAt(SQInteger i) + { + mList.erase(ValidIdx(i).begin() + static_cast< size_t >(i)); // NOLINT(cppcoreguidelines-narrowing-conversions) + } + + /* -------------------------------------------------------------------------------------------- + * Erase a certain amount of elements starting from a specific position. 
+ */ + void EraseFrom(SQInteger i, SQInteger n) + { + mList.erase(ValidIdx(i).begin() + static_cast< size_t >(i), // NOLINT(cppcoreguidelines-narrowing-conversions) + ValidIdx(i + n).begin() + static_cast< size_t >(i + n)); // NOLINT(cppcoreguidelines-narrowing-conversions) + } + + /* -------------------------------------------------------------------------------------------- + * Iterate all values through a functor. + */ + void Each(Function & fn) const + { + for (const auto & e : mList) + { + fn.Execute(static_cast< SQInteger >(e.mOffset), static_cast< SQInteger >(e.mLength)); + } + } + + /* -------------------------------------------------------------------------------------------- + * Iterate values in range through a functor. + */ + void EachRange(SQInteger p, SQInteger n, Function & fn) const + { + std::for_each(ValidIdx(p).begin() + static_cast< size_t >(p), // NOLINT(cppcoreguidelines-narrowing-conversions) + ValidIdx(p + n).begin() + static_cast< size_t >(p + n), // NOLINT(cppcoreguidelines-narrowing-conversions) + [&](List::const_reference & e) { + fn.Execute(static_cast< SQInteger >(e.mOffset), static_cast< SQInteger >(e.mLength)); + }); + } + + /* -------------------------------------------------------------------------------------------- + * Iterate all values through a functor until stopped (i.e. false is returned). + */ + void While(Function & fn) const + { + for (const auto & e : mList) + { + auto ret = fn.Eval(static_cast< SQInteger >(e.mOffset), static_cast< SQInteger >(e.mLength)); + // (null || true) == continue & false == break + if (!ret.IsNull() || !ret.template Cast< bool >()) + { + break; + } + } + } + + /* -------------------------------------------------------------------------------------------- + * Iterate values in range through a functor until stopped (i.e. false is returned). 
+ */ + void WhileRange(SQInteger p, SQInteger n, Function & fn) const + { + auto itr = ValidIdx(p).begin() + static_cast< size_t >(p); // NOLINT(cppcoreguidelines-narrowing-conversions) + auto end = ValidIdx(p + n).begin() + static_cast< size_t >(p + n); // NOLINT(cppcoreguidelines-narrowing-conversions) + for (; itr != end; ++itr) + { + auto ret = fn.Eval(static_cast< SQInteger >(itr->mOffset), static_cast< SQInteger >(itr->mLength)); + // (null || true) == continue & false == break + if (!ret.IsNull() || !ret.template Cast< bool >()) + { + break; + } + } + } + + /* -------------------------------------------------------------------------------------------- + * Extract a sub-string. + */ + [[nodiscard]] LightObj SubStr(SQInteger i, StackStrF & str) const + { + const RxMatch & m = ValidIdx(i)[i]; + return LightObj{str.mPtr + m.mOffset, m.mLength}; + } +}; + +/* ------------------------------------------------------------------------------------------------ + * +*/ +struct RxInstance +{ + /* -------------------------------------------------------------------------------------------- + * Whether to analyze and optimize the pattern by default for evey new instance (true). + */ + static bool STUDY; + + /* -------------------------------------------------------------------------------------------- + * Default options for every new instance (0). + */ + static int OPTIONS; + + /* -------------------------------------------------------------------------------------------- + * Default study options for every new instance (0). + */ + static int STUDY_OPTIONS; + + /* -------------------------------------------------------------------------------------------- + * Default offset vector size (must be multiple of 3). + */ + static constexpr int OVEC_SIZE = 63; + + /* -------------------------------------------------------------------------------------------- + * Internal vector type used for offsets buffer. 
    /* --------------------------------------------------------------------------------------------
     * Internal vector type used for offsets buffer.
    */
    using OVEC_t = std::vector< int >;

    /* --------------------------------------------------------------------------------------------
     * Compiled pattern. Null while the instance is uninitialized.
    */
    pcre * mPCRE{nullptr};

    /* --------------------------------------------------------------------------------------------
     * Result of studying the compiled pattern. Null while the pattern was not studied.
    */
    pcre_extra * mExtra{nullptr};

    /* --------------------------------------------------------------------------------------------
     * Internal buffer used for offsets.
    */
    OVEC_t mOVEC{};

    /* --------------------------------------------------------------------------------------------
     * Default constructor. Creates an uninitialized instance (no pattern).
    */
    RxInstance() noexcept = default;

    /* --------------------------------------------------------------------------------------------
     * Copy constructor (disabled).
    */
    RxInstance(const RxInstance &) = delete;

    /* --------------------------------------------------------------------------------------------
     * Move constructor. The source instance is left uninitialized.
    */
    RxInstance(RxInstance && o) noexcept
        : mPCRE(o.mPCRE), mExtra(o.mExtra), mOVEC(std::move(o.mOVEC)) // Replicate it
    {
        o.mPCRE = nullptr; // Take ownership
        o.mExtra = nullptr; // Take ownership
    }

    /* --------------------------------------------------------------------------------------------
     * Basic constructor. Uses the default OPTIONS and STUDY settings.
    */
    explicit RxInstance(StackStrF & pattern)
        : RxInstance(OPTIONS, STUDY, pattern)
    {
    }

    /* --------------------------------------------------------------------------------------------
     * Basic constructor. With specific options. Uses the default STUDY setting.
    */
    explicit RxInstance(int options, StackStrF & pattern)
        : RxInstance(options, STUDY, pattern)
    {
    }
+ */ + explicit RxInstance(int options, bool study, StackStrF & pattern) + : mPCRE(Compile_(pattern.mPtr, options)), mExtra(nullptr) + { + if (study) + { + Study0(); + } + } + + /* -------------------------------------------------------------------------------------------- + * Internal constructor. + */ + RxInstance(const char * pattern, int options, bool study) + : mPCRE(Compile_(pattern, options)), mExtra(nullptr) + { + if (study) + { + Study0(); + } + } + + /* -------------------------------------------------------------------------------------------- + * Destructor. + */ + ~RxInstance() + { + Destroy(); + } + + /* -------------------------------------------------------------------------------------------- + * Copy assignment operator (disabled). + */ + RxInstance & operator = (const RxInstance &) = delete; + + /* -------------------------------------------------------------------------------------------- + * Move constructor. + */ + RxInstance & operator = (RxInstance && o) noexcept + { + // Prevent self assignment + if (this != &o) + { + // Release current instance, if any + Destroy(); + // Replicate it + mPCRE = o.mPCRE; + mExtra = o.mExtra; + mOVEC = std::move(o.mOVEC); + // Take ownership + o.mPCRE = nullptr; + o.mExtra = nullptr; + } + return *this; + } + + /* -------------------------------------------------------------------------------------------- + * Estimate the size necessary for the offsets vector buffer. 
+ */ + void EstimateOVEC(bool force = false) + { + if (mOVEC.empty() || force) + { + int size = 0; + // Attempt to estimate the size of the offsets vector buffer + const int r = pcre_fullinfo(ValidPCRE(), mExtra, PCRE_INFO_CAPTURECOUNT, &size); + // Check for errors + if (r != 0) + { + STHROWF("Rx: Offsets vector buffer estimation failed ({})", r); + } + // Attempt to scale the vector (must be multiple of 3) + mOVEC.resize((size + 1) * 3); + } + } + + /* -------------------------------------------------------------------------------------------- + * Return a valid `pcre` instance pointer or throw an exception. + */ + SQMOD_NODISCARD pcre * ValidPCRE() const + { + // Do we manage a valid instance? + if (mPCRE == nullptr) + { + STHROWF("Uninitialized Regular Expression instance."); + } + // Return it + return mPCRE; + } + + /* -------------------------------------------------------------------------------------------- + * Return a valid `pcre_extra` instance pointer or throw an exception. + */ + SQMOD_NODISCARD pcre_extra * ValidExtra() const + { + // Do we manage a valid instance? + if (mExtra == nullptr) + { + STHROWF("Regular Expression was not studied and optimized."); + } + // Return it + return mExtra; + } + + /* -------------------------------------------------------------------------------------------- + * Compile the specified pattern. + */ + SQMOD_NODISCARD static pcre * Compile_(const char * pattern, int options = OPTIONS) + { + const char * error_msg = nullptr; + int error_code, error_offset = 0; + // Attempt to compile the specified pattern + pcre * ptr = pcre_compile2(pattern, options, &error_code, &error_msg, &error_offset, nullptr); + // Did the compilation failed? 
+ if (ptr == nullptr) + { + STHROWF("Rx: {s} (code {}) (at offset {})", error_msg, error_code, error_offset); + } + // Return the `pcre` instance + return ptr; + } + + /* -------------------------------------------------------------------------------------------- + * Attempt to compile the specified pattern. Error information is returned instead of thrown. + */ + SQMOD_NODISCARD static std::pair< pcre *, Table > TryCompile_(const char * pattern, int options = OPTIONS) + { + const char * error_msg = nullptr; + int error_code, error_offset = 0; + // Attempt to compile the specified pattern + pcre * ptr = pcre_compile2(pattern, options, &error_code, &error_msg, &error_offset, nullptr); + // Did the compilation failed? + if (ptr == nullptr) + { + Table t; + t.SetValue("message", error_msg); + t.SetValue("code", error_code); + t.SetValue("offset", error_offset); + // Return the table with error information + return std::make_pair(ptr, std::move(t)); + } + // Return the `pcre` instance with no error information + return std::make_pair(ptr, Table{}); + } + + /* -------------------------------------------------------------------------------------------- + * Compile the specified pattern. + */ + RxInstance & Compile1(StackStrF & pattern) + { + return Compile2(OPTIONS, pattern); + } + + /* -------------------------------------------------------------------------------------------- + * Compile the specified pattern. With specific options. + */ + RxInstance & Compile2(int options, StackStrF & pattern) + { + // Release current instance, if any + Destroy(); + // Attempt to compile + mPCRE = Compile_(pattern.mPtr, options); + // Allocate offsets vector buffer + EstimateOVEC(); + // Allow chaining + return *this; + } + + /* -------------------------------------------------------------------------------------------- + * Compile the specified pattern. 
+ */ + Table TryCompile1(StackStrF & pattern) + { + return TryCompile2(OPTIONS, pattern); + } + + /* -------------------------------------------------------------------------------------------- + * Compile the specified pattern. With specific options. + */ + Table TryCompile2(int options, StackStrF & pattern) + { + // Release current instance, if any + Destroy(); + // Attempt to compile + auto p = TryCompile_(pattern.mPtr, options); + // Were there any compilation errors? + if (p.first != nullptr) + { + mPCRE = p.first; + } + // Return compilation info + return p.second; + } + + /* -------------------------------------------------------------------------------------------- + * Analyze the managed pattern and optimized it. + */ + RxInstance & Study0() + { + return Study1(STUDY_OPTIONS); + } + + /* -------------------------------------------------------------------------------------------- + * Analyze the managed pattern and optimized it. With specific options. + */ + RxInstance & Study1(int options) + { + if (mExtra != nullptr) + { + STHROWF("Regular Expression was already analyzed and optimized"); + } + const char * error = nullptr; + // Study and optimize the expression + mExtra = pcre_study(ValidPCRE(), options, &error); + // If there was an error studying the expression then throw it + if (mExtra == nullptr && error != nullptr) + { + STHROWF("Rx: {s}", error); + } + // Allow chaining + return *this; + } + + /* -------------------------------------------------------------------------------------------- + * Release managed resources and revert to uninitialized instance. + */ + SQMOD_NODISCARD bool IsValid() const + { + return (mPCRE != nullptr); + } + + /* -------------------------------------------------------------------------------------------- + * Release managed resources and revert to uninitialized instance. 
+ */ + SQMOD_NODISCARD bool IsStudied() const + { + return (mExtra != nullptr); + } + + /* -------------------------------------------------------------------------------------------- + * Release managed resources and revert to uninitialized instance. + */ + void Destroy() + { + // Do we manage any instance? + if (mPCRE != nullptr) + { + pcre_free(mPCRE); + mPCRE = nullptr; + } + // Is the expression optimized? + if (mExtra != nullptr) + { + pcre_free(mExtra); + mExtra = nullptr; + } + } + + /* -------------------------------------------------------------------------------------------- + * Matches the given subject string against the pattern. + * Returns the position of the first captured sub-string in m. + * If no part of the subject matches the pattern, m.mOffset is -1 and m.mLength is 0. + * Returns the number of matches. Throws a exception in case of an error. + */ + SQMOD_NODISCARD int MatchFirstFrom(SQInteger o, RxMatch & m, StackStrF & s) + { + return MatchFirstFrom_(OPTIONS, o, m, s); + } + SQMOD_NODISCARD int MatchFirstFrom_(int f, SQInteger o, RxMatch & m, StackStrF & s) + { + if (o > s.mLen) + { + STHROWF("Rx: Offset is out of range"); + } + EstimateOVEC(); + // Attempt to execute the expression on the specified subject + const int rc = pcre_exec(ValidPCRE(), mExtra, s.mPtr, static_cast< int >(s.mLen), static_cast< int >(o), f & 0xFFFF, mOVEC.data(), static_cast< int >(mOVEC.size())); + // Was there a match? + if (rc == PCRE_ERROR_NOMATCH) + { + m.mOffset = -1; + m.mLength = 0; + // No match found + return 0; + } + // Bad options/flags? + else if (rc == PCRE_ERROR_BADOPTION) + { + STHROWF("Rx: Bad option"); + } + // Overflow? + else if (rc == 0) + { + STHROWF("Rx: too many captured sub-strings"); + } + // Some other error? 
+ else if (rc < 0) + { + STHROWF("Rx: error {}", rc); + } + // Store match + m.mOffset = mOVEC[0]; + m.mLength = mOVEC[1] - mOVEC[0]; + // Yield result back to script + return rc; + } + + /* -------------------------------------------------------------------------------------------- + * Matches the given subject string against the pattern. + * Returns the position of the first captured sub-string in m. + * If no part of the subject matches the pattern, m.mOffset is -1 and m.mLength is 0. + * Returns the number of matches. Throws a exception in case of an error. + */ + SQMOD_NODISCARD int MatchFirst(RxMatch & m, StackStrF & s) + { + return MatchFirstFrom_(OPTIONS, 0, m, s); + } + SQMOD_NODISCARD int MatchFirst_(int f, RxMatch & m, StackStrF & s) + { + return MatchFirstFrom_(f, 0, m, s); + } + + /* -------------------------------------------------------------------------------------------- + * Matches the given subject string against the pattern. + * The first entry in m contains the position of the captured sub-string. + * The following entries identify matching sub-patterns. See the PCRE documentation for a more detailed explanation. + * If no part of the subject matches the pattern, m is empty. + * Returns the number of matches. Throws an exception in case of an error. + */ + SQMOD_NODISCARD int MatchFrom(SQInteger o, RxMatches & m, StackStrF & s) + { + return MatchFrom_(OPTIONS, o, m, s); + } + SQMOD_NODISCARD int MatchFrom_(int f, SQInteger o, RxMatches & m, StackStrF & s) + { + if (o > s.mLen) + { + STHROWF("Rx: Offset is out of range"); + } + EstimateOVEC(); + // Clear previous matches, if any + m.mList.clear(); + // Attempt to execute the expression on the specified subject + const int rc = pcre_exec(ValidPCRE(), mExtra, s.mPtr, static_cast< int >(s.mLen), static_cast< int >(o), f & 0xFFFF, mOVEC.data(), static_cast< int >(mOVEC.size())); + // Was there a match? + if (rc == PCRE_ERROR_NOMATCH) + { + return 0; // No match found + } + // Bad options/flags? 
+ else if (rc == PCRE_ERROR_BADOPTION) + { + STHROWF("Rx: Bad option"); + } + // Overflow? + else if (rc == 0) + { + STHROWF("Rx: too many captured sub-strings"); + } + // Some other error? + else if (rc < 0) + { + STHROWF("Rx: error {}", rc); + } + // Reserve space in advance + m.mList.reserve(static_cast< size_t >(rc)); + // Transfer matches to match-list + for (int i = 0; i < rc; ++i) + { + m.mList.emplace_back(mOVEC[i*2], mOVEC[i*2+1] - mOVEC[i*2]); + } + // Yield result back to script + return rc; + } + + /* -------------------------------------------------------------------------------------------- + * Matches the given subject string against the pattern. + * The first entry in m contains the position of the captured sub-string. + * The following entries identify matching sub-patterns. See the PCRE documentation for a more detailed explanation. + * If no part of the subject matches the pattern, m is empty. + * Returns the number of matches. Throws an exception in case of an error. + */ + SQMOD_NODISCARD int Match(RxMatches & m, StackStrF & s) + { + return MatchFrom_(OPTIONS, 0, m, s); + } + SQMOD_NODISCARD int Match_(int f, RxMatches & m, StackStrF & s) + { + return MatchFrom_(f, 0, m, s); + } + + /* -------------------------------------------------------------------------------------------- + * Returns true if and only if the subject matches the regular expression. + * Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for matching, + * which means that the empty string will never match and the pattern is treated as if it starts with a ^. 
+ */ + SQMOD_NODISCARD bool Matches(StackStrF & s) + { + return Matches_(PCRE_ANCHORED | PCRE_NOTEMPTY, s); + } + SQMOD_NODISCARD bool Matches_(SQInteger o, StackStrF & s) + { + return MatchesEx(PCRE_ANCHORED | PCRE_NOTEMPTY, 0, s); + } + SQMOD_NODISCARD bool MatchesEx(int f, SQInteger o, StackStrF & s) + { + RxMatch m; + const int rc = MatchFirstFrom_(f, o, m, s); + return (rc > 0) && (m.mOffset == o) && (m.mLength == (s.mLen - o)); + } +}; + + +} // Namespace:: SqMod diff --git a/module/Register.cpp b/module/Register.cpp index d4221e48..74d52bbe 100644 --- a/module/Register.cpp +++ b/module/Register.cpp @@ -39,6 +39,7 @@ extern void Register_JSON(HSQUIRRELVM vm); extern void Register_MMDB(HSQUIRRELVM vm); extern void Register_Net(HSQUIRRELVM vm); extern void Register_Numeric(HSQUIRRELVM vm); +extern void Register_RegEx(HSQUIRRELVM vm); extern void Register_String(HSQUIRRELVM vm); extern void Register_System(HSQUIRRELVM vm); extern void Register_UTF8(HSQUIRRELVM vm); @@ -106,6 +107,7 @@ bool RegisterAPI(HSQUIRRELVM vm) Register_MMDB(vm); Register_Net(vm); Register_Numeric(vm); + Register_RegEx(vm); Register_String(vm); Register_System(vm); Register_UTF8(vm);