#pragma once // ------------------------------------------------------------------------------------------------ #include "Core/Utility.hpp" // ------------------------------------------------------------------------------------------------ #ifdef POCO_UNBUNDLED #include #else #include "pcre_config.h" #include "pcre.h" #endif // ------------------------------------------------------------------------------------------------ #include // ------------------------------------------------------------------------------------------------ namespace SqMod { /* ------------------------------------------------------------------------------------------------ * */ struct RxMatch { /* -------------------------------------------------------------------------------------------- * */ SQInteger mOffset{0}; /* -------------------------------------------------------------------------------------------- * */ SQInteger mLength{0}; /* -------------------------------------------------------------------------------------------- * Default constructor. */ RxMatch() noexcept = default; /* -------------------------------------------------------------------------------------------- * Offset constructor. */ explicit RxMatch(SQInteger offset) noexcept : mOffset{offset} { } /* -------------------------------------------------------------------------------------------- * Explicit constructor. */ RxMatch(SQInteger offset, SQInteger length) noexcept : mOffset{offset}, mLength{length} { } /* -------------------------------------------------------------------------------------------- * Copy constructor. */ RxMatch(const RxMatch & o) = default; /* -------------------------------------------------------------------------------------------- * Move constructor. */ RxMatch(RxMatch && o) noexcept = default; /* -------------------------------------------------------------------------------------------- * Copy assignment operator. */ RxMatch & operator = (const RxMatch & o) = default; /* -------------------------------------------------------------------------------------------- * Move assignment operator. */ RxMatch & operator = (RxMatch && o) noexcept = default; /* -------------------------------------------------------------------------------------------- * Retrieve offset. */ SQMOD_NODISCARD SQInteger GetOffset() const noexcept { return mOffset; } /* -------------------------------------------------------------------------------------------- * Modify offset. */ void SetOffset(SQInteger value) noexcept { mOffset = value; } /* -------------------------------------------------------------------------------------------- * Retrieve length. */ SQMOD_NODISCARD SQInteger GetLength() const noexcept { return mLength; } /* -------------------------------------------------------------------------------------------- * Modify length. */ void SetLength(SQInteger value) noexcept { mLength = value; } /* -------------------------------------------------------------------------------------------- * Retrieve match end. */ SQMOD_NODISCARD SQInteger GetEnd() const noexcept { return mOffset + mLength; } /* -------------------------------------------------------------------------------------------- * Extract a sub-string. */ [[nodiscard]] LightObj SubStr(StackStrF & str) const { if ((mOffset + mLength) > str.mLen) { STHROWF("Rx: Match is outside the range of the specified string."); } // Return the sub-string return LightObj{str.mPtr + mOffset, mLength}; } }; /* ------------------------------------------------------------------------------------------------ * */ struct RxMatches { using List = std::vector< RxMatch >; /* -------------------------------------------------------------------------------------------- * Internal RegularExpression instance. */ List mList; /* -------------------------------------------------------------------------------------------- * Default constructor. */ RxMatches() = default; /* -------------------------------------------------------------------------------------------- * Copy list constructor. */ explicit RxMatches(const List & l) // NOLINT(modernize-pass-by-value) : mList{l} { } /* -------------------------------------------------------------------------------------------- * Move list constructor. */ explicit RxMatches(List && m) noexcept : mList{std::move(m)} { } /* -------------------------------------------------------------------------------------------- * Copy constructor. */ RxMatches(const RxMatches & o) = default; /* -------------------------------------------------------------------------------------------- * Move constructor. */ RxMatches(RxMatches && o) noexcept = default; /* -------------------------------------------------------------------------------------------- * Copy assignment operator. */ RxMatches & operator = (const RxMatches & o) = default; /* -------------------------------------------------------------------------------------------- * Move assignment operator. */ RxMatches & operator = (RxMatches && o) noexcept = default; /* -------------------------------------------------------------------------------------------- * Make sure an index is within range and return the container. Container must exist. */ List & ValidIdx(SQInteger i) { if (static_cast< size_t >(i) >= mList.size()) { STHROWF("Invalid Regular Expression match list index({})", i); } return mList; } /* -------------------------------------------------------------------------------------------- * Make sure an index is within range and return the container. Container must exist. */ SQMOD_NODISCARD const List & ValidIdx(SQInteger i) const { if (static_cast< size_t >(i) >= mList.size()) { STHROWF("Invalid Regular Expression match list index({})", i); } return mList; } /* -------------------------------------------------------------------------------------------- * Make sure a container instance is populated, then return it. */ SQMOD_NODISCARD List & ValidPop() { if (mList.empty()) { STHROWF("Regular Expression match list container is empty"); } return mList; } /* -------------------------------------------------------------------------------------------- * Retrieve a value from the container. */ SQMOD_NODISCARD List::reference Get(SQInteger i) { return ValidIdx(i).at(ClampL< SQInteger, size_t >(i)); } /* -------------------------------------------------------------------------------------------- * Retrieve the first element in the container. */ SQMOD_NODISCARD List::reference Front() { return ValidPop().front(); } /* -------------------------------------------------------------------------------------------- * Retrieve the last element in the container. */ SQMOD_NODISCARD List::reference Back() { return mList.back(); } /* -------------------------------------------------------------------------------------------- * Check if the container has no elements. */ SQMOD_NODISCARD bool Empty() const { return mList.empty(); } /* -------------------------------------------------------------------------------------------- * Retrieve the number of elements in the container. */ SQMOD_NODISCARD SQInteger Size() const { return static_cast< SQInteger >(mList.size()); } /* -------------------------------------------------------------------------------------------- * Retrieve the number of elements that the container has currently allocated space for. */ SQMOD_NODISCARD SQInteger Capacity() const { return static_cast< SQInteger >(mList.capacity()); } /* -------------------------------------------------------------------------------------------- * Increase the capacity of the container to a value that's greater or equal to the one specified. */ RxMatches & Reserve(SQInteger n) { mList.reserve(ClampL< SQInteger, size_t >(n)); return *this; } /* -------------------------------------------------------------------------------------------- * Request the removal of unused capacity. */ void Compact() { mList.shrink_to_fit(); } /* -------------------------------------------------------------------------------------------- * Erase all elements from the container. */ void Clear() { mList.clear(); } /* -------------------------------------------------------------------------------------------- * Pop the last element in the container. */ void Pop() { ValidPop().pop_back(); } /* -------------------------------------------------------------------------------------------- * Erase the element at a certain position. */ void EraseAt(SQInteger i) { mList.erase(ValidIdx(i).begin() + static_cast< size_t >(i)); // NOLINT(cppcoreguidelines-narrowing-conversions) } /* -------------------------------------------------------------------------------------------- * Erase a certain amount of elements starting from a specific position. */ void EraseFrom(SQInteger i, SQInteger n) { mList.erase(ValidIdx(i).begin() + static_cast< size_t >(i), // NOLINT(cppcoreguidelines-narrowing-conversions) ValidIdx(i + n).begin() + static_cast< size_t >(i + n)); // NOLINT(cppcoreguidelines-narrowing-conversions) } /* -------------------------------------------------------------------------------------------- * Iterate all values through a functor. */ void Each(Function & fn) const { for (const auto & e : mList) { fn.Execute(static_cast< SQInteger >(e.mOffset), static_cast< SQInteger >(e.mLength)); } } /* -------------------------------------------------------------------------------------------- * Iterate values in range through a functor. */ void EachRange(SQInteger p, SQInteger n, Function & fn) const { std::for_each(ValidIdx(p).begin() + static_cast< size_t >(p), // NOLINT(cppcoreguidelines-narrowing-conversions) ValidIdx(p + n).begin() + static_cast< size_t >(p + n), // NOLINT(cppcoreguidelines-narrowing-conversions) [&](List::const_reference & e) { fn.Execute(static_cast< SQInteger >(e.mOffset), static_cast< SQInteger >(e.mLength)); }); } /* -------------------------------------------------------------------------------------------- * Iterate all values through a functor until stopped (i.e. false is returned). */ void While(Function & fn) const { for (const auto & e : mList) { auto ret = fn.Eval(static_cast< SQInteger >(e.mOffset), static_cast< SQInteger >(e.mLength)); // (null || true) == continue & false == break if (!ret.IsNull() || !ret.template Cast< bool >()) { break; } } } /* -------------------------------------------------------------------------------------------- * Iterate values in range through a functor until stopped (i.e. false is returned). */ void WhileRange(SQInteger p, SQInteger n, Function & fn) const { auto itr = ValidIdx(p).begin() + static_cast< size_t >(p); // NOLINT(cppcoreguidelines-narrowing-conversions) auto end = ValidIdx(p + n).begin() + static_cast< size_t >(p + n); // NOLINT(cppcoreguidelines-narrowing-conversions) for (; itr != end; ++itr) { auto ret = fn.Eval(static_cast< SQInteger >(itr->mOffset), static_cast< SQInteger >(itr->mLength)); // (null || true) == continue & false == break if (!ret.IsNull() || !ret.template Cast< bool >()) { break; } } } /* -------------------------------------------------------------------------------------------- * Extract a sub-string. */ [[nodiscard]] LightObj SubStr(SQInteger i, StackStrF & str) const { const RxMatch & m = ValidIdx(i)[i]; // Check if match is within range if ((m.mOffset + m.mLength) > str.mLen) { STHROWF("Rx: Match is outside the range of the specified string."); } // Return the sub-string return LightObj{str.mPtr + m.mOffset, m.mLength}; } }; /* ------------------------------------------------------------------------------------------------ * */ struct RxInstance { /* -------------------------------------------------------------------------------------------- * Whether to analyze and optimize the pattern by default for evey new instance (true). */ static bool STUDY; /* -------------------------------------------------------------------------------------------- * Default options for every new instance (0). */ static int OPTIONS; /* -------------------------------------------------------------------------------------------- * Default study options for every new instance (0). */ static int STUDY_OPTIONS; /* -------------------------------------------------------------------------------------------- * Default offset vector size (must be multiple of 3). */ static constexpr int OVEC_SIZE = 63; /* -------------------------------------------------------------------------------------------- * Internal vector type used for offsets buffer. */ using OVEC_t = std::vector< int >; /* -------------------------------------------------------------------------------------------- * Internal RegularExpression instance. */ pcre * mPCRE{nullptr}; /* -------------------------------------------------------------------------------------------- * Internal RegularExpression instance. */ pcre_extra * mExtra{nullptr}; /* -------------------------------------------------------------------------------------------- * Internal buffer used for offsets. */ OVEC_t mOVEC{}; /* -------------------------------------------------------------------------------------------- * Default constructor. */ RxInstance() noexcept = default; /* -------------------------------------------------------------------------------------------- * Copy constructor (disabled). */ RxInstance(const RxInstance &) = delete; /* -------------------------------------------------------------------------------------------- * Move constructor. */ RxInstance(RxInstance && o) noexcept : mPCRE(o.mPCRE), mExtra(o.mExtra), mOVEC(std::move(o.mOVEC)) // Replicate it { o.mPCRE = nullptr; // Take ownership o.mExtra = nullptr; // Take ownership } /* -------------------------------------------------------------------------------------------- * Basic constructor. */ explicit RxInstance(StackStrF & pattern) : RxInstance(OPTIONS, STUDY, pattern) { } /* -------------------------------------------------------------------------------------------- * Basic constructor. With specific options. */ explicit RxInstance(int options, StackStrF & pattern) : RxInstance(options, STUDY, pattern) { } /* -------------------------------------------------------------------------------------------- * Basic constructor. With specific options. */ explicit RxInstance(int options, bool study, StackStrF & pattern) : mPCRE(Compile_(pattern.mPtr, options)), mExtra(nullptr) { if (study) { Study0(); } } /* -------------------------------------------------------------------------------------------- * Internal constructor. */ RxInstance(const char * pattern, int options, bool study) : mPCRE(Compile_(pattern, options)), mExtra(nullptr) { if (study) { Study0(); } } /* -------------------------------------------------------------------------------------------- * Destructor. */ ~RxInstance() { Destroy(); } /* -------------------------------------------------------------------------------------------- * Copy assignment operator (disabled). */ RxInstance & operator = (const RxInstance &) = delete; /* -------------------------------------------------------------------------------------------- * Move assignment operator. */ RxInstance & operator = (RxInstance && o) noexcept { // Prevent self assignment if (this != &o) { // Release current instance, if any Destroy(); // Replicate it mPCRE = o.mPCRE; mExtra = o.mExtra; mOVEC = std::move(o.mOVEC); // Take ownership o.mPCRE = nullptr; o.mExtra = nullptr; } return *this; } /* -------------------------------------------------------------------------------------------- * Estimate the size necessary for the offsets vector buffer. */ void EstimateOVEC(bool force = false) { if (mOVEC.empty() || force) { int size = 0; // Attempt to estimate the size of the offsets vector buffer const int r = pcre_fullinfo(ValidPCRE(), mExtra, PCRE_INFO_CAPTURECOUNT, &size); // Check for errors if (r != 0) { STHROWF("Rx: Offsets vector buffer estimation failed ({})", r); } // Attempt to scale the vector (must be multiple of 3) mOVEC.resize((size + 1) * 3); } } /* -------------------------------------------------------------------------------------------- * Return a valid `pcre` instance pointer or throw an exception. */ SQMOD_NODISCARD pcre * ValidPCRE() const { // Do we manage a valid instance? if (mPCRE == nullptr) { STHROWF("Uninitialized Regular Expression instance."); } // Return it return mPCRE; } /* -------------------------------------------------------------------------------------------- * Return a valid `pcre_extra` instance pointer or throw an exception. */ SQMOD_NODISCARD pcre_extra * ValidExtra() const { // Do we manage a valid instance? if (mExtra == nullptr) { STHROWF("Regular Expression was not studied and optimized."); } // Return it return mExtra; } /* -------------------------------------------------------------------------------------------- * Compile the specified pattern. */ SQMOD_NODISCARD static pcre * Compile_(const char * pattern, int options = OPTIONS) { const char * error_msg = nullptr; int error_code, error_offset = 0; // Attempt to compile the specified pattern pcre * ptr = pcre_compile2(pattern, options, &error_code, &error_msg, &error_offset, nullptr); // Did the compilation failed? if (ptr == nullptr) { STHROWF("Rx: {s} (code {}) (at offset {})", error_msg, error_code, error_offset); } // Return the `pcre` instance return ptr; } /* -------------------------------------------------------------------------------------------- * Attempt to compile the specified pattern. Error information is returned instead of thrown. */ SQMOD_NODISCARD static std::pair< pcre *, Table > TryCompile_(const char * pattern, int options = OPTIONS) { const char * error_msg = nullptr; int error_code, error_offset = 0; // Attempt to compile the specified pattern pcre * ptr = pcre_compile2(pattern, options, &error_code, &error_msg, &error_offset, nullptr); // Did the compilation failed? if (ptr == nullptr) { Table t; t.SetValue("message", error_msg); t.SetValue("code", error_code); t.SetValue("offset", error_offset); // Return the table with error information return std::make_pair(ptr, std::move(t)); } // Return the `pcre` instance with no error information return std::make_pair(ptr, Table{}); } /* -------------------------------------------------------------------------------------------- * Compile the specified pattern. */ RxInstance & Compile1(StackStrF & pattern) { return Compile2(OPTIONS, pattern); } /* -------------------------------------------------------------------------------------------- * Compile the specified pattern. With specific options. */ RxInstance & Compile2(int options, StackStrF & pattern) { // Release current instance, if any Destroy(); // Attempt to compile mPCRE = Compile_(pattern.mPtr, options); // Allocate offsets vector buffer EstimateOVEC(); // Allow chaining return *this; } /* -------------------------------------------------------------------------------------------- * Compile the specified pattern. */ Table TryCompile1(StackStrF & pattern) { return TryCompile2(OPTIONS, pattern); } /* -------------------------------------------------------------------------------------------- * Compile the specified pattern. With specific options. */ Table TryCompile2(int options, StackStrF & pattern) { // Release current instance, if any Destroy(); // Attempt to compile auto p = TryCompile_(pattern.mPtr, options); // Were there any compilation errors? if (p.first != nullptr) { mPCRE = p.first; } // Return compilation info return p.second; } /* -------------------------------------------------------------------------------------------- * Analyze the managed pattern and optimized it. */ RxInstance & Study0() { return Study1(STUDY_OPTIONS); } /* -------------------------------------------------------------------------------------------- * Analyze the managed pattern and optimized it. With specific options. */ RxInstance & Study1(int options) { if (mExtra != nullptr) { STHROWF("Regular Expression was already analyzed and optimized"); } const char * error = nullptr; // Study and optimize the expression mExtra = pcre_study(ValidPCRE(), options, &error); // If there was an error studying the expression then throw it if (mExtra == nullptr && error != nullptr) { STHROWF("Rx: {s}", error); } // Allow chaining return *this; } /* -------------------------------------------------------------------------------------------- * Release managed resources and revert to uninitialized instance. */ SQMOD_NODISCARD bool IsValid() const { return (mPCRE != nullptr); } /* -------------------------------------------------------------------------------------------- * Release managed resources and revert to uninitialized instance. */ SQMOD_NODISCARD bool IsStudied() const { return (mExtra != nullptr); } /* -------------------------------------------------------------------------------------------- * Release managed resources and revert to uninitialized instance. */ void Destroy() { // Do we manage any instance? if (mPCRE != nullptr) { pcre_free(mPCRE); mPCRE = nullptr; } // Is the expression optimized? if (mExtra != nullptr) { pcre_free(mExtra); mExtra = nullptr; } } /* -------------------------------------------------------------------------------------------- * Matches the given subject string against the pattern. * Returns the position of the first captured sub-string in m. * If no part of the subject matches the pattern, m.mOffset is -1 and m.mLength is 0. * Returns the number of matches. Throws a exception in case of an error. */ SQMOD_NODISCARD int MatchFirstFrom(SQInteger o, RxMatch & m, StackStrF & s) { return MatchFirstFrom_(OPTIONS, o, m, s); } SQMOD_NODISCARD int MatchFirstFrom_(int f, SQInteger o, RxMatch & m, StackStrF & s) { if (o > s.mLen) { STHROWF("Rx: Offset is out of range"); } EstimateOVEC(); // Attempt to execute the expression on the specified subject const int rc = pcre_exec(ValidPCRE(), mExtra, s.mPtr, static_cast< int >(s.mLen), static_cast< int >(o), f & 0xFFFF, mOVEC.data(), static_cast< int >(mOVEC.size())); // Was there a match? if (rc == PCRE_ERROR_NOMATCH) { m.mOffset = -1; m.mLength = 0; // No match found return 0; } // Bad options/flags? else if (rc == PCRE_ERROR_BADOPTION) { STHROWF("Rx: Bad option"); } // Overflow? else if (rc == 0) { STHROWF("Rx: too many captured sub-strings"); } // Some other error? else if (rc < 0) { STHROWF("Rx: error {}", rc); } // Store match m.mOffset = mOVEC[0]; m.mLength = mOVEC[1] - mOVEC[0]; // Yield result back to script return rc; } /* -------------------------------------------------------------------------------------------- * Matches the given subject string against the pattern. * Returns the position of the first captured sub-string in m. * If no part of the subject matches the pattern, m.mOffset is -1 and m.mLength is 0. * Returns the number of matches. Throws a exception in case of an error. */ SQMOD_NODISCARD int MatchFirst(RxMatch & m, StackStrF & s) { return MatchFirstFrom_(OPTIONS, 0, m, s); } SQMOD_NODISCARD int MatchFirst_(int f, RxMatch & m, StackStrF & s) { return MatchFirstFrom_(f, 0, m, s); } /* -------------------------------------------------------------------------------------------- * Matches the given subject string against the pattern. * The first entry in m contains the position of the captured sub-string. * The following entries identify matching sub-patterns. See the PCRE documentation for a more detailed explanation. * If no part of the subject matches the pattern, m is empty. * Returns the number of matches. Throws an exception in case of an error. */ SQMOD_NODISCARD int MatchFrom(SQInteger o, RxMatches & m, StackStrF & s) { return MatchFrom_(OPTIONS, o, m, s); } SQMOD_NODISCARD int MatchFrom_(int f, SQInteger o, RxMatches & m, StackStrF & s) { if (o > s.mLen) { STHROWF("Rx: Offset is out of range"); } EstimateOVEC(); // Clear previous matches, if any m.mList.clear(); // Attempt to execute the expression on the specified subject const int rc = pcre_exec(ValidPCRE(), mExtra, s.mPtr, static_cast< int >(s.mLen), static_cast< int >(o), f & 0xFFFF, mOVEC.data(), static_cast< int >(mOVEC.size())); // Was there a match? if (rc == PCRE_ERROR_NOMATCH) { return 0; // No match found } // Bad options/flags? else if (rc == PCRE_ERROR_BADOPTION) { STHROWF("Rx: Bad option"); } // Overflow? else if (rc == 0) { STHROWF("Rx: too many captured sub-strings"); } // Some other error? else if (rc < 0) { STHROWF("Rx: error {}", rc); } // Reserve space in advance m.mList.reserve(static_cast< size_t >(rc)); // Transfer matches to match-list for (int i = 0; i < rc; ++i) { m.mList.emplace_back(mOVEC[i*2], mOVEC[i*2+1] - mOVEC[i*2]); } // Yield result back to script return rc; } /* -------------------------------------------------------------------------------------------- * Matches the given subject string against the pattern. * The first entry in m contains the position of the captured sub-string. * The following entries identify matching sub-patterns. See the PCRE documentation for a more detailed explanation. * If no part of the subject matches the pattern, m is empty. * Returns the number of matches. Throws an exception in case of an error. */ SQMOD_NODISCARD int Match(RxMatches & m, StackStrF & s) { return MatchFrom_(OPTIONS, 0, m, s); } SQMOD_NODISCARD int Match_(int f, RxMatches & m, StackStrF & s) { return MatchFrom_(f, 0, m, s); } /* -------------------------------------------------------------------------------------------- * Returns true if and only if the subject matches the regular expression. * Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for matching, * which means that the empty string will never match and the pattern is treated as if it starts with a ^. */ SQMOD_NODISCARD bool Matches(StackStrF & s) { return Matches_(PCRE_ANCHORED | PCRE_NOTEMPTY, s); } SQMOD_NODISCARD bool Matches_(SQInteger o, StackStrF & s) { return MatchesEx(PCRE_ANCHORED | PCRE_NOTEMPTY, 0, s); } SQMOD_NODISCARD bool MatchesEx(int f, SQInteger o, StackStrF & s) { RxMatch m; const int rc = MatchFirstFrom_(f, o, m, s); return (rc > 0) && (m.mOffset == o) && (m.mLength == (s.mLen - o)); } }; } // Namespace:: SqMod