// // URI.h // // Library: Foundation // Package: URI // Module: URI // // Definition of the URI class. // // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. // and Contributors. // // SPDX-License-Identifier: BSL-1.0 // #ifndef Foundation_URI_INCLUDED #define Foundation_URI_INCLUDED #include "Poco/Foundation.h" #include #include namespace Poco { class Path; class Foundation_API URI /// A Uniform Resource Identifier, as specified in RFC 3986. /// /// The URI class provides methods for building URIs from their /// parts, as well as for splitting URIs into their parts. /// Furthermore, the class provides methods for resolving /// relative URIs against base URIs. /// /// The class automatically performs a few normalizations on /// all URIs and URI parts passed to it: /// * scheme identifiers are converted to lower case /// * percent-encoded characters are decoded (except for the query string and fragment string) /// * optionally, dot segments are removed from paths (see normalize()) /// /// Note that dealing with query strings and fragment strings requires some precautions, /// as, internally, query strings and fragment strings are stored in percent-encoded /// form, while all other parts of the URI are stored in decoded form. While parsing /// query strings and fragment strings from properly encoded URLs generally works, /// explicitly setting query strings (fragment strings) with setQuery() (setFragment()) /// or extracting query strings (fragment strings) with getQuery() (getFragment()) may /// lead to ambiguities. See the descriptions of setQuery(), setRawQuery(), getQuery(), /// getRawQuery(), setFragment(), setRawFragment(), getFragment() and getRawFragment() /// for more information. { public: using QueryParameters = std::vector>; URI(); /// Creates an empty URI. explicit URI(const std::string& uri); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. explicit URI(const char* uri); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. URI(const std::string& scheme, const std::string& pathEtc); /// Creates an URI from its parts. URI(const std::string& scheme, const std::string& authority, const std::string& pathEtc); /// Creates an URI from its parts. URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query); /// Creates an URI from its parts. URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query, const std::string& fragment); /// Creates an URI from its parts. URI(const URI& uri); /// Copy constructor. Creates an URI from another one. URI(URI&& uri) noexcept; /// Move constructor. URI(const URI& baseURI, const std::string& relativeURI); /// Creates an URI from a base URI and a relative URI, according to /// the algorithm in section 5.2 of RFC 3986. explicit URI(const Path& path); /// Creates a URI from a path. /// /// The path will be made absolute, and a file:// URI /// will be built from it. ~URI(); /// Destroys the URI. URI& operator = (const URI& uri); /// Assignment operator. URI& operator = (URI&& uri) noexcept; /// Move assignment. URI& operator = (const std::string& uri); /// Parses and assigns an URI from the given string. Throws a /// SyntaxException if the uri is not valid. URI& operator = (const char* uri); /// Parses and assigns an URI from the given string. Throws a /// SyntaxException if the uri is not valid. void swap(URI& uri) noexcept; /// Swaps the URI with another one. void clear(); /// Clears all parts of the URI. std::string toString() const; /// Returns a string representation of the URI. /// /// Characters in the path, query and fragment parts will be /// percent-encoded as necessary. const std::string& getScheme() const; /// Returns the scheme part of the URI. void setScheme(const std::string& scheme); /// Sets the scheme part of the URI. The given scheme /// is converted to lower-case. /// /// A list of registered URI schemes can be found /// at . const std::string& getUserInfo() const; /// Returns the user-info part of the URI. void setUserInfo(const std::string& userInfo); /// Sets the user-info part of the URI. const std::string& getHost() const; /// Returns the host part of the URI. void setHost(const std::string& host); /// Sets the host part of the URI. unsigned short getPort() const; /// Returns the port number part of the URI. /// /// If no port number (0) has been specified, the /// well-known port number (e.g., 80 for http) for /// the given scheme is returned if it is known. /// Otherwise, 0 is returned. void setPort(unsigned short port); /// Sets the port number part of the URI. unsigned short getSpecifiedPort() const; /// Returns the port number part of the URI. /// /// If no explicit port number has been specified, /// returns 0. std::string getAuthority() const; /// Returns the authority part (userInfo, host and port) /// of the URI. /// /// If the port number is a well-known port /// number for the given scheme (e.g., 80 for http), it /// is not included in the authority. void setAuthority(const std::string& authority); /// Parses the given authority part for the URI and sets /// the user-info, host, port components accordingly. const std::string& getPath() const; /// Returns the decoded path part of the URI. void setPath(const std::string& path); /// Sets the path part of the URI. std::string getQuery() const; /// Returns the decoded query part of the URI. /// /// Note that encoded ampersand characters ('&', "%26") /// will be decoded, which could cause ambiguities if the query /// string contains multiple parameters and a parameter name /// or value contains an ampersand as well. /// In such a case it's better to use getRawQuery() or /// getQueryParameters(). void setQuery(const std::string& query); /// Sets the query part of the URI. /// /// The query string will be percent-encoded. If the query /// already contains percent-encoded characters, these /// will be double-encoded, which is probably not what's /// intended by the caller. Furthermore, ampersand ('&') /// characters in the query will not be encoded. This could /// lead to ambiguity issues if the query string contains multiple /// name-value parameters separated by ampersand, and if any /// name or value also contains an ampersand. In such a /// case, it's better to use setRawQuery() with a properly /// percent-encoded query string, or use addQueryParameter() /// or setQueryParameters(), which take care of appropriate /// percent encoding of parameter names and values. void addQueryParameter(const std::string& param, const std::string& val = ""); /// Adds "param=val" to the query; "param" may not be empty. /// If val is empty, only '=' is appended to the parameter. /// /// In addition to regular encoding, function also encodes '&' and '=', /// if found in param or val. const std::string& getRawQuery() const; /// Returns the query string in raw form, which usually /// means percent encoded. void setRawQuery(const std::string& query); /// Sets the query part of the URI. /// /// The given query string must be properly percent-encoded. QueryParameters getQueryParameters(bool plusIsSpace = true) const; /// Returns the decoded query string parameters as a vector /// of name-value pairs. void setQueryParameters(const QueryParameters& params); /// Sets the query part of the URI from a vector /// of query parameters. /// /// Calls addQueryParameter() for each parameter name and value. std::string getFragment() const; /// Returns the fragment part of the URI. void setFragment(const std::string& fragment); /// Sets the fragment part of the URI. std::string getRawFragment() const; /// Returns the fragment part of the URI in raw form. void setRawFragment(const std::string& fragment); /// Sets the fragment part of the URI. /// /// The given fragment string must be properly percent-encoded void setPathEtc(const std::string& pathEtc); /// Sets the path, query and fragment parts of the URI. std::string getPathEtc() const; /// Returns the encoded path, query and fragment parts of the URI. std::string getPathAndQuery() const; /// Returns the encoded path and query parts of the URI. void resolve(const std::string& relativeURI); /// Resolves the given relative URI against the base URI. /// See section 5.2 of RFC 3986 for the algorithm used. void resolve(const URI& relativeURI); /// Resolves the given relative URI against the base URI. /// See section 5.2 of RFC 3986 for the algorithm used. bool isRelative() const; /// Returns true if the URI is a relative reference, false otherwise. /// /// A relative reference does not contain a scheme identifier. /// Relative references are usually resolved against an absolute /// base reference. bool empty() const; /// Returns true if the URI is empty, false otherwise. bool operator == (const URI& uri) const; /// Returns true if both URIs are identical, false otherwise. /// /// Two URIs are identical if their scheme, authority, /// path, query and fragment part are identical. bool operator == (const std::string& uri) const; /// Parses the given URI and returns true if both URIs are identical, /// false otherwise. bool operator != (const URI& uri) const; /// Returns true if both URIs are identical, false otherwise. bool operator != (const std::string& uri) const; /// Parses the given URI and returns true if both URIs are identical, /// false otherwise. void normalize(); /// Normalizes the URI by removing all but leading . and .. segments from the path. /// /// If the first path segment in a relative path contains a colon (:), /// such as in a Windows path containing a drive letter, a dot segment (./) /// is prepended in accordance with section 3.3 of RFC 3986. void getPathSegments(std::vector& segments) const; /// Places the single path segments (delimited by slashes) into the /// given vector. static void encode(const std::string& str, const std::string& reserved, std::string& encodedStr); /// URI-encodes the given string by escaping reserved and non-ASCII /// characters. The encoded string is appended to encodedStr. static void decode(const std::string& str, std::string& decodedStr, bool plusAsSpace = false); /// URI-decodes the given string by replacing percent-encoded /// characters with the actual character. The decoded string /// is appended to decodedStr. /// /// When plusAsSpace is true, non-encoded plus signs in the query are decoded as spaces. /// (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1) protected: bool equals(const URI& uri) const; /// Returns true if both uri's are equivalent. bool isWellKnownPort() const; /// Returns true if the URI's port number is a well-known one /// (for example, 80, if the scheme is http). unsigned short getWellKnownPort() const; /// Returns the well-known port number for the URI's scheme, /// or 0 if the port number is not known. void parse(const std::string& uri); /// Parses and assigns an URI from the given string. Throws a /// SyntaxException if the uri is not valid. void parseAuthority(std::string::const_iterator& it, const std::string::const_iterator& end); /// Parses and sets the user-info, host and port from the given data. void parseHostAndPort(std::string::const_iterator& it, const std::string::const_iterator& end); /// Parses and sets the host and port from the given data. void parsePath(std::string::const_iterator& it, const std::string::const_iterator& end); /// Parses and sets the path from the given data. void parsePathEtc(std::string::const_iterator& it, const std::string::const_iterator& end); /// Parses and sets the path, query and fragment from the given data. void parseQuery(std::string::const_iterator& it, const std::string::const_iterator& end); /// Parses and sets the query from the given data. void parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end); /// Parses and sets the fragment from the given data. void mergePath(const std::string& path); /// Appends a path to the URI's path. void removeDotSegments(bool removeLeading = true); /// Removes all dot segments from the path. static void getPathSegments(const std::string& path, std::vector& segments); /// Places the single path segments (delimited by slashes) into the /// given vector. void buildPath(const std::vector& segments, bool leadingSlash, bool trailingSlash); /// Builds the path from the given segments. static const std::string RESERVED_PATH; static const std::string RESERVED_QUERY; static const std::string RESERVED_QUERY_PARAM; static const std::string RESERVED_FRAGMENT; static const std::string ILLEGAL; private: std::string _scheme; std::string _userInfo; std::string _host; unsigned short _port; std::string _path; std::string _query; std::string _fragment; }; // // inlines // inline const std::string& URI::getScheme() const { return _scheme; } inline const std::string& URI::getUserInfo() const { return _userInfo; } inline const std::string& URI::getHost() const { return _host; } inline const std::string& URI::getPath() const { return _path; } inline const std::string& URI::getRawQuery() const { return _query; } inline std::string URI::getRawFragment() const { return _fragment; } inline unsigned short URI::getSpecifiedPort() const { return _port; } inline void swap(URI& u1, URI& u2) noexcept { u1.swap(u2); } } // namespace Poco #endif // Foundation_URI_INCLUDED