Ada 3.1.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_H
6#define ADA_URL_PATTERN_H
7
9#include "ada/expected.h"
10#include "ada/parser.h"
12
13#include <ostream>
14#include <string>
15#include <string_view>
16#include <unordered_map>
17#include <variant>
18#include <vector>
19
20#if ADA_TESTING
21#include <iostream>
22#endif // ADA_TESTING
23
24namespace ada {
25
26enum class url_pattern_part_type : uint8_t {
27 // The part represents a simple fixed text string.
29 // The part represents a matching group with a custom regular expression.
31 // The part represents a matching group that matches code points up to the
32 // next separator code point. This is typically used for a named group like
33 // ":foo" that does not have a custom regular expression.
35 // The part represents a matching group that greedily matches all code points.
36 // This is typically used for the "*" wildcard matching group.
38};
39
40enum class url_pattern_part_modifier : uint8_t {
41 // The part does not have a modifier.
43 // The part has an optional modifier indicated by the U+003F (?) code point.
45 // The part has a "zero or more" modifier indicated by the U+002A (*) code
46 // point.
48 // The part has a "one or more" modifier indicated by the U+002B (+) code
49 // point.
51};
52
53// @see https://urlpattern.spec.whatwg.org/#part
55 public:
56 url_pattern_part(url_pattern_part_type _type, std::string&& _value,
58 : type(_type), value(_value), modifier(_modifier) {}
59
60 url_pattern_part(url_pattern_part_type _type, std::string&& _value,
61 url_pattern_part_modifier _modifier, std::string&& _name,
62 std::string&& _prefix, std::string&& _suffix)
63 : type(_type),
64 value(_value),
65 modifier(_modifier),
66 name(_name),
67 prefix(_prefix),
68 suffix(_suffix) {}
69 // A part has an associated type, a string, which must be set upon creation.
71 // A part has an associated value, a string, which must be set upon creation.
72 std::string value;
73 // A part has an associated modifier, a string, which must be set upon
74 // creation.
76 // A part has an associated name, a string, initially the empty string.
77 std::string name{};
78 // A part has an associated prefix, a string, initially the empty string.
79 std::string prefix{};
80 // A part has an associated suffix, a string, initially the empty string.
81 std::string suffix{};
82
83 inline bool is_regexp() const noexcept;
84};
85
86// @see https://urlpattern.spec.whatwg.org/#options-header
90 std::optional<char> new_delimiter = std::nullopt,
91 std::optional<char> new_prefix = std::nullopt)
92 : delimiter(new_delimiter), prefix(new_prefix) {}
93
94 inline std::string_view get_delimiter() const ada_warn_unused;
95 inline std::string_view get_prefix() const ada_warn_unused;
96
97 // @see https://urlpattern.spec.whatwg.org/#options-ignore-case
98 bool ignore_case = false;
99
103
104 private:
105 // @see https://urlpattern.spec.whatwg.org/#options-delimiter-code-point
106 std::optional<char> delimiter{};
107 // @see https://urlpattern.spec.whatwg.org/#options-prefix-code-point
108 std::optional<char> prefix{};
109};
110
111// The default options is an options struct with delimiter code point set to
112// the empty string and prefix code point set to the empty string.
113inline url_pattern_compile_component_options
114 url_pattern_compile_component_options::DEFAULT(std::nullopt, std::nullopt);
115
116// The hostname options is an options struct with delimiter code point set
117// to "." and prefix code point set to the empty string.
118inline url_pattern_compile_component_options
120
121// The pathname options is an options struct with delimiter code point set
122// to "/" and prefix code point set to "/".
123inline url_pattern_compile_component_options
125
126// A struct providing the URLPattern matching results for a single
127// URL component. The URLPatternComponentResult is only ever used
128// as a member attribute of a URLPatternResult struct. The
129// URLPatternComponentResult API is defined as part of the URLPattern
130// specification.
132 std::string input;
133 std::unordered_map<std::string, std::optional<std::string>> groups;
134
135 bool operator==(const url_pattern_component_result&) const;
136
137#if ADA_TESTING
138 friend void PrintTo(const url_pattern_component_result& result,
139 std::ostream* os) {
140 *os << "input: '" << result.input << "', group: ";
141 for (const auto& group : result.groups) {
142 *os << "(" << group.first << ", " << group.second.value_or("undefined")
143 << ") ";
144 }
145 }
146#endif // ADA_TESTING
147};
148
149template <url_pattern_regex::regex_concept regex_provider>
151 public:
153
154 // This function explicitly takes a std::string because it is moved.
155 // To avoid unnecessary copy, move each value while calling the constructor.
156 url_pattern_component(std::string&& new_pattern,
157 typename regex_provider::regex_type&& new_regexp,
158 std::vector<std::string>&& new_group_name_list,
159 bool new_has_regexp_groups)
160 : regexp(std::move(new_regexp)),
161 pattern(std::move(new_pattern)),
162 group_name_list(new_group_name_list),
163 has_regexp_groups(new_has_regexp_groups) {}
164
165 // @see https://urlpattern.spec.whatwg.org/#compile-a-component
166 template <url_pattern_encoding_callback F>
167 static tl::expected<url_pattern_component, errors> compile(
168 std::string_view input, F& encoding_callback,
170
171 // @see https://urlpattern.spec.whatwg.org/#create-a-component-match-result
173 std::string&& input,
174 std::vector<std::optional<std::string>>&& exec_result);
175
176#if ADA_TESTING
177 friend void PrintTo(const url_pattern_component& component,
178 std::ostream* os) {
179 *os << "pattern: '" << component.pattern
180 << "', has_regexp_groups: " << component.has_regexp_groups
181 << "group_name_list: ";
182 for (const auto& name : component.group_name_list) {
183 *os << name << ", ";
184 }
185 }
186#endif // ADA_TESTING
187
188 typename regex_provider::regex_type regexp{};
189 std::string pattern{};
190 std::vector<std::string> group_name_list{};
191 bool has_regexp_groups = false;
192};
193
194// A URLPattern input can be either a string or a URLPatternInit object.
195// If it is a string, it must be a valid UTF-8 string.
196using url_pattern_input = std::variant<std::string_view, url_pattern_init>;
197
198// A struct providing the URLPattern matching results for all
199// components of a URL. The URLPatternResult API is defined as
200// part of the URLPattern specification.
212
214 bool ignore_case = false;
215
216#if ADA_TESTING
217 friend void PrintTo(const url_pattern_options& options, std::ostream* os) {
218 *os << "ignore_case: '" << options.ignore_case;
219 }
220#endif // ADA_TESTING
221};
222
223// URLPattern is a Web Platform standard API for matching URLs against a
224// pattern syntax (think of it as a regular expression for URLs). It is
225// defined in https://wicg.github.io/urlpattern.
226// More information about the URL Pattern syntax can be found at
227// https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
228//
229// We require all strings to be valid UTF-8: it is the user's responsibility
230// to ensure that the provided strings are valid UTF-8.
231template <url_pattern_regex::regex_concept regex_provider>
233 public:
234 url_pattern() = default;
235
241 const url_pattern_input& input,
242 const std::string_view* base_url = nullptr);
243
249 const std::string_view* base_url = nullptr);
250
256 const url_pattern_input& input,
257 const std::string_view* base_url_string = nullptr);
258
259 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-protocol
260 [[nodiscard]] std::string_view get_protocol() const ada_lifetime_bound;
261 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-username
262 [[nodiscard]] std::string_view get_username() const ada_lifetime_bound;
263 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-password
264 [[nodiscard]] std::string_view get_password() const ada_lifetime_bound;
265 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-hostname
266 [[nodiscard]] std::string_view get_hostname() const ada_lifetime_bound;
267 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-port
268 [[nodiscard]] std::string_view get_port() const ada_lifetime_bound;
269 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-pathname
270 [[nodiscard]] std::string_view get_pathname() const ada_lifetime_bound;
271 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-search
272 [[nodiscard]] std::string_view get_search() const ada_lifetime_bound;
273 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-hash
274 [[nodiscard]] std::string_view get_hash() const ada_lifetime_bound;
275
276 // If ignoreCase is true, the JavaScript regular expression created for each
277 // pattern must use the `vi` flag. Otherwise, they must use the `v` flag.
278 [[nodiscard]] bool ignore_case() const;
279
280 // @see https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups
281 [[nodiscard]] bool has_regexp_groups() const;
282
283#if ADA_TESTING
284 friend void PrintTo(const url_pattern& c, std::ostream* os) {
285 *os << "protocol_component: '" << c.get_protocol() << ", ";
286 *os << "username_component: '" << c.get_username() << ", ";
287 *os << "password_component: '" << c.get_password() << ", ";
288 *os << "hostname_component: '" << c.get_hostname() << ", ";
289 *os << "port_component: '" << c.get_port() << ", ";
290 *os << "pathname_component: '" << c.get_pathname() << ", ";
291 *os << "search_component: '" << c.get_search() << ", ";
292 *os << "hash_component: '" << c.get_hash();
293 }
294#endif // ADA_TESTING
295
296 template <url_pattern_regex::regex_concept P>
297 friend tl::expected<url_pattern<P>, errors> parser::parse_url_pattern_impl(
298 std::variant<std::string_view, url_pattern_init> input,
299 const std::string_view* base_url, const url_pattern_options* options);
300
306 url_pattern_component<regex_provider> protocol_component{};
312 url_pattern_component<regex_provider> username_component{};
318 url_pattern_component<regex_provider> password_component{};
324 url_pattern_component<regex_provider> hostname_component{};
330 url_pattern_component<regex_provider> port_component{};
336 url_pattern_component<regex_provider> pathname_component{};
342 url_pattern_component<regex_provider> search_component{};
348 url_pattern_component<regex_provider> hash_component{};
354 bool ignore_case_ = false;
355};
356
357} // namespace ada
358
359#endif
url_pattern_component_result create_component_match_result(std::string &&input, std::vector< std::optional< std::string > > &&exec_result)
url_pattern_component(std::string &&new_pattern, typename regex_provider::regex_type &&new_regexp, std::vector< std::string > &&new_group_name_list, bool new_has_regexp_groups)
static tl::expected< url_pattern_component, errors > compile(std::string_view input, F &encoding_callback, url_pattern_compile_component_options &options)
regex_provider::regex_type regexp
std::vector< std::string > group_name_list
url_pattern_part(url_pattern_part_type _type, std::string &&_value, url_pattern_part_modifier _modifier)
Definition url_pattern.h:56
url_pattern_part(url_pattern_part_type _type, std::string &&_value, url_pattern_part_modifier _modifier, std::string &&_name, std::string &&_prefix, std::string &&_suffix)
Definition url_pattern.h:60
url_pattern_part_modifier modifier
Definition url_pattern.h:75
bool is_regexp() const noexcept
url_pattern_part_type type
Definition url_pattern.h:70
bool has_regexp_groups() const
std::string_view get_hostname() const ada_lifetime_bound
std::string_view get_port() const ada_lifetime_bound
result< bool > test(const url_pattern_input &input, const std::string_view *base_url=nullptr)
result< std::optional< url_pattern_result > > match(const url_pattern_input &input, const std::string_view *base_url_string=nullptr)
bool ignore_case() const
std::string_view get_password() const ada_lifetime_bound
std::string_view get_protocol() const ada_lifetime_bound
std::string_view get_hash() const ada_lifetime_bound
url_pattern()=default
std::string_view get_username() const ada_lifetime_bound
std::string_view get_pathname() const ada_lifetime_bound
std::string_view get_search() const ada_lifetime_bound
result< std::optional< url_pattern_result > > exec(const url_pattern_input &input, const std::string_view *base_url=nullptr)
#define ada_lifetime_bound
#define ada_warn_unused
Definition common_defs.h:85
Definitions for user-facing functions for parsing a URL and its components.
tl::expected< url_pattern< regex_provider >, errors > parse_url_pattern_impl(std::variant< std::string_view, url_pattern_init > input, const std::string_view *base_url, const url_pattern_options *options)
Definition parser-inl.h:18
Definition ada_idna.h:13
url_pattern_part_modifier
Definition url_pattern.h:40
url_pattern_part_type
Definition url_pattern.h:26
errors
Definition errors.h:10
tl::expected< result_type, ada::errors > result
std::variant< std::string_view, url_pattern_init > url_pattern_input
Definitions for the parser.
url_pattern_compile_component_options(std::optional< char > new_delimiter=std::nullopt, std::optional< char > new_prefix=std::nullopt)
Definition url_pattern.h:89
static url_pattern_compile_component_options HOSTNAME
static url_pattern_compile_component_options PATHNAME
static url_pattern_compile_component_options DEFAULT
bool operator==(const url_pattern_component_result &) const
std::unordered_map< std::string, std::optional< std::string > > groups
std::vector< url_pattern_input > inputs
url_pattern_component_result hostname
url_pattern_component_result password
url_pattern_component_result hash
url_pattern_component_result port
url_pattern_component_result protocol
url_pattern_component_result pathname
url_pattern_component_result username
url_pattern_component_result search
Declaration for the url_pattern_init implementation.