Ada 3.1.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_INL_H
6#define ADA_URL_PATTERN_INL_H
7
8#include "ada/common_defs.h"
10#include "ada/url_pattern.h"
11
12#include <string_view>
13
14namespace ada {
15
16inline bool url_pattern_init::operator==(const url_pattern_init& other) const {
17 return protocol == other.protocol && username == other.username &&
18 password == other.password && hostname == other.hostname &&
19 port == other.port && search == other.search && hash == other.hash &&
20 pathname == other.pathname;
21}
22
// Equality for component match results: two results are equal when both the
// matched `input` and the captured `groups` compare equal.
// NOTE(review): the declarator line (listing line 23) is missing from this
// extract; the declaration index lists it as
// `bool operator==(const url_pattern_component_result &) const` - confirm
// against the original header.
24 const url_pattern_component_result& other) const {
25 return input == other.input && groups == other.groups;
26}
27
28template <url_pattern_regex::regex_concept regex_provider>
31 std::string&& input,
32 std::vector<std::optional<std::string>>&& exec_result) {
33 // Let result be a new URLPatternComponentResult.
34 // Set result["input"] to input.
35 // Let groups be a record<USVString, (USVString or undefined)>.
36 auto result =
37 url_pattern_component_result{.input = std::move(input), .groups = {}};
38
39 // Optimization: Let's reserve the size.
40 result.groups.reserve(exec_result.size());
41
42 // We explicitly start iterating from 0 even though the spec
43 // says we should start from 1. This case is handled by the
44 // std_regex_provider.
45 for (size_t index = 0; index < exec_result.size(); index++) {
46 result.groups.insert({
47 group_name_list[index],
48 std::move(exec_result[index]),
49 });
50 }
51 return result;
52}
53
// Accessor for the pattern string of the protocol component.
// NOTE(review): the declarator lines (listing lines 55-56) are missing from
// this extract; the declaration index suggests a `std::string_view` getter -
// confirm against url_pattern.h.
54template <url_pattern_regex::regex_concept regex_provider>
57 // Return this's associated URL pattern's protocol component's pattern string.
58 return protocol_component.pattern;
59}
// Accessor for the pattern string of the username component.
// NOTE(review): the declarator lines (listing lines 61-62) are missing from
// this extract; the declaration index suggests a `std::string_view` getter -
// confirm against url_pattern.h.
60template <url_pattern_regex::regex_concept regex_provider>
63 // Return this's associated URL pattern's username component's pattern string.
64 return username_component.pattern;
65}
// Accessor for the pattern string of the password component.
// NOTE(review): the declarator lines (listing lines 67-68) are missing from
// this extract; the declaration index suggests a `std::string_view` getter -
// confirm against url_pattern.h.
66template <url_pattern_regex::regex_concept regex_provider>
69 // Return this's associated URL pattern's password component's pattern string.
70 return password_component.pattern;
71}
// Accessor for the pattern string of the hostname component.
// NOTE(review): the declarator lines (listing lines 73-74) are missing from
// this extract; the declaration index suggests a `std::string_view` getter -
// confirm against url_pattern.h.
72template <url_pattern_regex::regex_concept regex_provider>
75 // Return this's associated URL pattern's hostname component's pattern string.
76 return hostname_component.pattern;
77}
// Accessor for the pattern string of the port component.
// NOTE(review): the declarator lines (listing lines 79-80) are missing from
// this extract; the declaration index suggests a `std::string_view` getter -
// confirm against url_pattern.h.
78template <url_pattern_regex::regex_concept regex_provider>
81 // Return this's associated URL pattern's port component's pattern string.
82 return port_component.pattern;
83}
// Accessor for the pattern string of the pathname component.
// NOTE(review): the declarator lines (listing lines 85-86) are missing from
// this extract; the declaration index suggests a `std::string_view` getter -
// confirm against url_pattern.h.
84template <url_pattern_regex::regex_concept regex_provider>
87 // Return this's associated URL pattern's pathname component's pattern string.
88 return pathname_component.pattern;
89}
// Accessor for the pattern string of the search component.
// NOTE(review): the declarator lines (listing lines 91-92) are missing from
// this extract; the declaration index suggests a `std::string_view` getter -
// confirm against url_pattern.h.
90template <url_pattern_regex::regex_concept regex_provider>
93 // Return this's associated URL pattern's search component's pattern string.
94 return search_component.pattern;
95}
// Accessor for the pattern string of the hash component.
// NOTE(review): the declarator lines (listing lines 97-98) are missing from
// this extract; the declaration index suggests a `std::string_view` getter -
// confirm against url_pattern.h.
96template <url_pattern_regex::regex_concept regex_provider>
99 // Return this's associated URL pattern's hash component's pattern string.
100 return hash_component.pattern;
101}
// Returns the stored `ignore_case_` flag of this pattern.
// NOTE(review): the declarator line (listing line 103) is missing from this
// extract; the declaration index lists `bool ignore_case() const` - confirm
// against url_pattern.h.
102template <url_pattern_regex::regex_concept regex_provider>
104 return ignore_case_;
105}
// Reports whether any of the eight components (protocol, username, password,
// hostname, port, pathname, search, hash) has its `has_regexp_groups` flag
// set. The chain short-circuits on the first component that does.
// NOTE(review): the declarator line (listing line 107) is missing from this
// extract; the declaration index lists `bool has_regexp_groups() const` -
// confirm against url_pattern.h.
106template <url_pattern_regex::regex_concept regex_provider>
108 // If this's associated URL pattern's has regexp groups, then return true.
109 return protocol_component.has_regexp_groups ||
110 username_component.has_regexp_groups ||
111 password_component.has_regexp_groups ||
112 hostname_component.has_regexp_groups ||
113 port_component.has_regexp_groups ||
114 pathname_component.has_regexp_groups ||
115 search_component.has_regexp_groups || hash_component.has_regexp_groups;
116}
117
// Predicate used by url_pattern_component::compile to detect parts of the
// "regexp" kind.
// NOTE(review): the body (listing line 119) is missing from this extract;
// presumably it compares the part's `type` member against the regexp
// enumerator - restore it from the original header before compiling.
118inline bool url_pattern_part::is_regexp() const noexcept {
120}
121
// Returns the delimiter as a one-element view, or an empty value when no
// delimiter is set.
// NOTE(review): the first declarator line (listing line 122) is missing from
// this extract; the declaration index lists
// `std::string_view get_delimiter() const` - confirm the enclosing class.
123 const {
124 if (delimiter) {
// The view is built from the address of the stored value, so it aliases this
// object and must not outlive it.
125 return {&delimiter.value(), 1};
126 }
127 return {};
128}
129
// Returns the prefix as a one-element view, or an empty value when no prefix
// is set.
// NOTE(review): the first declarator line (listing line 130) is missing from
// this extract; the declaration index lists
// `std::string_view get_prefix() const` - confirm the enclosing class.
131 const {
132 if (prefix) {
// The view is built from the address of the stored value, so it aliases this
// object and must not outlive it.
133 return {&prefix.value(), 1};
134 }
135 return {};
136}
137
// Compiles one pattern string into a url_pattern_component.
//
// Steps, as visible below: parse `input` into a part list, derive the regular
// expression string and group name list, derive the canonical pattern string,
// build the provider's regex, and record whether any part is a regexp part.
//
// Returns the parse error unchanged when parsing fails, and
// errors::type_error when the regex provider cannot create a regex instance.
//
// NOTE(review): listing lines 141, 143, 158, 166 and 189 are missing from this
// extract (function name, the `options` parameter, two helper calls and the
// start of the final return statement). The declaration index lists
// `compile(std::string_view input, F &encoding_callback,
// url_pattern_compile_component_options &options)` - restore the missing
// lines from the original header before compiling.
138template <url_pattern_regex::regex_concept regex_provider>
139template <url_pattern_encoding_callback F>
140tl::expected<url_pattern_component<regex_provider>, errors>
142 std::string_view input, F& encoding_callback,
144 ada_log("url_pattern_component::compile input: ", input);
145 // Let part list be the result of running parse a pattern string given input,
146 // options, and encoding callback.
147 auto part_list = url_pattern_helpers::parse_pattern_string(input, options,
148 encoding_callback);
149
150 if (!part_list) {
151 ada_log("parse_pattern_string failed");
152 return tl::unexpected(part_list.error());
153 }
154
155 // Let (regular expression string, name list) be the result of running
156 // generate a regular expression and name list given part list and options.
// NOTE(review): the callee on listing line 158 is missing here; presumably
// generate_regular_expression_and_name_list - confirm.
157 auto [regular_expression_string, name_list] =
159 options);
160
161 ada_log("regular expression string: ", regular_expression_string);
162
163 // Let pattern string be the result of running generate a pattern
164 // string given part list and options.
// NOTE(review): the initializer on listing line 166 is missing here;
// presumably a call to generate_pattern_string - confirm.
165 auto pattern_string =
167
168 // Let regular expression be RegExpCreate(regular expression string,
169 // flags). If this throws an exception, catch it, and throw a
170 // TypeError.
171 std::optional<typename regex_provider::regex_type> regular_expression =
172 regex_provider::create_instance(regular_expression_string,
173 options.ignore_case);
174
// An empty optional from the provider is reported as a type error.
175 if (!regular_expression) {
176 return tl::unexpected(errors::type_error);
177 }
178
179 // For each part of part list:
180 // - If part’s type is "regexp", then set has regexp groups to true.
181 const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
182 const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
183
184 ada_log("has regexp groups: ", has_regexp_groups);
185
186 // Return a new component whose pattern string is pattern string, regular
187 // expression is regular expression, group name list is name list, and has
188 // regexp groups is has regexp groups.
// NOTE(review): the start of the return statement (listing line 189) is
// missing here; the lines below are its argument list.
190 std::move(pattern_string), std::move(*regular_expression),
191 std::move(name_list), has_regexp_groups);
192}
193
// Forwards `input` and `base_url` to match() and returns its result as-is.
// NOTE(review): the declarator line (listing line 195) is missing from this
// extract; the declaration index lists
// `result< std::optional< url_pattern_result > > exec(...)` - confirm against
// url_pattern.h.
194template <url_pattern_regex::regex_concept regex_provider>
196 const url_pattern_input& input, const std::string_view* base_url) {
197 // Return the result of match given this's associated URL pattern, input, and
198 // baseURL if given.
199 return match(input, base_url);
200}
201
// Boolean form of match(): yields true when match() produced a result, false
// when it produced std::nullopt, and errors::type_error when match() itself
// returned an error.
// NOTE(review): the declarator line (listing line 203) is missing from this
// extract; the declaration index lists `result< bool > test(...)` - confirm
// against url_pattern.h.
202template <url_pattern_regex::regex_concept regex_provider>
204 const url_pattern_input& input, const std::string_view* base_url) {
205 // TODO: Optimization opportunity. Rather than returning `url_pattern_result`
206 // Implement a fast path just like `can_parse()` in ada_url.
207 // Let result be the result of match given this's associated URL pattern,
208 // input, and baseURL if given.
209 // If result is null, return false.
210 if (auto result = match(input, base_url); result.has_value()) {
// `result` holds an optional here; has_value() distinguishes match / no match.
211 return result->has_value();
212 }
// Whatever error match() reported is surfaced as a type error.
213 return tl::unexpected(errors::type_error);
214}
215
// Matches `input` against this pattern, component by component.
//
// `input` is either a url_pattern_init or a URL string (std::string_view).
// `base_url_string` may be null; it is only accepted together with a string
// input.
//
// Outcomes visible in the body:
//  - errors::type_error when a url_pattern_init is combined with a base URL
//    string;
//  - std::nullopt when processing the init fails, when the base URL or the
//    URL fails to parse, or when any component regex does not match
//    (components are tried in the order protocol, username, password,
//    hostname, port, pathname, search, hash and the first failure returns);
//  - otherwise a url_pattern_result holding the inputs and one component
//    match result per component.
//
// NOTE(review): the declarator line (listing line 217) is missing from this
// extract; the declaration index lists
// `result< std::optional< url_pattern_result > > match(...)` - confirm against
// url_pattern.h.
216template <url_pattern_regex::regex_concept regex_provider>
218 const url_pattern_input& input, const std::string_view* base_url_string) {
// Owning copies of the eight component strings that will be matched.
219 std::string protocol{};
220 std::string username{};
221 std::string password{};
222 std::string hostname{};
223 std::string port{};
224 std::string pathname{};
225 std::string search{};
226 std::string hash{};
227
228 // Let inputs be an empty list.
229 // Append input to inputs.
230 std::vector inputs{input};
231
232 // If input is a URLPatternInit then:
233 if (std::holds_alternative<url_pattern_init>(input)) {
234 ada_log(
235 "url_pattern::match called with url_pattern_init and base_url_string=",
236 base_url_string);
237 // If baseURLString was given, throw a TypeError.
238 if (base_url_string) {
239 ada_log("failed to match because base_url_string was given");
240 return tl::unexpected(errors::type_error);
241 }
242
243 // Let applyResult be the result of process a URLPatternInit given input,
244 // "url", protocol, username, password, hostname, port, pathname, search,
245 // and hash.
// The eight locals are still empty strings at this point.
246 auto apply_result = url_pattern_init::process(
247 std::get<url_pattern_init>(input), url_pattern_init::process_type::url,
248 protocol, username, password, hostname, port, pathname, search, hash);
249
250 // If this throws an exception, catch it, and return null.
251 if (!apply_result.has_value()) {
252 ada_log("match returned std::nullopt because process threw");
253 return std::nullopt;
254 }
255
256 // Set protocol to applyResult["protocol"].
257 ADA_ASSERT_TRUE(apply_result->protocol.has_value());
258 protocol = std::move(apply_result->protocol.value());
259
260 // Set username to applyResult["username"].
261 ADA_ASSERT_TRUE(apply_result->username.has_value());
262 username = std::move(apply_result->username.value());
263
264 // Set password to applyResult["password"].
265 ADA_ASSERT_TRUE(apply_result->password.has_value());
266 password = std::move(apply_result->password.value());
267
268 // Set hostname to applyResult["hostname"].
269 ADA_ASSERT_TRUE(apply_result->hostname.has_value());
270 hostname = std::move(apply_result->hostname.value());
271
272 // Set port to applyResult["port"].
273 ADA_ASSERT_TRUE(apply_result->port.has_value());
274 port = std::move(apply_result->port.value());
275
276 // Set pathname to applyResult["pathname"].
277 ADA_ASSERT_TRUE(apply_result->pathname.has_value());
278 pathname = std::move(apply_result->pathname.value());
279
280 // Set search to applyResult["search"].
281 ADA_ASSERT_TRUE(apply_result->search.has_value());
// A single leading '?' is dropped so the search component is matched without
// its prefix.
282 if (apply_result->search->starts_with("?")) {
283 search = apply_result->search->substr(1);
284 } else {
285 search = std::move(apply_result->search.value());
286 }
287
288 // Set hash to applyResult["hash"].
289 ADA_ASSERT_TRUE(apply_result->hash.has_value());
// Unlike search, the hash is asserted to arrive without its '#' prefix.
290 ADA_ASSERT_TRUE(!apply_result->hash->starts_with("#"));
291 hash = std::move(apply_result->hash.value());
292 } else {
293 ADA_ASSERT_TRUE(std::holds_alternative<std::string_view>(input));
294
295 // Let baseURL be null.
296 result<url_aggregator> base_url;
297
298 // If baseURLString was given, then:
299 if (base_url_string) {
300 // Let baseURL be the result of parsing baseURLString.
301 base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
302
303 // If baseURL is failure, return null.
304 if (!base_url) {
305 ada_log("match returned std::nullopt because failed to parse base_url=",
306 *base_url_string);
307 return std::nullopt;
308 }
309
310 // Append baseURLString to inputs.
311 inputs.emplace_back(*base_url_string);
312 }
313
// Null unless `base_url` holds a value.
314 url_aggregator* base_url_value =
315 base_url.has_value() ? &*base_url : nullptr;
316
317 // Set url to the result of parsing input given baseURL.
318 auto url = ada::parse<url_aggregator>(std::get<std::string_view>(input),
319 base_url_value);
320
321 // If url is failure, return null.
322 if (!url) {
323 ada_log("match returned std::nullopt because url failed");
324 return std::nullopt;
325 }
326
327 // Set protocol to url’s scheme.
328 // IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':'
329 // is removed. Similar work was done on workerd:
330 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
331 protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
332 // Set username to url’s username.
333 username = url->get_username();
334 // Set password to url’s password.
335 password = url->get_password();
336 // Set hostname to url’s host, serialized, or the empty string if the value
337 // is null.
338 hostname = url->get_hostname();
339 // Set port to url’s port, serialized, or the empty string if the value is
340 // null.
341 port = url->get_port();
342 // Set pathname to the result of URL path serializing url.
343 pathname = url->get_pathname();
344 // Set search to url’s query or the empty string if the value is null.
345 // IMPORTANT: Not documented on the URLPattern spec, but search prefix '?'
346 // is removed. Similar work was done on workerd:
347 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
348 if (url->has_search()) {
349 auto view = url->get_search();
350 search = view.starts_with("?") ? url->get_search().substr(1) : view;
351 } else {
352 search = "";
353 }
354 // Set hash to url’s fragment or the empty string if the value is null.
355 // IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
356 // removed. Similar work was done on workerd:
357 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
358 if (url->has_hash()) {
359 auto view = url->get_hash();
360 hash = view.starts_with("#") ? url->get_hash().substr(1) : view;
361 } else {
362 hash = "";
363 }
364 }
365
// From here on each component is run through its regex in turn; the first
// component that fails to match ends the whole match with std::nullopt.
366 // Let protocolExecResult be RegExpBuiltinExec(urlPattern’s protocol
367 // component's regular expression, protocol).
368 auto protocol_exec_result =
369 regex_provider::regex_search(protocol, protocol_component.regexp);
370
371 if (!protocol_exec_result) {
372 return std::nullopt;
373 }
374
375 // Let usernameExecResult be RegExpBuiltinExec(urlPattern’s username
376 // component's regular expression, username).
377 auto username_exec_result =
378 regex_provider::regex_search(username, username_component.regexp);
379
380 if (!username_exec_result) {
381 return std::nullopt;
382 }
383
384 // Let passwordExecResult be RegExpBuiltinExec(urlPattern’s password
385 // component's regular expression, password).
386 auto password_exec_result =
387 regex_provider::regex_search(password, password_component.regexp);
388
389 if (!password_exec_result) {
390 return std::nullopt;
391 }
392
393 // Let hostnameExecResult be RegExpBuiltinExec(urlPattern’s hostname
394 // component's regular expression, hostname).
395 auto hostname_exec_result =
396 regex_provider::regex_search(hostname, hostname_component.regexp);
397
398 if (!hostname_exec_result) {
399 return std::nullopt;
400 }
401
402 // Let portExecResult be RegExpBuiltinExec(urlPattern’s port component's
403 // regular expression, port).
404 auto port_exec_result =
405 regex_provider::regex_search(port, port_component.regexp);
406
407 if (!port_exec_result) {
408 return std::nullopt;
409 }
410
411 // Let pathnameExecResult be RegExpBuiltinExec(urlPattern’s pathname
412 // component's regular expression, pathname).
413 auto pathname_exec_result =
414 regex_provider::regex_search(pathname, pathname_component.regexp);
415
416 if (!pathname_exec_result) {
417 return std::nullopt;
418 }
419
420 // Let searchExecResult be RegExpBuiltinExec(urlPattern’s search component's
421 // regular expression, search).
422 auto search_exec_result =
423 regex_provider::regex_search(search, search_component.regexp);
424
425 if (!search_exec_result) {
426 return std::nullopt;
427 }
428
429 // Let hashExecResult be RegExpBuiltinExec(urlPattern’s hash component's
430 // regular expression, hash).
431 auto hash_exec_result =
432 regex_provider::regex_search(hash, hash_component.regexp);
433
434 if (!hash_exec_result) {
435 return std::nullopt;
436 }
437
// All eight components matched: assemble the result. The component strings and
// exec results are moved, so they must not be read again afterwards.
438 // Let result be a new URLPatternResult.
439 auto result = url_pattern_result{};
440 // Set result["inputs"] to inputs.
441 result.inputs = std::move(inputs);
442 // Set result["protocol"] to the result of creating a component match result
443 // given urlPattern’s protocol component, protocol, and protocolExecResult.
444 result.protocol = protocol_component.create_component_match_result(
445 std::move(protocol), std::move(*protocol_exec_result));
446
447 // Set result["username"] to the result of creating a component match result
448 // given urlPattern’s username component, username, and usernameExecResult.
449 result.username = username_component.create_component_match_result(
450 std::move(username), std::move(*username_exec_result));
451
452 // Set result["password"] to the result of creating a component match result
453 // given urlPattern’s password component, password, and passwordExecResult.
454 result.password = password_component.create_component_match_result(
455 std::move(password), std::move(*password_exec_result));
456
457 // Set result["hostname"] to the result of creating a component match result
458 // given urlPattern’s hostname component, hostname, and hostnameExecResult.
459 result.hostname = hostname_component.create_component_match_result(
460 std::move(hostname), std::move(*hostname_exec_result));
461
462 // Set result["port"] to the result of creating a component match result given
463 // urlPattern’s port component, port, and portExecResult.
464 result.port = port_component.create_component_match_result(
465 std::move(port), std::move(*port_exec_result));
466
467 // Set result["pathname"] to the result of creating a component match result
468 // given urlPattern’s pathname component, pathname, and pathnameExecResult.
469 result.pathname = pathname_component.create_component_match_result(
470 std::move(pathname), std::move(*pathname_exec_result));
471
472 // Set result["search"] to the result of creating a component match result
473 // given urlPattern’s search component, search, and searchExecResult.
474 result.search = search_component.create_component_match_result(
475 std::move(search), std::move(*search_exec_result));
476
477 // Set result["hash"] to the result of creating a component match result given
478 // urlPattern’s hash component, hash, and hashExecResult.
479 result.hash = hash_component.create_component_match_result(
480 std::move(hash), std::move(*hash_exec_result));
481
482 return result;
483}
484
485} // namespace ada
486
487#endif
url_pattern_component_result create_component_match_result(std::string &&input, std::vector< std::optional< std::string > > &&exec_result)
static tl::expected< url_pattern_component, errors > compile(std::string_view input, F &encoding_callback, url_pattern_compile_component_options &options)
std::vector< std::string > group_name_list
bool is_regexp() const noexcept
url_pattern_part_type type
Definition url_pattern.h:70
bool has_regexp_groups() const
std::string_view get_hostname() const ada_lifetime_bound
std::string_view get_port() const ada_lifetime_bound
result< bool > test(const url_pattern_input &input, const std::string_view *base_url=nullptr)
result< std::optional< url_pattern_result > > match(const url_pattern_input &input, const std::string_view *base_url_string=nullptr)
bool ignore_case() const
std::string_view get_password() const ada_lifetime_bound
std::string_view get_protocol() const ada_lifetime_bound
std::string_view get_hash() const ada_lifetime_bound
std::string_view get_username() const ada_lifetime_bound
std::string_view get_pathname() const ada_lifetime_bound
std::string_view get_search() const ada_lifetime_bound
result< std::optional< url_pattern_result > > exec(const url_pattern_input &input, const std::string_view *base_url=nullptr)
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
#define ada_lifetime_bound
tl::expected< std::vector< url_pattern_part >, errors > parse_pattern_string(std::string_view input, url_pattern_compile_component_options &options, F &encoding_callback)
std::string generate_pattern_string(std::vector< url_pattern_part > &part_list, url_pattern_compile_component_options &options)
std::tuple< std::string, std::vector< std::string > > generate_regular_expression_and_name_list(const std::vector< url_pattern_part > &part_list, url_pattern_compile_component_options options)
Definition ada_idna.h:13
errors
Definition errors.h:10
@ type_error
Definition errors.h:10
template ada::result< url_aggregator > parse< url_aggregator >(std::string_view input, const url_aggregator *base_url)
tl::expected< result_type, ada::errors > result
std::variant< std::string_view, url_pattern_init > url_pattern_input
Lightweight URL struct.
std::string_view get_prefix() const ada_warn_unused
std::string_view get_delimiter() const ada_warn_unused
bool operator==(const url_pattern_component_result &) const
std::unordered_map< std::string, std::optional< std::string > > groups
std::optional< std::string > port
std::optional< std::string > protocol
static tl::expected< url_pattern_init, errors > process(url_pattern_init init, process_type type, std::optional< std::string_view > protocol=std::nullopt, std::optional< std::string_view > username=std::nullopt, std::optional< std::string_view > password=std::nullopt, std::optional< std::string_view > hostname=std::nullopt, std::optional< std::string_view > port=std::nullopt, std::optional< std::string_view > pathname=std::nullopt, std::optional< std::string_view > search=std::nullopt, std::optional< std::string_view > hash=std::nullopt)
std::optional< std::string > password
std::optional< std::string > hostname
std::optional< std::string > search
bool operator==(const url_pattern_init &) const
std::optional< std::string > username
std::optional< std::string > pathname
std::optional< std::string > hash
Generic URL struct reliant on std::string instantiation.
Definition url.h:45
std::string get_search() const noexcept
Definition url.cpp:641
constexpr std::string_view get_pathname() const noexcept
Definition url-inl.h:46
std::string get_hash() const noexcept
Definition url.cpp:660
std::string get_hostname() const noexcept
Definition url.cpp:637
const std::string & get_password() const noexcept
Definition url.cpp:652
std::string get_port() const noexcept
Definition url.cpp:656
const std::string & get_username() const noexcept
Definition url.cpp:648
constexpr bool has_search() const noexcept override
Definition url-inl.h:163
std::string get_protocol() const noexcept
Definition url.cpp:615
constexpr bool has_hash() const noexcept override
Definition url-inl.h:159
Declaration for the URLPattern implementation.
Declaration for the URLPattern helpers.