Ada 3.1.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern_helpers-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_HELPERS_INL_H
6#define ADA_URL_PATTERN_HELPERS_INL_H
7
8#include <optional>
9#include <string_view>
10
11#include "ada/common_defs.h"
12#include "ada/expected.h"
14#include "ada/implementation.h"
15
#ifdef ADA_TESTING
// Returns a human-readable name for a token type; used only by tests.
// NOTE(review): the case labels were absent from the reviewed copy (extraction
// damage); they are reconstructed here from the returned strings, which mirror
// the enumerator names one-for-one. The original default branch body is not
// visible — "UNKNOWN" is a safe placeholder; confirm against upstream.
inline std::string to_string(token_type type) {
  switch (type) {
    case token_type::INVALID_CHAR:
      return "INVALID_CHAR";
    case token_type::OPEN:
      return "OPEN";
    case token_type::CLOSE:
      return "CLOSE";
    case token_type::REGEXP:
      return "REGEXP";
    case token_type::NAME:
      return "NAME";
    case token_type::CHAR:
      return "CHAR";
    case token_type::ESCAPED_CHAR:
      return "ESCAPED_CHAR";
    case token_type::OTHER_MODIFIER:
      return "OTHER_MODIFIER";
    case token_type::ASTERISK:
      return "ASTERISK";
    case token_type::END:
      return "END";
    default:
      // All enumerators are handled above; this keeps the function total.
      return "UNKNOWN";
  }
}
#endif  // ADA_TESTING
45
46template <url_pattern_regex::regex_concept regex_provider>
48 // Set parser’s token index to parser’s component start.
49 token_index = component_start;
50 // Set parser’s token increment to 0.
51 token_increment = 0;
52}
53
54template <url_pattern_regex::regex_concept regex_provider>
56 // Return the result of running is a non-special pattern char given parser,
57 // parser’s token index and "#".
58 return is_non_special_pattern_char(token_index, "#");
59}
60
61template <url_pattern_regex::regex_concept regex_provider>
63 // If result of running is a non-special pattern char given parser, parser’s
64 // token index and "?" is true, then return true.
65 if (is_non_special_pattern_char(token_index, "?")) {
66 return true;
67 }
68
69 // If parser’s token list[parser’s token index]'s value is not "?", then
70 // return false.
71 if (token_list[token_index].value != "?") {
72 return false;
73 }
74
75 // If previous index is less than 0, then return true.
76 if (token_index == 0) return true;
77 // Let previous index be parser’s token index − 1.
78 auto previous_index = token_index - 1;
79 // Let previous token be the result of running get a safe token given parser
80 // and previous index.
81 auto previous_token = get_safe_token(previous_index);
82 ADA_ASSERT_TRUE(previous_token);
83 // If any of the following are true, then return false:
84 // - previous token’s type is "name".
85 // - previous token’s type is "regexp".
86 // - previous token’s type is "close".
87 // - previous token’s type is "asterisk".
88 return !(previous_token->type == token_type::NAME ||
89 previous_token->type == token_type::REGEXP ||
90 previous_token->type == token_type::CLOSE ||
91 previous_token->type == token_type::ASTERISK);
92}
93
94template <url_pattern_regex::regex_concept regex_provider>
95bool constructor_string_parser<regex_provider>::is_non_special_pattern_char(
96 size_t index, std::string_view value) const {
97 // Let token be the result of running get a safe token given parser and index.
98 auto token = get_safe_token(index);
100
101 // If token’s value is not value, then return false.
102 if (token->value != value) {
103 return false;
104 }
105
106 // If any of the following are true:
107 // - token’s type is "char";
108 // - token’s type is "escaped-char"; or
109 // - token’s type is "invalid-char",
110 // - then return true.
111 return token->type == token_type::CHAR ||
112 token->type == token_type::ESCAPED_CHAR ||
113 token->type == token_type::INVALID_CHAR;
114}
115
116template <url_pattern_regex::regex_concept regex_provider>
117const token* constructor_string_parser<regex_provider>::get_safe_token(
118 size_t index) const {
119 // If index is less than parser’s token list's size, then return parser’s
120 // token list[index].
121 if (index < token_list.size()) [[likely]] {
122 return &token_list[index];
123 }
124
125 // Assert: parser’s token list's size is greater than or equal to 1.
126 ADA_ASSERT_TRUE(!token_list.empty());
127
128 // Let token be parser’s token list[last index].
129 // Assert: token’s type is "end".
130 ADA_ASSERT_TRUE(token_list.back().type == token_type::END);
131
132 // Return token.
133 return &token_list.back();
134}
135
136template <url_pattern_regex::regex_concept regex_provider>
138 // If parser’s token list[parser’s token index]'s type is "open", then return
139 // true.
140 return token_list[token_index].type == token_type::OPEN;
141}
142
143template <url_pattern_regex::regex_concept regex_provider>
145 // If parser’s token list[parser’s token index]'s type is "close", then return
146 // true.
147 return token_list[token_index].type == token_type::CLOSE;
148}
149
150template <url_pattern_regex::regex_concept regex_provider>
152 const {
153 // If the result of running is a non-special pattern char given parser,
154 // parser’s token index + 1, and "/" is false, then return false.
155 if (!is_non_special_pattern_char(token_index + 1, "/")) {
156 return false;
157 }
158 // If the result of running is a non-special pattern char given parser,
159 // parser’s token index + 2, and "/" is false, then return false.
160 if (!is_non_special_pattern_char(token_index + 2, "/")) {
161 return false;
162 }
163 return true;
164}
165
166template <url_pattern_regex::regex_concept regex_provider>
168 // Return the result of running is a non-special pattern char given parser,
169 // parser’s token index, and ":".
170 return is_non_special_pattern_char(token_index, ":");
171}
172
173template <url_pattern_regex::regex_concept regex_provider>
175 size_t skip) {
176 // If parser’s state is not "init", not "authority", and not "done", then set
177 // parser’s result[parser’s state] to the result of running make a component
178 // string given parser.
179 if (state != State::INIT && state != State::AUTHORITY &&
180 state != State::DONE) {
181 auto value = make_component_string();
182 // TODO: Simplify this.
183 switch (state) {
184 case State::PROTOCOL: {
185 result.protocol = value;
186 break;
187 }
188 case State::USERNAME: {
189 result.username = value;
190 break;
191 }
192 case State::PASSWORD: {
193 result.password = value;
194 break;
195 }
196 case State::HOSTNAME: {
197 result.hostname = value;
198 break;
199 }
200 case State::PORT: {
201 result.port = value;
202 break;
203 }
204 case State::PATHNAME: {
205 result.pathname = value;
206 break;
207 }
208 case State::SEARCH: {
209 result.search = value;
210 break;
211 }
212 case State::HASH: {
213 result.hash = value;
214 break;
215 }
216 default:
218 }
219 }
220
221 // If parser’s state is not "init" and new state is not "done", then:
222 if (state != State::INIT && new_state != State::DONE) {
223 // If parser’s state is "protocol", "authority", "username", or "password";
224 // new state is "port", "pathname", "search", or "hash"; and parser’s
225 // result["hostname"] does not exist, then set parser’s result["hostname"]
226 // to the empty string.
227 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
228 state == State::USERNAME || state == State::PASSWORD) &&
229 (new_state == State::PORT || new_state == State::PATHNAME ||
230 new_state == State::SEARCH || new_state == State::HASH) &&
231 !result.hostname)
232 result.hostname = "";
233 }
234
235 // If parser’s state is "protocol", "authority", "username", "password",
236 // "hostname", or "port"; new state is "search" or "hash"; and parser’s
237 // result["pathname"] does not exist, then:
238 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
239 state == State::USERNAME || state == State::PASSWORD ||
240 state == State::HOSTNAME || state == State::PORT) &&
241 (new_state == State::SEARCH || new_state == State::HASH) &&
242 !result.pathname) {
243 if (protocol_matches_a_special_scheme_flag) {
244 result.pathname = "/";
245 } else {
246 // Otherwise, set parser’s result["pathname"] to the empty string.
247 result.pathname = "";
248 }
249 }
250
251 // If parser’s state is "protocol", "authority", "username", "password",
252 // "hostname", "port", or "pathname"; new state is "hash"; and parser’s
253 // result["search"] does not exist, then set parser’s result["search"] to
254 // the empty string.
255 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
256 state == State::USERNAME || state == State::PASSWORD ||
257 state == State::HOSTNAME || state == State::PORT ||
258 state == State::PATHNAME) &&
259 new_state == State::HASH && !result.search) {
260 result.search = "";
261 }
262
263 // Set parser’s state to new state.
264 state = new_state;
265 // Increment parser’s token index by skip.
266 token_index += skip;
267 // Set parser’s component start to parser’s token index.
268 component_start = token_index;
269 // Set parser’s token increment to 0.
270 token_increment = 0;
271}
272
273template <url_pattern_regex::regex_concept regex_provider>
274std::string constructor_string_parser<regex_provider>::make_component_string() {
275 // Assert: parser’s token index is less than parser’s token list's size.
276 ADA_ASSERT_TRUE(token_index < token_list.size());
277
278 // Let token be parser’s token list[parser’s token index].
279 // Let end index be token’s index.
280 const auto end_index = token_list[token_index].index;
281 // Let component start token be the result of running get a safe token given
282 // parser and parser’s component start.
283 const auto component_start_token = get_safe_token(component_start);
284 ADA_ASSERT_TRUE(component_start_token);
285 // Let component start input index be component start token’s index.
286 const auto component_start_input_index = component_start_token->index;
287 // Return the code point substring from component start input index to end
288 // index within parser’s input.
289 return input.substr(component_start_input_index,
290 end_index - component_start_input_index);
291}
292
293template <url_pattern_regex::regex_concept regex_provider>
295 const {
296 // Return the result of running is a non-special pattern char given parser,
297 // parser’s token index, and "@".
298 return is_non_special_pattern_char(token_index, "@");
299}
300
301template <url_pattern_regex::regex_concept regex_provider>
303 // Return the result of running is a non-special pattern char given parser,
304 // parser’s token index, and "/".
305 return is_non_special_pattern_char(token_index, "/");
306}
307
308template <url_pattern_regex::regex_concept regex_provider>
310 // Return the result of running is a non-special pattern char given parser,
311 // parser’s token index, and ":".
312 return is_non_special_pattern_char(token_index, ":");
313}
314
315template <url_pattern_regex::regex_concept regex_provider>
317 // Return the result of running is a non-special pattern char given parser,
318 // parser’s token index, and "[".
319 return is_non_special_pattern_char(token_index, "[");
320}
321
322template <url_pattern_regex::regex_concept regex_provider>
324 // Return the result of running is a non-special pattern char given parser,
325 // parser’s token index, and "]".
326 return is_non_special_pattern_char(token_index, "]");
327}
328
329template <url_pattern_regex::regex_concept regex_provider>
331 // Return the result of running is a non-special pattern char given parser,
332 // parser’s token index, and ":".
333 return is_non_special_pattern_char(token_index, ":");
334}
335
337 ada_log("Tokenizer::get_next_code_point called with index=", next_index);
338 ADA_ASSERT_TRUE(next_index < input.size());
339 // this assumes that we have a valid, non-truncated UTF-8 stream.
340 code_point = 0;
341 size_t number_bytes = 0;
342 unsigned char first_byte = input[next_index];
343
344 if ((first_byte & 0x80) == 0) {
345 // 1-byte character (ASCII)
346 next_index++;
347 code_point = first_byte;
348 ada_log("Tokenizer::get_next_code_point returning ASCII code point=",
349 uint32_t(code_point));
350 ada_log("Tokenizer::get_next_code_point next_index =", next_index,
351 " input.size()=", input.size());
352 return;
353 }
354 ada_log("Tokenizer::get_next_code_point read first byte=",
355 uint32_t(first_byte));
356 if ((first_byte & 0xE0) == 0xC0) {
357 code_point = first_byte & 0x1F;
358 number_bytes = 2;
359 ada_log("Tokenizer::get_next_code_point two bytes");
360 } else if ((first_byte & 0xF0) == 0xE0) {
361 code_point = first_byte & 0x0F;
362 number_bytes = 3;
363 ada_log("Tokenizer::get_next_code_point three bytes");
364 } else if ((first_byte & 0xF8) == 0xF0) {
365 code_point = first_byte & 0x07;
366 number_bytes = 4;
367 ada_log("Tokenizer::get_next_code_point four bytes");
368 }
369 ADA_ASSERT_TRUE(number_bytes + next_index <= input.size());
370
371 for (size_t i = 1 + next_index; i < number_bytes + next_index; ++i) {
372 unsigned char byte = input[i];
373 ada_log("Tokenizer::get_next_code_point read byte=", uint32_t(byte));
374 code_point = (code_point << 6) | (byte & 0x3F);
375 }
376 ada_log("Tokenizer::get_next_code_point returning non-ASCII code point=",
377 uint32_t(code_point));
378 ada_log("Tokenizer::get_next_code_point next_index =", next_index,
379 " input.size()=", input.size());
380 next_index += number_bytes;
381}
382
383inline void Tokenizer::seek_and_get_next_code_point(size_t new_index) {
384 ada_log("Tokenizer::seek_and_get_next_code_point called with new_index=",
385 new_index);
386 // Set tokenizer’s next index to index.
387 next_index = new_index;
388 // Run get the next code point given tokenizer.
390}
391
392inline void Tokenizer::add_token(token_type type, size_t next_position,
393 size_t value_position, size_t value_length) {
394 ada_log("Tokenizer::add_token called with type=", to_string(type),
395 " next_position=", next_position, " value_position=", value_position);
396 ADA_ASSERT_TRUE(next_position >= value_position);
397
398 // Let token be a new token.
399 // Set token’s type to type.
400 // Set token’s index to tokenizer’s index.
401 // Set token’s value to the code point substring from value position with
402 // length value length within tokenizer’s input.
403 // Append token to the back of tokenizer’s token list.
404 token_list.emplace_back(type, index,
405 input.substr(value_position, value_length));
406 // Set tokenizer’s index to next position.
407 index = next_position;
408}
409
411 size_t next_position,
412 size_t value_position) {
413 // Let computed length be next position − value position.
414 auto computed_length = next_position - value_position;
415 // Run add a token given tokenizer, type, next position, value position, and
416 // computed length.
417 add_token(type, next_position, value_position, computed_length);
418}
419
421 ada_log("Tokenizer::add_token_with_defaults called with type=",
422 to_string(type));
423 // Run add a token with default length given tokenizer, type, tokenizer’s next
424 // index, and tokenizer’s index.
425 add_token_with_default_length(type, next_index, index);
426}
427
428inline ada_warn_unused std::optional<errors>
430 size_t value_position) {
431 // If tokenizer’s policy is "strict", then throw a TypeError.
432 if (policy == token_policy::strict) {
433 ada_log("process_tokenizing_error failed with next_position=",
434 next_position, " value_position=", value_position);
435 return errors::type_error;
436 }
437 // Assert: tokenizer’s policy is "lenient".
439 // Run add a token with default length given tokenizer, "invalid-char", next
440 // position, and value position.
442 value_position);
443 return std::nullopt;
444}
445
446template <url_pattern_encoding_callback F>
448 // Let token be the result of running try to consume a token given parser and
449 // "other-modifier".
451 // If token is not null, then return token.
452 if (token) return token;
453 // Set token to the result of running try to consume a token given parser and
454 // "asterisk".
455 // Return token.
457}
458
459template <url_pattern_encoding_callback F>
461 const token* name_token) {
462 // Let token be the result of running try to consume a token given parser and
463 // "regexp".
465 // If name token is null and token is null, then set token to the result of
466 // running try to consume a token given parser and "asterisk".
467 if (!name_token && !token) {
469 }
470 // Return token.
471 return token;
472}
473
474template <url_pattern_encoding_callback F>
476 ada_log("url_pattern_parser::try_consume_token called with type=",
477 to_string(type));
478 // Assert: parser’s index is less than parser’s token list size.
479 ADA_ASSERT_TRUE(index < tokens.size());
480 // Let next token be parser’s token list[parser’s index].
481 auto& next_token = tokens[index];
482 // If next token’s type is not type return null.
483 if (next_token.type != type) return nullptr;
484 // Increase parser’s index by 1.
485 index++;
486 // Return next token.
487 return &next_token;
488}
489
490template <url_pattern_encoding_callback F>
492 // Let result be the empty string.
493 std::string result{};
494 // While true:
495 while (true) {
496 // Let token be the result of running try to consume a token given parser
497 // and "char".
499 // If token is null, then set token to the result of running try to consume
500 // a token given parser and "escaped-char".
502 // If token is null, then break.
503 if (!token) break;
504 // Append token’s value to the end of result.
505 result.append(token->value);
506 }
507 // Return result.
508 return result;
509}
510
511template <url_pattern_encoding_callback F>
513 ada_log("url_pattern_parser::consume_required_token called with type=",
514 to_string(type));
515 // Let result be the result of running try to consume a token given parser and
516 // type.
517 return try_consume_token(type) != nullptr;
518}
519
520template <url_pattern_encoding_callback F>
521std::optional<errors>
523 // If parser’s pending fixed value is the empty string, then return.
524 if (pending_fixed_value.empty()) {
525 ada_log("pending_fixed_value is empty");
526 return std::nullopt;
527 }
528 // Let encoded value be the result of running parser’s encoding callback given
529 // parser’s pending fixed value.
530 auto encoded_value = encoding_callback(pending_fixed_value);
531 if (!encoded_value) {
532 ada_log("failed to encode pending_fixed_value: ", pending_fixed_value);
533 return encoded_value.error();
534 }
535 // Set parser’s pending fixed value to the empty string.
536 pending_fixed_value.clear();
537 // Let part be a new part whose type is "fixed-text", value is encoded value,
538 // and modifier is "none".
539 // Append part to parser’s part list.
541 std::move(*encoded_value),
543 return std::nullopt;
544}
545
546template <url_pattern_encoding_callback F>
548 std::string_view prefix, token* name_token, token* regexp_or_wildcard_token,
549 std::string_view suffix, token* modifier_token) {
550 // Let modifier be "none".
551 auto modifier = url_pattern_part_modifier::none;
552 // If modifier token is not null:
553 if (modifier_token) {
554 // If modifier token’s value is "?" then set modifier to "optional".
555 if (modifier_token->value == "?") {
557 } else if (modifier_token->value == "*") {
558 // Otherwise if modifier token’s value is "*" then set modifier to
559 // "zero-or-more".
561 } else if (modifier_token->value == "+") {
562 // Otherwise if modifier token’s value is "+" then set modifier to
563 // "one-or-more".
565 }
566 }
567 // If name token is null and regexp or wildcard token is null and modifier
568 // is "none":
569 if (!name_token && !regexp_or_wildcard_token &&
571 // Append prefix to the end of parser’s pending fixed value.
572 pending_fixed_value.append(prefix);
573 return std::nullopt;
574 }
575 // Run maybe add a part from the pending fixed value given parser.
577 return *error;
578 }
579 // If name token is null and regexp or wildcard token is null:
580 if (!name_token && !regexp_or_wildcard_token) {
581 // Assert: suffix is the empty string.
582 ADA_ASSERT_TRUE(suffix.empty());
583 // If prefix is the empty string, then return.
584 if (prefix.empty()) return std::nullopt;
585 // Let encoded value be the result of running parser’s encoding callback
586 // given prefix.
587 auto encoded_value = encoding_callback(prefix);
588 if (!encoded_value) {
589 return encoded_value.error();
590 }
591 // Let part be a new part whose type is "fixed-text", value is encoded
592 // value, and modifier is modifier.
593 // Append part to parser’s part list.
595 std::move(*encoded_value), modifier);
596 return std::nullopt;
597 }
598 // Let regexp value be the empty string.
599 std::string regexp_value{};
600 // If regexp or wildcard token is null, then set regexp value to parser’s
601 // segment wildcard regexp.
602 if (!regexp_or_wildcard_token) {
603 regexp_value = segment_wildcard_regexp;
604 } else if (regexp_or_wildcard_token->type == token_type::ASTERISK) {
605 // Otherwise if regexp or wildcard token’s type is "asterisk", then set
606 // regexp value to the full wildcard regexp value.
607 regexp_value = ".*";
608 } else {
609 // Otherwise set regexp value to regexp or wildcard token’s value.
610 regexp_value = regexp_or_wildcard_token->value;
611 }
612 // Let type be "regexp".
614 // If regexp value is parser’s segment wildcard regexp:
615 if (regexp_value == segment_wildcard_regexp) {
616 // Set type to "segment-wildcard".
618 // Set regexp value to the empty string.
619 regexp_value.clear();
620 } else if (regexp_value == ".*") {
621 // Otherwise if regexp value is the full wildcard regexp value:
622 // Set type to "full-wildcard".
624 // Set regexp value to the empty string.
625 regexp_value.clear();
626 }
627 // Let name be the empty string.
628 std::string name{};
629 // If name token is not null, then set name to name token’s value.
630 if (name_token) {
631 name = name_token->value;
632 } else if (regexp_or_wildcard_token != nullptr) {
633 // Otherwise if regexp or wildcard token is not null:
634 // Set name to parser’s next numeric name, serialized.
635 name = std::to_string(next_numeric_name);
636 // Increment parser’s next numeric name by 1.
638 }
639 // If the result of running is a duplicate name given parser and name is
640 // true, then throw a TypeError.
641 if (std::ranges::any_of(
642 parts, [&name](const auto& part) { return part.name == name; })) {
643 return errors::type_error;
644 }
645 // Let encoded prefix be the result of running parser’s encoding callback
646 // given prefix.
647 auto encoded_prefix = encoding_callback(prefix);
648 if (!encoded_prefix) return encoded_prefix.error();
649 // Let encoded suffix be the result of running parser’s encoding callback
650 // given suffix.
651 auto encoded_suffix = encoding_callback(suffix);
652 if (!encoded_suffix) return encoded_suffix.error();
653 // Let part be a new part whose type is type, value is regexp value,
654 // modifier is modifier, name is name, prefix is encoded prefix, and suffix
655 // is encoded suffix.
656 // Append part to parser’s part list.
657 parts.emplace_back(type, std::move(regexp_value), modifier, std::move(name),
658 std::move(*encoded_prefix), std::move(*encoded_suffix));
659 return std::nullopt;
660}
661
662template <url_pattern_encoding_callback F>
663tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
664 std::string_view input, url_pattern_compile_component_options& options,
665 F& encoding_callback) {
666 ada_log("parse_pattern_string input=", input);
667 // Let parser be a new pattern parser whose encoding callback is encoding
668 // callback and segment wildcard regexp is the result of running generate a
669 // segment wildcard regexp given options.
671 encoding_callback, generate_segment_wildcard_regexp(options));
672 // Set parser’s token list to the result of running tokenize given input and
673 // "strict".
674 auto tokenize_result = tokenize(input, token_policy::strict);
675 if (!tokenize_result) {
676 ada_log("parse_pattern_string tokenize failed");
677 return tl::unexpected(tokenize_result.error());
678 }
679 parser.tokens = std::move(*tokenize_result);
680
681 // While parser’s index is less than parser’s token list's size:
682 while (parser.can_continue()) {
683 // Let char token be the result of running try to consume a token given
684 // parser and "char".
685 auto char_token = parser.try_consume_token(token_type::CHAR);
686 // Let name token be the result of running try to consume a token given
687 // parser and "name".
688 auto name_token = parser.try_consume_token(token_type::NAME);
689 // Let regexp or wildcard token be the result of running try to consume a
690 // regexp or wildcard token given parser and name token.
691 auto regexp_or_wildcard_token =
692 parser.try_consume_regexp_or_wildcard_token(name_token);
693 // If name token is not null or regexp or wildcard token is not null:
694 if (name_token || regexp_or_wildcard_token) {
695 // Let prefix be the empty string.
696 std::string prefix{};
697 // If char token is not null then set prefix to char token’s value.
698 if (char_token) prefix = char_token->value;
699 // If prefix is not the empty string and not options’s prefix code point:
700 if (!prefix.empty() && prefix != options.get_prefix()) {
701 // Append prefix to the end of parser’s pending fixed value.
702 parser.pending_fixed_value.append(prefix);
703 // Set prefix to the empty string.
704 prefix.clear();
705 }
706 // Run maybe add a part from the pending fixed value given parser.
707 if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) {
708 ada_log("maybe_add_part_from_the_pending_fixed_value failed");
709 return tl::unexpected(*error);
710 }
711 // Let modifier token be the result of running try to consume a modifier
712 // token given parser.
713 auto modifier_token = parser.try_consume_modifier_token();
714 // Run add a part given parser, prefix, name token, regexp or wildcard
715 // token, the empty string, and modifier token.
716 if (auto error =
717 parser.add_part(prefix, name_token, regexp_or_wildcard_token, "",
718 modifier_token)) {
719 ada_log("parser.add_part failed");
720 return tl::unexpected(*error);
721 }
722 // Continue.
723 continue;
724 }
725
726 // Let fixed token be char token.
727 auto fixed_token = char_token;
728 // If fixed token is null, then set fixed token to the result of running try
729 // to consume a token given parser and "escaped-char".
730 if (!fixed_token)
731 fixed_token = parser.try_consume_token(token_type::ESCAPED_CHAR);
732 // If fixed token is not null:
733 if (fixed_token) {
734 // Append fixed token’s value to parser’s pending fixed value.
735 parser.pending_fixed_value.append(fixed_token->value);
736 // Continue.
737 continue;
738 }
739 // Let open token be the result of running try to consume a token given
740 // parser and "open".
741 auto open_token = parser.try_consume_token(token_type::OPEN);
742 // If open token is not null:
743 if (open_token) {
744 // Set prefix be the result of running consume text given parser.
745 auto prefix_ = parser.consume_text();
746 // Set name token to the result of running try to consume a token given
747 // parser and "name".
748 name_token = parser.try_consume_token(token_type::NAME);
749 // Set regexp or wildcard token to the result of running try to consume a
750 // regexp or wildcard token given parser and name token.
751 regexp_or_wildcard_token =
752 parser.try_consume_regexp_or_wildcard_token(name_token);
753 // Let suffix be the result of running consume text given parser.
754 auto suffix_ = parser.consume_text();
755 // Run consume a required token given parser and "close".
756 if (!parser.consume_required_token(token_type::CLOSE)) {
757 ada_log("parser.consume_required_token failed");
758 return tl::unexpected(errors::type_error);
759 }
760 // Set modifier token to the result of running try to consume a modifier
761 // token given parser.
762 auto modifier_token = parser.try_consume_modifier_token();
763 // Run add a part given parser, prefix, name token, regexp or wildcard
764 // token, suffix, and modifier token.
765 if (auto error =
766 parser.add_part(prefix_, name_token, regexp_or_wildcard_token,
767 suffix_, modifier_token)) {
768 return tl::unexpected(*error);
769 }
770 // Continue.
771 continue;
772 }
773 // Run maybe add a part from the pending fixed value given parser.
774 if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) {
775 ada_log("maybe_add_part_from_the_pending_fixed_value failed on line 992");
776 return tl::unexpected(*error);
777 }
778 // Run consume a required token given parser and "end".
779 if (!parser.consume_required_token(token_type::END)) {
780 return tl::unexpected(errors::type_error);
781 }
782 }
783 ada_log("parser.parts size is: ", parser.parts.size());
784 // Return parser’s part list.
785 return parser.parts;
786}
787
788template <url_pattern_regex::regex_concept regex_provider>
791 // let's avoid unnecessary copy here.
792 auto& regex = component.regexp;
793 return regex_provider::regex_match("http", regex) ||
794 regex_provider::regex_match("https", regex) ||
795 regex_provider::regex_match("ws", regex) ||
796 regex_provider::regex_match("wss", regex) ||
797 regex_provider::regex_match("ftp", regex);
798}
799
800template <url_pattern_regex::regex_concept regex_provider>
801inline std::optional<errors> constructor_string_parser<
803 ada_log(
804 "constructor_string_parser::compute_protocol_matches_special_scheme_"
805 "flag");
806 // Let protocol string be the result of running make a component string given
807 // parser.
808 auto protocol_string = make_component_string();
809 // Let protocol component be the result of compiling a component given
810 // protocol string, canonicalize a protocol, and default options.
811 auto protocol_component = url_pattern_component<regex_provider>::compile(
812 protocol_string, canonicalize_protocol,
814 if (!protocol_component) {
815 ada_log("url_pattern_component::compile failed for protocol_string ",
816 protocol_string);
817 return protocol_component.error();
818 }
819 // If the result of running protocol component matches a special scheme given
820 // protocol component is true, then set parser’s protocol matches a special
821 // scheme flag to true.
822 if (protocol_component_matches_special_scheme(*protocol_component)) {
823 protocol_matches_a_special_scheme_flag = true;
824 }
825 return std::nullopt;
826}
827
828template <url_pattern_regex::regex_concept regex_provider>
829tl::expected<url_pattern_init, errors>
831 ada_log("constructor_string_parser::parse input=", input);
832 // Let parser be a new constructor string parser whose input is input and
833 // token list is the result of running tokenize given input and "lenient".
834 auto token_list = tokenize(input, token_policy::lenient);
835 if (!token_list) {
836 return tl::unexpected(token_list.error());
837 }
838 auto parser = constructor_string_parser(input, std::move(*token_list));
839
840 // While parser’s token index is less than parser’s token list size:
841 while (parser.token_index < parser.token_list.size()) {
842 // Set parser’s token increment to 1.
843 parser.token_increment = 1;
844
845 // If parser’s token list[parser’s token index]'s type is "end" then:
846 if (parser.token_list[parser.token_index].type == token_type::END) {
847 // If parser’s state is "init":
848 if (parser.state == State::INIT) {
849 // Run rewind given parser.
850 parser.rewind();
851 // If the result of running is a hash prefix given parser is true, then
852 // run change state given parser, "hash" and 1.
853 if (parser.is_hash_prefix()) {
854 parser.change_state(State::HASH, 1);
855 } else if (parser.is_search_prefix()) {
856 // Otherwise if the result of running is a search prefix given parser
857 // is true: Run change state given parser, "search" and 1.
858 parser.change_state(State::SEARCH, 1);
859 } else {
860 // Run change state given parser, "pathname" and 0.
861 parser.change_state(State::PATHNAME, 0);
862 }
863 // Increment parser’s token index by parser’s token increment.
864 parser.token_index += parser.token_increment;
865 // Continue.
866 continue;
867 }
868
869 if (parser.state == State::AUTHORITY) {
870 // If parser’s state is "authority":
871 // Run rewind and set state given parser, and "hostname".
872 parser.rewind();
873 parser.change_state(State::HOSTNAME, 0);
874 // Increment parser’s token index by parser’s token increment.
875 parser.token_index += parser.token_increment;
876 // Continue.
877 continue;
878 }
879
880 // Run change state given parser, "done" and 0.
881 parser.change_state(State::DONE, 0);
882 // Break.
883 break;
884 }
885
886 // If the result of running is a group open given parser is true:
887 if (parser.is_group_open()) {
888 // Increment parser’s group depth by 1.
889 parser.group_depth += 1;
890 // Increment parser’s token index by parser’s token increment.
891 parser.token_index += parser.token_increment;
892 }
893
894 // If parser’s group depth is greater than 0:
895 if (parser.group_depth > 0) {
896 // If the result of running is a group close given parser is true, then
897 // decrement parser’s group depth by 1.
898 if (parser.is_group_close()) {
899 parser.group_depth -= 1;
900 } else {
901 // Increment parser’s token index by parser’s token increment.
902 parser.token_index += parser.token_increment;
903 continue;
904 }
905 }
906
907 // Switch on parser’s state and run the associated steps:
908 switch (parser.state) {
909 case State::INIT: {
910 // If the result of running is a protocol suffix given parser is true:
911 if (parser.is_protocol_suffix()) {
912 // Run rewind and set state given parser and "protocol".
913 parser.rewind();
914 parser.change_state(State::PROTOCOL, 0);
915 }
916 break;
917 }
918 case State::PROTOCOL: {
919 // If the result of running is a protocol suffix given parser is true:
920 if (parser.is_protocol_suffix()) {
921 // Run compute protocol matches a special scheme flag given parser.
922 if (const auto error =
923 parser.compute_protocol_matches_special_scheme_flag()) {
924 ada_log("compute_protocol_matches_special_scheme_flag failed");
925 return tl::unexpected(*error);
926 }
927 // Let next state be "pathname".
928 auto next_state = State::PATHNAME;
929 // Let skip be 1.
930 auto skip = 1;
931 // If the result of running next is authority slashes given parser is
932 // true:
933 if (parser.next_is_authority_slashes()) {
934 // Set next state to "authority".
935 next_state = State::AUTHORITY;
936 // Set skip to 3.
937 skip = 3;
938 } else if (parser.protocol_matches_a_special_scheme_flag) {
939 // Otherwise if parser’s protocol matches a special scheme flag is
940 // true, then set next state to "authority".
941 next_state = State::AUTHORITY;
942 }
943
944 // Run change state given parser, next state, and skip.
945 parser.change_state(next_state, skip);
946 }
947 break;
948 }
949 case State::AUTHORITY: {
950 // If the result of running is an identity terminator given parser is
951 // true, then run rewind and set state given parser and "username".
952 if (parser.is_an_identity_terminator()) {
953 parser.rewind();
954 parser.change_state(State::USERNAME, 0);
955 } else if (parser.is_pathname_start() || parser.is_search_prefix() ||
956 parser.is_hash_prefix()) {
957 // Otherwise if any of the following are true:
958 // - the result of running is a pathname start given parser;
959 // - the result of running is a search prefix given parser; or
960 // - the result of running is a hash prefix given parser,
961 // then run rewind and set state given parser and "hostname".
962 parser.rewind();
963 parser.change_state(State::HOSTNAME, 0);
964 }
965 break;
966 }
967 case State::USERNAME: {
968 // If the result of running is a password prefix given parser is true,
969 // then run change state given parser, "password", and 1.
970 if (parser.is_password_prefix()) {
971 parser.change_state(State::PASSWORD, 1);
972 } else if (parser.is_an_identity_terminator()) {
973 // Otherwise if the result of running is an identity terminator given
974 // parser is true, then run change state given parser, "hostname",
975 // and 1.
976 parser.change_state(State::HOSTNAME, 1);
977 }
978 break;
979 }
980 case State::PASSWORD: {
981 // If the result of running is an identity terminator given parser is
982 // true, then run change state given parser, "hostname", and 1.
983 if (parser.is_an_identity_terminator()) {
984 parser.change_state(State::HOSTNAME, 1);
985 }
986 break;
987 }
988 case State::HOSTNAME: {
989 // If the result of running is an IPv6 open given parser is true, then
990 // increment parser’s hostname IPv6 bracket depth by 1.
991 if (parser.is_an_ipv6_open()) {
992 parser.hostname_ipv6_bracket_depth += 1;
993 } else if (parser.is_an_ipv6_close()) {
994 // Otherwise if the result of running is an IPv6 close given parser is
995 // true, then decrement parser’s hostname IPv6 bracket depth by 1.
996 parser.hostname_ipv6_bracket_depth -= 1;
997 } else if (parser.is_port_prefix() &&
998 parser.hostname_ipv6_bracket_depth == 0) {
999 // Otherwise if the result of running is a port prefix given parser is
1000 // true and parser’s hostname IPv6 bracket depth is zero, then run
1001 // change state given parser, "port", and 1.
1002 parser.change_state(State::PORT, 1);
1003 } else if (parser.is_pathname_start()) {
1004 // Otherwise if the result of running is a pathname start given parser
1005 // is true, then run change state given parser, "pathname", and 0.
1006 parser.change_state(State::PATHNAME, 0);
1007 } else if (parser.is_search_prefix()) {
1008 // Otherwise if the result of running is a search prefix given parser
1009 // is true, then run change state given parser, "search", and 1.
1010 parser.change_state(State::SEARCH, 1);
1011 } else if (parser.is_hash_prefix()) {
1012 // Otherwise if the result of running is a hash prefix given parser is
1013 // true, then run change state given parser, "hash", and 1.
1014 parser.change_state(State::HASH, 1);
1015 }
1016
1017 break;
1018 }
1019 case State::PORT: {
1020 // If the result of running is a pathname start given parser is true,
1021 // then run change state given parser, "pathname", and 0.
1022 if (parser.is_pathname_start()) {
1023 parser.change_state(State::PATHNAME, 0);
1024 } else if (parser.is_search_prefix()) {
1025 // Otherwise if the result of running is a search prefix given parser
1026 // is true, then run change state given parser, "search", and 1.
1027 parser.change_state(State::SEARCH, 1);
1028 } else if (parser.is_hash_prefix()) {
1029 // Otherwise if the result of running is a hash prefix given parser is
1030 // true, then run change state given parser, "hash", and 1.
1031 parser.change_state(State::HASH, 1);
1032 }
1033 break;
1034 }
1035 case State::PATHNAME: {
1036 // If the result of running is a search prefix given parser is true,
1037 // then run change state given parser, "search", and 1.
1038 if (parser.is_search_prefix()) {
1039 parser.change_state(State::SEARCH, 1);
1040 } else if (parser.is_hash_prefix()) {
1041 // Otherwise if the result of running is a hash prefix given parser is
1042 // true, then run change state given parser, "hash", and 1.
1043 parser.change_state(State::HASH, 1);
1044 }
1045 break;
1046 }
1047 case State::SEARCH: {
1048 // If the result of running is a hash prefix given parser is true, then
1049 // run change state given parser, "hash", and 1.
1050 if (parser.is_hash_prefix()) {
1051 parser.change_state(State::HASH, 1);
1052 }
1053 }
1054 case State::HASH: {
1055 // Do nothing
1056 break;
1057 }
1058 default: {
1059 // Assert: This step is never reached.
1060 unreachable();
1061 }
1062 }
1063
1064 // Increment parser’s token index by parser’s token increment.
1065 parser.token_index += parser.token_increment;
1066 }
1067
1068 // If parser’s result contains "hostname" and not "port", then set parser’s
1069 // result["port"] to the empty string.
1070 if (parser.result.hostname && !parser.result.port) {
1071 parser.result.port = "";
1072 }
1073
1074 // Return parser’s result.
1075 return parser.result;
1076}
1077
1078} // namespace ada::url_pattern_helpers
1079
1080#endif
static tl::expected< url_pattern_component, errors > compile(std::string_view input, F &encoding_callback, url_pattern_compile_component_options &options)
regex_provider::regex_type regexp
void add_token_with_default_length(token_type type, size_t next_position, size_t value_position)
void add_token(token_type type, size_t next_position, size_t value_position, size_t value_length)
std::optional< errors > process_tokenizing_error(size_t next_position, size_t value_position) ada_warn_unused
std::optional< errors > add_part(std::string_view prefix, token *name_token, token *regexp_or_wildcard_token, std::string_view suyffix, token *modifier_token) ada_warn_unused
token * try_consume_regexp_or_wildcard_token(const token *name_token)
std::optional< errors > maybe_add_part_from_the_pending_fixed_value() ada_warn_unused
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
#define ada_warn_unused
Definition common_defs.h:85
Definitions for user-facing functions for parsing a URL and its components.
Includes the definitions for supported parsers.
Definition parser-inl.h:16
bool protocol_component_matches_special_scheme(url_pattern_component< regex_provider > &component)
tl::expected< std::vector< token >, errors > tokenize(std::string_view input, token_policy policy)
std::string generate_segment_wildcard_regexp(url_pattern_compile_component_options options)
tl::expected< std::string, errors > canonicalize_protocol(std::string_view input)
tl::expected< std::vector< url_pattern_part >, errors > parse_pattern_string(std::string_view input, url_pattern_compile_component_options &options, F &encoding_callback)
ada_warn_unused std::string to_string(encoding_type type)
errors
Definition errors.h:10
@ type_error
Definition errors.h:10
void unreachable()
tl::expected< result_type, ada::errors > result
std::string_view get_prefix() const ada_warn_unused
static url_pattern_compile_component_options DEFAULT
constructor_string_parser(std::string_view new_input, std::vector< token > &&new_token_list)
static tl::expected< url_pattern_init, errors > parse(std::string_view input)
ada::url_pattern_regex::std_regex_provider regex_provider
Definition url_pattern.cc:9
Declaration for the URLPattern helpers.