5#ifndef ADA_URL_PATTERN_HELPERS_INL_H
6#define ADA_URL_PATTERN_HELPERS_INL_H
12#include "ada/expected.h"
21 return "INVALID_CHAR";
33 return "ESCAPED_CHAR";
35 return "OTHER_MODIFIER";
46template <url_pattern_regex::regex_concept regex_prov
ider>
49 token_index = component_start;
54template <url_pattern_regex::regex_concept regex_prov
ider>
58 return is_non_special_pattern_char(token_index,
"#");
61template <url_pattern_regex::regex_concept regex_prov
ider>
65 if (is_non_special_pattern_char(token_index,
"?")) {
71 if (token_list[token_index].value !=
"?") {
76 if (token_index == 0)
return true;
78 auto previous_index = token_index - 1;
81 auto previous_token = get_safe_token(previous_index);
94template <url_pattern_regex::regex_concept regex_prov
ider>
95bool constructor_string_parser<regex_provider>::is_non_special_pattern_char(
96 size_t index, std::string_view value)
const {
98 auto token = get_safe_token(index);
116template <url_pattern_regex::regex_concept regex_prov
ider>
117const token* constructor_string_parser<regex_provider>::get_safe_token(
118 size_t index)
const {
121 if (index < token_list.size()) [[likely]] {
122 return &token_list[index];
133 return &token_list.back();
136template <url_pattern_regex::regex_concept regex_prov
ider>
143template <url_pattern_regex::regex_concept regex_prov
ider>
150template <url_pattern_regex::regex_concept regex_prov
ider>
155 if (!is_non_special_pattern_char(token_index + 1,
"/")) {
160 if (!is_non_special_pattern_char(token_index + 2,
"/")) {
166template <url_pattern_regex::regex_concept regex_prov
ider>
170 return is_non_special_pattern_char(token_index,
":");
173template <url_pattern_regex::regex_concept regex_prov
ider>
181 auto value = make_component_string();
185 result.protocol = value;
189 result.username = value;
193 result.password = value;
197 result.hostname = value;
205 result.pathname = value;
209 result.search = value;
232 result.hostname =
"";
243 if (protocol_matches_a_special_scheme_flag) {
244 result.pathname =
"/";
247 result.pathname =
"";
268 component_start = token_index;
273template <url_pattern_regex::regex_concept regex_prov
ider>
274std::string constructor_string_parser<regex_provider>::make_component_string() {
280 const auto end_index = token_list[token_index].index;
283 const auto component_start_token = get_safe_token(component_start);
286 const auto component_start_input_index = component_start_token->index;
289 return input.substr(component_start_input_index,
290 end_index - component_start_input_index);
293template <url_pattern_regex::regex_concept regex_prov
ider>
298 return is_non_special_pattern_char(token_index,
"@");
301template <url_pattern_regex::regex_concept regex_prov
ider>
305 return is_non_special_pattern_char(token_index,
"/");
308template <url_pattern_regex::regex_concept regex_prov
ider>
312 return is_non_special_pattern_char(token_index,
":");
315template <url_pattern_regex::regex_concept regex_prov
ider>
319 return is_non_special_pattern_char(token_index,
"[");
322template <url_pattern_regex::regex_concept regex_prov
ider>
326 return is_non_special_pattern_char(token_index,
"]");
329template <url_pattern_regex::regex_concept regex_prov
ider>
333 return is_non_special_pattern_char(token_index,
":");
337 ada_log(
"Tokenizer::get_next_code_point called with index=", next_index);
341 size_t number_bytes = 0;
342 unsigned char first_byte = input[next_index];
344 if ((first_byte & 0x80) == 0) {
347 code_point = first_byte;
348 ada_log(
"Tokenizer::get_next_code_point returning ASCII code point=",
349 uint32_t(code_point));
350 ada_log(
"Tokenizer::get_next_code_point next_index =", next_index,
351 " input.size()=", input.size());
354 ada_log(
"Tokenizer::get_next_code_point read first byte=",
355 uint32_t(first_byte));
356 if ((first_byte & 0xE0) == 0xC0) {
357 code_point = first_byte & 0x1F;
359 ada_log(
"Tokenizer::get_next_code_point two bytes");
360 }
else if ((first_byte & 0xF0) == 0xE0) {
361 code_point = first_byte & 0x0F;
363 ada_log(
"Tokenizer::get_next_code_point three bytes");
364 }
else if ((first_byte & 0xF8) == 0xF0) {
365 code_point = first_byte & 0x07;
367 ada_log(
"Tokenizer::get_next_code_point four bytes");
371 for (
size_t i = 1 + next_index; i < number_bytes + next_index; ++i) {
372 unsigned char byte = input[i];
373 ada_log(
"Tokenizer::get_next_code_point read byte=", uint32_t(
byte));
374 code_point = (code_point << 6) | (
byte & 0x3F);
376 ada_log(
"Tokenizer::get_next_code_point returning non-ASCII code point=",
377 uint32_t(code_point));
378 ada_log(
"Tokenizer::get_next_code_point next_index =", next_index,
379 " input.size()=", input.size());
380 next_index += number_bytes;
384 ada_log(
"Tokenizer::seek_and_get_next_code_point called with new_index=",
387 next_index = new_index;
393 size_t value_position,
size_t value_length) {
394 ada_log(
"Tokenizer::add_token called with type=",
to_string(type),
395 " next_position=", next_position,
" value_position=", value_position);
404 token_list.emplace_back(type, index,
405 input.substr(value_position, value_length));
407 index = next_position;
411 size_t next_position,
412 size_t value_position) {
414 auto computed_length = next_position - value_position;
417 add_token(type, next_position, value_position, computed_length);
421 ada_log(
"Tokenizer::add_token_with_defaults called with type=",
430 size_t value_position) {
433 ada_log(
"process_tokenizing_error failed with next_position=",
434 next_position,
" value_position=", value_position);
446template <url_pattern_encoding_callback F>
459template <url_pattern_encoding_callback F>
461 const token* name_token) {
467 if (!name_token && !
token) {
474template <url_pattern_encoding_callback F>
476 ada_log(
"url_pattern_parser::try_consume_token called with type=",
483 if (next_token.type != type)
return nullptr;
490template <url_pattern_encoding_callback F>
511template <url_pattern_encoding_callback F>
513 ada_log(
"url_pattern_parser::consume_required_token called with type=",
520template <url_pattern_encoding_callback F>
525 ada_log(
"pending_fixed_value is empty");
531 if (!encoded_value) {
533 return encoded_value.error();
541 std::move(*encoded_value),
546template <url_pattern_encoding_callback F>
548 std::string_view prefix,
token* name_token,
token* regexp_or_wildcard_token,
549 std::string_view suffix,
token* modifier_token) {
553 if (modifier_token) {
555 if (modifier_token->
value ==
"?") {
557 }
else if (modifier_token->
value ==
"*") {
561 }
else if (modifier_token->
value ==
"+") {
569 if (!name_token && !regexp_or_wildcard_token &&
580 if (!name_token && !regexp_or_wildcard_token) {
584 if (prefix.empty())
return std::nullopt;
588 if (!encoded_value) {
589 return encoded_value.error();
595 std::move(*encoded_value), modifier);
599 std::string regexp_value{};
602 if (!regexp_or_wildcard_token) {
610 regexp_value = regexp_or_wildcard_token->
value;
619 regexp_value.clear();
620 }
else if (regexp_value ==
".*") {
625 regexp_value.clear();
631 name = name_token->
value;
632 }
else if (regexp_or_wildcard_token !=
nullptr) {
641 if (std::ranges::any_of(
642 parts, [&name](
const auto& part) {
return part.name == name; })) {
648 if (!encoded_prefix)
return encoded_prefix.error();
652 if (!encoded_suffix)
return encoded_suffix.error();
657 parts.emplace_back(type, std::move(regexp_value), modifier, std::move(name),
658 std::move(*encoded_prefix), std::move(*encoded_suffix));
662template <url_pattern_encoding_callback F>
665 F& encoding_callback) {
666 ada_log(
"parse_pattern_string input=", input);
675 if (!tokenize_result) {
676 ada_log(
"parse_pattern_string tokenize failed");
677 return tl::unexpected(tokenize_result.error());
679 parser.tokens = std::move(*tokenize_result);
682 while (
parser.can_continue()) {
691 auto regexp_or_wildcard_token =
692 parser.try_consume_regexp_or_wildcard_token(name_token);
694 if (name_token || regexp_or_wildcard_token) {
696 std::string prefix{};
698 if (char_token) prefix = char_token->value;
700 if (!prefix.empty() && prefix != options.
get_prefix()) {
702 parser.pending_fixed_value.append(prefix);
707 if (
auto error =
parser.maybe_add_part_from_the_pending_fixed_value()) {
708 ada_log(
"maybe_add_part_from_the_pending_fixed_value failed");
709 return tl::unexpected(*error);
713 auto modifier_token =
parser.try_consume_modifier_token();
717 parser.add_part(prefix, name_token, regexp_or_wildcard_token,
"",
719 ada_log(
"parser.add_part failed");
720 return tl::unexpected(*error);
727 auto fixed_token = char_token;
735 parser.pending_fixed_value.append(fixed_token->value);
745 auto prefix_ =
parser.consume_text();
751 regexp_or_wildcard_token =
752 parser.try_consume_regexp_or_wildcard_token(name_token);
754 auto suffix_ =
parser.consume_text();
757 ada_log(
"parser.consume_required_token failed");
762 auto modifier_token =
parser.try_consume_modifier_token();
766 parser.add_part(prefix_, name_token, regexp_or_wildcard_token,
767 suffix_, modifier_token)) {
768 return tl::unexpected(*error);
774 if (
auto error =
parser.maybe_add_part_from_the_pending_fixed_value()) {
775 ada_log(
"maybe_add_part_from_the_pending_fixed_value failed on line 992");
776 return tl::unexpected(*error);
783 ada_log(
"parser.parts size is: ",
parser.parts.size());
788template <url_pattern_regex::regex_concept regex_prov
ider>
792 auto& regex = component.
regexp;
793 return regex_provider::regex_match(
"http", regex) ||
794 regex_provider::regex_match(
"https", regex) ||
795 regex_provider::regex_match(
"ws", regex) ||
796 regex_provider::regex_match(
"wss", regex) ||
797 regex_provider::regex_match(
"ftp", regex);
800template <url_pattern_regex::regex_concept regex_prov
ider>
801inline std::optional<errors> constructor_string_parser<
804 "constructor_string_parser::compute_protocol_matches_special_scheme_"
808 auto protocol_string = make_component_string();
814 if (!protocol_component) {
815 ada_log(
"url_pattern_component::compile failed for protocol_string ",
817 return protocol_component.error();
823 protocol_matches_a_special_scheme_flag =
true;
828template <url_pattern_regex::regex_concept regex_prov
ider>
829tl::expected<url_pattern_init, errors>
831 ada_log(
"constructor_string_parser::parse input=", input);
836 return tl::unexpected(token_list.error());
843 parser.token_increment = 1;
853 if (
parser.is_hash_prefix()) {
855 }
else if (
parser.is_search_prefix()) {
887 if (
parser.is_group_open()) {
895 if (
parser.group_depth > 0) {
898 if (
parser.is_group_close()) {
911 if (
parser.is_protocol_suffix()) {
920 if (
parser.is_protocol_suffix()) {
922 if (
const auto error =
923 parser.compute_protocol_matches_special_scheme_flag()) {
924 ada_log(
"compute_protocol_matches_special_scheme_flag failed");
925 return tl::unexpected(*error);
933 if (
parser.next_is_authority_slashes()) {
938 }
else if (
parser.protocol_matches_a_special_scheme_flag) {
945 parser.change_state(next_state, skip);
952 if (
parser.is_an_identity_terminator()) {
955 }
else if (
parser.is_pathname_start() ||
parser.is_search_prefix() ||
956 parser.is_hash_prefix()) {
970 if (
parser.is_password_prefix()) {
972 }
else if (
parser.is_an_identity_terminator()) {
983 if (
parser.is_an_identity_terminator()) {
991 if (
parser.is_an_ipv6_open()) {
992 parser.hostname_ipv6_bracket_depth += 1;
993 }
else if (
parser.is_an_ipv6_close()) {
996 parser.hostname_ipv6_bracket_depth -= 1;
997 }
else if (
parser.is_port_prefix() &&
998 parser.hostname_ipv6_bracket_depth == 0) {
1003 }
else if (
parser.is_pathname_start()) {
1007 }
else if (
parser.is_search_prefix()) {
1011 }
else if (
parser.is_hash_prefix()) {
1022 if (
parser.is_pathname_start()) {
1024 }
else if (
parser.is_search_prefix()) {
1028 }
else if (
parser.is_hash_prefix()) {
1038 if (
parser.is_search_prefix()) {
1040 }
else if (
parser.is_hash_prefix()) {
1050 if (
parser.is_hash_prefix()) {
static tl::expected< url_pattern_component, errors > compile(std::string_view input, F &encoding_callback, url_pattern_compile_component_options &options)
regex_provider::regex_type regexp
void add_token_with_default_length(token_type type, size_t next_position, size_t value_position)
void add_token(token_type type, size_t next_position, size_t value_position, size_t value_length)
void seek_and_get_next_code_point(size_t index)
void get_next_code_point()
std::optional< errors > process_tokenizing_error(size_t next_position, size_t value_position) ada_warn_unused
void add_token_with_defaults(token_type type)
std::optional< errors > add_part(std::string_view prefix, token *name_token, token *regexp_or_wildcard_token, std::string_view suyffix, token *modifier_token) ada_warn_unused
bool consume_required_token(token_type type)
std::vector< token > tokens
token * try_consume_token(token_type type)
std::string pending_fixed_value
std::vector< url_pattern_part > parts
std::string consume_text()
token * try_consume_regexp_or_wildcard_token(const token *name_token)
std::optional< errors > maybe_add_part_from_the_pending_fixed_value() ada_warn_unused
token * try_consume_modifier_token()
std::string segment_wildcard_regexp
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
Definitions for user-facing functions for parsing a URL and its components.
Includes the definitions for supported parsers.
bool protocol_component_matches_special_scheme(url_pattern_component< regex_provider > &component)
tl::expected< std::vector< token >, errors > tokenize(std::string_view input, token_policy policy)
std::string generate_segment_wildcard_regexp(url_pattern_compile_component_options options)
tl::expected< std::string, errors > canonicalize_protocol(std::string_view input)
tl::expected< std::vector< url_pattern_part >, errors > parse_pattern_string(std::string_view input, url_pattern_compile_component_options &options, F &encoding_callback)
ada_warn_unused std::string to_string(encoding_type type)
tl::expected< result_type, ada::errors > result
std::string_view get_prefix() const ada_warn_unused
static url_pattern_compile_component_options DEFAULT
std::optional< errors > compute_protocol_matches_special_scheme_flag()
constructor_string_parser(std::string_view new_input, std::vector< token > &&new_token_list)
bool is_an_ipv6_open() const
bool is_an_identity_terminator() const
bool is_pathname_start() const
static tl::expected< url_pattern_init, errors > parse(std::string_view input)
bool is_an_ipv6_close() const
bool next_is_authority_slashes() const
bool is_group_open() const
void change_state(State state, size_t skip)
bool is_group_close() const
bool is_protocol_suffix() const
bool is_port_prefix() const
bool is_password_prefix() const
ada::url_pattern_regex::std_regex_provider regex_provider
Declaration for the URLPattern helpers.