16 const result_type* base_url) {
22 constexpr bool result_type_is_ada_url =
23 std::is_same<ada::url, result_type>::value;
24 constexpr bool result_type_is_ada_url_aggregator =
25 std::is_same<ada::url_aggregator, result_type>::value;
26 static_assert(result_type_is_ada_url ||
27 result_type_is_ada_url_aggregator);
30 ada_log(
"ada::parser::parse_url('", user_input,
"' [", user_input.size(),
31 " bytes],", (base_url !=
nullptr ? base_url->to_string() :
"null"),
39 if (user_input.size() > std::numeric_limits<uint32_t>::max()) [[unlikely]] {
45 if (base_url !=
nullptr) {
51 if constexpr (result_type_is_ada_url_aggregator && store_values) {
63 uint32_t reserve_capacity =
65 helpers::leading_zeroes(uint32_t(1 | user_input.size()))) +
67 url.reserve(reserve_capacity);
69 std::string tmp_buffer;
70 std::string_view url_data;
71 if (unicode::has_tabs_or_newline(user_input)) [[unlikely]] {
72 tmp_buffer = user_input;
75 helpers::remove_ascii_tab_or_newline(tmp_buffer);
76 url_data = tmp_buffer;
78 url_data = user_input;
83 helpers::trim_c0_whitespace(url_data);
86 std::optional<std::string_view> fragment = helpers::prune_hash(url_data);
95 size_t input_position = 0;
96 const size_t input_size = url_data.size();
101 while (input_position <= input_size) {
102 ada_log(
"In parsing at ", input_position,
" out of ", input_size,
106 ada_log(
"SCHEME_START ", helpers::substring(url_data, input_position));
109 if ((input_position != input_size) &&
121 ada_log(
"SCHEME ", helpers::substring(url_data, input_position));
124 while ((input_position != input_size) &&
125 (ada::unicode::is_alnum_plus(url_data[input_position]))) {
129 if ((input_position != input_size) &&
130 (url_data[input_position] ==
':')) {
131 ada_log(
"SCHEME the scheme should be ",
132 url_data.substr(0, input_position));
133 if constexpr (result_type_is_ada_url) {
134 if (!
url.parse_scheme(url_data.substr(0, input_position))) {
139 if (!
url.parse_scheme_with_colon(
140 url_data.substr(0, input_position + 1))) {
155 base_url->type ==
url.type) {
166 else if (input_position + 1 < input_size &&
167 url_data[input_position + 1] ==
'/') {
189 ada_log(
"NO_SCHEME ", helpers::substring(url_data, input_position));
192 if (base_url ==
nullptr ||
193 (base_url->has_opaque_path && !fragment.has_value())) {
194 ada_log(
"NO_SCHEME validation error");
201 else if (base_url->has_opaque_path && fragment.has_value() &&
202 input_position == input_size) {
203 ada_log(
"NO_SCHEME opaque base with fragment");
204 url.copy_scheme(*base_url);
207 if constexpr (result_type_is_ada_url) {
208 url.path = base_url->path;
209 url.query = base_url->query;
211 url.update_base_pathname(base_url->get_pathname());
212 url.update_base_search(base_url->get_search());
214 url.update_unencoded_base_hash(*fragment);
220 ada_log(
"NO_SCHEME non-file relative path");
225 ada_log(
"NO_SCHEME file base type");
231 ada_log(
"AUTHORITY ", helpers::substring(url_data, input_position));
242 if (url_data.find(
'@', input_position) == std::string_view::npos) {
246 bool at_sign_seen{
false};
247 bool password_token_seen{
false};
254 std::string_view view = url_data.substr(input_position);
257 url.
is_special() ? helpers::find_authority_delimiter_special(view)
258 : helpers::find_authority_delimiter(view);
259 std::string_view authority_view = view.substr(0, location);
260 size_t end_of_authority = input_position + authority_view.size();
262 if ((end_of_authority != input_size) &&
263 (url_data[end_of_authority] ==
'@')) {
266 if (password_token_seen) {
267 if constexpr (result_type_is_ada_url) {
268 url.password +=
"%40";
270 url.append_base_password(
"%40");
273 if constexpr (result_type_is_ada_url) {
274 url.username +=
"%40";
276 url.append_base_username(
"%40");
283 if (!password_token_seen) {
284 size_t password_token_location = authority_view.find(
':');
285 password_token_seen =
286 password_token_location != std::string_view::npos;
288 if constexpr (store_values) {
289 if (!password_token_seen) {
290 if constexpr (result_type_is_ada_url) {
291 url.username += unicode::percent_encode(
295 url.append_base_username(unicode::percent_encode(
300 if constexpr (result_type_is_ada_url) {
301 url.username += unicode::percent_encode(
302 authority_view.substr(0, password_token_location),
304 url.password += unicode::percent_encode(
305 authority_view.substr(password_token_location + 1),
308 url.append_base_username(unicode::percent_encode(
309 authority_view.substr(0, password_token_location),
311 url.append_base_password(unicode::percent_encode(
312 authority_view.substr(password_token_location + 1),
317 }
else if constexpr (store_values) {
318 if constexpr (result_type_is_ada_url) {
319 url.password += unicode::percent_encode(
322 url.append_base_password(unicode::percent_encode(
330 else if (end_of_authority == input_size ||
331 url_data[end_of_authority] ==
'/' ||
332 url_data[end_of_authority] ==
'?' ||
336 if (at_sign_seen && authority_view.empty()) {
343 if (end_of_authority == input_size) {
344 if constexpr (store_values) {
345 if (fragment.has_value()) {
346 url.update_unencoded_base_hash(*fragment);
351 input_position = end_of_authority + 1;
357 ada_log(
"SPECIAL_RELATIVE_OR_AUTHORITY ",
358 helpers::substring(url_data, input_position));
363 if (url_data.substr(input_position, 2) ==
"//") {
375 ada_log(
"PATH_OR_AUTHORITY ",
376 helpers::substring(url_data, input_position));
379 if ((input_position != input_size) &&
380 (url_data[input_position] ==
'/')) {
391 ada_log(
"RELATIVE_SCHEME ",
392 helpers::substring(url_data, input_position));
395 url.copy_scheme(*base_url);
398 if ((input_position != input_size) &&
399 (url_data[input_position] ==
'/')) {
401 "RELATIVE_SCHEME if c is U+002F (/), then set state to relative "
404 }
else if (
url.
is_special() && (input_position != input_size) &&
405 (url_data[input_position] ==
'\\')) {
409 "RELATIVE_SCHEME if url is special and c is U+005C, validation "
410 "error, set state to relative slash state");
413 ada_log(
"RELATIVE_SCHEME otherwise");
418 if constexpr (result_type_is_ada_url) {
419 url.username = base_url->username;
420 url.password = base_url->password;
421 url.host = base_url->host;
422 url.port = base_url->port;
425 url.path = base_url->path;
426 url.query = base_url->query;
428 url.update_base_authority(base_url->get_href(),
429 base_url->get_components());
430 url.update_host_to_base_host(base_url->get_hostname());
431 url.update_base_port(base_url->retrieve_base_port());
434 url.update_base_pathname(base_url->get_pathname());
435 url.update_base_search(base_url->get_search());
442 if ((input_position != input_size) &&
443 (url_data[input_position] ==
'?')) {
447 else if (input_position != input_size) {
450 if constexpr (result_type_is_ada_url) {
452 helpers::shorten_path(
url.path,
url.type);
455 if (helpers::shorten_path(path,
url.type)) {
456 url.update_base_pathname(std::move(std::string(path)));
468 ada_log(
"RELATIVE_SLASH ",
469 helpers::substring(url_data, input_position));
473 (url_data[input_position] ==
'/' ||
474 url_data[input_position] ==
'\\')) {
479 else if ((input_position != input_size) &&
480 (url_data[input_position] ==
'/')) {
490 if constexpr (result_type_is_ada_url) {
491 url.username = base_url->username;
492 url.password = base_url->password;
493 url.host = base_url->host;
494 url.port = base_url->port;
496 url.update_base_authority(base_url->get_href(),
497 base_url->get_components());
498 url.update_host_to_base_host(base_url->get_hostname());
499 url.update_base_port(base_url->retrieve_base_port());
509 ada_log(
"SPECIAL_AUTHORITY_SLASHES ",
510 helpers::substring(url_data, input_position));
515 if (url_data.substr(input_position, 2) ==
"//") {
522 ada_log(
"SPECIAL_AUTHORITY_IGNORE_SLASHES ",
523 helpers::substring(url_data, input_position));
527 while ((input_position != input_size) &&
528 ((url_data[input_position] ==
'/') ||
529 (url_data[input_position] ==
'\\'))) {
537 ada_log(
"QUERY ", helpers::substring(url_data, input_position));
538 if constexpr (store_values) {
541 const uint8_t* query_percent_encode_set =
548 url.update_base_search(url_data.substr(input_position),
549 query_percent_encode_set);
550 ada_log(
"QUERY update_base_search completed ");
551 if (fragment.has_value()) {
552 url.update_unencoded_base_hash(*fragment);
558 ada_log(
"HOST ", helpers::substring(url_data, input_position));
560 std::string_view host_view = url_data.substr(input_position);
561 auto [location, found_colon] =
562 helpers::get_host_delimiter_location(
url.
is_special(), host_view);
563 input_position = (location != std::string_view::npos)
564 ? input_position + location
573 ada_log(
"HOST parsing ", host_view);
574 if (!
url.parse_host(host_view)) {
595 ada_log(
"HOST parsing ", host_view,
" href=",
url.
get_href());
598 if (host_view.empty()) {
599 url.update_base_hostname(
"");
600 }
else if (!
url.parse_host(host_view)) {
613 ada_log(
"OPAQUE_PATH ", helpers::substring(url_data, input_position));
614 std::string_view view = url_data.substr(input_position);
617 size_t location = view.find(
'?');
618 if (location != std::string_view::npos) {
619 view.remove_suffix(view.size() - location);
621 input_position += location + 1;
623 input_position = input_size + 1;
628 url.update_base_pathname(unicode::percent_encode(
633 ada_log(
"PORT ", helpers::substring(url_data, input_position));
634 std::string_view port_view = url_data.substr(input_position);
635 input_position +=
url.parse_port(port_view,
true);
643 ada_log(
"PATH_START ", helpers::substring(url_data, input_position));
652 if (input_position == input_size) {
653 if constexpr (store_values) {
654 url.update_base_pathname(
"/");
655 if (fragment.has_value()) {
656 url.update_unencoded_base_hash(*fragment);
664 if ((url_data[input_position] !=
'/') &&
665 (url_data[input_position] !=
'\\')) {
671 else if ((input_position != input_size) &&
672 (url_data[input_position] ==
'?')) {
676 else if (input_position != input_size) {
681 if (url_data[input_position] !=
'/') {
690 ada_log(
"PATH ", helpers::substring(url_data, input_position));
691 std::string_view view = url_data.substr(input_position);
695 size_t locofquestionmark = view.find(
'?');
696 if (locofquestionmark != std::string_view::npos) {
698 view.remove_suffix(view.size() - locofquestionmark);
699 input_position += locofquestionmark + 1;
701 input_position = input_size + 1;
703 if constexpr (store_values) {
704 if constexpr (result_type_is_ada_url) {
705 helpers::parse_prepared_path(view,
url.type,
url.path);
707 url.consume_prepared_path(view);
714 ada_log(
"FILE_SLASH ", helpers::substring(url_data, input_position));
717 if ((input_position != input_size) &&
718 (url_data[input_position] ==
'/' ||
719 url_data[input_position] ==
'\\')) {
720 ada_log(
"FILE_SLASH c is U+002F or U+005C");
725 ada_log(
"FILE_SLASH otherwise");
729 if (base_url !=
nullptr &&
732 if constexpr (result_type_is_ada_url) {
733 url.host = base_url->host;
735 url.update_host_to_base_host(base_url->get_host());
741 if (!base_url->get_pathname().empty()) {
743 url_data.substr(input_position))) {
744 std::string_view first_base_url_path =
745 base_url->get_pathname().substr(1);
746 size_t loc = first_base_url_path.find(
'/');
747 if (loc != std::string_view::npos) {
748 helpers::resize(first_base_url_path, loc);
751 first_base_url_path)) {
752 if constexpr (result_type_is_ada_url) {
754 url.path += first_base_url_path;
756 url.append_base_pathname(
757 helpers::concat(
"/", first_base_url_path));
771 ada_log(
"FILE_HOST ", helpers::substring(url_data, input_position));
772 std::string_view view = url_data.substr(input_position);
774 size_t location = view.find_first_of(
"/\\?");
775 std::string_view file_host_buffer(
777 (location != std::string_view::npos) ? location : view.size());
781 }
else if (file_host_buffer.empty()) {
783 if constexpr (result_type_is_ada_url) {
786 url.update_base_hostname(
"");
791 size_t consumed_bytes = file_host_buffer.size();
792 input_position += consumed_bytes;
795 if (!
url.parse_host(file_host_buffer)) {
799 if constexpr (result_type_is_ada_url) {
801 if (
url.host.has_value() &&
url.host.value() ==
"localhost") {
806 url.update_base_hostname(
"");
817 ada_log(
"FILE ", helpers::substring(url_data, input_position));
818 std::string_view file_view = url_data.substr(input_position);
820 url.set_protocol_as_file();
821 if constexpr (result_type_is_ada_url) {
825 url.update_base_hostname(
"");
828 if (input_position != input_size &&
829 (url_data[input_position] ==
'/' ||
830 url_data[input_position] ==
'\\')) {
831 ada_log(
"FILE c is U+002F or U+005C");
836 else if (base_url !=
nullptr &&
840 ada_log(
"FILE base non-null");
841 if constexpr (result_type_is_ada_url) {
842 url.host = base_url->host;
843 url.path = base_url->path;
844 url.query = base_url->query;
846 url.update_host_to_base_host(base_url->get_hostname());
847 url.update_base_pathname(base_url->get_pathname());
848 url.update_base_search(base_url->get_search());
854 if (input_position != input_size && url_data[input_position] ==
'?') {
858 else if (input_position != input_size) {
864 if constexpr (result_type_is_ada_url) {
865 helpers::shorten_path(
url.path,
url.type);
868 if (helpers::shorten_path(path,
url.type)) {
869 url.update_base_pathname(std::move(std::string(path)));
876 url.clear_pathname();
887 ada_log(
"FILE go to path");
899 if constexpr (store_values) {
900 if (fragment.has_value()) {
901 url.update_unencoded_base_hash(*fragment);