14 const result_type* base_url) {
20 constexpr bool result_type_is_ada_url = std::is_same_v<url, result_type>;
21 constexpr bool result_type_is_ada_url_aggregator =
22 std::is_same_v<url_aggregator, result_type>;
23 static_assert(result_type_is_ada_url ||
24 result_type_is_ada_url_aggregator);
27 ada_log(
"ada::parser::parse_url('", user_input,
"' [", user_input.size(),
28 " bytes],", (base_url !=
nullptr ? base_url->to_string() :
"null"),
36 if (user_input.size() > std::numeric_limits<uint32_t>::max()) [[unlikely]] {
42 if (base_url !=
nullptr) {
48 if constexpr (result_type_is_ada_url_aggregator && store_values) {
60 uint32_t reserve_capacity =
62 helpers::leading_zeroes(uint32_t(1 | user_input.size()))) +
64 url.reserve(reserve_capacity);
66 std::string tmp_buffer;
67 std::string_view url_data;
68 if (unicode::has_tabs_or_newline(user_input)) [[unlikely]] {
69 tmp_buffer = user_input;
72 helpers::remove_ascii_tab_or_newline(tmp_buffer);
73 url_data = tmp_buffer;
75 url_data = user_input;
80 helpers::trim_c0_whitespace(url_data);
83 std::optional<std::string_view> fragment = helpers::prune_hash(url_data);
92 size_t input_position = 0;
93 const size_t input_size = url_data.size();
98 while (input_position <= input_size) {
99 ada_log(
"In parsing at ", input_position,
" out of ", input_size,
103 ada_log(
"SCHEME_START ", helpers::substring(url_data, input_position));
106 if ((input_position != input_size) &&
118 ada_log(
"SCHEME ", helpers::substring(url_data, input_position));
121 while ((input_position != input_size) &&
122 (unicode::is_alnum_plus(url_data[input_position]))) {
126 if ((input_position != input_size) &&
127 (url_data[input_position] ==
':')) {
128 ada_log(
"SCHEME the scheme should be ",
129 url_data.substr(0, input_position));
130 if constexpr (result_type_is_ada_url) {
131 if (!
url.parse_scheme(url_data.substr(0, input_position))) {
136 if (!
url.parse_scheme_with_colon(
137 url_data.substr(0, input_position + 1))) {
152 base_url->type ==
url.type) {
163 else if (input_position + 1 < input_size &&
164 url_data[input_position + 1] ==
'/') {
186 ada_log(
"NO_SCHEME ", helpers::substring(url_data, input_position));
189 if (base_url ==
nullptr ||
190 (base_url->has_opaque_path && !fragment.has_value())) {
191 ada_log(
"NO_SCHEME validation error");
198 else if (base_url->has_opaque_path && fragment.has_value() &&
199 input_position == input_size) {
200 ada_log(
"NO_SCHEME opaque base with fragment");
201 url.copy_scheme(*base_url);
204 if constexpr (result_type_is_ada_url) {
205 url.path = base_url->path;
206 url.query = base_url->query;
208 url.update_base_pathname(base_url->get_pathname());
209 url.update_base_search(base_url->get_search());
211 url.update_unencoded_base_hash(*fragment);
217 ada_log(
"NO_SCHEME non-file relative path");
222 ada_log(
"NO_SCHEME file base type");
228 ada_log(
"AUTHORITY ", helpers::substring(url_data, input_position));
239 if (url_data.find(
'@', input_position) == std::string_view::npos) {
243 bool at_sign_seen{
false};
244 bool password_token_seen{
false};
251 std::string_view view = url_data.substr(input_position);
254 url.
is_special() ? helpers::find_authority_delimiter_special(view)
255 : helpers::find_authority_delimiter(view);
256 std::string_view authority_view = view.substr(0, location);
257 size_t end_of_authority = input_position + authority_view.size();
259 if ((end_of_authority != input_size) &&
260 (url_data[end_of_authority] ==
'@')) {
263 if (password_token_seen) {
264 if constexpr (result_type_is_ada_url) {
265 url.password +=
"%40";
267 url.append_base_password(
"%40");
270 if constexpr (result_type_is_ada_url) {
271 url.username +=
"%40";
273 url.append_base_username(
"%40");
280 if (!password_token_seen) {
281 size_t password_token_location = authority_view.find(
':');
282 password_token_seen =
283 password_token_location != std::string_view::npos;
285 if constexpr (store_values) {
286 if (!password_token_seen) {
287 if constexpr (result_type_is_ada_url) {
288 url.username += unicode::percent_encode(
292 url.append_base_username(unicode::percent_encode(
297 if constexpr (result_type_is_ada_url) {
298 url.username += unicode::percent_encode(
299 authority_view.substr(0, password_token_location),
301 url.password += unicode::percent_encode(
302 authority_view.substr(password_token_location + 1),
305 url.append_base_username(unicode::percent_encode(
306 authority_view.substr(0, password_token_location),
308 url.append_base_password(unicode::percent_encode(
309 authority_view.substr(password_token_location + 1),
314 }
else if constexpr (store_values) {
315 if constexpr (result_type_is_ada_url) {
316 url.password += unicode::percent_encode(
319 url.append_base_password(unicode::percent_encode(
327 else if (end_of_authority == input_size ||
328 url_data[end_of_authority] ==
'/' ||
329 url_data[end_of_authority] ==
'?' ||
333 if (at_sign_seen && authority_view.empty()) {
340 if (end_of_authority == input_size) {
341 if constexpr (store_values) {
342 if (fragment.has_value()) {
343 url.update_unencoded_base_hash(*fragment);
348 input_position = end_of_authority + 1;
354 ada_log(
"SPECIAL_RELATIVE_OR_AUTHORITY ",
355 helpers::substring(url_data, input_position));
360 if (url_data.substr(input_position, 2) ==
"//") {
372 ada_log(
"PATH_OR_AUTHORITY ",
373 helpers::substring(url_data, input_position));
376 if ((input_position != input_size) &&
377 (url_data[input_position] ==
'/')) {
388 ada_log(
"RELATIVE_SCHEME ",
389 helpers::substring(url_data, input_position));
392 url.copy_scheme(*base_url);
395 if ((input_position != input_size) &&
396 (url_data[input_position] ==
'/')) {
398 "RELATIVE_SCHEME if c is U+002F (/), then set state to relative "
401 }
else if (
url.
is_special() && (input_position != input_size) &&
402 (url_data[input_position] ==
'\\')) {
406 "RELATIVE_SCHEME if url is special and c is U+005C, validation "
407 "error, set state to relative slash state");
410 ada_log(
"RELATIVE_SCHEME otherwise");
415 if constexpr (result_type_is_ada_url) {
416 url.username = base_url->username;
417 url.password = base_url->password;
418 url.host = base_url->host;
419 url.port = base_url->port;
422 url.path = base_url->path;
423 url.query = base_url->query;
425 url.update_base_authority(base_url->get_href(),
426 base_url->get_components());
427 url.update_host_to_base_host(base_url->get_hostname());
428 url.update_base_port(base_url->retrieve_base_port());
431 url.update_base_pathname(base_url->get_pathname());
432 url.update_base_search(base_url->get_search());
439 if ((input_position != input_size) &&
440 (url_data[input_position] ==
'?')) {
444 else if (input_position != input_size) {
447 if constexpr (result_type_is_ada_url) {
449 helpers::shorten_path(
url.path,
url.type);
452 if (helpers::shorten_path(path,
url.type)) {
453 url.update_base_pathname(std::move(std::string(path)));
465 ada_log(
"RELATIVE_SLASH ",
466 helpers::substring(url_data, input_position));
470 (url_data[input_position] ==
'/' ||
471 url_data[input_position] ==
'\\')) {
476 else if ((input_position != input_size) &&
477 (url_data[input_position] ==
'/')) {
487 if constexpr (result_type_is_ada_url) {
488 url.username = base_url->username;
489 url.password = base_url->password;
490 url.host = base_url->host;
491 url.port = base_url->port;
493 url.update_base_authority(base_url->get_href(),
494 base_url->get_components());
495 url.update_host_to_base_host(base_url->get_hostname());
496 url.update_base_port(base_url->retrieve_base_port());
506 ada_log(
"SPECIAL_AUTHORITY_SLASHES ",
507 helpers::substring(url_data, input_position));
512 if (url_data.substr(input_position, 2) ==
"//") {
519 ada_log(
"SPECIAL_AUTHORITY_IGNORE_SLASHES ",
520 helpers::substring(url_data, input_position));
524 while ((input_position != input_size) &&
525 ((url_data[input_position] ==
'/') ||
526 (url_data[input_position] ==
'\\'))) {
534 ada_log(
"QUERY ", helpers::substring(url_data, input_position));
535 if constexpr (store_values) {
538 const uint8_t* query_percent_encode_set =
544 url.update_base_search(url_data.substr(input_position),
545 query_percent_encode_set);
546 ada_log(
"QUERY update_base_search completed ");
547 if (fragment.has_value()) {
548 url.update_unencoded_base_hash(*fragment);
554 ada_log(
"HOST ", helpers::substring(url_data, input_position));
556 std::string_view host_view = url_data.substr(input_position);
557 auto [location, found_colon] =
558 helpers::get_host_delimiter_location(
url.
is_special(), host_view);
559 input_position = (location != std::string_view::npos)
560 ? input_position + location
569 ada_log(
"HOST parsing ", host_view);
570 if (!
url.parse_host(host_view)) {
591 ada_log(
"HOST parsing ", host_view,
" href=",
url.
get_href());
594 if (host_view.empty()) {
595 url.update_base_hostname(
"");
596 }
else if (!
url.parse_host(host_view)) {
609 ada_log(
"OPAQUE_PATH ", helpers::substring(url_data, input_position));
610 std::string_view view = url_data.substr(input_position);
613 size_t location = view.find(
'?');
614 if (location != std::string_view::npos) {
615 view.remove_suffix(view.size() - location);
617 input_position += location + 1;
619 input_position = input_size + 1;
624 url.update_base_pathname(unicode::percent_encode(
629 ada_log(
"PORT ", helpers::substring(url_data, input_position));
630 std::string_view port_view = url_data.substr(input_position);
631 input_position +=
url.parse_port(port_view,
true);
639 ada_log(
"PATH_START ", helpers::substring(url_data, input_position));
648 if (input_position == input_size) {
649 if constexpr (store_values) {
650 url.update_base_pathname(
"/");
651 if (fragment.has_value()) {
652 url.update_unencoded_base_hash(*fragment);
660 if ((url_data[input_position] !=
'/') &&
661 (url_data[input_position] !=
'\\')) {
667 else if ((input_position != input_size) &&
668 (url_data[input_position] ==
'?')) {
672 else if (input_position != input_size) {
677 if (url_data[input_position] !=
'/') {
686 ada_log(
"PATH ", helpers::substring(url_data, input_position));
687 std::string_view view = url_data.substr(input_position);
691 size_t locofquestionmark = view.find(
'?');
692 if (locofquestionmark != std::string_view::npos) {
694 view.remove_suffix(view.size() - locofquestionmark);
695 input_position += locofquestionmark + 1;
697 input_position = input_size + 1;
699 if constexpr (store_values) {
700 if constexpr (result_type_is_ada_url) {
701 helpers::parse_prepared_path(view,
url.type,
url.path);
703 url.consume_prepared_path(view);
710 ada_log(
"FILE_SLASH ", helpers::substring(url_data, input_position));
713 if ((input_position != input_size) &&
714 (url_data[input_position] ==
'/' ||
715 url_data[input_position] ==
'\\')) {
716 ada_log(
"FILE_SLASH c is U+002F or U+005C");
721 ada_log(
"FILE_SLASH otherwise");
727 if constexpr (result_type_is_ada_url) {
728 url.host = base_url->host;
730 url.update_host_to_base_host(base_url->get_host());
736 if (!base_url->get_pathname().empty()) {
738 url_data.substr(input_position))) {
739 std::string_view first_base_url_path =
740 base_url->get_pathname().substr(1);
741 size_t loc = first_base_url_path.find(
'/');
742 if (loc != std::string_view::npos) {
743 helpers::resize(first_base_url_path, loc);
746 first_base_url_path)) {
747 if constexpr (result_type_is_ada_url) {
749 url.path += first_base_url_path;
751 url.append_base_pathname(
752 helpers::concat(
"/", first_base_url_path));
766 ada_log(
"FILE_HOST ", helpers::substring(url_data, input_position));
767 std::string_view view = url_data.substr(input_position);
769 size_t location = view.find_first_of(
"/\\?");
770 std::string_view file_host_buffer(
772 (location != std::string_view::npos) ? location : view.size());
776 }
else if (file_host_buffer.empty()) {
778 if constexpr (result_type_is_ada_url) {
781 url.update_base_hostname(
"");
786 size_t consumed_bytes = file_host_buffer.size();
787 input_position += consumed_bytes;
790 if (!
url.parse_host(file_host_buffer)) {
794 if constexpr (result_type_is_ada_url) {
796 if (
url.host.has_value() &&
url.host.value() ==
"localhost") {
801 url.update_base_hostname(
"");
812 ada_log(
"FILE ", helpers::substring(url_data, input_position));
813 std::string_view file_view = url_data.substr(input_position);
815 url.set_protocol_as_file();
816 if constexpr (result_type_is_ada_url) {
820 url.update_base_hostname(
"");
823 if (input_position != input_size &&
824 (url_data[input_position] ==
'/' ||
825 url_data[input_position] ==
'\\')) {
826 ada_log(
"FILE c is U+002F or U+005C");
834 ada_log(
"FILE base non-null");
835 if constexpr (result_type_is_ada_url) {
836 url.host = base_url->host;
837 url.path = base_url->path;
838 url.query = base_url->query;
840 url.update_host_to_base_host(base_url->get_hostname());
841 url.update_base_pathname(base_url->get_pathname());
842 url.update_base_search(base_url->get_search());
848 if (input_position != input_size && url_data[input_position] ==
'?') {
852 else if (input_position != input_size) {
858 if constexpr (result_type_is_ada_url) {
859 helpers::shorten_path(
url.path,
url.type);
862 if (helpers::shorten_path(path,
url.type)) {
863 url.update_base_pathname(std::move(std::string(path)));
870 url.clear_pathname();
881 ada_log(
"FILE go to path");
893 if constexpr (store_values) {
894 if (fragment.has_value()) {
895 url.update_unencoded_base_hash(*fragment);