15 const result_type* base_url) {
21 constexpr bool result_type_is_ada_url = std::is_same_v<url, result_type>;
22 constexpr bool result_type_is_ada_url_aggregator =
23 std::is_same_v<url_aggregator, result_type>;
24 static_assert(result_type_is_ada_url ||
25 result_type_is_ada_url_aggregator);
28 ada_log(
"ada::parser::parse_url('", user_input,
"' [", user_input.size(),
29 " bytes],", (base_url !=
nullptr ? base_url->to_string() :
"null"),
37 if (user_input.size() > std::numeric_limits<uint32_t>::max()) [[unlikely]] {
43 if (base_url !=
nullptr) {
49 if constexpr (result_type_is_ada_url_aggregator && store_values) {
61 uint32_t reserve_capacity =
63 helpers::leading_zeroes(uint32_t(1 | user_input.size()))) +
65 url.reserve(reserve_capacity);
67 std::string tmp_buffer;
68 std::string_view url_data;
69 if (unicode::has_tabs_or_newline(user_input)) [[unlikely]] {
70 tmp_buffer = user_input;
73 helpers::remove_ascii_tab_or_newline(tmp_buffer);
74 url_data = tmp_buffer;
76 url_data = user_input;
81 helpers::trim_c0_whitespace(url_data);
84 std::optional<std::string_view> fragment = helpers::prune_hash(url_data);
93 size_t input_position = 0;
94 const size_t input_size = url_data.size();
99 while (input_position <= input_size) {
100 ada_log(
"In parsing at ", input_position,
" out of ", input_size,
104 ada_log(
"SCHEME_START ", helpers::substring(url_data, input_position));
107 if ((input_position != input_size) &&
119 ada_log(
"SCHEME ", helpers::substring(url_data, input_position));
122 while ((input_position != input_size) &&
123 (unicode::is_alnum_plus(url_data[input_position]))) {
127 if ((input_position != input_size) &&
128 (url_data[input_position] ==
':')) {
129 ada_log(
"SCHEME the scheme should be ",
130 url_data.substr(0, input_position));
131 if constexpr (result_type_is_ada_url) {
132 if (!
url.parse_scheme(url_data.substr(0, input_position))) {
137 if (!
url.parse_scheme_with_colon(
138 url_data.substr(0, input_position + 1))) {
153 base_url->type ==
url.type) {
164 else if (input_position + 1 < input_size &&
165 url_data[input_position + 1] ==
'/') {
187 ada_log(
"NO_SCHEME ", helpers::substring(url_data, input_position));
190 if (base_url ==
nullptr ||
191 (base_url->has_opaque_path && !fragment.has_value())) {
192 ada_log(
"NO_SCHEME validation error");
199 else if (base_url->has_opaque_path && fragment.has_value() &&
200 input_position == input_size) {
201 ada_log(
"NO_SCHEME opaque base with fragment");
202 url.copy_scheme(*base_url);
205 if constexpr (result_type_is_ada_url) {
206 url.path = base_url->path;
207 url.query = base_url->query;
209 url.update_base_pathname(base_url->get_pathname());
210 url.update_base_search(base_url->get_search());
212 url.update_unencoded_base_hash(*fragment);
218 ada_log(
"NO_SCHEME non-file relative path");
223 ada_log(
"NO_SCHEME file base type");
229 ada_log(
"AUTHORITY ", helpers::substring(url_data, input_position));
240 if (url_data.find(
'@', input_position) == std::string_view::npos) {
244 bool at_sign_seen{
false};
245 bool password_token_seen{
false};
252 std::string_view view = url_data.substr(input_position);
255 url.
is_special() ? helpers::find_authority_delimiter_special(view)
256 : helpers::find_authority_delimiter(view);
257 std::string_view authority_view = view.substr(0, location);
258 size_t end_of_authority = input_position + authority_view.size();
260 if ((end_of_authority != input_size) &&
261 (url_data[end_of_authority] ==
'@')) {
264 if (password_token_seen) {
265 if constexpr (result_type_is_ada_url) {
266 url.password +=
"%40";
268 url.append_base_password(
"%40");
271 if constexpr (result_type_is_ada_url) {
272 url.username +=
"%40";
274 url.append_base_username(
"%40");
281 if (!password_token_seen) {
282 size_t password_token_location = authority_view.find(
':');
283 password_token_seen =
284 password_token_location != std::string_view::npos;
286 if constexpr (store_values) {
287 if (!password_token_seen) {
288 if constexpr (result_type_is_ada_url) {
289 url.username += unicode::percent_encode(
293 url.append_base_username(unicode::percent_encode(
298 if constexpr (result_type_is_ada_url) {
299 url.username += unicode::percent_encode(
300 authority_view.substr(0, password_token_location),
302 url.password += unicode::percent_encode(
303 authority_view.substr(password_token_location + 1),
306 url.append_base_username(unicode::percent_encode(
307 authority_view.substr(0, password_token_location),
309 url.append_base_password(unicode::percent_encode(
310 authority_view.substr(password_token_location + 1),
315 }
else if constexpr (store_values) {
316 if constexpr (result_type_is_ada_url) {
317 url.password += unicode::percent_encode(
320 url.append_base_password(unicode::percent_encode(
328 else if (end_of_authority == input_size ||
329 url_data[end_of_authority] ==
'/' ||
330 url_data[end_of_authority] ==
'?' ||
334 if (at_sign_seen && authority_view.empty()) {
341 if (end_of_authority == input_size) {
342 if constexpr (store_values) {
343 if (fragment.has_value()) {
344 url.update_unencoded_base_hash(*fragment);
349 input_position = end_of_authority + 1;
355 ada_log(
"SPECIAL_RELATIVE_OR_AUTHORITY ",
356 helpers::substring(url_data, input_position));
361 if (url_data.substr(input_position, 2) ==
"//") {
373 ada_log(
"PATH_OR_AUTHORITY ",
374 helpers::substring(url_data, input_position));
377 if ((input_position != input_size) &&
378 (url_data[input_position] ==
'/')) {
389 ada_log(
"RELATIVE_SCHEME ",
390 helpers::substring(url_data, input_position));
393 url.copy_scheme(*base_url);
396 if ((input_position != input_size) &&
398 (url_data[input_position] ==
'/')) {
400 "RELATIVE_SCHEME if c is U+002F (/), then set state to relative "
403 }
else if (
url.
is_special() && (input_position != input_size) &&
404 (url_data[input_position] ==
'\\')) {
408 "RELATIVE_SCHEME if url is special and c is U+005C, validation "
409 "error, set state to relative slash state");
412 ada_log(
"RELATIVE_SCHEME otherwise");
417 if constexpr (result_type_is_ada_url) {
418 url.username = base_url->username;
419 url.password = base_url->password;
420 url.host = base_url->host;
421 url.port = base_url->port;
424 url.path = base_url->path;
425 url.query = base_url->query;
427 url.update_base_authority(base_url->get_href(),
428 base_url->get_components());
429 url.update_host_to_base_host(base_url->get_hostname());
430 url.update_base_port(base_url->retrieve_base_port());
433 url.update_base_pathname(base_url->get_pathname());
434 url.update_base_search(base_url->get_search());
441 if ((input_position != input_size) &&
442 (url_data[input_position] ==
'?')) {
446 else if (input_position != input_size) {
449 if constexpr (result_type_is_ada_url) {
451 helpers::shorten_path(
url.path,
url.type);
454 if (helpers::shorten_path(path,
url.type)) {
455 url.update_base_pathname(std::move(std::string(path)));
467 ada_log(
"RELATIVE_SLASH ",
468 helpers::substring(url_data, input_position));
472 (url_data[input_position] ==
'/' ||
473 url_data[input_position] ==
'\\')) {
478 else if ((input_position != input_size) &&
479 (url_data[input_position] ==
'/')) {
489 if constexpr (result_type_is_ada_url) {
490 url.username = base_url->username;
491 url.password = base_url->password;
492 url.host = base_url->host;
493 url.port = base_url->port;
495 url.update_base_authority(base_url->get_href(),
496 base_url->get_components());
497 url.update_host_to_base_host(base_url->get_hostname());
498 url.update_base_port(base_url->retrieve_base_port());
508 ada_log(
"SPECIAL_AUTHORITY_SLASHES ",
509 helpers::substring(url_data, input_position));
514 if (url_data.substr(input_position, 2) ==
"//") {
521 ada_log(
"SPECIAL_AUTHORITY_IGNORE_SLASHES ",
522 helpers::substring(url_data, input_position));
526 while ((input_position != input_size) &&
527 ((url_data[input_position] ==
'/') ||
528 (url_data[input_position] ==
'\\'))) {
536 ada_log(
"QUERY ", helpers::substring(url_data, input_position));
537 if constexpr (store_values) {
540 const uint8_t* query_percent_encode_set =
546 url.update_base_search(url_data.substr(input_position),
547 query_percent_encode_set);
548 ada_log(
"QUERY update_base_search completed ");
549 if (fragment.has_value()) {
550 url.update_unencoded_base_hash(*fragment);
556 ada_log(
"HOST ", helpers::substring(url_data, input_position));
558 std::string_view host_view = url_data.substr(input_position);
559 auto [location, found_colon] =
560 helpers::get_host_delimiter_location(
url.
is_special(), host_view);
561 input_position = (location != std::string_view::npos)
562 ? input_position + location
571 ada_log(
"HOST parsing ", host_view);
572 if (!
url.parse_host(host_view)) {
593 ada_log(
"HOST parsing ", host_view,
" href=",
url.
get_href());
596 if (host_view.empty()) {
597 url.update_base_hostname(
"");
598 }
else if (!
url.parse_host(host_view)) {
611 ada_log(
"OPAQUE_PATH ", helpers::substring(url_data, input_position));
612 std::string_view view = url_data.substr(input_position);
615 size_t location = view.find(
'?');
616 if (location != std::string_view::npos) {
617 view.remove_suffix(view.size() - location);
619 input_position += location + 1;
621 input_position = input_size + 1;
627 if (view.ends_with(
' ')) {
628 std::string modified_view =
629 std::string(view.substr(0, view.size() - 1)) +
"%20";
630 url.update_base_pathname(unicode::percent_encode(
633 url.update_base_pathname(unicode::percent_encode(
639 ada_log(
"PORT ", helpers::substring(url_data, input_position));
640 std::string_view port_view = url_data.substr(input_position);
641 input_position +=
url.parse_port(port_view,
true);
649 ada_log(
"PATH_START ", helpers::substring(url_data, input_position));
658 if (input_position == input_size) {
659 if constexpr (store_values) {
660 url.update_base_pathname(
"/");
661 if (fragment.has_value()) {
662 url.update_unencoded_base_hash(*fragment);
670 if ((url_data[input_position] !=
'/') &&
671 (url_data[input_position] !=
'\\')) {
677 else if ((input_position != input_size) &&
678 (url_data[input_position] ==
'?')) {
682 else if (input_position != input_size) {
687 if (url_data[input_position] !=
'/') {
696 ada_log(
"PATH ", helpers::substring(url_data, input_position));
697 std::string_view view = url_data.substr(input_position);
701 size_t locofquestionmark = view.find(
'?');
702 if (locofquestionmark != std::string_view::npos) {
704 view.remove_suffix(view.size() - locofquestionmark);
705 input_position += locofquestionmark + 1;
707 input_position = input_size + 1;
709 if constexpr (store_values) {
710 if constexpr (result_type_is_ada_url) {
711 helpers::parse_prepared_path(view,
url.type,
url.path);
713 url.consume_prepared_path(view);
720 ada_log(
"FILE_SLASH ", helpers::substring(url_data, input_position));
723 if ((input_position != input_size) &&
724 (url_data[input_position] ==
'/' ||
725 url_data[input_position] ==
'\\')) {
726 ada_log(
"FILE_SLASH c is U+002F or U+005C");
731 ada_log(
"FILE_SLASH otherwise");
737 if constexpr (result_type_is_ada_url) {
738 url.host = base_url->host;
740 url.update_host_to_base_host(base_url->get_host());
746 if (!base_url->get_pathname().empty()) {
748 url_data.substr(input_position))) {
749 std::string_view first_base_url_path =
750 base_url->get_pathname().substr(1);
751 size_t loc = first_base_url_path.find(
'/');
752 if (loc != std::string_view::npos) {
753 helpers::resize(first_base_url_path, loc);
756 first_base_url_path)) {
757 if constexpr (result_type_is_ada_url) {
759 url.path += first_base_url_path;
761 url.append_base_pathname(
762 helpers::concat(
"/", first_base_url_path));
776 ada_log(
"FILE_HOST ", helpers::substring(url_data, input_position));
777 std::string_view view = url_data.substr(input_position);
779 size_t location = view.find_first_of(
"/\\?");
780 std::string_view file_host_buffer(
782 (location != std::string_view::npos) ? location : view.size());
786 }
else if (file_host_buffer.empty()) {
788 if constexpr (result_type_is_ada_url) {
791 url.update_base_hostname(
"");
796 size_t consumed_bytes = file_host_buffer.size();
797 input_position += consumed_bytes;
800 if (!
url.parse_host(file_host_buffer)) {
804 if constexpr (result_type_is_ada_url) {
806 if (
url.host.has_value() &&
url.host.value() ==
"localhost") {
811 url.update_base_hostname(
"");
822 ada_log(
"FILE ", helpers::substring(url_data, input_position));
823 std::string_view file_view = url_data.substr(input_position);
825 url.set_protocol_as_file();
826 if constexpr (result_type_is_ada_url) {
830 url.update_base_hostname(
"");
833 if (input_position != input_size &&
834 (url_data[input_position] ==
'/' ||
835 url_data[input_position] ==
'\\')) {
836 ada_log(
"FILE c is U+002F or U+005C");
844 ada_log(
"FILE base non-null");
845 if constexpr (result_type_is_ada_url) {
846 url.host = base_url->host;
847 url.path = base_url->path;
848 url.query = base_url->query;
850 url.update_host_to_base_host(base_url->get_hostname());
851 url.update_base_pathname(base_url->get_pathname());
852 url.update_base_search(base_url->get_search());
858 if (input_position != input_size && url_data[input_position] ==
'?') {
862 else if (input_position != input_size) {
868 if constexpr (result_type_is_ada_url) {
869 helpers::shorten_path(
url.path,
url.type);
872 if (helpers::shorten_path(path,
url.type)) {
873 url.update_base_pathname(std::move(std::string(path)));
880 url.clear_pathname();
891 ada_log(
"FILE go to path");
903 if constexpr (store_values) {
904 if (fragment.has_value()) {
905 url.update_unencoded_base_hash(*fragment);