Ada 2.9.2
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_aggregator-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_AGGREGATOR_INL_H
6#define ADA_URL_AGGREGATOR_INL_H
7
10#include "ada/checkers-inl.h"
11#include "ada/helpers.h"
12#include "ada/unicode.h"
13#include "ada/unicode-inl.h"
14#include "ada/url_aggregator.h"
15#include "ada/url_components.h"
16#include "ada/scheme.h"
17#include "ada/log.h"
18
19#include <optional>
20#include <string_view>
21
22namespace ada {
23
24inline void url_aggregator::update_base_authority(
25 std::string_view base_buffer, const ada::url_components &base) {
26 std::string_view input = base_buffer.substr(
27 base.protocol_end, base.host_start - base.protocol_end);
28 ada_log("url_aggregator::update_base_authority ", input);
29
30 bool input_starts_with_dash = input.starts_with("//");
31 uint32_t diff = components.host_start - components.protocol_end;
32
33 buffer.erase(components.protocol_end,
34 components.host_start - components.protocol_end);
35 components.username_end = components.protocol_end;
36
37 if (input_starts_with_dash) {
38 input.remove_prefix(2);
39 diff += 2; // add "//"
40 buffer.insert(components.protocol_end, "//");
41 components.username_end += 2;
42 }
43
44 size_t password_delimiter = input.find(':');
45
46 // Check if input contains both username and password by checking the
47 // delimiter: ":" A typical input that contains authority would be "user:pass"
48 if (password_delimiter != std::string_view::npos) {
49 // Insert both username and password
50 std::string_view username = input.substr(0, password_delimiter);
51 std::string_view password = input.substr(password_delimiter + 1);
52
53 buffer.insert(components.protocol_end + diff, username);
54 diff += uint32_t(username.size());
55 buffer.insert(components.protocol_end + diff, ":");
56 components.username_end = components.protocol_end + diff;
57 buffer.insert(components.protocol_end + diff + 1, password);
58 diff += uint32_t(password.size()) + 1;
59 } else if (!input.empty()) {
60 // Insert only username
61 buffer.insert(components.protocol_end + diff, input);
62 components.username_end =
63 components.protocol_end + diff + uint32_t(input.size());
64 diff += uint32_t(input.size());
65 }
66
67 components.host_start += diff;
68
69 if (buffer.size() > base.host_start && buffer[base.host_start] != '@') {
70 buffer.insert(components.host_start, "@");
71 diff++;
72 }
73 components.host_end += diff;
74 components.pathname_start += diff;
75 if (components.search_start != url_components::omitted) {
76 components.search_start += diff;
77 }
78 if (components.hash_start != url_components::omitted) {
79 components.hash_start += diff;
80 }
81}
82
// Replaces the fragment of the URL with `input`: any existing fragment is
// truncated away, a '#' is appended, and then the input is written after it
// (percent-encoded when required).
// NOTE(review): the embedded listing numbers skip 87, 95 and 103 in this
// function, so statements are missing from this extract (including the
// argument list of the percent_encode call) — confirm against the upstream
// header before relying on this text.
83inline void url_aggregator::update_unencoded_base_hash(std::string_view input) {
84 ada_log("url_aggregator::update_unencoded_base_hash ", input, " [",
85 input.size(), " bytes], buffer is '", buffer, "' [", buffer.size(),
86 " bytes] components.hash_start = ", components.hash_start);
// The input must not alias the buffer that is about to be modified.
88 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
// Drop the current fragment, if any; the fragment is the tail of the buffer.
89 if (components.hash_start != url_components::omitted) {
90 buffer.resize(components.hash_start);
91 }
// The new fragment starts at the current end of the buffer.
92 components.hash_start = uint32_t(buffer.size());
93 buffer += "#";
94 bool encoding_required = unicode::percent_encode<true>(
96 // When encoding_required is false, then buffer is left unchanged, and percent
97 // encoding was not deemed required.
98 if (!encoding_required) {
99 buffer.append(input);
100 }
101 ada_log("url_aggregator::update_unencoded_base_hash final buffer is '",
102 buffer, "' [", buffer.size(), " bytes]");
104}
105
106ada_really_inline uint32_t url_aggregator::replace_and_resize(
107 uint32_t start, uint32_t end, std::string_view input) {
108 uint32_t current_length = end - start;
109 uint32_t input_size = uint32_t(input.size());
110 uint32_t new_difference = input_size - current_length;
111
112 if (current_length == 0) {
113 buffer.insert(start, input);
114 } else if (input_size == current_length) {
115 buffer.replace(start, input_size, input);
116 } else if (input_size < current_length) {
117 buffer.erase(start, current_length - input_size);
118 buffer.replace(start, input_size, input);
119 } else {
120 buffer.replace(start, current_length, input.substr(0, current_length));
121 buffer.insert(start + current_length, input.substr(current_length));
122 }
123
124 return new_difference;
125}
126
127inline void url_aggregator::update_base_hostname(const std::string_view input) {
128 ada_log("url_aggregator::update_base_hostname ", input, " [", input.size(),
129 " bytes], buffer is '", buffer, "' [", buffer.size(), " bytes]");
131 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
132
133 // This next line is required for when parsing a URL like `foo://`
134 add_authority_slashes_if_needed();
135
136 bool has_credentials = components.protocol_end + 2 < components.host_start;
137 uint32_t new_difference =
138 replace_and_resize(components.host_start, components.host_end, input);
139
140 if (has_credentials) {
141 buffer.insert(components.host_start, "@");
142 new_difference++;
143 }
144 components.host_end += new_difference;
145 components.pathname_start += new_difference;
146 if (components.search_start != url_components::omitted) {
147 components.search_start += new_difference;
148 }
149 if (components.hash_start != url_components::omitted) {
150 components.hash_start += new_difference;
151 }
153}
154
// Returns the number of bytes between pathname_start and the end of the
// pathname. The pathname ends at search_start if there is a query, otherwise
// at hash_start if there is a fragment, otherwise at the end of the buffer.
// NOTE(review): listing line 156 (the rest of the signature) is missing from
// this extract; the log string suggests url_aggregator::get_pathname_length —
// confirm against the upstream header.
155[[nodiscard]] ada_really_inline uint32_t
157 ada_log("url_aggregator::get_pathname_length");
158 uint32_t ending_index = uint32_t(buffer.size());
159 if (components.search_start != url_components::omitted) {
160 ending_index = components.search_start;
161 } else if (components.hash_start != url_components::omitted) {
162 ending_index = components.hash_start;
163 }
164 return ending_index - components.pathname_start;
165}
166
167[[nodiscard]] ada_really_inline bool url_aggregator::is_at_path()
168 const noexcept {
169 return buffer.size() == components.pathname_start;
170}
171
172inline void url_aggregator::update_base_search(std::string_view input) {
173 ada_log("url_aggregator::update_base_search ", input);
175 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
176 if (input.empty()) {
177 clear_search();
178 return;
179 }
180
181 if (input[0] == '?') {
182 input.remove_prefix(1);
183 }
184
185 if (components.hash_start == url_components::omitted) {
186 if (components.search_start == url_components::omitted) {
187 components.search_start = uint32_t(buffer.size());
188 buffer += "?";
189 } else {
190 buffer.resize(components.search_start + 1);
191 }
192
193 buffer.append(input);
194 } else {
195 if (components.search_start == url_components::omitted) {
196 components.search_start = components.hash_start;
197 } else {
198 buffer.erase(components.search_start,
199 components.hash_start - components.search_start);
200 components.hash_start = components.search_start;
201 }
202
203 buffer.insert(components.search_start, "?");
204 buffer.insert(components.search_start + 1, input);
205 components.hash_start += uint32_t(input.size() + 1); // Do not forget `?`
206 }
207
209}
210
211inline void url_aggregator::update_base_search(
212 std::string_view input, const uint8_t query_percent_encode_set[]) {
213 ada_log("url_aggregator::update_base_search ", input,
214 " with encoding parameter ", to_string(), "\n", to_diagram());
216 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
217
218 if (components.hash_start == url_components::omitted) {
219 if (components.search_start == url_components::omitted) {
220 components.search_start = uint32_t(buffer.size());
221 buffer += "?";
222 } else {
223 buffer.resize(components.search_start + 1);
224 }
225
226 bool encoding_required =
227 unicode::percent_encode<true>(input, query_percent_encode_set, buffer);
228 // When encoding_required is false, then buffer is left unchanged, and
229 // percent encoding was not deemed required.
230 if (!encoding_required) {
231 buffer.append(input);
232 }
233 } else {
234 if (components.search_start == url_components::omitted) {
235 components.search_start = components.hash_start;
236 } else {
237 buffer.erase(components.search_start,
238 components.hash_start - components.search_start);
239 components.hash_start = components.search_start;
240 }
241
242 buffer.insert(components.search_start, "?");
243 size_t idx =
244 ada::unicode::percent_encode_index(input, query_percent_encode_set);
245 if (idx == input.size()) {
246 buffer.insert(components.search_start + 1, input);
247 components.hash_start += uint32_t(input.size() + 1); // Do not forget `?`
248 } else {
249 buffer.insert(components.search_start + 1, input, 0, idx);
250 input.remove_prefix(idx);
251 // We only create a temporary string if we need percent encoding and
252 // we attempt to create as small a temporary string as we can.
253 std::string encoded =
254 ada::unicode::percent_encode(input, query_percent_encode_set);
255 buffer.insert(components.search_start + idx + 1, encoded);
256 components.hash_start +=
257 uint32_t(encoded.size() + idx + 1); // Do not forget `?`
258 }
259 }
260
262}
263
264inline void url_aggregator::update_base_pathname(const std::string_view input) {
265 ada_log("url_aggregator::update_base_pathname '", input, "' [", input.size(),
266 " bytes] \n", to_diagram());
267 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
269
270 const bool begins_with_dashdash = input.starts_with("//");
271 if (!begins_with_dashdash && has_dash_dot()) {
272 ada_log("url_aggregator::update_base_pathname has /.: \n", to_diagram());
273 // We must delete the ./
274 delete_dash_dot();
275 }
276
277 if (begins_with_dashdash && !has_opaque_path && !has_authority() &&
278 !has_dash_dot()) {
279 // If url's host is null, url does not have an opaque path, url's path's
280 // size is greater than 1, then append U+002F (/) followed by U+002E (.) to
281 // output.
282 buffer.insert(components.pathname_start, "/.");
283 components.pathname_start += 2;
284 }
285
286 uint32_t difference = replace_and_resize(
287 components.pathname_start,
288 components.pathname_start + get_pathname_length(), input);
289 if (components.search_start != url_components::omitted) {
290 components.search_start += difference;
291 }
292 if (components.hash_start != url_components::omitted) {
293 components.hash_start += difference;
294 }
295 ada_log("url_aggregator::update_base_pathname end '", input, "' [",
296 input.size(), " bytes] \n", to_diagram());
298}
299
// Appends `input` to the end of the current pathname (i.e. right before the
// query or fragment, or at the end of the buffer when neither exists) and
// shifts search_start / hash_start by the inserted length.
// NOTE(review): listing lines 303, 326 and 330 are missing from this extract;
// line 326 presumably opened the assertion whose arguments appear at 327-328
// — confirm against the upstream header.
300inline void url_aggregator::append_base_pathname(const std::string_view input) {
301 ada_log("url_aggregator::append_base_pathname ", input, " ", to_string(),
302 "\n", to_diagram());
304 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
305#if ADA_DEVELOPMENT_CHECKS
306 // computing the expected pathname.
307 std::string path_expected(get_pathname());
308 path_expected.append(input);
309#endif // ADA_DEVELOPMENT_CHECKS
// Locate the end of the pathname: start of the query, else start of the
// fragment, else end of the buffer.
310 uint32_t ending_index = uint32_t(buffer.size());
311 if (components.search_start != url_components::omitted) {
312 ending_index = components.search_start;
313 } else if (components.hash_start != url_components::omitted) {
314 ending_index = components.hash_start;
315 }
316 buffer.insert(ending_index, input);
317
// Everything after the pathname moved right by input.size().
318 if (components.search_start != url_components::omitted) {
319 components.search_start += uint32_t(input.size());
320 }
321 if (components.hash_start != url_components::omitted) {
322 components.hash_start += uint32_t(input.size());
323 }
324#if ADA_DEVELOPMENT_CHECKS
325 std::string path_after = std::string(get_pathname());
327 path_expected, path_after,
328 "append_base_pathname problem after inserting " + std::string(input));
329#endif // ADA_DEVELOPMENT_CHECKS
331}
332
// Replaces the username (the bytes between protocol_end + 2 and username_end)
// with `input`, adds or removes the '@' separator in front of the host as
// needed, and shifts every later component offset.
// NOTE(review): listing lines 336, 341 and 368 are missing from this extract.
// Line 341 presumably declared the local `has_password` that is read in the
// `else if` below — confirm against the upstream header.
333inline void url_aggregator::update_base_username(const std::string_view input) {
334 ada_log("url_aggregator::update_base_username '", input, "' ", to_string(),
335 "\n", to_diagram());
337 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
338
// Credentials live after "//", so make sure it is present.
339 add_authority_slashes_if_needed();
340
// Remember whether an '@' already precedes the host before the buffer changes.
342 bool host_starts_with_at = buffer.size() > components.host_start &&
343 buffer[components.host_start] == '@';
344 uint32_t diff = replace_and_resize(components.protocol_end + 2,
345 components.username_end, input);
346
347 components.username_end += diff;
348 components.host_start += diff;
349
350 if (!input.empty() && !host_starts_with_at) {
// A username now exists but no separator does: add it.
351 buffer.insert(components.host_start, "@");
352 diff++;
353 } else if (input.empty() && host_starts_with_at && !has_password) {
354 // Input is empty, there is no password, and we need to remove "@" from
355 // hostname
356 buffer.erase(components.host_start, 1);
357 diff--;
358 }
359
360 components.host_end += diff;
361 components.pathname_start += diff;
362 if (components.search_start != url_components::omitted) {
363 components.search_start += diff;
364 }
365 if (components.hash_start != url_components::omitted) {
366 components.hash_start += diff;
367 }
369}
370
// Appends `input` to the existing username (inserted at username_end), adds
// an '@' in front of a non-empty host when one is not already there, and
// shifts every later component offset. An empty input only ensures the "//"
// authority prefix exists.
// NOTE(review): listing lines 373, 408 and 412 are missing from this extract;
// line 408 presumably opened the assertion whose arguments appear at 409-410
// — confirm against the upstream header.
371inline void url_aggregator::append_base_username(const std::string_view input) {
372 ada_log("url_aggregator::append_base_username ", input);
374 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
375#if ADA_DEVELOPMENT_CHECKS
376 // computing the expected username.
377 std::string username_expected(get_username());
378 username_expected.append(input);
379#endif // ADA_DEVELOPMENT_CHECKS
380 add_authority_slashes_if_needed();
381
382 // If input is empty, do nothing.
383 if (input.empty()) {
384 return;
385 }
386
387 uint32_t difference = uint32_t(input.size());
388 buffer.insert(components.username_end, input);
389 components.username_end += difference;
390 components.host_start += difference;
391
// Add the '@' separator only when it is absent and the host is non-empty.
392 if (buffer[components.host_start] != '@' &&
393 components.host_start != components.host_end) {
394 buffer.insert(components.host_start, "@");
395 difference++;
396 }
397
398 components.host_end += difference;
399 components.pathname_start += difference;
400 if (components.search_start != url_components::omitted) {
401 components.search_start += difference;
402 }
403 if (components.hash_start != url_components::omitted) {
404 components.hash_start += difference;
405 }
406#if ADA_DEVELOPMENT_CHECKS
407 std::string username_after(get_username());
409 username_expected, username_after,
410 "append_base_username problem after inserting " + std::string(input));
411#endif // ADA_DEVELOPMENT_CHECKS
413}
414
415constexpr void url_aggregator::clear_password() {
416 ada_log("url_aggregator::clear_password ", to_string(), "\n", to_diagram());
418 if (!has_password()) {
419 return;
420 }
421
422 uint32_t diff = components.host_start - components.username_end;
423 buffer.erase(components.username_end, diff);
424 components.host_start -= diff;
425 components.host_end -= diff;
426 components.pathname_start -= diff;
427 if (components.search_start != url_components::omitted) {
428 components.search_start -= diff;
429 }
430 if (components.hash_start != url_components::omitted) {
431 components.hash_start -= diff;
432 }
433}
434
// Sets the password to `input`. An empty input removes the password (and the
// username region too when the username is empty). Otherwise the old password
// is replaced, or ":" + input is inserted after the username when there was
// none; an '@' is added in front of the host when missing, and every later
// component offset is shifted.
// NOTE(review): listing lines 437 and 486 are missing from this extract —
// confirm against the upstream header.
435inline void url_aggregator::update_base_password(const std::string_view input) {
436 ada_log("url_aggregator::update_base_password ", input);
438 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
439
440 add_authority_slashes_if_needed();
441
442 // TODO: Optimization opportunity. Merge the following removal functions.
443 if (input.empty()) {
444 clear_password();
445
446 // Remove username too, if it is empty.
447 if (!has_non_empty_username()) {
448 update_base_username("");
449 }
450
451 return;
452 }
453
454 bool password_exists = has_password();
// `difference` accumulates the net change in buffer length; unsigned
// arithmetic wraps when the new password is shorter than the old one.
455 uint32_t difference = uint32_t(input.size());
456
457 if (password_exists) {
// Length of the old password, excluding the leading ':' which is kept.
458 uint32_t current_length =
459 components.host_start - components.username_end - 1;
460 buffer.erase(components.username_end + 1, current_length);
461 difference -= current_length;
462 } else {
463 buffer.insert(components.username_end, ":");
464 difference++;
465 }
466
// In both branches a ':' now sits at username_end; write right after it.
467 buffer.insert(components.username_end + 1, input);
468 components.host_start += difference;
469
470 // The following line is required to add "@" to hostname. When updating
471 // password if hostname does not start with "@", it is "update_base_password"s
472 // responsibility to set it.
473 if (buffer[components.host_start] != '@') {
474 buffer.insert(components.host_start, "@");
475 difference++;
476 }
477
478 components.host_end += difference;
479 components.pathname_start += difference;
480 if (components.search_start != url_components::omitted) {
481 components.search_start += difference;
482 }
483 if (components.hash_start != url_components::omitted) {
484 components.hash_start += difference;
485 }
487}
488
// Appends `input` to the password. With an existing password the input is
// inserted at host_start (the end of the credentials); otherwise ":" + input
// is inserted at username_end. An '@' is added in front of the host when
// missing, and every later component offset is shifted. An empty input only
// ensures the "//" authority prefix exists.
// NOTE(review): listing lines 492, 534 and 538 are missing from this extract;
// line 534 presumably opened the assertion whose arguments appear at 535-536
// — confirm against the upstream header.
489inline void url_aggregator::append_base_password(const std::string_view input) {
490 ada_log("url_aggregator::append_base_password ", input, " ", to_string(),
491 "\n", to_diagram());
493 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
494#if ADA_DEVELOPMENT_CHECKS
495 // computing the expected password.
496 std::string password_expected = std::string(get_password());
497 password_expected.append(input);
498#endif // ADA_DEVELOPMENT_CHECKS
499 add_authority_slashes_if_needed();
500
501 // If input is empty, do nothing.
502 if (input.empty()) {
503 return;
504 }
505
506 uint32_t difference = uint32_t(input.size());
507 if (has_password()) {
508 buffer.insert(components.host_start, input);
509 } else {
510 difference++; // Increment for ":"
511 buffer.insert(components.username_end, ":");
512 buffer.insert(components.username_end + 1, input);
513 }
514 components.host_start += difference;
515
516 // The following line is required to add "@" to hostname. When updating
517 // password if hostname does not start with "@", it is "append_base_password"s
518 // responsibility to set it.
519 if (buffer[components.host_start] != '@') {
520 buffer.insert(components.host_start, "@");
521 difference++;
522 }
523
524 components.host_end += difference;
525 components.pathname_start += difference;
526 if (components.search_start != url_components::omitted) {
527 components.search_start += difference;
528 }
529 if (components.hash_start != url_components::omitted) {
530 components.hash_start += difference;
531 }
532#if ADA_DEVELOPMENT_CHECKS
533 std::string password_after(get_password());
535 password_expected, password_after,
536 "append_base_password problem after inserting " + std::string(input));
537#endif // ADA_DEVELOPMENT_CHECKS
539}
540
541inline void url_aggregator::update_base_port(uint32_t input) {
542 ada_log("url_aggregator::update_base_port");
544 if (input == url_components::omitted) {
545 clear_port();
546 return;
547 }
548 // calling std::to_string(input.value()) is unfortunate given that the port
549 // value is probably already available as a string.
550 std::string value = helpers::concat(":", std::to_string(input));
551 uint32_t difference = uint32_t(value.size());
552
553 if (components.port != url_components::omitted) {
554 difference -= components.pathname_start - components.host_end;
555 buffer.erase(components.host_end,
556 components.pathname_start - components.host_end);
557 }
558
559 buffer.insert(components.host_end, value);
560 components.pathname_start += difference;
561 if (components.search_start != url_components::omitted) {
562 components.search_start += difference;
563 }
564 if (components.hash_start != url_components::omitted) {
565 components.hash_start += difference;
566 }
567 components.port = input;
569}
570
// Removes the port text, i.e. the bytes in [host_end, pathname_start), pulls
// later component offsets back by that length and marks the port as omitted.
// Does nothing when the port is already omitted.
// NOTE(review): the signature line (listing line 571) and lines 573/587 are
// missing from this extract; the log string suggests
// url_aggregator::clear_port — confirm against the upstream header.
572 ada_log("url_aggregator::clear_port");
574 if (components.port == url_components::omitted) {
575 return;
576 }
577 uint32_t length = components.pathname_start - components.host_end;
578 buffer.erase(components.host_end, length);
579 components.pathname_start -= length;
580 if (components.search_start != url_components::omitted) {
581 components.search_start -= length;
582 }
583 if (components.hash_start != url_components::omitted) {
584 components.hash_start -= length;
585 }
586 components.port = url_components::omitted;
588}
589
590[[nodiscard]] inline uint32_t url_aggregator::retrieve_base_port() const {
591 ada_log("url_aggregator::retrieve_base_port");
592 return components.port;
593}
594
// Removes the query text from the buffer. Without a fragment the buffer is
// truncated at search_start; with a fragment the bytes in
// [search_start, hash_start) are erased and hash_start moves to search_start.
// NOTE(review): the signature line (listing line 595) and lines 597, 610, 613
// and 617 are missing from this extract. The visible code never resets
// search_start to omitted; that presumably happened on a missing line —
// confirm against the upstream header.
596 ada_log("url_aggregator::clear_search");
598 if (components.search_start == url_components::omitted) {
599 return;
600 }
601
602 if (components.hash_start == url_components::omitted) {
603 buffer.resize(components.search_start);
604 } else {
605 buffer.erase(components.search_start,
606 components.hash_start - components.search_start);
607 components.hash_start = components.search_start;
608 }
609
611
612#if ADA_DEVELOPMENT_CHECKS
614 "search should have been cleared on buffer=" + buffer +
615 " with " + components.to_string() + "\n" + to_diagram());
616#endif
618}
619
// Removes the fragment by truncating the buffer at hash_start. Does nothing
// when there is no fragment.
// NOTE(review): the signature line (listing line 620) and lines 622, 627, 630
// and 634 are missing from this extract. The visible code never resets
// hash_start to omitted; that presumably happened on a missing line — confirm
// against the upstream header.
621 ada_log("url_aggregator::clear_hash");
623 if (components.hash_start == url_components::omitted) {
624 return;
625 }
626 buffer.resize(components.hash_start);
628
629#if ADA_DEVELOPMENT_CHECKS
631 "hash should have been cleared on buffer=" + buffer +
632 " with " + components.to_string() + "\n" + to_diagram());
633#endif
635}
636
// Erases the pathname from the buffer, together with a "/." marker sitting
// directly between host_end and pathname_start if one is present, and pulls
// search_start / hash_start back by the number of bytes removed.
// NOTE(review): listing lines 639, 664 and 668 are missing from this extract;
// line 664 presumably opened the assertion whose arguments appear at 665-666
// — confirm against the upstream header.
637constexpr void url_aggregator::clear_pathname() {
638 ada_log("url_aggregator::clear_pathname");
// The pathname ends at the query, else at the fragment, else at buffer end.
640 uint32_t ending_index = uint32_t(buffer.size());
641 if (components.search_start != url_components::omitted) {
642 ending_index = components.search_start;
643 } else if (components.hash_start != url_components::omitted) {
644 ending_index = components.hash_start;
645 }
646 uint32_t pathname_length = ending_index - components.pathname_start;
647 buffer.erase(components.pathname_start, pathname_length);
648 uint32_t difference = pathname_length;
// Exactly two bytes between host_end and pathname_start that read "/." are
// removed as well.
649 if (components.pathname_start == components.host_end + 2 &&
650 buffer[components.host_end] == '/' &&
651 buffer[components.host_end + 1] == '.') {
652 components.pathname_start -= 2;
653 buffer.erase(components.host_end, 2);
654 difference += 2;
655 }
656 if (components.search_start != url_components::omitted) {
657 components.search_start -= difference;
658 }
659 if (components.hash_start != url_components::omitted) {
660 components.hash_start -= difference;
661 }
662 ada_log("url_aggregator::clear_pathname completed, running checks...");
663#if ADA_DEVELOPMENT_CHECKS
665 "pathname should have been cleared on buffer=" + buffer +
666 " with " + components.to_string() + "\n" + to_diagram());
667#endif
669 ada_log("url_aggregator::clear_pathname completed, running checks... ok");
670}
671
// Erases the host text from the buffer while keeping a leading '@' separator,
// sets host_end to the new end of the host region and pulls later component
// offsets back. Does nothing when the URL has no authority.
// NOTE(review): listing lines 674, 698, 703 and 706 are missing from this
// extract; 698 and 703 presumably opened the assertions whose arguments
// appear at 699-700 and 704-705 — confirm against the upstream header.
672constexpr void url_aggregator::clear_hostname() {
673 ada_log("url_aggregator::clear_hostname");
675 if (!has_authority()) {
676 return;
677 }
678 ADA_ASSERT_TRUE(has_authority());
679
680 uint32_t hostname_length = components.host_end - components.host_start;
681 uint32_t start = components.host_start;
682
683 // If hostname starts with "@", we should not remove that character.
684 if (hostname_length > 0 && buffer[start] == '@') {
685 start++;
686 hostname_length--;
687 }
688 buffer.erase(start, hostname_length);
// host_start is left untouched; only the end of the host region moves.
689 components.host_end = start;
690 components.pathname_start -= hostname_length;
691 if (components.search_start != url_components::omitted) {
692 components.search_start -= hostname_length;
693 }
694 if (components.hash_start != url_components::omitted) {
695 components.hash_start -= hostname_length;
696 }
697#if ADA_DEVELOPMENT_CHECKS
699 "hostname should have been cleared on buffer=" + buffer +
700 " with " + components.to_string() + "\n" + to_diagram());
701#endif
702 ADA_ASSERT_TRUE(has_authority());
704 "hostname should have been cleared on buffer=" + buffer +
705 " with " + components.to_string() + "\n" + to_diagram());
707}
708
709[[nodiscard]] constexpr bool url_aggregator::has_hash() const noexcept {
710 ada_log("url_aggregator::has_hash");
711 return components.hash_start != url_components::omitted;
712}
713
714[[nodiscard]] constexpr bool url_aggregator::has_search() const noexcept {
715 ada_log("url_aggregator::has_search");
716 return components.search_start != url_components::omitted;
717}
718
// Reports whether the URL carries credentials.
// NOTE(review): listing line 721, which presumably held the return statement,
// is missing from this extract, so the actual condition cannot be seen here —
// confirm against the upstream header.
719constexpr bool url_aggregator::has_credentials() const noexcept {
720 ada_log("url_aggregator::has_credentials");
722}
723
724constexpr bool url_aggregator::cannot_have_credentials_or_port() const {
725 ada_log("url_aggregator::cannot_have_credentials_or_port");
726 return type == ada::scheme::type::FILE ||
727 components.host_start == components.host_end;
728}
729
// Returns a const reference to the component-offset record of this URL.
// NOTE(review): listing line 731 (the rest of the signature) is missing from
// this extract; presumably this is url_aggregator::get_components — confirm
// against the upstream header.
730[[nodiscard]] ada_really_inline const ada::url_components &
732 return components;
733}
734
735[[nodiscard]] constexpr bool ada::url_aggregator::has_authority()
736 const noexcept {
737 ada_log("url_aggregator::has_authority");
738 // Performance: instead of doing this potentially expensive check, we could
739 // have a boolean in the struct.
740 return components.protocol_end + 2 <= components.host_start &&
741 helpers::substring(buffer, components.protocol_end,
742 components.protocol_end + 2) == "//";
743}
744
745inline void ada::url_aggregator::add_authority_slashes_if_needed() noexcept {
746 ada_log("url_aggregator::add_authority_slashes_if_needed");
747 ADA_ASSERT_TRUE(validate());
748 // Protocol setter will insert `http:` to the URL. It is up to hostname setter
749 // to insert
750 // `//` initially to the buffer, since it depends on the hostname existence.
751 if (has_authority()) {
752 return;
753 }
754 // Performance: the common case is components.protocol_end == buffer.size()
755 // Optimization opportunity: in many cases, the "//" is part of the input and
756 // the insert could be fused with another insert.
757 buffer.insert(components.protocol_end, "//");
758 components.username_end += 2;
759 components.host_start += 2;
760 components.host_end += 2;
761 components.pathname_start += 2;
762 if (components.search_start != url_components::omitted) {
763 components.search_start += 2;
764 }
765 if (components.hash_start != url_components::omitted) {
766 components.hash_start += 2;
767 }
768 ADA_ASSERT_TRUE(validate());
769}
770
771constexpr void ada::url_aggregator::reserve(uint32_t capacity) {
772 buffer.reserve(capacity);
773}
774
775constexpr bool url_aggregator::has_non_empty_username() const noexcept {
776 ada_log("url_aggregator::has_non_empty_username");
777 return components.protocol_end + 2 < components.username_end;
778}
779
780constexpr bool url_aggregator::has_non_empty_password() const noexcept {
781 ada_log("url_aggregator::has_non_empty_password");
782 return components.host_start - components.username_end > 0;
783}
784
785constexpr bool url_aggregator::has_password() const noexcept {
786 ada_log("url_aggregator::has_password");
787 // This function does not care about the length of the password
788 return components.host_start > components.username_end &&
789 buffer[components.username_end] == ':';
790}
791
792constexpr bool url_aggregator::has_empty_hostname() const noexcept {
793 if (!has_hostname()) {
794 return false;
795 }
796 if (components.host_start == components.host_end) {
797 return true;
798 }
799 if (components.host_end > components.host_start + 1) {
800 return false;
801 }
802 return components.username_end != components.host_start;
803}
804
805constexpr bool url_aggregator::has_hostname() const noexcept {
806 return has_authority();
807}
808
809constexpr bool url_aggregator::has_port() const noexcept {
810 ada_log("url_aggregator::has_port");
811 // A URL cannot have a username/password/port if its host is null or the empty
812 // string, or its scheme is "file".
813 return has_hostname() && components.pathname_start != components.host_end;
814}
815
816[[nodiscard]] constexpr bool url_aggregator::has_dash_dot() const noexcept {
817 // If url's host is null, url does not have an opaque path, url's path's size
818 // is greater than 1, and url's path[0] is the empty string, then append
819 // U+002F (/) followed by U+002E (.) to output.
820 ada_log("url_aggregator::has_dash_dot");
821#if ADA_DEVELOPMENT_CHECKS
822 // If pathname_start and host_end are exactly two characters apart, then we
823 // either have a one-digit port such as http://test.com:5?param=1 or else we
824 // have a /.: sequence such as "non-spec:/.//". We test that this is the case.
825 if (components.pathname_start == components.host_end + 2) {
826 ADA_ASSERT_TRUE((buffer[components.host_end] == '/' &&
827 buffer[components.host_end + 1] == '.') ||
828 (buffer[components.host_end] == ':' &&
829 checkers::is_digit(buffer[components.host_end + 1])));
830 }
831 if (components.pathname_start == components.host_end + 2 &&
832 buffer[components.host_end] == '/' &&
833 buffer[components.host_end + 1] == '.') {
834 ADA_ASSERT_TRUE(components.pathname_start + 1 < buffer.size());
835 ADA_ASSERT_TRUE(buffer[components.pathname_start] == '/');
836 ADA_ASSERT_TRUE(buffer[components.pathname_start + 1] == '/');
837 }
838#endif
839 // Performance: it should be uncommon for components.pathname_start ==
840 // components.host_end + 2 to be true. So we put this check first in the
841 // sequence. Most times, we do not have an opaque path. Checking for '/.' is
842 // more expensive, but should be uncommon.
843 return components.pathname_start == components.host_end + 2 &&
844 !has_opaque_path && buffer[components.host_end] == '/' &&
845 buffer[components.host_end + 1] == '.';
846}
847
848[[nodiscard]] constexpr std::string_view url_aggregator::get_href()
849 const noexcept ada_lifetime_bound {
850 ada_log("url_aggregator::get_href");
851 return buffer;
852}
853
854ada_really_inline size_t url_aggregator::parse_port(
855 std::string_view view, bool check_trailing_content) noexcept {
856 ada_log("url_aggregator::parse_port('", view, "') ", view.size());
857 if (!view.empty() && view[0] == '-') {
858 ada_log("parse_port: view[0] == '0' && view.size() > 1");
859 is_valid = false;
860 return 0;
861 }
862 uint16_t parsed_port{};
863 auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port);
864 if (r.ec == std::errc::result_out_of_range) {
865 ada_log("parse_port: r.ec == std::errc::result_out_of_range");
866 is_valid = false;
867 return 0;
868 }
869 ada_log("parse_port: ", parsed_port);
870 const size_t consumed = size_t(r.ptr - view.data());
871 ada_log("parse_port: consumed ", consumed);
872 if (check_trailing_content) {
873 is_valid &=
874 (consumed == view.size() || view[consumed] == '/' ||
875 view[consumed] == '?' || (is_special() && view[consumed] == '\\'));
876 }
877 ada_log("parse_port: is_valid = ", is_valid);
878 if (is_valid) {
879 ada_log("parse_port", r.ec == std::errc());
880 // scheme_default_port can return 0, and we should allow 0 as a base port.
881 auto default_port = scheme_default_port();
882 bool is_port_valid = (default_port == 0 && parsed_port == 0) ||
883 (default_port != parsed_port);
884 if (r.ec == std::errc() && is_port_valid) {
885 update_base_port(parsed_port);
886 } else {
887 clear_port();
888 }
889 }
890 return consumed;
891}
892
893constexpr void url_aggregator::set_protocol_as_file() {
894 ada_log("url_aggregator::set_protocol_as_file ");
897 // next line could overflow but unsigned arithmetic has well-defined
898 // overflows.
899 uint32_t new_difference = 5 - components.protocol_end;
900
901 if (buffer.empty()) {
902 buffer.append("file:");
903 } else {
904 buffer.erase(0, components.protocol_end);
905 buffer.insert(0, "file:");
906 }
907 components.protocol_end = 5;
908
909 // Update the rest of the components.
910 components.username_end += new_difference;
911 components.host_start += new_difference;
912 components.host_end += new_difference;
913 components.pathname_start += new_difference;
914 if (components.search_start != url_components::omitted) {
915 components.search_start += new_difference;
916 }
917 if (components.hash_start != url_components::omitted) {
918 components.hash_start += new_difference;
919 }
921}
922
923[[nodiscard]] constexpr bool url_aggregator::validate() const noexcept {
924 if (!is_valid) {
925 return true;
926 }
927 if (!components.check_offset_consistency()) {
928 ada_log("url_aggregator::validate inconsistent components \n",
929 to_diagram());
930 return false;
931 }
932 // We have a credible components struct, but let us investigate more
933 // carefully:
946 if (components.protocol_end == url_components::omitted) {
947 ada_log("url_aggregator::validate omitted protocol_end \n", to_diagram());
948 return false;
949 }
950 if (components.username_end == url_components::omitted) {
951 ada_log("url_aggregator::validate omitted username_end \n", to_diagram());
952 return false;
953 }
954 if (components.host_start == url_components::omitted) {
955 ada_log("url_aggregator::validate omitted host_start \n", to_diagram());
956 return false;
957 }
958 if (components.host_end == url_components::omitted) {
959 ada_log("url_aggregator::validate omitted host_end \n", to_diagram());
960 return false;
961 }
962 if (components.pathname_start == url_components::omitted) {
963 ada_log("url_aggregator::validate omitted pathname_start \n", to_diagram());
964 return false;
965 }
966
967 if (components.protocol_end > buffer.size()) {
968 ada_log("url_aggregator::validate protocol_end overflow \n", to_diagram());
969 return false;
970 }
971 if (components.username_end > buffer.size()) {
972 ada_log("url_aggregator::validate username_end overflow \n", to_diagram());
973 return false;
974 }
975 if (components.host_start > buffer.size()) {
976 ada_log("url_aggregator::validate host_start overflow \n", to_diagram());
977 return false;
978 }
979 if (components.host_end > buffer.size()) {
980 ada_log("url_aggregator::validate host_end overflow \n", to_diagram());
981 return false;
982 }
983 if (components.pathname_start > buffer.size()) {
984 ada_log("url_aggregator::validate pathname_start overflow \n",
985 to_diagram());
986 return false;
987 }
988
989 if (components.protocol_end > 0) {
990 if (buffer[components.protocol_end - 1] != ':') {
991 ada_log(
992 "url_aggregator::validate missing : at the end of the protocol \n",
993 to_diagram());
994 return false;
995 }
996 }
997
998 if (components.username_end != buffer.size() &&
999 components.username_end > components.protocol_end + 2) {
1000 if (buffer[components.username_end] != ':' &&
1001 buffer[components.username_end] != '@') {
1002 ada_log(
1003 "url_aggregator::validate missing : or @ at the end of the username "
1004 "\n",
1005 to_diagram());
1006 return false;
1007 }
1008 }
1009
1010 if (components.host_start != buffer.size()) {
1011 if (components.host_start > components.username_end) {
1012 if (buffer[components.host_start] != '@') {
1013 ada_log(
1014 "url_aggregator::validate missing @ at the end of the password \n",
1015 to_diagram());
1016 return false;
1017 }
1018 } else if (components.host_start == components.username_end &&
1019 components.host_end > components.host_start) {
1020 if (components.host_start == components.protocol_end + 2) {
1021 if (buffer[components.protocol_end] != '/' ||
1022 buffer[components.protocol_end + 1] != '/') {
1023 ada_log(
1024 "url_aggregator::validate missing // between protocol and host "
1025 "\n",
1026 to_diagram());
1027 return false;
1028 }
1029 } else {
1030 if (components.host_start > components.protocol_end &&
1031 buffer[components.host_start] != '@') {
1032 ada_log(
1033 "url_aggregator::validate missing @ at the end of the username "
1034 "\n",
1035 to_diagram());
1036 return false;
1037 }
1038 }
1039 } else {
1040 if (components.host_end != components.host_start) {
1041 ada_log("url_aggregator::validate expected omitted host \n",
1042 to_diagram());
1043 return false;
1044 }
1045 }
1046 }
1047 if (components.host_end != buffer.size() &&
1048 components.pathname_start > components.host_end) {
1049 if (components.pathname_start == components.host_end + 2 &&
1050 buffer[components.host_end] == '/' &&
1051 buffer[components.host_end + 1] == '.') {
1052 if (components.pathname_start + 1 >= buffer.size() ||
1053 buffer[components.pathname_start] != '/' ||
1054 buffer[components.pathname_start + 1] != '/') {
1055 ada_log(
1056 "url_aggregator::validate expected the path to begin with // \n",
1057 to_diagram());
1058 return false;
1059 }
1060 } else if (buffer[components.host_end] != ':') {
1061 ada_log("url_aggregator::validate missing : at the port \n",
1062 to_diagram());
1063 return false;
1064 }
1065 }
1066 if (components.pathname_start != buffer.size() &&
1067 components.pathname_start < components.search_start &&
1068 components.pathname_start < components.hash_start && !has_opaque_path) {
1069 if (buffer[components.pathname_start] != '/') {
1070 ada_log("url_aggregator::validate missing / at the path \n",
1071 to_diagram());
1072 return false;
1073 }
1074 }
1075 if (components.search_start != url_components::omitted) {
1076 if (buffer[components.search_start] != '?') {
1077 ada_log("url_aggregator::validate missing ? at the search \n",
1078 to_diagram());
1079 return false;
1080 }
1081 }
1082 if (components.hash_start != url_components::omitted) {
1083 if (buffer[components.hash_start] != '#') {
1084 ada_log("url_aggregator::validate missing # at the hash \n",
1085 to_diagram());
1086 return false;
1087 }
1088 }
1089
1090 return true;
1091}
1092
1093[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname()
1094 const noexcept ada_lifetime_bound {
1095 ada_log("url_aggregator::get_pathname pathname_start = ",
1096 components.pathname_start, " buffer.size() = ", buffer.size(),
1097 " components.search_start = ", components.search_start,
1098 " components.hash_start = ", components.hash_start);
1099 auto ending_index = uint32_t(buffer.size());
1100 if (components.search_start != url_components::omitted) {
1101 ending_index = components.search_start;
1102 } else if (components.hash_start != url_components::omitted) {
1103 ending_index = components.hash_start;
1104 }
1105 return helpers::substring(buffer, components.pathname_start, ending_index);
1106}
1107
1108inline std::ostream &operator<<(std::ostream &out,
1109 const ada::url_aggregator &u) {
1110 return out << u.to_string();
1111}
1112
1113void url_aggregator::update_host_to_base_host(
1114 const std::string_view input) noexcept {
1115 ada_log("url_aggregator::update_host_to_base_host ", input);
1116 ADA_ASSERT_TRUE(validate());
1117 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
1118 if (type != ada::scheme::type::FILE) {
1119 // Let host be the result of host parsing host_view with url is not special.
1120 if (input.empty() && !is_special()) {
1121 if (has_hostname()) {
1122 clear_hostname();
1123 } else if (has_dash_dot()) {
1124 add_authority_slashes_if_needed();
1125 delete_dash_dot();
1126 }
1127 return;
1128 }
1129 }
1130 update_base_hostname(input);
1131 ADA_ASSERT_TRUE(validate());
1132 return;
1133}
1134} // namespace ada
1135
1136#endif // ADA_URL_AGGREGATOR_INL_H
Definitions of the character sets used by unicode functions.
Declaration of the character sets used by unicode functions.
Definitions for URL specific checkers used within Ada.
#define ADA_ASSERT_TRUE(COND)
#define ada_lifetime_bound
#define ADA_ASSERT_EQUAL(LHS, RHS, MESSAGE)
#define ada_really_inline
Definition common_defs.h:77
Definitions for helper functions used within Ada.
constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32]
constexpr bool is_digit(char x) noexcept
constexpr int32_t base
ada_really_inline size_t percent_encode_index(const std::string_view input, const uint8_t character_set[])
Definition unicode-inl.h:19
Definition ada_idna.h:13
std::ostream & operator<<(std::ostream &out, const ada::url &u)
Definition url-inl.h:38
Declarations for the URL scheme.
Lightweight URL struct.
ada_really_inline const ada::url_components & get_components() const noexcept
constexpr bool has_non_empty_password() const noexcept
constexpr bool validate() const noexcept
void clear_search() override
std::string_view get_hostname() const noexcept ada_lifetime_bound
std::string to_string() const override
std::string_view get_hash() const noexcept ada_lifetime_bound
std::string to_diagram() const
constexpr bool has_hostname() const noexcept
constexpr bool has_search() const noexcept override
constexpr std::string_view get_href() const noexcept ada_lifetime_bound
constexpr bool has_empty_hostname() const noexcept
constexpr bool has_password() const noexcept
std::string_view get_search() const noexcept ada_lifetime_bound
ada_really_inline uint32_t get_pathname_length() const noexcept
constexpr std::string_view get_pathname() const noexcept ada_lifetime_bound
std::string_view get_password() const noexcept ada_lifetime_bound
constexpr bool has_hash() const noexcept override
constexpr bool has_port() const noexcept
ada_really_inline constexpr bool has_credentials() const noexcept
constexpr bool has_non_empty_username() const noexcept
std::string_view get_username() const noexcept ada_lifetime_bound
bool is_valid
Definition url_base.h:50
bool has_opaque_path
Definition url_base.h:55
URL Component representations using offsets.
std::string to_string() const
static constexpr uint32_t omitted
constexpr bool check_offset_consistency() const noexcept
Definitions for unicode operations.
Definitions for all unicode specific functions.
Declaration for the basic URL definitions.
Declaration for the URL Components.