17template <
bool has_state_overr
ide>
24 input.remove_suffix(1);
114inline void url_aggregator::copy_scheme(
const url_aggregator&
u)
noexcept {
115 ada_log(
"url_aggregator::copy_scheme ",
u.buffer);
121 buffer.erase(0, components.protocol_end);
122 buffer.insert(0,
u.get_protocol());
123 components.protocol_end =
u.components.protocol_end;
144inline void url_aggregator::set_scheme_from_view_with_colon(
146 ada_log(
"url_aggregator::set_scheme_from_view_with_colon ",
156 if (buffer.empty()) {
159 buffer.erase(0, components.protocol_end);
178inline void url_aggregator::set_scheme(std::string_view
new_scheme)
noexcept {
188 if (buffer.empty()) {
189 buffer.append(helpers::concat(
new_scheme,
":"));
191 buffer.erase(0, components.protocol_end);
192 buffer.insert(0, helpers::concat(
new_scheme,
":"));
215 helpers::remove_ascii_tab_or_newline(
view);
227 std::string::iterator
pointer =
228 std::find_if_not(
view.begin(),
view.end(), unicode::is_alnum_plus);
241 if (cannot_have_credentials_or_port()) {
247 update_base_username(
input);
250 update_base_username(ada::unicode::percent_encode(
261 if (cannot_have_credentials_or_port()) {
267 update_base_password(
input);
270 update_base_password(ada::unicode::percent_encode(
281 if (cannot_have_credentials_or_port()) {
285 helpers::remove_ascii_tab_or_newline(
trimmed);
291 if (ada::unicode::is_c0_control_or_space(
trimmed.front())) {
295 if (
input.find_first_of(
"0123456789") == std::string_view::npos) {
335 if (unicode::has_tabs_or_newline(
input)) {
339 helpers::remove_ascii_tab_or_newline(
tmp_buffer);
348 update_base_pathname(
"/");
364 update_base_pathname(
"/");
376 helpers::strip_trailing_spaces_from_opaque_path(*
this);
382 helpers::remove_ascii_tab_or_newline(
new_value);
401 helpers::strip_trailing_spaces_from_opaque_path(*
this);
407 helpers::remove_ascii_tab_or_newline(
new_value);
416 ada_log(
"url_aggregator::set_href, success :",
out.has_value());
419 ada_log(
"url_aggregator::set_href, parsed ",
out->to_string());
424 return out.has_value();
436 if (
input[0] ==
'[') {
438 if (
input.back() !=
']') {
445 input.remove_prefix(1);
446 input.remove_suffix(1);
447 return parse_ipv6(
input);
453 return parse_opaque_host(
input);
465 unicode::contains_forbidden_domain_code_point_or_upper(
input.data(),
474 input.find(
"xn-") == std::string_view::npos) {
476 update_base_hostname(
input);
478 ada_log(
"parse_host fast path ipv4");
488 ada_log(
"parse_host calling to_ascii");
489 std::optional<std::string> host = std::string(
get_hostname());
492 ada_log(
"parse_host to_ascii returns false");
495 ada_log(
"parse_host to_ascii succeeded ", *host,
" [", host->size(),
498 if (std::any_of(host.value().begin(), host.value().end(),
499 ada::unicode::is_forbidden_domain_code_point)) {
505 if (checkers::is_ipv4(host.value())) {
506 ada_log(
"parse_host got ipv4 ", *host);
507 return parse_ipv4(host.value(),
false);
510 update_base_hostname(host.value());
515template <
bool overr
ide_hostname>
516bool url_aggregator::set_host_or_hostname(
const std::string_view
input) {
517 ada_log(
"url_aggregator::set_host_or_hostname ",
input);
531 helpers::remove_ascii_tab_or_newline(
_host);
566 }
else if (has_dash_dot()) {
567 add_authority_slashes_if_needed();
577 }
else if (has_dash_dot()) {
585 if (
location != std::string_view::npos) {
601 if (helpers::substring(buffer, components.
host_start,
602 components.
host_end) ==
"localhost") {
625 ada_log(
"url_aggregator::get_origin");
642 return helpers::concat(
out->get_protocol(),
"//",
out->get_host());
652 ada_log(
"url_aggregator::get_username");
654 return helpers::substring(buffer, components.
protocol_end + 2,
661 ada_log(
"url_aggregator::get_password");
663 return helpers::substring(buffer, components.
username_end + 1,
670 ada_log(
"url_aggregator::get_port");
674 return helpers::substring(buffer, components.
host_end + 1,
679 ada_log(
"url_aggregator::get_hash");
685 if (buffer.size() - components.
hash_start <= 1) {
688 return helpers::substring(buffer, components.
hash_start);
692 ada_log(
"url_aggregator::get_host");
704 return std::string_view();
710 ada_log(
"url_aggregator::get_hostname");
720 return helpers::substring(buffer,
start, components.
host_end);
724 ada_log(
"url_aggregator::get_pathname pathname_start = ",
727 " components.hash_start = ", components.
hash_start);
738 ada_log(
"url_aggregator::get_search");
755 ada_log(
"url_aggregator::get_protocol");
756 return helpers::substring(buffer, 0, components.
protocol_end);
760 ada_log(
"url_aggregator::to_string buffer:", buffer,
" [", buffer.size(),
767 auto back = std::back_insert_iterator(
answer);
770 answer.append(
"\t\"buffer\":\"");
771 helpers::encode_json(buffer, back);
774 answer.append(
"\t\"protocol\":\"");
775 helpers::encode_json(get_protocol(), back);
778 if (has_credentials()) {
779 answer.append(
"\t\"username\":\"");
780 helpers::encode_json(get_username(), back);
782 answer.append(
"\t\"password\":\"");
783 helpers::encode_json(get_password(), back);
787 answer.append(
"\t\"host\":\"");
788 helpers::encode_json(get_host(), back);
791 answer.append(
"\t\"path\":\"");
792 helpers::encode_json(get_pathname(), back);
794 answer.append(
"\t\"opaque path\":");
795 answer.append((has_opaque_path ?
"true" :
"false"));
799 answer.append(
"\t\"query\":\"");
800 helpers::encode_json(get_search(), back);
804 answer.append(
"\t\"fragment\":\"");
805 helpers::encode_json(get_hash(), back);
813 return std::to_string(
offset);
817 answer.append(
"\t\"protocol_end\":");
821 answer.append(
"\t\"username_end\":");
825 answer.append(
"\t\"host_start\":");
829 answer.append(
"\t\"host_end\":");
833 answer.append(
"\t\"port\":");
837 answer.append(
"\t\"pathname_start\":");
841 answer.append(
"\t\"search_start\":");
845 answer.append(
"\t\"hash_start\":");
859bool url_aggregator::parse_ipv4(std::string_view
input,
bool in_place) {
861 " bytes], overlaps with buffer: ",
862 helpers::overlaps(
input, buffer) ?
"yes" :
"no");
866 input.remove_suffix(1);
877 ((
input.length() > 2) && (
input[2] ==
'.')))) {
880 input.remove_prefix(2);
882 std::from_chars_result
r;
884 ada_log(
"parse_ipv4 trying to parse hex number");
887 }
else if ((
input.length() >= 2) &&
input[0] ==
'0' &&
889 ada_log(
"parse_ipv4 trying to parse octal number");
893 ada_log(
"parse_ipv4 trying to parse decimal number");
898 if (
r.ec != std::errc()) {
899 ada_log(
"parse_ipv4 parsing failed");
923 input.remove_prefix(1);
927 ada_log(
"parse_ipv4 found invalid (more than 4 numbers or empty) ");
937 "url_aggregator::parse_ipv4 completed and was already correct in the "
942 ada_log(
"url_aggregator::parse_ipv4 completed and we need to update it");
947 update_base_hostname(
955bool url_aggregator::parse_ipv6(std::string_view
input) {
967 std::array<uint16_t, 8>
address{};
979 if (
input[0] ==
':') {
983 ada_log(
"parse_ipv6 starts with : but the rest does not start with :");
998 ada_log(
"parse_ipv6 piece_index == 8");
1006 ada_log(
"parse_ipv6 compress is non-null");
1024 unicode::is_ascii_hex_digit(*
pointer)) {
1026 value =
uint16_t(value * 0x10 + unicode::convert_hex_to_binary(*
pointer));
1035 ada_log(
"parse_ipv6 length is 0");
1044 ada_log(
"parse_ipv6 piece_index > 6");
1064 ada_log(
"parse_ipv6 Otherwise, validation error, return failure");
1072 "parse_ipv6 If c is not an ASCII digit, validation error, return "
1088 ada_log(
"parse_ipv6 if ipv4Piece is 0, validation error");
1098 ada_log(
"parse_ipv6 ipv4_piece > 255");
1137 "parse_ipv6 If c is the EOF code point, validation error, return "
1146 "parse_ipv6 Otherwise, if c is not the EOF code point, validation "
1147 "error, return failure");
1179 "parse_ipv6 if compress is null and pieceIndex is not 8, validation "
1180 "error, return failure");
1193bool url_aggregator::parse_opaque_host(std::string_view
input) {
1198 ada::unicode::is_forbidden_host_code_point)) {
1207 update_base_hostname(
input);
1210 update_base_hostname(ada::unicode::percent_encode(
1224 answer.append(std::to_string(buffer.size()));
1225 answer.append(
" bytes]");
1229 line1.resize(buffer.size(),
' ');
1239 if (components.
host_end != buffer.size()) {
1242 if (components.
host_start != buffer.size()) {
1262 line2.append(
" hash_start");
1275 line3.append(
" search_start ");
1288 line4.append(
" pathname_start ");
1295 if (components.
host_end != buffer.size()) {
1302 line5.append(
" host_end ");
1309 if (components.
host_start != buffer.size()) {
1316 line6.append(
" host_start ");
1330 line7.append(
" username_end ");
1344 line8.append(
" protocol_end ");
1351 answer.append(
"note: hash omitted\n");
1354 answer.append(
"note: search omitted\n");
1357 answer.append(
"warning: protocol_end overflows\n");
1360 answer.append(
"warning: username_end overflows\n");
1363 answer.append(
"warning: host_start overflows\n");
1365 if (components.
host_end > buffer.size()) {
1366 answer.append(
"warning: host_end overflows\n");
1369 answer.append(
"warning: pathname_start overflows\n");
1379 ada_log(
"url_aggregator::validate inconsistent components \n",
1430 if (components.
host_end > buffer.size()) {
1435 ada_log(
"url_aggregator::validate pathname_start overflow \n",
1443 "url_aggregator::validate missing : at the end of the protocol \n",
1454 "url_aggregator::validate missing : or @ at the end of the username "
1461 if (components.
host_start != buffer.size()) {
1465 "url_aggregator::validate missing @ at the end of the password \n",
1475 "url_aggregator::validate missing // between protocol and host "
1484 "url_aggregator::validate missing @ at the end of the username "
1492 ada_log(
"url_aggregator::validate expected omitted host \n",
1498 if (components.
host_end != buffer.size() &&
1501 buffer[components.
host_end] ==
'/' &&
1502 buffer[components.
host_end + 1] ==
'.') {
1507 "url_aggregator::validate expected the path to begin with // \n",
1511 }
else if (buffer[components.
host_end] !=
':') {
1512 ada_log(
"url_aggregator::validate missing : at the port \n",
1521 ada_log(
"url_aggregator::validate missing / at the path \n",
1528 ada_log(
"url_aggregator::validate missing ? at the search \n",
1535 ada_log(
"url_aggregator::validate missing # at the hash \n",
1544void url_aggregator::delete_dash_dot() {
1545 ada_log(
"url_aggregator::delete_dash_dot");
1548 buffer.erase(components.
host_end, 2);
1560inline void url_aggregator::consume_prepared_path(std::string_view
input) {
1561 ada_log(
"url_aggregator::consume_prepared_path ",
input);
1596 if (
input[0] !=
'.') {
1598 if (
slashdot == std::string_view::npos) {
1609 ada_log(
"parse_path trivial");
1624 ada_log(
"parse_prepared_path fast");
1640 update_base_pathname(path);
1644 if (path.back() ==
'/') {
1645 update_base_pathname(path);
1650 path.resize(path.rfind(
'/') + 1);
1651 update_base_pathname(path);
1658 update_base_pathname(path);
1683 ?
input.find_first_of(
"/\\")
1686 if (
location != std::string_view::npos) {
1694 ada::unicode::percent_encode<false>(
1698 if (unicode::is_double_dot_path_segment(
path_buffer)) {
1699 if ((helpers::shorten_path(path, type) ||
special) &&
1700 location == std::string_view::npos) {
1703 }
else if (unicode::is_single_dot_path_segment(
path_buffer) &&
1704 (
location == std::string_view::npos)) {
1708 else if (!unicode::is_single_dot_path_segment(
path_buffer)) {
1725 if (
location == std::string_view::npos) {
1726 update_base_pathname(path);
Includes all definitions for Ada.
Definitions for URL specific checkers used within Ada.
Declarations for URL specific checkers used within Ada.
#define ADA_ASSERT_TRUE(COND)
#define ada_really_inline
Definitions for helper functions used within Ada.
Definitions for user-facing functions for parsing a URL and its components.
constexpr uint8_t QUERY_PERCENT_ENCODE[32]
constexpr uint8_t SPECIAL_QUERY_PERCENT_ENCODE[32]
constexpr uint8_t PATH_PERCENT_ENCODE[32]
constexpr uint8_t C0_CONTROL_PERCENT_ENCODE[32]
constexpr uint8_t USERINFO_PERCENT_ENCODE[32]
constexpr bool is_windows_drive_letter(std::string_view input) noexcept
bool has_hex_prefix(std::string_view input)
constexpr bool is_alpha(char x) noexcept
constexpr bool is_digit(char x) noexcept
ada_really_inline bool begins_with(std::string_view view, std::string_view prefix)
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept
constexpr uint16_t get_special_port(std::string_view scheme) noexcept
std::string ipv6(const std::array< uint16_t, 8 > &address) noexcept
std::string ipv4(uint64_t address) noexcept
ada_really_inline size_t percent_encode_index(const std::string_view input, const uint8_t character_set[])
template ada::result< url_aggregator > parse< url_aggregator >(std::string_view input, const url_aggregator *base_url)
tl::expected< result_type, ada::errors > result
ada_warn_unused ada::result< result_type > parse(std::string_view input, const result_type *base_url=nullptr)
Definitions for the parser.
Declarations for the URL scheme.
bool has_non_empty_username() const noexcept
std::string_view get_pathname() const noexcept
void set_hash(std::string_view input)
void clear_search() override
std::string_view get_host() const noexcept
bool has_hostname() const noexcept
bool has_non_empty_password() const noexcept
ada_really_inline bool has_credentials() const noexcept
std::string_view get_search() const noexcept
std::string_view get_username() const noexcept
std::string to_string() const override
std::string to_diagram() const
bool set_protocol(std::string_view input)
std::string get_origin() const noexcept override
bool validate() const noexcept
std::string_view get_protocol() const noexcept
std::string_view get_port() const noexcept
std::string_view get_hostname() const noexcept
bool has_valid_domain() const noexcept override
bool set_hostname(std::string_view input)
std::string_view get_href() const noexcept
bool set_password(std::string_view input)
bool set_pathname(std::string_view input)
std::string_view get_password() const noexcept
bool set_href(std::string_view input)
void set_search(std::string_view input)
bool has_port() const noexcept
std::string_view get_hash() const noexcept
bool set_host(std::string_view input)
bool set_port(std::string_view input)
bool set_username(std::string_view input)
ada_really_inline bool is_special() const noexcept
bool check_offset_consistency() const noexcept
static constexpr uint32_t omitted
Definitions for unicode operations.
Inline functions for url aggregator.
Declaration for the basic URL definitions.
Declaration for the URL Components.