Ada 3.3.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_INL_H
6#define ADA_URL_INL_H
7
8#include "ada/url.h"
10
11#include <charconv>
12#include <optional>
13#include <string>
14#if ADA_REGULAR_VISUAL_STUDIO
15#include <intrin.h>
16#endif // ADA_REGULAR_VISUAL_STUDIO
17
18namespace ada {
19[[nodiscard]] ada_really_inline bool url::has_credentials() const noexcept {
20 return !username.empty() || !password.empty();
21}
22[[nodiscard]] ada_really_inline bool url::has_port() const noexcept {
23 return port.has_value();
24}
25[[nodiscard]] inline bool url::cannot_have_credentials_or_port() const {
26 return !host.has_value() || host.value().empty() ||
28}
29[[nodiscard]] inline bool url::has_empty_hostname() const noexcept {
30 if (!host.has_value()) {
31 return false;
32 }
33 return host.value().empty();
34}
35[[nodiscard]] inline bool url::has_hostname() const noexcept {
36 return host.has_value();
37}
38inline std::ostream &operator<<(std::ostream &out, const ada::url &u) {
39 return out << u.to_string();
40}
41
42[[nodiscard]] size_t url::get_pathname_length() const noexcept {
43 return path.size();
44}
45
46[[nodiscard]] constexpr std::string_view url::get_pathname() const noexcept {
47 return path;
48}
49
51 const noexcept {
52 url_components out{};
53
54 // protocol ends with ':'. for example: "https:"
55 out.protocol_end = uint32_t(get_protocol().size());
56
57 // Trailing index is always the next character of the current one.
58 // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
59 size_t running_index = out.protocol_end;
60
61 if (host.has_value()) {
62 // 2 characters for "//" and 1 character for starting index
63 out.host_start = out.protocol_end + 2;
64
65 if (has_credentials()) {
66 out.username_end = uint32_t(out.host_start + username.size());
67
68 out.host_start += uint32_t(username.size());
69
70 if (!password.empty()) {
71 out.host_start += uint32_t(password.size() + 1);
72 }
73
74 out.host_end = uint32_t(out.host_start + host.value().size());
75 } else {
76 out.username_end = out.host_start;
77
78 // Host does not start with "@" if it does not include credentials.
79 out.host_end = uint32_t(out.host_start + host.value().size()) - 1;
80 }
81
82 running_index = out.host_end + 1;
83 } else {
84 // Update host start and end date to the same index, since it does not
85 // exist.
86 out.host_start = out.protocol_end;
87 out.host_end = out.host_start;
88
89 if (!has_opaque_path && path.starts_with("//")) {
90 // If url's host is null, url does not have an opaque path, url's path's
91 // size is greater than 1, and url's path[0] is the empty string, then
92 // append U+002F (/) followed by U+002E (.) to output.
93 running_index = out.protocol_end + 2;
94 } else {
95 running_index = out.protocol_end;
96 }
97 }
98
99 if (port.has_value()) {
100 out.port = *port;
101 running_index += helpers::fast_digit_count(*port) + 1; // Port omits ':'
102 }
103
104 out.pathname_start = uint32_t(running_index);
105
106 running_index += path.size();
107
108 if (query.has_value()) {
109 out.search_start = uint32_t(running_index);
110 running_index += get_search().size();
111 if (get_search().empty()) {
112 running_index++;
113 }
114 }
115
116 if (hash.has_value()) {
117 out.hash_start = uint32_t(running_index);
118 }
119
120 return out;
121}
122
123inline void url::update_base_hostname(std::string_view input) { host = input; }
124
125inline void url::update_unencoded_base_hash(std::string_view input) {
126 // We do the percent encoding
127 hash = unicode::percent_encode(input,
129}
130
131inline void url::update_base_search(std::string_view input,
132 const uint8_t query_percent_encode_set[]) {
133 query = ada::unicode::percent_encode(input, query_percent_encode_set);
134}
135
136inline void url::update_base_search(std::optional<std::string> &&input) {
137 query = std::move(input);
138}
139
140inline void url::update_base_pathname(const std::string_view input) {
141 path = input;
142}
143
144inline void url::update_base_username(const std::string_view input) {
145 username = input;
146}
147
148inline void url::update_base_password(const std::string_view input) {
149 password = input;
150}
151
152inline void url::update_base_port(std::optional<uint16_t> input) {
153 port = input;
154}
155
156constexpr void url::clear_pathname() { path.clear(); }
157
158constexpr void url::clear_search() { query = std::nullopt; }
159
160[[nodiscard]] constexpr bool url::has_hash() const noexcept {
161 return hash.has_value();
162}
163
164[[nodiscard]] constexpr bool url::has_search() const noexcept {
165 return query.has_value();
166}
167
168constexpr void url::set_protocol_as_file() { type = ada::scheme::type::FILE; }
169
170inline void url::set_scheme(std::string &&new_scheme) noexcept {
171 type = ada::scheme::get_scheme_type(new_scheme);
172 // We only move the 'scheme' if it is non-special.
173 if (!is_special()) {
174 non_special_scheme = std::move(new_scheme);
175 }
176}
177
178constexpr void url::copy_scheme(ada::url &&u) noexcept {
179 non_special_scheme = u.non_special_scheme;
180 type = u.type;
181}
182
183constexpr void url::copy_scheme(const ada::url &u) {
184 non_special_scheme = u.non_special_scheme;
185 type = u.type;
186}
187
188[[nodiscard]] ada_really_inline std::string url::get_href() const noexcept {
189 std::string output = get_protocol();
190
191 if (host.has_value()) {
192 output += "//";
193 if (has_credentials()) {
194 output += username;
195 if (!password.empty()) {
196 output += ":" + get_password();
197 }
198 output += "@";
199 }
200 output += host.value();
201 if (port.has_value()) {
202 output += ":" + get_port();
203 }
204 } else if (!has_opaque_path && path.starts_with("//")) {
205 // If url's host is null, url does not have an opaque path, url's path's
206 // size is greater than 1, and url's path[0] is the empty string, then
207 // append U+002F (/) followed by U+002E (.) to output.
208 output += "/.";
209 }
210 output += path;
211 if (query.has_value()) {
212 output += "?" + query.value();
213 }
214 if (hash.has_value()) {
215 output += "#" + hash.value();
216 }
217 return output;
218}
219
220ada_really_inline size_t url::parse_port(std::string_view view,
221 bool check_trailing_content) noexcept {
222 ada_log("parse_port('", view, "') ", view.size());
223 if (!view.empty() && view[0] == '-') {
224 ada_log("parse_port: view[0] == '0' && view.size() > 1");
225 is_valid = false;
226 return 0;
227 }
228 uint16_t parsed_port{};
229 auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port);
230 if (r.ec == std::errc::result_out_of_range) {
231 ada_log("parse_port: r.ec == std::errc::result_out_of_range");
232 is_valid = false;
233 return 0;
234 }
235 ada_log("parse_port: ", parsed_port);
236 const auto consumed = size_t(r.ptr - view.data());
237 ada_log("parse_port: consumed ", consumed);
238 if (check_trailing_content) {
239 is_valid &=
240 (consumed == view.size() || view[consumed] == '/' ||
241 view[consumed] == '?' || (is_special() && view[consumed] == '\\'));
242 }
243 ada_log("parse_port: is_valid = ", is_valid);
244 if (is_valid) {
245 // scheme_default_port can return 0, and we should allow 0 as a base port.
246 auto default_port = scheme_default_port();
247 bool is_port_valid = (default_port == 0 && parsed_port == 0) ||
248 (default_port != parsed_port);
249 port = (r.ec == std::errc() && is_port_valid) ? std::optional(parsed_port)
250 : std::nullopt;
251 }
252 return consumed;
253}
254
255} // namespace ada
256
257#endif // ADA_URL_INL_H
#define ada_really_inline
Definition common_defs.h:81
constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32]
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept
Definition scheme-inl.h:72
Definition ada_idna.h:13
std::ostream & operator<<(std::ostream &out, const ada::url &u)
Definition url-inl.h:38
bool has_opaque_path
Definition url_base.h:55
URL Component representations using offsets.
Generic URL struct reliant on std::string instantiation.
Definition url.h:45
std::string get_search() const noexcept
Definition url.cpp:643
ada_really_inline ada::url_components get_components() const noexcept
Definition url-inl.h:50
bool has_empty_hostname() const noexcept
Definition url-inl.h:29
bool has_port() const noexcept
Definition url-inl.h:22
ada_really_inline bool has_credentials() const noexcept
Definition url-inl.h:19
ada_really_inline size_t get_pathname_length() const noexcept
Definition url-inl.h:42
ada_really_inline std::string get_href() const noexcept
Definition url-inl.h:188
bool has_hostname() const noexcept
Definition url-inl.h:35
constexpr std::string_view get_pathname() const noexcept
Definition url-inl.h:46
const std::string & get_password() const noexcept
Definition url.cpp:654
std::string get_port() const noexcept
Definition url.cpp:658
constexpr bool has_search() const noexcept override
Definition url-inl.h:164
std::string to_string() const override
Definition url.cpp:534
std::string get_protocol() const noexcept
Definition url.cpp:617
constexpr bool has_hash() const noexcept override
Definition url-inl.h:160
Declaration for the URL.
Declaration for the URL Components.