Ada 2.7.8
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
checkers.cpp
Go to the documentation of this file.
1#include "ada/checkers.h"
2#include <algorithm>
3
4namespace ada::checkers {
5
6ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept {
7 // The string is not empty and does not contain upper case ASCII characters.
8 //
9 // Optimization. To be considered as a possible ipv4, the string must end
10 // with 'x' or a lowercase hex character.
11 // Most of the time, this will be false so this simple check will save a lot
12 // of effort.
13 char last_char = view.back();
14 // If the address ends with a dot, we need to prune it (special case).
15 if (last_char == '.') {
16 view.remove_suffix(1);
17 if (view.empty()) {
18 return false;
19 }
20 last_char = view.back();
21 }
22 bool possible_ipv4 = (last_char >= '0' && last_char <= '9') ||
23 (last_char >= 'a' && last_char <= 'f') ||
24 last_char == 'x';
25 if (!possible_ipv4) {
26 return false;
27 }
28 // From the last character, find the last dot.
29 size_t last_dot = view.rfind('.');
30 if (last_dot != std::string_view::npos) {
31 // We have at least one dot.
32 view = view.substr(last_dot + 1);
33 }
37 if (std::all_of(view.begin(), view.end(), ada::checkers::is_digit)) {
38 return true;
39 }
40 // It could be hex (0x), but not if there is a single character.
41 if (view.size() == 1) {
42 return false;
43 }
44 // It must start with 0x.
45 if (!std::equal(view.begin(), view.begin() + 2, "0x")) {
46 return false;
47 }
48 // We must allow "0x".
49 if (view.size() == 2) {
50 return true;
51 }
52 // We have 0x followed by some characters, we need to check that they are
53 // hexadecimals.
54 return std::all_of(view.begin() + 2, view.end(),
55 ada::unicode::is_lowercase_hex);
56}
57
// Lookup table for use with path_signature: every byte value maps to one flag.
//   1 : bytes that need percent encoding in a path, i.e. everything up to and
//       including 0x20 (space), 0x22 ("), 0x23 (#), 0x3c (<), 0x3e (>),
//       0x3f (?), 0x60 (`), 0x7b ({), 0x7d (}) and everything above 0x7e (~)
//   8 : 0x25, the percent sign
//   4 : 0x2e, the dot
//   2 : 0x5c, the backslash
//   0 : every other byte
// The flags are distinct bits, so they can be combined with a bitwise OR.
static constexpr std::array<uint8_t, 256> path_signature_table =
    []() constexpr {
      std::array<uint8_t, 256> result{};
      for (size_t i = 0; i < 256; i++) {
        if (i <= 0x20 || i == 0x22 || i == 0x23 || i == 0x3c || i == 0x3e ||
            i == 0x3f || i == 0x60 || i == 0x7b || i == 0x7d || i > 0x7e) {
          result[i] = 1;
        } else if (i == 0x25) {
          result[i] = 8;
        } else if (i == 0x2e) {
          result[i] = 4;
        } else if (i == 0x5c) {
          result[i] = 2;
        } else {
          result[i] = 0;
        }
      }
      return result;
    }();
80
81ada_really_inline constexpr uint8_t path_signature(
82 std::string_view input) noexcept {
83 // The path percent-encode set is the query percent-encode set and U+003F (?),
84 // U+0060 (`), U+007B ({), and U+007D (}). The query percent-encode set is the
85 // C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+0023 (#),
86 // U+003C (<), and U+003E (>). The C0 control percent-encode set are the C0
87 // controls and all code points greater than U+007E (~).
88 size_t i = 0;
90 for (; i + 7 < input.size(); i += 8) {
99 }
100 for (; i < input.size(); i++) {
102 }
103 return accumulator;
104}
105
106ada_really_inline constexpr bool verify_dns_length(
107 std::string_view input) noexcept {
108 if (input.back() == '.') {
109 if (input.size() > 254) return false;
110 } else if (input.size() > 253)
111 return false;
112
113 size_t start = 0;
114 while (start < input.size()) {
115 auto dot_location = input.find('.', start);
116 // If not found, it's likely the end of the domain
117 if (dot_location == std::string_view::npos) dot_location = input.size();
118
120 if (label_size > 63 || label_size == 0) return false;
121
122 start = dot_location + 1;
123 }
124
125 return true;
126}
127} // namespace ada::checkers
Declarations for URL specific checkers used within Ada.
#define ada_constexpr
#define ada_really_inline
Definition common_defs.h:84
Includes the definitions for validation functions.
static constexpr std::array< uint8_t, 256 > path_signature_table
Definition checkers.cpp:60
constexpr bool is_digit(char x) noexcept
tl::expected< result_type, ada::errors > result
ada_warn_unused ada::result< result_type > parse(std::string_view input, const result_type *base_url=nullptr)