Ada 3.4.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
checkers.cpp
Go to the documentation of this file.
1#include "ada/checkers-inl.h"
2#include "ada/checkers.h"
3#include "ada/unicode-inl.h"
4#include "ada/common_defs.h"
5
6#include <algorithm>
7#include <array>
8#include <string_view>
9
10namespace ada::checkers {
11
12ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept {
13 // The string is not empty and does not contain upper case ASCII characters.
14 //
15 // Optimization. To be considered as a possible ipv4, the string must end
16 // with 'x' or a lowercase hex character.
17 // Most of the time, this will be false so this simple check will save a lot
18 // of effort.
19 // If the address ends with a dot, we need to prune it (special case).
20 if (view.ends_with('.')) {
21 view.remove_suffix(1);
22 if (view.empty()) {
23 return false;
24 }
25 }
26 char last_char = view.back();
27 bool possible_ipv4 = (last_char >= '0' && last_char <= '9') ||
28 (last_char >= 'a' && last_char <= 'f') ||
29 last_char == 'x';
30 if (!possible_ipv4) {
31 return false;
32 }
33 // From the last character, find the last dot.
34 size_t last_dot = view.rfind('.');
35 if (last_dot != std::string_view::npos) {
36 // We have at least one dot.
37 view = view.substr(last_dot + 1);
38 }
42 if (std::ranges::all_of(view, ada::checkers::is_digit)) {
43 return true;
44 }
45 // It could be hex (0x), but not if there is a single character.
46 if (view.size() == 1) {
47 return false;
48 }
49 // It must start with 0x.
50 if (!view.starts_with("0x")) {
51 return false;
52 }
53 // We must allow "0x".
54 if (view.size() == 2) {
55 return true;
56 }
57 // We have 0x followed by some characters, we need to check that they are
58 // hexadecimals.
59 view.remove_prefix(2);
60 return std::ranges::all_of(view, ada::unicode::is_lowercase_hex);
61}
62
// Lookup table used by path_signature: maps every byte value to a flag.
//   1 -> the byte belongs to the set of characters that need percent encoding
//   2 -> the byte is a backslash (0x5c)
//   4 -> the byte is a dot (0x2e)
//   8 -> the byte is a percent sign (0x25)
//   0 -> none of the above
// The table is a constexpr variable, so its initializer is always evaluated
// at compile time.
static constexpr std::array<uint8_t, 256> path_signature_table =
    []() constexpr {
      constexpr uint8_t needs_encoding = 1;
      constexpr uint8_t backslash = 2;
      constexpr uint8_t dot = 4;
      constexpr uint8_t percent = 8;

      // Value-initialized: every entry starts out as 0.
      std::array<uint8_t, 256> table{};
      for (size_t c = 0; c < table.size(); ++c) {
        switch (c) {
          case 0x25:
            table[c] = percent;
            break;
          case 0x2e:
            table[c] = dot;
            break;
          case 0x5c:
            table[c] = backslash;
            break;
          case 0x22:
          case 0x23:
          case 0x3c:
          case 0x3e:
          case 0x3f:
          case 0x5e:
          case 0x60:
          case 0x7b:
          case 0x7d:
            table[c] = needs_encoding;
            break;
          default:
            // Everything up to and including the space, and everything
            // above '~' (0x7e), also needs percent encoding.
            if (c <= 0x20 || c > 0x7e) {
              table[c] = needs_encoding;
            }
            break;
        }
      }
      return table;
    }();
85
86ada_really_inline constexpr uint8_t path_signature(
87 std::string_view input) noexcept {
88 // The path percent-encode set is the query percent-encode set and U+003F (?),
89 // U+0060 (`), U+007B ({), and U+007D (}). The query percent-encode set is the
90 // C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+0023 (#),
91 // U+003C (<), and U+003E (>). The C0 control percent-encode set are the C0
92 // controls and all code points greater than U+007E (~).
93 size_t i = 0;
94 uint8_t accumulator{};
95 for (; i + 7 < input.size(); i += 8) {
96 accumulator |= uint8_t(path_signature_table[uint8_t(input[i])] |
97 path_signature_table[uint8_t(input[i + 1])] |
98 path_signature_table[uint8_t(input[i + 2])] |
99 path_signature_table[uint8_t(input[i + 3])] |
100 path_signature_table[uint8_t(input[i + 4])] |
101 path_signature_table[uint8_t(input[i + 5])] |
102 path_signature_table[uint8_t(input[i + 6])] |
103 path_signature_table[uint8_t(input[i + 7])]);
104 }
105 for (; i < input.size(); i++) {
106 accumulator |= uint8_t(path_signature_table[uint8_t(input[i])]);
107 }
108 return accumulator;
109}
110
111ada_really_inline constexpr bool verify_dns_length(
112 std::string_view input) noexcept {
113 if (input.back() == '.') {
114 if (input.size() > 254) return false;
115 } else if (input.size() > 253)
116 return false;
117
118 size_t start = 0;
119 while (start < input.size()) {
120 auto dot_location = input.find('.', start);
121 // If not found, it's likely the end of the domain
122 if (dot_location == std::string_view::npos) dot_location = input.size();
123
124 auto label_size = dot_location - start;
125 if (label_size > 63 || label_size == 0) return false;
126
127 start = dot_location + 1;
128 }
129
130 return true;
131}
132} // namespace ada::checkers
Definitions for URL specific checkers used within Ada.
Declarations for URL specific checkers used within Ada.
Cross-platform compiler macros and common definitions.
#define ada_really_inline
Definition common_defs.h:85
Includes the definitions for validation functions.
static constexpr std::array< uint8_t, 256 > path_signature_table
Definition checkers.cpp:65
constexpr bool is_digit(char x) noexcept
tl::expected< result_type, ada::errors > result
Definitions for unicode operations.