1 | // |
2 | // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) |
3 | // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com) |
4 | // |
5 | // Distributed under the Boost Software License, Version 1.0. (See accompanying |
6 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
7 | // |
8 | // Official repository: https://github.com/boostorg/url |
9 | // |
10 | |
11 | #ifndef BOOST_URL_IMPL_URL_BASE_IPP |
12 | #define BOOST_URL_IMPL_URL_BASE_IPP |
13 | |
14 | #include <boost/url/detail/config.hpp> |
15 | #include <boost/url/url_base.hpp> |
16 | #include <boost/url/encode.hpp> |
17 | #include <boost/url/error.hpp> |
18 | #include <boost/url/host_type.hpp> |
19 | #include <boost/url/scheme.hpp> |
20 | #include <boost/url/url_view.hpp> |
21 | #include <boost/url/detail/any_params_iter.hpp> |
22 | #include <boost/url/detail/any_segments_iter.hpp> |
23 | #include "detail/decode.hpp" |
24 | #include <boost/url/detail/encode.hpp> |
25 | #include <boost/url/detail/except.hpp> |
26 | #include "detail/normalize.hpp" |
27 | #include "detail/path.hpp" |
28 | #include "detail/print.hpp" |
29 | #include <boost/url/grammar/ci_string.hpp> |
30 | #include <boost/url/rfc/authority_rule.hpp> |
31 | #include <boost/url/rfc/query_rule.hpp> |
32 | #include "rfc/detail/charsets.hpp" |
33 | #include "rfc/detail/host_rule.hpp" |
34 | #include "rfc/detail/ipvfuture_rule.hpp" |
35 | #include "boost/url/rfc/detail/path_rules.hpp" |
36 | #include "rfc/detail/port_rule.hpp" |
37 | #include "rfc/detail/scheme_rule.hpp" |
38 | #include "rfc/detail/userinfo_rule.hpp" |
39 | #include <boost/url/grammar/parse.hpp> |
40 | #include "detail/move_chars.hpp" |
41 | #include <cstring> |
42 | #include <iostream> |
43 | #include <stdexcept> |
44 | #include <utility> |
45 | |
46 | namespace boost { |
47 | namespace urls { |
48 | |
49 | //------------------------------------------------ |
50 | |
51 | // these objects help handle the cases |
52 | // where the user passes in strings that |
53 | // come from inside the url buffer. |
54 | |
55 | url_base:: |
56 | op_t:: |
57 | ~op_t() |
58 | { |
59 | if(old) |
60 | u.cleanup(*this); |
61 | u.check_invariants(); |
62 | } |
63 | |
64 | url_base:: |
65 | op_t:: |
66 | op_t( |
67 | url_base& impl_, |
68 | core::string_view* s0_, |
69 | core::string_view* s1_) noexcept |
70 | : u(impl_) |
71 | , s0(s0_) |
72 | , s1(s1_) |
73 | { |
74 | u.check_invariants(); |
75 | } |
76 | |
77 | void |
78 | url_base:: |
79 | op_t:: |
80 | move( |
81 | char* dest, |
82 | char const* src, |
83 | std::size_t n) noexcept |
84 | { |
85 | if(! n) |
86 | return; |
87 | if(s0) |
88 | { |
89 | if(s1) |
90 | return detail::move_chars( |
91 | dest, src, n, args&: *s0, args&: *s1); |
92 | return detail::move_chars( |
93 | dest, src, n, args&: *s0); |
94 | } |
95 | detail::move_chars( |
96 | dest, src, n); |
97 | } |
98 | |
99 | //------------------------------------------------ |
100 | |
101 | // construct reference |
102 | url_base:: |
103 | url_base( |
104 | detail::url_impl const& impl) noexcept |
105 | : url_view_base(impl) |
106 | { |
107 | } |
108 | |
109 | void |
110 | url_base:: |
111 | reserve_impl(std::size_t n) |
112 | { |
113 | op_t op(*this); |
114 | reserve_impl(n, op); |
115 | if(s_) |
116 | s_[size()] = '\0'; |
117 | } |
118 | |
119 | // make a copy of u |
120 | void |
121 | url_base:: |
122 | copy(url_view_base const& u) |
123 | { |
124 | if (this == &u) |
125 | return; |
126 | op_t op(*this); |
127 | if(u.size() == 0) |
128 | { |
129 | clear(); |
130 | return; |
131 | } |
132 | reserve_impl( |
133 | u.size(), op); |
134 | impl_ = u.impl_; |
135 | impl_.cs_ = s_; |
136 | impl_.from_ = {from::url}; |
137 | std::memcpy(dest: s_, |
138 | src: u.data(), n: u.size()); |
139 | s_[size()] = '\0'; |
140 | } |
141 | |
142 | //------------------------------------------------ |
143 | // |
144 | // Scheme |
145 | // |
146 | //------------------------------------------------ |
147 | |
148 | url_base& |
149 | url_base:: |
150 | set_scheme(core::string_view s) |
151 | { |
152 | set_scheme_impl( |
153 | s, string_to_scheme(s)); |
154 | return *this; |
155 | } |
156 | |
157 | url_base& |
158 | url_base:: |
159 | set_scheme_id(urls::scheme id) |
160 | { |
161 | if(id == urls::scheme::unknown) |
162 | detail::throw_invalid_argument(); |
163 | if(id == urls::scheme::none) |
164 | return remove_scheme(); |
165 | set_scheme_impl(to_string(s: id), id); |
166 | return *this; |
167 | } |
168 | |
169 | url_base& |
170 | url_base:: |
171 | remove_scheme() |
172 | { |
173 | op_t op(*this); |
174 | auto const sn = impl_.len(id_scheme); |
175 | if(sn == 0) |
176 | return *this; |
177 | auto const po = impl_.offset(id_path); |
178 | auto fseg = first_segment(); |
179 | bool const encode_colon = |
180 | !has_authority() && |
181 | impl_.nseg_ > 0 && |
182 | s_[po] != '/' && |
183 | fseg.contains(c: ':'); |
184 | if(!encode_colon) |
185 | { |
186 | // just remove the scheme |
187 | resize_impl(id_scheme, 0, op); |
188 | impl_.scheme_ = urls::scheme::none; |
189 | check_invariants(); |
190 | return *this; |
191 | } |
192 | // encode any ":" in the first path segment |
193 | BOOST_ASSERT(sn >= 2); |
194 | auto pn = impl_.len(id_path); |
195 | std::size_t cn = 0; |
196 | for (char c: fseg) |
197 | cn += c == ':'; |
198 | std::size_t new_size = |
199 | size() - sn + 2 * cn; |
200 | bool need_resize = new_size > size(); |
201 | if (need_resize) |
202 | { |
203 | resize_impl( |
204 | id_path, pn + 2 * cn, op); |
205 | } |
206 | // move [id_scheme, id_path) left |
207 | op.move( |
208 | dest: s_, |
209 | src: s_ + sn, |
210 | n: po - sn); |
211 | // move [id_path, id_query) left |
212 | auto qo = impl_.offset(id_query); |
213 | op.move( |
214 | dest: s_ + po - sn, |
215 | src: s_ + po, |
216 | n: qo - po); |
217 | // move [id_query, id_end) left |
218 | op.move( |
219 | dest: s_ + qo - sn + 2 * cn, |
220 | src: s_ + qo, |
221 | n: impl_.offset(id_end) - qo); |
222 | |
223 | // adjust part offsets. |
224 | // (po and qo are invalidated) |
225 | if (need_resize) |
226 | { |
227 | impl_.adjust_left(first: id_user, last: id_end, n: sn); |
228 | } |
229 | else |
230 | { |
231 | impl_.adjust_left(first: id_user, last: id_path, n: sn); |
232 | impl_.adjust_left(first: id_query, last: id_end, n: sn - 2 * cn); |
233 | } |
234 | if (encode_colon) |
235 | { |
236 | // move the 2nd, 3rd, ... segments |
237 | auto begin = s_ + impl_.offset(id_path); |
238 | auto it = begin; |
239 | auto end = begin + pn; |
240 | while (*it != '/' && |
241 | it != end) |
242 | ++it; |
243 | // we don't need op here because this is |
244 | // an internal operation |
245 | std::memmove(dest: it + (2 * cn), src: it, n: end - it); |
246 | |
247 | // move 1st segment |
248 | auto src = s_ + impl_.offset(id_path) + pn; |
249 | auto dest = s_ + impl_.offset(id_query); |
250 | src -= end - it; |
251 | dest -= end - it; |
252 | pn -= end - it; |
253 | do { |
254 | --src; |
255 | --dest; |
256 | if (*src != ':') |
257 | { |
258 | *dest = *src; |
259 | } |
260 | else |
261 | { |
262 | // use uppercase as required by |
263 | // syntax-based normalization |
264 | *dest-- = 'A'; |
265 | *dest-- = '3'; |
266 | *dest = '%'; |
267 | } |
268 | --pn; |
269 | } while (pn); |
270 | } |
271 | s_[size()] = '\0'; |
272 | impl_.scheme_ = urls::scheme::none; |
273 | return *this; |
274 | } |
275 | |
276 | //------------------------------------------------ |
277 | // |
278 | // Authority |
279 | // |
280 | //------------------------------------------------ |
281 | |
282 | url_base& |
283 | url_base:: |
284 | set_encoded_authority( |
285 | pct_string_view s) |
286 | { |
287 | op_t op(*this, &detail::ref(s)); |
288 | authority_view a = grammar::parse( |
289 | s, r: authority_rule |
290 | ).value(BOOST_URL_POS); |
291 | auto n = s.size() + 2; |
292 | auto const need_slash = |
293 | ! is_path_absolute() && |
294 | impl_.len(id_path) > 0; |
295 | if(need_slash) |
296 | ++n; |
297 | auto dest = resize_impl( |
298 | id_user, id_path, n, op); |
299 | dest[0] = '/'; |
300 | dest[1] = '/'; |
301 | std::memcpy(dest: dest + 2, |
302 | src: s.data(), n: s.size()); |
303 | if(need_slash) |
304 | dest[n - 1] = '/'; |
305 | impl_.apply_authority(a); |
306 | if(need_slash) |
307 | impl_.adjust_right( |
308 | first: id_query, last: id_end, n: 1); |
309 | return *this; |
310 | } |
311 | |
312 | url_base& |
313 | url_base:: |
314 | remove_authority() |
315 | { |
316 | if(! has_authority()) |
317 | return *this; |
318 | |
319 | op_t op(*this); |
320 | auto path = impl_.get(id_path); |
321 | bool const need_dot = path.starts_with(x: "//" ); |
322 | if(need_dot) |
323 | { |
324 | // prepend "/.", can't throw |
325 | auto p = resize_impl( |
326 | id_user, id_path, 2, op); |
327 | p[0] = '/'; |
328 | p[1] = '.'; |
329 | impl_.split(id_user, 0); |
330 | impl_.split(id_pass, 0); |
331 | impl_.split(id_host, 0); |
332 | impl_.split(id_port, 0); |
333 | } |
334 | else |
335 | { |
336 | resize_impl( |
337 | id_user, id_path, 0, op); |
338 | } |
339 | impl_.host_type_ = |
340 | urls::host_type::none; |
341 | return *this; |
342 | } |
343 | |
344 | //------------------------------------------------ |
345 | // |
346 | // Userinfo |
347 | // |
348 | //------------------------------------------------ |
349 | |
350 | url_base& |
351 | url_base:: |
352 | set_userinfo( |
353 | core::string_view s) |
354 | { |
355 | op_t op(*this, &s); |
356 | encoding_opts opt; |
357 | auto const n = encoded_size( |
358 | s, unreserved: detail::userinfo_chars, opt); |
359 | auto dest = set_userinfo_impl(n, op); |
360 | encode( |
361 | dest, |
362 | size: n, |
363 | s, |
364 | unreserved: detail::userinfo_chars, |
365 | opt); |
366 | auto const pos = impl_.get( |
367 | id_user, id_host |
368 | ).find_first_of(c: ':'); |
369 | if(pos != core::string_view::npos) |
370 | { |
371 | impl_.split(id_user, pos); |
372 | // find ':' in plain string |
373 | auto const pos2 = |
374 | s.find_first_of(c: ':'); |
375 | impl_.decoded_[id_user] = |
376 | pos2 - 1; |
377 | impl_.decoded_[id_pass] = |
378 | s.size() - pos2; |
379 | } |
380 | else |
381 | { |
382 | impl_.decoded_[id_user] = s.size(); |
383 | impl_.decoded_[id_pass] = 0; |
384 | } |
385 | return *this; |
386 | } |
387 | |
388 | url_base& |
389 | url_base:: |
390 | set_encoded_userinfo( |
391 | pct_string_view s) |
392 | { |
393 | op_t op(*this, &detail::ref(s)); |
394 | encoding_opts opt; |
395 | auto const pos = s.find_first_of(c: ':'); |
396 | if(pos != core::string_view::npos) |
397 | { |
398 | // user:pass |
399 | auto const s0 = s.substr(pos: 0, n: pos); |
400 | auto const s1 = s.substr(pos: pos + 1); |
401 | auto const n0 = |
402 | detail::re_encoded_size_unsafe( |
403 | s: s0, |
404 | unreserved: detail::user_chars, |
405 | opt); |
406 | auto const n1 = |
407 | detail::re_encoded_size_unsafe(s: s1, |
408 | unreserved: detail::password_chars, |
409 | opt); |
410 | auto dest = |
411 | set_userinfo_impl(n: n0 + n1 + 1, op); |
412 | impl_.decoded_[id_user] = |
413 | detail::re_encode_unsafe( |
414 | dest_&: dest, |
415 | end: dest + n0, |
416 | s: s0, |
417 | unreserved: detail::user_chars, |
418 | opt); |
419 | *dest++ = ':'; |
420 | impl_.decoded_[id_pass] = |
421 | detail::re_encode_unsafe( |
422 | dest_&: dest, |
423 | end: dest + n1, |
424 | s: s1, |
425 | unreserved: detail::password_chars, |
426 | opt); |
427 | impl_.split(id_user, 2 + n0); |
428 | } |
429 | else |
430 | { |
431 | // user |
432 | auto const n = |
433 | detail::re_encoded_size_unsafe( |
434 | s, unreserved: detail::user_chars, opt); |
435 | auto dest = set_userinfo_impl(n, op); |
436 | impl_.decoded_[id_user] = |
437 | detail::re_encode_unsafe( |
438 | dest_&: dest, |
439 | end: dest + n, |
440 | s, |
441 | unreserved: detail::user_chars, |
442 | opt); |
443 | impl_.split(id_user, 2 + n); |
444 | impl_.decoded_[id_pass] = 0; |
445 | } |
446 | return *this; |
447 | } |
448 | |
449 | url_base& |
450 | url_base:: |
451 | remove_userinfo() noexcept |
452 | { |
453 | if(impl_.len(id_pass) == 0) |
454 | return *this; // no userinfo |
455 | |
456 | op_t op(*this); |
457 | // keep authority '//' |
458 | resize_impl( |
459 | id_user, id_host, 2, op); |
460 | impl_.decoded_[id_user] = 0; |
461 | impl_.decoded_[id_pass] = 0; |
462 | return *this; |
463 | } |
464 | |
465 | //------------------------------------------------ |
466 | |
467 | url_base& |
468 | url_base:: |
469 | set_user(core::string_view s) |
470 | { |
471 | op_t op(*this, &s); |
472 | encoding_opts opt; |
473 | auto const n = encoded_size( |
474 | s, unreserved: detail::user_chars, opt); |
475 | auto dest = set_user_impl(n, op); |
476 | encode_unsafe( |
477 | dest, |
478 | size: n, |
479 | s, |
480 | unreserved: detail::user_chars, |
481 | opt); |
482 | impl_.decoded_[id_user] = s.size(); |
483 | return *this; |
484 | } |
485 | |
486 | url_base& |
487 | url_base:: |
488 | set_encoded_user( |
489 | pct_string_view s) |
490 | { |
491 | op_t op(*this, &detail::ref(s)); |
492 | encoding_opts opt; |
493 | auto const n = |
494 | detail::re_encoded_size_unsafe( |
495 | s, unreserved: detail::user_chars, opt); |
496 | auto dest = set_user_impl(n, op); |
497 | impl_.decoded_[id_user] = |
498 | detail::re_encode_unsafe( |
499 | dest_&: dest, |
500 | end: dest + n, |
501 | s, |
502 | unreserved: detail::user_chars, |
503 | opt); |
504 | BOOST_ASSERT( |
505 | impl_.decoded_[id_user] == |
506 | s.decoded_size()); |
507 | return *this; |
508 | } |
509 | |
510 | //------------------------------------------------ |
511 | |
512 | url_base& |
513 | url_base:: |
514 | set_password(core::string_view s) |
515 | { |
516 | op_t op(*this, &s); |
517 | encoding_opts opt; |
518 | auto const n = encoded_size( |
519 | s, unreserved: detail::password_chars, opt); |
520 | auto dest = set_password_impl(n, op); |
521 | encode_unsafe( |
522 | dest, |
523 | size: n, |
524 | s, |
525 | unreserved: detail::password_chars, |
526 | opt); |
527 | impl_.decoded_[id_pass] = s.size(); |
528 | return *this; |
529 | } |
530 | |
531 | url_base& |
532 | url_base:: |
533 | set_encoded_password( |
534 | pct_string_view s) |
535 | { |
536 | op_t op(*this, &detail::ref(s)); |
537 | encoding_opts opt; |
538 | auto const n = |
539 | detail::re_encoded_size_unsafe( |
540 | s, |
541 | unreserved: detail::password_chars, |
542 | opt); |
543 | auto dest = set_password_impl(n, op); |
544 | impl_.decoded_[id_pass] = |
545 | detail::re_encode_unsafe( |
546 | dest_&: dest, |
547 | end: dest + n, |
548 | s, |
549 | unreserved: detail::password_chars, |
550 | opt); |
551 | BOOST_ASSERT( |
552 | impl_.decoded_[id_pass] == |
553 | s.decoded_size()); |
554 | return *this; |
555 | } |
556 | |
557 | url_base& |
558 | url_base:: |
559 | remove_password() noexcept |
560 | { |
561 | auto const n = impl_.len(id_pass); |
562 | if(n < 2) |
563 | return *this; // no password |
564 | |
565 | op_t op(*this); |
566 | // clear password, retain '@' |
567 | auto dest = |
568 | resize_impl(id_pass, 1, op); |
569 | dest[0] = '@'; |
570 | impl_.decoded_[id_pass] = 0; |
571 | return *this; |
572 | } |
573 | |
574 | //------------------------------------------------ |
575 | // |
576 | // Host |
577 | // |
578 | //------------------------------------------------ |
579 | /* |
580 | host_type host_type() // ipv4, ipv6, ipvfuture, name |
581 | |
582 | std::string host() // return encoded_host().decode() |
583 | pct_string_view encoded_host() // return host part, as-is |
584 | std::string host_address() // return encoded_host_address().decode() |
585 | pct_string_view encoded_host_address() // ipv4, ipv6, ipvfut, or encoded name, no brackets |
586 | |
587 | ipv4_address host_ipv4_address() // return ipv4_address or {} |
588 | ipv6_address host_ipv6_address() // return ipv6_address or {} |
589 | core::string_view host_ipvfuture() // return ipvfuture or {} |
590 | std::string host_name() // return decoded name or "" |
591 | pct_string_view encoded_host_name() // return encoded host name or "" |
592 | |
593 | -------------------------------------------------- |
594 | |
595 | set_host( core::string_view ) // set host part from plain text |
596 | set_encoded_host( pct_string_view ) // set host part from encoded text |
597 | set_host_address( core::string_view ) // set host from ipv4, ipv6, ipvfut, or plain reg-name string |
598 | set_encoded_host_address( pct_string_view ) // set host from ipv4, ipv6, ipvfut, or encoded reg-name string |
599 | |
600 | set_host_ipv4( ipv4_address ) // set ipv4 |
601 | set_host_ipv6( ipv6_address ) // set ipv6 |
602 | set_host_ipvfuture( core::string_view ) // set ipvfuture |
603 | set_host_name( core::string_view ) // set name from plain |
604 | set_encoded_host_name( pct_string_view ) // set name from encoded |
605 | */ |
606 | |
607 | // set host part from plain text |
608 | url_base& |
609 | url_base:: |
610 | set_host( |
611 | core::string_view s) |
612 | { |
613 | if( s.size() > 2 && |
614 | s.front() == '[' && |
615 | s.back() == ']') |
616 | { |
617 | // IP-literal |
618 | { |
619 | // IPv6-address |
620 | auto rv = parse_ipv6_address( |
621 | s: s.substr(pos: 1, n: s.size() - 2)); |
622 | if(rv) |
623 | return set_host_ipv6(*rv); |
624 | } |
625 | { |
626 | // IPvFuture |
627 | auto rv = grammar::parse( |
628 | s: s.substr(pos: 1, n: s.size() - 2), |
629 | r: detail::ipvfuture_rule); |
630 | if(rv) |
631 | return set_host_ipvfuture(rv->str); |
632 | } |
633 | } |
634 | else if(s.size() >= 7) // "0.0.0.0" |
635 | { |
636 | // IPv4-address |
637 | auto rv = parse_ipv4_address(s); |
638 | if(rv) |
639 | return set_host_ipv4(*rv); |
640 | } |
641 | |
642 | // reg-name |
643 | op_t op(*this, &s); |
644 | encoding_opts opt; |
645 | auto const n = encoded_size( |
646 | s, unreserved: detail::host_chars, opt); |
647 | auto dest = set_host_impl(n, op); |
648 | encode( |
649 | dest, |
650 | size: impl_.get(id_path).data() - dest, |
651 | s, |
652 | unreserved: detail::host_chars, |
653 | opt); |
654 | impl_.decoded_[id_host] = s.size(); |
655 | impl_.host_type_ = |
656 | urls::host_type::name; |
657 | return *this; |
658 | } |
659 | |
660 | // set host part from encoded text |
661 | url_base& |
662 | url_base:: |
663 | set_encoded_host( |
664 | pct_string_view s) |
665 | { |
666 | if( s.size() > 2 && |
667 | s.front() == '[' && |
668 | s.back() == ']') |
669 | { |
670 | // IP-literal |
671 | { |
672 | // IPv6-address |
673 | auto rv = parse_ipv6_address( |
674 | s: s.substr(pos: 1, n: s.size() - 2)); |
675 | if(rv) |
676 | return set_host_ipv6(*rv); |
677 | } |
678 | { |
679 | // IPvFuture |
680 | auto rv = grammar::parse( |
681 | s: s.substr(pos: 1, n: s.size() - 2), |
682 | r: detail::ipvfuture_rule); |
683 | if(rv) |
684 | return set_host_ipvfuture(rv->str); |
685 | } |
686 | } |
687 | else if(s.size() >= 7) // "0.0.0.0" |
688 | { |
689 | // IPv4-address |
690 | auto rv = parse_ipv4_address(s); |
691 | if(rv) |
692 | return set_host_ipv4(*rv); |
693 | } |
694 | |
695 | // reg-name |
696 | op_t op(*this, &detail::ref(s)); |
697 | encoding_opts opt; |
698 | auto const n = detail::re_encoded_size_unsafe( |
699 | s, unreserved: detail::host_chars, opt); |
700 | auto dest = set_host_impl(n, op); |
701 | impl_.decoded_[id_host] = |
702 | detail::re_encode_unsafe( |
703 | dest_&: dest, |
704 | end: impl_.get(id_path).data(), |
705 | s, |
706 | unreserved: detail::host_chars, |
707 | opt); |
708 | BOOST_ASSERT(impl_.decoded_[id_host] == |
709 | s.decoded_size()); |
710 | impl_.host_type_ = |
711 | urls::host_type::name; |
712 | return *this; |
713 | } |
714 | |
715 | url_base& |
716 | url_base:: |
717 | set_host_address( |
718 | core::string_view s) |
719 | { |
720 | { |
721 | // IPv6-address |
722 | auto rv = parse_ipv6_address(s); |
723 | if(rv) |
724 | return set_host_ipv6(*rv); |
725 | } |
726 | { |
727 | // IPvFuture |
728 | auto rv = grammar::parse( |
729 | s, r: detail::ipvfuture_rule); |
730 | if(rv) |
731 | return set_host_ipvfuture(rv->str); |
732 | } |
733 | if(s.size() >= 7) // "0.0.0.0" |
734 | { |
735 | // IPv4-address |
736 | auto rv = parse_ipv4_address(s); |
737 | if(rv) |
738 | return set_host_ipv4(*rv); |
739 | } |
740 | |
741 | // reg-name |
742 | op_t op(*this, &s); |
743 | encoding_opts opt; |
744 | auto const n = encoded_size( |
745 | s, unreserved: detail::host_chars, opt); |
746 | auto dest = set_host_impl(n, op); |
747 | encode( |
748 | dest, |
749 | size: impl_.get(id_path).data() - dest, |
750 | s, |
751 | unreserved: detail::host_chars, |
752 | opt); |
753 | impl_.decoded_[id_host] = s.size(); |
754 | impl_.host_type_ = |
755 | urls::host_type::name; |
756 | return *this; |
757 | } |
758 | |
759 | url_base& |
760 | url_base:: |
761 | set_encoded_host_address( |
762 | pct_string_view s) |
763 | { |
764 | { |
765 | // IPv6-address |
766 | auto rv = parse_ipv6_address(s); |
767 | if(rv) |
768 | return set_host_ipv6(*rv); |
769 | } |
770 | { |
771 | // IPvFuture |
772 | auto rv = grammar::parse( |
773 | s, r: detail::ipvfuture_rule); |
774 | if(rv) |
775 | return set_host_ipvfuture(rv->str); |
776 | } |
777 | if(s.size() >= 7) // "0.0.0.0" |
778 | { |
779 | // IPv4-address |
780 | auto rv = parse_ipv4_address(s); |
781 | if(rv) |
782 | return set_host_ipv4(*rv); |
783 | } |
784 | |
785 | // reg-name |
786 | op_t op(*this, &detail::ref(s)); |
787 | encoding_opts opt; |
788 | auto const n = detail::re_encoded_size_unsafe( |
789 | s, unreserved: detail::host_chars, opt); |
790 | auto dest = set_host_impl(n, op); |
791 | impl_.decoded_[id_host] = |
792 | detail::re_encode_unsafe( |
793 | dest_&: dest, |
794 | end: impl_.get(id_path).data(), |
795 | s, |
796 | unreserved: detail::host_chars, |
797 | opt); |
798 | BOOST_ASSERT(impl_.decoded_[id_host] == |
799 | s.decoded_size()); |
800 | impl_.host_type_ = |
801 | urls::host_type::name; |
802 | return *this; |
803 | } |
804 | |
805 | url_base& |
806 | url_base:: |
807 | set_host_ipv4( |
808 | ipv4_address const& addr) |
809 | { |
810 | op_t op(*this); |
811 | char buf[urls::ipv4_address::max_str_len]; |
812 | auto s = addr.to_buffer(dest: buf, dest_size: sizeof(buf)); |
813 | auto dest = set_host_impl(n: s.size(), op); |
814 | std::memcpy(dest: dest, src: s.data(), n: s.size()); |
815 | impl_.decoded_[id_host] = impl_.len(id_host); |
816 | impl_.host_type_ = urls::host_type::ipv4; |
817 | auto bytes = addr.to_bytes(); |
818 | std::memcpy( |
819 | dest: impl_.ip_addr_, |
820 | src: bytes.data(), |
821 | n: bytes.size()); |
822 | return *this; |
823 | } |
824 | |
825 | url_base& |
826 | url_base:: |
827 | set_host_ipv6( |
828 | ipv6_address const& addr) |
829 | { |
830 | op_t op(*this); |
831 | char buf[2 + |
832 | urls::ipv6_address::max_str_len]; |
833 | auto s = addr.to_buffer( |
834 | dest: buf + 1, dest_size: sizeof(buf) - 2); |
835 | buf[0] = '['; |
836 | buf[s.size() + 1] = ']'; |
837 | auto const n = s.size() + 2; |
838 | auto dest = set_host_impl(n, op); |
839 | std::memcpy(dest: dest, src: buf, n: n); |
840 | impl_.decoded_[id_host] = n; |
841 | impl_.host_type_ = urls::host_type::ipv6; |
842 | auto bytes = addr.to_bytes(); |
843 | std::memcpy( |
844 | dest: impl_.ip_addr_, |
845 | src: bytes.data(), |
846 | n: bytes.size()); |
847 | return *this; |
848 | } |
849 | |
850 | url_base& |
851 | url_base:: |
852 | set_host_ipvfuture( |
853 | core::string_view s) |
854 | { |
855 | op_t op(*this, &s); |
856 | // validate |
857 | grammar::parse(s, |
858 | r: detail::ipvfuture_rule |
859 | ).value(BOOST_URL_POS); |
860 | auto dest = set_host_impl( |
861 | n: s.size() + 2, op); |
862 | *dest++ = '['; |
863 | dest += s.copy(s: dest, n: s.size()); |
864 | *dest = ']'; |
865 | impl_.host_type_ = |
866 | urls::host_type::ipvfuture; |
867 | impl_.decoded_[id_host] = s.size() + 2; |
868 | return *this; |
869 | } |
870 | |
871 | url_base& |
872 | url_base:: |
873 | set_host_name( |
874 | core::string_view s) |
875 | { |
876 | bool is_ipv4 = false; |
877 | if(s.size() >= 7) // "0.0.0.0" |
878 | { |
879 | // IPv4-address |
880 | if(parse_ipv4_address(s).has_value()) |
881 | is_ipv4 = true; |
882 | } |
883 | auto allowed = detail::host_chars; |
884 | if(is_ipv4) |
885 | allowed = allowed - '.'; |
886 | |
887 | op_t op(*this, &s); |
888 | encoding_opts opt; |
889 | auto const n = encoded_size( |
890 | s, unreserved: allowed, opt); |
891 | auto dest = set_host_impl(n, op); |
892 | encode_unsafe( |
893 | dest, |
894 | size: n, |
895 | s, |
896 | unreserved: allowed, |
897 | opt); |
898 | impl_.host_type_ = |
899 | urls::host_type::name; |
900 | impl_.decoded_[id_host] = s.size(); |
901 | return *this; |
902 | } |
903 | |
904 | url_base& |
905 | url_base:: |
906 | set_encoded_host_name( |
907 | pct_string_view s) |
908 | { |
909 | bool is_ipv4 = false; |
910 | if(s.size() >= 7) // "0.0.0.0" |
911 | { |
912 | // IPv4-address |
913 | if(parse_ipv4_address(s).has_value()) |
914 | is_ipv4 = true; |
915 | } |
916 | auto allowed = detail::host_chars; |
917 | if(is_ipv4) |
918 | allowed = allowed - '.'; |
919 | |
920 | op_t op(*this, &detail::ref(s)); |
921 | encoding_opts opt; |
922 | auto const n = detail::re_encoded_size_unsafe( |
923 | s, unreserved: allowed, opt); |
924 | auto dest = set_host_impl(n, op); |
925 | impl_.decoded_[id_host] = |
926 | detail::re_encode_unsafe( |
927 | dest_&: dest, |
928 | end: dest + n, |
929 | s, |
930 | unreserved: allowed, |
931 | opt); |
932 | BOOST_ASSERT( |
933 | impl_.decoded_[id_host] == |
934 | s.decoded_size()); |
935 | impl_.host_type_ = |
936 | urls::host_type::name; |
937 | return *this; |
938 | } |
939 | |
940 | //------------------------------------------------ |
941 | |
942 | url_base& |
943 | url_base:: |
944 | set_port_number( |
945 | std::uint16_t n) |
946 | { |
947 | op_t op(*this); |
948 | auto s = |
949 | detail::make_printed(t: n); |
950 | auto dest = set_port_impl( |
951 | n: s.string().size(), op); |
952 | std::memcpy( |
953 | dest: dest, src: s.string().data(), |
954 | n: s.string().size()); |
955 | impl_.port_number_ = n; |
956 | return *this; |
957 | } |
958 | |
959 | url_base& |
960 | url_base:: |
961 | set_port( |
962 | core::string_view s) |
963 | { |
964 | op_t op(*this, &s); |
965 | auto t = grammar::parse(s, |
966 | r: detail::port_rule{} |
967 | ).value(BOOST_URL_POS); |
968 | auto dest = |
969 | set_port_impl(n: t.str.size(), op); |
970 | std::memcpy(dest: dest, |
971 | src: t.str.data(), n: t.str.size()); |
972 | if(t.has_number) |
973 | impl_.port_number_ = t.number; |
974 | else |
975 | impl_.port_number_ = 0; |
976 | return *this; |
977 | } |
978 | |
979 | url_base& |
980 | url_base:: |
981 | remove_port() noexcept |
982 | { |
983 | op_t op(*this); |
984 | resize_impl(id_port, 0, op); |
985 | impl_.port_number_ = 0; |
986 | return *this; |
987 | } |
988 | |
989 | //------------------------------------------------ |
990 | // |
991 | // Compound Fields |
992 | // |
993 | //------------------------------------------------ |
994 | |
995 | url_base& |
996 | url_base:: |
997 | remove_origin() |
998 | { |
999 | // these two calls perform 2 memmoves instead of 1 |
1000 | remove_authority(); |
1001 | remove_scheme(); |
1002 | return *this; |
1003 | } |
1004 | |
1005 | //------------------------------------------------ |
1006 | // |
1007 | // Path |
1008 | // |
1009 | //------------------------------------------------ |
1010 | |
1011 | bool |
1012 | url_base:: |
1013 | set_path_absolute( |
1014 | bool absolute) |
1015 | { |
1016 | op_t op(*this); |
1017 | |
1018 | // check if path empty |
1019 | if(impl_.len(id_path) == 0) |
1020 | { |
1021 | if(! absolute) |
1022 | { |
1023 | // already not absolute |
1024 | return true; |
1025 | } |
1026 | |
1027 | // add '/' |
1028 | auto dest = resize_impl( |
1029 | id_path, 1, op); |
1030 | *dest = '/'; |
1031 | ++impl_.decoded_[id_path]; |
1032 | return true; |
1033 | } |
1034 | |
1035 | // check if path absolute |
1036 | if(s_[impl_.offset(id_path)] == '/') |
1037 | { |
1038 | if(absolute) |
1039 | { |
1040 | // already absolute |
1041 | return true; |
1042 | } |
1043 | |
1044 | if( has_authority() && |
1045 | impl_.len(id_path) > 1) |
1046 | { |
1047 | // can't do it, paths are always |
1048 | // absolute when authority present! |
1049 | return false; |
1050 | } |
1051 | |
1052 | auto p = encoded_path(); |
1053 | auto pos = p.find_first_of(s: ":/" , pos: 1); |
1054 | if (pos != core::string_view::npos && |
1055 | p[pos] == ':') |
1056 | { |
1057 | // prepend with . |
1058 | auto n = impl_.len(id_path); |
1059 | resize_impl(id_path, n + 1, op); |
1060 | std::memmove( |
1061 | dest: s_ + impl_.offset(id_path) + 1, |
1062 | src: s_ + impl_.offset(id_path), n: n); |
1063 | *(s_ + impl_.offset(id_path)) = '.'; |
1064 | ++impl_.decoded_[id_path]; |
1065 | return true; |
1066 | } |
1067 | |
1068 | // remove '/' |
1069 | auto n = impl_.len(id_port); |
1070 | impl_.split(id_port, n + 1); |
1071 | resize_impl(id_port, n, op); |
1072 | --impl_.decoded_[id_path]; |
1073 | return true; |
1074 | } |
1075 | |
1076 | if(! absolute) |
1077 | { |
1078 | // already not absolute |
1079 | return true; |
1080 | } |
1081 | |
1082 | // add '/' |
1083 | auto n = impl_.len(id_port); |
1084 | auto dest = resize_impl( |
1085 | id_port, n + 1, op) + n; |
1086 | impl_.split(id_port, n); |
1087 | *dest = '/'; |
1088 | ++impl_.decoded_[id_path]; |
1089 | return true; |
1090 | } |
1091 | |
1092 | url_base& |
1093 | url_base:: |
1094 | set_path( |
1095 | core::string_view s) |
1096 | { |
1097 | op_t op(*this, &s); |
1098 | encoding_opts opt; |
1099 | |
1100 | //------------------------------------------------ |
1101 | // |
1102 | // Calculate encoded size |
1103 | // |
1104 | // - "/"s are not encoded |
1105 | // - "%2F"s are not encoded |
1106 | // |
1107 | // - reserved path chars are re-encoded |
1108 | // - colons in first segment might need to be re-encoded |
1109 | // - the path might need to receive a prefix |
1110 | auto const n = encoded_size( |
1111 | s, unreserved: detail::path_chars, opt); |
1112 | std::size_t n_reencode_colons = 0; |
1113 | core::string_view first_seg; |
1114 | if (!has_scheme() && |
1115 | !has_authority() && |
1116 | !s.starts_with(x: '/')) |
1117 | { |
1118 | // the first segment with unencoded colons would look |
1119 | // like the scheme |
1120 | first_seg = detail::to_sv(t: s); |
1121 | std::size_t p = s.find(c: '/'); |
1122 | if (p != core::string_view::npos) |
1123 | first_seg = s.substr(pos: 0, n: p); |
1124 | n_reencode_colons = std::count( |
1125 | first: first_seg.begin(), last: first_seg.end(), value: ':'); |
1126 | } |
1127 | // the authority can only be followed by an empty or relative path |
1128 | // if we have an authority and the path is a non-empty relative path, we |
1129 | // add the "/" prefix to make it valid. |
1130 | bool make_absolute = |
1131 | has_authority() && |
1132 | !s.starts_with(x: '/') && |
1133 | !s.empty(); |
1134 | // a path starting with "//" might look like the authority. |
1135 | // we add a "/." prefix to prevent that |
1136 | bool add_dot_segment = |
1137 | !make_absolute && |
1138 | s.starts_with(x: "//" ); |
1139 | |
1140 | //------------------------------------------------ |
1141 | // |
1142 | // Re-encode data |
1143 | // |
1144 | auto dest = set_path_impl( |
1145 | n: n + make_absolute + 2 * n_reencode_colons + 2 * add_dot_segment, op); |
1146 | impl_.decoded_[id_path] = 0; |
1147 | if (!dest) |
1148 | { |
1149 | impl_.nseg_ = 0; |
1150 | return *this; |
1151 | } |
1152 | if (make_absolute) |
1153 | { |
1154 | *dest++ = '/'; |
1155 | impl_.decoded_[id_path] += 1; |
1156 | } |
1157 | else if (add_dot_segment) |
1158 | { |
1159 | *dest++ = '/'; |
1160 | *dest++ = '.'; |
1161 | impl_.decoded_[id_path] += 2; |
1162 | } |
1163 | dest += encode_unsafe( |
1164 | dest, |
1165 | size: impl_.get(id_query).data() - dest, |
1166 | s: first_seg, |
1167 | unreserved: detail::segment_chars - ':', |
1168 | opt); |
1169 | dest += encode_unsafe( |
1170 | dest, |
1171 | size: impl_.get(id_query).data() - dest, |
1172 | s: s.substr(pos: first_seg.size()), |
1173 | unreserved: detail::path_chars, |
1174 | opt); |
1175 | impl_.decoded_[id_path] += s.size(); |
1176 | BOOST_ASSERT(!dest || dest == impl_.get(id_query).data()); |
1177 | BOOST_ASSERT( |
1178 | impl_.decoded_[id_path] == |
1179 | s.size() + make_absolute + 2 * add_dot_segment); |
1180 | |
1181 | //------------------------------------------------ |
1182 | // |
1183 | // Update path parameters |
1184 | // |
1185 | // get the encoded_path with the replacements we applied |
1186 | if (s == "/" ) |
1187 | { |
1188 | // "/" maps to sequence {} |
1189 | impl_.nseg_ = 0; |
1190 | } |
1191 | else if (!s.empty()) |
1192 | { |
1193 | if (s.starts_with(x: "/./" )) |
1194 | s = s.substr(pos: 2); |
1195 | // count segments as number of '/'s + 1 |
1196 | impl_.nseg_ = std::count( |
1197 | first: s.begin() + 1, last: s.end(), value: '/') + 1; |
1198 | } |
1199 | else |
1200 | { |
1201 | // an empty relative path maps to sequence {} |
1202 | impl_.nseg_ = 0; |
1203 | } |
1204 | |
1205 | check_invariants(); |
1206 | return *this; |
1207 | } |
1208 | |
1209 | url_base& |
1210 | url_base:: |
1211 | set_encoded_path( |
1212 | pct_string_view s) |
1213 | { |
1214 | op_t op(*this, &detail::ref(s)); |
1215 | encoding_opts opt; |
1216 | |
1217 | //------------------------------------------------ |
1218 | // |
1219 | // Calculate re-encoded output size |
1220 | // |
1221 | // - reserved path chars are re-encoded |
1222 | // - colons in first segment might need to be re-encoded |
1223 | // - the path might need to receive a prefix |
1224 | auto const n = detail::re_encoded_size_unsafe( |
1225 | s, unreserved: detail::path_chars, opt); |
1226 | std::size_t n_reencode_colons = 0; |
1227 | core::string_view first_seg; |
1228 | if (!has_scheme() && |
1229 | !has_authority() && |
1230 | !s.starts_with(x: '/')) |
1231 | { |
1232 | // the first segment with unencoded colons would look |
1233 | // like the scheme |
1234 | first_seg = detail::to_sv(s); |
1235 | std::size_t p = s.find(c: '/'); |
1236 | if (p != core::string_view::npos) |
1237 | first_seg = s.substr(pos: 0, n: p); |
1238 | n_reencode_colons = std::count( |
1239 | first: first_seg.begin(), last: first_seg.end(), value: ':'); |
1240 | } |
1241 | // the authority can only be followed by an empty or relative path |
1242 | // if we have an authority and the path is a non-empty relative path, we |
1243 | // add the "/" prefix to make it valid. |
1244 | bool make_absolute = |
1245 | has_authority() && |
1246 | !s.starts_with(x: '/') && |
1247 | !s.empty(); |
1248 | // a path starting with "//" might look like the authority |
1249 | // we add a "/." prefix to prevent that |
1250 | bool add_dot_segment = |
1251 | !make_absolute && |
1252 | !has_authority() && |
1253 | s.starts_with(x: "//" ); |
1254 | |
1255 | //------------------------------------------------ |
1256 | // |
1257 | // Re-encode data |
1258 | // |
1259 | auto dest = set_path_impl( |
1260 | n: n + make_absolute + 2 * n_reencode_colons + 2 * add_dot_segment, op); |
1261 | impl_.decoded_[id_path] = 0; |
1262 | if (!dest) |
1263 | { |
1264 | impl_.nseg_ = 0; |
1265 | return *this; |
1266 | } |
1267 | if (make_absolute) |
1268 | { |
1269 | *dest++ = '/'; |
1270 | impl_.decoded_[id_path] += 1; |
1271 | } |
1272 | else if (add_dot_segment) |
1273 | { |
1274 | *dest++ = '/'; |
1275 | *dest++ = '.'; |
1276 | impl_.decoded_[id_path] += 2; |
1277 | } |
1278 | impl_.decoded_[id_path] += |
1279 | detail::re_encode_unsafe( |
1280 | dest_&: dest, |
1281 | end: impl_.get(id_query).data(), |
1282 | s: first_seg, |
1283 | unreserved: detail::segment_chars - ':', |
1284 | opt); |
1285 | impl_.decoded_[id_path] += |
1286 | detail::re_encode_unsafe( |
1287 | dest_&: dest, |
1288 | end: impl_.get(id_query).data(), |
1289 | s: s.substr(pos: first_seg.size()), |
1290 | unreserved: detail::path_chars, |
1291 | opt); |
1292 | BOOST_ASSERT(dest == impl_.get(id_query).data()); |
1293 | BOOST_ASSERT( |
1294 | impl_.decoded_[id_path] == |
1295 | s.decoded_size() + make_absolute + 2 * add_dot_segment); |
1296 | |
1297 | //------------------------------------------------ |
1298 | // |
1299 | // Update path parameters |
1300 | // |
1301 | // get the encoded_path with the replacements we applied |
1302 | if (s == "/" ) |
1303 | { |
1304 | // "/" maps to sequence {} |
1305 | impl_.nseg_ = 0; |
1306 | } |
1307 | else if (!s.empty()) |
1308 | { |
1309 | if (s.starts_with(x: "/./" )) |
1310 | s = s.substr(pos: 2); |
1311 | // count segments as number of '/'s + 1 |
1312 | impl_.nseg_ = std::count( |
1313 | first: s.begin() + 1, last: s.end(), value: '/') + 1; |
1314 | } |
1315 | else |
1316 | { |
1317 | // an empty relative path maps to sequence {} |
1318 | impl_.nseg_ = 0; |
1319 | } |
1320 | |
1321 | check_invariants(); |
1322 | return *this; |
1323 | } |
1324 | |
1325 | segments_ref |
1326 | url_base:: |
1327 | segments() noexcept |
1328 | { |
1329 | return {*this}; |
1330 | } |
1331 | |
1332 | segments_encoded_ref |
1333 | url_base:: |
1334 | encoded_segments() noexcept |
1335 | { |
1336 | return {*this}; |
1337 | } |
1338 | |
1339 | //------------------------------------------------ |
1340 | // |
1341 | // Query |
1342 | // |
1343 | //------------------------------------------------ |
1344 | |
1345 | url_base& |
1346 | url_base:: |
1347 | set_query( |
1348 | core::string_view s) |
1349 | { |
1350 | edit_params( |
1351 | detail::params_iter_impl(impl_), |
1352 | detail::params_iter_impl(impl_, 0), |
1353 | detail::query_iter(s, true)); |
1354 | return *this; |
1355 | } |
1356 | |
1357 | url_base& |
1358 | url_base:: |
1359 | set_encoded_query( |
1360 | pct_string_view s) |
1361 | { |
1362 | op_t op(*this); |
1363 | encoding_opts opt; |
1364 | std::size_t n = 0; // encoded size |
1365 | std::size_t nparam = 1; // param count |
1366 | auto const end = s.end(); |
1367 | auto p = s.begin(); |
1368 | |
1369 | // measure |
1370 | while(p != end) |
1371 | { |
1372 | if(*p == '&') |
1373 | { |
1374 | ++p; |
1375 | ++n; |
1376 | ++nparam; |
1377 | } |
1378 | else if(*p != '%') |
1379 | { |
1380 | if(detail::query_chars(*p)) |
1381 | n += 1; // allowed |
1382 | else |
1383 | n += 3; // escaped |
1384 | ++p; |
1385 | } |
1386 | else |
1387 | { |
1388 | // escape |
1389 | n += 3; |
1390 | p += 3; |
1391 | } |
1392 | } |
1393 | |
1394 | // resize |
1395 | auto dest = resize_impl( |
1396 | id_query, n + 1, op); |
1397 | *dest++ = '?'; |
1398 | |
1399 | // encode |
1400 | impl_.decoded_[id_query] = |
1401 | detail::re_encode_unsafe( |
1402 | dest_&: dest, |
1403 | end: dest + n, |
1404 | s, |
1405 | unreserved: detail::query_chars, |
1406 | opt); |
1407 | BOOST_ASSERT( |
1408 | impl_.decoded_[id_query] == |
1409 | s.decoded_size()); |
1410 | impl_.nparam_ = nparam; |
1411 | return *this; |
1412 | } |
1413 | |
1414 | params_ref |
1415 | url_base:: |
1416 | params() noexcept |
1417 | { |
1418 | return params_ref( |
1419 | *this, |
1420 | encoding_opts{ |
1421 | true, false, false}); |
1422 | } |
1423 | |
1424 | params_ref |
1425 | url_base:: |
1426 | params(encoding_opts opt) noexcept |
1427 | { |
1428 | return params_ref(*this, opt); |
1429 | } |
1430 | |
1431 | params_encoded_ref |
1432 | url_base:: |
1433 | encoded_params() noexcept |
1434 | { |
1435 | return {*this}; |
1436 | } |
1437 | |
1438 | url_base& |
1439 | url_base:: |
1440 | set_params( std::initializer_list<param_view> ps ) noexcept |
1441 | { |
1442 | params().assign(init: ps); |
1443 | return *this; |
1444 | } |
1445 | |
1446 | url_base& |
1447 | url_base:: |
1448 | set_encoded_params( std::initializer_list< param_pct_view > ps ) noexcept |
1449 | { |
1450 | encoded_params().assign(init: ps); |
1451 | return *this; |
1452 | } |
1453 | |
1454 | url_base& |
1455 | url_base:: |
1456 | remove_query() noexcept |
1457 | { |
1458 | op_t op(*this); |
1459 | resize_impl(id_query, 0, op); |
1460 | impl_.nparam_ = 0; |
1461 | impl_.decoded_[id_query] = 0; |
1462 | return *this; |
1463 | } |
1464 | |
1465 | //------------------------------------------------ |
1466 | // |
1467 | // Fragment |
1468 | // |
1469 | //------------------------------------------------ |
1470 | |
1471 | url_base& |
1472 | url_base:: |
1473 | remove_fragment() noexcept |
1474 | { |
1475 | op_t op(*this); |
1476 | resize_impl(id_frag, 0, op); |
1477 | impl_.decoded_[id_frag] = 0; |
1478 | return *this; |
1479 | } |
1480 | |
1481 | url_base& |
1482 | url_base:: |
1483 | set_fragment(core::string_view s) |
1484 | { |
1485 | op_t op(*this, &s); |
1486 | encoding_opts opt; |
1487 | auto const n = encoded_size( |
1488 | s, |
1489 | unreserved: detail::fragment_chars, |
1490 | opt); |
1491 | auto dest = resize_impl( |
1492 | id_frag, n + 1, op); |
1493 | *dest++ = '#'; |
1494 | encode_unsafe( |
1495 | dest, |
1496 | size: n, |
1497 | s, |
1498 | unreserved: detail::fragment_chars, |
1499 | opt); |
1500 | impl_.decoded_[id_frag] = s.size(); |
1501 | return *this; |
1502 | } |
1503 | |
1504 | url_base& |
1505 | url_base:: |
1506 | set_encoded_fragment( |
1507 | pct_string_view s) |
1508 | { |
1509 | op_t op(*this, &detail::ref(s)); |
1510 | encoding_opts opt; |
1511 | auto const n = |
1512 | detail::re_encoded_size_unsafe( |
1513 | s, |
1514 | unreserved: detail::fragment_chars, |
1515 | opt); |
1516 | auto dest = resize_impl( |
1517 | id_frag, n + 1, op); |
1518 | *dest++ = '#'; |
1519 | impl_.decoded_[id_frag] = |
1520 | detail::re_encode_unsafe( |
1521 | dest_&: dest, |
1522 | end: dest + n, |
1523 | s, |
1524 | unreserved: detail::fragment_chars, |
1525 | opt); |
1526 | BOOST_ASSERT( |
1527 | impl_.decoded_[id_frag] == |
1528 | s.decoded_size()); |
1529 | return *this; |
1530 | } |
1531 | |
1532 | //------------------------------------------------ |
1533 | // |
1534 | // Resolution |
1535 | // |
1536 | //------------------------------------------------ |
1537 | |
1538 | system::result<void> |
1539 | url_base:: |
1540 | resolve( |
1541 | url_view_base const& ref) |
1542 | { |
1543 | if (this == &ref && |
1544 | has_scheme()) |
1545 | { |
1546 | normalize_path(); |
1547 | return {}; |
1548 | } |
1549 | |
1550 | if(! has_scheme()) |
1551 | { |
1552 | BOOST_URL_RETURN_EC(error::not_a_base); |
1553 | } |
1554 | |
1555 | op_t op(*this); |
1556 | |
1557 | // |
1558 | // 5.2.2. Transform References |
1559 | // https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.2 |
1560 | // |
1561 | |
1562 | if( ref.has_scheme() && |
1563 | ref.scheme() != scheme()) |
1564 | { |
1565 | reserve_impl(ref.size(), op); |
1566 | copy(u: ref); |
1567 | normalize_path(); |
1568 | return {}; |
1569 | } |
1570 | if(ref.has_authority()) |
1571 | { |
1572 | reserve_impl( |
1573 | impl_.offset(id_user) + ref.size(), op); |
1574 | set_encoded_authority( |
1575 | ref.encoded_authority()); |
1576 | set_encoded_path( |
1577 | ref.encoded_path()); |
1578 | if (ref.encoded_path().empty()) |
1579 | set_path_absolute(false); |
1580 | else |
1581 | normalize_path(); |
1582 | if(ref.has_query()) |
1583 | set_encoded_query( |
1584 | ref.encoded_query()); |
1585 | else |
1586 | remove_query(); |
1587 | if(ref.has_fragment()) |
1588 | set_encoded_fragment( |
1589 | ref.encoded_fragment()); |
1590 | else |
1591 | remove_fragment(); |
1592 | return {}; |
1593 | } |
1594 | if(ref.encoded_path().empty()) |
1595 | { |
1596 | reserve_impl( |
1597 | impl_.offset(id_query) + |
1598 | ref.size(), op); |
1599 | normalize_path(); |
1600 | if(ref.has_query()) |
1601 | { |
1602 | set_encoded_query( |
1603 | ref.encoded_query()); |
1604 | } |
1605 | if(ref.has_fragment()) |
1606 | set_encoded_fragment( |
1607 | ref.encoded_fragment()); |
1608 | return {}; |
1609 | } |
1610 | if(ref.is_path_absolute()) |
1611 | { |
1612 | reserve_impl( |
1613 | impl_.offset(id_path) + |
1614 | ref.size(), op); |
1615 | set_encoded_path( |
1616 | ref.encoded_path()); |
1617 | normalize_path(); |
1618 | if(ref.has_query()) |
1619 | set_encoded_query( |
1620 | ref.encoded_query()); |
1621 | else |
1622 | remove_query(); |
1623 | if(ref.has_fragment()) |
1624 | set_encoded_fragment( |
1625 | ref.encoded_fragment()); |
1626 | else |
1627 | remove_fragment(); |
1628 | return {}; |
1629 | } |
1630 | // General case: ref is relative path |
1631 | reserve_impl( |
1632 | impl_.offset(id_query) + |
1633 | ref.size(), op); |
1634 | // 5.2.3. Merge Paths |
1635 | auto es = encoded_segments(); |
1636 | if(es.size() > 0) |
1637 | { |
1638 | es.pop_back(); |
1639 | } |
1640 | es.insert(before: es.end(), |
1641 | first: ref.encoded_segments().begin(), |
1642 | last: ref.encoded_segments().end()); |
1643 | normalize_path(); |
1644 | if(ref.has_query()) |
1645 | set_encoded_query( |
1646 | ref.encoded_query()); |
1647 | else |
1648 | remove_query(); |
1649 | if(ref.has_fragment()) |
1650 | set_encoded_fragment( |
1651 | ref.encoded_fragment()); |
1652 | else |
1653 | remove_fragment(); |
1654 | return {}; |
1655 | } |
1656 | |
1657 | //------------------------------------------------ |
1658 | // |
1659 | // Normalization |
1660 | // |
1661 | //------------------------------------------------ |
1662 | |
1663 | template <class Charset> |
1664 | void |
1665 | url_base:: |
1666 | normalize_octets_impl( |
1667 | int id, |
1668 | Charset const& allowed, |
1669 | op_t& op) noexcept |
1670 | { |
1671 | char* it = s_ + impl_.offset(id); |
1672 | char* end = s_ + impl_.offset(id + 1); |
1673 | char d = 0; |
1674 | char* dest = it; |
1675 | while (it < end) |
1676 | { |
1677 | if (*it != '%') |
1678 | { |
1679 | *dest = *it; |
1680 | ++it; |
1681 | ++dest; |
1682 | continue; |
1683 | } |
1684 | BOOST_ASSERT(end - it >= 3); |
1685 | |
1686 | // decode unreserved octets |
1687 | d = detail::decode_one(it: it + 1); |
1688 | if (allowed(d)) |
1689 | { |
1690 | *dest = d; |
1691 | it += 3; |
1692 | ++dest; |
1693 | continue; |
1694 | } |
1695 | |
1696 | // uppercase percent-encoding triplets |
1697 | *dest++ = '%'; |
1698 | ++it; |
1699 | *dest++ = grammar::to_upper(c: *it++); |
1700 | *dest++ = grammar::to_upper(c: *it++); |
1701 | } |
1702 | if (it != dest) |
1703 | { |
1704 | auto diff = it - dest; |
1705 | auto n = impl_.len(id) - diff; |
1706 | shrink_impl(id, n, op); |
1707 | s_[size()] = '\0'; |
1708 | } |
1709 | } |
1710 | |
1711 | url_base& |
1712 | url_base:: |
1713 | normalize_scheme() |
1714 | { |
1715 | to_lower_impl(id: id_scheme); |
1716 | return *this; |
1717 | } |
1718 | |
1719 | url_base& |
1720 | url_base:: |
1721 | normalize_authority() |
1722 | { |
1723 | op_t op(*this); |
1724 | |
1725 | // normalize host |
1726 | if (host_type() == urls::host_type::name) |
1727 | { |
1728 | normalize_octets_impl( |
1729 | id: id_host, |
1730 | allowed: detail::reg_name_chars, op); |
1731 | } |
1732 | decoded_to_lower_impl(id: id_host); |
1733 | |
1734 | // normalize password |
1735 | normalize_octets_impl(id: id_pass, allowed: detail::password_chars, op); |
1736 | |
1737 | // normalize user |
1738 | normalize_octets_impl(id: id_user, allowed: detail::user_chars, op); |
1739 | return *this; |
1740 | } |
1741 | |
1742 | url_base& |
1743 | url_base:: |
1744 | normalize_path() |
1745 | { |
1746 | op_t op(*this); |
1747 | normalize_octets_impl(id: id_path, allowed: detail::segment_chars, op); |
1748 | core::string_view p = impl_.get(id_path); |
1749 | char* p_dest = s_ + impl_.offset(id_path); |
1750 | char* p_end = s_ + impl_.offset(id_path + 1); |
1751 | auto pn = p.size(); |
1752 | auto skip_dot = 0; |
1753 | bool encode_colons = false; |
1754 | core::string_view first_seg; |
1755 | |
1756 | //------------------------------------------------ |
1757 | // |
1758 | // Determine unnecessary initial dot segments to skip and |
1759 | // if we need to encode colons in the first segment |
1760 | // |
1761 | if ( |
1762 | !has_authority() && |
1763 | p.starts_with(x: "/./" )) |
1764 | { |
1765 | // check if removing the "/./" would result in "//" |
1766 | // ex: "/.//", "/././/", "/././/", ... |
1767 | skip_dot = 2; |
1768 | while (p.substr(pos: skip_dot, n: 3).starts_with(x: "/./" )) |
1769 | skip_dot += 2; |
1770 | if (p.substr(pos: skip_dot).starts_with(x: "//" )) |
1771 | skip_dot = 2; |
1772 | else |
1773 | skip_dot = 0; |
1774 | } |
1775 | else if ( |
1776 | !has_scheme() && |
1777 | !has_authority()) |
1778 | { |
1779 | if (p.starts_with(x: "./" )) |
1780 | { |
1781 | // check if removing the "./" would result in "//" |
1782 | // ex: ".//", "././/", "././/", ... |
1783 | skip_dot = 1; |
1784 | while (p.substr(pos: skip_dot, n: 3).starts_with(x: "/./" )) |
1785 | skip_dot += 2; |
1786 | if (p.substr(pos: skip_dot).starts_with(x: "//" )) |
1787 | skip_dot = 2; |
1788 | else |
1789 | skip_dot = 0; |
1790 | |
1791 | if ( !skip_dot ) |
1792 | { |
1793 | // check if removing "./"s would leave us |
1794 | // a first segment with an ambiguous ":" |
1795 | first_seg = p.substr(pos: 2); |
1796 | while (first_seg.starts_with(x: "./" )) |
1797 | first_seg = first_seg.substr(pos: 2); |
1798 | auto i = first_seg.find(c: '/'); |
1799 | if (i != core::string_view::npos) |
1800 | first_seg = first_seg.substr(pos: 0, n: i); |
1801 | encode_colons = first_seg.contains(c: ':'); |
1802 | } |
1803 | } |
1804 | else |
1805 | { |
1806 | // check if normalize_octets_impl |
1807 | // didn't already create a ":" |
1808 | // in the first segment |
1809 | first_seg = p; |
1810 | auto i = first_seg.find(c: '/'); |
1811 | if (i != core::string_view::npos) |
1812 | first_seg = p.substr(pos: 0, n: i); |
1813 | encode_colons = first_seg.contains(c: ':'); |
1814 | } |
1815 | } |
1816 | |
1817 | //------------------------------------------------ |
1818 | // |
1819 | // Encode colons in the first segment |
1820 | // |
1821 | if (encode_colons) |
1822 | { |
1823 | // prepend with "./" |
1824 | // (resize_impl never throws) |
1825 | auto cn = |
1826 | std::count( |
1827 | first: first_seg.begin(), |
1828 | last: first_seg.end(), |
1829 | value: ':'); |
1830 | resize_impl( |
1831 | id_path, pn + (2 * cn), op); |
1832 | // move the 2nd, 3rd, ... segments |
1833 | auto begin = s_ + impl_.offset(id_path); |
1834 | auto it = begin; |
1835 | auto end = begin + pn; |
1836 | while (core::string_view(it, 2) == "./" ) |
1837 | it += 2; |
1838 | while (*it != '/' && |
1839 | it != end) |
1840 | ++it; |
1841 | // we don't need op here because this is |
1842 | // an internal operation |
1843 | std::memmove(dest: it + (2 * cn), src: it, n: end - it); |
1844 | |
1845 | // move 1st segment |
1846 | auto src = s_ + impl_.offset(id_path) + pn; |
1847 | auto dest = s_ + impl_.offset(id_query); |
1848 | src -= end - it; |
1849 | dest -= end - it; |
1850 | pn -= end - it; |
1851 | do { |
1852 | --src; |
1853 | --dest; |
1854 | if (*src != ':') |
1855 | { |
1856 | *dest = *src; |
1857 | } |
1858 | else |
1859 | { |
1860 | // use uppercase as required by |
1861 | // syntax-based normalization |
1862 | *dest-- = 'A'; |
1863 | *dest-- = '3'; |
1864 | *dest = '%'; |
1865 | } |
1866 | --pn; |
1867 | } while (pn); |
1868 | skip_dot = 0; |
1869 | p = impl_.get(id_path); |
1870 | pn = p.size(); |
1871 | p_dest = s_ + impl_.offset(id_path); |
1872 | p_end = s_ + impl_.offset(id_path + 1); |
1873 | } |
1874 | |
1875 | //------------------------------------------------ |
1876 | // |
1877 | // Remove "." and ".." segments |
1878 | // |
1879 | p.remove_prefix(n: skip_dot); |
1880 | p_dest += skip_dot; |
1881 | auto n = detail::remove_dot_segments( |
1882 | dest: p_dest, end: p_end, input: p); |
1883 | |
1884 | //------------------------------------------------ |
1885 | // |
1886 | // Update path parameters |
1887 | // |
1888 | if (n != pn) |
1889 | { |
1890 | BOOST_ASSERT(n < pn); |
1891 | shrink_impl(id_path, n + skip_dot, op); |
1892 | p = encoded_path(); |
1893 | if (p == "/" ) |
1894 | impl_.nseg_ = 0; |
1895 | else if (!p.empty()) |
1896 | impl_.nseg_ = std::count( |
1897 | first: p.begin() + 1, last: p.end(), value: '/') + 1; |
1898 | else |
1899 | impl_.nseg_ = 0; |
1900 | impl_.decoded_[id_path] = |
1901 | detail::decode_bytes_unsafe(s: impl_.get(id_path)); |
1902 | } |
1903 | return *this; |
1904 | } |
1905 | |
1906 | url_base& |
1907 | url_base:: |
1908 | normalize_query() |
1909 | { |
1910 | op_t op(*this); |
1911 | normalize_octets_impl( |
1912 | id: id_query, allowed: detail::query_chars, op); |
1913 | return *this; |
1914 | } |
1915 | |
1916 | url_base& |
1917 | url_base:: |
1918 | normalize_fragment() |
1919 | { |
1920 | op_t op(*this); |
1921 | normalize_octets_impl( |
1922 | id: id_frag, allowed: detail::fragment_chars, op); |
1923 | return *this; |
1924 | } |
1925 | |
1926 | url_base& |
1927 | url_base:: |
1928 | normalize() |
1929 | { |
1930 | normalize_fragment(); |
1931 | normalize_query(); |
1932 | normalize_path(); |
1933 | normalize_authority(); |
1934 | normalize_scheme(); |
1935 | return *this; |
1936 | } |
1937 | |
1938 | //------------------------------------------------ |
1939 | // |
1940 | // Implementation |
1941 | // |
1942 | //------------------------------------------------ |
1943 | |
1944 | void |
1945 | url_base:: |
1946 | check_invariants() const noexcept |
1947 | { |
1948 | BOOST_ASSERT(pi_); |
1949 | BOOST_ASSERT( |
1950 | impl_.len(id_scheme) == 0 || |
1951 | impl_.get(id_scheme).ends_with(':')); |
1952 | BOOST_ASSERT( |
1953 | impl_.len(id_user) == 0 || |
1954 | impl_.get(id_user).starts_with("//" )); |
1955 | BOOST_ASSERT( |
1956 | impl_.len(id_pass) == 0 || |
1957 | impl_.get(id_user).starts_with("//" )); |
1958 | BOOST_ASSERT( |
1959 | impl_.len(id_pass) == 0 || |
1960 | (impl_.len(id_pass) == 1 && |
1961 | impl_.get(id_pass) == "@" ) || |
1962 | (impl_.len(id_pass) > 1 && |
1963 | impl_.get(id_pass).starts_with(':') && |
1964 | impl_.get(id_pass).ends_with('@'))); |
1965 | BOOST_ASSERT( |
1966 | impl_.len(id_user, id_path) == 0 || |
1967 | impl_.get(id_user).starts_with("//" )); |
1968 | BOOST_ASSERT(impl_.decoded_[id_path] >= |
1969 | ((impl_.len(id_path) + 2) / 3)); |
1970 | BOOST_ASSERT( |
1971 | impl_.len(id_port) == 0 || |
1972 | impl_.get(id_port).starts_with(':')); |
1973 | BOOST_ASSERT( |
1974 | impl_.len(id_query) == 0 || |
1975 | impl_.get(id_query).starts_with('?')); |
1976 | BOOST_ASSERT( |
1977 | (impl_.len(id_query) == 0 && impl_.nparam_ == 0) || |
1978 | (impl_.len(id_query) > 0 && impl_.nparam_ > 0)); |
1979 | BOOST_ASSERT( |
1980 | impl_.len(id_frag) == 0 || |
1981 | impl_.get(id_frag).starts_with('#')); |
1982 | BOOST_ASSERT(c_str()[size()] == '\0'); |
1983 | } |
1984 | |
1985 | char* |
1986 | url_base:: |
1987 | resize_impl( |
1988 | int id, |
1989 | std::size_t new_size, |
1990 | op_t& op) |
1991 | { |
1992 | return resize_impl( |
1993 | id, id + 1, new_size, op); |
1994 | } |
1995 | |
1996 | char* |
1997 | url_base:: |
1998 | resize_impl( |
1999 | int first, |
2000 | int last, |
2001 | std::size_t new_len, |
2002 | op_t& op) |
2003 | { |
2004 | auto const n0 = impl_.len(first, last); |
2005 | if(new_len == 0 && n0 == 0) |
2006 | return s_ + impl_.offset(first); |
2007 | if(new_len <= n0) |
2008 | return shrink_impl( |
2009 | first, last, new_len, op); |
2010 | |
2011 | // growing |
2012 | std::size_t n = new_len - n0; |
2013 | reserve_impl(size() + n, op); |
2014 | auto const pos = |
2015 | impl_.offset(last); |
2016 | // adjust chars |
2017 | op.move( |
2018 | dest: s_ + pos + n, |
2019 | src: s_ + pos, |
2020 | n: impl_.offset(id_end) - |
2021 | pos + 1); |
2022 | // collapse (first, last) |
2023 | impl_.collapse(first, last, |
2024 | impl_.offset(last) + n); |
2025 | // shift (last, end) right |
2026 | impl_.adjust_right(first: last, last: id_end, n); |
2027 | s_[size()] = '\0'; |
2028 | return s_ + impl_.offset(first); |
2029 | } |
2030 | |
2031 | char* |
2032 | url_base:: |
2033 | shrink_impl( |
2034 | int id, |
2035 | std::size_t new_size, |
2036 | op_t& op) |
2037 | { |
2038 | return shrink_impl( |
2039 | id, id + 1, new_size, op); |
2040 | } |
2041 | |
2042 | char* |
2043 | url_base:: |
2044 | shrink_impl( |
2045 | int first, |
2046 | int last, |
2047 | std::size_t new_len, |
2048 | op_t& op) |
2049 | { |
2050 | // shrinking |
2051 | auto const n0 = impl_.len(first, last); |
2052 | BOOST_ASSERT(new_len <= n0); |
2053 | std::size_t n = n0 - new_len; |
2054 | auto const pos = |
2055 | impl_.offset(last); |
2056 | // adjust chars |
2057 | op.move( |
2058 | dest: s_ + pos - n, |
2059 | src: s_ + pos, |
2060 | n: impl_.offset( |
2061 | id_end) - pos + 1); |
2062 | // collapse (first, last) |
2063 | impl_.collapse(first, last, |
2064 | impl_.offset(last) - n); |
2065 | // shift (last, end) left |
2066 | impl_.adjust_left(first: last, last: id_end, n); |
2067 | s_[size()] = '\0'; |
2068 | return s_ + impl_.offset(first); |
2069 | } |
2070 | |
2071 | //------------------------------------------------ |
2072 | |
2073 | void |
2074 | url_base:: |
2075 | set_scheme_impl( |
2076 | core::string_view s, |
2077 | urls::scheme id) |
2078 | { |
2079 | op_t op(*this, &s); |
2080 | check_invariants(); |
2081 | grammar::parse( |
2082 | s, r: detail::scheme_rule() |
2083 | ).value(BOOST_URL_POS); |
2084 | auto const n = s.size(); |
2085 | auto const p = impl_.offset(id_path); |
2086 | |
2087 | // check for "./" prefix |
2088 | bool const has_dot = |
2089 | [this, p] |
2090 | { |
2091 | if(impl_.nseg_ == 0) |
2092 | return false; |
2093 | if(first_segment().size() < 2) |
2094 | return false; |
2095 | auto const src = s_ + p; |
2096 | if(src[0] != '.') |
2097 | return false; |
2098 | if(src[1] != '/') |
2099 | return false; |
2100 | return true; |
2101 | }(); |
2102 | |
2103 | // Remove "./" |
2104 | if(has_dot) |
2105 | { |
2106 | // do this first, for |
2107 | // strong exception safety |
2108 | reserve_impl( |
2109 | size() + n + 1 - 2, op); |
2110 | op.move( |
2111 | dest: s_ + p, |
2112 | src: s_ + p + 2, |
2113 | n: size() + 1 - |
2114 | (p + 2)); |
2115 | impl_.set_size( |
2116 | id_path, |
2117 | impl_.len(id_path) - 2); |
2118 | s_[size()] = '\0'; |
2119 | } |
2120 | |
2121 | auto dest = resize_impl( |
2122 | id: id_scheme, new_size: n + 1, op); |
2123 | s.copy(s: dest, n); |
2124 | dest[n] = ':'; |
2125 | impl_.scheme_ = id; |
2126 | check_invariants(); |
2127 | } |
2128 | |
2129 | char* |
2130 | url_base:: |
2131 | set_user_impl( |
2132 | std::size_t n, |
2133 | op_t& op) |
2134 | { |
2135 | check_invariants(); |
2136 | if(impl_.len(id_pass) != 0) |
2137 | { |
2138 | // keep "//" |
2139 | auto dest = resize_impl( |
2140 | id: id_user, new_size: 2 + n, op); |
2141 | check_invariants(); |
2142 | return dest + 2; |
2143 | } |
2144 | // add authority |
2145 | bool const make_absolute = |
2146 | !is_path_absolute() && |
2147 | !impl_.get(id_path).empty(); |
2148 | auto dest = resize_impl( |
2149 | id: id_user, new_size: 2 + n + 1 + make_absolute, op); |
2150 | impl_.split(id_user, 2 + n); |
2151 | dest[0] = '/'; |
2152 | dest[1] = '/'; |
2153 | dest[2 + n] = '@'; |
2154 | if (make_absolute) |
2155 | { |
2156 | impl_.split(id_pass, 1); |
2157 | impl_.split(id_host, 0); |
2158 | impl_.split(id_port, 0); |
2159 | dest[3 + n] = '/'; |
2160 | } |
2161 | check_invariants(); |
2162 | return dest + 2; |
2163 | } |
2164 | |
2165 | char* |
2166 | url_base:: |
2167 | set_password_impl( |
2168 | std::size_t n, |
2169 | op_t& op) |
2170 | { |
2171 | check_invariants(); |
2172 | if(impl_.len(id_user) != 0) |
2173 | { |
2174 | // already have authority |
2175 | auto const dest = resize_impl( |
2176 | id: id_pass, new_size: 1 + n + 1, op); |
2177 | dest[0] = ':'; |
2178 | dest[n + 1] = '@'; |
2179 | check_invariants(); |
2180 | return dest + 1; |
2181 | } |
2182 | // add authority |
2183 | bool const make_absolute = |
2184 | !is_path_absolute() && |
2185 | !impl_.get(id_path).empty(); |
2186 | auto const dest = |
2187 | resize_impl( |
2188 | first: id_user, last: id_host, |
2189 | new_len: 2 + 1 + n + 1 + make_absolute, op); |
2190 | impl_.split(id_user, 2); |
2191 | dest[0] = '/'; |
2192 | dest[1] = '/'; |
2193 | dest[2] = ':'; |
2194 | dest[2 + n + 1] = '@'; |
2195 | if (make_absolute) |
2196 | { |
2197 | impl_.split(id_pass, 2 + n); |
2198 | impl_.split(id_host, 0); |
2199 | impl_.split(id_port, 0); |
2200 | dest[4 + n] = '/'; |
2201 | } |
2202 | check_invariants(); |
2203 | return dest + 3; |
2204 | } |
2205 | |
2206 | char* |
2207 | url_base:: |
2208 | set_userinfo_impl( |
2209 | std::size_t n, |
2210 | op_t& op) |
2211 | { |
2212 | // "//" {dest} "@" |
2213 | check_invariants(); |
2214 | bool const make_absolute = |
2215 | !is_path_absolute() && |
2216 | !impl_.get(id_path).empty(); |
2217 | auto dest = resize_impl( |
2218 | first: id_user, last: id_host, new_len: n + 3 + make_absolute, op); |
2219 | impl_.split(id_user, n + 2); |
2220 | dest[0] = '/'; |
2221 | dest[1] = '/'; |
2222 | dest[n + 2] = '@'; |
2223 | if (make_absolute) |
2224 | { |
2225 | impl_.split(id_pass, 1); |
2226 | impl_.split(id_host, 0); |
2227 | impl_.split(id_port, 0); |
2228 | dest[3 + n] = '/'; |
2229 | } |
2230 | check_invariants(); |
2231 | return dest + 2; |
2232 | } |
2233 | |
2234 | char* |
2235 | url_base:: |
2236 | set_host_impl( |
2237 | std::size_t n, |
2238 | op_t& op) |
2239 | { |
2240 | check_invariants(); |
2241 | if(impl_.len(id_user) == 0) |
2242 | { |
2243 | // add authority |
2244 | bool make_absolute = |
2245 | !is_path_absolute() && |
2246 | impl_.len(id_path) != 0; |
2247 | auto pn = impl_.len(id_path); |
2248 | auto dest = resize_impl( |
2249 | id: id_user, new_size: n + 2 + make_absolute, op); |
2250 | impl_.split(id_user, 2); |
2251 | impl_.split(id_pass, 0); |
2252 | impl_.split(id_host, n); |
2253 | impl_.split(id_port, 0); |
2254 | impl_.split(id_path, pn + make_absolute); |
2255 | if (make_absolute) |
2256 | { |
2257 | dest[n + 2] = '/'; |
2258 | ++impl_.decoded_[id_path]; |
2259 | } |
2260 | dest[0] = '/'; |
2261 | dest[1] = '/'; |
2262 | check_invariants(); |
2263 | return dest + 2; |
2264 | } |
2265 | // already have authority |
2266 | auto const dest = resize_impl( |
2267 | id: id_host, new_size: n, op); |
2268 | check_invariants(); |
2269 | return dest; |
2270 | } |
2271 | |
2272 | char* |
2273 | url_base:: |
2274 | set_port_impl( |
2275 | std::size_t n, |
2276 | op_t& op) |
2277 | { |
2278 | check_invariants(); |
2279 | if(impl_.len(id_user) != 0) |
2280 | { |
2281 | // authority exists |
2282 | auto dest = resize_impl( |
2283 | id: id_port, new_size: n + 1, op); |
2284 | dest[0] = ':'; |
2285 | check_invariants(); |
2286 | return dest + 1; |
2287 | } |
2288 | bool make_absolute = |
2289 | !is_path_absolute() && |
2290 | impl_.len(id_path) != 0; |
2291 | auto dest = resize_impl( |
2292 | id: id_user, new_size: 3 + n + make_absolute, op); |
2293 | impl_.split(id_user, 2); |
2294 | impl_.split(id_pass, 0); |
2295 | impl_.split(id_host, 0); |
2296 | dest[0] = '/'; |
2297 | dest[1] = '/'; |
2298 | dest[2] = ':'; |
2299 | if (make_absolute) |
2300 | { |
2301 | impl_.split(id_port, n + 1); |
2302 | dest[n + 3] = '/'; |
2303 | ++impl_.decoded_[id_path]; |
2304 | } |
2305 | check_invariants(); |
2306 | return dest + 3; |
2307 | } |
2308 | |
2309 | char* |
2310 | url_base:: |
2311 | set_path_impl( |
2312 | std::size_t n, |
2313 | op_t& op) |
2314 | { |
2315 | check_invariants(); |
2316 | auto const dest = resize_impl( |
2317 | id: id_path, new_size: n, op); |
2318 | return dest; |
2319 | } |
2320 | |
2321 | |
2322 | //------------------------------------------------ |
2323 | |
2324 | // return the first segment of the path. |
2325 | // this is needed for some algorithms. |
2326 | core::string_view |
2327 | url_base:: |
2328 | first_segment() const noexcept |
2329 | { |
2330 | if(impl_.nseg_ == 0) |
2331 | return {}; |
2332 | auto const p0 = impl_.cs_ + |
2333 | impl_.offset(id_path) + |
2334 | detail::path_prefix( |
2335 | s: impl_.get(id_path)); |
2336 | auto const end = impl_.cs_ + |
2337 | impl_.offset(id_query); |
2338 | if(impl_.nseg_ == 1) |
2339 | return core::string_view( |
2340 | p0, end - p0); |
2341 | auto p = p0; |
2342 | while(*p != '/') |
2343 | ++p; |
2344 | BOOST_ASSERT(p < end); |
2345 | return core::string_view(p0, p - p0); |
2346 | } |
2347 | |
// Replace the path segments in the range [it0, it1)
// with the segments produced by `src`, and return an
// iterator constructed from the start of the edited
// range (pos0) and it0.index.
//
// The work is done in five stages: choose the path
// prefix, measure the new segments, work out the
// range of characters to remove, resize the buffer,
// and finally write the prefix, the segments and
// their separators.
//
// `absolute` selects how the leading '/' is treated
// (see the comment on the parameter). When the url
// has an authority the value passed in is overridden.
//
// Calls detail::throw_length_error() when the result
// would exceed max_size().
2348 | detail::segments_iter_impl
2349 | url_base::
2350 | edit_segments(
2351 | detail::segments_iter_impl const& it0,
2352 | detail::segments_iter_impl const& it1,
2353 | detail::any_segments_iter&& src,
2354 | // -1 = preserve
2355 | // 0 = make relative (can fail)
2356 | // 1 = make absolute
2357 | int absolute)
2358 | {
2359 | // Iterator doesn't belong to this url
2360 | BOOST_ASSERT(it0.ref.alias_of(impl_));
2361 | 
2362 | // Iterator doesn't belong to this url
2363 | BOOST_ASSERT(it1.ref.alias_of(impl_));
2364 | 
2365 | // Iterator is in the wrong order
2366 | BOOST_ASSERT(it0.index <= it1.index);
2367 | 
2368 | // Iterator is out of range
2369 | BOOST_ASSERT(it0.index <= impl_.nseg_);
2370 | BOOST_ASSERT(it0.pos <= impl_.len(id_path));
2371 | 
2372 | // Iterator is out of range
2373 | BOOST_ASSERT(it1.index <= impl_.nseg_);
2374 | BOOST_ASSERT(it1.pos <= impl_.len(id_path));
2375 | 
2376 | //------------------------------------------------
2377 | //
2378 | // Calculate output prefix
2379 | //
2380 | // 0 = ""
2381 | // 1 = "/"
2382 | // 2 = "./"
2383 | // 3 = "/./"
2384 | //
2385 | bool const is_abs = is_path_absolute();
// With an authority the caller's `absolute` request
// is ignored: the value is forced to 0 when the whole
// path is being replaced by nothing, and to 1 otherwise.
2386 | if(has_authority())
2387 | {
2388 | // Check if the new
2389 | // path would be empty
2390 | if( src.fast_nseg == 0 &&
2391 | it0.index == 0 &&
2392 | it1.index == impl_.nseg_)
2393 | {
2394 | // VFALCO we don't have
2395 | // access to nchar this early
2396 | //
2397 | //BOOST_ASSERT(nchar == 0);
2398 | absolute = 0;
2399 | }
2400 | else
2401 | {
2402 | // prefix "/" required
2403 | absolute = 1;
2404 | }
2405 | }
2406 | else if(absolute < 0)
2407 | {
2408 | absolute = is_abs; // preserve
2409 | }
2410 | auto const path_pos = impl_.offset(id_path);
2411 | 
2412 | std::size_t nchar = 0;
2413 | std::size_t prefix = 0;
2414 | bool encode_colons = false;
2415 | bool cp_src_prefix = false;
2416 | if(it0.index > 0)
2417 | {
2418 | // first segment unchanged
// a single "/" goes in front of the inserted
// segments, but only when there are any
2419 | prefix = src.fast_nseg > 0;
2420 | }
2421 | else if(src.fast_nseg > 0)
2422 | {
2423 | // first segment from src
2424 | if(! src.front.empty())
2425 | {
// NOTE: the braced if/else that follows is the whole
// body of this unbraced outer `if`. The later
// `else if(absolute)` therefore pairs with this outer
// `if`, not with `if (src.s.empty())`.
2426 | if( src.front == "." &&
2427 | src.fast_nseg > 1)
2428 | if (src.s.empty())
2429 | {
2430 | // if front is ".", we need the extra "." in the prefix
2431 | // which will maintain the invariant that segments represent
2432 | // {"."}
2433 | prefix = 2 + absolute;
2434 | }
2435 | else
2436 | {
2437 | // if the "." prefix is explicitly required from set_path
2438 | // we do not include an extra "." segment
2439 | prefix = absolute;
2440 | cp_src_prefix = true;
2441 | }
2442 | else if(absolute)
2443 | prefix = 1;
2444 | else if(has_scheme() ||
2445 | ! src.front.contains(c: ':'))
2446 | prefix = 0;
2447 | else
2448 | {
// relative path, no scheme, and the new first
// segment contains ':': its colons get encoded
// (presumably so it cannot be read as a scheme)
2449 | prefix = 0;
2450 | encode_colons = true;
2451 | }
2452 | }
2453 | else
2454 | {
// the new first segment is empty: use the "./"
// or "/./" prefix from the table above
2455 | prefix = 2 + absolute;
2456 | }
2457 | }
2458 | else
2459 | {
2460 | // first segment from it1
2461 | auto const p =
2462 | impl_.cs_ + path_pos + it1.pos;
// the switch value is the number of path
// characters from it1 up to the query
2463 | switch(impl_.cs_ +
2464 | impl_.offset(id_query) - p)
2465 | {
2466 | case 0:
2467 | // points to end
2468 | prefix = absolute;
2469 | break;
2470 | default:
2471 | BOOST_ASSERT(*p == '/');
2472 | if(p[1] != '/')
2473 | {
2474 | if(absolute)
2475 | prefix = 1;
2476 | else if(has_scheme() ||
2477 | ! it1.dereference().contains(c: ':'))
2478 | prefix = 0;
2479 | else
// existing segment with ':' becomes first in a
// scheme-less relative path: prefix it with "./"
2480 | prefix = 2;
2481 | break;
2482 | }
2483 | // empty
2484 | BOOST_FALLTHROUGH;
2485 | case 1:
2486 | // empty
2487 | BOOST_ASSERT(*p == '/');
2488 | prefix = 2 + absolute;
2489 | break;
2490 | }
2491 | }
2492 | 
2493 | // append '/' to new segs
2494 | // if inserting at front.
2495 | std::size_t const suffix =
2496 | it1.index == 0 &&
2497 | impl_.nseg_ > 0 &&
2498 | src.fast_nseg > 0;
2499 | 
2500 | //------------------------------------------------
2501 | //
2502 | // Measure the number of encoded characters
2503 | // of output, and the number of inserted
2504 | // segments including internal separators.
2505 | //
// Colon encoding, when requested, applies to the
// first measured segment only; it is switched off
// again before the remaining segments are measured.
2506 | src.encode_colons = encode_colons;
2507 | std::size_t nseg = 0;
2508 | if(src.measure(n&: nchar))
2509 | {
2510 | src.encode_colons = false;
2511 | for(;;)
2512 | {
2513 | ++nseg;
2514 | if(! src.measure(n&: nchar))
2515 | break;
// one more character for the '/' that the output
// stage writes between two segments
2516 | ++nchar;
2517 | }
2518 | }
2519 | 
// Cross-check fast_nseg against the measured count;
// judging by the asserts, a value of 2 stands for
// "two or more".
2520 | switch(src.fast_nseg)
2521 | {
2522 | case 0:
2523 | BOOST_ASSERT(nseg == 0);
2524 | break;
2525 | case 1:
2526 | BOOST_ASSERT(nseg == 1);
2527 | break;
2528 | case 2:
2529 | BOOST_ASSERT(nseg >= 2);
2530 | break;
2531 | }
2532 | 
2533 | //------------------------------------------------
2534 | //
2535 | // Calculate [pos0, pos1) to remove
2536 | //
2537 | auto pos0 = it0.pos;
2538 | if(it0.index == 0)
2539 | {
2540 | // patch pos for prefix
2541 | pos0 = 0;
2542 | }
2543 | auto pos1 = it1.pos;
2544 | if(it1.index == 0)
2545 | {
2546 | // patch pos for prefix
2547 | pos1 = detail::path_prefix(
2548 | s: impl_.get(id_path));
2549 | }
2550 | else if(
2551 | it0.index == 0 &&
2552 | it1.index < impl_.nseg_ &&
2553 | nseg == 0)
2554 | {
2555 | // Remove the slash from segment it1
2556 | // if it is becoming the new first
2557 | // segment.
2558 | ++pos1;
2559 | }
2560 | // calc decoded size of old range
2561 | auto const dn0 =
2562 | detail::decode_bytes_unsafe(
2563 | s: core::string_view(
2564 | impl_.cs_ +
2565 | impl_.offset(id_path) +
2566 | pos0,
2567 | pos1 - pos0));
2568 | 
2569 | //------------------------------------------------
2570 | //
2571 | // Resize
2572 | //
// NOTE(review): op is given the address of src.s,
// presumably so it can cope with a source string that
// aliases this url's own buffer - confirm against op_t.
2573 | op_t op(*this, &src.s);
2574 | char* dest;
2575 | char const* end;
2576 | {
2577 | auto const nremove = pos1 - pos0;
2578 | // check overflow
// first that prefix + nchar + suffix fits at all,
// then that the net growth (inserted minus removed)
// fits in what is left below max_size()
2579 | if( nchar <= max_size() && (
2580 | prefix + suffix <=
2581 | max_size() - nchar))
2582 | {
2583 | nchar = prefix + nchar + suffix;
2584 | if( nchar <= nremove ||
2585 | nchar - nremove <=
2586 | max_size() - size())
2587 | goto ok;
2588 | }
2589 | // too large
2590 | detail::throw_length_error();
2591 | ok:
2592 | auto const new_size =
2593 | size() + nchar - nremove;
2594 | reserve_impl(new_size, op);
2595 | dest = s_ + path_pos + pos0;
// shift everything after the removed range so that
// it follows the nchar characters about to be written
2596 | op.move(
2597 | dest: dest + nchar,
2598 | src: s_ + path_pos + pos1,
2599 | n: size() - path_pos - pos1);
2600 | impl_.set_size(
2601 | id_path,
2602 | impl_.len(id_path) + nchar - nremove);
2603 | BOOST_ASSERT(size() == new_size);
2604 | end = dest + nchar;
// new count = old count + inserted - replaced; one
// less when cp_src_prefix is set, i.e. when the
// leading "." from the source is not kept as an
// extra segment (see the comment where it is set)
2605 | impl_.nseg_ = impl_.nseg_ + nseg - (
2606 | it1.index - it0.index) - cp_src_prefix;
2607 | if(s_)
2608 | s_[size()] = '\0';
2609 | }
2610 | 
2611 | //------------------------------------------------
2612 | //
2613 | // Output segments and internal separators:
2614 | //
2615 | // prefix [ segment [ '/' segment ] ] suffix
2616 | //
2617 | auto const dest0 = dest;
2618 | switch(prefix)
2619 | {
2620 | case 3:
2621 | *dest++ = '/';
2622 | *dest++ = '.';
2623 | *dest++ = '/';
2624 | break;
2625 | case 2:
2626 | *dest++ = '.';
2627 | BOOST_FALLTHROUGH;
2628 | case 1:
2629 | *dest++ = '/';
2630 | break;
2631 | default:
2632 | break;
2633 | }
2634 | src.rewind();
2635 | if(nseg > 0)
2636 | {
// mirror the measuring pass: colon encoding is
// active for the first copied segment only
2637 | src.encode_colons = encode_colons;
2638 | for(;;)
2639 | {
2640 | src.copy(dest, end);
2641 | if(--nseg == 0)
2642 | break;
2643 | *dest++ = '/';
2644 | src.encode_colons = false;
2645 | }
2646 | if(suffix)
2647 | *dest++ = '/';
2648 | }
// exactly the measured number of characters
// must have been written
2649 | BOOST_ASSERT(dest == dest0 + nchar);
2650 | 
2651 | // calc decoded size of new range,
2652 | auto const dn =
2653 | detail::decode_bytes_unsafe(
2654 | s: core::string_view(dest0, dest - dest0));
// adjust the cached decoded length of the path by
// the difference between the new and old ranges
2655 | impl_.decoded_[id_path] += dn - dn0;
2656 | 
2657 | return detail::segments_iter_impl(
2658 | impl_, pos0, it0.index);
2659 | }
2660 | |
2661 | //------------------------------------------------ |
2662 | |
// Replace the query parameters in the range [it0, it1)
// with the parameters produced by `src`, and return an
// iterator built from the start of the edited range
// (as an offset within the query) and it0.index.
//
// Calls detail::throw_length_error() when the result
// would exceed max_size().
2663 | auto
2664 | url_base::
2665 | edit_params(
2666 | detail::params_iter_impl const& it0,
2667 | detail::params_iter_impl const& it1,
2668 | detail::any_params_iter&& src) ->
2669 | detail::params_iter_impl
2670 | {
// turn the query-relative iterator positions
// into offsets within the whole url
2671 | auto pos0 = impl_.offset(id_query);
2672 | auto pos1 = pos0 + it1.pos;
2673 | pos0 = pos0 + it0.pos;
2674 | 
2675 | // Iterator doesn't belong to this url
2676 | BOOST_ASSERT(it0.ref.alias_of(impl_));
2677 | 
2678 | // Iterator doesn't belong to this url
2679 | BOOST_ASSERT(it1.ref.alias_of(impl_));
2680 | 
2681 | // Iterator is in the wrong order
2682 | BOOST_ASSERT(it0.index <= it1.index);
2683 | 
2684 | // Iterator is out of range
2685 | BOOST_ASSERT(it0.index <= impl_.nparam_);
2686 | BOOST_ASSERT(pos0 <= impl_.offset(id_frag));
2687 | 
2688 | // Iterator is out of range
2689 | BOOST_ASSERT(it1.index <= impl_.nparam_);
2690 | BOOST_ASSERT(pos1 <= impl_.offset(id_frag));
2691 | 
2692 | // calc decoded size of old range,
2693 | // minus one if '?' or '&' prefixed
2694 | auto const dn0 =
2695 | detail::decode_bytes_unsafe(
2696 | s: core::string_view(
2697 | impl_.cs_ + pos0,
2698 | pos1 - pos0)) - (
2699 | impl_.len(id_query) > 0);
2700 | 
2701 | //------------------------------------------------
2702 | //
2703 | // Measure the number of encoded characters
2704 | // of output, and the number of inserted
2705 | // params including internal separators.
2706 | //
2707 | 
2708 | std::size_t nchar = 0;
2709 | std::size_t nparam = 0;
2710 | if(src.measure(n&: nchar))
2711 | {
2712 | ++nchar; // for '?' or '&'
2713 | for(;;)
2714 | {
2715 | ++nparam;
2716 | if(! src.measure(n&: nchar))
2717 | break;
2718 | ++nchar; // for '&'
2719 | }
2720 | }
2721 | 
2722 | //------------------------------------------------
2723 | //
2724 | // Resize
2725 | //
// NOTE(review): op is given the addresses of src.s0
// and src.s1, presumably so it can cope with source
// strings that alias this url's own buffer - confirm
// against op_t.
2726 | op_t op(*this, &src.s0, &src.s1);
2727 | char* dest;
2728 | char const* end;
2729 | {
2730 | auto const nremove = pos1 - pos0;
2731 | // check overflow
// only a net growth (inserted minus removed)
// can push the url past max_size()
2732 | if( nchar > nremove &&
2733 | nchar - nremove >
2734 | max_size() - size())
2735 | {
2736 | // too large
2737 | detail::throw_length_error();
2738 | }
// parameter count after the edit:
// old count + inserted - replaced
2739 | auto const nparam1 =
2740 | impl_.nparam_ + nparam - (
2741 | it1.index - it0.index);
2742 | reserve_impl(size() + nchar - nremove, op);
2743 | dest = s_ + pos0;
2744 | end = dest + nchar;
// The leading separator is patched twice around the
// move: to '&' here, before the tail is shifted, and
// back to '?' afterwards if any params remain.
2745 | if(impl_.nparam_ > 0)
2746 | {
2747 | // needed when we move
2748 | // the beginning of the query
2749 | s_[impl_.offset(id_query)] = '&';
2750 | }
// shift everything after the removed range so that
// it follows the nchar characters about to be written
2751 | op.move(
2752 | dest: dest + nchar,
2753 | src: impl_.cs_ + pos1,
2754 | n: size() - pos1);
2755 | impl_.set_size(
2756 | id_query,
2757 | impl_.len(id_query) +
2758 | nchar - nremove);
2759 | impl_.nparam_ = nparam1;
2760 | if(nparam1 > 0)
2761 | {
2762 | // needed when we erase
2763 | // the beginning of the query
2764 | s_[impl_.offset(id_query)] = '?';
2765 | }
2766 | if(s_)
2767 | s_[size()] = '\0';
2768 | }
2769 | auto const dest0 = dest;
2770 | 
2771 | //------------------------------------------------
2772 | //
2773 | // Output params and internal separators:
2774 | //
2775 | // [ '?' param ] [ '&' param ]
2776 | //
2777 | if(nparam > 0)
2778 | {
// the first inserted param opens the query with '?'
// when it lands at index 0, otherwise it is joined
// to the preceding param with '&'
2779 | if(it0.index == 0)
2780 | *dest++ = '?';
2781 | else
2782 | *dest++ = '&';
2783 | src.rewind();
2784 | for(;;)
2785 | {
2786 | src.copy(dest, end);
2787 | if(--nparam == 0)
2788 | break;
2789 | *dest++ = '&';
2790 | }
2791 | }
2792 | 
2793 | // calc decoded size of new range,
2794 | // minus one if '?' or '&' prefixed
2795 | auto const dn =
2796 | detail::decode_bytes_unsafe(
2797 | s: core::string_view(dest0, dest - dest0)) - (
2798 | impl_.len(id_query) > 0);
2799 | 
// adjust the cached decoded length of the query by
// the difference between the new and old ranges
2800 | impl_.decoded_[id_query] += (dn - dn0);
2801 | 
2802 | return detail::params_iter_impl(
2803 | impl_,
2804 | pos0 - impl_.offset_[id_query],
2805 | it0.index);
2806 | }
2807 | |
2808 | //------------------------------------------------ |
2809 | |
2810 | void |
2811 | url_base:: |
2812 | decoded_to_lower_impl(int id) noexcept |
2813 | { |
2814 | char* it = s_ + impl_.offset(id); |
2815 | char const* const end = s_ + impl_.offset(id + 1); |
2816 | while(it < end) |
2817 | { |
2818 | if (*it != '%') |
2819 | { |
2820 | *it = grammar::to_lower( |
2821 | c: *it); |
2822 | ++it; |
2823 | continue; |
2824 | } |
2825 | it += 3; |
2826 | } |
2827 | } |
2828 | |
2829 | void |
2830 | url_base:: |
2831 | to_lower_impl(int id) noexcept |
2832 | { |
2833 | char* it = s_ + impl_.offset(id); |
2834 | char const* const end = s_ + impl_.offset(id + 1); |
2835 | while(it < end) |
2836 | { |
2837 | *it = grammar::to_lower( |
2838 | c: *it); |
2839 | ++it; |
2840 | } |
2841 | } |
2842 | |
2843 | } // urls |
2844 | } // boost |
2845 | |
2846 | #endif |
2847 | |