| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include <cstdarg> // va_start, va_end |
| 10 | #include <cstddef> // size_t |
| 11 | #include <cstdio> // vsprintf, vsnprintf |
| 12 | #include <cstdlib> // malloc |
| 13 | #include <cstring> // strcpy, wcsncpy |
| 14 | #include <cwchar> // mbstate_t |
| 15 | |
// Converts the multibyte sequence at *src (at most src_size_bytes bytes) into
// wide characters stored in dst (at most max_dest_chars elements).
//
// Returns >= 0: the number of wide characters converted, excluding the null
// terminator. Conversion stops when the input bytes are exhausted, when dst
// is full, when a null character has been converted, or when the input ends
// in the middle of a multibyte character (this is not reported as an error).
// Returns (size_t)-1: an invalid sequence was encountered; errno is left as
// set by mbrtowc.
//
// When dst is non-null, *src is updated: it becomes nullptr if a null
// character was converted, otherwise it points just past the last byte
// consumed (for an invalid sequence, at the first byte of that sequence).
// When dst is null, max_dest_chars is ignored, no characters are stored and
// *src is not modified.
size_t mbsnrtowcs(wchar_t* __restrict dst,
                  const char** __restrict src,
                  size_t src_size_bytes,
                  size_t max_dest_chars,
                  mbstate_t* __restrict ps) {
  // Special return values of mbrtowc. size_t is unsigned, so the two error
  // values must be compared explicitly: a "> 0" test is also true for them.
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence    = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t dest_converted   = 0;
  size_t source_converted = 0;
  size_t source_remaining = src_size_bytes;
  bool terminator_found   = false;
  bool invalid_found      = false;

  while (source_remaining) {
    // A null dst means "count only", so the destination limit does not apply.
    if (dst && dest_converted >= max_dest_chars)
      break;
    // Converts one multibyte character.
    // If the result is zero, the null character has been found.
    // If it is (size_t)-1 or (size_t)-2, no character was produced.
    // Otherwise it is the size in bytes of the converted character.
    const size_t char_size =
        mbrtowc(dst ? dst + dest_converted : nullptr, *src + source_converted, source_remaining, ps);
    // Don't do anything to change errno from here on.
    if (char_size == terminated_sequence) {
      terminator_found = true;
      break;
    }
    if (char_size == invalid_sequence) {
      invalid_found = true;
      break;
    }
    if (char_size == incomplete_sequence) {
      // mbrtowc has processed every remaining byte and keeps the partial
      // character in *ps, so those bytes count as consumed.
      source_converted += source_remaining;
      break;
    }
    source_remaining -= char_size;
    source_converted += char_size;
    ++dest_converted;
  }

  if (dst)
    *src = terminator_found ? nullptr : *src + source_converted;

  if (invalid_found)
    return static_cast<size_t>(-1);

  return dest_converted;
}
| 74 | |
| 75 | // Converts max_source_chars from the wide character buffer pointer to by *src, |
| 76 | // into the multi byte character sequence buffer stored at dst which must be |
| 77 | // dst_size_bytes bytes in size. |
| 78 | // Returns >= 0: the number of bytes in the sequence |
| 79 | // converted from *src, excluding the null terminator. |
| 80 | // Returns size_t(-1) if an error occurs, also sets errno. |
| 81 | // If dst is NULL dst_size_bytes is ignored and no bytes are copied to dst |
| 82 | // and no "out" parameters are updated. |
| 83 | size_t wcsnrtombs(char* __restrict dst, |
| 84 | const wchar_t** __restrict src, |
| 85 | size_t max_source_chars, |
| 86 | size_t dst_size_bytes, |
| 87 | mbstate_t* __restrict ps) { |
| 88 | // const size_t invalid_sequence = static_cast<size_t>(-1); |
| 89 | |
| 90 | size_t source_converted = 0; |
| 91 | size_t dest_converted = 0; |
| 92 | size_t dest_remaining = dst_size_bytes; |
| 93 | size_t char_size = 0; |
| 94 | const errno_t no_error = (errno_t)0; |
| 95 | errno_t result = (errno_t)0; |
| 96 | bool have_result = false; |
| 97 | bool terminator_found = false; |
| 98 | |
| 99 | // If dst is null then dst_size_bytes should be ignored according to the |
| 100 | // standard. Setting dest_remaining to a large value has this effect. |
| 101 | if (!dst) |
| 102 | dest_remaining = static_cast<size_t>(-1); |
| 103 | |
| 104 | while (source_converted != max_source_chars) { |
| 105 | if (!dest_remaining) |
| 106 | break; |
| 107 | wchar_t c = (*src)[source_converted]; |
| 108 | if (dst) |
| 109 | result = wcrtomb_s(&char_size, dst + dest_converted, dest_remaining, c, ps); |
| 110 | else |
| 111 | result = wcrtomb_s(&char_size, nullptr, 0, c, ps); |
| 112 | // If result is zero there is no error and char_size contains the |
| 113 | // size of the multi-byte-sequence converted. |
| 114 | // Otherwise result indicates an errno type error. |
| 115 | if (result == no_error) { |
| 116 | if (c == L'\0') { |
| 117 | terminator_found = true; |
| 118 | break; |
| 119 | } |
| 120 | ++source_converted; |
| 121 | if (dst) |
| 122 | dest_remaining -= char_size; |
| 123 | dest_converted += char_size; |
| 124 | continue; |
| 125 | } |
| 126 | have_result = true; |
| 127 | break; |
| 128 | } |
| 129 | if (dst) { |
| 130 | if (terminator_found) |
| 131 | *src = nullptr; |
| 132 | else |
| 133 | *src = *src + source_converted; |
| 134 | } |
| 135 | if (have_result && result != no_error) { |
| 136 | errno = result; |
| 137 | return static_cast<size_t>(-1); |
| 138 | } |
| 139 | |
| 140 | return dest_converted; |
| 141 | } |
| 142 | |