| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include <cstddef> // size_t |
| 10 | #include <cwchar> // mbstate_t |
| 11 | #include <limits.h> // MB_LEN_MAX |
| 12 | #include <string.h> // wmemcpy |
| 13 | |
// Converts at most `src_size_bytes` bytes of the multi byte sequence `*src`
// into wide characters, storing at most `max_dest_chars` of them in `dst`.
//
// Returns the number of wide characters converted, excluding the null
// terminator, or (size_t) -1 when an invalid sequence is encountered.
//
// When `dst` is NULL, no characters are stored, `max_dest_chars` is ignored
// and `*src` is left untouched; the return value is then the number of wide
// characters the conversion would produce.
//
// When `dst` is not NULL, `*src` is left pointing to the next character to
// convert, or is set to NULL if a null character was converted from `*src`
// (in which case the null wide character is also stored in `dst`).
_LIBCPP_EXPORTED_FROM_ABI size_t mbsnrtowcs(
    wchar_t* __restrict dst,
    const char** __restrict src,
    size_t src_size_bytes,
    size_t max_dest_chars,
    mbstate_t* __restrict ps) {
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence    = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t source_converted = 0;
  size_t dest_converted   = 0;

  // Track the stop reason explicitly. Inferring it from the last mbrtowc
  // result would misreport a terminator when the loop body never runs
  // (e.g. `src_size_bytes == 0`), because 0 is also `terminated_sequence`.
  bool found_terminator = false;
  bool found_invalid    = false;

  // If `dst` is null then `max_dest_chars` should be ignored according to the
  // standard. Setting `max_dest_chars` to a large value has this effect.
  if (dst == nullptr)
    max_dest_chars = static_cast<size_t>(-1);

  while (source_converted < src_size_bytes && dest_converted < max_dest_chars) {
    const size_t source_remaining = src_size_bytes - source_converted;

    // mbrtowc produces at most ONE wide character per call, however many
    // bytes it consumes, and the loop condition guarantees a free slot in
    // `dst`. Converting straight into the destination is therefore always
    // safe; no temporary buffer is required.
    wchar_t* const out = (dst != nullptr) ? dst + dest_converted : nullptr;

    // If result is greater than 0, it's the size in bytes of the character.
    // If result is zero, the null character has been found (and stored).
    // Otherwise, it's an error and errno may be set.
    const size_t result = mbrtowc(out, *src + source_converted, source_remaining, ps);

    // Don't do anything to change errno from here on.
    if (result == invalid_sequence) {
      found_invalid = true;
      break;
    }
    if (result == terminated_sequence) {
      found_terminator = true;
      break;
    }
    if (result == incomplete_sequence) {
      // The remaining bytes form only a prefix of a character. The conversion
      // state is left exactly as mbrtowc left it.
      break;
    }

    ++dest_converted;
    source_converted += result;
  }

  if (dst != nullptr) {
    if (found_terminator)
      *src = nullptr;
    else
      *src += source_converted;
  }

  if (found_invalid)
    return invalid_sequence;

  return dest_converted;
}
| 98 | |