1//===-- Unittests for mbrtowc ---------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "hdr/types/mbstate_t.h"
10#include "hdr/types/wchar_t.h"
11#include "src/__support/libc_errno.h"
12#include "src/__support/wchar/mbstate.h"
13#include "src/string/memset.h"
14#include "src/wchar/mbrtowc.h"
15#include "test/UnitTest/ErrnoCheckingTest.h"
16#include "test/UnitTest/Test.h"
17
// Fixture type used by every TEST_F in this file.
// NOTE(review): presumably ErrnoCheckingTest checks errno around each test
// body — confirm in test/UnitTest/ErrnoCheckingTest.h.
using LlvmLibcMBRToWCTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
19
20TEST_F(LlvmLibcMBRToWCTest, OneByte) {
21 const char *ch = "A";
22 wchar_t dest[2];
23 // Testing if it works with nullptr mbstate_t
24 mbstate_t *mb = nullptr;
25 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
26 ASSERT_EQ(static_cast<char>(*dest), 'A');
27 ASSERT_EQ(static_cast<int>(n), 1);
28 ASSERT_ERRNO_SUCCESS();
29
30 // Should fail since we have not read enough
31 n = LIBC_NAMESPACE::mbrtowc(dest, ch, 0, mb);
32 ASSERT_EQ(static_cast<int>(n), -2);
33 ASSERT_ERRNO_SUCCESS();
34}
35
36TEST_F(LlvmLibcMBRToWCTest, TwoByte) {
37 const char ch[2] = {static_cast<char>(0xC2),
38 static_cast<char>(0x8E)}; // Ž car symbol
39 wchar_t dest[2];
40 mbstate_t *mb;
41 LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
42 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb);
43 ASSERT_EQ(static_cast<int>(*dest), 142);
44 ASSERT_EQ(static_cast<int>(n), 2);
45 ASSERT_ERRNO_SUCCESS();
46
47 // Should fail since we have not read enough
48 n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
49 ASSERT_EQ(static_cast<int>(n), -2);
50 // Should pass after reading one more byte
51 n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 1, mb);
52 ASSERT_EQ(static_cast<int>(n), 1);
53 ASSERT_EQ(static_cast<int>(*dest), 142);
54 ASSERT_ERRNO_SUCCESS();
55}
56
57TEST_F(LlvmLibcMBRToWCTest, ThreeByte) {
58 const char ch[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
59 static_cast<char>(0x91)}; // ∑ sigma symbol
60 wchar_t dest[2];
61 mbstate_t *mb;
62 LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
63 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 3, mb);
64 ASSERT_EQ(static_cast<int>(*dest), 8721);
65 ASSERT_EQ(static_cast<int>(n), 3);
66 ASSERT_ERRNO_SUCCESS();
67
68 // Should fail since we have not read enough
69 n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
70 ASSERT_EQ(static_cast<int>(n), -2);
71 ASSERT_ERRNO_SUCCESS();
72 // Should pass after reading two more bytes
73 n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 2, mb);
74 ASSERT_EQ(static_cast<int>(n), 2);
75 ASSERT_EQ(static_cast<int>(*dest), 8721);
76 ASSERT_ERRNO_SUCCESS();
77}
78
79TEST_F(LlvmLibcMBRToWCTest, FourByte) {
80 const char ch[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
81 static_cast<char>(0xA4),
82 static_cast<char>(0xA1)}; // 🤡 clown emoji
83 wchar_t dest[2];
84 mbstate_t *mb;
85 LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
86 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, mb);
87 ASSERT_EQ(static_cast<int>(*dest), 129313);
88 ASSERT_EQ(static_cast<int>(n), 4);
89 ASSERT_ERRNO_SUCCESS();
90 // Should fail since we have not read enough
91 n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb);
92 ASSERT_EQ(static_cast<int>(n), -2);
93 ASSERT_ERRNO_SUCCESS();
94 // Should pass after reading two more bytes
95 n = LIBC_NAMESPACE::mbrtowc(dest, ch + 2, 2, mb);
96 ASSERT_EQ(static_cast<int>(n), 2);
97 ASSERT_EQ(static_cast<int>(*dest), 129313);
98 ASSERT_ERRNO_SUCCESS();
99}
100
101TEST_F(LlvmLibcMBRToWCTest, InvalidByte) {
102 const char ch[1] = {static_cast<char>(0x80)};
103 wchar_t dest[2];
104 mbstate_t *mb;
105 LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
106 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
107 ASSERT_EQ(static_cast<int>(n), -1);
108 ASSERT_ERRNO_EQ(EILSEQ);
109}
110
111TEST_F(LlvmLibcMBRToWCTest, InvalidMultiByte) {
112 const char ch[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
113 static_cast<char>(0x80),
114 static_cast<char>(0x00)}; // invalid sequence of bytes
115 wchar_t dest[2];
116 mbstate_t *mb;
117 LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
118 // Trying to push all 4 should error
119 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, mb);
120 ASSERT_EQ(static_cast<int>(n), -1);
121 ASSERT_ERRNO_EQ(EILSEQ);
122 // Trying to push just the first one should error
123 n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
124 ASSERT_EQ(static_cast<int>(n), -1);
125 ASSERT_ERRNO_EQ(EILSEQ);
126 // Trying to push the second and third should correspond to null wc
127 n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 2, mb);
128 ASSERT_EQ(static_cast<int>(n), 0);
129 ASSERT_TRUE(*dest == L'\0');
130 ASSERT_ERRNO_SUCCESS();
131}
132
133TEST_F(LlvmLibcMBRToWCTest, InvalidLastByte) {
134 // Last byte is invalid since it does not have correct starting sequence.
135 // 0xC0 --> 11000000 starting sequence should be 10xxxxxx
136 const char ch[4] = {static_cast<char>(0xF1), static_cast<char>(0x80),
137 static_cast<char>(0x80), static_cast<char>(0xC0)};
138 wchar_t dest[2];
139 mbstate_t *mb;
140 LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
141 // Trying to push all 4 should error
142 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, mb);
143 ASSERT_EQ(static_cast<int>(n), -1);
144 ASSERT_ERRNO_EQ(EILSEQ);
145}
146
147TEST_F(LlvmLibcMBRToWCTest, ValidTwoByteWithExtraRead) {
148 const char ch[3] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
149 static_cast<char>(0x80)};
150 wchar_t dest[2];
151 mbstate_t *mb;
152 LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
153 // Trying to push all 3 should return valid 2 byte
154 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 3, mb);
155 ASSERT_EQ(static_cast<int>(n), 2);
156 ASSERT_EQ(static_cast<int>(*dest), 142);
157 ASSERT_ERRNO_SUCCESS();
158}
159
160TEST_F(LlvmLibcMBRToWCTest, TwoValidTwoBytes) {
161 const char ch[4] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
162 static_cast<char>(0xC7), static_cast<char>(0x8C)};
163 wchar_t dest[2];
164 mbstate_t *mb;
165 LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
166 // mbstate should reset after reading first one
167 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb);
168 ASSERT_EQ(static_cast<int>(n), 2);
169 ASSERT_EQ(static_cast<int>(*dest), 142);
170 ASSERT_ERRNO_SUCCESS();
171 n = LIBC_NAMESPACE::mbrtowc(dest + 1, ch + 2, 2, mb);
172 ASSERT_EQ(static_cast<int>(n), 2);
173 ASSERT_EQ(static_cast<int>(*(dest + 1)), 460);
174 ASSERT_ERRNO_SUCCESS();
175}
176
177TEST_F(LlvmLibcMBRToWCTest, NullString) {
178 wchar_t dest[2] = {L'O', L'K'};
179 mbstate_t *mb;
180 LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
181 // reading on nullptr should return 0
182 size_t n = LIBC_NAMESPACE::mbrtowc(dest, nullptr, 2, mb);
183 ASSERT_EQ(static_cast<int>(n), 0);
184 ASSERT_TRUE(dest[0] == L'O');
185 ASSERT_ERRNO_SUCCESS();
186 // reading a null terminator should return 0
187 const char *ch = "\0";
188 n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb);
189 ASSERT_EQ(static_cast<int>(n), 0);
190 ASSERT_ERRNO_SUCCESS();
191}
192
193TEST_F(LlvmLibcMBRToWCTest, InvalidMBState) {
194 const char ch[4] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
195 static_cast<char>(0xC7), static_cast<char>(0x8C)};
196 wchar_t dest[2] = {L'O', L'K'};
197 mbstate_t *mb;
198 LIBC_NAMESPACE::internal::mbstate inv;
199 inv.total_bytes = 6;
200 mb = reinterpret_cast<mbstate_t *>(&inv);
201 // invalid mbstate should error
202 size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb);
203 ASSERT_EQ(static_cast<int>(n), -1);
204 ASSERT_ERRNO_EQ(EINVAL);
205}
206

source code of libc/test/src/wchar/mbrtowc_test.cpp