1//===-- Unittests for the CharacterConverter class (utf32 -> 8) -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "src/__support/common.h"
10#include "src/__support/wchar/character_converter.h"
11#include "src/__support/wchar/mbstate.h"
12
13#include "test/UnitTest/Test.h"
14
15TEST(LlvmLibcCharacterConverterUTF32To8Test, OneByte) {
16 LIBC_NAMESPACE::internal::mbstate state;
17 LIBC_NAMESPACE::internal::CharacterConverter cr(&state);
18 cr.clear();
19
20 // utf8 1-byte encodings are identical to their utf32 representations
21 char32_t utf32_A = 0x41; // 'A'
22 cr.push(utf32_A);
23 ASSERT_TRUE(cr.isFull());
24 auto popped = cr.pop_utf8();
25 ASSERT_TRUE(popped.has_value());
26 ASSERT_EQ(static_cast<char>(popped.value()), 'A');
27 ASSERT_TRUE(cr.isEmpty());
28
29 char32_t utf32_B = 0x42; // 'B'
30 cr.push(utf32_B);
31 ASSERT_TRUE(cr.isFull());
32 popped = cr.pop_utf8();
33 ASSERT_TRUE(popped.has_value());
34 ASSERT_EQ(static_cast<char>(popped.value()), 'B');
35 ASSERT_TRUE(cr.isEmpty());
36
37 // should error if we try to pop another utf8 byte out
38 popped = cr.pop_utf8();
39 ASSERT_FALSE(popped.has_value());
40}
41
42TEST(LlvmLibcCharacterConverterUTF32To8Test, TwoByte) {
43 LIBC_NAMESPACE::internal::mbstate state;
44 LIBC_NAMESPACE::internal::CharacterConverter cr(&state);
45 cr.clear();
46
47 // testing utf32: 0xff -> utf8: 0xc3 0xbf
48 char32_t utf32 = 0xff;
49 cr.push(utf32);
50 ASSERT_TRUE(cr.isFull());
51 auto popped = cr.pop_utf8();
52 ASSERT_TRUE(popped.has_value());
53 ASSERT_EQ(static_cast<int>(popped.value()), 0xc3);
54 ASSERT_TRUE(!cr.isEmpty());
55 popped = cr.pop_utf8();
56 ASSERT_TRUE(popped.has_value());
57 ASSERT_EQ(static_cast<int>(popped.value()), 0xbf);
58 ASSERT_TRUE(cr.isEmpty());
59
60 // testing utf32: 0x58e -> utf8: 0xd6 0x8e
61 utf32 = 0x58e;
62 cr.push(utf32);
63 ASSERT_TRUE(cr.isFull());
64 popped = cr.pop_utf8();
65 ASSERT_TRUE(popped.has_value());
66 ASSERT_EQ(static_cast<int>(popped.value()), 0xd6);
67 ASSERT_TRUE(!cr.isEmpty());
68 popped = cr.pop_utf8();
69 ASSERT_TRUE(popped.has_value());
70 ASSERT_EQ(static_cast<int>(popped.value()), 0x8e);
71 ASSERT_TRUE(cr.isEmpty());
72
73 // should error if we try to pop another utf8 byte out
74 popped = cr.pop_utf8();
75 ASSERT_FALSE(popped.has_value());
76}
77
78TEST(LlvmLibcCharacterConverterUTF32To8Test, ThreeByte) {
79 LIBC_NAMESPACE::internal::mbstate state;
80 LIBC_NAMESPACE::internal::CharacterConverter cr(&state);
81 cr.clear();
82
83 // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
84 char32_t utf32 = 0xac15;
85 cr.push(utf32);
86 ASSERT_TRUE(cr.isFull());
87 auto popped = cr.pop_utf8();
88 ASSERT_TRUE(popped.has_value());
89 ASSERT_EQ(static_cast<int>(popped.value()), 0xea);
90 ASSERT_TRUE(!cr.isEmpty());
91 popped = cr.pop_utf8();
92 ASSERT_TRUE(popped.has_value());
93 ASSERT_EQ(static_cast<int>(popped.value()), 0xb0);
94 ASSERT_TRUE(!cr.isEmpty());
95 popped = cr.pop_utf8();
96 ASSERT_TRUE(popped.has_value());
97 ASSERT_EQ(static_cast<int>(popped.value()), 0x95);
98 ASSERT_TRUE(cr.isEmpty());
99
100 // testing utf32: 0x267b -> utf8: 0xe2 0x99 0xbb
101 utf32 = 0x267b;
102 cr.push(utf32);
103 ASSERT_TRUE(cr.isFull());
104 popped = cr.pop_utf8();
105 ASSERT_TRUE(popped.has_value());
106 ASSERT_EQ(static_cast<int>(popped.value()), 0xe2);
107 ASSERT_TRUE(!cr.isEmpty());
108 popped = cr.pop_utf8();
109 ASSERT_TRUE(popped.has_value());
110 ASSERT_EQ(static_cast<int>(popped.value()), 0x99);
111 ASSERT_TRUE(!cr.isEmpty());
112 popped = cr.pop_utf8();
113 ASSERT_TRUE(popped.has_value());
114 ASSERT_EQ(static_cast<int>(popped.value()), 0xbb);
115 ASSERT_TRUE(cr.isEmpty());
116
117 // should error if we try to pop another utf8 byte out
118 popped = cr.pop_utf8();
119 ASSERT_FALSE(popped.has_value());
120}
121
122TEST(LlvmLibcCharacterConverterUTF32To8Test, FourByte) {
123 LIBC_NAMESPACE::internal::mbstate state;
124 LIBC_NAMESPACE::internal::CharacterConverter cr(&state);
125 cr.clear();
126
127 // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
128 char32_t utf32 = 0x1f921;
129 cr.push(utf32);
130 ASSERT_TRUE(cr.isFull());
131 auto popped = cr.pop_utf8();
132 ASSERT_TRUE(popped.has_value());
133 ASSERT_EQ(static_cast<int>(popped.value()), 0xf0);
134 ASSERT_TRUE(!cr.isEmpty());
135 popped = cr.pop_utf8();
136 ASSERT_TRUE(popped.has_value());
137 ASSERT_EQ(static_cast<int>(popped.value()), 0x9f);
138 ASSERT_TRUE(!cr.isEmpty());
139 popped = cr.pop_utf8();
140 ASSERT_TRUE(popped.has_value());
141 ASSERT_EQ(static_cast<int>(popped.value()), 0xa4);
142 ASSERT_TRUE(!cr.isEmpty());
143 popped = cr.pop_utf8();
144 ASSERT_TRUE(popped.has_value());
145 ASSERT_EQ(static_cast<int>(popped.value()), 0xa1);
146 ASSERT_TRUE(cr.isEmpty());
147
148 // testing utf32: 0x12121 -> utf8: 0xf0 0x92 0x84 0xa1
149 utf32 = 0x12121;
150 cr.push(utf32);
151 ASSERT_TRUE(cr.isFull());
152 popped = cr.pop_utf8();
153 ASSERT_TRUE(popped.has_value());
154 ASSERT_EQ(static_cast<int>(popped.value()), 0xf0);
155 ASSERT_TRUE(!cr.isEmpty());
156 popped = cr.pop_utf8();
157 ASSERT_TRUE(popped.has_value());
158 ASSERT_EQ(static_cast<int>(popped.value()), 0x92);
159 ASSERT_TRUE(!cr.isEmpty());
160 popped = cr.pop_utf8();
161 ASSERT_TRUE(popped.has_value());
162 ASSERT_EQ(static_cast<int>(popped.value()), 0x84);
163 ASSERT_TRUE(!cr.isEmpty());
164 popped = cr.pop_utf8();
165 ASSERT_TRUE(popped.has_value());
166 ASSERT_EQ(static_cast<int>(popped.value()), 0xa1);
167 ASSERT_TRUE(cr.isEmpty());
168
169 // should error if we try to pop another utf8 byte out
170 popped = cr.pop_utf8();
171 ASSERT_FALSE(popped.has_value());
172}
173
174TEST(LlvmLibcCharacterConverterUTF32To8Test, CantPushMidConversion) {
175 LIBC_NAMESPACE::internal::mbstate state;
176 LIBC_NAMESPACE::internal::CharacterConverter cr(&state);
177 cr.clear();
178
179 // testing utf32: 0x12121 -> utf8: 0xf0 0x92 0x84 0xa1
180 char32_t utf32 = 0x12121;
181 ASSERT_EQ(cr.push(utf32), 0);
182 auto popped = cr.pop_utf8();
183 ASSERT_TRUE(popped.has_value());
184
185 // can't push a utf32 without finishing popping the utf8 bytes out
186 int err = cr.push(utf32);
187 ASSERT_EQ(err, -1);
188}
189
190TEST(LlvmLibcCharacterConverterUTF32To8Test, InvalidState) {
191 LIBC_NAMESPACE::internal::mbstate s1;
192 LIBC_NAMESPACE::internal::CharacterConverter c1(&s1);
193 ASSERT_TRUE(c1.isValidState());
194
195 LIBC_NAMESPACE::internal::mbstate s2{0, 2, 0};
196 LIBC_NAMESPACE::internal::CharacterConverter c2(&s2);
197 ASSERT_FALSE(c2.isValidState());
198
199 LIBC_NAMESPACE::internal::mbstate s3{0x7f, 1, 1};
200 LIBC_NAMESPACE::internal::CharacterConverter c3(&s3);
201 ASSERT_TRUE(c3.isValidState());
202 LIBC_NAMESPACE::internal::mbstate s4{0x80, 1, 1};
203 LIBC_NAMESPACE::internal::CharacterConverter c4(&s4);
204 ASSERT_FALSE(c4.isValidState());
205
206 LIBC_NAMESPACE::internal::mbstate s5{0x7ff, 1, 2};
207 LIBC_NAMESPACE::internal::CharacterConverter c5(&s5);
208 ASSERT_TRUE(c5.isValidState());
209 LIBC_NAMESPACE::internal::mbstate s6{0x800, 1, 2};
210 LIBC_NAMESPACE::internal::CharacterConverter c6(&s6);
211 ASSERT_FALSE(c6.isValidState());
212
213 LIBC_NAMESPACE::internal::mbstate s7{0xffff, 1, 3};
214 LIBC_NAMESPACE::internal::CharacterConverter c7(&s7);
215 ASSERT_TRUE(c7.isValidState());
216 LIBC_NAMESPACE::internal::mbstate s8{0x10000, 1, 3};
217 LIBC_NAMESPACE::internal::CharacterConverter c8(&s8);
218 ASSERT_FALSE(c8.isValidState());
219
220 LIBC_NAMESPACE::internal::mbstate s9{0x10ffff, 1, 4};
221 LIBC_NAMESPACE::internal::CharacterConverter c9(&s9);
222 ASSERT_TRUE(c9.isValidState());
223 LIBC_NAMESPACE::internal::mbstate s10{0x110000, 1, 2};
224 LIBC_NAMESPACE::internal::CharacterConverter c10(&s10);
225 ASSERT_FALSE(c10.isValidState());
226
227 LIBC_NAMESPACE::internal::mbstate s11{0, 0, 5};
228 LIBC_NAMESPACE::internal::CharacterConverter c11(&s11);
229 ASSERT_FALSE(c11.isValidState());
230}
231

// Source: libc/test/src/__support/wchar/utf32_to_8_test.cpp