1 | //===-- Integer Converter for printf ----------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H |
10 | #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H |
11 | |
12 | #include "src/__support/CPP/span.h" |
13 | #include "src/__support/CPP/string_view.h" |
14 | #include "src/__support/integer_to_string.h" |
15 | #include "src/stdio/printf_core/converter_utils.h" |
16 | #include "src/stdio/printf_core/core_structs.h" |
17 | #include "src/stdio/printf_core/writer.h" |
18 | |
19 | #include <inttypes.h> |
20 | #include <stddef.h> |
21 | |
22 | namespace LIBC_NAMESPACE { |
23 | namespace printf_core { |
24 | |
25 | // These functions only work on characters that are already known to be in the |
26 | // alphabet. Their behavior is undefined otherwise. |
27 | LIBC_INLINE constexpr char to_lower(char a) { return a | 32; } |
28 | LIBC_INLINE constexpr bool is_lower(char a) { return (a & 32) > 0; } |
29 | |
30 | namespace details { |
31 | |
32 | using HexFmt = IntegerToString<uintmax_t, radix::Hex>; |
33 | using HexFmtUppercase = IntegerToString<uintmax_t, radix::Hex::Uppercase>; |
34 | using OctFmt = IntegerToString<uintmax_t, radix::Oct>; |
35 | using DecFmt = IntegerToString<uintmax_t>; |
36 | using BinFmt = IntegerToString<uintmax_t, radix::Bin>; |
37 | |
38 | LIBC_INLINE constexpr size_t num_buf_size() { |
39 | cpp::array<size_t, 5> sizes{ |
40 | HexFmt::buffer_size(), HexFmtUppercase::buffer_size(), |
41 | OctFmt::buffer_size(), DecFmt::buffer_size(), BinFmt::buffer_size()}; |
42 | |
43 | auto result = sizes[0]; |
44 | for (size_t i = 1; i < sizes.size(); i++) |
45 | result = cpp::max(a: result, b: sizes[i]); |
46 | return result; |
47 | } |
48 | |
49 | LIBC_INLINE cpp::optional<cpp::string_view> |
50 | num_to_strview(uintmax_t num, cpp::span<char> bufref, char conv_name) { |
51 | if (to_lower(a: conv_name) == 'x') { |
52 | if (is_lower(a: conv_name)) |
53 | return HexFmt::format_to(buffer: bufref, value: num); |
54 | else |
55 | return HexFmtUppercase::format_to(buffer: bufref, value: num); |
56 | } else if (conv_name == 'o') { |
57 | return OctFmt::format_to(buffer: bufref, value: num); |
58 | } else if (to_lower(a: conv_name) == 'b') { |
59 | return BinFmt::format_to(buffer: bufref, value: num); |
60 | } else { |
61 | return DecFmt::format_to(buffer: bufref, value: num); |
62 | } |
63 | } |
64 | |
65 | } // namespace details |
66 | |
67 | LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) { |
68 | static constexpr size_t BITS_IN_BYTE = 8; |
69 | static constexpr size_t BITS_IN_NUM = sizeof(uintmax_t) * BITS_IN_BYTE; |
70 | |
71 | uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw); |
72 | bool is_negative = false; |
73 | FormatFlags flags = to_conv.flags; |
74 | const char a = is_lower(a: to_conv.conv_name) ? 'a' : 'A'; |
75 | |
76 | // If the conversion is signed, then handle negative values. |
77 | if (to_conv.conv_name == 'd' || to_conv.conv_name == 'i') { |
78 | // Check if the number is negative by checking the high bit. This works even |
79 | // for smaller numbers because they're sign extended by default. |
80 | if ((num & (uintmax_t(1) << (BITS_IN_NUM - 1))) > 0) { |
81 | is_negative = true; |
82 | num = -num; |
83 | } |
84 | } else { |
85 | // These flags are only for signed conversions, so this removes them if the |
86 | // conversion is unsigned. |
87 | flags = FormatFlags(flags & |
88 | ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX)); |
89 | } |
90 | |
91 | num = |
92 | apply_length_modifier(num, length_spec: {.lm: to_conv.length_modifier, .bit_width: to_conv.bit_width}); |
93 | cpp::array<char, details::num_buf_size()> buf; |
94 | auto str = details::num_to_strview(num, bufref: buf, conv_name: to_conv.conv_name); |
95 | if (!str) |
96 | return INT_CONVERSION_ERROR; |
97 | |
98 | size_t digits_written = str->size(); |
99 | |
100 | char sign_char = 0; |
101 | |
102 | if (is_negative) |
103 | sign_char = '-'; |
104 | else if ((flags & FormatFlags::FORCE_SIGN) == FormatFlags::FORCE_SIGN) |
105 | sign_char = '+'; // FORCE_SIGN has precedence over SPACE_PREFIX |
106 | else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX) |
107 | sign_char = ' '; |
108 | |
109 | // These are signed to prevent underflow due to negative values. The eventual |
110 | // values will always be non-negative. |
111 | int zeroes; |
112 | int spaces; |
113 | |
114 | // prefix is "0x" for hexadecimal, or the sign character for signed |
115 | // conversions. Since hexadecimal is unsigned these will never conflict. |
116 | size_t prefix_len; |
117 | char prefix[2]; |
118 | if ((to_lower(a: to_conv.conv_name) == 'x') && |
119 | ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) { |
120 | prefix_len = 2; |
121 | prefix[0] = '0'; |
122 | prefix[1] = a + ('x' - 'a'); |
123 | } else if ((to_lower(a: to_conv.conv_name) == 'b') && |
124 | ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) { |
125 | prefix_len = 2; |
126 | prefix[0] = '0'; |
127 | prefix[1] = a + ('b' - 'a'); |
128 | } else { |
129 | prefix_len = (sign_char == 0 ? 0 : 1); |
130 | prefix[0] = sign_char; |
131 | } |
132 | |
133 | // Negative precision indicates that it was not specified. |
134 | if (to_conv.precision < 0) { |
135 | if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) == |
136 | FormatFlags::LEADING_ZEROES) { |
137 | // If this conv has flag 0 but not - and no specified precision, it's |
138 | // padded with 0's instead of spaces identically to if precision = |
139 | // min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001" |
140 | zeroes = |
141 | static_cast<int>(to_conv.min_width - digits_written - prefix_len); |
142 | spaces = 0; |
143 | } else { |
144 | // If there are enough digits to pass over the precision, just write the |
145 | // number, padded by spaces. |
146 | zeroes = 0; |
147 | spaces = |
148 | static_cast<int>(to_conv.min_width - digits_written - prefix_len); |
149 | } |
150 | } else { |
151 | // If precision was specified, possibly write zeroes, and possibly write |
152 | // spaces. Example: ("%5.4d", 10000) -> "10000" |
153 | // If the check for if zeroes is negative was not there, spaces would be |
154 | // incorrectly evaluated as 1. |
155 | // |
156 | // The standard treats the case when num and precision are both zeroes as |
157 | // special - it requires that no characters are produced. So, we adjust for |
158 | // that special case first. |
159 | if (num == 0 && to_conv.precision == 0) |
160 | digits_written = 0; |
161 | zeroes = static_cast<int>(to_conv.precision - |
162 | digits_written); // a negative value means 0 |
163 | if (zeroes < 0) |
164 | zeroes = 0; |
165 | spaces = static_cast<int>(to_conv.min_width - zeroes - digits_written - |
166 | prefix_len); |
167 | } |
168 | |
169 | // The standard says that alternate form for the o conversion "increases |
170 | // the precision, if and only if necessary, to force the first digit of the |
171 | // result to be a zero (if the value and precision are both 0, a single 0 is |
172 | // printed)" |
173 | // This if checks the following conditions: |
174 | // 1) is this an o conversion in alternate form? |
175 | // 2) does this number has a leading zero? |
176 | // 2a) ... because there are additional leading zeroes? |
177 | // 2b) ... because it is just "0", unless it will not write any digits. |
178 | const bool has_leading_zero = |
179 | (zeroes > 0) || ((num == 0) && (digits_written != 0)); |
180 | if ((to_conv.conv_name == 'o') && |
181 | ((to_conv.flags & FormatFlags::ALTERNATE_FORM) != 0) && |
182 | !has_leading_zero) { |
183 | zeroes = 1; |
184 | --spaces; |
185 | } |
186 | |
187 | if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) { |
188 | // If left justified it goes prefix zeroes digits spaces |
189 | if (prefix_len != 0) |
190 | RET_IF_RESULT_NEGATIVE(writer->write({prefix, prefix_len})); |
191 | if (zeroes > 0) |
192 | RET_IF_RESULT_NEGATIVE(writer->write('0', zeroes)); |
193 | if (digits_written > 0) |
194 | RET_IF_RESULT_NEGATIVE(writer->write(*str)); |
195 | if (spaces > 0) |
196 | RET_IF_RESULT_NEGATIVE(writer->write(' ', spaces)); |
197 | } else { |
198 | // Else it goes spaces prefix zeroes digits |
199 | if (spaces > 0) |
200 | RET_IF_RESULT_NEGATIVE(writer->write(' ', spaces)); |
201 | if (prefix_len != 0) |
202 | RET_IF_RESULT_NEGATIVE(writer->write({prefix, prefix_len})); |
203 | if (zeroes > 0) |
204 | RET_IF_RESULT_NEGATIVE(writer->write('0', zeroes)); |
205 | if (digits_written > 0) |
206 | RET_IF_RESULT_NEGATIVE(writer->write(*str)); |
207 | } |
208 | return WRITE_OK; |
209 | } |
210 | |
211 | } // namespace printf_core |
212 | } // namespace LIBC_NAMESPACE |
213 | |
214 | #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H |
215 | |