/* unicodeobject.h source code [include/python3.10/cpython/unicodeobject.h] */

1	#ifndef Py_CPYTHON_UNICODEOBJECT_H
2	# error "this header file must not be included directly"
3	#endif
4
/* Py_UNICODE was the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode type.
   With PEP 393, Py_UNICODE is deprecated and replaced with a
   typedef to wchar_t. */
#define PY_UNICODE_TYPE wchar_t
/* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE;
11
/* --- Internal Unicode Operations ---------------------------------------- */

/* Defaults to 1 unless the build has already defined it. */
#ifndef USE_UNICODE_WCHAR_CACHE
#  define USE_UNICODE_WCHAR_CACHE 1
#endif /* USE_UNICODE_WCHAR_CACHE */
17
/* Since splitting on whitespace is an important use case, and
   whitespace in most situations is solely ASCII whitespace, we
   optimize for the common case by using a quick look-up table
   _Py_ascii_whitespace (see below) with an inlined check.

   Note: "ch" is evaluated more than once, so it must not have side
   effects. */
#define Py_UNICODE_ISSPACE(ch) \
    ((Py_UCS4)(ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
26
/* Character classification and conversion helpers.  Each macro simply
   forwards to the corresponding _PyUnicode_* function. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

/* True if "ch" is alphabetic, a decimal, a digit or numeric.
   "ch" may be evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch) \
       (Py_UNICODE_ISALPHA(ch) || \
        Py_UNICODE_ISDECIMAL(ch) || \
        Py_UNICODE_ISDIGIT(ch) || \
        Py_UNICODE_ISNUMERIC(ch))
52
53	Py_DEPRECATED(`3.3`) static inline void
54	Py_UNICODE_COPY(Py_UNICODE target, const* Py_UNICODE *source, Py_ssize_t length) {
55	memcpy(dest: target, src: source, n: (size_t)(length) * sizeof(Py_UNICODE));
56	}
57
58	Py_DEPRECATED(`3.3`) static inline void
59	Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
60	Py_ssize_t i;
61	for (i = `0`; i < length; i++) {
62	target[i] = value;
63	}
64	}
65
/* macros to work with surrogates */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
/* high surrogate = top 10 bits added to D800 */
#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
/* low surrogate = bottom 10 bits added to DC00 */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
78
79	/ --- Unicode Type ------------------------------------------------------- /
80
81	/ ASCII-only strings created through PyUnicode_New use the PyASCIIObject*
82	structure. state.ascii and state.compact are set, and the data
83	immediately follow the structure. utf8_length and wstr_length can be found
84	in the length field; the utf8 pointer is equal to the data pointer. /*
85	typedef struct {
86	/ There are 4 forms of Unicode strings:*
87
88	- compact ascii:
89
90	* structure = PyASCIIObject
91	* test: PyUnicode_IS_COMPACT_ASCII(op)
92	* kind = PyUnicode_1BYTE_KIND
93	* compact = 1
94	* ascii = 1
95	* ready = 1
96	* (length is the length of the utf8 and wstr strings)
97	* (data starts just after the structure)
98	* (since ASCII is decoded from UTF-8, the utf8 string are the data)
99
100	- compact:
101
102	* structure = PyCompactUnicodeObject
103	* test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
104	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
105	PyUnicode_4BYTE_KIND
106	* compact = 1
107	* ready = 1
108	* ascii = 0
109	* utf8 is not shared with data
110	* utf8_length = 0 if utf8 is NULL
111	* wstr is shared with data and wstr_length=length
112	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
113	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
114	* wstr_length = 0 if wstr is NULL
115	* (data starts just after the structure)
116
117	- legacy string, not ready:
118
119	* structure = PyUnicodeObject
120	* test: kind == PyUnicode_WCHAR_KIND
121	* length = 0 (use wstr_length)
122	* hash = -1
123	* kind = PyUnicode_WCHAR_KIND
124	* compact = 0
125	* ascii = 0
126	* ready = 0
127	* interned = SSTATE_NOT_INTERNED
128	* wstr is not NULL
129	* data.any is NULL
130	* utf8 is NULL
131	* utf8_length = 0
132
133	- legacy string, ready:
134
135	* structure = PyUnicodeObject structure
136	* test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
137	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
138	PyUnicode_4BYTE_KIND
139	* compact = 0
140	* ready = 1
141	* data.any is not NULL
142	* utf8 is shared and utf8_length = length with data.any if ascii = 1
143	* utf8_length = 0 if utf8 is NULL
144	* wstr is shared with data.any and wstr_length = length
145	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
146	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
147	* wstr_length = 0 if wstr is NULL
148
149	Compact strings use only one memory block (structure + characters),
150	whereas legacy strings use one block for the structure and one block
151	for characters.
152
153	Legacy strings are created by PyUnicode_FromUnicode() and
154	PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
155	when PyUnicode_READY() is called.
156
157	See also _PyUnicode_CheckConsistency().
158	*/
159	PyObject_HEAD
160	Py_ssize_t length; / Number of code points in the string /
161	Py_hash_t hash; / Hash value; -1 if not set /
162	struct {
163	/*
164	SSTATE_NOT_INTERNED (0)
165	SSTATE_INTERNED_MORTAL (1)
166	SSTATE_INTERNED_IMMORTAL (2)
167
168	If interned != SSTATE_NOT_INTERNED, the two references from the
169	dictionary to this object are not* counted in ob_refcnt.*
170	*/
171	unsigned int interned:`2`;
172	/ Character size:*
173
174	- PyUnicode_WCHAR_KIND (0):
175
176	* character type = wchar_t (16 or 32 bits, depending on the
177	platform)
178
179	- PyUnicode_1BYTE_KIND (1):
180
181	* character type = Py_UCS1 (8 bits, unsigned)
182	* all characters are in the range U+0000-U+00FF (latin1)
183	* if ascii is set, all characters are in the range U+0000-U+007F
184	(ASCII), otherwise at least one character is in the range
185	U+0080-U+00FF
186
187	- PyUnicode_2BYTE_KIND (2):
188
189	* character type = Py_UCS2 (16 bits, unsigned)
190	* all characters are in the range U+0000-U+FFFF (BMP)
191	* at least one character is in the range U+0100-U+FFFF
192
193	- PyUnicode_4BYTE_KIND (4):
194
195	* character type = Py_UCS4 (32 bits, unsigned)
196	* all characters are in the range U+0000-U+10FFFF
197	* at least one character is in the range U+10000-U+10FFFF
198	*/
199	unsigned int kind:`3`;
200	/ Compact is with respect to the allocation scheme. Compact unicode*
201	objects only require one memory block while non-compact objects use
202	one block for the PyUnicodeObject struct and another for its data
203	buffer. /*
204	unsigned int compact:`1`;
205	/ The string only contains characters in the range U+0000-U+007F (ASCII)*
206	and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
207	set, use the PyASCIIObject structure. /*
208	unsigned int ascii:`1`;
209	/ The ready flag indicates whether the object layout is initialized*
210	completely. This means that this is either a compact object, or
211	the data pointer is filled out. The bit is redundant, and helps
212	to minimize the test in PyUnicode_IS_READY(). /*
213	unsigned int ready:`1`;
214	/ Padding to ensure that PyUnicode_DATA() is always aligned to*
215	4 bytes (see issue #19537 on m68k). /*
216	unsigned int :`24`;
217	} state;
218	wchar_t wstr; /* wchar_t representation (null-terminated) /
219	} PyASCIIObject;
220
221	/ Non-ASCII strings allocated through PyUnicode_New use the*
222	PyCompactUnicodeObject structure. state.compact is set, and the data
223	immediately follow the structure. /*
224	typedef struct {
225	PyASCIIObject _base;
226	Py_ssize_t utf8_length; / Number of bytes in utf8, excluding the*
227	* terminating \0. */
228	char utf8; /* UTF-8 representation (null-terminated) /
229	Py_ssize_t wstr_length; / Number of code points in wstr, possible*
230	* surrogates count as two code points. */
231	} PyCompactUnicodeObject;
232
233	/ Strings allocated through PyUnicode_FromUnicode(NULL, len) use the*
234	PyUnicodeObject structure. The actual string data is initially in the wstr
235	block, and copied into the data block using _PyUnicode_Ready. /*
236	typedef struct {
237	PyCompactUnicodeObject _base;
238	union {
239	void *any;
240	Py_UCS1 *latin1;
241	Py_UCS2 *ucs2;
242	Py_UCS4 *ucs4;
243	} data; / Canonical, smallest-form Unicode buffer /
244	} PyUnicodeObject;
245
246	PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
247	PyObject *op,
248	int check_content);
249
/* Fast access macros */

/* Returns the deprecated Py_UNICODE representation's size in code units
   (this includes surrogate pairs as 2 units).
   If the Py_UNICODE representation is not available, it will be computed
   on request.  Use PyUnicode_GET_LENGTH() for the length in code points.

   "op" is evaluated more than once. */

/* Py_DEPRECATED(3.3) */
#define PyUnicode_GET_SIZE(op)                       \
    (assert(PyUnicode_Check(op)),                    \
     (((PyASCIIObject *)(op))->wstr) ?               \
      PyUnicode_WSTR_LENGTH(op) :                    \
      ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
       assert(((PyASCIIObject *)(op))->wstr),        \
       PyUnicode_WSTR_LENGTH(op)))

/* Size of the Py_UNICODE representation in bytes. */
/* Py_DEPRECATED(3.3) */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
269
/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
   representation on demand.  Using this macro is very inefficient now,
   try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
   use PyUnicode_WRITE() and PyUnicode_READ(). */

/* Py_DEPRECATED(3.3) */
#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), \
     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
      PyUnicode_AsUnicode(_PyObject_CAST(op)))

/* Same buffer as PyUnicode_AS_UNICODE(), viewed as const char *. */
/* Py_DEPRECATED(3.3) */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
284
285
/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */

/* Values for PyASCIIObject.state: */

/* Interning state. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
294
/* Return true if the string contains only ASCII characters, or 0 if not. The
   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
   ready. */
#define PyUnicode_IS_ASCII(op)                   \
    (assert(PyUnicode_Check(op)),                \
     assert(PyUnicode_IS_READY(op)),             \
     ((PyASCIIObject*)(op))->state.ascii)

/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)

/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not.  No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
312
enum PyUnicode_Kind {
/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
323
/* Return pointers to the canonical representation cast to unsigned char,
   Py_UCS2, or Py_UCS4 for direct character access.
   No checks are performed, use PyUnicode_KIND() before to ensure
   these will work correctly. */

#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))

/* Return one of the PyUnicode_*_KIND values defined above. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)
338
/* Return a void pointer to the raw unicode buffer. */

/* Compact objects: the character data starts right after the
   PyASCIIObject (ASCII) or PyCompactUnicodeObject (non-ASCII) header. */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (PyUnicode_IS_ASCII(op) ?                   \
     ((void*)((PyASCIIObject*)(op) + 1)) :              \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)))

/* Non-compact objects: the data lives in the separately allocated
   data.any buffer, which must already be set. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))

#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
353
/* In the access macros below, "kind" may be evaluated more than once.
   All other macro parameters are evaluated exactly once, so it is safe
   to put side effects into them (such as increasing the index). */

/* Write into the canonical representation, this macro does not do any sanity
   checks and is intended for usage in loops.  The caller should cache the
   kind and data pointers obtained from other macro calls.
   index is the index in the string (starts at 0) and value is the new
   code point value which should be written to that location. */
#define PyUnicode_WRITE(kind, data, index, value) \
    do { \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: { \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
            break; \
        } \
        case PyUnicode_2BYTE_KIND: { \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
            break; \
        } \
        default: { \
            assert((kind) == PyUnicode_4BYTE_KIND); \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
        } \
        } \
    } while (0)

/* Read a code point from the string's canonical representation.  No checks
   or ready calls are performed.  Any kind other than 1BYTE or 2BYTE is
   read as Py_UCS4. */
#define PyUnicode_READ(kind, data, index) \
    ((Py_UCS4) \
    ((kind) == PyUnicode_1BYTE_KIND ? \
        ((const Py_UCS1 *)(data))[(index)] : \
        ((kind) == PyUnicode_2BYTE_KIND ? \
            ((const Py_UCS2 *)(data))[(index)] : \
            ((const Py_UCS4 *)(data))[(index)] \
        ) \
    ))
392
/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
   calls PyUnicode_KIND() and might call it twice.  For single reads, use
   PyUnicode_READ_CHAR, for multiple consecutive reads callers should
   cache kind and use PyUnicode_READ instead. */
#define PyUnicode_READ_CHAR(unicode, index) \
    (assert(PyUnicode_Check(unicode)),          \
     assert(PyUnicode_IS_READY(unicode)),       \
     (Py_UCS4)                                  \
        (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
            ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
            (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
                ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
                ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
            ) \
        ))

/* Returns the length of the unicode string. The caller has to make sure that
   the string has its canonical representation set before calling
   this macro.  Call PyUnicode_(FAST_)Ready to ensure that. */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->length)
416
417
/* Fast check to determine whether an object is ready. Equivalent to
   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)

/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
   case.  If the canonical representation is not yet set, it will still call
   _PyUnicode_Ready().
   Returns 0 on success and -1 on errors. */
#define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (PyUnicode_IS_READY(op) ?                          \
      0 : _PyUnicode_Ready(_PyObject_CAST(op))))
431
/* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
   than iterating over the string. */
#define PyUnicode_MAX_CHAR_VALUE(op) \
    (assert(PyUnicode_IS_READY(op)),                                    \
     (PyUnicode_IS_ASCII(op) ?                                          \
      (0x7f) :                                                          \
      (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
       (0xffU) :                                                        \
       (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
        (0xffffU) :                                                     \
        (0x10ffffU)))))
444
445	Py_DEPRECATED(`3.3`)
446	static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
447	return PyUnicode_IS_COMPACT_ASCII(op) ?
448	((PyASCIIObject*)op)->length :
449	((PyCompactUnicodeObject*)op)->wstr_length;
450	}
451	#define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)
452
453	/ === Public API ========================================================= /
454
455	/ --- Plain Py_UNICODE --------------------------------------------------- /
456
457	/ With PEP 393, this is the recommended way to allocate a new unicode object.*
458	This function will allocate the object and its buffer in a single memory
459	block. Objects created using this function are not resizable. /*
460	PyAPI_FUNC(PyObject*) PyUnicode_New(
461	Py_ssize_t size, / Number of code points in the new string /
462	Py_UCS4 maxchar / maximum code point value in the string /
463	);
464
465	/ Initializes the canonical string representation from the deprecated*
466	wstr/Py_UNICODE representation. This function is used to convert Unicode
467	objects which were created using the old API to the new flexible format
468	introduced with PEP 393.
469
470	Don't call this function directly, use the public PyUnicode_READY() macro
471	instead. /*
472	PyAPI_FUNC(int) _PyUnicode_Ready(
473	PyObject unicode /* Unicode object /
474	);
475
476	/ Get a copy of a Unicode string. /
477	PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
478	PyObject *unicode
479	);
480
481	/ Copy character from one unicode object into another, this function performs*
482	character conversion when necessary and falls back to memcpy() if possible.
483
484	Fail if to is too small (smaller than how_many* or smaller than*
485	len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
486	kind(to), or if to* has more than 1 reference.*
487
488	Return the number of written character, or return -1 and raise an exception
489	on error.
490
491	Pseudo-code:
492
493	how_many = min(how_many, len(from) - from_start)
494	to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
495	return how_many
496
497	Note: The function doesn't write a terminating null character.
498	*/
499	PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
500	PyObject *to,
501	Py_ssize_t to_start,
502	PyObject *from,
503	Py_ssize_t from_start,
504	Py_ssize_t how_many
505	);
506
507	/ Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so*
508	may crash if parameters are invalid (e.g. if the output string
509	is too short). /*
510	PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
511	PyObject *to,
512	Py_ssize_t to_start,
513	PyObject *from,
514	Py_ssize_t from_start,
515	Py_ssize_t how_many
516	);
517
518	/ Fill a string with a character: write fill_char into*
519	unicode[start:start+length].
520
521	Fail if fill_char is bigger than the string maximum character, or if the
522	string has more than 1 reference.
523
524	Return the number of written character, or return -1 and raise an exception
525	on error. /*
526	PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
527	PyObject *unicode,
528	Py_ssize_t start,
529	Py_ssize_t length,
530	Py_UCS4 fill_char
531	);
532
533	/ Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash*
534	if parameters are invalid (e.g. if length is longer than the string). /*
535	PyAPI_FUNC(void) _PyUnicode_FastFill(
536	PyObject *unicode,
537	Py_ssize_t start,
538	Py_ssize_t length,
539	Py_UCS4 fill_char
540	);
541
542	/ Create a Unicode Object from the Py_UNICODE buffer u of the given*
543	size.
544
545	u may be NULL which causes the contents to be undefined. It is the
546	user's responsibility to fill in the needed data afterwards. Note
547	that modifying the Unicode object contents after construction is
548	only allowed if u was set to NULL.
549
550	The buffer is copied into the new object. /*
551	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
552	const Py_UNICODE u, /* Unicode buffer /
553	Py_ssize_t size / size of buffer /
554	);
555
556	/ Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.*
557	Scan the string to find the maximum character. /*
558	PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
559	int kind,
560	const void *buffer,
561	Py_ssize_t size);
562
563	/ Create a new string from a buffer of ASCII characters.*
564	WARNING: Don't check if the string contains any non-ASCII character. /*
565	PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
566	const char *buffer,
567	Py_ssize_t size);
568
569	/ Compute the maximum character of the substring unicode[start:end].*
570	Return 127 for an empty string. /*
571	PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
572	PyObject *unicode,
573	Py_ssize_t start,
574	Py_ssize_t end);
575
576	/ Return a read-only pointer to the Unicode object's internal*
577	Py_UNICODE buffer.
578	If the wchar_t/Py_UNICODE representation is not yet available, this
579	function will calculate it. /*
580	Py_DEPRECATED(`3.3`) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
581	PyObject unicode /* Unicode object /
582	);
583
584	/ Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string*
585	contains null characters. /*
586	PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
587	PyObject unicode /* Unicode object /
588	);
589
590	/ Return a read-only pointer to the Unicode object's internal*
591	Py_UNICODE buffer and save the length at size.
592	If the wchar_t/Py_UNICODE representation is not yet available, this
593	function will calculate it. /*
594
595	Py_DEPRECATED(`3.3`) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
596	PyObject unicode, /* Unicode object /
597	Py_ssize_t size /* location where to save the length /
598	);
599
600
601	/ --- _PyUnicodeWriter API ----------------------------------------------- /
602
603	typedef struct {
604	PyObject *buffer;
605	void *data;
606	enum PyUnicode_Kind kind;
607	Py_UCS4 maxchar;
608	Py_ssize_t size;
609	Py_ssize_t pos;
610
611	/ minimum number of allocated characters (default: 0) /
612	Py_ssize_t min_length;
613
614	/ minimum character (default: 127, ASCII) /
615	Py_UCS4 min_char;
616
617	/ If non-zero, overallocate the buffer (default: 0). /
618	unsigned char overallocate;
619
620	/ If readonly is 1, buffer is a shared string (cannot be modified)*
621	and size is set to 0. /*
622	unsigned char readonly;
623	} _PyUnicodeWriter ;
624
625	/ Initialize a Unicode writer.*
626	*
627	* By default, the minimum buffer size is 0 character and overallocation is
628	* disabled. Set min_length, min_char and overallocate attributes to control
629	* the allocation of the buffer. */
630	PyAPI_FUNC(void)
631	_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
632
633	/ Prepare the buffer to write 'length' characters*
634	with the specified maximum character.
635
636	Return 0 on success, raise an exception and return -1 on error. /*
637	#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
638	(((MAXCHAR) <= (WRITER)->maxchar \
639	&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
640	? 0 \
641	: (((LENGTH) == 0) \
642	? 0 \
643	: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
644
645	/ Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro*
646	instead. /*
647	PyAPI_FUNC(int)
648	_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
649	Py_ssize_t length, Py_UCS4 maxchar);
650
651	/ Prepare the buffer to have at least the kind KIND.*
652	For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
653	support characters in range U+000-U+FFFF.
654
655	Return 0 on success, raise an exception and return -1 on error. /*
656	#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
657	(assert((KIND) != PyUnicode_WCHAR_KIND), \
658	(KIND) <= (WRITER)->kind \
659	? 0 \
660	: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
661
662	/ Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()*
663	macro instead. /*
664	PyAPI_FUNC(int)
665	_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
666	enum PyUnicode_Kind kind);
667
668	/ Append a Unicode character.*
669	Return 0 on success, raise an exception and return -1 on error. /*
670	PyAPI_FUNC(int)
671	_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
672	Py_UCS4 ch
673	);
674
675	/ Append a Unicode string.*
676	Return 0 on success, raise an exception and return -1 on error. /*
677	PyAPI_FUNC(int)
678	_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
679	PyObject str /* Unicode string /
680	);
681
682	/ Append a substring of a Unicode string.*
683	Return 0 on success, raise an exception and return -1 on error. /*
684	PyAPI_FUNC(int)
685	_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
686	PyObject str, /* Unicode string /
687	Py_ssize_t start,
688	Py_ssize_t end
689	);
690
691	/ Append an ASCII-encoded byte string.*
692	Return 0 on success, raise an exception and return -1 on error. /*
693	PyAPI_FUNC(int)
694	_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
695	const char str, /* ASCII-encoded byte string /
696	Py_ssize_t len / number of bytes, or -1 if unknown /
697	);
698
699	/ Append a latin1-encoded byte string.*
700	Return 0 on success, raise an exception and return -1 on error. /*
701	PyAPI_FUNC(int)
702	_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
703	const char str, /* latin1-encoded byte string /
704	Py_ssize_t len / length in bytes /
705	);
706
707	/ Get the value of the writer as a Unicode string. Clear the*
708	buffer of the writer. Raise an exception and return NULL
709	on error. /*
710	PyAPI_FUNC(PyObject *)
711	_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
712
713	/ Deallocate memory of a writer (clear its internal buffer). /
714	PyAPI_FUNC(void)
715	_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
716
717
718	/ Format the object based on the format_spec, as defined in PEP 3101*
719	(Advanced String Formatting). /*
720	PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
721	_PyUnicodeWriter *writer,
722	PyObject *obj,
723	PyObject *format_spec,
724	Py_ssize_t start,
725	Py_ssize_t end);
726
727	/ --- Manage the default encoding ---------------------------------------- /
728
729	/ Returns a pointer to the default encoding (UTF-8) of the*
730	Unicode object unicode.
731
732	Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
733	in the unicodeobject.
734
735	_PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
736	support the previous internal function with the same behaviour.
737
738	Use of this API is DEPRECATED since no size information can be
739	extracted from the returned data.
740	*/
741
742	PyAPI_FUNC(const char ) PyUnicode_AsUTF8(PyObject unicode);
743
744	#define _PyUnicode_AsString PyUnicode_AsUTF8
745
746	/ --- Generic Codecs ----------------------------------------------------- /
747
748	/ Encodes a Py_UNICODE buffer of the given size and returns a*
749	Python string object. /*
750	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_Encode(
751	const Py_UNICODE s, /* Unicode char buffer /
752	Py_ssize_t size, / number of Py_UNICODE chars to encode /
753	const char encoding, /* encoding /
754	const char errors /* error handling /
755	);
756
757	/ --- UTF-7 Codecs ------------------------------------------------------- /
758
759	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
760	const Py_UNICODE data, /* Unicode char buffer /
761	Py_ssize_t length, / number of Py_UNICODE chars to encode /
762	int base64SetO, / Encode RFC2152 Set O characters in base64 /
763	int base64WhiteSpace, / Encode whitespace (sp, ht, nl, cr) in base64 /
764	const char errors /* error handling /
765	);
766
767	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
768	PyObject unicode, /* Unicode object /
769	int base64SetO, / Encode RFC2152 Set O characters in base64 /
770	int base64WhiteSpace, / Encode whitespace (sp, ht, nl, cr) in base64 /
771	const char errors /* error handling /
772	);
773
774	/ --- UTF-8 Codecs ------------------------------------------------------- /
775
776	PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
777	PyObject *unicode,
778	const char *errors);
779
780	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
781	const Py_UNICODE data, /* Unicode char buffer /
782	Py_ssize_t length, / number of Py_UNICODE chars to encode /
783	const char errors /* error handling /
784	);
785
786	/ --- UTF-32 Codecs ------------------------------------------------------ /
787
788	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
789	const Py_UNICODE data, /* Unicode char buffer /
790	Py_ssize_t length, / number of Py_UNICODE chars to encode /
791	const char errors, /* error handling /
792	int byteorder / byteorder to use 0=BOM+native;-1=LE,1=BE /
793	);
794
795	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
796	PyObject object, /* Unicode object /
797	const char errors, /* error handling /
798	int byteorder / byteorder to use 0=BOM+native;-1=LE,1=BE /
799	);
800
801	/ --- UTF-16 Codecs ------------------------------------------------------ /
802
803	/ Returns a Python string object holding the UTF-16 encoded value of*
804	the Unicode data.
805
806	If byteorder is not 0, output is written according to the following
807	byte order:
808
809	byteorder == -1: little endian
810	byteorder == 0: native byte order (writes a BOM mark)
811	byteorder == 1: big endian
812
813	If byteorder is 0, the output string will always start with the
814	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
815	prepended.
816
817	Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
818	UCS-2. This trick makes it possible to add full UTF-16 capabilities
819	at a later point without compromising the APIs.
820
821	*/
822	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
823	const Py_UNICODE data, /* Unicode char buffer /
824	Py_ssize_t length, / number of Py_UNICODE chars to encode /
825	const char errors, /* error handling /
826	int byteorder / byteorder to use 0=BOM+native;-1=LE,1=BE /
827	);
828
829	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
830	PyObject* unicode, / Unicode object /
831	const char errors, /* error handling /
832	int byteorder / byteorder to use 0=BOM+native;-1=LE,1=BE /
833	);
834
835	/ --- Unicode-Escape Codecs ---------------------------------------------- /
836
837	/ Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. /
838	PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
839	const char string, /* Unicode-Escape encoded string /
840	Py_ssize_t length, / size of string /
841	const char errors, /* error handling /
842	Py_ssize_t consumed /* bytes consumed /
843	);
844
845	/ Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape*
846	chars. /*
847	PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
848	const char string, /* Unicode-Escape encoded string /
849	Py_ssize_t length, / size of string /
850	const char errors, /* error handling /
851	Py_ssize_t consumed, /* bytes consumed /
852	const char *first_invalid_escape /* on return, points to first*
853	invalid escaped char in
854	string. /*
855	);
856
857	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
858	const Py_UNICODE data, /* Unicode char buffer /
859	Py_ssize_t length / Number of Py_UNICODE chars to encode /
860	);
861
862	/ --- Raw-Unicode-Escape Codecs ------------------------------------------ /
863
864	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
865	const Py_UNICODE data, /* Unicode char buffer /
866	Py_ssize_t length / Number of Py_UNICODE chars to encode /
867	);
868
869	/ Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. /
870	PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
871	const char string, /* Unicode-Escape encoded string /
872	Py_ssize_t length, / size of string /
873	const char errors, /* error handling /
874	Py_ssize_t consumed /* bytes consumed /
875	);
876
877	/ --- Latin-1 Codecs ----------------------------------------------------- /
878
879	PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
880	PyObject* unicode,
881	const char* errors);
882
883	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
884	const Py_UNICODE data, /* Unicode char buffer /
885	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
886	const char errors /* error handling /
887	);
888
889	/ --- ASCII Codecs ------------------------------------------------------- /
890
891	PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
892	PyObject* unicode,
893	const char* errors);
894
895	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
896	const Py_UNICODE data, /* Unicode char buffer /
897	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
898	const char errors /* error handling /
899	);
900
901	/ --- Character Map Codecs ----------------------------------------------- /
902
903	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
904	const Py_UNICODE data, /* Unicode char buffer /
905	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
906	PyObject mapping, /* encoding mapping /
907	const char errors /* error handling /
908	);
909
910	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
911	PyObject unicode, /* Unicode object /
912	PyObject mapping, /* encoding mapping /
913	const char errors /* error handling /
914	);
915
916	/ Translate a Py_UNICODE buffer of the given length by applying a*
917	character mapping table to it and return the resulting Unicode
918	object.
919
920	The mapping table must map Unicode ordinal integers to Unicode strings,
921	Unicode ordinal integers or None (causing deletion of the character).
922
923	Mapping tables may be dictionaries or sequences. Unmapped character
924	ordinals (ones which cause a LookupError) are left untouched and
925	are copied as-is.
926
927	*/
928	Py_DEPRECATED(`3.3`) PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
929	const Py_UNICODE data, /* Unicode char buffer /
930	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
931	PyObject table, /* Translate table /
932	const char errors /* error handling /
933	);
934
/* --- MBCS codecs for Windows -------------------------------------------- */

/* Only declared when building for Windows (MS_WINDOWS defined). */
#ifdef MS_WINDOWS
Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
    const Py_UNICODE *data,     /* Unicode char buffer */
    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
    const char *errors          /* error handling */
    );
#endif
944
945	/ --- Decimal Encoder ---------------------------------------------------- /
946
947	/ Takes a Unicode string holding a decimal value and writes it into*
948	an output buffer using standard ASCII digit codes.
949
950	The output buffer has to provide at least length+1 bytes of storage
951	area. The output string is 0-terminated.
952
953	The encoder converts whitespace to ' ', decimal characters to their
954	corresponding ASCII digit and all other Latin-1 characters except
955	\0 as-is. Characters outside this range (Unicode ordinals 1-256)
956	are treated as errors. This includes embedded NULL bytes.
957
958	Error handling is defined by the errors argument:
959
960	NULL or "strict": raise a ValueError
961	"ignore": ignore the wrong characters (these are not copied to the
962	output buffer)
963	"replace": replaces illegal characters with '?'
964
965	Returns 0 on success, -1 on failure.
966
967	*/
968
969	Py_DEPRECATED(`3.3`) PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
970	Py_UNICODE s, /* Unicode buffer /
971	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
972	char output, /* Output buffer; must have size >= length /
973	const char errors /* error handling /
974	);
975
976	/ Transforms code points that have decimal digit property to the*
977	corresponding ASCII digit code points.
978
979	Returns a new Unicode string on success, NULL on failure.
980	*/
981
982	Py_DEPRECATED(`3.3`)
983	PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
984	Py_UNICODE s, /* Unicode buffer /
985	Py_ssize_t length / Number of Py_UNICODE chars to transform /
986	);
987
988	/ Coverts a Unicode object holding a decimal value to an ASCII string*
989	for using in int, float and complex parsers.
990	Transforms code points that have decimal digit property to the
991	corresponding ASCII digit code points. Transforms spaces to ASCII.
992	Transforms code points starting from the first non-ASCII code point that
993	is neither a decimal digit nor a space to the end into '?'. /*
994
995	PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
996	PyObject unicode /* Unicode object /
997	);
998
999	/ --- Methods & Slots ---------------------------------------------------- /
1000
1001	PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
1002	PyObject *separator,
1003	PyObject *const *items,
1004	Py_ssize_t seqlen
1005	);
1006
1007	/ Test whether a unicode is equal to ASCII identifier. Return 1 if true,*
1008	0 otherwise. The right argument must be ASCII identifier.
1009	Any error occurs inside will be cleared before return. /*
1010	PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
1011	PyObject left, /* Left string /
1012	_Py_Identifier right /* Right identifier /
1013	);
1014
1015	/ Test whether a unicode is equal to ASCII string. Return 1 if true,*
1016	0 otherwise. The right argument must be ASCII-encoded string.
1017	Any error occurs inside will be cleared before return. /*
1018	PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
1019	PyObject *left,
1020	const char right /* ASCII-encoded string /
1021	);
1022
1023	/ Externally visible for str.strip(unicode) /
1024	PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1025	PyObject *self,
1026	int striptype,
1027	PyObject *sepobj
1028	);
1029
1030	/ Using explicit passed-in values, insert the thousands grouping*
1031	into the string pointed to by buffer. For the argument descriptions,
1032	see Objects/stringlib/localeutil.h /*
1033	PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
1034	_PyUnicodeWriter *writer,
1035	Py_ssize_t n_buffer,
1036	PyObject *digits,
1037	Py_ssize_t d_pos,
1038	Py_ssize_t n_digits,
1039	Py_ssize_t min_width,
1040	const char *grouping,
1041	PyObject *thousands_sep,
1042	Py_UCS4 *maxchar);
1043
1044	/ === Characters Type APIs =============================================== /
1045
1046	/ Helper array used by Py_UNICODE_ISSPACE(). /
1047
1048	PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1049
1050	/ These should not be used directly. Use the Py_UNICODE_IS* and*
1051	Py_UNICODE_TO macros instead.*
1052
1053	These APIs are implemented in Objects/unicodectype.c.
1054
1055	*/
1056
1057	PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1058	Py_UCS4 ch / Unicode character /
1059	);
1060
1061	PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1062	Py_UCS4 ch / Unicode character /
1063	);
1064
1065	PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1066	Py_UCS4 ch / Unicode character /
1067	);
1068
1069	PyAPI_FUNC(int) _PyUnicode_IsXidStart(
1070	Py_UCS4 ch / Unicode character /
1071	);
1072
1073	PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
1074	Py_UCS4 ch / Unicode character /
1075	);
1076
1077	PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1078	const Py_UCS4 ch / Unicode character /
1079	);
1080
1081	PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1082	const Py_UCS4 ch / Unicode character /
1083	);
1084
1085	/ Py_DEPRECATED(3.3) / PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1086	Py_UCS4 ch / Unicode character /
1087	);
1088
1089	/ Py_DEPRECATED(3.3) / PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1090	Py_UCS4 ch / Unicode character /
1091	);
1092
1093	Py_DEPRECATED(`3.3`) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1094	Py_UCS4 ch / Unicode character /
1095	);
1096
1097	PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
1098	Py_UCS4 ch, / Unicode character /
1099	Py_UCS4 *res
1100	);
1101
1102	PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
1103	Py_UCS4 ch, / Unicode character /
1104	Py_UCS4 *res
1105	);
1106
1107	PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
1108	Py_UCS4 ch, / Unicode character /
1109	Py_UCS4 *res
1110	);
1111
1112	PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
1113	Py_UCS4 ch, / Unicode character /
1114	Py_UCS4 *res
1115	);
1116
1117	PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
1118	Py_UCS4 ch / Unicode character /
1119	);
1120
1121	PyAPI_FUNC(int) _PyUnicode_IsCased(
1122	Py_UCS4 ch / Unicode character /
1123	);
1124
1125	PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1126	Py_UCS4 ch / Unicode character /
1127	);
1128
1129	PyAPI_FUNC(int) _PyUnicode_ToDigit(
1130	Py_UCS4 ch / Unicode character /
1131	);
1132
1133	PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1134	Py_UCS4 ch / Unicode character /
1135	);
1136
1137	PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1138	Py_UCS4 ch / Unicode character /
1139	);
1140
1141	PyAPI_FUNC(int) _PyUnicode_IsDigit(
1142	Py_UCS4 ch / Unicode character /
1143	);
1144
1145	PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1146	Py_UCS4 ch / Unicode character /
1147	);
1148
1149	PyAPI_FUNC(int) _PyUnicode_IsPrintable(
1150	Py_UCS4 ch / Unicode character /
1151	);
1152
1153	PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1154	Py_UCS4 ch / Unicode character /
1155	);
1156
1157	PyAPI_FUNC(PyObject) _PyUnicode_FormatLong(PyObject , int, int, int);
1158
1159	/ Return an interned Unicode object for an Identifier; may fail if there is no memory./
1160	PyAPI_FUNC(PyObject) _PyUnicode_FromId(_Py_Identifier);
1161
1162	/ Fast equality check when the inputs are known to be exact unicode types*
1163	and where the hash values are equal (i.e. a very probable match) /*
1164	PyAPI_FUNC(int) _PyUnicode_EQ(PyObject , PyObject );
1165
1166	PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject , void* *);
1167	PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject , void* *);
1168
1169	PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);
1170

source code of include/python3.10/cpython/unicodeobject.h