| 1 | #ifndef Py_UNICODEOBJECT_H |
| 2 | #define Py_UNICODEOBJECT_H |
| 3 | |
| 4 | #include <stdarg.h> |
| 5 | |
| 6 | /* |
| 7 | |
| 8 | Unicode implementation based on original code by Fredrik Lundh, |
| 9 | modified by Marc-Andre Lemburg (mal@lemburg.com) according to the |
| 10 | Unicode Integration Proposal. (See |
| 11 | http://www.egenix.com/files/python/unicode-proposal.txt). |
| 12 | |
| 13 | Copyright (c) Corporation for National Research Initiatives. |
| 14 | |
| 15 | |
| 16 | Original header: |
| 17 | -------------------------------------------------------------------- |
| 18 | |
| 19 | * Yet another Unicode string type for Python. This type supports the |
| 20 | * 16-bit Basic Multilingual Plane (BMP) only. |
| 21 | * |
| 22 | * Written by Fredrik Lundh, January 1999. |
| 23 | * |
| 24 | * Copyright (c) 1999 by Secret Labs AB. |
| 25 | * Copyright (c) 1999 by Fredrik Lundh. |
| 26 | * |
| 27 | * fredrik@pythonware.com |
| 28 | * http://www.pythonware.com |
| 29 | * |
| 30 | * -------------------------------------------------------------------- |
| 31 | * This Unicode String Type is |
| 32 | * |
| 33 | * Copyright (c) 1999 by Secret Labs AB |
| 34 | * Copyright (c) 1999 by Fredrik Lundh |
| 35 | * |
| 36 | * By obtaining, using, and/or copying this software and/or its |
| 37 | * associated documentation, you agree that you have read, understood, |
| 38 | * and will comply with the following terms and conditions: |
| 39 | * |
| 40 | * Permission to use, copy, modify, and distribute this software and its |
| 41 | * associated documentation for any purpose and without fee is hereby |
| 42 | * granted, provided that the above copyright notice appears in all |
| 43 | * copies, and that both that copyright notice and this permission notice |
| 44 | * appear in supporting documentation, and that the name of Secret Labs |
| 45 | * AB or the author not be used in advertising or publicity pertaining to |
| 46 | * distribution of the software without specific, written prior |
| 47 | * permission. |
| 48 | * |
| 49 | * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO |
| 50 | * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 51 | * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR |
| 52 | * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 53 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 54 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT |
| 55 | * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 56 | * -------------------------------------------------------------------- */ |
| 57 | |
| 58 | #include <ctype.h> |
| 59 | |
| 60 | /* === Internal API ======================================================= */ |
| 61 | |
| 62 | /* --- Internal Unicode Format -------------------------------------------- */ |
| 63 | |
| 64 | /* Python 3.x requires unicode */ |
| 65 | #define Py_USING_UNICODE |
| 66 | |
| 67 | #ifndef SIZEOF_WCHAR_T |
| 68 | #error Must define SIZEOF_WCHAR_T |
| 69 | #endif |
| 70 | |
| 71 | #define Py_UNICODE_SIZE SIZEOF_WCHAR_T |
| 72 | |
| 73 | /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. |
| 74 | Otherwise, Unicode strings are stored as UCS-2 (with limited support |
| 75 | for UTF-16) */ |
| 76 | |
| 77 | #if Py_UNICODE_SIZE >= 4 |
| 78 | #define Py_UNICODE_WIDE |
| 79 | #endif |
| 80 | |
| 81 | /* Set these flags if the platform has "wchar.h" and the |
| 82 | wchar_t type is a 16-bit unsigned type */ |
| 83 | /* #define HAVE_WCHAR_H */ |
| 84 | /* #define HAVE_USABLE_WCHAR_T */ |
| 85 | |
| 86 | /* If the compiler provides a wchar_t type we try to support it |
| 87 | through the interface functions PyUnicode_FromWideChar(), |
| 88 | PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */ |
| 89 | |
| 90 | #ifdef HAVE_USABLE_WCHAR_T |
| 91 | # ifndef HAVE_WCHAR_H |
| 92 | # define HAVE_WCHAR_H |
| 93 | # endif |
| 94 | #endif |
| 95 | |
| 96 | #ifdef HAVE_WCHAR_H |
| 97 | # include <wchar.h> |
| 98 | #endif |
| 99 | |
| 100 | /* Py_UCS4, Py_UCS2 and Py_UCS1 are typedefs for the respective |
| 101 | unicode representations (32-, 16- and 8-bit unsigned integer types). */ |
| 102 | typedef uint32_t Py_UCS4; |
| 103 | typedef uint16_t Py_UCS2; |
| 104 | typedef uint8_t Py_UCS1; |
| 105 | |
| 106 | #ifdef __cplusplus |
| 107 | extern "C" { |
| 108 | #endif |
| 109 | |
| 110 | |
| 111 | PyAPI_DATA(PyTypeObject) PyUnicode_Type; |
| 112 | PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; |
| 113 | |
| 114 | #define PyUnicode_Check(op) \ |
| 115 | PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) |
| 116 | #define PyUnicode_CheckExact(op) Py_IS_TYPE(op, &PyUnicode_Type) |
| 117 | |
| 118 | /* --- Constants ---------------------------------------------------------- */ |
| 119 | |
| 120 | /* This Unicode character will be used as replacement character during |
| 121 | decoding if the errors argument is set to "replace". Note: the |
| 122 | Unicode character U+FFFD is the official REPLACEMENT CHARACTER in |
| 123 | Unicode 3.0. */ |
| 124 | |
| 125 | #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) |
| 126 | |
| 127 | /* === Public API ========================================================= */ |
| 128 | |
| 129 | /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ |
| 130 | PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( |
| 131 | const char *u, /* UTF-8 encoded string */ |
| 132 | Py_ssize_t size /* size of buffer */ |
| 133 | ); |
| 134 | |
| 135 | /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated |
| 136 | UTF-8 encoded bytes. The size is determined with strlen(). */ |
| 137 | PyAPI_FUNC(PyObject*) PyUnicode_FromString( |
| 138 | const char *u /* UTF-8 encoded string */ |
| 139 | ); |
| 140 | |
| 141 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 |
| 142 | PyAPI_FUNC(PyObject*) PyUnicode_Substring( |
| 143 | PyObject *str, |
| 144 | Py_ssize_t start, |
| 145 | Py_ssize_t end); |
| 146 | #endif |
| 147 | |
| 148 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 |
| 149 | /* Copy the string into a UCS4 buffer including the null character if copy_null |
| 150 | is set. Return NULL and raise an exception on error. Raise a SystemError if |
| 151 | the buffer is smaller than the string. Return buffer on success. |
| 152 | |
| 153 | buflen is the length of the buffer in (Py_UCS4) characters. */ |
| 154 | PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( |
| 155 | PyObject *unicode, |
| 156 | Py_UCS4* buffer, |
| 157 | Py_ssize_t buflen, |
| 158 | int copy_null); |
| 159 | |
| 160 | /* Copy the string into a UCS4 buffer. A new buffer is allocated using |
| 161 | * PyMem_Malloc; if this fails, NULL is returned with a memory error |
| 162 | exception set. */ |
| 163 | PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); |
| 164 | #endif |
| 165 | |
| 166 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 |
| 167 | /* Get the length of the Unicode object. */ |
| 168 | |
| 169 | PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( |
| 170 | PyObject *unicode |
| 171 | ); |
| 172 | #endif |
| 173 | |
| 174 | /* Get the number of Py_UNICODE units in the |
| 175 | string representation. */ |
| 176 | |
| 177 | Py_DEPRECATED(3.3) PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( |
| 178 | PyObject *unicode /* Unicode object */ |
| 179 | ); |
| 180 | |
| 181 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 |
| 182 | /* Read a character from the string. */ |
| 183 | |
| 184 | PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( |
| 185 | PyObject *unicode, |
| 186 | Py_ssize_t index |
| 187 | ); |
| 188 | |
| 189 | /* Write a character to the string. The string must have been created through |
| 190 | PyUnicode_New, must not be shared, and must not have been hashed yet. |
| 191 | |
| 192 | Return 0 on success, -1 on error. */ |
| 193 | |
| 194 | PyAPI_FUNC(int) PyUnicode_WriteChar( |
| 195 | PyObject *unicode, |
| 196 | Py_ssize_t index, |
| 197 | Py_UCS4 character |
| 198 | ); |
| 199 | #endif |
| 200 | |
| 201 | /* Resize a Unicode object. The length is the number of characters, except |
| 202 | if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length |
| 203 | is the number of Py_UNICODE characters. |
| 204 | |
| 205 | On success, *unicode is modified to point to the new (resized) object and 0 |
| 206 | is returned. |
| 207 | |
| 208 | The function tries to resize the string in place (which is usually faster than |
| 209 | allocating a new string and copying characters), or else creates a new string. |
| 210 | |
| 211 | Error handling is implemented as follows: an exception is set, -1 |
| 212 | is returned and *unicode is left untouched. |
| 213 | |
| 214 | WARNING: The function doesn't check string content; the result may not be a |
| 215 | string in canonical representation. */ |
| 216 | |
| 217 | PyAPI_FUNC(int) PyUnicode_Resize( |
| 218 | PyObject **unicode, /* Pointer to the Unicode object */ |
| 219 | Py_ssize_t length /* New length */ |
| 220 | ); |
| 221 | |
| 222 | /* Decode obj to a Unicode object. |
| 223 | |
| 224 | bytes, bytearray and other bytes-like objects are decoded according to the |
| 225 | given encoding and error handler. The encoding and error handler can be |
| 226 | NULL to have the interface use UTF-8 and "strict". |
| 227 | |
| 228 | All other objects (including Unicode objects) raise an exception. |
| 229 | |
| 230 | The API returns NULL in case of an error. The caller is responsible |
| 231 | for decref'ing the returned objects. |
| 232 | |
| 233 | */ |
| 234 | |
| 235 | PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( |
| 236 | PyObject *obj, /* Object */ |
| 237 | const char *encoding, /* encoding */ |
| 238 | const char *errors /* error handling */ |
| 239 | ); |
| 240 | |
| 241 | /* Copy an instance of a Unicode subtype to a new true Unicode object if |
| 242 | necessary. If obj is already a true Unicode object (not a subtype), return |
| 243 | the reference with *incremented* refcount. |
| 244 | |
| 245 | The API returns NULL in case of an error. The caller is responsible |
| 246 | for decref'ing the returned objects. |
| 247 | |
| 248 | */ |
| 249 | |
| 250 | PyAPI_FUNC(PyObject*) PyUnicode_FromObject( |
| 251 | PyObject *obj /* Object */ |
| 252 | ); |
| 253 | |
| 254 | PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( |
| 255 | const char *format, /* ASCII-encoded string */ |
| 256 | va_list vargs |
| 257 | ); |
| 258 | PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( |
| 259 | const char *format, /* ASCII-encoded string */ |
| 260 | ... |
| 261 | ); |
| 262 | |
| 263 | PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); |
| 264 | PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( |
| 265 | const char *u /* UTF-8 encoded string */ |
| 266 | ); |
| 267 | |
| 268 | // PyUnicode_InternImmortal() is deprecated since Python 3.10 |
| 269 | // and will be removed in Python 3.12. Use PyUnicode_InternInPlace() instead. |
| 270 | Py_DEPRECATED(3.10) PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); |
| 271 | |
| 272 | /* Casts op to PyASCIIObject* without any type check and reads state.interned; use only if you know it's a string */ |
| 273 | #define PyUnicode_CHECK_INTERNED(op) \ |
| 274 | (((PyASCIIObject *)(op))->state.interned) |
| 275 | |
| 276 | /* --- wchar_t support for platforms which support it --------------------- */ |
| 277 | |
| 278 | #ifdef HAVE_WCHAR_H |
| 279 | |
| 280 | /* Create a Unicode Object from the wchar_t buffer w of the given |
| 281 | size. |
| 282 | |
| 283 | The buffer is copied into the new object. */ |
| 284 | |
| 285 | PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( |
| 286 | const wchar_t *w, /* wchar_t buffer */ |
| 287 | Py_ssize_t size /* size of buffer */ |
| 288 | ); |
| 289 | |
| 290 | /* Copies the Unicode Object contents into the wchar_t buffer w. At |
| 291 | most size wchar_t characters are copied. |
| 292 | |
| 293 | Note that the resulting wchar_t string may or may not be |
| 294 | 0-terminated. It is the responsibility of the caller to make sure |
| 295 | that the wchar_t string is 0-terminated in case this is required by |
| 296 | the application. |
| 297 | |
| 298 | Returns the number of wchar_t characters copied (excluding a |
| 299 | possibly trailing 0-termination character) or -1 in case of an |
| 300 | error. */ |
| 301 | |
| 302 | PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( |
| 303 | PyObject *unicode, /* Unicode object */ |
| 304 | wchar_t *w, /* wchar_t buffer */ |
| 305 | Py_ssize_t size /* size of buffer */ |
| 306 | ); |
| 307 | |
| 308 | /* Convert the Unicode object to a wide character string. The output string |
| 309 | always ends with a null character. If size is not NULL, write the number of |
| 310 | wide characters (excluding the null character) into *size. |
| 311 | |
| 312 | Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it) |
| 313 | on success. On error, raises a MemoryError and returns NULL; *size is |
| 314 | undefined in that case. */ |
| 315 | |
| 316 | PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( |
| 317 | PyObject *unicode, /* Unicode object */ |
| 318 | Py_ssize_t *size /* out (may be NULL): number of wide characters, excluding the null */ |
| 319 | ); |
| 320 | |
| 321 | #endif |
| 322 | |
| 323 | /* --- Unicode ordinals --------------------------------------------------- */ |
| 324 | |
| 325 | /* Create a Unicode Object from the given Unicode code point ordinal. |
| 326 | |
| 327 | The ordinal must be in range(0x110000). A ValueError is |
| 328 | raised in case it is not. |
| 329 | |
| 330 | */ |
| 331 | |
| 332 | PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); |
| 333 | |
| 334 | /* === Builtin Codecs ===================================================== |
| 335 | |
| 336 | Many of these APIs take two arguments encoding and errors. These |
| 337 | parameters encoding and errors have the same semantics as the ones |
| 338 | of the builtin str() API. |
| 339 | |
| 340 | Setting encoding to NULL causes the default encoding (UTF-8) to be used. |
| 341 | |
| 342 | Error handling is set by errors which may also be set to NULL |
| 343 | meaning to use the default handling defined for the codec. Default |
| 344 | error handling for all builtin codecs is "strict" (ValueErrors are |
| 345 | raised). |
| 346 | |
| 347 | The codecs all use a similar interface. Only deviations from the |
| 348 | generic ones are documented. |
| 349 | |
| 350 | */ |
| 351 | |
| 352 | /* --- Manage the default encoding ---------------------------------------- */ |
| 353 | |
| 354 | /* Returns "utf-8". */ |
| 355 | PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); |
| 356 | |
| 357 | /* --- Generic Codecs ----------------------------------------------------- */ |
| 358 | |
| 359 | /* Create a Unicode object by decoding the encoded string s of the |
| 360 | given size. */ |
| 361 | |
| 362 | PyAPI_FUNC(PyObject*) PyUnicode_Decode( |
| 363 | const char *s, /* encoded string */ |
| 364 | Py_ssize_t size, /* size of buffer */ |
| 365 | const char *encoding, /* encoding */ |
| 366 | const char *errors /* error handling */ |
| 367 | ); |
| 368 | |
| 369 | /* Decode a Unicode object unicode and return the result as Python |
| 370 | object. |
| 371 | |
| 372 | This API is DEPRECATED. The only supported standard encoding is rot13. |
| 373 | Use PyCodec_Decode() to decode with rot13 and non-standard codecs |
| 374 | that decode from str. */ |
| 375 | |
| 376 | Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( |
| 377 | PyObject *unicode, /* Unicode object */ |
| 378 | const char *encoding, /* encoding */ |
| 379 | const char *errors /* error handling */ |
| 380 | ); |
| 381 | |
| 382 | /* Decode a Unicode object unicode and return the result as Unicode |
| 383 | object. |
| 384 | |
| 385 | This API is DEPRECATED. The only supported standard encoding is rot13. |
| 386 | Use PyCodec_Decode() to decode with rot13 and non-standard codecs |
| 387 | that decode from str to str. */ |
| 388 | |
| 389 | Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( |
| 390 | PyObject *unicode, /* Unicode object */ |
| 391 | const char *encoding, /* encoding */ |
| 392 | const char *errors /* error handling */ |
| 393 | ); |
| 394 | |
| 395 | /* Encodes a Unicode object and returns the result as a Python |
| 396 | object. |
| 397 | |
| 398 | This API is DEPRECATED. It is superseded by PyUnicode_AsEncodedString() |
| 399 | since all standard encodings (except rot13) encode str to bytes. |
| 400 | Use PyCodec_Encode() for encoding with rot13 and non-standard codecs |
| 401 | that encode from str to non-bytes. */ |
| 402 | |
| 403 | Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( |
| 404 | PyObject *unicode, /* Unicode object */ |
| 405 | const char *encoding, /* encoding */ |
| 406 | const char *errors /* error handling */ |
| 407 | ); |
| 408 | |
| 409 | /* Encodes a Unicode object and returns the result as a Python bytes |
| 410 | object. */ |
| 411 | |
| 412 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( |
| 413 | PyObject *unicode, /* Unicode object */ |
| 414 | const char *encoding, /* encoding */ |
| 415 | const char *errors /* error handling */ |
| 416 | ); |
| 417 | |
| 418 | /* Encodes a Unicode object and returns the result as a Unicode |
| 419 | object. |
| 420 | |
| 421 | This API is DEPRECATED. The only supported standard encoding is rot13. |
| 422 | Use PyCodec_Encode() to encode with rot13 and non-standard codecs |
| 423 | that encode from str to str. */ |
| 424 | |
| 425 | Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( |
| 426 | PyObject *unicode, /* Unicode object */ |
| 427 | const char *encoding, /* encoding */ |
| 428 | const char *errors /* error handling */ |
| 429 | ); |
| 430 | |
| 431 | /* Build an encoding map. */ |
| 432 | |
| 433 | PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( |
| 434 | PyObject* string /* 256 character map */ |
| 435 | ); |
| 436 | |
| 437 | /* --- UTF-7 Codecs ------------------------------------------------------- */ |
| 438 | |
| 439 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( |
| 440 | const char *string, /* UTF-7 encoded string */ |
| 441 | Py_ssize_t length, /* size of string */ |
| 442 | const char *errors /* error handling */ |
| 443 | ); |
| 444 | |
| 445 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( |
| 446 | const char *string, /* UTF-7 encoded string */ |
| 447 | Py_ssize_t length, /* size of string */ |
| 448 | const char *errors, /* error handling */ |
| 449 | Py_ssize_t *consumed /* bytes consumed */ |
| 450 | ); |
| 451 | |
| 452 | /* --- UTF-8 Codecs ------------------------------------------------------- */ |
| 453 | |
| 454 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( |
| 455 | const char *string, /* UTF-8 encoded string */ |
| 456 | Py_ssize_t length, /* size of string */ |
| 457 | const char *errors /* error handling */ |
| 458 | ); |
| 459 | |
| 460 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( |
| 461 | const char *string, /* UTF-8 encoded string */ |
| 462 | Py_ssize_t length, /* size of string */ |
| 463 | const char *errors, /* error handling */ |
| 464 | Py_ssize_t *consumed /* bytes consumed */ |
| 465 | ); |
| 466 | |
| 467 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( |
| 468 | PyObject *unicode /* Unicode object */ |
| 469 | ); |
| 470 | |
| 471 | /* Returns a pointer to the UTF-8 (default encoding) representation of the |
| 472 | Unicode object unicode and stores the size of that representation, |
| 473 | in bytes, in *size. |
| 474 | |
| 475 | In case of an error, *size is not set. |
| 476 | |
| 477 | This function caches the UTF-8 encoded string in the Unicode object, |
| 478 | so subsequent calls will return the same string. The memory is released |
| 479 | when the Unicode object is deallocated. |
| 480 | */ |
| 481 | |
| 482 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 |
| 483 | PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize( |
| 484 | PyObject *unicode, |
| 485 | Py_ssize_t *size); |
| 486 | #endif |
| 487 | |
| 488 | /* --- UTF-32 Codecs ------------------------------------------------------ */ |
| 489 | |
| 490 | /* Decodes length bytes from a UTF-32 encoded buffer string and returns |
| 491 | the corresponding Unicode object. |
| 492 | |
| 493 | errors (if non-NULL) defines the error handling. It defaults |
| 494 | to "strict". |
| 495 | |
| 496 | If byteorder is non-NULL, the decoder starts decoding using the |
| 497 | given byte order: |
| 498 | |
| 499 | *byteorder == -1: little endian |
| 500 | *byteorder == 0: native order |
| 501 | *byteorder == 1: big endian |
| 502 | |
| 503 | In native mode, the first four bytes of the stream are checked for a |
| 504 | BOM mark. If found, the BOM mark is analysed, the byte order |
| 505 | adjusted and the BOM skipped. In the other modes, no BOM mark |
| 506 | interpretation is done. After completion, *byteorder is set to the |
| 507 | current byte order at the end of input data. |
| 508 | |
| 509 | If byteorder is NULL, the codec starts in native order mode. |
| 510 | |
| 511 | */ |
| 512 | |
| 513 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( |
| 514 | const char *string, /* UTF-32 encoded string */ |
| 515 | Py_ssize_t length, /* size of string */ |
| 516 | const char *errors, /* error handling */ |
| 517 | int *byteorder /* pointer to byteorder to use |
| 518 | 0=native;-1=LE,1=BE; updated on |
| 519 | exit */ |
| 520 | ); |
| 521 | |
| 522 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( |
| 523 | const char *string, /* UTF-32 encoded string */ |
| 524 | Py_ssize_t length, /* size of string */ |
| 525 | const char *errors, /* error handling */ |
| 526 | int *byteorder, /* pointer to byteorder to use |
| 527 | 0=native;-1=LE,1=BE; updated on |
| 528 | exit */ |
| 529 | Py_ssize_t *consumed /* bytes consumed */ |
| 530 | ); |
| 531 | |
| 532 | /* Returns a Python string using the UTF-32 encoding in native byte |
| 533 | order. The string always starts with a BOM mark. */ |
| 534 | |
| 535 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( |
| 536 | PyObject *unicode /* Unicode object */ |
| 537 | ); |
| 538 | |
| 539 | /* Returns a Python string object holding the UTF-32 encoded value of |
| 540 | the Unicode data. |
| 541 | |
| 542 | The output is written according to the value of byteorder, as |
| 543 | follows: |
| 544 | |
| 545 | byteorder == -1: little endian |
| 546 | byteorder == 0: native byte order (writes a BOM mark) |
| 547 | byteorder == 1: big endian |
| 548 | |
| 549 | If byteorder is 0, the output string will always start with the |
| 550 | Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is |
| 551 | prepended. |
| 552 | NOTE(review): no declaration follows this comment in this header -- confirm which function it documents. |
| 553 | */ |
| 554 | |
| 555 | /* --- UTF-16 Codecs ------------------------------------------------------ */ |
| 556 | |
| 557 | /* Decodes length bytes from a UTF-16 encoded buffer string and returns |
| 558 | the corresponding Unicode object. |
| 559 | |
| 560 | errors (if non-NULL) defines the error handling. It defaults |
| 561 | to "strict". |
| 562 | |
| 563 | If byteorder is non-NULL, the decoder starts decoding using the |
| 564 | given byte order: |
| 565 | |
| 566 | *byteorder == -1: little endian |
| 567 | *byteorder == 0: native order |
| 568 | *byteorder == 1: big endian |
| 569 | |
| 570 | In native mode, the first two bytes of the stream are checked for a |
| 571 | BOM mark. If found, the BOM mark is analysed, the byte order |
| 572 | adjusted and the BOM skipped. In the other modes, no BOM mark |
| 573 | interpretation is done. After completion, *byteorder is set to the |
| 574 | current byte order at the end of input data. |
| 575 | |
| 576 | If byteorder is NULL, the codec starts in native order mode. |
| 577 | |
| 578 | */ |
| 579 | |
| 580 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( |
| 581 | const char *string, /* UTF-16 encoded string */ |
| 582 | Py_ssize_t length, /* size of string */ |
| 583 | const char *errors, /* error handling */ |
| 584 | int *byteorder /* pointer to byteorder to use |
| 585 | 0=native;-1=LE,1=BE; updated on |
| 586 | exit */ |
| 587 | ); |
| 588 | |
| 589 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( |
| 590 | const char *string, /* UTF-16 encoded string */ |
| 591 | Py_ssize_t length, /* size of string */ |
| 592 | const char *errors, /* error handling */ |
| 593 | int *byteorder, /* pointer to byteorder to use |
| 594 | 0=native;-1=LE,1=BE; updated on |
| 595 | exit */ |
| 596 | Py_ssize_t *consumed /* bytes consumed */ |
| 597 | ); |
| 598 | |
| 599 | /* Returns a Python string using the UTF-16 encoding in native byte |
| 600 | order. The string always starts with a BOM mark. */ |
| 601 | |
| 602 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( |
| 603 | PyObject *unicode /* Unicode object */ |
| 604 | ); |
| 605 | |
| 606 | /* --- Unicode-Escape Codecs ---------------------------------------------- */ |
| 607 | |
| 608 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( |
| 609 | const char *string, /* Unicode-Escape encoded string */ |
| 610 | Py_ssize_t length, /* size of string */ |
| 611 | const char *errors /* error handling */ |
| 612 | ); |
| 613 | |
| 614 | PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( |
| 615 | PyObject *unicode /* Unicode object */ |
| 616 | ); |
| 617 | |
| 618 | /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ |
| 619 | |
| 620 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( |
| 621 | const char *string, /* Raw-Unicode-Escape encoded string */ |
| 622 | Py_ssize_t length, /* size of string */ |
| 623 | const char *errors /* error handling */ |
| 624 | ); |
| 625 | |
| 626 | PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( |
| 627 | PyObject *unicode /* Unicode object */ |
| 628 | ); |
| 629 | |
| 630 | /* --- Latin-1 Codecs ----------------------------------------------------- |
| 631 | |
| 632 | Note: Latin-1 corresponds to the first 256 Unicode ordinals. */ |
| 633 | |
| 634 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( |
| 635 | const char *string, /* Latin-1 encoded string */ |
| 636 | Py_ssize_t length, /* size of string */ |
| 637 | const char *errors /* error handling */ |
| 638 | ); |
| 639 | |
| 640 | PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( |
| 641 | PyObject *unicode /* Unicode object */ |
| 642 | ); |
| 643 | |
| 644 | /* --- ASCII Codecs ------------------------------------------------------- |
| 645 | |
| 646 | Only 7-bit ASCII data is accepted. All other codes generate errors. |
| 647 | |
| 648 | */ |
| 649 | |
| 650 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( |
| 651 | const char *string, /* ASCII encoded string */ |
| 652 | Py_ssize_t length, /* size of string */ |
| 653 | const char *errors /* error handling */ |
| 654 | ); |
| 655 | |
| 656 | PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( |
| 657 | PyObject *unicode /* Unicode object */ |
| 658 | ); |
| 659 | |
| 660 | /* --- Character Map Codecs ----------------------------------------------- |
| 661 | |
| 662 | This codec uses mappings to encode and decode characters. |
| 663 | |
| 664 | Decoding mappings must map byte ordinals (integers in the range from 0 to |
| 665 | 255) to Unicode strings, integers (which are then interpreted as Unicode |
| 666 | ordinals) or None. Unmapped data bytes (ones which cause a LookupError) |
| 667 | as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined |
| 668 | mapping" and cause an error. |
| 669 | |
| 670 | Encoding mappings must map Unicode ordinal integers to bytes objects, |
| 671 | integers in the range from 0 to 255 or None. Unmapped character |
| 672 | ordinals (ones which cause a LookupError) as well as mapped to |
| 673 | None are treated as "undefined mapping" and cause an error. |
| 674 | |
| 675 | */ |
| 676 | |
| 677 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( |
| 678 | const char *string, /* Encoded string */ |
| 679 | Py_ssize_t length, /* size of string */ |
| 680 | PyObject *mapping, /* decoding mapping */ |
| 681 | const char *errors /* error handling */ |
| 682 | ); |
| 683 | |
| 684 | PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( |
| 685 | PyObject *unicode, /* Unicode object */ |
| 686 | PyObject *mapping /* encoding mapping */ |
| 687 | ); |
| 688 | |
| 689 | /* --- MBCS codecs for Windows -------------------------------------------- */ |
| 690 | |
| 691 | #ifdef MS_WINDOWS |
| 692 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( |
| 693 | const char *string, /* MBCS encoded string */ |
| 694 | Py_ssize_t length, /* size of string */ |
| 695 | const char *errors /* error handling */ |
| 696 | ); |
| 697 | |
| 698 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( |
| 699 | const char *string, /* MBCS encoded string */ |
| 700 | Py_ssize_t length, /* size of string */ |
| 701 | const char *errors, /* error handling */ |
| 702 | Py_ssize_t *consumed /* bytes consumed */ |
| 703 | ); |
| 704 | |
| 705 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 |
| 706 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( |
| 707 | int code_page, /* code page number */ |
| 708 | const char *string, /* encoded string */ |
| 709 | Py_ssize_t length, /* size of string */ |
| 710 | const char *errors, /* error handling */ |
| 711 | Py_ssize_t *consumed /* bytes consumed */ |
| 712 | ); |
| 713 | #endif |
| 714 | |
| 715 | PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( |
| 716 | PyObject *unicode /* Unicode object */ |
| 717 | ); |
| 718 | |
| 719 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 |
| 720 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( |
| 721 | int code_page, /* code page number */ |
| 722 | PyObject *unicode, /* Unicode object */ |
| 723 | const char *errors /* error handling */ |
| 724 | ); |
| 725 | #endif |
| 726 | |
| 727 | #endif /* MS_WINDOWS */ |
| 728 | |
| 729 | /* --- Locale encoding --------------------------------------------------- */ |
| 730 | |
| 731 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 |
| 732 | /* Decode a string from the current locale encoding. *errors* names the |
| 733 | error handler: the decoder is either strict or uses the 'surrogateescape' |
| 734 | error handler (PEP 383) to escape undecodable bytes (presumably "strict" or |
| 735 | NULL vs. "surrogateescape" -- confirm against the C API docs). If a byte |
| 736 | sequence can be decoded as a surrogate character and 'surrogateescape' is |
| 737 | in use, it is escaped instead of being decoded. *str* must end with a null |
| 738 | character but cannot contain embedded null characters. */ |
| 739 | |
| 740 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize( |
| 741 | const char *str, |
| 742 | Py_ssize_t len, |
| 743 | const char *errors); |
| 744 | |
| 745 | /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string |
| 746 | length using strlen(). */ |
| 747 | |
| 748 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( |
| 749 | const char *str, |
| 750 | const char *errors); |
| 751 | |
| 752 | /* Encode a Unicode object to the current locale encoding. The encoder is |
| 753 | strict or, if selected through *errors*, uses the "surrogateescape" error |
| 754 | handler (exact *errors* values: confirm against the C API docs). Return a |
| 755 | bytes object. The string cannot contain embedded null characters. */ |
| 756 | |
| 757 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( |
| 758 | PyObject *unicode, |
| 759 | const char *errors |
| 760 | ); |
| 761 | #endif |
| 762 | |
| 763 | /* --- File system encoding ---------------------------------------------- */ |
| 764 | |
| 765 | /* ParseTuple converter: encode str objects to bytes using |
| 766 | PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ |
| 767 | |
| 768 | PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); |
| 769 | |
| 770 | /* ParseTuple converter: decode bytes objects to unicode using |
| 771 | PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ |
| 772 | |
| 773 | PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); |
| 774 | |
| 775 | /* Decode a null-terminated string using Py_FileSystemDefaultEncoding |
| 776 | and the "surrogateescape" error handler. |
| 777 | |
| 778 | If Py_FileSystemDefaultEncoding is not set, fall back to the locale |
| 779 | encoding. |
| 780 | |
| 781 | Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. |
| 782 | */ |
| 783 | |
| 784 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( |
| 785 | const char *s /* encoded string */ |
| 786 | ); |
| 787 | |
| 788 | /* Decode a string using Py_FileSystemDefaultEncoding |
| 789 | and the "surrogateescape" error handler. |
| 790 | |
| 791 | If Py_FileSystemDefaultEncoding is not set, fall back to the locale |
| 792 | encoding. |
| 793 | */ |
| 794 | |
| 795 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( |
| 796 | const char *s, /* encoded string */ |
| 797 | Py_ssize_t size /* size */ |
| 798 | ); |
| 799 | |
| 800 | /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the |
| 801 | "surrogateescape" error handler, and return bytes. |
| 802 | |
| 803 | If Py_FileSystemDefaultEncoding is not set, fall back to the locale |
| 804 | encoding. |
| 805 | */ |
| 806 | |
| 807 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( |
| 808 | PyObject *unicode |
| 809 | ); |
| 810 | |
| 811 | /* --- Methods & Slots ---------------------------------------------------- |
| 812 | |
| 813 | These are capable of handling Unicode objects and strings on input |
| 814 | (we refer to them as strings in the descriptions) and return |
| 815 | Unicode objects or integers as appropriate. */ |
| 816 | |
| 817 | /* Concat two strings giving a new Unicode string. */ |
| 818 | |
| 819 | PyAPI_FUNC(PyObject*) PyUnicode_Concat( |
| 820 | PyObject *left, /* Left string */ |
| 821 | PyObject *right /* Right string */ |
| 822 | ); |
| 823 | |
| 824 | /* Concat two strings and put the result in *pleft |
| 825 | (sets *pleft to NULL on error) */ |
| 826 | |
| 827 | PyAPI_FUNC(void) PyUnicode_Append( |
| 828 | PyObject **pleft, /* Pointer to left string */ |
| 829 | PyObject *right /* Right string */ |
| 830 | ); |
| 831 | |
| 832 | /* Concat two strings, put the result in *pleft and drop the right object |
| 833 | (sets *pleft to NULL on error) */ |
| 834 | |
| 835 | PyAPI_FUNC(void) PyUnicode_AppendAndDel( |
| 836 | PyObject **pleft, /* Pointer to left string */ |
| 837 | PyObject *right /* Right string */ |
| 838 | ); |
| 839 | |
| 840 | /* Split a string giving a list of Unicode strings. |
| 841 | |
| 842 | If sep is NULL, splitting will be done at all whitespace |
| 843 | substrings. Otherwise, splits occur at the given separator. |
| 844 | |
| 845 | At most maxsplit splits will be done. If negative, no limit is set. |
| 846 | |
| 847 | Separators are not included in the resulting list. |
| 848 | |
| 849 | */ |
| 850 | |
| 851 | PyAPI_FUNC(PyObject*) PyUnicode_Split( |
| 852 | PyObject *s, /* String to split */ |
| 853 | PyObject *sep, /* String separator */ |
| 854 | Py_ssize_t maxsplit /* Maxsplit count */ |
| 855 | ); |
| 856 | |
| 857 | /* Split a string at line breaks, giving a list of Unicode strings. |
| 858 | |
| 859 | CRLF is considered to be one line break. Line breaks are not |
| 860 | included in the resulting list unless keepends is true. */ |
| 861 | |
| 862 | PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( |
| 863 | PyObject *s, /* String to split */ |
| 864 | int keepends /* If true, line end markers are included */ |
| 865 | ); |
| 866 | |
| 867 | /* Partition a string using a given separator. */ |
| 868 | |
| 869 | PyAPI_FUNC(PyObject*) PyUnicode_Partition( |
| 870 | PyObject *s, /* String to partition */ |
| 871 | PyObject *sep /* String separator */ |
| 872 | ); |
| 873 | |
| 874 | /* Partition a string using a given separator, searching from the end of the |
| 875 | string. */ |
| 876 | |
| 877 | PyAPI_FUNC(PyObject*) PyUnicode_RPartition( |
| 878 | PyObject *s, /* String to partition */ |
| 879 | PyObject *sep /* String separator */ |
| 880 | ); |
| 881 | |
| 882 | /* Split a string from the end, giving a list of Unicode strings. |
| 883 | |
| 884 | If sep is NULL, splitting will be done at all whitespace |
| 885 | substrings. Otherwise, splits occur at the given separator. |
| 886 | |
| 887 | At most maxsplit splits will be done; if maxsplit is negative, no |
| 888 | limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits |
| 889 | from the end of the string. |
| 890 | |
| 891 | Separators are not included in the resulting list. |
| 892 | |
| 893 | */ |
| 894 | |
| 895 | PyAPI_FUNC(PyObject*) PyUnicode_RSplit( |
| 896 | PyObject *s, /* String to split */ |
| 897 | PyObject *sep, /* String separator */ |
| 898 | Py_ssize_t maxsplit /* Maxsplit count */ |
| 899 | ); |
| 900 | |
| 901 | /* Translate a string by applying a character mapping table to it and |
| 902 | return the resulting Unicode object. |
| 903 | |
| 904 | The mapping table must map Unicode ordinal integers to Unicode strings, |
| 905 | Unicode ordinal integers or None (causing deletion of the character). |
| 906 | |
| 907 | Mapping tables may be dictionaries or sequences. Unmapped character |
| 908 | ordinals (ones which cause a LookupError) are left untouched and |
| 909 | are copied as-is. |
| 910 | |
| 911 | */ |
| 912 | |
| 913 | PyAPI_FUNC(PyObject *) PyUnicode_Translate( |
| 914 | PyObject *str, /* String */ |
| 915 | PyObject *table, /* Translate table */ |
| 916 | const char *errors /* error handling */ |
| 917 | ); |
| 918 | |
| 919 | /* Join a sequence of strings using the given separator and return |
| 920 | the resulting Unicode string. */ |
| 921 | |
| 922 | PyAPI_FUNC(PyObject*) PyUnicode_Join( |
| 923 | PyObject *separator, /* Separator string */ |
| 924 | PyObject *seq /* Sequence object */ |
| 925 | ); |
| 926 | |
| 927 | /* Return 1 if substr matches str[start:end] at the given tail end, 0 |
| 928 | otherwise. */ |
| 929 | |
| 930 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( |
| 931 | PyObject *str, /* String */ |
| 932 | PyObject *substr, /* Prefix or Suffix string */ |
| 933 | Py_ssize_t start, /* Start index */ |
| 934 | Py_ssize_t end, /* Stop index */ |
| 935 | int direction /* Tail end: -1 prefix, +1 suffix */ |
| 936 | ); |
| 937 | |
| 938 | /* Return the first position of substr in str[start:end] using the |
| 939 | given search direction or -1 if not found. -2 is returned in case |
| 940 | an error occurred and an exception is set. */ |
| 941 | |
| 942 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( |
| 943 | PyObject *str, /* String */ |
| 944 | PyObject *substr, /* Substring to find */ |
| 945 | Py_ssize_t start, /* Start index */ |
| 946 | Py_ssize_t end, /* Stop index */ |
| 947 | int direction /* Find direction: +1 forward, -1 backward */ |
| 948 | ); |
| 949 | |
| 950 | #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 |
| 951 | /* Like PyUnicode_Find, but search for single character only. */ |
| 952 | PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( |
| 953 | PyObject *str, |
| 954 | Py_UCS4 ch, |
| 955 | Py_ssize_t start, |
| 956 | Py_ssize_t end, |
| 957 | int direction |
| 958 | ); |
| 959 | #endif |
| 960 | |
| 961 | /* Count the number of occurrences of substr in str[start:end]. */ |
| 962 | |
| 963 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( |
| 964 | PyObject *str, /* String */ |
| 965 | PyObject *substr, /* Substring to count */ |
| 966 | Py_ssize_t start, /* Start index */ |
| 967 | Py_ssize_t end /* Stop index */ |
| 968 | ); |
| 969 | |
| 970 | /* Replace at most maxcount occurrences of substr in str with replstr |
| 971 | and return the resulting Unicode object. */ |
| 972 | |
| 973 | PyAPI_FUNC(PyObject *) PyUnicode_Replace( |
| 974 | PyObject *str, /* String */ |
| 975 | PyObject *substr, /* Substring to find */ |
| 976 | PyObject *replstr, /* Replacement string */ |
| 977 | Py_ssize_t maxcount /* Max. number of replacements to apply; |
| 978 | -1 = all */ |
| 979 | ); |
| 980 | |
| 981 | /* Compare two strings and return -1, 0, 1 for less than, equal, |
| 982 | greater than resp. |
| 983 | Raise an exception and return -1 on error. */ |
| 984 | |
| 985 | PyAPI_FUNC(int) PyUnicode_Compare( |
| 986 | PyObject *left, /* Left string */ |
| 987 | PyObject *right /* Right string */ |
| 988 | ); |
| 989 | |
| 990 | /* Compare a Unicode object with C string and return -1, 0, 1 for less than, |
| 991 | equal, and greater than, respectively. It is best to pass only |
| 992 | ASCII-encoded strings, but the function interprets the input string as |
| 993 | ISO-8859-1 if it contains non-ASCII characters. |
| 994 | This function does not raise exceptions. */ |
| 995 | |
| 996 | PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( |
| 997 | PyObject *left, |
| 998 | const char *right /* ASCII-encoded string */ |
| 999 | ); |
| 1000 | |
| 1001 | /* Rich compare two strings and return one of the following: |
| 1002 | |
| 1003 | - NULL in case an exception was raised |
| 1004 | - Py_True or Py_False for successful comparisons |
| 1005 | - Py_NotImplemented in case the type combination is unknown |
| 1006 | |
| 1007 | Possible values for op: |
| 1008 | |
| 1009 | Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE |
| 1010 | |
| 1011 | */ |
| 1012 | |
| 1013 | PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( |
| 1014 | PyObject *left, /* Left string */ |
| 1015 | PyObject *right, /* Right string */ |
| 1016 | int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ |
| 1017 | ); |
| 1018 | |
| 1019 | /* Apply an argument tuple or dictionary to a format string and return |
| 1020 | the resulting Unicode string. */ |
| 1021 | |
| 1022 | PyAPI_FUNC(PyObject *) PyUnicode_Format( |
| 1023 | PyObject *format, /* Format string */ |
| 1024 | PyObject *args /* Argument tuple or dictionary */ |
| 1025 | ); |
| 1026 | |
| 1027 | /* Check whether element is contained in container and return 1 or 0 |
| 1028 | accordingly. |
| 1029 | |
| 1030 | element has to coerce to a one element Unicode string. -1 is |
| 1031 | returned in case of an error. */ |
| 1032 | |
| 1033 | PyAPI_FUNC(int) PyUnicode_Contains( |
| 1034 | PyObject *container, /* Container string */ |
| 1035 | PyObject *element /* Element string */ |
| 1036 | ); |
| 1037 | |
| 1038 | /* Checks whether argument is a valid identifier. */ |
| 1039 | |
| 1040 | PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); |
| 1041 | |
| 1042 | /* === Characters Type APIs =============================================== */ |
| 1043 | |
| 1044 | #ifndef Py_LIMITED_API |
| 1045 | # define Py_CPYTHON_UNICODEOBJECT_H |
| 1046 | # include "cpython/unicodeobject.h" |
| 1047 | # undef Py_CPYTHON_UNICODEOBJECT_H |
| 1048 | #endif |
| 1049 | |
| 1050 | #ifdef __cplusplus |
| 1051 | } |
| 1052 | #endif |
| 1053 | #endif /* !Py_UNICODEOBJECT_H */ |
| 1054 | |