unorm2.h source code [include/unicode/unorm2.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 2009-2015, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: unorm2.h
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2009dec15
16	* created by: Markus W. Scherer
17	*/
18
19	#ifndef __UNORM2_H__
20	#define __UNORM2_H__
21
22	/**
23	* \file
24	* \brief C API: New API for Unicode Normalization.
25	*
26	* Unicode normalization functionality for standard Unicode normalization or
27	* for using custom mapping tables.
28	* All instances of UNormalizer2 are unmodifiable/immutable.
29	* Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
30	* For more details see the Normalizer2 C++ class.
31	*/
32
33	#include "unicode/utypes.h"
34	#include "unicode/stringoptions.h"
35	#include "unicode/uset.h"
36
37	#if U_SHOW_CPLUSPLUS_API
38	#include "unicode/localpointer.h"
39	#endif // U_SHOW_CPLUSPLUS_API
40
41	/**
42	* Constants for normalization modes.
43	* For details about standard Unicode normalization forms
44	* and about the algorithms which are also used with custom mapping tables
45	* see http://www.unicode.org/unicode/reports/tr15/
46	* @stable ICU 4.4
47	*/
48	typedef enum {
49	/**
50	* Decomposition followed by composition.
51	* Same as standard NFC when using an "nfc" instance.
52	* Same as standard NFKC when using an "nfkc" instance.
53	* For details about standard Unicode normalization forms
54	* see http://www.unicode.org/unicode/reports/tr15/
55	* @stable ICU 4.4
56	*/
57	UNORM2_COMPOSE,
58	/**
59	* Map, and reorder canonically.
60	* Same as standard NFD when using an "nfc" instance.
61	* Same as standard NFKD when using an "nfkc" instance.
62	* For details about standard Unicode normalization forms
63	* see http://www.unicode.org/unicode/reports/tr15/
64	* @stable ICU 4.4
65	*/
66	UNORM2_DECOMPOSE,
67	/**
68	* "Fast C or D" form.
69	* If a string is in this form, then further decomposition <i>without reordering</i>
70	* would yield the same form as DECOMPOSE.
71	* Text in "Fast C or D" form can be processed efficiently with data tables
72	* that are "canonically closed", that is, that provide equivalent data for
73	* equivalent text, without having to be fully normalized.
74	* Not a standard Unicode normalization form.
75	* Not a unique form: Different FCD strings can be canonically equivalent.
76	* For details see http://www.unicode.org/notes/tn5/#FCD
77	* @stable ICU 4.4
78	*/
79	UNORM2_FCD,
80	/**
81	* Compose only contiguously.
82	* Also known as "FCC" or "Fast C Contiguous".
83	* The result will often but not always be in NFC.
84	* The result will conform to FCD which is useful for processing.
85	* Not a standard Unicode normalization form.
86	* For details see http://www.unicode.org/notes/tn5/#FCC
87	* @stable ICU 4.4
88	*/
89	UNORM2_COMPOSE_CONTIGUOUS
90	} UNormalization2Mode;
91
92	/**
93	* Result values for normalization quick check functions.
94	* For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
95	* @stable ICU 2.0
96	*/
97	typedef enum UNormalizationCheckResult {
98	/**
99	* The input string is not in the normalization form.
100	* @stable ICU 2.0
101	*/
102	UNORM_NO,
103	/**
104	* The input string is in the normalization form.
105	* @stable ICU 2.0
106	*/
107	UNORM_YES,
108	/**
109	* The input string may or may not be in the normalization form.
110	* This value is only returned for composition forms like NFC and FCC,
111	* when a backward-combining character is found for which the surrounding text
112	* would have to be analyzed further.
113	* @stable ICU 2.0
114	*/
115	UNORM_MAYBE
116	} UNormalizationCheckResult;
117
118	/**
119	* Opaque C service object type for the new normalization API.
120	* @stable ICU 4.4
121	*/
122	struct UNormalizer2;
123	typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
124
125	#if !UCONFIG_NO_NORMALIZATION
126
127	/**
128	* Returns a UNormalizer2 instance for Unicode NFC normalization.
129	* Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
130	* Returns an unmodifiable singleton instance. Do not delete it.
131	* @param pErrorCode Standard ICU error code. Its input value must
132	* pass the U_SUCCESS() test, or else the function returns
133	* immediately. Check for U_FAILURE() on output or use with
134	* function chaining. (See User Guide for details.)
135	* @return the requested Normalizer2, if successful
136	* @stable ICU 49
137	*/
138	U_CAPI const UNormalizer2 * U_EXPORT2
139	unorm2_getNFCInstance(UErrorCode *pErrorCode);
140
141	/**
142	* Returns a UNormalizer2 instance for Unicode NFD normalization.
143	* Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
144	* Returns an unmodifiable singleton instance. Do not delete it.
145	* @param pErrorCode Standard ICU error code. Its input value must
146	* pass the U_SUCCESS() test, or else the function returns
147	* immediately. Check for U_FAILURE() on output or use with
148	* function chaining. (See User Guide for details.)
149	* @return the requested Normalizer2, if successful
150	* @stable ICU 49
151	*/
152	U_CAPI const UNormalizer2 * U_EXPORT2
153	unorm2_getNFDInstance(UErrorCode *pErrorCode);
154
155	/**
156	* Returns a UNormalizer2 instance for Unicode NFKC normalization.
157	* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
158	* Returns an unmodifiable singleton instance. Do not delete it.
159	* @param pErrorCode Standard ICU error code. Its input value must
160	* pass the U_SUCCESS() test, or else the function returns
161	* immediately. Check for U_FAILURE() on output or use with
162	* function chaining. (See User Guide for details.)
163	* @return the requested Normalizer2, if successful
164	* @stable ICU 49
165	*/
166	U_CAPI const UNormalizer2 * U_EXPORT2
167	unorm2_getNFKCInstance(UErrorCode *pErrorCode);
168
169	/**
170	* Returns a UNormalizer2 instance for Unicode NFKD normalization.
171	* Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
172	* Returns an unmodifiable singleton instance. Do not delete it.
173	* @param pErrorCode Standard ICU error code. Its input value must
174	* pass the U_SUCCESS() test, or else the function returns
175	* immediately. Check for U_FAILURE() on output or use with
176	* function chaining. (See User Guide for details.)
177	* @return the requested Normalizer2, if successful
178	* @stable ICU 49
179	*/
180	U_CAPI const UNormalizer2 * U_EXPORT2
181	unorm2_getNFKDInstance(UErrorCode *pErrorCode);
182
183	/**
184	* Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization
185	* which is equivalent to applying the NFKC_Casefold mappings and then NFC.
186	* See https://www.unicode.org/reports/tr44/#NFKC_Casefold
187	*
188	* Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
189	* Returns an unmodifiable singleton instance. Do not delete it.
190	* @param pErrorCode Standard ICU error code. Its input value must
191	* pass the U_SUCCESS() test, or else the function returns
192	* immediately. Check for U_FAILURE() on output or use with
193	* function chaining. (See User Guide for details.)
194	* @return the requested Normalizer2, if successful
195	* @stable ICU 49
196	*/
197	U_CAPI const UNormalizer2 * U_EXPORT2
198	unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
199
200	#ifndef U_HIDE_DRAFT_API
201	/**
202	* Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
203	* which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
204	* See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
205	*
206	* Same as unorm2_getInstance(NULL, "nfkc_scf", UNORM2_COMPOSE, pErrorCode).
207	* Returns an unmodifiable singleton instance. Do not delete it.
208	* @param pErrorCode Standard ICU error code. Its input value must
209	* pass the U_SUCCESS() test, or else the function returns
210	* immediately. Check for U_FAILURE() on output or use with
211	* function chaining. (See User Guide for details.)
212	* @return the requested Normalizer2, if successful
213	* @draft ICU 74
214	*/
215	U_CAPI const UNormalizer2 * U_EXPORT2
216	unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode);
217	#endif // U_HIDE_DRAFT_API
218
219	/**
220	* Returns a UNormalizer2 instance which uses the specified data file
221	* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
222	* and which composes or decomposes text according to the specified mode.
223	* Returns an unmodifiable singleton instance. Do not delete it.
224	*
225	* Use packageName=NULL for data files that are part of ICU's own data.
226	* Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
227	* Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
228	* Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
229	* Use name="nfkc_scf" and UNORM2_COMPOSE for the NFKC_Simple_Casefold variant.
230	* @param packageName NULL for ICU built-in data, otherwise application data package name
231	* @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
232	* @param mode normalization mode (compose or decompose etc.)
233	* @param pErrorCode Standard ICU error code. Its input value must
234	* pass the U_SUCCESS() test, or else the function returns
235	* immediately. Check for U_FAILURE() on output or use with
236	* function chaining. (See User Guide for details.)
237	* @return the requested UNormalizer2, if successful
238	* @stable ICU 4.4
239	*/
240	U_CAPI const UNormalizer2 * U_EXPORT2
241	unorm2_getInstance(const char *packageName,
242	const char *name,
243	UNormalization2Mode mode,
244	UErrorCode *pErrorCode);
245
246	/**
247	* Constructs a filtered normalizer wrapping any UNormalizer2 instance
248	* and a filter set.
249	* Both are aliased and must not be modified or deleted while this object
250	* is used.
251	* The filter set should be frozen; otherwise the performance will suffer greatly.
252	* @param norm2 wrapped UNormalizer2 instance (aliased, see above)
253	* @param filterSet USet which determines the characters to be normalized (aliased, see above)
254	* @param pErrorCode Standard ICU error code. Its input value must
255	* pass the U_SUCCESS() test, or else the function returns
256	* immediately. Check for U_FAILURE() on output or use with
257	* function chaining. (See User Guide for details.)
258	* @return the requested UNormalizer2, if successful; close it with unorm2_close()
259	* @stable ICU 4.4
260	*/
261	U_CAPI UNormalizer2 * U_EXPORT2
262	unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
263
264	/**
265	* Closes a UNormalizer2 instance from unorm2_openFiltered().
266	* Do not close instances from unorm2_getInstance()!
267	* @param norm2 UNormalizer2 instance to be closed
268	* @stable ICU 4.4
269	*/
270	U_CAPI void U_EXPORT2
271	unorm2_close(UNormalizer2 *norm2);
272
273	#if U_SHOW_CPLUSPLUS_API
274
275	U_NAMESPACE_BEGIN
276
277	/**
278	* \class LocalUNormalizer2Pointer
279	* "Smart pointer" class, closes a UNormalizer2 via unorm2_close().
280	* For most methods see the LocalPointerBase base class.
281	*
282	* @see LocalPointerBase
283	* @see LocalPointer
284	* @stable ICU 4.4
285	*/
286	U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
287
288	U_NAMESPACE_END
289
290	#endif // U_SHOW_CPLUSPLUS_API
291
292	/**
293	* Writes the normalized form of the source string to the destination string
294	* (replacing its contents) and returns the length of the destination string.
295	* The source and destination strings must be different buffers.
296	* @param norm2 UNormalizer2 instance
297	* @param src source string
298	* @param length length of the source string, or -1 if NUL-terminated
299	* @param dest destination string; its contents are replaced with normalized src
300	* @param capacity number of UChars that can be written to dest
301	* @param pErrorCode Standard ICU error code. Its input value must
302	* pass the U_SUCCESS() test, or else the function returns
303	* immediately. Check for U_FAILURE() on output or use with
304	* function chaining. (See User Guide for details.)
305	* @return the length of the destination string
306	* @stable ICU 4.4
307	*/
308	U_CAPI int32_t U_EXPORT2
309	unorm2_normalize(const UNormalizer2 *norm2,
310	const UChar *src, int32_t length,
311	UChar *dest, int32_t capacity,
312	UErrorCode *pErrorCode);
313	/**
314	* Appends the normalized form of the second string to the first string
315	* (merging them at the boundary) and returns the length of the first string.
316	* The result is normalized if the first string was normalized.
317	* The first and second strings must be different buffers.
318	* @param norm2 UNormalizer2 instance
319	* @param first string, should be normalized
320	* @param firstLength length of the first string, or -1 if NUL-terminated
321	* @param firstCapacity number of UChars that can be written to first
322	* @param second string, will be normalized
323	* @param secondLength length of the second string, or -1 if NUL-terminated
324	* @param pErrorCode Standard ICU error code. Its input value must
325	* pass the U_SUCCESS() test, or else the function returns
326	* immediately. Check for U_FAILURE() on output or use with
327	* function chaining. (See User Guide for details.)
328	* @return the length of the first string
329	* @stable ICU 4.4
330	*/
331	U_CAPI int32_t U_EXPORT2
332	unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
333	UChar *first, int32_t firstLength, int32_t firstCapacity,
334	const UChar *second, int32_t secondLength,
335	UErrorCode *pErrorCode);
336	/**
337	* Appends the second string to the first string
338	* (merging them at the boundary) and returns the length of the first string.
339	* The result is normalized if both the strings were normalized.
340	* The first and second strings must be different buffers.
341	* @param norm2 UNormalizer2 instance
342	* @param first string, should be normalized
343	* @param firstLength length of the first string, or -1 if NUL-terminated
344	* @param firstCapacity number of UChars that can be written to first
345	* @param second string, should be normalized
346	* @param secondLength length of the second string, or -1 if NUL-terminated
347	* @param pErrorCode Standard ICU error code. Its input value must
348	* pass the U_SUCCESS() test, or else the function returns
349	* immediately. Check for U_FAILURE() on output or use with
350	* function chaining. (See User Guide for details.)
351	* @return the length of the first string
352	* @stable ICU 4.4
353	*/
354	U_CAPI int32_t U_EXPORT2
355	unorm2_append(const UNormalizer2 *norm2,
356	UChar *first, int32_t firstLength, int32_t firstCapacity,
357	const UChar *second, int32_t secondLength,
358	UErrorCode *pErrorCode);
359
360	/**
361	* Gets the decomposition mapping of c.
362	* Roughly equivalent to normalizing the String form of c
363	* on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
364	* returns a negative value and does not write a string
365	* if c does not have a decomposition mapping in this instance's data.
366	* This function is independent of the mode of the UNormalizer2.
367	* @param norm2 UNormalizer2 instance
368	* @param c code point
369	* @param decomposition String buffer which will be set to c's
370	* decomposition mapping, if there is one.
371	* @param capacity number of UChars that can be written to decomposition
372	* @param pErrorCode Standard ICU error code. Its input value must
373	* pass the U_SUCCESS() test, or else the function returns
374	* immediately. Check for U_FAILURE() on output or use with
375	* function chaining. (See User Guide for details.)
376	* @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
377	* @stable ICU 4.6
378	*/
379	U_CAPI int32_t U_EXPORT2
380	unorm2_getDecomposition(const UNormalizer2 *norm2,
381	UChar32 c, UChar *decomposition, int32_t capacity,
382	UErrorCode *pErrorCode);
383
384	/**
385	* Gets the raw decomposition mapping of c.
386	*
387	* This is similar to the unorm2_getDecomposition() function but returns the
388	* raw decomposition mapping as specified in UnicodeData.txt or
389	* (for custom data) in the mapping files processed by the gennorm2 tool.
390	* By contrast, unorm2_getDecomposition() returns the processed,
391	* recursively-decomposed version of this mapping.
392	*
393	* When used on a standard NFKC Normalizer2 instance,
394	* unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
395	*
396	* When used on a standard NFC Normalizer2 instance,
397	* it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
398	* in this case, the result contains either one or two code points (=1..4 UChars).
399	*
400	* This function is independent of the mode of the UNormalizer2.
401	* @param norm2 UNormalizer2 instance
402	* @param c code point
403	* @param decomposition String buffer which will be set to c's
404	* raw decomposition mapping, if there is one.
405	* @param capacity number of UChars that can be written to decomposition
406	* @param pErrorCode Standard ICU error code. Its input value must
407	* pass the U_SUCCESS() test, or else the function returns
408	* immediately. Check for U_FAILURE() on output or use with
409	* function chaining. (See User Guide for details.)
410	* @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
411	* @stable ICU 49
412	*/
413	U_CAPI int32_t U_EXPORT2
414	unorm2_getRawDecomposition(const UNormalizer2 *norm2,
415	UChar32 c, UChar *decomposition, int32_t capacity,
416	UErrorCode *pErrorCode);
417
418	/**
419	* Performs pairwise composition of a & b and returns the composite if there is one.
420	*
421	* Returns a composite code point c only if c has a two-way mapping to a+b.
422	* In standard Unicode normalization, this means that
423	* c has a canonical decomposition to a+b
424	* and c does not have the Full_Composition_Exclusion property.
425	*
426	* This function is independent of the mode of the UNormalizer2.
427	* @param norm2 UNormalizer2 instance
428	* @param a A (normalization starter) code point.
429	* @param b Another code point.
430	* @return The non-negative composite code point if there is one; otherwise a negative value.
431	* @stable ICU 49
432	*/
433	U_CAPI UChar32 U_EXPORT2
434	unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
435
436	/**
437	* Gets the combining class of c.
438	* The default implementation returns 0
439	* but all standard implementations return the Unicode Canonical_Combining_Class value.
440	* @param norm2 UNormalizer2 instance
441	* @param c code point
442	* @return c's combining class
443	* @stable ICU 49
444	*/
445	U_CAPI uint8_t U_EXPORT2
446	unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
447
448	/**
449	* Tests if the string is normalized.
450	* Internally, in cases where unorm2_quickCheck() would return UNORM_MAYBE
451	* (which is only possible for the two COMPOSE modes) this function
452	* resolves to "yes" or "no" to provide a definitive result,
453	* at the cost of doing more work in those cases.
454	* @param norm2 UNormalizer2 instance
455	* @param s input string
456	* @param length length of the string, or -1 if NUL-terminated
457	* @param pErrorCode Standard ICU error code. Its input value must
458	* pass the U_SUCCESS() test, or else the function returns
459	* immediately. Check for U_FAILURE() on output or use with
460	* function chaining. (See User Guide for details.)
461	* @return true if s is normalized
462	* @stable ICU 4.4
463	*/
464	U_CAPI UBool U_EXPORT2
465	unorm2_isNormalized(const UNormalizer2 *norm2,
466	const UChar *s, int32_t length,
467	UErrorCode *pErrorCode);
468
469	/**
470	* Tests if the string is normalized.
471	* For the two COMPOSE modes, the result could be UNORM_MAYBE in cases that
472	* would take a little more work to resolve definitively.
473	* Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster
474	* combination of quick check + normalization, to avoid
475	* re-checking the "yes" prefix.
476	* @param norm2 UNormalizer2 instance
477	* @param s input string
478	* @param length length of the string, or -1 if NUL-terminated
479	* @param pErrorCode Standard ICU error code. Its input value must
480	* pass the U_SUCCESS() test, or else the function returns
481	* immediately. Check for U_FAILURE() on output or use with
482	* function chaining. (See User Guide for details.)
483	* @return a UNormalizationCheckResult: UNORM_YES, UNORM_NO or UNORM_MAYBE
484	* @stable ICU 4.4
485	*/
486	U_CAPI UNormalizationCheckResult U_EXPORT2
487	unorm2_quickCheck(const UNormalizer2 *norm2,
488	const UChar *s, int32_t length,
489	UErrorCode *pErrorCode);
490
491	/**
492	* Returns the end of the normalized substring of the input string.
493	* In other words, with <code>end=unorm2_spanQuickCheckYes(norm2, s, length, pErrorCode);</code>
494	* the first <code>end</code> UChars of <code>s</code>
495	* will pass the quick check with a "yes" result.
496	*
497	* The returned end index is usually one or more characters before the
498	* "no" or "maybe" character: The end index is at a normalization boundary.
499	* (See the Normalizer2 C++ class documentation for more about normalization boundaries.)
500	*
501	* When the goal is a normalized string and most input strings are expected
502	* to be normalized already, then call this function,
503	* and if it returns a prefix shorter than the input string,
504	* copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
505	* @param norm2 UNormalizer2 instance
506	* @param s input string
507	* @param length length of the string, or -1 if NUL-terminated
508	* @param pErrorCode Standard ICU error code. Its input value must
509	* pass the U_SUCCESS() test, or else the function returns
510	* immediately. Check for U_FAILURE() on output or use with
511	* function chaining. (See User Guide for details.)
512	* @return "yes" span end index
513	* @stable ICU 4.4
514	*/
515	U_CAPI int32_t U_EXPORT2
516	unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
517	const UChar *s, int32_t length,
518	UErrorCode *pErrorCode);
519
520	/**
521	* Tests if the character always has a normalization boundary before it,
522	* regardless of context.
523	* For details see the Normalizer2 base class documentation.
524	* @param norm2 UNormalizer2 instance
525	* @param c character to test
526	* @return true if c has a normalization boundary before it
527	* @stable ICU 4.4
528	*/
529	U_CAPI UBool U_EXPORT2
530	unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
531
532	/**
533	* Tests if the character always has a normalization boundary after it,
534	* regardless of context.
535	* For details see the Normalizer2 base class documentation.
536	* @param norm2 UNormalizer2 instance
537	* @param c character to test
538	* @return true if c has a normalization boundary after it
539	* @stable ICU 4.4
540	*/
541	U_CAPI UBool U_EXPORT2
542	unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
543
544	/**
545	* Tests if the character is normalization-inert.
546	* For details see the Normalizer2 base class documentation.
547	* @param norm2 UNormalizer2 instance
548	* @param c character to test
549	* @return true if c is normalization-inert
550	* @stable ICU 4.4
551	*/
552	U_CAPI UBool U_EXPORT2
553	unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
554
555	/**
556	* Compares two strings for canonical equivalence.
557	* Further options include case-insensitive comparison and
558	* code point order (as opposed to code unit order).
559	*
560	* Canonical equivalence between two strings is defined as their normalized
561	* forms (NFD or NFC) being identical.
562	* This function compares strings incrementally instead of normalizing
563	* (and optionally case-folding) both strings entirely,
564	* improving performance significantly.
565	*
566	* Bulk normalization is only necessary if the strings do not fulfill the FCD
567	* conditions. Only in this case, and only if the strings are relatively long,
568	* is memory allocated temporarily.
569	* For FCD strings and short non-FCD strings there is no memory allocation.
570	*
571	* Semantically, this is equivalent to
572	* strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
573	* where code point order and foldCase are all optional.
574	*
575	* UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
576	* the case folding must be performed first, then the normalization.
577	*
578	* @param s1 First source string.
579	* @param length1 Length of first source string, or -1 if NUL-terminated.
580	*
581	* @param s2 Second source string.
582	* @param length2 Length of second source string, or -1 if NUL-terminated.
583	*
584	* @param options A bit set of options:
585	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
586	* Case-sensitive comparison in code unit order, and the input strings
587	* are quick-checked for FCD.
588	*
589	* - UNORM_INPUT_IS_FCD
590	* Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
591	* If not set, the function will quickCheck for FCD
592	* and normalize if necessary.
593	*
594	* - U_COMPARE_CODE_POINT_ORDER
595	* Set to choose code point order instead of code unit order
596	* (see u_strCompare for details).
597	*
598	* - U_COMPARE_IGNORE_CASE
599	* Set to compare strings case-insensitively using case folding,
600	* instead of case-sensitively.
601	* If set, then the following case folding options are used.
602	*
603	* - Options as used with case-insensitive comparisons, currently:
604	*
605	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
606	* (see u_strCaseCompare for details)
607	*
608	* - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
609	*
610	* @param pErrorCode ICU error code in/out parameter.
611	* Must fulfill U_SUCCESS before the function call.
612	* @return <0 or 0 or >0 as usual for string comparisons
613	*
614	* @see unorm_normalize
615	* @see UNORM_FCD
616	* @see u_strCompare
617	* @see u_strCaseCompare
618	*
619	* @stable ICU 2.2
620	*/
621	U_CAPI int32_t U_EXPORT2
622	unorm_compare(const UChar *s1, int32_t length1,
623	const UChar *s2, int32_t length2,
624	uint32_t options,
625	UErrorCode *pErrorCode);
626
627	#endif /* !UCONFIG_NO_NORMALIZATION */
628	#endif /* __UNORM2_H__ */
629

source code of include/unicode/unorm2.h