1 | /* FriBidi |
2 | * fribidi-char-sets-utf8.c - UTF-8 character set conversion routines |
3 | * |
4 | * Authors: |
5 | * Behdad Esfahbod, 2001, 2002, 2004 |
6 | * Dov Grobgeld, 1999, 2000 |
7 | * |
8 | * Copyright (C) 2004 Sharif FarsiWeb, Inc |
9 | * Copyright (C) 2001,2002 Behdad Esfahbod |
10 | * Copyright (C) 1999,2000 Dov Grobgeld |
11 | * |
12 | * This library is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU Lesser General Public |
14 | * License as published by the Free Software Foundation; either |
15 | * version 2.1 of the License, or (at your option) any later version. |
16 | * |
17 | * This library is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 | * Lesser General Public License for more details. |
21 | * |
22 | * You should have received a copy of the GNU Lesser General Public License |
23 | * along with this library, in a file named COPYING; if not, write to the |
24 | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
25 | * Boston, MA 02110-1301, USA |
26 | * |
27 | * For licensing issues, contact <fribidi.license@gmail.com>. |
28 | */ |
29 | |
30 | #include <common.h> |
31 | |
32 | #include <fribidi-char-sets-utf8.h> |
33 | |
34 | #include <fribidi-unicode.h> |
35 | |
36 | FriBidiStrIndex |
37 | fribidi_utf8_to_unicode ( |
38 | /* input */ |
39 | const char *ss, |
40 | FriBidiStrIndex len, |
41 | /* output */ |
42 | FriBidiChar *us |
43 | ) |
44 | { |
45 | FriBidiStrIndex length; |
46 | const unsigned char *s = (unsigned const char *) ss; |
47 | const unsigned char *t = s; |
48 | |
49 | length = 0; |
50 | while ((FriBidiStrIndex) (s - t) < len) |
51 | { |
52 | register unsigned char ch = *s; |
53 | if (ch <= 0x7f) /* one byte */ |
54 | { |
55 | *us++ = *s++; |
56 | } |
57 | else if (ch <= 0xdf) /* 2 byte */ |
58 | { |
59 | *us++ = ((*s & 0x1f) << 6) + (*(s + 1) & 0x3f); |
60 | s += 2; |
61 | } |
62 | else if (ch <= 0xef) /* 3 byte */ |
63 | { |
64 | *us++ = |
65 | ((int) (*s & 0x0f) << 12) + |
66 | ((*(s + 1) & 0x3f) << 6) + (*(s + 2) & 0x3f); |
67 | s += 3; |
68 | } |
69 | else /* 4 byte */ |
70 | { |
71 | *us++ = |
72 | ((int) (*s & 0x07) << 18) + |
73 | ((*(s + 1) & 0x3f) << 12) + |
74 | ((*(s + 2) & 0x3f) << 6) + |
75 | ((*(s + 3) & 0x3f) << 0); |
76 | s += 4; |
77 | } |
78 | length++; |
79 | } |
80 | return (length); |
81 | } |
82 | |
83 | FriBidiStrIndex |
84 | fribidi_unicode_to_utf8 ( |
85 | /* input */ |
86 | const FriBidiChar *us, |
87 | FriBidiStrIndex len, |
88 | /* output */ |
89 | char *ss |
90 | ) |
91 | { |
92 | FriBidiStrIndex i; |
93 | unsigned char *s = (unsigned char *) ss; |
94 | unsigned char *t = s; |
95 | |
96 | for (i = 0; i < len; i++) |
97 | { |
98 | FriBidiChar mychar = us[i]; |
99 | if (mychar <= 0x7F) |
100 | { /* 7 sig bits */ |
101 | *t++ = mychar; |
102 | } |
103 | else if (mychar <= 0x7FF) |
104 | { /* 11 sig bits */ |
105 | *t++ = 0xC0 | (unsigned char) (mychar >> 6); /* upper 5 bits */ |
106 | *t++ = 0x80 | (unsigned char) (mychar & 0x3F); /* lower 6 bits */ |
107 | } |
108 | else if (mychar <= 0xFFFF) |
109 | { /* 16 sig bits */ |
110 | *t++ = 0xE0 | (unsigned char) (mychar >> 12); /* upper 4 bits */ |
111 | *t++ = 0x80 | (unsigned char) ((mychar >> 6) & 0x3F); /* next 6 bits */ |
112 | *t++ = 0x80 | (unsigned char) (mychar & 0x3F); /* lowest 6 bits */ |
113 | } |
114 | else if (mychar < FRIBIDI_UNICODE_CHARS) |
115 | { /* 21 sig bits */ |
116 | *t++ = 0xF0 | (unsigned char) ((mychar >> 18) & 0x07); /* upper 3 bits */ |
117 | *t++ = 0x80 | (unsigned char) ((mychar >> 12) & 0x3F); /* next 6 bits */ |
118 | *t++ = 0x80 | (unsigned char) ((mychar >> 6) & 0x3F); /* next 6 bits */ |
119 | *t++ = 0x80 | (unsigned char) (mychar & 0x3F); /* lowest 6 bits */ |
120 | } |
121 | } |
122 | *t = 0; |
123 | |
124 | return (t - s); |
125 | } |
126 | |
127 | /* Editor directions: |
128 | * vim:textwidth=78:tabstop=8:shiftwidth=2:autoindent:cindent |
129 | */ |
130 | |