1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any later version.
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets.  Each is a global Lisp_Object
   declared here without an initializer; the code that fills them in is
   not part of this section. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
/* CNS 11643 charsets, numbered 1 through 7. */
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_chinese_cns11643_3;
62 Lisp_Object Vcharset_chinese_cns11643_4;
63 Lisp_Object Vcharset_chinese_cns11643_5;
64 Lisp_Object Vcharset_chinese_cns11643_6;
65 Lisp_Object Vcharset_chinese_cns11643_7;
66 Lisp_Object Vcharset_ucs_bmp;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
/* Big5, carried as two separate charset objects. */
70 Lisp_Object Vcharset_chinese_big5_1;
71 Lisp_Object Vcharset_chinese_big5_2;
73 #ifdef ENABLE_COMPOSITE_CHARS
/* Declared only when ENABLE_COMPOSITE_CHARS is defined. */
74 Lisp_Object Vcharset_composite;
76 /* Hash tables for composite chars. One maps strings representing
77 composed chars to their equivalent chars; one goes the other way. */
/* The two composite-char hash tables.  Judging by the names, one maps a
   composite char to its string and the other maps a string back to its
   char -- TODO confirm against the code that fills them. */
79 Lisp_Object Vcomposite_char_char2string_hash_table;
80 Lisp_Object Vcomposite_char_string2char_hash_table;
/* File-local counters.  Presumably the row and column to assign to the
   next composite char; the code that uses them is not visible here --
   TODO confirm. */
82 static int composite_char_row_next;
83 static int composite_char_col_next;
85 #endif /* ENABLE_COMPOSITE_CHARS */
87 /* Table of charsets indexed by leading byte. */
88 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
90 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): two declarations of charset_by_attributes follow, one
   without and one with a trailing [2] dimension.  As written they
   conflict; presumably each belongs to a different preprocessor branch
   whose directives are not visible here -- confirm.  Only the
   three-dimensional form has a third index for the "direction"
   mentioned above. */
92 Lisp_Object charset_by_attributes[4][128];
94 Lisp_Object charset_by_attributes[4][128][2];
98 /* Table of number of bytes in the string representation of a character
99 indexed by the first byte of that representation.
101 rep_bytes_by_first_byte(c) is more efficient than the equivalent
102 canonical computation:
104 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* The array is declared with 0xA0 (160) slots: 128 for ASCII first
   bytes, 16 for 0x80 - 0x8f and 16 for 0x90 - 0x9f.
   NOTE(review): the closing of the initializer is not visible here. */
106 Bytecount rep_bytes_by_first_byte[0xA0] =
107 { /* 0x00 - 0x7f are for straight ASCII */
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
116 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): the next two rows both sit under the 0x80 - 0x8f
   heading and differ only in their last value (3 versus 2).  With both
   present the initializer holds 176 values for 160 slots; presumably
   only one row is compiled, selected by preprocessor directives that
   are not visible here -- confirm. */
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
120 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 /* 0x90 - 0x9d are for Dimension-2 official charsets */
123 /* 0x9e is for Dimension-1 private charsets */
124 /* 0x9f is for Dimension-2 private charsets */
/* The row below gives 3 for every first byte from 0x90 through 0x9e
   and 4 for 0x9f. */
125 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Create an Emchar_to_byte_table.  The visible code allocates one table
   structure with xmalloc.
   NOTE(review): the braces, any field initialization and the return
   statement are not visible here.  The empty parameter list `()' does
   not act as a prototype in pre-C23 C; `(void)' would be stricter. */
130 Emchar_to_byte_table*
131 make_byte_from_character_table ()
133 Emchar_to_byte_table* table
134 = (Emchar_to_byte_table*) xmalloc (sizeof (Emchar_to_byte_table));
/* Record VAL as the byte for character CH in TABLE, allocating or
   growing TABLE's byte array as needed.  Storage is sized in steps of
   128 bytes, and table->offset is the character code that index 0
   stands for.
   NOTE(review): the return type, the braces, the declaration of j and
   several statements of this function are not visible here; the
   comments below describe only the visible lines. */
141 put_byte_from_character_table (Emchar ch, unsigned char val,
142 Emchar_to_byte_table* table)
/* First store: no array exists yet. */
144 if (table->base == NULL)
/* NOTE(review): nothing visible clears these 128 bytes, whereas both
   growth paths below loop over the bytes they add -- confirm the
   initial block is zeroed so that unset slots do not read back as
   garbage. */
146 table->base = xmalloc (128);
/* For non-negative CH this rounds CH down to a multiple of 128, so CH
   falls inside the first block. */
147 table->offset = ch - (ch % 128);
149 table->base[ch - table->offset] = val;
/* An array already exists: i is CH's index relative to the current
   offset. */
153 int i = ch - table->offset;
/* Grow downwards.  Subtracting i only enlarges the size when i is
   negative; presumably this part is guarded by an (i < 0) test that is
   not visible here -- confirm. */
157 size_t new_size = table->size - i;
/* Pad up to a multiple of 128.  A size that is already a multiple still
   gains a full 128. */
160 new_size += 128 - (new_size % 128);
161 table->base = xrealloc (table->base, new_size);
/* Slide the old contents up by the amount added, leaving the new space
   at the front.  The length argument is on a line not visible here. */
162 memmove (table->base + (new_size - table->size), table->base,
/* Walk the bytes that were added at the front (loop body not visible). */
164 for (j = 0; j < (new_size - table->size); j++)
/* Index 0 now stands for a lower character code. */
166 table->offset -= (new_size - table->size);
167 table->base[ch - table->offset] = val;
168 table->size = new_size;
/* Grow upwards: CH lies at or beyond the current end. */
170 else if (i >= table->size)
/* Room is needed for index i, hence at least i + 1 bytes, padded the
   same way as above. */
172 size_t new_size = i + 1;
175 new_size += 128 - (new_size % 128);
176 table->base = xrealloc (table->base, new_size);
/* Walk the bytes that were added at the end.
   NOTE(review): no separate loop body is visible; as shown, the store
   to base[i] below is the statement that follows the for header --
   confirm. */
177 for (j = table->size; j < new_size; j++)
179 table->base[i] = val;
180 table->size = new_size;
/* Remaining case: store in place.  Presumably reached when i already
   lies inside the array; the `else' introducing it is not visible. */
184 table->base[i] = val;
/* Return the byte recorded for CH in TABLE.
   NOTE(review): the return type line, the braces and any range check
   are not visible here. */
190 get_byte_from_character_table (Emchar ch, Emchar_to_byte_table* table)
/* Index relative to the table's offset.  It is held in a size_t, so a
   CH below the offset becomes a very large index instead of a negative
   one. */
192 size_t i = ch - table->offset;
/* NOTE(review): confirm that i is checked against table->size before
   this read; no such check is visible. */
194 return table->base[i];
/* CHAR96 (FT, B): character code for byte B of the 96-character set
   selected by FT (presumably the set's final byte).  Each FT value,
   counted from '0', owns a run of 96 consecutive codes starting at
   MIN_CHAR_96.  B is reduced to its low seven bits, so 0x20 and 0xA0
   both map to the first code of the run.
   Both arguments are parenthesized so that expressions such as
   `a | b' or `c ? x : y' are evaluated as a unit. */
#define CHAR96(ft,b) (MIN_CHAR_96 + ((ft) - '0') * 96 + ((b) & 0x7f) - 32)
/* UCS code points for the 94 characters of Latin JIS X 0201, listed in
   code order from 0x21 to 0x7E (see the per-entry comments).  Every
   entry equals its own code except 0x5C (YEN SIGN, U+00A5) and 0x7E
   (OVERLINE, U+203E).  Presumably indexed by (code - 0x21); the lookup
   is not in this section.
   NOTE(review): the braces around the initializer are not visible here. */
201 Emchar latin_jisx0201_to_ucs[94] =
203 0x0021 /* 0x21 EXCLAMATION MARK */,
204 0x0022 /* 0x22 QUOTATION MARK */,
205 0x0023 /* 0x23 NUMBER SIGN */,
206 0x0024 /* 0x24 DOLLAR SIGN */,
207 0x0025 /* 0x25 PERCENT SIGN */,
208 0x0026 /* 0x26 AMPERSAND */,
209 0x0027 /* 0x27 APOSTROPHE */,
210 0x0028 /* 0x28 LEFT PARENTHESIS */,
211 0x0029 /* 0x29 RIGHT PARENTHESIS */,
212 0x002A /* 0x2A ASTERISK */,
213 0x002B /* 0x2B PLUS SIGN */,
214 0x002C /* 0x2C COMMA */,
215 0x002D /* 0x2D HYPHEN-MINUS */,
216 0x002E /* 0x2E FULL STOP */,
217 0x002F /* 0x2F SOLIDUS */,
218 0x0030 /* 0x30 DIGIT ZERO */,
219 0x0031 /* 0x31 DIGIT ONE */,
220 0x0032 /* 0x32 DIGIT TWO */,
221 0x0033 /* 0x33 DIGIT THREE */,
222 0x0034 /* 0x34 DIGIT FOUR */,
223 0x0035 /* 0x35 DIGIT FIVE */,
224 0x0036 /* 0x36 DIGIT SIX */,
225 0x0037 /* 0x37 DIGIT SEVEN */,
226 0x0038 /* 0x38 DIGIT EIGHT */,
227 0x0039 /* 0x39 DIGIT NINE */,
228 0x003A /* 0x3A COLON */,
229 0x003B /* 0x3B SEMICOLON */,
230 0x003C /* 0x3C LESS-THAN SIGN */,
231 0x003D /* 0x3D EQUALS SIGN */,
232 0x003E /* 0x3E GREATER-THAN SIGN */,
233 0x003F /* 0x3F QUESTION MARK */,
234 0x0040 /* 0x40 COMMERCIAL AT */,
235 0x0041 /* 0x41 LATIN CAPITAL LETTER A */,
236 0x0042 /* 0x42 LATIN CAPITAL LETTER B */,
237 0x0043 /* 0x43 LATIN CAPITAL LETTER C */,
238 0x0044 /* 0x44 LATIN CAPITAL LETTER D */,
239 0x0045 /* 0x45 LATIN CAPITAL LETTER E */,
240 0x0046 /* 0x46 LATIN CAPITAL LETTER F */,
241 0x0047 /* 0x47 LATIN CAPITAL LETTER G */,
242 0x0048 /* 0x48 LATIN CAPITAL LETTER H */,
243 0x0049 /* 0x49 LATIN CAPITAL LETTER I */,
244 0x004A /* 0x4A LATIN CAPITAL LETTER J */,
245 0x004B /* 0x4B LATIN CAPITAL LETTER K */,
246 0x004C /* 0x4C LATIN CAPITAL LETTER L */,
247 0x004D /* 0x4D LATIN CAPITAL LETTER M */,
248 0x004E /* 0x4E LATIN CAPITAL LETTER N */,
249 0x004F /* 0x4F LATIN CAPITAL LETTER O */,
250 0x0050 /* 0x50 LATIN CAPITAL LETTER P */,
251 0x0051 /* 0x51 LATIN CAPITAL LETTER Q */,
252 0x0052 /* 0x52 LATIN CAPITAL LETTER R */,
253 0x0053 /* 0x53 LATIN CAPITAL LETTER S */,
254 0x0054 /* 0x54 LATIN CAPITAL LETTER T */,
255 0x0055 /* 0x55 LATIN CAPITAL LETTER U */,
256 0x0056 /* 0x56 LATIN CAPITAL LETTER V */,
257 0x0057 /* 0x57 LATIN CAPITAL LETTER W */,
258 0x0058 /* 0x58 LATIN CAPITAL LETTER X */,
259 0x0059 /* 0x59 LATIN CAPITAL LETTER Y */,
260 0x005A /* 0x5A LATIN CAPITAL LETTER Z */,
261 0x005B /* 0x5B LEFT SQUARE BRACKET */,
262 0x00A5 /* 0x5C YEN SIGN */,
263 0x005D /* 0x5D RIGHT SQUARE BRACKET */,
264 0x005E /* 0x5E CIRCUMFLEX ACCENT */,
265 0x005F /* 0x5F LOW LINE */,
266 0x0060 /* 0x60 GRAVE ACCENT */,
267 0x0061 /* 0x61 LATIN SMALL LETTER A */,
268 0x0062 /* 0x62 LATIN SMALL LETTER B */,
269 0x0063 /* 0x63 LATIN SMALL LETTER C */,
270 0x0064 /* 0x64 LATIN SMALL LETTER D */,
271 0x0065 /* 0x65 LATIN SMALL LETTER E */,
272 0x0066 /* 0x66 LATIN SMALL LETTER F */,
273 0x0067 /* 0x67 LATIN SMALL LETTER G */,
274 0x0068 /* 0x68 LATIN SMALL LETTER H */,
275 0x0069 /* 0x69 LATIN SMALL LETTER I */,
276 0x006A /* 0x6A LATIN SMALL LETTER J */,
277 0x006B /* 0x6B LATIN SMALL LETTER K */,
278 0x006C /* 0x6C LATIN SMALL LETTER L */,
279 0x006D /* 0x6D LATIN SMALL LETTER M */,
280 0x006E /* 0x6E LATIN SMALL LETTER N */,
281 0x006F /* 0x6F LATIN SMALL LETTER O */,
282 0x0070 /* 0x70 LATIN SMALL LETTER P */,
283 0x0071 /* 0x71 LATIN SMALL LETTER Q */,
284 0x0072 /* 0x72 LATIN SMALL LETTER R */,
285 0x0073 /* 0x73 LATIN SMALL LETTER S */,
286 0x0074 /* 0x74 LATIN SMALL LETTER T */,
287 0x0075 /* 0x75 LATIN SMALL LETTER U */,
288 0x0076 /* 0x76 LATIN SMALL LETTER V */,
289 0x0077 /* 0x77 LATIN SMALL LETTER W */,
290 0x0078 /* 0x78 LATIN SMALL LETTER X */,
291 0x0079 /* 0x79 LATIN SMALL LETTER Y */,
292 0x007A /* 0x7A LATIN SMALL LETTER Z */,
293 0x007B /* 0x7B LEFT CURLY BRACKET */,
294 0x007C /* 0x7C VERTICAL LINE */,
295 0x007D /* 0x7D RIGHT CURLY BRACKET */,
296 0x203E /* 0x7E OVERLINE */
/* UCS code points for ISO 8859-2, one entry per code from 0xA0 to 0xFF
   in order (96 entries, matching the per-entry comments).  Presumably
   indexed by (code - 0xA0); the lookup is not in this section.
   NOTE(review): the braces around the initializer are not visible here. */
299 Emchar latin_iso8859_2_to_ucs[96] =
301 0x00A0 /* 0xA0 NO-BREAK SPACE */,
302 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
303 0x02D8 /* 0xA2 BREVE */,
304 0x0141 /* 0xA3 LATIN CAPITAL LETTER L WITH STROKE */,
305 0x00A4 /* 0xA4 CURRENCY SIGN */,
306 0x013D /* 0xA5 LATIN CAPITAL LETTER L WITH CARON */,
307 0x015A /* 0xA6 LATIN CAPITAL LETTER S WITH ACUTE */,
308 0x00A7 /* 0xA7 SECTION SIGN */,
309 0x00A8 /* 0xA8 DIAERESIS */,
310 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
311 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
312 0x0164 /* 0xAB LATIN CAPITAL LETTER T WITH CARON */,
313 0x0179 /* 0xAC LATIN CAPITAL LETTER Z WITH ACUTE */,
314 0x00AD /* 0xAD SOFT HYPHEN */,
315 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
316 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
317 0x00B0 /* 0xB0 DEGREE SIGN */,
318 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
319 0x02DB /* 0xB2 OGONEK */,
320 0x0142 /* 0xB3 LATIN SMALL LETTER L WITH STROKE */,
321 0x00B4 /* 0xB4 ACUTE ACCENT */,
322 0x013E /* 0xB5 LATIN SMALL LETTER L WITH CARON */,
323 0x015B /* 0xB6 LATIN SMALL LETTER S WITH ACUTE */,
324 0x02C7 /* 0xB7 CARON */,
325 0x00B8 /* 0xB8 CEDILLA */,
326 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
327 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
328 0x0165 /* 0xBB LATIN SMALL LETTER T WITH CARON */,
329 0x017A /* 0xBC LATIN SMALL LETTER Z WITH ACUTE */,
330 0x02DD /* 0xBD DOUBLE ACUTE ACCENT */,
331 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
332 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
333 0x0154 /* 0xC0 LATIN CAPITAL LETTER R WITH ACUTE */,
334 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
335 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
336 0x0102 /* 0xC3 LATIN CAPITAL LETTER A WITH BREVE */,
337 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
338 0x0139 /* 0xC5 LATIN CAPITAL LETTER L WITH ACUTE */,
339 0x0106 /* 0xC6 LATIN CAPITAL LETTER C WITH ACUTE */,
340 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
341 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
342 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
343 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
344 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
345 0x011A /* 0xCC LATIN CAPITAL LETTER E WITH CARON */,
346 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
347 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
348 0x010E /* 0xCF LATIN CAPITAL LETTER D WITH CARON */,
349 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
350 0x0143 /* 0xD1 LATIN CAPITAL LETTER N WITH ACUTE */,
351 0x0147 /* 0xD2 LATIN CAPITAL LETTER N WITH CARON */,
352 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
353 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
354 0x0150 /* 0xD5 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */,
355 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
356 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
357 0x0158 /* 0xD8 LATIN CAPITAL LETTER R WITH CARON */,
358 0x016E /* 0xD9 LATIN CAPITAL LETTER U WITH RING ABOVE */,
359 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
360 0x0170 /* 0xDB LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */,
361 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
362 0x00DD /* 0xDD LATIN CAPITAL LETTER Y WITH ACUTE */,
363 0x0162 /* 0xDE LATIN CAPITAL LETTER T WITH CEDILLA */,
364 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
365 0x0155 /* 0xE0 LATIN SMALL LETTER R WITH ACUTE */,
366 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
367 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
368 0x0103 /* 0xE3 LATIN SMALL LETTER A WITH BREVE */,
369 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
370 0x013A /* 0xE5 LATIN SMALL LETTER L WITH ACUTE */,
371 0x0107 /* 0xE6 LATIN SMALL LETTER C WITH ACUTE */,
372 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
373 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
374 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
375 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
376 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
377 0x011B /* 0xEC LATIN SMALL LETTER E WITH CARON */,
378 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
379 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
380 0x010F /* 0xEF LATIN SMALL LETTER D WITH CARON */,
381 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
382 0x0144 /* 0xF1 LATIN SMALL LETTER N WITH ACUTE */,
383 0x0148 /* 0xF2 LATIN SMALL LETTER N WITH CARON */,
384 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
385 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
386 0x0151 /* 0xF5 LATIN SMALL LETTER O WITH DOUBLE ACUTE */,
387 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
388 0x00F7 /* 0xF7 DIVISION SIGN */,
389 0x0159 /* 0xF8 LATIN SMALL LETTER R WITH CARON */,
390 0x016F /* 0xF9 LATIN SMALL LETTER U WITH RING ABOVE */,
391 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
392 0x0171 /* 0xFB LATIN SMALL LETTER U WITH DOUBLE ACUTE */,
393 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
394 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
395 0x0163 /* 0xFE LATIN SMALL LETTER T WITH CEDILLA */,
396 0x02D9 /* 0xFF DOT ABOVE */
/* UCS code points for ISO 8859-3, listed in code order starting at 0xA0
   (see the per-entry comments).  Presumably indexed by (code - 0xA0);
   the lookup is not in this section.
   NOTE(review): the array has 96 slots but only 89 entries are visible.
   The per-entry comments skip 0xA5, 0xAE, 0xBE, 0xC3, 0xD0, 0xE3 and
   0xF0.  Without a placeholder at each gap, every later entry would sit
   at a lower index than its code implies -- confirm placeholders exist.
   The braces around the initializer are not visible here either. */
399 Emchar latin_iso8859_3_to_ucs[96] =
401 0x00A0 /* 0xA0 NO-BREAK SPACE */,
402 0x0126 /* 0xA1 LATIN CAPITAL LETTER H WITH STROKE */,
403 0x02D8 /* 0xA2 BREVE */,
404 0x00A3 /* 0xA3 POUND SIGN */,
405 0x00A4 /* 0xA4 CURRENCY SIGN */,
/* NOTE(review): no entry for 0xA5 is visible here. */
407 0x0124 /* 0xA6 LATIN CAPITAL LETTER H WITH CIRCUMFLEX */,
408 0x00A7 /* 0xA7 SECTION SIGN */,
409 0x00A8 /* 0xA8 DIAERESIS */,
410 0x0130 /* 0xA9 LATIN CAPITAL LETTER I WITH DOT ABOVE */,
411 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
412 0x011E /* 0xAB LATIN CAPITAL LETTER G WITH BREVE */,
413 0x0134 /* 0xAC LATIN CAPITAL LETTER J WITH CIRCUMFLEX */,
414 0x00AD /* 0xAD SOFT HYPHEN */,
/* NOTE(review): no entry for 0xAE is visible here. */
416 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
417 0x00B0 /* 0xB0 DEGREE SIGN */,
418 0x0127 /* 0xB1 LATIN SMALL LETTER H WITH STROKE */,
419 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
420 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
421 0x00B4 /* 0xB4 ACUTE ACCENT */,
422 0x00B5 /* 0xB5 MICRO SIGN */,
423 0x0125 /* 0xB6 LATIN SMALL LETTER H WITH CIRCUMFLEX */,
424 0x00B7 /* 0xB7 MIDDLE DOT */,
425 0x00B8 /* 0xB8 CEDILLA */,
426 0x0131 /* 0xB9 LATIN SMALL LETTER DOTLESS I */,
427 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
428 0x011F /* 0xBB LATIN SMALL LETTER G WITH BREVE */,
429 0x0135 /* 0xBC LATIN SMALL LETTER J WITH CIRCUMFLEX */,
430 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
/* NOTE(review): no entry for 0xBE is visible here. */
432 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
433 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
434 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
435 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
/* NOTE(review): no entry for 0xC3 is visible here. */
437 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
438 0x010A /* 0xC5 LATIN CAPITAL LETTER C WITH DOT ABOVE */,
439 0x0108 /* 0xC6 LATIN CAPITAL LETTER C WITH CIRCUMFLEX */,
440 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
441 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
442 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
443 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
444 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
445 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
446 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
447 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
448 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
/* NOTE(review): no entry for 0xD0 is visible here. */
450 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
451 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
452 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
453 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
454 0x0120 /* 0xD5 LATIN CAPITAL LETTER G WITH DOT ABOVE */,
455 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
456 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
457 0x011C /* 0xD8 LATIN CAPITAL LETTER G WITH CIRCUMFLEX */,
458 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
459 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
460 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
461 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
462 0x016C /* 0xDD LATIN CAPITAL LETTER U WITH BREVE */,
463 0x015C /* 0xDE LATIN CAPITAL LETTER S WITH CIRCUMFLEX */,
464 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
465 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
466 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
467 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
/* NOTE(review): no entry for 0xE3 is visible here. */
469 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
470 0x010B /* 0xE5 LATIN SMALL LETTER C WITH DOT ABOVE */,
471 0x0109 /* 0xE6 LATIN SMALL LETTER C WITH CIRCUMFLEX */,
472 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
473 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
474 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
475 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
476 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
477 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
478 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
479 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
480 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
/* NOTE(review): no entry for 0xF0 is visible here. */
482 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
483 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
484 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
485 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
486 0x0121 /* 0xF5 LATIN SMALL LETTER G WITH DOT ABOVE */,
487 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
488 0x00F7 /* 0xF7 DIVISION SIGN */,
489 0x011D /* 0xF8 LATIN SMALL LETTER G WITH CIRCUMFLEX */,
490 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
491 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
492 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
493 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
494 0x016D /* 0xFD LATIN SMALL LETTER U WITH BREVE */,
495 0x015D /* 0xFE LATIN SMALL LETTER S WITH CIRCUMFLEX */,
496 0x02D9 /* 0xFF DOT ABOVE */
/* UCS code points for ISO 8859-4, one entry per code from 0xA0 to 0xFF
   in order (96 entries, matching the per-entry comments).  Presumably
   indexed by (code - 0xA0); the lookup is not in this section.
   NOTE(review): the braces around the initializer are not visible here. */
499 Emchar latin_iso8859_4_to_ucs[96] =
501 0x00A0 /* 0xA0 NO-BREAK SPACE */,
502 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
503 0x0138 /* 0xA2 LATIN SMALL LETTER KRA */,
504 0x0156 /* 0xA3 LATIN CAPITAL LETTER R WITH CEDILLA */,
505 0x00A4 /* 0xA4 CURRENCY SIGN */,
506 0x0128 /* 0xA5 LATIN CAPITAL LETTER I WITH TILDE */,
507 0x013B /* 0xA6 LATIN CAPITAL LETTER L WITH CEDILLA */,
508 0x00A7 /* 0xA7 SECTION SIGN */,
509 0x00A8 /* 0xA8 DIAERESIS */,
510 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
511 0x0112 /* 0xAA LATIN CAPITAL LETTER E WITH MACRON */,
512 0x0122 /* 0xAB LATIN CAPITAL LETTER G WITH CEDILLA */,
513 0x0166 /* 0xAC LATIN CAPITAL LETTER T WITH STROKE */,
514 0x00AD /* 0xAD SOFT HYPHEN */,
515 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
516 0x00AF /* 0xAF MACRON */,
517 0x00B0 /* 0xB0 DEGREE SIGN */,
518 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
519 0x02DB /* 0xB2 OGONEK */,
520 0x0157 /* 0xB3 LATIN SMALL LETTER R WITH CEDILLA */,
521 0x00B4 /* 0xB4 ACUTE ACCENT */,
522 0x0129 /* 0xB5 LATIN SMALL LETTER I WITH TILDE */,
523 0x013C /* 0xB6 LATIN SMALL LETTER L WITH CEDILLA */,
524 0x02C7 /* 0xB7 CARON */,
525 0x00B8 /* 0xB8 CEDILLA */,
526 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
527 0x0113 /* 0xBA LATIN SMALL LETTER E WITH MACRON */,
528 0x0123 /* 0xBB LATIN SMALL LETTER G WITH CEDILLA */,
529 0x0167 /* 0xBC LATIN SMALL LETTER T WITH STROKE */,
530 0x014A /* 0xBD LATIN CAPITAL LETTER ENG */,
531 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
532 0x014B /* 0xBF LATIN SMALL LETTER ENG */,
533 0x0100 /* 0xC0 LATIN CAPITAL LETTER A WITH MACRON */,
534 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
535 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
536 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
537 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
538 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
539 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
540 0x012E /* 0xC7 LATIN CAPITAL LETTER I WITH OGONEK */,
541 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
542 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
543 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
544 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
545 0x0116 /* 0xCC LATIN CAPITAL LETTER E WITH DOT ABOVE */,
546 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
547 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
548 0x012A /* 0xCF LATIN CAPITAL LETTER I WITH MACRON */,
549 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
550 0x0145 /* 0xD1 LATIN CAPITAL LETTER N WITH CEDILLA */,
551 0x014C /* 0xD2 LATIN CAPITAL LETTER O WITH MACRON */,
552 0x0136 /* 0xD3 LATIN CAPITAL LETTER K WITH CEDILLA */,
553 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
554 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
555 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
556 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
557 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
558 0x0172 /* 0xD9 LATIN CAPITAL LETTER U WITH OGONEK */,
559 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
560 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
561 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
562 0x0168 /* 0xDD LATIN CAPITAL LETTER U WITH TILDE */,
563 0x016A /* 0xDE LATIN CAPITAL LETTER U WITH MACRON */,
564 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
565 0x0101 /* 0xE0 LATIN SMALL LETTER A WITH MACRON */,
566 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
567 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
568 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
569 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
570 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
571 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
572 0x012F /* 0xE7 LATIN SMALL LETTER I WITH OGONEK */,
573 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
574 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
575 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
576 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
577 0x0117 /* 0xEC LATIN SMALL LETTER E WITH DOT ABOVE */,
578 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
579 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
580 0x012B /* 0xEF LATIN SMALL LETTER I WITH MACRON */,
581 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
582 0x0146 /* 0xF1 LATIN SMALL LETTER N WITH CEDILLA */,
583 0x014D /* 0xF2 LATIN SMALL LETTER O WITH MACRON */,
584 0x0137 /* 0xF3 LATIN SMALL LETTER K WITH CEDILLA */,
585 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
586 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
587 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
588 0x00F7 /* 0xF7 DIVISION SIGN */,
589 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
590 0x0173 /* 0xF9 LATIN SMALL LETTER U WITH OGONEK */,
591 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
592 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
593 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
594 0x0169 /* 0xFD LATIN SMALL LETTER U WITH TILDE */,
595 0x016B /* 0xFE LATIN SMALL LETTER U WITH MACRON */,
596 0x02D9 /* 0xFF DOT ABOVE */
/* UCS code points for ISO 8859-9, one entry per code from 0xA0 to 0xFF
   in order (96 entries, matching the per-entry comments).  Every entry
   equals its own code except 0xD0, 0xDD, 0xDE, 0xF0, 0xFD and 0xFE.
   Presumably indexed by (code - 0xA0); the lookup is not in this
   section.
   NOTE(review): the braces around the initializer are not visible here. */
599 Emchar latin_iso8859_9_to_ucs[96] =
601 0x00A0 /* 0xA0 NO-BREAK SPACE */,
602 0x00A1 /* 0xA1 INVERTED EXCLAMATION MARK */,
603 0x00A2 /* 0xA2 CENT SIGN */,
604 0x00A3 /* 0xA3 POUND SIGN */,
605 0x00A4 /* 0xA4 CURRENCY SIGN */,
606 0x00A5 /* 0xA5 YEN SIGN */,
607 0x00A6 /* 0xA6 BROKEN BAR */,
608 0x00A7 /* 0xA7 SECTION SIGN */,
609 0x00A8 /* 0xA8 DIAERESIS */,
610 0x00A9 /* 0xA9 COPYRIGHT SIGN */,
611 0x00AA /* 0xAA FEMININE ORDINAL INDICATOR */,
612 0x00AB /* 0xAB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */,
613 0x00AC /* 0xAC NOT SIGN */,
614 0x00AD /* 0xAD SOFT HYPHEN */,
615 0x00AE /* 0xAE REGISTERED SIGN */,
616 0x00AF /* 0xAF MACRON */,
617 0x00B0 /* 0xB0 DEGREE SIGN */,
618 0x00B1 /* 0xB1 PLUS-MINUS SIGN */,
619 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
620 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
621 0x00B4 /* 0xB4 ACUTE ACCENT */,
622 0x00B5 /* 0xB5 MICRO SIGN */,
623 0x00B6 /* 0xB6 PILCROW SIGN */,
624 0x00B7 /* 0xB7 MIDDLE DOT */,
625 0x00B8 /* 0xB8 CEDILLA */,
626 0x00B9 /* 0xB9 SUPERSCRIPT ONE */,
627 0x00BA /* 0xBA MASCULINE ORDINAL INDICATOR */,
628 0x00BB /* 0xBB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */,
629 0x00BC /* 0xBC VULGAR FRACTION ONE QUARTER */,
630 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
631 0x00BE /* 0xBE VULGAR FRACTION THREE QUARTERS */,
632 0x00BF /* 0xBF INVERTED QUESTION MARK */,
633 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
634 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
635 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
636 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
637 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
638 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
639 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
640 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
641 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
642 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
643 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
644 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
645 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
646 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
647 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
648 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
649 0x011E /* 0xD0 LATIN CAPITAL LETTER G WITH BREVE */,
650 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
651 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
652 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
653 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
654 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
655 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
656 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
657 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
658 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
659 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
660 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
661 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
662 0x0130 /* 0xDD LATIN CAPITAL LETTER I WITH DOT ABOVE */,
663 0x015E /* 0xDE LATIN CAPITAL LETTER S WITH CEDILLA */,
664 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
665 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
666 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
667 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
668 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
669 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
670 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
671 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
672 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
673 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
674 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
675 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
676 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
677 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
678 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
679 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
680 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
681 0x011F /* 0xF0 LATIN SMALL LETTER G WITH BREVE */,
682 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
683 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
684 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
685 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
686 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
687 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
688 0x00F7 /* 0xF7 DIVISION SIGN */,
689 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
690 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
691 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
692 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
693 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
694 0x0131 /* 0xFD LATIN SMALL LETTER DOTLESS I */,
695 0x015F /* 0xFE LATIN SMALL LETTER S WITH CEDILLA */,
696 0x00FF /* 0xFF LATIN SMALL LETTER Y WITH DIAERESIS */,
699 Emchar latin_viscii_lower_to_ucs[96] =
799 Emchar latin_viscii_upper_to_ucs[96] =
899 Emchar latin_tcvn5712_to_ucs[96] =
901 0x00A0 /* 0xA0 NO-BREAK SPACE */,
902 0x0102 /* 0xA1 LATIN CAPITAL LETTER A WITH BREVE */,
903 0x00C2 /* 0xA2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
904 0x00CA /* 0xA3 LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
905 0x00D4 /* 0xA4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
906 0x01A0 /* 0xA5 LATIN CAPITAL LETTER O WITH HORN */,
907 0x01AF /* 0xA6 LATIN CAPITAL LETTER U WITH HORN */,
908 0x0110 /* 0xA7 LATIN CAPITAL LETTER D WITH STROKE */,
909 0x0103 /* 0xA8 LATIN SMALL LETTER A WITH BREVE */,
910 0x00E2 /* 0xA9 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
911 0x00EA /* 0xAA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
912 0x00F4 /* 0xAB LATIN SMALL LETTER O WITH CIRCUMFLEX */,
913 0x01A1 /* 0xAC LATIN SMALL LETTER O WITH HORN */,
914 0x01B0 /* 0xAD LATIN SMALL LETTER U WITH HORN */,
915 0x0111 /* 0xAE LATIN SMALL LETTER D WITH STROKE */,
916 0x1EB0 /* 0xAF LATIN CAPITAL LETTER A WITH BREVE AND GRAVE */,
917 0x0300 /* 0xB0 COMBINING GRAVE ACCENT */,
918 0x0309 /* 0xB1 COMBINING HOOK ABOVE */,
919 0x0303 /* 0xB2 COMBINING TILDE */,
920 0x0301 /* 0xB3 COMBINING ACUTE ACCENT */,
921 0x0323 /* 0xB4 COMBINING DOT BELOW */,
922 0x00E0 /* 0xB5 LATIN SMALL LETTER A WITH GRAVE */,
923 0x1EA3 /* 0xB6 LATIN SMALL LETTER A WITH HOOK ABOVE */,
924 0x00E3 /* 0xB7 LATIN SMALL LETTER A WITH TILDE */,
925 0x00E1 /* 0xB8 LATIN SMALL LETTER A WITH ACUTE */,
926 0x1EA1 /* 0xB9 LATIN SMALL LETTER A WITH DOT BELOW */,
927 0x1EB2 /* 0xBA LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE */,
928 0x1EB1 /* 0xBB LATIN SMALL LETTER A WITH BREVE AND GRAVE */,
929 0x1EB3 /* 0xBC LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE */,
930 0x1EB5 /* 0xBD LATIN SMALL LETTER A WITH BREVE AND TILDE */,
931 0x1EAF /* 0xBE LATIN SMALL LETTER A WITH BREVE AND ACUTE */,
932 0x1EB4 /* 0xBF LATIN CAPITAL LETTER A WITH BREVE AND TILDE */,
933 0x1EAE /* 0xC0 LATIN CAPITAL LETTER A WITH BREVE AND ACUTE */,
934 0x1EA6 /* 0xC1 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE */,
935 0x1EA8 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
936 0x1EAA /* 0xC3 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE */,
937 0x1EA4 /* 0xC4 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE */,
938 0x1EC0 /* 0xC5 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE */,
939 0x1EB7 /* 0xC6 LATIN SMALL LETTER A WITH BREVE AND DOT BELOW */,
940 0x1EA7 /* 0xC7 LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE */,
941 0x1EA9 /* 0xC8 LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
942 0x1EAB /* 0xC9 LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE */,
943 0x1EA5 /* 0xCA LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE */,
944 0x1EAD /* 0xCB LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW */,
945 0x00E8 /* 0xCC LATIN SMALL LETTER E WITH GRAVE */,
946 0x1EC2 /* 0xCD LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
947 0x1EBB /* 0xCE LATIN SMALL LETTER E WITH HOOK ABOVE */,
948 0x1EBD /* 0xCF LATIN SMALL LETTER E WITH TILDE */,
949 0x00E9 /* 0xD0 LATIN SMALL LETTER E WITH ACUTE */,
950 0x1EB9 /* 0xD1 LATIN SMALL LETTER E WITH DOT BELOW */,
951 0x1EC1 /* 0xD2 LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE */,
952 0x1EC3 /* 0xD3 LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
953 0x1EC5 /* 0xD4 LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE */,
954 0x1EBF /* 0xD5 LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE */,
955 0x1EC7 /* 0xD6 LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW */,
956 0x00EC /* 0xD7 LATIN SMALL LETTER I WITH GRAVE */,
957 0x1EC9 /* 0xD8 LATIN SMALL LETTER I WITH HOOK ABOVE */,
958 0x1EC4 /* 0xD9 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE */,
959 0x1EBE /* 0xDA LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE */,
960 0x1ED2 /* 0xDB LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE */,
961 0x0129 /* 0xDC LATIN SMALL LETTER I WITH TILDE */,
962 0x00ED /* 0xDD LATIN SMALL LETTER I WITH ACUTE */,
963 0x1ECB /* 0xDE LATIN SMALL LETTER I WITH DOT BELOW */,
964 0x00F2 /* 0xDF LATIN SMALL LETTER O WITH GRAVE */,
965 0x1ED4 /* 0xE0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
966 0x1ECF /* 0xE1 LATIN SMALL LETTER O WITH HOOK ABOVE */,
967 0x00F5 /* 0xE2 LATIN SMALL LETTER O WITH TILDE */,
968 0x00F3 /* 0xE3 LATIN SMALL LETTER O WITH ACUTE */,
969 0x1ECD /* 0xE4 LATIN SMALL LETTER O WITH DOT BELOW */,
970 0x1ED3 /* 0xE5 LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE */,
971 0x1ED5 /* 0xE6 LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
972 0x1ED7 /* 0xE7 LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE */,
973 0x1ED1 /* 0xE8 LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE */,
974 0x1ED9 /* 0xE9 LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW */,
975 0x1EDD /* 0xEA LATIN SMALL LETTER O WITH HORN AND GRAVE */,
976 0x1EDF /* 0xEB LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE */,
977 0x1EE1 /* 0xEC LATIN SMALL LETTER O WITH HORN AND TILDE */,
978 0x1EDB /* 0xED LATIN SMALL LETTER O WITH HORN AND ACUTE */,
979 0x1EE3 /* 0xEE LATIN SMALL LETTER O WITH HORN AND DOT BELOW */,
980 0x00F9 /* 0xEF LATIN SMALL LETTER U WITH GRAVE */,
981 0x1ED6 /* 0xF0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE */,
982 0x1EE7 /* 0xF1 LATIN SMALL LETTER U WITH HOOK ABOVE */,
983 0x0169 /* 0xF2 LATIN SMALL LETTER U WITH TILDE */,
984 0x00FA /* 0xF3 LATIN SMALL LETTER U WITH ACUTE */,
985 0x1EE5 /* 0xF4 LATIN SMALL LETTER U WITH DOT BELOW */,
986 0x1EEB /* 0xF5 LATIN SMALL LETTER U WITH HORN AND GRAVE */,
987 0x1EED /* 0xF6 LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE */,
988 0x1EEF /* 0xF7 LATIN SMALL LETTER U WITH HORN AND TILDE */,
989 0x1EE9 /* 0xF8 LATIN SMALL LETTER U WITH HORN AND ACUTE */,
990 0x1EF1 /* 0xF9 LATIN SMALL LETTER U WITH HORN AND DOT BELOW */,
991 0x1EF3 /* 0xFA LATIN SMALL LETTER Y WITH GRAVE */,
992 0x1EF7 /* 0xFB LATIN SMALL LETTER Y WITH HOOK ABOVE */,
993 0x1EF9 /* 0xFC LATIN SMALL LETTER Y WITH TILDE */,
994 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
995 0x1EF5 /* 0xFE LATIN SMALL LETTER Y WITH DOT BELOW */,
996 0x1ED0 /* 0xFF LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */
/* Lookup table with one entry for each of the 128 code points
   U+0100..U+017F, in order (see the per-row comments).  Each entry is
   the leading byte of the charset associated with that code point: one
   of the ISO 8859-2, -3 or -4 Latin charsets, JIS X 0212, or
   LEADING_BYTE_UCS_BMP.
   NOTE(review): presumably LEADING_BYTE_UCS_BMP marks code points that
   none of the other listed charsets contains -- confirm against the
   code that consults this table.  */
999 Charset_ID latin_a_char_to_charset[128] = {
1000 /* U+0100 */ LEADING_BYTE_LATIN_ISO8859_4,
1001 /* U+0101 */ LEADING_BYTE_LATIN_ISO8859_4,
1002 /* U+0102 */ LEADING_BYTE_LATIN_ISO8859_2,
1003 /* U+0103 */ LEADING_BYTE_LATIN_ISO8859_2,
1004 /* U+0104 */ LEADING_BYTE_LATIN_ISO8859_2,
1005 /* U+0105 */ LEADING_BYTE_LATIN_ISO8859_2,
1006 /* U+0106 */ LEADING_BYTE_LATIN_ISO8859_2,
1007 /* U+0107 */ LEADING_BYTE_LATIN_ISO8859_2,
1008 /* U+0108 */ LEADING_BYTE_LATIN_ISO8859_3,
1009 /* U+0109 */ LEADING_BYTE_LATIN_ISO8859_3,
1010 /* U+010A */ LEADING_BYTE_LATIN_ISO8859_3,
1011 /* U+010B */ LEADING_BYTE_LATIN_ISO8859_3,
1012 /* U+010C */ LEADING_BYTE_LATIN_ISO8859_2,
1013 /* U+010D */ LEADING_BYTE_LATIN_ISO8859_2,
1014 /* U+010E */ LEADING_BYTE_LATIN_ISO8859_2,
1015 /* U+010F */ LEADING_BYTE_LATIN_ISO8859_2,
1016 /* U+0110 */ LEADING_BYTE_LATIN_ISO8859_2,
1017 /* U+0111 */ LEADING_BYTE_LATIN_ISO8859_2,
1018 /* U+0112 */ LEADING_BYTE_LATIN_ISO8859_4,
1019 /* U+0113 */ LEADING_BYTE_LATIN_ISO8859_4,
1020 /* U+0114 */ LEADING_BYTE_UCS_BMP,
1021 /* U+0115 */ LEADING_BYTE_UCS_BMP,
1022 /* U+0116 */ LEADING_BYTE_LATIN_ISO8859_4,
1023 /* U+0117 */ LEADING_BYTE_LATIN_ISO8859_4,
1024 /* U+0118 */ LEADING_BYTE_LATIN_ISO8859_2,
1025 /* U+0119 */ LEADING_BYTE_LATIN_ISO8859_2,
1026 /* U+011A */ LEADING_BYTE_LATIN_ISO8859_2,
1027 /* U+011B */ LEADING_BYTE_LATIN_ISO8859_2,
1028 /* U+011C */ LEADING_BYTE_LATIN_ISO8859_3,
1029 /* U+011D */ LEADING_BYTE_LATIN_ISO8859_3,
1030 /* U+011E */ LEADING_BYTE_LATIN_ISO8859_3,
1031 /* U+011F */ LEADING_BYTE_LATIN_ISO8859_3,
1032 /* U+0120 */ LEADING_BYTE_LATIN_ISO8859_3,
1033 /* U+0121 */ LEADING_BYTE_LATIN_ISO8859_3,
1034 /* U+0122 */ LEADING_BYTE_LATIN_ISO8859_4,
1035 /* U+0123 */ LEADING_BYTE_LATIN_ISO8859_4,
1036 /* U+0124 */ LEADING_BYTE_LATIN_ISO8859_3,
1037 /* U+0125 */ LEADING_BYTE_LATIN_ISO8859_3,
1038 /* U+0126 */ LEADING_BYTE_LATIN_ISO8859_3,
1039 /* U+0127 */ LEADING_BYTE_LATIN_ISO8859_3,
1040 /* U+0128 */ LEADING_BYTE_LATIN_ISO8859_4,
1041 /* U+0129 */ LEADING_BYTE_LATIN_ISO8859_4,
1042 /* U+012A */ LEADING_BYTE_LATIN_ISO8859_4,
1043 /* U+012B */ LEADING_BYTE_LATIN_ISO8859_4,
1044 /* U+012C */ LEADING_BYTE_UCS_BMP,
1045 /* U+012D */ LEADING_BYTE_UCS_BMP,
1046 /* U+012E */ LEADING_BYTE_LATIN_ISO8859_4,
1047 /* U+012F */ LEADING_BYTE_LATIN_ISO8859_4,
1048 /* U+0130 */ LEADING_BYTE_LATIN_ISO8859_3,
1049 /* U+0131 */ LEADING_BYTE_LATIN_ISO8859_3,
1050 /* U+0132 */ LEADING_BYTE_JAPANESE_JISX0212,
1051 /* U+0133 */ LEADING_BYTE_JAPANESE_JISX0212,
1052 /* U+0134 */ LEADING_BYTE_LATIN_ISO8859_3,
1053 /* U+0135 */ LEADING_BYTE_LATIN_ISO8859_3,
1054 /* U+0136 */ LEADING_BYTE_LATIN_ISO8859_4,
1055 /* U+0137 */ LEADING_BYTE_LATIN_ISO8859_4,
1056 /* U+0138 */ LEADING_BYTE_LATIN_ISO8859_4,
1057 /* U+0139 */ LEADING_BYTE_LATIN_ISO8859_2,
1058 /* U+013A */ LEADING_BYTE_LATIN_ISO8859_2,
1059 /* U+013B */ LEADING_BYTE_LATIN_ISO8859_4,
1060 /* U+013C */ LEADING_BYTE_LATIN_ISO8859_4,
1061 /* U+013D */ LEADING_BYTE_LATIN_ISO8859_2,
1062 /* U+013E */ LEADING_BYTE_LATIN_ISO8859_2,
1063 /* U+013F */ LEADING_BYTE_JAPANESE_JISX0212,
1064 /* U+0140 */ LEADING_BYTE_JAPANESE_JISX0212,
1065 /* U+0141 */ LEADING_BYTE_LATIN_ISO8859_2,
1066 /* U+0142 */ LEADING_BYTE_LATIN_ISO8859_2,
1067 /* U+0143 */ LEADING_BYTE_LATIN_ISO8859_2,
1068 /* U+0144 */ LEADING_BYTE_LATIN_ISO8859_2,
1069 /* U+0145 */ LEADING_BYTE_LATIN_ISO8859_4,
1070 /* U+0146 */ LEADING_BYTE_LATIN_ISO8859_4,
1071 /* U+0147 */ LEADING_BYTE_LATIN_ISO8859_2,
1072 /* U+0148 */ LEADING_BYTE_LATIN_ISO8859_2,
1073 /* U+0149 */ LEADING_BYTE_JAPANESE_JISX0212,
1074 /* U+014A */ LEADING_BYTE_LATIN_ISO8859_4,
1075 /* U+014B */ LEADING_BYTE_LATIN_ISO8859_4,
1076 /* U+014C */ LEADING_BYTE_LATIN_ISO8859_4,
1077 /* U+014D */ LEADING_BYTE_LATIN_ISO8859_4,
1078 /* U+014E */ LEADING_BYTE_UCS_BMP,
1079 /* U+014F */ LEADING_BYTE_UCS_BMP,
1080 /* U+0150 */ LEADING_BYTE_LATIN_ISO8859_2,
1081 /* U+0151 */ LEADING_BYTE_LATIN_ISO8859_2,
1082 /* U+0152 */ LEADING_BYTE_JAPANESE_JISX0212,
1083 /* U+0153 */ LEADING_BYTE_JAPANESE_JISX0212,
1084 /* U+0154 */ LEADING_BYTE_LATIN_ISO8859_2,
1085 /* U+0155 */ LEADING_BYTE_LATIN_ISO8859_2,
1086 /* U+0156 */ LEADING_BYTE_LATIN_ISO8859_4,
1087 /* U+0157 */ LEADING_BYTE_LATIN_ISO8859_4,
1088 /* U+0158 */ LEADING_BYTE_LATIN_ISO8859_2,
1089 /* U+0159 */ LEADING_BYTE_LATIN_ISO8859_2,
1090 /* U+015A */ LEADING_BYTE_LATIN_ISO8859_2,
1091 /* U+015B */ LEADING_BYTE_LATIN_ISO8859_2,
1092 /* U+015C */ LEADING_BYTE_LATIN_ISO8859_3,
1093 /* U+015D */ LEADING_BYTE_LATIN_ISO8859_3,
1094 /* U+015E */ LEADING_BYTE_LATIN_ISO8859_2,
1095 /* U+015F */ LEADING_BYTE_LATIN_ISO8859_2,
1096 /* U+0160 */ LEADING_BYTE_LATIN_ISO8859_2,
1097 /* U+0161 */ LEADING_BYTE_LATIN_ISO8859_2,
1098 /* U+0162 */ LEADING_BYTE_LATIN_ISO8859_2,
1099 /* U+0163 */ LEADING_BYTE_LATIN_ISO8859_2,
1100 /* U+0164 */ LEADING_BYTE_LATIN_ISO8859_2,
1101 /* U+0165 */ LEADING_BYTE_LATIN_ISO8859_2,
1102 /* U+0166 */ LEADING_BYTE_LATIN_ISO8859_4,
1103 /* U+0167 */ LEADING_BYTE_LATIN_ISO8859_4,
1104 /* U+0168 */ LEADING_BYTE_LATIN_ISO8859_4,
1105 /* U+0169 */ LEADING_BYTE_LATIN_ISO8859_4,
1106 /* U+016A */ LEADING_BYTE_LATIN_ISO8859_4,
1107 /* U+016B */ LEADING_BYTE_LATIN_ISO8859_4,
1108 /* U+016C */ LEADING_BYTE_LATIN_ISO8859_3,
1109 /* U+016D */ LEADING_BYTE_LATIN_ISO8859_3,
1110 /* U+016E */ LEADING_BYTE_LATIN_ISO8859_2,
1111 /* U+016F */ LEADING_BYTE_LATIN_ISO8859_2,
1112 /* U+0170 */ LEADING_BYTE_LATIN_ISO8859_2,
1113 /* U+0171 */ LEADING_BYTE_LATIN_ISO8859_2,
1114 /* U+0172 */ LEADING_BYTE_LATIN_ISO8859_4,
1115 /* U+0173 */ LEADING_BYTE_LATIN_ISO8859_4,
1116 /* U+0174 */ LEADING_BYTE_JAPANESE_JISX0212,
1117 /* U+0175 */ LEADING_BYTE_JAPANESE_JISX0212,
1118 /* U+0176 */ LEADING_BYTE_JAPANESE_JISX0212,
1119 /* U+0177 */ LEADING_BYTE_JAPANESE_JISX0212,
1120 /* U+0178 */ LEADING_BYTE_JAPANESE_JISX0212,
1121 /* U+0179 */ LEADING_BYTE_LATIN_ISO8859_2,
1122 /* U+017A */ LEADING_BYTE_LATIN_ISO8859_2,
1123 /* U+017B */ LEADING_BYTE_LATIN_ISO8859_2,
1124 /* U+017C */ LEADING_BYTE_LATIN_ISO8859_2,
1125 /* U+017D */ LEADING_BYTE_LATIN_ISO8859_2,
1126 /* U+017E */ LEADING_BYTE_LATIN_ISO8859_2,
1127 /* U+017F */ LEADING_BYTE_UCS_BMP
/* Position-byte table for the code points U+0100..U+017F, declared with
   the same 128-entry size as latin_a_char_to_charset.  Every entry
   below is written as an 8-bit code minus 0x80, i.e. the code with its
   high bit cleared.
   NOTE(review): presumably the 8-bit code is the character's position
   in the charset that latin_a_char_to_charset names for the same code
   point -- confirm against the code that reads both tables.  */
1130 unsigned char latin_a_char_to_byte1[128] = {
1131 /* U+0100 */ 0xC0 - 0x80,
1132 /* U+0101 */ 0xE0 - 0x80,
1133 /* U+0102 */ 0xC3 - 0x80,
1134 /* U+0103 */ 0xE3 - 0x80,
1135 /* U+0104 */ 0xA1 - 0x80,
1136 /* U+0105 */ 0xB1 - 0x80,
1137 /* U+0106 */ 0xC6 - 0x80,
1138 /* U+0107 */ 0xE6 - 0x80,
1139 /* U+0108 */ 0xC6 - 0x80,
1140 /* U+0109 */ 0xE6 - 0x80,
1141 /* U+010A */ 0xC5 - 0x80,
1142 /* U+010B */ 0xE5 - 0x80,
1143 /* U+010C */ 0xC8 - 0x80,
1144 /* U+010D */ 0xE8 - 0x80,
1145 /* U+010E */ 0xCF - 0x80,
1146 /* U+010F */ 0xEF - 0x80,
1147 /* U+0110 */ 0xD0 - 0x80,
1148 /* U+0111 */ 0xF0 - 0x80,
1149 /* U+0112 */ 0xAA - 0x80,
1150 /* U+0113 */ 0xBA - 0x80,
1153 /* U+0116 */ 0xCC - 0x80,
1154 /* U+0117 */ 0xEC - 0x80,
1155 /* U+0118 */ 0xCA - 0x80,
1156 /* U+0119 */ 0xEA - 0x80,
1157 /* U+011A */ 0xCC - 0x80,
1158 /* U+011B */ 0xEC - 0x80,
1159 /* U+011C */ 0xD8 - 0x80,
1160 /* U+011D */ 0xF8 - 0x80,
1161 /* U+011E */ 0xAB - 0x80,
1162 /* U+011F */ 0xBB - 0x80,
1163 /* U+0120 */ 0xD5 - 0x80,
1164 /* U+0121 */ 0xF5 - 0x80,
1165 /* U+0122 */ 0xAB - 0x80,
1166 /* U+0123 */ 0xBB - 0x80,
1167 /* U+0124 */ 0xA6 - 0x80,
1168 /* U+0125 */ 0xB6 - 0x80,
1169 /* U+0126 */ 0xA1 - 0x80,
1170 /* U+0127 */ 0xB1 - 0x80,
1171 /* U+0128 */ 0xA5 - 0x80,
1172 /* U+0129 */ 0xB5 - 0x80,
1173 /* U+012A */ 0xCF - 0x80,
1174 /* U+012B */ 0xEF - 0x80,
1177 /* U+012E */ 0xC7 - 0x80,
1178 /* U+012F */ 0xE7 - 0x80,
1179 /* U+0130 */ 0xA9 - 0x80,
1180 /* U+0131 */ 0xB9 - 0x80,
1183 /* U+0134 */ 0xAC - 0x80,
1184 /* U+0135 */ 0xBC - 0x80,
1185 /* U+0136 */ 0xD3 - 0x80,
1186 /* U+0137 */ 0xF3 - 0x80,
1187 /* U+0138 */ 0xA2 - 0x80,
1188 /* U+0139 */ 0xC5 - 0x80,
1189 /* U+013A */ 0xE5 - 0x80,
1190 /* U+013B */ 0xA6 - 0x80,
1191 /* U+013C */ 0xB6 - 0x80,
1192 /* U+013D */ 0xA5 - 0x80,
1193 /* U+013E */ 0xB5 - 0x80,
1196 /* U+0141 */ 0xA3 - 0x80,
1197 /* U+0142 */ 0xB3 - 0x80,
1198 /* U+0143 */ 0xD1 - 0x80,
1199 /* U+0144 */ 0xF1 - 0x80,
1200 /* U+0145 */ 0xD1 - 0x80,
1201 /* U+0146 */ 0xF1 - 0x80,
1202 /* U+0147 */ 0xD2 - 0x80,
1203 /* U+0148 */ 0xF2 - 0x80,
1205 /* U+014A */ 0xBD - 0x80,
1206 /* U+014B */ 0xBF - 0x80,
1207 /* U+014C */ 0xD2 - 0x80,
1208 /* U+014D */ 0xF2 - 0x80,
1211 /* U+0150 */ 0xD5 - 0x80,
1212 /* U+0151 */ 0xF5 - 0x80,
1215 /* U+0154 */ 0xC0 - 0x80,
1216 /* U+0155 */ 0xE0 - 0x80,
1217 /* U+0156 */ 0xA3 - 0x80,
1218 /* U+0157 */ 0xB3 - 0x80,
1219 /* U+0158 */ 0xD8 - 0x80,
1220 /* U+0159 */ 0xF8 - 0x80,
1221 /* U+015A */ 0xA6 - 0x80,
1222 /* U+015B */ 0xB6 - 0x80,
1223 /* U+015C */ 0xDE - 0x80,
1224 /* U+015D */ 0xFE - 0x80,
1225 /* U+015E */ 0xAA - 0x80,
1226 /* U+015F */ 0xBA - 0x80,
1227 /* U+0160 */ 0xA9 - 0x80,
1228 /* U+0161 */ 0xB9 - 0x80,
1229 /* U+0162 */ 0xDE - 0x80,
1230 /* U+0163 */ 0xFE - 0x80,
1231 /* U+0164 */ 0xAB - 0x80,
1232 /* U+0165 */ 0xBB - 0x80,
1233 /* U+0166 */ 0xAC - 0x80,
1234 /* U+0167 */ 0xBC - 0x80,
1235 /* U+0168 */ 0xDD - 0x80,
1236 /* U+0169 */ 0xFD - 0x80,
1237 /* U+016A */ 0xDE - 0x80,
1238 /* U+016B */ 0xFE - 0x80,
1239 /* U+016C */ 0xDD - 0x80,
1240 /* U+016D */ 0xFD - 0x80,
1241 /* U+016E */ 0xD9 - 0x80,
1242 /* U+016F */ 0xF9 - 0x80,
1243 /* U+0170 */ 0xDB - 0x80,
1244 /* U+0171 */ 0xFB - 0x80,
1245 /* U+0172 */ 0xD9 - 0x80,
1246 /* U+0173 */ 0xF9 - 0x80,
1252 /* U+0179 */ 0xAC - 0x80,
1253 /* U+017A */ 0xBC - 0x80,
1254 /* U+017B */ 0xAF - 0x80,
1255 /* U+017C */ 0xBF - 0x80,
1256 /* U+017D */ 0xAE - 0x80,
1257 /* U+017E */ 0xBE - 0x80,
1261 unsigned char latin_a_char_to_byte2[128] = {
1392 Lisp_Object Vutf_2000_version;
1396 int leading_code_private_11;
1399 Lisp_Object Qcharsetp;
1401 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1402 Lisp_Object Qregistry, Qfinal, Qgraphic;
1403 Lisp_Object Qdirection;
1404 Lisp_Object Qreverse_direction_charset;
1405 Lisp_Object Qleading_byte;
1406 Lisp_Object Qshort_name, Qlong_name;
1420 Qcyrillic_iso8859_5,
1422 Qjapanese_jisx0208_1978,
1427 Qchinese_cns11643_1,
1428 Qchinese_cns11643_2,
1430 Qchinese_cns11643_3,
1431 Qchinese_cns11643_4,
1432 Qchinese_cns11643_5,
1433 Qchinese_cns11643_6,
1434 Qchinese_cns11643_7,
1436 Qlatin_viscii_lower,
1437 Qlatin_viscii_upper,
1443 Lisp_Object Ql2r, Qr2l;
1445 Lisp_Object Vcharset_hash_table;
1447 static Charset_ID next_allocated_1_byte_leading_byte;
1448 static Charset_ID next_allocated_2_byte_leading_byte;
1450 /* Composite characters are characters constructed by overstriking two
1451 or more regular characters.
1453 1) The old Mule implementation involves storing composite characters
1454 in a buffer as a tag followed by all of the actual characters
1455 used to make up the composite character. I think this is a bad
1456 idea; it greatly complicates code that wants to handle strings
1457 one character at a time because it has to deal with the possibility
1458 of great big ungainly characters. It's much more reasonable to
1459 simply store an index into a table of composite characters.
1461 2) The current implementation only allows for 16,384 separate
1462 composite characters over the lifetime of the XEmacs process.
1463 This could become a potential problem if the user
1464 edited lots of different files that use composite characters.
1465 Due to FSF bogosity, increasing the number of allowable
1466 composite characters under Mule would decrease the number
1467 of possible faces that can exist. Mule already has shrunk
1468 this to 2048, and further shrinkage would become uncomfortable.
1469 No such problems exist in XEmacs.
1471 Composite characters could be represented as 0x80 C1 C2 C3,
1472 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1473 for slightly under 2^20 (one million) composite characters
1474 over the XEmacs process lifetime, and you only need to
1475 increase the size of a Mule character from 19 to 21 bits.
1476 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1477 85 million (slightly over 2^26) composite characters. */
1480 /************************************************************************/
1481 /* Basic Emchar functions */
1482 /************************************************************************/
1484 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1485 string in STR. Returns the number of bytes stored.
1486 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Write the multi-byte form of the non-ASCII character C into STR
   through the advancing pointer P.  Two encodings appear below: a
   UTF-8-style one chosen by the magnitude of C, and a leading-byte one
   built from BREAKUP_CHAR.
   NOTE(review): these look like alternative build configurations; the
   lines selecting between them should be confirmed.  */
1490 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1496 Lisp_Object charset;
/* Two-byte form: a lead byte 0xc0 | (bits of C above the low six),
   then one trailing byte 0x80 | (low six bits).  */
1505 else if ( c <= 0x7ff )
1507 *p++ = (c >> 6) | 0xc0;
1508 *p++ = (c & 0x3f) | 0x80;
/* Three-byte form: lead marker 0xe0, two trailing bytes.  */
1510 else if ( c <= 0xffff )
1512 *p++ = (c >> 12) | 0xe0;
1513 *p++ = ((c >> 6) & 0x3f) | 0x80;
1514 *p++ = (c & 0x3f) | 0x80;
/* Four-byte form: lead marker 0xf0, three trailing bytes.  */
1516 else if ( c <= 0x1fffff )
1518 *p++ = (c >> 18) | 0xf0;
1519 *p++ = ((c >> 12) & 0x3f) | 0x80;
1520 *p++ = ((c >> 6) & 0x3f) | 0x80;
1521 *p++ = (c & 0x3f) | 0x80;
/* Five-byte form: lead marker 0xf8, four trailing bytes.  */
1523 else if ( c <= 0x3ffffff )
1525 *p++ = (c >> 24) | 0xf8;
1526 *p++ = ((c >> 18) & 0x3f) | 0x80;
1527 *p++ = ((c >> 12) & 0x3f) | 0x80;
1528 *p++ = ((c >> 6) & 0x3f) | 0x80;
1529 *p++ = (c & 0x3f) | 0x80;
/* Six-byte form: lead marker 0xfc, five trailing bytes.  */
1533 *p++ = (c >> 30) | 0xfc;
1534 *p++ = ((c >> 24) & 0x3f) | 0x80;
1535 *p++ = ((c >> 18) & 0x3f) | 0x80;
1536 *p++ = ((c >> 12) & 0x3f) | 0x80;
1537 *p++ = ((c >> 6) & 0x3f) | 0x80;
1538 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoding: split C into its charset and position codes
   C1/C2, take its leading byte LB, and emit a prefix byte first when
   LB is a private leading byte.  */
1541 BREAKUP_CHAR (c, charset, c1, c2);
1542 lb = CHAR_LEADING_BYTE (c);
1543 if (LEADING_BYTE_PRIVATE_P (lb))
1544 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
/* Control-1 characters get special treatment here.  */
1546 if (EQ (charset, Vcharset_control_1))
1555 /* Return the first character from a Mule-encoded string in STR,
1556 assuming it's non-ASCII. Do not call this directly.
1557 Use the macro charptr_emchar() instead. */
/* Decode and return the character whose first byte is at STR.  */
1560 non_ascii_charptr_emchar (CONST Bufbyte *str)
/* The first byte B is compared against descending thresholds (0xf8,
   0xf0, 0xe0, 0xc0).
   NOTE(review): presumably each branch fixes LEN, the number of bytes
   still to read, and seeds CH from B -- confirm.  */
1573 else if ( b >= 0xf8 )
1578 else if ( b >= 0xf0 )
1583 else if ( b >= 0xe0 )
1588 else if ( b >= 0xc0 )
/* LEN iterations, each shifting CH left by six bits and merging in the
   low six bits of B.  */
1598 for( ; len > 0; len-- )
1601 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte decoding: I0 is the first byte of the sequence; I1 and
   I2 receive the position codes (I2 stays 0 unless it is set below).  */
1605 Bufbyte i0 = *str, i1, i2 = 0;
1606 Lisp_Object charset;
/* Control-1: the character is the byte after the leading byte, minus
   0x20.  */
1608 if (i0 == LEADING_BYTE_CONTROL_1)
1609 return (Emchar) (*++str - 0x20);
/* A prefix byte means the real leading byte follows it.  */
1611 if (LEADING_BYTE_PREFIX_P (i0))
/* Look the charset up by leading byte; two-dimensional charsets carry a
   second position code.  */
1616 charset = CHARSET_BY_LEADING_BYTE (i0);
1617 if (XCHARSET_DIMENSION (charset) == 2)
1620 return MAKE_CHAR (charset, i1, i2);
1624 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1625 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check for the non-ASCII character code CH, based on its
   three bit fields F1, F2 and F3 and on the charset those fields
   select.  */
1629 non_ascii_valid_char_p (Emchar ch)
1633 /* Must have only lowest 19 bits set */
/* Break CH into its three fields.  */
1637 f1 = CHAR_FIELD1 (ch);
1638 f2 = CHAR_FIELD2 (ch);
1639 f3 = CHAR_FIELD3 (ch);
1643 Lisp_Object charset;
/* Case where F2 selects the charset: F2 has to lie inside either the
   official or the private field-2 range.  */
1645 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1646 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1647 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Positions 0x20 and 0x7F are singled out: for them the answer depends
   on whether the charset has 96 characters (see the return below).  */
1652 if (f3 != 0x20 && f3 != 0x7F)
1656 NOTE: This takes advantage of the fact that
1657 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1658 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1660 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1661 return (XCHARSET_CHARS (charset) == 96);
1665 Lisp_Object charset;
/* Case where F1 selects the charset: F1 has to lie inside either the
   official or the private field-1 range, and both position fields are
   tested against the lower bound 0x20.  */
1667 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1668 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1669 f1 > MAX_CHAR_FIELD1_PRIVATE)
1671 if (f2 < 0x20 || f3 < 0x20)
1674 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are checked by looking CH up in the
   composite-character hash table.  */
1675 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1677 if (UNBOUNDP (Fgethash (make_int (ch),
1678 Vcomposite_char_char2string_hash_table,
1683 #endif /* ENABLE_COMPOSITE_CHARS */
/* As above, positions 0x20 and 0x7F in either field make the answer
   depend on the charset having 96 characters.  */
1685 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
/* Official and private leading bytes use different offsets from F1.  */
1688 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1690 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1693 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1695 return (XCHARSET_CHARS (charset) == 96);
1701 /************************************************************************/
1702 /* Basic string functions */
1703 /************************************************************************/
1705 /* Copy the character pointed to by PTR into STR, assuming it's
1706 non-ASCII. Do not call this directly. Use the macro
1707 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR into STR and return the number
   of bytes it occupies in STR, computed as STRPTR + 1 - STR.  */
1710 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1712 Bufbyte *strptr = str;
/* Dispatch on the representation length implied by the byte at STRPTR.
   Each case stores one more byte; only the last one leaves PTR where it
   is.  */
1714 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1716 /* Notice fallthrough. */
1718 case 6: *++strptr = *ptr++;
1719 case 5: *++strptr = *ptr++;
1721 case 4: *++strptr = *ptr++;
1722 case 3: *++strptr = *ptr++;
1723 case 2: *++strptr = *ptr;
1728 return strptr + 1 - str;
1732 /************************************************************************/
1733 /* streams of Emchars */
1734 /************************************************************************/
1736 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1737 The functions below are not meant to be called directly; use
1738 the macros in insdel.h. */
/* Finish reading one character from STREAM.  CH is its first byte, which
   the caller has already read; the remaining bytes are fetched with
   Lstream_getc() into the local buffer STR, and the assembled sequence
   is decoded with charptr_emchar().  */
1741 Lstream_get_emchar_1 (Lstream *stream, int ch)
1743 Bufbyte str[MAX_EMCHAR_LEN];
1744 Bufbyte *strptr = str;
1746 str[0] = (Bufbyte) ch;
/* The first byte determines how many more bytes belong to the
   character.  */
1747 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1749 /* Notice fallthrough. */
1752 ch = Lstream_getc (stream);
1754 *++strptr = (Bufbyte) ch;
1756 ch = Lstream_getc (stream);
1758 *++strptr = (Bufbyte) ch;
1761 ch = Lstream_getc (stream);
1763 *++strptr = (Bufbyte) ch;
1765 ch = Lstream_getc (stream);
1767 *++strptr = (Bufbyte) ch;
1769 ch = Lstream_getc (stream);
1771 *++strptr = (Bufbyte) ch;
1776 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar() and write the
   resulting LEN bytes to STREAM; the result of Lstream_write() is
   returned unchanged.  */
1780 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1782 Bufbyte str[MAX_EMCHAR_LEN];
1783 Bytecount len = set_charptr_emchar (str, ch);
1784 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar() and push the
   resulting LEN bytes back onto STREAM with Lstream_unread().  */
1788 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1790 Bufbyte str[MAX_EMCHAR_LEN];
1791 Bytecount len = set_charptr_emchar (str, ch);
1792 Lstream_unread (stream, str, len);
1796 /************************************************************************/
1797 /* charset object */
1798 /************************************************************************/
/* Mark method for charset objects: applies MARKOBJ to the short name,
   long name, doc string, registry and CCL program stored in the charset
   OBJ.  */
1801 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1803 struct Lisp_Charset *cs = XCHARSET (obj);
1805 markobj (cs->short_name);
1806 markobj (cs->long_name);
1807 markobj (cs->doc_string);
1808 markobj (cs->registry);
1809 markobj (cs->ccl_program);
/* Print method for charset objects.  Writes to PRINTCHARFUN, in order:
   "#<charset ", the name, the short name, the long name, the doc
   string, a formatted summary (type, direction, columns, graphic, final
   character), the registry, and finally the object's uid in hex
   followed by ">".  */
1814 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1816 struct Lisp_Charset *cs = XCHARSET (obj);
/* NOTE(review): presumably raised only when a readable representation
   was requested -- the guarding condition should be confirmed.  */
1820 error ("printing unreadable object #<charset %s 0x%x>",
1821 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1824 write_c_string ("#<charset ", printcharfun);
1825 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1826 write_c_string (" ", printcharfun);
1827 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1828 write_c_string (" ", printcharfun);
1829 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1830 write_c_string (" ", printcharfun);
1831 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Type is spelled out as a string, direction as "l2r" or "r2l".  */
1832 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1833 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1834 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1835 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1837 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1838 CHARSET_COLUMNS (cs),
1839 CHARSET_GRAPHIC (cs),
1840 CHARSET_FINAL (cs));
1841 write_c_string (buf, printcharfun);
1842 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1843 sprintf (buf, " 0x%x>", cs->header.uid);
1844 write_c_string (buf, printcharfun);
/* Record description for struct Lisp_Charset, handed to
   DEFINE_LRECORD_IMPLEMENTATION.
   NOTE(review): the 7 is presumably the number of consecutive
   Lisp_Object slots beginning at `name' -- confirm against the struct
   definition.  */
1847 static const struct lrecord_description charset_description[] = {
1848 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
/* Register the "charset" lrecord type with mark_charset and
   print_charset as its mark and print methods and charset_description
   as its description; the three remaining method slots are 0.  */
1852 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1853 mark_charset, print_charset, 0, 0, 0,
1854 charset_description,
1855 struct Lisp_Charset);
1856 /* Make a new charset. */
/* Allocate a charset object, fill in its fields from the arguments,
   derive dimension / chars-per-dimension from TYPE, and enter the
   object in the lookup tables (by attributes, by leading byte, and by
   NAME in Vcharset_hash_table).  DECODING_TABLE may be NULL.  */
1859 make_charset (Charset_ID id, Lisp_Object name,
1860 unsigned char type, unsigned char columns, unsigned char graphic,
1861 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1862 Lisp_Object long_name, Lisp_Object doc,
1864 Emchar* decoding_table)
1867 struct Lisp_Charset *cs =
1868 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1869 XSETCHARSET (obj, cs);
/* Plain field initialisation; the CCL program and the
   reverse-direction charset start out as Qnil.  */
1871 CHARSET_ID (cs) = id;
1872 CHARSET_NAME (cs) = name;
1873 CHARSET_SHORT_NAME (cs) = short_name;
1874 CHARSET_LONG_NAME (cs) = long_name;
1875 CHARSET_DIRECTION (cs) = direction;
1876 CHARSET_TYPE (cs) = type;
1877 CHARSET_COLUMNS (cs) = columns;
1878 CHARSET_GRAPHIC (cs) = graphic;
1879 CHARSET_FINAL (cs) = final;
1880 CHARSET_DOC_STRING (cs) = doc;
1881 CHARSET_REGISTRY (cs) = reg;
1882 CHARSET_CCL_PROGRAM (cs) = Qnil;
1883 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1885 CHARSET_DECODING_TABLE(cs) = decoding_table;
/* Per-type setup.  For the one-dimensional types a supplied decoding
   table is inverted into a character -> byte table: entry I of the
   decoding table corresponds to byte I + 33 in a 94-character set and
   to byte I + 32 in a 96-character set.  The two-dimensional types get
   no such tables here.  */
1888 switch ( CHARSET_TYPE (cs) )
1890 case CHARSET_TYPE_94:
1891 CHARSET_DIMENSION (cs) = 1;
1892 CHARSET_CHARS (cs) = 94;
1894 if (decoding_table != NULL)
1897 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1898 for (i = 0; i < 94; i++)
1900 Emchar c = decoding_table[i];
1903 put_byte_from_character_table (c, i + 33,
1904 CHARSET_TO_BYTE1_TABLE(cs));
1908 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1909 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1912 case CHARSET_TYPE_96:
1913 CHARSET_DIMENSION (cs) = 1;
1914 CHARSET_CHARS (cs) = 96;
1916 if (decoding_table != NULL)
1919 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1920 for (i = 0; i < 96; i++)
1922 Emchar c = decoding_table[i];
1925 put_byte_from_character_table (c, i + 32,
1926 CHARSET_TO_BYTE1_TABLE(cs));
1930 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1931 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1934 case CHARSET_TYPE_94X94:
1935 CHARSET_DIMENSION (cs) = 2;
1936 CHARSET_CHARS (cs) = 94;
1938 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1939 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1942 case CHARSET_TYPE_96X96:
1943 CHARSET_DIMENSION (cs) = 2;
1944 CHARSET_CHARS (cs) = 96;
1946 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1947 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1951 case CHARSET_TYPE_128X128:
1952 CHARSET_DIMENSION (cs) = 2;
1953 CHARSET_CHARS (cs) = 128;
1955 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1956 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1959 case CHARSET_TYPE_256X256:
1960 CHARSET_DIMENSION (cs) = 2;
1961 CHARSET_CHARS (cs) = 256;
1963 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1964 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
/* Bytes per character in the internal representation: 1 for ASCII,
   otherwise the dimension plus 1 or plus 2.
   NOTE(review): the test choosing between +1 and +2 should be
   confirmed.  */
1971 if (id == LEADING_BYTE_ASCII)
1972 CHARSET_REP_BYTES (cs) = 1;
1974 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1976 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1981 /* some charsets do not have final characters. This includes
1982 ASCII, Control-1, Composite, and the two faux private
1985 assert (NILP (charset_by_attributes[type][final]));
1986 charset_by_attributes[type][final] = obj;
1988 assert (NILP (charset_by_attributes[type][final][direction]));
1989 charset_by_attributes[type][final][direction] = obj;
1993 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1994 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1997 /* official leading byte */
1998 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
2001 /* Some charsets are "faux" and don't have names or really exist at
2002 all except in the leading-byte table. */
2004 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  One counter serves 1-byte charsets and another
   2-byte charsets; each is compared against the top of its private
   range before being consumed, and running out is reported with the
   error "No more character sets free for this dimension".  */
2009 get_unallocated_leading_byte (int dimension)
2015 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2018 lb = next_allocated_1_byte_leading_byte++;
2022 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2025 lb = next_allocated_2_byte_leading_byte++;
2030 ("No more character sets free for this dimension",
2031 make_int (dimension));
/* Return the first position byte that character CH has in CHARSET, or 0
   when CH lies outside the range CHARSET covers.

   Lookup order:
     1. CHARSET's own character -> byte table, when it has one;
     2. fixed code-point ranges for the predefined charsets;
     3. for any other charset, a range computed from its final
	character and its size (94 or 96 characters per dimension).

   Each range test and the subtraction that follows it must use the
   same base: the result is (CH - base) plus the first position code of
   the set (0x20 / 32 for 96-character sets, 33 for 94-character
   sets).  */
2038 charset_get_byte1 (Lisp_Object charset, Emchar ch)
2040 Emchar_to_byte_table* table;
2042 if ((table = XCHARSET_TO_BYTE1_TABLE (charset)) != NULL)
2043 return get_byte_from_character_table (ch, table);
2044 else if (EQ (charset, Vcharset_ascii))
2045 return ch <= 0x7f ? ch : 0;
2046 else if (EQ (charset, Vcharset_control_1))
2047 return (0x80 <= ch) && (ch < 0xA0) ? ch & 0x7f : 0;
2048 else if (EQ (charset, Vcharset_latin_iso8859_1))
2049 return (0xA0 <= ch) && (ch <= 0xff) ? ch & 0x7f : 0;
2050 else if (EQ (charset, Vcharset_cyrillic_iso8859_5))
/* The lower bound has to be the Cyrillic one: testing against
   MIN_CHAR_GREEK let characters below MIN_CHAR_CYRILLIC through and
   produced a bogus byte from the subtraction below.  */
2051 return (MIN_CHAR_CYRILLIC <= ch) && (ch <= MAX_CHAR_CYRILLIC) ?
2052 ch - MIN_CHAR_CYRILLIC + 0x20 : 0;
2053 else if (EQ (charset, Vcharset_greek_iso8859_7))
2054 return (MIN_CHAR_GREEK <= ch) && (ch <= MAX_CHAR_GREEK) ?
2055 ch - MIN_CHAR_GREEK + 0x20 : 0;
2056 else if (EQ (charset, Vcharset_hebrew_iso8859_8))
2057 return (MIN_CHAR_HEBREW <= ch) && (ch <= MAX_CHAR_HEBREW) ?
2058 ch - MIN_CHAR_HEBREW + 0x20 : 0;
2059 else if (EQ (charset, Vcharset_thai_tis620))
2060 return (MIN_CHAR_THAI <= ch) && (ch <= MAX_CHAR_THAI) ?
2061 ch - MIN_CHAR_THAI + 0x20 : 0;
2062 else if (EQ (charset, Vcharset_katakana_jisx0201))
2063 return (MIN_CHAR_HALFWIDTH_KATAKANA <= ch)
2064 && (ch <= MAX_CHAR_HALFWIDTH_KATAKANA) ?
2065 ch - MIN_CHAR_HALFWIDTH_KATAKANA + 0x20 : 0;
2066 else if (EQ (charset, Vcharset_ucs_bmp))
2068 else if (XCHARSET_DIMENSION (charset) == 1)
/* Generic one-dimensional charsets: the final character (counted from
   '0') selects a 94- or 96-wide window above MIN_CHAR_94 or
   MIN_CHAR_96.  */
2070 if (XCHARSET_CHARS (charset) == 94)
2071 return (MIN_CHAR_94 + (XCHARSET_FINAL (charset) - '0') * 94 <= ch)
2072 && (ch < MIN_CHAR_94 + (XCHARSET_FINAL (charset) - '0' + 1) * 94) ?
2073 ((ch - MIN_CHAR_94) % 94) + 33 : 0;
2074 else /* if (XCHARSET_CHARS (charset) == 96) */
2075 return (MIN_CHAR_96 + (XCHARSET_FINAL (charset) - '0') * 96 <= ch)
2076 && (ch < MIN_CHAR_96 + (XCHARSET_FINAL (charset) - '0' + 1) * 96) ?
/* The offset is taken from MIN_CHAR_96, the same base as the range
   test above; MIN_CHAR_94 was used here by mistake.  */
2077 ((ch - MIN_CHAR_96) % 96) + 32 : 0;
2079 else /* if (XCHARSET_DIMENSION (charset) == 2) */
/* Generic two-dimensional charsets: the first byte is the row, i.e. the
   offset divided by the row length.  */
2081 if (XCHARSET_CHARS (charset) == 94)
2082 return (MIN_CHAR_94x94
2083 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
2084 && (ch < MIN_CHAR_94x94
2085 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
2086 (((ch - MIN_CHAR_94x94) / 94) % 94) + 33 : 0;
2087 else /* if (XCHARSET_CHARS (charset) == 96) */
2088 return (MIN_CHAR_96x96
2089 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
2090 && (ch < MIN_CHAR_96x96
2091 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
2092 (((ch - MIN_CHAR_96x96) / 96) % 96) + 32 : 0;
/* Return the second position byte that character CH has in CHARSET.
   One-dimensional charsets are handled first as a special case.  For
   two-dimensional charsets the lookup order is: CHARSET's own
   character -> byte table, then the UCS BMP charset, then a range
   computed from the charset's final character and size; 0 is the result
   when CH is outside that range.  */
2097 charset_get_byte2 (Lisp_Object charset, Emchar ch)
2099 if (XCHARSET_DIMENSION (charset) == 1)
2103 Emchar_to_byte_table* table;
2105 if ((table = XCHARSET_TO_BYTE2_TABLE (charset)) != NULL)
2106 return get_byte_from_character_table (ch, table);
2107 else if (EQ (charset, Vcharset_ucs_bmp))
2108 return (ch >> 8) & 0xff;
/* The second byte is the column: the offset within the charset's window
   modulo the row length, plus the first position code (33 or 32).  */
2109 else if (XCHARSET_CHARS (charset) == 94)
2110 return (MIN_CHAR_94x94
2111 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
2112 && (ch < MIN_CHAR_94x94
2113 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
2114 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
2115 else /* if (XCHARSET_CHARS (charset) == 96) */
2116 return (MIN_CHAR_96x96
2117 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
2118 && (ch < MIN_CHAR_96x96
2119 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
2120 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
2124 Lisp_Object Vdefault_preferred_coded_charset_list;
2128 /************************************************************************/
2129 /* Basic charset Lisp functions */
2130 /************************************************************************/
/* Lisp type predicate: Qt when OBJECT satisfies CHARSETP, else Qnil.  */
2132 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2133 Return non-nil if OBJECT is a charset.
2137 return CHARSETP (object) ? Qt : Qnil;
/* Charset objects are returned as they are; anything else must be a
   symbol (CHECK_SYMBOL) and is looked up in Vcharset_hash_table, with
   Qnil as the result for an unknown name.  */
2140 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2141 Retrieve the charset of the given name.
2142 If CHARSET-OR-NAME is a charset object, it is simply returned.
2143 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2144 nil is returned. Otherwise the associated charset object is returned.
2148 if (CHARSETP (charset_or_name))
2149 return charset_or_name;
2151 CHECK_SYMBOL (charset_or_name);
2152 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Strict variant of Ffind_charset: performs the same lookup and, per
   its doc string, signals "No such charset" with NAME instead of
   returning nil.  */
2155 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2156 Retrieve the charset of the given name.
2157 Same as `find-charset' except an error is signalled if there is no such
2158 charset instead of returning nil.
2162 Lisp_Object charset = Ffind_charset (name);
2165 signal_simple_error ("No such charset", name);
2169 /* We store the charsets in hash tables with the names as the key and the
2170 actual charset object as the value. Occasionally we need to use them
2171 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash to add_charset_to_list_mapper.  */
2172 struct charset_list_closure
/* Address of the list variable the mapper conses onto.  */
2174 Lisp_Object *charset_list;
/* Hash-table mapping function: pushes the name of the charset VALUE
   onto the front of the list that the closure points to.  KEY is not
   used by the lines below.  */
2178 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2179 void *charset_list_closure)
2181 /* This function can GC */
2182 struct charset_list_closure *chcl =
2183 (struct charset_list_closure*) charset_list_closure;
2184 Lisp_Object *charset_list = chcl->charset_list;
2186 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list'.  Starts from an empty (Qnil) list that is
   protected with GCPRO1, walks Vcharset_hash_table with elisp_maphash so
   that add_charset_to_list_mapper conses every charset's name onto it,
   and returns the resulting list. */
2190 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2191 Return a list of the names of all defined charsets.
2195 Lisp_Object charset_list = Qnil;
2196 struct gcpro gcpro1;
2197 struct charset_list_closure charset_list_closure;
2199 GCPRO1 (charset_list);
2200 charset_list_closure.charset_list = &charset_list;
2201 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2202 &charset_list_closure);
2205 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET with Fget_charset and
   returns its XCHARSET_NAME. */
2208 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2209 Return the name of the given charset.
2213 return XCHARSET_NAME (Fget_charset (charset));
/* Lisp primitive `make-charset'.

   Checks that NAME is a symbol and DOC-STRING is a string or nil, and
   refuses to redefine an existing charset name.  Each PROPS entry is
   validated in turn; an unknown keyword signals "Unrecognized property".
   A DIMENSION/CHARS/FINAL combination that is already taken in either
   direction is rejected.  The leading byte comes from
   get_unallocated_leading_byte.  Defaults are then filled in: empty doc
   string and registry, the symbol's name as short name, the doc string as
   long name, and the dimension as column count.  The charset is built
   with make_charset, and a supplied 'ccl-program is stored afterwards.

   The 'long-name test is chained with `else if'.  As a free-standing `if'
   it started a second chain, so a 'short-name entry was accepted by the
   first test and then fell through that second chain to the
   "Unrecognized property" error. */
2216 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2217 Define a new character set.
2218 This function is for use with Mule support.
2219 NAME is a symbol, the name by which the character set is normally referred.
2220 DOC-STRING is a string describing the character set.
2221 PROPS is a property list, describing the specific nature of the
2222 character set. Recognized properties are:
2224 'short-name Short version of the charset name (ex: Latin-1)
2225 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2226 'registry A regular expression matching the font registry field for
2228 'dimension Number of octets used to index a character in this charset.
2229 Either 1 or 2. Defaults to 1.
2230 'columns Number of columns used to display a character in this charset.
2231 Only used in TTY mode. (Under X, the actual width of a
2232 character can be derived from the font used to display the
2233 characters.) If unspecified, defaults to the dimension
2234 (this is almost always the correct value).
2235 'chars Number of characters in each dimension (94 or 96).
2236 Defaults to 94. Note that if the dimension is 2, the
2237 character set thus described is 94x94 or 96x96.
2238 'final Final byte of ISO 2022 escape sequence. Must be
2239 supplied. Each combination of (DIMENSION, CHARS) defines a
2240 separate namespace for final bytes. Note that ISO
2241 2022 restricts the final byte to the range
2242 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2243 dimension == 2. Note also that final bytes in the range
2244 0x30 - 0x3F are reserved for user-defined (not official)
2246 'graphic 0 (use left half of font on output) or 1 (use right half
2247 of font on output). Defaults to 0. For example, for
2248 a font whose registry is ISO8859-1, the left half
2249 (octets 0x20 - 0x7F) is the `ascii' character set, while
2250 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2251 character set. With 'graphic set to 0, the octets
2252 will have their high bit cleared; with it set to 1,
2253 the octets will have their high bit set.
2254 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2256 'ccl-program A compiled CCL program used to convert a character in
2257 this charset into an index into the font. This is in
2258 addition to the 'graphic property. The CCL program
2259 is passed the octets of the character, with the high
2260 bit cleared and set depending upon whether the value
2261 of the 'graphic property is 0 or 1.
2263 (name, doc_string, props))
2265 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2266 int direction = CHARSET_LEFT_TO_RIGHT;
2268 Lisp_Object registry = Qnil;
2269 Lisp_Object charset;
2270 Lisp_Object rest, keyword, value;
2271 Lisp_Object ccl_program = Qnil;
2272 Lisp_Object short_name = Qnil, long_name = Qnil;
2274 CHECK_SYMBOL (name);
2275 if (!NILP (doc_string))
2276 CHECK_STRING (doc_string);
2278 charset = Ffind_charset (name);
2279 if (!NILP (charset))
2280 signal_simple_error ("Cannot redefine existing charset", name);
2282 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2284 if (EQ (keyword, Qshort_name))
2286 CHECK_STRING (value);
2290 else if (EQ (keyword, Qlong_name))
2292 CHECK_STRING (value);
2296 else if (EQ (keyword, Qdimension))
2299 dimension = XINT (value);
2300 if (dimension < 1 || dimension > 2)
2301 signal_simple_error ("Invalid value for 'dimension", value);
2304 else if (EQ (keyword, Qchars))
2307 chars = XINT (value);
2308 if (chars != 94 && chars != 96)
2309 signal_simple_error ("Invalid value for 'chars", value);
2312 else if (EQ (keyword, Qcolumns))
2315 columns = XINT (value);
2316 if (columns != 1 && columns != 2)
2317 signal_simple_error ("Invalid value for 'columns", value);
2320 else if (EQ (keyword, Qgraphic))
2323 graphic = XINT (value);
2324 if (graphic < 0 || graphic > 1)
2325 signal_simple_error ("Invalid value for 'graphic", value);
2328 else if (EQ (keyword, Qregistry))
2330 CHECK_STRING (value);
2334 else if (EQ (keyword, Qdirection))
2336 if (EQ (value, Ql2r))
2337 direction = CHARSET_LEFT_TO_RIGHT;
2338 else if (EQ (value, Qr2l))
2339 direction = CHARSET_RIGHT_TO_LEFT;
2341 signal_simple_error ("Invalid value for 'direction", value);
2344 else if (EQ (keyword, Qfinal))
2346 CHECK_CHAR_COERCE_INT (value);
2347 final = XCHAR (value);
2348 if (final < '0' || final > '~')
2349 signal_simple_error ("Invalid value for 'final", value);
2352 else if (EQ (keyword, Qccl_program))
2354 CHECK_VECTOR (value);
2355 ccl_program = value;
2359 signal_simple_error ("Unrecognized property", keyword);
2363 error ("'final must be specified");
2364 if (dimension == 2 && final > 0x5F)
2366 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2370 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2372 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2374 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2375 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2377 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2384 /* id = CHARSET_ID_OFFSET_94 + final; */
2385 id = get_unallocated_leading_byte (dimension);
2387 else if (chars == 96)
2389 id = get_unallocated_leading_byte (dimension);
2396 else if (dimension == 2)
2400 id = get_unallocated_leading_byte (dimension);
2402 else if (chars == 96)
2404 id = get_unallocated_leading_byte (dimension);
2416 id = get_unallocated_leading_byte (dimension);
2419 if (NILP (doc_string))
2420 doc_string = build_string ("");
2422 if (NILP (registry))
2423 registry = build_string ("");
2425 if (NILP (short_name))
2426 XSETSTRING (short_name, XSYMBOL (name)->name);
2428 if (NILP (long_name))
2429 long_name = doc_string;
2432 columns = dimension;
2433 charset = make_charset (id, name, type, columns, graphic,
2434 final, direction, short_name, long_name,
2435 doc_string, registry,
2437 if (!NILP (ccl_program))
2438 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.

   Resolves CHARSET with Fget_charset and signals an error if it already
   has a reverse-direction charset or if NEW-NAME (a symbol) names an
   existing charset.  The new charset copies the type, column count,
   graphic, final byte, doc string, short/long names and registry of the
   source.  It gets a fresh leading byte from get_unallocated_leading_byte
   and the opposite direction.  Finally the two charsets are linked to
   each other through their REVERSE_DIRECTION_CHARSET slots. */
2442 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2444 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2445 NEW-NAME is the name of the new charset. Return the new charset.
2447 (charset, new_name))
2449 Lisp_Object new_charset = Qnil;
2450 int id, dimension, columns, graphic, final;
2451 int direction, type;
2452 Lisp_Object registry, doc_string, short_name, long_name;
2453 struct Lisp_Charset *cs;
2455 charset = Fget_charset (charset);
2456 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2457 signal_simple_error ("Charset already has reverse-direction charset",
2460 CHECK_SYMBOL (new_name);
2461 if (!NILP (Ffind_charset (new_name)))
2462 signal_simple_error ("Cannot redefine existing charset", new_name);
2464 cs = XCHARSET (charset);
2466 type = CHARSET_TYPE (cs);
2467 columns = CHARSET_COLUMNS (cs);
2468 dimension = CHARSET_DIMENSION (cs);
2469 id = get_unallocated_leading_byte (dimension);
2471 graphic = CHARSET_GRAPHIC (cs);
2472 final = CHARSET_FINAL (cs);
2473 direction = CHARSET_RIGHT_TO_LEFT;
2474 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2475 direction = CHARSET_LEFT_TO_RIGHT;
2476 doc_string = CHARSET_DOC_STRING (cs);
2477 short_name = CHARSET_SHORT_NAME (cs);
2478 long_name = CHARSET_LONG_NAME (cs);
2479 registry = CHARSET_REGISTRY (cs);
2481 new_charset = make_charset (id, new_name, type, columns,
2482 graphic, final, direction, short_name, long_name,
2483 doc_string, registry,
2486 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2487 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2492 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset': resolves CHARSET with
   Fget_charset and returns its XCHARSET_REVERSE_DIRECTION_CHARSET slot.
   NOTE(review): syms_of_mule_charset has no DEFSUBR for this function,
   only a commented-out DEFSUBR (Freverse_direction_charset), so it looks
   unregistered or compiled out -- confirm against the full source. */
2494 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2496 Return the reverse-direction charset parallel to CHARSET, if any.
2497 This is the charset with the same properties (in particular, the same
2498 dimension, number of characters per dimension, and final byte) as
2499 CHARSET but whose characters are displayed in the opposite direction.
2503 charset = Fget_charset (charset);
2504 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.

   Validates DIMENSION (1 or 2), CHARS (94 or 96), FINAL ('0' through '~',
   and at most 0x5F when the dimension is 2) and DIRECTION (l2r, r2l or
   nil).  These are turned into a CHARSET_TYPE_* value and the charset is
   looked up with CHARSET_BY_ATTRIBUTES.  With no DIRECTION the
   left-to-right and right-to-left entries are both consulted.  The
   visible return statement hands back XCHARSET_NAME of the charset found.
   NOTE(review): the assignments to `ch' and `fi' and the conditions
   around the lookups and the return are not visible in this listing. */
2508 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2509 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2510 If DIRECTION is omitted, both directions will be checked (left-to-right
2511 will be returned if character sets exist for both directions).
2513 (dimension, chars, final, direction))
2515 int dm, ch, fi, di = -1;
2517 Lisp_Object obj = Qnil;
2519 CHECK_INT (dimension);
2520 dm = XINT (dimension);
2521 if (dm < 1 || dm > 2)
2522 signal_simple_error ("Invalid value for DIMENSION", dimension);
2526 if (ch != 94 && ch != 96)
2527 signal_simple_error ("Invalid value for CHARS", chars);
2529 CHECK_CHAR_COERCE_INT (final);
2531 if (fi < '0' || fi > '~')
2532 signal_simple_error ("Invalid value for FINAL", final);
2534 if (EQ (direction, Ql2r))
2535 di = CHARSET_LEFT_TO_RIGHT;
2536 else if (EQ (direction, Qr2l))
2537 di = CHARSET_RIGHT_TO_LEFT;
2538 else if (!NILP (direction))
2539 signal_simple_error ("Invalid value for DIRECTION", direction);
2541 if (dm == 2 && fi > 0x5F)
2543 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2546 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2548 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2552 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2554 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2557 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2560 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET with Fget_charset
   and returns its XCHARSET_SHORT_NAME. */
2564 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2565 Return short name of CHARSET.
2569 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET with Fget_charset
   and returns its XCHARSET_LONG_NAME. */
2572 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2573 Return long name of CHARSET.
2577 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET with Fget_charset
   and returns its XCHARSET_DOC_STRING. */
2580 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2581 Return description of CHARSET.
2585 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET with Fget_charset
   and returns its XCHARSET_DIMENSION as a Lisp integer. */
2588 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2589 Return dimension of CHARSET.
2593 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.

   Resolves CHARSET with Fget_charset, requires PROP to be a symbol, and
   returns the matching field.  Integer fields are wrapped with make_int,
   the final byte with make_char, and the direction is reported as the
   symbol l2r or r2l.  For reverse-direction-charset the visible return
   hands back XCHARSET_NAME of the linked charset.  Any other symbol
   signals "Unrecognized charset property name". */
2596 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2597 Return property PROP of CHARSET.
2598 Recognized properties are those listed in `make-charset', as well as
2599 'name and 'doc-string.
2603 struct Lisp_Charset *cs;
2605 charset = Fget_charset (charset);
2606 cs = XCHARSET (charset);
2608 CHECK_SYMBOL (prop);
2609 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2610 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2611 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2612 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2613 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2614 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2615 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2616 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2617 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2618 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2619 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2620 if (EQ (prop, Qdirection))
2621 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2622 if (EQ (prop, Qreverse_direction_charset))
2624 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2628 return XCHARSET_NAME (obj);
2630 signal_simple_error ("Unrecognized charset property name", prop);
2631 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET with Fget_charset and
   returns its XCHARSET_LEADING_BYTE as a Lisp integer. */
2634 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2635 Return charset identification number of CHARSET.
2639 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2642 /* #### We need to figure out which properties we really want to allow to be set. */
/* Lisp primitive `set-charset-ccl-program': resolves CHARSET with
   Fget_charset, requires CCL-PROGRAM to be a vector (CHECK_VECTOR), and
   stores it in the charset's XCHARSET_CCL_PROGRAM slot. */
2645 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2646 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2648 (charset, ccl_program))
2650 charset = Fget_charset (charset);
2651 CHECK_VECTOR (ccl_program);
2652 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* For every device visited by DEVICE_LOOP_NO_BREAK, fetch the entry for
   CHARSET from that device's charset_font_cache and, when one exists
   (the lookup did not yield Qunbound), empty it with Fclrhash. */
2657 invalidate_charset_font_caches (Lisp_Object charset)
2659 /* Invalidate font cache entries for charset on all devices. */
2660 Lisp_Object devcons, concons, hash_table;
2661 DEVICE_LOOP_NO_BREAK (devcons, concons)
2663 struct device *d = XDEVICE (XCAR (devcons));
2664 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2665 if (!UNBOUNDP (hash_table))
2666 Fclrhash (hash_table);
2670 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
/* Lisp primitive `set-charset-registry'.  Resolves CHARSET with
   Fget_charset, requires REGISTRY to be a string, and stores it in the
   XCHARSET_REGISTRY slot.  It then calls invalidate_charset_font_caches
   for the charset and face_property_was_changed for the default face's
   font. */
2671 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2672 Set the 'registry property of CHARSET to REGISTRY.
2674 (charset, registry))
2676 charset = Fget_charset (charset);
2677 CHECK_STRING (registry);
2678 XCHARSET_REGISTRY (charset) = registry;
2679 invalidate_charset_font_caches (charset);
2680 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2685 /************************************************************************/
2686 /* Lisp primitives for working with characters */
2687 /************************************************************************/
/* Lisp primitive `make-char'.

   Resolves CHARSET with Fget_charset and picks the legal octet range:
   0-127 for ascii, 0-31 for control-1, 0-255 for 256-character sets,
   33-126 for 94-character sets and 32-127 otherwise.  An octet outside
   that range is reported with args_out_of_range_3.  A one-dimensional
   charset is built with MAKE_CHAR (charset, a1, 0); otherwise ARG2 is
   masked with 0x7f, range-checked the same way, and both octets are used.
   NOTE(review): the declaration and computation of `a1', and the check
   that rejects a non-nil ARG2 for dimension one, are not visible in this
   listing. */
2689 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2690 Make a character from CHARSET and octets ARG1 and ARG2.
2691 ARG2 is required only for characters from two-dimensional charsets.
2692 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2693 character s with caron.
2695 (charset, arg1, arg2))
2697 struct Lisp_Charset *cs;
2699 int lowlim, highlim;
2701 charset = Fget_charset (charset);
2702 cs = XCHARSET (charset);
2704 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2705 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2707 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2709 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2710 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2713 /* It is useful (and safe, according to Olivier Galibert) to strip
2714 the 8th bit off ARG1 and ARG2 because it allows programmers to
2715 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2716 Latin 2 code of the character. */
2724 if (a1 < lowlim || a1 > highlim)
2725 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2727 if (CHARSET_DIMENSION (cs) == 1)
2731 ("Charset is of dimension one; second octet must be nil", arg2);
2732 return make_char (MAKE_CHAR (charset, a1, 0));
2741 a2 = XINT (arg2) & 0x7f;
2743 if (a2 < lowlim || a2 > highlim)
2744 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2746 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset': coerces CH with CHECK_CHAR_COERCE_INT and
   returns the XCHARSET_NAME of the charset that CHAR_CHARSET reports for
   it. */
2749 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2750 Return the character set of char CH.
2754 CHECK_CHAR_COERCE_INT (ch);
2756 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `split-char'.  Coerces CHARACTER with
   CHECK_CHAR_COERCE_INT and splits it with BREAKUP_CHAR into a charset
   and the octets c1 and c2.  For a charset of dimension 2 it builds the
   list (NAME C1 C2); otherwise it builds (NAME C1).  `charset' and `rc'
   are protected with GCPRO2 while the list is built.
   NOTE(review): the declarations of c1/c2 and the final return are not
   visible in this listing. */
2759 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2760 Return list of charset and one or two position-codes of CHAR.
2764 /* This function can GC */
2765 struct gcpro gcpro1, gcpro2;
2766 Lisp_Object charset = Qnil;
2767 Lisp_Object rc = Qnil;
2770 GCPRO2 (charset, rc);
2771 CHECK_CHAR_COERCE_INT (character);
2773 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2775 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2777 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2781 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2789 #ifdef ENABLE_COMPOSITE_CHARS
2790 /************************************************************************/
2791 /* composite character functions */
2792 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.

   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path builds a
   character in Vcharset_composite from composite_char_row_next and
   composite_char_col_next and records it in both the char->string and
   string->char tables.  It then advances the column, wrapping to 32 and
   bumping the row once the column reaches 128.  When the row has already
   reached 128 it signals "No more composite chars available".
   NOTE(review): the test that selects the allocation path (presumably a
   failed lookup) and the return statement are not visible in this
   listing. */
2795 lookup_composite_char (Bufbyte *str, int len)
2797 Lisp_Object lispstr = make_string (str, len);
2798 Lisp_Object ch = Fgethash (lispstr,
2799 Vcomposite_char_string2char_hash_table,
2805 if (composite_char_row_next >= 128)
2806 signal_simple_error ("No more composite chars available", lispstr);
2807 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2808 composite_char_col_next);
2809 Fputhash (make_char (emch), lispstr,
2810 Vcomposite_char_char2string_hash_table);
2811 Fputhash (lispstr, make_char (emch),
2812 Vcomposite_char_string2char_hash_table);
2813 composite_char_col_next++;
2814 if (composite_char_col_next >= 128)
2816 composite_char_col_next = 32;
2817 composite_char_row_next++;
/* Look CH up in Vcomposite_char_char2string_hash_table and assert that an
   entry exists (the result is not unbound).
   NOTE(review): the return statement is not visible in this listing;
   presumably the string found is returned. */
2826 composite_char_string (Emchar ch)
2828 Lisp_Object str = Fgethash (make_char (ch),
2829 Vcomposite_char_char2string_hash_table,
2831 assert (!UNBOUNDP (str));
/* `make-composite-char': requires STRING to be a Lisp string and returns
   the character that lookup_composite_char yields for its data and
   length. */
2835 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2836 Convert a string into a single composite character.
2837 The character is the result of overstriking all the characters in
2842 CHECK_STRING (string);
2843 return make_char (lookup_composite_char (XSTRING_DATA (string),
2844 XSTRING_LENGTH (string)));
/* `composite-char-string': signals "Must be composite char" unless the
   character's leading byte is LEADING_BYTE_COMPOSITE, then returns the
   result of composite_char_string for it.
   NOTE(review): the argument check and the assignment of `emch' are not
   visible in this listing. */
2847 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2848 Return a string of the characters comprising a composite character.
2856 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2857 signal_simple_error ("Must be composite char", ch);
2858 return composite_char_string (emch);
2860 #endif /* ENABLE_COMPOSITE_CHARS */
2863 /************************************************************************/
2864 /* initialization */
2865 /************************************************************************/
/* Symbol-table initialisation for this file.  Registers the Lisp
   primitives defined in this file with DEFSUBR (the composite-character
   pair only under ENABLE_COMPOSITE_CHARS).  It then interns the property
   keywords, the direction symbols l2r/r2l, and the names of the
   predefined charsets with defsymbol. */
2868 syms_of_mule_charset (void)
2870 DEFSUBR (Fcharsetp);
2871 DEFSUBR (Ffind_charset);
2872 DEFSUBR (Fget_charset);
2873 DEFSUBR (Fcharset_list);
2874 DEFSUBR (Fcharset_name);
2875 DEFSUBR (Fmake_charset);
2876 DEFSUBR (Fmake_reverse_direction_charset);
2877 /* DEFSUBR (Freverse_direction_charset); */
2878 DEFSUBR (Fcharset_from_attributes);
2879 DEFSUBR (Fcharset_short_name);
2880 DEFSUBR (Fcharset_long_name);
2881 DEFSUBR (Fcharset_description);
2882 DEFSUBR (Fcharset_dimension);
2883 DEFSUBR (Fcharset_property);
2884 DEFSUBR (Fcharset_id);
2885 DEFSUBR (Fset_charset_ccl_program);
2886 DEFSUBR (Fset_charset_registry);
2888 DEFSUBR (Fmake_char);
2889 DEFSUBR (Fchar_charset);
2890 DEFSUBR (Fsplit_char);
2892 #ifdef ENABLE_COMPOSITE_CHARS
2893 DEFSUBR (Fmake_composite_char);
2894 DEFSUBR (Fcomposite_char_string);
2897 defsymbol (&Qcharsetp, "charsetp");
2898 defsymbol (&Qregistry, "registry");
2899 defsymbol (&Qfinal, "final");
2900 defsymbol (&Qgraphic, "graphic");
2901 defsymbol (&Qdirection, "direction");
2902 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2903 defsymbol (&Qshort_name, "short-name");
2904 defsymbol (&Qlong_name, "long-name");
2906 defsymbol (&Ql2r, "l2r");
2907 defsymbol (&Qr2l, "r2l");
2909 /* Charsets, compatible with FSF 20.3
2910 Naming convention is Script-Charset[-Edition] */
2911 defsymbol (&Qascii, "ascii");
2912 defsymbol (&Qcontrol_1, "control-1");
2913 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2914 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2915 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2916 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2917 defsymbol (&Qthai_tis620, "thai-tis620");
2918 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2919 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2920 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2921 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2922 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2923 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2924 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2925 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2926 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2927 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2928 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2929 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2930 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2931 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2933 defsymbol (&Qchinese_cns11643_3, "chinese-cns11643-3");
2934 defsymbol (&Qchinese_cns11643_4, "chinese-cns11643-4");
2935 defsymbol (&Qchinese_cns11643_5, "chinese-cns11643-5");
2936 defsymbol (&Qchinese_cns11643_6, "chinese-cns11643-6");
2937 defsymbol (&Qchinese_cns11643_7, "chinese-cns11643-7");
2938 defsymbol (&Qucs_bmp, "ucs-bmp");
2939 defsymbol (&Qlatin_viscii_lower, "vietnamese-viscii-lower");
2940 defsymbol (&Qlatin_viscii_upper, "vietnamese-viscii-upper");
2942 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2943 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2945 defsymbol (&Qcomposite, "composite");
/* Variable initialisation for this file.  Fills charset_by_leading_byte
   and charset_by_attributes with Qnil and seeds the next-allocated
   leading-byte counters.  It sets leading_code_private_11 and registers
   it as `leading-code-private-11', sets up `utf-2000-version', and
   registers `default-preferred-coded-charset-list' with an initial value
   of Qnil.
   NOTE(review): the two charset_by_attributes loops (2-D and 3-D) and the
   repeated assignments look like alternative preprocessor branches whose
   #if/#else lines are not visible in this listing -- confirm. */
2949 vars_of_mule_charset (void)
2956 /* Table of charsets indexed by leading byte. */
2957 for (i = 0; i < countof (charset_by_leading_byte); i++)
2958 charset_by_leading_byte[i] = Qnil;
2961 /* Table of charsets indexed by type/final-byte. */
2962 for (i = 0; i < countof (charset_by_attributes); i++)
2963 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2964 charset_by_attributes[i][j] = Qnil;
2966 /* Table of charsets indexed by type/final-byte/direction. */
2967 for (i = 0; i < countof (charset_by_attributes); i++)
2968 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2969 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2970 charset_by_attributes[i][j][k] = Qnil;
2973 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2975 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
2977 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2981 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2982 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2983 Leading-code of private TYPE9N charset of column-width 1.
2985 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2989 Vutf_2000_version = build_string("0.7 (Hirano)");
2990 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2991 Version number of UTF-2000.
2994 Vdefault_preferred_coded_charset_list = Qnil;
2995 DEFVAR_LISP ("default-preferred-coded-charset-list",
2996 &Vdefault_preferred_coded_charset_list /*
2997 Default order of preferred coded-character-set.
3003 complex_vars_of_mule_charset (void)
3005 staticpro (&Vcharset_hash_table);
3006 Vcharset_hash_table =
3007 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3009 /* Predefined character sets. We store them into variables for
3014 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
3015 CHARSET_TYPE_256X256, 1, 0, 0,
3016 CHARSET_LEFT_TO_RIGHT,
3017 build_string ("BMP"),
3018 build_string ("BMP"),
3019 build_string ("BMP"),
3024 make_charset (LEADING_BYTE_ASCII, Qascii,
3025 CHARSET_TYPE_94, 1, 0, 'B',
3026 CHARSET_LEFT_TO_RIGHT,
3027 build_string ("ASCII"),
3028 build_string ("ASCII)"),
3029 build_string ("ASCII (ISO646 IRV)"),
3030 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3032 Vcharset_control_1 =
3033 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
3034 CHARSET_TYPE_94, 1, 1, 0,
3035 CHARSET_LEFT_TO_RIGHT,
3036 build_string ("C1"),
3037 build_string ("Control characters"),
3038 build_string ("Control characters 128-191"),
3041 Vcharset_latin_iso8859_1 =
3042 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
3043 CHARSET_TYPE_96, 1, 1, 'A',
3044 CHARSET_LEFT_TO_RIGHT,
3045 build_string ("Latin-1"),
3046 build_string ("ISO8859-1 (Latin-1)"),
3047 build_string ("ISO8859-1 (Latin-1)"),
3048 build_string ("iso8859-1"),
3050 Vcharset_latin_iso8859_2 =
3051 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
3052 CHARSET_TYPE_96, 1, 1, 'B',
3053 CHARSET_LEFT_TO_RIGHT,
3054 build_string ("Latin-2"),
3055 build_string ("ISO8859-2 (Latin-2)"),
3056 build_string ("ISO8859-2 (Latin-2)"),
3057 build_string ("iso8859-2"),
3058 latin_iso8859_2_to_ucs);
3059 Vcharset_latin_iso8859_3 =
3060 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
3061 CHARSET_TYPE_96, 1, 1, 'C',
3062 CHARSET_LEFT_TO_RIGHT,
3063 build_string ("Latin-3"),
3064 build_string ("ISO8859-3 (Latin-3)"),
3065 build_string ("ISO8859-3 (Latin-3)"),
3066 build_string ("iso8859-3"),
3067 latin_iso8859_3_to_ucs);
3068 Vcharset_latin_iso8859_4 =
3069 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
3070 CHARSET_TYPE_96, 1, 1, 'D',
3071 CHARSET_LEFT_TO_RIGHT,
3072 build_string ("Latin-4"),
3073 build_string ("ISO8859-4 (Latin-4)"),
3074 build_string ("ISO8859-4 (Latin-4)"),
3075 build_string ("iso8859-4"),
3076 latin_iso8859_4_to_ucs);
3077 Vcharset_thai_tis620 =
3078 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
3079 CHARSET_TYPE_96, 1, 1, 'T',
3080 CHARSET_LEFT_TO_RIGHT,
3081 build_string ("TIS620"),
3082 build_string ("TIS620 (Thai)"),
3083 build_string ("TIS620.2529 (Thai)"),
3084 build_string ("tis620"),
3086 Vcharset_greek_iso8859_7 =
3087 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
3088 CHARSET_TYPE_96, 1, 1, 'F',
3089 CHARSET_LEFT_TO_RIGHT,
3090 build_string ("ISO8859-7"),
3091 build_string ("ISO8859-7 (Greek)"),
3092 build_string ("ISO8859-7 (Greek)"),
3093 build_string ("iso8859-7"),
3095 Vcharset_arabic_iso8859_6 =
3096 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
3097 CHARSET_TYPE_96, 1, 1, 'G',
3098 CHARSET_RIGHT_TO_LEFT,
3099 build_string ("ISO8859-6"),
3100 build_string ("ISO8859-6 (Arabic)"),
3101 build_string ("ISO8859-6 (Arabic)"),
3102 build_string ("iso8859-6"),
3104 Vcharset_hebrew_iso8859_8 =
3105 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
3106 CHARSET_TYPE_96, 1, 1, 'H',
3107 CHARSET_RIGHT_TO_LEFT,
3108 build_string ("ISO8859-8"),
3109 build_string ("ISO8859-8 (Hebrew)"),
3110 build_string ("ISO8859-8 (Hebrew)"),
3111 build_string ("iso8859-8"),
3113 Vcharset_katakana_jisx0201 =
3114 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
3115 CHARSET_TYPE_94, 1, 1, 'I',
3116 CHARSET_LEFT_TO_RIGHT,
3117 build_string ("JISX0201 Kana"),
3118 build_string ("JISX0201.1976 (Japanese Kana)"),
3119 build_string ("JISX0201.1976 Japanese Kana"),
3120 build_string ("jisx0201.1976"),
3122 Vcharset_latin_jisx0201 =
3123 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
3124 CHARSET_TYPE_94, 1, 0, 'J',
3125 CHARSET_LEFT_TO_RIGHT,
3126 build_string ("JISX0201 Roman"),
3127 build_string ("JISX0201.1976 (Japanese Roman)"),
3128 build_string ("JISX0201.1976 Japanese Roman"),
3129 build_string ("jisx0201.1976"),
3130 latin_jisx0201_to_ucs);
3131 Vcharset_cyrillic_iso8859_5 =
3132 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
3133 CHARSET_TYPE_96, 1, 1, 'L',
3134 CHARSET_LEFT_TO_RIGHT,
3135 build_string ("ISO8859-5"),
3136 build_string ("ISO8859-5 (Cyrillic)"),
3137 build_string ("ISO8859-5 (Cyrillic)"),
3138 build_string ("iso8859-5"),
3140 Vcharset_latin_iso8859_9 =
3141 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
3142 CHARSET_TYPE_96, 1, 1, 'M',
3143 CHARSET_LEFT_TO_RIGHT,
3144 build_string ("Latin-5"),
3145 build_string ("ISO8859-9 (Latin-5)"),
3146 build_string ("ISO8859-9 (Latin-5)"),
3147 build_string ("iso8859-9"),
3148 latin_iso8859_9_to_ucs);
3149 Vcharset_japanese_jisx0208_1978 =
3150 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
3151 CHARSET_TYPE_94X94, 2, 0, '@',
3152 CHARSET_LEFT_TO_RIGHT,
3153 build_string ("JISX0208.1978"),
3154 build_string ("JISX0208.1978 (Japanese)"),
3156 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
3157 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3159 Vcharset_chinese_gb2312 =
3160 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
3161 CHARSET_TYPE_94X94, 2, 0, 'A',
3162 CHARSET_LEFT_TO_RIGHT,
3163 build_string ("GB2312"),
3164 build_string ("GB2312)"),
3165 build_string ("GB2312 Chinese simplified"),
3166 build_string ("gb2312"),
3168 Vcharset_japanese_jisx0208 =
3169 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
3170 CHARSET_TYPE_94X94, 2, 0, 'B',
3171 CHARSET_LEFT_TO_RIGHT,
3172 build_string ("JISX0208"),
3173 build_string ("JISX0208.1983/1990 (Japanese)"),
3174 build_string ("JISX0208.1983/1990 Japanese Kanji"),
3175 build_string ("jisx0208.19\\(83\\|90\\)"),
3177 Vcharset_korean_ksc5601 =
3178 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
3179 CHARSET_TYPE_94X94, 2, 0, 'C',
3180 CHARSET_LEFT_TO_RIGHT,
3181 build_string ("KSC5601"),
3182 build_string ("KSC5601 (Korean"),
3183 build_string ("KSC5601 Korean Hangul and Hanja"),
3184 build_string ("ksc5601"),
3186 Vcharset_japanese_jisx0212 =
3187 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
3188 CHARSET_TYPE_94X94, 2, 0, 'D',
3189 CHARSET_LEFT_TO_RIGHT,
3190 build_string ("JISX0212"),
3191 build_string ("JISX0212 (Japanese)"),
3192 build_string ("JISX0212 Japanese Supplement"),
3193 build_string ("jisx0212"),
3196 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3197 Vcharset_chinese_cns11643_1 =
3198 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
3199 CHARSET_TYPE_94X94, 2, 0, 'G',
3200 CHARSET_LEFT_TO_RIGHT,
3201 build_string ("CNS11643-1"),
3202 build_string ("CNS11643-1 (Chinese traditional)"),
3204 ("CNS 11643 Plane 1 Chinese traditional"),
3205 build_string (CHINESE_CNS_PLANE_RE("1")),
3207 Vcharset_chinese_cns11643_2 =
3208 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
3209 CHARSET_TYPE_94X94, 2, 0, 'H',
3210 CHARSET_LEFT_TO_RIGHT,
3211 build_string ("CNS11643-2"),
3212 build_string ("CNS11643-2 (Chinese traditional)"),
3214 ("CNS 11643 Plane 2 Chinese traditional"),
3215 build_string (CHINESE_CNS_PLANE_RE("2")),
3218 Vcharset_chinese_cns11643_3 =
3219 make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3,
3220 CHARSET_TYPE_94X94, 2, 0, 'I',
3221 CHARSET_LEFT_TO_RIGHT,
3222 build_string ("CNS11643-3"),
3223 build_string ("CNS11643-3 (Chinese traditional)"),
3225 ("CNS 11643 Plane 3 Chinese traditional"),
3226 build_string (CHINESE_CNS_PLANE_RE("3")),
3228 Vcharset_chinese_cns11643_4 =
3229 make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4,
3230 CHARSET_TYPE_94X94, 2, 0, 'J',
3231 CHARSET_LEFT_TO_RIGHT,
3232 build_string ("CNS11643-4"),
3233 build_string ("CNS11643-4 (Chinese traditional)"),
3235 ("CNS 11643 Plane 4 Chinese traditional"),
3236 build_string (CHINESE_CNS_PLANE_RE("4")),
3238 Vcharset_chinese_cns11643_5 =
3239 make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5,
3240 CHARSET_TYPE_94X94, 2, 0, 'K',
3241 CHARSET_LEFT_TO_RIGHT,
3242 build_string ("CNS11643-5"),
3243 build_string ("CNS11643-5 (Chinese traditional)"),
3245 ("CNS 11643 Plane 5 Chinese traditional"),
3246 build_string (CHINESE_CNS_PLANE_RE("5")),
3248 Vcharset_chinese_cns11643_6 =
3249 make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6,
3250 CHARSET_TYPE_94X94, 2, 0, 'L',
3251 CHARSET_LEFT_TO_RIGHT,
3252 build_string ("CNS11643-6"),
3253 build_string ("CNS11643-6 (Chinese traditional)"),
3255 ("CNS 11643 Plane 6 Chinese traditional"),
3256 build_string (CHINESE_CNS_PLANE_RE("6")),
3258 Vcharset_chinese_cns11643_7 =
3259 make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7,
3260 CHARSET_TYPE_94X94, 2, 0, 'M',
3261 CHARSET_LEFT_TO_RIGHT,
3262 build_string ("CNS11643-7"),
3263 build_string ("CNS11643-7 (Chinese traditional)"),
3265 ("CNS 11643 Plane 7 Chinese traditional"),
3266 build_string (CHINESE_CNS_PLANE_RE("7")),
3268 Vcharset_latin_viscii_lower =
3269 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
3270 CHARSET_TYPE_96, 1, 1, '1',
3271 CHARSET_LEFT_TO_RIGHT,
3272 build_string ("VISCII lower"),
3273 build_string ("VISCII lower (Vietnamese)"),
3274 build_string ("VISCII lower (Vietnamese)"),
3275 build_string ("VISCII1.1"),
3276 latin_viscii_lower_to_ucs);
3277 Vcharset_latin_viscii_upper =
3278 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
3279 CHARSET_TYPE_96, 1, 1, '2',
3280 CHARSET_LEFT_TO_RIGHT,
3281 build_string ("VISCII upper"),
3282 build_string ("VISCII upper (Vietnamese)"),
3283 build_string ("VISCII upper (Vietnamese)"),
3284 build_string ("VISCII1.1"),
3285 latin_viscii_upper_to_ucs);
3287 Vcharset_chinese_big5_1 =
3288 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
3289 CHARSET_TYPE_94X94, 2, 0, '0',
3290 CHARSET_LEFT_TO_RIGHT,
3291 build_string ("Big5"),
3292 build_string ("Big5 (Level-1)"),
3294 ("Big5 Level-1 Chinese traditional"),
3295 build_string ("big5"),
3297 Vcharset_chinese_big5_2 =
3298 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
3299 CHARSET_TYPE_94X94, 2, 0, '1',
3300 CHARSET_LEFT_TO_RIGHT,
3301 build_string ("Big5"),
3302 build_string ("Big5 (Level-2)"),
3304 ("Big5 Level-2 Chinese traditional"),
3305 build_string ("big5"),
3308 #ifdef ENABLE_COMPOSITE_CHARS
3309 /* #### For simplicity, we put composite chars into a 96x96 charset.
3310 This is going to lead to problems because the charset can run out of
3311 room, especially as we don't yet recycle numbers. */
3312 Vcharset_composite =
3313 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
3314 CHARSET_TYPE_96X96, 2, 0, 0,
3315 CHARSET_LEFT_TO_RIGHT,
3316 build_string ("Composite"),
3317 build_string ("Composite characters"),
3318 build_string ("Composite characters"),
3321 composite_char_row_next = 32;
3322 composite_char_col_next = 32;
3324 Vcomposite_char_string2char_hash_table =
3325 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3326 Vcomposite_char_char2string_hash_table =
3327 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3328 staticpro (&Vcomposite_char_string2char_hash_table);
3329 staticpro (&Vcomposite_char_char2string_hash_table);
3330 #endif /* ENABLE_COMPOSITE_CHARS */