1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any later version.
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
/* Each variable below is a global Lisp_Object slot, one per built-in
   charset.  Nothing in this part of the file assigns them; presumably they
   are filled in by charset setup code elsewhere -- confirm. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_chinese_cns11643_3;
62 Lisp_Object Vcharset_chinese_cns11643_4;
63 Lisp_Object Vcharset_chinese_cns11643_5;
64 Lisp_Object Vcharset_chinese_cns11643_6;
65 Lisp_Object Vcharset_chinese_cns11643_7;
66 Lisp_Object Vcharset_ucs_bmp;
67 Lisp_Object Vcharset_latin_viscii_lower;
68 Lisp_Object Vcharset_latin_viscii_upper;
70 Lisp_Object Vcharset_chinese_big5_1;
71 Lisp_Object Vcharset_chinese_big5_2;
73 #ifdef ENABLE_COMPOSITE_CHARS
/* Global Lisp_Object slot for the composite charset; it is declared only
   when ENABLE_COMPOSITE_CHARS is defined. */
74 Lisp_Object Vcharset_composite;
76 /* Hash tables for composite chars. One maps a string representing
77 composed chars to the equivalent char; the other goes the other way. */
/* Two global Lisp_Object slots.  Going by their names, char2string maps a
   composite char to its string and string2char maps the string back to the
   char (assumption from the names; the code that uses them is not in this
   part of the file -- confirm). */
79 Lisp_Object Vcomposite_char_char2string_hash_table;
80 Lisp_Object Vcomposite_char_string2char_hash_table;
/* File-local integer counters.  NOTE(review): presumably the next free
   row/column used when a new composite char is allocated -- confirm against
   the code that updates them. */
82 static int composite_char_row_next;
83 static int composite_char_col_next;
85 #endif /* ENABLE_COMPOSITE_CHARS */
87 /* Table of charsets indexed by leading byte. */
88 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
90 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): charset_by_attributes is declared twice below with
   different shapes ([4][128] and [4][128][2]).  Both cannot be active in
   one translation unit, so they are presumably alternatives selected by a
   preprocessor conditional that is not visible in this listing -- confirm
   which one applies to the configuration being built. */
92 Lisp_Object charset_by_attributes[4][128];
94 Lisp_Object charset_by_attributes[4][128][2];
98 /* Table of number of bytes in the string representation of a character
99 indexed by the first byte of that representation.
101 rep_bytes_by_first_byte(c) is more efficient than the equivalent
102 canonical computation:
104 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* NOTE(review): as listed here the initializer has 11 rows of 16 values
   (176 values) for an array of 0xA0 (160) elements, and its closing brace
   is not visible.  The two rows that follow the "Dimension-1 official
   charsets" comment (one ending in 3, one all 2s) are presumably
   alternatives chosen by a preprocessor conditional that is not visible in
   this listing -- confirm before editing the table. */
106 Bytecount rep_bytes_by_first_byte[0xA0] =
107 { /* 0x00 - 0x7f are for straight ASCII */
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
116 /* 0x80 - 0x8f are for Dimension-1 official charsets */
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
120 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 /* 0x90 - 0x9d are for Dimension-2 official charsets */
123 /* 0x9e is for Dimension-1 private charsets */
124 /* 0x9f is for Dimension-2 private charsets */
125 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Allocate a new Emchar_to_byte_table with xmalloc, sized by
   sizeof (Emchar_to_byte_table).  The function is declared to return
   Emchar_to_byte_table*.
   NOTE(review): only the allocation is visible in this listing; the braces,
   any initialization of the table's fields and the return statement are
   not shown, so confirm that base/offset/size are set before the table is
   used.  The empty `()' parameter list is an old-style, non-prototype
   declarator. */
130 Emchar_to_byte_table*
131 make_byte_from_character_table ()
133 Emchar_to_byte_table* table
134 = (Emchar_to_byte_table*) xmalloc (sizeof (Emchar_to_byte_table));
/* Store VAL as the byte associated with character CH in TABLE.
   TABLE keeps a byte array (table->base) covering a window of characters
   that starts at table->offset; the byte for CH lives at
   base[CH - table->offset].  The array is allocated on first use and
   enlarged with xrealloc when CH falls outside the current window.
   NOTE(review): several lines of this function are not visible in this
   listing (the return type, braces, some branch headers, the declarations
   of j, the loop bodies and the last memmove argument); the comments below
   describe only the statements that are shown. */
141 put_byte_from_character_table (Emchar ch, unsigned char val,
142 Emchar_to_byte_table* table)
/* First use: the table has no storage yet. */
144 if (table->base == NULL)
/* Start with 128 bytes; the window begins at CH minus its remainder
   modulo 128. */
146 table->base = xmalloc (128);
147 table->offset = ch - (ch % 128);
149 table->base[ch - table->offset] = val;
/* i is the position of CH relative to the start of the current window. */
153 int i = ch - table->offset;
/* Grow toward lower characters.  The guarding condition is not visible
   here; the arithmetic (size - i) only enlarges the table when i is
   negative. */
157 size_t new_size = table->size - i;
/* Pad to a multiple of 128; this adds between 1 and 128 bytes, i.e. a full
   128 when new_size is already a multiple of 128. */
160 new_size += 128 - (new_size % 128);
161 table->base = xrealloc (table->base, new_size);
/* Move the old contents up by the number of bytes that were added (the
   length argument is on a line not shown). */
162 memmove (table->base + (new_size - table->size), table->base,
/* Loop over the newly added low part; its body is not visible here. */
164 for (j = 0; j < (new_size - table->size); j++)
/* The window now starts that many characters lower. */
166 table->offset -= (new_size - table->size);
167 table->base[ch - table->offset] = val;
168 table->size = new_size;
/* CH lies at or beyond the end of the window: grow upward. */
170 else if (i >= table->size)
172 size_t new_size = i + 1;
/* Same padding rule as above. */
175 new_size += 128 - (new_size % 128);
176 table->base = xrealloc (table->base, new_size);
/* Loop over the newly added high part; its body is not visible here. */
177 for (j = table->size; j < new_size; j++)
179 table->base[i] = val;
180 table->size = new_size;
/* Remaining case (its introducing line is not visible): plain store at
   index i. */
184 table->base[i] = val;
/* Look up the byte stored for character CH in TABLE.
   i is CH's distance from table->offset, and the visible code returns
   table->base[i].
   NOTE(review): the return type line, the braces and any range check on i
   are not visible in this listing.  As shown, i is not compared with
   table->size before indexing; since i is a size_t, a negative difference
   would wrap to a very large index -- confirm a bounds check exists. */
190 get_byte_from_character_table (Emchar ch, Emchar_to_byte_table* table)
192 size_t i = ch - table->offset;
194 return table->base[i];
/* CHAR96 (FT, B) evaluates to
     MIN_CHAR_96 + 96 * (FT - '0') + ((B & 0x7f) - 32)
   i.e. FT (counted from the character '0') selects a run of 96 consecutive
   codes above MIN_CHAR_96, and the low seven bits of B, less 32, give the
   position inside that run.  Both parameters are parenthesized so that
   expression arguments such as `a | b' or `c ? x : y' expand with the
   intended precedence. */
#define CHAR96(ft,b) (MIN_CHAR_96 + ((ft) - '0') * 96 + ((b) & 0x7f) - 32)
/* Pointer to an Emchar_to_byte_table.  Nothing in this part of the file
   assigns it; presumably it holds the reverse (UCS to JIS X 0201 Latin)
   mapping built elsewhere -- confirm. */
201 Emchar_to_byte_table* ucs_to_latin_jisx0201;
/* 94 UCS code points, listed in order for the bytes 0x21..0x7E named in the
   per-entry comments.  Every entry equals its byte value except 0x5C
   (U+00A5 YEN SIGN) and 0x7E (U+203E OVERLINE).
   NOTE(review): the braces of the initializer are not visible in this
   listing. */
203 Emchar latin_jisx0201_to_ucs[94] =
205 0x0021 /* 0x21 EXCLAMATION MARK */,
206 0x0022 /* 0x22 QUOTATION MARK */,
207 0x0023 /* 0x23 NUMBER SIGN */,
208 0x0024 /* 0x24 DOLLAR SIGN */,
209 0x0025 /* 0x25 PERCENT SIGN */,
210 0x0026 /* 0x26 AMPERSAND */,
211 0x0027 /* 0x27 APOSTROPHE */,
212 0x0028 /* 0x28 LEFT PARENTHESIS */,
213 0x0029 /* 0x29 RIGHT PARENTHESIS */,
214 0x002A /* 0x2A ASTERISK */,
215 0x002B /* 0x2B PLUS SIGN */,
216 0x002C /* 0x2C COMMA */,
217 0x002D /* 0x2D HYPHEN-MINUS */,
218 0x002E /* 0x2E FULL STOP */,
219 0x002F /* 0x2F SOLIDUS */,
220 0x0030 /* 0x30 DIGIT ZERO */,
221 0x0031 /* 0x31 DIGIT ONE */,
222 0x0032 /* 0x32 DIGIT TWO */,
223 0x0033 /* 0x33 DIGIT THREE */,
224 0x0034 /* 0x34 DIGIT FOUR */,
225 0x0035 /* 0x35 DIGIT FIVE */,
226 0x0036 /* 0x36 DIGIT SIX */,
227 0x0037 /* 0x37 DIGIT SEVEN */,
228 0x0038 /* 0x38 DIGIT EIGHT */,
229 0x0039 /* 0x39 DIGIT NINE */,
230 0x003A /* 0x3A COLON */,
231 0x003B /* 0x3B SEMICOLON */,
232 0x003C /* 0x3C LESS-THAN SIGN */,
233 0x003D /* 0x3D EQUALS SIGN */,
234 0x003E /* 0x3E GREATER-THAN SIGN */,
235 0x003F /* 0x3F QUESTION MARK */,
236 0x0040 /* 0x40 COMMERCIAL AT */,
237 0x0041 /* 0x41 LATIN CAPITAL LETTER A */,
238 0x0042 /* 0x42 LATIN CAPITAL LETTER B */,
239 0x0043 /* 0x43 LATIN CAPITAL LETTER C */,
240 0x0044 /* 0x44 LATIN CAPITAL LETTER D */,
241 0x0045 /* 0x45 LATIN CAPITAL LETTER E */,
242 0x0046 /* 0x46 LATIN CAPITAL LETTER F */,
243 0x0047 /* 0x47 LATIN CAPITAL LETTER G */,
244 0x0048 /* 0x48 LATIN CAPITAL LETTER H */,
245 0x0049 /* 0x49 LATIN CAPITAL LETTER I */,
246 0x004A /* 0x4A LATIN CAPITAL LETTER J */,
247 0x004B /* 0x4B LATIN CAPITAL LETTER K */,
248 0x004C /* 0x4C LATIN CAPITAL LETTER L */,
249 0x004D /* 0x4D LATIN CAPITAL LETTER M */,
250 0x004E /* 0x4E LATIN CAPITAL LETTER N */,
251 0x004F /* 0x4F LATIN CAPITAL LETTER O */,
252 0x0050 /* 0x50 LATIN CAPITAL LETTER P */,
253 0x0051 /* 0x51 LATIN CAPITAL LETTER Q */,
254 0x0052 /* 0x52 LATIN CAPITAL LETTER R */,
255 0x0053 /* 0x53 LATIN CAPITAL LETTER S */,
256 0x0054 /* 0x54 LATIN CAPITAL LETTER T */,
257 0x0055 /* 0x55 LATIN CAPITAL LETTER U */,
258 0x0056 /* 0x56 LATIN CAPITAL LETTER V */,
259 0x0057 /* 0x57 LATIN CAPITAL LETTER W */,
260 0x0058 /* 0x58 LATIN CAPITAL LETTER X */,
261 0x0059 /* 0x59 LATIN CAPITAL LETTER Y */,
262 0x005A /* 0x5A LATIN CAPITAL LETTER Z */,
263 0x005B /* 0x5B LEFT SQUARE BRACKET */,
264 0x00A5 /* 0x5C YEN SIGN */,
265 0x005D /* 0x5D RIGHT SQUARE BRACKET */,
266 0x005E /* 0x5E CIRCUMFLEX ACCENT */,
267 0x005F /* 0x5F LOW LINE */,
268 0x0060 /* 0x60 GRAVE ACCENT */,
269 0x0061 /* 0x61 LATIN SMALL LETTER A */,
270 0x0062 /* 0x62 LATIN SMALL LETTER B */,
271 0x0063 /* 0x63 LATIN SMALL LETTER C */,
272 0x0064 /* 0x64 LATIN SMALL LETTER D */,
273 0x0065 /* 0x65 LATIN SMALL LETTER E */,
274 0x0066 /* 0x66 LATIN SMALL LETTER F */,
275 0x0067 /* 0x67 LATIN SMALL LETTER G */,
276 0x0068 /* 0x68 LATIN SMALL LETTER H */,
277 0x0069 /* 0x69 LATIN SMALL LETTER I */,
278 0x006A /* 0x6A LATIN SMALL LETTER J */,
279 0x006B /* 0x6B LATIN SMALL LETTER K */,
280 0x006C /* 0x6C LATIN SMALL LETTER L */,
281 0x006D /* 0x6D LATIN SMALL LETTER M */,
282 0x006E /* 0x6E LATIN SMALL LETTER N */,
283 0x006F /* 0x6F LATIN SMALL LETTER O */,
284 0x0070 /* 0x70 LATIN SMALL LETTER P */,
285 0x0071 /* 0x71 LATIN SMALL LETTER Q */,
286 0x0072 /* 0x72 LATIN SMALL LETTER R */,
287 0x0073 /* 0x73 LATIN SMALL LETTER S */,
288 0x0074 /* 0x74 LATIN SMALL LETTER T */,
289 0x0075 /* 0x75 LATIN SMALL LETTER U */,
290 0x0076 /* 0x76 LATIN SMALL LETTER V */,
291 0x0077 /* 0x77 LATIN SMALL LETTER W */,
292 0x0078 /* 0x78 LATIN SMALL LETTER X */,
293 0x0079 /* 0x79 LATIN SMALL LETTER Y */,
294 0x007A /* 0x7A LATIN SMALL LETTER Z */,
295 0x007B /* 0x7B LEFT CURLY BRACKET */,
296 0x007C /* 0x7C VERTICAL LINE */,
297 0x007D /* 0x7D RIGHT CURLY BRACKET */,
298 0x203E /* 0x7E OVERLINE */
/* Pointer to an Emchar_to_byte_table.  Nothing in this part of the file
   assigns it; presumably it holds the reverse (UCS to ISO 8859-2) mapping
   built elsewhere -- confirm. */
302 Emchar_to_byte_table* ucs_to_latin_iso8859_2;
/* 96 UCS code points, listed in order for the bytes 0xA0..0xFF named in the
   per-entry comments; all 96 rows are present in this listing.
   NOTE(review): the braces of the initializer are not visible here. */
304 Emchar latin_iso8859_2_to_ucs[96] =
306 0x00A0 /* 0xA0 NO-BREAK SPACE */,
307 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
308 0x02D8 /* 0xA2 BREVE */,
309 0x0141 /* 0xA3 LATIN CAPITAL LETTER L WITH STROKE */,
310 0x00A4 /* 0xA4 CURRENCY SIGN */,
311 0x013D /* 0xA5 LATIN CAPITAL LETTER L WITH CARON */,
312 0x015A /* 0xA6 LATIN CAPITAL LETTER S WITH ACUTE */,
313 0x00A7 /* 0xA7 SECTION SIGN */,
314 0x00A8 /* 0xA8 DIAERESIS */,
315 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
316 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
317 0x0164 /* 0xAB LATIN CAPITAL LETTER T WITH CARON */,
318 0x0179 /* 0xAC LATIN CAPITAL LETTER Z WITH ACUTE */,
319 0x00AD /* 0xAD SOFT HYPHEN */,
320 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
321 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
322 0x00B0 /* 0xB0 DEGREE SIGN */,
323 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
324 0x02DB /* 0xB2 OGONEK */,
325 0x0142 /* 0xB3 LATIN SMALL LETTER L WITH STROKE */,
326 0x00B4 /* 0xB4 ACUTE ACCENT */,
327 0x013E /* 0xB5 LATIN SMALL LETTER L WITH CARON */,
328 0x015B /* 0xB6 LATIN SMALL LETTER S WITH ACUTE */,
329 0x02C7 /* 0xB7 CARON */,
330 0x00B8 /* 0xB8 CEDILLA */,
331 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
332 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
333 0x0165 /* 0xBB LATIN SMALL LETTER T WITH CARON */,
334 0x017A /* 0xBC LATIN SMALL LETTER Z WITH ACUTE */,
335 0x02DD /* 0xBD DOUBLE ACUTE ACCENT */,
336 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
337 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
338 0x0154 /* 0xC0 LATIN CAPITAL LETTER R WITH ACUTE */,
339 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
340 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
341 0x0102 /* 0xC3 LATIN CAPITAL LETTER A WITH BREVE */,
342 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
343 0x0139 /* 0xC5 LATIN CAPITAL LETTER L WITH ACUTE */,
344 0x0106 /* 0xC6 LATIN CAPITAL LETTER C WITH ACUTE */,
345 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
346 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
347 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
348 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
349 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
350 0x011A /* 0xCC LATIN CAPITAL LETTER E WITH CARON */,
351 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
352 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
353 0x010E /* 0xCF LATIN CAPITAL LETTER D WITH CARON */,
354 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
355 0x0143 /* 0xD1 LATIN CAPITAL LETTER N WITH ACUTE */,
356 0x0147 /* 0xD2 LATIN CAPITAL LETTER N WITH CARON */,
357 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
358 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
359 0x0150 /* 0xD5 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */,
360 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
361 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
362 0x0158 /* 0xD8 LATIN CAPITAL LETTER R WITH CARON */,
363 0x016E /* 0xD9 LATIN CAPITAL LETTER U WITH RING ABOVE */,
364 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
365 0x0170 /* 0xDB LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */,
366 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
367 0x00DD /* 0xDD LATIN CAPITAL LETTER Y WITH ACUTE */,
368 0x0162 /* 0xDE LATIN CAPITAL LETTER T WITH CEDILLA */,
369 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
370 0x0155 /* 0xE0 LATIN SMALL LETTER R WITH ACUTE */,
371 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
372 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
373 0x0103 /* 0xE3 LATIN SMALL LETTER A WITH BREVE */,
374 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
375 0x013A /* 0xE5 LATIN SMALL LETTER L WITH ACUTE */,
376 0x0107 /* 0xE6 LATIN SMALL LETTER C WITH ACUTE */,
377 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
378 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
379 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
380 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
381 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
382 0x011B /* 0xEC LATIN SMALL LETTER E WITH CARON */,
383 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
384 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
385 0x010F /* 0xEF LATIN SMALL LETTER D WITH CARON */,
386 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
387 0x0144 /* 0xF1 LATIN SMALL LETTER N WITH ACUTE */,
388 0x0148 /* 0xF2 LATIN SMALL LETTER N WITH CARON */,
389 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
390 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
391 0x0151 /* 0xF5 LATIN SMALL LETTER O WITH DOUBLE ACUTE */,
392 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
393 0x00F7 /* 0xF7 DIVISION SIGN */,
394 0x0159 /* 0xF8 LATIN SMALL LETTER R WITH CARON */,
395 0x016F /* 0xF9 LATIN SMALL LETTER U WITH RING ABOVE */,
396 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
397 0x0171 /* 0xFB LATIN SMALL LETTER U WITH DOUBLE ACUTE */,
398 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
399 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
400 0x0163 /* 0xFE LATIN SMALL LETTER T WITH CEDILLA */,
401 0x02D9 /* 0xFF DOT ABOVE */
/* Pointer to an Emchar_to_byte_table.  Nothing in this part of the file
   assigns it; presumably it holds the reverse (UCS to ISO 8859-3) mapping
   built elsewhere -- confirm. */
404 Emchar_to_byte_table* ucs_to_latin_iso8859_3;
/* UCS code points listed in order for the bytes 0xA0..0xFF named in the
   per-entry comments.
   NOTE(review): the array is declared with 96 elements but only 89 rows are
   visible in this listing; the per-entry comments skip 0xA5, 0xAE, 0xBE,
   0xC3, 0xD0, 0xE3 and 0xF0.  Confirm that the real file carries a
   placeholder row at each of those positions; without them every later
   entry would sit at the wrong index.  The braces of the initializer are
   not visible here either. */
406 Emchar latin_iso8859_3_to_ucs[96] =
408 0x00A0 /* 0xA0 NO-BREAK SPACE */,
409 0x0126 /* 0xA1 LATIN CAPITAL LETTER H WITH STROKE */,
410 0x02D8 /* 0xA2 BREVE */,
411 0x00A3 /* 0xA3 POUND SIGN */,
412 0x00A4 /* 0xA4 CURRENCY SIGN */,
414 0x0124 /* 0xA6 LATIN CAPITAL LETTER H WITH CIRCUMFLEX */,
415 0x00A7 /* 0xA7 SECTION SIGN */,
416 0x00A8 /* 0xA8 DIAERESIS */,
417 0x0130 /* 0xA9 LATIN CAPITAL LETTER I WITH DOT ABOVE */,
418 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
419 0x011E /* 0xAB LATIN CAPITAL LETTER G WITH BREVE */,
420 0x0134 /* 0xAC LATIN CAPITAL LETTER J WITH CIRCUMFLEX */,
421 0x00AD /* 0xAD SOFT HYPHEN */,
423 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
424 0x00B0 /* 0xB0 DEGREE SIGN */,
425 0x0127 /* 0xB1 LATIN SMALL LETTER H WITH STROKE */,
426 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
427 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
428 0x00B4 /* 0xB4 ACUTE ACCENT */,
429 0x00B5 /* 0xB5 MICRO SIGN */,
430 0x0125 /* 0xB6 LATIN SMALL LETTER H WITH CIRCUMFLEX */,
431 0x00B7 /* 0xB7 MIDDLE DOT */,
432 0x00B8 /* 0xB8 CEDILLA */,
433 0x0131 /* 0xB9 LATIN SMALL LETTER DOTLESS I */,
434 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
435 0x011F /* 0xBB LATIN SMALL LETTER G WITH BREVE */,
436 0x0135 /* 0xBC LATIN SMALL LETTER J WITH CIRCUMFLEX */,
437 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
439 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
440 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
441 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
442 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
444 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
445 0x010A /* 0xC5 LATIN CAPITAL LETTER C WITH DOT ABOVE */,
446 0x0108 /* 0xC6 LATIN CAPITAL LETTER C WITH CIRCUMFLEX */,
447 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
448 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
449 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
450 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
451 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
452 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
453 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
454 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
455 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
457 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
458 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
459 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
460 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
461 0x0120 /* 0xD5 LATIN CAPITAL LETTER G WITH DOT ABOVE */,
462 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
463 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
464 0x011C /* 0xD8 LATIN CAPITAL LETTER G WITH CIRCUMFLEX */,
465 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
466 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
467 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
468 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
469 0x016C /* 0xDD LATIN CAPITAL LETTER U WITH BREVE */,
470 0x015C /* 0xDE LATIN CAPITAL LETTER S WITH CIRCUMFLEX */,
471 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
472 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
473 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
474 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
476 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
477 0x010B /* 0xE5 LATIN SMALL LETTER C WITH DOT ABOVE */,
478 0x0109 /* 0xE6 LATIN SMALL LETTER C WITH CIRCUMFLEX */,
479 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
480 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
481 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
482 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
483 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
484 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
485 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
486 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
487 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
489 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
490 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
491 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
492 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
493 0x0121 /* 0xF5 LATIN SMALL LETTER G WITH DOT ABOVE */,
494 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
495 0x00F7 /* 0xF7 DIVISION SIGN */,
496 0x011D /* 0xF8 LATIN SMALL LETTER G WITH CIRCUMFLEX */,
497 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
498 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
499 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
500 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
501 0x016D /* 0xFD LATIN SMALL LETTER U WITH BREVE */,
502 0x015D /* 0xFE LATIN SMALL LETTER S WITH CIRCUMFLEX */,
503 0x02D9 /* 0xFF DOT ABOVE */
/* Pointer to an Emchar_to_byte_table.  Nothing in this part of the file
   assigns it; presumably it holds the reverse (UCS to ISO 8859-4) mapping
   built elsewhere -- confirm. */
506 Emchar_to_byte_table* ucs_to_latin_iso8859_4;
/* 96 UCS code points, listed in order for the bytes 0xA0..0xFF named in the
   per-entry comments; all 96 rows are present in this listing.
   NOTE(review): the braces of the initializer are not visible here. */
508 Emchar latin_iso8859_4_to_ucs[96] =
510 0x00A0 /* 0xA0 NO-BREAK SPACE */,
511 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
512 0x0138 /* 0xA2 LATIN SMALL LETTER KRA */,
513 0x0156 /* 0xA3 LATIN CAPITAL LETTER R WITH CEDILLA */,
514 0x00A4 /* 0xA4 CURRENCY SIGN */,
515 0x0128 /* 0xA5 LATIN CAPITAL LETTER I WITH TILDE */,
516 0x013B /* 0xA6 LATIN CAPITAL LETTER L WITH CEDILLA */,
517 0x00A7 /* 0xA7 SECTION SIGN */,
518 0x00A8 /* 0xA8 DIAERESIS */,
519 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
520 0x0112 /* 0xAA LATIN CAPITAL LETTER E WITH MACRON */,
521 0x0122 /* 0xAB LATIN CAPITAL LETTER G WITH CEDILLA */,
522 0x0166 /* 0xAC LATIN CAPITAL LETTER T WITH STROKE */,
523 0x00AD /* 0xAD SOFT HYPHEN */,
524 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
525 0x00AF /* 0xAF MACRON */,
526 0x00B0 /* 0xB0 DEGREE SIGN */,
527 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
528 0x02DB /* 0xB2 OGONEK */,
529 0x0157 /* 0xB3 LATIN SMALL LETTER R WITH CEDILLA */,
530 0x00B4 /* 0xB4 ACUTE ACCENT */,
531 0x0129 /* 0xB5 LATIN SMALL LETTER I WITH TILDE */,
532 0x013C /* 0xB6 LATIN SMALL LETTER L WITH CEDILLA */,
533 0x02C7 /* 0xB7 CARON */,
534 0x00B8 /* 0xB8 CEDILLA */,
535 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
536 0x0113 /* 0xBA LATIN SMALL LETTER E WITH MACRON */,
537 0x0123 /* 0xBB LATIN SMALL LETTER G WITH CEDILLA */,
538 0x0167 /* 0xBC LATIN SMALL LETTER T WITH STROKE */,
539 0x014A /* 0xBD LATIN CAPITAL LETTER ENG */,
540 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
541 0x014B /* 0xBF LATIN SMALL LETTER ENG */,
542 0x0100 /* 0xC0 LATIN CAPITAL LETTER A WITH MACRON */,
543 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
544 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
545 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
546 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
547 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
548 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
549 0x012E /* 0xC7 LATIN CAPITAL LETTER I WITH OGONEK */,
550 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
551 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
552 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
553 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
554 0x0116 /* 0xCC LATIN CAPITAL LETTER E WITH DOT ABOVE */,
555 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
556 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
557 0x012A /* 0xCF LATIN CAPITAL LETTER I WITH MACRON */,
558 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
559 0x0145 /* 0xD1 LATIN CAPITAL LETTER N WITH CEDILLA */,
560 0x014C /* 0xD2 LATIN CAPITAL LETTER O WITH MACRON */,
561 0x0136 /* 0xD3 LATIN CAPITAL LETTER K WITH CEDILLA */,
562 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
563 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
564 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
565 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
566 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
567 0x0172 /* 0xD9 LATIN CAPITAL LETTER U WITH OGONEK */,
568 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
569 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
570 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
571 0x0168 /* 0xDD LATIN CAPITAL LETTER U WITH TILDE */,
572 0x016A /* 0xDE LATIN CAPITAL LETTER U WITH MACRON */,
573 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
574 0x0101 /* 0xE0 LATIN SMALL LETTER A WITH MACRON */,
575 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
576 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
577 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
578 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
579 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
580 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
581 0x012F /* 0xE7 LATIN SMALL LETTER I WITH OGONEK */,
582 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
583 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
584 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
585 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
586 0x0117 /* 0xEC LATIN SMALL LETTER E WITH DOT ABOVE */,
587 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
588 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
589 0x012B /* 0xEF LATIN SMALL LETTER I WITH MACRON */,
590 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
591 0x0146 /* 0xF1 LATIN SMALL LETTER N WITH CEDILLA */,
592 0x014D /* 0xF2 LATIN SMALL LETTER O WITH MACRON */,
593 0x0137 /* 0xF3 LATIN SMALL LETTER K WITH CEDILLA */,
594 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
595 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
596 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
597 0x00F7 /* 0xF7 DIVISION SIGN */,
598 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
599 0x0173 /* 0xF9 LATIN SMALL LETTER U WITH OGONEK */,
600 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
601 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
602 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
603 0x0169 /* 0xFD LATIN SMALL LETTER U WITH TILDE */,
604 0x016B /* 0xFE LATIN SMALL LETTER U WITH MACRON */,
605 0x02D9 /* 0xFF DOT ABOVE */
/* Pointer to an Emchar_to_byte_table.  Nothing in this part of the file
   assigns it; presumably it holds the reverse (UCS to ISO 8859-9) mapping
   built elsewhere -- confirm. */
608 Emchar_to_byte_table* ucs_to_latin_iso8859_9;
/* 96 UCS code points, listed in order for the bytes 0xA0..0xFF named in the
   per-entry comments; all 96 rows are present in this listing.  Every entry
   equals its byte value except 0xD0, 0xDD, 0xDE, 0xF0, 0xFD and 0xFE.
   The last row keeps a trailing comma, which C permits in an initializer
   list.
   NOTE(review): the braces of the initializer are not visible here. */
610 Emchar latin_iso8859_9_to_ucs[96] =
612 0x00A0 /* 0xA0 NO-BREAK SPACE */,
613 0x00A1 /* 0xA1 INVERTED EXCLAMATION MARK */,
614 0x00A2 /* 0xA2 CENT SIGN */,
615 0x00A3 /* 0xA3 POUND SIGN */,
616 0x00A4 /* 0xA4 CURRENCY SIGN */,
617 0x00A5 /* 0xA5 YEN SIGN */,
618 0x00A6 /* 0xA6 BROKEN BAR */,
619 0x00A7 /* 0xA7 SECTION SIGN */,
620 0x00A8 /* 0xA8 DIAERESIS */,
621 0x00A9 /* 0xA9 COPYRIGHT SIGN */,
622 0x00AA /* 0xAA FEMININE ORDINAL INDICATOR */,
623 0x00AB /* 0xAB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */,
624 0x00AC /* 0xAC NOT SIGN */,
625 0x00AD /* 0xAD SOFT HYPHEN */,
626 0x00AE /* 0xAE REGISTERED SIGN */,
627 0x00AF /* 0xAF MACRON */,
628 0x00B0 /* 0xB0 DEGREE SIGN */,
629 0x00B1 /* 0xB1 PLUS-MINUS SIGN */,
630 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
631 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
632 0x00B4 /* 0xB4 ACUTE ACCENT */,
633 0x00B5 /* 0xB5 MICRO SIGN */,
634 0x00B6 /* 0xB6 PILCROW SIGN */,
635 0x00B7 /* 0xB7 MIDDLE DOT */,
636 0x00B8 /* 0xB8 CEDILLA */,
637 0x00B9 /* 0xB9 SUPERSCRIPT ONE */,
638 0x00BA /* 0xBA MASCULINE ORDINAL INDICATOR */,
639 0x00BB /* 0xBB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */,
640 0x00BC /* 0xBC VULGAR FRACTION ONE QUARTER */,
641 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
642 0x00BE /* 0xBE VULGAR FRACTION THREE QUARTERS */,
643 0x00BF /* 0xBF INVERTED QUESTION MARK */,
644 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
645 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
646 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
647 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
648 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
649 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
650 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
651 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
652 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
653 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
654 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
655 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
656 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
657 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
658 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
659 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
660 0x011E /* 0xD0 LATIN CAPITAL LETTER G WITH BREVE */,
661 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
662 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
663 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
664 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
665 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
666 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
667 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
668 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
669 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
670 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
671 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
672 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
673 0x0130 /* 0xDD LATIN CAPITAL LETTER I WITH DOT ABOVE */,
674 0x015E /* 0xDE LATIN CAPITAL LETTER S WITH CEDILLA */,
675 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
676 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
677 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
678 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
679 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
680 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
681 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
682 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
683 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
684 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
685 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
686 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
687 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
688 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
689 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
690 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
691 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
692 0x011F /* 0xF0 LATIN SMALL LETTER G WITH BREVE */,
693 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
694 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
695 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
696 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
697 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
698 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
699 0x00F7 /* 0xF7 DIVISION SIGN */,
700 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
701 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
702 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
703 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
704 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
705 0x0131 /* 0xFD LATIN SMALL LETTER DOTLESS I */,
706 0x015F /* 0xFE LATIN SMALL LETTER S WITH CEDILLA */,
707 0x00FF /* 0xFF LATIN SMALL LETTER Y WITH DIAERESIS */,
/* Declarations for the VISCII lower and upper tables: for each, a pointer
   to an Emchar_to_byte_table and a 96-element Emchar array.
   NOTE(review): the initializer rows of both arrays are not visible in this
   listing (the embedded numbering jumps from 712 to 812 and from 814 to
   915), so nothing can be said here about their contents -- check the real
   file. */
710 Emchar_to_byte_table* ucs_to_latin_viscii_lower;
712 Emchar latin_viscii_lower_to_ucs[96] =
812 Emchar_to_byte_table* ucs_to_latin_viscii_upper;
814 Emchar latin_viscii_upper_to_ucs[96] =
915 Emchar_to_byte_table* ucs_to_latin_tcvn5712;
/* Table of 96 Emchar values.  Going by the per-entry comments, the
   entries correspond to bytes 0xA0 .. 0xFF in order, and each value is
   the UCS code point of the character named in the comment.
   NOTE(review): the identifier suggests the source encoding is the
   Vietnamese standard TCVN 5712 -- confirm against the charset that
   uses this table. */
917 Emchar latin_tcvn5712_to_ucs[96] =
919 0x00A0 /* 0xA0 NO-BREAK SPACE */,
920 0x0102 /* 0xA1 LATIN CAPITAL LETTER A WITH BREVE */,
921 0x00C2 /* 0xA2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
922 0x00CA /* 0xA3 LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
923 0x00D4 /* 0xA4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
924 0x01A0 /* 0xA5 LATIN CAPITAL LETTER O WITH HORN */,
925 0x01AF /* 0xA6 LATIN CAPITAL LETTER U WITH HORN */,
926 0x0110 /* 0xA7 LATIN CAPITAL LETTER D WITH STROKE */,
927 0x0103 /* 0xA8 LATIN SMALL LETTER A WITH BREVE */,
928 0x00E2 /* 0xA9 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
929 0x00EA /* 0xAA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
930 0x00F4 /* 0xAB LATIN SMALL LETTER O WITH CIRCUMFLEX */,
931 0x01A1 /* 0xAC LATIN SMALL LETTER O WITH HORN */,
932 0x01B0 /* 0xAD LATIN SMALL LETTER U WITH HORN */,
933 0x0111 /* 0xAE LATIN SMALL LETTER D WITH STROKE */,
934 0x1EB0 /* 0xAF LATIN CAPITAL LETTER A WITH BREVE AND GRAVE */,
935 0x0300 /* 0xB0 COMBINING GRAVE ACCENT */,
936 0x0309 /* 0xB1 COMBINING HOOK ABOVE */,
937 0x0303 /* 0xB2 COMBINING TILDE */,
938 0x0301 /* 0xB3 COMBINING ACUTE ACCENT */,
939 0x0323 /* 0xB4 COMBINING DOT BELOW */,
940 0x00E0 /* 0xB5 LATIN SMALL LETTER A WITH GRAVE */,
941 0x1EA3 /* 0xB6 LATIN SMALL LETTER A WITH HOOK ABOVE */,
942 0x00E3 /* 0xB7 LATIN SMALL LETTER A WITH TILDE */,
943 0x00E1 /* 0xB8 LATIN SMALL LETTER A WITH ACUTE */,
944 0x1EA1 /* 0xB9 LATIN SMALL LETTER A WITH DOT BELOW */,
945 0x1EB2 /* 0xBA LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE */,
946 0x1EB1 /* 0xBB LATIN SMALL LETTER A WITH BREVE AND GRAVE */,
947 0x1EB3 /* 0xBC LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE */,
948 0x1EB5 /* 0xBD LATIN SMALL LETTER A WITH BREVE AND TILDE */,
949 0x1EAF /* 0xBE LATIN SMALL LETTER A WITH BREVE AND ACUTE */,
950 0x1EB4 /* 0xBF LATIN CAPITAL LETTER A WITH BREVE AND TILDE */,
951 0x1EAE /* 0xC0 LATIN CAPITAL LETTER A WITH BREVE AND ACUTE */,
952 0x1EA6 /* 0xC1 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE */,
953 0x1EA8 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
954 0x1EAA /* 0xC3 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE */,
955 0x1EA4 /* 0xC4 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE */,
956 0x1EC0 /* 0xC5 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE */,
957 0x1EB7 /* 0xC6 LATIN SMALL LETTER A WITH BREVE AND DOT BELOW */,
958 0x1EA7 /* 0xC7 LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE */,
959 0x1EA9 /* 0xC8 LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
960 0x1EAB /* 0xC9 LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE */,
961 0x1EA5 /* 0xCA LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE */,
962 0x1EAD /* 0xCB LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW */,
963 0x00E8 /* 0xCC LATIN SMALL LETTER E WITH GRAVE */,
964 0x1EC2 /* 0xCD LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
965 0x1EBB /* 0xCE LATIN SMALL LETTER E WITH HOOK ABOVE */,
966 0x1EBD /* 0xCF LATIN SMALL LETTER E WITH TILDE */,
967 0x00E9 /* 0xD0 LATIN SMALL LETTER E WITH ACUTE */,
968 0x1EB9 /* 0xD1 LATIN SMALL LETTER E WITH DOT BELOW */,
969 0x1EC1 /* 0xD2 LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE */,
970 0x1EC3 /* 0xD3 LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
971 0x1EC5 /* 0xD4 LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE */,
972 0x1EBF /* 0xD5 LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE */,
973 0x1EC7 /* 0xD6 LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW */,
974 0x00EC /* 0xD7 LATIN SMALL LETTER I WITH GRAVE */,
975 0x1EC9 /* 0xD8 LATIN SMALL LETTER I WITH HOOK ABOVE */,
976 0x1EC4 /* 0xD9 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE */,
977 0x1EBE /* 0xDA LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE */,
978 0x1ED2 /* 0xDB LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE */,
979 0x0129 /* 0xDC LATIN SMALL LETTER I WITH TILDE */,
980 0x00ED /* 0xDD LATIN SMALL LETTER I WITH ACUTE */,
981 0x1ECB /* 0xDE LATIN SMALL LETTER I WITH DOT BELOW */,
982 0x00F2 /* 0xDF LATIN SMALL LETTER O WITH GRAVE */,
983 0x1ED4 /* 0xE0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
984 0x1ECF /* 0xE1 LATIN SMALL LETTER O WITH HOOK ABOVE */,
985 0x00F5 /* 0xE2 LATIN SMALL LETTER O WITH TILDE */,
986 0x00F3 /* 0xE3 LATIN SMALL LETTER O WITH ACUTE */,
987 0x1ECD /* 0xE4 LATIN SMALL LETTER O WITH DOT BELOW */,
988 0x1ED3 /* 0xE5 LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE */,
989 0x1ED5 /* 0xE6 LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
990 0x1ED7 /* 0xE7 LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE */,
991 0x1ED1 /* 0xE8 LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE */,
992 0x1ED9 /* 0xE9 LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW */,
993 0x1EDD /* 0xEA LATIN SMALL LETTER O WITH HORN AND GRAVE */,
994 0x1EDF /* 0xEB LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE */,
995 0x1EE1 /* 0xEC LATIN SMALL LETTER O WITH HORN AND TILDE */,
996 0x1EDB /* 0xED LATIN SMALL LETTER O WITH HORN AND ACUTE */,
997 0x1EE3 /* 0xEE LATIN SMALL LETTER O WITH HORN AND DOT BELOW */,
998 0x00F9 /* 0xEF LATIN SMALL LETTER U WITH GRAVE */,
999 0x1ED6 /* 0xF0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE */,
1000 0x1EE7 /* 0xF1 LATIN SMALL LETTER U WITH HOOK ABOVE */,
1001 0x0169 /* 0xF2 LATIN SMALL LETTER U WITH TILDE */,
1002 0x00FA /* 0xF3 LATIN SMALL LETTER U WITH ACUTE */,
1003 0x1EE5 /* 0xF4 LATIN SMALL LETTER U WITH DOT BELOW */,
1004 0x1EEB /* 0xF5 LATIN SMALL LETTER U WITH HORN AND GRAVE */,
1005 0x1EED /* 0xF6 LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE */,
1006 0x1EEF /* 0xF7 LATIN SMALL LETTER U WITH HORN AND TILDE */,
1007 0x1EE9 /* 0xF8 LATIN SMALL LETTER U WITH HORN AND ACUTE */,
1008 0x1EF1 /* 0xF9 LATIN SMALL LETTER U WITH HORN AND DOT BELOW */,
1009 0x1EF3 /* 0xFA LATIN SMALL LETTER Y WITH GRAVE */,
1010 0x1EF7 /* 0xFB LATIN SMALL LETTER Y WITH HOOK ABOVE */,
1011 0x1EF9 /* 0xFC LATIN SMALL LETTER Y WITH TILDE */,
1012 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
1013 0x1EF5 /* 0xFE LATIN SMALL LETTER Y WITH DOT BELOW */,
1014 0x1ED0 /* 0xFF LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */
/* Table of 128 Charset_ID values, one per code point U+0100 .. U+017F
   in order (see the per-entry comments).  Each entry is the leading
   byte of the charset chosen for that character: one of the ISO 8859-2,
   -3 or -4 sets, JIS X 0212, or LEADING_BYTE_UCS_BMP.
   NOTE(review): LEADING_BYTE_UCS_BMP presumably marks characters that
   none of the other listed charsets covers -- confirm. */
1017 Charset_ID latin_a_char_to_charset[128] = {
1018 /* U+0100 */ LEADING_BYTE_LATIN_ISO8859_4,
1019 /* U+0101 */ LEADING_BYTE_LATIN_ISO8859_4,
1020 /* U+0102 */ LEADING_BYTE_LATIN_ISO8859_2,
1021 /* U+0103 */ LEADING_BYTE_LATIN_ISO8859_2,
1022 /* U+0104 */ LEADING_BYTE_LATIN_ISO8859_2,
1023 /* U+0105 */ LEADING_BYTE_LATIN_ISO8859_2,
1024 /* U+0106 */ LEADING_BYTE_LATIN_ISO8859_2,
1025 /* U+0107 */ LEADING_BYTE_LATIN_ISO8859_2,
1026 /* U+0108 */ LEADING_BYTE_LATIN_ISO8859_3,
1027 /* U+0109 */ LEADING_BYTE_LATIN_ISO8859_3,
1028 /* U+010A */ LEADING_BYTE_LATIN_ISO8859_3,
1029 /* U+010B */ LEADING_BYTE_LATIN_ISO8859_3,
1030 /* U+010C */ LEADING_BYTE_LATIN_ISO8859_2,
1031 /* U+010D */ LEADING_BYTE_LATIN_ISO8859_2,
1032 /* U+010E */ LEADING_BYTE_LATIN_ISO8859_2,
1033 /* U+010F */ LEADING_BYTE_LATIN_ISO8859_2,
1034 /* U+0110 */ LEADING_BYTE_LATIN_ISO8859_2,
1035 /* U+0111 */ LEADING_BYTE_LATIN_ISO8859_2,
1036 /* U+0112 */ LEADING_BYTE_LATIN_ISO8859_4,
1037 /* U+0113 */ LEADING_BYTE_LATIN_ISO8859_4,
1038 /* U+0114 */ LEADING_BYTE_UCS_BMP,
1039 /* U+0115 */ LEADING_BYTE_UCS_BMP,
1040 /* U+0116 */ LEADING_BYTE_LATIN_ISO8859_4,
1041 /* U+0117 */ LEADING_BYTE_LATIN_ISO8859_4,
1042 /* U+0118 */ LEADING_BYTE_LATIN_ISO8859_2,
1043 /* U+0119 */ LEADING_BYTE_LATIN_ISO8859_2,
1044 /* U+011A */ LEADING_BYTE_LATIN_ISO8859_2,
1045 /* U+011B */ LEADING_BYTE_LATIN_ISO8859_2,
1046 /* U+011C */ LEADING_BYTE_LATIN_ISO8859_3,
1047 /* U+011D */ LEADING_BYTE_LATIN_ISO8859_3,
1048 /* U+011E */ LEADING_BYTE_LATIN_ISO8859_3,
1049 /* U+011F */ LEADING_BYTE_LATIN_ISO8859_3,
1050 /* U+0120 */ LEADING_BYTE_LATIN_ISO8859_3,
1051 /* U+0121 */ LEADING_BYTE_LATIN_ISO8859_3,
1052 /* U+0122 */ LEADING_BYTE_LATIN_ISO8859_4,
1053 /* U+0123 */ LEADING_BYTE_LATIN_ISO8859_4,
1054 /* U+0124 */ LEADING_BYTE_LATIN_ISO8859_3,
1055 /* U+0125 */ LEADING_BYTE_LATIN_ISO8859_3,
1056 /* U+0126 */ LEADING_BYTE_LATIN_ISO8859_3,
1057 /* U+0127 */ LEADING_BYTE_LATIN_ISO8859_3,
1058 /* U+0128 */ LEADING_BYTE_LATIN_ISO8859_4,
1059 /* U+0129 */ LEADING_BYTE_LATIN_ISO8859_4,
1060 /* U+012A */ LEADING_BYTE_LATIN_ISO8859_4,
1061 /* U+012B */ LEADING_BYTE_LATIN_ISO8859_4,
1062 /* U+012C */ LEADING_BYTE_UCS_BMP,
1063 /* U+012D */ LEADING_BYTE_UCS_BMP,
1064 /* U+012E */ LEADING_BYTE_LATIN_ISO8859_4,
1065 /* U+012F */ LEADING_BYTE_LATIN_ISO8859_4,
1066 /* U+0130 */ LEADING_BYTE_LATIN_ISO8859_3,
1067 /* U+0131 */ LEADING_BYTE_LATIN_ISO8859_3,
1068 /* U+0132 */ LEADING_BYTE_JAPANESE_JISX0212,
1069 /* U+0133 */ LEADING_BYTE_JAPANESE_JISX0212,
1070 /* U+0134 */ LEADING_BYTE_LATIN_ISO8859_3,
1071 /* U+0135 */ LEADING_BYTE_LATIN_ISO8859_3,
1072 /* U+0136 */ LEADING_BYTE_LATIN_ISO8859_4,
1073 /* U+0137 */ LEADING_BYTE_LATIN_ISO8859_4,
1074 /* U+0138 */ LEADING_BYTE_LATIN_ISO8859_4,
1075 /* U+0139 */ LEADING_BYTE_LATIN_ISO8859_2,
1076 /* U+013A */ LEADING_BYTE_LATIN_ISO8859_2,
1077 /* U+013B */ LEADING_BYTE_LATIN_ISO8859_4,
1078 /* U+013C */ LEADING_BYTE_LATIN_ISO8859_4,
1079 /* U+013D */ LEADING_BYTE_LATIN_ISO8859_2,
1080 /* U+013E */ LEADING_BYTE_LATIN_ISO8859_2,
1081 /* U+013F */ LEADING_BYTE_JAPANESE_JISX0212,
1082 /* U+0140 */ LEADING_BYTE_JAPANESE_JISX0212,
1083 /* U+0141 */ LEADING_BYTE_LATIN_ISO8859_2,
1084 /* U+0142 */ LEADING_BYTE_LATIN_ISO8859_2,
1085 /* U+0143 */ LEADING_BYTE_LATIN_ISO8859_2,
1086 /* U+0144 */ LEADING_BYTE_LATIN_ISO8859_2,
1087 /* U+0145 */ LEADING_BYTE_LATIN_ISO8859_4,
1088 /* U+0146 */ LEADING_BYTE_LATIN_ISO8859_4,
1089 /* U+0147 */ LEADING_BYTE_LATIN_ISO8859_2,
1090 /* U+0148 */ LEADING_BYTE_LATIN_ISO8859_2,
1091 /* U+0149 */ LEADING_BYTE_JAPANESE_JISX0212,
1092 /* U+014A */ LEADING_BYTE_LATIN_ISO8859_4,
1093 /* U+014B */ LEADING_BYTE_LATIN_ISO8859_4,
1094 /* U+014C */ LEADING_BYTE_LATIN_ISO8859_4,
1095 /* U+014D */ LEADING_BYTE_LATIN_ISO8859_4,
1096 /* U+014E */ LEADING_BYTE_UCS_BMP,
1097 /* U+014F */ LEADING_BYTE_UCS_BMP,
1098 /* U+0150 */ LEADING_BYTE_LATIN_ISO8859_2,
1099 /* U+0151 */ LEADING_BYTE_LATIN_ISO8859_2,
1100 /* U+0152 */ LEADING_BYTE_JAPANESE_JISX0212,
1101 /* U+0153 */ LEADING_BYTE_JAPANESE_JISX0212,
1102 /* U+0154 */ LEADING_BYTE_LATIN_ISO8859_2,
1103 /* U+0155 */ LEADING_BYTE_LATIN_ISO8859_2,
1104 /* U+0156 */ LEADING_BYTE_LATIN_ISO8859_4,
1105 /* U+0157 */ LEADING_BYTE_LATIN_ISO8859_4,
1106 /* U+0158 */ LEADING_BYTE_LATIN_ISO8859_2,
1107 /* U+0159 */ LEADING_BYTE_LATIN_ISO8859_2,
1108 /* U+015A */ LEADING_BYTE_LATIN_ISO8859_2,
1109 /* U+015B */ LEADING_BYTE_LATIN_ISO8859_2,
1110 /* U+015C */ LEADING_BYTE_LATIN_ISO8859_3,
1111 /* U+015D */ LEADING_BYTE_LATIN_ISO8859_3,
1112 /* U+015E */ LEADING_BYTE_LATIN_ISO8859_2,
1113 /* U+015F */ LEADING_BYTE_LATIN_ISO8859_2,
1114 /* U+0160 */ LEADING_BYTE_LATIN_ISO8859_2,
1115 /* U+0161 */ LEADING_BYTE_LATIN_ISO8859_2,
1116 /* U+0162 */ LEADING_BYTE_LATIN_ISO8859_2,
1117 /* U+0163 */ LEADING_BYTE_LATIN_ISO8859_2,
1118 /* U+0164 */ LEADING_BYTE_LATIN_ISO8859_2,
1119 /* U+0165 */ LEADING_BYTE_LATIN_ISO8859_2,
1120 /* U+0166 */ LEADING_BYTE_LATIN_ISO8859_4,
1121 /* U+0167 */ LEADING_BYTE_LATIN_ISO8859_4,
1122 /* U+0168 */ LEADING_BYTE_LATIN_ISO8859_4,
1123 /* U+0169 */ LEADING_BYTE_LATIN_ISO8859_4,
1124 /* U+016A */ LEADING_BYTE_LATIN_ISO8859_4,
1125 /* U+016B */ LEADING_BYTE_LATIN_ISO8859_4,
1126 /* U+016C */ LEADING_BYTE_LATIN_ISO8859_3,
1127 /* U+016D */ LEADING_BYTE_LATIN_ISO8859_3,
1128 /* U+016E */ LEADING_BYTE_LATIN_ISO8859_2,
1129 /* U+016F */ LEADING_BYTE_LATIN_ISO8859_2,
1130 /* U+0170 */ LEADING_BYTE_LATIN_ISO8859_2,
1131 /* U+0171 */ LEADING_BYTE_LATIN_ISO8859_2,
1132 /* U+0172 */ LEADING_BYTE_LATIN_ISO8859_4,
1133 /* U+0173 */ LEADING_BYTE_LATIN_ISO8859_4,
1134 /* U+0174 */ LEADING_BYTE_JAPANESE_JISX0212,
1135 /* U+0175 */ LEADING_BYTE_JAPANESE_JISX0212,
1136 /* U+0176 */ LEADING_BYTE_JAPANESE_JISX0212,
1137 /* U+0177 */ LEADING_BYTE_JAPANESE_JISX0212,
1138 /* U+0178 */ LEADING_BYTE_JAPANESE_JISX0212,
1139 /* U+0179 */ LEADING_BYTE_LATIN_ISO8859_2,
1140 /* U+017A */ LEADING_BYTE_LATIN_ISO8859_2,
1141 /* U+017B */ LEADING_BYTE_LATIN_ISO8859_2,
1142 /* U+017C */ LEADING_BYTE_LATIN_ISO8859_2,
1143 /* U+017D */ LEADING_BYTE_LATIN_ISO8859_2,
1144 /* U+017E */ LEADING_BYTE_LATIN_ISO8859_2,
1145 /* U+017F */ LEADING_BYTE_UCS_BMP
/* Position-byte table for the code points U+0100 .. U+017F named in the
   per-entry comments.  Every value is written as an 8-bit code minus
   0x80, i.e. the code with its high bit cleared.
   NOTE(review): no initializer is present for U+0114, U+0115, U+012C,
   U+012D, U+0132, U+0133, U+013F, U+0140, U+0149, U+014E, U+014F,
   U+0152, U+0153, U+0174 .. U+0178 and U+017F (the same code points
   that latin_a_char_to_charset assigns to LEADING_BYTE_UCS_BMP or
   LEADING_BYTE_JAPANESE_JISX0212).  Because the initializers are
   positional, every value after the first gap sits at a lower index
   than its U+ comment indicates -- confirm the table against its
   intended 128 entries before relying on it. */
1148 unsigned char latin_a_char_to_byte1[128] = {
1149 /* U+0100 */ 0xC0 - 0x80,
1150 /* U+0101 */ 0xE0 - 0x80,
1151 /* U+0102 */ 0xC3 - 0x80,
1152 /* U+0103 */ 0xE3 - 0x80,
1153 /* U+0104 */ 0xA1 - 0x80,
1154 /* U+0105 */ 0xB1 - 0x80,
1155 /* U+0106 */ 0xC6 - 0x80,
1156 /* U+0107 */ 0xE6 - 0x80,
1157 /* U+0108 */ 0xC6 - 0x80,
1158 /* U+0109 */ 0xE6 - 0x80,
1159 /* U+010A */ 0xC5 - 0x80,
1160 /* U+010B */ 0xE5 - 0x80,
1161 /* U+010C */ 0xC8 - 0x80,
1162 /* U+010D */ 0xE8 - 0x80,
1163 /* U+010E */ 0xCF - 0x80,
1164 /* U+010F */ 0xEF - 0x80,
1165 /* U+0110 */ 0xD0 - 0x80,
1166 /* U+0111 */ 0xF0 - 0x80,
1167 /* U+0112 */ 0xAA - 0x80,
1168 /* U+0113 */ 0xBA - 0x80,
1171 /* U+0116 */ 0xCC - 0x80,
1172 /* U+0117 */ 0xEC - 0x80,
1173 /* U+0118 */ 0xCA - 0x80,
1174 /* U+0119 */ 0xEA - 0x80,
1175 /* U+011A */ 0xCC - 0x80,
1176 /* U+011B */ 0xEC - 0x80,
1177 /* U+011C */ 0xD8 - 0x80,
1178 /* U+011D */ 0xF8 - 0x80,
1179 /* U+011E */ 0xAB - 0x80,
1180 /* U+011F */ 0xBB - 0x80,
1181 /* U+0120 */ 0xD5 - 0x80,
1182 /* U+0121 */ 0xF5 - 0x80,
1183 /* U+0122 */ 0xAB - 0x80,
1184 /* U+0123 */ 0xBB - 0x80,
1185 /* U+0124 */ 0xA6 - 0x80,
1186 /* U+0125 */ 0xB6 - 0x80,
1187 /* U+0126 */ 0xA1 - 0x80,
1188 /* U+0127 */ 0xB1 - 0x80,
1189 /* U+0128 */ 0xA5 - 0x80,
1190 /* U+0129 */ 0xB5 - 0x80,
1191 /* U+012A */ 0xCF - 0x80,
1192 /* U+012B */ 0xEF - 0x80,
1195 /* U+012E */ 0xC7 - 0x80,
1196 /* U+012F */ 0xE7 - 0x80,
1197 /* U+0130 */ 0xA9 - 0x80,
1198 /* U+0131 */ 0xB9 - 0x80,
1201 /* U+0134 */ 0xAC - 0x80,
1202 /* U+0135 */ 0xBC - 0x80,
1203 /* U+0136 */ 0xD3 - 0x80,
1204 /* U+0137 */ 0xF3 - 0x80,
1205 /* U+0138 */ 0xA2 - 0x80,
1206 /* U+0139 */ 0xC5 - 0x80,
1207 /* U+013A */ 0xE5 - 0x80,
1208 /* U+013B */ 0xA6 - 0x80,
1209 /* U+013C */ 0xB6 - 0x80,
1210 /* U+013D */ 0xA5 - 0x80,
1211 /* U+013E */ 0xB5 - 0x80,
1214 /* U+0141 */ 0xA3 - 0x80,
1215 /* U+0142 */ 0xB3 - 0x80,
1216 /* U+0143 */ 0xD1 - 0x80,
1217 /* U+0144 */ 0xF1 - 0x80,
1218 /* U+0145 */ 0xD1 - 0x80,
1219 /* U+0146 */ 0xF1 - 0x80,
1220 /* U+0147 */ 0xD2 - 0x80,
1221 /* U+0148 */ 0xF2 - 0x80,
1223 /* U+014A */ 0xBD - 0x80,
1224 /* U+014B */ 0xBF - 0x80,
1225 /* U+014C */ 0xD2 - 0x80,
1226 /* U+014D */ 0xF2 - 0x80,
1229 /* U+0150 */ 0xD5 - 0x80,
1230 /* U+0151 */ 0xF5 - 0x80,
1233 /* U+0154 */ 0xC0 - 0x80,
1234 /* U+0155 */ 0xE0 - 0x80,
1235 /* U+0156 */ 0xA3 - 0x80,
1236 /* U+0157 */ 0xB3 - 0x80,
1237 /* U+0158 */ 0xD8 - 0x80,
1238 /* U+0159 */ 0xF8 - 0x80,
1239 /* U+015A */ 0xA6 - 0x80,
1240 /* U+015B */ 0xB6 - 0x80,
1241 /* U+015C */ 0xDE - 0x80,
1242 /* U+015D */ 0xFE - 0x80,
1243 /* U+015E */ 0xAA - 0x80,
1244 /* U+015F */ 0xBA - 0x80,
1245 /* U+0160 */ 0xA9 - 0x80,
1246 /* U+0161 */ 0xB9 - 0x80,
1247 /* U+0162 */ 0xDE - 0x80,
1248 /* U+0163 */ 0xFE - 0x80,
1249 /* U+0164 */ 0xAB - 0x80,
1250 /* U+0165 */ 0xBB - 0x80,
1251 /* U+0166 */ 0xAC - 0x80,
1252 /* U+0167 */ 0xBC - 0x80,
1253 /* U+0168 */ 0xDD - 0x80,
1254 /* U+0169 */ 0xFD - 0x80,
1255 /* U+016A */ 0xDE - 0x80,
1256 /* U+016B */ 0xFE - 0x80,
1257 /* U+016C */ 0xDD - 0x80,
1258 /* U+016D */ 0xFD - 0x80,
1259 /* U+016E */ 0xD9 - 0x80,
1260 /* U+016F */ 0xF9 - 0x80,
1261 /* U+0170 */ 0xDB - 0x80,
1262 /* U+0171 */ 0xFB - 0x80,
1263 /* U+0172 */ 0xD9 - 0x80,
1264 /* U+0173 */ 0xF9 - 0x80,
1270 /* U+0179 */ 0xAC - 0x80,
1271 /* U+017A */ 0xBC - 0x80,
1272 /* U+017B */ 0xAF - 0x80,
1273 /* U+017C */ 0xBF - 0x80,
1274 /* U+017D */ 0xAE - 0x80,
1275 /* U+017E */ 0xBE - 0x80,
1279 unsigned char latin_a_char_to_byte2[128] = {
1410 Lisp_Object Vutf_2000_version;
1414 int leading_code_private_11;
1417 Lisp_Object Qcharsetp;
1419 /* Qdoc_string, Qdimension, Qchars defined in general.c */
/* Property-name symbols.  Fmake_charset() compares the keywords of its
   PROPS list against Qregistry, Qfinal, Qgraphic, Qdirection,
   Qshort_name and Qlong_name (among others) with EQ. */
1420 Lisp_Object Qregistry, Qfinal, Qgraphic;
1421 Lisp_Object Qdirection;
1422 Lisp_Object Qreverse_direction_charset;
1423 Lisp_Object Qleading_byte;
1424 Lisp_Object Qshort_name, Qlong_name;
1438 Qcyrillic_iso8859_5,
1440 Qjapanese_jisx0208_1978,
1445 Qchinese_cns11643_1,
1446 Qchinese_cns11643_2,
1448 Qchinese_cns11643_3,
1449 Qchinese_cns11643_4,
1450 Qchinese_cns11643_5,
1451 Qchinese_cns11643_6,
1452 Qchinese_cns11643_7,
1454 Qlatin_viscii_lower,
1455 Qlatin_viscii_upper,
/* Accepted values of the 'direction property: Fmake_charset() maps Ql2r
   to CHARSET_LEFT_TO_RIGHT and Qr2l to CHARSET_RIGHT_TO_LEFT. */
1461 Lisp_Object Ql2r, Qr2l;
/* Registry of charsets keyed by name: make_charset() stores into it with
   Fputhash and Ffind_charset() reads it with Fgethash. */
1463 Lisp_Object Vcharset_hash_table;
/* Allocation cursors used by get_unallocated_leading_byte(): each is
   compared against MAX_LEADING_BYTE_PRIVATE_1 / MAX_LEADING_BYTE_PRIVATE_2
   respectively and then post-incremented to yield a fresh leading byte. */
1465 static Charset_ID next_allocated_1_byte_leading_byte;
1466 static Charset_ID next_allocated_2_byte_leading_byte;
1468 /* Composite characters are characters constructed by overstriking two
1469 or more regular characters.
1471 1) The old Mule implementation involves storing composite characters
1472 in a buffer as a tag followed by all of the actual characters
1473 used to make up the composite character. I think this is a bad
1474 idea; it greatly complicates code that wants to handle strings
1475 one character at a time because it has to deal with the possibility
1476 of great big ungainly characters. It's much more reasonable to
1477 simply store an index into a table of composite characters.
1479 2) The current implementation only allows for 16,384 separate
1480 composite characters over the lifetime of the XEmacs process.
1481 This could become a potential problem if the user
1482 edited lots of different files that use composite characters.
1483 Due to FSF bogosity, increasing the number of allowable
1484 composite characters under Mule would decrease the number
1485 of possible faces that can exist. Mule already has shrunk
1486 this to 2048, and further shrinkage would become uncomfortable.
1487 No such problems exist in XEmacs.
1489 Composite characters could be represented as 0x80 C1 C2 C3,
1490 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1491 for slightly under 2^20 (one million) composite characters
1492 over the XEmacs process lifetime, and you only need to
1493 increase the size of a Mule character from 19 to 21 bits.
1494 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1495 85 million (slightly over 2^26) composite characters. */
1498 /************************************************************************/
1499 /* Basic Emchar functions */
1500 /************************************************************************/
1502 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1503 string in STR. Returns the number of bytes stored.
1504 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Store the encoded form of character C at STR.

   Two encodings appear in the body:

   - A UTF-8-style form selected by the magnitude of C.  Each trailing
     byte is 0x80 plus six bits of C, most significant group first; the
     lead byte carries the remaining high bits plus a length marker
     (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc).

   - A leading-byte form: BREAKUP_CHAR splits C into CHARSET, C1 and C2,
     CHAR_LEADING_BYTE gives the leading byte LB, and a private leading
     byte is preceded by PRIVATE_LEADING_BYTE_PREFIX (lb).

   NOTE(review): presumably the two forms are build-time alternatives --
   confirm which one is compiled in. */
1508 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1514 Lisp_Object charset;
/* Two bytes: values up to 0x7ff. */
1523 else if ( c <= 0x7ff )
1525 *p++ = (c >> 6) | 0xc0;
1526 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: values up to 0xffff. */
1528 else if ( c <= 0xffff )
1530 *p++ = (c >> 12) | 0xe0;
1531 *p++ = ((c >> 6) & 0x3f) | 0x80;
1532 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: values up to 0x1fffff. */
1534 else if ( c <= 0x1fffff )
1536 *p++ = (c >> 18) | 0xf0;
1537 *p++ = ((c >> 12) & 0x3f) | 0x80;
1538 *p++ = ((c >> 6) & 0x3f) | 0x80;
1539 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: values up to 0x3ffffff. */
1541 else if ( c <= 0x3ffffff )
1543 *p++ = (c >> 24) | 0xf8;
1544 *p++ = ((c >> 18) & 0x3f) | 0x80;
1545 *p++ = ((c >> 12) & 0x3f) | 0x80;
1546 *p++ = ((c >> 6) & 0x3f) | 0x80;
1547 *p++ = (c & 0x3f) | 0x80;
/* Widest form: six bytes, lead byte marker 0xfc. */
1551 *p++ = (c >> 30) | 0xfc;
1552 *p++ = ((c >> 24) & 0x3f) | 0x80;
1553 *p++ = ((c >> 18) & 0x3f) | 0x80;
1554 *p++ = ((c >> 12) & 0x3f) | 0x80;
1555 *p++ = ((c >> 6) & 0x3f) | 0x80;
1556 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte form: split C, then emit the private prefix if needed. */
1559 BREAKUP_CHAR (c, charset, c1, c2);
1560 lb = CHAR_LEADING_BYTE (c);
1561 if (LEADING_BYTE_PRIVATE_P (lb))
1562 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
/* Control-1 characters get special treatment here. */
1564 if (EQ (charset, Vcharset_control_1))
1573 /* Return the first character from a Mule-encoded string in STR,
1574 assuming it's non-ASCII. Do not call this directly.
1575 Use the macro charptr_emchar() instead. */
/* Decode the character that starts at STR.

   UTF-8-style path: the first byte B is compared against 0xf8, 0xf0,
   0xe0 and 0xc0 in descending order (presumably to choose the number of
   trailing bytes LEN -- TODO confirm), and a loop then folds the low six
   bits of each further byte into CH.

   Leading-byte path: the first byte I0 is the leading byte.  Control-1
   characters are returned directly as the following byte minus 0x20;
   otherwise the charset is found with CHARSET_BY_LEADING_BYTE and the
   result is built with MAKE_CHAR from the position bytes I1 and I2
   (I2 starts out as 0; the test on XCHARSET_DIMENSION == 2 guards the
   two-dimensional case). */
1578 non_ascii_charptr_emchar (CONST Bufbyte *str)
1591 else if ( b >= 0xf8 )
1596 else if ( b >= 0xf0 )
1601 else if ( b >= 0xe0 )
1606 else if ( b >= 0xc0 )
/* Accumulate six payload bits per trailing byte. */
1616 for( ; len > 0; len-- )
1619 ch = ( ch << 6 ) | ( b & 0x3f );
1623 Bufbyte i0 = *str, i1, i2 = 0;
1624 Lisp_Object charset;
1626 if (i0 == LEADING_BYTE_CONTROL_1)
1627 return (Emchar) (*++str - 0x20);
1629 if (LEADING_BYTE_PREFIX_P (i0))
1634 charset = CHARSET_BY_LEADING_BYTE (i0);
1635 if (XCHARSET_DIMENSION (charset) == 2)
1638 return MAKE_CHAR (charset, i1, i2);
1642 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1643 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check for a non-ASCII Emchar CH.

   CH is split into three fields with CHAR_FIELD1/2/3.  Two cases follow,
   each with its own `charset' local:

   - One range-checks F2 against the official and private FIELD2 bounds,
     looks at whether F3 is 0x20 or 0x7F, and derives the charset from
     F2 + FIELD2_TO_OFFICIAL_LEADING_BYTE.

   - The other range-checks F1 against the FIELD1 bounds, tests F2 and F3
     against 0x20, optionally consults the composite-character hash
     table, and derives the charset from F1 using the official or the
     private offset depending on F1 <= MAX_CHAR_FIELD1_OFFICIAL.

   In both cases the last visible result is whether the charset has 96
   characters per dimension (XCHARSET_CHARS == 96).
   NOTE(review): presumably this is because positions 0x20 and 0x7F only
   exist in 96-character sets -- confirm. */
1647 non_ascii_valid_char_p (Emchar ch)
1651 /* Must have only lowest 19 bits set */
1655 f1 = CHAR_FIELD1 (ch);
1656 f2 = CHAR_FIELD2 (ch);
1657 f3 = CHAR_FIELD3 (ch);
1661 Lisp_Object charset;
/* F2 must lie in the official range or in the private range. */
1663 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1664 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1665 f2 > MAX_CHAR_FIELD2_PRIVATE)
1670 if (f3 != 0x20 && f3 != 0x7F)
1674 NOTE: This takes advantage of the fact that
1675 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1676 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1678 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1679 return (XCHARSET_CHARS (charset) == 96);
1683 Lisp_Object charset;
/* F1 must lie in the official range or in the private range. */
1685 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1686 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1687 f1 > MAX_CHAR_FIELD1_PRIVATE)
1689 if (f2 < 0x20 || f3 < 0x20)
1692 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are looked up by CH in the composite table. */
1693 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1695 if (UNBOUNDP (Fgethash (make_int (ch),
1696 Vcomposite_char_char2string_hash_table,
1701 #endif /* ENABLE_COMPOSITE_CHARS */
1703 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
/* Pick the leading-byte offset that matches the range F1 falls in. */
1706 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1708 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1711 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1713 return (XCHARSET_CHARS (charset) == 96);
1719 /************************************************************************/
1720 /* Basic string functions */
1721 /************************************************************************/
1723 /* Copy the character pointed to by PTR into STR, assuming it's
1724 non-ASCII. Do not call this directly. Use the macro
1725 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR to STR and return the number
   of bytes written (strptr + 1 - str).

   The switch dispatches on the representation length derived from the
   first byte and deliberately falls through, so a character of length N
   executes the last N - 1 copy statements.

   NOTE(review): the switch reads *strptr, i.e. the first byte of the
   destination, and no statement copying the first byte from PTR is
   visible before it -- confirm that the leading byte has been stored
   by the time the switch runs. */
1728 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1730 Bufbyte *strptr = str;
1732 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1734 /* Notice fallthrough. */
1736 case 6: *++strptr = *ptr++;
1737 case 5: *++strptr = *ptr++;
1739 case 4: *++strptr = *ptr++;
1740 case 3: *++strptr = *ptr++;
/* Last byte: PTR is not advanced any further. */
1741 case 2: *++strptr = *ptr;
1746 return strptr + 1 - str;
1750 /************************************************************************/
1751 /* streams of Emchars */
1752 /************************************************************************/
1754 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1755 The functions below are not meant to be called directly; use
1756 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM given its already
   read first byte CH.

   CH is stored as str[0]; the switch on REP_BYTES_BY_FIRST_BYTE (ch)
   then falls through a chain of up to five Lstream_getc calls, each
   result being appended to the local buffer.  The assembled bytes are
   decoded with charptr_emchar and that value is returned. */
1759 Lstream_get_emchar_1 (Lstream *stream, int ch)
/* Scratch buffer sized for the longest encoded character. */
1761 Bufbyte str[MAX_EMCHAR_LEN];
1762 Bufbyte *strptr = str;
1764 str[0] = (Bufbyte) ch;
1765 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1767 /* Notice fallthrough. */
1770 ch = Lstream_getc (stream);
1772 *++strptr = (Bufbyte) ch;
1774 ch = Lstream_getc (stream);
1776 *++strptr = (Bufbyte) ch;
1779 ch = Lstream_getc (stream);
1781 *++strptr = (Bufbyte) ch;
1783 ch = Lstream_getc (stream);
1785 *++strptr = (Bufbyte) ch;
1787 ch = Lstream_getc (stream);
1789 *++strptr = (Bufbyte) ch;
1794 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer with
   set_charptr_emchar, then pass the resulting LEN bytes to
   Lstream_write and return that call's result. */
1798 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1800 Bufbyte str[MAX_EMCHAR_LEN];
1801 Bytecount len = set_charptr_emchar (str, ch);
1802 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer
   with set_charptr_emchar, then hand the resulting LEN bytes to
   Lstream_unread. */
1806 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1808 Bufbyte str[MAX_EMCHAR_LEN];
1809 Bytecount len = set_charptr_emchar (str, ch);
1810 Lstream_unread (stream, str, len);
1814 /************************************************************************/
1815 /* charset object */
1816 /************************************************************************/
/* Mark method for charset objects (registered through
   DEFINE_LRECORD_IMPLEMENTATION).  Passes the short_name, long_name,
   doc_string, registry and ccl_program slots of OBJ to MARKOBJ. */
1819 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1821 struct Lisp_Charset *cs = XCHARSET (obj);
1823 markobj (cs->short_name);
1824 markobj (cs->long_name);
1825 markobj (cs->doc_string);
1826 markobj (cs->registry);
1827 markobj (cs->ccl_program);
/* Print method for charset objects (registered through
   DEFINE_LRECORD_IMPLEMENTATION).

   Output has the shape
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING TYPE DIR cols=N gN
       final='C' reg=REGISTRY 0xUID>
   where NAME and REGISTRY are printed with escapeflag 0 and the three
   strings with escapeflag 1.

   The error "printing unreadable object" is raised on a path whose
   condition is not shown on these lines (presumably when readable
   printing is requested -- TODO confirm).

   NOTE(review): both sprintf calls write into `buf' with no length
   limit; verify that buf is large enough for the longest expansion. */
1832 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1834 struct Lisp_Charset *cs = XCHARSET (obj);
1838 error ("printing unreadable object #<charset %s 0x%x>",
1839 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1842 write_c_string ("#<charset ", printcharfun);
1843 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1844 write_c_string (" ", printcharfun);
1845 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1846 write_c_string (" ", printcharfun);
1847 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1848 write_c_string (" ", printcharfun);
1849 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* Type, direction, column count, graphic half and final byte. */
1850 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1851 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1852 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1853 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1855 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1856 CHARSET_COLUMNS (cs),
1857 CHARSET_GRAPHIC (cs),
1858 CHARSET_FINAL (cs));
1859 write_c_string (buf, printcharfun);
1860 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* Trailer: the object's uid from its lcrecord header. */
1861 sprintf (buf, " 0x%x>", cs->header.uid);
1862 write_c_string (buf, printcharfun);
/* Layout description for struct Lisp_Charset: an XD_LISP_OBJECT entry
   located at the offset of the `name' member with a count of 7.
   NOTE(review): presumably this means seven consecutive Lisp_Object
   slots starting at `name' -- confirm against the struct definition. */
1865 static const struct lrecord_description charset_description[] = {
1866 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
/* Define the "charset" lrecord type, wiring in mark_charset,
   print_charset and the description above. */
1870 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1871 mark_charset, print_charset, 0, 0, 0, charset_description,
1872 struct Lisp_Charset);
/* make_charset: allocate and register a charset object.

   A struct Lisp_Charset lcrecord is allocated and every argument is
   stored in the corresponding slot (ID, NAME, SHORT_NAME, LONG_NAME,
   REP_BYTES, DIRECTION, TYPE, COLUMNS, GRAPHIC, FINAL, DOC and the
   registry REG).  The CCL program and the reverse-direction charset
   start out as Qnil.

   Dimension and characters-per-dimension are derived from TYPE:
     CHARSET_TYPE_94       1 x 94
     CHARSET_TYPE_96       1 x 96
     CHARSET_TYPE_94X94    2 x 94
     CHARSET_TYPE_96X96    2 x 96
     CHARSET_TYPE_128X128  2 x 128
     CHARSET_TYPE_256X256  2 x 256

   The object is then recorded in the lookup tables, each store being
   preceded by an assert that the slot is still nil:
     - charset_by_attributes, indexed by type and final (one variant
       also indexes by direction; presumably the two are build-time
       alternatives -- TODO confirm);
     - charset_by_leading_byte[id - MIN_LEADING_BYTE];
     - rep_bytes_by_first_byte[id] receives REP_BYTES;
     - Vcharset_hash_table maps NAME to the object via Fputhash.

   NOTE(review): the comment that starts "some charsets do not have
   final characters" has no terminator on the lines following it; check
   that it is closed before the assert that comes next. */
1873 /* Make a new charset. */
1876 make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
1877 unsigned char type, unsigned char columns, unsigned char graphic,
1878 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1879 Lisp_Object long_name, Lisp_Object doc,
1883 struct Lisp_Charset *cs =
1884 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1885 XSETCHARSET (obj, cs);
1887 CHARSET_ID (cs) = id;
1888 CHARSET_NAME (cs) = name;
1889 CHARSET_SHORT_NAME (cs) = short_name;
1890 CHARSET_LONG_NAME (cs) = long_name;
1891 CHARSET_REP_BYTES (cs) = rep_bytes;
1892 CHARSET_DIRECTION (cs) = direction;
1893 CHARSET_TYPE (cs) = type;
1894 CHARSET_COLUMNS (cs) = columns;
1895 CHARSET_GRAPHIC (cs) = graphic;
1896 CHARSET_FINAL (cs) = final;
1897 CHARSET_DOC_STRING (cs) = doc;
1898 CHARSET_REGISTRY (cs) = reg;
1899 CHARSET_CCL_PROGRAM (cs) = Qnil;
1900 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1902 switch ( CHARSET_TYPE (cs) )
1904 case CHARSET_TYPE_94:
1905 CHARSET_DIMENSION (cs) = 1;
1906 CHARSET_CHARS (cs) = 94;
1908 case CHARSET_TYPE_96:
1909 CHARSET_DIMENSION (cs) = 1;
1910 CHARSET_CHARS (cs) = 96;
1912 case CHARSET_TYPE_94X94:
1913 CHARSET_DIMENSION (cs) = 2;
1914 CHARSET_CHARS (cs) = 94;
1916 case CHARSET_TYPE_96X96:
1917 CHARSET_DIMENSION (cs) = 2;
1918 CHARSET_CHARS (cs) = 96;
1921 case CHARSET_TYPE_128X128:
1922 CHARSET_DIMENSION (cs) = 2;
1923 CHARSET_CHARS (cs) = 128;
1925 case CHARSET_TYPE_256X256:
1926 CHARSET_DIMENSION (cs) = 2;
1927 CHARSET_CHARS (cs) = 256;
1934 /* some charsets do not have final characters. This includes
1935 ASCII, Control-1, Composite, and the two faux private
1938 assert (NILP (charset_by_attributes[type][final]));
1939 charset_by_attributes[type][final] = obj;
1941 assert (NILP (charset_by_attributes[type][final][direction]));
1942 charset_by_attributes[type][final][direction] = obj;
1946 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1947 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1950 /* official leading byte */
1951 rep_bytes_by_first_byte[id] = rep_bytes;
1954 /* Some charsets are "faux" and don't have names or really exist at
1955 all except in the leading-byte table. */
1957 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte.

   Each of the two allocation cursors is checked against its limit
   (MAX_LEADING_BYTE_PRIVATE_1 / MAX_LEADING_BYTE_PRIVATE_2) before being
   post-incremented into LB.  The error "No more character sets free for
   this dimension" carries DIMENSION as its datum.

   NOTE(review): presumably DIMENSION selects which cursor is used (1 for
   the one-byte cursor, otherwise the two-byte one) -- confirm. */
1962 get_unallocated_leading_byte (int dimension)
1968 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1971 lb = next_allocated_1_byte_leading_byte++;
1975 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1978 lb = next_allocated_2_byte_leading_byte++;
1983 ("No more character sets free for this dimension",
1984 make_int (dimension));
1990 /************************************************************************/
1991 /* Basic charset Lisp functions */
1992 /************************************************************************/
/* Lisp predicate `charsetp': yields Qt when OBJECT satisfies CHARSETP
   and Qnil otherwise. */
1994 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1995 Return non-nil if OBJECT is a charset.
1999 return CHARSETP (object) ? Qt : Qnil;
/* Lisp function `find-charset'.  A charset object is returned as is;
   any other argument must pass CHECK_SYMBOL and is then looked up in
   Vcharset_hash_table, with Qnil as the not-found result. */
2002 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2003 Retrieve the charset of the given name.
2004 If CHARSET-OR-NAME is a charset object, it is simply returned.
2005 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2006 nil is returned. Otherwise the associated charset object is returned.
2010 if (CHARSETP (charset_or_name))
2011 return charset_or_name;
2013 CHECK_SYMBOL (charset_or_name);
2014 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp function `get-charset'.  Resolves NAME with Ffind_charset; the
   failure path signals "No such charset" with NAME attached. */
2017 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2018 Retrieve the charset of the given name.
2019 Same as `find-charset' except an error is signalled if there is no such
2020 charset instead of returning nil.
2024 Lisp_Object charset = Ffind_charset (name);
2027 signal_simple_error ("No such charset", name);
2031 /* We store the charsets in hash tables with the names as the key and the
2032 actual charset object as the value. Occasionally we need to use them
2033 in a list format. These routines provide us with that. */
/* Closure handed through elisp_maphash to add_charset_to_list_mapper:
   it carries a pointer to the list being accumulated. */
2034 struct charset_list_closure
2036 Lisp_Object *charset_list;
/* elisp_maphash callback used by Fcharset_list.  Conses the name of the
   charset VALUE onto the front of the list that the closure points to.
   KEY is not used on the lines shown here. */
2040 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2041 void *charset_list_closure)
2043 /* This function can GC */
2044 struct charset_list_closure *chcl =
2045 (struct charset_list_closure*) charset_list_closure;
2046 Lisp_Object *charset_list = chcl->charset_list;
/* Prepend, so the caller's list variable is updated in place. */
2048 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp function `charset-list'.  Walks Vcharset_hash_table with
   elisp_maphash, letting add_charset_to_list_mapper cons each charset's
   name onto the local list, and returns that list.  The list variable
   is protected with GCPRO1 because the mapper allocates conses. */
2052 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2053 Return a list of the names of all defined charsets.
2057 Lisp_Object charset_list = Qnil;
2058 struct gcpro gcpro1;
2059 struct charset_list_closure charset_list_closure;
2061 GCPRO1 (charset_list);
2062 charset_list_closure.charset_list = &charset_list;
2063 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2064 &charset_list_closure);
2067 return charset_list;
/* Lisp function `charset-name'.  The argument is resolved through
   Fget_charset, so a charset object or a charset name is accepted, and
   the XCHARSET_NAME of the result is returned. */
2070 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2071 Return the name of the given charset.
2075 return XCHARSET_NAME (Fget_charset (charset));
2078 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2079 Define a new character set.
2080 This function is for use with Mule support.
2081 NAME is a symbol, the name by which the character set is normally referred.
2082 DOC-STRING is a string describing the character set.
2083 PROPS is a property list, describing the specific nature of the
2084 character set. Recognized properties are:
2086 'short-name Short version of the charset name (ex: Latin-1)
2087 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2088 'registry A regular expression matching the font registry field for
2090 'dimension Number of octets used to index a character in this charset.
2091 Either 1 or 2. Defaults to 1.
2092 'columns Number of columns used to display a character in this charset.
2093 Only used in TTY mode. (Under X, the actual width of a
2094 character can be derived from the font used to display the
2095 characters.) If unspecified, defaults to the dimension
2096 (this is almost always the correct value).
2097 'chars Number of characters in each dimension (94 or 96).
2098 Defaults to 94. Note that if the dimension is 2, the
2099 character set thus described is 94x94 or 96x96.
2100 'final Final byte of ISO 2022 escape sequence. Must be
2101 supplied. Each combination of (DIMENSION, CHARS) defines a
2102 separate namespace for final bytes. Note that ISO
2103 2022 restricts the final byte to the range
2104 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2105 dimension == 2. Note also that final bytes in the range
2106 0x30 - 0x3F are reserved for user-defined (not official)
2108 'graphic 0 (use left half of font on output) or 1 (use right half
2109 of font on output). Defaults to 0. For example, for
2110 a font whose registry is ISO8859-1, the left half
2111 (octets 0x20 - 0x7F) is the `ascii' character set, while
2112 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2113 character set. With 'graphic set to 0, the octets
2114 will have their high bit cleared; with it set to 1,
2115 the octets will have their high bit set.
2116 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2118 'ccl-program A compiled CCL program used to convert a character in
2119 this charset into an index into the font. This is in
2120 addition to the 'graphic property. The CCL program
2121 is passed the octets of the character, with the high
2122 bit cleared and set depending upon whether the value
2123 of the 'graphic property is 0 or 1.
2125 (name, doc_string, props))
2127 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2128 int direction = CHARSET_LEFT_TO_RIGHT;
2130 Lisp_Object registry = Qnil;
2131 Lisp_Object charset;
2132 Lisp_Object rest, keyword, value;
2133 Lisp_Object ccl_program = Qnil;
2134 Lisp_Object short_name = Qnil, long_name = Qnil;
2136 CHECK_SYMBOL (name);
2137 if (!NILP (doc_string))
2138 CHECK_STRING (doc_string);
2140 charset = Ffind_charset (name);
2141 if (!NILP (charset))
2142 signal_simple_error ("Cannot redefine existing charset", name);
2144 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2146 if (EQ (keyword, Qshort_name))
2148 CHECK_STRING (value);
2152 if (EQ (keyword, Qlong_name))
2154 CHECK_STRING (value);
2158 else if (EQ (keyword, Qdimension))
2161 dimension = XINT (value);
2162 if (dimension < 1 || dimension > 2)
2163 signal_simple_error ("Invalid value for 'dimension", value);
2166 else if (EQ (keyword, Qchars))
2169 chars = XINT (value);
2170 if (chars != 94 && chars != 96)
2171 signal_simple_error ("Invalid value for 'chars", value);
2174 else if (EQ (keyword, Qcolumns))
2177 columns = XINT (value);
2178 if (columns != 1 && columns != 2)
2179 signal_simple_error ("Invalid value for 'columns", value);
2182 else if (EQ (keyword, Qgraphic))
2185 graphic = XINT (value);
2186 if (graphic < 0 || graphic > 1)
2187 signal_simple_error ("Invalid value for 'graphic", value);
2190 else if (EQ (keyword, Qregistry))
2192 CHECK_STRING (value);
2196 else if (EQ (keyword, Qdirection))
2198 if (EQ (value, Ql2r))
2199 direction = CHARSET_LEFT_TO_RIGHT;
2200 else if (EQ (value, Qr2l))
2201 direction = CHARSET_RIGHT_TO_LEFT;
2203 signal_simple_error ("Invalid value for 'direction", value);
2206 else if (EQ (keyword, Qfinal))
2208 CHECK_CHAR_COERCE_INT (value);
2209 final = XCHAR (value);
2210 if (final < '0' || final > '~')
2211 signal_simple_error ("Invalid value for 'final", value);
2214 else if (EQ (keyword, Qccl_program))
2216 CHECK_VECTOR (value);
2217 ccl_program = value;
2221 signal_simple_error ("Unrecognized property", keyword);
2225 error ("'final must be specified");
2226 if (dimension == 2 && final > 0x5F)
2228 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2232 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2234 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2236 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2237 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2239 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2246 /* id = CHARSET_ID_OFFSET_94 + final; */
2247 id = get_unallocated_leading_byte (dimension);
2249 else if (chars == 96)
2251 id = get_unallocated_leading_byte (dimension);
2258 else if (dimension == 2)
2262 id = get_unallocated_leading_byte (dimension);
2264 else if (chars == 96)
2266 id = get_unallocated_leading_byte (dimension);
2278 id = get_unallocated_leading_byte (dimension);
2281 if (NILP (doc_string))
2282 doc_string = build_string ("");
2284 if (NILP (registry))
2285 registry = build_string ("");
2287 if (NILP (short_name))
2288 XSETSTRING (short_name, XSYMBOL (name)->name);
2290 if (NILP (long_name))
2291 long_name = doc_string;
2294 columns = dimension;
2295 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
2296 final, direction, short_name, long_name, doc_string, registry);
2297 if (!NILP (ccl_program))
2298 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.

   Builds a new charset named NEW_NAME that copies CHARSET's type,
   columns, dimension, graphic, final byte, doc string, short and long
   names and registry, but with the opposite direction, and then links
   the two charsets to each other through their reverse-direction-charset
   slots.

   CHARSET is resolved with Fget_charset; an error is signalled if it
   already has a reverse-direction charset.  NEW_NAME must be a symbol
   that Ffind_charset does not already know, otherwise
   "Cannot redefine existing charset" is signalled.  The new charset gets
   a fresh leading byte from get_unallocated_leading_byte (dimension),
   and make_charset receives dimension + 2 as its third argument.

   NOTE(review): no return statement is visible in this block; the doc
   string says the new charset is returned -- confirm.  */
2302 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2304 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2305 NEW-NAME is the name of the new charset. Return the new charset.
2307 (charset, new_name))
2309 Lisp_Object new_charset = Qnil;
2310 int id, dimension, columns, graphic, final;
2311 int direction, type;
2312 Lisp_Object registry, doc_string, short_name, long_name;
2313 struct Lisp_Charset *cs;
2315 charset = Fget_charset (charset);
2316 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2317 signal_simple_error ("Charset already has reverse-direction charset",
2320 CHECK_SYMBOL (new_name);
2321 if (!NILP (Ffind_charset (new_name)))
2322 signal_simple_error ("Cannot redefine existing charset", new_name);
2324 cs = XCHARSET (charset);
/* Copy every attribute of the source charset except its direction. */
2326 type = CHARSET_TYPE (cs);
2327 columns = CHARSET_COLUMNS (cs);
2328 dimension = CHARSET_DIMENSION (cs);
2329 id = get_unallocated_leading_byte (dimension);
2331 graphic = CHARSET_GRAPHIC (cs);
2332 final = CHARSET_FINAL (cs);
/* Start from right-to-left and flip back when the source is itself
   right-to-left, so the result is always the opposite direction. */
2333 direction = CHARSET_RIGHT_TO_LEFT;
2334 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2335 direction = CHARSET_LEFT_TO_RIGHT;
2336 doc_string = CHARSET_DOC_STRING (cs);
2337 short_name = CHARSET_SHORT_NAME (cs);
2338 long_name = CHARSET_LONG_NAME (cs);
2339 registry = CHARSET_REGISTRY (cs);
2341 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
2342 graphic, final, direction, short_name, long_name,
2343 doc_string, registry);
/* Cross-link the pair so each one can find its mirror. */
2345 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2346 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2351 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.

   Resolves CHARSET with Fget_charset and returns whatever is stored in
   its reverse-direction-charset slot.

   NOTE(review): no DEFSUBR for Fcharset_reverse_direction_charset appears
   in syms_of_mule_charset (only a commented-out DEFSUBR under a different
   name), so this primitive looks unregistered -- presumably it is
   compiled out; confirm.  */
2353 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2355 Return the reverse-direction charset parallel to CHARSET, if any.
2356 This is the charset with the same properties (in particular, the same
2357 dimension, number of characters per dimension, and final byte) as
2358 CHARSET but whose characters are displayed in the opposite direction.
2362 charset = Fget_charset (charset);
2363 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes' (3 required arguments, 1 optional).

   Validates its arguments and looks a charset up with
   CHARSET_BY_ATTRIBUTES (type, final, direction):

     DIMENSION  integer, must be 1 or 2.
     CHARS      must be 94 or 96.
     FINAL      character (or integer coerced to one) in the range
                '0' .. '~'; additionally at most 0x5F when DIMENSION is 2.
     DIRECTION  the symbol l2r, the symbol r2l, or nil.  Anything else
                signals "Invalid value for DIRECTION".

   `di' starts at -1 and is only set when DIRECTION is l2r or r2l.  Three
   lookups are visible: left-to-right, right-to-left, and one using `di';
   per the doc string the first two serve the "DIRECTION omitted" case
   with left-to-right preferred.

   The visible return statement yields XCHARSET_NAME (obj), i.e. the name
   of the charset that was found rather than the charset object itself.

   NOTE(review): the conditions selecting between the one-dimensional
   (CHARSET_TYPE_94 / CHARSET_TYPE_96) and two-dimensional
   (CHARSET_TYPE_94X94 / CHARSET_TYPE_96X96) type assignments, and any
   guard for the "nothing found" case, are not visible here -- presumably
   keyed on `dm' and on `obj' being nil respectively; confirm.  */
2367 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2368 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2369 If DIRECTION is omitted, both directions will be checked (left-to-right
2370 will be returned if character sets exist for both directions).
2372 (dimension, chars, final, direction))
2374 int dm, ch, fi, di = -1;
2376 Lisp_Object obj = Qnil;
2378 CHECK_INT (dimension);
2379 dm = XINT (dimension);
2380 if (dm < 1 || dm > 2)
2381 signal_simple_error ("Invalid value for DIMENSION", dimension);
2385 if (ch != 94 && ch != 96)
2386 signal_simple_error ("Invalid value for CHARS", chars);
2388 CHECK_CHAR_COERCE_INT (final);
2390 if (fi < '0' || fi > '~')
2391 signal_simple_error ("Invalid value for FINAL", final);
2393 if (EQ (direction, Ql2r))
2394 di = CHARSET_LEFT_TO_RIGHT;
2395 else if (EQ (direction, Qr2l))
2396 di = CHARSET_RIGHT_TO_LEFT;
2397 else if (!NILP (direction))
2398 signal_simple_error ("Invalid value for DIRECTION", direction);
2400 if (dm == 2 && fi > 0x5F)
2402 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2405 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2407 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2411 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2413 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2416 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2419 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name' (exactly one argument).
   Resolves CHARSET with Fget_charset and returns the value of its
   short-name slot (XCHARSET_SHORT_NAME).  */
2423 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2424 Return short name of CHARSET.
2428 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name' (exactly one argument).
   Resolves CHARSET with Fget_charset and returns the value of its
   long-name slot (XCHARSET_LONG_NAME).  */
2431 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2432 Return long name of CHARSET.
2436 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description' (exactly one argument).
   Resolves CHARSET with Fget_charset and returns its doc string slot
   (XCHARSET_DOC_STRING); the "description" is the charset's doc string.  */
2439 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2440 Return description of CHARSET.
2444 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension' (exactly one argument).
   Resolves CHARSET with Fget_charset and returns its dimension
   (XCHARSET_DIMENSION) boxed as a Lisp integer with make_int.  */
2447 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2448 Return dimension of CHARSET.
2452 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property' (exactly two arguments).

   Resolves CHARSET with Fget_charset, checks that PROP is a symbol, and
   returns the matching slot of the charset.  PROP is compared with EQ
   against each known property symbol in turn:

     name, short-name, long-name, doc-string, registry, ccl-program
                 returned as stored.
     dimension, columns, graphic, chars
                 boxed with make_int.
     final       boxed with make_char.
     direction   the symbol l2r when the stored direction is
                 CHARSET_LEFT_TO_RIGHT, otherwise r2l.
     reverse-direction-charset
                 the XCHARSET_NAME of the linked charset.

   Any other symbol signals "Unrecognized charset property name"; the
   trailing `return Qnil' is not reached.

   NOTE(review): for reverse-direction-charset, any check that the slot is
   non-nil before XCHARSET_NAME is applied is not visible here -- confirm
   that the nil case is handled.  */
2455 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2456 Return property PROP of CHARSET.
2457 Recognized properties are those listed in `make-charset', as well as
2458 'name and 'doc-string.
2462 struct Lisp_Charset *cs;
2464 charset = Fget_charset (charset);
2465 cs = XCHARSET (charset);
2467 CHECK_SYMBOL (prop);
2468 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2469 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2470 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2471 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2472 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2473 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2474 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2475 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2476 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2477 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2478 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2479 if (EQ (prop, Qdirection))
2480 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2481 if (EQ (prop, Qreverse_direction_charset))
2483 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2487 return XCHARSET_NAME (obj);
2489 signal_simple_error ("Unrecognized charset property name", prop);
2490 return Qnil; /* not reached */
/* Lisp primitive `charset-id' (exactly one argument).
   Resolves CHARSET with Fget_charset and returns its leading byte
   (XCHARSET_LEADING_BYTE) boxed with make_int; the "identification
   number" exposed to Lisp is therefore the charset's leading byte.  */
2493 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2494 Return charset identification number of CHARSET.
2498 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2501 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program' (exactly two arguments).

   Resolves CHARSET with Fget_charset, requires CCL_PROGRAM to be a vector
   (CHECK_VECTOR signals otherwise), and stores it in the charset's
   ccl-program slot.  No other validation of the vector's contents is
   visible here.

   NOTE(review): the return statement is not visible in this block.  */
2504 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2505 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2507 (charset, ccl_program))
2509 charset = Fget_charset (charset);
2510 CHECK_VECTOR (ccl_program);
2511 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Empty the per-device font cache belonging to CHARSET.

   Walks every device with DEVICE_LOOP_NO_BREAK, looks CHARSET up in that
   device's charset_font_cache, and clears the hash table found there (if
   any) with Fclrhash.  The table itself stays in the cache; only its
   entries are dropped.  */
2516 invalidate_charset_font_caches (Lisp_Object charset)
2518 /* Invalidate font cache entries for charset on all devices. */
2519 Lisp_Object devcons, concons, hash_table;
2520 DEVICE_LOOP_NO_BREAK (devcons, concons)
2522 struct device *d = XDEVICE (XCAR (devcons));
/* Qunbound is the lookup default, so a device with no entry for this
   charset is told apart from one holding a real table. */
2523 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2524 if (!UNBOUNDP (hash_table))
2525 Fclrhash (hash_table);
2529 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
/* Lisp primitive `set-charset-registry' (exactly two arguments).

   Resolves CHARSET with Fget_charset, requires REGISTRY to be a string,
   and stores it in the charset's registry slot.  Because the registry
   has changed, the function then calls invalidate_charset_font_caches on
   the charset and reports a change of the `font' property of
   Vdefault_face (locale Qglobal) via face_property_was_changed.

   NOTE(review): the return statement is not visible in this block.  */
2530 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2531 Set the 'registry property of CHARSET to REGISTRY.
2533 (charset, registry))
2535 charset = Fget_charset (charset);
2536 CHECK_STRING (registry);
2537 XCHARSET_REGISTRY (charset) = registry;
2538 invalidate_charset_font_caches (charset);
2539 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2544 /************************************************************************/
2545 /* Lisp primitives for working with characters */
2546 /************************************************************************/
/* Lisp primitive `make-char' (2 required arguments, 1 optional).

   Resolves CHARSET with Fget_charset and builds a character from one or
   two octets.  The permitted octet range [lowlim, highlim] depends on the
   charset:

     ascii charset                0 .. 127
     control-1 charset            0 .. 31
     CHARSET_CHARS (cs) == 256    0 .. 255
     CHARSET_CHARS (cs) == 94     33 .. 126
     otherwise (96)               32 .. 127

   An octet outside the range is reported with args_out_of_range_3,
   passing the offending argument and both limits.

   For a dimension-1 charset the result is MAKE_CHAR (charset, a1, 0) and
   ARG2 must be nil ("Charset is of dimension one; second octet must be
   nil").  Otherwise ARG2 is masked with 0x7f, range-checked the same way,
   and the result is MAKE_CHAR (charset, a1, a2).

   NOTE(review): the statements that check ARG1 and compute `a1' are not
   visible here; per the in-body comment the 8th bit is presumably stripped
   from ARG1 the same way as for ARG2 -- confirm.  */
2548 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2549 Make a character from CHARSET and octets ARG1 and ARG2.
2550 ARG2 is required only for characters from two-dimensional charsets.
2551 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2552 character s with caron.
2554 (charset, arg1, arg2))
2556 struct Lisp_Charset *cs;
2558 int lowlim, highlim;
2560 charset = Fget_charset (charset);
2561 cs = XCHARSET (charset);
2563 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2564 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2566 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2568 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2569 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2572 /* It is useful (and safe, according to Olivier Galibert) to strip
2573 the 8th bit off ARG1 and ARG2 because it allows programmers to
2574 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2575 Latin 2 code of the character. */
2583 if (a1 < lowlim || a1 > highlim)
2584 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2586 if (CHARSET_DIMENSION (cs) == 1)
2590 ("Charset is of dimension one; second octet must be nil", arg2);
2591 return make_char (MAKE_CHAR (charset, a1, 0));
2600 a2 = XINT (arg2) & 0x7f;
2602 if (a2 < lowlim || a2 > highlim)
2603 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2605 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset' (exactly one argument).
   CH must be a character, or an integer that CHECK_CHAR_COERCE_INT can
   coerce to one.  Returns XCHARSET_NAME of the charset that CHAR_CHARSET
   reports for the character, i.e. the charset's name rather than the
   charset object.  */
2608 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2609 Return the character set of char CH.
2613 CHECK_CHAR_COERCE_INT (ch);
2615 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `split-char' (exactly one argument).

   CHARACTER must be a character, or an integer that
   CHECK_CHAR_COERCE_INT can coerce to one.  BREAKUP_CHAR splits it into
   its charset and position codes c1 and c2.  The result list starts with
   the charset's name (XCHARSET_NAME), followed by c1 and c2 when the
   charset has dimension 2 (list3), or by c1 alone otherwise (list2).

   `charset' and `rc' are GC-protected with GCPRO2 for the duration of
   the call.

   NOTE(review): the declarations of c1 and c2, the matching UNGCPRO and
   the return statement are not visible in this block.  */
2618 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2619 Return list of charset and one or two position-codes of CHAR.
2623 /* This function can GC */
2624 struct gcpro gcpro1, gcpro2;
2625 Lisp_Object charset = Qnil;
2626 Lisp_Object rc = Qnil;
2629 GCPRO2 (charset, rc);
2630 CHECK_CHAR_COERCE_INT (character);
2632 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2634 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2636 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2640 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2648 #ifdef ENABLE_COMPOSITE_CHARS
2649 /************************************************************************/
2650 /* composite character functions */
2651 /************************************************************************/
/* Map the LEN bytes at STR to a composite character, allocating a new
   one when needed.

   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path builds a
   character in Vcharset_composite from the next free (row, column) pair,
   records it in both composite hash tables (char -> string and
   string -> char), and advances the column counter.  When the column
   reaches 128 it wraps back to 32 and the row counter is incremented.
   Once the row counter has reached 128 no further characters can be
   allocated and "No more composite chars available" is signalled.

   NOTE(review): the condition guarding the allocation path (presumably
   "lookup found nothing") and the return statement are not visible in
   this block -- confirm.  */
2654 lookup_composite_char (Bufbyte *str, int len)
2656 Lisp_Object lispstr = make_string (str, len);
2657 Lisp_Object ch = Fgethash (lispstr,
2658 Vcomposite_char_string2char_hash_table,
2664 if (composite_char_row_next >= 128)
2665 signal_simple_error ("No more composite chars available", lispstr);
2666 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2667 composite_char_col_next);
/* Register the new character in both directions so that it can be
   mapped back to its string later. */
2668 Fputhash (make_char (emch), lispstr,
2669 Vcomposite_char_char2string_hash_table);
2670 Fputhash (lispstr, make_char (emch),
2671 Vcomposite_char_string2char_hash_table);
2672 composite_char_col_next++;
2673 if (composite_char_col_next >= 128)
2675 composite_char_col_next = 32;
2676 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The lookup must succeed: an
   unbound result trips the assert, so callers are expected to pass only
   characters that are already present in that table.

   NOTE(review): the return statement is not visible in this block;
   presumably `str' is returned.  */
2685 composite_char_string (Emchar ch)
2687 Lisp_Object str = Fgethash (make_char (ch),
2688 Vcomposite_char_char2string_hash_table,
2690 assert (!UNBOUNDP (str));
/* Lisp primitive `make-composite-char' (exactly one argument).
   STRING must be a Lisp string; its data and length are passed to
   lookup_composite_char and the resulting character is boxed with
   make_char.

   NOTE(review): this is declared with xxDEFUN rather than DEFUN; what
   that spelling expands to is not visible here -- confirm how it
   interacts with the DEFSUBR in syms_of_mule_charset.  */
2694 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2695 Convert a string into a single composite character.
2696 The character is the result of overstriking all the characters in
2701 CHECK_STRING (string);
2702 return make_char (lookup_composite_char (XSTRING_DATA (string),
2703 XSTRING_LENGTH (string)));
/* Lisp primitive `composite-char-string' (exactly one argument).
   Signals "Must be composite char" unless the leading byte of the
   character is LEADING_BYTE_COMPOSITE; otherwise returns the string that
   composite_char_string reports for it.

   NOTE(review): the argument check and the statement that derives `emch'
   from CH are not visible in this block.  */
2706 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2707 Return a string of the characters comprising a composite character.
2715 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2716 signal_simple_error ("Must be composite char", ch);
2717 return composite_char_string (emch);
2719 #endif /* ENABLE_COMPOSITE_CHARS */
2722 /************************************************************************/
2723 /* initialization */
2724 /************************************************************************/
/* Register this file's Lisp primitives and symbols.

   Takes no arguments.  Calls DEFSUBR for the charset and character
   primitives, including the two composite-character primitives listed
   after the ENABLE_COMPOSITE_CHARS guard, and then calls defsymbol for
   every symbol the file uses: the property keywords, the two direction
   symbols, and one symbol per predefined charset.

   Note that the C variable name and the Lisp symbol name do not always
   correspond: Qlatin_viscii_lower and Qlatin_viscii_upper are bound to
   "vietnamese-viscii-lower" and "vietnamese-viscii-upper".  */
2727 syms_of_mule_charset (void)
2729 DEFSUBR (Fcharsetp);
2730 DEFSUBR (Ffind_charset);
2731 DEFSUBR (Fget_charset);
2732 DEFSUBR (Fcharset_list);
2733 DEFSUBR (Fcharset_name);
2734 DEFSUBR (Fmake_charset);
2735 DEFSUBR (Fmake_reverse_direction_charset);
2736 /* DEFSUBR (Freverse_direction_charset); */
2737 DEFSUBR (Fcharset_from_attributes);
2738 DEFSUBR (Fcharset_short_name);
2739 DEFSUBR (Fcharset_long_name);
2740 DEFSUBR (Fcharset_description);
2741 DEFSUBR (Fcharset_dimension);
2742 DEFSUBR (Fcharset_property);
2743 DEFSUBR (Fcharset_id);
2744 DEFSUBR (Fset_charset_ccl_program);
2745 DEFSUBR (Fset_charset_registry);
2747 DEFSUBR (Fmake_char);
2748 DEFSUBR (Fchar_charset);
2749 DEFSUBR (Fsplit_char);
2751 #ifdef ENABLE_COMPOSITE_CHARS
2752 DEFSUBR (Fmake_composite_char);
2753 DEFSUBR (Fcomposite_char_string);
/* Property keywords accepted by make-charset and charset-property. */
2756 defsymbol (&Qcharsetp, "charsetp");
2757 defsymbol (&Qregistry, "registry");
2758 defsymbol (&Qfinal, "final");
2759 defsymbol (&Qgraphic, "graphic");
2760 defsymbol (&Qdirection, "direction");
2761 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2762 defsymbol (&Qshort_name, "short-name");
2763 defsymbol (&Qlong_name, "long-name");
/* Values of the direction property. */
2765 defsymbol (&Ql2r, "l2r");
2766 defsymbol (&Qr2l, "r2l");
2768 /* Charsets, compatible with FSF 20.3
2769 Naming convention is Script-Charset[-Edition] */
2770 defsymbol (&Qascii, "ascii");
2771 defsymbol (&Qcontrol_1, "control-1");
2772 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2773 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2774 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2775 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2776 defsymbol (&Qthai_tis620, "thai-tis620");
2777 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2778 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2779 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2780 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2781 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2782 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2783 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2784 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2785 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2786 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2787 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2788 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2789 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2790 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2792 defsymbol (&Qchinese_cns11643_3, "chinese-cns11643-3");
2793 defsymbol (&Qchinese_cns11643_4, "chinese-cns11643-4");
2794 defsymbol (&Qchinese_cns11643_5, "chinese-cns11643-5");
2795 defsymbol (&Qchinese_cns11643_6, "chinese-cns11643-6");
2796 defsymbol (&Qchinese_cns11643_7, "chinese-cns11643-7");
2797 defsymbol (&Qucs_bmp, "ucs-bmp");
2798 defsymbol (&Qlatin_viscii_lower, "vietnamese-viscii-lower");
2799 defsymbol (&Qlatin_viscii_upper, "vietnamese-viscii-upper");
2801 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2802 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2804 defsymbol (&Qcomposite, "composite");
/* Initialise this file's lookup tables and simple variables.

   Takes no arguments.  In order, it:
     - sets every entry of charset_by_leading_byte to Qnil;
     - sets every entry of charset_by_attributes to Qnil;
     - seeds the counters from which private leading bytes are handed
       out (next_allocated_1_byte_leading_byte and
       next_allocated_2_byte_leading_byte);
     - defines the Lisp integer variable `leading-code-private-11',
       assigning PRE_LEADING_BYTE_PRIVATE_1 to its C variable both before
       and after the DEFVAR_INT;
     - defines the Lisp variable `utf-2000-version'.

   NOTE(review): two alternative clearing loops for charset_by_attributes
   (two-index and three-index) and two alternative initial values for
   next_allocated_2_byte_leading_byte are visible; presumably each pair is
   selected by conditional compilation that is not visible here --
   confirm.  */
2808 vars_of_mule_charset (void)
2815 /* Table of charsets indexed by leading byte. */
2816 for (i = 0; i < countof (charset_by_leading_byte); i++)
2817 charset_by_leading_byte[i] = Qnil;
2820 /* Table of charsets indexed by type/final-byte. */
2821 for (i = 0; i < countof (charset_by_attributes); i++)
2822 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2823 charset_by_attributes[i][j] = Qnil;
2825 /* Table of charsets indexed by type/final-byte/direction. */
2826 for (i = 0; i < countof (charset_by_attributes); i++)
2827 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2828 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2829 charset_by_attributes[i][j][k] = Qnil;
2832 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2834 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
2836 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2840 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2841 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2842 Leading-code of private TYPE9N charset of column-width 1.
2844 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2848 Vutf_2000_version = build_string("0.6 (Tōbushijō-mae)");
2849 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2850 Version number of UTF-2000.
2856 complex_vars_of_mule_charset (void)
2858 staticpro (&Vcharset_hash_table);
2859 Vcharset_hash_table =
2860 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2862 /* Predefined character sets. We store them into variables for
2867 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 1,
2868 CHARSET_TYPE_256X256, 1, 0, 0,
2869 CHARSET_LEFT_TO_RIGHT,
2870 build_string ("BMP"),
2871 build_string ("BMP"),
2872 build_string ("BMP"),
2876 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
2877 CHARSET_TYPE_94, 1, 0, 'B',
2878 CHARSET_LEFT_TO_RIGHT,
2879 build_string ("ASCII"),
2880 build_string ("ASCII)"),
2881 build_string ("ASCII (ISO646 IRV)"),
2882 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
2883 Vcharset_control_1 =
2884 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
2885 CHARSET_TYPE_94, 1, 1, 0,
2886 CHARSET_LEFT_TO_RIGHT,
2887 build_string ("C1"),
2888 build_string ("Control characters"),
2889 build_string ("Control characters 128-191"),
2891 Vcharset_latin_iso8859_1 =
2892 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
2893 CHARSET_TYPE_96, 1, 1, 'A',
2894 CHARSET_LEFT_TO_RIGHT,
2895 build_string ("Latin-1"),
2896 build_string ("ISO8859-1 (Latin-1)"),
2897 build_string ("ISO8859-1 (Latin-1)"),
2898 build_string ("iso8859-1"));
2899 Vcharset_latin_iso8859_2 =
2900 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
2901 CHARSET_TYPE_96, 1, 1, 'B',
2902 CHARSET_LEFT_TO_RIGHT,
2903 build_string ("Latin-2"),
2904 build_string ("ISO8859-2 (Latin-2)"),
2905 build_string ("ISO8859-2 (Latin-2)"),
2906 build_string ("iso8859-2"));
2907 Vcharset_latin_iso8859_3 =
2908 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
2909 CHARSET_TYPE_96, 1, 1, 'C',
2910 CHARSET_LEFT_TO_RIGHT,
2911 build_string ("Latin-3"),
2912 build_string ("ISO8859-3 (Latin-3)"),
2913 build_string ("ISO8859-3 (Latin-3)"),
2914 build_string ("iso8859-3"));
2915 Vcharset_latin_iso8859_4 =
2916 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
2917 CHARSET_TYPE_96, 1, 1, 'D',
2918 CHARSET_LEFT_TO_RIGHT,
2919 build_string ("Latin-4"),
2920 build_string ("ISO8859-4 (Latin-4)"),
2921 build_string ("ISO8859-4 (Latin-4)"),
2922 build_string ("iso8859-4"));
2923 Vcharset_thai_tis620 =
2924 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
2925 CHARSET_TYPE_96, 1, 1, 'T',
2926 CHARSET_LEFT_TO_RIGHT,
2927 build_string ("TIS620"),
2928 build_string ("TIS620 (Thai)"),
2929 build_string ("TIS620.2529 (Thai)"),
2930 build_string ("tis620"));
2931 Vcharset_greek_iso8859_7 =
2932 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
2933 CHARSET_TYPE_96, 1, 1, 'F',
2934 CHARSET_LEFT_TO_RIGHT,
2935 build_string ("ISO8859-7"),
2936 build_string ("ISO8859-7 (Greek)"),
2937 build_string ("ISO8859-7 (Greek)"),
2938 build_string ("iso8859-7"));
2939 Vcharset_arabic_iso8859_6 =
2940 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
2941 CHARSET_TYPE_96, 1, 1, 'G',
2942 CHARSET_RIGHT_TO_LEFT,
2943 build_string ("ISO8859-6"),
2944 build_string ("ISO8859-6 (Arabic)"),
2945 build_string ("ISO8859-6 (Arabic)"),
2946 build_string ("iso8859-6"));
2947 Vcharset_hebrew_iso8859_8 =
2948 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
2949 CHARSET_TYPE_96, 1, 1, 'H',
2950 CHARSET_RIGHT_TO_LEFT,
2951 build_string ("ISO8859-8"),
2952 build_string ("ISO8859-8 (Hebrew)"),
2953 build_string ("ISO8859-8 (Hebrew)"),
2954 build_string ("iso8859-8"));
2955 Vcharset_katakana_jisx0201 =
2956 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
2957 CHARSET_TYPE_94, 1, 1, 'I',
2958 CHARSET_LEFT_TO_RIGHT,
2959 build_string ("JISX0201 Kana"),
2960 build_string ("JISX0201.1976 (Japanese Kana)"),
2961 build_string ("JISX0201.1976 Japanese Kana"),
2962 build_string ("jisx0201.1976"));
2963 Vcharset_latin_jisx0201 =
2964 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
2965 CHARSET_TYPE_94, 1, 0, 'J',
2966 CHARSET_LEFT_TO_RIGHT,
2967 build_string ("JISX0201 Roman"),
2968 build_string ("JISX0201.1976 (Japanese Roman)"),
2969 build_string ("JISX0201.1976 Japanese Roman"),
2970 build_string ("jisx0201.1976"));
2971 Vcharset_cyrillic_iso8859_5 =
2972 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
2973 CHARSET_TYPE_96, 1, 1, 'L',
2974 CHARSET_LEFT_TO_RIGHT,
2975 build_string ("ISO8859-5"),
2976 build_string ("ISO8859-5 (Cyrillic)"),
2977 build_string ("ISO8859-5 (Cyrillic)"),
2978 build_string ("iso8859-5"));
2979 Vcharset_latin_iso8859_9 =
2980 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
2981 CHARSET_TYPE_96, 1, 1, 'M',
2982 CHARSET_LEFT_TO_RIGHT,
2983 build_string ("Latin-5"),
2984 build_string ("ISO8859-9 (Latin-5)"),
2985 build_string ("ISO8859-9 (Latin-5)"),
2986 build_string ("iso8859-9"));
2987 Vcharset_japanese_jisx0208_1978 =
2988 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
2989 CHARSET_TYPE_94X94, 2, 0, '@',
2990 CHARSET_LEFT_TO_RIGHT,
2991 build_string ("JISX0208.1978"),
2992 build_string ("JISX0208.1978 (Japanese)"),
2994 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
2995 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
2996 Vcharset_chinese_gb2312 =
2997 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
2998 CHARSET_TYPE_94X94, 2, 0, 'A',
2999 CHARSET_LEFT_TO_RIGHT,
3000 build_string ("GB2312"),
3001 build_string ("GB2312)"),
3002 build_string ("GB2312 Chinese simplified"),
3003 build_string ("gb2312"));
3004 Vcharset_japanese_jisx0208 =
3005 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
3006 CHARSET_TYPE_94X94, 2, 0, 'B',
3007 CHARSET_LEFT_TO_RIGHT,
3008 build_string ("JISX0208"),
3009 build_string ("JISX0208.1983/1990 (Japanese)"),
3010 build_string ("JISX0208.1983/1990 Japanese Kanji"),
3011 build_string ("jisx0208.19\\(83\\|90\\)"));
3012 Vcharset_korean_ksc5601 =
3013 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
3014 CHARSET_TYPE_94X94, 2, 0, 'C',
3015 CHARSET_LEFT_TO_RIGHT,
3016 build_string ("KSC5601"),
3017 build_string ("KSC5601 (Korean"),
3018 build_string ("KSC5601 Korean Hangul and Hanja"),
3019 build_string ("ksc5601"));
3020 Vcharset_japanese_jisx0212 =
3021 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
3022 CHARSET_TYPE_94X94, 2, 0, 'D',
3023 CHARSET_LEFT_TO_RIGHT,
3024 build_string ("JISX0212"),
3025 build_string ("JISX0212 (Japanese)"),
3026 build_string ("JISX0212 Japanese Supplement"),
3027 build_string ("jisx0212"));
3029 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3030 Vcharset_chinese_cns11643_1 =
3031 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
3032 CHARSET_TYPE_94X94, 2, 0, 'G',
3033 CHARSET_LEFT_TO_RIGHT,
3034 build_string ("CNS11643-1"),
3035 build_string ("CNS11643-1 (Chinese traditional)"),
3037 ("CNS 11643 Plane 1 Chinese traditional"),
3038 build_string (CHINESE_CNS_PLANE_RE("1")));
3039 Vcharset_chinese_cns11643_2 =
3040 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
3041 CHARSET_TYPE_94X94, 2, 0, 'H',
3042 CHARSET_LEFT_TO_RIGHT,
3043 build_string ("CNS11643-2"),
3044 build_string ("CNS11643-2 (Chinese traditional)"),
3046 ("CNS 11643 Plane 2 Chinese traditional"),
3047 build_string (CHINESE_CNS_PLANE_RE("2")));
3049 Vcharset_chinese_cns11643_3 =
3050 make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3,
3051 CHARSET_TYPE_94X94, 2, 0, 'I',
3052 CHARSET_LEFT_TO_RIGHT,
3053 build_string ("CNS11643-3"),
3054 build_string ("CNS11643-3 (Chinese traditional)"),
3056 ("CNS 11643 Plane 3 Chinese traditional"),
3057 build_string (CHINESE_CNS_PLANE_RE("3")));
3058 Vcharset_chinese_cns11643_4 =
3059 make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3,
3060 CHARSET_TYPE_94X94, 2, 0, 'J',
3061 CHARSET_LEFT_TO_RIGHT,
3062 build_string ("CNS11643-4"),
3063 build_string ("CNS11643-4 (Chinese traditional)"),
3065 ("CNS 11643 Plane 4 Chinese traditional"),
3066 build_string (CHINESE_CNS_PLANE_RE("4")));
3067 Vcharset_chinese_cns11643_5 =
3068 make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3,
3069 CHARSET_TYPE_94X94, 2, 0, 'K',
3070 CHARSET_LEFT_TO_RIGHT,
3071 build_string ("CNS11643-5"),
3072 build_string ("CNS11643-5 (Chinese traditional)"),
3074 ("CNS 11643 Plane 5 Chinese traditional"),
3075 build_string (CHINESE_CNS_PLANE_RE("5")));
3076 Vcharset_chinese_cns11643_6 =
3077 make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3,
3078 CHARSET_TYPE_94X94, 2, 0, 'L',
3079 CHARSET_LEFT_TO_RIGHT,
3080 build_string ("CNS11643-6"),
3081 build_string ("CNS11643-6 (Chinese traditional)"),
3083 ("CNS 11643 Plane 6 Chinese traditional"),
3084 build_string (CHINESE_CNS_PLANE_RE("6")));
3085 Vcharset_chinese_cns11643_7 =
3086 make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3,
3087 CHARSET_TYPE_94X94, 2, 0, 'M',
3088 CHARSET_LEFT_TO_RIGHT,
3089 build_string ("CNS11643-7"),
3090 build_string ("CNS11643-7 (Chinese traditional)"),
3092 ("CNS 11643 Plane 7 Chinese traditional"),
3093 build_string (CHINESE_CNS_PLANE_RE("7")));
3094 Vcharset_latin_viscii_lower =
3095 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 2,
3096 CHARSET_TYPE_96, 1, 1, '1',
3097 CHARSET_LEFT_TO_RIGHT,
3098 build_string ("VISCII lower"),
3099 build_string ("VISCII lower (Vietnamese)"),
3100 build_string ("VISCII lower (Vietnamese)"),
3101 build_string ("VISCII1.1"));
3102 Vcharset_latin_viscii_upper =
3103 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 2,
3104 CHARSET_TYPE_96, 1, 1, '2',
3105 CHARSET_LEFT_TO_RIGHT,
3106 build_string ("VISCII upper"),
3107 build_string ("VISCII upper (Vietnamese)"),
3108 build_string ("VISCII upper (Vietnamese)"),
3109 build_string ("VISCII1.1"));
3111 Vcharset_chinese_big5_1 =
3112 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
3113 CHARSET_TYPE_94X94, 2, 0, '0',
3114 CHARSET_LEFT_TO_RIGHT,
3115 build_string ("Big5"),
3116 build_string ("Big5 (Level-1)"),
3118 ("Big5 Level-1 Chinese traditional"),
3119 build_string ("big5"));
3120 Vcharset_chinese_big5_2 =
3121 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
3122 CHARSET_TYPE_94X94, 2, 0, '1',
3123 CHARSET_LEFT_TO_RIGHT,
3124 build_string ("Big5"),
3125 build_string ("Big5 (Level-2)"),
3127 ("Big5 Level-2 Chinese traditional"),
3128 build_string ("big5"));
3131 #define GENERATE_94_SET(name) \
3134 ucs_to_##name = make_byte_from_character_table(); \
3135 for (i = 0; i < 94; i++) \
3137 Emchar c = name##_to_ucs[i]; \
3139 put_byte_from_character_table (c, \
3140 i + 33, ucs_to_##name); \
3143 #define GENERATE_96_SET(name) \
3146 ucs_to_##name = make_byte_from_character_table(); \
3147 for (i = 0; i < 96; i++) \
3149 Emchar c = name##_to_ucs[i]; \
3151 put_byte_from_character_table (c, \
3152 i + 32, ucs_to_##name); \
3156 GENERATE_94_SET (latin_jisx0201);
3158 GENERATE_96_SET (latin_iso8859_2);
3159 GENERATE_96_SET (latin_iso8859_3);
3160 GENERATE_96_SET (latin_iso8859_4);
3161 GENERATE_96_SET (latin_iso8859_9);
3162 GENERATE_96_SET (latin_viscii_lower);
3163 GENERATE_96_SET (latin_viscii_upper);
3166 #ifdef ENABLE_COMPOSITE_CHARS
3167 /* #### For simplicity, we put composite chars into a 96x96 charset.
3168 This is going to lead to problems because you can run out of
3169 room, esp. as we don't yet recycle numbers. */
3170 Vcharset_composite =
3171 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
3172 CHARSET_TYPE_96X96, 2, 0, 0,
3173 CHARSET_LEFT_TO_RIGHT,
3174 build_string ("Composite"),
3175 build_string ("Composite characters"),
3176 build_string ("Composite characters"),
3179 composite_char_row_next = 32;
3180 composite_char_col_next = 32;
3182 Vcomposite_char_string2char_hash_table =
3183 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3184 Vcomposite_char_char2string_hash_table =
3185 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3186 staticpro (&Vcomposite_char_string2char_hash_table);
3187 staticpro (&Vcomposite_char_char2string_hash_table);
3188 #endif /* ENABLE_COMPOSITE_CHARS */