1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
/* Global Lisp_Object slots, one per predefined charset.  None is
   declared `static', so each has external linkage, and none has an
   initializer here.
   NOTE(review): presumably these are filled in by charset setup code
   elsewhere in the file -- confirm. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii_lower;
63 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_chinese_big5_1;
66 Lisp_Object Vcharset_chinese_big5_2;
68 #ifdef ENABLE_COMPOSITE_CHARS
69 Lisp_Object Vcharset_composite;
71 /* Hash tables for composite chars. One maps a string representing
72 composed chars to its equivalent char; one goes the other way. */
74 Lisp_Object Vcomposite_char_char2string_hash_table;
75 Lisp_Object Vcomposite_char_string2char_hash_table;
77 static int composite_char_row_next;
78 static int composite_char_col_next;
80 #endif /* ENABLE_COMPOSITE_CHARS */
82 /* Table of charsets indexed by leading byte. */
83 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
85 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): two declarations of charset_by_attributes follow with
   different array types: [4][128] (no third dimension) and
   [4][128][2].  Both cannot be active in one translation unit;
   presumably they are alternatives selected by a preprocessor
   conditional that is not visible here -- confirm. */
87 Lisp_Object charset_by_attributes[4][128];
89 Lisp_Object charset_by_attributes[4][128][2];
93 /* Table of number of bytes in the string representation of a character
94 indexed by the first byte of that representation.
96 rep_bytes_by_first_byte(c) is more efficient than the equivalent
97 canonical computation:
99 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* The array is declared with 0xA0 (160) elements: eight rows of sixteen
   1s for 0x00 - 0x7f, one row for 0x80 - 0x8f and one row for
   0x90 - 0x9f. */
101 Bytecount rep_bytes_by_first_byte[0xA0] =
102 { /* 0x00 - 0x7f are for straight ASCII */
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two 16-entry rows follow for 0x80 - 0x8f; they differ
   only in the last entry (3 versus 2).  Presumably they are
   alternatives under a preprocessor conditional that is not visible
   here -- confirm.  With both rows active the initializer would hold
   176 values for a 160-element array. */
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
115 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 /* 0x90 - 0x9d are for Dimension-2 official charsets */
118 /* 0x9e is for Dimension-1 private charsets */
119 /* 0x9f is for Dimension-2 private charsets */
120 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Allocate a new Emchar_to_byte_table with xmalloc and hold it in the
   local `table'.  Takes no arguments (declared with an empty parameter
   list).
   NOTE(review): the rest of the body (field initialization and the
   return statement) is not visible here -- confirm what state the new
   table starts in. */
125 Emchar_to_byte_table*
126 make_byte_from_character_table ()
128 Emchar_to_byte_table* table
129 = (Emchar_to_byte_table*) xmalloc (sizeof (Emchar_to_byte_table));
/* Record VAL as the byte stored for character CH in TABLE.

   TABLE->base is a byte array indexed by (CH - TABLE->offset).  It is
   allocated on first use and enlarged, at the front or at the end,
   when CH falls outside the range it currently covers.

   NOTE(review): several lines of this function (return type, braces,
   the condition of the first growth branch, the bodies of both `for'
   loops, the last memmove argument) are not visible here.  The
   comments below describe only the statements shown. */
136 put_byte_from_character_table (Emchar ch, unsigned char val,
137 Emchar_to_byte_table* table)
/* No storage yet: allocate an initial 128-byte block. */
139 if (table->base == NULL)
141 table->base = xmalloc (128);
/* Start of the covered range: CH minus its remainder modulo 128. */
142 table->offset = ch - (ch % 128);
144 table->base[ch - table->offset] = val;
/* Position of CH relative to the start of the covered range. */
148 int i = ch - table->offset;
/* NOTE(review): the guard for this branch is not visible.  Since -i is
   added to the size, presumably i < 0 here (CH lies before the covered
   range) -- confirm. */
152 size_t new_size = table->size - i;
/* Round up to a multiple of 128.  A size that is already a multiple of
   128 still gains a full 128. */
155 new_size += 128 - (new_size % 128);
156 table->base = xrealloc (table->base, new_size);
/* Slide the old contents toward the end by the amount of growth,
   opening room at the front. */
157 memmove (table->base + (new_size - table->size), table->base,
/* Walk the bytes added at the front (loop body not visible). */
159 for (j = 0; j < (new_size - table->size); j++)
/* The covered range now starts that many characters earlier. */
161 table->offset -= (new_size - table->size);
162 table->base[ch - table->offset] = val;
163 table->size = new_size;
/* CH lies at or past the end: grow so that index i becomes valid. */
165 else if (i >= table->size)
167 size_t new_size = i + 1;
/* Same rounding rule as above. */
170 new_size += 128 - (new_size % 128);
171 table->base = xrealloc (table->base, new_size);
/* Walk the bytes added at the end (loop body not visible). */
172 for (j = table->size; j < new_size; j++)
174 table->base[i] = val;
175 table->size = new_size;
/* Remaining case (presumably a final `else'): CH is already inside the
   covered range, so overwrite in place. */
179 table->base[i] = val;
/* Return the byte stored for character CH in TABLE, read from
   TABLE->base at index (CH - TABLE->offset).
   NOTE(review): the index is held in a size_t and no range check
   against the table's size is visible here.  Presumably one exists on
   a line not shown -- confirm before relying on this for characters
   outside the covered range. */
185 get_byte_from_character_table (Emchar ch, Emchar_to_byte_table* table)
187 size_t i = ch - table->offset;
189 return table->base[i];
/* Compute MIN_CHAR_96 + (FT - '0') * 96 + (B & 0x7f) - 32.

   Both arguments are parenthesized in the expansion so that an
   argument containing an operator of lower precedence than `-' or `&'
   (for example `hi | lo') is treated as a single operand.  Each
   argument is evaluated exactly once.

   NOTE(review): FT is presumably a charset final byte and B a byte of
   a 96-character set -- confirm against callers. */
#define CHAR96(ft, b) (MIN_CHAR_96 + ((ft) - '0') * 96 + ((b) & 0x7f) - 32)
/* UCS values for the 94 positions 0x21 .. 0x7E of the Latin part of
   JIS X 0201; the first initializer corresponds to 0x21.  Every entry
   equals its own position except 0x5C (0x00A5, YEN SIGN) and 0x7E
   (0x203E, OVERLINE). */
196 Emchar latin_jisx0201_to_ucs[94] =
198 0x0021 /* 0x21 EXCLAMATION MARK */,
199 0x0022 /* 0x22 QUOTATION MARK */,
200 0x0023 /* 0x23 NUMBER SIGN */,
201 0x0024 /* 0x24 DOLLAR SIGN */,
202 0x0025 /* 0x25 PERCENT SIGN */,
203 0x0026 /* 0x26 AMPERSAND */,
204 0x0027 /* 0x27 APOSTROPHE */,
205 0x0028 /* 0x28 LEFT PARENTHESIS */,
206 0x0029 /* 0x29 RIGHT PARENTHESIS */,
207 0x002A /* 0x2A ASTERISK */,
208 0x002B /* 0x2B PLUS SIGN */,
209 0x002C /* 0x2C COMMA */,
210 0x002D /* 0x2D HYPHEN-MINUS */,
211 0x002E /* 0x2E FULL STOP */,
212 0x002F /* 0x2F SOLIDUS */,
213 0x0030 /* 0x30 DIGIT ZERO */,
214 0x0031 /* 0x31 DIGIT ONE */,
215 0x0032 /* 0x32 DIGIT TWO */,
216 0x0033 /* 0x33 DIGIT THREE */,
217 0x0034 /* 0x34 DIGIT FOUR */,
218 0x0035 /* 0x35 DIGIT FIVE */,
219 0x0036 /* 0x36 DIGIT SIX */,
220 0x0037 /* 0x37 DIGIT SEVEN */,
221 0x0038 /* 0x38 DIGIT EIGHT */,
222 0x0039 /* 0x39 DIGIT NINE */,
223 0x003A /* 0x3A COLON */,
224 0x003B /* 0x3B SEMICOLON */,
225 0x003C /* 0x3C LESS-THAN SIGN */,
226 0x003D /* 0x3D EQUALS SIGN */,
227 0x003E /* 0x3E GREATER-THAN SIGN */,
228 0x003F /* 0x3F QUESTION MARK */,
229 0x0040 /* 0x40 COMMERCIAL AT */,
230 0x0041 /* 0x41 LATIN CAPITAL LETTER A */,
231 0x0042 /* 0x42 LATIN CAPITAL LETTER B */,
232 0x0043 /* 0x43 LATIN CAPITAL LETTER C */,
233 0x0044 /* 0x44 LATIN CAPITAL LETTER D */,
234 0x0045 /* 0x45 LATIN CAPITAL LETTER E */,
235 0x0046 /* 0x46 LATIN CAPITAL LETTER F */,
236 0x0047 /* 0x47 LATIN CAPITAL LETTER G */,
237 0x0048 /* 0x48 LATIN CAPITAL LETTER H */,
238 0x0049 /* 0x49 LATIN CAPITAL LETTER I */,
239 0x004A /* 0x4A LATIN CAPITAL LETTER J */,
240 0x004B /* 0x4B LATIN CAPITAL LETTER K */,
241 0x004C /* 0x4C LATIN CAPITAL LETTER L */,
242 0x004D /* 0x4D LATIN CAPITAL LETTER M */,
243 0x004E /* 0x4E LATIN CAPITAL LETTER N */,
244 0x004F /* 0x4F LATIN CAPITAL LETTER O */,
245 0x0050 /* 0x50 LATIN CAPITAL LETTER P */,
246 0x0051 /* 0x51 LATIN CAPITAL LETTER Q */,
247 0x0052 /* 0x52 LATIN CAPITAL LETTER R */,
248 0x0053 /* 0x53 LATIN CAPITAL LETTER S */,
249 0x0054 /* 0x54 LATIN CAPITAL LETTER T */,
250 0x0055 /* 0x55 LATIN CAPITAL LETTER U */,
251 0x0056 /* 0x56 LATIN CAPITAL LETTER V */,
252 0x0057 /* 0x57 LATIN CAPITAL LETTER W */,
253 0x0058 /* 0x58 LATIN CAPITAL LETTER X */,
254 0x0059 /* 0x59 LATIN CAPITAL LETTER Y */,
255 0x005A /* 0x5A LATIN CAPITAL LETTER Z */,
256 0x005B /* 0x5B LEFT SQUARE BRACKET */,
257 0x00A5 /* 0x5C YEN SIGN */,
258 0x005D /* 0x5D RIGHT SQUARE BRACKET */,
259 0x005E /* 0x5E CIRCUMFLEX ACCENT */,
260 0x005F /* 0x5F LOW LINE */,
261 0x0060 /* 0x60 GRAVE ACCENT */,
262 0x0061 /* 0x61 LATIN SMALL LETTER A */,
263 0x0062 /* 0x62 LATIN SMALL LETTER B */,
264 0x0063 /* 0x63 LATIN SMALL LETTER C */,
265 0x0064 /* 0x64 LATIN SMALL LETTER D */,
266 0x0065 /* 0x65 LATIN SMALL LETTER E */,
267 0x0066 /* 0x66 LATIN SMALL LETTER F */,
268 0x0067 /* 0x67 LATIN SMALL LETTER G */,
269 0x0068 /* 0x68 LATIN SMALL LETTER H */,
270 0x0069 /* 0x69 LATIN SMALL LETTER I */,
271 0x006A /* 0x6A LATIN SMALL LETTER J */,
272 0x006B /* 0x6B LATIN SMALL LETTER K */,
273 0x006C /* 0x6C LATIN SMALL LETTER L */,
274 0x006D /* 0x6D LATIN SMALL LETTER M */,
275 0x006E /* 0x6E LATIN SMALL LETTER N */,
276 0x006F /* 0x6F LATIN SMALL LETTER O */,
277 0x0070 /* 0x70 LATIN SMALL LETTER P */,
278 0x0071 /* 0x71 LATIN SMALL LETTER Q */,
279 0x0072 /* 0x72 LATIN SMALL LETTER R */,
280 0x0073 /* 0x73 LATIN SMALL LETTER S */,
281 0x0074 /* 0x74 LATIN SMALL LETTER T */,
282 0x0075 /* 0x75 LATIN SMALL LETTER U */,
283 0x0076 /* 0x76 LATIN SMALL LETTER V */,
284 0x0077 /* 0x77 LATIN SMALL LETTER W */,
285 0x0078 /* 0x78 LATIN SMALL LETTER X */,
286 0x0079 /* 0x79 LATIN SMALL LETTER Y */,
287 0x007A /* 0x7A LATIN SMALL LETTER Z */,
288 0x007B /* 0x7B LEFT CURLY BRACKET */,
289 0x007C /* 0x7C VERTICAL LINE */,
290 0x007D /* 0x7D RIGHT CURLY BRACKET */,
291 0x203E /* 0x7E OVERLINE */
/* UCS values for the 96 positions 0xA0 .. 0xFF of ISO 8859-2; the
   first initializer corresponds to 0xA0.  Each entry's comment gives
   the source byte and the character name. */
294 Emchar latin_iso8859_2_to_ucs[96] =
296 0x00A0 /* 0xA0 NO-BREAK SPACE */,
297 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
298 0x02D8 /* 0xA2 BREVE */,
299 0x0141 /* 0xA3 LATIN CAPITAL LETTER L WITH STROKE */,
300 0x00A4 /* 0xA4 CURRENCY SIGN */,
301 0x013D /* 0xA5 LATIN CAPITAL LETTER L WITH CARON */,
302 0x015A /* 0xA6 LATIN CAPITAL LETTER S WITH ACUTE */,
303 0x00A7 /* 0xA7 SECTION SIGN */,
304 0x00A8 /* 0xA8 DIAERESIS */,
305 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
306 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
307 0x0164 /* 0xAB LATIN CAPITAL LETTER T WITH CARON */,
308 0x0179 /* 0xAC LATIN CAPITAL LETTER Z WITH ACUTE */,
309 0x00AD /* 0xAD SOFT HYPHEN */,
310 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
311 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
312 0x00B0 /* 0xB0 DEGREE SIGN */,
313 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
314 0x02DB /* 0xB2 OGONEK */,
315 0x0142 /* 0xB3 LATIN SMALL LETTER L WITH STROKE */,
316 0x00B4 /* 0xB4 ACUTE ACCENT */,
317 0x013E /* 0xB5 LATIN SMALL LETTER L WITH CARON */,
318 0x015B /* 0xB6 LATIN SMALL LETTER S WITH ACUTE */,
319 0x02C7 /* 0xB7 CARON */,
320 0x00B8 /* 0xB8 CEDILLA */,
321 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
322 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
323 0x0165 /* 0xBB LATIN SMALL LETTER T WITH CARON */,
324 0x017A /* 0xBC LATIN SMALL LETTER Z WITH ACUTE */,
325 0x02DD /* 0xBD DOUBLE ACUTE ACCENT */,
326 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
327 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
328 0x0154 /* 0xC0 LATIN CAPITAL LETTER R WITH ACUTE */,
329 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
330 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
331 0x0102 /* 0xC3 LATIN CAPITAL LETTER A WITH BREVE */,
332 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
333 0x0139 /* 0xC5 LATIN CAPITAL LETTER L WITH ACUTE */,
334 0x0106 /* 0xC6 LATIN CAPITAL LETTER C WITH ACUTE */,
335 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
336 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
337 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
338 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
339 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
340 0x011A /* 0xCC LATIN CAPITAL LETTER E WITH CARON */,
341 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
342 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
343 0x010E /* 0xCF LATIN CAPITAL LETTER D WITH CARON */,
344 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
345 0x0143 /* 0xD1 LATIN CAPITAL LETTER N WITH ACUTE */,
346 0x0147 /* 0xD2 LATIN CAPITAL LETTER N WITH CARON */,
347 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
348 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
349 0x0150 /* 0xD5 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */,
350 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
351 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
352 0x0158 /* 0xD8 LATIN CAPITAL LETTER R WITH CARON */,
353 0x016E /* 0xD9 LATIN CAPITAL LETTER U WITH RING ABOVE */,
354 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
355 0x0170 /* 0xDB LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */,
356 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
357 0x00DD /* 0xDD LATIN CAPITAL LETTER Y WITH ACUTE */,
358 0x0162 /* 0xDE LATIN CAPITAL LETTER T WITH CEDILLA */,
359 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
360 0x0155 /* 0xE0 LATIN SMALL LETTER R WITH ACUTE */,
361 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
362 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
363 0x0103 /* 0xE3 LATIN SMALL LETTER A WITH BREVE */,
364 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
365 0x013A /* 0xE5 LATIN SMALL LETTER L WITH ACUTE */,
366 0x0107 /* 0xE6 LATIN SMALL LETTER C WITH ACUTE */,
367 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
368 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
369 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
370 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
371 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
372 0x011B /* 0xEC LATIN SMALL LETTER E WITH CARON */,
373 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
374 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
375 0x010F /* 0xEF LATIN SMALL LETTER D WITH CARON */,
376 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
377 0x0144 /* 0xF1 LATIN SMALL LETTER N WITH ACUTE */,
378 0x0148 /* 0xF2 LATIN SMALL LETTER N WITH CARON */,
379 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
380 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
381 0x0151 /* 0xF5 LATIN SMALL LETTER O WITH DOUBLE ACUTE */,
382 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
383 0x00F7 /* 0xF7 DIVISION SIGN */,
384 0x0159 /* 0xF8 LATIN SMALL LETTER R WITH CARON */,
385 0x016F /* 0xF9 LATIN SMALL LETTER U WITH RING ABOVE */,
386 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
387 0x0171 /* 0xFB LATIN SMALL LETTER U WITH DOUBLE ACUTE */,
388 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
389 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
390 0x0163 /* 0xFE LATIN SMALL LETTER T WITH CEDILLA */,
391 0x02D9 /* 0xFF DOT ABOVE */
/* UCS values for positions 0xA0 .. 0xFF of ISO 8859-3; the first
   initializer corresponds to 0xA0.  Each entry's comment gives the
   source byte and the character name.

   NOTE(review): the array is declared with 96 elements but only 89
   initializers are visible.  None is shown for 0xA5, 0xAE, 0xBE, 0xC3,
   0xD0, 0xE3 and 0xF0 (positions that ISO 8859-3 leaves unassigned).
   If placeholder entries for those positions are truly absent rather
   than merely not shown, every later initializer lands at a lower
   index than its comment states -- confirm that placeholders exist. */
394 Emchar latin_iso8859_3_to_ucs[96] =
396 0x00A0 /* 0xA0 NO-BREAK SPACE */,
397 0x0126 /* 0xA1 LATIN CAPITAL LETTER H WITH STROKE */,
398 0x02D8 /* 0xA2 BREVE */,
399 0x00A3 /* 0xA3 POUND SIGN */,
400 0x00A4 /* 0xA4 CURRENCY SIGN */,
402 0x0124 /* 0xA6 LATIN CAPITAL LETTER H WITH CIRCUMFLEX */,
403 0x00A7 /* 0xA7 SECTION SIGN */,
404 0x00A8 /* 0xA8 DIAERESIS */,
405 0x0130 /* 0xA9 LATIN CAPITAL LETTER I WITH DOT ABOVE */,
406 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
407 0x011E /* 0xAB LATIN CAPITAL LETTER G WITH BREVE */,
408 0x0134 /* 0xAC LATIN CAPITAL LETTER J WITH CIRCUMFLEX */,
409 0x00AD /* 0xAD SOFT HYPHEN */,
411 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
412 0x00B0 /* 0xB0 DEGREE SIGN */,
413 0x0127 /* 0xB1 LATIN SMALL LETTER H WITH STROKE */,
414 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
415 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
416 0x00B4 /* 0xB4 ACUTE ACCENT */,
417 0x00B5 /* 0xB5 MICRO SIGN */,
418 0x0125 /* 0xB6 LATIN SMALL LETTER H WITH CIRCUMFLEX */,
419 0x00B7 /* 0xB7 MIDDLE DOT */,
420 0x00B8 /* 0xB8 CEDILLA */,
421 0x0131 /* 0xB9 LATIN SMALL LETTER DOTLESS I */,
422 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
423 0x011F /* 0xBB LATIN SMALL LETTER G WITH BREVE */,
424 0x0135 /* 0xBC LATIN SMALL LETTER J WITH CIRCUMFLEX */,
425 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
427 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
428 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
429 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
430 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
432 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
433 0x010A /* 0xC5 LATIN CAPITAL LETTER C WITH DOT ABOVE */,
434 0x0108 /* 0xC6 LATIN CAPITAL LETTER C WITH CIRCUMFLEX */,
435 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
436 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
437 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
438 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
439 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
440 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
441 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
442 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
443 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
445 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
446 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
447 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
448 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
449 0x0120 /* 0xD5 LATIN CAPITAL LETTER G WITH DOT ABOVE */,
450 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
451 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
452 0x011C /* 0xD8 LATIN CAPITAL LETTER G WITH CIRCUMFLEX */,
453 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
454 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
455 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
456 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
457 0x016C /* 0xDD LATIN CAPITAL LETTER U WITH BREVE */,
458 0x015C /* 0xDE LATIN CAPITAL LETTER S WITH CIRCUMFLEX */,
459 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
460 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
461 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
462 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
464 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
465 0x010B /* 0xE5 LATIN SMALL LETTER C WITH DOT ABOVE */,
466 0x0109 /* 0xE6 LATIN SMALL LETTER C WITH CIRCUMFLEX */,
467 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
468 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
469 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
470 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
471 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
472 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
473 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
474 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
475 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
477 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
478 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
479 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
480 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
481 0x0121 /* 0xF5 LATIN SMALL LETTER G WITH DOT ABOVE */,
482 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
483 0x00F7 /* 0xF7 DIVISION SIGN */,
484 0x011D /* 0xF8 LATIN SMALL LETTER G WITH CIRCUMFLEX */,
485 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
486 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
487 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
488 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
489 0x016D /* 0xFD LATIN SMALL LETTER U WITH BREVE */,
490 0x015D /* 0xFE LATIN SMALL LETTER S WITH CIRCUMFLEX */,
491 0x02D9 /* 0xFF DOT ABOVE */
/* UCS values for the 96 positions 0xA0 .. 0xFF of ISO 8859-4; the
   first initializer corresponds to 0xA0.  Each entry's comment gives
   the source byte and the character name. */
494 Emchar latin_iso8859_4_to_ucs[96] =
496 0x00A0 /* 0xA0 NO-BREAK SPACE */,
497 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
498 0x0138 /* 0xA2 LATIN SMALL LETTER KRA */,
499 0x0156 /* 0xA3 LATIN CAPITAL LETTER R WITH CEDILLA */,
500 0x00A4 /* 0xA4 CURRENCY SIGN */,
501 0x0128 /* 0xA5 LATIN CAPITAL LETTER I WITH TILDE */,
502 0x013B /* 0xA6 LATIN CAPITAL LETTER L WITH CEDILLA */,
503 0x00A7 /* 0xA7 SECTION SIGN */,
504 0x00A8 /* 0xA8 DIAERESIS */,
505 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
506 0x0112 /* 0xAA LATIN CAPITAL LETTER E WITH MACRON */,
507 0x0122 /* 0xAB LATIN CAPITAL LETTER G WITH CEDILLA */,
508 0x0166 /* 0xAC LATIN CAPITAL LETTER T WITH STROKE */,
509 0x00AD /* 0xAD SOFT HYPHEN */,
510 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
511 0x00AF /* 0xAF MACRON */,
512 0x00B0 /* 0xB0 DEGREE SIGN */,
513 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
514 0x02DB /* 0xB2 OGONEK */,
515 0x0157 /* 0xB3 LATIN SMALL LETTER R WITH CEDILLA */,
516 0x00B4 /* 0xB4 ACUTE ACCENT */,
517 0x0129 /* 0xB5 LATIN SMALL LETTER I WITH TILDE */,
518 0x013C /* 0xB6 LATIN SMALL LETTER L WITH CEDILLA */,
519 0x02C7 /* 0xB7 CARON */,
520 0x00B8 /* 0xB8 CEDILLA */,
521 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
522 0x0113 /* 0xBA LATIN SMALL LETTER E WITH MACRON */,
523 0x0123 /* 0xBB LATIN SMALL LETTER G WITH CEDILLA */,
524 0x0167 /* 0xBC LATIN SMALL LETTER T WITH STROKE */,
525 0x014A /* 0xBD LATIN CAPITAL LETTER ENG */,
526 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
527 0x014B /* 0xBF LATIN SMALL LETTER ENG */,
528 0x0100 /* 0xC0 LATIN CAPITAL LETTER A WITH MACRON */,
529 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
530 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
531 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
532 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
533 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
534 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
535 0x012E /* 0xC7 LATIN CAPITAL LETTER I WITH OGONEK */,
536 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
537 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
538 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
539 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
540 0x0116 /* 0xCC LATIN CAPITAL LETTER E WITH DOT ABOVE */,
541 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
542 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
543 0x012A /* 0xCF LATIN CAPITAL LETTER I WITH MACRON */,
544 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
545 0x0145 /* 0xD1 LATIN CAPITAL LETTER N WITH CEDILLA */,
546 0x014C /* 0xD2 LATIN CAPITAL LETTER O WITH MACRON */,
547 0x0136 /* 0xD3 LATIN CAPITAL LETTER K WITH CEDILLA */,
548 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
549 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
550 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
551 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
552 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
553 0x0172 /* 0xD9 LATIN CAPITAL LETTER U WITH OGONEK */,
554 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
555 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
556 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
557 0x0168 /* 0xDD LATIN CAPITAL LETTER U WITH TILDE */,
558 0x016A /* 0xDE LATIN CAPITAL LETTER U WITH MACRON */,
559 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
560 0x0101 /* 0xE0 LATIN SMALL LETTER A WITH MACRON */,
561 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
562 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
563 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
564 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
565 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
566 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
567 0x012F /* 0xE7 LATIN SMALL LETTER I WITH OGONEK */,
568 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
569 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
570 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
571 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
572 0x0117 /* 0xEC LATIN SMALL LETTER E WITH DOT ABOVE */,
573 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
574 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
575 0x012B /* 0xEF LATIN SMALL LETTER I WITH MACRON */,
576 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
577 0x0146 /* 0xF1 LATIN SMALL LETTER N WITH CEDILLA */,
578 0x014D /* 0xF2 LATIN SMALL LETTER O WITH MACRON */,
579 0x0137 /* 0xF3 LATIN SMALL LETTER K WITH CEDILLA */,
580 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
581 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
582 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
583 0x00F7 /* 0xF7 DIVISION SIGN */,
584 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
585 0x0173 /* 0xF9 LATIN SMALL LETTER U WITH OGONEK */,
586 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
587 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
588 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
589 0x0169 /* 0xFD LATIN SMALL LETTER U WITH TILDE */,
590 0x016B /* 0xFE LATIN SMALL LETTER U WITH MACRON */,
591 0x02D9 /* 0xFF DOT ABOVE */
/* UCS values for the 96 positions 0xA0 .. 0xFF of ISO 8859-9; the
   first initializer corresponds to 0xA0.  Every entry equals its own
   position except 0xD0, 0xDD, 0xDE, 0xF0, 0xFD and 0xFE, which map to
   0x011E, 0x0130, 0x015E, 0x011F, 0x0131 and 0x015F respectively.
   The last initializer is followed by a trailing comma. */
594 Emchar latin_iso8859_9_to_ucs[96] =
596 0x00A0 /* 0xA0 NO-BREAK SPACE */,
597 0x00A1 /* 0xA1 INVERTED EXCLAMATION MARK */,
598 0x00A2 /* 0xA2 CENT SIGN */,
599 0x00A3 /* 0xA3 POUND SIGN */,
600 0x00A4 /* 0xA4 CURRENCY SIGN */,
601 0x00A5 /* 0xA5 YEN SIGN */,
602 0x00A6 /* 0xA6 BROKEN BAR */,
603 0x00A7 /* 0xA7 SECTION SIGN */,
604 0x00A8 /* 0xA8 DIAERESIS */,
605 0x00A9 /* 0xA9 COPYRIGHT SIGN */,
606 0x00AA /* 0xAA FEMININE ORDINAL INDICATOR */,
607 0x00AB /* 0xAB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */,
608 0x00AC /* 0xAC NOT SIGN */,
609 0x00AD /* 0xAD SOFT HYPHEN */,
610 0x00AE /* 0xAE REGISTERED SIGN */,
611 0x00AF /* 0xAF MACRON */,
612 0x00B0 /* 0xB0 DEGREE SIGN */,
613 0x00B1 /* 0xB1 PLUS-MINUS SIGN */,
614 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
615 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
616 0x00B4 /* 0xB4 ACUTE ACCENT */,
617 0x00B5 /* 0xB5 MICRO SIGN */,
618 0x00B6 /* 0xB6 PILCROW SIGN */,
619 0x00B7 /* 0xB7 MIDDLE DOT */,
620 0x00B8 /* 0xB8 CEDILLA */,
621 0x00B9 /* 0xB9 SUPERSCRIPT ONE */,
622 0x00BA /* 0xBA MASCULINE ORDINAL INDICATOR */,
623 0x00BB /* 0xBB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */,
624 0x00BC /* 0xBC VULGAR FRACTION ONE QUARTER */,
625 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
626 0x00BE /* 0xBE VULGAR FRACTION THREE QUARTERS */,
627 0x00BF /* 0xBF INVERTED QUESTION MARK */,
628 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
629 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
630 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
631 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
632 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
633 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
634 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
635 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
636 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
637 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
638 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
639 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
640 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
641 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
642 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
643 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
644 0x011E /* 0xD0 LATIN CAPITAL LETTER G WITH BREVE */,
645 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
646 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
647 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
648 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
649 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
650 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
651 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
652 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
653 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
654 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
655 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
656 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
657 0x0130 /* 0xDD LATIN CAPITAL LETTER I WITH DOT ABOVE */,
658 0x015E /* 0xDE LATIN CAPITAL LETTER S WITH CEDILLA */,
659 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
660 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
661 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
662 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
663 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
664 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
665 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
666 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
667 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
668 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
669 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
670 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
671 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
672 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
673 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
674 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
675 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
676 0x011F /* 0xF0 LATIN SMALL LETTER G WITH BREVE */,
677 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
678 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
679 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
680 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
681 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
682 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
683 0x00F7 /* 0xF7 DIVISION SIGN */,
684 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
685 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
686 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
687 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
688 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
689 0x0131 /* 0xFD LATIN SMALL LETTER DOTLESS I */,
690 0x015F /* 0xFE LATIN SMALL LETTER S WITH CEDILLA */,
691 0x00FF /* 0xFF LATIN SMALL LETTER Y WITH DIAERESIS */,
694 Emchar latin_viscii_lower_to_ucs[96] =
794 Emchar latin_viscii_upper_to_ucs[96] =
894 Emchar latin_tcvn5712_to_ucs[96] =
896 0x00A0 /* 0xA0 NO-BREAK SPACE */,
897 0x0102 /* 0xA1 LATIN CAPITAL LETTER A WITH BREVE */,
898 0x00C2 /* 0xA2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
899 0x00CA /* 0xA3 LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
900 0x00D4 /* 0xA4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
901 0x01A0 /* 0xA5 LATIN CAPITAL LETTER O WITH HORN */,
902 0x01AF /* 0xA6 LATIN CAPITAL LETTER U WITH HORN */,
903 0x0110 /* 0xA7 LATIN CAPITAL LETTER D WITH STROKE */,
904 0x0103 /* 0xA8 LATIN SMALL LETTER A WITH BREVE */,
905 0x00E2 /* 0xA9 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
906 0x00EA /* 0xAA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
907 0x00F4 /* 0xAB LATIN SMALL LETTER O WITH CIRCUMFLEX */,
908 0x01A1 /* 0xAC LATIN SMALL LETTER O WITH HORN */,
909 0x01B0 /* 0xAD LATIN SMALL LETTER U WITH HORN */,
910 0x0111 /* 0xAE LATIN SMALL LETTER D WITH STROKE */,
911 0x1EB0 /* 0xAF LATIN CAPITAL LETTER A WITH BREVE AND GRAVE */,
912 0x0300 /* 0xB0 COMBINING GRAVE ACCENT */,
913 0x0309 /* 0xB1 COMBINING HOOK ABOVE */,
914 0x0303 /* 0xB2 COMBINING TILDE */,
915 0x0301 /* 0xB3 COMBINING ACUTE ACCENT */,
916 0x0323 /* 0xB4 COMBINING DOT BELOW */,
917 0x00E0 /* 0xB5 LATIN SMALL LETTER A WITH GRAVE */,
918 0x1EA3 /* 0xB6 LATIN SMALL LETTER A WITH HOOK ABOVE */,
919 0x00E3 /* 0xB7 LATIN SMALL LETTER A WITH TILDE */,
920 0x00E1 /* 0xB8 LATIN SMALL LETTER A WITH ACUTE */,
921 0x1EA1 /* 0xB9 LATIN SMALL LETTER A WITH DOT BELOW */,
922 0x1EB2 /* 0xBA LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE */,
923 0x1EB1 /* 0xBB LATIN SMALL LETTER A WITH BREVE AND GRAVE */,
924 0x1EB3 /* 0xBC LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE */,
925 0x1EB5 /* 0xBD LATIN SMALL LETTER A WITH BREVE AND TILDE */,
926 0x1EAF /* 0xBE LATIN SMALL LETTER A WITH BREVE AND ACUTE */,
927 0x1EB4 /* 0xBF LATIN CAPITAL LETTER A WITH BREVE AND TILDE */,
928 0x1EAE /* 0xC0 LATIN CAPITAL LETTER A WITH BREVE AND ACUTE */,
929 0x1EA6 /* 0xC1 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE */,
930 0x1EA8 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
931 0x1EAA /* 0xC3 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE */,
932 0x1EA4 /* 0xC4 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE */,
933 0x1EC0 /* 0xC5 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE */,
934 0x1EB7 /* 0xC6 LATIN SMALL LETTER A WITH BREVE AND DOT BELOW */,
935 0x1EA7 /* 0xC7 LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE */,
936 0x1EA9 /* 0xC8 LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
937 0x1EAB /* 0xC9 LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE */,
938 0x1EA5 /* 0xCA LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE */,
939 0x1EAD /* 0xCB LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW */,
940 0x00E8 /* 0xCC LATIN SMALL LETTER E WITH GRAVE */,
941 0x1EC2 /* 0xCD LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
942 0x1EBB /* 0xCE LATIN SMALL LETTER E WITH HOOK ABOVE */,
943 0x1EBD /* 0xCF LATIN SMALL LETTER E WITH TILDE */,
944 0x00E9 /* 0xD0 LATIN SMALL LETTER E WITH ACUTE */,
945 0x1EB9 /* 0xD1 LATIN SMALL LETTER E WITH DOT BELOW */,
946 0x1EC1 /* 0xD2 LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE */,
947 0x1EC3 /* 0xD3 LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
948 0x1EC5 /* 0xD4 LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE */,
949 0x1EBF /* 0xD5 LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE */,
950 0x1EC7 /* 0xD6 LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW */,
951 0x00EC /* 0xD7 LATIN SMALL LETTER I WITH GRAVE */,
952 0x1EC9 /* 0xD8 LATIN SMALL LETTER I WITH HOOK ABOVE */,
953 0x1EC4 /* 0xD9 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE */,
954 0x1EBE /* 0xDA LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE */,
955 0x1ED2 /* 0xDB LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE */,
956 0x0129 /* 0xDC LATIN SMALL LETTER I WITH TILDE */,
957 0x00ED /* 0xDD LATIN SMALL LETTER I WITH ACUTE */,
958 0x1ECB /* 0xDE LATIN SMALL LETTER I WITH DOT BELOW */,
959 0x00F2 /* 0xDF LATIN SMALL LETTER O WITH GRAVE */,
960 0x1ED4 /* 0xE0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
961 0x1ECF /* 0xE1 LATIN SMALL LETTER O WITH HOOK ABOVE */,
962 0x00F5 /* 0xE2 LATIN SMALL LETTER O WITH TILDE */,
963 0x00F3 /* 0xE3 LATIN SMALL LETTER O WITH ACUTE */,
964 0x1ECD /* 0xE4 LATIN SMALL LETTER O WITH DOT BELOW */,
965 0x1ED3 /* 0xE5 LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE */,
966 0x1ED5 /* 0xE6 LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
967 0x1ED7 /* 0xE7 LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE */,
968 0x1ED1 /* 0xE8 LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE */,
969 0x1ED9 /* 0xE9 LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW */,
970 0x1EDD /* 0xEA LATIN SMALL LETTER O WITH HORN AND GRAVE */,
971 0x1EDF /* 0xEB LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE */,
972 0x1EE1 /* 0xEC LATIN SMALL LETTER O WITH HORN AND TILDE */,
973 0x1EDB /* 0xED LATIN SMALL LETTER O WITH HORN AND ACUTE */,
974 0x1EE3 /* 0xEE LATIN SMALL LETTER O WITH HORN AND DOT BELOW */,
975 0x00F9 /* 0xEF LATIN SMALL LETTER U WITH GRAVE */,
976 0x1ED6 /* 0xF0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE */,
977 0x1EE7 /* 0xF1 LATIN SMALL LETTER U WITH HOOK ABOVE */,
978 0x0169 /* 0xF2 LATIN SMALL LETTER U WITH TILDE */,
979 0x00FA /* 0xF3 LATIN SMALL LETTER U WITH ACUTE */,
980 0x1EE5 /* 0xF4 LATIN SMALL LETTER U WITH DOT BELOW */,
981 0x1EEB /* 0xF5 LATIN SMALL LETTER U WITH HORN AND GRAVE */,
982 0x1EED /* 0xF6 LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE */,
983 0x1EEF /* 0xF7 LATIN SMALL LETTER U WITH HORN AND TILDE */,
984 0x1EE9 /* 0xF8 LATIN SMALL LETTER U WITH HORN AND ACUTE */,
985 0x1EF1 /* 0xF9 LATIN SMALL LETTER U WITH HORN AND DOT BELOW */,
986 0x1EF3 /* 0xFA LATIN SMALL LETTER Y WITH GRAVE */,
987 0x1EF7 /* 0xFB LATIN SMALL LETTER Y WITH HOOK ABOVE */,
988 0x1EF9 /* 0xFC LATIN SMALL LETTER Y WITH TILDE */,
989 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
990 0x1EF5 /* 0xFE LATIN SMALL LETTER Y WITH DOT BELOW */,
991 0x1ED0 /* 0xFF LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */
994 Lisp_Object Vutf_2000_version;
998 int leading_code_private_11;
1001 Lisp_Object Qcharsetp;
1003 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1004 Lisp_Object Qregistry, Qfinal, Qgraphic;
1005 Lisp_Object Qdirection;
1006 Lisp_Object Qreverse_direction_charset;
1007 Lisp_Object Qleading_byte;
1008 Lisp_Object Qshort_name, Qlong_name;
1022 Qcyrillic_iso8859_5,
1024 Qjapanese_jisx0208_1978,
1029 Qchinese_cns11643_1,
1030 Qchinese_cns11643_2,
1033 Qlatin_viscii_lower,
1034 Qlatin_viscii_upper,
/* Symbols for the two charset directions.  make-charset and
   charset-from-attributes compare a direction argument against them to
   choose CHARSET_LEFT_TO_RIGHT or CHARSET_RIGHT_TO_LEFT, and
   charset-property returns one of them for the 'direction property. */
1040 Lisp_Object Ql2r, Qr2l;
/* Table with charset names as keys and charset objects as values:
   make_charset() stores into it with Fputhash() and find-charset reads
   it with Fgethash(). */
1042 Lisp_Object Vcharset_hash_table;
/* Next leading bytes to hand out.  get_unallocated_leading_byte()
   compares each against MAX_LEADING_BYTE_PRIVATE_1 / _2 and
   post-increments it when it assigns one. */
1044 static Charset_ID next_allocated_1_byte_leading_byte;
1045 static Charset_ID next_allocated_2_byte_leading_byte;
1047 /* Composite characters are characters constructed by overstriking two
1048 or more regular characters.
1050 1) The old Mule implementation involves storing composite characters
1051 in a buffer as a tag followed by all of the actual characters
1052 used to make up the composite character. I think this is a bad
1053 idea; it greatly complicates code that wants to handle strings
1054 one character at a time because it has to deal with the possibility
1055 of great big ungainly characters. It's much more reasonable to
1056 simply store an index into a table of composite characters.
1058 2) The current implementation only allows for 16,384 separate
1059 composite characters over the lifetime of the XEmacs process.
1060 This could become a potential problem if the user
1061 edited lots of different files that use composite characters.
1062 Due to FSF bogosity, increasing the number of allowable
1063 composite characters under Mule would decrease the number
1064 of possible faces that can exist. Mule already has shrunk
1065 this to 2048, and further shrinkage would become uncomfortable.
1066 No such problems exist in XEmacs.
1068 Composite characters could be represented as 0x80 C1 C2 C3,
1069 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1070 for slightly under 2^20 (one million) composite characters
1071 over the XEmacs process lifetime, and you only need to
1072 increase the size of a Mule character from 19 to 21 bits.
1073 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1074 85 million (slightly over 2^26) composite characters. */
1077 /************************************************************************/
1078 /* Basic Emchar functions */
1079 /************************************************************************/
1081 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1082 string in STR. Returns the number of bytes stored.
1083 Do not call this directly. Use the macro set_charptr_emchar() instead.
1087 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1093 Lisp_Object charset;
1102 else if ( c <= 0x7ff )
1104 *p++ = (c >> 6) | 0xc0;
1105 *p++ = (c & 0x3f) | 0x80;
1107 else if ( c <= 0xffff )
1109 *p++ = (c >> 12) | 0xe0;
1110 *p++ = ((c >> 6) & 0x3f) | 0x80;
1111 *p++ = (c & 0x3f) | 0x80;
1113 else if ( c <= 0x1fffff )
1115 *p++ = (c >> 18) | 0xf0;
1116 *p++ = ((c >> 12) & 0x3f) | 0x80;
1117 *p++ = ((c >> 6) & 0x3f) | 0x80;
1118 *p++ = (c & 0x3f) | 0x80;
1120 else if ( c <= 0x3ffffff )
1122 *p++ = (c >> 24) | 0xf8;
1123 *p++ = ((c >> 18) & 0x3f) | 0x80;
1124 *p++ = ((c >> 12) & 0x3f) | 0x80;
1125 *p++ = ((c >> 6) & 0x3f) | 0x80;
1126 *p++ = (c & 0x3f) | 0x80;
1130 *p++ = (c >> 30) | 0xfc;
1131 *p++ = ((c >> 24) & 0x3f) | 0x80;
1132 *p++ = ((c >> 18) & 0x3f) | 0x80;
1133 *p++ = ((c >> 12) & 0x3f) | 0x80;
1134 *p++ = ((c >> 6) & 0x3f) | 0x80;
1135 *p++ = (c & 0x3f) | 0x80;
1138 BREAKUP_CHAR (c, charset, c1, c2);
1139 lb = CHAR_LEADING_BYTE (c);
1140 if (LEADING_BYTE_PRIVATE_P (lb))
1141 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1143 if (EQ (charset, Vcharset_control_1))
1152 /* Return the first character from a Mule-encoded string in STR,
1153 assuming it's non-ASCII. Do not call this directly.
1154 Use the macro charptr_emchar() instead. */
/* One part of the body tests the first byte against the thresholds 0xf8,
   0xf0, 0xe0 and 0xc0 and then runs a loop that shifts the accumulated
   value left six bits and ORs in the low six bits of a byte.  Another
   part looks the charset up from the leading byte and builds the result
   with MAKE_CHAR().  NOTE(review): these look like alternative build
   configurations -- confirm which one is compiled. */
1157 non_ascii_charptr_emchar (CONST Bufbyte *str)
1170 else if ( b >= 0xf8 )
1175 else if ( b >= 0xf0 )
1180 else if ( b >= 0xe0 )
1185 else if ( b >= 0xc0 )
1195 for( ; len > 0; len-- ) /* LEN counts the iterations still to run */
1198 ch = ( ch << 6 ) | ( b & 0x3f ); /* append the low six bits of b to ch */
1202 Bufbyte i0 = *str, i1, i2 = 0; /* i0 is the first byte of STR; i2 starts at 0 */
1203 Lisp_Object charset;
1205 if (i0 == LEADING_BYTE_CONTROL_1)
1206 return (Emchar) (*++str - 0x20); /* control-1: the following byte minus 0x20 */
1208 if (LEADING_BYTE_PREFIX_P (i0))
1213 charset = CHARSET_BY_LEADING_BYTE (i0); /* charset registered for leading byte i0 */
1214 if (XCHARSET_DIMENSION (charset) == 2)
1217 return MAKE_CHAR (charset, i1, i2); /* combine the charset with the position bytes */
1221 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1222 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into three fields with CHAR_FIELD1/2/3.  The checks below
   compare f1 and f2 against the official and private field ranges, and
   two of the return statements report whether the charset found through
   CHARSET_BY_LEADING_BYTE() has 96 characters per dimension. */
1226 non_ascii_valid_char_p (Emchar ch)
1230 /* Must have only lowest 19 bits set */
1234 f1 = CHAR_FIELD1 (ch);
1235 f2 = CHAR_FIELD2 (ch);
1236 f3 = CHAR_FIELD3 (ch);
1240 Lisp_Object charset;
1242 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1243 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1244 f2 > MAX_CHAR_FIELD2_PRIVATE) /* f2 lies in neither the official nor the private field-2 range */
1249 if (f3 != 0x20 && f3 != 0x7F)
1253 NOTE: This takes advantage of the fact that
1254 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1255 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1257 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE); /* the official offset is applied to f2 whichever range it is in */
1258 return (XCHARSET_CHARS (charset) == 96); /* true only for a charset with 96 characters per dimension */
1262 Lisp_Object charset;
1264 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1265 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1266 f1 > MAX_CHAR_FIELD1_PRIVATE) /* f1 lies in neither the official nor the private field-1 range */
1268 if (f2 < 0x20 || f3 < 0x20) /* either position field is below 0x20 */
1271 #ifdef ENABLE_COMPOSITE_CHARS
1272 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE) /* f1 maps to the composite leading byte */
1274 if (UNBOUNDP (Fgethash (make_int (ch), /* is CH missing from the composite-character table? */
1275 Vcomposite_char_char2string_hash_table,
1280 #endif /* ENABLE_COMPOSITE_CHARS */
1282 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F) /* neither position field is 0x20 or 0x7F */
1285 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL) /* official range of f1 */
1287 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); /* lookup using the official offset */
1290 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); /* lookup using the private offset */
1292 return (XCHARSET_CHARS (charset) == 96); /* true only for a charset with 96 characters per dimension */
1298 /************************************************************************/
1299 /* Basic string functions */
1300 /************************************************************************/
1302 /* Copy the character pointed to by PTR into STR, assuming it's
1303 non-ASCII. Do not call this directly. Use the macro
1304 charptr_copy_char() instead. */
/* The switch dispatches on REP_BYTES_BY_FIRST_BYTE of the byte that the
   write cursor points at and falls through its cases, each of which
   copies one more byte from PTR.  The value returned is one more than
   the distance the write cursor has advanced from STR. */
1307 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1309 Bufbyte *strptr = str; /* write cursor into STR */
1311 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1313 /* Notice fallthrough. */
1315 case 6: *++strptr = *ptr++;
1316 case 5: *++strptr = *ptr++;
1318 case 4: *++strptr = *ptr++;
1319 case 3: *++strptr = *ptr++;
1320 case 2: *++strptr = *ptr; /* last copy: PTR is not advanced here */
1325 return strptr + 1 - str; /* cursor offset from STR, plus one */
1329 /************************************************************************/
1330 /* streams of Emchars */
1331 /************************************************************************/
1333 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1334 The functions below are not meant to be called directly; use
1335 the macros in insdel.h. */
/* Assemble one character from STREAM.  CH is stored as the first byte of
   a local buffer; the switch on REP_BYTES_BY_FIRST_BYTE (ch) then falls
   through a series of Lstream_getc() calls, appending each byte read,
   and the buffer is finally decoded with charptr_emchar(). */
1338 Lstream_get_emchar_1 (Lstream *stream, int ch)
1340 Bufbyte str[MAX_EMCHAR_LEN]; /* bytes of the character being assembled */
1341 Bufbyte *strptr = str; /* points at the last byte stored so far */
1343 str[0] = (Bufbyte) ch;
1344 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1346 /* Notice fallthrough. */
1349 ch = Lstream_getc (stream);
1351 *++strptr = (Bufbyte) ch;
1353 ch = Lstream_getc (stream);
1355 *++strptr = (Bufbyte) ch;
1358 ch = Lstream_getc (stream);
1360 *++strptr = (Bufbyte) ch;
1362 ch = Lstream_getc (stream);
1364 *++strptr = (Bufbyte) ch;
1366 ch = Lstream_getc (stream);
1368 *++strptr = (Bufbyte) ch;
1373 return charptr_emchar (str); /* decode the assembled bytes */
/* Encode CH into a local buffer with set_charptr_emchar() and pass the
   LEN bytes it produced to Lstream_write() on STREAM, returning that
   call's result. */
1377 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1379 Bufbyte str[MAX_EMCHAR_LEN];
1380 Bytecount len = set_charptr_emchar (str, ch);
1381 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar() and hand the
   LEN bytes it produced to Lstream_unread() on STREAM. */
1385 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1387 Bufbyte str[MAX_EMCHAR_LEN];
1388 Bytecount len = set_charptr_emchar (str, ch);
1389 Lstream_unread (stream, str, len);
1393 /************************************************************************/
1394 /* charset object */
1395 /************************************************************************/
/* Mark routine named for charset objects in
   DEFINE_LRECORD_IMPLEMENTATION: applies MARKOBJ to the Lisp_Object
   fields of the charset OBJ listed below. */
1398 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1400 struct Lisp_Charset *cs = XCHARSET (obj);
1402 markobj (cs->short_name);
1403 markobj (cs->long_name);
1404 markobj (cs->doc_string);
1405 markobj (cs->registry);
1406 markobj (cs->ccl_program);
/* Print routine named for charset objects in
   DEFINE_LRECORD_IMPLEMENTATION.  Writes to PRINTCHARFUN the text
   "#<charset ", then the charset's name, short name, long name and doc
   string, a formatted summary (type, direction, columns, graphic,
   final), the registry, and finally the record's uid. */
1411 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1413 struct Lisp_Charset *cs = XCHARSET (obj);
1417 error ("printing unreadable object #<charset %s 0x%x>", /* NOTE(review): presumably raised only when readable output was requested -- confirm */
1418 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1421 write_c_string ("#<charset ", printcharfun);
1422 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1423 write_c_string (" ", printcharfun);
1424 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1425 write_c_string (" ", printcharfun);
1426 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1427 write_c_string (" ", printcharfun);
1428 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1429 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=", /* summary is formatted into buf before being written */
1430 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" : /* first %s: the charset type spelled out */
1431 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1432 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1434 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l", /* second %s: direction */
1435 CHARSET_COLUMNS (cs),
1436 CHARSET_GRAPHIC (cs),
1437 CHARSET_FINAL (cs));
1438 write_c_string (buf, printcharfun);
1439 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1440 sprintf (buf, " 0x%x>", cs->header.uid); /* close the printed form with the record's uid */
1441 write_c_string (buf, printcharfun);
/* Layout description handed to DEFINE_LRECORD_IMPLEMENTATION for
   charsets.  NOTE(review): the 7 is presumably the number of consecutive
   Lisp_Object slots starting at `name' -- confirm against
   struct Lisp_Charset and the lrecord_description conventions. */
1444 static const struct lrecord_description charset_description[] = {
1445 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
/* Define the "charset" lrecord implementation for struct Lisp_Charset,
   naming mark_charset and print_charset as its mark and print routines
   and charset_description as its description; the three 0 arguments
   leave the remaining slots of the macro empty. */
1449 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1450 mark_charset, print_charset, 0, 0, 0,
1451 charset_description,
1452 struct Lisp_Charset);
1453 /* Make a new charset. */
/* Allocates a Lisp_Charset record, copies the arguments into it, derives
   the dimension and the characters per dimension from TYPE, sets the
   representation byte count, and records the object in
   charset_by_attributes, charset_by_leading_byte, rep_bytes_by_first_byte
   and Vcharset_hash_table.  For the one-dimensional types a non-NULL
   DECODING_TABLE is also entered into a byte-from-character table. */
1456 make_charset (Charset_ID id, Lisp_Object name,
1457 unsigned char type, unsigned char columns, unsigned char graphic,
1458 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1459 Lisp_Object long_name, Lisp_Object doc,
1461 Emchar* decoding_table,
1462 Emchar ucs_min, Emchar ucs_max, Emchar code_offset)
1465 struct Lisp_Charset *cs =
1466 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1467 XSETCHARSET (obj, cs); /* OBJ now refers to the new record */
1469 CHARSET_ID (cs) = id;
1470 CHARSET_NAME (cs) = name;
1471 CHARSET_SHORT_NAME (cs) = short_name;
1472 CHARSET_LONG_NAME (cs) = long_name;
1473 CHARSET_DIRECTION (cs) = direction;
1474 CHARSET_TYPE (cs) = type;
1475 CHARSET_COLUMNS (cs) = columns;
1476 CHARSET_GRAPHIC (cs) = graphic;
1477 CHARSET_FINAL (cs) = final;
1478 CHARSET_DOC_STRING (cs) = doc;
1479 CHARSET_REGISTRY (cs) = reg;
1480 CHARSET_CCL_PROGRAM (cs) = Qnil; /* a new charset starts without a CCL program */
1481 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; /* and without a reverse-direction partner */
1483 CHARSET_DECODING_TABLE(cs) = decoding_table;
1484 CHARSET_UCS_MIN(cs) = ucs_min;
1485 CHARSET_UCS_MAX(cs) = ucs_max;
1486 CHARSET_CODE_OFFSET(cs) = code_offset;
1489 switch ( CHARSET_TYPE (cs) ) /* TYPE fixes the dimension and the characters per dimension */
1491 case CHARSET_TYPE_94:
1492 CHARSET_DIMENSION (cs) = 1;
1493 CHARSET_CHARS (cs) = 94;
1495 if (decoding_table != NULL) /* a decoding table was supplied */
1498 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1499 for (i = 0; i < 94; i++)
1501 Emchar c = decoding_table[i];
1504 put_byte_from_character_table (c, i + 33, /* table entry i is entered with byte value i + 33 */
1505 CHARSET_TO_BYTE1_TABLE(cs));
1509 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1510 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1513 case CHARSET_TYPE_96:
1514 CHARSET_DIMENSION (cs) = 1;
1515 CHARSET_CHARS (cs) = 96;
1517 if (decoding_table != NULL) /* a decoding table was supplied */
1520 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1521 for (i = 0; i < 96; i++)
1523 Emchar c = decoding_table[i];
1526 put_byte_from_character_table (c, i + 32, /* table entry i is entered with byte value i + 32 */
1527 CHARSET_TO_BYTE1_TABLE(cs));
1531 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1532 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1535 case CHARSET_TYPE_94X94:
1536 CHARSET_DIMENSION (cs) = 2;
1537 CHARSET_CHARS (cs) = 94;
1539 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1540 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1543 case CHARSET_TYPE_96X96:
1544 CHARSET_DIMENSION (cs) = 2;
1545 CHARSET_CHARS (cs) = 96;
1547 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1548 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1552 case CHARSET_TYPE_128X128:
1553 CHARSET_DIMENSION (cs) = 2;
1554 CHARSET_CHARS (cs) = 128;
1556 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1557 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1560 case CHARSET_TYPE_256X256:
1561 CHARSET_DIMENSION (cs) = 2;
1562 CHARSET_CHARS (cs) = 256;
1564 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1565 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1572 if (id == LEADING_BYTE_ASCII)
1573 CHARSET_REP_BYTES (cs) = 1; /* ASCII: one byte per character */
1575 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1; /* the position bytes plus one more byte */
1577 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2; /* the position bytes plus two more bytes */
1582 /* some charsets do not have final characters. This includes
1583 ASCII, Control-1, Composite, and the two faux private
1586 assert (NILP (charset_by_attributes[type][final]));
1587 charset_by_attributes[type][final] = obj;
1589 assert (NILP (charset_by_attributes[type][final][direction]));
1590 charset_by_attributes[type][final][direction] = obj;
1594 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1595 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1598 /* official leading byte */
1599 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs); /* table indexed by the leading byte ID */
1602 /* Some charsets are "faux" and don't have names or really exist at
1603 all except in the leading-byte table. */
1605 Fputhash (name, obj, Vcharset_hash_table); /* enter the charset under NAME */
/* Hand out a leading byte for a new charset of the given DIMENSION.
   Each dimension has its own counter, which is compared against
   MAX_LEADING_BYTE_PRIVATE_1 / _2 and post-incremented when a byte is
   taken from it.  An error carrying DIMENSION is signalled with the
   message "No more character sets free for this dimension". */
1610 get_unallocated_leading_byte (int dimension)
1616 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1) /* counter has passed the last 1-byte private leading byte */
1619 lb = next_allocated_1_byte_leading_byte++; /* take the current value, then advance the counter */
1623 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2) /* counter has passed the last 2-byte private leading byte */
1626 lb = next_allocated_2_byte_leading_byte++; /* take the current value, then advance the counter */
1631 ("No more character sets free for this dimension",
1632 make_int (dimension));
/* Compute byte 1 of CH in CHARSET.  A byte-1 table is consulted when the
   charset has one; otherwise a CH inside the charset's
   [ucs_min, ucs_max] range maps to its offset in that range plus the
   charset's code offset; otherwise the value is derived arithmetically
   from the charset's dimension, characters per dimension and final
   byte. */
1639 charset_get_byte1 (Lisp_Object charset, Emchar ch)
1641 Emchar_to_byte_table* table;
1644 if ((table = XCHARSET_TO_BYTE1_TABLE (charset)) != NULL)
1645 return get_byte_from_character_table (ch, table); /* an explicit table takes precedence */
1646 else if ((CHARSET_UCS_MIN (XCHARSET (charset)) <= ch)
1647 && (ch <= CHARSET_UCS_MAX (XCHARSET (charset))))
1648 return ch - CHARSET_UCS_MIN (XCHARSET (charset))
1649 + CHARSET_CODE_OFFSET (XCHARSET (charset)); /* offset within the UCS range, shifted by the code offset */
1650 else if (XCHARSET_DIMENSION (charset) == 1)
1652 if (XCHARSET_CHARS (charset) == 94)
1654 if (((d = ch - (MIN_CHAR_94
1655 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0) /* d: offset of ch from the 94-character block selected by the final byte */
1659 else if (XCHARSET_CHARS (charset) == 96)
1661 if (((d = ch - (MIN_CHAR_96
1662 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0) /* d: offset of ch from the 96-character block selected by the final byte */
1669 else if (XCHARSET_DIMENSION (charset) == 2)
1671 if (XCHARSET_CHARS (charset) == 94)
1673 if (((d = ch - (MIN_CHAR_94x94
1674 + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) >= 0) /* d: offset of ch from the 94x94 block selected by the final byte */
1676 return (d / 94) + 33; /* row number within the block, plus 33 */
1678 else if (XCHARSET_CHARS (charset) == 96)
1680 if (((d = ch - (MIN_CHAR_96x96
1681 + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) >= 0) /* d: offset of ch from the 96x96 block selected by the final byte */
1683 return (d / 96) + 32; /* row number within the block, plus 32 */
/* Compute byte 2 of CH in CHARSET.  A byte-2 table is consulted when the
   charset has one; for Vcharset_ucs_bmp the value is bits 8-15 of CH;
   otherwise, when CH lies inside the 94x94 or 96x96 block selected by
   the charset's final byte, the value is the remainder of CH's offset
   modulo 94 (plus 33) or modulo 96 (plus 32), and 0 when CH is outside
   that block. */
1690 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1692 if (XCHARSET_DIMENSION (charset) == 1)
1696 Emchar_to_byte_table* table;
1698 if ((table = XCHARSET_TO_BYTE2_TABLE (charset)) != NULL)
1699 return get_byte_from_character_table (ch, table); /* an explicit table takes precedence */
1700 else if (EQ (charset, Vcharset_ucs_bmp))
1701 return (ch >> 8) & 0xff; /* bits 8-15 of ch */
1702 else if (XCHARSET_CHARS (charset) == 94)
1703 return (MIN_CHAR_94x94
1704 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1705 && (ch < MIN_CHAR_94x94
1706 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1707 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0; /* the block offset is a multiple of 94, so it does not affect the remainder */
1708 else /* if (XCHARSET_CHARS (charset) == 96) */
1709 return (MIN_CHAR_96x96
1710 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1711 && (ch < MIN_CHAR_96x96
1712 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1713 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0; /* the block offset is a multiple of 96, so it does not affect the remainder */
1717 Lisp_Object Vdefault_coded_charset_priority_list;
1721 /************************************************************************/
1722 /* Basic charset Lisp functions */
1723 /************************************************************************/
1725 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1726 Return non-nil if OBJECT is a charset.
1730 return CHARSETP (object) ? Qt : Qnil;
1733 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1734 Retrieve the charset of the given name.
1735 If CHARSET-OR-NAME is a charset object, it is simply returned.
1736 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1737 nil is returned. Otherwise the associated charset object is returned.
1741 if (CHARSETP (charset_or_name))
1742 return charset_or_name;
1744 CHECK_SYMBOL (charset_or_name);
1745 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1748 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1749 Retrieve the charset of the given name.
1750 Same as `find-charset' except an error is signalled if there is no such
1751 charset instead of returning nil.
1755 Lisp_Object charset = Ffind_charset (name);
1758 signal_simple_error ("No such charset", name);
1762 /* We store the charsets in hash tables with the names as the key and the
1763 actual charset object as the value. Occasionally we need to use them
1764 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash() by charset-list: it carries a
   pointer to the list that add_charset_to_list_mapper() extends. */
1765 struct charset_list_closure
1767 Lisp_Object *charset_list;
/* Mapping function given to elisp_maphash() by charset-list.  KEY is
   not used in the lines below; VALUE is a charset whose name is consed
   onto the front of the list that the closure points to. */
1771 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1772 void *charset_list_closure)
1774 /* This function can GC */
1775 struct charset_list_closure *chcl =
1776 (struct charset_list_closure*) charset_list_closure;
1777 Lisp_Object *charset_list = chcl->charset_list;
1779 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list); /* prepend this charset's name */
1783 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1784 Return a list of the names of all defined charsets.
1788 Lisp_Object charset_list = Qnil;
1789 struct gcpro gcpro1;
1790 struct charset_list_closure charset_list_closure;
1792 GCPRO1 (charset_list);
1793 charset_list_closure.charset_list = &charset_list;
1794 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1795 &charset_list_closure);
1798 return charset_list;
1801 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1802 Return the name of the given charset.
1806 return XCHARSET_NAME (Fget_charset (charset));
1809 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1810 Define a new character set.
1811 This function is for use with Mule support.
1812 NAME is a symbol, the name by which the character set is normally referred.
1813 DOC-STRING is a string describing the character set.
1814 PROPS is a property list, describing the specific nature of the
1815 character set. Recognized properties are:
1817 'short-name Short version of the charset name (ex: Latin-1)
1818 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1819 'registry A regular expression matching the font registry field for
1821 'dimension Number of octets used to index a character in this charset.
1822 Either 1 or 2. Defaults to 1.
1823 'columns Number of columns used to display a character in this charset.
1824 Only used in TTY mode. (Under X, the actual width of a
1825 character can be derived from the font used to display the
1826 characters.) If unspecified, defaults to the dimension
1827 (this is almost always the correct value).
1828 'chars Number of characters in each dimension (94 or 96).
1829 Defaults to 94. Note that if the dimension is 2, the
1830 character set thus described is 94x94 or 96x96.
1831 'final Final byte of ISO 2022 escape sequence. Must be
1832 supplied. Each combination of (DIMENSION, CHARS) defines a
1833 separate namespace for final bytes. Note that ISO
1834 2022 restricts the final byte to the range
1835 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1836 dimension == 2. Note also that final bytes in the range
1837 0x30 - 0x3F are reserved for user-defined (not official)
1839 'graphic 0 (use left half of font on output) or 1 (use right half
1840 of font on output). Defaults to 0. For example, for
1841 a font whose registry is ISO8859-1, the left half
1842 (octets 0x20 - 0x7F) is the `ascii' character set, while
1843 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1844 character set. With 'graphic set to 0, the octets
1845 will have their high bit cleared; with it set to 1,
1846 the octets will have their high bit set.
1847 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1849 'ccl-program A compiled CCL program used to convert a character in
1850 this charset into an index into the font. This is in
1851 addition to the 'graphic property. The CCL program
1852 is passed the octets of the character, with the high
1853 bit cleared and set depending upon whether the value
1854 of the 'graphic property is 0 or 1.
1856 (name, doc_string, props))
1858 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1859 int direction = CHARSET_LEFT_TO_RIGHT;
1861 Lisp_Object registry = Qnil;
1862 Lisp_Object charset;
1863 Lisp_Object rest, keyword, value;
1864 Lisp_Object ccl_program = Qnil;
1865 Lisp_Object short_name = Qnil, long_name = Qnil;
1867 CHECK_SYMBOL (name);
1868 if (!NILP (doc_string))
1869 CHECK_STRING (doc_string);
1871 charset = Ffind_charset (name);
1872 if (!NILP (charset))
1873 signal_simple_error ("Cannot redefine existing charset", name);
1875 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1877 if (EQ (keyword, Qshort_name))
1879 CHECK_STRING (value);
1883 if (EQ (keyword, Qlong_name))
1885 CHECK_STRING (value);
1889 else if (EQ (keyword, Qdimension))
1892 dimension = XINT (value);
1893 if (dimension < 1 || dimension > 2)
1894 signal_simple_error ("Invalid value for 'dimension", value);
1897 else if (EQ (keyword, Qchars))
1900 chars = XINT (value);
1901 if (chars != 94 && chars != 96)
1902 signal_simple_error ("Invalid value for 'chars", value);
1905 else if (EQ (keyword, Qcolumns))
1908 columns = XINT (value);
1909 if (columns != 1 && columns != 2)
1910 signal_simple_error ("Invalid value for 'columns", value);
1913 else if (EQ (keyword, Qgraphic))
1916 graphic = XINT (value);
1917 if (graphic < 0 || graphic > 1)
1918 signal_simple_error ("Invalid value for 'graphic", value);
1921 else if (EQ (keyword, Qregistry))
1923 CHECK_STRING (value);
1927 else if (EQ (keyword, Qdirection))
1929 if (EQ (value, Ql2r))
1930 direction = CHARSET_LEFT_TO_RIGHT;
1931 else if (EQ (value, Qr2l))
1932 direction = CHARSET_RIGHT_TO_LEFT;
1934 signal_simple_error ("Invalid value for 'direction", value);
1937 else if (EQ (keyword, Qfinal))
1939 CHECK_CHAR_COERCE_INT (value);
1940 final = XCHAR (value);
1941 if (final < '0' || final > '~')
1942 signal_simple_error ("Invalid value for 'final", value);
1945 else if (EQ (keyword, Qccl_program))
1947 CHECK_VECTOR (value);
1948 ccl_program = value;
1952 signal_simple_error ("Unrecognized property", keyword);
1956 error ("'final must be specified");
1957 if (dimension == 2 && final > 0x5F)
1959 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1963 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1965 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1967 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1968 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1970 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1977 /* id = CHARSET_ID_OFFSET_94 + final; */
1978 id = get_unallocated_leading_byte (dimension);
1980 else if (chars == 96)
1982 id = get_unallocated_leading_byte (dimension);
1989 else if (dimension == 2)
1993 id = get_unallocated_leading_byte (dimension);
1995 else if (chars == 96)
1997 id = get_unallocated_leading_byte (dimension);
2009 id = get_unallocated_leading_byte (dimension);
2012 if (NILP (doc_string))
2013 doc_string = build_string ("");
2015 if (NILP (registry))
2016 registry = build_string ("");
2018 if (NILP (short_name))
2019 XSETSTRING (short_name, XSYMBOL (name)->name);
2021 if (NILP (long_name))
2022 long_name = doc_string;
2025 columns = dimension;
2026 charset = make_charset (id, name, type, columns, graphic,
2027 final, direction, short_name, long_name,
2028 doc_string, registry,
2030 if (!NILP (ccl_program))
2031 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2035 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2037 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2038 NEW-NAME is the name of the new charset. Return the new charset.
2040 (charset, new_name))
2042 Lisp_Object new_charset = Qnil;
2043 int id, dimension, columns, graphic, final;
2044 int direction, type;
2045 Lisp_Object registry, doc_string, short_name, long_name;
2046 struct Lisp_Charset *cs;
2048 charset = Fget_charset (charset);
2049 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2050 signal_simple_error ("Charset already has reverse-direction charset",
2053 CHECK_SYMBOL (new_name);
2054 if (!NILP (Ffind_charset (new_name)))
2055 signal_simple_error ("Cannot redefine existing charset", new_name);
2057 cs = XCHARSET (charset);
2059 type = CHARSET_TYPE (cs);
2060 columns = CHARSET_COLUMNS (cs);
2061 dimension = CHARSET_DIMENSION (cs);
2062 id = get_unallocated_leading_byte (dimension);
2064 graphic = CHARSET_GRAPHIC (cs);
2065 final = CHARSET_FINAL (cs);
2066 direction = CHARSET_RIGHT_TO_LEFT;
2067 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2068 direction = CHARSET_LEFT_TO_RIGHT;
2069 doc_string = CHARSET_DOC_STRING (cs);
2070 short_name = CHARSET_SHORT_NAME (cs);
2071 long_name = CHARSET_LONG_NAME (cs);
2072 registry = CHARSET_REGISTRY (cs);
2074 new_charset = make_charset (id, new_name, type, columns,
2075 graphic, final, direction, short_name, long_name,
2076 doc_string, registry,
2077 CHARSET_DECODING_TABLE(cs),
2078 CHARSET_UCS_MIN(cs),
2079 CHARSET_UCS_MAX(cs),
2080 CHARSET_CODE_OFFSET(cs));
2082 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2083 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2088 /* #### Reverse direction charsets not yet implemented. */
2090 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2092 Return the reverse-direction charset parallel to CHARSET, if any.
2093 This is the charset with the same properties (in particular, the same
2094 dimension, number of characters per dimension, and final byte) as
2095 CHARSET but whose characters are displayed in the opposite direction.
2099 charset = Fget_charset (charset);
2100 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2104 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2105 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2106 If DIRECTION is omitted, both directions will be checked (left-to-right
2107 will be returned if character sets exist for both directions).
2109 (dimension, chars, final, direction))
2111 int dm, ch, fi, di = -1;
2113 Lisp_Object obj = Qnil;
2115 CHECK_INT (dimension);
2116 dm = XINT (dimension);
2117 if (dm < 1 || dm > 2)
2118 signal_simple_error ("Invalid value for DIMENSION", dimension);
2122 if (ch != 94 && ch != 96)
2123 signal_simple_error ("Invalid value for CHARS", chars);
2125 CHECK_CHAR_COERCE_INT (final);
2127 if (fi < '0' || fi > '~')
2128 signal_simple_error ("Invalid value for FINAL", final);
2130 if (EQ (direction, Ql2r))
2131 di = CHARSET_LEFT_TO_RIGHT;
2132 else if (EQ (direction, Qr2l))
2133 di = CHARSET_RIGHT_TO_LEFT;
2134 else if (!NILP (direction))
2135 signal_simple_error ("Invalid value for DIRECTION", direction);
2137 if (dm == 2 && fi > 0x5F)
2139 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2142 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2144 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2148 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2150 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2153 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2156 return XCHARSET_NAME (obj);
2160 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2161 Return short name of CHARSET.
2165 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2168 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2169 Return long name of CHARSET.
2173 return XCHARSET_LONG_NAME (Fget_charset (charset));
2176 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2177 Return description of CHARSET.
2181 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Resolves CHARSET through Fget_charset and returns its
   XCHARSET_DIMENSION value converted to a Lisp integer with
   make_int. */
2184 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2185 Return dimension of CHARSET.
2189 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Implementation notes for `charset-property':
   CHARSET is first resolved with Fget_charset and PROP must be a
   symbol (CHECK_SYMBOL).  PROP is then compared with EQ against each
   supported property symbol in turn:
   - name, short-name, long-name, doc-string, registry and ccl-program
     return the stored Lisp object unchanged;
   - dimension, columns, graphic and chars are wrapped with make_int,
     final with make_char;
   - direction yields the symbol `l2r' when the stored direction is
     CHARSET_LEFT_TO_RIGHT and `r2l' otherwise;
   - reverse-direction-charset returns the name (XCHARSET_NAME) of the
     stored reverse-direction charset.  NOTE(review): the lines between
     fetching that object and returning its name are not visible in
     this extract -- presumably an empty value is handled there;
     confirm against the full file.
   Any other symbol is reported through signal_simple_error; the final
   `return Qnil' is marked as not reached. */
2192 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2193 Return property PROP of CHARSET.
2194 Recognized properties are those listed in `make-charset', as well as
2195 'name and 'doc-string.
2199 struct Lisp_Charset *cs;
2201 charset = Fget_charset (charset);
2202 cs = XCHARSET (charset);
2204 CHECK_SYMBOL (prop);
2205 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2206 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2207 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2208 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2209 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2210 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2211 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2212 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2213 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2214 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2215 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2216 if (EQ (prop, Qdirection))
2217 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2218 if (EQ (prop, Qreverse_direction_charset))
2220 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2224 return XCHARSET_NAME (obj);
2226 signal_simple_error ("Unrecognized charset property name", prop);
2227 return Qnil; /* not reached */
/* Resolves CHARSET through Fget_charset and returns its
   XCHARSET_LEADING_BYTE value as a Lisp integer (make_int); the
   "identification number" exposed to Lisp is the charset's leading
   byte. */
2230 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2231 Return charset identification number of CHARSET.
2235 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2238 /* #### We need to figure out which properties we really want to
/* Resolves CHARSET through Fget_charset, requires CCL-PROGRAM to be a
   vector (CHECK_VECTOR), and stores it in the charset's
   XCHARSET_CCL_PROGRAM slot.
   NOTE(review): the function's return statement is not visible in
   this extract. */
2241 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2242 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2244 (charset, ccl_program))
2246 charset = Fget_charset (charset);
2247 CHECK_VECTOR (ccl_program);
2248 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2253 invalidate_charset_font_caches (Lisp_Object charset)
2255 /* Invalidate font cache entries for charset on all devices.
   For every device visited by DEVICE_LOOP_NO_BREAK, CHARSET is looked
   up in that device's charset_font_cache with Qunbound as the default
   value; when an entry exists (the result is not unbound) it is
   emptied with Fclrhash.  Devices without an entry for CHARSET are
   left untouched. */
2256 Lisp_Object devcons, concons, hash_table;
2257 DEVICE_LOOP_NO_BREAK (devcons, concons)
2259 struct device *d = XDEVICE (XCAR (devcons));
2260 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2261 if (!UNBOUNDP (hash_table))
2262 Fclrhash (hash_table);
2266 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
/* Resolves CHARSET through Fget_charset, requires REGISTRY to be a
   string (CHECK_STRING) and stores it in the XCHARSET_REGISTRY slot.
   Because the registry feeds font selection, the per-device font
   caches for this charset are then flushed with
   invalidate_charset_font_caches, and face_property_was_changed is
   called for the `font' property of Vdefault_face with Qglobal.
   NOTE(review): the function's return statement is not visible in
   this extract. */
2267 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2268 Set the 'registry property of CHARSET to REGISTRY.
2270 (charset, registry))
2272 charset = Fget_charset (charset);
2273 CHECK_STRING (registry);
2274 XCHARSET_REGISTRY (charset) = registry;
2275 invalidate_charset_font_caches (charset);
2276 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2281 /************************************************************************/
2282 /* Lisp primitives for working with characters */
2283 /************************************************************************/
/* Implementation notes for `make-char', based on the statements
   visible in this extract:
   - CHARSET is resolved with Fget_charset.
   - The permitted octet range [lowlim, highlim] depends on the
     charset: 0..127 for ascii, 0..31 for control-1, 0..255 when
     CHARSET_CHARS is 256, 33..126 when it is 94, and 32..127
     otherwise (96-character sets).
   - An octet outside that range is reported with args_out_of_range_3.
   - For one-dimensional charsets the result is built from the first
     octet alone (second position code 0); the error text shows that a
     second octet is rejected in that case.
   - For two-dimensional charsets the second octet is masked with 0x7f,
     range-checked the same way, and both octets are combined with
     MAKE_CHAR.
   NOTE(review): the statements that validate ARG1/ARG2 and compute
   `a1' are not visible here -- confirm against the full file. */
2285 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2286 Make a character from CHARSET and octets ARG1 and ARG2.
2287 ARG2 is required only for characters from two-dimensional charsets.
2288 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2289 character s with caron.
2291 (charset, arg1, arg2))
2293 struct Lisp_Charset *cs;
2295 int lowlim, highlim;
2297 charset = Fget_charset (charset);
2298 cs = XCHARSET (charset);
2300 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2301 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2303 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2305 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2306 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2309 /* It is useful (and safe, according to Olivier Galibert) to strip
2310 the 8th bit off ARG1 and ARG2 because it allows programmers to
2311 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2312 Latin 2 code of the character. */
2320 if (a1 < lowlim || a1 > highlim)
2321 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2323 if (CHARSET_DIMENSION (cs) == 1)
2327 ("Charset is of dimension one; second octet must be nil", arg2);
2328 return make_char (MAKE_CHAR (charset, a1, 0));
2337 a2 = XINT (arg2) & 0x7f;
2339 if (a2 < lowlim || a2 > highlim)
2340 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2342 return make_char (MAKE_CHAR (charset, a1, a2));
/* Validates CH with CHECK_CHAR_COERCE_INT, finds the charset object
   of the character with CHAR_CHARSET, and returns that charset's name
   (XCHARSET_NAME) rather than the charset object itself. */
2345 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2346 Return the character set of char CH.
2350 CHECK_CHAR_COERCE_INT (ch);
2352 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Implementation notes for `split-char':
   `charset' and `rc' are protected with GCPRO2 before any work is
   done.  After CHECK_CHAR_COERCE_INT, BREAKUP_CHAR splits the
   character into its charset and the position codes `c1' and `c2'.
   For a charset whose dimension is 2 the result is the three-element
   list (NAME C1 C2); otherwise it is the two-element list (NAME C1),
   where NAME is the charset's name (XCHARSET_NAME).
   NOTE(review): the declarations of `c1'/`c2', the matching UNGCPRO
   and the return statement are not visible in this extract. */
2355 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2356 Return list of charset and one or two position-codes of CHAR.
2360 /* This function can GC */
2361 struct gcpro gcpro1, gcpro2;
2362 Lisp_Object charset = Qnil;
2363 Lisp_Object rc = Qnil;
2366 GCPRO2 (charset, rc);
2367 CHECK_CHAR_COERCE_INT (character);
2369 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2371 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2373 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2377 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2385 #ifdef ENABLE_COMPOSITE_CHARS
2386 /************************************************************************/
2387 /* composite character functions */
2388 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path visible
   below builds a new character in Vcharset_composite from the next
   free (row, column) position, records it in both the char->string and
   string->char hash tables, and advances the position: the column is
   incremented and, on reaching 128, reset to 32 while the row is
   incremented.  Once the row has reached 128 no positions remain and
   an error ("No more composite chars available") is signalled.
   NOTE(review): the test on the lookup result, the declaration of
   `emch' and the return statement are not visible in this extract --
   presumably an existing mapping is reused; confirm against the full
   file. */
2391 lookup_composite_char (Bufbyte *str, int len)
2393 Lisp_Object lispstr = make_string (str, len);
2394 Lisp_Object ch = Fgethash (lispstr,
2395 Vcomposite_char_string2char_hash_table,
2401 if (composite_char_row_next >= 128)
2402 signal_simple_error ("No more composite chars available", lispstr);
2403 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2404 composite_char_col_next);
2405 Fputhash (make_char (emch), lispstr,
2406 Vcomposite_char_char2string_hash_table);
2407 Fputhash (lispstr, make_char (emch),
2408 Vcomposite_char_string2char_hash_table);
2409 composite_char_col_next++;
2410 if (composite_char_col_next >= 128)
2412 composite_char_col_next = 32;
2413 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert documents the
   expectation that CH is always present in that table, i.e. that the
   lookup never comes back unbound.
   NOTE(review): the default argument of the lookup and the return
   statement are not visible in this extract. */
2422 composite_char_string (Emchar ch)
2424 Lisp_Object str = Fgethash (make_char (ch),
2425 Vcomposite_char_char2string_hash_table,
2427 assert (!UNBOUNDP (str));
/* Requires STRING to be a Lisp string (CHECK_STRING), passes its data
   and length to lookup_composite_char, and wraps the resulting
   character with make_char.
   NOTE(review): the definition uses `xxDEFUN' instead of `DEFUN';
   what that spelling implies for the build is not visible here. */
2431 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2432 Convert a string into a single composite character.
2433 The character is the result of overstriking all the characters in
2438 CHECK_STRING (string);
2439 return make_char (lookup_composite_char (XSTRING_DATA (string),
2440 XSTRING_LENGTH (string)));
/* Rejects any character whose leading byte (CHAR_LEADING_BYTE) is not
   LEADING_BYTE_COMPOSITE with the error "Must be composite char";
   otherwise returns the string found by composite_char_string.
   NOTE(review): the argument check and the computation of `emch' are
   not visible in this extract. */
2443 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2444 Return a string of the characters comprising a composite character.
2452 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2453 signal_simple_error ("Must be composite char", ch);
2454 return composite_char_string (emch);
2456 #endif /* ENABLE_COMPOSITE_CHARS */
2459 /************************************************************************/
2460 /* initialization */
2461 /************************************************************************/
/* Symbol-level initialization for this file: registers every Lisp
   primitive defined here with DEFSUBR and creates the symbols used as
   charset property names, direction values and predefined charset
   names with defsymbol.  Takes no arguments.
   NOTE(review): the preprocessor conditionals surrounding some of
   these registrations (for example the end of the
   ENABLE_COMPOSITE_CHARS section) are not visible in this extract. */
2464 syms_of_mule_charset (void)
2466 DEFSUBR (Fcharsetp);
2467 DEFSUBR (Ffind_charset);
2468 DEFSUBR (Fget_charset);
2469 DEFSUBR (Fcharset_list);
2470 DEFSUBR (Fcharset_name);
2471 DEFSUBR (Fmake_charset);
2472 DEFSUBR (Fmake_reverse_direction_charset);
2473 /* DEFSUBR (Freverse_direction_charset); */
2474 DEFSUBR (Fcharset_from_attributes);
2475 DEFSUBR (Fcharset_short_name);
2476 DEFSUBR (Fcharset_long_name);
2477 DEFSUBR (Fcharset_description);
2478 DEFSUBR (Fcharset_dimension);
2479 DEFSUBR (Fcharset_property);
2480 DEFSUBR (Fcharset_id);
2481 DEFSUBR (Fset_charset_ccl_program);
2482 DEFSUBR (Fset_charset_registry);
2484 DEFSUBR (Fmake_char);
2485 DEFSUBR (Fchar_charset);
2486 DEFSUBR (Fsplit_char);
2488 #ifdef ENABLE_COMPOSITE_CHARS
2489 DEFSUBR (Fmake_composite_char);
2490 DEFSUBR (Fcomposite_char_string);
/* Symbols for the charset predicate, the property names accepted by
   `charset-property', and the two direction values. */
2493 defsymbol (&Qcharsetp, "charsetp");
2494 defsymbol (&Qregistry, "registry");
2495 defsymbol (&Qfinal, "final");
2496 defsymbol (&Qgraphic, "graphic");
2497 defsymbol (&Qdirection, "direction");
2498 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2499 defsymbol (&Qshort_name, "short-name");
2500 defsymbol (&Qlong_name, "long-name");
2502 defsymbol (&Ql2r, "l2r");
2503 defsymbol (&Qr2l, "r2l");
2505 /* Charsets, compatible with FSF 20.3
2506 Naming convention is Script-Charset[-Edition] */
2507 defsymbol (&Qascii, "ascii");
2508 defsymbol (&Qcontrol_1, "control-1");
2509 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2510 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2511 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2512 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2513 defsymbol (&Qthai_tis620, "thai-tis620");
2514 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2515 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2516 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2517 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2518 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2519 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2520 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2521 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2522 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2523 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2524 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2525 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2526 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2527 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2529 defsymbol (&Qucs_bmp, "ucs-bmp");
/* Note: the two VISCII symbols are named "vietnamese-viscii-*" even
   though the C variables holding them are Qlatin_viscii_*. */
2530 defsymbol (&Qlatin_viscii_lower, "vietnamese-viscii-lower");
2531 defsymbol (&Qlatin_viscii_upper, "vietnamese-viscii-upper");
2533 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2534 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2536 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.  Visible steps:
   - every slot of charset_by_leading_byte and charset_by_attributes
     is reset to Qnil;
   - the counters for the next private leading bytes to hand out are
     seeded (next_allocated_1_byte_leading_byte from
     MIN_LEADING_BYTE_PRIVATE_1, next_allocated_2_byte_leading_byte
     from one of two alternative values);
   - leading_code_private_11 is set to PRE_LEADING_BYTE_PRIVATE_1 both
     before and after it is exposed to Lisp with DEFVAR_INT;
   - Vutf_2000_version and Vdefault_coded_charset_priority_list are
     initialized and exposed with DEFVAR_LISP.
   NOTE(review): charset_by_attributes is cleared by both a
   two-dimensional and a three-dimensional loop nest, and
   next_allocated_2_byte_leading_byte is assigned twice; presumably
   these are alternatives selected by preprocessor conditionals that
   are not visible in this extract -- confirm against the full file. */
2540 vars_of_mule_charset (void)
2547 /* Table of charsets indexed by leading byte. */
2548 for (i = 0; i < countof (charset_by_leading_byte); i++)
2549 charset_by_leading_byte[i] = Qnil;
2552 /* Table of charsets indexed by type/final-byte. */
2553 for (i = 0; i < countof (charset_by_attributes); i++)
2554 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2555 charset_by_attributes[i][j] = Qnil;
2557 /* Table of charsets indexed by type/final-byte/direction. */
2558 for (i = 0; i < countof (charset_by_attributes); i++)
2559 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2560 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2561 charset_by_attributes[i][j][k] = Qnil;
2564 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2566 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
2568 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2572 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2573 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2574 Leading-code of private TYPE9N charset of column-width 1.
2576 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2580 Vutf_2000_version = build_string("0.8 (Kami)");
2581 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2582 Version number of UTF-2000.
2585 Vdefault_coded_charset_priority_list = Qnil;
2586 DEFVAR_LISP ("default-coded-charset-priority-list",
2587 &Vdefault_coded_charset_priority_list /*
2588 Default order of preferred coded-character-set.
2594 complex_vars_of_mule_charset (void)
2596 staticpro (&Vcharset_hash_table);
2597 Vcharset_hash_table =
2598 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2600 /* Predefined character sets. We store them into variables for
2605 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2606 CHARSET_TYPE_256X256, 1, 0, 0,
2607 CHARSET_LEFT_TO_RIGHT,
2608 build_string ("BMP"),
2609 build_string ("BMP"),
2610 build_string ("BMP"),
2612 NULL, 0, 0xFFFF, 0);
2614 # define latin_iso8859_2_to_ucs NULL
2615 # define latin_iso8859_3_to_ucs NULL
2616 # define latin_iso8859_4_to_ucs NULL
2617 # define latin_iso8859_9_to_ucs NULL
2618 # define latin_jisx0201_to_ucs NULL
2619 # define MIN_CHAR_THAI 0
2620 # define MAX_CHAR_THAI 0
2621 # define MIN_CHAR_GREEK 0
2622 # define MAX_CHAR_GREEK 0
2623 # define MIN_CHAR_HEBREW 0
2624 # define MAX_CHAR_HEBREW 0
2625 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2626 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2627 # define MIN_CHAR_CYRILLIC 0
2628 # define MAX_CHAR_CYRILLIC 0
2631 make_charset (LEADING_BYTE_ASCII, Qascii,
2632 CHARSET_TYPE_94, 1, 0, 'B',
2633 CHARSET_LEFT_TO_RIGHT,
2634 build_string ("ASCII"),
2635 build_string ("ASCII)"),
2636 build_string ("ASCII (ISO646 IRV)"),
2637 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2639 Vcharset_control_1 =
2640 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2641 CHARSET_TYPE_94, 1, 1, 0,
2642 CHARSET_LEFT_TO_RIGHT,
2643 build_string ("C1"),
2644 build_string ("Control characters"),
2645 build_string ("Control characters 128-191"),
2647 NULL, 0x80, 0x9F, 0);
2648 Vcharset_latin_iso8859_1 =
2649 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2650 CHARSET_TYPE_96, 1, 1, 'A',
2651 CHARSET_LEFT_TO_RIGHT,
2652 build_string ("Latin-1"),
2653 build_string ("ISO8859-1 (Latin-1)"),
2654 build_string ("ISO8859-1 (Latin-1)"),
2655 build_string ("iso8859-1"),
2656 NULL, 0xA0, 0xFF, 32);
2657 Vcharset_latin_iso8859_2 =
2658 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2659 CHARSET_TYPE_96, 1, 1, 'B',
2660 CHARSET_LEFT_TO_RIGHT,
2661 build_string ("Latin-2"),
2662 build_string ("ISO8859-2 (Latin-2)"),
2663 build_string ("ISO8859-2 (Latin-2)"),
2664 build_string ("iso8859-2"),
2665 latin_iso8859_2_to_ucs, 0, 0, 32);
2666 Vcharset_latin_iso8859_3 =
2667 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2668 CHARSET_TYPE_96, 1, 1, 'C',
2669 CHARSET_LEFT_TO_RIGHT,
2670 build_string ("Latin-3"),
2671 build_string ("ISO8859-3 (Latin-3)"),
2672 build_string ("ISO8859-3 (Latin-3)"),
2673 build_string ("iso8859-3"),
2674 latin_iso8859_3_to_ucs, 0, 0, 32);
2675 Vcharset_latin_iso8859_4 =
2676 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2677 CHARSET_TYPE_96, 1, 1, 'D',
2678 CHARSET_LEFT_TO_RIGHT,
2679 build_string ("Latin-4"),
2680 build_string ("ISO8859-4 (Latin-4)"),
2681 build_string ("ISO8859-4 (Latin-4)"),
2682 build_string ("iso8859-4"),
2683 latin_iso8859_4_to_ucs, 0, 0, 32);
2684 Vcharset_thai_tis620 =
2685 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2686 CHARSET_TYPE_96, 1, 1, 'T',
2687 CHARSET_LEFT_TO_RIGHT,
2688 build_string ("TIS620"),
2689 build_string ("TIS620 (Thai)"),
2690 build_string ("TIS620.2529 (Thai)"),
2691 build_string ("tis620"),
2692 NULL, MIN_CHAR_THAI, MAX_CHAR_THAI, 32);
2693 Vcharset_greek_iso8859_7 =
2694 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2695 CHARSET_TYPE_96, 1, 1, 'F',
2696 CHARSET_LEFT_TO_RIGHT,
2697 build_string ("ISO8859-7"),
2698 build_string ("ISO8859-7 (Greek)"),
2699 build_string ("ISO8859-7 (Greek)"),
2700 build_string ("iso8859-7"),
2701 NULL, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 32);
2702 Vcharset_arabic_iso8859_6 =
2703 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2704 CHARSET_TYPE_96, 1, 1, 'G',
2705 CHARSET_RIGHT_TO_LEFT,
2706 build_string ("ISO8859-6"),
2707 build_string ("ISO8859-6 (Arabic)"),
2708 build_string ("ISO8859-6 (Arabic)"),
2709 build_string ("iso8859-6"),
2711 Vcharset_hebrew_iso8859_8 =
2712 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2713 CHARSET_TYPE_96, 1, 1, 'H',
2714 CHARSET_RIGHT_TO_LEFT,
2715 build_string ("ISO8859-8"),
2716 build_string ("ISO8859-8 (Hebrew)"),
2717 build_string ("ISO8859-8 (Hebrew)"),
2718 build_string ("iso8859-8"),
2719 NULL, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 32);
2720 Vcharset_katakana_jisx0201 =
2721 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2722 CHARSET_TYPE_94, 1, 1, 'I',
2723 CHARSET_LEFT_TO_RIGHT,
2724 build_string ("JISX0201 Kana"),
2725 build_string ("JISX0201.1976 (Japanese Kana)"),
2726 build_string ("JISX0201.1976 Japanese Kana"),
2727 build_string ("jisx0201.1976"),
2729 MIN_CHAR_HALFWIDTH_KATAKANA,
2730 MAX_CHAR_HALFWIDTH_KATAKANA, 33);
2731 Vcharset_latin_jisx0201 =
2732 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2733 CHARSET_TYPE_94, 1, 0, 'J',
2734 CHARSET_LEFT_TO_RIGHT,
2735 build_string ("JISX0201 Roman"),
2736 build_string ("JISX0201.1976 (Japanese Roman)"),
2737 build_string ("JISX0201.1976 Japanese Roman"),
2738 build_string ("jisx0201.1976"),
2739 latin_jisx0201_to_ucs, 0, 0, 33);
2740 Vcharset_cyrillic_iso8859_5 =
2741 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2742 CHARSET_TYPE_96, 1, 1, 'L',
2743 CHARSET_LEFT_TO_RIGHT,
2744 build_string ("ISO8859-5"),
2745 build_string ("ISO8859-5 (Cyrillic)"),
2746 build_string ("ISO8859-5 (Cyrillic)"),
2747 build_string ("iso8859-5"),
2748 NULL, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 32);
2749 Vcharset_latin_iso8859_9 =
2750 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2751 CHARSET_TYPE_96, 1, 1, 'M',
2752 CHARSET_LEFT_TO_RIGHT,
2753 build_string ("Latin-5"),
2754 build_string ("ISO8859-9 (Latin-5)"),
2755 build_string ("ISO8859-9 (Latin-5)"),
2756 build_string ("iso8859-9"),
2757 latin_iso8859_9_to_ucs, 0, 0, 32);
2758 Vcharset_japanese_jisx0208_1978 =
2759 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2760 CHARSET_TYPE_94X94, 2, 0, '@',
2761 CHARSET_LEFT_TO_RIGHT,
2762 build_string ("JISX0208.1978"),
2763 build_string ("JISX0208.1978 (Japanese)"),
2765 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
2766 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2768 Vcharset_chinese_gb2312 =
2769 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2770 CHARSET_TYPE_94X94, 2, 0, 'A',
2771 CHARSET_LEFT_TO_RIGHT,
2772 build_string ("GB2312"),
2773 build_string ("GB2312)"),
2774 build_string ("GB2312 Chinese simplified"),
2775 build_string ("gb2312"),
2777 Vcharset_japanese_jisx0208 =
2778 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2779 CHARSET_TYPE_94X94, 2, 0, 'B',
2780 CHARSET_LEFT_TO_RIGHT,
2781 build_string ("JISX0208"),
2782 build_string ("JISX0208.1983/1990 (Japanese)"),
2783 build_string ("JISX0208.1983/1990 Japanese Kanji"),
2784 build_string ("jisx0208.19\\(83\\|90\\)"),
2786 Vcharset_korean_ksc5601 =
2787 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2788 CHARSET_TYPE_94X94, 2, 0, 'C',
2789 CHARSET_LEFT_TO_RIGHT,
2790 build_string ("KSC5601"),
2791 build_string ("KSC5601 (Korean"),
2792 build_string ("KSC5601 Korean Hangul and Hanja"),
2793 build_string ("ksc5601"),
2795 Vcharset_japanese_jisx0212 =
2796 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2797 CHARSET_TYPE_94X94, 2, 0, 'D',
2798 CHARSET_LEFT_TO_RIGHT,
2799 build_string ("JISX0212"),
2800 build_string ("JISX0212 (Japanese)"),
2801 build_string ("JISX0212 Japanese Supplement"),
2802 build_string ("jisx0212"),
2805 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2806 Vcharset_chinese_cns11643_1 =
2807 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2808 CHARSET_TYPE_94X94, 2, 0, 'G',
2809 CHARSET_LEFT_TO_RIGHT,
2810 build_string ("CNS11643-1"),
2811 build_string ("CNS11643-1 (Chinese traditional)"),
2813 ("CNS 11643 Plane 1 Chinese traditional"),
2814 build_string (CHINESE_CNS_PLANE_RE("1")),
2816 Vcharset_chinese_cns11643_2 =
2817 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2818 CHARSET_TYPE_94X94, 2, 0, 'H',
2819 CHARSET_LEFT_TO_RIGHT,
2820 build_string ("CNS11643-2"),
2821 build_string ("CNS11643-2 (Chinese traditional)"),
2823 ("CNS 11643 Plane 2 Chinese traditional"),
2824 build_string (CHINESE_CNS_PLANE_RE("2")),
2827 Vcharset_latin_viscii_lower =
2828 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2829 CHARSET_TYPE_96, 1, 1, '1',
2830 CHARSET_LEFT_TO_RIGHT,
2831 build_string ("VISCII lower"),
2832 build_string ("VISCII lower (Vietnamese)"),
2833 build_string ("VISCII lower (Vietnamese)"),
2834 build_string ("VISCII1.1"),
2835 latin_viscii_lower_to_ucs, 0, 0, 32);
2836 Vcharset_latin_viscii_upper =
2837 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2838 CHARSET_TYPE_96, 1, 1, '2',
2839 CHARSET_LEFT_TO_RIGHT,
2840 build_string ("VISCII upper"),
2841 build_string ("VISCII upper (Vietnamese)"),
2842 build_string ("VISCII upper (Vietnamese)"),
2843 build_string ("VISCII1.1"),
2844 latin_viscii_upper_to_ucs, 0, 0, 32);
2846 Vcharset_chinese_big5_1 =
2847 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2848 CHARSET_TYPE_94X94, 2, 0, '0',
2849 CHARSET_LEFT_TO_RIGHT,
2850 build_string ("Big5"),
2851 build_string ("Big5 (Level-1)"),
2853 ("Big5 Level-1 Chinese traditional"),
2854 build_string ("big5"),
2856 Vcharset_chinese_big5_2 =
2857 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2858 CHARSET_TYPE_94X94, 2, 0, '1',
2859 CHARSET_LEFT_TO_RIGHT,
2860 build_string ("Big5"),
2861 build_string ("Big5 (Level-2)"),
2863 ("Big5 Level-2 Chinese traditional"),
2864 build_string ("big5"),
2867 #ifdef ENABLE_COMPOSITE_CHARS
2868 /* #### For simplicity, we put composite chars into a 96x96 charset.
2869 This is going to lead to problems because you can run out of
2870 room, esp. as we don't yet recycle numbers. */
2871 Vcharset_composite =
2872 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2873 CHARSET_TYPE_96X96, 2, 0, 0,
2874 CHARSET_LEFT_TO_RIGHT,
2875 build_string ("Composite"),
2876 build_string ("Composite characters"),
2877 build_string ("Composite characters"),
2880 composite_char_row_next = 32;
2881 composite_char_col_next = 32;
2883 Vcomposite_char_string2char_hash_table =
2884 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2885 Vcomposite_char_char2string_hash_table =
2886 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2887 staticpro (&Vcomposite_char_string2char_hash_table);
2888 staticpro (&Vcomposite_char_char2string_hash_table);
2889 #endif /* ENABLE_COMPOSITE_CHARS */