1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
later version.
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
/* One file-scope Lisp_Object per built-in charset.  They are only
   declared here (no initializer); the code that fills them in is not
   part of the section shown.  None is `static', so each has external
   linkage.  */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
/* NOTE(review): the next three (UCS BMP and the two VISCII halves) look
   like additions specific to one build variant -- confirm whether a
   preprocessor conditional around them was lost.  */
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii_lower;
63 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_chinese_big5_1;
66 Lisp_Object Vcharset_chinese_big5_2;
68 #ifdef ENABLE_COMPOSITE_CHARS
69 Lisp_Object Vcharset_composite;
71 /* Hash tables for composite chars. One maps strings representing
72 composed chars to their equivalent chars; one goes the
other way. */
74 Lisp_Object Vcomposite_char_char2string_hash_table;
75 Lisp_Object Vcomposite_char_string2char_hash_table;
77 static int composite_char_row_next;
78 static int composite_char_col_next;
80 #endif /* ENABLE_COMPOSITE_CHARS */
82 /* Table of charsets indexed by leading byte. */
83 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
85 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): the two declarations below give the same object two
   different array types, so they cannot both be compiled.  Presumably
   they are the two arms of a preprocessor conditional whose
   #if/#else/#endif lines are not visible here -- restore it from the
   upstream file rather than deleting either line.  Only the
   three-dimensional form has an index that could correspond to
   "direction" in the description above; the two-dimensional form
   apparently omits it (assumption -- confirm).  */
87 Lisp_Object charset_by_attributes[4][128];
89 Lisp_Object charset_by_attributes[4][128][2];
93 /* Table of number of bytes in the string representation of a character
94 indexed by the first byte of that representation.
96 rep_bytes_by_first_byte(c) is more efficient than the equivalent
97 canonical computation:
99 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* The array has 0xA0 (160) elements: eight rows of sixteen for
   0x00 - 0x7f, one row for 0x80 - 0x8f and one for 0x90 - 0x9f.  */
101 Bytecount rep_bytes_by_first_byte[0xA0] =
102 { /* 0x00 - 0x7f are for straight ASCII */
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): the next two rows are both candidates for 0x80 - 0x8f;
   they differ only in the last entry (3 versus 2).  With both present
   the initializer has 176 values for 160 elements, so exactly one of
   them must be selected.  Presumably a preprocessor conditional around
   them was lost -- restore it from the upstream file.  */
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
115 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 /* 0x90 - 0x9d are for Dimension-2 official charsets */
118 /* 0x9e is for Dimension-1 private charsets */
119 /* 0x9f is for Dimension-2 private charsets */
120 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* NOTE(review): the closing "};" of this initializer is not visible
   after the last row -- confirm it is present in the real file.  */
125 Emchar_to_byte_table*
126 make_byte_from_character_table ()
128 Emchar_to_byte_table* table
129 = (Emchar_to_byte_table*) xmalloc (sizeof (Emchar_to_byte_table));
136 put_byte_from_character_table (Emchar ch, unsigned char val,
137 Emchar_to_byte_table* table)
139 if (table->base == NULL)
141 table->base = xmalloc (128);
142 table->offset = ch - (ch % 128);
144 table->base[ch - table->offset] = val;
148 int i = ch - table->offset;
152 size_t new_size = table->size - i;
155 new_size += 128 - (new_size % 128);
156 table->base = xrealloc (table->base, new_size);
157 memmove (table->base + (new_size - table->size), table->base,
159 for (j = 0; j < (new_size - table->size); j++)
161 table->offset -= (new_size - table->size);
162 table->base[ch - table->offset] = val;
163 table->size = new_size;
165 else if (i >= table->size)
167 size_t new_size = i + 1;
170 new_size += 128 - (new_size % 128);
171 table->base = xrealloc (table->base, new_size);
172 for (j = table->size; j < new_size; j++)
174 table->base[i] = val;
175 table->size = new_size;
179 table->base[i] = val;
185 get_byte_from_character_table (Emchar ch, Emchar_to_byte_table* table)
187 size_t i = ch - table->offset;
189 return table->base[i];
/* Character code for byte B of the 96-character set whose final byte
   is FT: (FT - '0') selects a block of 96 codes counted from
   MIN_CHAR_96, and the low seven bits of B, less 32, give the position
   inside that block.  Both arguments are parenthesized so that
   expressions such as `0x80 | c' can be passed safely.  */
#define CHAR96(ft,b) (MIN_CHAR_96 + ((ft) - '0') * 96 + ((b) & 0x7f) - 32)
196 Emchar latin_jisx0201_to_ucs[94] =
198 0x0021 /* 0x21 EXCLAMATION MARK */,
199 0x0022 /* 0x22 QUOTATION MARK */,
200 0x0023 /* 0x23 NUMBER SIGN */,
201 0x0024 /* 0x24 DOLLAR SIGN */,
202 0x0025 /* 0x25 PERCENT SIGN */,
203 0x0026 /* 0x26 AMPERSAND */,
204 0x0027 /* 0x27 APOSTROPHE */,
205 0x0028 /* 0x28 LEFT PARENTHESIS */,
206 0x0029 /* 0x29 RIGHT PARENTHESIS */,
207 0x002A /* 0x2A ASTERISK */,
208 0x002B /* 0x2B PLUS SIGN */,
209 0x002C /* 0x2C COMMA */,
210 0x002D /* 0x2D HYPHEN-MINUS */,
211 0x002E /* 0x2E FULL STOP */,
212 0x002F /* 0x2F SOLIDUS */,
213 0x0030 /* 0x30 DIGIT ZERO */,
214 0x0031 /* 0x31 DIGIT ONE */,
215 0x0032 /* 0x32 DIGIT TWO */,
216 0x0033 /* 0x33 DIGIT THREE */,
217 0x0034 /* 0x34 DIGIT FOUR */,
218 0x0035 /* 0x35 DIGIT FIVE */,
219 0x0036 /* 0x36 DIGIT SIX */,
220 0x0037 /* 0x37 DIGIT SEVEN */,
221 0x0038 /* 0x38 DIGIT EIGHT */,
222 0x0039 /* 0x39 DIGIT NINE */,
223 0x003A /* 0x3A COLON */,
224 0x003B /* 0x3B SEMICOLON */,
225 0x003C /* 0x3C LESS-THAN SIGN */,
226 0x003D /* 0x3D EQUALS SIGN */,
227 0x003E /* 0x3E GREATER-THAN SIGN */,
228 0x003F /* 0x3F QUESTION MARK */,
229 0x0040 /* 0x40 COMMERCIAL AT */,
230 0x0041 /* 0x41 LATIN CAPITAL LETTER A */,
231 0x0042 /* 0x42 LATIN CAPITAL LETTER B */,
232 0x0043 /* 0x43 LATIN CAPITAL LETTER C */,
233 0x0044 /* 0x44 LATIN CAPITAL LETTER D */,
234 0x0045 /* 0x45 LATIN CAPITAL LETTER E */,
235 0x0046 /* 0x46 LATIN CAPITAL LETTER F */,
236 0x0047 /* 0x47 LATIN CAPITAL LETTER G */,
237 0x0048 /* 0x48 LATIN CAPITAL LETTER H */,
238 0x0049 /* 0x49 LATIN CAPITAL LETTER I */,
239 0x004A /* 0x4A LATIN CAPITAL LETTER J */,
240 0x004B /* 0x4B LATIN CAPITAL LETTER K */,
241 0x004C /* 0x4C LATIN CAPITAL LETTER L */,
242 0x004D /* 0x4D LATIN CAPITAL LETTER M */,
243 0x004E /* 0x4E LATIN CAPITAL LETTER N */,
244 0x004F /* 0x4F LATIN CAPITAL LETTER O */,
245 0x0050 /* 0x50 LATIN CAPITAL LETTER P */,
246 0x0051 /* 0x51 LATIN CAPITAL LETTER Q */,
247 0x0052 /* 0x52 LATIN CAPITAL LETTER R */,
248 0x0053 /* 0x53 LATIN CAPITAL LETTER S */,
249 0x0054 /* 0x54 LATIN CAPITAL LETTER T */,
250 0x0055 /* 0x55 LATIN CAPITAL LETTER U */,
251 0x0056 /* 0x56 LATIN CAPITAL LETTER V */,
252 0x0057 /* 0x57 LATIN CAPITAL LETTER W */,
253 0x0058 /* 0x58 LATIN CAPITAL LETTER X */,
254 0x0059 /* 0x59 LATIN CAPITAL LETTER Y */,
255 0x005A /* 0x5A LATIN CAPITAL LETTER Z */,
256 0x005B /* 0x5B LEFT SQUARE BRACKET */,
257 0x00A5 /* 0x5C YEN SIGN */,
258 0x005D /* 0x5D RIGHT SQUARE BRACKET */,
259 0x005E /* 0x5E CIRCUMFLEX ACCENT */,
260 0x005F /* 0x5F LOW LINE */,
261 0x0060 /* 0x60 GRAVE ACCENT */,
262 0x0061 /* 0x61 LATIN SMALL LETTER A */,
263 0x0062 /* 0x62 LATIN SMALL LETTER B */,
264 0x0063 /* 0x63 LATIN SMALL LETTER C */,
265 0x0064 /* 0x64 LATIN SMALL LETTER D */,
266 0x0065 /* 0x65 LATIN SMALL LETTER E */,
267 0x0066 /* 0x66 LATIN SMALL LETTER F */,
268 0x0067 /* 0x67 LATIN SMALL LETTER G */,
269 0x0068 /* 0x68 LATIN SMALL LETTER H */,
270 0x0069 /* 0x69 LATIN SMALL LETTER I */,
271 0x006A /* 0x6A LATIN SMALL LETTER J */,
272 0x006B /* 0x6B LATIN SMALL LETTER K */,
273 0x006C /* 0x6C LATIN SMALL LETTER L */,
274 0x006D /* 0x6D LATIN SMALL LETTER M */,
275 0x006E /* 0x6E LATIN SMALL LETTER N */,
276 0x006F /* 0x6F LATIN SMALL LETTER O */,
277 0x0070 /* 0x70 LATIN SMALL LETTER P */,
278 0x0071 /* 0x71 LATIN SMALL LETTER Q */,
279 0x0072 /* 0x72 LATIN SMALL LETTER R */,
280 0x0073 /* 0x73 LATIN SMALL LETTER S */,
281 0x0074 /* 0x74 LATIN SMALL LETTER T */,
282 0x0075 /* 0x75 LATIN SMALL LETTER U */,
283 0x0076 /* 0x76 LATIN SMALL LETTER V */,
284 0x0077 /* 0x77 LATIN SMALL LETTER W */,
285 0x0078 /* 0x78 LATIN SMALL LETTER X */,
286 0x0079 /* 0x79 LATIN SMALL LETTER Y */,
287 0x007A /* 0x7A LATIN SMALL LETTER Z */,
288 0x007B /* 0x7B LEFT CURLY BRACKET */,
289 0x007C /* 0x7C VERTICAL LINE */,
290 0x007D /* 0x7D RIGHT CURLY BRACKET */,
291 0x203E /* 0x7E OVERLINE */
294 Emchar latin_iso8859_2_to_ucs[96] =
296 0x00A0 /* 0xA0 NO-BREAK SPACE */,
297 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
298 0x02D8 /* 0xA2 BREVE */,
299 0x0141 /* 0xA3 LATIN CAPITAL LETTER L WITH STROKE */,
300 0x00A4 /* 0xA4 CURRENCY SIGN */,
301 0x013D /* 0xA5 LATIN CAPITAL LETTER L WITH CARON */,
302 0x015A /* 0xA6 LATIN CAPITAL LETTER S WITH ACUTE */,
303 0x00A7 /* 0xA7 SECTION SIGN */,
304 0x00A8 /* 0xA8 DIAERESIS */,
305 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
306 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
307 0x0164 /* 0xAB LATIN CAPITAL LETTER T WITH CARON */,
308 0x0179 /* 0xAC LATIN CAPITAL LETTER Z WITH ACUTE */,
309 0x00AD /* 0xAD SOFT HYPHEN */,
310 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
311 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
312 0x00B0 /* 0xB0 DEGREE SIGN */,
313 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
314 0x02DB /* 0xB2 OGONEK */,
315 0x0142 /* 0xB3 LATIN SMALL LETTER L WITH STROKE */,
316 0x00B4 /* 0xB4 ACUTE ACCENT */,
317 0x013E /* 0xB5 LATIN SMALL LETTER L WITH CARON */,
318 0x015B /* 0xB6 LATIN SMALL LETTER S WITH ACUTE */,
319 0x02C7 /* 0xB7 CARON */,
320 0x00B8 /* 0xB8 CEDILLA */,
321 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
322 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
323 0x0165 /* 0xBB LATIN SMALL LETTER T WITH CARON */,
324 0x017A /* 0xBC LATIN SMALL LETTER Z WITH ACUTE */,
325 0x02DD /* 0xBD DOUBLE ACUTE ACCENT */,
326 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
327 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
328 0x0154 /* 0xC0 LATIN CAPITAL LETTER R WITH ACUTE */,
329 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
330 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
331 0x0102 /* 0xC3 LATIN CAPITAL LETTER A WITH BREVE */,
332 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
333 0x0139 /* 0xC5 LATIN CAPITAL LETTER L WITH ACUTE */,
334 0x0106 /* 0xC6 LATIN CAPITAL LETTER C WITH ACUTE */,
335 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
336 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
337 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
338 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
339 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
340 0x011A /* 0xCC LATIN CAPITAL LETTER E WITH CARON */,
341 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
342 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
343 0x010E /* 0xCF LATIN CAPITAL LETTER D WITH CARON */,
344 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
345 0x0143 /* 0xD1 LATIN CAPITAL LETTER N WITH ACUTE */,
346 0x0147 /* 0xD2 LATIN CAPITAL LETTER N WITH CARON */,
347 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
348 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
349 0x0150 /* 0xD5 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */,
350 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
351 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
352 0x0158 /* 0xD8 LATIN CAPITAL LETTER R WITH CARON */,
353 0x016E /* 0xD9 LATIN CAPITAL LETTER U WITH RING ABOVE */,
354 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
355 0x0170 /* 0xDB LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */,
356 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
357 0x00DD /* 0xDD LATIN CAPITAL LETTER Y WITH ACUTE */,
358 0x0162 /* 0xDE LATIN CAPITAL LETTER T WITH CEDILLA */,
359 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
360 0x0155 /* 0xE0 LATIN SMALL LETTER R WITH ACUTE */,
361 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
362 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
363 0x0103 /* 0xE3 LATIN SMALL LETTER A WITH BREVE */,
364 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
365 0x013A /* 0xE5 LATIN SMALL LETTER L WITH ACUTE */,
366 0x0107 /* 0xE6 LATIN SMALL LETTER C WITH ACUTE */,
367 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
368 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
369 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
370 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
371 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
372 0x011B /* 0xEC LATIN SMALL LETTER E WITH CARON */,
373 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
374 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
375 0x010F /* 0xEF LATIN SMALL LETTER D WITH CARON */,
376 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
377 0x0144 /* 0xF1 LATIN SMALL LETTER N WITH ACUTE */,
378 0x0148 /* 0xF2 LATIN SMALL LETTER N WITH CARON */,
379 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
380 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
381 0x0151 /* 0xF5 LATIN SMALL LETTER O WITH DOUBLE ACUTE */,
382 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
383 0x00F7 /* 0xF7 DIVISION SIGN */,
384 0x0159 /* 0xF8 LATIN SMALL LETTER R WITH CARON */,
385 0x016F /* 0xF9 LATIN SMALL LETTER U WITH RING ABOVE */,
386 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
387 0x0171 /* 0xFB LATIN SMALL LETTER U WITH DOUBLE ACUTE */,
388 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
389 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
390 0x0163 /* 0xFE LATIN SMALL LETTER T WITH CEDILLA */,
391 0x02D9 /* 0xFF DOT ABOVE */
394 Emchar latin_iso8859_3_to_ucs[96] =
396 0x00A0 /* 0xA0 NO-BREAK SPACE */,
397 0x0126 /* 0xA1 LATIN CAPITAL LETTER H WITH STROKE */,
398 0x02D8 /* 0xA2 BREVE */,
399 0x00A3 /* 0xA3 POUND SIGN */,
400 0x00A4 /* 0xA4 CURRENCY SIGN */,
402 0x0124 /* 0xA6 LATIN CAPITAL LETTER H WITH CIRCUMFLEX */,
403 0x00A7 /* 0xA7 SECTION SIGN */,
404 0x00A8 /* 0xA8 DIAERESIS */,
405 0x0130 /* 0xA9 LATIN CAPITAL LETTER I WITH DOT ABOVE */,
406 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
407 0x011E /* 0xAB LATIN CAPITAL LETTER G WITH BREVE */,
408 0x0134 /* 0xAC LATIN CAPITAL LETTER J WITH CIRCUMFLEX */,
409 0x00AD /* 0xAD SOFT HYPHEN */,
411 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
412 0x00B0 /* 0xB0 DEGREE SIGN */,
413 0x0127 /* 0xB1 LATIN SMALL LETTER H WITH STROKE */,
414 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
415 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
416 0x00B4 /* 0xB4 ACUTE ACCENT */,
417 0x00B5 /* 0xB5 MICRO SIGN */,
418 0x0125 /* 0xB6 LATIN SMALL LETTER H WITH CIRCUMFLEX */,
419 0x00B7 /* 0xB7 MIDDLE DOT */,
420 0x00B8 /* 0xB8 CEDILLA */,
421 0x0131 /* 0xB9 LATIN SMALL LETTER DOTLESS I */,
422 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
423 0x011F /* 0xBB LATIN SMALL LETTER G WITH BREVE */,
424 0x0135 /* 0xBC LATIN SMALL LETTER J WITH CIRCUMFLEX */,
425 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
427 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
428 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
429 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
430 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
432 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
433 0x010A /* 0xC5 LATIN CAPITAL LETTER C WITH DOT ABOVE */,
434 0x0108 /* 0xC6 LATIN CAPITAL LETTER C WITH CIRCUMFLEX */,
435 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
436 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
437 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
438 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
439 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
440 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
441 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
442 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
443 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
445 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
446 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
447 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
448 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
449 0x0120 /* 0xD5 LATIN CAPITAL LETTER G WITH DOT ABOVE */,
450 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
451 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
452 0x011C /* 0xD8 LATIN CAPITAL LETTER G WITH CIRCUMFLEX */,
453 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
454 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
455 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
456 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
457 0x016C /* 0xDD LATIN CAPITAL LETTER U WITH BREVE */,
458 0x015C /* 0xDE LATIN CAPITAL LETTER S WITH CIRCUMFLEX */,
459 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
460 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
461 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
462 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
464 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
465 0x010B /* 0xE5 LATIN SMALL LETTER C WITH DOT ABOVE */,
466 0x0109 /* 0xE6 LATIN SMALL LETTER C WITH CIRCUMFLEX */,
467 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
468 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
469 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
470 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
471 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
472 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
473 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
474 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
475 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
477 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
478 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
479 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
480 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
481 0x0121 /* 0xF5 LATIN SMALL LETTER G WITH DOT ABOVE */,
482 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
483 0x00F7 /* 0xF7 DIVISION SIGN */,
484 0x011D /* 0xF8 LATIN SMALL LETTER G WITH CIRCUMFLEX */,
485 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
486 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
487 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
488 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
489 0x016D /* 0xFD LATIN SMALL LETTER U WITH BREVE */,
490 0x015D /* 0xFE LATIN SMALL LETTER S WITH CIRCUMFLEX */,
491 0x02D9 /* 0xFF DOT ABOVE */
494 Emchar latin_iso8859_4_to_ucs[96] =
496 0x00A0 /* 0xA0 NO-BREAK SPACE */,
497 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
498 0x0138 /* 0xA2 LATIN SMALL LETTER KRA */,
499 0x0156 /* 0xA3 LATIN CAPITAL LETTER R WITH CEDILLA */,
500 0x00A4 /* 0xA4 CURRENCY SIGN */,
501 0x0128 /* 0xA5 LATIN CAPITAL LETTER I WITH TILDE */,
502 0x013B /* 0xA6 LATIN CAPITAL LETTER L WITH CEDILLA */,
503 0x00A7 /* 0xA7 SECTION SIGN */,
504 0x00A8 /* 0xA8 DIAERESIS */,
505 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
506 0x0112 /* 0xAA LATIN CAPITAL LETTER E WITH MACRON */,
507 0x0122 /* 0xAB LATIN CAPITAL LETTER G WITH CEDILLA */,
508 0x0166 /* 0xAC LATIN CAPITAL LETTER T WITH STROKE */,
509 0x00AD /* 0xAD SOFT HYPHEN */,
510 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
511 0x00AF /* 0xAF MACRON */,
512 0x00B0 /* 0xB0 DEGREE SIGN */,
513 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
514 0x02DB /* 0xB2 OGONEK */,
515 0x0157 /* 0xB3 LATIN SMALL LETTER R WITH CEDILLA */,
516 0x00B4 /* 0xB4 ACUTE ACCENT */,
517 0x0129 /* 0xB5 LATIN SMALL LETTER I WITH TILDE */,
518 0x013C /* 0xB6 LATIN SMALL LETTER L WITH CEDILLA */,
519 0x02C7 /* 0xB7 CARON */,
520 0x00B8 /* 0xB8 CEDILLA */,
521 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
522 0x0113 /* 0xBA LATIN SMALL LETTER E WITH MACRON */,
523 0x0123 /* 0xBB LATIN SMALL LETTER G WITH CEDILLA */,
524 0x0167 /* 0xBC LATIN SMALL LETTER T WITH STROKE */,
525 0x014A /* 0xBD LATIN CAPITAL LETTER ENG */,
526 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
527 0x014B /* 0xBF LATIN SMALL LETTER ENG */,
528 0x0100 /* 0xC0 LATIN CAPITAL LETTER A WITH MACRON */,
529 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
530 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
531 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
532 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
533 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
534 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
535 0x012E /* 0xC7 LATIN CAPITAL LETTER I WITH OGONEK */,
536 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
537 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
538 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
539 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
540 0x0116 /* 0xCC LATIN CAPITAL LETTER E WITH DOT ABOVE */,
541 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
542 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
543 0x012A /* 0xCF LATIN CAPITAL LETTER I WITH MACRON */,
544 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
545 0x0145 /* 0xD1 LATIN CAPITAL LETTER N WITH CEDILLA */,
546 0x014C /* 0xD2 LATIN CAPITAL LETTER O WITH MACRON */,
547 0x0136 /* 0xD3 LATIN CAPITAL LETTER K WITH CEDILLA */,
548 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
549 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
550 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
551 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
552 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
553 0x0172 /* 0xD9 LATIN CAPITAL LETTER U WITH OGONEK */,
554 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
555 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
556 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
557 0x0168 /* 0xDD LATIN CAPITAL LETTER U WITH TILDE */,
558 0x016A /* 0xDE LATIN CAPITAL LETTER U WITH MACRON */,
559 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
560 0x0101 /* 0xE0 LATIN SMALL LETTER A WITH MACRON */,
561 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
562 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
563 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
564 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
565 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
566 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
567 0x012F /* 0xE7 LATIN SMALL LETTER I WITH OGONEK */,
568 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
569 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
570 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
571 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
572 0x0117 /* 0xEC LATIN SMALL LETTER E WITH DOT ABOVE */,
573 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
574 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
575 0x012B /* 0xEF LATIN SMALL LETTER I WITH MACRON */,
576 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
577 0x0146 /* 0xF1 LATIN SMALL LETTER N WITH CEDILLA */,
578 0x014D /* 0xF2 LATIN SMALL LETTER O WITH MACRON */,
579 0x0137 /* 0xF3 LATIN SMALL LETTER K WITH CEDILLA */,
580 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
581 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
582 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
583 0x00F7 /* 0xF7 DIVISION SIGN */,
584 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
585 0x0173 /* 0xF9 LATIN SMALL LETTER U WITH OGONEK */,
586 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
587 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
588 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
589 0x0169 /* 0xFD LATIN SMALL LETTER U WITH TILDE */,
590 0x016B /* 0xFE LATIN SMALL LETTER U WITH MACRON */,
591 0x02D9 /* 0xFF DOT ABOVE */
594 Emchar latin_iso8859_9_to_ucs[96] =
596 0x00A0 /* 0xA0 NO-BREAK SPACE */,
597 0x00A1 /* 0xA1 INVERTED EXCLAMATION MARK */,
598 0x00A2 /* 0xA2 CENT SIGN */,
599 0x00A3 /* 0xA3 POUND SIGN */,
600 0x00A4 /* 0xA4 CURRENCY SIGN */,
601 0x00A5 /* 0xA5 YEN SIGN */,
602 0x00A6 /* 0xA6 BROKEN BAR */,
603 0x00A7 /* 0xA7 SECTION SIGN */,
604 0x00A8 /* 0xA8 DIAERESIS */,
605 0x00A9 /* 0xA9 COPYRIGHT SIGN */,
606 0x00AA /* 0xAA FEMININE ORDINAL INDICATOR */,
607 0x00AB /* 0xAB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */,
608 0x00AC /* 0xAC NOT SIGN */,
609 0x00AD /* 0xAD SOFT HYPHEN */,
610 0x00AE /* 0xAE REGISTERED SIGN */,
611 0x00AF /* 0xAF MACRON */,
612 0x00B0 /* 0xB0 DEGREE SIGN */,
613 0x00B1 /* 0xB1 PLUS-MINUS SIGN */,
614 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
615 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
616 0x00B4 /* 0xB4 ACUTE ACCENT */,
617 0x00B5 /* 0xB5 MICRO SIGN */,
618 0x00B6 /* 0xB6 PILCROW SIGN */,
619 0x00B7 /* 0xB7 MIDDLE DOT */,
620 0x00B8 /* 0xB8 CEDILLA */,
621 0x00B9 /* 0xB9 SUPERSCRIPT ONE */,
622 0x00BA /* 0xBA MASCULINE ORDINAL INDICATOR */,
623 0x00BB /* 0xBB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */,
624 0x00BC /* 0xBC VULGAR FRACTION ONE QUARTER */,
625 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
626 0x00BE /* 0xBE VULGAR FRACTION THREE QUARTERS */,
627 0x00BF /* 0xBF INVERTED QUESTION MARK */,
628 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
629 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
630 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
631 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
632 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
633 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
634 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
635 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
636 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
637 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
638 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
639 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
640 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
641 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
642 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
643 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
644 0x011E /* 0xD0 LATIN CAPITAL LETTER G WITH BREVE */,
645 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
646 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
647 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
648 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
649 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
650 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
651 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
652 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
653 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
654 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
655 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
656 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
657 0x0130 /* 0xDD LATIN CAPITAL LETTER I WITH DOT ABOVE */,
658 0x015E /* 0xDE LATIN CAPITAL LETTER S WITH CEDILLA */,
659 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
660 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
661 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
662 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
663 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
664 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
665 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
666 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
667 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
668 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
669 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
670 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
671 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
672 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
673 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
674 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
675 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
676 0x011F /* 0xF0 LATIN SMALL LETTER G WITH BREVE */,
677 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
678 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
679 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
680 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
681 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
682 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
683 0x00F7 /* 0xF7 DIVISION SIGN */,
684 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
685 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
686 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
687 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
688 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
689 0x0131 /* 0xFD LATIN SMALL LETTER DOTLESS I */,
690 0x015F /* 0xFE LATIN SMALL LETTER S WITH CEDILLA */,
691 0x00FF /* 0xFF LATIN SMALL LETTER Y WITH DIAERESIS */,
694 Emchar latin_viscii_lower_to_ucs[96] =
794 Emchar latin_viscii_upper_to_ucs[96] =
894 Emchar latin_tcvn5712_to_ucs[96] =
896 0x00A0 /* 0xA0 NO-BREAK SPACE */,
897 0x0102 /* 0xA1 LATIN CAPITAL LETTER A WITH BREVE */,
898 0x00C2 /* 0xA2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
899 0x00CA /* 0xA3 LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
900 0x00D4 /* 0xA4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
901 0x01A0 /* 0xA5 LATIN CAPITAL LETTER O WITH HORN */,
902 0x01AF /* 0xA6 LATIN CAPITAL LETTER U WITH HORN */,
903 0x0110 /* 0xA7 LATIN CAPITAL LETTER D WITH STROKE */,
904 0x0103 /* 0xA8 LATIN SMALL LETTER A WITH BREVE */,
905 0x00E2 /* 0xA9 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
906 0x00EA /* 0xAA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
907 0x00F4 /* 0xAB LATIN SMALL LETTER O WITH CIRCUMFLEX */,
908 0x01A1 /* 0xAC LATIN SMALL LETTER O WITH HORN */,
909 0x01B0 /* 0xAD LATIN SMALL LETTER U WITH HORN */,
910 0x0111 /* 0xAE LATIN SMALL LETTER D WITH STROKE */,
911 0x1EB0 /* 0xAF LATIN CAPITAL LETTER A WITH BREVE AND GRAVE */,
912 0x0300 /* 0xB0 COMBINING GRAVE ACCENT */,
913 0x0309 /* 0xB1 COMBINING HOOK ABOVE */,
914 0x0303 /* 0xB2 COMBINING TILDE */,
915 0x0301 /* 0xB3 COMBINING ACUTE ACCENT */,
916 0x0323 /* 0xB4 COMBINING DOT BELOW */,
917 0x00E0 /* 0xB5 LATIN SMALL LETTER A WITH GRAVE */,
918 0x1EA3 /* 0xB6 LATIN SMALL LETTER A WITH HOOK ABOVE */,
919 0x00E3 /* 0xB7 LATIN SMALL LETTER A WITH TILDE */,
920 0x00E1 /* 0xB8 LATIN SMALL LETTER A WITH ACUTE */,
921 0x1EA1 /* 0xB9 LATIN SMALL LETTER A WITH DOT BELOW */,
922 0x1EB2 /* 0xBA LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE */,
923 0x1EB1 /* 0xBB LATIN SMALL LETTER A WITH BREVE AND GRAVE */,
924 0x1EB3 /* 0xBC LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE */,
925 0x1EB5 /* 0xBD LATIN SMALL LETTER A WITH BREVE AND TILDE */,
926 0x1EAF /* 0xBE LATIN SMALL LETTER A WITH BREVE AND ACUTE */,
927 0x1EB4 /* 0xBF LATIN CAPITAL LETTER A WITH BREVE AND TILDE */,
928 0x1EAE /* 0xC0 LATIN CAPITAL LETTER A WITH BREVE AND ACUTE */,
929 0x1EA6 /* 0xC1 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE */,
930 0x1EA8 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
931 0x1EAA /* 0xC3 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE */,
932 0x1EA4 /* 0xC4 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE */,
933 0x1EC0 /* 0xC5 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE */,
934 0x1EB7 /* 0xC6 LATIN SMALL LETTER A WITH BREVE AND DOT BELOW */,
935 0x1EA7 /* 0xC7 LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE */,
936 0x1EA9 /* 0xC8 LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
937 0x1EAB /* 0xC9 LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE */,
938 0x1EA5 /* 0xCA LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE */,
939 0x1EAD /* 0xCB LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW */,
940 0x00E8 /* 0xCC LATIN SMALL LETTER E WITH GRAVE */,
941 0x1EC2 /* 0xCD LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
942 0x1EBB /* 0xCE LATIN SMALL LETTER E WITH HOOK ABOVE */,
943 0x1EBD /* 0xCF LATIN SMALL LETTER E WITH TILDE */,
944 0x00E9 /* 0xD0 LATIN SMALL LETTER E WITH ACUTE */,
945 0x1EB9 /* 0xD1 LATIN SMALL LETTER E WITH DOT BELOW */,
946 0x1EC1 /* 0xD2 LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE */,
947 0x1EC3 /* 0xD3 LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
948 0x1EC5 /* 0xD4 LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE */,
949 0x1EBF /* 0xD5 LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE */,
950 0x1EC7 /* 0xD6 LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW */,
951 0x00EC /* 0xD7 LATIN SMALL LETTER I WITH GRAVE */,
952 0x1EC9 /* 0xD8 LATIN SMALL LETTER I WITH HOOK ABOVE */,
953 0x1EC4 /* 0xD9 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE */,
954 0x1EBE /* 0xDA LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE */,
955 0x1ED2 /* 0xDB LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE */,
956 0x0129 /* 0xDC LATIN SMALL LETTER I WITH TILDE */,
957 0x00ED /* 0xDD LATIN SMALL LETTER I WITH ACUTE */,
958 0x1ECB /* 0xDE LATIN SMALL LETTER I WITH DOT BELOW */,
959 0x00F2 /* 0xDF LATIN SMALL LETTER O WITH GRAVE */,
960 0x1ED4 /* 0xE0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
961 0x1ECF /* 0xE1 LATIN SMALL LETTER O WITH HOOK ABOVE */,
962 0x00F5 /* 0xE2 LATIN SMALL LETTER O WITH TILDE */,
963 0x00F3 /* 0xE3 LATIN SMALL LETTER O WITH ACUTE */,
964 0x1ECD /* 0xE4 LATIN SMALL LETTER O WITH DOT BELOW */,
965 0x1ED3 /* 0xE5 LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE */,
966 0x1ED5 /* 0xE6 LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
967 0x1ED7 /* 0xE7 LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE */,
968 0x1ED1 /* 0xE8 LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE */,
969 0x1ED9 /* 0xE9 LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW */,
970 0x1EDD /* 0xEA LATIN SMALL LETTER O WITH HORN AND GRAVE */,
971 0x1EDF /* 0xEB LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE */,
972 0x1EE1 /* 0xEC LATIN SMALL LETTER O WITH HORN AND TILDE */,
973 0x1EDB /* 0xED LATIN SMALL LETTER O WITH HORN AND ACUTE */,
974 0x1EE3 /* 0xEE LATIN SMALL LETTER O WITH HORN AND DOT BELOW */,
975 0x00F9 /* 0xEF LATIN SMALL LETTER U WITH GRAVE */,
976 0x1ED6 /* 0xF0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE */,
977 0x1EE7 /* 0xF1 LATIN SMALL LETTER U WITH HOOK ABOVE */,
978 0x0169 /* 0xF2 LATIN SMALL LETTER U WITH TILDE */,
979 0x00FA /* 0xF3 LATIN SMALL LETTER U WITH ACUTE */,
980 0x1EE5 /* 0xF4 LATIN SMALL LETTER U WITH DOT BELOW */,
981 0x1EEB /* 0xF5 LATIN SMALL LETTER U WITH HORN AND GRAVE */,
982 0x1EED /* 0xF6 LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE */,
983 0x1EEF /* 0xF7 LATIN SMALL LETTER U WITH HORN AND TILDE */,
984 0x1EE9 /* 0xF8 LATIN SMALL LETTER U WITH HORN AND ACUTE */,
985 0x1EF1 /* 0xF9 LATIN SMALL LETTER U WITH HORN AND DOT BELOW */,
986 0x1EF3 /* 0xFA LATIN SMALL LETTER Y WITH GRAVE */,
987 0x1EF7 /* 0xFB LATIN SMALL LETTER Y WITH HOOK ABOVE */,
988 0x1EF9 /* 0xFC LATIN SMALL LETTER Y WITH TILDE */,
989 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
990 0x1EF5 /* 0xFE LATIN SMALL LETTER Y WITH DOT BELOW */,
991 0x1ED0 /* 0xFF LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */
994 Lisp_Object Vutf_2000_version;
998 int leading_code_private_11;
1001 Lisp_Object Qcharsetp;
1003 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1004 Lisp_Object Qregistry, Qfinal, Qgraphic;
1005 Lisp_Object Qdirection;
1006 Lisp_Object Qreverse_direction_charset;
1007 Lisp_Object Qleading_byte;
1008 Lisp_Object Qshort_name, Qlong_name;
1022 Qcyrillic_iso8859_5,
1024 Qjapanese_jisx0208_1978,
1029 Qchinese_cns11643_1,
1030 Qchinese_cns11643_2,
1033 Qlatin_viscii_lower,
1034 Qlatin_viscii_upper,
1040 Lisp_Object Ql2r, Qr2l;
1042 Lisp_Object Vcharset_hash_table;
1044 static Charset_ID next_allocated_1_byte_leading_byte;
1045 static Charset_ID next_allocated_2_byte_leading_byte;
1047 /* Composite characters are characters constructed by overstriking two
1048 or more regular characters.
1050 1) The old Mule implementation involves storing composite characters
1051 in a buffer as a tag followed by all of the actual characters
1052 used to make up the composite character. I think this is a bad
1053 idea; it greatly complicates code that wants to handle strings
1054 one character at a time because it has to deal with the possibility
1055 of great big ungainly characters. It's much more reasonable to
1056 simply store an index into a table of composite characters.
1058 2) The current implementation only allows for 16,384 separate
1059 composite characters over the lifetime of the XEmacs process.
1060 This could become a potential problem if the user
1061 edited lots of different files that use composite characters.
1062 Due to FSF bogosity, increasing the number of allowable
1063 composite characters under Mule would decrease the number
1064 of possible faces that can exist. Mule already has shrunk
1065 this to 2048, and further shrinkage would become uncomfortable.
1066 No such problems exist in XEmacs.
1068 Composite characters could be represented as 0x80 C1 C2 C3,
1069 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1070 for slightly under 2^20 (one million) composite characters
1071 over the XEmacs process lifetime, and you only need to
1072 increase the size of a Mule character from 19 to 21 bits.
1073 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1074 85 million (slightly over 2^26) composite characters. */
1077 /************************************************************************/
1078 /* Basic Emchar functions */
1079 /************************************************************************/
1081 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1082 string in STR. Returns the number of bytes stored.
1083 Do not call this directly. Use the macro set_charptr_emchar() instead.
1087 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1093 Lisp_Object charset;
1102 else if ( c <= 0x7ff )
1104 *p++ = (c >> 6) | 0xc0;
1105 *p++ = (c & 0x3f) | 0x80;
1107 else if ( c <= 0xffff )
1109 *p++ = (c >> 12) | 0xe0;
1110 *p++ = ((c >> 6) & 0x3f) | 0x80;
1111 *p++ = (c & 0x3f) | 0x80;
1113 else if ( c <= 0x1fffff )
1115 *p++ = (c >> 18) | 0xf0;
1116 *p++ = ((c >> 12) & 0x3f) | 0x80;
1117 *p++ = ((c >> 6) & 0x3f) | 0x80;
1118 *p++ = (c & 0x3f) | 0x80;
1120 else if ( c <= 0x3ffffff )
1122 *p++ = (c >> 24) | 0xf8;
1123 *p++ = ((c >> 18) & 0x3f) | 0x80;
1124 *p++ = ((c >> 12) & 0x3f) | 0x80;
1125 *p++ = ((c >> 6) & 0x3f) | 0x80;
1126 *p++ = (c & 0x3f) | 0x80;
1130 *p++ = (c >> 30) | 0xfc;
1131 *p++ = ((c >> 24) & 0x3f) | 0x80;
1132 *p++ = ((c >> 18) & 0x3f) | 0x80;
1133 *p++ = ((c >> 12) & 0x3f) | 0x80;
1134 *p++ = ((c >> 6) & 0x3f) | 0x80;
1135 *p++ = (c & 0x3f) | 0x80;
1138 BREAKUP_CHAR (c, charset, c1, c2);
1139 lb = CHAR_LEADING_BYTE (c);
1140 if (LEADING_BYTE_PRIVATE_P (lb))
1141 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1143 if (EQ (charset, Vcharset_control_1))
1152 /* Return the first character from a Mule-encoded string in STR,
1153 assuming it's non-ASCII. Do not call this directly.
1154 Use the macro charptr_emchar() instead. */
1157 non_ascii_charptr_emchar (CONST Bufbyte *str)
/* Lead-byte ladder: B is compared against descending thresholds (0xf8,
   0xf0, 0xe0 and 0xc0 are the ones shown).  Presumably each branch keeps
   the payload bits of B and sets the continuation count LEN; the branch
   bodies are not shown here -- TODO confirm.  */
1170 else if ( b >= 0xf8 )
1175 else if ( b >= 0xf0 )
1180 else if ( b >= 0xe0 )
1185 else if ( b >= 0xc0 )
/* Shift six more payload bits into CH on each of LEN iterations
   (B is presumably re-read from STR inside the loop).  */
1195 for( ; len > 0; len-- )
1198 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte decoding: I0 is the first byte of STR.
   NOTE(review): how this path is selected relative to the ladder above
   (likely conditional compilation) is not visible here.  */
1202 Bufbyte i0 = *str, i1, i2 = 0;
1203 Lisp_Object charset;
/* A Control-1 character is its leading byte followed by one byte that
   holds the character code plus 0x20.  */
1205 if (i0 == LEADING_BYTE_CONTROL_1)
1206 return (Emchar) (*++str - 0x20);
1208 if (LEADING_BYTE_PREFIX_P (i0))
/* Look the charset up by its leading byte; only a dimension-2 charset
   carries a second position byte (I2 was initialised to 0 above).  */
1213 charset = CHARSET_BY_LEADING_BYTE (i0);
1214 if (XCHARSET_DIMENSION (charset) == 2)
1217 return MAKE_CHAR (charset, i1, i2);
1221 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1222 Do not call this directly. Use the macro valid_char_p() instead. */
1226 non_ascii_valid_char_p (Emchar ch)
1230 /* Must have only lowest 19 bits set */
/* Split CH into its three bit fields.  NOTE(review): the test that picks
   between the two groups of checks below is not shown here; presumably
   it is whether F1 is zero -- confirm.  */
1234 f1 = CHAR_FIELD1 (ch);
1235 f2 = CHAR_FIELD2 (ch);
1236 f3 = CHAR_FIELD3 (ch);
1240 Lisp_Object charset;
/* In this group F2 selects the charset (see the lookup below), so it has
   to fall inside the official or the private field-2 range.  */
1242 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1243 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1244 f2 > MAX_CHAR_FIELD2_PRIVATE)
1249 if (f3 != 0x20 && f3 != 0x7F)
1253 NOTE: This takes advantage of the fact that
1254 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1255 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1257 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
/* Presumably reached only for the edge positions 0x20 and 0x7F: those
   are accepted only when the charset has 96 characters.  */
1258 return (XCHARSET_CHARS (charset) == 96);
1262 Lisp_Object charset;
/* In this group F1 selects the charset; F2 and F3 are both checked as
   position codes.  */
1264 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1265 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1266 f1 > MAX_CHAR_FIELD1_PRIVATE)
1268 if (f2 < 0x20 || f3 < 0x20)
1271 #ifdef ENABLE_COMPOSITE_CHARS
1272 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1274 if (UNBOUNDP (Fgethash (make_int (ch),
1275 Vcomposite_char_char2string_hash_table,
1280 #endif /* ENABLE_COMPOSITE_CHARS */
1282 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
/* Official and private charsets use different field-1 to leading-byte
   offsets, hence the two lookups.  */
1285 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1287 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1290 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1292 return (XCHARSET_CHARS (charset) == 96);
1298 /************************************************************************/
1299 /* Basic string functions */
1300 /************************************************************************/
1302 /* Copy the character pointed to by PTR into STR, assuming it's
1303 non-ASCII. Do not call this directly. Use the macro
1304 charptr_copy_char() instead. */
1307 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1309 Bufbyte *strptr = str;
/* Dispatch on the encoded length implied by the byte at *STRPTR.
   NOTE(review): presumably the first byte of PTR has just been stored
   there; that statement is not shown here -- confirm.  */
1311 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1313 /* Notice fallthrough. */
/* Each case copies one trailing byte and falls into the next, so an
   N-byte character copies N - 1 trailing bytes.  */
1315 case 6: *++strptr = *ptr++;
1316 case 5: *++strptr = *ptr++;
1318 case 4: *++strptr = *ptr++;
1319 case 3: *++strptr = *ptr++;
1320 case 2: *++strptr = *ptr;
/* STRPTR points at the last byte stored, so this is the number of bytes
   written to STR.  */
1325 return strptr + 1 - str;
1329 /************************************************************************/
1330 /* streams of Emchars */
1331 /************************************************************************/
1333 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1334 The functions below are not meant to be called directly; use
1335 the macros in insdel.h. */
/* Finish reading one character from STREAM.  CH is used as the first
   byte of the character; the remaining bytes are fetched with
   Lstream_getc() and the assembled bytes are decoded with
   charptr_emchar().  */
1338 Lstream_get_emchar_1 (Lstream *stream, int ch)
1340 Bufbyte str[MAX_EMCHAR_LEN];
1341 Bufbyte *strptr = str;
/* Seed the buffer with the first byte; its value determines how many
   further bytes belong to this character.  */
1343 str[0] = (Bufbyte) ch;
1344 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1346 /* Notice fallthrough. */
1349 ch = Lstream_getc (stream);
1351 *++strptr = (Bufbyte) ch;
1353 ch = Lstream_getc (stream);
1355 *++strptr = (Bufbyte) ch;
1358 ch = Lstream_getc (stream);
1360 *++strptr = (Bufbyte) ch;
1362 ch = Lstream_getc (stream);
1364 *++strptr = (Bufbyte) ch;
1366 ch = Lstream_getc (stream);
1368 *++strptr = (Bufbyte) ch;
1373 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar() and write the
   resulting LEN bytes to STREAM.  Returns whatever Lstream_write()
   returns.  */
1377 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1379 Bufbyte str[MAX_EMCHAR_LEN];
1380 Bytecount len = set_charptr_emchar (str, ch);
1381 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar() and hand the
   resulting LEN bytes to Lstream_unread() on STREAM.  */
1385 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1387 Bufbyte str[MAX_EMCHAR_LEN];
1388 Bytecount len = set_charptr_emchar (str, ch);
1389 Lstream_unread (stream, str, len);
1393 /************************************************************************/
1394 /* charset object */
1395 /************************************************************************/
/* Mark method named in the charset DEFINE_LRECORD_IMPLEMENTATION below.
   Passes the short name, long name, doc string, registry and CCL program
   of the charset OBJ to MARKOBJ.  NOTE(review): the return statement is
   not shown here.  */
1398 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1400 struct Lisp_Charset *cs = XCHARSET (obj);
1402 markobj (cs->short_name);
1403 markobj (cs->long_name);
1404 markobj (cs->doc_string);
1405 markobj (cs->registry);
1406 markobj (cs->ccl_program);
/* Print method named in the charset DEFINE_LRECORD_IMPLEMENTATION below.
   Writes to PRINTCHARFUN, in order: "#<charset ", the name, short name,
   long name and doc string, then the type, direction, column count,
   graphic and final byte, the registry, and finally the record uid in
   hex followed by ">".  */
1411 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1413 struct Lisp_Charset *cs = XCHARSET (obj);
/* Error path for "unreadable object"; the condition guarding it is not
   shown here (presumably a print-readably check -- confirm).  */
1417 error ("printing unreadable object #<charset %s 0x%x>",
1418 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1421 write_c_string ("#<charset ", printcharfun);
1422 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1423 write_c_string (" ", printcharfun);
1424 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1425 write_c_string (" ", printcharfun);
1426 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1427 write_c_string (" ", printcharfun);
1428 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* The scalar attributes are formatted through BUF; the type is spelled
   "94", "96" or "94x94" (the fallback spelling is not shown here).  */
1429 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1430 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1431 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1432 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1434 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1435 CHARSET_COLUMNS (cs),
1436 CHARSET_GRAPHIC (cs),
1437 CHARSET_FINAL (cs));
1438 write_c_string (buf, printcharfun);
1439 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1440 sprintf (buf, " 0x%x>", cs->header.uid);
1441 write_c_string (buf, printcharfun);
/* Record description for struct Lisp_Charset, anchored at the `name'
   member.  NOTE(review): the trailing 7 is presumably the number of
   consecutive Lisp_Object slots starting there -- confirm against the
   struct definition.  */
1444 static const struct lrecord_description charset_description[] = {
1445 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
/* Define the "charset" lrecord type, naming mark_charset, print_charset
   and charset_description as its mark method, print method and
   description.  */
1449 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1450 mark_charset, print_charset, 0, 0, 0,
1451 charset_description,
1452 struct Lisp_Charset);
1453 /* Make a new charset. */
/* Allocates a struct Lisp_Charset, copies the arguments into it, derives
   the dimension and chars-per-dimension from TYPE, computes the
   representation length, and records the object in the lookup tables
   (charset_by_attributes, charset_by_leading_byte,
   rep_bytes_by_first_byte, Vcharset_hash_table).  DECODING_TABLE may be
   NULL.  NOTE(review): several of the registration steps are guarded by
   conditions that are not shown here.  */
1456 make_charset (Charset_ID id, Lisp_Object name,
1457 unsigned char type, unsigned char columns, unsigned char graphic,
1458 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1459 Lisp_Object long_name, Lisp_Object doc,
1461 Emchar* decoding_table,
1462 Emchar ucs_min, Emchar ucs_max, Emchar code_offset)
1465 struct Lisp_Charset *cs =
1466 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1467 XSETCHARSET (obj, cs);
/* Plain copies of the caller's attributes; the CCL program and the
   reverse-direction link start out as nil.  */
1469 CHARSET_ID (cs) = id;
1470 CHARSET_NAME (cs) = name;
1471 CHARSET_SHORT_NAME (cs) = short_name;
1472 CHARSET_LONG_NAME (cs) = long_name;
1473 CHARSET_DIRECTION (cs) = direction;
1474 CHARSET_TYPE (cs) = type;
1475 CHARSET_COLUMNS (cs) = columns;
1476 CHARSET_GRAPHIC (cs) = graphic;
1477 CHARSET_FINAL (cs) = final;
1478 CHARSET_DOC_STRING (cs) = doc;
1479 CHARSET_REGISTRY (cs) = reg;
1480 CHARSET_CCL_PROGRAM (cs) = Qnil;
1481 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1483 CHARSET_DECODING_TABLE(cs) = decoding_table;
1484 CHARSET_UCS_MIN(cs) = ucs_min;
1485 CHARSET_UCS_MAX(cs) = ucs_max;
1486 CHARSET_CODE_OFFSET(cs) = code_offset;
/* Derive dimension and chars-per-dimension from the type.  For the two
   one-dimensional types a supplied decoding table is also inverted into
   a character-to-byte table; the two-dimensional types get no such
   tables here.  */
1489 switch ( CHARSET_TYPE (cs) )
1491 case CHARSET_TYPE_94:
1492 CHARSET_DIMENSION (cs) = 1;
1493 CHARSET_CHARS (cs) = 94;
1495 if (decoding_table != NULL)
1498 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1499 for (i = 0; i < 94; i++)
1501 Emchar c = decoding_table[i];
/* Table slot I corresponds to position byte I + 33.  */
1504 put_byte_from_character_table (c, i + 33,
1505 CHARSET_TO_BYTE1_TABLE(cs));
1509 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1510 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1513 case CHARSET_TYPE_96:
1514 CHARSET_DIMENSION (cs) = 1;
1515 CHARSET_CHARS (cs) = 96;
1517 if (decoding_table != NULL)
1520 CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table();
1521 for (i = 0; i < 96; i++)
1523 Emchar c = decoding_table[i];
/* Table slot I corresponds to position byte I + 32.  */
1526 put_byte_from_character_table (c, i + 32,
1527 CHARSET_TO_BYTE1_TABLE(cs));
1531 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1532 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1535 case CHARSET_TYPE_94X94:
1536 CHARSET_DIMENSION (cs) = 2;
1537 CHARSET_CHARS (cs) = 94;
1539 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1540 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1543 case CHARSET_TYPE_96X96:
1544 CHARSET_DIMENSION (cs) = 2;
1545 CHARSET_CHARS (cs) = 96;
1547 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1548 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1552 case CHARSET_TYPE_128X128:
1553 CHARSET_DIMENSION (cs) = 2;
1554 CHARSET_CHARS (cs) = 128;
1556 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1557 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
1560 case CHARSET_TYPE_256X256:
1561 CHARSET_DIMENSION (cs) = 2;
1562 CHARSET_CHARS (cs) = 256;
1564 CHARSET_TO_BYTE1_TABLE(cs) = NULL;
1565 CHARSET_TO_BYTE2_TABLE(cs) = NULL;
/* Representation length: 1 for ASCII, otherwise the dimension plus 1 or
   plus 2.  NOTE(review): the test choosing between the last two is not
   shown here; presumably official versus private leading bytes.  */
1572 if (id == LEADING_BYTE_ASCII)
1573 CHARSET_REP_BYTES (cs) = 1;
1575 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1577 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1582 /* some charsets do not have final characters. This includes
1583 ASCII, Control-1, Composite, and the two faux private
1586 assert (NILP (charset_by_attributes[type][final]));
1587 charset_by_attributes[type][final] = obj;
1589 assert (NILP (charset_by_attributes[type][final][direction]));
1590 charset_by_attributes[type][final][direction] = obj;
1594 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1595 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1598 /* official leading byte */
1599 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1602 /* Some charsets are "faux" and don't have names or really exist at
1603 all except in the leading-byte table. */
1605 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  Separate counters are kept for the one-byte and
   two-byte private ranges, each checked against its maximum
   (MAX_LEADING_BYTE_PRIVATE_1 / MAX_LEADING_BYTE_PRIVATE_2) before use.
   The "No more character sets free for this dimension" error below
   reports DIMENSION; the statement raising it and the conditions around
   it are not shown here.  */
1610 get_unallocated_leading_byte (int dimension)
1616 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1619 lb = next_allocated_1_byte_leading_byte++;
1623 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1626 lb = next_allocated_2_byte_leading_byte++;
1631 ("No more character sets free for this dimension",
1632 make_int (dimension));
/* Return the first position byte of character CH within CHARSET.
   Tried in order: the charset's character-to-byte table; its
   [ucs_min, ucs_max] range; then the fixed code blocks that start at
   MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94 and MIN_CHAR_96x96 and are
   indexed by the charset's final byte.  NOTE(review): the upper-bound
   halves of the block tests and the value returned when nothing matches
   are not shown here.  */
1639 charset_get_byte1 (Lisp_Object charset, Emchar ch)
1641 Emchar_to_byte_table* table;
/* An explicit table, when the charset has one, takes precedence.  */
1644 if ((table = XCHARSET_TO_BYTE1_TABLE (charset)) != NULL)
1645 return get_byte_from_character_table (ch, table);
1646 else if ((CHARSET_UCS_MIN (XCHARSET (charset)) <= ch)
1647 && (ch <= CHARSET_UCS_MAX (XCHARSET (charset))))
1648 return ch - CHARSET_UCS_MIN (XCHARSET (charset))
1649 + CHARSET_CODE_OFFSET (XCHARSET (charset));
/* Otherwise D becomes the offset of CH inside the block belonging to
   this charset; the block index is the final byte minus '0'.  */
1650 else if (XCHARSET_DIMENSION (charset) == 1)
1652 if (XCHARSET_CHARS (charset) == 94)
1654 if (((d = ch - (MIN_CHAR_94
1655 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1659 else if (XCHARSET_CHARS (charset) == 96)
1661 if (((d = ch - (MIN_CHAR_96
1662 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1669 else if (XCHARSET_DIMENSION (charset) == 2)
1671 if (XCHARSET_CHARS (charset) == 94)
1673 if (((d = ch - (MIN_CHAR_94x94
1674 + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) >= 0)
1676 return (d / 94) + 33;
1678 else if (XCHARSET_CHARS (charset) == 96)
1680 if (((d = ch - (MIN_CHAR_96x96
1681 + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) >= 0)
1683 return (d / 96) + 32;
/* Return the second position byte of character CH within CHARSET.
   Tried in order: the charset's second character-to-byte table; the
   ucs-bmp special case, which yields bits 8-15 of CH; then the 94x94 or
   96x96 block selected by the final byte, where the result is the offset
   modulo 94 (or 96) plus 33 (or 32), and 0 when CH lies outside that
   block.  NOTE(review): the result for a dimension-1 charset is not
   shown here.  */
1690 charset_get_byte2 (Lisp_Object charset, Emchar ch)
1692 if (XCHARSET_DIMENSION (charset) == 1)
1696 Emchar_to_byte_table* table;
1698 if ((table = XCHARSET_TO_BYTE2_TABLE (charset)) != NULL)
1699 return get_byte_from_character_table (ch, table);
1700 else if (EQ (charset, Vcharset_ucs_bmp))
1701 return (ch >> 8) & 0xff;
1702 else if (XCHARSET_CHARS (charset) == 94)
1703 return (MIN_CHAR_94x94
1704 + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
1705 && (ch < MIN_CHAR_94x94
1706 + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ?
1707 ((ch - MIN_CHAR_94x94) % 94) + 33 : 0;
1708 else /* if (XCHARSET_CHARS (charset) == 96) */
1709 return (MIN_CHAR_96x96
1710 + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
1711 && (ch < MIN_CHAR_96x96
1712 + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ?
1713 ((ch - MIN_CHAR_96x96) % 96) + 32 : 0;
1717 Lisp_Object Vdefault_coded_charset_priority_list;
1721 /************************************************************************/
1722 /* Basic charset Lisp functions */
1723 /************************************************************************/
1725 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1726 Return non-nil if OBJECT is a charset.
1730 return CHARSETP (object) ? Qt : Qnil;
1733 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1734 Retrieve the charset of the given name.
1735 If CHARSET-OR-NAME is a charset object, it is simply returned.
1736 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1737 nil is returned. Otherwise the associated charset object is returned.
1741 if (CHARSETP (charset_or_name))
1742 return charset_or_name;
1744 CHECK_SYMBOL (charset_or_name);
1745 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1748 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1749 Retrieve the charset of the given name.
1750 Same as `find-charset' except an error is signalled if there is no such
1751 charset instead of returning nil.
1755 Lisp_Object charset = Ffind_charset (name);
1758 signal_simple_error ("No such charset", name);
1762 /* We store the charsets in hash tables with the names as the key and the
1763 actual charset object as the value. Occasionally we need to use them
1764 in a list format. These routines provide us with that. */
1765 struct charset_list_closure
/* Address of the list under construction; the mapper conses each
   charset name onto the list stored there.  */
1767 Lisp_Object *charset_list;
/* Callback passed to elisp_maphash() by Fcharset_list.  VALUE is a
   charset object; its name is pushed onto the front of the list whose
   address is carried in CHARSET_LIST_CLOSURE.  KEY is not used in the
   lines shown here.  */
1771 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1772 void *charset_list_closure)
1774 /* This function can GC */
1775 struct charset_list_closure *chcl =
1776 (struct charset_list_closure*) charset_list_closure;
1777 Lisp_Object *charset_list = chcl->charset_list;
1779 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1783 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1784 Return a list of the names of all defined charsets.
1788 Lisp_Object charset_list = Qnil;
1789 struct gcpro gcpro1;
1790 struct charset_list_closure charset_list_closure;
1792 GCPRO1 (charset_list);
1793 charset_list_closure.charset_list = &charset_list;
1794 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1795 &charset_list_closure);
1798 return charset_list;
1801 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1802 Return the name of the given charset.
1806 return XCHARSET_NAME (Fget_charset (charset));
1809 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1810 Define a new character set.
1811 This function is for use with Mule support.
1812 NAME is a symbol, the name by which the character set is normally referred.
1813 DOC-STRING is a string describing the character set.
1814 PROPS is a property list, describing the specific nature of the
1815 character set. Recognized properties are:
1817 'short-name Short version of the charset name (ex: Latin-1)
1818 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1819 'registry A regular expression matching the font registry field for
1821 'dimension Number of octets used to index a character in this charset.
1822 Either 1 or 2. Defaults to 1.
1823 'columns Number of columns used to display a character in this charset.
1824 Only used in TTY mode. (Under X, the actual width of a
1825 character can be derived from the font used to display the
1826 characters.) If unspecified, defaults to the dimension
1827 (this is almost always the correct value).
1828 'chars Number of characters in each dimension (94 or 96).
1829 Defaults to 94. Note that if the dimension is 2, the
1830 character set thus described is 94x94 or 96x96.
1831 'final Final byte of ISO 2022 escape sequence. Must be
1832 supplied. Each combination of (DIMENSION, CHARS) defines a
1833 separate namespace for final bytes. Note that ISO
1834 2022 restricts the final byte to the range
1835 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1836 dimension == 2. Note also that final bytes in the range
1837 0x30 - 0x3F are reserved for user-defined (not official)
1839 'graphic 0 (use left half of font on output) or 1 (use right half
1840 of font on output). Defaults to 0. For example, for
1841 a font whose registry is ISO8859-1, the left half
1842 (octets 0x20 - 0x7F) is the `ascii' character set, while
1843 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1844 character set. With 'graphic set to 0, the octets
1845 will have their high bit cleared; with it set to 1,
1846 the octets will have their high bit set.
1847 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1849 'ccl-program A compiled CCL program used to convert a character in
1850 this charset into an index into the font. This is in
1851 addition to the 'graphic property. The CCL program
1852 is passed the octets of the character, with the high
1853 bit cleared and set depending upon whether the value
1854 of the 'graphic property is 0 or 1.
1856 (name, doc_string, props))
1858 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1859 int direction = CHARSET_LEFT_TO_RIGHT;
1861 Lisp_Object registry = Qnil;
1862 Lisp_Object charset;
1863 Lisp_Object rest, keyword, value;
1864 Lisp_Object ccl_program = Qnil;
1865 Lisp_Object short_name = Qnil, long_name = Qnil;
1867 CHECK_SYMBOL (name);
1868 if (!NILP (doc_string))
1869 CHECK_STRING (doc_string);
1871 charset = Ffind_charset (name);
1872 if (!NILP (charset))
1873 signal_simple_error ("Cannot redefine existing charset", name);
1875 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1877 if (EQ (keyword, Qshort_name))
1879 CHECK_STRING (value);
1883 else if (EQ (keyword, Qlong_name))
1885 CHECK_STRING (value);
1889 else if (EQ (keyword, Qdimension))
1892 dimension = XINT (value);
1893 if (dimension < 1 || dimension > 2)
1894 signal_simple_error ("Invalid value for 'dimension", value);
1897 else if (EQ (keyword, Qchars))
1900 chars = XINT (value);
1901 if (chars != 94 && chars != 96)
1902 signal_simple_error ("Invalid value for 'chars", value);
1905 else if (EQ (keyword, Qcolumns))
1908 columns = XINT (value);
1909 if (columns != 1 && columns != 2)
1910 signal_simple_error ("Invalid value for 'columns", value);
1913 else if (EQ (keyword, Qgraphic))
1916 graphic = XINT (value);
1917 if (graphic < 0 || graphic > 1)
1918 signal_simple_error ("Invalid value for 'graphic", value);
1921 else if (EQ (keyword, Qregistry))
1923 CHECK_STRING (value);
1927 else if (EQ (keyword, Qdirection))
1929 if (EQ (value, Ql2r))
1930 direction = CHARSET_LEFT_TO_RIGHT;
1931 else if (EQ (value, Qr2l))
1932 direction = CHARSET_RIGHT_TO_LEFT;
1934 signal_simple_error ("Invalid value for 'direction", value);
1937 else if (EQ (keyword, Qfinal))
1939 CHECK_CHAR_COERCE_INT (value);
1940 final = XCHAR (value);
1941 if (final < '0' || final > '~')
1942 signal_simple_error ("Invalid value for 'final", value);
1945 else if (EQ (keyword, Qccl_program))
1947 CHECK_VECTOR (value);
1948 ccl_program = value;
1952 signal_simple_error ("Unrecognized property", keyword);
1956 error ("'final must be specified");
1957 if (dimension == 2 && final > 0x5F)
1959 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1963 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1965 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1967 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1968 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1970 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1977 /* id = CHARSET_ID_OFFSET_94 + final; */
1978 id = get_unallocated_leading_byte (dimension);
1980 else if (chars == 96)
1982 id = get_unallocated_leading_byte (dimension);
1989 else if (dimension == 2)
1993 id = get_unallocated_leading_byte (dimension);
1995 else if (chars == 96)
1997 id = get_unallocated_leading_byte (dimension);
2009 id = get_unallocated_leading_byte (dimension);
/* Property parsing is finished.  Substitute defaults for anything the
   caller left out: an empty doc string and registry, the symbol name as
   the short name, and the doc string as the long name.  */
2012 if (NILP (doc_string))
2013 doc_string = build_string ("");
2015 if (NILP (registry))
2016 registry = build_string ("");
2018 if (NILP (short_name))
2019 XSETSTRING (short_name, XSYMBOL (name)->name);
2021 if (NILP (long_name))
2022 long_name = doc_string;
2025 columns = dimension;
2026 charset = make_charset (id, name, type, columns, graphic,
2027 final, direction, short_name, long_name,
2028 doc_string, registry,
2030 if (!NILP (ccl_program))
2031 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2035 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2037 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2038 NEW-NAME is the name of the new charset. Return the new charset.
2040 (charset, new_name))
2042 Lisp_Object new_charset = Qnil;
2043 int id, dimension, columns, graphic, final;
2044 int direction, type;
2045 Lisp_Object registry, doc_string, short_name, long_name;
2046 struct Lisp_Charset *cs;
/* Validate first: CHARSET must exist and must not already have a reverse
   charset, and NEW-NAME must be a symbol that names no existing
   charset.  */
2048 charset = Fget_charset (charset);
2049 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2050 signal_simple_error ("Charset already has reverse-direction charset",
2053 CHECK_SYMBOL (new_name);
2054 if (!NILP (Ffind_charset (new_name)))
2055 signal_simple_error ("Cannot redefine existing charset", new_name);
/* Every attribute except the leading byte and the direction is copied
   from the original; a fresh leading byte is allocated for the copy.  */
2057 cs = XCHARSET (charset);
2059 type = CHARSET_TYPE (cs);
2060 columns = CHARSET_COLUMNS (cs);
2061 dimension = CHARSET_DIMENSION (cs);
2062 id = get_unallocated_leading_byte (dimension);
2064 graphic = CHARSET_GRAPHIC (cs);
2065 final = CHARSET_FINAL (cs);
/* The new charset runs in the opposite direction to the original.  */
2066 direction = CHARSET_RIGHT_TO_LEFT;
2067 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2068 direction = CHARSET_LEFT_TO_RIGHT;
2069 doc_string = CHARSET_DOC_STRING (cs);
2070 short_name = CHARSET_SHORT_NAME (cs);
2071 long_name = CHARSET_LONG_NAME (cs);
2072 registry = CHARSET_REGISTRY (cs);
2074 new_charset = make_charset (id, new_name, type, columns,
2075 graphic, final, direction, short_name, long_name,
2076 doc_string, registry,
2078 CHARSET_DECODING_TABLE(cs),
2079 CHARSET_UCS_MIN(cs),
2080 CHARSET_UCS_MAX(cs),
2081 CHARSET_CODE_OFFSET(cs)
/* Cross-link the pair so that each charset records the other as its
   reverse-direction charset.  */
2087 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2088 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2093 /* #### Reverse direction charsets not yet implemented. */
2095 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2097 Return the reverse-direction charset parallel to CHARSET, if any.
2098 This is the charset with the same properties (in particular, the same
2099 dimension, number of characters per dimension, and final byte) as
2100 CHARSET but whose characters are displayed in the opposite direction.
2104 charset = Fget_charset (charset);
/* The stored link is returned as is; make_charset() initialises it to
   nil, so nil means no reverse-direction charset has been made.  */
2105 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2109 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2110 Return the name of the charset with DIMENSION, CHARS, FINAL, and DIRECTION.
2111 If DIRECTION is omitted, both directions will be checked (left-to-right
2112 will be returned if character sets exist for both directions).
2114 (dimension, chars, final, direction))
2116 int dm, ch, fi, di = -1;
2118 Lisp_Object obj = Qnil;
2120 CHECK_INT (dimension);
2121 dm = XINT (dimension);
2122 if (dm < 1 || dm > 2)
2123 signal_simple_error ("Invalid value for DIMENSION", dimension);
2127 if (ch != 94 && ch != 96)
2128 signal_simple_error ("Invalid value for CHARS", chars);
2130 CHECK_CHAR_COERCE_INT (final);
2132 if (fi < '0' || fi > '~')
2133 signal_simple_error ("Invalid value for FINAL", final);
2135 if (EQ (direction, Ql2r))
2136 di = CHARSET_LEFT_TO_RIGHT;
2137 else if (EQ (direction, Qr2l))
2138 di = CHARSET_RIGHT_TO_LEFT;
2139 else if (!NILP (direction))
2140 signal_simple_error ("Invalid value for DIRECTION", direction);
2142 if (dm == 2 && fi > 0x5F)
2144 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2147 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2149 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2153 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2155 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2158 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2161 return XCHARSET_NAME (obj);
2165 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2166 Return short name of CHARSET.
2170 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2173 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2174 Return long name of CHARSET.
2178 return XCHARSET_LONG_NAME (Fget_charset (charset));
2181 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2182 Return description of CHARSET.
2186 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET with
   Fget_charset and returns its dimension (XCHARSET_DIMENSION) boxed as
   a Lisp integer.  */
2189 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2190 Return dimension of CHARSET.
2194 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.
   Resolves CHARSET with Fget_charset, checks that PROP is a symbol, and
   then compares PROP against each supported property symbol in turn,
   returning the matching slot of the charset:
     - name, short-name, long-name, doc-string, registry and ccl-program
       are returned as stored;
     - dimension, columns, graphic and chars are boxed with make_int;
     - final is boxed with make_char;
     - direction is reported as the symbol l2r or r2l;
     - reverse-direction-charset is read from
       CHARSET_REVERSE_DIRECTION_CHARSET and returned by name.
   A PROP that matches none of these is reported with
   signal_simple_error; the trailing return only keeps the compiler
   satisfied.  */
2197 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2198 Return property PROP of CHARSET.
2199 Recognized properties are those listed in `make-charset', as well as
2200 'name and 'doc-string.
2204 struct Lisp_Charset *cs;
2206 charset = Fget_charset (charset);
2207 cs = XCHARSET (charset);
2209 CHECK_SYMBOL (prop);
2210 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2211 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2212 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2213 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2214 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2215 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2216 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2217 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2218 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2219 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2220 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2221 if (EQ (prop, Qdirection))
2222 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2223 if (EQ (prop, Qreverse_direction_charset))
2225 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2229 return XCHARSET_NAME (obj);
2231 signal_simple_error ("Unrecognized charset property name", prop);
2232 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET with Fget_charset and
   returns its leading byte (XCHARSET_LEADING_BYTE) boxed as a Lisp
   integer; that value serves as the charset's identification number.  */
2235 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2236 Return charset identification number of CHARSET.
2240 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2243 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program': resolves CHARSET with
   Fget_charset, requires CCL-PROGRAM to be a vector (CHECK_VECTOR) and
   stores it in the charset's ccl-program slot.  */
2246 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2247 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2249 (charset, ccl_program))
2251 charset = Fget_charset (charset);
2252 CHECK_VECTOR (ccl_program);
2253 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2258 invalidate_charset_font_caches (Lisp_Object charset)
2260 /* Invalidate font cache entries for charset on all devices. */
/* Each device has a charset_font_cache table that is looked up by
   charset; the value fetched for CHARSET is itself a hash table, which
   is emptied in place with Fclrhash rather than removed from the
   device's table.  */
2261 Lisp_Object devcons, concons, hash_table;
2262 DEVICE_LOOP_NO_BREAK (devcons, concons)
2264 struct device *d = XDEVICE (XCAR (devcons));
2265 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
/* Qunbound is the lookup default, so an unbound result means this
   device has no cache entry for CHARSET and there is nothing to
   clear.  */
2266 if (!UNBOUNDP (hash_table))
2267 Fclrhash (hash_table);
2271 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
/* Lisp primitive `set-charset-registry': resolves CHARSET with
   Fget_charset, requires REGISTRY to be a string (CHECK_STRING) and
   stores it in the charset's registry slot.  Because the registry has
   changed, the per-device font caches for the charset are cleared with
   invalidate_charset_font_caches, and a change of the font property of
   Vdefault_face is reported through face_property_was_changed.  */
2272 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2273 Set the 'registry property of CHARSET to REGISTRY.
2275 (charset, registry))
2277 charset = Fget_charset (charset);
2278 CHECK_STRING (registry);
2279 XCHARSET_REGISTRY (charset) = registry;
2280 invalidate_charset_font_caches (charset);
2281 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2286 /************************************************************************/
2287 /* Lisp primitives for working with characters */
2288 /************************************************************************/
/* Lisp primitive `make-char'.
   Resolves CHARSET with Fget_charset and picks the range an octet may
   take from the charset:
     - ascii:                      0 .. 127
     - control-1:                  0 .. 31
     - 256-character charsets:     0 .. 255
     - 94-character charsets:     33 .. 126
     - anything else (96 chars):  32 .. 127
   An octet outside [lowlim, highlim] is reported with
   args_out_of_range_3.  For a charset of dimension one the character is
   built from the first octet alone; otherwise both octets are passed to
   MAKE_CHAR.  */
2290 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2291 Make a character from CHARSET and octets ARG1 and ARG2.
2292 ARG2 is required only for characters from two-dimensional charsets.
2293 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2294 character s with caron.
2296 (charset, arg1, arg2))
2298 struct Lisp_Charset *cs;
2300 int lowlim, highlim;
2302 charset = Fget_charset (charset);
2303 cs = XCHARSET (charset);
2305 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2306 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2308 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2310 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2311 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2314 /* It is useful (and safe, according to Olivier Galibert) to strip
2315 the 8th bit off ARG1 and ARG2 because it allows programmers to
2316 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2317 Latin 2 code of the character. */
2325 if (a1 < lowlim || a1 > highlim)
2326 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
/* One-dimensional charsets take a single octet; the second position
   code handed to MAKE_CHAR is 0.  */
2328 if (CHARSET_DIMENSION (cs) == 1)
2332 ("Charset is of dimension one; second octet must be nil", arg2);
2333 return make_char (MAKE_CHAR (charset, a1, 0));
/* The second octet has its 8th bit stripped before it is checked
   against the same limits as the first.  */
2342 a2 = XINT (arg2) & 0x7f;
2344 if (a2 < lowlim || a2 > highlim)
2345 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2347 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset': accepts a character or an integer that
   can be coerced to one (CHECK_CHAR_COERCE_INT) and returns the name of
   the charset that CHAR_CHARSET reports for it.  */
2350 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2351 Return the character set of char CH.
2355 CHECK_CHAR_COERCE_INT (ch);
2357 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `split-char': breaks CHARACTER into its charset and
   position codes with BREAKUP_CHAR and returns them as a list whose
   first element is the charset's name.  */
2360 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2361 Return list of charset and one or two position-codes of CHAR.
2365 /* This function can GC */
2366 struct gcpro gcpro1, gcpro2;
2367 Lisp_Object charset = Qnil;
2368 Lisp_Object rc = Qnil;
/* Both Lisp_Object locals are registered with the garbage collector
   before any further work, since building the result list allocates.  */
2371 GCPRO2 (charset, rc);
2372 CHECK_CHAR_COERCE_INT (character);
2374 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
/* Two-dimensional charsets yield (NAME C1 C2); the other branch yields
   (NAME C1).  */
2376 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2378 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2382 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2390 #ifdef ENABLE_COMPOSITE_CHARS
2391 /************************************************************************/
2392 /* composite character functions */
2393 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are copied into a Lisp string, which is looked up in
   Vcomposite_char_string2char_hash_table.  When a new character has to
   be allocated it is built in Vcharset_composite from the current
   composite_char_row_next / composite_char_col_next position and
   recorded in both directions (char -> string and string -> char).
   Signals "No more composite chars available" once the row counter has
   reached 128.  */
2396 lookup_composite_char (Bufbyte *str, int len)
2398 Lisp_Object lispstr = make_string (str, len);
2399 Lisp_Object ch = Fgethash (lispstr,
2400 Vcomposite_char_string2char_hash_table,
2406 if (composite_char_row_next >= 128)
2407 signal_simple_error ("No more composite chars available", lispstr);
2408 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2409 composite_char_col_next);
2410 Fputhash (make_char (emch), lispstr,
2411 Vcomposite_char_char2string_hash_table);
2412 Fputhash (lispstr, make_char (emch),
2413 Vcomposite_char_string2char_hash_table);
/* Advance to the next free cell: columns run up to 127, then wrap back
   to 32 and move on to the next row.  */
2414 composite_char_col_next++;
2415 if (composite_char_col_next >= 128)
2417 composite_char_col_next = 32;
2418 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  CH is expected to have been
   registered already: the lookup result is asserted to be bound.  */
2427 composite_char_string (Emchar ch)
2429 Lisp_Object str = Fgethash (make_char (ch),
2430 Vcomposite_char_char2string_hash_table,
2432 assert (!UNBOUNDP (str));
/* `make-composite-char': requires STRING to be a string (CHECK_STRING)
   and returns, boxed with make_char, the character that
   lookup_composite_char yields for the string's data and length.  */
2436 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2437 Convert a string into a single composite character.
2438 The character is the result of overstriking all the characters in
2443 CHECK_STRING (string);
2444 return make_char (lookup_composite_char (XSTRING_DATA (string),
2445 XSTRING_LENGTH (string)));
/* `composite-char-string': rejects, with "Must be composite char", any
   character whose leading byte is not LEADING_BYTE_COMPOSITE; otherwise
   returns the string that composite_char_string yields for it.  */
2448 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2449 Return a string of the characters comprising a composite character.
2457 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2458 signal_simple_error ("Must be composite char", ch);
2459 return composite_char_string (emch);
2461 #endif /* ENABLE_COMPOSITE_CHARS */
2464 /************************************************************************/
2465 /* initialization */
2466 /************************************************************************/
/* Symbol-table initialisation for this file: registers the Lisp
   primitives defined here with DEFSUBR and interns, with defsymbol, the
   symbols used as property names, direction values and predefined
   charset names.  */
2469 syms_of_mule_charset (void)
2471 DEFSUBR (Fcharsetp);
2472 DEFSUBR (Ffind_charset);
2473 DEFSUBR (Fget_charset);
2474 DEFSUBR (Fcharset_list);
2475 DEFSUBR (Fcharset_name);
2476 DEFSUBR (Fmake_charset);
2477 DEFSUBR (Fmake_reverse_direction_charset);
2478 /* DEFSUBR (Freverse_direction_charset); */
2479 DEFSUBR (Fcharset_from_attributes);
2480 DEFSUBR (Fcharset_short_name);
2481 DEFSUBR (Fcharset_long_name);
2482 DEFSUBR (Fcharset_description);
2483 DEFSUBR (Fcharset_dimension);
2484 DEFSUBR (Fcharset_property);
2485 DEFSUBR (Fcharset_id);
2486 DEFSUBR (Fset_charset_ccl_program);
2487 DEFSUBR (Fset_charset_registry);
2489 DEFSUBR (Fmake_char);
2490 DEFSUBR (Fchar_charset);
2491 DEFSUBR (Fsplit_char);
/* The composite-character primitives are registered only when
   ENABLE_COMPOSITE_CHARS is defined.  */
2493 #ifdef ENABLE_COMPOSITE_CHARS
2494 DEFSUBR (Fmake_composite_char);
2495 DEFSUBR (Fcomposite_char_string);
/* Predicate and charset property-name symbols.  */
2498 defsymbol (&Qcharsetp, "charsetp");
2499 defsymbol (&Qregistry, "registry");
2500 defsymbol (&Qfinal, "final");
2501 defsymbol (&Qgraphic, "graphic");
2502 defsymbol (&Qdirection, "direction");
2503 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2504 defsymbol (&Qshort_name, "short-name");
2505 defsymbol (&Qlong_name, "long-name");
/* The two values a charset's direction can be reported as.  */
2507 defsymbol (&Ql2r, "l2r");
2508 defsymbol (&Qr2l, "r2l");
2510 /* Charsets, compatible with FSF 20.3
2511 Naming convention is Script-Charset[-Edition] */
2512 defsymbol (&Qascii, "ascii");
2513 defsymbol (&Qcontrol_1, "control-1");
2514 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2515 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2516 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2517 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2518 defsymbol (&Qthai_tis620, "thai-tis620");
2519 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2520 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2521 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2522 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2523 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2524 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2525 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2526 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2527 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2528 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2529 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2530 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2531 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2532 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2534 defsymbol (&Qucs_bmp, "ucs-bmp");
/* NOTE(review): the C variables below are named latin_viscii_* while
   the Lisp symbols they hold are vietnamese-viscii-*.  */
2535 defsymbol (&Qlatin_viscii_lower, "vietnamese-viscii-lower");
2536 defsymbol (&Qlatin_viscii_upper, "vietnamese-viscii-upper");
2538 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2539 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2541 defsymbol (&Qcomposite, "composite");
/* Variable initialisation for this file: fills the charset lookup
   tables with Qnil, seeds the counters from which leading bytes are
   handed out, and defines the Lisp-visible variables
   `leading-code-private-11', `utf-2000-version' and
   `default-coded-charset-priority-list'.  */
2545 vars_of_mule_charset (void)
2552 /* Table of charsets indexed by leading byte. */
2553 for (i = 0; i < countof (charset_by_leading_byte); i++)
2554 charset_by_leading_byte[i] = Qnil;
/* NOTE(review): two initialisation loops for charset_by_attributes
   follow, one indexing it with two subscripts and one with three.
   Presumably they are alternatives selected by a preprocessor
   conditional that is not visible here -- confirm.  */
2557 /* Table of charsets indexed by type/final-byte. */
2558 for (i = 0; i < countof (charset_by_attributes); i++)
2559 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2560 charset_by_attributes[i][j] = Qnil;
2562 /* Table of charsets indexed by type/final-byte/direction. */
2563 for (i = 0; i < countof (charset_by_attributes); i++)
2564 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2565 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2566 charset_by_attributes[i][j][k] = Qnil;
/* NOTE(review): next_allocated_2_byte_leading_byte is assigned twice
   below with different values; presumably these too are alternative
   branches of a conditional -- confirm.  */
2569 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2571 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
2573 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2577 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2578 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2579 Leading-code of private TYPE9N charset of column-width 1.
2581 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2585 Vutf_2000_version = build_string("0.8 (Kami)");
2586 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2587 Version number of UTF-2000.
2590 Vdefault_coded_charset_priority_list = Qnil;
2591 DEFVAR_LISP ("default-coded-charset-priority-list",
2592 &Vdefault_coded_charset_priority_list /*
2593 Default order of preferred coded-character-set.
2599 complex_vars_of_mule_charset (void)
2601 staticpro (&Vcharset_hash_table);
2602 Vcharset_hash_table =
2603 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2605 /* Predefined character sets. We store them into variables for
2610 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2611 CHARSET_TYPE_256X256, 1, 0, 0,
2612 CHARSET_LEFT_TO_RIGHT,
2613 build_string ("BMP"),
2614 build_string ("BMP"),
2615 build_string ("BMP"),
2617 NULL, 0, 0xFFFF, 0);
2619 # define latin_iso8859_2_to_ucs NULL
2620 # define latin_iso8859_3_to_ucs NULL
2621 # define latin_iso8859_4_to_ucs NULL
2622 # define latin_iso8859_9_to_ucs NULL
2623 # define latin_jisx0201_to_ucs NULL
2624 # define MIN_CHAR_THAI 0
2625 # define MAX_CHAR_THAI 0
2626 # define MIN_CHAR_GREEK 0
2627 # define MAX_CHAR_GREEK 0
2628 # define MIN_CHAR_HEBREW 0
2629 # define MAX_CHAR_HEBREW 0
2630 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2631 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2632 # define MIN_CHAR_CYRILLIC 0
2633 # define MAX_CHAR_CYRILLIC 0
/* ASCII: 94-character set with final byte 'B', left-to-right.  The
   first three strings are presumably short name, long name and doc
   string, following the pattern of the other charsets defined here --
   confirm against make_charset.  The last string is the registry
   regexp.  */
2636 make_charset (LEADING_BYTE_ASCII, Qascii,
2637 CHARSET_TYPE_94, 1, 0, 'B',
2638 CHARSET_LEFT_TO_RIGHT,
2639 build_string ("ASCII"),
2640 build_string ("ASCII"),
2641 build_string ("ASCII (ISO646 IRV)"),
2642 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
/* C1 control characters.  The code-point range passed on the last line
   is 0x80..0x9F, i.e. 128-159, and the description string states that
   same range.  */
2644 Vcharset_control_1 =
2645 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2646 CHARSET_TYPE_94, 1, 1, 0,
2647 CHARSET_LEFT_TO_RIGHT,
2648 build_string ("C1"),
2649 build_string ("Control characters"),
2650 build_string ("Control characters 128-159"),
2652 NULL, 0x80, 0x9F, 0);
2653 Vcharset_latin_iso8859_1 =
2654 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2655 CHARSET_TYPE_96, 1, 1, 'A',
2656 CHARSET_LEFT_TO_RIGHT,
2657 build_string ("Latin-1"),
2658 build_string ("ISO8859-1 (Latin-1)"),
2659 build_string ("ISO8859-1 (Latin-1)"),
2660 build_string ("iso8859-1"),
2661 NULL, 0xA0, 0xFF, 32);
2662 Vcharset_latin_iso8859_2 =
2663 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2664 CHARSET_TYPE_96, 1, 1, 'B',
2665 CHARSET_LEFT_TO_RIGHT,
2666 build_string ("Latin-2"),
2667 build_string ("ISO8859-2 (Latin-2)"),
2668 build_string ("ISO8859-2 (Latin-2)"),
2669 build_string ("iso8859-2"),
2670 latin_iso8859_2_to_ucs, 0, 0, 32);
2671 Vcharset_latin_iso8859_3 =
2672 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2673 CHARSET_TYPE_96, 1, 1, 'C',
2674 CHARSET_LEFT_TO_RIGHT,
2675 build_string ("Latin-3"),
2676 build_string ("ISO8859-3 (Latin-3)"),
2677 build_string ("ISO8859-3 (Latin-3)"),
2678 build_string ("iso8859-3"),
2679 latin_iso8859_3_to_ucs, 0, 0, 32);
2680 Vcharset_latin_iso8859_4 =
2681 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2682 CHARSET_TYPE_96, 1, 1, 'D',
2683 CHARSET_LEFT_TO_RIGHT,
2684 build_string ("Latin-4"),
2685 build_string ("ISO8859-4 (Latin-4)"),
2686 build_string ("ISO8859-4 (Latin-4)"),
2687 build_string ("iso8859-4"),
2688 latin_iso8859_4_to_ucs, 0, 0, 32);
2689 Vcharset_thai_tis620 =
2690 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2691 CHARSET_TYPE_96, 1, 1, 'T',
2692 CHARSET_LEFT_TO_RIGHT,
2693 build_string ("TIS620"),
2694 build_string ("TIS620 (Thai)"),
2695 build_string ("TIS620.2529 (Thai)"),
2696 build_string ("tis620"),
2697 NULL, MIN_CHAR_THAI, MAX_CHAR_THAI, 32);
2698 Vcharset_greek_iso8859_7 =
2699 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2700 CHARSET_TYPE_96, 1, 1, 'F',
2701 CHARSET_LEFT_TO_RIGHT,
2702 build_string ("ISO8859-7"),
2703 build_string ("ISO8859-7 (Greek)"),
2704 build_string ("ISO8859-7 (Greek)"),
2705 build_string ("iso8859-7"),
2706 NULL, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 32);
2707 Vcharset_arabic_iso8859_6 =
2708 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2709 CHARSET_TYPE_96, 1, 1, 'G',
2710 CHARSET_RIGHT_TO_LEFT,
2711 build_string ("ISO8859-6"),
2712 build_string ("ISO8859-6 (Arabic)"),
2713 build_string ("ISO8859-6 (Arabic)"),
2714 build_string ("iso8859-6"),
2716 Vcharset_hebrew_iso8859_8 =
2717 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2718 CHARSET_TYPE_96, 1, 1, 'H',
2719 CHARSET_RIGHT_TO_LEFT,
2720 build_string ("ISO8859-8"),
2721 build_string ("ISO8859-8 (Hebrew)"),
2722 build_string ("ISO8859-8 (Hebrew)"),
2723 build_string ("iso8859-8"),
2724 NULL, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 32);
2725 Vcharset_katakana_jisx0201 =
2726 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2727 CHARSET_TYPE_94, 1, 1, 'I',
2728 CHARSET_LEFT_TO_RIGHT,
2729 build_string ("JISX0201 Kana"),
2730 build_string ("JISX0201.1976 (Japanese Kana)"),
2731 build_string ("JISX0201.1976 Japanese Kana"),
2732 build_string ("jisx0201.1976"),
2734 MIN_CHAR_HALFWIDTH_KATAKANA,
2735 MAX_CHAR_HALFWIDTH_KATAKANA, 33);
2736 Vcharset_latin_jisx0201 =
2737 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2738 CHARSET_TYPE_94, 1, 0, 'J',
2739 CHARSET_LEFT_TO_RIGHT,
2740 build_string ("JISX0201 Roman"),
2741 build_string ("JISX0201.1976 (Japanese Roman)"),
2742 build_string ("JISX0201.1976 Japanese Roman"),
2743 build_string ("jisx0201.1976"),
2744 latin_jisx0201_to_ucs, 0, 0, 33);
2745 Vcharset_cyrillic_iso8859_5 =
2746 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2747 CHARSET_TYPE_96, 1, 1, 'L',
2748 CHARSET_LEFT_TO_RIGHT,
2749 build_string ("ISO8859-5"),
2750 build_string ("ISO8859-5 (Cyrillic)"),
2751 build_string ("ISO8859-5 (Cyrillic)"),
2752 build_string ("iso8859-5"),
2753 NULL, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 32);
2754 Vcharset_latin_iso8859_9 =
2755 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2756 CHARSET_TYPE_96, 1, 1, 'M',
2757 CHARSET_LEFT_TO_RIGHT,
2758 build_string ("Latin-5"),
2759 build_string ("ISO8859-9 (Latin-5)"),
2760 build_string ("ISO8859-9 (Latin-5)"),
2761 build_string ("iso8859-9"),
2762 latin_iso8859_9_to_ucs, 0, 0, 32);
2763 Vcharset_japanese_jisx0208_1978 =
2764 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2765 CHARSET_TYPE_94X94, 2, 0, '@',
2766 CHARSET_LEFT_TO_RIGHT,
2767 build_string ("JISX0208.1978"),
2768 build_string ("JISX0208.1978 (Japanese)"),
2770 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
2771 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
/* GB2312 (simplified Chinese): 94x94 set with final byte 'A',
   left-to-right.  The first three strings are presumably short name,
   long name and doc string, following the pattern of the other
   charsets defined here -- confirm against make_charset.  */
2773 Vcharset_chinese_gb2312 =
2774 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2775 CHARSET_TYPE_94X94, 2, 0, 'A',
2776 CHARSET_LEFT_TO_RIGHT,
2777 build_string ("GB2312"),
2778 build_string ("GB2312"),
2779 build_string ("GB2312 Chinese simplified"),
2780 build_string ("gb2312"),
2782 Vcharset_japanese_jisx0208 =
2783 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2784 CHARSET_TYPE_94X94, 2, 0, 'B',
2785 CHARSET_LEFT_TO_RIGHT,
2786 build_string ("JISX0208"),
2787 build_string ("JISX0208.1983/1990 (Japanese)"),
2788 build_string ("JISX0208.1983/1990 Japanese Kanji"),
2789 build_string ("jisx0208.19\\(83\\|90\\)"),
/* KSC5601 (Korean): 94x94 set with final byte 'C', left-to-right.  The
   first three strings are presumably short name, long name and doc
   string, following the pattern of the other charsets defined here --
   confirm against make_charset.  */
2791 Vcharset_korean_ksc5601 =
2792 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2793 CHARSET_TYPE_94X94, 2, 0, 'C',
2794 CHARSET_LEFT_TO_RIGHT,
2795 build_string ("KSC5601"),
2796 build_string ("KSC5601 (Korean)"),
2797 build_string ("KSC5601 Korean Hangul and Hanja"),
2798 build_string ("ksc5601"),
2800 Vcharset_japanese_jisx0212 =
2801 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2802 CHARSET_TYPE_94X94, 2, 0, 'D',
2803 CHARSET_LEFT_TO_RIGHT,
2804 build_string ("JISX0212"),
2805 build_string ("JISX0212 (Japanese)"),
2806 build_string ("JISX0212 Japanese Supplement"),
2807 build_string ("jisx0212"),
2810 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2811 Vcharset_chinese_cns11643_1 =
2812 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2813 CHARSET_TYPE_94X94, 2, 0, 'G',
2814 CHARSET_LEFT_TO_RIGHT,
2815 build_string ("CNS11643-1"),
2816 build_string ("CNS11643-1 (Chinese traditional)"),
2818 ("CNS 11643 Plane 1 Chinese traditional"),
2819 build_string (CHINESE_CNS_PLANE_RE("1")),
2821 Vcharset_chinese_cns11643_2 =
2822 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2823 CHARSET_TYPE_94X94, 2, 0, 'H',
2824 CHARSET_LEFT_TO_RIGHT,
2825 build_string ("CNS11643-2"),
2826 build_string ("CNS11643-2 (Chinese traditional)"),
2828 ("CNS 11643 Plane 2 Chinese traditional"),
2829 build_string (CHINESE_CNS_PLANE_RE("2")),
2832 Vcharset_latin_viscii_lower =
2833 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2834 CHARSET_TYPE_96, 1, 1, '1',
2835 CHARSET_LEFT_TO_RIGHT,
2836 build_string ("VISCII lower"),
2837 build_string ("VISCII lower (Vietnamese)"),
2838 build_string ("VISCII lower (Vietnamese)"),
2839 build_string ("VISCII1.1"),
2840 latin_viscii_lower_to_ucs, 0, 0, 32);
2841 Vcharset_latin_viscii_upper =
2842 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2843 CHARSET_TYPE_96, 1, 1, '2',
2844 CHARSET_LEFT_TO_RIGHT,
2845 build_string ("VISCII upper"),
2846 build_string ("VISCII upper (Vietnamese)"),
2847 build_string ("VISCII upper (Vietnamese)"),
2848 build_string ("VISCII1.1"),
2849 latin_viscii_upper_to_ucs, 0, 0, 32);
2851 Vcharset_chinese_big5_1 =
2852 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2853 CHARSET_TYPE_94X94, 2, 0, '0',
2854 CHARSET_LEFT_TO_RIGHT,
2855 build_string ("Big5"),
2856 build_string ("Big5 (Level-1)"),
2858 ("Big5 Level-1 Chinese traditional"),
2859 build_string ("big5"),
2861 Vcharset_chinese_big5_2 =
2862 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2863 CHARSET_TYPE_94X94, 2, 0, '1',
2864 CHARSET_LEFT_TO_RIGHT,
2865 build_string ("Big5"),
2866 build_string ("Big5 (Level-2)"),
2868 ("Big5 Level-2 Chinese traditional"),
2869 build_string ("big5"),
2872 #ifdef ENABLE_COMPOSITE_CHARS
2873 /* #### For simplicity, we put composite chars into a 96x96 charset.
2874 This is going to lead to problems because you can run out of
2875 room, esp. as we don't yet recycle numbers. */
2876 Vcharset_composite =
2877 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2878 CHARSET_TYPE_96X96, 2, 0, 0,
2879 CHARSET_LEFT_TO_RIGHT,
2880 build_string ("Composite"),
2881 build_string ("Composite characters"),
2882 build_string ("Composite characters"),
2885 composite_char_row_next = 32;
2886 composite_char_col_next = 32;
2888 Vcomposite_char_string2char_hash_table =
2889 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2890 Vcomposite_char_char2string_hash_table =
2891 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2892 staticpro (&Vcomposite_char_string2char_hash_table);
2893 staticpro (&Vcomposite_char_char2string_hash_table);
2894 #endif /* ENABLE_COMPOSITE_CHARS */