1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
/* Global Lisp_Object handles, one per pre-defined charset named in the
   variable.  They are only declared here; nothing in this part of the
   file shows where they are given their values. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_chinese_cns11643_3;
62 Lisp_Object Vcharset_chinese_cns11643_4;
63 Lisp_Object Vcharset_chinese_cns11643_5;
64 Lisp_Object Vcharset_chinese_cns11643_6;
65 Lisp_Object Vcharset_chinese_cns11643_7;
66 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
/* State that exists only when ENABLE_COMPOSITE_CHARS is defined: the
   composite charset object, two hash tables (char -> string and
   string -> char) and the next free row/column counters.
   NOTE(review): as the text stands, the comment that introduces the hash
   tables has no closing delimiter of its own, so the four declarations
   after it are swallowed by it up to the delimiter on the #endif line.
   Its last line appears to be missing -- confirm against upstream. */
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
/* make_charset() stores each new charset object in this table at index
   (id - MIN_LEADING_BYTE), after asserting that the slot is still nil. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): two declarations of the same name with different shapes
   follow.  make_charset() likewise indexes the table both as
   [type][final] and as [type][final][direction], so these are presumably
   alternatives selected by a preprocessor conditional that is not
   visible here -- confirm. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* Lookup table: first byte of a character's string representation ->
   number of bytes in that representation.  make_charset() also stores
   rep_bytes_by_first_byte[id] for a newly created charset.
   NOTE(review): the array is declared with 0xA0 (160) slots but 176
   initializers are visible.  The two rows under the "0x80 - 0x8f"
   heading are presumably alternatives under a conditional that is not
   visible here, and the closing brace is not visible either --
   confirm. */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* For each of the 128 code points U+0100 .. U+017F (index = code point
   - 0x100, as the per-entry comments show), the LEADING_BYTE_* constant
   of a charset associated with that character.  Entries are drawn from
   ISO 8859-2/-3/-4, JIS X 0212 and UCS-BMP.  Presumably UCS-BMP marks
   characters that none of the other listed charsets contains -- confirm.
   The closing brace of the initializer is not visible here. */
128 Charset_ID latin_a_char_to_charset[128] = {
129 /* U+0100 */ LEADING_BYTE_LATIN_ISO8859_4,
130 /* U+0101 */ LEADING_BYTE_LATIN_ISO8859_4,
131 /* U+0102 */ LEADING_BYTE_LATIN_ISO8859_2,
132 /* U+0103 */ LEADING_BYTE_LATIN_ISO8859_2,
133 /* U+0104 */ LEADING_BYTE_LATIN_ISO8859_2,
134 /* U+0105 */ LEADING_BYTE_LATIN_ISO8859_2,
135 /* U+0106 */ LEADING_BYTE_LATIN_ISO8859_2,
136 /* U+0107 */ LEADING_BYTE_LATIN_ISO8859_2,
137 /* U+0108 */ LEADING_BYTE_LATIN_ISO8859_3,
138 /* U+0109 */ LEADING_BYTE_LATIN_ISO8859_3,
139 /* U+010A */ LEADING_BYTE_LATIN_ISO8859_3,
140 /* U+010B */ LEADING_BYTE_LATIN_ISO8859_3,
141 /* U+010C */ LEADING_BYTE_LATIN_ISO8859_2,
142 /* U+010D */ LEADING_BYTE_LATIN_ISO8859_2,
143 /* U+010E */ LEADING_BYTE_LATIN_ISO8859_2,
144 /* U+010F */ LEADING_BYTE_LATIN_ISO8859_2,
145 /* U+0110 */ LEADING_BYTE_LATIN_ISO8859_2,
146 /* U+0111 */ LEADING_BYTE_LATIN_ISO8859_2,
147 /* U+0112 */ LEADING_BYTE_LATIN_ISO8859_4,
148 /* U+0113 */ LEADING_BYTE_LATIN_ISO8859_4,
149 /* U+0114 */ LEADING_BYTE_UCS_BMP,
150 /* U+0115 */ LEADING_BYTE_UCS_BMP,
151 /* U+0116 */ LEADING_BYTE_LATIN_ISO8859_4,
152 /* U+0117 */ LEADING_BYTE_LATIN_ISO8859_4,
153 /* U+0118 */ LEADING_BYTE_LATIN_ISO8859_2,
154 /* U+0119 */ LEADING_BYTE_LATIN_ISO8859_2,
155 /* U+011A */ LEADING_BYTE_LATIN_ISO8859_2,
156 /* U+011B */ LEADING_BYTE_LATIN_ISO8859_2,
157 /* U+011C */ LEADING_BYTE_LATIN_ISO8859_3,
158 /* U+011D */ LEADING_BYTE_LATIN_ISO8859_3,
159 /* U+011E */ LEADING_BYTE_LATIN_ISO8859_3,
160 /* U+011F */ LEADING_BYTE_LATIN_ISO8859_3,
161 /* U+0120 */ LEADING_BYTE_LATIN_ISO8859_3,
162 /* U+0121 */ LEADING_BYTE_LATIN_ISO8859_3,
163 /* U+0122 */ LEADING_BYTE_LATIN_ISO8859_4,
164 /* U+0123 */ LEADING_BYTE_LATIN_ISO8859_4,
165 /* U+0124 */ LEADING_BYTE_LATIN_ISO8859_3,
166 /* U+0125 */ LEADING_BYTE_LATIN_ISO8859_3,
167 /* U+0126 */ LEADING_BYTE_LATIN_ISO8859_3,
168 /* U+0127 */ LEADING_BYTE_LATIN_ISO8859_3,
169 /* U+0128 */ LEADING_BYTE_LATIN_ISO8859_4,
170 /* U+0129 */ LEADING_BYTE_LATIN_ISO8859_4,
171 /* U+012A */ LEADING_BYTE_LATIN_ISO8859_4,
172 /* U+012B */ LEADING_BYTE_LATIN_ISO8859_4,
173 /* U+012C */ LEADING_BYTE_UCS_BMP,
174 /* U+012D */ LEADING_BYTE_UCS_BMP,
175 /* U+012E */ LEADING_BYTE_LATIN_ISO8859_4,
176 /* U+012F */ LEADING_BYTE_LATIN_ISO8859_4,
177 /* U+0130 */ LEADING_BYTE_LATIN_ISO8859_3,
178 /* U+0131 */ LEADING_BYTE_LATIN_ISO8859_3,
179 /* U+0132 */ LEADING_BYTE_JAPANESE_JISX0212,
180 /* U+0133 */ LEADING_BYTE_JAPANESE_JISX0212,
181 /* U+0134 */ LEADING_BYTE_LATIN_ISO8859_3,
182 /* U+0135 */ LEADING_BYTE_LATIN_ISO8859_3,
183 /* U+0136 */ LEADING_BYTE_LATIN_ISO8859_4,
184 /* U+0137 */ LEADING_BYTE_LATIN_ISO8859_4,
185 /* U+0138 */ LEADING_BYTE_LATIN_ISO8859_4,
186 /* U+0139 */ LEADING_BYTE_LATIN_ISO8859_2,
187 /* U+013A */ LEADING_BYTE_LATIN_ISO8859_2,
188 /* U+013B */ LEADING_BYTE_LATIN_ISO8859_4,
189 /* U+013C */ LEADING_BYTE_LATIN_ISO8859_4,
190 /* U+013D */ LEADING_BYTE_LATIN_ISO8859_2,
191 /* U+013E */ LEADING_BYTE_LATIN_ISO8859_2,
192 /* U+013F */ LEADING_BYTE_JAPANESE_JISX0212,
193 /* U+0140 */ LEADING_BYTE_JAPANESE_JISX0212,
194 /* U+0141 */ LEADING_BYTE_LATIN_ISO8859_2,
195 /* U+0142 */ LEADING_BYTE_LATIN_ISO8859_2,
196 /* U+0143 */ LEADING_BYTE_LATIN_ISO8859_2,
197 /* U+0144 */ LEADING_BYTE_LATIN_ISO8859_2,
198 /* U+0145 */ LEADING_BYTE_LATIN_ISO8859_4,
199 /* U+0146 */ LEADING_BYTE_LATIN_ISO8859_4,
200 /* U+0147 */ LEADING_BYTE_LATIN_ISO8859_2,
201 /* U+0148 */ LEADING_BYTE_LATIN_ISO8859_2,
202 /* U+0149 */ LEADING_BYTE_JAPANESE_JISX0212,
203 /* U+014A */ LEADING_BYTE_LATIN_ISO8859_4,
204 /* U+014B */ LEADING_BYTE_LATIN_ISO8859_4,
205 /* U+014C */ LEADING_BYTE_LATIN_ISO8859_4,
206 /* U+014D */ LEADING_BYTE_LATIN_ISO8859_4,
207 /* U+014E */ LEADING_BYTE_UCS_BMP,
208 /* U+014F */ LEADING_BYTE_UCS_BMP,
209 /* U+0150 */ LEADING_BYTE_LATIN_ISO8859_2,
210 /* U+0151 */ LEADING_BYTE_LATIN_ISO8859_2,
211 /* U+0152 */ LEADING_BYTE_JAPANESE_JISX0212,
212 /* U+0153 */ LEADING_BYTE_JAPANESE_JISX0212,
213 /* U+0154 */ LEADING_BYTE_LATIN_ISO8859_2,
214 /* U+0155 */ LEADING_BYTE_LATIN_ISO8859_2,
215 /* U+0156 */ LEADING_BYTE_LATIN_ISO8859_4,
216 /* U+0157 */ LEADING_BYTE_LATIN_ISO8859_4,
217 /* U+0158 */ LEADING_BYTE_LATIN_ISO8859_2,
218 /* U+0159 */ LEADING_BYTE_LATIN_ISO8859_2,
219 /* U+015A */ LEADING_BYTE_LATIN_ISO8859_2,
220 /* U+015B */ LEADING_BYTE_LATIN_ISO8859_2,
221 /* U+015C */ LEADING_BYTE_LATIN_ISO8859_3,
222 /* U+015D */ LEADING_BYTE_LATIN_ISO8859_3,
223 /* U+015E */ LEADING_BYTE_LATIN_ISO8859_2,
224 /* U+015F */ LEADING_BYTE_LATIN_ISO8859_2,
225 /* U+0160 */ LEADING_BYTE_LATIN_ISO8859_2,
226 /* U+0161 */ LEADING_BYTE_LATIN_ISO8859_2,
227 /* U+0162 */ LEADING_BYTE_LATIN_ISO8859_2,
228 /* U+0163 */ LEADING_BYTE_LATIN_ISO8859_2,
229 /* U+0164 */ LEADING_BYTE_LATIN_ISO8859_2,
230 /* U+0165 */ LEADING_BYTE_LATIN_ISO8859_2,
231 /* U+0166 */ LEADING_BYTE_LATIN_ISO8859_4,
232 /* U+0167 */ LEADING_BYTE_LATIN_ISO8859_4,
233 /* U+0168 */ LEADING_BYTE_LATIN_ISO8859_4,
234 /* U+0169 */ LEADING_BYTE_LATIN_ISO8859_4,
235 /* U+016A */ LEADING_BYTE_LATIN_ISO8859_4,
236 /* U+016B */ LEADING_BYTE_LATIN_ISO8859_4,
237 /* U+016C */ LEADING_BYTE_LATIN_ISO8859_3,
238 /* U+016D */ LEADING_BYTE_LATIN_ISO8859_3,
239 /* U+016E */ LEADING_BYTE_LATIN_ISO8859_2,
240 /* U+016F */ LEADING_BYTE_LATIN_ISO8859_2,
241 /* U+0170 */ LEADING_BYTE_LATIN_ISO8859_2,
242 /* U+0171 */ LEADING_BYTE_LATIN_ISO8859_2,
243 /* U+0172 */ LEADING_BYTE_LATIN_ISO8859_4,
244 /* U+0173 */ LEADING_BYTE_LATIN_ISO8859_4,
245 /* U+0174 */ LEADING_BYTE_JAPANESE_JISX0212,
246 /* U+0175 */ LEADING_BYTE_JAPANESE_JISX0212,
247 /* U+0176 */ LEADING_BYTE_JAPANESE_JISX0212,
248 /* U+0177 */ LEADING_BYTE_JAPANESE_JISX0212,
249 /* U+0178 */ LEADING_BYTE_JAPANESE_JISX0212,
250 /* U+0179 */ LEADING_BYTE_LATIN_ISO8859_2,
251 /* U+017A */ LEADING_BYTE_LATIN_ISO8859_2,
252 /* U+017B */ LEADING_BYTE_LATIN_ISO8859_2,
253 /* U+017C */ LEADING_BYTE_LATIN_ISO8859_2,
254 /* U+017D */ LEADING_BYTE_LATIN_ISO8859_2,
255 /* U+017E */ LEADING_BYTE_LATIN_ISO8859_2,
256 /* U+017F */ LEADING_BYTE_UCS_BMP
/* Companion to latin_a_char_to_charset: a byte value per code point in
   U+0100 .. U+017F, each written as (0xNN - 0x80).  Presumably 0xNN is
   the character's code in the charset chosen by the companion table --
   confirm.
   NOTE(review): fewer than 128 entries are listed and the U+ comments
   skip some code points (for example U+0114 and U+0115), so as written
   the values after a gap would not sit at index (code point - 0x100).
   Lines appear to be missing, as does the closing brace -- confirm
   against upstream. */
259 unsigned char latin_a_char_to_byte1[128] = {
260 /* U+0100 */ 0xC0 - 0x80,
261 /* U+0101 */ 0xE0 - 0x80,
262 /* U+0102 */ 0xC3 - 0x80,
263 /* U+0103 */ 0xE3 - 0x80,
264 /* U+0104 */ 0xA1 - 0x80,
265 /* U+0105 */ 0xB1 - 0x80,
266 /* U+0106 */ 0xC6 - 0x80,
267 /* U+0107 */ 0xE6 - 0x80,
268 /* U+0108 */ 0xC6 - 0x80,
269 /* U+0109 */ 0xE6 - 0x80,
270 /* U+010A */ 0xC5 - 0x80,
271 /* U+010B */ 0xE5 - 0x80,
272 /* U+010C */ 0xC8 - 0x80,
273 /* U+010D */ 0xE8 - 0x80,
274 /* U+010E */ 0xCF - 0x80,
275 /* U+010F */ 0xEF - 0x80,
276 /* U+0110 */ 0xD0 - 0x80,
277 /* U+0111 */ 0xF0 - 0x80,
278 /* U+0112 */ 0xAA - 0x80,
279 /* U+0113 */ 0xBA - 0x80,
282 /* U+0116 */ 0xCC - 0x80,
283 /* U+0117 */ 0xEC - 0x80,
284 /* U+0118 */ 0xCA - 0x80,
285 /* U+0119 */ 0xEA - 0x80,
286 /* U+011A */ 0xCC - 0x80,
287 /* U+011B */ 0xEC - 0x80,
288 /* U+011C */ 0xD8 - 0x80,
289 /* U+011D */ 0xF8 - 0x80,
290 /* U+011E */ 0xAB - 0x80,
291 /* U+011F */ 0xBB - 0x80,
292 /* U+0120 */ 0xD5 - 0x80,
293 /* U+0121 */ 0xF5 - 0x80,
294 /* U+0122 */ 0xAB - 0x80,
295 /* U+0123 */ 0xBB - 0x80,
296 /* U+0124 */ 0xA6 - 0x80,
297 /* U+0125 */ 0xB6 - 0x80,
298 /* U+0126 */ 0xA1 - 0x80,
299 /* U+0127 */ 0xB1 - 0x80,
300 /* U+0128 */ 0xA5 - 0x80,
301 /* U+0129 */ 0xB5 - 0x80,
302 /* U+012A */ 0xCF - 0x80,
303 /* U+012B */ 0xEF - 0x80,
306 /* U+012E */ 0xC7 - 0x80,
307 /* U+012F */ 0xE7 - 0x80,
308 /* U+0130 */ 0xA9 - 0x80,
309 /* U+0131 */ 0xB9 - 0x80,
312 /* U+0134 */ 0xAC - 0x80,
313 /* U+0135 */ 0xBC - 0x80,
314 /* U+0136 */ 0xD3 - 0x80,
315 /* U+0137 */ 0xF3 - 0x80,
316 /* U+0138 */ 0xA2 - 0x80,
317 /* U+0139 */ 0xC5 - 0x80,
318 /* U+013A */ 0xE5 - 0x80,
319 /* U+013B */ 0xA6 - 0x80,
320 /* U+013C */ 0xB6 - 0x80,
321 /* U+013D */ 0xA5 - 0x80,
322 /* U+013E */ 0xB5 - 0x80,
325 /* U+0141 */ 0xA3 - 0x80,
326 /* U+0142 */ 0xB3 - 0x80,
327 /* U+0143 */ 0xD1 - 0x80,
328 /* U+0144 */ 0xF1 - 0x80,
329 /* U+0145 */ 0xD1 - 0x80,
330 /* U+0146 */ 0xF1 - 0x80,
331 /* U+0147 */ 0xD2 - 0x80,
332 /* U+0148 */ 0xF2 - 0x80,
334 /* U+014A */ 0xBD - 0x80,
335 /* U+014B */ 0xBF - 0x80,
336 /* U+014C */ 0xD2 - 0x80,
337 /* U+014D */ 0xF2 - 0x80,
340 /* U+0150 */ 0xD5 - 0x80,
341 /* U+0151 */ 0xF5 - 0x80,
344 /* U+0154 */ 0xC0 - 0x80,
345 /* U+0155 */ 0xE0 - 0x80,
346 /* U+0156 */ 0xA3 - 0x80,
347 /* U+0157 */ 0xB3 - 0x80,
348 /* U+0158 */ 0xD8 - 0x80,
349 /* U+0159 */ 0xF8 - 0x80,
350 /* U+015A */ 0xA6 - 0x80,
351 /* U+015B */ 0xB6 - 0x80,
352 /* U+015C */ 0xDE - 0x80,
353 /* U+015D */ 0xFE - 0x80,
354 /* U+015E */ 0xAA - 0x80,
355 /* U+015F */ 0xBA - 0x80,
356 /* U+0160 */ 0xA9 - 0x80,
357 /* U+0161 */ 0xB9 - 0x80,
358 /* U+0162 */ 0xDE - 0x80,
359 /* U+0163 */ 0xFE - 0x80,
360 /* U+0164 */ 0xAB - 0x80,
361 /* U+0165 */ 0xBB - 0x80,
362 /* U+0166 */ 0xAC - 0x80,
363 /* U+0167 */ 0xBC - 0x80,
364 /* U+0168 */ 0xDD - 0x80,
365 /* U+0169 */ 0xFD - 0x80,
366 /* U+016A */ 0xDE - 0x80,
367 /* U+016B */ 0xFE - 0x80,
368 /* U+016C */ 0xDD - 0x80,
369 /* U+016D */ 0xFD - 0x80,
370 /* U+016E */ 0xD9 - 0x80,
371 /* U+016F */ 0xF9 - 0x80,
372 /* U+0170 */ 0xDB - 0x80,
373 /* U+0171 */ 0xFB - 0x80,
374 /* U+0172 */ 0xD9 - 0x80,
375 /* U+0173 */ 0xF9 - 0x80,
381 /* U+0179 */ 0xAC - 0x80,
382 /* U+017A */ 0xBC - 0x80,
383 /* U+017B */ 0xAF - 0x80,
384 /* U+017C */ 0xBF - 0x80,
385 /* U+017D */ 0xAE - 0x80,
386 /* U+017E */ 0xBE - 0x80,
390 unsigned char latin_a_char_to_byte2[128] = {
/* Miscellaneous globals and symbols for the charset code.  Only the
   uses named in the comments below are visible in this part of the
   file. */
521 Lisp_Object Vutf_2000_version;
525 int leading_code_private_11;
528 Lisp_Object Qcharsetp;
530 /* Qdoc_string, Qdimension, Qchars defined in general.c */
/* Qregistry, Qfinal, Qgraphic, Qdirection, Qshort_name and Qlong_name
   are compared against property keywords in Fmake_charset. */
531 Lisp_Object Qregistry, Qfinal, Qgraphic;
532 Lisp_Object Qdirection;
533 Lisp_Object Qreverse_direction_charset;
534 Lisp_Object Qleading_byte;
535 Lisp_Object Qshort_name, Qlong_name;
/* NOTE(review): the next line is a fragment of a longer declaration
   list whose other lines are not visible here. */
551 Qjapanese_jisx0208_1978,
/* The two values Fmake_charset accepts for the 'direction property. */
570 Lisp_Object Ql2r, Qr2l;
/* Maps charset name -> charset object: make_charset() adds entries with
   Fputhash and Ffind_charset looks names up with Fgethash. */
572 Lisp_Object Vcharset_hash_table;
/* Next leading byte that get_unallocated_leading_byte() hands out for a
   dimension-1 and a dimension-2 charset respectively; each is
   post-incremented when used. */
574 static Charset_ID next_allocated_1_byte_leading_byte;
575 static Charset_ID next_allocated_2_byte_leading_byte;
577 /* Composite characters are characters constructed by overstriking two
578 or more regular characters.
580 1) The old Mule implementation involves storing composite characters
581 in a buffer as a tag followed by all of the actual characters
582 used to make up the composite character. I think this is a bad
583 idea; it greatly complicates code that wants to handle strings
584 one character at a time because it has to deal with the possibility
585 of great big ungainly characters. It's much more reasonable to
586 simply store an index into a table of composite characters.
588 2) The current implementation only allows for 16,384 separate
589 composite characters over the lifetime of the XEmacs process.
590 This could become a potential problem if the user
591 edited lots of different files that use composite characters.
592 Due to FSF bogosity, increasing the number of allowable
593 composite characters under Mule would decrease the number
594 of possible faces that can exist. Mule already has shrunk
595 this to 2048, and further shrinkage would become uncomfortable.
596 No such problems exist in XEmacs.
598 Composite characters could be represented as 0x80 C1 C2 C3,
599 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
600 for slightly under 2^20 (one million) composite characters
601 over the XEmacs process lifetime, and you only need to
602 increase the size of a Mule character from 19 to 21 bits.
603 Or you could use 0x80 C1 C2 C3 C4, allowing for about
604 85 million (slightly over 2^26) composite characters. */
607 /************************************************************************/
608 /* Basic Emchar functions */
609 /************************************************************************/
611 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
612 string in STR. Returns the number of bytes stored.
613 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Writes the encoded form of `c' through the cursor `p'.  Two encodings
   are visible.  The first is a chain that emits a lead byte tagged
   0xc0/0xe0/0xf0/0xf8/0xfc followed by continuation bytes of the form
   (six bits | 0x80), i.e. the UTF-8 layout including its 5- and 6-byte
   forms.  The second is a leading-byte based path that starts with
   BREAKUP_CHAR.  Presumably the two are compile-time alternatives --
   confirm.  The selecting conditional, the declarations of `p', `lb',
   `charset', `c1' and `c2', the opening `if' of the chain and the
   return statement are not visible here. */
617 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Values up to 11 bits: two bytes. */
632 else if ( c <= 0x7ff )
634 *p++ = (c >> 6) | 0xc0;
635 *p++ = (c & 0x3f) | 0x80;
/* Values up to 16 bits: three bytes. */
637 else if ( c <= 0xffff )
639 *p++ = (c >> 12) | 0xe0;
640 *p++ = ((c >> 6) & 0x3f) | 0x80;
641 *p++ = (c & 0x3f) | 0x80;
/* Values up to 21 bits: four bytes. */
643 else if ( c <= 0x1fffff )
645 *p++ = (c >> 18) | 0xf0;
646 *p++ = ((c >> 12) & 0x3f) | 0x80;
647 *p++ = ((c >> 6) & 0x3f) | 0x80;
648 *p++ = (c & 0x3f) | 0x80;
/* Values up to 26 bits: five bytes. */
650 else if ( c <= 0x3ffffff )
652 *p++ = (c >> 24) | 0xf8;
653 *p++ = ((c >> 18) & 0x3f) | 0x80;
654 *p++ = ((c >> 12) & 0x3f) | 0x80;
655 *p++ = ((c >> 6) & 0x3f) | 0x80;
656 *p++ = (c & 0x3f) | 0x80;
/* Six-byte form; presumably the final `else' of the chain (that line is
   not visible here). */
660 *p++ = (c >> 30) | 0xfc;
661 *p++ = ((c >> 24) & 0x3f) | 0x80;
662 *p++ = ((c >> 18) & 0x3f) | 0x80;
663 *p++ = ((c >> 12) & 0x3f) | 0x80;
664 *p++ = ((c >> 6) & 0x3f) | 0x80;
665 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoding: split the character into charset and position
   codes, and emit a prefix byte first when the leading byte is a
   private one. */
668 BREAKUP_CHAR (c, charset, c1, c2);
669 lb = CHAR_LEADING_BYTE (c);
670 if (LEADING_BYTE_PRIVATE_P (lb))
671 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
673 if (EQ (charset, Vcharset_control_1))
682 /* Return the first character from a Mule-encoded string in STR,
683 assuming it's non-ASCII. Do not call this directly.
684 Use the macro charptr_emchar() instead. */
/* Two decoders are visible.  One classifies the first byte `b' by its
   high bits (tests against 0xf8, 0xf0, 0xe0, 0xc0) and then shifts six
   bits per step into `ch'.  The other looks the charset up by leading
   byte and returns MAKE_CHAR (charset, i1, i2).  Presumably these are
   compile-time alternatives -- confirm.  The conditional, the bodies of
   the classification branches and several declarations are not visible
   here. */
687 non_ascii_charptr_emchar (CONST Bufbyte *str)
700 else if ( b >= 0xf8 )
705 else if ( b >= 0xf0 )
710 else if ( b >= 0xe0 )
715 else if ( b >= 0xc0 )
/* Each iteration appends the low six bits of `b' to `ch'. */
725 for( ; len > 0; len-- )
728 ch = ( ch << 6 ) | ( b & 0x3f );
/* i2 defaults to 0. */
732 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1: the result is the byte after the leading byte, less 0x20. */
735 if (i0 == LEADING_BYTE_CONTROL_1)
736 return (Emchar) (*++str - 0x20);
738 if (LEADING_BYTE_PREFIX_P (i0))
743 charset = CHARSET_BY_LEADING_BYTE (i0);
/* Dimension-2 charsets get extra handling whose body is not visible
   here. */
744 if (XCHARSET_DIMENSION (charset) == 2)
747 return MAKE_CHAR (charset, i1, i2);
751 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
752 Do not call this directly. Use the macro valid_char_p() instead. */
/* Splits `ch' into three fields with CHAR_FIELD1/2/3 and tests them
   against the official and private field ranges.  Both visible return
   statements yield (XCHARSET_CHARS (charset) == 96).  The branch
   structure and the remaining return statements are not visible
   here. */
756 non_ascii_valid_char_p (Emchar ch)
760 /* Must have only lowest 19 bits set */
764 f1 = CHAR_FIELD1 (ch);
765 f2 = CHAR_FIELD2 (ch);
766 f3 = CHAR_FIELD3 (ch);
/* True when f2 lies outside both the official and the private FIELD2
   ranges. */
772 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
773 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
774 f2 > MAX_CHAR_FIELD2_PRIVATE)
779 if (f3 != 0x20 && f3 != 0x7F)
/* NOTE(review): the next three lines read as comment text but carry no
   comment delimiters here. */
783 NOTE: This takes advantage of the fact that
784 FIELD2_TO_OFFICIAL_LEADING_BYTE and
785 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
787 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
788 return (XCHARSET_CHARS (charset) == 96);
/* True when f1 lies outside both the official and the private FIELD1
   ranges. */
794 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
795 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
796 f1 > MAX_CHAR_FIELD1_PRIVATE)
798 if (f2 < 0x20 || f3 < 0x20)
801 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are looked up in the char -> string hash
   table. */
802 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
804 if (UNBOUNDP (Fgethash (make_int (ch),
805 Vcomposite_char_char2string_hash_table,
810 #endif /* ENABLE_COMPOSITE_CHARS */
812 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
/* Official and private FIELD1 values use different offsets to reach the
   leading byte. */
815 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
817 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
820 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
822 return (XCHARSET_CHARS (charset) == 96);
828 /************************************************************************/
829 /* Basic string functions */
830 /************************************************************************/
832 /* Copy the character pointed to by PTR into STR, assuming it's
833 non-ASCII. Do not call this directly. Use the macro
834 charptr_copy_char() instead. */
/* Copies the bytes of one character from `ptr' to `str' using a
   fall-through switch on the representation length, and returns the
   number of bytes in the copy (strptr + 1 - str).
   NOTE(review): the switch dispatches on *strptr, i.e. a byte of the
   destination, and no visible line stores the first source byte there
   beforehand.  A line appears to be missing between the declaration and
   the switch -- confirm against upstream. */
837 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
839 Bufbyte *strptr = str;
841 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
843 /* Notice fallthrough. */
845 case 6: *++strptr = *ptr++;
846 case 5: *++strptr = *ptr++;
848 case 4: *++strptr = *ptr++;
849 case 3: *++strptr = *ptr++;
/* The last byte is copied without advancing `ptr'. */
850 case 2: *++strptr = *ptr;
855 return strptr + 1 - str;
859 /************************************************************************/
860 /* streams of Emchars */
861 /************************************************************************/
863 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
864 The functions below are not meant to be called directly; use
865 the macros in insdel.h. */
/* `ch' supplies the first byte of the character and is stored in
   str[0].  Further bytes are read from `stream' with Lstream_getc()
   according to REP_BYTES_BY_FIRST_BYTE (ch), and the assembled bytes are
   decoded with charptr_emchar().  The case labels, and whatever is done
   between each read and the store that follows it, are not visible
   here. */
868 Lstream_get_emchar_1 (Lstream *stream, int ch)
870 Bufbyte str[MAX_EMCHAR_LEN];
871 Bufbyte *strptr = str;
873 str[0] = (Bufbyte) ch;
874 switch (REP_BYTES_BY_FIRST_BYTE (ch))
876 /* Notice fallthrough. */
879 ch = Lstream_getc (stream);
881 *++strptr = (Bufbyte) ch;
883 ch = Lstream_getc (stream);
885 *++strptr = (Bufbyte) ch;
888 ch = Lstream_getc (stream);
890 *++strptr = (Bufbyte) ch;
892 ch = Lstream_getc (stream);
894 *++strptr = (Bufbyte) ch;
896 ch = Lstream_getc (stream);
898 *++strptr = (Bufbyte) ch;
903 return charptr_emchar (str);
/* Encodes `ch' into a local buffer with set_charptr_emchar() and writes
   the resulting `len' bytes to `stream'.  Returns whatever
   Lstream_write() returns. */
907 Lstream_fput_emchar (Lstream *stream, Emchar ch)
909 Bufbyte str[MAX_EMCHAR_LEN];
910 Bytecount len = set_charptr_emchar (str, ch);
911 return Lstream_write (stream, str, len);
/* Encodes `ch' into a local buffer with set_charptr_emchar() and pushes
   the resulting `len' bytes back onto `stream' with Lstream_unread(). */
915 Lstream_funget_emchar (Lstream *stream, Emchar ch)
917 Bufbyte str[MAX_EMCHAR_LEN];
918 Bytecount len = set_charptr_emchar (str, ch);
919 Lstream_unread (stream, str, len);
923 /************************************************************************/
925 /************************************************************************/
/* Mark method for charset objects (it is passed to
   DEFINE_LRECORD_IMPLEMENTATION below).  Calls `markobj' on the
   short_name, long_name, doc_string, registry and ccl_program slots of
   the charset.  The return statement is not visible here. */
928 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
930 struct Lisp_Charset *cs = XCHARSET (obj);
932 markobj (cs->short_name);
933 markobj (cs->long_name);
934 markobj (cs->doc_string);
935 markobj (cs->registry);
936 markobj (cs->ccl_program);
/* Print method for charset objects (it is passed to
   DEFINE_LRECORD_IMPLEMENTATION below).  Output, in order: the prefix
   "#<charset ", the name, short name, long name and doc string separated
   by spaces, a formatted summary (type, direction, columns, graphic,
   final byte), the registry, and the record uid in hex followed by ">".
   The declaration of `buf', the condition guarding the error() call and
   parts of the sprintf argument list are not visible here. */
941 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
943 struct Lisp_Charset *cs = XCHARSET (obj);
/* Error path for an unreadable object; its guarding test is not visible
   here. */
947 error ("printing unreadable object #<charset %s 0x%x>",
948 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
951 write_c_string ("#<charset ", printcharfun);
952 print_internal (CHARSET_NAME (cs), printcharfun, 0);
953 write_c_string (" ", printcharfun);
954 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
955 write_c_string (" ", printcharfun);
956 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
957 write_c_string (" ", printcharfun);
958 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* The type is rendered as "94", "96" or "94x94" by the visible arms of
   the conditional; the remaining arms are not visible here. */
959 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
960 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
961 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
962 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
964 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
965 CHARSET_COLUMNS (cs),
966 CHARSET_GRAPHIC (cs),
968 write_c_string (buf, printcharfun);
969 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
970 sprintf (buf, " 0x%x>", cs->header.uid);
971 write_c_string (buf, printcharfun);
/* Record description for struct Lisp_Charset, followed by the lrecord
   implementation for the "charset" type.  The single visible entry
   starts at the offset of the `name' field with a count of 7 --
   presumably seven consecutive Lisp_Object fields; confirm against the
   struct definition.  The terminating entry and closing brace of the
   array are not visible here.  The implementation names mark_charset
   and print_charset as its mark and print methods and passes 0 for the
   next three arguments. */
974 static const struct lrecord_description charset_description[] = {
975 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
979 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
980 mark_charset, print_charset, 0, 0, 0, charset_description,
981 struct Lisp_Charset);
982 /* Make a new charset. */
/* Allocates a struct Lisp_Charset record, fills its fields from the
   arguments, derives the dimension and per-dimension character count
   from `type', and registers the object in charset_by_attributes,
   charset_by_leading_byte, rep_bytes_by_first_byte and
   Vcharset_hash_table.  The ccl_program and reverse_direction_charset
   slots start out as Qnil.  The last parameter (`reg', judging by its
   use below), the declaration of `obj', the conditions guarding the
   registration steps and the return statement are not visible here. */
985 make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
986 unsigned char type, unsigned char columns, unsigned char graphic,
987 Bufbyte final, unsigned char direction, Lisp_Object short_name,
988 Lisp_Object long_name, Lisp_Object doc,
992 struct Lisp_Charset *cs =
993 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
994 XSETCHARSET (obj, cs);
996 CHARSET_ID (cs) = id;
997 CHARSET_NAME (cs) = name;
998 CHARSET_SHORT_NAME (cs) = short_name;
999 CHARSET_LONG_NAME (cs) = long_name;
1000 CHARSET_REP_BYTES (cs) = rep_bytes;
1001 CHARSET_DIRECTION (cs) = direction;
1002 CHARSET_TYPE (cs) = type;
1003 CHARSET_COLUMNS (cs) = columns;
1004 CHARSET_GRAPHIC (cs) = graphic;
1005 CHARSET_FINAL (cs) = final;
1006 CHARSET_DOC_STRING (cs) = doc;
1007 CHARSET_REGISTRY (cs) = reg;
1008 CHARSET_CCL_PROGRAM (cs) = Qnil;
1009 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* Derive dimension and per-dimension character count from the type. */
1011 switch ( CHARSET_TYPE (cs) )
1013 case CHARSET_TYPE_94:
1014 CHARSET_DIMENSION (cs) = 1;
1015 CHARSET_CHARS (cs) = 94;
1017 case CHARSET_TYPE_96:
1018 CHARSET_DIMENSION (cs) = 1;
1019 CHARSET_CHARS (cs) = 96;
1021 case CHARSET_TYPE_94X94:
1022 CHARSET_DIMENSION (cs) = 2;
1023 CHARSET_CHARS (cs) = 94;
1025 case CHARSET_TYPE_96X96:
1026 CHARSET_DIMENSION (cs) = 2;
1027 CHARSET_CHARS (cs) = 96;
1030 case CHARSET_TYPE_128X128:
1031 CHARSET_DIMENSION (cs) = 2;
1032 CHARSET_CHARS (cs) = 128;
1034 case CHARSET_TYPE_256X256:
1035 CHARSET_DIMENSION (cs) = 2;
1036 CHARSET_CHARS (cs) = 256;
1043 /* some charsets do not have final characters. This includes
1044 ASCII, Control-1, Composite, and the two faux private
1047 assert (NILP (charset_by_attributes[type][final]));
1048 charset_by_attributes[type][final] = obj;
1050 assert (NILP (charset_by_attributes[type][final][direction]));
1051 charset_by_attributes[type][final][direction] = obj;
1055 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1056 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1059 /* official leading byte */
1060 rep_bytes_by_first_byte[id] = rep_bytes;
1063 /* Some charsets are "faux" and don't have names or really exist at
1064 all except in the leading-byte table. */
1066 Fputhash (name, obj, Vcharset_hash_table);
/* Hands out the next unused leading byte for a charset of the given
   dimension by post-incrementing next_allocated_1_byte_leading_byte or
   next_allocated_2_byte_leading_byte.  Each counter is first compared
   against MAX_LEADING_BYTE_PRIVATE_1 or MAX_LEADING_BYTE_PRIVATE_2.
   Presumably the "No more character sets free" error, which carries
   `dimension' as its data, is signalled when that range is exhausted --
   the lines connecting the test to the error, the declaration of `lb'
   and the return statement are not visible here. */
1071 get_unallocated_leading_byte (int dimension)
1077 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1080 lb = next_allocated_1_byte_leading_byte++;
1084 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1087 lb = next_allocated_2_byte_leading_byte++;
1092 ("No more character sets free for this dimension",
1093 make_int (dimension));
1099 /************************************************************************/
1100 /* Basic charset Lisp functions */
1101 /************************************************************************/
/* Lisp predicate: yields Qt when CHARSETP (object) holds and Qnil
   otherwise.  The end of the doc string and the argument list are not
   visible here. */
1103 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1104 Return non-nil if OBJECT is a charset.
1108 return CHARSETP (object) ? Qt : Qnil;
/* Implementation: a charset object is returned unchanged.  Anything
   else must pass CHECK_SYMBOL and is then looked up in
   Vcharset_hash_table, with Qnil as the value for a missing key.  The
   end of the doc string and the argument list are not visible here. */
1111 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1112 Retrieve the charset of the given name.
1113 If CHARSET-OR-NAME is a charset object, it is simply returned.
1114 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1115 nil is returned. Otherwise the associated charset object is returned.
1119 if (CHARSETP (charset_or_name))
1120 return charset_or_name;
1122 CHECK_SYMBOL (charset_or_name);
1123 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Implementation: delegates to Ffind_charset and signals the "No such
   charset" error with `name' as its data.  Presumably that happens only
   when the lookup returned nil -- the test and the return statement are
   not visible here. */
1126 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1127 Retrieve the charset of the given name.
1128 Same as `find-charset' except an error is signalled if there is no such
1129 charset instead of returning nil.
1133 Lisp_Object charset = Ffind_charset (name);
1136 signal_simple_error ("No such charset", name);
1140 /* We store the charsets in hash tables with the names as the key and the
1141 actual charset object as the value. Occasionally we need to use them
1142 in a list format. These routines provide us with that. */
/* Closure handed to add_charset_to_list_mapper through the void *
   argument of elisp_maphash: a pointer to the list being built.  The
   braces of the struct are not visible here. */
1143 struct charset_list_closure
1145 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list.  `value' is the
   charset object; its name is consed onto the front of the list that
   the closure points to.  `key' is not used in the visible lines, and
   the return statement is not visible here. */
1149 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1150 void *charset_list_closure)
1152 /* This function can GC */
1153 struct charset_list_closure *chcl =
1154 (struct charset_list_closure*) charset_list_closure;
1155 Lisp_Object *charset_list = chcl->charset_list;
1157 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Implementation: starts from an empty list, protects it with GCPRO1,
   and maps add_charset_to_list_mapper over Vcharset_hash_table so that
   every charset's name is consed onto it.  The end of the doc string
   and the matching UNGCPRO are not visible here. */
1161 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1162 Return a list of the names of all defined charsets.
1166 Lisp_Object charset_list = Qnil;
1167 struct gcpro gcpro1;
1168 struct charset_list_closure charset_list_closure;
1170 GCPRO1 (charset_list);
1171 charset_list_closure.charset_list = &charset_list;
1172 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1173 &charset_list_closure);
1176 return charset_list;
/* Implementation: resolves the argument with Fget_charset and returns
   the name slot of the resulting charset.  The end of the doc string
   and the argument list are not visible here. */
1179 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1180 Return the name of the given charset.
1184 return XCHARSET_NAME (Fget_charset (charset));
/* Implementation outline, as far as it is visible here:
   1. `name' must be a symbol that does not already name a charset, and
      `doc_string' must be a string unless it is nil.
   2. The property list is walked and each recognized keyword is
      validated; an unknown keyword signals "Unrecognized property".
   3. 'final must be supplied, and must not exceed 0x5F for dimension 2.
   4. The charset type is derived from `chars' (94 or 96) in one- and
      two-dimensional variants, and it is an error if a charset with the
      same type and final byte already exists in either direction.
   5. Every visible branch of the id selection obtains the id from
      get_unallocated_leading_byte (dimension).
   6. Missing doc string and registry default to "", the short name to
      the symbol's name, the long name to the doc string, and the
      columns to the dimension.
   7. make_charset() creates the object and the CCL program is stored
      afterwards when one was given.
   Many lines (braces, several conditions, the assignments for some
   properties and the return statement) are not visible here.
   NOTE(review): the 'long-name test is a plain `if', not an `else if',
   so it starts a new chain after the 'short-name test.  If that chain
   ends in the "Unrecognized property" error, a 'short-name property
   would be accepted by the first test and then rejected by the chain --
   confirm, and make it `else if' if so. */
1187 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1188 Define a new character set.
1189 This function is for use with Mule support.
1190 NAME is a symbol, the name by which the character set is normally referred.
1191 DOC-STRING is a string describing the character set.
1192 PROPS is a property list, describing the specific nature of the
1193 character set. Recognized properties are:
1195 'short-name Short version of the charset name (ex: Latin-1)
1196 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1197 'registry A regular expression matching the font registry field for
1199 'dimension Number of octets used to index a character in this charset.
1200 Either 1 or 2. Defaults to 1.
1201 'columns Number of columns used to display a character in this charset.
1202 Only used in TTY mode. (Under X, the actual width of a
1203 character can be derived from the font used to display the
1204 characters.) If unspecified, defaults to the dimension
1205 (this is almost always the correct value).
1206 'chars Number of characters in each dimension (94 or 96).
1207 Defaults to 94. Note that if the dimension is 2, the
1208 character set thus described is 94x94 or 96x96.
1209 'final Final byte of ISO 2022 escape sequence. Must be
1210 supplied. Each combination of (DIMENSION, CHARS) defines a
1211 separate namespace for final bytes. Note that ISO
1212 2022 restricts the final byte to the range
1213 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1214 dimension == 2. Note also that final bytes in the range
1215 0x30 - 0x3F are reserved for user-defined (not official)
1217 'graphic 0 (use left half of font on output) or 1 (use right half
1218 of font on output). Defaults to 0. For example, for
1219 a font whose registry is ISO8859-1, the left half
1220 (octets 0x20 - 0x7F) is the `ascii' character set, while
1221 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1222 character set. With 'graphic set to 0, the octets
1223 will have their high bit cleared; with it set to 1,
1224 the octets will have their high bit set.
1225 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1227 'ccl-program A compiled CCL program used to convert a character in
1228 this charset into an index into the font. This is in
1229 addition to the 'graphic property. The CCL program
1230 is passed the octets of the character, with the high
1231 bit cleared and set depending upon whether the value
1232 of the 'graphic property is 0 or 1.
1234 (name, doc_string, props))
1236 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1237 int direction = CHARSET_LEFT_TO_RIGHT;
1239 Lisp_Object registry = Qnil;
1240 Lisp_Object charset;
1241 Lisp_Object rest, keyword, value;
1242 Lisp_Object ccl_program = Qnil;
1243 Lisp_Object short_name = Qnil, long_name = Qnil;
1245 CHECK_SYMBOL (name);
1246 if (!NILP (doc_string))
1247 CHECK_STRING (doc_string);
1249 charset = Ffind_charset (name);
1250 if (!NILP (charset))
1251 signal_simple_error ("Cannot redefine existing charset", name);
1253 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1255 if (EQ (keyword, Qshort_name))
1257 CHECK_STRING (value);
1261 if (EQ (keyword, Qlong_name))
1263 CHECK_STRING (value);
1267 else if (EQ (keyword, Qdimension))
1270 dimension = XINT (value);
1271 if (dimension < 1 || dimension > 2)
1272 signal_simple_error ("Invalid value for 'dimension", value);
1275 else if (EQ (keyword, Qchars))
1278 chars = XINT (value);
1279 if (chars != 94 && chars != 96)
1280 signal_simple_error ("Invalid value for 'chars", value);
1283 else if (EQ (keyword, Qcolumns))
1286 columns = XINT (value);
1287 if (columns != 1 && columns != 2)
1288 signal_simple_error ("Invalid value for 'columns", value);
1291 else if (EQ (keyword, Qgraphic))
1294 graphic = XINT (value);
1295 if (graphic < 0 || graphic > 1)
1296 signal_simple_error ("Invalid value for 'graphic", value);
1299 else if (EQ (keyword, Qregistry))
1301 CHECK_STRING (value);
1305 else if (EQ (keyword, Qdirection))
1307 if (EQ (value, Ql2r))
1308 direction = CHARSET_LEFT_TO_RIGHT;
1309 else if (EQ (value, Qr2l))
1310 direction = CHARSET_RIGHT_TO_LEFT;
1312 signal_simple_error ("Invalid value for 'direction", value);
1315 else if (EQ (keyword, Qfinal))
1317 CHECK_CHAR_COERCE_INT (value);
1318 final = XCHAR (value);
1319 if (final < '0' || final > '~')
1320 signal_simple_error ("Invalid value for 'final", value);
1323 else if (EQ (keyword, Qccl_program))
1325 CHECK_VECTOR (value);
1326 ccl_program = value;
1330 signal_simple_error ("Unrecognized property", keyword);
1334 error ("'final must be specified");
1335 if (dimension == 2 && final > 0x5F)
1337 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1341 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1343 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1345 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1346 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1348 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1355 /* id = CHARSET_ID_OFFSET_94 + final; */
1356 id = get_unallocated_leading_byte (dimension);
1358 else if (chars == 96)
1360 id = get_unallocated_leading_byte (dimension);
1367 else if (dimension == 2)
1371 id = get_unallocated_leading_byte (dimension);
1373 else if (chars == 96)
1375 id = get_unallocated_leading_byte (dimension);
1387 id = get_unallocated_leading_byte (dimension);
/* Fill in defaults for the properties that were not supplied. */
1390 if (NILP (doc_string))
1391 doc_string = build_string ("");
1393 if (NILP (registry))
1394 registry = build_string ("");
1396 if (NILP (short_name))
1397 XSETSTRING (short_name, XSYMBOL (name)->name);
1399 if (NILP (long_name))
1400 long_name = doc_string;
1403 columns = dimension;
/* The rep_bytes argument of make_charset() is given as dimension + 2. */
1404 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
1405 final, direction, short_name, long_name, doc_string, registry);
1406 if (!NILP (ccl_program))
1407 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.
   Resolves CHARSET with Fget_charset and signals an error if it already
   has a reverse-direction charset, or if NEW-NAME already names a
   charset.  Otherwise every property of CHARSET is copied, the
   direction is flipped, an id is taken from
   get_unallocated_leading_byte, and the copy is built with
   make_charset.  The two charsets are then linked to each other through
   their reverse-direction-charset fields.
   NOTE(review): the argument-count line, the braces and the return
   statement of this definition are not visible in this excerpt.  */
1411 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1413 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1414 NEW-NAME is the name of the new charset. Return the new charset.
1416 (charset, new_name))
1418 Lisp_Object new_charset = Qnil;
1419 int id, dimension, columns, graphic, final;
1420 int direction, type;
1421 Lisp_Object registry, doc_string, short_name, long_name;
1422 struct Lisp_Charset *cs;
1424 charset = Fget_charset (charset);
1425 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1426 signal_simple_error ("Charset already has reverse-direction charset",
1429 CHECK_SYMBOL (new_name);
1430 if (!NILP (Ffind_charset (new_name)))
1431 signal_simple_error ("Cannot redefine existing charset", new_name);
1433 cs = XCHARSET (charset);
1435 type = CHARSET_TYPE (cs);
1436 columns = CHARSET_COLUMNS (cs);
1437 dimension = CHARSET_DIMENSION (cs);
1438 id = get_unallocated_leading_byte (dimension);
1440 graphic = CHARSET_GRAPHIC (cs);
1441 final = CHARSET_FINAL (cs);
/* Start from right-to-left and switch to left-to-right when the source
   charset is itself right-to-left, i.e. always pick the opposite.  */
1442 direction = CHARSET_RIGHT_TO_LEFT;
1443 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1444 direction = CHARSET_LEFT_TO_RIGHT;
1445 doc_string = CHARSET_DOC_STRING (cs);
1446 short_name = CHARSET_SHORT_NAME (cs);
1447 long_name = CHARSET_LONG_NAME (cs);
1448 registry = CHARSET_REGISTRY (cs);
1450 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
1451 graphic, final, direction, short_name, long_name,
1452 doc_string, registry);
/* Link the pair in both directions.  */
1454 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1455 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1460 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.
   Resolves CHARSET with Fget_charset and returns the contents of its
   reverse-direction-charset field.
   NOTE(review): no DEFSUBR for this function is visible in
   syms_of_mule_charset in this excerpt -- confirm whether the primitive
   is actually compiled in and registered.  */
1462 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1464 Return the reverse-direction charset parallel to CHARSET, if any.
1465 This is the charset with the same properties (in particular, the same
1466 dimension, number of characters per dimension, and final byte) as
1467 CHARSET but whose characters are displayed in the opposite direction.
1471 charset = Fget_charset (charset);
1472 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.
   Validates its arguments and signals an error for anything else:
   DIMENSION must be 1 or 2, CHARS 94 or 96, FINAL between '0' and '~'
   (and no greater than 0x5F when DIMENSION is 2), and DIRECTION one of
   'l2r, 'r2l or nil.  The charset type is derived from DIMENSION and
   CHARS, the charset is looked up with CHARSET_BY_ATTRIBUTES, and the
   name of the charset found is returned.
   NOTE(review): the assignments to `ch' and `fi', the declaration of
   `type' and the conditions guarding the individual lookups are not
   visible in this excerpt.  */
1476 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1477 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1478 If DIRECTION is omitted, both directions will be checked (left-to-right
1479 will be returned if character sets exist for both directions).
1481 (dimension, chars, final, direction))
1483 int dm, ch, fi, di = -1;
1485 Lisp_Object obj = Qnil;
1487 CHECK_INT (dimension);
1488 dm = XINT (dimension);
1489 if (dm < 1 || dm > 2)
1490 signal_simple_error ("Invalid value for DIMENSION", dimension);
1494 if (ch != 94 && ch != 96)
1495 signal_simple_error ("Invalid value for CHARS", chars);
1497 CHECK_CHAR_COERCE_INT (final);
1499 if (fi < '0' || fi > '~')
1500 signal_simple_error ("Invalid value for FINAL", final);
1502 if (EQ (direction, Ql2r))
1503 di = CHARSET_LEFT_TO_RIGHT;
1504 else if (EQ (direction, Qr2l))
1505 di = CHARSET_RIGHT_TO_LEFT;
1506 else if (!NILP (direction))
1507 signal_simple_error ("Invalid value for DIRECTION", direction);
1509 if (dm == 2 && fi > 0x5F)
1511 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1514 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1516 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1520 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1522 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1525 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1528 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET with
   Fget_charset and returns its XCHARSET_SHORT_NAME field.  */
1532 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1533 Return short name of CHARSET.
1537 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET with
   Fget_charset and returns its XCHARSET_LONG_NAME field.  */
1540 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1541 Return long name of CHARSET.
1545 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET with
   Fget_charset and returns its XCHARSET_DOC_STRING field.  */
1548 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1549 Return description of CHARSET.
1553 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET with
   Fget_charset and returns its XCHARSET_DIMENSION value boxed with
   make_int.  */
1556 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1557 Return dimension of CHARSET.
1561 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.
   Resolves CHARSET with Fget_charset, checks that PROP is a symbol and
   then tests PROP against each known property symbol in turn.  Integer
   fields are boxed with make_int, the final byte with make_char, and
   the direction is reported as the symbol 'l2r or 'r2l.  For
   'reverse-direction-charset the name of the linked charset is
   returned (the condition guarding that return is not visible in this
   excerpt).  Any other PROP signals
   "Unrecognized charset property name".  */
1564 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1565 Return property PROP of CHARSET.
1566 Recognized properties are those listed in `make-charset', as well as
1567 'name and 'doc-string.
1571 struct Lisp_Charset *cs;
1573 charset = Fget_charset (charset);
1574 cs = XCHARSET (charset);
1576 CHECK_SYMBOL (prop);
1577 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1578 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1579 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1580 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1581 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1582 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1583 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1584 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1585 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1586 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1587 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1588 if (EQ (prop, Qdirection))
1589 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1590 if (EQ (prop, Qreverse_direction_charset))
1592 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1596 return XCHARSET_NAME (obj);
1598 signal_simple_error ("Unrecognized charset property name", prop);
1599 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET with Fget_charset and
   returns its XCHARSET_LEADING_BYTE value boxed with make_int.  */
1602 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1603 Return charset identification number of CHARSET.
1607 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1610 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.
   Resolves CHARSET with Fget_charset, requires CCL-PROGRAM to be a
   vector (CHECK_VECTOR) and stores it in the charset's ccl-program
   field.  The return statement is not visible in this excerpt.  */
1613 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1614 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1616 (charset, ccl_program))
1618 charset = Fget_charset (charset);
1619 CHECK_VECTOR (ccl_program);
1620 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Walk every device with DEVICE_LOOP_NO_BREAK, look CHARSET up in that
   device's charset_font_cache hash table and, when an entry exists
   (the lookup did not yield Qunbound), empty the hash table stored
   there with Fclrhash.  */
1625 invalidate_charset_font_caches (Lisp_Object charset)
1627 /* Invalidate font cache entries for charset on all devices. */
1628 Lisp_Object devcons, concons, hash_table;
1629 DEVICE_LOOP_NO_BREAK (devcons, concons)
1631 struct device *d = XDEVICE (XCAR (devcons));
1632 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1633 if (!UNBOUNDP (hash_table))
1634 Fclrhash (hash_table);
1638 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
/* Lisp primitive `set-charset-registry'.
   Resolves CHARSET with Fget_charset, requires REGISTRY to be a string
   and stores it in the charset's registry field.  The per-device font
   caches for the charset are then cleared with
   invalidate_charset_font_caches, and face_property_was_changed is
   called for the font property of Vdefault_face -- presumably so that
   fonts are looked up again under the new registry; confirm against
   the face code.  */
1639 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1640 Set the 'registry property of CHARSET to REGISTRY.
1642 (charset, registry))
1644 charset = Fget_charset (charset);
1645 CHECK_STRING (registry);
1646 XCHARSET_REGISTRY (charset) = registry;
1647 invalidate_charset_font_caches (charset);
1648 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1653 /************************************************************************/
1654 /* Lisp primitives for working with characters */
1655 /************************************************************************/
/* Lisp primitive `make-char'.
   Resolves CHARSET with Fget_charset and picks the permitted octet
   range from it: 0-127 for ASCII, 0-31 for control-1, 33-126 for
   charsets with 94 characters per dimension and 32-127 otherwise.
   ARG1 (and ARG2 for two-dimensional charsets) is masked with 0x7f
   before being range-checked; an out-of-range octet is reported with
   args_out_of_range_3.  One-dimensional charsets build the character
   from ARG1 alone and reject a second octet; otherwise both octets are
   passed to MAKE_CHAR.
   NOTE(review): the declarations of `a1'/`a2', the type checks on the
   arguments and the conditions around the ARG2 error are not visible
   in this excerpt.  */
1657 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1658 Make a character from CHARSET and octets ARG1 and ARG2.
1659 ARG2 is required only for characters from two-dimensional charsets.
1660 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1661 character s with caron.
1663 (charset, arg1, arg2))
1665 struct Lisp_Charset *cs;
1667 int lowlim, highlim;
1669 charset = Fget_charset (charset);
1670 cs = XCHARSET (charset);
1672 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1673 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1674 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1675 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1678 /* It is useful (and safe, according to Olivier Galibert) to strip
1679 the 8th bit off ARG1 and ARG2 because it allows programmers to
1680 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1681 Latin 2 code of the character. */
1682 a1 = XINT (arg1) & 0x7f;
1683 if (a1 < lowlim || a1 > highlim)
1684 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1686 if (CHARSET_DIMENSION (cs) == 1)
1690 ("Charset is of dimension one; second octet must be nil", arg2);
1691 return make_char (MAKE_CHAR (charset, a1, 0));
1695 a2 = XINT (arg2) & 0x7f;
1696 if (a2 < lowlim || a2 > highlim)
1697 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1699 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset': after CHECK_CHAR_COERCE_INT on CH,
   returns the name (XCHARSET_NAME) of the charset that CHAR_CHARSET
   yields for the character.  */
1702 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1703 Return the character set of char CH.
1707 CHECK_CHAR_COERCE_INT (ch);
1709 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* Lisp primitive `split-char'.
   Splits CHARACTER with BREAKUP_CHAR into its charset and octets c1
   and c2, then builds a list of the charset name followed by c1, plus
   c2 when the charset's dimension is 2.  `charset' and `rc' are
   GC-protected with GCPRO2 while the list is built.
   NOTE(review): the declarations of `c1'/`c2', the matching UNGCPRO and
   the return statement are not visible in this excerpt.  */
1712 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1713 Return list of charset and one or two position-codes of CHAR.
1717 /* This function can GC */
1718 struct gcpro gcpro1, gcpro2;
1719 Lisp_Object charset = Qnil;
1720 Lisp_Object rc = Qnil;
1723 GCPRO2 (charset, rc);
1724 CHECK_CHAR_COERCE_INT (character);
1726 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1728 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1730 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1734 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1742 #ifdef ENABLE_COMPOSITE_CHARS
1743 /************************************************************************/
1744 /* composite character functions */
1745 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The visible code also
   allocates a new character at (composite_char_row_next,
   composite_char_col_next) in Vcharset_composite and records it in
   both the char->string and string->char hash tables; an error is
   signalled once composite_char_row_next has reached 128.
   NOTE(review): the return type, the declaration of `emch', the test
   deciding whether a new character is needed and the return statement
   are not visible in this excerpt.  */
1748 lookup_composite_char (Bufbyte *str, int len)
1750 Lisp_Object lispstr = make_string (str, len);
1751 Lisp_Object ch = Fgethash (lispstr,
1752 Vcomposite_char_string2char_hash_table,
1758 if (composite_char_row_next >= 128)
1759 signal_simple_error ("No more composite chars available", lispstr);
1760 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1761 composite_char_col_next);
1762 Fputhash (make_char (emch), lispstr,
1763 Vcomposite_char_char2string_hash_table);
1764 Fputhash (lispstr, make_char (emch),
1765 Vcomposite_char_string2char_hash_table);
/* Advance to the next cell; once the column reaches 128 it restarts at
   32 and the row moves on.  */
1766 composite_char_col_next++;
1767 if (composite_char_col_next >= 128)
1769 composite_char_col_next = 32;
1770 composite_char_row_next++;
/* Look CH up in Vcomposite_char_char2string_hash_table and assert that
   an entry exists.  The return type and the return statement are not
   visible in this excerpt.  */
1779 composite_char_string (Emchar ch)
1781 Lisp_Object str = Fgethash (make_char (ch),
1782 Vcomposite_char_char2string_hash_table,
1784 assert (!UNBOUNDP (str));
/* Lisp-level entry point `make-composite-char' (declared with xxDEFUN):
   requires STRING to be a string and returns, boxed with make_char, the
   character that lookup_composite_char yields for the string's data
   and length.  */
1788 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1789 Convert a string into a single composite character.
1790 The character is the result of overstriking all the characters in
1795 CHECK_STRING (string);
1796 return make_char (lookup_composite_char (XSTRING_DATA (string),
1797 XSTRING_LENGTH (string)));
/* Lisp-level entry point `composite-char-string' (declared with
   xxDEFUN): signals "Must be composite char" unless the leading byte of
   the character is LEADING_BYTE_COMPOSITE, otherwise returns the result
   of composite_char_string.
   NOTE(review): the argument list and the code deriving `emch' from the
   argument are not visible in this excerpt.  */
1800 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1801 Return a string of the characters comprising a composite character.
1809 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1810 signal_simple_error ("Must be composite char", ch);
1811 return composite_char_string (emch);
1813 #endif /* ENABLE_COMPOSITE_CHARS */
1816 /************************************************************************/
1817 /* initialization */
1818 /************************************************************************/
/* Symbol-level initialization for this file: calls DEFSUBR for the Lisp
   primitives defined here and defsymbol for the symbols the charset
   code compares against (property keywords, direction values and the
   names of the predefined charsets).
   NOTE(review): the return type, the braces and some preprocessor lines
   of this function are not visible in this excerpt.  */
1821 syms_of_mule_charset (void)
1823 DEFSUBR (Fcharsetp);
1824 DEFSUBR (Ffind_charset);
1825 DEFSUBR (Fget_charset);
1826 DEFSUBR (Fcharset_list);
1827 DEFSUBR (Fcharset_name);
1828 DEFSUBR (Fmake_charset);
1829 DEFSUBR (Fmake_reverse_direction_charset);
1830 /* DEFSUBR (Freverse_direction_charset); */
1831 DEFSUBR (Fcharset_from_attributes);
1832 DEFSUBR (Fcharset_short_name);
1833 DEFSUBR (Fcharset_long_name);
1834 DEFSUBR (Fcharset_description);
1835 DEFSUBR (Fcharset_dimension);
1836 DEFSUBR (Fcharset_property);
1837 DEFSUBR (Fcharset_id);
1838 DEFSUBR (Fset_charset_ccl_program);
1839 DEFSUBR (Fset_charset_registry);
1841 DEFSUBR (Fmake_char);
1842 DEFSUBR (Fchar_charset);
1843 DEFSUBR (Fsplit_char);
1845 #ifdef ENABLE_COMPOSITE_CHARS
1846 DEFSUBR (Fmake_composite_char);
1847 DEFSUBR (Fcomposite_char_string);
/* Predicate symbol and the property keywords tested by make-charset
   and charset-property.  */
1850 defsymbol (&Qcharsetp, "charsetp");
1851 defsymbol (&Qregistry, "registry");
1852 defsymbol (&Qfinal, "final");
1853 defsymbol (&Qgraphic, "graphic");
1854 defsymbol (&Qdirection, "direction");
1855 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1856 defsymbol (&Qshort_name, "short-name");
1857 defsymbol (&Qlong_name, "long-name");
/* The two values accepted for the 'direction property.  */
1859 defsymbol (&Ql2r, "l2r");
1860 defsymbol (&Qr2l, "r2l");
1862 /* Charsets, compatible with FSF 20.3
1863 Naming convention is Script-Charset[-Edition] */
1864 defsymbol (&Qascii, "ascii");
1865 defsymbol (&Qcontrol_1, "control-1");
1866 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1867 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1868 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1869 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1870 defsymbol (&Qthai_tis620, "thai-tis620");
1871 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1872 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1873 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1874 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1875 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1876 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1877 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1878 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1879 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1880 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1881 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1882 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1883 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1884 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1886 defsymbol (&Qchinese_cns11643_3, "chinese-cns11643-3");
1887 defsymbol (&Qchinese_cns11643_4, "chinese-cns11643-4");
1888 defsymbol (&Qchinese_cns11643_5, "chinese-cns11643-5");
1889 defsymbol (&Qchinese_cns11643_6, "chinese-cns11643-6");
1890 defsymbol (&Qchinese_cns11643_7, "chinese-cns11643-7");
1891 defsymbol (&Qucs_bmp, "ucs-bmp");
1893 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1894 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1896 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: fills the charset lookup
   tables with Qnil, sets the starting values of the leading-byte
   allocators, and defines the Lisp variables `leading-code-private-11'
   and `utf-2000-version'.
   NOTE(review): the return type, the loop-index declarations, the
   braces and the preprocessor conditionals of this function are not
   visible in this excerpt.
   NOTE(review): the string passed to build_string for
   Vutf_2000_version contains bytes that look mis-encoded in this copy
   of the file -- confirm the intended text against the upstream
   source before relying on it.  */
1900 vars_of_mule_charset (void)
1907 /* Table of charsets indexed by leading byte. */
1908 for (i = 0; i < countof (charset_by_leading_byte); i++)
1909 charset_by_leading_byte[i] = Qnil;
/* NOTE(review): two differently shaped initialisations of
   charset_by_attributes follow (two and three indices); presumably
   they belong to alternative preprocessor branches whose directives
   are not visible here -- confirm.  */
1912 /* Table of charsets indexed by type/final-byte. */
1913 for (i = 0; i < countof (charset_by_attributes); i++)
1914 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1915 charset_by_attributes[i][j] = Qnil;
1917 /* Table of charsets indexed by type/final-byte/direction. */
1918 for (i = 0; i < countof (charset_by_attributes); i++)
1919 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1920 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1921 charset_by_attributes[i][j][k] = Qnil;
1924 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
/* NOTE(review): next_allocated_2_byte_leading_byte is assigned twice
   with different values; presumably again alternative preprocessor
   branches -- confirm.  */
1926 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
1928 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* leading_code_private_11 is given the same value both before and
   after its DEFVAR_INT.  */
1932 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1933 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1934 Leading-code of private TYPE9N charset of column-width 1.
1936 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1940 Vutf_2000_version = build_string("0.6 (Tøº’¹²bushijøº’¹²-mae)");
1941 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1942 Version number of UTF-2000.
1948 complex_vars_of_mule_charset (void)
1950 staticpro (&Vcharset_hash_table);
1951 Vcharset_hash_table =
1952 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1954 /* Predefined character sets. We store them into variables for
1959 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 1,
1960 CHARSET_TYPE_256X256, 1, 0, 0,
1961 CHARSET_LEFT_TO_RIGHT,
1962 build_string ("BMP"),
1963 build_string ("BMP"),
1964 build_string ("BMP"),
/* ASCII charset.  The trailing four make_charset arguments are, in
   order, short name, long name, doc string and registry (the same
   order used by the make_charset call in make-charset).  The long name
   is plain "ASCII"; the literal previously carried a stray ')'.  */
1968 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1969 CHARSET_TYPE_94, 1, 0, 'B',
1970 CHARSET_LEFT_TO_RIGHT,
1971 build_string ("ASCII"),
1972 build_string ("ASCII"),
1973 build_string ("ASCII (ISO646 IRV)"),
1974 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
/* control-1 charset.  make-char limits this charset to octets 0-31,
   i.e. 32 positions, so the doc string names the range 128-159 (the
   literal previously said 128-191).  The final argument line of this
   call is not visible in this excerpt.  */
1975 Vcharset_control_1 =
1976 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1977 CHARSET_TYPE_94, 1, 1, 0,
1978 CHARSET_LEFT_TO_RIGHT,
1979 build_string ("C1"),
1980 build_string ("Control characters"),
1981 build_string ("Control characters 128-159"),
1983 Vcharset_latin_iso8859_1 =
1984 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1985 CHARSET_TYPE_96, 1, 1, 'A',
1986 CHARSET_LEFT_TO_RIGHT,
1987 build_string ("Latin-1"),
1988 build_string ("ISO8859-1 (Latin-1)"),
1989 build_string ("ISO8859-1 (Latin-1)"),
1990 build_string ("iso8859-1"));
1991 Vcharset_latin_iso8859_2 =
1992 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1993 CHARSET_TYPE_96, 1, 1, 'B',
1994 CHARSET_LEFT_TO_RIGHT,
1995 build_string ("Latin-2"),
1996 build_string ("ISO8859-2 (Latin-2)"),
1997 build_string ("ISO8859-2 (Latin-2)"),
1998 build_string ("iso8859-2"));
1999 Vcharset_latin_iso8859_3 =
2000 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
2001 CHARSET_TYPE_96, 1, 1, 'C',
2002 CHARSET_LEFT_TO_RIGHT,
2003 build_string ("Latin-3"),
2004 build_string ("ISO8859-3 (Latin-3)"),
2005 build_string ("ISO8859-3 (Latin-3)"),
2006 build_string ("iso8859-3"));
2007 Vcharset_latin_iso8859_4 =
2008 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
2009 CHARSET_TYPE_96, 1, 1, 'D',
2010 CHARSET_LEFT_TO_RIGHT,
2011 build_string ("Latin-4"),
2012 build_string ("ISO8859-4 (Latin-4)"),
2013 build_string ("ISO8859-4 (Latin-4)"),
2014 build_string ("iso8859-4"));
2015 Vcharset_thai_tis620 =
2016 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
2017 CHARSET_TYPE_96, 1, 1, 'T',
2018 CHARSET_LEFT_TO_RIGHT,
2019 build_string ("TIS620"),
2020 build_string ("TIS620 (Thai)"),
2021 build_string ("TIS620.2529 (Thai)"),
2022 build_string ("tis620"));
2023 Vcharset_greek_iso8859_7 =
2024 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
2025 CHARSET_TYPE_96, 1, 1, 'F',
2026 CHARSET_LEFT_TO_RIGHT,
2027 build_string ("ISO8859-7"),
2028 build_string ("ISO8859-7 (Greek)"),
2029 build_string ("ISO8859-7 (Greek)"),
2030 build_string ("iso8859-7"));
2031 Vcharset_arabic_iso8859_6 =
2032 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
2033 CHARSET_TYPE_96, 1, 1, 'G',
2034 CHARSET_RIGHT_TO_LEFT,
2035 build_string ("ISO8859-6"),
2036 build_string ("ISO8859-6 (Arabic)"),
2037 build_string ("ISO8859-6 (Arabic)"),
2038 build_string ("iso8859-6"));
2039 Vcharset_hebrew_iso8859_8 =
2040 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
2041 CHARSET_TYPE_96, 1, 1, 'H',
2042 CHARSET_RIGHT_TO_LEFT,
2043 build_string ("ISO8859-8"),
2044 build_string ("ISO8859-8 (Hebrew)"),
2045 build_string ("ISO8859-8 (Hebrew)"),
2046 build_string ("iso8859-8"));
2047 Vcharset_katakana_jisx0201 =
2048 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
2049 CHARSET_TYPE_94, 1, 1, 'I',
2050 CHARSET_LEFT_TO_RIGHT,
2051 build_string ("JISX0201 Kana"),
2052 build_string ("JISX0201.1976 (Japanese Kana)"),
2053 build_string ("JISX0201.1976 Japanese Kana"),
2054 build_string ("jisx0201.1976"));
2055 Vcharset_latin_jisx0201 =
2056 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
2057 CHARSET_TYPE_94, 1, 0, 'J',
2058 CHARSET_LEFT_TO_RIGHT,
2059 build_string ("JISX0201 Roman"),
2060 build_string ("JISX0201.1976 (Japanese Roman)"),
2061 build_string ("JISX0201.1976 Japanese Roman"),
2062 build_string ("jisx0201.1976"));
2063 Vcharset_cyrillic_iso8859_5 =
2064 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
2065 CHARSET_TYPE_96, 1, 1, 'L',
2066 CHARSET_LEFT_TO_RIGHT,
2067 build_string ("ISO8859-5"),
2068 build_string ("ISO8859-5 (Cyrillic)"),
2069 build_string ("ISO8859-5 (Cyrillic)"),
2070 build_string ("iso8859-5"));
2071 Vcharset_latin_iso8859_9 =
2072 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
2073 CHARSET_TYPE_96, 1, 1, 'M',
2074 CHARSET_LEFT_TO_RIGHT,
2075 build_string ("Latin-5"),
2076 build_string ("ISO8859-9 (Latin-5)"),
2077 build_string ("ISO8859-9 (Latin-5)"),
2078 build_string ("iso8859-9"));
2079 Vcharset_japanese_jisx0208_1978 =
2080 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
2081 CHARSET_TYPE_94X94, 2, 0, '@',
2082 CHARSET_LEFT_TO_RIGHT,
2083 build_string ("JISX0208.1978"),
2084 build_string ("JISX0208.1978 (Japanese)"),
2086 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
2087 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
/* GB2312 charset.  The trailing four make_charset arguments are short
   name, long name, doc string and registry.  The long name is plain
   "GB2312"; the literal previously carried a stray ')'.  */
2088 Vcharset_chinese_gb2312 =
2089 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
2090 CHARSET_TYPE_94X94, 2, 0, 'A',
2091 CHARSET_LEFT_TO_RIGHT,
2092 build_string ("GB2312"),
2093 build_string ("GB2312"),
2094 build_string ("GB2312 Chinese simplified"),
2095 build_string ("gb2312"));
2096 Vcharset_japanese_jisx0208 =
2097 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
2098 CHARSET_TYPE_94X94, 2, 0, 'B',
2099 CHARSET_LEFT_TO_RIGHT,
2100 build_string ("JISX0208"),
2101 build_string ("JISX0208.1983/1990 (Japanese)"),
2102 build_string ("JISX0208.1983/1990 Japanese Kanji"),
2103 build_string ("jisx0208.19\\(83\\|90\\)"));
/* KSC5601 charset.  The trailing four make_charset arguments are short
   name, long name, doc string and registry.  The long name follows the
   "NAME (Language)" pattern of the neighbouring Japanese entries; the
   literal previously lacked its closing ')'.  */
2104 Vcharset_korean_ksc5601 =
2105 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
2106 CHARSET_TYPE_94X94, 2, 0, 'C',
2107 CHARSET_LEFT_TO_RIGHT,
2108 build_string ("KSC5601"),
2109 build_string ("KSC5601 (Korean)"),
2110 build_string ("KSC5601 Korean Hangul and Hanja"),
2111 build_string ("ksc5601"));
2112 Vcharset_japanese_jisx0212 =
2113 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
2114 CHARSET_TYPE_94X94, 2, 0, 'D',
2115 CHARSET_LEFT_TO_RIGHT,
2116 build_string ("JISX0212"),
2117 build_string ("JISX0212 (Japanese)"),
2118 build_string ("JISX0212 Japanese Supplement"),
2119 build_string ("jisx0212"));
2121 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2122 Vcharset_chinese_cns11643_1 =
2123 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
2124 CHARSET_TYPE_94X94, 2, 0, 'G',
2125 CHARSET_LEFT_TO_RIGHT,
2126 build_string ("CNS11643-1"),
2127 build_string ("CNS11643-1 (Chinese traditional)"),
2129 ("CNS 11643 Plane 1 Chinese traditional"),
2130 build_string (CHINESE_CNS_PLANE_RE("1")));
2131 Vcharset_chinese_cns11643_2 =
2132 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
2133 CHARSET_TYPE_94X94, 2, 0, 'H',
2134 CHARSET_LEFT_TO_RIGHT,
2135 build_string ("CNS11643-2"),
2136 build_string ("CNS11643-2 (Chinese traditional)"),
2138 ("CNS 11643 Plane 2 Chinese traditional"),
2139 build_string (CHINESE_CNS_PLANE_RE("2")));
2141 Vcharset_chinese_cns11643_3 =
2142 make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3,
2143 CHARSET_TYPE_94X94, 2, 0, 'I',
2144 CHARSET_LEFT_TO_RIGHT,
2145 build_string ("CNS11643-3"),
2146 build_string ("CNS11643-3 (Chinese traditional)"),
2148 ("CNS 11643 Plane 3 Chinese traditional"),
2149 build_string (CHINESE_CNS_PLANE_RE("3")));
2150 Vcharset_chinese_cns11643_4 =
2151 make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3,
2152 CHARSET_TYPE_94X94, 2, 0, 'J',
2153 CHARSET_LEFT_TO_RIGHT,
2154 build_string ("CNS11643-4"),
2155 build_string ("CNS11643-4 (Chinese traditional)"),
2157 ("CNS 11643 Plane 4 Chinese traditional"),
2158 build_string (CHINESE_CNS_PLANE_RE("4")));
2159 Vcharset_chinese_cns11643_5 =
2160 make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3,
2161 CHARSET_TYPE_94X94, 2, 0, 'K',
2162 CHARSET_LEFT_TO_RIGHT,
2163 build_string ("CNS11643-5"),
2164 build_string ("CNS11643-5 (Chinese traditional)"),
2166 ("CNS 11643 Plane 5 Chinese traditional"),
2167 build_string (CHINESE_CNS_PLANE_RE("5")));
2168 Vcharset_chinese_cns11643_6 =
2169 make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3,
2170 CHARSET_TYPE_94X94, 2, 0, 'L',
2171 CHARSET_LEFT_TO_RIGHT,
2172 build_string ("CNS11643-6"),
2173 build_string ("CNS11643-6 (Chinese traditional)"),
2175 ("CNS 11643 Plane 6 Chinese traditional"),
2176 build_string (CHINESE_CNS_PLANE_RE("6")));
2177 Vcharset_chinese_cns11643_7 =
2178 make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3,
2179 CHARSET_TYPE_94X94, 2, 0, 'M',
2180 CHARSET_LEFT_TO_RIGHT,
2181 build_string ("CNS11643-7"),
2182 build_string ("CNS11643-7 (Chinese traditional)"),
2184 ("CNS 11643 Plane 7 Chinese traditional"),
2185 build_string (CHINESE_CNS_PLANE_RE("7")));
2187 Vcharset_chinese_big5_1 =
2188 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
2189 CHARSET_TYPE_94X94, 2, 0, '0',
2190 CHARSET_LEFT_TO_RIGHT,
2191 build_string ("Big5"),
2192 build_string ("Big5 (Level-1)"),
2194 ("Big5 Level-1 Chinese traditional"),
2195 build_string ("big5"));
2196 Vcharset_chinese_big5_2 =
2197 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
2198 CHARSET_TYPE_94X94, 2, 0, '1',
2199 CHARSET_LEFT_TO_RIGHT,
2200 build_string ("Big5"),
2201 build_string ("Big5 (Level-2)"),
2203 ("Big5 Level-2 Chinese traditional"),
2204 build_string ("big5"));
2207 #ifdef ENABLE_COMPOSITE_CHARS
2208 /* #### For simplicity, we put composite chars into a 96x96 charset.
2209 This is going to lead to problems because you can run out of
2210 room, esp. as we don't yet recycle numbers. */
2211 Vcharset_composite =
2212 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
2213 CHARSET_TYPE_96X96, 2, 0, 0,
2214 CHARSET_LEFT_TO_RIGHT,
2215 build_string ("Composite"),
2216 build_string ("Composite characters"),
2217 build_string ("Composite characters"),
2220 composite_char_row_next = 32;
2221 composite_char_col_next = 32;
2223 Vcomposite_char_string2char_hash_table =
2224 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2225 Vcomposite_char_char2string_hash_table =
2226 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2227 staticpro (&Vcomposite_char_string2char_hash_table);
2228 staticpro (&Vcomposite_char_char2string_hash_table);
2229 #endif /* ENABLE_COMPOSITE_CHARS */