1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_chinese_cns11643_3;
62 Lisp_Object Vcharset_chinese_cns11643_4;
63 Lisp_Object Vcharset_chinese_cns11643_5;
64 Lisp_Object Vcharset_chinese_cns11643_6;
65 Lisp_Object Vcharset_chinese_cns11643_7;
66 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
128 Charset_ID latin_a_char_to_charset[128] = {
129 /* U+0100 */ LEADING_BYTE_LATIN_ISO8859_4,
130 /* U+0101 */ LEADING_BYTE_LATIN_ISO8859_4,
131 /* U+0102 */ LEADING_BYTE_LATIN_ISO8859_2,
132 /* U+0103 */ LEADING_BYTE_LATIN_ISO8859_2,
133 /* U+0104 */ LEADING_BYTE_LATIN_ISO8859_2,
134 /* U+0105 */ LEADING_BYTE_LATIN_ISO8859_2,
135 /* U+0106 */ LEADING_BYTE_LATIN_ISO8859_2,
136 /* U+0107 */ LEADING_BYTE_LATIN_ISO8859_2,
137 /* U+0108 */ LEADING_BYTE_LATIN_ISO8859_3,
138 /* U+0109 */ LEADING_BYTE_LATIN_ISO8859_3,
139 /* U+010A */ LEADING_BYTE_LATIN_ISO8859_3,
140 /* U+010B */ LEADING_BYTE_LATIN_ISO8859_3,
141 /* U+010C */ LEADING_BYTE_LATIN_ISO8859_2,
142 /* U+010D */ LEADING_BYTE_LATIN_ISO8859_2,
143 /* U+010E */ LEADING_BYTE_LATIN_ISO8859_2,
144 /* U+010F */ LEADING_BYTE_LATIN_ISO8859_2,
145 /* U+0110 */ LEADING_BYTE_LATIN_ISO8859_2,
146 /* U+0111 */ LEADING_BYTE_LATIN_ISO8859_2,
147 /* U+0112 */ LEADING_BYTE_LATIN_ISO8859_4,
148 /* U+0113 */ LEADING_BYTE_LATIN_ISO8859_4,
149 /* U+0114 */ LEADING_BYTE_UCS_BMP,
150 /* U+0115 */ LEADING_BYTE_UCS_BMP,
151 /* U+0116 */ LEADING_BYTE_LATIN_ISO8859_4,
152 /* U+0117 */ LEADING_BYTE_LATIN_ISO8859_4,
153 /* U+0118 */ LEADING_BYTE_LATIN_ISO8859_2,
154 /* U+0119 */ LEADING_BYTE_LATIN_ISO8859_2,
155 /* U+011A */ LEADING_BYTE_LATIN_ISO8859_2,
156 /* U+011B */ LEADING_BYTE_LATIN_ISO8859_2,
157 /* U+011C */ LEADING_BYTE_LATIN_ISO8859_3,
158 /* U+011D */ LEADING_BYTE_LATIN_ISO8859_3,
159 /* U+011E */ LEADING_BYTE_LATIN_ISO8859_3,
160 /* U+011F */ LEADING_BYTE_LATIN_ISO8859_3,
161 /* U+0120 */ LEADING_BYTE_LATIN_ISO8859_3,
162 /* U+0121 */ LEADING_BYTE_LATIN_ISO8859_3,
163 /* U+0122 */ LEADING_BYTE_LATIN_ISO8859_4,
164 /* U+0123 */ LEADING_BYTE_LATIN_ISO8859_4,
165 /* U+0124 */ LEADING_BYTE_LATIN_ISO8859_3,
166 /* U+0125 */ LEADING_BYTE_LATIN_ISO8859_3,
167 /* U+0126 */ LEADING_BYTE_LATIN_ISO8859_3,
168 /* U+0127 */ LEADING_BYTE_LATIN_ISO8859_3,
169 /* U+0128 */ LEADING_BYTE_LATIN_ISO8859_4,
170 /* U+0129 */ LEADING_BYTE_LATIN_ISO8859_4,
171 /* U+012A */ LEADING_BYTE_LATIN_ISO8859_4,
172 /* U+012B */ LEADING_BYTE_LATIN_ISO8859_4,
173 /* U+012C */ LEADING_BYTE_UCS_BMP,
174 /* U+012D */ LEADING_BYTE_UCS_BMP,
175 /* U+012E */ LEADING_BYTE_LATIN_ISO8859_4,
176 /* U+012F */ LEADING_BYTE_LATIN_ISO8859_4,
177 /* U+0130 */ LEADING_BYTE_LATIN_ISO8859_3,
178 /* U+0131 */ LEADING_BYTE_LATIN_ISO8859_3,
179 /* U+0132 */ LEADING_BYTE_JAPANESE_JISX0212,
180 /* U+0133 */ LEADING_BYTE_JAPANESE_JISX0212,
181 /* U+0134 */ LEADING_BYTE_LATIN_ISO8859_3,
182 /* U+0135 */ LEADING_BYTE_LATIN_ISO8859_3,
183 /* U+0136 */ LEADING_BYTE_LATIN_ISO8859_4,
184 /* U+0137 */ LEADING_BYTE_LATIN_ISO8859_4,
185 /* U+0138 */ LEADING_BYTE_LATIN_ISO8859_4,
186 /* U+0139 */ LEADING_BYTE_LATIN_ISO8859_2,
187 /* U+013A */ LEADING_BYTE_LATIN_ISO8859_2,
188 /* U+013B */ LEADING_BYTE_LATIN_ISO8859_4,
189 /* U+013C */ LEADING_BYTE_LATIN_ISO8859_4,
190 /* U+013D */ LEADING_BYTE_LATIN_ISO8859_2,
191 /* U+013E */ LEADING_BYTE_LATIN_ISO8859_2,
192 /* U+013F */ LEADING_BYTE_JAPANESE_JISX0212,
193 /* U+0140 */ LEADING_BYTE_JAPANESE_JISX0212,
194 /* U+0141 */ LEADING_BYTE_LATIN_ISO8859_2,
195 /* U+0142 */ LEADING_BYTE_LATIN_ISO8859_2,
196 /* U+0143 */ LEADING_BYTE_LATIN_ISO8859_2,
197 /* U+0144 */ LEADING_BYTE_LATIN_ISO8859_2,
198 /* U+0145 */ LEADING_BYTE_LATIN_ISO8859_4,
199 /* U+0146 */ LEADING_BYTE_LATIN_ISO8859_4,
200 /* U+0147 */ LEADING_BYTE_LATIN_ISO8859_2,
201 /* U+0148 */ LEADING_BYTE_LATIN_ISO8859_2,
202 /* U+0149 */ LEADING_BYTE_JAPANESE_JISX0212,
203 /* U+014A */ LEADING_BYTE_LATIN_ISO8859_4,
204 /* U+014B */ LEADING_BYTE_LATIN_ISO8859_4,
205 /* U+014C */ LEADING_BYTE_LATIN_ISO8859_4,
206 /* U+014D */ LEADING_BYTE_LATIN_ISO8859_4,
207 /* U+014E */ LEADING_BYTE_UCS_BMP,
208 /* U+014F */ LEADING_BYTE_UCS_BMP,
209 /* U+0150 */ LEADING_BYTE_LATIN_ISO8859_2,
210 /* U+0151 */ LEADING_BYTE_LATIN_ISO8859_2,
211 /* U+0152 */ LEADING_BYTE_JAPANESE_JISX0212,
212 /* U+0153 */ LEADING_BYTE_JAPANESE_JISX0212,
213 /* U+0154 */ LEADING_BYTE_LATIN_ISO8859_2,
214 /* U+0155 */ LEADING_BYTE_LATIN_ISO8859_2,
215 /* U+0156 */ LEADING_BYTE_LATIN_ISO8859_4,
216 /* U+0157 */ LEADING_BYTE_LATIN_ISO8859_4,
217 /* U+0158 */ LEADING_BYTE_LATIN_ISO8859_2,
218 /* U+0159 */ LEADING_BYTE_LATIN_ISO8859_2,
219 /* U+015A */ LEADING_BYTE_LATIN_ISO8859_2,
220 /* U+015B */ LEADING_BYTE_LATIN_ISO8859_2,
221 /* U+015C */ LEADING_BYTE_LATIN_ISO8859_3,
222 /* U+015D */ LEADING_BYTE_LATIN_ISO8859_3,
223 /* U+015E */ LEADING_BYTE_LATIN_ISO8859_2,
224 /* U+015F */ LEADING_BYTE_LATIN_ISO8859_2,
225 /* U+0160 */ LEADING_BYTE_LATIN_ISO8859_2,
226 /* U+0161 */ LEADING_BYTE_LATIN_ISO8859_2,
227 /* U+0162 */ LEADING_BYTE_LATIN_ISO8859_2,
228 /* U+0163 */ LEADING_BYTE_LATIN_ISO8859_2,
229 /* U+0164 */ LEADING_BYTE_LATIN_ISO8859_2,
230 /* U+0165 */ LEADING_BYTE_LATIN_ISO8859_2,
231 /* U+0166 */ LEADING_BYTE_LATIN_ISO8859_4,
232 /* U+0167 */ LEADING_BYTE_LATIN_ISO8859_4,
233 /* U+0168 */ LEADING_BYTE_LATIN_ISO8859_4,
234 /* U+0169 */ LEADING_BYTE_LATIN_ISO8859_4,
235 /* U+016A */ LEADING_BYTE_LATIN_ISO8859_4,
236 /* U+016B */ LEADING_BYTE_LATIN_ISO8859_4,
237 /* U+016C */ LEADING_BYTE_LATIN_ISO8859_3,
238 /* U+016D */ LEADING_BYTE_LATIN_ISO8859_3,
239 /* U+016E */ LEADING_BYTE_LATIN_ISO8859_2,
240 /* U+016F */ LEADING_BYTE_LATIN_ISO8859_2,
241 /* U+0170 */ LEADING_BYTE_LATIN_ISO8859_2,
242 /* U+0171 */ LEADING_BYTE_LATIN_ISO8859_2,
243 /* U+0172 */ LEADING_BYTE_LATIN_ISO8859_4,
244 /* U+0173 */ LEADING_BYTE_LATIN_ISO8859_4,
245 /* U+0174 */ LEADING_BYTE_JAPANESE_JISX0212,
246 /* U+0175 */ LEADING_BYTE_JAPANESE_JISX0212,
247 /* U+0176 */ LEADING_BYTE_JAPANESE_JISX0212,
248 /* U+0177 */ LEADING_BYTE_JAPANESE_JISX0212,
249 /* U+0178 */ LEADING_BYTE_JAPANESE_JISX0212,
250 /* U+0179 */ LEADING_BYTE_LATIN_ISO8859_2,
251 /* U+017A */ LEADING_BYTE_LATIN_ISO8859_2,
252 /* U+017B */ LEADING_BYTE_LATIN_ISO8859_2,
253 /* U+017C */ LEADING_BYTE_LATIN_ISO8859_2,
254 /* U+017D */ LEADING_BYTE_LATIN_ISO8859_2,
255 /* U+017E */ LEADING_BYTE_LATIN_ISO8859_2,
256 /* U+017F */ LEADING_BYTE_UCS_BMP
259 unsigned char latin_a_char_to_byte1[128] = {
260 /* U+0100 */ 0xC0 - 0x80,
261 /* U+0101 */ 0xE0 - 0x80,
262 /* U+0102 */ 0xC3 - 0x80,
263 /* U+0103 */ 0xE3 - 0x80,
264 /* U+0104 */ 0xA1 - 0x80,
265 /* U+0105 */ 0xB1 - 0x80,
266 /* U+0106 */ 0xC6 - 0x80,
267 /* U+0107 */ 0xE6 - 0x80,
268 /* U+0108 */ 0xC6 - 0x80,
269 /* U+0109 */ 0xE6 - 0x80,
270 /* U+010A */ 0xC5 - 0x80,
271 /* U+010B */ 0xE5 - 0x80,
272 /* U+010C */ 0xC8 - 0x80,
273 /* U+010D */ 0xE8 - 0x80,
274 /* U+010E */ 0xCF - 0x80,
275 /* U+010F */ 0xEF - 0x80,
276 /* U+0110 */ 0xD0 - 0x80,
277 /* U+0111 */ 0xF0 - 0x80,
278 /* U+0112 */ 0xAA - 0x80,
279 /* U+0113 */ 0xBA - 0x80,
282 /* U+0116 */ 0xCC - 0x80,
283 /* U+0117 */ 0xEC - 0x80,
284 /* U+0118 */ 0xCA - 0x80,
285 /* U+0119 */ 0xEA - 0x80,
286 /* U+011A */ 0xCC - 0x80,
287 /* U+011B */ 0xEC - 0x80,
288 /* U+011C */ 0xD8 - 0x80,
289 /* U+011D */ 0xF8 - 0x80,
290 /* U+011E */ 0xAB - 0x80,
291 /* U+011F */ 0xBB - 0x80,
292 /* U+0120 */ 0xD5 - 0x80,
293 /* U+0121 */ 0xF5 - 0x80,
294 /* U+0122 */ 0xAB - 0x80,
295 /* U+0123 */ 0xBB - 0x80,
296 /* U+0124 */ 0xA6 - 0x80,
297 /* U+0125 */ 0xB6 - 0x80,
298 /* U+0126 */ 0xA1 - 0x80,
299 /* U+0127 */ 0xB1 - 0x80,
300 /* U+0128 */ 0xA5 - 0x80,
301 /* U+0129 */ 0xB5 - 0x80,
302 /* U+012A */ 0xCF - 0x80,
303 /* U+012B */ 0xEF - 0x80,
306 /* U+012E */ 0xC7 - 0x80,
307 /* U+012F */ 0xE7 - 0x80,
308 /* U+0130 */ 0xA9 - 0x80,
309 /* U+0131 */ 0xB9 - 0x80,
312 /* U+0134 */ 0xAC - 0x80,
313 /* U+0135 */ 0xBC - 0x80,
314 /* U+0136 */ 0xD3 - 0x80,
315 /* U+0137 */ 0xF3 - 0x80,
316 /* U+0138 */ 0xA2 - 0x80,
317 /* U+0139 */ 0xC5 - 0x80,
318 /* U+013A */ 0xE5 - 0x80,
319 /* U+013B */ 0xA6 - 0x80,
320 /* U+013C */ 0xB6 - 0x80,
321 /* U+013D */ 0xA5 - 0x80,
322 /* U+013E */ 0xB5 - 0x80,
325 /* U+0141 */ 0xA3 - 0x80,
326 /* U+0142 */ 0xB3 - 0x80,
327 /* U+0143 */ 0xD1 - 0x80,
328 /* U+0144 */ 0xF1 - 0x80,
329 /* U+0145 */ 0xD1 - 0x80,
330 /* U+0146 */ 0xF1 - 0x80,
331 /* U+0147 */ 0xD2 - 0x80,
332 /* U+0148 */ 0xF2 - 0x80,
334 /* U+014A */ 0xBD - 0x80,
335 /* U+014B */ 0xBF - 0x80,
336 /* U+014C */ 0xD2 - 0x80,
337 /* U+014D */ 0xF2 - 0x80,
340 /* U+0150 */ 0xD5 - 0x80,
341 /* U+0151 */ 0xF5 - 0x80,
344 /* U+0154 */ 0xC0 - 0x80,
345 /* U+0155 */ 0xE0 - 0x80,
346 /* U+0156 */ 0xA3 - 0x80,
347 /* U+0157 */ 0xB3 - 0x80,
348 /* U+0158 */ 0xD8 - 0x80,
349 /* U+0159 */ 0xF8 - 0x80,
350 /* U+015A */ 0xA6 - 0x80,
351 /* U+015B */ 0xB6 - 0x80,
352 /* U+015C */ 0xDE - 0x80,
353 /* U+015D */ 0xFE - 0x80,
354 /* U+015E */ 0xAA - 0x80,
355 /* U+015F */ 0xBA - 0x80,
356 /* U+0160 */ 0xA9 - 0x80,
357 /* U+0161 */ 0xB9 - 0x80,
358 /* U+0162 */ 0xDE - 0x80,
359 /* U+0163 */ 0xFE - 0x80,
360 /* U+0164 */ 0xAB - 0x80,
361 /* U+0165 */ 0xBB - 0x80,
362 /* U+0166 */ 0xAC - 0x80,
363 /* U+0167 */ 0xBC - 0x80,
364 /* U+0168 */ 0xDD - 0x80,
365 /* U+0169 */ 0xFD - 0x80,
366 /* U+016A */ 0xDE - 0x80,
367 /* U+016B */ 0xFE - 0x80,
368 /* U+016C */ 0xDD - 0x80,
369 /* U+016D */ 0xFD - 0x80,
370 /* U+016E */ 0xD9 - 0x80,
371 /* U+016F */ 0xF9 - 0x80,
372 /* U+0170 */ 0xDB - 0x80,
373 /* U+0171 */ 0xFB - 0x80,
374 /* U+0172 */ 0xD9 - 0x80,
375 /* U+0173 */ 0xF9 - 0x80,
381 /* U+0179 */ 0xAC - 0x80,
382 /* U+017A */ 0xBC - 0x80,
383 /* U+017B */ 0xAF - 0x80,
384 /* U+017C */ 0xBF - 0x80,
385 /* U+017D */ 0xAE - 0x80,
386 /* U+017E */ 0xBE - 0x80,
390 unsigned char latin_a_char_to_byte2[128] = {
521 Lisp_Object Vutf_2000_version;
525 int leading_code_private_11;
528 Lisp_Object Qcharsetp;
530 /* Qdoc_string, Qdimension, Qchars defined in general.c */
531 Lisp_Object Qregistry, Qfinal, Qgraphic;
532 Lisp_Object Qdirection;
533 Lisp_Object Qreverse_direction_charset;
534 Lisp_Object Qleading_byte;
535 Lisp_Object Qshort_name, Qlong_name;
551 Qjapanese_jisx0208_1978,
570 Lisp_Object Ql2r, Qr2l;
572 Lisp_Object Vcharset_hash_table;
574 static Charset_ID next_allocated_1_byte_leading_byte;
575 static Charset_ID next_allocated_2_byte_leading_byte;
577 /* Composite characters are characters constructed by overstriking two
578 or more regular characters.
580 1) The old Mule implementation involves storing composite characters
581 in a buffer as a tag followed by all of the actual characters
582 used to make up the composite character. I think this is a bad
583 idea; it greatly complicates code that wants to handle strings
584 one character at a time because it has to deal with the possibility
585 of great big ungainly characters. It's much more reasonable to
586 simply store an index into a table of composite characters.
588 2) The current implementation only allows for 16,384 separate
589 composite characters over the lifetime of the XEmacs process.
590 This could become a potential problem if the user
591 edited lots of different files that use composite characters.
592 Due to FSF bogosity, increasing the number of allowable
593 composite characters under Mule would decrease the number
594 of possible faces that can exist. Mule already has shrunk
595 this to 2048, and further shrinkage would become uncomfortable.
596 No such problems exist in XEmacs.
598 Composite characters could be represented as 0x80 C1 C2 C3,
599 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
600 for slightly under 2^20 (one million) composite characters
601 over the XEmacs process lifetime, and you only need to
602 increase the size of a Mule character from 19 to 21 bits.
603 Or you could use 0x80 C1 C2 C3 C4, allowing for about
604 85 million (slightly over 2^26) composite characters. */
607 /************************************************************************/
608 /* Basic Emchar functions */
609 /************************************************************************/
611 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
612 string in STR. Returns the number of bytes stored.
613 Do not call this directly. Use the macro set_charptr_emchar() instead.
617 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
632 else if ( c <= 0x7ff )
634 *p++ = (c >> 6) | 0xc0;
635 *p++ = (c & 0x3f) | 0x80;
637 else if ( c <= 0xffff )
639 *p++ = (c >> 12) | 0xe0;
640 *p++ = ((c >> 6) & 0x3f) | 0x80;
641 *p++ = (c & 0x3f) | 0x80;
643 else if ( c <= 0x1fffff )
645 *p++ = (c >> 18) | 0xf0;
646 *p++ = ((c >> 12) & 0x3f) | 0x80;
647 *p++ = ((c >> 6) & 0x3f) | 0x80;
648 *p++ = (c & 0x3f) | 0x80;
650 else if ( c <= 0x3ffffff )
652 *p++ = (c >> 24) | 0xf8;
653 *p++ = ((c >> 18) & 0x3f) | 0x80;
654 *p++ = ((c >> 12) & 0x3f) | 0x80;
655 *p++ = ((c >> 6) & 0x3f) | 0x80;
656 *p++ = (c & 0x3f) | 0x80;
660 *p++ = (c >> 30) | 0xfc;
661 *p++ = ((c >> 24) & 0x3f) | 0x80;
662 *p++ = ((c >> 18) & 0x3f) | 0x80;
663 *p++ = ((c >> 12) & 0x3f) | 0x80;
664 *p++ = ((c >> 6) & 0x3f) | 0x80;
665 *p++ = (c & 0x3f) | 0x80;
668 BREAKUP_CHAR (c, charset, c1, c2);
669 lb = CHAR_LEADING_BYTE (c);
670 if (LEADING_BYTE_PRIVATE_P (lb))
671 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
673 if (EQ (charset, Vcharset_control_1))
682 /* Return the first character from a Mule-encoded string in STR,
683 assuming it's non-ASCII. Do not call this directly.
684 Use the macro charptr_emchar() instead. */
687 non_ascii_charptr_emchar (CONST Bufbyte *str)
700 else if ( b >= 0xf8 )
705 else if ( b >= 0xf0 )
710 else if ( b >= 0xe0 )
715 else if ( b >= 0xc0 )
725 for( ; len > 0; len-- )
728 ch = ( ch << 6 ) | ( b & 0x3f );
732 Bufbyte i0 = *str, i1, i2 = 0;
735 if (i0 == LEADING_BYTE_CONTROL_1)
736 return (Emchar) (*++str - 0x20);
738 if (LEADING_BYTE_PREFIX_P (i0))
743 charset = CHARSET_BY_LEADING_BYTE (i0);
744 if (XCHARSET_DIMENSION (charset) == 2)
747 return MAKE_CHAR (charset, i1, i2);
751 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
752 Do not call this directly. Use the macro valid_char_p() instead. */
756 non_ascii_valid_char_p (Emchar ch)
760 /* Must have only lowest 19 bits set */
764 f1 = CHAR_FIELD1 (ch);
765 f2 = CHAR_FIELD2 (ch);
766 f3 = CHAR_FIELD3 (ch);
772 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
773 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
774 f2 > MAX_CHAR_FIELD2_PRIVATE)
779 if (f3 != 0x20 && f3 != 0x7F)
783 NOTE: This takes advantage of the fact that
784 FIELD2_TO_OFFICIAL_LEADING_BYTE and
785 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
787 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
788 return (XCHARSET_CHARS (charset) == 96);
794 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
795 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
796 f1 > MAX_CHAR_FIELD1_PRIVATE)
798 if (f2 < 0x20 || f3 < 0x20)
801 #ifdef ENABLE_COMPOSITE_CHARS
802 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
804 if (UNBOUNDP (Fgethash (make_int (ch),
805 Vcomposite_char_char2string_hash_table,
810 #endif /* ENABLE_COMPOSITE_CHARS */
812 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
815 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
817 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
820 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
822 return (XCHARSET_CHARS (charset) == 96);
828 /************************************************************************/
829 /* Basic string functions */
830 /************************************************************************/
832 /* Copy the character pointed to by PTR into STR, assuming it's
833 non-ASCII. Do not call this directly. Use the macro
834 charptr_copy_char() instead. */
837 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
839 Bufbyte *strptr = str;
841 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
843 /* Notice fallthrough. */
845 case 6: *++strptr = *ptr++;
846 case 5: *++strptr = *ptr++;
848 case 4: *++strptr = *ptr++;
849 case 3: *++strptr = *ptr++;
850 case 2: *++strptr = *ptr;
855 return strptr + 1 - str;
859 /************************************************************************/
860 /* streams of Emchars */
861 /************************************************************************/
863 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
864 The functions below are not meant to be called directly; use
865 the macros in insdel.h. */
868 Lstream_get_emchar_1 (Lstream *stream, int ch)
870 Bufbyte str[MAX_EMCHAR_LEN];
871 Bufbyte *strptr = str;
873 str[0] = (Bufbyte) ch;
874 switch (REP_BYTES_BY_FIRST_BYTE (ch))
876 /* Notice fallthrough. */
879 ch = Lstream_getc (stream);
881 *++strptr = (Bufbyte) ch;
883 ch = Lstream_getc (stream);
885 *++strptr = (Bufbyte) ch;
888 ch = Lstream_getc (stream);
890 *++strptr = (Bufbyte) ch;
892 ch = Lstream_getc (stream);
894 *++strptr = (Bufbyte) ch;
896 ch = Lstream_getc (stream);
898 *++strptr = (Bufbyte) ch;
903 return charptr_emchar (str);
907 Lstream_fput_emchar (Lstream *stream, Emchar ch)
909 Bufbyte str[MAX_EMCHAR_LEN];
910 Bytecount len = set_charptr_emchar (str, ch);
911 return Lstream_write (stream, str, len);
915 Lstream_funget_emchar (Lstream *stream, Emchar ch)
917 Bufbyte str[MAX_EMCHAR_LEN];
918 Bytecount len = set_charptr_emchar (str, ch);
919 Lstream_unread (stream, str, len);
923 /************************************************************************/
925 /************************************************************************/
928 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
930 struct Lisp_Charset *cs = XCHARSET (obj);
932 markobj (cs->short_name);
933 markobj (cs->long_name);
934 markobj (cs->doc_string);
935 markobj (cs->registry);
936 markobj (cs->ccl_program);
941 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
943 struct Lisp_Charset *cs = XCHARSET (obj);
947 error ("printing unreadable object #<charset %s 0x%x>",
948 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
951 write_c_string ("#<charset ", printcharfun);
952 print_internal (CHARSET_NAME (cs), printcharfun, 0);
953 write_c_string (" ", printcharfun);
954 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
955 write_c_string (" ", printcharfun);
956 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
957 write_c_string (" ", printcharfun);
958 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
959 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
960 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
961 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
962 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
964 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
965 CHARSET_COLUMNS (cs),
966 CHARSET_GRAPHIC (cs),
968 write_c_string (buf, printcharfun);
969 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
970 sprintf (buf, " 0x%x>", cs->header.uid);
971 write_c_string (buf, printcharfun);
974 static const struct lrecord_description charset_description[] = {
975 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
979 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
980 mark_charset, print_charset, 0, 0, 0, charset_description,
981 struct Lisp_Charset);
982 /* Make a new charset. */
985 make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
986 unsigned char type, unsigned char columns, unsigned char graphic,
987 Bufbyte final, unsigned char direction, Lisp_Object short_name,
988 Lisp_Object long_name, Lisp_Object doc,
992 struct Lisp_Charset *cs =
993 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
994 XSETCHARSET (obj, cs);
996 CHARSET_ID (cs) = id;
997 CHARSET_NAME (cs) = name;
998 CHARSET_SHORT_NAME (cs) = short_name;
999 CHARSET_LONG_NAME (cs) = long_name;
1000 CHARSET_REP_BYTES (cs) = rep_bytes;
1001 CHARSET_DIRECTION (cs) = direction;
1002 CHARSET_TYPE (cs) = type;
1003 CHARSET_COLUMNS (cs) = columns;
1004 CHARSET_GRAPHIC (cs) = graphic;
1005 CHARSET_FINAL (cs) = final;
1006 CHARSET_DOC_STRING (cs) = doc;
1007 CHARSET_REGISTRY (cs) = reg;
1008 CHARSET_CCL_PROGRAM (cs) = Qnil;
1009 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1011 switch ( CHARSET_TYPE (cs) )
1013 case CHARSET_TYPE_94:
1014 CHARSET_DIMENSION (cs) = 1;
1015 CHARSET_CHARS (cs) = 94;
1017 case CHARSET_TYPE_96:
1018 CHARSET_DIMENSION (cs) = 1;
1019 CHARSET_CHARS (cs) = 96;
1021 case CHARSET_TYPE_94X94:
1022 CHARSET_DIMENSION (cs) = 2;
1023 CHARSET_CHARS (cs) = 94;
1025 case CHARSET_TYPE_96X96:
1026 CHARSET_DIMENSION (cs) = 2;
1027 CHARSET_CHARS (cs) = 96;
1030 case CHARSET_TYPE_128X128:
1031 CHARSET_DIMENSION (cs) = 2;
1032 CHARSET_CHARS (cs) = 128;
1034 case CHARSET_TYPE_256X256:
1035 CHARSET_DIMENSION (cs) = 2;
1036 CHARSET_CHARS (cs) = 256;
1043 /* some charsets do not have final characters. This includes
1044 ASCII, Control-1, Composite, and the two faux private
1047 assert (NILP (charset_by_attributes[type][final]));
1048 charset_by_attributes[type][final] = obj;
1050 assert (NILP (charset_by_attributes[type][final][direction]));
1051 charset_by_attributes[type][final][direction] = obj;
1055 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1056 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1059 /* official leading byte */
1060 rep_bytes_by_first_byte[id] = rep_bytes;
1063 /* Some charsets are "faux" and don't have names or really exist at
1064 all except in the leading-byte table. */
1066 Fputhash (name, obj, Vcharset_hash_table);
1071 get_unallocated_leading_byte (int dimension)
1077 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1080 lb = next_allocated_1_byte_leading_byte++;
1084 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1087 lb = next_allocated_2_byte_leading_byte++;
1092 ("No more character sets free for this dimension",
1093 make_int (dimension));
1099 /************************************************************************/
1100 /* Basic charset Lisp functions */
1101 /************************************************************************/
1103 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1104 Return non-nil if OBJECT is a charset.
1108 return CHARSETP (object) ? Qt : Qnil;
1111 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1112 Retrieve the charset of the given name.
1113 If CHARSET-OR-NAME is a charset object, it is simply returned.
1114 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1115 nil is returned. Otherwise the associated charset object is returned.
1119 if (CHARSETP (charset_or_name))
1120 return charset_or_name;
1122 CHECK_SYMBOL (charset_or_name);
1123 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1126 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1127 Retrieve the charset of the given name.
1128 Same as `find-charset' except an error is signalled if there is no such
1129 charset instead of returning nil.
1133 Lisp_Object charset = Ffind_charset (name);
1136 signal_simple_error ("No such charset", name);
1140 /* We store the charsets in hash tables with the names as the key and the
1141 actual charset object as the value. Occasionally we need to use them
1142 in a list format. These routines provide us with that. */
1143 struct charset_list_closure
1145 Lisp_Object *charset_list;
1149 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1150 void *charset_list_closure)
1152 /* This function can GC */
1153 struct charset_list_closure *chcl =
1154 (struct charset_list_closure*) charset_list_closure;
1155 Lisp_Object *charset_list = chcl->charset_list;
1157 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1161 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1162 Return a list of the names of all defined charsets.
1166 Lisp_Object charset_list = Qnil;
1167 struct gcpro gcpro1;
1168 struct charset_list_closure charset_list_closure;
1170 GCPRO1 (charset_list);
1171 charset_list_closure.charset_list = &charset_list;
1172 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1173 &charset_list_closure);
1176 return charset_list;
1179 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1180 Return the name of the given charset.
1184 return XCHARSET_NAME (Fget_charset (charset));
1187 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1188 Define a new character set.
1189 This function is for use with Mule support.
1190 NAME is a symbol, the name by which the character set is normally referred.
1191 DOC-STRING is a string describing the character set.
1192 PROPS is a property list, describing the specific nature of the
1193 character set. Recognized properties are:
1195 'short-name Short version of the charset name (ex: Latin-1)
1196 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1197 'registry A regular expression matching the font registry field for
1199 'dimension Number of octets used to index a character in this charset.
1200 Either 1 or 2. Defaults to 1.
1201 'columns Number of columns used to display a character in this charset.
1202 Only used in TTY mode. (Under X, the actual width of a
1203 character can be derived from the font used to display the
1204 characters.) If unspecified, defaults to the dimension
1205 (this is almost always the correct value).
1206 'chars Number of characters in each dimension (94 or 96).
1207 Defaults to 94. Note that if the dimension is 2, the
1208 character set thus described is 94x94 or 96x96.
1209 'final Final byte of ISO 2022 escape sequence. Must be
1210 supplied. Each combination of (DIMENSION, CHARS) defines a
1211 separate namespace for final bytes. Note that ISO
1212 2022 restricts the final byte to the range
1213 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1214 dimension == 2. Note also that final bytes in the range
1215 0x30 - 0x3F are reserved for user-defined (not official)
1217 'graphic 0 (use left half of font on output) or 1 (use right half
1218 of font on output). Defaults to 0. For example, for
1219 a font whose registry is ISO8859-1, the left half
1220 (octets 0x20 - 0x7F) is the `ascii' character set, while
1221 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1222 character set. With 'graphic set to 0, the octets
1223 will have their high bit cleared; with it set to 1,
1224 the octets will have their high bit set.
1225 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1227 'ccl-program A compiled CCL program used to convert a character in
1228 this charset into an index into the font. This is in
1229 addition to the 'graphic property. The CCL program
1230 is passed the octets of the character, with the high
1231 bit cleared and set depending upon whether the value
1232 of the 'graphic property is 0 or 1.
1234 (name, doc_string, props))
1236 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1237 int direction = CHARSET_LEFT_TO_RIGHT;
1239 Lisp_Object registry = Qnil;
1240 Lisp_Object charset;
1241 Lisp_Object rest, keyword, value;
1242 Lisp_Object ccl_program = Qnil;
1243 Lisp_Object short_name = Qnil, long_name = Qnil;
1245 CHECK_SYMBOL (name);
1246 if (!NILP (doc_string))
1247 CHECK_STRING (doc_string);
1249 charset = Ffind_charset (name);
1250 if (!NILP (charset))
1251 signal_simple_error ("Cannot redefine existing charset", name);
1253 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1255 if (EQ (keyword, Qshort_name))
1257 CHECK_STRING (value);
1261 if (EQ (keyword, Qlong_name))
1263 CHECK_STRING (value);
1267 else if (EQ (keyword, Qdimension))
1270 dimension = XINT (value);
1271 if (dimension < 1 || dimension > 2)
1272 signal_simple_error ("Invalid value for 'dimension", value);
1275 else if (EQ (keyword, Qchars))
1278 chars = XINT (value);
1279 if (chars != 94 && chars != 96)
1280 signal_simple_error ("Invalid value for 'chars", value);
1283 else if (EQ (keyword, Qcolumns))
1286 columns = XINT (value);
1287 if (columns != 1 && columns != 2)
1288 signal_simple_error ("Invalid value for 'columns", value);
1291 else if (EQ (keyword, Qgraphic))
1294 graphic = XINT (value);
1295 if (graphic < 0 || graphic > 1)
1296 signal_simple_error ("Invalid value for 'graphic", value);
1299 else if (EQ (keyword, Qregistry))
1301 CHECK_STRING (value);
1305 else if (EQ (keyword, Qdirection))
1307 if (EQ (value, Ql2r))
1308 direction = CHARSET_LEFT_TO_RIGHT;
1309 else if (EQ (value, Qr2l))
1310 direction = CHARSET_RIGHT_TO_LEFT;
1312 signal_simple_error ("Invalid value for 'direction", value);
1315 else if (EQ (keyword, Qfinal))
1317 CHECK_CHAR_COERCE_INT (value);
1318 final = XCHAR (value);
1319 if (final < '0' || final > '~')
1320 signal_simple_error ("Invalid value for 'final", value);
1323 else if (EQ (keyword, Qccl_program))
1325 CHECK_VECTOR (value);
1326 ccl_program = value;
1330 signal_simple_error ("Unrecognized property", keyword);
1334 error ("'final must be specified");
1335 if (dimension == 2 && final > 0x5F)
1337 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1341 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1343 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1345 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1346 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1348 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1355 /* id = CHARSET_ID_OFFSET_94 + final; */
1356 id = get_unallocated_leading_byte (dimension);
1358 else if (chars == 96)
1360 id = get_unallocated_leading_byte (dimension);
1367 else if (dimension == 2)
1371 id = get_unallocated_leading_byte (dimension);
1373 else if (chars == 96)
1375 id = get_unallocated_leading_byte (dimension);
1387 id = get_unallocated_leading_byte (dimension);
1390 if (NILP (doc_string))
1391 doc_string = build_string ("");
1393 if (NILP (registry))
1394 registry = build_string ("");
1396 if (NILP (short_name))
1397 XSETSTRING (short_name, XSYMBOL (name)->name);
1399 if (NILP (long_name))
1400 long_name = doc_string;
1403 columns = dimension;
1404 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
1405 final, direction, short_name, long_name, doc_string, registry);
1406 if (!NILP (ccl_program))
1407 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1411 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1413 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1414 NEW-NAME is the name of the new charset. Return the new charset.
1416 (charset, new_name))
1418 Lisp_Object new_charset = Qnil;
1419 int id, dimension, columns, graphic, final;
1420 int direction, type;
1421 Lisp_Object registry, doc_string, short_name, long_name;
1422 struct Lisp_Charset *cs;
1424 charset = Fget_charset (charset);
1425 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1426 signal_simple_error ("Charset already has reverse-direction charset",
1429 CHECK_SYMBOL (new_name);
1430 if (!NILP (Ffind_charset (new_name)))
1431 signal_simple_error ("Cannot redefine existing charset", new_name);
1433 cs = XCHARSET (charset);
1435 type = CHARSET_TYPE (cs);
1436 columns = CHARSET_COLUMNS (cs);
1437 dimension = CHARSET_DIMENSION (cs);
1438 id = get_unallocated_leading_byte (dimension);
1440 graphic = CHARSET_GRAPHIC (cs);
1441 final = CHARSET_FINAL (cs);
1442 direction = CHARSET_RIGHT_TO_LEFT;
1443 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1444 direction = CHARSET_LEFT_TO_RIGHT;
1445 doc_string = CHARSET_DOC_STRING (cs);
1446 short_name = CHARSET_SHORT_NAME (cs);
1447 long_name = CHARSET_LONG_NAME (cs);
1448 registry = CHARSET_REGISTRY (cs);
1450 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
1451 graphic, final, direction, short_name, long_name,
1452 doc_string, registry);
1454 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1455 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1460 /* #### Reverse direction charsets not yet implemented. */
1462 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1464 Return the reverse-direction charset parallel to CHARSET, if any.
1465 This is the charset with the same properties (in particular, the same
1466 dimension, number of characters per dimension, and final byte) as
1467 CHARSET but whose characters are displayed in the opposite direction.
1471 charset = Fget_charset (charset);
1472 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1476 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1477 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1478 If DIRECTION is omitted, both directions will be checked (left-to-right
1479 will be returned if character sets exist for both directions).
1481 (dimension, chars, final, direction))
1483 int dm, ch, fi, di = -1;
1485 Lisp_Object obj = Qnil;
1487 CHECK_INT (dimension);
1488 dm = XINT (dimension);
1489 if (dm < 1 || dm > 2)
1490 signal_simple_error ("Invalid value for DIMENSION", dimension);
1494 if (ch != 94 && ch != 96)
1495 signal_simple_error ("Invalid value for CHARS", chars);
1497 CHECK_CHAR_COERCE_INT (final);
1499 if (fi < '0' || fi > '~')
1500 signal_simple_error ("Invalid value for FINAL", final);
1502 if (EQ (direction, Ql2r))
1503 di = CHARSET_LEFT_TO_RIGHT;
1504 else if (EQ (direction, Qr2l))
1505 di = CHARSET_RIGHT_TO_LEFT;
1506 else if (!NILP (direction))
1507 signal_simple_error ("Invalid value for DIRECTION", direction);
1509 if (dm == 2 && fi > 0x5F)
1511 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1514 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1516 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1520 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1522 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1525 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1528 return XCHARSET_NAME (obj);
1532 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1533 Return short name of CHARSET.
1537 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1540 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1541 Return long name of CHARSET.
1545 return XCHARSET_LONG_NAME (Fget_charset (charset));
1548 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1549 Return description of CHARSET.
1553 return XCHARSET_DOC_STRING (Fget_charset (charset));
1556 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1557 Return dimension of CHARSET.
1561 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1564 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1565 Return property PROP of CHARSET.
1566 Recognized properties are those listed in `make-charset', as well as
1567 'name and 'doc-string.
1571 struct Lisp_Charset *cs;
1573 charset = Fget_charset (charset);
1574 cs = XCHARSET (charset);
1576 CHECK_SYMBOL (prop);
1577 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1578 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1579 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1580 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1581 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1582 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1583 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1584 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1585 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1586 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1587 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1588 if (EQ (prop, Qdirection))
1589 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1590 if (EQ (prop, Qreverse_direction_charset))
1592 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1596 return XCHARSET_NAME (obj);
1598 signal_simple_error ("Unrecognized charset property name", prop);
1599 return Qnil; /* not reached */
1602 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1603 Return charset identification number of CHARSET.
1607 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1610 /* #### We need to figure out which properties we really want to
1613 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1614 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1616 (charset, ccl_program))
1618 charset = Fget_charset (charset);
1619 CHECK_VECTOR (ccl_program);
1620 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1625 invalidate_charset_font_caches (Lisp_Object charset)
1627 /* Invalidate font cache entries for charset on all devices. */
1628 Lisp_Object devcons, concons, hash_table;
1629 DEVICE_LOOP_NO_BREAK (devcons, concons)
1631 struct device *d = XDEVICE (XCAR (devcons));
1632 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1633 if (!UNBOUNDP (hash_table))
1634 Fclrhash (hash_table);
1638 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1639 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1640 Set the 'registry property of CHARSET to REGISTRY.
1642 (charset, registry))
1644 charset = Fget_charset (charset);
1645 CHECK_STRING (registry);
1646 XCHARSET_REGISTRY (charset) = registry;
1647 invalidate_charset_font_caches (charset);
1648 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1653 /************************************************************************/
1654 /* Lisp primitives for working with characters */
1655 /************************************************************************/
1657 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1658 Make a character from CHARSET and octets ARG1 and ARG2.
1659 ARG2 is required only for characters from two-dimensional charsets.
1660 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1661 character s with caron.
1663 (charset, arg1, arg2))
1665 struct Lisp_Charset *cs;
1667 int lowlim, highlim;
1669 charset = Fget_charset (charset);
1670 cs = XCHARSET (charset);
1672 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1673 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1675 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1677 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1678 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1681 /* It is useful (and safe, according to Olivier Galibert) to strip
1682 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1683 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1684 Latin 2 code of the character. */
1692 if (a1 < lowlim || a1 > highlim)
1693 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1695 if (CHARSET_DIMENSION (cs) == 1)
1699 ("Charset is of dimension one; second octet must be nil", arg2);
1700 return make_char (MAKE_CHAR (charset, a1, 0));
1709 a2 = XINT (arg2) & 0x7f;
1711 if (a2 < lowlim || a2 > highlim)
1712 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1714 return make_char (MAKE_CHAR (charset, a1, a2));
1717 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1718 Return the character set of char CH.
1722 CHECK_CHAR_COERCE_INT (ch);
1724 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1727 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1728 Return list of charset and one or two position-codes of CHAR.
1732 /* This function can GC */
1733 struct gcpro gcpro1, gcpro2;
1734 Lisp_Object charset = Qnil;
1735 Lisp_Object rc = Qnil;
1738 GCPRO2 (charset, rc);
1739 CHECK_CHAR_COERCE_INT (character);
1741 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1743 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1745 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1749 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1757 #ifdef ENABLE_COMPOSITE_CHARS
1758 /************************************************************************/
1759 /* composite character functions */
1760 /************************************************************************/
1763 lookup_composite_char (Bufbyte *str, int len)
1765 Lisp_Object lispstr = make_string (str, len);
1766 Lisp_Object ch = Fgethash (lispstr,
1767 Vcomposite_char_string2char_hash_table,
1773 if (composite_char_row_next >= 128)
1774 signal_simple_error ("No more composite chars available", lispstr);
1775 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1776 composite_char_col_next);
1777 Fputhash (make_char (emch), lispstr,
1778 Vcomposite_char_char2string_hash_table);
1779 Fputhash (lispstr, make_char (emch),
1780 Vcomposite_char_string2char_hash_table);
1781 composite_char_col_next++;
1782 if (composite_char_col_next >= 128)
1784 composite_char_col_next = 32;
1785 composite_char_row_next++;
1794 composite_char_string (Emchar ch)
1796 Lisp_Object str = Fgethash (make_char (ch),
1797 Vcomposite_char_char2string_hash_table,
1799 assert (!UNBOUNDP (str));
1803 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1804 Convert a string into a single composite character.
1805 The character is the result of overstriking all the characters in
1810 CHECK_STRING (string);
1811 return make_char (lookup_composite_char (XSTRING_DATA (string),
1812 XSTRING_LENGTH (string)));
1815 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1816 Return a string of the characters comprising a composite character.
1824 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1825 signal_simple_error ("Must be composite char", ch);
1826 return composite_char_string (emch);
1828 #endif /* ENABLE_COMPOSITE_CHARS */
1831 /************************************************************************/
1832 /* initialization */
1833 /************************************************************************/
1836 syms_of_mule_charset (void)
1838 DEFSUBR (Fcharsetp);
1839 DEFSUBR (Ffind_charset);
1840 DEFSUBR (Fget_charset);
1841 DEFSUBR (Fcharset_list);
1842 DEFSUBR (Fcharset_name);
1843 DEFSUBR (Fmake_charset);
1844 DEFSUBR (Fmake_reverse_direction_charset);
1845 /* DEFSUBR (Freverse_direction_charset); */
1846 DEFSUBR (Fcharset_from_attributes);
1847 DEFSUBR (Fcharset_short_name);
1848 DEFSUBR (Fcharset_long_name);
1849 DEFSUBR (Fcharset_description);
1850 DEFSUBR (Fcharset_dimension);
1851 DEFSUBR (Fcharset_property);
1852 DEFSUBR (Fcharset_id);
1853 DEFSUBR (Fset_charset_ccl_program);
1854 DEFSUBR (Fset_charset_registry);
1856 DEFSUBR (Fmake_char);
1857 DEFSUBR (Fchar_charset);
1858 DEFSUBR (Fsplit_char);
1860 #ifdef ENABLE_COMPOSITE_CHARS
1861 DEFSUBR (Fmake_composite_char);
1862 DEFSUBR (Fcomposite_char_string);
1865 defsymbol (&Qcharsetp, "charsetp");
1866 defsymbol (&Qregistry, "registry");
1867 defsymbol (&Qfinal, "final");
1868 defsymbol (&Qgraphic, "graphic");
1869 defsymbol (&Qdirection, "direction");
1870 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1871 defsymbol (&Qshort_name, "short-name");
1872 defsymbol (&Qlong_name, "long-name");
1874 defsymbol (&Ql2r, "l2r");
1875 defsymbol (&Qr2l, "r2l");
1877 /* Charsets, compatible with FSF 20.3
1878 Naming convention is Script-Charset[-Edition] */
1879 defsymbol (&Qascii, "ascii");
1880 defsymbol (&Qcontrol_1, "control-1");
1881 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1882 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1883 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1884 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1885 defsymbol (&Qthai_tis620, "thai-tis620");
1886 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1887 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1888 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1889 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1890 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1891 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1892 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1893 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1894 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1895 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1896 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1897 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1898 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1899 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1901 defsymbol (&Qchinese_cns11643_3, "chinese-cns11643-3");
1902 defsymbol (&Qchinese_cns11643_4, "chinese-cns11643-4");
1903 defsymbol (&Qchinese_cns11643_5, "chinese-cns11643-5");
1904 defsymbol (&Qchinese_cns11643_6, "chinese-cns11643-6");
1905 defsymbol (&Qchinese_cns11643_7, "chinese-cns11643-7");
1906 defsymbol (&Qucs_bmp, "ucs-bmp");
1908 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1909 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1911 defsymbol (&Qcomposite, "composite");
1915 vars_of_mule_charset (void)
1922 /* Table of charsets indexed by leading byte. */
1923 for (i = 0; i < countof (charset_by_leading_byte); i++)
1924 charset_by_leading_byte[i] = Qnil;
1927 /* Table of charsets indexed by type/final-byte. */
1928 for (i = 0; i < countof (charset_by_attributes); i++)
1929 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1930 charset_by_attributes[i][j] = Qnil;
1932 /* Table of charsets indexed by type/final-byte/direction. */
1933 for (i = 0; i < countof (charset_by_attributes); i++)
1934 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1935 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1936 charset_by_attributes[i][j][k] = Qnil;
1939 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1941 next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
1943 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1947 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1948 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
1949 Leading-code of private TYPE9N charset of column-width 1.
1951 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
1955 Vutf_2000_version = build_string("0.6 (Tōbushijō-mae)");
1956 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
1957 Version number of UTF-2000.
1963 complex_vars_of_mule_charset (void)
1965 staticpro (&Vcharset_hash_table);
1966 Vcharset_hash_table =
1967 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1969 /* Predefined character sets. We store them into variables for
1974 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 1,
1975 CHARSET_TYPE_256X256, 1, 0, 0,
1976 CHARSET_LEFT_TO_RIGHT,
1977 build_string ("BMP"),
1978 build_string ("BMP"),
1979 build_string ("BMP"),
1983 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1984 CHARSET_TYPE_94, 1, 0, 'B',
1985 CHARSET_LEFT_TO_RIGHT,
1986 build_string ("ASCII"),
1987 build_string ("ASCII)"),
1988 build_string ("ASCII (ISO646 IRV)"),
1989 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1990 Vcharset_control_1 =
1991 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1992 CHARSET_TYPE_94, 1, 1, 0,
1993 CHARSET_LEFT_TO_RIGHT,
1994 build_string ("C1"),
1995 build_string ("Control characters"),
1996 build_string ("Control characters 128-191"),
1998 Vcharset_latin_iso8859_1 =
1999 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
2000 CHARSET_TYPE_96, 1, 1, 'A',
2001 CHARSET_LEFT_TO_RIGHT,
2002 build_string ("Latin-1"),
2003 build_string ("ISO8859-1 (Latin-1)"),
2004 build_string ("ISO8859-1 (Latin-1)"),
2005 build_string ("iso8859-1"));
2006 Vcharset_latin_iso8859_2 =
2007 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
2008 CHARSET_TYPE_96, 1, 1, 'B',
2009 CHARSET_LEFT_TO_RIGHT,
2010 build_string ("Latin-2"),
2011 build_string ("ISO8859-2 (Latin-2)"),
2012 build_string ("ISO8859-2 (Latin-2)"),
2013 build_string ("iso8859-2"));
2014 Vcharset_latin_iso8859_3 =
2015 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
2016 CHARSET_TYPE_96, 1, 1, 'C',
2017 CHARSET_LEFT_TO_RIGHT,
2018 build_string ("Latin-3"),
2019 build_string ("ISO8859-3 (Latin-3)"),
2020 build_string ("ISO8859-3 (Latin-3)"),
2021 build_string ("iso8859-3"));
2022 Vcharset_latin_iso8859_4 =
2023 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
2024 CHARSET_TYPE_96, 1, 1, 'D',
2025 CHARSET_LEFT_TO_RIGHT,
2026 build_string ("Latin-4"),
2027 build_string ("ISO8859-4 (Latin-4)"),
2028 build_string ("ISO8859-4 (Latin-4)"),
2029 build_string ("iso8859-4"));
2030 Vcharset_thai_tis620 =
2031 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
2032 CHARSET_TYPE_96, 1, 1, 'T',
2033 CHARSET_LEFT_TO_RIGHT,
2034 build_string ("TIS620"),
2035 build_string ("TIS620 (Thai)"),
2036 build_string ("TIS620.2529 (Thai)"),
2037 build_string ("tis620"));
2038 Vcharset_greek_iso8859_7 =
2039 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
2040 CHARSET_TYPE_96, 1, 1, 'F',
2041 CHARSET_LEFT_TO_RIGHT,
2042 build_string ("ISO8859-7"),
2043 build_string ("ISO8859-7 (Greek)"),
2044 build_string ("ISO8859-7 (Greek)"),
2045 build_string ("iso8859-7"));
2046 Vcharset_arabic_iso8859_6 =
2047 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
2048 CHARSET_TYPE_96, 1, 1, 'G',
2049 CHARSET_RIGHT_TO_LEFT,
2050 build_string ("ISO8859-6"),
2051 build_string ("ISO8859-6 (Arabic)"),
2052 build_string ("ISO8859-6 (Arabic)"),
2053 build_string ("iso8859-6"));
2054 Vcharset_hebrew_iso8859_8 =
2055 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
2056 CHARSET_TYPE_96, 1, 1, 'H',
2057 CHARSET_RIGHT_TO_LEFT,
2058 build_string ("ISO8859-8"),
2059 build_string ("ISO8859-8 (Hebrew)"),
2060 build_string ("ISO8859-8 (Hebrew)"),
2061 build_string ("iso8859-8"));
2062 Vcharset_katakana_jisx0201 =
2063 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
2064 CHARSET_TYPE_94, 1, 1, 'I',
2065 CHARSET_LEFT_TO_RIGHT,
2066 build_string ("JISX0201 Kana"),
2067 build_string ("JISX0201.1976 (Japanese Kana)"),
2068 build_string ("JISX0201.1976 Japanese Kana"),
2069 build_string ("jisx0201.1976"));
2070 Vcharset_latin_jisx0201 =
2071 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
2072 CHARSET_TYPE_94, 1, 0, 'J',
2073 CHARSET_LEFT_TO_RIGHT,
2074 build_string ("JISX0201 Roman"),
2075 build_string ("JISX0201.1976 (Japanese Roman)"),
2076 build_string ("JISX0201.1976 Japanese Roman"),
2077 build_string ("jisx0201.1976"));
2078 Vcharset_cyrillic_iso8859_5 =
2079 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
2080 CHARSET_TYPE_96, 1, 1, 'L',
2081 CHARSET_LEFT_TO_RIGHT,
2082 build_string ("ISO8859-5"),
2083 build_string ("ISO8859-5 (Cyrillic)"),
2084 build_string ("ISO8859-5 (Cyrillic)"),
2085 build_string ("iso8859-5"));
2086 Vcharset_latin_iso8859_9 =
2087 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
2088 CHARSET_TYPE_96, 1, 1, 'M',
2089 CHARSET_LEFT_TO_RIGHT,
2090 build_string ("Latin-5"),
2091 build_string ("ISO8859-9 (Latin-5)"),
2092 build_string ("ISO8859-9 (Latin-5)"),
2093 build_string ("iso8859-9"));
2094 Vcharset_japanese_jisx0208_1978 =
2095 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
2096 CHARSET_TYPE_94X94, 2, 0, '@',
2097 CHARSET_LEFT_TO_RIGHT,
2098 build_string ("JISX0208.1978"),
2099 build_string ("JISX0208.1978 (Japanese)"),
2101 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
2102 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
2103 Vcharset_chinese_gb2312 =
2104 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
2105 CHARSET_TYPE_94X94, 2, 0, 'A',
2106 CHARSET_LEFT_TO_RIGHT,
2107 build_string ("GB2312"),
2108 build_string ("GB2312)"),
2109 build_string ("GB2312 Chinese simplified"),
2110 build_string ("gb2312"));
2111 Vcharset_japanese_jisx0208 =
2112 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
2113 CHARSET_TYPE_94X94, 2, 0, 'B',
2114 CHARSET_LEFT_TO_RIGHT,
2115 build_string ("JISX0208"),
2116 build_string ("JISX0208.1983/1990 (Japanese)"),
2117 build_string ("JISX0208.1983/1990 Japanese Kanji"),
2118 build_string ("jisx0208.19\\(83\\|90\\)"));
2119 Vcharset_korean_ksc5601 =
2120 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
2121 CHARSET_TYPE_94X94, 2, 0, 'C',
2122 CHARSET_LEFT_TO_RIGHT,
2123 build_string ("KSC5601"),
2124 build_string ("KSC5601 (Korean"),
2125 build_string ("KSC5601 Korean Hangul and Hanja"),
2126 build_string ("ksc5601"));
2127 Vcharset_japanese_jisx0212 =
2128 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
2129 CHARSET_TYPE_94X94, 2, 0, 'D',
2130 CHARSET_LEFT_TO_RIGHT,
2131 build_string ("JISX0212"),
2132 build_string ("JISX0212 (Japanese)"),
2133 build_string ("JISX0212 Japanese Supplement"),
2134 build_string ("jisx0212"));
2136 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2137 Vcharset_chinese_cns11643_1 =
2138 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
2139 CHARSET_TYPE_94X94, 2, 0, 'G',
2140 CHARSET_LEFT_TO_RIGHT,
2141 build_string ("CNS11643-1"),
2142 build_string ("CNS11643-1 (Chinese traditional)"),
2144 ("CNS 11643 Plane 1 Chinese traditional"),
2145 build_string (CHINESE_CNS_PLANE_RE("1")));
2146 Vcharset_chinese_cns11643_2 =
2147 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
2148 CHARSET_TYPE_94X94, 2, 0, 'H',
2149 CHARSET_LEFT_TO_RIGHT,
2150 build_string ("CNS11643-2"),
2151 build_string ("CNS11643-2 (Chinese traditional)"),
2153 ("CNS 11643 Plane 2 Chinese traditional"),
2154 build_string (CHINESE_CNS_PLANE_RE("2")));
2156 Vcharset_chinese_cns11643_3 =
2157 make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3,
2158 CHARSET_TYPE_94X94, 2, 0, 'I',
2159 CHARSET_LEFT_TO_RIGHT,
2160 build_string ("CNS11643-3"),
2161 build_string ("CNS11643-3 (Chinese traditional)"),
2163 ("CNS 11643 Plane 3 Chinese traditional"),
2164 build_string (CHINESE_CNS_PLANE_RE("3")));
2165 Vcharset_chinese_cns11643_4 =
2166 make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3,
2167 CHARSET_TYPE_94X94, 2, 0, 'J',
2168 CHARSET_LEFT_TO_RIGHT,
2169 build_string ("CNS11643-4"),
2170 build_string ("CNS11643-4 (Chinese traditional)"),
2172 ("CNS 11643 Plane 4 Chinese traditional"),
2173 build_string (CHINESE_CNS_PLANE_RE("4")));
2174 Vcharset_chinese_cns11643_5 =
2175 make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3,
2176 CHARSET_TYPE_94X94, 2, 0, 'K',
2177 CHARSET_LEFT_TO_RIGHT,
2178 build_string ("CNS11643-5"),
2179 build_string ("CNS11643-5 (Chinese traditional)"),
2181 ("CNS 11643 Plane 5 Chinese traditional"),
2182 build_string (CHINESE_CNS_PLANE_RE("5")));
2183 Vcharset_chinese_cns11643_6 =
2184 make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3,
2185 CHARSET_TYPE_94X94, 2, 0, 'L',
2186 CHARSET_LEFT_TO_RIGHT,
2187 build_string ("CNS11643-6"),
2188 build_string ("CNS11643-6 (Chinese traditional)"),
2190 ("CNS 11643 Plane 6 Chinese traditional"),
2191 build_string (CHINESE_CNS_PLANE_RE("6")));
2192 Vcharset_chinese_cns11643_7 =
2193 make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3,
2194 CHARSET_TYPE_94X94, 2, 0, 'M',
2195 CHARSET_LEFT_TO_RIGHT,
2196 build_string ("CNS11643-7"),
2197 build_string ("CNS11643-7 (Chinese traditional)"),
2199 ("CNS 11643 Plane 7 Chinese traditional"),
2200 build_string (CHINESE_CNS_PLANE_RE("7")));
2202 Vcharset_chinese_big5_1 =
2203 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
2204 CHARSET_TYPE_94X94, 2, 0, '0',
2205 CHARSET_LEFT_TO_RIGHT,
2206 build_string ("Big5"),
2207 build_string ("Big5 (Level-1)"),
2209 ("Big5 Level-1 Chinese traditional"),
2210 build_string ("big5"));
2211 Vcharset_chinese_big5_2 =
2212 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
2213 CHARSET_TYPE_94X94, 2, 0, '1',
2214 CHARSET_LEFT_TO_RIGHT,
2215 build_string ("Big5"),
2216 build_string ("Big5 (Level-2)"),
2218 ("Big5 Level-2 Chinese traditional"),
2219 build_string ("big5"));
2222 #ifdef ENABLE_COMPOSITE_CHARS
2223 /* #### For simplicity, we put composite chars into a 96x96 charset.
2224 This is going to lead to problems because you can run out of
2225 room, esp. as we don't yet recycle numbers. */
2226 Vcharset_composite =
2227 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
2228 CHARSET_TYPE_96X96, 2, 0, 0,
2229 CHARSET_LEFT_TO_RIGHT,
2230 build_string ("Composite"),
2231 build_string ("Composite characters"),
2232 build_string ("Composite characters"),
2235 composite_char_row_next = 32;
2236 composite_char_col_next = 32;
2238 Vcomposite_char_string2char_hash_table =
2239 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2240 Vcomposite_char_char2string_hash_table =
2241 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2242 staticpro (&Vcomposite_char_string2char_hash_table);
2243 staticpro (&Vcomposite_char_char2string_hash_table);
2244 #endif /* ENABLE_COMPOSITE_CHARS */