1 /* charset.c -- charset module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 @addtogroup m17nCharset
25 @brief Charset objects and API for them.
27 The m17n library uses @e charset objects to represent coded
28 character sets (CCS). The m17n library supports many predefined
29 coded character sets. Moreover, application programs can add
30 other charsets. A character can belong to multiple charsets.
32 The m17n library distinguishes the following three concepts:
34 @li A @e code-point is a number assigned by the CCS to each
35 character. Code-points may or may not be continuous. The type
36 @c unsigned is used to represent a code-point. An invalid
37 code-point is represented by the macro @c MCHAR_INVALID_CODE.
39 @li A @e character @e index is the canonical index of a character
40 in a CCS. The character that has the character index N occupies
41 the Nth position when all the characters in the current CCS are
42 sorted by their code-points. Character indices in a CCS are
43 continuous and start with 0.
45 @li A @e character @e code is the internal representation in the
46 m17n library of a character. A character code is a signed integer
49 Each charset object defines how characters are converted between
50 code-points and character codes. To @e decode means converting
51 code-points to character codes and to @e encode means converting
52 character codes to code-points. */
55 @addtogroup m17nCharset
56 @brief ʸ»ú¥»¥Ã¥È¥ª¥Ö¥¸¥§¥¯¥È¤È¤½¤ì¤Ë´Ø¤¹¤ë API
58 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢Éä¹æ²½Ê¸»ú½¸¹ç (CCS) ¤ò @e ʸ»ú¥»¥Ã¥È ¤È¸Æ¤Ö¥ª
59 ¥Ö¥¸¥§¥¯¥È¤Çɽ¸½¤¹¤ë¡£m17n ¥é¥¤¥Ö¥é¥ê¤Ï¿¤¯¤ÎÉä¹æ²½Ê¸»ú½¸¹ç¤òͽ¤á
60 ¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¤¬¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥à¤¬Æȼ«¤Ëʸ»ú¥»¥Ã¥È¤ò
61 Äɲ乤뤳¤È¤â²Äǽ¤Ç¤¢¤ë¡£°ì¤Ä¤Îʸ»ú¤ÏÊ£¿ô¤Îʸ»ú¥»¥Ã¥È¤Ë°¤·¤Æ¤â¤è
64 m17n ¥é¥¤¥Ö¥é¥ê¤Ë¤Ï¡¢°Ê²¼¤Î°Û¤Ê¤ë³µÇ°¤¬¤¢¤ë:
66 @li @e ¥³¡¼¥É¥Ý¥¤¥ó¥È ¤È¤Ï¡¢CCS ¤¬¤½¤ÎÃæ¤Î¸Ä¡¹¤Îʸ»ú¤ËÂФ·¤ÆÄêµÁ¤¹
67 ¤ë¿ôÃͤǤ¢¤ë¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤ÏϢ³¤·¤Æ¤¤¤ë¤È¤Ï¸Â¤é¤Ê¤¤¡£
69 @li @e ʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹ ¤È¤Ï¡¢CCS Æâ¤Ç³Æʸ»ú¤Ë³ä¤êÅö¤Æ¤é¤ì¤ëÀµµ¬²½¤µ
70 ¤ì¤¿¥¤¥ó¥Ç¥Ã¥¯¥¹¤Ç¤¢¤ë¡£Ê¸»ú¥¤¥ó¥Ç¥Ã¥¯¥¹¤¬N¤Îʸ»ú¤Ï¡¢CCS Ãæ¤ÎÁ´Ê¸»ú¤ò
71 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ç¥½¡¼¥È¤·¤¿¤È¤¤ËNÈÖÌܤËÍè¤ë¡£
73 @li @e ʸ»ú¥³¡¼¥É¤È¤Ï¡¢m17n ¥é¥¤¥Ö¥é¥êÆâ¤Ë¤ª¤±¤ëʸ»ú¤ÎÆâÉôɽ¸½¤Ç¤¢
74 ¤ê¡¢21 ¥Ó¥Ã¥È°Ê¾å¤ÎŤµ¤ò»ý¤ÄÉä¹çÉÕ¤À°¿ô¤Ç¤¢¤ë¡£
76 ³Æʸ»ú¥»¥Ã¥È¥ª¥Ö¥¸¥§¥¯¥È¤Ï¡¢¤½¤ì¤Ë°¤¹¤ëʸ»ú¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú
77 ¥³¡¼¥É¤È¤ÎÁê¸ßÊÑ´¹¤òµ¬Äꤹ¤ë¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤«¤éʸ»ú¥³¡¼¥É¤Ø¤ÎÊÑ´¹
78 ¤ò @e ¥Ç¥³¡¼¥É ¤È¸Æ¤Ó¡¢Ê¸»ú¥³¡¼¥É¤«¤é¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ø¤ÎÊÑ´¹¤ò @e
79 ¥¨¥ó¥³¡¼¥É ¤È¸Æ¤Ö¡£ */
82 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
83 /*** @addtogroup m17nInternal
93 #include "m17n-misc.h"
101 static int unified_max = MCHAR_MAX;
103 /** List of all charsets ever defined. */
111 static struct MCharsetList charset_list;
113 static MPlist *charset_definition_list;
115 /** Make a charset object from the template of MCharset structure
116 CHARSET, and return a pointer to the new charset object.
117 CHARSET->code_range[4N + 2] and CHARSET->code_range[4N + 3] are not
/* Validate the template CHARSET, fill in its derived code-range
   fields and character range according to charset->method, and
   append it to charset_list (and to mcharset__iso_2022_table when it
   has a positive final byte).
   NOTE(review): this listing omits some lines of the function; the
   comments below describe only what the visible lines do.  */
121 make_charset (MCharset *charset)
123 unsigned min_code, max_code;
125 int *range = charset->code_range;
/* Only dimensions 1..4 are accepted.  A positive final byte must lie
   in '0'..127.  */
127 if (charset->dimension < 1 || charset->dimension > 4)
128 MERROR (MERROR_CHARSET, NULL);
129 if ((charset->final_byte > 0 && charset->final_byte < '0')
130 || charset->final_byte > 127)
131 MERROR (MERROR_CHARSET, NULL);
/* For each of the four byte positions I: range[4I] and range[4I + 1]
   are the minimum and maximum byte, range[4I + 2] becomes the number
   of byte values, and range[4I + 3] the running product of those
   counts (so range[15] is the total number of code-points).  */
133 for (i = 0, n = 1; i < 4; i++)
135 if (range[i * 4] > range[i * 4 + 1])
136 MERROR (MERROR_CHARSET, NULL);
137 range[i * 4 + 2] = range[i * 4 + 1] - range[i * 4] + 1;
138 n *= range[i * 4 + 2];
139 range[i * 4 + 3] = n;
/* Pack the per-byte minimums (resp. maximums) into one code-point.
   A min_code/max_code of 0 in the template means "use the packed
   value"; an explicit value outside the packed range is an error.  */
142 min_code = range[0] | (range[4] << 8) | (range[8] << 16) | (range[12] << 24);
143 if (charset->min_code == 0)
144 charset->min_code = min_code;
145 else if (charset->min_code < min_code)
146 MERROR (MERROR_CHARSET, NULL);
147 max_code = range[1] | (range[5] << 8) | (range[9] << 16) | (range[13] << 24);
148 if (charset->max_code == 0)
149 charset->max_code = max_code;
150 else if (charset->max_code > max_code)
151 MERROR (MERROR_CHARSET, NULL);
153 charset->code_range_min_code = min_code;
/* Subset: exactly one parent, which must not be a superset, and the
   offset-adjusted code range must fit inside the parent's range.  */
155 if (charset->method == Msubset)
159 if (charset->nparents != 1)
160 MERROR (MERROR_CHARSET, NULL);
161 parent = charset->parents[0];
162 if (parent->method == Msuperset
163 || charset->min_code - charset->subset_offset < parent->min_code
164 || charset->max_code - charset->subset_offset > parent->max_code)
165 MERROR (MERROR_CHARSET, NULL);
/* With an offset-method parent the character range is obtained by
   decoding just the two end code-points.  */
166 if (parent->method == Moffset)
170 code = charset->min_code - charset->subset_offset;
171 charset->min_char = DECODE_CHAR (parent, code);
172 code = charset->max_code - charset->subset_offset;
173 charset->max_char = DECODE_CHAR (parent, code);
/* Otherwise every code-point of the range is decoded through the
   parent to find the smallest and largest character.  */
177 unsigned min_code = charset->min_code - charset->subset_offset;
178 unsigned max_code = charset->max_code - charset->subset_offset;
179 int min_char = DECODE_CHAR (parent, min_code);
180 int max_char = min_char;
182 for (++min_code; min_code <= max_code; min_code++)
184 int c = DECODE_CHAR (parent, min_code);
190 else if (c > max_char)
194 charset->min_char = min_char;
195 charset->max_char = max_char;
/* Superset: at least two parents, each of whose code range must be
   contained in this charset's code range.  */
199 else if (charset->method == Msuperset)
201 int min_char = 0, max_char = 0;
203 if (charset->nparents < 2)
204 MERROR (MERROR_CHARSET, NULL);
205 for (i = 0; i < charset->nparents; i++)
206 if (charset->min_code > charset->parents[i]->min_code
207 || charset->max_code < charset->parents[i]->max_code)
208 MERROR (MERROR_CHARSET, NULL);
/* NOTE(review): the range check inside this second loop repeats the
   one performed by the loop just above.  */
210 for (i = 0; i < charset->nparents; i++)
212 MCharset *parent = charset->parents[i];
214 if (charset->min_code > parent->min_code
215 || charset->max_code < parent->max_code)
216 MERROR (MERROR_CHARSET, NULL);
/* NOTE(review): because the max_char test is chained with "else if",
   a parent that lowers min_char cannot also raise max_char in the
   same iteration, so max_char may end up too small -- confirm and
   consider two independent tests.  */
218 min_char = parent->min_char, max_char = parent->max_char;
219 else if (parent->min_char < min_char)
220 min_char = parent->min_char;
221 else if (parent->max_char > max_char)
222 max_char = parent->max_char;
224 charset->min_char = min_char;
225 charset->max_char = max_char;
231 = (charset->dimension == 1
233 && (charset->dimension == 2
235 && (charset->dimension == 3
236 || range[10] == 256)))));
/* When the code space has gaps, build a per-byte-value mask: bit I of
   code_range_mask[J] is set when byte value J is valid at byte
   position I.  */
238 if (! charset->no_code_gap)
242 memset (charset->code_range_mask, 0,
243 sizeof charset->code_range_mask);
244 for (i = 0; i < 4; i++)
245 for (j = range[i * 4]; j <= range[i * 4 + 1]; j++)
246 charset->code_range_mask[j] |= (1 << i);
/* Offset method: characters are min_char .. min_char + range[15] - 1
   and must stay within 0 .. unified_max.  */
249 if (charset->method == Moffset)
251 charset->max_char = charset->min_char + range[15] - 1;
252 if (charset->min_char < 0
253 || charset->max_char < 0 || charset->max_char > unified_max)
254 MERROR (MERROR_CHARSET, NULL);
255 charset->simple = charset->no_code_gap;
/* Map and unify methods look up the mapping data in the database
   under <Mcharset, charset->name>.  For Munify a block of character
   codes is first carved off the top of the space below unified_max,
   in units of 1 << 12 codes.  */
257 else if (charset->method == Mmap || charset->method == Munify)
259 MDatabase *mdb = mdatabase_find (Mcharset, charset->name,
264 if (charset->method == Munify)
266 /* The magic number 12 below is to align to the
267 SUB_BITS_2 (defined in chartab.c) boundary in a
269 unified_max -= ((range[15] >> 12) + 1) << 12;
270 charset->unified_max = unified_max;
273 if (! mdb || ! (plist = mdatabase_load (mdb)))
274 MERROR (MERROR_CHARSET, NULL);
275 charset->decoder = mplist_value (plist);
276 charset->encoder = mplist_value (mplist_next (plist));
277 M17N_OBJECT_UNREF (plist);
278 mchartable_range (charset->encoder,
279 &charset->min_char, &charset->max_char);
280 if (charset->method == Mmap)
281 charset->simple = charset->no_code_gap;
284 = charset->unified_max + 1 + charset->code_range[15];
287 MERROR (MERROR_CHARSET, NULL);
/* Register the charset in the global list, and additionally in the
   ISO-2022 table when it has a positive final byte.  */
290 MLIST_APPEND1 (&charset_list, charsets, charset, MERROR_CHARSET);
292 if (charset->final_byte > 0)
294 MLIST_APPEND1 (&mcharset__iso_2022_table, charsets, charset,
296 if (charset->revision <= 0)
298 int chars = range[2];
300 if (chars == 128) /* ASCII case */
302 else if (chars == 256) /* ISO-8859-X case */
304 MCHARSET_ISO_2022 (charset->dimension, chars, charset->final_byte)
309 charset->fully_loaded = 1;
314 load_charset_fully (MCharset *charset)
316 if (charset->method == Msubset)
318 MCharset *parent = charset->parents[0];
320 if (! parent->fully_loaded
321 && load_charset_fully (parent) < 0)
322 MERROR (MERROR_CHARSET, -1);
323 if (parent->method == Moffset)
327 code = charset->min_code - charset->subset_offset;
328 charset->min_char = DECODE_CHAR (parent, code);
329 code = charset->max_code - charset->subset_offset;
330 charset->max_char = DECODE_CHAR (parent, code);
334 unsigned min_code = charset->min_code - charset->subset_offset;
335 unsigned max_code = charset->max_code - charset->subset_offset;
336 int min_char = DECODE_CHAR (parent, min_code);
337 int max_char = min_char;
339 for (++min_code; min_code <= max_code; min_code++)
341 int c = DECODE_CHAR (parent, min_code);
347 else if (c > max_char)
351 charset->min_char = min_char;
352 charset->max_char = max_char;
355 else if (charset->method == Msuperset)
357 int min_char = 0, max_char = 0;
360 for (i = 0; i < charset->nparents; i++)
362 MCharset *parent = charset->parents[i];
364 if (! parent->fully_loaded
365 && load_charset_fully (parent) < 0)
366 MERROR (MERROR_CHARSET, -1);
368 min_char = parent->min_char, max_char = parent->max_char;
369 else if (parent->min_char < min_char)
370 min_char = parent->min_char;
371 else if (parent->max_char > max_char)
372 max_char = parent->max_char;
374 charset->min_char = min_char;
375 charset->max_char = max_char;
377 else /* charset->method is Mmap or Munify */
379 MDatabase *mdb = mdatabase_find (Mcharset, charset->name, Mnil, Mnil);
382 if (! mdb || ! (plist = mdatabase_load (mdb)))
383 MERROR (MERROR_CHARSET, -1);
384 charset->decoder = mplist_value (plist);
385 charset->encoder = mplist_value (mplist_next (plist));
386 M17N_OBJECT_UNREF (plist);
387 mchartable_range (charset->encoder,
388 &charset->min_char, &charset->max_char);
389 if (charset->method == Mmap)
390 charset->simple = charset->no_code_gap;
392 charset->max_char = charset->unified_max + 1 + charset->code_range[15];
395 charset->fully_loaded = 1;
402 MPlist *mcharset__cache;
404 /* Predefined charsets. */
405 MCharset *mcharset__ascii;
406 MCharset *mcharset__binary;
407 MCharset *mcharset__m17n;
408 MCharset *mcharset__unicode;
410 MCharsetISO2022Table mcharset__iso_2022_table;
412 /** Initialize charset handler. */
419 mcharset__cache = mplist ();
420 mplist_set (mcharset__cache, Mt, NULL);
422 MLIST_INIT1 (&charset_list, charsets, 128);
423 MLIST_INIT1 (&mcharset__iso_2022_table, charsets, 128);
424 charset_definition_list = mplist ();
426 memset (mcharset__iso_2022_table.classified, 0,
427 sizeof (mcharset__iso_2022_table.classified));
429 Mcharset = msymbol ("charset");
431 Mmethod = msymbol ("method");
432 Moffset = msymbol ("offset");
433 Mmap = msymbol ("map");
434 Munify = msymbol ("unify");
435 Msubset = msymbol ("subset");
436 Msuperset = msymbol ("superset");
438 Mdimension = msymbol ("dimension");
439 Mmin_range = msymbol ("min-range");
440 Mmax_range = msymbol ("max-range");
441 Mmin_code = msymbol ("min-code");
442 Mmax_code = msymbol ("max-code");
443 Mascii_compatible = msymbol ("ascii-compatible");
444 Mfinal_byte = msymbol ("final-byte");
445 Mrevision = msymbol ("revision");
446 Mmin_char = msymbol ("min-char");
447 Mmapfile = msymbol_as_managing_key ("mapfile");
448 Mparents = msymbol_as_managing_key ("parents");
449 Msubset_offset = msymbol ("subset-offset");
450 Mdefine_coding = msymbol ("define-coding");
451 Maliases = msymbol_as_managing_key ("aliases");
455 /* Setup predefined charsets. */
456 pl = mplist_add (pl, Mmethod, Moffset);
457 pl = mplist_add (pl, Mmin_range, (void *) 0);
458 pl = mplist_add (pl, Mmax_range, (void *) 0x7F);
459 pl = mplist_add (pl, Mascii_compatible, Mt);
460 pl = mplist_add (pl, Mfinal_byte, (void *) 'B');
461 pl = mplist_add (pl, Mmin_char, (void *) 0);
462 Mcharset_ascii = mchar_define_charset ("ascii", param);
464 mplist_put (param, Mmax_range, (void *) 0xFF);
465 mplist_put (param, Mfinal_byte, NULL);
466 Mcharset_iso_8859_1 = mchar_define_charset ("iso-8859-1", param);
468 mplist_put (param, Mmax_range, (void *) 0x10FFFF);
469 Mcharset_unicode = mchar_define_charset ("unicode", param);
471 mplist_put (param, Mmax_range, (void *) MCHAR_MAX);
472 Mcharset_m17n = mchar_define_charset ("m17n", param);
474 mplist_put (param, Mmax_range, (void *) 0xFF);
475 Mcharset_binary = mchar_define_charset ("binary", param);
477 M17N_OBJECT_UNREF (param);
479 mcharset__ascii = MCHARSET (Mcharset_ascii);
480 mcharset__binary = MCHARSET (Mcharset_binary);
481 mcharset__m17n = MCHARSET (Mcharset_m17n);
482 mcharset__unicode = MCHARSET (Mcharset_unicode);
/* Release the resources held by the charset module: the decoder and
   encoder of every charset in charset_list, the lookup cache, both
   charset lists, and the definition list together with its values.
   NOTE(review): this listing omits some lines of the function.  */
488 mcharset__fini (void)
493 for (i = 0; i < charset_list.used; i++)
495 MCharset *charset = charset_list.charsets[i];
/* The decoder is released with free (), the encoder is a managed
   object and is unreferenced instead.  */
497 if (charset->decoder)
498 free (charset->decoder);
499 if (charset->encoder)
500 M17N_OBJECT_UNREF (charset->encoder);
503 M17N_OBJECT_UNREF (mcharset__cache);
504 MLIST_FREE1 (&charset_list, charsets);
505 MLIST_FREE1 (&mcharset__iso_2022_table, charsets);
/* Drop the reference held on each stored definition before dropping
   the list itself.  */
506 MPLIST_DO (plist, charset_definition_list)
507 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
508 M17N_OBJECT_UNREF (charset_definition_list);
/* Look up the charset object stored as the Mcharset property of the
   symbol NAME.  The visible fallback path fetches NAME's parameters
   from charset_definition_list, defines the charset from them, and
   looks the property up again; the result is then recorded in
   mcharset__cache.
   NOTE(review): the conditions guarding the fallback path and the
   return statements are not visible in this listing.  */
513 mcharset__find (MSymbol name)
517 charset = msymbol_get (name, Mcharset);
520 MPlist *param = mplist_get (charset_definition_list, name);
/* Reset the cache key to Mt before (re)defining the charset.  */
522 MPLIST_KEY (mcharset__cache) = Mt;
/* Define the charset from a converted copy of the stored parameters,
   then release that copy.  */
525 param = mplist__from_plist (param);
526 mchar_define_charset (MSYMBOL_NAME (name), param);
527 charset = msymbol_get (name, Mcharset);
528 M17N_OBJECT_UNREF (param);
/* Remember the most recent lookup in the cache.  */
530 MPLIST_KEY (mcharset__cache) = name;
531 MPLIST_VAL (mcharset__cache) = charset;
536 /** Return the character corresponding to code-point CODE in CHARSET.
537 If CODE is invalid for CHARSET, return -1. */
/* NOTE(review): the return statements of several branches are not
   visible in this listing; the comments below describe the visible
   tests and computations only.  */
540 mcharset__decode_char (MCharset *charset, unsigned code)
/* Fast path for codes below 128 in ASCII-compatible charsets, then a
   range check against the charset's code-point bounds.  */
544 if (code < 128 && charset->ascii_compatible)
546 if (code < charset->min_code || code > charset->max_code)
/* Complete the lazy initialization before any table is consulted.  */
549 if (! charset->fully_loaded
550 && load_charset_fully (charset) < 0)
551 MERROR (MERROR_CHARSET, -1);
/* Subset: remove the subset offset and decode through the single
   parent.  */
553 if (charset->method == Msubset)
555 MCharset *parent = charset->parents[0];
557 code -= charset->subset_offset;
558 return DECODE_CHAR (parent, code);
/* Superset: try each parent in order with the unmodified code.  */
561 if (charset->method == Msuperset)
565 for (i = 0; i < charset->nparents; i++)
567 MCharset *parent = charset->parents[i];
568 int c = DECODE_CHAR (parent, code);
/* The remaining methods work on the character index of CODE.  */
576 idx = CODE_POINT_TO_INDEX (charset, code);
/* Map: the decoder table is indexed by the character index.  */
580 if (charset->method == Mmap)
581 return charset->decoder[idx];
/* Unify: consult the decoder table; the visible fallback assigns the
   character unified_max + 1 + idx.  */
583 if (charset->method == Munify)
585 int c = charset->decoder[idx];
588 c = charset->unified_max + 1 + idx;
592 /* Now charset->method should be Moffset. */
593 return (charset->min_char + idx);
597 /** Return the code point of character C in CHARSET. If CHARSET does not
598 contain C, return MCHAR_INVALID_CODE. */
601 mcharset__encode_char (MCharset *charset, int c)
603 if (! charset->fully_loaded
604 && load_charset_fully (charset) < 0)
605 MERROR (MERROR_CHARSET, MCHAR_INVALID_CODE);
607 if (charset->method == Msubset)
609 MCharset *parent = charset->parents[0];
610 unsigned code = ENCODE_CHAR (parent, c);
612 if (code == MCHAR_INVALID_CODE)
614 code += charset->subset_offset;
615 if (code >= charset->min_code && code <= charset->max_code)
617 return MCHAR_INVALID_CODE;
620 if (charset->method == Msuperset)
624 for (i = 0; i < charset->nparents; i++)
626 MCharset *parent = charset->parents[i];
627 unsigned code = ENCODE_CHAR (parent, c);
629 if (code != MCHAR_INVALID_CODE)
632 return MCHAR_INVALID_CODE;
635 if (c < charset->min_char || c > charset->max_char)
636 return MCHAR_INVALID_CODE;
638 if (charset->method == Mmap)
639 return (unsigned) mchartable_lookup (charset->encoder, c);
641 if (charset->method == Munify)
643 if (c > charset->unified_max)
645 c -= charset->unified_max - 1;
646 return INDEX_TO_CODE_POINT (charset, c);
648 return (unsigned) mchartable_lookup (charset->encoder, c);
651 /* Now charset->method should be Moffset */
652 c -= charset->min_char;
653 return INDEX_TO_CODE_POINT (charset, c);
/* Load the charset definitions stored in the database under the tag
   "charset-list", remember each definition in
   charset_definition_list, define the charset, and register a charset
   coding for definitions that ask for it.  The visible error paths
   return -1 through MERROR.
   NOTE(review): this listing omits some lines of the function,
   including the handling of a missing database entry.  */
657 mcharset__load_from_database ()
659 MDatabase *mdb = mdatabase_find (msymbol ("charset-list"), Mnil, Mnil, Mnil);
660 MPlist *def_list, *plist;
661 MPlist *definitions = charset_definition_list;
662 int mdebug_mask = MDEBUG_CHARSET;
667 def_list = (MPlist *) mdatabase_load (mdb);
668 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to load data."));
/* Each element must itself be a plist whose first element is the
   charset name symbol; the rest of that plist is the definition.  */
674 MPLIST_DO (plist, def_list)
679 if (! MPLIST_PLIST_P (plist))
680 MERROR (MERROR_CHARSET, -1);
681 pl = MPLIST_PLIST (plist);
682 if (! MPLIST_SYMBOL_P (pl))
683 MERROR (MERROR_CHARSET, -1);
684 name = MPLIST_SYMBOL (pl);
685 pl = MPLIST_NEXT (pl);
/* Keep the raw definition (with an extra reference) keyed by name,
   and define the charset from a converted copy of it.
   NOTE(review): the plist returned by mplist__from_plist () is not
   released here, whereas mcharset__find () unreferences its copy
   after the same call -- confirm ownership.  */
686 definitions = mplist_add (definitions, name, pl);
687 M17N_OBJECT_REF (pl);
688 mchar_define_charset (MSYMBOL_NAME (name), mplist__from_plist (pl));
/* Register a coding when the element following Mdefine_coding in the
   definition is Mt.  */
689 if ((pl = mplist_find_by_value (pl, Mdefine_coding))
690 && (MSymbol) MPLIST_VAL (MPLIST_NEXT (pl)) == Mt)
691 mconv__register_charset_coding (name);
694 M17N_OBJECT_UNREF (def_list);
695 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to parse the loaded data."));
701 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
706 /*** @addtogroup m17nCharset */
712 @brief Invalid code-point.
714 The macro #MCHAR_INVALID_CODE gives the invalid code-point. */
717 @brief ̵¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È
719 ¥Þ¥¯¥í #MCHAR_INVALID_CODE ¤Ï̵¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È¤òÍ¿¤¨¤ë¡£ */
721 #define MCHAR_INVALID_CODE
725 @brief The symbol @c Mcharset.
727 Any decoded M-text has a text property whose key is the predefined
728 symbol @c Mcharset. The name of @c Mcharset is
729 <tt>"charset"</tt>. */
732 @brief ¥·¥ó¥Ü¥ë @c Mcharset
734 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¡¢¥¡¼¤¬ @c Mcharset ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È
735 ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£¥·¥ó¥Ü¥ë @c Mcharset ¤Ï <tt>"charset"</tt> ¤È¤¤
736 ¤¦Ì¾Á°¤Ç¤¢¤é¤«¤¸¤áÄêµÁ¤µ¤ì¤Æ¤¤¤ë¡£ */
742 @name Variables: Symbols representing a charset.
744 Each of the following symbols represents a predefined charset. */
747 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤òɽ¤ï¤¹ÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë
749 °Ê²¼¤Î³Æ¥·¥ó¥Ü¥ë¤Ï¡¢¥¡¼¤¬ @c Mcharset ¤Ç¤¢¤ê¡¢Ãͤ¬Âбþ¤¹¤ëʸ»ú¥»¥Ã
750 ¥È¥ª¥Ö¥¸¥§¥¯¥È¡Ê @c MCharset ·¿¡Ë¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¥·¥ó¥Ü¥ë¥×¥í¥Ñ
756 @brief Symbol representing the charset ASCII.
758 The symbol #Mcharset_ascii has name <tt>"ascii"</tt> and represents
759 the charset ISO 646, USA Version X3.4-1968 (ISO-IR-6). */
761 @brief ISO 646, USA Version ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤Î¥·¥ó¥Ü¥ë
763 ¥·¥ó¥Ü¥ë #Mcharset_ascii ¤Ï <tt>"ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
764 ISO 646, USA Version X3.4-1968 (ISO-IR-6) ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤ò»Ø
765 Äꤹ¤ë¤¿¤á¤Ë»È¤ï¤ì¤ë¡£ */
767 MSymbol Mcharset_ascii;
771 @brief Symbol representing the charset ISO/IEC 8859/1.
773 The symbol #Mcharset_iso_8859_1 has name <tt>"iso-8859-1"</tt>
774 and represents the charset ISO/IEC 8859-1:1998. */
776 @brief ISO/IEC 8859-1:1998 ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤Î¥·¥ó¥Ü¥ë
778 ¥·¥ó¥Ü¥ë #Mcharset_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt> ¤È¤¤¤¦Ì¾
779 Á°¤ò»ý¤Á¡¢ISO/IEC 8859-1:1998 ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤ò»ØÄꤹ¤ë¤¿¤á¤Ë
782 MSymbol Mcharset_iso_8859_1;
785 @brief Symbol representing the charset Unicode.
787 The symbol #Mcharset_unicode has name <tt>"unicode"</tt> and
788 represents the charset Unicode. */
790 @brief Unicode ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤Î¥·¥ó¥Ü¥ë
792 ¥·¥ó¥Ü¥ë #Mcharset_unicode ¤Ï <tt>"unicode"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý
793 ¤Á¡¢Unicode ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤ò»ØÄꤹ¤ë¤¿¤á¤Ë»È¤ï¤ì¤ë¡£ */
795 MSymbol Mcharset_unicode;
799 @brief Symbol representing the largest charset.
801 The symbol #Mcharset_m17n has name <tt>"m17n"</tt> and
802 represents the charset that contains all characters supported by
805 @brief Á´Ê¸»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤Î¥·¥ó¥Ü¥ë
807 ¥·¥ó¥Ü¥ë #Mcharset_m17n ¤Ï <tt>"m17n"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
808 m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤ò»ØÄꤹ¤ë¤¿¤á¤Ë»È
811 MSymbol Mcharset_m17n;
815 @brief Symbol representing the charset for ill-decoded characters.
817 The symbol #Mcharset_binary has name <tt>"binary"</tt> and
818 represents the fake charset which the decoding functions put to an
819 M-text as a text property when they encounter an invalid byte
820 (sequence). See @ref m17nConv @latexonly
821 (P.\pageref{group__m17nConv}) @endlatexonly for more detail. */
823 MSymbol Mcharset_binary;
830 @name Variables: Parameter keys for mchar_define_charset ().
832 These are the predefined symbols to use as parameter keys for the
833 function mchar_define_charset () (which see). */
836 @name ÊÑ¿ô: mchar_define_charset ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼
838 ¤³¤ì¤é¤Ï¡¢´Ø¿ô mchar_define_charset () ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼¤È¤·¤Æ
839 »È¤ï¤ì¤ë¥·¥ó¥Ü¥ë¤Ç¤¢¤ë¡£ ¾Ü¤·¤¯¤Ï¤³¤Î´Ø¿ô¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£*/
844 Parameter key for mchar_define_charset () (which see). */
852 MSymbol Mascii_compatible;
858 MSymbol Msubset_offset;
859 MSymbol Mdefine_coding;
866 @name Variables: Symbols representing charset methods.
868 These are the predefined symbols that can be a value of the
869 #Mmethod parameter of a charset used in an argument to the
870 mchar_define_charset () function.
872 A method specifies how code-points and character codes are
873 converted. See the documentation of the mchar_define_charset ()
874 function for the details. */
877 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤Î¥á¥½¥Ã¥É»ØÄê¤Ë»È¤ï¤ì¤ë¥·¥ó¥Ü¥ë
879 ¤³¤ì¤é¤Ï¡¢Ê¸»ú¥»¥Ã¥È¤Î @e ¥á¥½¥Ã¥É ¤ò»ØÄꤹ¤ë¤¿¤á¤Î¥·¥ó¥Ü¥ë¤Ç¤¢¤ê¡¢
880 ´Ø¿ô mchar_define_charset () ¤Î°ú¿ô¤È¤·¤Æ»È¤ï¤ì¤ë¡£
882 ¥á¥½¥Ã¥É¤È¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥³¡¼¥É¤òÁê¸ßÊÑ´¹¤¹¤ëºÝ¤ÎÊý¼°¤Î¤³
883 ¤È¤Ç¤¢¤ë¡£¾Ü¤·¤¯¤Ï´Ø¿ô mchar_define_charset () ¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£ */
887 @brief Symbol for the offset type method of charset.
889 The symbol #Moffset has the name <tt>"offset"</tt> and, when used
890 as a value of #Mmethod parameter of a charset, it means that the
891 conversion of code-points and character codes of the charset is
892 done by this calculation:
895 CHARACTER-CODE = CODE-POINT - MIN-CODE + MIN-CHAR
898 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
899 and MIN-CHAR is a value of #Mmin_char parameter. */
902 @brief ¥ª¥Õ¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
904 ¥·¥ó¥Ü¥ë #Moffset ¤Ï <tt>"offset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
905 mchar_define_charset () ¤Ç¥ª¥Õ¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò»ØÄꤹ¤ë¾ì¹ç¤Î°ú
906 ¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¡£*/
911 /***en @brief Symbol for the map type method of charset.
913 The symbol #Mmap has the name <tt>"map"</tt> and, when used as a
914 value of #Mmethod parameter of a charset, it means that the
915 conversion of code-points and character codes of the charset is
916 done by map looking up. The map must be given by #Mmapfile
919 /***ja @brief ¥Þ¥Ã¥×·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
921 ¥·¥ó¥Ü¥ë #Mmap ¤Ï <tt>"map"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
922 mchar_define_charset () ¤Ç¥Þ¥Ã¥×·¿¤Î¥á¥½¥Ã¥É¤ò»ØÄꤹ¤ë¾ì¹ç¤Î°ú¿ô¤È
928 /***en @brief Symbol for the unify type method of charset.
930 The symbol #Munify has the name <tt>"unify"</tt> and, when used as
931 a value of #Mmethod parameter of a charset, it means that the
932 conversion of code-points and character codes of the charset is
933 done by map looking up and offsetting. The map must be given by
934 #Mmapfile parameter. For this kind of charset, a unique
935 contiguous character code space for all characters is assigned.
936 If the map has an entry for a code-point, the conversion is done
937 by looking up the map. Otherwise, the conversion is done by this
941 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
944 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
945 and LOWEST-CHAR-CODE is the lowest character code of the assigned
948 /***ja @brief Áê³·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
950 ¥·¥ó¥Ü¥ë #Minherit ¤Ï <tt>"inherit"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
951 mchar_define_charset () ¤ÇÁê³·¿¤Î¥á¥½¥Ã¥É¤ò»ØÄꤹ¤ë¾ì¹ç¤Î°ú¿ô¤È¤·
958 @brief Symbol for the subset type method of charset.
960 The symbol #Msubset has the name <tt>"subset"</tt> and, when used
961 as a value of #Mmethod parameter of a charset, it means that the
962 charset is a subset of a parent charset. The parent charset must
963 be given by #Mparents parameter. The conversion of code-points
964 and character codes of the charset is done conceptually by this
968 CHARACTER-CODE = PARENT-CODE (CODE-POINT - SUBSET-OFFSET)
971 where, PARENT-CODE is a pseudo function that returns the character
972 code of a given code-point in the parent charset, and SUBSET-OFFSET is a
973 value given by #Msubset_offset parameter. */
979 @brief Symbol for the superset type method of charset.
981 The symbol #Msuperset has the name <tt>"superset"</tt> and, when
982 used as a value of #Mmethod parameter of a charset, it means that
983 the charset is a superset of parent charsets. The parent charsets
984 must be given by #Mparents parameter. */
991 @brief Define a charset.
993 The mchar_define_charset () function defines a new charset and
994 makes it accessible via a symbol whose name is $NAME. $PLIST
995 specifies parameters of the charset as below:
999 <li> Key is #Mmethod, value is a symbol.
1001 The value specifies the method for decoding/encoding code-points
1002 in the charset. It must be #Moffset, #Mmap (default), #Munify,
1003 #Msubset, or #Msuperset.
1005 <li> Key is #Mdimension, value is an integer
1007 The value specifies the dimension of code-points of the charset.
1008 It must be 1 (default), 2, 3, or 4.
1010 <li> Key is #Mmin_range, value is an unsigned integer
1012 The value specifies the minimum range of a code-point, which means
1013 that the Nth byte of the value is the minimum Nth byte of
1014 code-points of the charset. The default value is 0.
1016 <li> Key is #Mmax_range, value is an unsigned integer
1018 The value specifies the maximum range of a code-point, which means
1019 that the Nth byte of the value is the maximum Nth byte of
1020 code-points of the charset. The default value is 0xFF, 0xFFFF,
1021 0xFFFFFF, or 0xFFFFFFFF if the dimension is 1, 2, 3, or 4
1024 <li> Key is #Mmin_code, value is an unsigned integer
1026 The value specifies the minimum code-point of
1027 the charset. The default value is the minimum range.
1029 <li> Key is #Mmax_code, value is an unsigned integer
1031 The value specifies the maximum code-point of
1032 the charset. The default value is the maximum range.
1034 <li> Key is #Mascii_compatible, value is a symbol
1036 The value specifies whether the charset is ASCII compatible or
1037 not. If the value is #Mnil (default), it is not ASCII
1038 compatible, else compatible.
1040 <li> Key is #Mfinal_byte, value is an integer
1042 The value specifies the @e final @e byte of the charset registered
1043 in The International Registry. It must be 0 (default) or 48..127.
1044 The value 0 means that the charset is not in the registry.
1046 <li> Key is #Mrevision, value is an integer
1048 The value specifies the @e revision @e number of the charset
1049 registered in The International Registry. It must be 0..127. If
1050 the charset is not in The International Registry, the value is
1051 ignored. The value 0 means that the charset has no revision
1054 <li> Key is #Mmin_char, value is an integer
1056 The value specifies the minimum character code of the charset.
1057 The default value is 0.
1059 <li> Key is #Mmapfile, value is an M-text
1061 If the method is #Mmap or #Munify, data that contains
1062 mapping information is added to the m17n database by calling
1063 mdatabase_define () with the value as an argument $EXTRA_INFO,
1064 i.e. the value is used as a file name of the data.
1066 Otherwise, this parameter is ignored.
1068 <li> Key is #Mparents, value is a plist
1070 If the method is #Msubset, the value must be a plist of length
1071 1, and the value of the plist must be a symbol representing a
1074 If the method is #Msuperset, the value must be a plist of length
1075 less than 9, and the values of the plist must be symbols
1076 representing subset charsets.
1078 Otherwise, this parameter is ignored.
1080 <li> Key is #Mdefine_coding, value is a symbol
1082 If the dimension of the charset is 1, the value specifies whether
1083 or not to define a coding system of the same name whose method is
1086 Otherwise, this parameter is ignored.
1091 If the operation was successful, mchar_define_charset () returns a
1092 symbol whose name is $NAME. Otherwise it returns #Mnil and
1093 assigns an error code to the external variable #merror_code. */
1096 @brief ʸ»ú¥»¥Ã¥È¤òÄêµÁ¤¹¤ë.
1098 ´Ø¿ô mchar_define_charset () ¤Ï¿·¤·¤¤Ê¸»ú¥»¥Ã¥È¤òÄêµÁ¤·¡¢¤½¤ì¤ò
1099 $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£$METHOD
1100 ¤Ï¤½¤Îʸ»ú¥»¥Ã¥È¤Ë¤ª¤±¤ë¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î¥Ç¥³¡¼¥É¡¿¥¨¥ó¥³¡¼¥É¥á¥½¥Ã
1101 ¥É¤ò»ØÄꤹ¤ë¥·¥ó¥Ü¥ë¤Ç¤¢¤ê¡¢#Moffset, #Mmap, #Munify,
1102 #Msubset, #Msuperset ¤Î¤¤¤º¤ì¤«¤ÎÃͤò¤È¤ë¡£
1104 $DIMENSION ¤Ï¤½¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î¼¡¸µ¤Ç¤¢¤ê¡¢1, 2, 3,
1105 4¤Î¤¤¤º¤ì¤«¤ÎÃͤò¤È¤ë¡£
1107 $CODE_RANGE ¤ÏÂ礤µ¤¬8¥Ð¥¤¥È¤ÎÇÛÎó¤Ç¤¢¤ê¡¢ÄêµÁ¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î
1108 ¥³¡¼¥É¥Ý¥¤¥ó¥È¶õ´Ö¤òɽ¤ï¤¹¡£Âè1¥Ð¥¤¥È¤ÈÂè2¥Ð¥¤¥È¤ÎÃͤϥ³¡¼¥É¥Ý¥¤¥ó
1109 ¥È¤ÎºÇ½é¤Î¼¡¸µ¤Ç¤ÎºÇ¾®¡¿ºÇÂç¥Ð¥¤¥È¤ÎÃͤǤ¢¤ë¡£Âè3¥Ð¥¤¥È¤ÈÂè4¥Ð¥¤¥È
1110 ¤Ï¡¢2ÈÖÌܤμ¡¸µ¤ÎºÇ¾®¡¿ºÇÂçÃͤǤ¢¤ê¡¢ °Ê²¼Æ±Íͤ˳¤¯¡£°ìÈÌŪ¤Ë¡¢Âè
1111 (2N-1)¥Ð¥¤¥È¤ÈÂè(2N)¥Ð¥¤¥È¤¬NÈÖÌܤμ¡¸µ¤ÎºÇ¾®¡¿ºÇÂçÃͤȤʤë (N =
1112 1, 2, 3, 4)¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î @e ʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹ ¤Ï¤³¤ì¤é¤ÎÃͤ«¤é
1115 $MIN_CODE ¤È $MAX_CODE ¤Ï¡¢¤½¤ì¤¾¤ì¤³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇ¾®¤ª¤è¤ÓºÇÂç
1116 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤òɽ¤ï¤¹¡£0¤¬»ØÄꤵ¤ì¤¿¾ì¹ç¤Ï $CODE_RANGE ¤ÎÃͤ«¤é·×
1119 $FINAL_BYTE ¤Ï The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë
1120 @e ½ªÃ¼¥Ð¥¤¥È ¤Ç¤¢¤ë¡£ÅÐÏ¿¤µ¤ì¤Æ¤¤¤Ê¤¤ CCS ¤Î¾ì¹ç¤Ë¤Ï -1 ¤Ç¤Ê¤¯¤Æ¤Ï
1123 $REVISION ¤Ï¡¢The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë@e revision
1124 @e number ¤Ç¤¢¤ë¡£¤â¤· revision number ¤¬Â¸ºß¤·¤Ê¤¤¤Ê¤é -1 ¤Ç¤Ê¤¯
1127 @par ¥á¥½¥Ã¥É¤¬ Moffset ¤Î¾ì¹ç
1129 $MIN_CHAR ¤Ë¤ÏºÇ¾®¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤ËÂбþ¤¹¤ëʸ»ú¥³¡¼¥É¤òÍ¿¤¨¤ë¡£
1130 $NPARENTS, $PARENTS, ¤ª¤è¤Ó $SUBSET_OFFSET ¤Ï̵»ë¤µ¤ì¤ë¡£
1132 @par ¥á¥½¥Ã¥É¤¬ Mmap ¤Î¾ì¹ç
1134 m17n ¸À¸ì¾ðÊó¥Ù¡¼¥¹Ãæ¤Ç \<#Mcharset, $NAME\> ¤È¤¤¤¦¥¿¥°¤ÎÉÕ¤¤¤¿
1135 ¥Þ¥Ã¥Ô¥ó¥°¥Æ¡¼¥Ö¥ë¤ò¡¢¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤ËÍѤ¤¤ë¡£$MIN_CHAR,
1136 $NPARENTS, $PARENTS, ¤ª¤è¤Ó $SUBSET_OFFSET ¤Ï̵»ë¤µ¤ì¤ë¡£
1138 @par ¥á¥½¥Ã¥É¤¬ Msubset ¤Î¾ì¹ç
1140 $NPARENTS ¤Ï1¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£¤Þ¤¿ $PARENTS ¤Ï·Ñ¾µ¤Î¸µ¤È¤Ê¤ëʸ
1141 »ú¥»¥Ã¥È¤òɽ¤ï¤¹¥·¥ó¥Ü¥ë¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£¸µ¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É
1142 ¥Ý¥¤¥ó¥È¤Ë $SUBSET_OFFSET ¤ò²Ã¤¨¤¿¤â¤Î¤¬¡¢¿·¤·¤¤Ê¸»ú¥»¥Ã¥ÈÃæ¤Ç¤Î¥³¡¼
1143 ¥É¥Ý¥¤¥ó¥È¤Ë¤Ê¤ë¡£$MIN_CHAR ¤Ï̵»ë¤µ¤ì¤ë¡£
1145 @par ¥á¥½¥Ã¥É¤¬ Msuperset ¤Î¾ì¹ç
1147 $NPARENTS ¤Ï¿Æ¤È¤Ê¤ëʸ»ú¥»¥Ã¥È¤Î¿ô¡¢$PARENTS ¤Ï¿Æʸ»ú¥»¥Ã¥È¤Î¥·¥ó
1148 ¥Ü¥ë¤ÎÇÛÎó¤òɽ¤ï¤¹¡£$MIN_CHAR ¤ª¤è¤Ó $SUBSET_OFFSET ¤Ï̵»ë¤µ¤ì¤ë¡£
1151 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mchar_define_charset () ¤Ï $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·
1152 ¥ó¥Ü¥ë¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð #Mnil ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô @c
1153 merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1157 @c MERROR_CHARSET */
/* Define a charset named NAME from the properties in PLIST.

   The visible code interns NAME as a symbol, fills an MCharset
   structure from PLIST (method, dimension, code range, min/max code,
   ASCII compatibility, final byte, revision, min char, parents and
   subset offset), passes it through make_charset (), and attaches it
   as the Mcharset property of the symbol, of the canonicalized
   symbol, and of every alias listed under Maliases.  It may also
   register a coding system for the charset.

   NOTE(review): the embedded line numbers skip (e.g. 1160 -> 1162),
   so some short source lines -- braces, a few declarations, guards
   and the final return -- are not visible here.  Comments below
   describe only what the visible lines show.  */
1160 mchar_define_charset (char *name, MPlist *plist)
1162 MSymbol sym = msymbol (name);
1165 unsigned min_range, max_range;
1167 MText *mapfile = (MText *) mplist_get (plist, Mmapfile);
/* Set up the new charset structure; presumably MSTRUCT_CALLOC
   allocates it zero-filled and reports MERROR_CHARSET on failure --
   confirm against the macro definition.  */
1169 MSTRUCT_CALLOC (charset, MERROR_CHARSET);
1170 charset->name = sym;
1171 charset->method = (MSymbol) mplist_get (plist, Mmethod);
/* No method in PLIST: fall back to Mmap or Moffset.  The condition
   choosing between the two is not visible; presumably it tests
   whether a map file was supplied -- TODO confirm.  */
1172 if (! charset->method)
1175 charset->method = Mmap;
1177 charset->method = Moffset;
/* Mmap and Munify charsets register the map file data in the
   database under <Mcharset, sym>.  The guard on the MERROR below is
   not visible; presumably it fires when MAPFILE is missing, since
   mapfile->data is dereferenced right after -- TODO confirm.  */
1179 if (charset->method == Mmap || charset->method == Munify)
1182 MERROR (MERROR_CHARSET, Mnil);
1183 mdatabase_define (Mcharset, sym, Mnil, Mnil, NULL, mapfile->data);
/* A missing (zero) Mdimension defaults to 1.  */
1185 if (! (charset->dimension = (int) mplist_get (plist, Mdimension)))
1186 charset->dimension = 1;
1188 min_range = (unsigned) mplist_get (plist, Mmin_range);
/* With an explicit Mmax_range, raise the dimension so that the value
   fits in that many bytes.  */
1189 if ((pl = mplist_find_by_key (plist, Mmax_range)))
1191 max_range = (unsigned) MPLIST_VAL (pl);
1192 if (max_range >= 0x1000000)
1193 charset->dimension = 4;
1194 else if (max_range >= 0x10000 && charset->dimension < 3)
1195 charset->dimension = 3;
1196 else if (max_range >= 0x100 && charset->dimension < 2)
1197 charset->dimension = 2;
/* Without Mmax_range, max_range is chosen from the dimension.  The
   assignments for dimensions 1 and 2 are not visible here.  */
1199 else if (charset->dimension == 1)
1201 else if (charset->dimension == 2)
1203 else if (charset->dimension == 3)
1204 max_range = 0xFFFFFF;
1206 max_range = 0xFFFFFFFF;
/* Store one byte of min_range and max_range per dimension:
   code_range[i * 4] gets the min byte and code_range[i * 4 + 1] the
   max byte of dimension I, low byte first.  */
1208 memset (charset->code_range, 0, sizeof charset->code_range);
1209 for (i = 0; i < charset->dimension; i++, min_range >>= 8, max_range >>= 8)
1211 charset->code_range[i * 4] = min_range & 0xFF;
1212 charset->code_range[i * 4 + 1] = max_range & 0xFF;
/* Clamp min_code / max_code against min_range / max_range.
   NOTE(review): the loop above shifts min_range and max_range right
   by 8 bits per dimension, so these comparisons see the shifted
   values rather than the original range -- confirm this is intended
   (make_charset () may recompute the codes).  */
1214 if ((charset->min_code = (int) mplist_get (plist, Mmin_code)) < min_range)
1215 charset->min_code = min_range;
1216 if ((charset->max_code = (int) mplist_get (plist, Mmax_code)) > max_range)
1217 charset->max_code = max_range;
1218 charset->ascii_compatible
1219 = (MSymbol) mplist_get (plist, Mascii_compatible) != Mnil;
1220 charset->final_byte = (int) mplist_get (plist, Mfinal_byte);
1221 charset->revision = (int) mplist_get (plist, Mrevision);
1222 charset->min_char = (int) mplist_get (plist, Mmin_char);
/* Mparents is read as a plist of parent charset names; at most the
   first 8 entries are used.  */
1223 pl = (MPlist *) mplist_get (plist, Mparents);
1224 charset->nparents = pl ? mplist_length (pl) : 0;
1225 if (charset->nparents > 8)
1226 charset->nparents = 8;
/* Each parent entry must have key Msymbol and must resolve through
   MCHARSET; otherwise MERROR_CHARSET is raised.
   NOTE(review): if MERROR returns from the function (as its Mnil
   argument suggests), CHARSET set up above is not released on these
   paths -- possible leak, confirm.  */
1227 for (i = 0; i < charset->nparents; i++, pl = MPLIST_NEXT (pl))
1229 MSymbol parent_name;
1231 if (MPLIST_KEY (pl) != Msymbol)
1232 MERROR (MERROR_CHARSET, Mnil);
1233 parent_name = MPLIST_SYMBOL (pl);
1234 if (! (charset->parents[i] = MCHARSET (parent_name)))
1235 MERROR (MERROR_CHARSET, Mnil);
1238 charset->subset_offset = (int) mplist_get (plist, Msubset_offset);
/* Attach the charset to SYM before calling make_charset (), then use
   the pointer make_charset () returns for the canonicalized name.
   Lines 1242-1243 are not visible; presumably they handle a failed
   make_charset () -- TODO confirm.  */
1240 msymbol_put (sym, Mcharset, charset);
1241 charset = make_charset (charset);
1244 msymbol_put (msymbol__canonicalize (sym), Mcharset, charset);
/* Attach the same charset to every symbol-keyed entry of Maliases and
   to its canonicalized form; the walk stops at the first entry whose
   key is not Msymbol.  */
1246 for (pl = (MPlist *) mplist_get (plist, Maliases);
1247 pl && MPLIST_KEY (pl) == Msymbol;
1248 pl = MPLIST_NEXT (pl))
1250 MSymbol alias = MPLIST_SYMBOL (pl);
1252 msymbol_put (alias, Mcharset, charset);
1253 msymbol_put (msymbol__canonicalize (alias), Mcharset, charset);
/* Register a charset-based coding only when Mdefine_coding is set,
   the charset is one-dimensional, and its first-dimension byte range
   is exactly 0..255.  */
1256 if (mplist_get (plist, Mdefine_coding)
1257 && charset->dimension == 1
1258 && charset->code_range[0] == 0 && charset->code_range[1] == 255)
1259 mconv__register_charset_coding (sym);
1266 @brief Resolve charset name.
1268 The mchar_resolve_charset () function returns $SYMBOL if it
1269 represents a charset. Otherwise, it canonicalizes $SYMBOL as a
1270 charset name and, if the canonicalized name represents a charset,
1271 returns it. Otherwise, it returns #Mnil. */
/* Resolve SYMBOL to the name of a defined charset.

   Looks up the Mcharset property of SYMBOL, and also of the
   canonicalized SYMBOL.  Returns the `name' field of the charset
   found, or Mnil when no charset is found.

   NOTE(review): the guard around the second lookup (lines 1277-1279)
   is not visible here; presumably the canonicalized lookup runs only
   when the first one fails -- TODO confirm.  */
1274 mchar_resolve_charset (MSymbol symbol)
1276 MCharset *charset = (MCharset *) msymbol_get (symbol, Mcharset);
1280 symbol = msymbol__canonicalize (symbol);
1281 charset = (MCharset *) msymbol_get (symbol, Mcharset);
/* The result is the charset's own name, not necessarily the symbol
   that was looked up.  */
1284 return (charset ? charset->name : Mnil);
1290 @brief List symbols representing a charset.
1292 The mchar_list_charset () function makes an array of symbols, each
1293 representing a charset, stores the pointer to the array in a place
1294 pointed to by $SYMBOLS, and returns the length of the array. */
/* Store in *SYMBOLS a newly allocated array holding the name of every
   entry in charset_list.

   The array has charset_list.used elements; element I is the `name'
   field of charset_list.charsets[I].

   NOTE(review): the local declarations and the return statement are
   not visible here.  Presumably the caller owns the array and must
   free it -- TODO confirm.  */
1297 mchar_list_charset (MSymbol **symbols)
/* MTABLE_MALLOC is given MERROR_CHARSET, presumably as the error code
   to report if allocation fails -- confirm against the macro.  */
1301 MTABLE_MALLOC ((*symbols), charset_list.used, MERROR_CHARSET);
1302 for (i = 0; i < charset_list.used; i++)
1303 (*symbols)[i] = charset_list.charsets[i]->name;
1310 @brief Decode a code-point.
1312 The mchar_decode () function decodes code-point $CODE in the
1313 charset represented by the symbol $CHARSET_NAME to get a character code.
1317 If decoding was successful, mchar_decode () returns the decoded
1318 character code. Otherwise it returns -1. */
1321 @brief コードポイントをデコードする
1323 関数 mchar_decode () は、シンボル $CHARSET_NAME で示される文字セッ
1324 ト内の $CODE というコードポイントをデコードして文字コードを得る。
1327 デコードが成功すれば、mchar_decode () はデコードされた文字コードを
1328 返す。そうでなければ -1 を返す。 */
/* Decode code-point CODE of the charset named CHARSET_NAME.

   The charset is looked up with MCHARSET and the result of
   DECODE_CHAR (charset, code) is returned.

   NOTE(review): the guard before the MCHAR_INVALID_CODE return
   (lines 1338-1339) is not visible here; presumably it is taken when
   MCHARSET finds no charset -- TODO confirm.  */
1335 mchar_decode (MSymbol charset_name, unsigned code)
1337 MCharset *charset = MCHARSET (charset_name);
/* Failure path: report the invalid-code sentinel.  */
1340 return MCHAR_INVALID_CODE;
1341 return DECODE_CHAR (charset, code);
1347 @brief Encode a character code.
1349 The mchar_encode () function encodes character code $C to get a
1350 code-point in the charset represented by the symbol $CHARSET_NAME.
1353 If encoding was successful, mchar_encode () returns the encoded
1354 code-point. Otherwise it returns #MCHAR_INVALID_CODE. */
1357 @brief 文字コードをエンコードする
1359 関数 mchar_encode () は、文字コード $C をエンコードしてシンボル
1360 $CHARSET_NAME で示される文字セット内におけるコードポイントを得る。
1363 エンコードが成功すれば、mchar_encode () はエンコードされたコードポイ
1364 ントを返す。そうでなければ #MCHAR_INVALID_CODE を返す。 */
/* Encode character code C into a code-point of the charset named
   CHARSET_NAME.

   The charset is looked up with MCHARSET and the result of
   ENCODE_CHAR (charset, c) is returned.

   NOTE(review): the guard before the MCHAR_INVALID_CODE return
   (lines 1374-1375) is not visible here; presumably it is taken when
   MCHARSET finds no charset -- TODO confirm.  */
1371 mchar_encode (MSymbol charset_name, int c)
1373 MCharset *charset = MCHARSET (charset_name);
/* Failure path: report the invalid-code sentinel.  */
1376 return MCHAR_INVALID_CODE;
1377 return ENCODE_CHAR (charset, c);
1383 @brief Call a function for all the characters in a specified charset.
1385 The mchar_map_charset () function calls $FUNC for all the
1386 characters in the charset named $CHARSET_NAME. A call is done for
1387 a chunk of consecutive characters rather than character by character.
1390 $FUNC receives three arguments: $FROM, $TO, and $ARG. $FROM and
1391 $TO specify the range of character codes in the charset. $ARG is the same as $FUNC_ARG.
1395 If the operation was successful, mchar_map_charset () returns 0.
1396 Otherwise, it returns -1 and assigns an error code to the external
1397 variable #merror_code. */
1400 @brief 指定した文字セットのすべての文字に対して関数を呼ぶ
1402 関数 mchar_map_charset () は $CHARSET_NAME という名前を持つ文字セッ
1403 ト中のすべての文字に対して $FUNC を呼ぶ。呼び出しは一文字毎ではな
1404 く、連続した文字のまとまり単位で行なわれる。
1406 関数 $FUNC には$FROM, $TO, $ARG の３引数が渡される。$FROM と $TO
1407 は $CHARSET 中の文字コードの範囲を指定する。$ARG は $FUNC_ARG と同じである。
1411 処理に成功すれば mchar_map_charset () は 0 を返す。そうでなければ
1412 -1 を返し、外部変数 #merror_code にエラーコードを設定する。 */
1416 @c MERROR_CHARSET */
1419 mchar_map_charset (MSymbol charset_name,
1420 void (*func) (int from, int to, void *arg),
1425 charset = MCHARSET (charset_name);
1427 MERROR (MERROR_CHARSET, -1);
1429 if (charset->encoder)
1431 int c = charset->min_char;
1434 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) < 0)
1436 while (c <= charset->max_char)
1438 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) >= 0)
1439 (*func) (c, next_c - 1, func_arg);
1444 (*func) (charset->min_char, charset->max_char, func_arg);