1 /* charset.c -- charset module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 @addtogroup m17nCharset
25 @brief Charset objects and API for them.
27 The m17n library uses @e charset objects to represent coded
28 character sets (CCS). The m17n library supports many predefined
29 coded character sets. Moreover, application programs can add
30 other charsets. A character can belong to multiple charsets.
32 The m17n library distinguishes the following three concepts:
34 @li A @e code-point is a number assigned by the CCS to each
35 character. Code-points may or may not be continuous. The type
36 @c unsigned is used to represent a code-point. An invalid
37 code-point is represented by the macro @c MCHAR_INVALID_CODE.
39 @li A @e character @e index is the canonical index of a character
40 in a CCS. The character that has the character index N occupies
41 the Nth position when all the characters in the current CCS are
42 sorted by their code-points. Character indices in a CCS are
43 continuous and start with 0.
45 @li A @e character @e code is the internal representation in the
46 m17n library of a character. A character code is a signed integer
49 Each charset object defines how characters are converted between
50 code-points and character codes. To @e decode means converting
51 code-points to character codes and to @e encode means converting
52 character codes to code-points. */
55 @addtogroup m17nCharset
56 @brief 文字セットオブジェクトとそれに関する API
58 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢Éä¹æ²½Ê¸»ú½¸¹ç (CCS) ¤ò @e ʸ»ú¥»¥Ã¥È ¤È¸Æ¤Ö¥ª
59 ¥Ö¥¸¥§¥¯¥È¤Çɽ¸½¤¹¤ë¡£m17n ¥é¥¤¥Ö¥é¥ê¤Ï¿¤¯¤ÎÉä¹æ²½Ê¸»ú½¸¹ç¤òͽ¤á
60 ¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¤¬¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥à¤¬Æȼ«¤Ëʸ»ú¥»¥Ã¥È¤ò
61 Äɲ乤뤳¤È¤â²Äǽ¤Ç¤¢¤ë¡£°ì¤Ä¤Îʸ»ú¤ÏÊ£¿ô¤Îʸ»ú¥»¥Ã¥È¤Ë°¤·¤Æ¤â¤è
64 m17n ¥é¥¤¥Ö¥é¥ê¤Ë¤Ï¡¢°Ê²¼¤Î°Û¤Ê¤ë³µÇ°¤¬¤¢¤ë:
66 @li @e ¥³¡¼¥É¥Ý¥¤¥ó¥È ¤È¤Ï¡¢CCS ¤¬¤½¤ÎÃæ¤Î¸Ä¡¹¤Îʸ»ú¤ËÂФ·¤ÆÄêµÁ¤¹
67 ¤ë¿ôÃͤǤ¢¤ë¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤ÏϢ³¤·¤Æ¤¤¤ë¤È¤Ï¸Â¤é¤Ê¤¤¡£
69 @li @e ʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹ ¤È¤Ï¡¢CCS Æâ¤Ç³Æʸ»ú¤Ë³ä¤êÅö¤Æ¤é¤ì¤ëÀµµ¬²½¤µ
70 ¤ì¤¿¥¤¥ó¥Ç¥Ã¥¯¥¹¤Ç¤¢¤ë¡£Ê¸»ú¥¤¥ó¥Ç¥Ã¥¯¥¹¤¬N¤Îʸ»ú¤Ï¡¢CCS Ãæ¤ÎÁ´Ê¸»ú¤ò
71 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ç¥½¡¼¥È¤·¤¿¤È¤¤ËNÈÖÌܤËÍè¤ë¡£
73 @li @e ʸ»ú¥³¡¼¥É¤È¤Ï¡¢m17n ¥é¥¤¥Ö¥é¥êÆâ¤Ë¤ª¤±¤ëʸ»ú¤ÎÆâÉôɽ¸½¤Ç¤¢
74 ¤ê¡¢21 ¥Ó¥Ã¥È°Ê¾å¤ÎŤµ¤ò»ý¤ÄÉä¹çÉÕ¤À°¿ô¤Ç¤¢¤ë¡£
76 ³Æʸ»ú¥»¥Ã¥È¥ª¥Ö¥¸¥§¥¯¥È¤Ï¡¢¤½¤ì¤Ë°¤¹¤ëʸ»ú¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú
77 ¥³¡¼¥É¤È¤ÎÁê¸ßÊÑ´¹¤òµ¬Äꤹ¤ë¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤«¤éʸ»ú¥³¡¼¥É¤Ø¤ÎÊÑ´¹
78 ¤ò @e ¥Ç¥³¡¼¥É ¤È¸Æ¤Ó¡¢Ê¸»ú¥³¡¼¥É¤«¤é¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ø¤ÎÊÑ´¹¤ò @e
79 ¥¨¥ó¥³¡¼¥É ¤È¸Æ¤Ö¡£ */
82 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
83 /*** @addtogroup m17nInternal
93 #include "m17n-misc.h"
/* Upper bound used when reserving character-code ranges for charsets
   of the Munify method: the charset initialization code sets it to
   MCHAR_MAX and make_charset () lowers it for each such charset.  */
101 static int unified_max;
103 /** List of all charsets ever defined. */
/* make_charset () appends every charset it accepts to this list and
   mcharset__fini () walks it to release decoders and encoders.  */
111 static struct MCharsetList charset_list;
/* Plist of charset definitions keyed by charset name; it is filled by
   mcharset__load_from_database () and queried by mcharset__find ().  */
113 static MPlist *charset_definition_list;
115 /** Make a charset object from the template of MCharset structure
116 CHARSET, and return a pointer to the new charset object.
117 CHARSET->code_range[4N + 2] and CHARSET->code_range[4N + 3] are
/* Finish building a charset from the template CHARSET: validate its
   parameters, fill in the derived slots of code_range and the min/max
   code-points, apply the method-specific checks, then register the
   charset.  Every failure path visible here goes through
   MERROR (MERROR_CHARSET, NULL).  */
121 make_charset (MCharset *charset)
123 unsigned min_code, max_code;
/* code_range uses four slots per byte position N: [4N] is the minimum
   byte and [4N + 1] the maximum byte (both stored by
   mchar_define_charset ()); [4N + 2] and [4N + 3] are computed below.  */
125 int *range = charset->code_range;
/* A code-point has between one and four bytes.  */
127 if (charset->dimension < 1 || charset->dimension > 4)
128 MERROR (MERROR_CHARSET, NULL);
/* A positive final byte is accepted only in the range '0'..127.  */
129 if ((charset->final_byte > 0 && charset->final_byte < '0')
130 || charset->final_byte > 127)
131 MERROR (MERROR_CHARSET, NULL);
/* For each byte position: reject an inverted range, store the number
   of byte values in [4N + 2] and the running product of those counts
   in [4N + 3], so range[15] ends up as the product over all four
   positions.  */
133 for (i = 0, n = 1; i < 4; i++)
135 if (range[i * 4] > range[i * 4 + 1])
136 MERROR (MERROR_CHARSET, NULL);
137 range[i * 4 + 2] = range[i * 4 + 1] - range[i * 4] + 1;
138 n *= range[i * 4 + 2];
139 range[i * 4 + 3] = n;
/* Pack the per-position minimum bytes into one code-point, position 0
   in the least significant byte.  A min_code of 0 in the template means
   "use the range minimum"; an explicit value below it is an error.  */
142 min_code = range[0] | (range[4] << 8) | (range[8] << 16) | (range[12] << 24);
143 if (charset->min_code == 0)
144 charset->min_code = min_code;
145 else if (charset->min_code < min_code)
146 MERROR (MERROR_CHARSET, NULL);
/* Same treatment for the maximum: 0 means "use the range maximum" and
   an explicit value above it is an error.  */
147 max_code = range[1] | (range[5] << 8) | (range[9] << 16) | (range[13] << 24);
148 if (charset->max_code == 0)
149 charset->max_code = max_code;
150 else if (charset->max_code > max_code)
151 MERROR (MERROR_CHARSET, NULL);
/* Remember the range minimum separately from charset->min_code, which
   may be a larger caller-supplied value.  */
153 charset->code_range_min_code = min_code;
154 charset->fully_loaded = 0;
/* A subset needs exactly one parent, the parent must not be a
   superset, and the code-point range shifted back by subset_offset
   must lie inside the range of the parent.  */
157 if (charset->method == Msubset)
161 if (charset->nparents != 1)
162 MERROR (MERROR_CHARSET, NULL);
163 parent = charset->parents[0];
164 if (parent->method == Msuperset
165 || charset->min_code - charset->subset_offset < parent->min_code
166 || charset->max_code - charset->subset_offset > parent->max_code)
167 MERROR (MERROR_CHARSET, NULL);
/* A superset needs at least two parents and its code-point range must
   cover the range of every parent.  */
169 else if (charset->method == Msuperset)
171 if (charset->nparents < 2)
172 MERROR (MERROR_CHARSET, NULL);
173 for (i = 0; i < charset->nparents; i++)
174 if (charset->min_code > charset->parents[i]->min_code
175 || charset->max_code < charset->parents[i]->max_code)
176 MERROR (MERROR_CHARSET, NULL);
/* NOTE(review): several lines of this expression, including its
   assignment target, are not shown in this listing; presumably it
   computes charset->no_code_gap, which is tested immediately below --
   confirm against the full source.  */
181 = (charset->dimension == 1
183 && (charset->dimension == 2
185 && (charset->dimension == 3
186 || range[10] == 256)))));
/* When the code space has gaps, build a lookup mask: bit I of
   code_range_mask[J] is set when byte value J is valid at byte
   position I.  */
188 if (! charset->no_code_gap)
192 memset (charset->code_range_mask, 0,
193 sizeof charset->code_range_mask);
194 for (i = 0; i < 4; i++)
195 for (j = range[i * 4]; j <= range[i * 4 + 1]; j++)
196 charset->code_range_mask[j] |= (1 << i);
/* Offset charsets map range[15] consecutive characters starting at
   min_char; both ends must be non-negative and max_char must not
   exceed unified_max.  Nothing further has to be loaded for them.  */
199 if (charset->method == Moffset)
201 charset->max_char = charset->min_char + range[15] - 1;
202 if (charset->min_char < 0
203 || charset->max_char < 0 || charset->max_char > unified_max)
204 MERROR (MERROR_CHARSET, NULL);
205 charset->simple = charset->no_code_gap;
206 charset->fully_loaded = 1;
/* Unify charsets reserve a block of character codes just below the
   current unified_max and record the new boundary in the charset.  */
208 else if (charset->method == Munify)
210 /* The magic number 12 below is to align to the SUB_BITS_2
211 (defined in chartab.c) boundary in a char-table. */
212 unified_max -= ((range[15] >> 12) + 1) << 12;
213 charset->unified_max = unified_max;
215 else if (charset->method != Mmap)
216 MERROR (MERROR_CHARSET, NULL);
/* Record the charset in the list of all charsets.  */
219 MLIST_APPEND1 (&charset_list, charsets, charset, MERROR_CHARSET);
/* A positive final byte also puts the charset into the ISO-2022
   table.  For charsets with revision <= 0 the branches below
   distinguish the cases by range[2], the number of byte values at
   byte position 0.  */
221 if (charset->final_byte > 0)
223 MLIST_APPEND1 (&mcharset__iso_2022_table, charsets, charset,
225 if (charset->revision <= 0)
227 int chars = range[2];
229 if (chars == 128) /* ASCII case */
231 else if (chars == 256) /* ISO-8859-X case */
233 MCHARSET_ISO_2022 (charset->dimension, chars, charset->final_byte)
/* Complete the lazily initialized part of CHARSET (character range and,
   for map based methods, the decoder and encoder tables) and set its
   fully_loaded flag.  The failure paths visible here go through
   MERROR (MERROR_CHARSET, -1), and callers in this file test the result
   with "< 0".  */
242 load_charset_fully (MCharset *charset)
244 if (charset->method == Msubset)
246 MCharset *parent = charset->parents[0];
/* The parent must be usable before it can be decoded through.  */
248 if (! parent->fully_loaded
249 && load_charset_fully (parent) < 0)
250 MERROR (MERROR_CHARSET, -1);
/* With an offset parent only the two end code-points are decoded to
   obtain the character range.  */
251 if (parent->method == Moffset)
255 code = charset->min_code - charset->subset_offset;
256 charset->min_char = DECODE_CHAR (parent, code);
257 code = charset->max_code - charset->subset_offset;
258 charset->max_char = DECODE_CHAR (parent, code);
/* Otherwise every code-point of the subset, translated into the code
   space of the parent, is decoded and the smallest and largest results
   are tracked.  (The statements of the branches inside the loop are on
   lines not shown in this listing.)  */
262 unsigned min_code = charset->min_code - charset->subset_offset;
263 unsigned max_code = charset->max_code - charset->subset_offset;
264 int min_char = DECODE_CHAR (parent, min_code);
265 int max_char = min_char;
267 for (++min_code; min_code <= max_code; min_code++)
269 int c = DECODE_CHAR (parent, min_code);
275 else if (c > max_char)
279 charset->min_char = min_char;
280 charset->max_char = max_char;
/* A superset takes its character range from the ranges of its parents,
   loading each parent first when necessary.  */
283 else if (charset->method == Msuperset)
285 int min_char = 0, max_char = 0;
288 for (i = 0; i < charset->nparents; i++)
290 MCharset *parent = charset->parents[i];
292 if (! parent->fully_loaded
293 && load_charset_fully (parent) < 0)
294 MERROR (MERROR_CHARSET, -1);
/* NOTE(review): because the two tests below are chained with "else if",
   a parent that lowers min_char is not also checked against max_char --
   confirm whether that is intended.  */
296 min_char = parent->min_char, max_char = parent->max_char;
297 else if (parent->min_char < min_char)
298 min_char = parent->min_char;
299 else if (parent->max_char > max_char)
300 max_char = parent->max_char;
302 charset->min_char = min_char;
303 charset->max_char = max_char;
305 else /* charset->method is Mmap or Munify */
/* Look up the database entry tagged <Mcharset, name> and load it; the
   first value of the loaded plist becomes the decoder and the second
   value the encoder.  */
307 MDatabase *mdb = mdatabase_find (Mcharset, charset->name, Mnil, Mnil);
310 if (! mdb || ! (plist = mdatabase_load (mdb)))
311 MERROR (MERROR_CHARSET, -1);
312 charset->decoder = mplist_value (plist);
313 charset->encoder = mplist_value (mplist_next (plist));
314 M17N_OBJECT_UNREF (plist);
/* The character range is taken from the encoder table.  */
315 mchartable_range (charset->encoder,
316 &charset->min_char, &charset->max_char);
317 if (charset->method == Mmap)
318 charset->simple = charset->no_code_gap;
/* NOTE(review): the line selecting this branch is not shown; presumably
   it is the Munify case, where max_char is moved to the end of the
   block reserved above unified_max -- confirm.  */
320 charset->max_char = charset->unified_max + 1 + charset->code_range[15];
323 charset->fully_loaded = 1;
/* One-entry lookup cache: mcharset__find () stores the most recently
   resolved name in the key and the matching charset in the value of
   this plist.  */
330 MPlist *mcharset__cache;
332 /* Predefined charsets. */
/* Each pointer is filled in by the charset initialization code from the
   corresponding Mcharset_* symbol via MCHARSET ().  */
333 MCharset *mcharset__ascii;
334 MCharset *mcharset__binary;
335 MCharset *mcharset__m17n;
336 MCharset *mcharset__unicode;
/* Table of charsets that have a positive final byte; make_charset ()
   appends to it.  */
338 MCharsetISO2022Table mcharset__iso_2022_table;
340 /** Initialize charset handler. */
/* NOTE(review): the function header and local declarations are on lines
   not shown in this listing.  */
/* No character codes have been reserved for unify charsets yet.  */
347 unified_max = MCHAR_MAX;
/* Create the one-entry lookup cache with a placeholder key of Mt and
   no charset.  */
349 mcharset__cache = mplist ();
350 mplist_set (mcharset__cache, Mt, NULL);
/* Set up the two charset lists (third argument 128) and the plist of
   charset definitions.  */
352 MLIST_INIT1 (&charset_list, charsets, 128);
353 MLIST_INIT1 (&mcharset__iso_2022_table, charsets, 128);
354 charset_definition_list = mplist ();
356 memset (mcharset__iso_2022_table.classified, 0,
357 sizeof (mcharset__iso_2022_table.classified));
/* Intern the symbol used as the property key for charset objects.  */
359 Mcharset = msymbol ("charset");
/* Intern the symbols naming the conversion methods.  */
361 Mmethod = msymbol ("method");
362 Moffset = msymbol ("offset");
363 Mmap = msymbol ("map");
364 Munify = msymbol ("unify");
365 Msubset = msymbol ("subset");
366 Msuperset = msymbol ("superset");
/* Intern the parameter keys accepted by mchar_define_charset ().
   Mmapfile, Mparents and Maliases are created with
   msymbol_as_managing_key () rather than msymbol ().  */
368 Mdimension = msymbol ("dimension");
369 Mmin_range = msymbol ("min-range");
370 Mmax_range = msymbol ("max-range");
371 Mmin_code = msymbol ("min-code");
372 Mmax_code = msymbol ("max-code");
373 Mascii_compatible = msymbol ("ascii-compatible");
374 Mfinal_byte = msymbol ("final-byte");
375 Mrevision = msymbol ("revision");
376 Mmin_char = msymbol ("min-char");
377 Mmapfile = msymbol_as_managing_key ("mapfile");
378 Mparents = msymbol_as_managing_key ("parents");
379 Msubset_offset = msymbol ("subset-offset");
380 Mdefine_coding = msymbol ("define-coding");
381 Maliases = msymbol_as_managing_key ("aliases");
385 /* Setup predefined charsets. */
/* Build the parameter list for "ascii": offset method, code-points
   0..0x7F, ASCII compatible, final byte 'B', first character 0.  */
386 pl = mplist_add (pl, Mmethod, Moffset);
387 pl = mplist_add (pl, Mmin_range, (void *) 0);
388 pl = mplist_add (pl, Mmax_range, (void *) 0x7F);
389 pl = mplist_add (pl, Mascii_compatible, Mt);
390 pl = mplist_add (pl, Mfinal_byte, (void *) 'B');
391 pl = mplist_add (pl, Mmin_char, (void *) 0);
392 Mcharset_ascii = mchar_define_charset ("ascii", param);
/* The remaining predefined charsets reuse the same parameter list and
   only overwrite the entries that differ: a larger maximum range and no
   final byte.  */
394 mplist_put (param, Mmax_range, (void *) 0xFF);
395 mplist_put (param, Mfinal_byte, NULL);
396 Mcharset_iso_8859_1 = mchar_define_charset ("iso-8859-1", param);
398 mplist_put (param, Mmax_range, (void *) 0x10FFFF);
399 Mcharset_unicode = mchar_define_charset ("unicode", param);
401 mplist_put (param, Mmax_range, (void *) MCHAR_MAX);
402 Mcharset_m17n = mchar_define_charset ("m17n", param);
404 mplist_put (param, Mmax_range, (void *) 0xFF);
405 Mcharset_binary = mchar_define_charset ("binary", param);
407 M17N_OBJECT_UNREF (param);
/* Keep direct pointers to the charset objects that were just defined.  */
409 mcharset__ascii = MCHARSET (Mcharset_ascii);
410 mcharset__binary = MCHARSET (Mcharset_binary);
411 mcharset__m17n = MCHARSET (Mcharset_m17n);
412 mcharset__unicode = MCHARSET (Mcharset_unicode);
/* Release the resources held by the charset module: the per-charset
   decoder and encoder tables, the lookup cache, both charset lists and
   the stored charset definitions.  */
418 mcharset__fini (void)
423 for (i = 0; i < charset_list.used; i++)
425 MCharset *charset = charset_list.charsets[i];
/* The decoder is released with free () while the encoder is released
   through M17N_OBJECT_UNREF ().  */
427 if (charset->decoder)
428 free (charset->decoder);
429 if (charset->encoder)
430 M17N_OBJECT_UNREF (charset->encoder);
433 M17N_OBJECT_UNREF (mcharset__cache);
434 MLIST_FREE1 (&charset_list, charsets);
435 MLIST_FREE1 (&mcharset__iso_2022_table, charsets);
/* Drop the reference held on each stored definition (one is taken per
   entry in mcharset__load_from_database ()) before releasing the list
   itself.  */
436 MPLIST_DO (plist, charset_definition_list)
437 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
438 M17N_OBJECT_UNREF (charset_definition_list);
/* Look up the charset object attached to the symbol NAME, defining it
   from the stored definitions when necessary, and remember the result
   in mcharset__cache.  (Control-flow lines and the return statement are
   not shown in this listing.)  */
443 mcharset__find (MSymbol name)
/* First try the Mcharset property of NAME.  */
447 charset = msymbol_get (name, Mcharset);
/* Otherwise fetch the definition recorded for NAME.  */
450 MPlist *param = mplist_get (charset_definition_list, name);
/* Reset the cache key to the placeholder Mt before defining the
   charset.  */
452 MPLIST_KEY (mcharset__cache) = Mt;
/* Convert the stored definition, define the charset under the name of
   the symbol, and read the property again.  */
455 param = mplist__from_plist (param);
456 mchar_define_charset (MSYMBOL_NAME (name), param);
457 charset = msymbol_get (name, Mcharset);
458 M17N_OBJECT_UNREF (param);
/* Cache the name/charset pair.  */
460 MPLIST_KEY (mcharset__cache) = name;
461 MPLIST_VAL (mcharset__cache) = charset;
466 /** Return the character corresponding to code-point CODE in CHARSET.
467 If CODE is invalid for CHARSET, return -1. */
470 mcharset__decode_char (MCharset *charset, unsigned code)
/* Code-points below 128 of an ASCII compatible charset and code-points
   outside min_code..max_code are handled before anything is loaded.
   (The statements executed in these two cases are on lines not shown;
   presumably CODE itself and -1 respectively -- confirm.)  */
474 if (code < 128 && charset->ascii_compatible)
476 if (code < charset->min_code || code > charset->max_code)
/* Load the charset on first use.  */
479 if (! charset->fully_loaded
480 && load_charset_fully (charset) < 0)
481 MERROR (MERROR_CHARSET, -1);
/* A subset translates CODE into the code space of its parent by
   subtracting subset_offset and lets the parent decode it.  */
483 if (charset->method == Msubset)
485 MCharset *parent = charset->parents[0];
487 code -= charset->subset_offset;
488 return DECODE_CHAR (parent, code);
/* A superset asks each parent in turn.  (The test on the result and
   the return statements are on lines not shown.)  */
491 if (charset->method == Msuperset)
495 for (i = 0; i < charset->nparents; i++)
497 MCharset *parent = charset->parents[i];
498 int c = DECODE_CHAR (parent, code);
/* The remaining methods work on the index computed from CODE.  */
506 idx = CODE_POINT_TO_INDEX (charset, code);
/* Map charsets read the character straight from the decoder table.  */
510 if (charset->method == Mmap)
511 return charset->decoder[idx];
/* Unify charsets consult the decoder table as well; the assignment
   below places the character in the block above unified_max.  (The
   condition guarding that assignment is on a line not shown.)  */
513 if (charset->method == Munify)
515 int c = charset->decoder[idx];
518 c = charset->unified_max + 1 + idx;
522 /* Now charset->method should be Moffset. */
523 return (charset->min_char + idx);
527 /** Return the code point of character C in CHARSET. If CHARSET does not
528 contain C, return MCHAR_INVALID_CODE. */
531 mcharset__encode_char (MCharset *charset, int c)
/* Load the charset on first use.  */
533 if (! charset->fully_loaded
534 && load_charset_fully (charset) < 0)
535 MERROR (MERROR_CHARSET, MCHAR_INVALID_CODE);
/* A subset encodes through its parent, shifts the result by
   subset_offset and accepts it only when it falls inside
   min_code..max_code.  (The return statements of the success paths are
   on lines not shown.)  */
537 if (charset->method == Msubset)
539 MCharset *parent = charset->parents[0];
540 unsigned code = ENCODE_CHAR (parent, c);
542 if (code == MCHAR_INVALID_CODE)
544 code += charset->subset_offset;
545 if (code >= charset->min_code && code <= charset->max_code)
547 return MCHAR_INVALID_CODE;
/* A superset tries each parent in turn and fails when none of them can
   encode C.  */
550 if (charset->method == Msuperset)
554 for (i = 0; i < charset->nparents; i++)
556 MCharset *parent = charset->parents[i];
557 unsigned code = ENCODE_CHAR (parent, c);
559 if (code != MCHAR_INVALID_CODE)
562 return MCHAR_INVALID_CODE;
/* For the remaining methods C must lie in the character range of the
   charset.  */
565 if (c < charset->min_char || c > charset->max_char)
566 return MCHAR_INVALID_CODE;
/* Map charsets look the code-point up in the encoder table.  */
568 if (charset->method == Mmap)
569 return (unsigned) mchartable_lookup (charset->encoder, c);
/* Unify charsets convert characters above unified_max by offset and
   all others through the encoder table.
   NOTE(review): the decoder computes unified_max + 1 + idx while this
   subtracts unified_max - 1, so the two do not look like inverses --
   confirm.  */
571 if (charset->method == Munify)
573 if (c > charset->unified_max)
575 c -= charset->unified_max - 1;
576 return INDEX_TO_CODE_POINT (charset, c);
578 return (unsigned) mchartable_lookup (charset->encoder, c);
581 /* Now charset->method should be Moffset */
582 c -= charset->min_char;
583 return INDEX_TO_CODE_POINT (charset, c);
/* Load the "charset-list" entry of the m17n database and define every
   charset described in it.  Malformed entries are reported through
   MERROR (MERROR_CHARSET, -1).  */
587 mcharset__load_from_database ()
589 MDatabase *mdb = mdatabase_find (msymbol ("charset-list"), Mnil, Mnil, Mnil);
590 MPlist *def_list, *plist;
/* Tail pointer used to append to charset_definition_list.  */
591 MPlist *definitions = charset_definition_list;
592 int mdebug_mask = MDEBUG_CHARSET;
597 def_list = (MPlist *) mdatabase_load (mdb);
598 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to load data."));
/* Each element must itself be a plist whose first element is the
   symbol naming the charset; the rest of that plist is the definition.
   NOTE(review): if MERROR returns from the function, def_list is not
   released on these error paths -- confirm.  */
604 MPLIST_DO (plist, def_list)
609 if (! MPLIST_PLIST_P (plist))
610 MERROR (MERROR_CHARSET, -1);
611 pl = MPLIST_PLIST (plist);
612 if (! MPLIST_SYMBOL_P (pl))
613 MERROR (MERROR_CHARSET, -1);
614 name = MPLIST_SYMBOL (pl);
615 pl = MPLIST_NEXT (pl);
/* Keep the definition under the charset name; the extra reference
   taken here is the one dropped per entry in mcharset__fini ().  */
616 definitions = mplist_add (definitions, name, pl);
617 M17N_OBJECT_REF (pl);
/* Convert the definition and define the charset from it.  */
618 p = mplist__from_plist (pl);
619 mchar_define_charset (MSYMBOL_NAME (name), p);
620 M17N_OBJECT_UNREF (p);
/* When the definition contains define-coding followed by Mt, register
   a coding for the charset as well.  */
621 if ((pl = mplist_find_by_value (pl, Mdefine_coding))
622 && (MSymbol) MPLIST_VAL (MPLIST_NEXT (pl)) == Mt)
623 mconv__register_charset_coding (name);
626 M17N_OBJECT_UNREF (def_list);
627 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to parse the loaded data."));
633 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
638 /*** @addtogroup m17nCharset */
644 @brief Invalid code-point.
646 The macro #MCHAR_INVALID_CODE gives the invalid code-point. */
649 @brief 無効なコードポイント
651 マクロ #MCHAR_INVALID_CODE は無効なコードポイントを与える。 */
653 #define MCHAR_INVALID_CODE
657 @brief The symbol @c Mcharset.
659 Any decoded M-text has a text property whose key is the predefined
660 symbol @c Mcharset. The name of @c Mcharset is
661 <tt>"charset"</tt>. */
664 @brief ¥·¥ó¥Ü¥ë @c Mcharset
666 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¡¢¥¡¼¤¬ @c Mcharset ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È
667 ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£¥·¥ó¥Ü¥ë @c Mcharset ¤Ï <tt>"charset"</tt> ¤È¤¤
668 ¤¦Ì¾Á°¤Ç¤¢¤é¤«¤¸¤áÄêµÁ¤µ¤ì¤Æ¤¤¤ë¡£ */
674 @name Variables: Symbols representing a charset.
676 Each of the following symbols represents a predefined charset. */
679 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤òɽ¤ï¤¹ÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë
681 °Ê²¼¤Î³Æ¥·¥ó¥Ü¥ë¤Ï¡¢¥¡¼¤¬ @c Mcharset ¤Ç¤¢¤ê¡¢Ãͤ¬Âбþ¤¹¤ëʸ»ú¥»¥Ã
682 ¥È¥ª¥Ö¥¸¥§¥¯¥È¡Ê @c MCharset ·¿¡Ë¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¥·¥ó¥Ü¥ë¥×¥í¥Ñ
688 @brief Symbol representing the charset ASCII.
690 The symbol #Mcharset_ascii has name <tt>"ascii"</tt> and represents
691 the charset ISO 646, USA Version X3.4-1968 (ISO-IR-6). */
693 @brief ISO 646, USA Version ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤Î¥·¥ó¥Ü¥ë
695 ¥·¥ó¥Ü¥ë #Mcharset_ascii ¤Ï <tt>"ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
696 ISO 646, USA Version X3.4-1968 (ISO-IR-6) ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤ò»Ø
697 Äꤹ¤ë¤¿¤á¤Ë»È¤ï¤ì¤ë¡£ */
699 MSymbol Mcharset_ascii;
703 @brief Symbol representing the charset ISO/IEC 8859/1.
705 The symbol #Mcharset_iso_8859_1 has name <tt>"iso-8859-1"</tt>
706 and represents the charset ISO/IEC 8859-1:1998. */
708 @brief ISO/IEC 8859-1:1998 ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤Î¥·¥ó¥Ü¥ë
710 ¥·¥ó¥Ü¥ë #Mcharset_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt> ¤È¤¤¤¦Ì¾
711 Á°¤ò»ý¤Á¡¢ISO/IEC 8859-1:1998 ¤ËÂбþ¤¹¤ëʸ»ú¥»¥Ã¥È¤ò»ØÄꤹ¤ë¤¿¤á¤Ë
714 MSymbol Mcharset_iso_8859_1;
717 @brief Symbol representing the charset Unicode.
719 The symbol #Mcharset_unicode has name <tt>"unicode"</tt> and
720 represents the charset Unicode. */
722 @brief Unicode に対応する文字セットのシンボル
724 シンボル #Mcharset_unicode は <tt>"unicode"</tt> という名前を持
725 ち、Unicode に対応する文字セットを指定するために使われる。 */
727 MSymbol Mcharset_unicode;
731 @brief Symbol representing the largest charset.
733 The symbol #Mcharset_m17n has name <tt>"m17n"</tt> and
734 represents the charset that contains all characters supported by
737 @brief Á´Ê¸»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤Î¥·¥ó¥Ü¥ë
739 ¥·¥ó¥Ü¥ë #Mcharset_m17n ¤Ï <tt>"m17n"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
740 m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤ò»ØÄꤹ¤ë¤¿¤á¤Ë»È
743 MSymbol Mcharset_m17n;
747 @brief Symbol representing the charset for ill-decoded characters.
749 The symbol #Mcharset_binary has name <tt>"binary"</tt> and
750 represents the fake charset which the decoding functions put to an
751 M-text as a text property when they encounter an invalid byte
752 (sequence). See @ref m17nConv @latexonly
753 (P.\pageref{group__m17nConv}) @endlatexonly for more detail. */
755 MSymbol Mcharset_binary;
762 @name Variables: Parameter keys for mchar_define_charset ().
764 These are the predefined symbols to use as parameter keys for the
765 function mchar_define_charset () (which see). */
768 @name ÊÑ¿ô: mchar_define_charset ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼
770 ¤³¤ì¤é¤Ï¡¢´Ø¿ô mchar_define_charset () ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼¤È¤·¤Æ
771 »È¤ï¤ì¤ë¥·¥ó¥Ü¥ë¤Ç¤¢¤ë¡£ ¾Ü¤·¤¯¤Ï¤³¤Î´Ø¿ô¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£*/
776 Parameter key for mchar_define_charset () (which see). */
784 MSymbol Mascii_compatible;
790 MSymbol Msubset_offset;
791 MSymbol Mdefine_coding;
798 @name Variables: Symbols representing charset methods.
800 These are the predefined symbols that can be a value of the
801 #Mmethod parameter of a charset used in an argument to the
802 mchar_define_charset () function.
804 A method specifies how code-points and character codes are
805 converted. See the documentation of the mchar_define_charset ()
806 function for the details. */
809 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤Î¥á¥½¥Ã¥É»ØÄê¤Ë»È¤ï¤ì¤ë¥·¥ó¥Ü¥ë
811 ¤³¤ì¤é¤Ï¡¢Ê¸»ú¥»¥Ã¥È¤Î @e ¥á¥½¥Ã¥É ¤ò»ØÄꤹ¤ë¤¿¤á¤Î¥·¥ó¥Ü¥ë¤Ç¤¢¤ê¡¢
812 ´Ø¿ô mchar_define_charset () ¤Î°ú¿ô¤È¤·¤Æ»È¤ï¤ì¤ë¡£
814 ¥á¥½¥Ã¥É¤È¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥³¡¼¥É¤òÁê¸ßÊÑ´¹¤¹¤ëºÝ¤ÎÊý¼°¤Î¤³
815 ¤È¤Ç¤¢¤ë¡£¾Ü¤·¤¯¤Ï´Ø¿ô mchar_define_charset () ¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£ */
819 @brief Symbol for the offset type method of charset.
821 The symbol #Moffset has the name <tt>"offset"</tt> and, when used
822 as a value of #Mmethod parameter of a charset, it means that the
823 conversion of code-points and character codes of the charset is
824 done by this calculation:
827 CHARACTER-CODE = CODE-POINT - MIN-CODE + MIN-CHAR
830 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
831 and MIN-CHAR is a value of #Mmin_char parameter. */
834 @brief ¥ª¥Õ¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
836 ¥·¥ó¥Ü¥ë #Moffset ¤Ï <tt>"offset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
837 mchar_define_charset () ¤Ç¥ª¥Õ¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò»ØÄꤹ¤ë¾ì¹ç¤Î°ú
838 ¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¡£*/
843 /***en @brief Symbol for the map type method of charset.
845 The symbol #Mmap has the name <tt>"map"</tt> and, when used as a
846 value of #Mmethod parameter of a charset, it means that the
847 conversion of code-points and character codes of the charset is
848 done by map looking up. The map must be given by #Mmapfile
851 /***ja @brief ¥Þ¥Ã¥×·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
853 ¥·¥ó¥Ü¥ë #Mmap ¤Ï <tt>"map"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
854 mchar_define_charset () ¤Ç¥Þ¥Ã¥×·¿¤Î¥á¥½¥Ã¥É¤ò»ØÄꤹ¤ë¾ì¹ç¤Î°ú¿ô¤È
860 /***en @brief Symbol for the unify type method of charset.
862 The symbol #Munify has the name <tt>"unify"</tt> and, when used as
863 a value of #Mmethod parameter of a charset, it means that the
864 conversion of code-points and character codes of the charset is
865 done by map looking up and offsetting. The map must be given by
866 #Mmapfile parameter. For this kind of charset, a unique
867 contiguous character code space for all characters is assigned.
868 If the map has an entry for a code-point, the conversion is done
869 by looking up the map. Otherwise, the conversion is done by this
873 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
876 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
877 and LOWEST-CHAR-CODE is the lowest character code of the assigned
880 /***ja @brief Áê³·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
882 ¥·¥ó¥Ü¥ë #Minherit ¤Ï <tt>"inherit"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
883 mchar_define_charset () ¤ÇÁê³·¿¤Î¥á¥½¥Ã¥É¤ò»ØÄꤹ¤ë¾ì¹ç¤Î°ú¿ô¤È¤·
890 @brief Symbol for the subset type method of charset.
892 The symbol #Msubset has the name <tt>"subset"</tt> and, when used
893 as a value of #Mmethod parameter of a charset, it means that the
894 charset is a subset of a parent charset. The parent charset must
895 be given by #Mparents parameter. The conversion of code-points
896 and character codes of the charset is done conceptually by this
900 CHARACTER-CODE = PARENT-CODE (CODE-POINT - SUBSET-OFFSET)
903 where, PARENT-CODE is a pseudo function that returns a character
904 code of CODE-POINT in the parent charset, and SUBSET-OFFSET is a
905 value given by #Msubset_offset parameter. */
911 @brief Symbol for the superset type method of charset.
913 The symbol #Msuperset has the name <tt>"superset"</tt> and, when
914 used as a value of #Mmethod parameter of a charset, it means that
915 the charset is a superset of parent charsets. The parent charsets
916 must be given by #Mparents parameter. */
923 @brief Define a charset.
925 The mchar_define_charset () function defines a new charset and
926 makes it accessible via a symbol whose name is $NAME. $PLIST
927 specifies parameters of the charset as below:
931 <li> Key is #Mmethod, value is a symbol.
933 The value specifies the method for decoding/encoding code-points
934 in the charset. It must be #Moffset, #Mmap (default), #Munify,
935 #Msubset, or #Msuperset.
937 <li> Key is #Mdimension, value is an integer
939 The value specifies the dimension of code-points of the charset.
940 It must be 1 (default), 2, 3, or 4.
942 <li> Key is #Mmin_range, value is an unsigned integer
944 The value specifies the minimum range of a code-point, which means
945 that the Nth byte of the value is the minimum Nth byte of
946 code-points of the charset. The default value is 0.
948 <li> Key is #Mmax_range, value is an unsigned integer
950 The value specifies the maximum range of a code-point, which means
951 that the Nth byte of the value is the maximum Nth byte of
952 code-points of the charset. The default value is 0xFF, 0xFFFF,
953 0xFFFFFF, or 0xFFFFFFFF if the dimension is 1, 2, 3, or 4
956 <li> Key is #Mmin_code, value is an unsigned integer
958 The value specifies the minimum code-point of
959 the charset. The default value is the minimum range.
961 <li> Key is #Mmax_code, value is an unsigned integer
963 The value specifies the maximum code-point of
964 the charset. The default value is the maximum range.
966 <li> Key is #Mascii_compatible, value is a symbol
968 The value specifies whether the charset is ASCII compatible or
969 not. If the value is #Mnil (default), it is not ASCII
970 compatible, else compatible.
972 <li> Key is #Mfinal_byte, value is an integer
974 The value specifies the @e final @e byte of the charset registered
975 in The International Registry. It must be 0 (default) or 48..127.
976 The value 0 means that the charset is not in the registry.
978 <li> Key is #Mrevision, value is an integer
980 The value specifies the @e revision @e number of the charset
981 registered in The International Registry. It must be 0..127. If
982 the charset is not in The International Registry, the value is
983 ignored. The value 0 means that the charset has no revision
986 <li> Key is #Mmin_char, value is an integer
988 The value specifies the minimum character code of the charset.
989 The default value is 0.
991 <li> Key is #Mmapfile, value is an M-text
993 If the method is #Mmap or #Munify, data that contains
994 mapping information is added to the m17n database by calling
995 mdatabase_define () with the value as an argument $EXTRA_INFO,
996 i.e. the value is used as a file name of the data.
998 Otherwise, this parameter is ignored.
1000 <li> Key is #Mparents, value is a plist
1002 If the method is #Msubset, the value must be a plist of length
1003 1, and the value of the plist must be a symbol representing a
1006 If the method is #Msuperset, the value must be a plist of length
1007 less than 9, and the values of the plist must be symbols
1008 representing subset charsets.
1010 Otherwise, this parameter is ignored.
1012 <li> Key is #Mdefine_coding, value is a symbol
1014 If the dimension of the charset is 1, the value specifies whether
1015 or not to define a coding system of the same name whose method is
1018 Otherwise, this parameter is ignored.
1023 If the operation was successful, mchar_define_charset () returns a
1024 symbol whose name is $NAME. Otherwise it returns #Mnil and
1025 assigns an error code to the external variable #merror_code. */
1028 @brief ʸ»ú¥»¥Ã¥È¤òÄêµÁ¤¹¤ë.
1030 ´Ø¿ô mchar_define_charset () ¤Ï¿·¤·¤¤Ê¸»ú¥»¥Ã¥È¤òÄêµÁ¤·¡¢¤½¤ì¤ò
1031 $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£$METHOD
1032 ¤Ï¤½¤Îʸ»ú¥»¥Ã¥È¤Ë¤ª¤±¤ë¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î¥Ç¥³¡¼¥É¡¿¥¨¥ó¥³¡¼¥É¥á¥½¥Ã
1033 ¥É¤ò»ØÄꤹ¤ë¥·¥ó¥Ü¥ë¤Ç¤¢¤ê¡¢#Moffset, #Mmap, #Munify,
1034 #Msubset, #Msuperset ¤Î¤¤¤º¤ì¤«¤ÎÃͤò¤È¤ë¡£
1036 $DIMENSION ¤Ï¤½¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î¼¡¸µ¤Ç¤¢¤ê¡¢1, 2, 3,
1037 4¤Î¤¤¤º¤ì¤«¤ÎÃͤò¤È¤ë¡£
1039 $CODE_RANGE ¤ÏÂ礤µ¤¬8¥Ð¥¤¥È¤ÎÇÛÎó¤Ç¤¢¤ê¡¢ÄêµÁ¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î
1040 ¥³¡¼¥É¥Ý¥¤¥ó¥È¶õ´Ö¤òɽ¤ï¤¹¡£Âè1¥Ð¥¤¥È¤ÈÂè2¥Ð¥¤¥È¤ÎÃͤϥ³¡¼¥É¥Ý¥¤¥ó
1041 ¥È¤ÎºÇ½é¤Î¼¡¸µ¤Ç¤ÎºÇ¾®¡¿ºÇÂç¥Ð¥¤¥È¤ÎÃͤǤ¢¤ë¡£Âè3¥Ð¥¤¥È¤ÈÂè4¥Ð¥¤¥È
1042 ¤Ï¡¢2ÈÖÌܤμ¡¸µ¤ÎºÇ¾®¡¿ºÇÂçÃͤǤ¢¤ê¡¢ °Ê²¼Æ±Íͤ˳¤¯¡£°ìÈÌŪ¤Ë¡¢Âè
1043 (2N-1)¥Ð¥¤¥È¤ÈÂè(2N)¥Ð¥¤¥È¤¬NÈÖÌܤμ¡¸µ¤ÎºÇ¾®¡¿ºÇÂçÃͤȤʤë (N =
1044 1, 2, 3, 4)¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î @e ʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹ ¤Ï¤³¤ì¤é¤ÎÃͤ«¤é
1047 $MIN_CODE ¤È $MAX_CODE ¤Ï¡¢¤½¤ì¤¾¤ì¤³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇ¾®¤ª¤è¤ÓºÇÂç
1048 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤òɽ¤ï¤¹¡£0¤¬»ØÄꤵ¤ì¤¿¾ì¹ç¤Ï $CODE_RANGE ¤ÎÃͤ«¤é·×
1051 $FINAL_BYTE ¤Ï The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë
1052 @e ½ªÃ¼¥Ð¥¤¥È ¤Ç¤¢¤ë¡£ÅÐÏ¿¤µ¤ì¤Æ¤¤¤Ê¤¤ CCS ¤Î¾ì¹ç¤Ë¤Ï -1 ¤Ç¤Ê¤¯¤Æ¤Ï
1055 $REVISION ¤Ï¡¢The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë@e revision
1056 @e number ¤Ç¤¢¤ë¡£¤â¤· revision number ¤¬Â¸ºß¤·¤Ê¤¤¤Ê¤é -1 ¤Ç¤Ê¤¯
1059 @par ¥á¥½¥Ã¥É¤¬ Moffset ¤Î¾ì¹ç
1061 $MIN_CHAR ¤Ë¤ÏºÇ¾®¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤ËÂбþ¤¹¤ëʸ»ú¥³¡¼¥É¤òÍ¿¤¨¤ë¡£
1062 $NPARENTS, $PARENTS, ¤ª¤è¤Ó $SUBSET_OFFSET ¤Ï̵»ë¤µ¤ì¤ë¡£
1064 @par ¥á¥½¥Ã¥É¤¬ Mmap ¤Î¾ì¹ç
1066 m17n ¸À¸ì¾ðÊó¥Ù¡¼¥¹Ãæ¤Ç \<#Mcharset, $NAME\> ¤È¤¤¤¦¥¿¥°¤ÎÉÕ¤¤¤¿
1067 ¥Þ¥Ã¥Ô¥ó¥°¥Æ¡¼¥Ö¥ë¤ò¡¢¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤ËÍѤ¤¤ë¡£$MIN_CHAR,
1068 $NPARENTS, $PARENTS, ¤ª¤è¤Ó $SUBSET_OFFSET ¤Ï̵»ë¤µ¤ì¤ë¡£
1070 @par ¥á¥½¥Ã¥É¤¬ Msubset ¤Î¾ì¹ç
1072 $NPARENTS ¤Ï1¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£¤Þ¤¿ $PARENTS ¤Ï·Ñ¾µ¤Î¸µ¤È¤Ê¤ëʸ
1073 »ú¥»¥Ã¥È¤òɽ¤ï¤¹¥·¥ó¥Ü¥ë¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£¸µ¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É
1074 ¥Ý¥¤¥ó¥È¤Ë $SUBSET_OFFSET ¤ò²Ã¤¨¤¿¤â¤Î¤¬¡¢¿·¤·¤¤Ê¸»ú¥»¥Ã¥ÈÃæ¤Ç¤Î¥³¡¼
1075 ¥É¥Ý¥¤¥ó¥È¤Ë¤Ê¤ë¡£$MIN_CHAR ¤Ï̵»ë¤µ¤ì¤ë¡£
1077 @par ¥á¥½¥Ã¥É¤¬ Msuperset ¤Î¾ì¹ç
1079 $NPARENTS ¤Ï¿Æ¤È¤Ê¤ëʸ»ú¥»¥Ã¥È¤Î¿ô¡¢$PARENTS ¤Ï¿Æʸ»ú¥»¥Ã¥È¤Î¥·¥ó
1080 ¥Ü¥ë¤ÎÇÛÎó¤òɽ¤ï¤¹¡£$MIN_CHAR ¤ª¤è¤Ó $SUBSET_OFFSET ¤Ï̵»ë¤µ¤ì¤ë¡£
1083 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mchar_define_charset () ¤Ï $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·
1084 ¥ó¥Ü¥ë¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð #Mnil ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô @c
1085 merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1089 @c MERROR_CHARSET */
/* Build an MCharset from the parameters in PLIST, attach it to the
   symbol named NAME (and to its canonical form and aliases) under the
   key Mcharset, and hand it to make_charset () for validation and
   registration.  The failure paths visible here go through
   MERROR (MERROR_CHARSET, Mnil).  */
1092 mchar_define_charset (char *name, MPlist *plist)
1094 MSymbol sym = msymbol (name);
1097 unsigned min_range, max_range;
1099 MText *mapfile = (MText *) mplist_get (plist, Mmapfile);
1101 MSTRUCT_CALLOC (charset, MERROR_CHARSET);
1102 charset->name = sym;
/* Without an explicit method the charset falls back to Mmap or Moffset.
   (The condition choosing between the two is on a line not shown.)  */
1103 charset->method = (MSymbol) mplist_get (plist, Mmethod);
1104 if (! charset->method)
1107 charset->method = Mmap;
1109 charset->method = Moffset;
/* Map based methods register the map file with the database under the
   tag <Mcharset, sym>.  (The test that leads to the MERROR below is on
   a line not shown; presumably a missing mapfile -- confirm.)  */
1111 if (charset->method == Mmap || charset->method == Munify)
1114 MERROR (MERROR_CHARSET, Mnil);
1115 mdatabase_define (Mcharset, sym, Mnil, Mnil, NULL, mapfile->data);
/* NOTE(review): integer parameters are stored directly in the plist
   value pointers and recovered by casting the pointer to int or
   unsigned, which truncates where pointers are wider than int.  */
1117 if (! (charset->dimension = (int) mplist_get (plist, Mdimension)))
1118 charset->dimension = 1;
1120 min_range = (unsigned) mplist_get (plist, Mmin_range);
/* An explicit maximum range may raise the dimension so that the value
   fits; without one the default depends on the dimension.  */
1121 if ((pl = mplist_find_by_key (plist, Mmax_range)))
1123 max_range = (unsigned) MPLIST_VAL (pl);
1124 if (max_range >= 0x1000000)
1125 charset->dimension = 4;
1126 else if (max_range >= 0x10000 && charset->dimension < 3)
1127 charset->dimension = 3;
1128 else if (max_range >= 0x100 && charset->dimension < 2)
1129 charset->dimension = 2;
1131 else if (charset->dimension == 1)
1133 else if (charset->dimension == 2)
1135 else if (charset->dimension == 3)
1136 max_range = 0xFFFFFF;
1138 max_range = 0xFFFFFFFF;
/* Split both range values into bytes: slot [4N] receives the minimum
   and slot [4N + 1] the maximum byte of position N.  The loop shifts
   min_range and max_range right by eight bits per iteration.  */
1140 memset (charset->code_range, 0, sizeof charset->code_range);
1141 for (i = 0; i < charset->dimension; i++, min_range >>= 8, max_range >>= 8)
1143 charset->code_range[i * 4] = min_range & 0xFF;
1144 charset->code_range[i * 4 + 1] = max_range & 0xFF;
/* NOTE(review): min_range and max_range have already been shifted by
   the loop above, so these comparisons no longer see the values read
   from PLIST -- confirm that this clamping is intended.  */
1146 if ((charset->min_code = (int) mplist_get (plist, Mmin_code)) < min_range)
1147 charset->min_code = min_range;
1148 if ((charset->max_code = (int) mplist_get (plist, Mmax_code)) > max_range)
1149 charset->max_code = max_range;
/* Any value other than Mnil marks the charset as ASCII compatible.  */
1150 charset->ascii_compatible
1151 = (MSymbol) mplist_get (plist, Mascii_compatible) != Mnil;
1152 charset->final_byte = (int) mplist_get (plist, Mfinal_byte);
1153 charset->revision = (int) mplist_get (plist, Mrevision);
1154 charset->min_char = (int) mplist_get (plist, Mmin_char);
/* At most eight parents are used; each must be given as a symbol that
   MCHARSET () resolves to a charset.  */
1155 pl = (MPlist *) mplist_get (plist, Mparents);
1156 charset->nparents = pl ? mplist_length (pl) : 0;
1157 if (charset->nparents > 8)
1158 charset->nparents = 8;
1159 for (i = 0; i < charset->nparents; i++, pl = MPLIST_NEXT (pl))
1161 MSymbol parent_name;
1163 if (MPLIST_KEY (pl) != Msymbol)
1164 MERROR (MERROR_CHARSET, Mnil);
1165 parent_name = MPLIST_SYMBOL (pl);
1166 if (! (charset->parents[i] = MCHARSET (parent_name)))
1167 MERROR (MERROR_CHARSET, Mnil);
1170 charset->subset_offset = (int) mplist_get (plist, Msubset_offset);
/* The charset is attached to SYM before make_charset () checks it; the
   handling of a failed make_charset () is on lines not shown.  */
1172 msymbol_put (sym, Mcharset, charset);
1173 charset = make_charset (charset);
1176 msymbol_put (msymbol__canonicalize (sym), Mcharset, charset);
/* Attach the same charset to every alias and to its canonical form;
   the walk stops at the first element that is not a symbol.  */
1178 for (pl = (MPlist *) mplist_get (plist, Maliases);
1179 pl && MPLIST_KEY (pl) == Msymbol;
1180 pl = MPLIST_NEXT (pl))
1182 MSymbol alias = MPLIST_SYMBOL (pl);
1184 msymbol_put (alias, Mcharset, charset);
1185 msymbol_put (msymbol__canonicalize (alias), Mcharset, charset);
/* A coding of the same name is registered only for one-dimensional
   charsets whose byte range is exactly 0..255.  */
1188 if (mplist_get (plist, Mdefine_coding)
1189 && charset->dimension == 1
1190 && charset->code_range[0] == 0 && charset->code_range[1] == 255)
1191 mconv__register_charset_coding (sym);
1198 @brief Resolve charset name.
1200 The mchar_resolve_charset () function returns $SYMBOL if it
1201 represents a charset. Otherwise, it canonicalizes $SYMBOL as a
1202 charset name and, if the canonicalized name represents a charset,
1203 returns the name of that charset. Otherwise, it returns #Mnil. */
1206 mchar_resolve_charset (MSymbol symbol)
1208 MCharset *charset = (MCharset *) msymbol_get (symbol, Mcharset);
1212 symbol = msymbol__canonicalize (symbol);
1213 charset = (MCharset *) msymbol_get (symbol, Mcharset);
1216 return (charset ? charset->name : Mnil);
1222 @brief List symbols representing a charset.
1224 The mchar_list_charset () function makes an array of symbols,
1225 each representing a charset, stores the pointer to the array in the
1226 place pointed to by $SYMBOLS, and returns the length of the array. */
1229 mchar_list_charset (MSymbol **symbols)
1233 MTABLE_MALLOC ((*symbols), charset_list.used, MERROR_CHARSET);
1234 for (i = 0; i < charset_list.used; i++)
1235 (*symbols)[i] = charset_list.charsets[i]->name;
1242 @brief Decode a code-point.
1244 The mchar_decode () function decodes code-point $CODE in the
1245 charset represented by the symbol $CHARSET_NAME to get a character code.
1249 If decoding was successful, mchar_decode () returns the decoded
1250 character code. Otherwise it returns -1. */
1253 @brief コードポイントをデコードする
1255 関数 mchar_decode () は、シンボル $CHARSET_NAME で示される文字セッ
1256 ト内の $CODE というコードポイントをデコードして文字コードを得る。
1259 デコードが成功すれば、mchar_decode () はデコードされた文字コードを
1260 返す。そうでなければ -1 を返す。 */
1267 mchar_decode (MSymbol charset_name, unsigned code)
1269 MCharset *charset = MCHARSET (charset_name);
1272 return MCHAR_INVALID_CODE;
1273 return DECODE_CHAR (charset, code);
1279 @brief Encode a character code.
1281 The mchar_encode () function encodes character code $C to get a
1282 code-point in the charset represented by the symbol $CHARSET_NAME.
1285 If encoding was successful, mchar_encode () returns the encoded
1286 code-point. Otherwise it returns #MCHAR_INVALID_CODE. */
1289 @brief 文字コードをエンコードする
1291 関数 mchar_encode () は、文字コード $C をエンコードしてシンボル
1292 $CHARSET_NAME で示される文字セット内におけるコードポイントを得る。
1295 エンコードが成功すれば、mchar_encode () はエンコードされたコードポイ
1296 ントを返す。そうでなければ #MCHAR_INVALID_CODE を返す。 */
1303 mchar_encode (MSymbol charset_name, int c)
1305 MCharset *charset = MCHARSET (charset_name);
1308 return MCHAR_INVALID_CODE;
1309 return ENCODE_CHAR (charset, c);
1315 @brief Call a function for all the characters in a specified charset.
1317 The mchar_map_charset () function calls $FUNC for all the
1318 characters in the charset named $CHARSET_NAME. A call is done for
1319 a chunk of consecutive characters rather than character by character.
1322 $FUNC receives three arguments: $FROM, $TO, and $ARG. $FROM and
1323 $TO specify the range of character codes in $CHARSET_NAME. $ARG is the same as $FUNC_ARG.
1327 If the operation was successful, mchar_map_charset () returns 0.
1328 Otherwise, it returns -1 and assigns an error code to the external
1329 variable #merror_code. */
1332 @brief 指定した文字セットのすべての文字に対して関数を呼ぶ
1334 関数 mchar_map_charset () は $CHARSET_NAME という名前を持つ文字セッ
1335 ト中のすべての文字に対して $FUNC を呼ぶ。呼び出しは一文字毎ではな
1336 く、連続した文字のまとまり単位で行なわれる。
1338 関数 $FUNC には $FROM, $TO, $ARG の３引数が渡される。$FROM と $TO
1339 は $CHARSET_NAME 中の文字コードの範囲を指定する。$ARG は $FUNC_ARG と同じである。
1343 処理に成功すれば mchar_map_charset () は 0 を返す。そうでなければ
1344 -1 を返し、外部変数 #merror_code にエラーコードを設定する。 */
1348 @c MERROR_CHARSET */
1351 mchar_map_charset (MSymbol charset_name,
1352 void (*func) (int from, int to, void *arg),
1357 charset = MCHARSET (charset_name);
1359 MERROR (MERROR_CHARSET, -1);
1361 if (charset->encoder)
1363 int c = charset->min_char;
1366 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) < 0)
1368 while (c <= charset->max_char)
1370 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) >= 0)
1371 (*func) (c, next_c - 1, func_arg);
1376 (*func) (charset->min_char, charset->max_char, func_arg);