1 /* charset.c -- charset module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 @addtogroup m17nCharset
24 @brief Charset objects and API for them.
26 The m17n library uses @e charset objects to represent coded
27 character sets (CCS). The m17n library supports many predefined
28 coded character sets. Moreover, application programs can add
29 other charsets. A character can belong to multiple charsets.
31 The m17n library distinguishes the following three concepts:
33 @li A @e code-point is a number assigned by the CCS to each
34 character. Code-points may or may not be continuous. The type
35 @c unsigned is used to represent a code-point. An invalid
36 code-point is represented by the macro @c MCHAR_INVALID_CODE.
38 @li A @e character @e index is the canonical index of a character
39 in a CCS. The character that has the character index N occupies
40 the Nth position when all the characters in the current CCS are
41 sorted by their code-points. Character indices in a CCS are
42 continuous and start with 0.
44 @li A @e character @e code is the internal representation in the
45 m17n library of a character. A character code is a signed integer
48 Each charset object defines how characters are converted between
49 code-points and character codes. To @e decode means converting
50 code-points to character codes and to @e encode means converting
51 character codes to code-points. */
54 @addtogroup m17nCharset
55 @brief ʸ»ú¥»¥Ã¥È¥ª¥Ö¥¸¥§¥¯¥È¤È¤½¤ì¤Ë´Ø¤¹¤ë API.
57 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢Éä¹æ²½Ê¸»ú½¸¹ç (CCS) ¤ò @e ʸ»ú¥»¥Ã¥È
58 ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Çɽ¸½¤¹¤ë¡£
59 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¿¤¯¤ÎÉä¹æ²½Ê¸»ú½¸¹ç¤ò¤¢¤é¤«¤¸¤á¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¤·¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥à¤¬Æȼ«¤Ëʸ»ú¥»¥Ã¥È¤òÄɲ乤뤳¤È¤â²Äǽ¤Ç¤¢¤ë¡£
60 °ì¤Ä¤Îʸ»ú¤ÏÊ£¿ô¤Îʸ»ú¥»¥Ã¥È¤Ë°¤·¤Æ¤â¤è¤¤¡£
62 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢°Ê²¼¤Î³µÇ°¤ò¶èÊ̤·¤Æ¤¤¤ë:
64 @li @e ¥³¡¼¥É¥Ý¥¤¥ó¥È ¤È¤Ï¡¢CCS ¤¬¤½¤ÎÃæ¤Î¸Ä¡¹¤Îʸ»ú¤ËÂФ·¤ÆÄêµÁ¤¹¤ë¿ôÃͤǤ¢¤ë¡£
65 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤ÏϢ³¤·¤Æ¤¤¤ë¤È¤Ï¸Â¤é¤Ê¤¤¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ï
66 @c unsigned ·¿¤Ë¤è¤Ã¤Æɽ¤µ¤ì¤ë¡£Ìµ¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ï¥Þ¥¯¥í
67 @c MCHAR_INVALID_CODE ¤Çɽ¤µ¤ì¤ë¡£
69 @li @e ʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹ ¤È¤Ï¡¢CCS Æâ¤Ç³Æʸ»ú¤Ë³ä¤êÅö¤Æ¤é¤ì¤ëÀµµ¬²½¤µ¤ì¤¿¥¤¥ó¥Ç¥Ã¥¯¥¹¤Ç¤¢¤ë¡£
70 ʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹¤¬ N ¤Îʸ»ú¤Ï¡¢CCS Ãæ¤ÎÁ´Ê¸»ú¤ò¥³¡¼¥É¥Ý¥¤¥ó¥È½ç¤Ëʤ٤¿¤È¤¤Ë N ÈÖÌܤ˸½¤ï¤ì¤ë¡£
71 CCS Ãæ¤Îʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹¤ÏϢ³¤·¤Æ¤ª¤ê¡¢0 ¤«¤é»Ï¤Þ¤ë¡£
73 @li @e ʸ»ú¥³¡¼¥É ¤È¤Ï¡¢m17n ¥é¥¤¥Ö¥é¥êÆâ¤Ë¤ª¤±¤ëʸ»ú¤ÎÆâÉôɽ¸½¤Ç¤¢¤ê¡¢21 ¥Ó¥Ã¥È°Ê¾å¤ÎŤµ¤ò»ý¤ÄÉä¹çÉÕ¤À°¿ô¤Ç¤¢¤ë¡£
75 ³Æʸ»ú¥»¥Ã¥È¥ª¥Ö¥¸¥§¥¯¥È¤Ï¡¢¤½¤Îʸ»ú¥»¥Ã¥È¤Ë°¤¹¤ëʸ»ú¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥³¡¼¥É¤È¤Î´Ö¤ÎÊÑ´¹¤òµ¬Äꤹ¤ë¡£
76 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤«¤éʸ»ú¥³¡¼¥É¤Ø¤ÎÊÑ´¹¤ò @e ¥Ç¥³¡¼¥É
77 ¤È¸Æ¤Ó¡¢Ê¸»ú¥³¡¼¥É¤«¤é¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ø¤ÎÊÑ´¹¤ò @e ¥¨¥ó¥³¡¼¥É ¤È¸Æ¤Ö¡£ */
80 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
81 /*** @addtogroup m17nInternal
91 #include "m17n-misc.h"
99 static int unified_max;
101 /** List of all charsets ever defined. */
109 static struct MCharsetList charset_list;
111 static MPlist *charset_definition_list;
113 /** Make a charset object from the template of MCharset structure
114 CHARSET, and return a pointer to the new charset object.
115 CHARSET->code_range[4N + 2] and CHARSET->code_range[4N + 3] are
/* Validate the MCharset template CHARSET and complete its derived
   fields.  The visible code checks the dimension and final byte,
   derives the per-byte extents in code_range, establishes
   min_code/max_code, applies method-specific checks, and appends the
   charset to charset_list (and, when final_byte > 0, to
   mcharset__iso_2022_table).
   NOTE(review): several lines of this function (braces, local
   declarations, the return statement) are not visible in this excerpt.  */
119 make_charset (MCharset *charset)
121 unsigned min_code, max_code;
123 int *range = charset->code_range;
/* Only dimensions 1..4 are accepted.  */
125 if (charset->dimension < 1 || charset->dimension > 4)
126 MERROR (MERROR_CHARSET, NULL);
/* A positive final byte must lie in '0'..127.  */
127 if ((charset->final_byte > 0 && charset->final_byte < '0')
128 || charset->final_byte > 127)
129 MERROR (MERROR_CHARSET, NULL);
/* For each of the four byte positions I, range[4I] and range[4I + 1]
   hold the minimum and maximum byte value.  Store the number of byte
   values in range[4I + 2] and the running product of those counts in
   range[4I + 3]; range[15] therefore ends up as the product over all
   four positions.  */
131 for (i = 0, n = 1; i < 4; i++)
133 if (range[i * 4] > range[i * 4 + 1])
134 MERROR (MERROR_CHARSET, NULL);
135 range[i * 4 + 2] = range[i * 4 + 1] - range[i * 4] + 1;
136 n *= range[i * 4 + 2];
137 range[i * 4 + 3] = n;
/* Pack the per-position minima into a single code-point, position 0
   in the lowest byte.
   NOTE(review): RANGE is `int *', so `range[12] << 24' is a signed
   shift; a byte value of 128 or more would shift into the sign bit --
   confirm whether an unsigned cast is wanted.  */
140 min_code = range[0] | (range[4] << 8) | (range[8] << 16) | (range[12] << 24);
/* A min_code of 0 is replaced by the range minimum; a non-zero value
   below the range minimum is rejected.  */
141 if (charset->min_code == 0)
142 charset->min_code = min_code;
143 else if (charset->min_code < min_code)
144 MERROR (MERROR_CHARSET, NULL);
/* Same packing and defaulting for the maxima.  */
145 max_code = range[1] | (range[5] << 8) | (range[9] << 16) | (range[13] << 24);
146 if (charset->max_code == 0)
147 charset->max_code = max_code;
148 else if (charset->max_code > max_code)
149 MERROR (MERROR_CHARSET, NULL);
151 charset->code_range_min_code = min_code;
152 charset->fully_loaded = 0;
/* A subset charset needs exactly one parent, which must not itself be
   a superset, and whose code span must contain this charset's span
   after subset_offset is subtracted.  */
155 if (charset->method == Msubset)
159 if (charset->nparents != 1)
160 MERROR (MERROR_CHARSET, NULL);
161 parent = charset->parents[0];
162 if (parent->method == Msuperset
163 || charset->min_code - charset->subset_offset < parent->min_code
164 || charset->max_code - charset->subset_offset > parent->max_code)
165 MERROR (MERROR_CHARSET, NULL);
/* A superset charset needs at least two parents, and the code span of
   every parent must lie inside this charset's span.  */
167 else if (charset->method == Msuperset)
169 if (charset->nparents < 2)
170 MERROR (MERROR_CHARSET, NULL);
171 for (i = 0; i < charset->nparents; i++)
172 if (charset->min_code > charset->parents[i]->min_code
173 || charset->max_code < charset->parents[i]->max_code)
174 MERROR (MERROR_CHARSET, NULL);
/* NOTE(review): the left-hand side and some operands of the following
   assignment are not visible here; from the use below it presumably
   sets charset->no_code_gap -- confirm.  */
179 = (charset->dimension == 1
181 && (charset->dimension == 2
183 && (charset->dimension == 3
184 || range[10] == 256)))));
/* When the code space has gaps, build code_range_mask: bit I of
   code_range_mask[J] is set when byte value J is within the range of
   byte position I.  */
186 if (! charset->no_code_gap)
190 memset (charset->code_range_mask, 0,
191 sizeof charset->code_range_mask);
192 for (i = 0; i < 4; i++)
193 for (j = range[i * 4]; j <= range[i * 4 + 1]; j++)
194 charset->code_range_mask[j] |= (1 << i);
/* Offset charsets cover range[15] consecutive characters starting at
   min_char; they need no further loading.  */
197 if (charset->method == Moffset)
199 charset->max_char = charset->min_char + range[15] - 1;
200 if (charset->min_char < 0
201 || charset->max_char < 0 || charset->max_char > unified_max)
202 MERROR (MERROR_CHARSET, NULL);
203 charset->simple = charset->no_code_gap;
204 charset->fully_loaded = 1;
/* Unify charsets reserve a block of character codes by lowering the
   file-level unified_max and recording the new value.  */
206 else if (charset->method == Munify)
208 /* The magic number 12 below is to align to the SUB_BITS_2
209 (defined in chartab.c) boundary in a char-table. */
210 unified_max -= ((range[15] >> 12) + 1) << 12;
211 charset->unified_max = unified_max;
/* Any method other than those handled above and Mmap is an error.  */
213 else if (charset->method != Mmap)
214 MERROR (MERROR_CHARSET, NULL);
/* Record the charset in the list of all defined charsets.  */
217 MLIST_APPEND1 (&charset_list, charsets, charset, MERROR_CHARSET);
/* Charsets with a final byte are also recorded in the ISO 2022 table.  */
219 if (charset->final_byte > 0)
221 MLIST_APPEND1 (&mcharset__iso_2022_table, charsets, charset,
223 if (charset->revision <= 0)
/* range[2] is the number of byte values in byte position 0.  */
225 int chars = range[2];
227 if (chars == 128) /* ASCII case */
229 else if (chars == 256) /* ISO-8859-X case */
231 MCHARSET_ISO_2022 (charset->dimension, chars, charset->final_byte)
/* Finish loading CHARSET: compute min_char/max_char (and, for map and
   unify charsets, load the decoder and encoder from the database),
   then set fully_loaded.  Failures go through MERROR with -1.
   NOTE(review): several lines of this function are not visible in
   this excerpt.  */
240 load_charset_fully (MCharset *charset)
242 if (charset->method == Msubset)
244 MCharset *parent = charset->parents[0];
/* The parent must be fully loaded before it can be used to decode.  */
246 if (! parent->fully_loaded
247 && load_charset_fully (parent) < 0)
248 MERROR (MERROR_CHARSET, -1);
/* With an offset parent only the two end code-points are decoded to
   obtain the character range.  */
249 if (parent->method == Moffset)
253 code = charset->min_code - charset->subset_offset;
254 charset->min_char = DECODE_CHAR (parent, code);
255 code = charset->max_code - charset->subset_offset;
256 charset->max_char = DECODE_CHAR (parent, code);
/* Otherwise every code-point of the span is decoded through the parent
   and the smallest and largest results are tracked.  */
260 unsigned min_code = charset->min_code - charset->subset_offset;
261 unsigned max_code = charset->max_code - charset->subset_offset;
262 int min_char = DECODE_CHAR (parent, min_code);
263 int max_char = min_char;
265 for (++min_code; min_code <= max_code; min_code++)
267 int c = DECODE_CHAR (parent, min_code);
273 else if (c > max_char)
277 charset->min_char = min_char;
278 charset->max_char = max_char;
281 else if (charset->method == Msuperset)
283 int min_char = 0, max_char = 0;
/* Load every parent and merge their character ranges.  */
286 for (i = 0; i < charset->nparents; i++)
288 MCharset *parent = charset->parents[i];
290 if (! parent->fully_loaded
291 && load_charset_fully (parent) < 0)
292 MERROR (MERROR_CHARSET, -1);
/* NOTE(review): the two bound updates below are chained with
   `else if', so a parent whose min_char lowers the running minimum
   never has its max_char compared; a parent extending both ends would
   leave max_char too small -- confirm and consider two independent
   `if' statements.  */
294 min_char = parent->min_char, max_char = parent->max_char;
295 else if (parent->min_char < min_char)
296 min_char = parent->min_char;
297 else if (parent->max_char > max_char)
298 max_char = parent->max_char;
300 charset->min_char = min_char;
301 charset->max_char = max_char;
303 else /* charset->method is Mmap or Munify */
/* Look up the database entry keyed by Mcharset and the charset name.  */
305 MDatabase *mdb = mdatabase_find (Mcharset, charset->name, Mnil, Mnil);
308 if (! mdb || ! (plist = mdatabase_load (mdb)))
309 MERROR (MERROR_CHARSET, -1);
/* The first value of the loaded plist becomes the decoder, the second
   the encoder; the plist itself is then released.  */
310 charset->decoder = mplist_value (plist);
311 charset->encoder = mplist_value (mplist_next (plist));
312 M17N_OBJECT_UNREF (plist);
/* The character range is taken from the encoder char-table.  */
313 mchartable_range (charset->encoder,
314 &charset->min_char, &charset->max_char);
315 if (charset->method == Mmap)
316 charset->simple = charset->no_code_gap;
/* NOTE(review): presumably the Munify branch (its `else' line is not
   visible): max_char is extended to cover the block above
   unified_max.  */
318 charset->max_char = charset->unified_max + 1 + charset->code_range[15];
321 charset->fully_loaded = 1;
328 MPlist *mcharset__cache;
330 /* Predefined charsets. */
331 MCharset *mcharset__ascii;
332 MCharset *mcharset__binary;
333 MCharset *mcharset__m17n;
334 MCharset *mcharset__unicode;
336 MCharsetISO2022Table mcharset__iso_2022_table;
338 /** Initialize charset handler. */
345 unified_max = MCHAR_MAX;
347 mcharset__cache = mplist ();
348 mplist_set (mcharset__cache, Mt, NULL);
350 MLIST_INIT1 (&charset_list, charsets, 128);
351 MLIST_INIT1 (&mcharset__iso_2022_table, charsets, 128);
352 charset_definition_list = mplist ();
354 memset (mcharset__iso_2022_table.classified, 0,
355 sizeof (mcharset__iso_2022_table.classified));
357 Mcharset = msymbol ("charset");
359 Mmethod = msymbol ("method");
360 Moffset = msymbol ("offset");
361 Mmap = msymbol ("map");
362 Munify = msymbol ("unify");
363 Msubset = msymbol ("subset");
364 Msuperset = msymbol ("superset");
366 Mdimension = msymbol ("dimension");
367 Mmin_range = msymbol ("min-range");
368 Mmax_range = msymbol ("max-range");
369 Mmin_code = msymbol ("min-code");
370 Mmax_code = msymbol ("max-code");
371 Mascii_compatible = msymbol ("ascii-compatible");
372 Mfinal_byte = msymbol ("final-byte");
373 Mrevision = msymbol ("revision");
374 Mmin_char = msymbol ("min-char");
375 Mmapfile = msymbol_as_managing_key ("mapfile");
376 Mparents = msymbol_as_managing_key ("parents");
377 Msubset_offset = msymbol ("subset-offset");
378 Mdefine_coding = msymbol ("define-coding");
379 Maliases = msymbol_as_managing_key ("aliases");
383 /* Setup predefined charsets. */
384 pl = mplist_add (pl, Mmethod, Moffset);
385 pl = mplist_add (pl, Mmin_range, (void *) 0);
386 pl = mplist_add (pl, Mmax_range, (void *) 0x7F);
387 pl = mplist_add (pl, Mascii_compatible, Mt);
388 pl = mplist_add (pl, Mfinal_byte, (void *) 'B');
389 pl = mplist_add (pl, Mmin_char, (void *) 0);
390 Mcharset_ascii = mchar_define_charset ("ascii", param);
392 mplist_put (param, Mmax_range, (void *) 0xFF);
393 mplist_put (param, Mfinal_byte, NULL);
394 Mcharset_iso_8859_1 = mchar_define_charset ("iso-8859-1", param);
396 mplist_put (param, Mmax_range, (void *) 0x10FFFF);
397 Mcharset_unicode = mchar_define_charset ("unicode", param);
399 mplist_put (param, Mmax_range, (void *) MCHAR_MAX);
400 Mcharset_m17n = mchar_define_charset ("m17n", param);
402 mplist_put (param, Mmax_range, (void *) 0xFF);
403 Mcharset_binary = mchar_define_charset ("binary", param);
405 M17N_OBJECT_UNREF (param);
407 mcharset__ascii = MCHARSET (Mcharset_ascii);
408 mcharset__binary = MCHARSET (Mcharset_binary);
409 mcharset__m17n = MCHARSET (Mcharset_m17n);
410 mcharset__unicode = MCHARSET (Mcharset_unicode);
/* Release the resources held by the charset module: the per-charset
   decoder and encoder, the lookup cache, both charset lists, and the
   stored charset definitions.
   NOTE(review): some lines of this function are not visible in this
   excerpt.  */
416 mcharset__fini (void)
/* Walk every charset recorded in charset_list.  */
421 for (i = 0; i < charset_list.used; i++)
423 MCharset *charset = charset_list.charsets[i];
/* The decoder is released with free ().  */
425 if (charset->decoder)
426 free (charset->decoder);
/* The encoder is released by dropping a reference.  */
427 if (charset->encoder)
428 M17N_OBJECT_UNREF (charset->encoder);
431 M17N_OBJECT_UNREF (mcharset__cache);
432 MLIST_FREE1 (&charset_list, charsets);
433 MLIST_FREE1 (&mcharset__iso_2022_table, charsets);
/* Drop the reference held on each stored definition value, then the
   definition list itself.  */
434 MPLIST_DO (plist, charset_definition_list)
435 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
436 M17N_OBJECT_UNREF (charset_definition_list);
/* Look up the charset associated with symbol NAME.  If needed, the
   charset is defined from the parameters stored under NAME in
   charset_definition_list.  The result is recorded in mcharset__cache.
   NOTE(review): the conditions guarding these steps and the return
   statement are not visible in this excerpt.  */
441 mcharset__find (MSymbol name)
/* The charset object is stored as the Mcharset property of NAME.  */
445 charset = msymbol_get (name, Mcharset);
448 MPlist *param = mplist_get (charset_definition_list, name);
/* Reset the cache key to Mt before defining the charset.  */
450 MPLIST_KEY (mcharset__cache) = Mt;
/* Convert the stored definition into a parameter plist, define the
   charset from it, and fetch the charset object again.  */
453 param = mplist__from_plist (param);
454 mchar_define_charset (MSYMBOL_NAME (name), param);
455 charset = msymbol_get (name, Mcharset);
456 M17N_OBJECT_UNREF (param);
/* Remember the NAME -> charset pair in the cache.  */
458 MPLIST_KEY (mcharset__cache) = name;
459 MPLIST_VAL (mcharset__cache) = charset;
464 /** Return the character corresponding to code-point CODE in CHARSET.
465 If CODE is invalid for CHARSET, return -1. */
/* Decode code-point CODE of CHARSET into a character, dispatching on
   the charset method.
   NOTE(review): several lines (braces, some return statements) are
   not visible in this excerpt.  */
468 mcharset__decode_char (MCharset *charset, unsigned code)
/* Special case: codes below 128 in an ASCII-compatible charset.  */
472 if (code < 128 && charset->ascii_compatible)
/* Codes outside [min_code, max_code] do not belong to CHARSET.  */
474 if (code < charset->min_code || code > charset->max_code)
/* Make sure the charset is fully loaded before decoding.  */
477 if (! charset->fully_loaded
478 && load_charset_fully (charset) < 0)
479 MERROR (MERROR_CHARSET, -1);
/* Subset: subtract subset_offset and decode in the single parent.  */
481 if (charset->method == Msubset)
483 MCharset *parent = charset->parents[0];
485 code -= charset->subset_offset;
486 return DECODE_CHAR (parent, code);
/* Superset: try decoding CODE in each parent in turn.  */
489 if (charset->method == Msuperset)
493 for (i = 0; i < charset->nparents; i++)
495 MCharset *parent = charset->parents[i];
496 int c = DECODE_CHAR (parent, code);
/* The remaining methods work on the index of CODE within CHARSET.  */
504 idx = CODE_POINT_TO_INDEX (charset, code);
/* Map: the decoder array is indexed directly.  */
508 if (charset->method == Mmap)
509 return charset->decoder[idx];
/* Unify: consult the decoder array; the visible fallback assigns the
   character unified_max + 1 + idx.  */
511 if (charset->method == Munify)
513 int c = charset->decoder[idx];
516 c = charset->unified_max + 1 + idx;
520 /* Now charset->method should be Moffset. */
521 return (charset->min_char + idx);
525 /** Return the code point of character C in CHARSET. If CHARSET does not
526 contain C, return MCHAR_INVALID_CODE. */
/* Encode character C into a code-point of CHARSET, dispatching on the
   charset method; MCHAR_INVALID_CODE signals failure.
   NOTE(review): several lines (braces, some return statements) are
   not visible in this excerpt.  */
529 mcharset__encode_char (MCharset *charset, int c)
/* Make sure the charset is fully loaded before encoding.  */
531 if (! charset->fully_loaded
532 && load_charset_fully (charset) < 0)
533 MERROR (MERROR_CHARSET, MCHAR_INVALID_CODE);
/* Subset: encode in the single parent, add subset_offset, and check
   the result against [min_code, max_code].  */
535 if (charset->method == Msubset)
537 MCharset *parent = charset->parents[0];
538 unsigned code = ENCODE_CHAR (parent, c);
540 if (code == MCHAR_INVALID_CODE)
542 code += charset->subset_offset;
543 if (code >= charset->min_code && code <= charset->max_code)
545 return MCHAR_INVALID_CODE;
/* Superset: try each parent in turn; if none yields a valid code the
   result is MCHAR_INVALID_CODE.  */
548 if (charset->method == Msuperset)
552 for (i = 0; i < charset->nparents; i++)
554 MCharset *parent = charset->parents[i];
555 unsigned code = ENCODE_CHAR (parent, c);
557 if (code != MCHAR_INVALID_CODE)
560 return MCHAR_INVALID_CODE;
/* For the remaining methods C must lie within [min_char, max_char].  */
563 if (c < charset->min_char || c > charset->max_char)
564 return MCHAR_INVALID_CODE;
/* Map: look C up in the encoder char-table.  */
566 if (charset->method == Mmap)
567 return (unsigned) mchartable_lookup (charset->encoder, c);
/* Unify: characters above unified_max are converted arithmetically,
   the others through the encoder char-table.
   NOTE(review): mcharset__decode_char forms such characters as
   unified_max + 1 + idx, whose inverse is c - unified_max - 1, but the
   line below subtracts (unified_max - 1), which yields idx + 2 --
   confirm whether `c -= charset->unified_max + 1' was intended.  */
569 if (charset->method == Munify)
571 if (c > charset->unified_max)
573 c -= charset->unified_max - 1;
574 return INDEX_TO_CODE_POINT (charset, c);
576 return (unsigned) mchartable_lookup (charset->encoder, c);
579 /* Now charset->method should be Moffset */
580 c -= charset->min_char;
581 return INDEX_TO_CODE_POINT (charset, c);
/* Load the "charset-list" entry from the database, record each
   charset definition in charset_definition_list, and define each
   charset via mchar_define_charset ().
   NOTE(review): several lines (braces, local declarations, the return
   statement) are not visible in this excerpt.  */
585 mcharset__load_from_database ()
587 MDatabase *mdb = mdatabase_find (msymbol ("charset-list"), Mnil, Mnil, Mnil);
588 MPlist *def_list, *plist;
/* New definitions are added through this cursor, which starts at
   charset_definition_list.  */
589 MPlist *definitions = charset_definition_list;
590 int mdebug_mask = MDEBUG_CHARSET;
595 def_list = (MPlist *) mdatabase_load (mdb);
596 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to load data."));
/* Each element of def_list must be a plist whose first element is a
   symbol (the charset name); the rest is the definition.
   NOTE(review): if MERROR leaves the function here, def_list is not
   unreferenced on that path -- confirm.  */
602 MPLIST_DO (plist, def_list)
607 if (! MPLIST_PLIST_P (plist))
608 MERROR (MERROR_CHARSET, -1);
609 pl = MPLIST_PLIST (plist);
610 if (! MPLIST_SYMBOL_P (pl))
611 MERROR (MERROR_CHARSET, -1);
612 name = MPLIST_SYMBOL (pl);
613 pl = MPLIST_NEXT (pl);
/* Store the definition under NAME and take a reference on it for the
   stored copy.  */
614 definitions = mplist_add (definitions, name, pl);
615 M17N_OBJECT_REF (pl);
/* Convert the definition into a parameter plist and define the
   charset; the result of mchar_define_charset () is not checked.  */
616 p = mplist__from_plist (pl);
617 mchar_define_charset (MSYMBOL_NAME (name), p);
618 M17N_OBJECT_UNREF (p);
621 M17N_OBJECT_UNREF (def_list);
622 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to parse the loaded data."));
628 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
633 /*** @addtogroup m17nCharset */
639 @brief Invalid code-point.
641 The macro #MCHAR_INVALID_CODE gives the invalid code-point. */
644 @brief ̵¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È.
646 ¥Þ¥¯¥í #MCHAR_INVALID_CODE ¤Ï̵¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È¤ò¼¨¤¹¡£ */
648 #define MCHAR_INVALID_CODE
652 @brief The symbol @c Mcharset.
654 Any decoded M-text has a text property whose key is the predefined
655 symbol @c Mcharset. The name of @c Mcharset is
656 <tt>"charset"</tt>. */
659 @brief ¥·¥ó¥Ü¥ë @c Mcharset.
661 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¡¢¥¡¼¤¬ @c Mcharset
662 ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£
663 ¥·¥ó¥Ü¥ë @c Mcharset ¤Ï <tt>"charset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£ */
669 @name Variables: Symbols representing a charset.
671 Each of the following symbols represents a predefined charset. */
674 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ëÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë.
676 °Ê²¼¤Î³Æ¥·¥ó¥Ü¥ë¤Ï¡¢ÄêµÁºÑ¤ßʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£ */
681 @brief Symbol representing the charset ASCII.
683 The symbol #Mcharset_ascii has name <tt>"ascii"</tt> and represents
684 the charset ISO 646, USA Version X3.4-1968 (ISO-IR-6). */
686 @brief ASCII ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
688 ¥·¥ó¥Ü¥ë #Mcharset_ascii ¤Ï <tt>"ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
689 ISO 646, USA Version X3.4-1968 (ISO-IR-6) ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£
692 MSymbol Mcharset_ascii;
696 @brief Symbol representing the charset ISO/IEC 8859/1.
698 The symbol #Mcharset_iso_8859_1 has name <tt>"iso-8859-1"</tt>
699 and represents the charset ISO/IEC 8859-1:1998. */
701 @brief ISO/IEC 8859-1:1998 ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
703 ¥·¥ó¥Ü¥ë #Mcharset_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt>
704 ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢ISO/IEC 8859-1:1998 ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£
707 MSymbol Mcharset_iso_8859_1;
710 @brief Symbol representing the charset Unicode.
712 The symbol #Mcharset_unicode has name <tt>"unicode"</tt> and
713 represents the charset Unicode. */
715 @brief Unicode ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
717 ¥·¥ó¥Ü¥ë #Mcharset_unicode ¤Ï <tt>"unicode"</tt>
718 ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Unicode ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£ */
720 MSymbol Mcharset_unicode;
724 @brief Symbol representing the largest charset.
726 The symbol #Mcharset_m17n has name <tt>"m17n"</tt> and
727 represents the charset that contains all characters supported by
730 @brief Á´Ê¸»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
732 ¥·¥ó¥Ü¥ë #Mcharset_m17n ¤Ï <tt>"m17n"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
733 m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£ */
735 MSymbol Mcharset_m17n;
739 @brief Symbol representing the charset for ill-decoded characters.
741 The symbol #Mcharset_binary has name <tt>"binary"</tt> and
742 represents the fake charset which the decoding functions put to an
743 M-text as a text property when they encounter an invalid byte
746 See @ref m17nConv for more details. */
749 @brief Àµ¤·¤¯¥Ç¥³¡¼¥É¤Ç¤¤Ê¤¤Ê¸»ú¤Îʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
751 ¥·¥ó¥Ü¥ë #Mcharset_binary ¤Ï <tt>"binary"</tt>
752 ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢µ¶¤Î (fake) ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£
753 ¥Ç¥³¡¼¥É´Ø¿ô¤Ï¡¢M-text ¤Î¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤È¤·¤Æ¡¢Ìµ¸ú¤Ê¥Ð¥¤¥È¡Ê¥·¡¼¥¯¥¨¥ó¥¹¡Ë¤ËÁø¶ø¤·¤¿°ÌÃÖ¤òÉղ乤롣
755 ¾ÜºÙ¤Ï @ref m17nConv »²¾È¤Î¤³¤È¡£ */
757 MSymbol Mcharset_binary;
764 @name Variables: Parameter keys for mchar_define_charset ().
766 These are the predefined symbols to use as parameter keys for the
767 function mchar_define_charset () (which see). */
770 @name ÊÑ¿ô: mchar_define_charset ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼
772 ¤³¤ì¤é¤Ï¡¢´Ø¿ô mchar_define_charset () ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼¤È¤·¤Æ»È¤ï¤ì¤ë¥·¥ó¥Ü¥ë¤Ç¤¢¤ë¡£
773 ¾Ü¤·¤¯¤Ï¤³¤Î´Ø¿ô¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£*/
778 Parameter key for mchar_define_charset () (which see). */
781 ´Ø¿ô mchar_define_charset () ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼.
782 ¾Ü¤·¤¯¤Ï¤³¤Î´Ø¿ô¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£*/
790 MSymbol Mascii_compatible;
796 MSymbol Msubset_offset;
797 MSymbol Mdefine_coding;
804 @name Variables: Symbols representing charset methods.
806 These are the predefined symbols that can be a value of the
807 #Mmethod parameter of a charset used in an argument to the
808 mchar_define_charset () function.
810 A method specifies how code-points and character codes are
811 converted. See the documentation of the mchar_define_charset ()
812 function for the details. */
815 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤Î¥á¥½¥Ã¥É»ØÄê¤Ë»È¤ï¤ì¤ë¥·¥ó¥Ü¥ë
817 ¤³¤ì¤é¤Ï¡¢Ê¸»ú¥»¥Ã¥È¤Î @e ¥á¥½¥Ã¥É ¤ò»ØÄꤹ¤ë¤¿¤á¤ÎÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë¤Ç¤¢¤ê¡¢Ê¸»ú¥»¥Ã¥È¤Î
818 #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤʤ뤳¤È¤¬¤Ç¤¤ë¡£
819 ¤³¤ÎÃͤϴؿô mchar_define_charset () ¤Î°ú¿ô¤È¤·¤Æ»È¤ï¤ì¤ë¡£
821 ¥á¥½¥Ã¥É¤È¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥³¡¼¥É¤òÁê¸ßÊÑ´¹¤¹¤ëºÝ¤ÎÊý¼°¤Î¤³¤È¤Ç¤¢¤ë¡£
822 ¾Ü¤·¤¯¤Ï´Ø¿ô mchar_define_charset () ¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£ */
826 @brief Symbol for the offset type method of charset.
828 The symbol #Moffset has the name <tt>"offset"</tt> and, when used
829 as a value of #Mmethod parameter of a charset, it means that the
830 conversion of code-points and character codes of the charset is
831 done by this calculation:
834 CHARACTER-CODE = CODE-POINT - MIN-CODE + MIN-CHAR
837 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
838 and MIN-CHAR is a value of #Mmin_char parameter. */
841 @brief ¥ª¥Õ¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
843 ¥·¥ó¥Ü¥ë #Moffset ¤Ï <tt>"offset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã¥È¤Î
844 #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤¬°Ê²¼¤Î¼°¤Ë½¾¤Ã¤Æ¹Ô¤ï¤ì¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
847 ʸ»ú¥³¡¼¥É = ¥³¡¼¥É¥Ý¥¤¥ó¥È - MIN-CODE + MIN-CHAR
850 ¤³¤³¤Ç¡¢MIN-CODE ¤Ïʸ»ú¥»¥Ã¥È¤Î #Mmin_code ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤǤ¢¤ê¡¢MIN-CHAR ¤Ï
851 #Mmin_char ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤǤ¢¤ë¡£ */
856 /***en @brief Symbol for the map type method of charset.
858 The symbol #Mmap has the name <tt>"map"</tt> and, when used as a
859 value of #Mmethod parameter of a charset, it means that the
860 conversion of code-points and character codes of the charset is
861 done by map looking up. The map must be given by #Mmapfile
864 /***ja @brief ¥Þ¥Ã¥×·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
866 ¥·¥ó¥Ü¥ë #Mmap ¤Ï <tt>"map"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã¥È¤Î
867 #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤¬¥Þ¥Ã¥×¤ò»²¾È¤¹¤ë¤³¤È¤Ë¤è¤Ã¤Æ¹Ô¤ï¤ì¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
868 ¥Þ¥Ã¥×¤Ï #Mmapfile ¥Ñ¥é¥á¡¼¥¿¤È¤·¤ÆÍ¿¤¨¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ */
873 /***en @brief Symbol for the unify type method of charset.
875 The symbol #Munify has the name <tt>"unify"</tt> and, when used as
876 a value of #Mmethod parameter of a charset, it means that the
877 conversion of code-points and character codes of the charset is
878 done by map looking up and offsetting. The map must be given by
879 #Mmapfile parameter. For this kind of charset, a unique
880 continuous character code space for all characters is assigned.
882 If the map has an entry for a code-point, the conversion is done
883 by looking up the map. Otherwise, the conversion is done by this
887 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
890 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
891 and LOWEST-CHAR-CODE is the lowest character code of the assigned
894 /***ja @brief ¥æ¥Ë¥Õ¥¡¥¤·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
896 ¥·¥ó¥Ü¥ë #Minherit ¤Ï <tt>"unify"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã¥È¤Î
897 #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤¬¡¢¥Þ¥Ã¥×¤Î»²¾È¤È¥ª¥Õ¥»¥Ã¥È¤ÎÁȤ߹ç¤ï¤»¤Ë¤è¤Ã¤Æ¹Ô¤ï¤ì¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
898 ¥Þ¥Ã¥×¤Ï #Mmapfile ¥Ñ¥é¥á¡¼¥¿¤È¤·¤ÆÍ¿¤¨¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£
899 ¤³¤Î¼ï¤Î³Æʸ»ú¥»¥Ã¥È¤Ë¤Ï¡¢Á´Ê¸»ú¤ËÂФ·¤ÆϢ³¤¹¤ë¥³¡¼¥É¥¹¥Ú¡¼¥¹¤¬¤½¤ì¤¾¤ì³ä¤êÅö¤Æ¤é¤ì¤ë¡£
901 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤¬¥Þ¥Ã¥×¤Ë´Þ¤Þ¤ì¤Æ¤¤¤ì¤Ð¡¢ÊÑ´¹¤Ï¥Þ¥Ã¥×»²¾È¤Ë¤è¤Ã¤Æ¹Ô¤ï¤ì¤ë¡£
902 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢°Ê²¼¤Î¼°¤Ë½¾¤¦¡£
905 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
908 ¤³¤³¤Ç¡¢MIN-CODE ¤Ïʸ»ú¥»¥Ã¥È¤Î #Mmin_code ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤǤ¢¤ê¡¢
909 LOWEST-CHAR-CODE ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿¥³¡¼¥É¥¹¥Ú¡¼¥¹¤ÎºÇ¤â¾®¤µ¤¤Ê¸»ú¥³¡¼¥É¤Ç¤¢¤ë¡£
916 @brief Symbol for the subset type method of charset.
918 The symbol #Msubset has the name <tt>"subset"</tt> and, when used
919 as a value of #Mmethod parameter of a charset, it means that the
920 charset is a subset of a parent charset. The parent charset must
921 be given by #Mparents parameter. The conversion of code-points
922 and character codes of the charset is done conceptually by this
926 CHARACTER-CODE = PARENT-CODE (CODE-POINT - SUBSET-OFFSET)
929 where, PARENT-CODE is a pseudo function that returns a character
930 code of CODE-POINT in the parent charset, and SUBSET-OFFSET is a
931 value given by #Msubset_offset parameter. */
933 /***ja @brief ¥µ¥Ö¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
935 ¥·¥ó¥Ü¥ë #Msubset ¤Ï <tt>"subset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã¥È¤Î
936 #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¤³¤Îʸ»ú¥»¥Ã¥È¤¬Ê̤Îʸ»ú¥»¥Ã¥È¡Ê¿Æʸ»ú¥»¥Ã¥È¡Ë¤ÎÉôʬ½¸¹ç¤Ç¤¢¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
937 ¿Æʸ»ú¥»¥Ã¥È¤Ï #Mparents ¥Ñ¥é¥á¡¼¥¿¤Ë¤è¤Ã¤ÆÍ¿¤¨¤é¤ì¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
938 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤Ï¡¢³µÇ°Åª¤Ë¤Ï°Ê²¼¤Î¼°¤Ë½¾¤¦¡£
941 CHARACTER-CODE = PARENT-CODE (CODE-POINT - SUBSET-OFFSET)
944 ¤³¤³¤Ç PARENT-CODE ¤Ï CODE-POINT
945 ¤Î¿Æʸ»ú¥»¥Ã¥ÈÃæ¤Ç¤Îʸ»ú¥³¡¼¥É¤òÊÖ¤¹µ¼´Ø¿ô¤Ç¤¢¤ê¡¢SUBSET-OFFSET ¤Ï
946 #Msubset_offset ¥Ñ¥é¥á¡¼¥¿¤ÇÍ¿¤¨¤é¤ì¤ëÃͤǤ¢¤ë¡£
953 @brief Symbol for the superset type method of charset.
955 The symbol #Msuperset has the name <tt>"superset"</tt> and, when
956 used as a value of #Mmethod parameter of a charset, it means that
957 the charset is a superset of parent charsets. The parent charsets
958 must be given by #Mparents parameter. */
961 @brief ¥¹¡¼¥Ñ¡¼¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
963 ¥·¥ó¥Ü¥ë #Msuperset ¤Ï <tt>"superset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã¥È¤Î
964 #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¤³¤Îʸ»ú¥»¥Ã¥È¤¬Ê̤Îʸ»ú¥»¥Ã¥È¡Ê¿Æʸ»ú¥»¥Ã¥È¡Ë¤Î¾å°Ì½¸¹ç¤Ç¤¢¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
965 ¿Æʸ»ú¥»¥Ã¥È¤Ï #Mparents ¥Ñ¥é¥á¡¼¥¿¤Ë¤è¤Ã¤ÆÍ¿¤¨¤é¤ì¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
973 @brief Define a charset.
975 The mchar_define_charset () function defines a new charset and
976 makes it accessible via a symbol whose name is $NAME. $PLIST
977 specifies parameters of the charset as below:
981 <li> Key is #Mmethod, value is a symbol.
983 The value specifies the method for decoding/encoding code-points
984 in the charset. It must be #Moffset, #Mmap (default), #Munify,
985 #Msubset, or #Msuperset.
987 <li> Key is #Mdimension, value is an integer
989 The value specifies the dimension of code-points of the charset.
990 It must be 1 (default), 2, 3, or 4.
992 <li> Key is #Mmin_range, value is an unsigned integer
994 The value specifies the minimum range of a code-point, which means
995 that the Nth byte of the value is the minimum Nth byte of
996 code-points of the charset. The default value is 0.
998 <li> Key is #Mmax_range, value is an unsigned integer
1000 The value specifies the maximum range of a code-point, which means
1001 that the Nth byte of the value is the maximum Nth byte of
1002 code-points of the charset. The default value is 0xFF, 0xFFFF,
1003 0xFFFFFF, or 0xFFFFFFFF if the dimension is 1, 2, 3, or 4
1006 <li> Key is #Mmin_code, value is an unsigned integer
1008 The value specifies the minimum code-point of
1009 the charset. The default value is the minimum range.
1011 <li> Key is #Mmax_code, value is an unsigned integer
1013 The value specifies the maximum code-point of
1014 the charset. The default value is the maximum range.
1016 <li> Key is #Mascii_compatible, value is a symbol
1018 The value specifies whether the charset is ASCII compatible or
1019 not. If the value is #Mnil (default), it is not ASCII
1020 compatible, else compatible.
1022 <li> Key is #Mfinal_byte, value is an integer
1024 The value specifies the @e final @e byte of the charset registered
1025 in The International Registry. It must be 0 (default) or 48..127.
1026 The value 0 means that the charset is not in the registry.
1028 <li> Key is #Mrevision, value is an integer
1030 The value specifies the @e revision @e number of the charset
1031 registered in The International Registry. It must be 0..127. If
1032 the charset is not in The International Registry, the value is
1033 ignored. The value 0 means that the charset has no revision
1036 <li> Key is #Mmin_char, value is an integer
1038 The value specifies the minimum character code of the charset.
1039 The default value is 0.
1041 <li> Key is #Mmapfile, value is an M-text
1043 If the method is #Mmap or #Munify, data that contains
1044 mapping information is added to the m17n database by calling
1045 the function mdatabase_define () with the value as an argument $EXTRA_INFO,
1046 i.e. the value is used as a file name of the data.
1048 Otherwise, this parameter is ignored.
1050 <li> Key is #Mparents, value is a plist
1052 If the method is #Msubset, the value must be a plist of length
1053 1, and the value of the plist must be a symbol representing a
1056 If the method is #Msuperset, the value must be a plist of length
1057 less than 9, and the values of the plist must be symbols
1058 representing subset charsets.
1060 Otherwise, this parameter is ignored.
1062 <li> Key is #Mdefine_coding, value is a symbol
1064 If the dimension of the charset is 1, the value specifies whether
1065 or not to define a coding system of the same name whose type is
1066 #Mcharset. A coding system is defined if the value is not #Mnil.
1068 Otherwise, this parameter is ignored.
1073 If the operation was successful, mchar_define_charset () returns a
1074 symbol whose name is $NAME. Otherwise it returns #Mnil and
1075 assigns an error code to the external variable #merror_code. */
1078 @brief ʸ»ú¥»¥Ã¥È¤òÄêµÁ¤¹¤ë.
1080 ´Ø¿ô mchar_define_charset () ¤Ï¿·¤·¤¤Ê¸»ú¥»¥Ã¥È¤òÄêµÁ¤·¡¢¤½¤ì¤ò
1081 $NAME ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£
1082 $PLIST ¤ÏÄêµÁ¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î¥Ñ¥é¥á¡¼¥¿¤ò°Ê²¼¤Î¤è¤¦¤Ë»ØÄꤹ¤ë¡£
1086 <li> ¥¡¼¤¬ #Mmethod ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
1088 Ãͤϡ¢#Moffset, #Mmap (¥Ç¥Õ¥©¥ë¥ÈÃÍ), #Munify, #Msubset,
1089 #Msuperset ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ê¡¢Ê¸»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤ò¥Ç¥³¡¼¥É¡¿¥¨¥ó¥³¡¼¥É¤¹¤ëºÝ¤Î¥á¥½¥Ã¥É¤ò»ØÄꤹ¤ë¡£
1091 <li> ¥¡¼¤¬ #Mdimension ¤ÇÃͤ¬À°¿ôÃͤλþ
1093 Ãͤϡ¢1 (¥Ç¥Õ¥©¥ë¥ÈÃÍ), 2, 3, 4
1094 ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ê¡¢Ê¸»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î¼¡¸µ¤Ç¤¢¤ë¡£
1096 <li> ¥¡¼¤¬ #Mmin_range ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1098 Ãͤϥ³¡¼¥É¥Ý¥¤¥ó¥È¤ÎºÇ¾®¤ÎÃͤǤ¢¤ë¡£¤¹¤Ê¤ï¤Á¡¢¤³¤ÎÃͤΠN
1099 ÈÖÌܤΥХ¤¥È¤Ï¤³¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î N ÈÖÌܤΥХ¤¥È¤ÎºÇ¾®¤Î¤â¤Î¤È¤Ê¤ë¡£
1102 <li> ¥¡¼¤¬ #Mmax_range ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1104 Ãͤϥ³¡¼¥É¥Ý¥¤¥ó¥È¤ÎºÇÂç¤ÎÃͤǤ¢¤ë¡£¤¹¤Ê¤ï¤Á¡¢¤³¤ÎÃͤΠN
1105 ÈÖÌܤΥХ¤¥È¤Ï¤³¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î N ÈÖÌܤΥХ¤¥È¤ÎºÇÂç¤Î¤â¤Î¤È¤Ê¤ë¡£
1106 ¥Ç¥Õ¥©¥ë¥ÈÃͤϡ¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î¼¡¸µ¤¬ 1, 2, 3, 4 ¤Î»þ¡¢¤½¤ì¤¾¤ì
1107 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF ¡£
1109 <li> ¥¡¼¤¬ #Mmin_code ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1111 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇ¾®¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ç¤¢¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃͤÏ
1114 <li> ¥¡¼¤¬ #Mmax_code ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1116 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇÂç¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ç¤¢¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃͤÏ
1119 <li> ¥¡¼¤¬ #Mascii_compatible ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
1121 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤¬ ASCII ¸ß´¹¤Ç¤¢¤ë¤«¤É¤¦¤«¤ò¼¨¤¹¡£¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
1122 #Mnil ¤Ç¤¢¤ì¤Ð¸ß´¹¤Ç¤Ï¤Ê¤¯¡¢¤½¤ì°Ê³°¤Î¾ì¹ç¤Ï¸ß´¹¤Ç¤¢¤ë¡£
1124 <li> ¥¡¼¤¬ #Mfinal_byte ¤ÇÃͤ¬À°¿ôÃͤλþ
1126 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤Î The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë
1127 @e ½ªÃ¼¥Ð¥¤¥È ¤Ç¤¢¤ê¡¢0 (¥Ç¥Õ¥©¥ë¥ÈÃÍ) ¤Ç¤¢¤ë¤« 32..127 ¤Ç¤¢¤ë¡£0
1128 ¤ÏÅÐÏ¿¤µ¤ì¤Æ¤¤¤Ê¤¤¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
1130 <li> ¥¡¼¤¬ #Mrevision ¤ÇÃͤ¬À°¿ôÃͤλþ
1132 ÃÍ¤Ï The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë @e revision @e
1133 number ¤Ç¤¢¤ê¡¢0..127 ¤Ç¤¢¤ë¡£
1134 ʸ»ú¥»¥Ã¥È¤¬ÅÐÏ¿¤µ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¤³¤ÎÃͤÏ̵»ë¤µ¤ì¤ë¡£
1135 0 ¤Ï revision number ¤¬Â¸ºß¤·¤Ê¤¤¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
1137 <li> ¥¡¼¤¬ #Mmin_char ¤ÇÃͤ¬À°¿ôÃͤλþ
1139 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇ¾®¤Îʸ»ú¥³¡¼¥É¤Ç¤¢¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃÍ¤Ï 0 ¡£
1141 <li> ¥¡¼¤¬ #Mmapfile ¤ÇÃͤ¬ M-text ¤Î»þ
1143 ¥á¥½¥Ã¥É¤¬ #Mmap ¤« #Munify ¤Î»þ¡¢´Ø¿ô mdatabase_define ()
1144 ¤ò¤³¤ÎÃͤò°ú¿ô $EXTRA_INFO ¤È¤·¤Æ¸Æ¤Ö¤³¤È¤Ë¤è¤Ã¤Æ¡¢¥Þ¥Ã¥Ô¥ó¥°¤Ë´Ø¤¹¤ë¥Ç¡¼¥¿¤¬
1145 m17n ¥Ç¡¼¥¿¥Ù¡¼¥¹¤ËÄɲ䵤ì¤ë¡£
1146 ¤¹¤Ê¤ï¤Á¡¢¤³¤ÎÃͤϥǡ¼¥¿¥Õ¥¡¥¤¥ë¤Î̾Á°¤Ç¤¢¤ë¡£
1148 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢¤³¤Î¥Ñ¥é¥á¡¼¥¿¤Ï̵»ë¤µ¤ì¤ë¡£
1150 <li> ¥¡¼¤¬ #Mparents ¤ÇÃͤ¬ plist ¤Î»þ
1152 ¥á¥½¥Ã¥É¤¬ #Msubset ¤Ê¤é¤Ð¡¢ÃͤÏŤµ 1 ¤Î plist
1153 ¤Ç¤¢¤ê¡¢¤½¤ÎÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤Î¾å°Ì½¸¹ç¤È¤Ê¤ëʸ»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤Ç¤¢¤ë¡£
1155 ¥á¥½¥Ã¥É¤¬ #Msuperset ¤Ê¤é¤Ð¡¢ÃͤÏŤµ 8 °Ê²¼¤Î plist
1156 ¤Ç¤¢¤ê¡¢¤½¤ì¤é¤ÎÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤Î²¼°Ì½¸¹ç¤Ç¤¢¤ëʸ»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤Ç¤¢¤ë¡£
1158 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢¤³¤Î¥Ñ¥é¥á¡¼¥¿¤Ï̵»ë¤µ¤ì¤ë¡£
1160 <li> キーが #Mdefine_coding で値がシンボルの時
1162 文字セットの次元が 1 ならば、値が #Mnil 以外の場合に #Mcharset 型
1163 で同じ名前を持つコード系を定義する。
1165 そうでなければ、このパラメータは無視される。
1170 処理が成功すれば、mchar_define_charset() は $NAME
1171 という名前のシンボルを返す。そうでなければ #Mnil を返し、外部変数
1172 #merror_code にエラーコードを設定する。*/
1176 @c MERROR_CHARSET */
/* Define a charset named NAME from the key/value parameters in PLIST.

   What the visible code does, in order: interns NAME as a symbol,
   allocates an MCharset, fills its fields from PLIST, stores the
   charset as the Mcharset property of the symbol (plus its
   canonicalized form and every symbol listed under Maliases), and
   finally may register a charset-based coding.  Invalid input is
   reported through MERROR (MERROR_CHARSET, Mnil).

   NOTE(review): the embedded line numbers of this listing skip
   values inside the function, so some statements (braces, guards,
   short declarations such as those of `charset', `i' and `pl') are
   not visible here; comments marked "presumably" cover those spots
   and need confirming against the complete source.  */
1179 mchar_define_charset (const char *name, MPlist *plist)
1181 MSymbol sym = msymbol (name);
/* Code-point range limits; the loop further down reads them one
   byte per dimension.  */
1184 unsigned min_range, max_range;
/* Value stored under Mmapfile; used below as mapfile->data.  */
1186 MText *mapfile = (MText *) mplist_get (plist, Mmapfile);
/* Allocate the charset record; MERROR_CHARSET is the error code
   handed to the allocation macro.  */
1188 MSTRUCT_CALLOC (charset, MERROR_CHARSET);
1189 charset->name = sym;
/* Take the method from PLIST; when none is given fall back to Mmap
   or Moffset.  NOTE(review): the test that chooses between the two
   assignments is not visible here -- presumably it depends on
   whether `mapfile' was supplied; confirm.  */
1190 charset->method = (MSymbol) mplist_get (plist, Mmethod);
1191 if (! charset->method)
1194 charset->method = Mmap;
1196 charset->method = Moffset;
/* Mmap and Munify need mapping data: it is registered in the
   database under (Mcharset, SYM) using mapfile->data.
   NOTE(review): the condition guarding the MERROR is not visible --
   presumably `! mapfile', since mapfile->data is dereferenced right
   after; confirm.  */
1198 if (charset->method == Mmap || charset->method == Munify)
1201 MERROR (MERROR_CHARSET, Mnil);
1202 mdatabase_define (Mcharset, sym, Mnil, Mnil, NULL, mapfile->data);
/* Dimension defaults to 1 when PLIST gives none (or gives 0).  */
1204 if (! (charset->dimension = (int) mplist_get (plist, Mdimension)))
1205 charset->dimension = 1;
/* Read the range limits.  An explicit Mmax_range raises the
   dimension to at least the number of bytes the value needs
   (>= 0x100 -> 2, >= 0x10000 -> 3, >= 0x1000000 -> 4).  */
1207 min_range = (unsigned) mplist_get (plist, Mmin_range);
1208 if ((pl = mplist_find_by_key (plist, Mmax_range)))
1210 max_range = (unsigned) MPLIST_VAL (pl);
1211 if (max_range >= 0x1000000)
1212 charset->dimension = 4;
1213 else if (max_range >= 0x10000 && charset->dimension < 3)
1214 charset->dimension = 3;
1215 else if (max_range >= 0x100 && charset->dimension < 2)
1216 charset->dimension = 2;
/* No Mmax_range given: max_range defaults from the dimension
   (0xFFFFFF for 3, 0xFFFFFFFF presumably for the remaining case; the
   assignments for dimensions 1 and 2 are not visible in this
   listing).  */
1218 else if (charset->dimension == 1)
1220 else if (charset->dimension == 2)
1222 else if (charset->dimension == 3)
1223 max_range = 0xFFFFFF;
1225 max_range = 0xFFFFFFFF;
/* Split the limits into per-dimension bytes: code_range[i * 4] gets
   byte I of min_range and code_range[i * 4 + 1] byte I of max_range.
   The loop consumes min_range/max_range by shifting them right 8
   bits per iteration.  */
1227 memset (charset->code_range, 0, sizeof charset->code_range);
1228 for (i = 0; i < charset->dimension; i++, min_range >>= 8, max_range >>= 8)
1230 charset->code_range[i * 4] = min_range & 0xFF;
1231 charset->code_range[i * 4 + 1] = max_range & 0xFF;
/* Clamp the requested Mmin_code/Mmax_code to the range limits.
   NOTE(review): min_range and max_range were already shifted right
   by 8 bits per dimension in the loop above, so the comparisons here
   do not see the original limits -- looks unintended; confirm.  */
1233 if ((charset->min_code = (int) mplist_get (plist, Mmin_code)) < min_range)
1234 charset->min_code = min_range;
1235 if ((charset->max_code = (int) mplist_get (plist, Mmax_code)) > max_range)
1236 charset->max_code = max_range;
/* ascii_compatible is set when the Mascii_compatible value is not
   Mnil.  */
1237 charset->ascii_compatible
1238 = (MSymbol) mplist_get (plist, Mascii_compatible) != Mnil;
1239 charset->final_byte = (int) mplist_get (plist, Mfinal_byte);
1240 charset->revision = (int) mplist_get (plist, Mrevision);
1241 charset->min_char = (int) mplist_get (plist, Mmin_char);
/* Parent charsets: at most 8 entries of the Mparents plist are used.
   Each must be a symbol element naming a charset that MCHARSET can
   resolve; anything else is an error.  NOTE(review): no release of
   `charset' is visible on these MERROR exits -- check for a leak.  */
1242 pl = (MPlist *) mplist_get (plist, Mparents);
1243 charset->nparents = pl ? mplist_length (pl) : 0;
1244 if (charset->nparents > 8)
1245 charset->nparents = 8;
1246 for (i = 0; i < charset->nparents; i++, pl = MPLIST_NEXT (pl))
1248 MSymbol parent_name;
1250 if (MPLIST_KEY (pl) != Msymbol)
1251 MERROR (MERROR_CHARSET, Mnil);
1252 parent_name = MPLIST_SYMBOL (pl);
1253 if (! (charset->parents[i] = MCHARSET (parent_name)))
1254 MERROR (MERROR_CHARSET, Mnil);
1257 charset->subset_offset = (int) mplist_get (plist, Msubset_offset);
/* Publish the charset on SYM, let make_charset () process it, then
   publish the result under the canonicalized name as well.
   NOTE(review): `charset' is replaced by make_charset ()'s return
   value; a failure check is not visible in this listing.  */
1259 msymbol_put (sym, Mcharset, charset);
1260 charset = make_charset (charset);
1263 msymbol_put (msymbol__canonicalize (sym), Mcharset, charset);
/* Register the same charset under every symbol in the Maliases plist
   (iteration stops at the first element whose key is not Msymbol).  */
1265 for (pl = (MPlist *) mplist_get (plist, Maliases);
1266 pl && MPLIST_KEY (pl) == Msymbol;
1267 pl = MPLIST_NEXT (pl))
1269 MSymbol alias = MPLIST_SYMBOL (pl);
1271 msymbol_put (alias, Mcharset, charset);
1272 msymbol_put (msymbol__canonicalize (alias), Mcharset, charset);
/* A coding is registered for SYM only when Mdefine_coding is set,
   the dimension is 1, and the first byte ranges over 0..255.  */
1275 if (mplist_get (plist, Mdefine_coding)
1276 && charset->dimension == 1
1277 && charset->code_range[0] == 0 && charset->code_range[1] == 255)
1278 mconv__register_charset_coding (sym);
1285 @brief Resolve charset name.
1287 The mchar_resolve_charset () function returns $SYMBOL if it
1288 represents a charset. Otherwise, it canonicalizes $SYMBOL as a
1289 charset name and, if the canonicalized name represents a charset,
1290 returns it. Otherwise, it returns #Mnil. */
1293 @brief 文字セット名を解決する.
1295 関数 mchar_resolve_charset () は $SYMBOL
1296 が文字セットを示していればそれを返す。
1298 そうでなければ、$SYMBOL を文字セット名として正規化し、それが文字セットを示していれば正規化したものを返す。
1299 そうでなければ、#Mnil を返す。 */
/* Map SYMBOL to the name of a defined charset, or Mnil.

   The charset is read from the Mcharset property of SYMBOL; a second
   lookup is made on the canonicalized form of SYMBOL.  */
1302 mchar_resolve_charset (MSymbol symbol)
1304 MCharset *charset = (MCharset *) msymbol_get (symbol, Mcharset);
/* Fallback lookup on the canonicalized symbol.  NOTE(review): the
   guard around these two statements is not visible in this listing
   -- presumably they run only when the direct lookup found nothing;
   confirm.  */
1308 symbol = msymbol__canonicalize (symbol);
1309 charset = (MCharset *) msymbol_get (symbol, Mcharset);
/* The result is the `name' field stored in the charset, or Mnil when
   no charset was found.  */
1312 return (charset ? charset->name : Mnil);
1318 @brief List symbols representing charsets.
1320 The mchar_list_charset () function makes an array of symbols
1321 representing charsets, stores a pointer to the array in the place
1322 pointed to by $SYMBOLS, and returns the length of the array. */
1325 @brief 文字セットを表わすシンボルを列挙する.
1327 関数 mchar_list_charset ()
1328 は、文字セットを示すシンボルを並べた配列を作り、$SYMBOLS
1329 でポイントされた場所にこの配列へのポインタを置き、配列の長さを返す。 */
/* Build an array holding the name symbol of every entry of
   charset_list and store it through SYMBOLS.

   NOTE(review): the return statement is not visible in this listing;
   the header comment says the array length is returned.  Nothing
   here frees the array, so the caller presumably owns it --
   confirm.  */
1332 mchar_list_charset (MSymbol **symbols)
/* Room for charset_list.used symbols; MERROR_CHARSET is the error
   code handed to the allocation macro.  */
1336 MTABLE_MALLOC ((*symbols), charset_list.used, MERROR_CHARSET);
1337 for (i = 0; i < charset_list.used; i++)
1338 (*symbols)[i] = charset_list.charsets[i]->name;
1345 @brief Decode a code-point.
1347 The mchar_decode () function decodes code-point $CODE in the
1348 charset represented by the symbol $CHARSET_NAME to get a character code.
1352 If decoding was successful, mchar_decode () returns the decoded
1353 character code. Otherwise it returns -1. */
1356 @brief コードポイントをデコードする.
1358 関数 mchar_decode () は、シンボル $CHARSET_NAME で示される文字セット内の
1359 $CODE というコードポイントをデコードして文字コードを得る。
1362 デコードが成功すれば、mchar_decode () はデコードされた文字コードを返す。
1363 そうでなければ -1 を返す。 */
/* Decode code-point CODE of the charset named CHARSET_NAME into a
   character code by way of DECODE_CHAR.  */
1370 mchar_decode (MSymbol charset_name, unsigned code)
1372 MCharset *charset = MCHARSET (charset_name);
/* Failure exit.  NOTE(review): the guard is not visible in this
   listing (presumably `! charset').  The value returned is
   MCHAR_INVALID_CODE although the header comment documents -1;
   confirm that the two agree for this function's return type.  */
1375 return MCHAR_INVALID_CODE;
1376 return DECODE_CHAR (charset, code);
1382 @brief Encode a character code.
1384 The mchar_encode () function encodes character code $C to get a
1385 code-point in the charset represented by the symbol $CHARSET_NAME.
1388 If encoding was successful, mchar_encode () returns the encoded
1389 code-point. Otherwise it returns #MCHAR_INVALID_CODE. */
1392 @brief 文字コードをエンコードする.
1394 関数 mchar_encode () は、文字コード $C をエンコードしてシンボル
1395 $CHARSET_NAME で示される文字セット内におけるコードポイントを得る。
1398 エンコードが成功すれば、mchar_encode () はエンコードされたコードポイントを返す。
1399 そうでなければ #MCHAR_INVALID_CODE を返す。 */
/* Encode character code C into a code-point of the charset named
   CHARSET_NAME by way of ENCODE_CHAR.  */
1406 mchar_encode (MSymbol charset_name, int c)
1408 MCharset *charset = MCHARSET (charset_name);
/* Failure exit, matching the documented #MCHAR_INVALID_CODE result.
   NOTE(review): the guard is not visible in this listing --
   presumably `! charset'; confirm.  */
1411 return MCHAR_INVALID_CODE;
1412 return ENCODE_CHAR (charset, c);
1418 @brief Call a function for all the characters in a specified charset.
1420 The mchar_map_charset () function calls $FUNC for all the
1421 characters in the charset named $CHARSET_NAME. A call is done for
1422 a chunk of consecutive characters rather than character by character.
1425 $FUNC receives three arguments: $FROM, $TO, and $ARG. $FROM and
1426 $TO specify the range of character codes in $CHARSET. $ARG is the same as $FUNC_ARG.
1430 If the operation was successful, mchar_map_charset () returns 0.
1431 Otherwise, it returns -1 and assigns an error code to the external
1432 variable #merror_code. */
1435 @brief 指定した文字セットのすべての文字に対して関数を呼ぶ.
1437 関数 mchar_map_charset () は $CHARSET_NAME
1438 という名前を持つ文字セット中のすべての文字に対して $FUNC を呼ぶ。
1439 呼び出しは一文字毎ではなく、連続した文字のまとまり単位で行なわれる。
1441 関数 $FUNC には$FROM, $TO, $ARG の３引数が渡される。$FROM と $TO
1442 は $CHARSET 中の文字コードの範囲を指定する。$ARG は $FUNC_ARG と同じである。
1446 処理に成功すれば mchar_map_charset () は 0 を返す。
1447 そうでなければ -1 を返し、外部変数 #merror_code にエラーコードを設定する。 */
1451 @c MERROR_CHARSET */
1454 mchar_map_charset (MSymbol charset_name,
1455 void (*func) (int from, int to, void *arg),
1460 charset = MCHARSET (charset_name);
1462 MERROR (MERROR_CHARSET, -1);
1464 if (charset->encoder)
1466 int c = charset->min_char;
1469 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) < 0)
1471 while (c <= charset->max_char)
1473 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) >= 0)
1474 (*func) (c, next_c - 1, func_arg);
1479 (*func) (charset->min_char, charset->max_char, func_arg);