1 /* charset.c -- charset module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 @addtogroup m17nCharset
24 @brief Charset objects and API for them.
26 The m17n library uses @e charset objects to represent coded
27 character sets (CCS). The m17n library supports many predefined
28 coded character sets. Moreover, application programs can add
29 other charsets. A character can belong to multiple charsets.
31 The m17n library distinguishes the following three concepts:
33 @li A @e code-point is a number assigned by the CCS to each
34 character. Code-points may or may not be continuous. The type
35 @c unsigned is used to represent a code-point. An invalid
36 code-point is represented by the macro @c MCHAR_INVALID_CODE.
38 @li A @e character @e index is the canonical index of a character
39 in a CCS. The character that has the character index N occupies
40 the Nth position when all the characters in the current CCS are
41 sorted by their code-points. Character indices in a CCS are
42 continuous and start with 0.
44 @li A @e character @e code is the internal representation in the
45 m17n library of a character. A character code is a signed integer
48 Each charset object defines how characters are converted between
49 code-points and character codes. To @e decode means converting
50 code-points to character codes and to @e encode means converting
51 character codes to code-points. */
54 @addtogroup m17nCharset
55 @brief ʸ»ú¥»¥Ã¥È¥ª¥Ö¥¸¥§¥¯¥È¤È¤½¤ì¤Ë´Ø¤¹¤ë API.
57 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢Éä¹æ²½Ê¸»ú½¸¹ç (CCS) ¤ò @e ʸ»ú¥»¥Ã¥È ¤È¸Æ¤Ö¥ª
58 ¥Ö¥¸¥§¥¯¥È¤Çɽ¸½¤¹¤ë¡£m17n ¥é¥¤¥Ö¥é¥ê¤Ï¿¤¯¤ÎÉä¹æ²½Ê¸»ú½¸¹ç¤ò¤¢¤é¤«¤¸¤á
59 ¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¤·¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥à¤¬Æȼ«¤Ëʸ»ú¥»¥Ã¥È¤ò
60 Äɲ乤뤳¤È¤â²Äǽ¤Ç¤¢¤ë¡£°ì¤Ä¤Îʸ»ú¤ÏÊ£¿ô¤Îʸ»ú¥»¥Ã¥È¤Ë°¤·¤Æ¤â¤è
63 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢°Ê²¼¤Î³µÇ°¤ò¶èÊ̤·¤Æ¤¤¤ë:
65 @li @e ¥³¡¼¥É¥Ý¥¤¥ó¥È ¤È¤Ï¡¢CCS ¤¬¤½¤ÎÃæ¤Î¸Ä¡¹¤Îʸ»ú¤ËÂФ·¤ÆÄêµÁ¤¹
66 ¤ë¿ôÃͤǤ¢¤ë¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤ÏϢ³¤·¤Æ¤¤¤ë¤È¤Ï¸Â¤é¤Ê¤¤¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ï
67 @c unsigned ·¿¤Ë¤è¤Ã¤Æɽ¤µ¤ì¤ë¡£Ìµ¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ï¥Þ¥¯¥í
68 @c MCHAR_INVALID_CODE ¤Çɽ¤µ¤ì¤ë¡£
70 @li @e ʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹ ¤È¤Ï¡¢CCS Æâ¤Ç³Æʸ»ú¤Ë³ä¤êÅö¤Æ¤é¤ì¤ëÀµµ¬²½
71 ¤µ¤ì¤¿¥¤¥ó¥Ç¥Ã¥¯¥¹¤Ç¤¢¤ë¡£Ê¸»ú¥¤¥ó¥Ç¥Ã¥¯¥¹¤¬ N ¤Îʸ»ú¤Ï¡¢CCS Ãæ¤Î
72 Á´Ê¸»ú¤ò¥³¡¼¥É¥Ý¥¤¥ó¥È½ç¤Ëʤ٤¿¤È¤¤Ë N ÈÖÌܤ˸½¤ï¤ì¤ë¡£CCS Ãæ¤Î
73 ʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹¤ÏϢ³¤·¤Æ¤ª¤ê¡¢0 ¤«¤é»Ï¤Þ¤ë¡£
75 @li @e ʸ»ú¥³¡¼¥É ¤È¤Ï¡¢m17n ¥é¥¤¥Ö¥é¥êÆâ¤Ë¤ª¤±¤ëʸ»ú¤ÎÆâÉôɽ¸½¤Ç¤¢
76 ¤ê¡¢21 ¥Ó¥Ã¥È°Ê¾å¤ÎŤµ¤ò»ý¤ÄÉä¹çÉÕ¤À°¿ô¤Ç¤¢¤ë¡£
78 ³Æʸ»ú¥»¥Ã¥È¥ª¥Ö¥¸¥§¥¯¥È¤Ï¡¢¤½¤Îʸ»ú¥»¥Ã¥È¤Ë°¤¹¤ëʸ»ú¤Î¥³¡¼¥É¥Ý¥¤
79 ¥ó¥È¤Èʸ»ú¥³¡¼¥É¤È¤Î´Ö¤ÎÊÑ´¹¤òµ¬Äꤹ¤ë¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤«¤éʸ»ú¥³¡¼
80 ¥É¤Ø¤ÎÊÑ´¹¤ò @e ¥Ç¥³¡¼¥É ¤È¸Æ¤Ó¡¢Ê¸»ú¥³¡¼¥É¤«¤é¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ø¤Î
81 ÊÑ´¹¤ò @e ¥¨¥ó¥³¡¼¥É ¤È¸Æ¤Ö¡£ */
84 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
85 /*** @addtogroup m17nInternal
95 #include "m17n-misc.h"
103 static int unified_max;
105 /** List of all charsets ever defined. */
113 static struct MCharsetList charset_list;
115 static MPlist *charset_definition_list;
117 /** Make a charset object from the template of MCharset structure
118 CHARSET, and return a pointer to the new charset object.
119 CHARSET->code_range[4N + 2] and CHARSET->code_range[4N + 3] are
123 make_charset (MCharset *charset)
125 unsigned min_code, max_code;
127 int *range = charset->code_range;
129 if (charset->dimension < 1 || charset->dimension > 4)
130 MERROR (MERROR_CHARSET, NULL);
131 if ((charset->final_byte > 0 && charset->final_byte < '0')
132 || charset->final_byte > 127)
133 MERROR (MERROR_CHARSET, NULL);
135 for (i = 0, n = 1; i < 4; i++)
137 if (range[i * 4] > range[i * 4 + 1])
138 MERROR (MERROR_CHARSET, NULL);
139 range[i * 4 + 2] = range[i * 4 + 1] - range[i * 4] + 1;
140 n *= range[i * 4 + 2];
141 range[i * 4 + 3] = n;
144 min_code = range[0] | (range[4] << 8) | (range[8] << 16) | (range[12] << 24);
145 if (charset->min_code == 0)
146 charset->min_code = min_code;
147 else if (charset->min_code < min_code)
148 MERROR (MERROR_CHARSET, NULL);
149 max_code = range[1] | (range[5] << 8) | (range[9] << 16) | (range[13] << 24);
150 if (charset->max_code == 0)
151 charset->max_code = max_code;
152 else if (charset->max_code > max_code)
153 MERROR (MERROR_CHARSET, NULL);
155 charset->code_range_min_code = min_code;
156 charset->fully_loaded = 0;
159 if (charset->method == Msubset)
163 if (charset->nparents != 1)
164 MERROR (MERROR_CHARSET, NULL);
165 parent = charset->parents[0];
166 if (parent->method == Msuperset
167 || charset->min_code - charset->subset_offset < parent->min_code
168 || charset->max_code - charset->subset_offset > parent->max_code)
169 MERROR (MERROR_CHARSET, NULL);
171 else if (charset->method == Msuperset)
173 if (charset->nparents < 2)
174 MERROR (MERROR_CHARSET, NULL);
175 for (i = 0; i < charset->nparents; i++)
176 if (charset->min_code > charset->parents[i]->min_code
177 || charset->max_code < charset->parents[i]->max_code)
178 MERROR (MERROR_CHARSET, NULL);
183 = (charset->dimension == 1
185 && (charset->dimension == 2
187 && (charset->dimension == 3
188 || range[10] == 256)))));
190 if (! charset->no_code_gap)
194 memset (charset->code_range_mask, 0,
195 sizeof charset->code_range_mask);
196 for (i = 0; i < 4; i++)
197 for (j = range[i * 4]; j <= range[i * 4 + 1]; j++)
198 charset->code_range_mask[j] |= (1 << i);
201 if (charset->method == Moffset)
203 charset->max_char = charset->min_char + range[15] - 1;
204 if (charset->min_char < 0
205 || charset->max_char < 0 || charset->max_char > unified_max)
206 MERROR (MERROR_CHARSET, NULL);
207 charset->simple = charset->no_code_gap;
208 charset->fully_loaded = 1;
210 else if (charset->method == Munify)
212 /* The magic number 12 below is to align to the SUB_BITS_2
213 (defined in chartab.c) boundary in a char-table. */
214 unified_max -= ((range[15] >> 12) + 1) << 12;
215 charset->unified_max = unified_max;
217 else if (charset->method != Mmap)
218 MERROR (MERROR_CHARSET, NULL);
221 MLIST_APPEND1 (&charset_list, charsets, charset, MERROR_CHARSET);
223 if (charset->final_byte > 0)
225 MLIST_APPEND1 (&mcharset__iso_2022_table, charsets, charset,
227 if (charset->revision <= 0)
229 int chars = range[2];
231 if (chars == 128) /* ASCII case */
233 else if (chars == 256) /* ISO-8859-X case */
235 MCHARSET_ISO_2022 (charset->dimension, chars, charset->final_byte)
244 load_charset_fully (MCharset *charset)
246 if (charset->method == Msubset)
248 MCharset *parent = charset->parents[0];
250 if (! parent->fully_loaded
251 && load_charset_fully (parent) < 0)
252 MERROR (MERROR_CHARSET, -1);
253 if (parent->method == Moffset)
257 code = charset->min_code - charset->subset_offset;
258 charset->min_char = DECODE_CHAR (parent, code);
259 code = charset->max_code - charset->subset_offset;
260 charset->max_char = DECODE_CHAR (parent, code);
264 unsigned min_code = charset->min_code - charset->subset_offset;
265 unsigned max_code = charset->max_code - charset->subset_offset;
266 int min_char = DECODE_CHAR (parent, min_code);
267 int max_char = min_char;
269 for (++min_code; min_code <= max_code; min_code++)
271 int c = DECODE_CHAR (parent, min_code);
277 else if (c > max_char)
281 charset->min_char = min_char;
282 charset->max_char = max_char;
285 else if (charset->method == Msuperset)
287 int min_char = 0, max_char = 0;
290 for (i = 0; i < charset->nparents; i++)
292 MCharset *parent = charset->parents[i];
294 if (! parent->fully_loaded
295 && load_charset_fully (parent) < 0)
296 MERROR (MERROR_CHARSET, -1);
298 min_char = parent->min_char, max_char = parent->max_char;
299 else if (parent->min_char < min_char)
300 min_char = parent->min_char;
301 else if (parent->max_char > max_char)
302 max_char = parent->max_char;
304 charset->min_char = min_char;
305 charset->max_char = max_char;
307 else /* charset->method is Mmap or Munify */
309 MDatabase *mdb = mdatabase_find (Mcharset, charset->name, Mnil, Mnil);
312 if (! mdb || ! (plist = mdatabase_load (mdb)))
313 MERROR (MERROR_CHARSET, -1);
314 charset->decoder = mplist_value (plist);
315 charset->encoder = mplist_value (mplist_next (plist));
316 M17N_OBJECT_UNREF (plist);
317 mchartable_range (charset->encoder,
318 &charset->min_char, &charset->max_char);
319 if (charset->method == Mmap)
320 charset->simple = charset->no_code_gap;
322 charset->max_char = charset->unified_max + 1 + charset->code_range[15];
325 charset->fully_loaded = 1;
332 MPlist *mcharset__cache;
334 /* Predefined charsets. */
335 MCharset *mcharset__ascii;
336 MCharset *mcharset__binary;
337 MCharset *mcharset__m17n;
338 MCharset *mcharset__unicode;
340 MCharsetISO2022Table mcharset__iso_2022_table;
342 /** Initialize charset handler. */
349 unified_max = MCHAR_MAX;
351 mcharset__cache = mplist ();
352 mplist_set (mcharset__cache, Mt, NULL);
354 MLIST_INIT1 (&charset_list, charsets, 128);
355 MLIST_INIT1 (&mcharset__iso_2022_table, charsets, 128);
356 charset_definition_list = mplist ();
358 memset (mcharset__iso_2022_table.classified, 0,
359 sizeof (mcharset__iso_2022_table.classified));
361 Mcharset = msymbol ("charset");
363 Mmethod = msymbol ("method");
364 Moffset = msymbol ("offset");
365 Mmap = msymbol ("map");
366 Munify = msymbol ("unify");
367 Msubset = msymbol ("subset");
368 Msuperset = msymbol ("superset");
370 Mdimension = msymbol ("dimension");
371 Mmin_range = msymbol ("min-range");
372 Mmax_range = msymbol ("max-range");
373 Mmin_code = msymbol ("min-code");
374 Mmax_code = msymbol ("max-code");
375 Mascii_compatible = msymbol ("ascii-compatible");
376 Mfinal_byte = msymbol ("final-byte");
377 Mrevision = msymbol ("revision");
378 Mmin_char = msymbol ("min-char");
379 Mmapfile = msymbol_as_managing_key ("mapfile");
380 Mparents = msymbol_as_managing_key ("parents");
381 Msubset_offset = msymbol ("subset-offset");
382 Mdefine_coding = msymbol ("define-coding");
383 Maliases = msymbol_as_managing_key ("aliases");
387 /* Setup predefined charsets. */
388 pl = mplist_add (pl, Mmethod, Moffset);
389 pl = mplist_add (pl, Mmin_range, (void *) 0);
390 pl = mplist_add (pl, Mmax_range, (void *) 0x7F);
391 pl = mplist_add (pl, Mascii_compatible, Mt);
392 pl = mplist_add (pl, Mfinal_byte, (void *) 'B');
393 pl = mplist_add (pl, Mmin_char, (void *) 0);
394 Mcharset_ascii = mchar_define_charset ("ascii", param);
396 mplist_put (param, Mmax_range, (void *) 0xFF);
397 mplist_put (param, Mfinal_byte, NULL);
398 Mcharset_iso_8859_1 = mchar_define_charset ("iso-8859-1", param);
400 mplist_put (param, Mmax_range, (void *) 0x10FFFF);
401 Mcharset_unicode = mchar_define_charset ("unicode", param);
403 mplist_put (param, Mmax_range, (void *) MCHAR_MAX);
404 Mcharset_m17n = mchar_define_charset ("m17n", param);
406 mplist_put (param, Mmax_range, (void *) 0xFF);
407 Mcharset_binary = mchar_define_charset ("binary", param);
409 M17N_OBJECT_UNREF (param);
411 mcharset__ascii = MCHARSET (Mcharset_ascii);
412 mcharset__binary = MCHARSET (Mcharset_binary);
413 mcharset__m17n = MCHARSET (Mcharset_m17n);
414 mcharset__unicode = MCHARSET (Mcharset_unicode);
420 mcharset__fini (void)
425 for (i = 0; i < charset_list.used; i++)
427 MCharset *charset = charset_list.charsets[i];
429 if (charset->decoder)
430 free (charset->decoder);
431 if (charset->encoder)
432 M17N_OBJECT_UNREF (charset->encoder);
435 M17N_OBJECT_UNREF (mcharset__cache);
436 MLIST_FREE1 (&charset_list, charsets);
437 MLIST_FREE1 (&mcharset__iso_2022_table, charsets);
438 MPLIST_DO (plist, charset_definition_list)
439 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
440 M17N_OBJECT_UNREF (charset_definition_list);
445 mcharset__find (MSymbol name)
449 charset = msymbol_get (name, Mcharset);
452 MPlist *param = mplist_get (charset_definition_list, name);
454 MPLIST_KEY (mcharset__cache) = Mt;
457 param = mplist__from_plist (param);
458 mchar_define_charset (MSYMBOL_NAME (name), param);
459 charset = msymbol_get (name, Mcharset);
460 M17N_OBJECT_UNREF (param);
462 MPLIST_KEY (mcharset__cache) = name;
463 MPLIST_VAL (mcharset__cache) = charset;
468 /** Return the character corresponding to code-point CODE in CHARSET.
469 If CODE is invalid for CHARSET, return -1. */
472 mcharset__decode_char (MCharset *charset, unsigned code)
476 if (code < 128 && charset->ascii_compatible)
478 if (code < charset->min_code || code > charset->max_code)
481 if (! charset->fully_loaded
482 && load_charset_fully (charset) < 0)
483 MERROR (MERROR_CHARSET, -1);
485 if (charset->method == Msubset)
487 MCharset *parent = charset->parents[0];
489 code -= charset->subset_offset;
490 return DECODE_CHAR (parent, code);
493 if (charset->method == Msuperset)
497 for (i = 0; i < charset->nparents; i++)
499 MCharset *parent = charset->parents[i];
500 int c = DECODE_CHAR (parent, code);
508 idx = CODE_POINT_TO_INDEX (charset, code);
512 if (charset->method == Mmap)
513 return charset->decoder[idx];
515 if (charset->method == Munify)
517 int c = charset->decoder[idx];
520 c = charset->unified_max + 1 + idx;
524 /* Now charset->method should be Moffset. */
525 return (charset->min_char + idx);
529 /** Return the code point of character C in CHARSET. If CHARSET does not
530 contain C, return MCHAR_INVALID_CODE. */
533 mcharset__encode_char (MCharset *charset, int c)
535 if (! charset->fully_loaded
536 && load_charset_fully (charset) < 0)
537 MERROR (MERROR_CHARSET, MCHAR_INVALID_CODE);
539 if (charset->method == Msubset)
541 MCharset *parent = charset->parents[0];
542 unsigned code = ENCODE_CHAR (parent, c);
544 if (code == MCHAR_INVALID_CODE)
546 code += charset->subset_offset;
547 if (code >= charset->min_code && code <= charset->max_code)
549 return MCHAR_INVALID_CODE;
552 if (charset->method == Msuperset)
556 for (i = 0; i < charset->nparents; i++)
558 MCharset *parent = charset->parents[i];
559 unsigned code = ENCODE_CHAR (parent, c);
561 if (code != MCHAR_INVALID_CODE)
564 return MCHAR_INVALID_CODE;
567 if (c < charset->min_char || c > charset->max_char)
568 return MCHAR_INVALID_CODE;
570 if (charset->method == Mmap)
571 return (unsigned) mchartable_lookup (charset->encoder, c);
573 if (charset->method == Munify)
575 if (c > charset->unified_max)
577 c -= charset->unified_max - 1;
578 return INDEX_TO_CODE_POINT (charset, c);
580 return (unsigned) mchartable_lookup (charset->encoder, c);
583 /* Now charset->method should be Moffset */
584 c -= charset->min_char;
585 return INDEX_TO_CODE_POINT (charset, c);
589 mcharset__load_from_database ()
591 MDatabase *mdb = mdatabase_find (msymbol ("charset-list"), Mnil, Mnil, Mnil);
592 MPlist *def_list, *plist;
593 MPlist *definitions = charset_definition_list;
594 int mdebug_mask = MDEBUG_CHARSET;
599 def_list = (MPlist *) mdatabase_load (mdb);
600 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to load data."));
606 MPLIST_DO (plist, def_list)
611 if (! MPLIST_PLIST_P (plist))
612 MERROR (MERROR_CHARSET, -1);
613 pl = MPLIST_PLIST (plist);
614 if (! MPLIST_SYMBOL_P (pl))
615 MERROR (MERROR_CHARSET, -1);
616 name = MPLIST_SYMBOL (pl);
617 pl = MPLIST_NEXT (pl);
618 definitions = mplist_add (definitions, name, pl);
619 M17N_OBJECT_REF (pl);
620 p = mplist__from_plist (pl);
621 mchar_define_charset (MSYMBOL_NAME (name), p);
622 M17N_OBJECT_UNREF (p);
623 if ((pl = mplist_find_by_value (pl, Mdefine_coding))
624 && (MSymbol) MPLIST_VAL (MPLIST_NEXT (pl)) == Mt)
625 mconv__register_charset_coding (name);
628 M17N_OBJECT_UNREF (def_list);
629 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to parse the loaded data."));
635 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
640 /*** @addtogroup m17nCharset */
646 @brief Invalid code-point.
648 The macro #MCHAR_INVALID_CODE gives the invalid code-point. */
651 @brief ̵¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È.
653 ¥Þ¥¯¥í #MCHAR_INVALID_CODE ¤Ï̵¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È¤òÍ¿¤¨¤ë¡£ */
655 #define MCHAR_INVALID_CODE
659 @brief The symbol @c Mcharset.
661 Any decoded M-text has a text property whose key is the predefined
662 symbol @c Mcharset. The name of @c Mcharset is
663 <tt>"charset"</tt>. */
666 @brief ¥·¥ó¥Ü¥ë @c Mcharset.
668 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¡¢¥¡¼¤¬ @c Mcharset ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È
669 ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£¥·¥ó¥Ü¥ë @c Mcharset ¤Ï <tt>"charset"</tt> ¤È¤¤
676 @name Variables: Symbols representing a charset.
678 Each of the following symbols represents a predefined charset. */
681 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ëÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë
683 °Ê²¼¤Î³Æ¥·¥ó¥Ü¥ë¤Ï¡¢ÄêµÁºÑ¤ßʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£ */
688 @brief Symbol representing the charset ASCII.
690 The symbol #Mcharset_ascii has name <tt>"ascii"</tt> and represents
691 the charset ISO 646, USA Version X3.4-1968 (ISO-IR-6). */
693 @brief ASCII ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
695 ¥·¥ó¥Ü¥ë #Mcharset_ascii ¤Ï <tt>"ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
696 ISO 646, USA Version X3.4-1968 (ISO-IR-6) ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£
699 MSymbol Mcharset_ascii;
703 @brief Symbol representing the charset ISO/IEC 8859/1.
705 The symbol #Mcharset_iso_8859_1 has name <tt>"iso-8859-1"</tt>
706 and represents the charset ISO/IEC 8859-1:1998. */
708 @brief ISO/IEC 8859-1:1998 ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
710 ¥·¥ó¥Ü¥ë #Mcharset_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt> ¤È¤¤¤¦Ì¾
711 Á°¤ò»ý¤Á¡¢ISO/IEC 8859-1:1998 ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£
714 MSymbol Mcharset_iso_8859_1;
717 @brief Symbol representing the charset Unicode.
719 The symbol #Mcharset_unicode has name <tt>"unicode"</tt> and
720 represents the charset Unicode. */
722 @brief Unicode ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
724 ¥·¥ó¥Ü¥ë #Mcharset_unicode ¤Ï <tt>"unicode"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý
725 ¤Á¡¢Unicode ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£ */
727 MSymbol Mcharset_unicode;
731 @brief Symbol representing the largest charset.
733 The symbol #Mcharset_m17n has name <tt>"m17n"</tt> and
734 represents the charset that contains all characters supported by
737 @brief Á´Ê¸»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
739 ¥·¥ó¥Ü¥ë #Mcharset_m17n ¤Ï <tt>"m17n"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
740 m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£ */
742 MSymbol Mcharset_m17n;
746 @brief Symbol representing the charset for ill-decoded characters.
748 The symbol #Mcharset_binary has name <tt>"binary"</tt> and
749 represents the fake charset which the decoding functions put to an
750 M-text as a text property when they encounter an invalid byte
751 (sequence). See @ref m17nConv @latexonly
752 (P.\pageref{group__m17nConv}) @endlatexonly for more detail. */
755 @brief Àµ¤·¤¯¥Ç¥³¡¼¥É¤Ç¤¤Ê¤¤Ê¸»ú¤Îʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë.
757 ¥·¥ó¥Ü¥ë #Mcharset_binary ¤Ï <tt>"binary"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
758 µ¶¤Î (fake) ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£¥Ç¥³¡¼¥É´Ø¿ô¤Ï¡¢M-text ¤Î¥Æ¥¥¹
759 ¥È¥×¥í¥Ñ¥Æ¥£¤È¤·¤Æ¡¢Ìµ¸ú¤Ê¥Ð¥¤¥È¡Ê¥·¡¼¥¯¥¨¥ó¥¹¡Ë¤ËÁø¶ø¤·¤¿°ÌÃÖ¤òÉղ乤롣
761 ¾ÜºÙ¤Ï @ref m17nConv @latexonly
762 (P.\pageref{group__m17nConv}) @endlatexonly »²¾È¤Î¤³¤È¡£ */
764 MSymbol Mcharset_binary;
771 @name Variables: Parameter keys for mchar_define_charset ().
773 These are the predefined symbols to use as parameter keys for the
774 function mchar_define_charset () (which see). */
777 @name ÊÑ¿ô: mchar_define_charset ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼
779 ¤³¤ì¤é¤Ï¡¢´Ø¿ô mchar_define_charset () ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼¤È¤·¤Æ
780 »È¤ï¤ì¤ë¥·¥ó¥Ü¥ë¤Ç¤¢¤ë¡£ ¾Ü¤·¤¯¤Ï¤³¤Î´Ø¿ô¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£*/
785 Parameter key for mchar_define_charset () (which see). */
788 ´Ø¿ô mchar_define_charset () ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼. */
796 MSymbol Mascii_compatible;
802 MSymbol Msubset_offset;
803 MSymbol Mdefine_coding;
810 @name Variables: Symbols representing charset methods.
812 These are the predefined symbols that can be a value of the
813 #Mmethod parameter of a charset used in an argument to the
814 mchar_define_charset () function.
816 A method specifies how code-points and character codes are
817 converted. See the documentation of the mchar_define_charset ()
818 function for the details. */
821 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤Î¥á¥½¥Ã¥É»ØÄê¤Ë»È¤ï¤ì¤ë¥·¥ó¥Ü¥ë
823 ¤³¤ì¤é¤Ï¡¢Ê¸»ú¥»¥Ã¥È¤Î @e ¥á¥½¥Ã¥É ¤ò»ØÄꤹ¤ë¤¿¤á¤ÎÄêµÁºÑ¤ß¥·¥ó¥Ü
824 ¥ë¤Ç¤¢¤ê¡¢Ê¸»ú¥»¥Ã¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤʤ뤳¤È¤¬¤Ç¤¤ë¡£
825 ¤³¤ÎÃͤϴؿô mchar_define_charset () ¤Î°ú¿ô¤È¤·¤Æ»È¤ï¤ì¤ë¡£
827 ¥á¥½¥Ã¥É¤È¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥³¡¼¥É¤òÁê¸ßÊÑ´¹¤¹¤ëºÝ¤ÎÊý¼°¤Î¤³
828 ¤È¤Ç¤¢¤ë¡£¾Ü¤·¤¯¤Ï´Ø¿ô mchar_define_charset () ¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£ */
832 @brief Symbol for the offset type method of charset.
834 The symbol #Moffset has the name <tt>"offset"</tt> and, when used
835 as a value of #Mmethod parameter of a charset, it means that the
836 conversion of code-points and character codes of the charset is
837 done by this calculation:
840 CHARACTER-CODE = CODE-POINT - MIN-CODE + MIN-CHAR
843 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
844 and MIN-CHAR is a value of #Mmin_char parameter. */
847 @brief ¥ª¥Õ¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
849 ¥·¥ó¥Ü¥ë #Moffset ¤Ï <tt>"offset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã
850 ¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó
851 ¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤¬°Ê²¼¤Î¼°¤Ë½¾¤Ã¤Æ¹Ô¤ï¤ì¤ë¤³¤È
855 ʸ»ú¥³¡¼¥É = ¥³¡¼¥É¥Ý¥¤¥ó¥È - MIN-CODE + MIN-CHAR
858 ¤³¤³¤Ç¡¢MIN-CODE ¤Ïʸ»ú¥»¥Ã¥È¤Î #Mmin_code ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤǤ¢¤ê¡¢MIN-CHAR ¤Ï
859 #Mmin_char ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤǤ¢¤ë¡£ */
864 /***en @brief Symbol for the map type method of charset.
866 The symbol #Mmap has the name <tt>"map"</tt> and, when used as a
867 value of #Mmethod parameter of a charset, it means that the
868 conversion of code-points and character codes of the charset is
869 done by map looking up. The map must be given by #Mmapfile
872 /***ja @brief ¥Þ¥Ã¥×·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
874 ¥·¥ó¥Ü¥ë #Mmap ¤Ï <tt>"map"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã¥È¤Î
875 #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤È
876 ʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤¬¥Þ¥Ã¥×¤ò»²¾È¤¹¤ë¤³¤È¤Ë¤è¤Ã¤Æ¹Ô¤ï
877 ¤ì¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£¥Þ¥Ã¥×¤Ï #Mmapfile ¥Ñ¥é¥á¡¼¥¿¤È¤·¤ÆÍ¿¤¨¤Ê¤±¤ì
883 /***en @brief Symbol for the unify type method of charset.
885 The symbol #Munify has the name <tt>"unify"</tt> and, when used as
886 a value of #Mmethod parameter of a charset, it means that the
887 conversion of code-points and character codes of the charset is
888 done by map looking up and offsetting. The map must be given by
889 #Mmapfile parameter. For this kind of charset, a unique
890 continuous character code space for all characters is assigned.
892 If the map has an entry for a code-point, the conversion is done
893 by looking up the map. Otherwise, the conversion is done by this
897 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
900 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
901 and LOWEST-CHAR-CODE is the lowest character code of the assigned
904 /***ja @brief ¥æ¥Ë¥Õ¥¡¥¤·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
906 ¥·¥ó¥Ü¥ë #Munify ¤Ï <tt>"unify"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã
907 ¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó
908 ¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤¬¡¢¥Þ¥Ã¥×¤Î»²¾È¤È¥ª¥Õ¥»¥Ã¥È¤Î
909 ÁȤ߹ç¤ï¤»¤Ë¤è¤Ã¤Æ¹Ô¤ï¤ì¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£¥Þ¥Ã¥×¤Ï #Mmapfile ¥Ñ¥é
910 ¥á¡¼¥¿¤È¤·¤ÆÍ¿¤¨¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£¤³¤Î¼ï¤Î³Æʸ»ú¥»¥Ã¥È¤Ë¤Ï¡¢Á´Ê¸»ú
911 ¤ËÂФ·¤ÆϢ³¤¹¤ë¥³¡¼¥É¥¹¥Ú¡¼¥¹¤¬¤½¤ì¤¾¤ì³ä¤êÅö¤Æ¤é¤ì¤ë¡£
913 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤¬¥Þ¥Ã¥×¤Ë´Þ¤Þ¤ì¤Æ¤¤¤ì¤Ð¡¢ÊÑ´¹¤Ï¥Þ¥Ã¥×»²¾È¤Ë¤è¤Ã¤Æ¹Ô
914 ¤ï¤ì¤ë¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢°Ê²¼¤Î¼°¤Ë½¾¤¦¡£
917 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
920 ¤³¤³¤Ç¡¢MIN-CODE ¤Ïʸ»ú¥»¥Ã¥È¤Î #Mmin_code ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤǤ¢¤ê¡¢
921 LOWEST-CHAR-CODE ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿¥³¡¼¥É¥¹¥Ú¡¼¥¹¤ÎºÇ¤â¾®¤µ¤¤Ê¸»ú¥³¡¼
929 @brief Symbol for the subset type method of charset.
931 The symbol #Msubset has the name <tt>"subset"</tt> and, when used
932 as a value of #Mmethod parameter of a charset, it means that the
933 charset is a subset of a parent charset. The parent charset must
934 be given by #Mparents parameter. The conversion of code-points
935 and character codes of the charset is done conceptually by this
939 CHARACTER-CODE = PARENT-CODE (CODE-POINT - SUBSET-OFFSET)
942 where, PARENT-CODE is a pseudo function that returns the character
943 code of a code-point in the parent charset, and SUBSET-OFFSET is a
944 value given by #Msubset_offset parameter. */
946 /***ja @brief ¥µ¥Ö¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
948 ¥·¥ó¥Ü¥ë #Msubset ¤Ï <tt>"subset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã
949 ¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¤³¤Îʸ»ú¥»¥Ã
950 ¥È¤¬Ê̤Îʸ»ú¥»¥Ã¥È¡Ê¿Æʸ»ú¥»¥Ã¥È¡Ë¤ÎÉôʬ½¸¹ç¤Ç¤¢¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
951 ¿Æʸ»ú¥»¥Ã¥È¤Ï #Mparents ¥Ñ¥é¥á¡¼¥¿¤Ë¤è¤Ã¤ÆÍ¿¤¨¤é¤ì¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
952 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤Ï¡¢³µÇ°Åª¤Ë¤Ï
956 CHARACTER-CODE = PARENT-CODE (CODE-POINT - SUBSET-OFFSET)
959 ¤³¤³¤Ç PARENT-CODE ¤Ï CODE-POINT ¤Î¿Æʸ»ú¥»¥Ã¥ÈÃæ¤Ç¤Îʸ»ú¥³¡¼¥É¤ò
960 ÊÖ¤¹µ¼´Ø¿ô¤Ç¤¢¤ê¡¢SUBSET-OFFSET ¤Ï #Msubset_offset ¥Ñ¥é¥á¡¼¥¿¤ÇÍ¿
968 @brief Symbol for the superset type method of charset.
970 The symbol #Msuperset has the name <tt>"superset"</tt> and, when
971 used as a value of #Mmethod parameter of a charset, it means that
972 the charset is a superset of parent charsets. The parent charsets
973 must be given by #Mparents parameter. */
976 @brief ¥¹¡¼¥Ñ¡¼¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë.
978 ¥·¥ó¥Ü¥ë #Msuperset ¤Ï <tt>"superset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú
979 ¥»¥Ã¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¤³¤Îʸ»ú
980 ¥»¥Ã¥È¤¬Ê̤Îʸ»ú¥»¥Ã¥È¡Ê¿Æʸ»ú¥»¥Ã¥È¡Ë¤Î¾å°Ì½¸¹ç¤Ç¤¢¤ë¤³¤È¤ò°ÕÌ£¤¹
981 ¤ë¡£¿Æʸ»ú¥»¥Ã¥È¤Ï #Mparents ¥Ñ¥é¥á¡¼¥¿¤Ë¤è¤Ã¤ÆÍ¿¤¨¤é¤ì¤Ê¤¯¤Æ¤Ï¤Ê
990 @brief Define a charset.
992 The mchar_define_charset () function defines a new charset and
993 makes it accessible via a symbol whose name is $NAME. $PLIST
994 specifies parameters of the charset as below:
998 <li> Key is #Mmethod, value is a symbol.
1000 The value specifies the method for decoding/encoding code-points
1001 in the charset. It must be #Moffset, #Mmap (default), #Munify,
1002 #Msubset, or #Msuperset.
1004 <li> Key is #Mdimension, value is an integer
1006 The value specifies the dimension of code-points of the charset.
1007 It must be 1 (default), 2, 3, or 4.
1009 <li> Key is #Mmin_range, value is an unsigned integer
1011 The value specifies the minimum range of a code-point, which means
1012 that the Nth byte of the value is the minimum Nth byte of
1013 code-points of the charset. The default value is 0.
1015 <li> Key is #Mmax_range, value is an unsigned integer
1017 The value specifies the maximum range of a code-point, which means
1018 that the Nth byte of the value is the maximum Nth byte of
1019 code-points of the charset. The default value is 0xFF, 0xFFFF,
1020 0xFFFFFF, or 0xFFFFFFFF if the dimension is 1, 2, 3, or 4
1023 <li> Key is #Mmin_code, value is an unsigned integer
1025 The value specifies the minimum code-point of
1026 the charset. The default value is the minimum range.
1028 <li> Key is #Mmax_code, value is an unsigned integer
1030 The value specifies the maximum code-point of
1031 the charset. The default value is the maximum range.
1033 <li> Key is #Mascii_compatible, value is a symbol
1035 The value specifies whether the charset is ASCII compatible or
1036 not. If the value is #Mnil (default), it is not ASCII
1037 compatible, else compatible.
1039 <li> Key is #Mfinal_byte, value is an integer
1041 The value specifies the @e final @e byte of the charset registered
1042 in The International Registry. It must be 0 (default) or 48..127.
1043 The value 0 means that the charset is not in the registry.
1045 <li> Key is #Mrevision, value is an integer
1047 The value specifies the @e revision @e number of the charset
1048 registered in The International Registry. It must be 0..127. If
1049 the charset is not in The International Registry, the value is
1050 ignored. The value 0 means that the charset has no revision
1053 <li> Key is #Mmin_char, value is an integer
1055 The value specifies the minimum character code of the charset.
1056 The default value is 0.
1058 <li> Key is #Mmapfile, value is an M-text
1060 If the method is #Mmap or #Munify, a data that contains
1061 mapping information is added to the m17n database by calling
1062 mdatabase_define () with the value as an argument $EXTRA_INFO,
1063 i.e. the value is used as a file name of the data.
1065 Otherwise, this parameter is ignored.
1067 <li> Key is #Mparents, value is a plist
1069 If the method is #Msubset, the value must be a plist of length
1070 1, and the value of the plist must be a symbol representing a
1073 If the method is #Msuperset, the value must be a plist of length
1074 less than 9, and the values of the plist must be symbols
1075 representing subset charsets.
1077 Otherwise, this parameter is ignored.
1079 <li> Key is #Mdefine_coding, value is a symbol
1081 If the dimension of the charset is 1, the value specifies whether
1082 or not to define a coding system of the same name whose type is
1083 #Mcharset. A coding system is defined if the value is not #Mnil.
1085 Otherwise, this parameter is ignored.
1090 If the operation was successful, mchar_define_charset () returns a
1091 symbol whose name is $NAME. Otherwise it returns #Mnil and
1092 assigns an error code to the external variable #merror_code. */
1095 @brief ʸ»ú¥»¥Ã¥È¤òÄêµÁ¤¹¤ë.
1097 ´Ø¿ô mchar_define_charset () ¤Ï¿·¤·¤¤Ê¸»ú¥»¥Ã¥È¤òÄêµÁ¤·¡¢¤½¤ì¤ò
1098 $NAME ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£
1099 $PLIST ¤ÏÄêµÁ¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î¥Ñ¥é¥á¡¼¥¿¤ò°Ê²¼¤Î¤è¤¦¤Ë»ØÄꤹ¤ë¡£
1103 <li> ¥¡¼¤¬ #Mmethod ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
1105 Ãͤϡ¢#Moffset, #Mmap (¥Ç¥Õ¥©¥ë¥ÈÃÍ), #Munify, #Msubset,
1106 #Msuperset ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ê¡¢Ê¸»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤ò¥Ç¥³¡¼¥É¡¿
1107 ¥¨¥ó¥³¡¼¥É¤¹¤ëºÝ¤Î¥á¥½¥Ã¥É¤ò»ØÄꤹ¤ë¡£
1109 <li> ¥¡¼¤¬ #Mdimension ¤ÇÃͤ¬À°¿ôÃͤλþ
1111 Ãͤϡ¢1 (¥Ç¥Õ¥©¥ë¥ÈÃÍ), 2, 3, 4 ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ê¡¢Ê¸»ú¥»¥Ã¥È¤Î¥³¡¼
1112 ¥É¥Ý¥¤¥ó¥È¤Î¼¡¸µ¤Ç¤¢¤ë¡£
1114 <li> ¥¡¼¤¬ #Mmin_range ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1116 Ãͤϥ³¡¼¥É¥Ý¥¤¥ó¥È¤ÎºÇ¾®¤ÎÃͤǤ¢¤ë¡£¤¹¤Ê¤ï¤Á¡¢¤³¤ÎÃͤΠN ÈÖÌܤΥÐ
1117 ¥¤¥È¤Ï¤³¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î N ÈÖÌܤΥХ¤¥È¤ÎºÇ¾®¤Î¤â¤Î
1118 ¤È¤Ê¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃÍ¤Ï 0 ¡£
1120 <li> ¥¡¼¤¬ #Mmax_range ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1122 Ãͤϥ³¡¼¥É¥Ý¥¤¥ó¥È¤ÎºÇÂç¤ÎÃͤǤ¢¤ë¡£¤¹¤Ê¤ï¤Á¡¢¤³¤ÎÃͤΠN ÈÖÌܤΥÐ
1123 ¥¤¥È¤Ï¤³¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î N ÈÖÌܤΥХ¤¥È¤ÎºÇÂç¤Î¤â¤Î
1124 ¤È¤Ê¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃͤϡ¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î¼¡¸µ¤¬ 1, 2, 3, 4 ¤Î»þ¡¢¤½
1125 ¤ì¤¾¤ì 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF ¡£
1127 <li> ¥¡¼¤¬ #Mmin_code ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1129 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇ¾®¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ç¤¢¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃͤÏ
1132 <li> ¥¡¼¤¬ #Mmax_code ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1134 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇÂç¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ç¤¢¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃͤÏ
1137 <li> ¥¡¼¤¬ #Mascii_compatible ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
1139 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤¬ ASCII ¸ß´¹¤Ç¤¢¤ë¤«¤É¤¦¤«¤ò¼¨¤¹¡£¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
1140 #Mnil ¤Ç¤¢¤ì¤Ð¸ß´¹¤Ç¤Ï¤Ê¤¯¡¢¤½¤ì°Ê³°¤Î¾ì¹ç¤Ï¸ß´¹¤Ç¤¢¤ë¡£
1142 <li> ¥¡¼¤¬ #Mfinal_byte ¤ÇÃͤ¬À°¿ôÃͤλþ
1144 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤Î The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë
1145 @e ½ªÃ¼¥Ð¥¤¥È ¤Ç¤¢¤ê¡¢0 (¥Ç¥Õ¥©¥ë¥ÈÃÍ) ¤Ç¤¢¤ë¤« 48..127 ¤Ç¤¢¤ë¡£0
1146 ¤ÏÅÐÏ¿¤µ¤ì¤Æ¤¤¤Ê¤¤¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
1148 <li> ¥¡¼¤¬ #Mrevision ¤ÇÃͤ¬À°¿ôÃͤλþ
1150 ÃÍ¤Ï The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë @e revision @e
1151 number ¤Ç¤¢¤ê¡¢0..127 ¤Ç¤¢¤ë¡£Ê¸»ú¥»¥Ã¥È¤¬ÅÐÏ¿¤µ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï
1152 ¤³¤ÎÃͤÏ̵»ë¤µ¤ì¤ë¡£ 0 ¤Ï revision number ¤¬Â¸ºß¤·¤Ê¤¤¤³¤È¤ò°ÕÌ£¤¹
1155 <li> ¥¡¼¤¬ #Mmin_char ¤ÇÃͤ¬À°¿ôÃͤλþ
1157 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇ¾®¤Îʸ»ú¥³¡¼¥É¤Ç¤¢¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃÍ¤Ï 0 ¡£
1159 <li> ¥¡¼¤¬ #Mmapfile ¤ÇÃͤ¬ M-text ¤Î»þ
1161 ¥á¥½¥Ã¥É¤¬ #Mmap ¤« #Munify ¤Î»þ¡¢´Ø¿ô mdatabase_define () ¤ò¤³¤Î
1162 Ãͤò°ú¿ô $EXTRA_INFO ¤È¤·¤Æ¸Æ¤Ö¤³¤È¤Ë¤è¤Ã¤Æ¡¢¥Þ¥Ã¥Ô¥ó¥°¤Ë´Ø¤¹¤ë¥Ç¡¼
1163 ¥¿¤¬ m17n ¥Ç¡¼¥¿¥Ù¡¼¥¹¤ËÄɲ䵤ì¤ë¡£¤¹¤Ê¤ï¤Á¡¢¤³¤ÎÃͤϥǡ¼¥¿¥Õ¥¡¥¤
1166 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢¤³¤Î¥Ñ¥é¥á¡¼¥¿¤Ï̵»ë¤µ¤ì¤ë¡£
1168 <li> キーが #Mparents で値が plist の時
1170 メソッドが #Msubset ならば、値は長さ 1 の plist であり、その値はこ
1171 の文字セットの上位集合となる文字セットを示すシンボルである。
1173 メソッドが #Msuperset ならば、値は長さ 8 以下の plist であり、それ
1174 らの値はこの文字セットの下位集合である文字セットを示すシンボルであ
1177 そうでなければ、このパラメータは無視される。
1179 <li> キーが #Mdefine_coding で値がシンボルの時
1181 文字セットの次元が 1 ならば、値は #Mcharset タイプで同じ名前のコー
1182 ド系を定義するかどうかを指定する。値が #Mnil 以外の場合に定義する。
1184 そうでなければ、このパラメータは無視される。
1189 処理が成功すれば、mchar_define_charset() は $NAME という名
1190 前のシンボルを返す。そうでなければ #Mnil を返し、外部変数
1191 #merror_code にエラーコードを設定する。*/
1195 @c MERROR_CHARSET */
/* Define a charset named NAME from the key/value parameters in PLIST
   and attach the resulting charset object to the symbol for NAME (and
   to its canonicalized form and any aliases) under the Mcharset key.

   NOTE(review): this listing has lost lines (the embedded line numbers
   skip, e.g. 1198 -> 1200), so braces, some declarations and some
   conditions are not visible.  The comments below describe only the
   statements that are visible here.  */
1198 mchar_define_charset (char *name, MPlist *plist)
1200 MSymbol sym = msymbol (name);
1203 unsigned min_range, max_range;
1205 MText *mapfile = (MText *) mplist_get (plist, Mmapfile);
/* Allocate the charset object; presumably zero-filled and reporting
   MERROR_CHARSET on failure -- confirm against the macro definition.  */
1207 MSTRUCT_CALLOC (charset, MERROR_CHARSET);
1208 charset->name = sym;
/* Take the method from PLIST.  When none is given, one of Mmap or
   Moffset is chosen; the condition selecting between them is on a line
   not visible here (presumably whether a map file was supplied).  */
1209 charset->method = (MSymbol) mplist_get (plist, Mmethod);
1210 if (! charset->method)
1213 charset->method = Mmap;
1215 charset->method = Moffset;
/* Map-based methods read mapfile->data and hand it to
   mdatabase_define ().  The guard in front of the MERROR call is not
   visible (presumably a missing map file).
   NOTE(review): the charset allocated above is not visibly released on
   this error exit -- confirm whether MERROR cleans up.  */
1217 if (charset->method == Mmap || charset->method == Munify)
1220 MERROR (MERROR_CHARSET, Mnil);
1221 mdatabase_define (Mcharset, sym, Mnil, Mnil, NULL, mapfile->data);
/* The dimension falls back to 1 when the Mdimension value is zero.  */
1223 if (! (charset->dimension = (int) mplist_get (plist, Mdimension)))
1224 charset->dimension = 1;
/* When Mmax_range is present, raise the dimension so that max_range
   fits in that many bytes.  Otherwise derive max_range from the
   dimension (the values for dimensions 1 and 2 are on lines not
   visible here).  */
1226 min_range = (unsigned) mplist_get (plist, Mmin_range);
1227 if ((pl = mplist_find_by_key (plist, Mmax_range)))
1229 max_range = (unsigned) MPLIST_VAL (pl);
1230 if (max_range >= 0x1000000)
1231 charset->dimension = 4;
1232 else if (max_range >= 0x10000 && charset->dimension < 3)
1233 charset->dimension = 3;
1234 else if (max_range >= 0x100 && charset->dimension < 2)
1235 charset->dimension = 2;
1237 else if (charset->dimension == 1)
1239 else if (charset->dimension == 2)
1241 else if (charset->dimension == 3)
1242 max_range = 0xFFFFFF;
1244 max_range = 0xFFFFFFFF;
/* Split the ranges into per-byte limits: for byte I (lowest byte
   first), code_range[I * 4] receives the minimum and
   code_range[I * 4 + 1] the maximum.  The loop consumes min_range and
   max_range by shifting them right 8 bits per iteration.  */
1246 memset (charset->code_range, 0, sizeof charset->code_range);
1247 for (i = 0; i < charset->dimension; i++, min_range >>= 8, max_range >>= 8)
1249 charset->code_range[i * 4] = min_range & 0xFF;
1250 charset->code_range[i * 4 + 1] = max_range & 0xFF;
/* NOTE(review): min_range and max_range were shifted by the loop
   above, so the clamping below compares against the shifted values
   rather than the original ranges -- confirm this is intended.  */
1252 if ((charset->min_code = (int) mplist_get (plist, Mmin_code)) < min_range)
1253 charset->min_code = min_range;
1254 if ((charset->max_code = (int) mplist_get (plist, Mmax_code)) > max_range)
1255 charset->max_code = max_range;
/* Any Mascii_compatible value other than Mnil sets the flag.  */
1256 charset->ascii_compatible
1257 = (MSymbol) mplist_get (plist, Mascii_compatible) != Mnil;
1258 charset->final_byte = (int) mplist_get (plist, Mfinal_byte);
1259 charset->revision = (int) mplist_get (plist, Mrevision);
1260 charset->min_char = (int) mplist_get (plist, Mmin_char);
/* Record at most 8 parents.  Each element of the Mparents plist must
   have the key Msymbol and must name a charset for which MCHARSET
   yields a non-null object; otherwise the definition fails.
   NOTE(review): the charset object is not visibly released on these
   error exits either.  */
1261 pl = (MPlist *) mplist_get (plist, Mparents);
1262 charset->nparents = pl ? mplist_length (pl) : 0;
1263 if (charset->nparents > 8)
1264 charset->nparents = 8;
1265 for (i = 0; i < charset->nparents; i++, pl = MPLIST_NEXT (pl))
1267 MSymbol parent_name;
1269 if (MPLIST_KEY (pl) != Msymbol)
1270 MERROR (MERROR_CHARSET, Mnil);
1271 parent_name = MPLIST_SYMBOL (pl);
1272 if (! (charset->parents[i] = MCHARSET (parent_name)))
1273 MERROR (MERROR_CHARSET, Mnil);
1276 charset->subset_offset = (int) mplist_get (plist, Msubset_offset);
/* Attach the object to SYM before make_charset () runs; the value
   returned by make_charset () replaces CHARSET and is what gets
   attached to the canonicalized name and the aliases below.  Any check
   of that return value is on lines not visible here.  */
1278 msymbol_put (sym, Mcharset, charset);
1279 charset = make_charset (charset);
1282 msymbol_put (msymbol__canonicalize (sym), Mcharset, charset);
/* Attach the charset to every alias and to its canonicalized form.
   The walk stops at the first Maliases element whose key is not
   Msymbol.  */
1284 for (pl = (MPlist *) mplist_get (plist, Maliases);
1285 pl && MPLIST_KEY (pl) == Msymbol;
1286 pl = MPLIST_NEXT (pl))
1288 MSymbol alias = MPLIST_SYMBOL (pl);
1290 msymbol_put (alias, Mcharset, charset);
1291 msymbol_put (msymbol__canonicalize (alias), Mcharset, charset);
/* Register a coding for SYM only when Mdefine_coding is set, the
   charset is one-dimensional, and its single byte spans 0..255.  */
1294 if (mplist_get (plist, Mdefine_coding)
1295 && charset->dimension == 1
1296 && charset->code_range[0] == 0 && charset->code_range[1] == 255)
1297 mconv__register_charset_coding (sym);
1304 @brief Resolve charset name.
1306 If $SYMBOL represents a charset, the mchar_resolve_charset ()
1307 function returns the name of that charset. Otherwise, it
1308 canonicalizes $SYMBOL as a charset name and, if the canonicalized
1309 name represents a charset, returns the name of that charset.
1309 Otherwise, it returns #Mnil. */
1312 @brief 文字セット名を解決する.
1314 関数 mchar_resolve_charset () は $SYMBOL が文字セットを示していれ
1317 そうでなければ、$SYMBOL を文字セット名として正規化し、それが文字セッ
1318 トを示していれば正規化したものを返す。そうでなければ、#Mnil を
/* Resolve SYMBOL to the name of the charset it denotes.

   NOTE(review): lines are missing from this listing (the embedded line
   numbers skip); in particular the guard in front of the
   canonicalization step is not visible, presumably `if (! charset)'.  */
1322 mchar_resolve_charset (MSymbol symbol)
/* First look for a charset attached to SYMBOL itself.  */
1324 MCharset *charset = (MCharset *) msymbol_get (symbol, Mcharset);
/* Then look again using the canonicalized form of SYMBOL.  */
1328 symbol = msymbol__canonicalize (symbol);
1329 charset = (MCharset *) msymbol_get (symbol, Mcharset);
/* The result is the charset's own name, or Mnil when no charset was
   found.  */
1332 return (charset ? charset->name : Mnil);
1338 @brief List symbols representing charsets.
1340 The mchar_list_charset () function makes an array of symbols,
1341 each representing a charset, stores the pointer to the array in
1342 the place pointed to by $SYMBOLS, and returns the length of the array. */
1345 @brief 文字セットを表わすシンボルを列挙する.
1347 関数 mchar_list_charset () は、文字セットを示すシンボルを並べた配
1348 列を作り、$SYMBOLS でポイントされた場所にこの配列へのポインタを置
1349 き、配列の長さを返す。 */
/* Store in *SYMBOLS a newly allocated array holding the name of every
   charset in charset_list.

   NOTE(review): lines are missing from this listing (the embedded line
   numbers skip); the declaration of `i' and the return statement are
   not visible.  The visible code does not free the array, so the
   caller presumably owns it -- confirm.  */
1352 mchar_list_charset (MSymbol **symbols)
/* Allocate charset_list.used elements; MTABLE_MALLOC presumably
   reports MERROR_CHARSET on allocation failure -- confirm against the
   macro definition.  */
1356 MTABLE_MALLOC ((*symbols), charset_list.used, MERROR_CHARSET);
/* Copy each charset's name symbol, in charset_list order.  */
1357 for (i = 0; i < charset_list.used; i++)
1358 (*symbols)[i] = charset_list.charsets[i]->name;
1365 @brief Decode a code-point.
1367 The mchar_decode () function decodes code-point $CODE in the
1368 charset represented by the symbol $CHARSET_NAME to get a character
1372 If decoding was successful, mchar_decode () returns the decoded
1373 character code. Otherwise it returns -1. */
1376 @brief コードポイントをデコードする.
1378 関数 mchar_decode () は、シンボル $CHARSET_NAME で示される文字セッ
1379 ト内の $CODE というコードポイントをデコードして文字コードを得る。
1382 デコードが成功すれば、mchar_decode () はデコードされた文字コードを
1383 返す。そうでなければ -1 を返す。 */
/* Decode code-point CODE in the charset named CHARSET_NAME.

   NOTE(review): lines are missing from this listing (the embedded line
   numbers skip); the guard in front of the first return is not
   visible, presumably `if (! charset)'.  */
1390 mchar_decode (MSymbol charset_name, unsigned code)
1392 MCharset *charset = MCHARSET (charset_name);
/* Failure exit: yields MCHAR_INVALID_CODE.  */
1395 return MCHAR_INVALID_CODE;
/* Otherwise the result is whatever DECODE_CHAR produces for CODE.  */
1396 return DECODE_CHAR (charset, code);
1402 @brief Encode a character code.
1404 The mchar_encode () function encodes character code $C to get a
1405 code-point in the charset represented by the symbol $CHARSET_NAME.
1408 If encoding was successful, mchar_encode () returns the encoded
1409 code-point. Otherwise it returns #MCHAR_INVALID_CODE. */
1412 @brief 文字コードをエンコードする.
1414 関数 mchar_encode () は、文字コード $C をエンコードしてシンボル
1415 $CHARSET_NAME で示される文字セット内におけるコードポイントを得る。
1418 エンコードが成功すれば、mchar_encode () はエンコードされたコードポイ
1419 ントを返す。そうでなければ #MCHAR_INVALID_CODE を返す。 */
/* Encode character code C into a code-point of the charset named
   CHARSET_NAME.

   NOTE(review): lines are missing from this listing (the embedded line
   numbers skip); the guard in front of the first return is not
   visible, presumably `if (! charset)'.  */
1426 mchar_encode (MSymbol charset_name, int c)
1428 MCharset *charset = MCHARSET (charset_name);
/* Failure exit: yields MCHAR_INVALID_CODE.  */
1431 return MCHAR_INVALID_CODE;
/* Otherwise the result is whatever ENCODE_CHAR produces for C.  */
1432 return ENCODE_CHAR (charset, c);
1438 @brief Call a function for all the characters in a specified charset.
1440 The mchar_map_charset () function calls $FUNC for all the
1441 characters in the charset named $CHARSET_NAME. A call is done for
1442 a chunk of consecutive characters rather than character by
1445 $FUNC receives three arguments: $FROM, $TO, and $ARG. $FROM and
1446 $TO specify the range of character codes in the charset. $ARG is the
1450 If the operation was successful, mchar_map_charset () returns 0.
1451 Otherwise, it returns -1 and assigns an error code to the external
1452 variable #merror_code. */
1455 @brief 指定した文字セットのすべての文字に対して関数を呼ぶ.
1457 関数 mchar_map_charset () は $CHARSET_NAME という名前を持つ文字セッ
1458 ト中のすべての文字に対して $FUNC を呼ぶ。呼び出しは一文字毎ではな
1459 く、連続した文字のまとまり単位で行なわれる。
1461 関数 $FUNC には $FROM, $TO, $ARG の３引数が渡される。$FROM と $TO
1462 は $CHARSET 中の文字コードの範囲を指定する。$ARG は $FUNC_ARG と同
1466 処理に成功すれば mchar_map_charset () は 0 を返す。そうでなければ
1467 -1 を返し、外部変数 #merror_code にエラーコードを設定する。 */
1471 @c MERROR_CHARSET */
1474 mchar_map_charset (MSymbol charset_name,
1475 void (*func) (int from, int to, void *arg),
1480 charset = MCHARSET (charset_name);
1482 MERROR (MERROR_CHARSET, -1);
1484 if (charset->encoder)
1486 int c = charset->min_char;
1489 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) < 0)
1491 while (c <= charset->max_char)
1493 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) >= 0)
1494 (*func) (c, next_c - 1, func_arg);
1499 (*func) (charset->min_char, charset->max_char, func_arg);