1 /* charset.c -- charset module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 @addtogroup m17nCharset
24 @brief Charset objects and API for them.
26 The m17n library uses @e charset objects to represent coded
27 character sets (CCS). The m17n library supports many predefined
28 coded character sets. Moreover, application programs can add
29 other charsets. A character can belong to multiple charsets.
31 The m17n library distinguishes the following three concepts:
33 @li A @e code-point is a number assigned by the CCS to each
34 character. Code-points may or may not be continuous. The type
35 @c unsigned is used to represent a code-point. An invalid
36 code-point is represented by the macro @c MCHAR_INVALID_CODE.
38 @li A @e character @e index is the canonical index of a character
39 in a CCS. The character that has the character index N occupies
40 the Nth position when all the characters in the current CCS are
41 sorted by their code-points. Character indices in a CCS are
42 continuous and start with 0.
44 @li A @e character @e code is the internal representation in the
45 m17n library of a character. A character code is a signed integer
48 Each charset object defines how characters are converted between
49 code-points and character codes. To @e decode means converting
50 code-points to character codes and to @e encode means converting
51 character codes to code-points. */
54 @addtogroup m17nCharset
55 @brief 文字セットオブジェクトとそれに関する API.
57 m17n ライブラリは、符号化文字集合 (CCS) を @e 文字セット と呼ぶオ
58 ブジェクトで表現する。m17n ライブラリは多くの符号化文字集合をあらかじめ
59 サポートしているし、アプリケーションプログラムが独自に文字セットを
60 追加することも可能である。一つの文字は複数の文字セットに属してもよ
63 m17n ライブラリは、以下の概念を区別している:
65 @li @e コードポイント とは、CCS がその中の個々の文字に対して定義す
66 る数値である。コードポイントは連続しているとは限らない。コードポイントは
67 @c unsigned 型によって表される。無効なコードポイントはマクロ
68 @c MCHAR_INVALID_CODE で表される。
70 @li @e 文字インデックス とは、CCS 内で各文字に割り当てられる正規化
71 されたインデックスである。文字インデックスが N の文字は、CCS 中の
72 全文字をコードポイント順に並べたときに N 番目に現われる。CCS 中の
73 文字インデックスは連続しており、0 から始まる。
75 @li @e 文字コード とは、m17n ライブラリ内における文字の内部表現であ
76 り、21 ビット以上の長さを持つ符号付き整数である。
78 各文字セットオブジェクトは、その文字セットに属する文字のコードポイ
79 ントと文字コードとの間の変換を規定する。コードポイントから文字コー
80 ドへの変換を @e デコード と呼び、文字コードからコードポイントへの
81 変換を @e エンコード と呼ぶ。 */
84 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
85 /*** @addtogroup m17nInternal
95 #include "m17n-misc.h"
103 static int unified_max;
105 /** List of all charsets ever defined. */
113 static struct MCharsetList charset_list;
115 static MPlist *charset_definition_list;
117 /** Make a charset object from the template of MCharset structure
118 CHARSET, and return a pointer to the new charset object.
119 CHARSET->code_range[4N + 2] and CHARSET->code_range[4N + 3] are
123 make_charset (MCharset *charset)
125 unsigned min_code, max_code;
127 int *range = charset->code_range;
129 if (charset->dimension < 1 || charset->dimension > 4)
130 MERROR (MERROR_CHARSET, NULL);
131 if ((charset->final_byte > 0 && charset->final_byte < '0')
132 || charset->final_byte > 127)
133 MERROR (MERROR_CHARSET, NULL);
135 for (i = 0, n = 1; i < 4; i++)
137 if (range[i * 4] > range[i * 4 + 1])
138 MERROR (MERROR_CHARSET, NULL);
139 range[i * 4 + 2] = range[i * 4 + 1] - range[i * 4] + 1;
140 n *= range[i * 4 + 2];
141 range[i * 4 + 3] = n;
144 min_code = range[0] | (range[4] << 8) | (range[8] << 16) | (range[12] << 24);
145 if (charset->min_code == 0)
146 charset->min_code = min_code;
147 else if (charset->min_code < min_code)
148 MERROR (MERROR_CHARSET, NULL);
149 max_code = range[1] | (range[5] << 8) | (range[9] << 16) | (range[13] << 24);
150 if (charset->max_code == 0)
151 charset->max_code = max_code;
152 else if (charset->max_code > max_code)
153 MERROR (MERROR_CHARSET, NULL);
155 charset->code_range_min_code = min_code;
156 charset->fully_loaded = 0;
159 if (charset->method == Msubset)
163 if (charset->nparents != 1)
164 MERROR (MERROR_CHARSET, NULL);
165 parent = charset->parents[0];
166 if (parent->method == Msuperset
167 || charset->min_code - charset->subset_offset < parent->min_code
168 || charset->max_code - charset->subset_offset > parent->max_code)
169 MERROR (MERROR_CHARSET, NULL);
171 else if (charset->method == Msuperset)
173 if (charset->nparents < 2)
174 MERROR (MERROR_CHARSET, NULL);
175 for (i = 0; i < charset->nparents; i++)
176 if (charset->min_code > charset->parents[i]->min_code
177 || charset->max_code < charset->parents[i]->max_code)
178 MERROR (MERROR_CHARSET, NULL);
183 = (charset->dimension == 1
185 && (charset->dimension == 2
187 && (charset->dimension == 3
188 || range[10] == 256)))));
190 if (! charset->no_code_gap)
194 memset (charset->code_range_mask, 0,
195 sizeof charset->code_range_mask);
196 for (i = 0; i < 4; i++)
197 for (j = range[i * 4]; j <= range[i * 4 + 1]; j++)
198 charset->code_range_mask[j] |= (1 << i);
201 if (charset->method == Moffset)
203 charset->max_char = charset->min_char + range[15] - 1;
204 if (charset->min_char < 0
205 || charset->max_char < 0 || charset->max_char > unified_max)
206 MERROR (MERROR_CHARSET, NULL);
207 charset->simple = charset->no_code_gap;
208 charset->fully_loaded = 1;
210 else if (charset->method == Munify)
212 /* The magic number 12 below is to align to the SUB_BITS_2
213 (defined in chartab.c) boundary in a char-table. */
214 unified_max -= ((range[15] >> 12) + 1) << 12;
215 charset->unified_max = unified_max;
217 else if (charset->method != Mmap)
218 MERROR (MERROR_CHARSET, NULL);
221 MLIST_APPEND1 (&charset_list, charsets, charset, MERROR_CHARSET);
223 if (charset->final_byte > 0)
225 MLIST_APPEND1 (&mcharset__iso_2022_table, charsets, charset,
227 if (charset->revision <= 0)
229 int chars = range[2];
231 if (chars == 128) /* ASCII case */
233 else if (chars == 256) /* ISO-8859-X case */
235 MCHARSET_ISO_2022 (charset->dimension, chars, charset->final_byte)
244 load_charset_fully (MCharset *charset)
246 if (charset->method == Msubset)
248 MCharset *parent = charset->parents[0];
250 if (! parent->fully_loaded
251 && load_charset_fully (parent) < 0)
252 MERROR (MERROR_CHARSET, -1);
253 if (parent->method == Moffset)
257 code = charset->min_code - charset->subset_offset;
258 charset->min_char = DECODE_CHAR (parent, code);
259 code = charset->max_code - charset->subset_offset;
260 charset->max_char = DECODE_CHAR (parent, code);
264 unsigned min_code = charset->min_code - charset->subset_offset;
265 unsigned max_code = charset->max_code - charset->subset_offset;
266 int min_char = DECODE_CHAR (parent, min_code);
267 int max_char = min_char;
269 for (++min_code; min_code <= max_code; min_code++)
271 int c = DECODE_CHAR (parent, min_code);
277 else if (c > max_char)
281 charset->min_char = min_char;
282 charset->max_char = max_char;
285 else if (charset->method == Msuperset)
287 int min_char = 0, max_char = 0;
290 for (i = 0; i < charset->nparents; i++)
292 MCharset *parent = charset->parents[i];
294 if (! parent->fully_loaded
295 && load_charset_fully (parent) < 0)
296 MERROR (MERROR_CHARSET, -1);
298 min_char = parent->min_char, max_char = parent->max_char;
299 else if (parent->min_char < min_char)
300 min_char = parent->min_char;
301 else if (parent->max_char > max_char)
302 max_char = parent->max_char;
304 charset->min_char = min_char;
305 charset->max_char = max_char;
307 else /* charset->method is Mmap or Munify */
309 MDatabase *mdb = mdatabase_find (Mcharset, charset->name, Mnil, Mnil);
312 if (! mdb || ! (plist = mdatabase_load (mdb)))
313 MERROR (MERROR_CHARSET, -1);
314 charset->decoder = mplist_value (plist);
315 charset->encoder = mplist_value (mplist_next (plist));
316 M17N_OBJECT_UNREF (plist);
317 mchartable_range (charset->encoder,
318 &charset->min_char, &charset->max_char);
319 if (charset->method == Mmap)
320 charset->simple = charset->no_code_gap;
322 charset->max_char = charset->unified_max + 1 + charset->code_range[15];
325 charset->fully_loaded = 1;
332 MPlist *mcharset__cache;
334 /* Predefined charsets. */
335 MCharset *mcharset__ascii;
336 MCharset *mcharset__binary;
337 MCharset *mcharset__m17n;
338 MCharset *mcharset__unicode;
340 MCharsetISO2022Table mcharset__iso_2022_table;
342 /** Initialize charset handler. */
349 unified_max = MCHAR_MAX;
351 mcharset__cache = mplist ();
352 mplist_set (mcharset__cache, Mt, NULL);
354 MLIST_INIT1 (&charset_list, charsets, 128);
355 MLIST_INIT1 (&mcharset__iso_2022_table, charsets, 128);
356 charset_definition_list = mplist ();
358 memset (mcharset__iso_2022_table.classified, 0,
359 sizeof (mcharset__iso_2022_table.classified));
361 Mcharset = msymbol ("charset");
363 Mmethod = msymbol ("method");
364 Moffset = msymbol ("offset");
365 Mmap = msymbol ("map");
366 Munify = msymbol ("unify");
367 Msubset = msymbol ("subset");
368 Msuperset = msymbol ("superset");
370 Mdimension = msymbol ("dimension");
371 Mmin_range = msymbol ("min-range");
372 Mmax_range = msymbol ("max-range");
373 Mmin_code = msymbol ("min-code");
374 Mmax_code = msymbol ("max-code");
375 Mascii_compatible = msymbol ("ascii-compatible");
376 Mfinal_byte = msymbol ("final-byte");
377 Mrevision = msymbol ("revision");
378 Mmin_char = msymbol ("min-char");
379 Mmapfile = msymbol_as_managing_key ("mapfile");
380 Mparents = msymbol_as_managing_key ("parents");
381 Msubset_offset = msymbol ("subset-offset");
382 Mdefine_coding = msymbol ("define-coding");
383 Maliases = msymbol_as_managing_key ("aliases");
387 /* Setup predefined charsets. */
388 pl = mplist_add (pl, Mmethod, Moffset);
389 pl = mplist_add (pl, Mmin_range, (void *) 0);
390 pl = mplist_add (pl, Mmax_range, (void *) 0x7F);
391 pl = mplist_add (pl, Mascii_compatible, Mt);
392 pl = mplist_add (pl, Mfinal_byte, (void *) 'B');
393 pl = mplist_add (pl, Mmin_char, (void *) 0);
394 Mcharset_ascii = mchar_define_charset ("ascii", param);
396 mplist_put (param, Mmax_range, (void *) 0xFF);
397 mplist_put (param, Mfinal_byte, NULL);
398 Mcharset_iso_8859_1 = mchar_define_charset ("iso-8859-1", param);
400 mplist_put (param, Mmax_range, (void *) 0x10FFFF);
401 Mcharset_unicode = mchar_define_charset ("unicode", param);
403 mplist_put (param, Mmax_range, (void *) MCHAR_MAX);
404 Mcharset_m17n = mchar_define_charset ("m17n", param);
406 mplist_put (param, Mmax_range, (void *) 0xFF);
407 Mcharset_binary = mchar_define_charset ("binary", param);
409 M17N_OBJECT_UNREF (param);
411 mcharset__ascii = MCHARSET (Mcharset_ascii);
412 mcharset__binary = MCHARSET (Mcharset_binary);
413 mcharset__m17n = MCHARSET (Mcharset_m17n);
414 mcharset__unicode = MCHARSET (Mcharset_unicode);
420 mcharset__fini (void)
425 for (i = 0; i < charset_list.used; i++)
427 MCharset *charset = charset_list.charsets[i];
429 if (charset->decoder)
430 free (charset->decoder);
431 if (charset->encoder)
432 M17N_OBJECT_UNREF (charset->encoder);
435 M17N_OBJECT_UNREF (mcharset__cache);
436 MLIST_FREE1 (&charset_list, charsets);
437 MLIST_FREE1 (&mcharset__iso_2022_table, charsets);
438 MPLIST_DO (plist, charset_definition_list)
439 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
440 M17N_OBJECT_UNREF (charset_definition_list);
445 mcharset__find (MSymbol name)
449 charset = msymbol_get (name, Mcharset);
452 MPlist *param = mplist_get (charset_definition_list, name);
454 MPLIST_KEY (mcharset__cache) = Mt;
457 param = mplist__from_plist (param);
458 mchar_define_charset (MSYMBOL_NAME (name), param);
459 charset = msymbol_get (name, Mcharset);
460 M17N_OBJECT_UNREF (param);
462 MPLIST_KEY (mcharset__cache) = name;
463 MPLIST_VAL (mcharset__cache) = charset;
468 /** Return the character corresponding to code-point CODE in CHARSET.
469 If CODE is invalid for CHARSET, return -1. */
472 mcharset__decode_char (MCharset *charset, unsigned code)
476 if (code < 128 && charset->ascii_compatible)
478 if (code < charset->min_code || code > charset->max_code)
481 if (! charset->fully_loaded
482 && load_charset_fully (charset) < 0)
483 MERROR (MERROR_CHARSET, -1);
485 if (charset->method == Msubset)
487 MCharset *parent = charset->parents[0];
489 code -= charset->subset_offset;
490 return DECODE_CHAR (parent, code);
493 if (charset->method == Msuperset)
497 for (i = 0; i < charset->nparents; i++)
499 MCharset *parent = charset->parents[i];
500 int c = DECODE_CHAR (parent, code);
508 idx = CODE_POINT_TO_INDEX (charset, code);
512 if (charset->method == Mmap)
513 return charset->decoder[idx];
515 if (charset->method == Munify)
517 int c = charset->decoder[idx];
520 c = charset->unified_max + 1 + idx;
524 /* Now charset->method should be Moffset. */
525 return (charset->min_char + idx);
529 /** Return the code point of character C in CHARSET. If CHARSET does not
530 contain C, return MCHAR_INVALID_CODE. */
533 mcharset__encode_char (MCharset *charset, int c)
535 if (! charset->fully_loaded
536 && load_charset_fully (charset) < 0)
537 MERROR (MERROR_CHARSET, MCHAR_INVALID_CODE);
539 if (charset->method == Msubset)
541 MCharset *parent = charset->parents[0];
542 unsigned code = ENCODE_CHAR (parent, c);
544 if (code == MCHAR_INVALID_CODE)
546 code += charset->subset_offset;
547 if (code >= charset->min_code && code <= charset->max_code)
549 return MCHAR_INVALID_CODE;
552 if (charset->method == Msuperset)
556 for (i = 0; i < charset->nparents; i++)
558 MCharset *parent = charset->parents[i];
559 unsigned code = ENCODE_CHAR (parent, c);
561 if (code != MCHAR_INVALID_CODE)
564 return MCHAR_INVALID_CODE;
567 if (c < charset->min_char || c > charset->max_char)
568 return MCHAR_INVALID_CODE;
570 if (charset->method == Mmap)
571 return (unsigned) mchartable_lookup (charset->encoder, c);
573 if (charset->method == Munify)
575 if (c > charset->unified_max)
577 c -= charset->unified_max - 1;
578 return INDEX_TO_CODE_POINT (charset, c);
580 return (unsigned) mchartable_lookup (charset->encoder, c);
583 /* Now charset->method should be Moffset */
584 c -= charset->min_char;
585 return INDEX_TO_CODE_POINT (charset, c);
589 mcharset__load_from_database ()
591 MDatabase *mdb = mdatabase_find (msymbol ("charset-list"), Mnil, Mnil, Mnil);
592 MPlist *def_list, *plist;
593 MPlist *definitions = charset_definition_list;
594 int mdebug_mask = MDEBUG_CHARSET;
599 def_list = (MPlist *) mdatabase_load (mdb);
600 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to load data."));
606 MPLIST_DO (plist, def_list)
611 if (! MPLIST_PLIST_P (plist))
612 MERROR (MERROR_CHARSET, -1);
613 pl = MPLIST_PLIST (plist);
614 if (! MPLIST_SYMBOL_P (pl))
615 MERROR (MERROR_CHARSET, -1);
616 name = MPLIST_SYMBOL (pl);
617 pl = MPLIST_NEXT (pl);
618 definitions = mplist_add (definitions, name, pl);
619 M17N_OBJECT_REF (pl);
620 p = mplist__from_plist (pl);
621 mchar_define_charset (MSYMBOL_NAME (name), p);
622 M17N_OBJECT_UNREF (p);
625 M17N_OBJECT_UNREF (def_list);
626 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to parse the loaded data."));
632 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
637 /*** @addtogroup m17nCharset */
643 @brief Invalid code-point.
645 The macro #MCHAR_INVALID_CODE gives the invalid code-point. */
648 @brief 無効なコードポイント.
650 マクロ #MCHAR_INVALID_CODE は無効なコードポイントを与える。 */
652 #define MCHAR_INVALID_CODE
656 @brief The symbol @c Mcharset.
658 Any decoded M-text has a text property whose key is the predefined
659 symbol @c Mcharset. The name of @c Mcharset is
660 <tt>"charset"</tt>. */
663 @brief シンボル @c Mcharset.
665 デコードされた M-text は、キーが @c Mcharset であるようなテキスト
666 プロパティを持つ。シンボル @c Mcharset は <tt>"charset"</tt> とい
673 @name Variables: Symbols representing a charset.
675 Each of the following symbols represents a predefined charset. */
678 @name 変数: 文字セットを表現する定義済みシンボル
680 以下の各シンボルは、定義済み文字セットを表現する。 */
685 @brief Symbol representing the charset ASCII.
687 The symbol #Mcharset_ascii has name <tt>"ascii"</tt> and represents
688 the charset ISO 646, USA Version X3.4-1968 (ISO-IR-6). */
690 @brief ASCII 文字セットを表現するシンボル.
692 シンボル #Mcharset_ascii は <tt>"ascii"</tt> という名前を持ち、
693 ISO 646, USA Version X3.4-1968 (ISO-IR-6) 文字セットを表現する。
696 MSymbol Mcharset_ascii;
700 @brief Symbol representing the charset ISO/IEC 8859/1.
702 The symbol #Mcharset_iso_8859_1 has name <tt>"iso-8859-1"</tt>
703 and represents the charset ISO/IEC 8859-1:1998. */
705 @brief ISO/IEC 8859-1:1998 文字セットを表現するシンボル.
707 シンボル #Mcharset_iso_8859_1 は <tt>"iso-8859-1"</tt> という名
708 前を持ち、ISO/IEC 8859-1:1998 文字セットを表現する。
711 MSymbol Mcharset_iso_8859_1;
714 @brief Symbol representing the charset Unicode.
716 The symbol #Mcharset_unicode has name <tt>"unicode"</tt> and
717 represents the charset Unicode. */
719 @brief Unicode 文字セットを表現するシンボル.
721 シンボル #Mcharset_unicode は <tt>"unicode"</tt> という名前を持
722 ち、Unicode 文字セットを表現する。 */
724 MSymbol Mcharset_unicode;
728 @brief Symbol representing the largest charset.
730 The symbol #Mcharset_m17n has name <tt>"m17n"</tt> and
731 represents the charset that contains all characters supported by
734 @brief 全文字を含む文字セットを表現するシンボル.
736 シンボル #Mcharset_m17n は <tt>"m17n"</tt> という名前を持ち、
737 m17n ライブラリが扱う全ての文字を含む文字セットを表現する。 */
739 MSymbol Mcharset_m17n;
743 @brief Symbol representing the charset for ill-decoded characters.
745 The symbol #Mcharset_binary has name <tt>"binary"</tt> and
746 represents the fake charset which the decoding functions put to an
747 M-text as a text property when they encounter an invalid byte
748 (sequence). See @ref m17nConv @latexonly
749 (P.\pageref{group__m17nConv}) @endlatexonly for more details. */
752 @brief 正しくデコードできない文字の文字セットを表現するシンボル.
754 シンボル #Mcharset_binary は <tt>"binary"</tt> という名前を持ち、
755 偽の (fake) 文字セットを表現する。デコード関数は、M-text のテキス
756 トプロパティとして、無効なバイト（シークエンス）に遭遇した位置を付加する。
758 詳細は @ref m17nConv @latexonly
759 (P.\pageref{group__m17nConv}) @endlatexonly 参照のこと。 */
761 MSymbol Mcharset_binary;
768 @name Variables: Parameter keys for mchar_define_charset ().
770 These are the predefined symbols to use as parameter keys for the
771 function mchar_define_charset () (which see). */
774 @name 変数: mchar_define_charset 用のパラメータ・キー
776 これらは、関数 mchar_define_charset () 用のパラメータ・キーとして
777 使われるシンボルである。 詳しくはこの関数の解説を参照のこと。*/
782 Parameter key for mchar_define_charset () (which see). */
785 関数 mchar_define_charset () 用のパラメータ・キー. */
793 MSymbol Mascii_compatible;
799 MSymbol Msubset_offset;
800 MSymbol Mdefine_coding;
807 @name Variables: Symbols representing charset methods.
809 These are the predefined symbols that can be a value of the
810 #Mmethod parameter of a charset used in an argument to the
811 mchar_define_charset () function.
813 A method specifies how code-points and character codes are
814 converted. See the documentation of the mchar_define_charset ()
815 function for the details. */
818 @name 変数: 文字セットのメソッド指定に使われるシンボル
820 これらは、文字セットの @e メソッド を指定するための定義済みシンボ
821 ルであり、文字セットの #Mmethod パラメータの値となることができる。
822 この値は関数 mchar_define_charset () の引数として使われる。
824 メソッドとは、コードポイントと文字コードを相互変換する際の方式のこ
825 とである。詳しくは関数 mchar_define_charset () の解説を参照のこと。 */
829 @brief Symbol for the offset type method of charset.
831 The symbol #Moffset has the name <tt>"offset"</tt> and, when used
832 as a value of #Mmethod parameter of a charset, it means that the
833 conversion of code-points and character codes of the charset is
834 done by this calculation:
837 CHARACTER-CODE = CODE-POINT - MIN-CODE + MIN-CHAR
840 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
841 and MIN-CHAR is a value of #Mmin_char parameter. */
844 @brief オフセット型のメソッドを示すシンボル.
846 シンボル #Moffset は <tt>"offset"</tt> という名前を持ち、文字セッ
847 トの #Mmethod パラメータの値として用いられた場合には、コードポイン
848 トと文字セットの文字コードの間の変換が以下の式に従って行われること
852 文字コード = コードポイント - MIN-CODE + MIN-CHAR
855 ここで、MIN-CODE は文字セットの #Mmin_code パラメータの値であり、MIN-CHAR は
856 #Mmin_char パラメータの値である。 */
861 /***en @brief Symbol for the map type method of charset.
863 The symbol #Mmap has the name <tt>"map"</tt> and, when used as a
864 value of #Mmethod parameter of a charset, it means that the
865 conversion of code-points and character codes of the charset is
866 done by looking up a map. The map must be given by #Mmapfile
869 /***ja @brief マップ型のメソッドを示すシンボル.
871 シンボル #Mmap は <tt>"map"</tt> という名前を持ち、文字セットの
872 #Mmethod パラメータの値として用いられた場合には、コードポイントと
873 文字セットの文字コードの間の変換がマップを参照することによって行わ
874 れることを意味する。マップは #Mmapfile パラメータとして与えなけれ
880 /***en @brief Symbol for the unify type method of charset.
882 The symbol #Munify has the name <tt>"unify"</tt> and, when used as
883 a value of #Mmethod parameter of a charset, it means that the
884 conversion of code-points and character codes of the charset is
885 done by map lookup and offsetting. The map must be given by
886 #Mmapfile parameter. For this kind of charset, a unique
887 continuous character code space for all characters is assigned.
889 If the map has an entry for a code-point, the conversion is done
890 by looking up the map. Otherwise, the conversion is done by this
894 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
897 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
898 and LOWEST-CHAR-CODE is the lowest character code of the assigned
901 /***ja @brief ユニファイ型のメソッドを示すシンボル.
903 シンボル #Munify は <tt>"unify"</tt> という名前を持ち、文字セッ
904 トの #Mmethod パラメータの値として用いられた場合には、コードポイン
905 トと文字セットの文字コードの間の変換が、マップの参照とオフセットの
906 組み合わせによって行われることを意味する。マップは #Mmapfile パラ
907 メータとして与えなければならない。この種の各文字セットには、全文字
908 に対して連続するコードスペースがそれぞれ割り当てられる。
910 コードポイントがマップに含まれていれば、変換はマップ参照によって行
911 われる。そうでなければ、以下の式に従う。
914 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
917 ここで、MIN-CODE は文字セットの #Mmin_code パラメータの値であり、
918 LOWEST-CHAR-CODE は割り当てられたコードスペースの最も小さい文字コー
926 @brief Symbol for the subset type method of charset.
928 The symbol #Msubset has the name <tt>"subset"</tt> and, when used
929 as a value of #Mmethod parameter of a charset, it means that the
930 charset is a subset of a parent charset. The parent charset must
931 be given by #Mparents parameter. The conversion of code-points
932 and character codes of the charset is done conceptually by this
936 CHARACTER-CODE = PARENT-CODE (CODE-POINT - SUBSET-OFFSET)
939 where, PARENT-CODE is a pseudo function that returns a character
940 code of CODE-POINT in the parent charset, and SUBSET-OFFSET is a
941 value given by #Msubset_offset parameter. */
943 /***ja @brief サブセット型のメソッドを示すシンボル.
945 シンボル #Msubset は <tt>"subset"</tt> という名前を持ち、文字セッ
946 トの #Mmethod パラメータの値として用いられた場合には、この文字セッ
947 トが別の文字セット（親文字セット）の部分集合であることを意味する。
948 親文字セットは #Mparents パラメータによって与えられなくてはならない。
949 コードポイントと文字セットの文字コードの間の変換は、概念的には
953 CHARACTER-CODE = PARENT-CODE (CODE-POINT - SUBSET-OFFSET)
956 ここで PARENT-CODE は CODE-POINT の親文字セット中での文字コードを
957 返す擬関数であり、SUBSET-OFFSET は #Msubset_offset パラメータで与
965 @brief Symbol for the superset type method of charset.
967 The symbol #Msuperset has the name <tt>"superset"</tt> and, when
968 used as a value of #Mmethod parameter of a charset, it means that
969 the charset is a superset of parent charsets. The parent charsets
970 must be given by #Mparents parameter. */
973 @brief スーパーセット型のメソッドを示すシンボル.
975 シンボル #Msuperset は <tt>"superset"</tt> という名前を持ち、文字
976 セットの #Mmethod パラメータの値として用いられた場合には、この文字
977 セットが別の文字セット（親文字セット）の上位集合であることを意味す
978 る。親文字セットは #Mparents パラメータによって与えられなくてはな
987 @brief Define a charset.
989 The mchar_define_charset () function defines a new charset and
990 makes it accessible via a symbol whose name is $NAME. $PLIST
991 specifies parameters of the charset as below:
995 <li> Key is #Mmethod, value is a symbol.
997 The value specifies the method for decoding/encoding code-points
998 in the charset. It must be #Moffset, #Mmap (default), #Munify,
999 #Msubset, or #Msuperset.
1001 <li> Key is #Mdimension, value is an integer
1003 The value specifies the dimension of code-points of the charset.
1004 It must be 1 (default), 2, 3, or 4.
1006 <li> Key is #Mmin_range, value is an unsigned integer
1008 The value specifies the minimum range of a code-point, which means
1009 that the Nth byte of the value is the minimum Nth byte of
1010 code-points of the charset. The default value is 0.
1012 <li> Key is #Mmax_range, value is an unsigned integer
1014 The value specifies the maximum range of a code-point, which means
1015 that the Nth byte of the value is the maximum Nth byte of
1016 code-points of the charset. The default value is 0xFF, 0xFFFF,
1017 0xFFFFFF, or 0xFFFFFFFF if the dimension is 1, 2, 3, or 4
1020 <li> Key is #Mmin_code, value is an unsigned integer
1022 The value specifies the minimum code-point of
1023 the charset. The default value is the minimum range.
1025 <li> Key is #Mmax_code, value is an unsigned integer
1027 The value specifies the maximum code-point of
1028 the charset. The default value is the maximum range.
1030 <li> Key is #Mascii_compatible, value is a symbol
1032 The value specifies whether the charset is ASCII compatible or
1033 not. If the value is #Mnil (default), it is not ASCII
1034 compatible, else compatible.
1036 <li> Key is #Mfinal_byte, value is an integer
1038 The value specifies the @e final @e byte of the charset registered
1039 in The International Registry. It must be 0 (default) or 48..127.
1040 The value 0 means that the charset is not in the registry.
1042 <li> Key is #Mrevision, value is an integer
1044 The value specifies the @e revision @e number of the charset
1045 registered in The International Registry. It must be 0..127. If
1046 the charset is not in The International Registry, the value is
1047 ignored. The value 0 means that the charset has no revision
1050 <li> Key is #Mmin_char, value is an integer
1052 The value specifies the minimum character code of the charset.
1053 The default value is 0.
1055 <li> Key is #Mmapfile, value is an M-text
1057 If the method is #Mmap or #Munify, data that contains
1058 mapping information is added to the m17n database by calling
1059 mdatabase_define () with the value as an argument $EXTRA_INFO,
1060 i.e. the value is used as a file name of the data.
1062 Otherwise, this parameter is ignored.
1064 <li> Key is #Mparents, value is a plist
1066 If the method is #Msubset, the value must be a plist of length
1067 1, and the value of the plist must be a symbol representing a
1070 If the method is #Msuperset, the value must be a plist of length
1071 less than 9, and the values of the plist must be symbols
1072 representing subset charsets.
1074 Otherwise, this parameter is ignored.
1076 <li> Key is #Mdefine_coding, value is a symbol
1078 If the dimension of the charset is 1, the value specifies whether
1079 or not to define a coding system of the same name whose type is
1080 #Mcharset. A coding system is defined if the value is not #Mnil.
1082 Otherwise, this parameter is ignored.
1087 If the operation was successful, mchar_define_charset () returns a
1088 symbol whose name is $NAME. Otherwise it returns #Mnil and
1089 assigns an error code to the external variable #merror_code. */
1092 @brief 文字セットを定義する.
1094 関数 mchar_define_charset () は新しい文字セットを定義し、それを
1095 $NAME という名前を持つシンボル経由でアクセスできるようにする。
1096 $PLIST は定義される文字セットのパラメータを以下のように指定する。
1100 <li> キーが #Mmethod で値がシンボルの時
1102 値は、#Moffset, #Mmap (デフォルト値), #Munify, #Msubset,
1103 #Msuperset のいずれかであり、文字セットのコードポイントをデコード／
1104 エンコードする際のメソッドを指定する。
1106 <li> キーが #Mdimension で値が整数値の時
1108 値は、1 (デフォルト値), 2, 3, 4 のいずれかであり、文字セットのコー
1109 ドポイントの次元である。
1111 <li> キーが #Mmin_range で値が非負整数値の時
1113 値はコードポイントの最小の値である。すなわち、この値の N 番目のバ
1114 イトはこの文字セットのコードポイントの N 番目のバイトの最小のもの
1115 となる。デフォルト値は 0 。
1117 <li> キーが #Mmax_range で値が非負整数値の時
1119 値はコードポイントの最大の値である。すなわち、この値の N 番目のバ
1120 イトはこの文字セットのコードポイントの N 番目のバイトの最大のもの
1121 となる。デフォルト値は、コードポイントの次元が 1, 2, 3, 4 の時、そ
1122 れぞれ 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF 。
1124 <li> キーが #Mmin_code で値が非負整数値の時
1126 値はこの文字セットの最小のコードポイントである。デフォルト値は
1129 <li> キーが #Mmax_code で値が非負整数値の時
1131 値はこの文字セットの最大のコードポイントである。デフォルト値は
1134 <li> キーが #Mascii_compatible で値がシンボルの時
1136 値はこの文字セットが ASCII 互換であるかどうかを示す。デフォルト値の
1137 #Mnil であれば互換ではなく、それ以外の場合は互換である。
1139 <li> キーが #Mfinal_byte で値が整数値の時
1141 値はこの文字セットの The International Registry に登録されている
1142 @e 終端バイト であり、0 (デフォルト値) であるか 48..127 である。0
1143 は登録されていないことを意味する。
1145 <li> キーが #Mrevision で値が整数値の時
1147 値は The International Registry に登録されている @e revision @e
1148 number であり、0..127 である。文字セットが登録されていない場合には
1149 この値は無視される。 0 は revision number が存在しないことを意味す
1152 <li> キーが #Mmin_char で値が整数値の時
1154 値はこの文字セットの最小の文字コードである。デフォルト値は 0 。
1156 <li> キーが #Mmapfile で値が M-text の時
1158 メソッドが #Mmap か #Munify の時、関数 mdatabase_define () をこの
1159 値を引数 $EXTRA_INFO として呼ぶことによって、マッピングに関するデー
1160 タが m17n データベースに追加される。すなわち、この値はデータファイ
1163 そうでなければ、このパラメータは無視される。
1165 <li> キーが #Mparents で値が plist の時
1167 メソッドが #Msubset ならば、値は長さ 1 の plist であり、その値はこ
1168 の文字セットの上位集合となる文字セットを示すシンボルである。
1170 メソッドが #Msuperset ならば、値は長さ 8 以下の plist であり、それ
1171 らの値はこの文字セットの下位集合である文字セットを示すシンボルであ
1174 そうでなければ、このパラメータは無視される。
1176 <li> キーが #Mdefine_coding で値がシンボルの時
1178 文字セットの次元が 1 ならば、値は #Mcharset タイプで同じ名前のコー
1179 ド系を定義するかどうかを指定する。値が #Mnil 以外の場合に定義する。
1181 そうでなければ、このパラメータは無視される。
1186 処理が成功すれば、mchar_define_charset() は $NAME という名
1187 前のシンボルを返す。そうでなければ #Mnil を返し、外部変数
1188 #merror_code にエラーコードを設定する。*/
1192 @c MERROR_CHARSET */
/* Define a charset named NAME from the properties in PLIST and
   register it on the symbol for NAME, on the canonicalized form of
   that symbol, and on every alias listed under Maliases.
   NOTE(review): this listing omits some source lines (braces, short
   declarations and guards), so the comments below describe only the
   statements that are visible.  */
1195 mchar_define_charset (const char *name, MPlist *plist)
1197 MSymbol sym = msymbol (name);
1200 unsigned min_range, max_range;
1202 MText *mapfile = (MText *) mplist_get (plist, Mmapfile);
/* Allocate the charset record.  MERROR_CHARSET is handed to the
   allocation macro, presumably as the error to raise on failure --
   confirm against the macro definition.  */
1204 MSTRUCT_CALLOC (charset, MERROR_CHARSET);
1205 charset->name = sym;
1206 charset->method = (MSymbol) mplist_get (plist, Mmethod);
/* No method given in PLIST: fall back to Mmap or Moffset.  The test
   that selects between them is not visible here; presumably it is
   whether a map file was supplied.  */
1207 if (! charset->method)
1210 charset->method = Mmap;
1212 charset->method = Moffset;
/* Map-based methods register the map file data with the database.
   NOTE(review): the MERROR below is reached after `charset' has been
   allocated; if MERROR returns from the function, check whether the
   record is leaked.  */
1214 if (charset->method == Mmap || charset->method == Munify)
1217 MERROR (MERROR_CHARSET, Mnil);
1218 mdatabase_define (Mcharset, sym, Mnil, Mnil, NULL, mapfile->data);
/* A missing (or zero) Mdimension means dimension 1.  */
1220 if (! (charset->dimension = (int) mplist_get (plist, Mdimension)))
1221 charset->dimension = 1;
/* Code range.  When Mmax_range is present the dimension is raised as
   needed to hold it; otherwise max_range is derived from the
   dimension.  */
1223 min_range = (unsigned) mplist_get (plist, Mmin_range);
1224 if ((pl = mplist_find_by_key (plist, Mmax_range)))
1226 max_range = (unsigned) MPLIST_VAL (pl);
1227 if (max_range >= 0x1000000)
1228 charset->dimension = 4;
1229 else if (max_range >= 0x10000 && charset->dimension < 3)
1230 charset->dimension = 3;
1231 else if (max_range >= 0x100 && charset->dimension < 2)
1232 charset->dimension = 2;
1234 else if (charset->dimension == 1)
1236 else if (charset->dimension == 2)
1238 else if (charset->dimension == 3)
1239 max_range = 0xFFFFFF;
1241 max_range = 0xFFFFFFFF;
/* Split the range into per-byte bounds, least significant byte first:
   code_range[i * 4] receives byte i of min_range and
   code_range[i * 4 + 1] receives byte i of max_range.  */
1243 memset (charset->code_range, 0, sizeof charset->code_range);
1244 for (i = 0; i < charset->dimension; i++, min_range >>= 8, max_range >>= 8)
1246 charset->code_range[i * 4] = min_range & 0xFF;
1247 charset->code_range[i * 4 + 1] = max_range & 0xFF;
/* Clamp min_code/max_code against the range.
   NOTE(review): the loop above shifts min_range and max_range right
   by 8 bits per dimension, so these comparisons see the shifted
   values rather than the range taken from PLIST -- verify that this
   is intended.  */
1249 if ((charset->min_code = (int) mplist_get (plist, Mmin_code)) < min_range)
1250 charset->min_code = min_range;
1251 if ((charset->max_code = (int) mplist_get (plist, Mmax_code)) > max_range)
1252 charset->max_code = max_range;
/* Any Mascii_compatible value other than Mnil marks the charset as
   ASCII compatible.  */
1253 charset->ascii_compatible
1254 = (MSymbol) mplist_get (plist, Mascii_compatible) != Mnil;
1255 charset->final_byte = (int) mplist_get (plist, Mfinal_byte);
1256 charset->revision = (int) mplist_get (plist, Mrevision);
1257 charset->min_char = (int) mplist_get (plist, Mmin_char);
/* Parents: at most 8 entries are used.  Each entry must be keyed
   Msymbol and MCHARSET must yield a charset for it; otherwise the
   definition fails with MERROR_CHARSET.  */
1258 pl = (MPlist *) mplist_get (plist, Mparents);
1259 charset->nparents = pl ? mplist_length (pl) : 0;
1260 if (charset->nparents > 8)
1261 charset->nparents = 8;
1262 for (i = 0; i < charset->nparents; i++, pl = MPLIST_NEXT (pl))
1264 MSymbol parent_name;
1266 if (MPLIST_KEY (pl) != Msymbol)
1267 MERROR (MERROR_CHARSET, Mnil);
1268 parent_name = MPLIST_SYMBOL (pl);
1269 if (! (charset->parents[i] = MCHARSET (parent_name)))
1270 MERROR (MERROR_CHARSET, Mnil);
1273 charset->subset_offset = (int) mplist_get (plist, Msubset_offset);
/* Attach the record to the symbol, pass it through make_charset, and
   attach the result to the canonicalized symbol as well.  */
1275 msymbol_put (sym, Mcharset, charset);
1276 charset = make_charset (charset);
1279 msymbol_put (msymbol__canonicalize (sym), Mcharset, charset);
/* Register each alias and its canonicalized form.  The walk stops at
   the first element whose key is not Msymbol.  */
1281 for (pl = (MPlist *) mplist_get (plist, Maliases);
1282 pl && MPLIST_KEY (pl) == Msymbol;
1283 pl = MPLIST_NEXT (pl))
1285 MSymbol alias = MPLIST_SYMBOL (pl);
1287 msymbol_put (alias, Mcharset, charset);
1288 msymbol_put (msymbol__canonicalize (alias), Mcharset, charset);
/* Call mconv__register_charset_coding only when Mdefine_coding is
   set, the dimension is 1, and the first byte spans 0..255.  */
1291 if (mplist_get (plist, Mdefine_coding)
1292 && charset->dimension == 1
1293 && charset->code_range[0] == 0 && charset->code_range[1] == 255)
1294 mconv__register_charset_coding (sym);
1301 @brief Resolve charset name.
1303 The mchar_resolve_charset () function returns $SYMBOL if it
1304 represents a charset. Otherwise, it canonicalizes $SYMBOL as a
1305 charset name and, if the canonicalized name represents a charset,
1306 returns it. Otherwise, it returns #Mnil. */
1309 @brief 文字セット名を解決する.
1311 関数 mchar_resolve_charset () は $SYMBOL が文字セットを示していれ
1314 そうでなければ、$SYMBOL を文字セット名として正規化し、それが文字セッ
1315 トを示していれば正規化したものを返す。そうでなければ、#Mnil を
/* Resolve SYMBOL to a charset name.  The Mcharset property is first
   looked up on SYMBOL itself; the visible fallback repeats the lookup
   on the canonicalized symbol (the guard between the two lookups is
   not visible in this listing).  Returns the `name' stored in the
   charset that was found, or Mnil when there is none.
   NOTE(review): the result is charset->name, which need not be the
   symbol passed in (e.g. for an alias) -- confirm against the
   documented contract.  */
1319 mchar_resolve_charset (MSymbol symbol)
1321 MCharset *charset = (MCharset *) msymbol_get (symbol, Mcharset);
1325 symbol = msymbol__canonicalize (symbol);
1326 charset = (MCharset *) msymbol_get (symbol, Mcharset);
1329 return (charset ? charset->name : Mnil);
1335 @brief List symbols representing charsets.
1337 The mchar_list_charset () function makes an array of symbols
1338 representing charsets, stores the pointer to the array in a place
1339 pointed to by $SYMBOLS, and returns the length of the array. */
1342 @brief 文字セットを表わすシンボルを列挙する.
1344 関数 mchar_list_charset () は、文字セットを示すシンボルを並べた配
1345 列を作り、$SYMBOLS でポイントされた場所にこの配列へのポインタを置
1346 き、配列の長さを返す。 */
/* Store in *SYMBOLS a newly allocated array holding the name symbol
   of every entry in charset_list, in list order.  The array has
   charset_list.used elements.
   NOTE(review): nothing visible here frees the array, so presumably
   the caller owns it -- confirm.  The return statement is not visible
   in this listing.  */
1349 mchar_list_charset (MSymbol **symbols)
1353 MTABLE_MALLOC ((*symbols), charset_list.used, MERROR_CHARSET);
1354 for (i = 0; i < charset_list.used; i++)
1355 (*symbols)[i] = charset_list.charsets[i]->name;
1362 @brief Decode a code-point.
1364 The mchar_decode () function decodes code-point $CODE in the
1365 charset represented by the symbol $CHARSET_NAME to get a character
1369 If decoding was successful, mchar_decode () returns the decoded
1370 character code. Otherwise it returns -1. */
1373 @brief コードポイントをデコードする.
1375 関数 mchar_decode () は、シンボル $CHARSET_NAME で示される文字セッ
1376 ト内の $CODE というコードポイントをデコードして文字コードを得る。
1379 デコードが成功すれば、mchar_decode () はデコードされた文字コードを
1380 返す。そうでなければ -1 を返す。 */
/* Decode code-point CODE in the charset named CHARSET_NAME by
   applying DECODE_CHAR to the charset obtained from MCHARSET.
   The guard in front of the early return is not visible in this
   listing; presumably it is taken when MCHARSET finds no charset.
   NOTE(review): that path returns MCHAR_INVALID_CODE while the
   documentation promises -1; the two agree only if the value converts
   to -1 in the function's return type -- confirm.  */
1387 mchar_decode (MSymbol charset_name, unsigned code)
1389 MCharset *charset = MCHARSET (charset_name);
1392 return MCHAR_INVALID_CODE;
1393 return DECODE_CHAR (charset, code);
1399 @brief Encode a character code.
1401 The mchar_encode () function encodes character code $C to get a
1402 code-point in the charset represented by the symbol $CHARSET_NAME.
1405 If encoding was successful, mchar_encode () returns the encoded
1406 code-point. Otherwise it returns #MCHAR_INVALID_CODE. */
1409 @brief 文字コードをエンコードする.
1411 関数 mchar_encode () は、文字コード $C をエンコードしてシンボル
1412 $CHARSET_NAME で示される文字セット内におけるコードポイントを得る。
1415 エンコードが成功すれば、mchar_encode () はエンコードされたコードポイ
1416 ントを返す。そうでなければ #MCHAR_INVALID_CODE を返す。 */
/* Encode character code C into a code-point of the charset named
   CHARSET_NAME by applying ENCODE_CHAR to the charset obtained from
   MCHARSET.  The early return yields MCHAR_INVALID_CODE; its guard is
   not visible in this listing, presumably it is taken when MCHARSET
   finds no charset.  */
1423 mchar_encode (MSymbol charset_name, int c)
1425 MCharset *charset = MCHARSET (charset_name);
1428 return MCHAR_INVALID_CODE;
1429 return ENCODE_CHAR (charset, c);
1435 @brief Call a function for all the characters in a specified charset.
1437 The mchar_map_charset () function calls $FUNC for all the
1438 characters in the charset named $CHARSET_NAME. A call is done for
1439 a chunk of consecutive characters rather than character by
1442 $FUNC receives three arguments: $FROM, $TO, and $ARG. $FROM and
1443 $TO specify the range of character codes in $CHARSET. $ARG is the
1447 If the operation was successful, mchar_map_charset () returns 0.
1448 Otherwise, it returns -1 and assigns an error code to the external
1449 variable #merror_code. */
1452 @brief 指定した文字セットのすべての文字に対して関数を呼ぶ.
1454 関数 mchar_map_charset () は $CHARSET_NAME という名前を持つ文字セッ
1455 ト中のすべての文字に対して $FUNC を呼ぶ。呼び出しは一文字毎ではな
1456 く、連続した文字のまとまり単位で行なわれる。
1458 関数 $FUNC には$FROM, $TO, $ARG の３引数が渡される。$FROM と $TO
1459 は $CHARSET 中の文字コードの範囲を指定する。$ARG は $FUNC_ARG と同
1463 処理に成功すれば mchar_map_charset () は 0 を返す。そうでなければ
1464 -1 を返し、外部変数 #merror_code にエラーコードを設定する。 */
1468 @c MERROR_CHARSET */
/* Call FUNC over the characters of the charset named CHARSET_NAME,
   passing func_arg through as the last argument.
   NOTE(review): this listing omits some source lines (the last
   parameter, braces, short statements, and possibly the end of the
   function), so the comments describe only the visible
   statements.  */
1471 mchar_map_charset (MSymbol charset_name,
1472 void (*func) (int from, int to, void *arg),
1477 charset = MCHARSET (charset_name);
/* Fails with MERROR_CHARSET and -1; the guard is not visible here,
   presumably it is taken when no charset was found.  */
1479 MERROR (MERROR_CHARSET, -1);
/* With an encoder table, walk it from min_char up to max_char.
   mchartable__lookup also writes next_c; FUNC is called with
   [c, next_c - 1] when the lookup result for c is non-negative, so
   next_c appears to be the first character after the run that starts
   at c -- confirm against mchartable__lookup.  */
1481 if (charset->encoder)
1483 int c = charset->min_char;
1486 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) < 0)
1488 while (c <= charset->max_char)
1490 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) >= 0)
1491 (*func) (c, next_c - 1, func_arg);
/* Without an encoder table, a single call covers the whole
   [min_char, max_char] range.  */
1496 (*func) (charset->min_char, charset->max_char, func_arg);