1 /* charset.c -- charset module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 @addtogroup m17nCharset
24 @brief Charset objects and API for them.
26 The m17n library uses @e charset objects to represent coded
27 character sets (CCS). The m17n library supports many predefined
28 coded character sets. Moreover, application programs can add
29 other charsets. A character can belong to multiple charsets.
31 The m17n library distinguishes the following three concepts:
33 @li A @e code-point is a number assigned by the CCS to each
34 character. Code-points may or may not be continuous. The type
35 @c unsigned is used to represent a code-point. An invalid
36 code-point is represented by the macro @c MCHAR_INVALID_CODE.
38 @li A @e character @e index is the canonical index of a character
39 in a CCS. The character that has the character index N occupies
40 the Nth position when all the characters in the current CCS are
41 sorted by their code-points. Character indices in a CCS are
42 continuous and start with 0.
44 @li A @e character @e code is the internal representation in the
45 m17n library of a character. A character code is a signed integer
48 Each charset object defines how characters are converted between
49 code-points and character codes. To @e decode means converting
50 code-points to character codes and to @e encode means converting
51 character codes to code-points. */
54 @addtogroup m17nCharset
55 @brief ʸ»ú¥»¥Ã¥È¥ª¥Ö¥¸¥§¥¯¥È¤È¤½¤ì¤Ë´Ø¤¹¤ë API.
57 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢Éä¹æ²½Ê¸»ú½¸¹ç (CCS) ¤ò @e ʸ»ú¥»¥Ã¥È ¤È¸Æ¤Ö¥ª
58 ¥Ö¥¸¥§¥¯¥È¤Çɽ¸½¤¹¤ë¡£m17n ¥é¥¤¥Ö¥é¥ê¤Ï¿¤¯¤ÎÉä¹æ²½Ê¸»ú½¸¹ç¤ò¤¢¤é¤«¤¸¤á
59 ¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ë¤·¡¢¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥à¤¬Æȼ«¤Ëʸ»ú¥»¥Ã¥È¤ò
60 Äɲ乤뤳¤È¤â²Äǽ¤Ç¤¢¤ë¡£°ì¤Ä¤Îʸ»ú¤ÏÊ£¿ô¤Îʸ»ú¥»¥Ã¥È¤Ë°¤·¤Æ¤â¤è
63 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢°Ê²¼¤Î³µÇ°¤ò¶èÊ̤·¤Æ¤¤¤ë:
65 @li @e ¥³¡¼¥É¥Ý¥¤¥ó¥È ¤È¤Ï¡¢CCS ¤¬¤½¤ÎÃæ¤Î¸Ä¡¹¤Îʸ»ú¤ËÂФ·¤ÆÄêµÁ¤¹
66 ¤ë¿ôÃͤǤ¢¤ë¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤ÏϢ³¤·¤Æ¤¤¤ë¤È¤Ï¸Â¤é¤Ê¤¤¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ï
67 @c unsigned ·¿¤Ë¤è¤Ã¤Æɽ¤µ¤ì¤ë¡£Ìµ¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ï¥Þ¥¯¥í
68 @c MCHAR_INVALID_CODE ¤Çɽ¤µ¤ì¤ë¡£
70 @li @e ʸ»ú¥¤¥ó¥Ç¥Ã¥¯¥¹ ¤È¤Ï¡¢CCS Æâ¤Ç³Æʸ»ú¤Ë³ä¤êÅö¤Æ¤é¤ì¤ëÀµµ¬²½
71 ¤µ¤ì¤¿¥¤¥ó¥Ç¥Ã¥¯¥¹¤Ç¤¢¤ë¡£Ê¸»ú¥¤¥ó¥Ç¥Ã¥¯¥¹¤¬N¤Îʸ»ú¤Ï¡¢CCS Ãæ¤ÎÁ´
72 ʸ»ú¤ò¥³¡¼¥É¥Ý¥¤¥ó¥È½ç¤Ëʤ٤¿¤È¤¤ËNÈÖÌܤËÍè¤ë¡£CCSÃæ¤Îʸ»ú¥¤¥ó
73 ¥Ç¥Ã¥¯¥¹¤ÏϢ³¤·¤Æ¤ª¤ê¡¢0¤«¤é»Ï¤Þ¤ë¡£
75 @li @e ʸ»ú¥³¡¼¥É ¤È¤Ï¡¢m17n ¥é¥¤¥Ö¥é¥êÆâ¤Ë¤ª¤±¤ëʸ»ú¤ÎÆâÉôɽ¸½¤Ç¤¢
76 ¤ê¡¢21 ¥Ó¥Ã¥È°Ê¾å¤ÎŤµ¤ò»ý¤ÄÉä¹çÉÕ¤À°¿ô¤Ç¤¢¤ë¡£
78 ³Æʸ»ú¥»¥Ã¥È¥ª¥Ö¥¸¥§¥¯¥È¤Ï¡¢¤½¤Îʸ»ú¥»¥Ã¥È¤Ë°¤¹¤ëʸ»ú¤Î¥³¡¼¥É¥Ý¥¤
79 ¥ó¥È¤Èʸ»ú¥³¡¼¥É¤È¤Î´Ö¤ÎÊÑ´¹¤òµ¬Äꤹ¤ë¡£¥³¡¼¥É¥Ý¥¤¥ó¥È¤«¤éʸ»ú¥³¡¼
80 ¥É¤Ø¤ÎÊÑ´¹¤ò @e ¥Ç¥³¡¼¥É ¤È¸Æ¤Ó¡¢Ê¸»ú¥³¡¼¥É¤«¤é¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ø¤Î
81 ÊÑ´¹¤ò @e ¥¨¥ó¥³¡¼¥É ¤È¸Æ¤Ö¡£ */
84 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
85 /*** @addtogroup m17nInternal
95 #include "m17n-misc.h"
103 static int unified_max;
105 /** List of all charsets ever defined. */
113 static struct MCharsetList charset_list;
115 static MPlist *charset_definition_list;
117 /** Make a charset object from the template of MCharset structure
118 CHARSET, and return a pointer to the new charset object.
119 CHARSET->code_range[4N + 2] and CHARSET->code_range[4N + 3] are
/* Validate the charset template CHARSET, fill in the derived parts of
   its code_range table and its code-point limits, apply the
   method-specific setup, and register it in charset_list (and in
   mcharset__iso_2022_table when it has a final byte).  Every
   validation failure goes through MERROR (MERROR_CHARSET, NULL).
   NOTE(review): several short lines of this function (declarations,
   braces, some statements) are not visible in this listing; the
   comments below describe only what the visible lines do.  */
123 make_charset (MCharset *charset)
125 unsigned min_code, max_code;
127 int *range = charset->code_range;
/* A code-point has between 1 and 4 byte positions.  */
129 if (charset->dimension < 1 || charset->dimension > 4)
130 MERROR (MERROR_CHARSET, NULL);
/* The final byte must be either non-positive (0 is the documented
   "not registered" value) or lie in '0'..127.  */
131 if ((charset->final_byte > 0 && charset->final_byte < '0')
132 || charset->final_byte > 127)
133 MERROR (MERROR_CHARSET, NULL);
/* For each of the four byte positions, range[4i] and range[4i + 1]
   hold the minimum and maximum byte.  Store the number of byte values
   in range[4i + 2] and the running product of those counts in
   range[4i + 3], so range[15] ends up as the product over all four
   positions.  */
135 for (i = 0, n = 1; i < 4; i++)
137 if (range[i * 4] > range[i * 4 + 1])
138 MERROR (MERROR_CHARSET, NULL);
139 range[i * 4 + 2] = range[i * 4 + 1] - range[i * 4] + 1;
140 n *= range[i * 4 + 2];
141 range[i * 4 + 3] = n;
/* Pack the per-position minimum bytes into one code-point, position 0
   in the lowest byte.  A min_code of 0 is replaced by this value; any
   other min_code must not be below it.  */
144 min_code = range[0] | (range[4] << 8) | (range[8] << 16) | (range[12] << 24);
145 if (charset->min_code == 0)
146 charset->min_code = min_code;
147 else if (charset->min_code < min_code)
148 MERROR (MERROR_CHARSET, NULL);
/* Same for the maximum bytes: a max_code of 0 is replaced, any other
   max_code must not exceed the packed maximum.  */
149 max_code = range[1] | (range[5] << 8) | (range[9] << 16) | (range[13] << 24);
150 if (charset->max_code == 0)
151 charset->max_code = max_code;
152 else if (charset->max_code > max_code)
153 MERROR (MERROR_CHARSET, NULL);
155 charset->code_range_min_code = min_code;
156 charset->fully_loaded = 0;
/* A subset charset needs exactly one parent, that parent must not be
   a superset charset, and the subset's code-points shifted down by
   subset_offset must stay within the parent's min_code..max_code.  */
159 if (charset->method == Msubset)
163 if (charset->nparents != 1)
164 MERROR (MERROR_CHARSET, NULL);
165 parent = charset->parents[0];
166 if (parent->method == Msuperset
167 || charset->min_code - charset->subset_offset < parent->min_code
168 || charset->max_code - charset->subset_offset > parent->max_code)
169 MERROR (MERROR_CHARSET, NULL);
/* A superset charset needs at least two parents, and its
   min_code..max_code must enclose every parent's range.  */
171 else if (charset->method == Msuperset)
173 if (charset->nparents < 2)
174 MERROR (MERROR_CHARSET, NULL);
175 for (i = 0; i < charset->nparents; i++)
176 if (charset->min_code > charset->parents[i]->min_code
177 || charset->max_code < charset->parents[i]->max_code)
178 MERROR (MERROR_CHARSET, NULL);
/* NOTE(review): the assignment target and several operands of the
   expression below are not visible here; the test on no_code_gap
   right after it suggests it computes charset->no_code_gap -- confirm
   against the full source.  */
183 = (charset->dimension == 1
185 && (charset->dimension == 2
187 && (charset->dimension == 3
188 || range[10] == 256)))));
/* When the code space has gaps, build code_range_mask: bit I of
   code_range_mask[J] is set when byte value J lies within the range of
   byte position I.  */
190 if (! charset->no_code_gap)
194 memset (charset->code_range_mask, 0,
195 sizeof charset->code_range_mask);
196 for (i = 0; i < 4; i++)
197 for (j = range[i * 4]; j <= range[i * 4 + 1]; j++)
198 charset->code_range_mask[j] |= (1 << i);
/* Offset method: the characters are the range[15] consecutive codes
   starting at min_char, and must lie within 0..unified_max.  Nothing
   has to be loaded later, so the charset is marked fully loaded.  */
201 if (charset->method == Moffset)
203 charset->max_char = charset->min_char + range[15] - 1;
204 if (charset->min_char < 0
205 || charset->max_char < 0 || charset->max_char > unified_max)
206 MERROR (MERROR_CHARSET, NULL);
207 charset->simple = charset->no_code_gap;
208 charset->fully_loaded = 1;
/* Unify method: lower the file-wide unified_max by a block that is a
   multiple of 1 << 12 and larger than range[15], and record the new
   value in the charset.  */
210 else if (charset->method == Munify)
212 /* The magic number 12 below is to align to the SUB_BITS_2
213 (defined in chartab.c) boundary in a char-table. */
214 unified_max -= ((range[15] >> 12) + 1) << 12;
215 charset->unified_max = unified_max;
/* Of the remaining methods only Mmap is accepted at this point.  */
217 else if (charset->method != Mmap)
218 MERROR (MERROR_CHARSET, NULL);
/* Record the charset in the list of all defined charsets.  */
221 MLIST_APPEND1 (&charset_list, charsets, charset, MERROR_CHARSET);
/* Charsets with a final byte are also appended to the ISO 2022 table.
   NOTE(review): the statements that use CHARS and MCHARSET_ISO_2022
   below are only partly visible in this listing.  */
223 if (charset->final_byte > 0)
225 MLIST_APPEND1 (&mcharset__iso_2022_table, charsets, charset,
227 if (charset->revision <= 0)
229 int chars = range[2];
231 if (chars == 128) /* ASCII case */
233 else if (chars == 256) /* ISO-8859-X case */
235 MCHARSET_ISO_2022 (charset->dimension, chars, charset->final_byte)
/* Complete the deferred part of CHARSET's setup and set
   charset->fully_loaded.  For subset and superset charsets this
   computes min_char/max_char from the parent(s), loading each parent
   first when needed.  For map and unify charsets it loads the decoder
   and encoder tables from the database entry keyed by the charset
   name.  Failures go through MERROR (MERROR_CHARSET, -1); callers in
   this file test the result with "< 0".
   NOTE(review): some short lines of this function (braces, else,
   small declarations, the final return) are not visible in this
   listing.  */
244 load_charset_fully (MCharset *charset)
246 if (charset->method == Msubset)
248 MCharset *parent = charset->parents[0];
/* The parent must be usable before it can decode for us.  */
250 if (! parent->fully_loaded
251 && load_charset_fully (parent) < 0)
252 MERROR (MERROR_CHARSET, -1);
/* With an offset parent, decoding the two end code-points (shifted
   into the parent's code space) gives min_char and max_char
   directly.  */
253 if (parent->method == Moffset)
257 code = charset->min_code - charset->subset_offset;
258 charset->min_char = DECODE_CHAR (parent, code);
259 code = charset->max_code - charset->subset_offset;
260 charset->max_char = DECODE_CHAR (parent, code);
/* Otherwise decode every code-point of the subset through the parent
   and track the smallest and largest character seen.  */
264 unsigned min_code = charset->min_code - charset->subset_offset;
265 unsigned max_code = charset->max_code - charset->subset_offset;
266 int min_char = DECODE_CHAR (parent, min_code);
267 int max_char = min_char;
269 for (++min_code; min_code <= max_code; min_code++)
271 int c = DECODE_CHAR (parent, min_code);
277 else if (c > max_char)
281 charset->min_char = min_char;
282 charset->max_char = max_char;
285 else if (charset->method == Msuperset)
287 int min_char = 0, max_char = 0;
/* The character range of a superset is the union of its parents'
   ranges.  */
290 for (i = 0; i < charset->nparents; i++)
292 MCharset *parent = charset->parents[i];
294 if (! parent->fully_loaded
295 && load_charset_fully (parent) < 0)
296 MERROR (MERROR_CHARSET, -1);
298 min_char = parent->min_char, max_char = parent->max_char;
299 else if (parent->min_char < min_char)
300 min_char = parent->min_char;
/* The upper bound is tested independently of the lower bound: one
   parent may extend the range on both sides, and chaining this test
   with "else" would then leave max_char too small.  */
301 if (parent->max_char > max_char)
302 max_char = parent->max_char;
304 charset->min_char = min_char;
305 charset->max_char = max_char;
307 else /* charset->method is Mmap or Munify */
309 MDatabase *mdb = mdatabase_find (Mcharset, charset->name, Mnil, Mnil);
312 if (! mdb || ! (plist = mdatabase_load (mdb)))
313 MERROR (MERROR_CHARSET, -1);
/* The loaded plist carries the decoder as its first value and the
   encoder as its second; the plist itself is released afterwards.  */
314 charset->decoder = mplist_value (plist);
315 charset->encoder = mplist_value (mplist_next (plist));
316 M17N_OBJECT_UNREF (plist);
/* Take the character range from the encoder table.  */
317 mchartable_range (charset->encoder,
318 &charset->min_char, &charset->max_char);
319 if (charset->method == Mmap)
320 charset->simple = charset->no_code_gap;
/* Characters of the block just above unified_max are also part of
   this charset, so max_char is moved to the top of that block.  */
322 charset->max_char = charset->unified_max + 1 + charset->code_range[15];
325 charset->fully_loaded = 1;
/* Plist used as a lookup cache: mcharset__find () stores the most
   recently requested name as its key and the matching charset as its
   value.  */
332 MPlist *mcharset__cache;
334 /* Predefined charsets. */
/* Each pointer below is set during initialization from the
   corresponding Mcharset_* symbol via MCHARSET ().  */
335 MCharset *mcharset__ascii;
336 MCharset *mcharset__binary;
337 MCharset *mcharset__m17n;
338 MCharset *mcharset__unicode;
/* Table of the charsets that have a final byte; make_charset ()
   appends to it.  */
340 MCharsetISO2022Table mcharset__iso_2022_table;
342 /** Initialize charset handler. */
349 unified_max = MCHAR_MAX;
351 mcharset__cache = mplist ();
352 mplist_set (mcharset__cache, Mt, NULL);
354 MLIST_INIT1 (&charset_list, charsets, 128);
355 MLIST_INIT1 (&mcharset__iso_2022_table, charsets, 128);
356 charset_definition_list = mplist ();
358 memset (mcharset__iso_2022_table.classified, 0,
359 sizeof (mcharset__iso_2022_table.classified));
361 Mcharset = msymbol ("charset");
363 Mmethod = msymbol ("method");
364 Moffset = msymbol ("offset");
365 Mmap = msymbol ("map");
366 Munify = msymbol ("unify");
367 Msubset = msymbol ("subset");
368 Msuperset = msymbol ("superset");
370 Mdimension = msymbol ("dimension");
371 Mmin_range = msymbol ("min-range");
372 Mmax_range = msymbol ("max-range");
373 Mmin_code = msymbol ("min-code");
374 Mmax_code = msymbol ("max-code");
375 Mascii_compatible = msymbol ("ascii-compatible");
376 Mfinal_byte = msymbol ("final-byte");
377 Mrevision = msymbol ("revision");
378 Mmin_char = msymbol ("min-char");
379 Mmapfile = msymbol_as_managing_key ("mapfile");
380 Mparents = msymbol_as_managing_key ("parents");
381 Msubset_offset = msymbol ("subset-offset");
382 Mdefine_coding = msymbol ("define-coding");
383 Maliases = msymbol_as_managing_key ("aliases");
387 /* Setup predefined charsets. */
388 pl = mplist_add (pl, Mmethod, Moffset);
389 pl = mplist_add (pl, Mmin_range, (void *) 0);
390 pl = mplist_add (pl, Mmax_range, (void *) 0x7F);
391 pl = mplist_add (pl, Mascii_compatible, Mt);
392 pl = mplist_add (pl, Mfinal_byte, (void *) 'B');
393 pl = mplist_add (pl, Mmin_char, (void *) 0);
394 Mcharset_ascii = mchar_define_charset ("ascii", param);
396 mplist_put (param, Mmax_range, (void *) 0xFF);
397 mplist_put (param, Mfinal_byte, NULL);
398 Mcharset_iso_8859_1 = mchar_define_charset ("iso-8859-1", param);
400 mplist_put (param, Mmax_range, (void *) 0x10FFFF);
401 Mcharset_unicode = mchar_define_charset ("unicode", param);
403 mplist_put (param, Mmax_range, (void *) MCHAR_MAX);
404 Mcharset_m17n = mchar_define_charset ("m17n", param);
406 mplist_put (param, Mmax_range, (void *) 0xFF);
407 Mcharset_binary = mchar_define_charset ("binary", param);
409 M17N_OBJECT_UNREF (param);
411 mcharset__ascii = MCHARSET (Mcharset_ascii);
412 mcharset__binary = MCHARSET (Mcharset_binary);
413 mcharset__m17n = MCHARSET (Mcharset_m17n);
414 mcharset__unicode = MCHARSET (Mcharset_unicode);
/* Release what the charset module holds: the per-charset decoder and
   encoder tables, the lookup cache, the two charset lists and the
   stored charset definitions.
   NOTE(review): declarations and braces of this function are not
   visible in this listing.  */
420 mcharset__fini (void)
425 for (i = 0; i < charset_list.used; i++)
427 MCharset *charset = charset_list.charsets[i];
/* The decoder is released with free (); the encoder is a managed
   object and is released by dropping a reference.  */
429 if (charset->decoder)
430 free (charset->decoder);
431 if (charset->encoder)
432 M17N_OBJECT_UNREF (charset->encoder);
435 M17N_OBJECT_UNREF (mcharset__cache);
436 MLIST_FREE1 (&charset_list, charsets);
437 MLIST_FREE1 (&mcharset__iso_2022_table, charsets);
/* Drop the reference held on each stored definition, then the list
   itself.  */
438 MPLIST_DO (plist, charset_definition_list)
439 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
440 M17N_OBJECT_UNREF (charset_definition_list);
/* Find the charset object attached to the symbol NAME.  A charset
   that is not attached yet but has an entry in
   charset_definition_list is defined from that entry first.  The
   name/charset pair is then stored in mcharset__cache.
   NOTE(review): the conditions guarding these steps and the return
   statement are not visible in this listing.  */
445 mcharset__find (MSymbol name)
/* The charset object is kept as the Mcharset property of NAME.  */
449 charset = msymbol_get (name, Mcharset);
/* Look for a stored definition under NAME.  */
452 MPlist *param = mplist_get (charset_definition_list, name);
/* Put the cache key back to Mt, the value it is initialized with.  */
454 MPLIST_KEY (mcharset__cache) = Mt;
/* Build a parameter plist from the stored definition, define the
   charset from it, fetch the object that the definition attached to
   NAME, and release the temporary plist.  */
457 param = mplist__from_plist (param);
458 mchar_define_charset (MSYMBOL_NAME (name), param);
459 charset = msymbol_get (name, Mcharset);
460 M17N_OBJECT_UNREF (param);
/* Remember this lookup in the cache.  */
462 MPLIST_KEY (mcharset__cache) = name;
463 MPLIST_VAL (mcharset__cache) = charset;
468 /** Return the character corresponding to code-point CODE in CHARSET.
469 If CODE is invalid for CHARSET, return -1. */
/* NOTE(review): several short lines (return statements, braces,
   declarations of I and IDX) are not visible in this listing, so the
   bodies of some of the tests below cannot be seen.  */
472 mcharset__decode_char (MCharset *charset, unsigned code)
/* Code-points below 128 of an ASCII-compatible charset are handled
   first, before any range check or loading.  */
476 if (code < 128 && charset->ascii_compatible)
/* Range check against the charset's code-point limits.  */
478 if (code < charset->min_code || code > charset->max_code)
/* Finish the deferred setup of the charset on first use.  */
481 if (! charset->fully_loaded
482 && load_charset_fully (charset) < 0)
483 MERROR (MERROR_CHARSET, -1);
/* Subset: shift the code-point into the parent's code space and let
   the parent decode it.  */
485 if (charset->method == Msubset)
487 MCharset *parent = charset->parents[0];
489 code -= charset->subset_offset;
490 return DECODE_CHAR (parent, code);
/* Superset: try the parents in order with the unchanged code-point.  */
493 if (charset->method == Msuperset)
497 for (i = 0; i < charset->nparents; i++)
499 MCharset *parent = charset->parents[i];
500 int c = DECODE_CHAR (parent, code);
/* The remaining methods work on the index of the code-point.  */
508 idx = CODE_POINT_TO_INDEX (charset, code);
/* Map: the decoder table is indexed by the code-point index.  */
512 if (charset->method == Mmap)
513 return charset->decoder[idx];
/* Unify: the table is consulted first; the fallback is the character
   at unified_max + 1 + idx (the test selecting the fallback is not
   visible here).  */
515 if (charset->method == Munify)
517 int c = charset->decoder[idx];
520 c = charset->unified_max + 1 + idx;
524 /* Now charset->method should be Moffset. */
525 return (charset->min_char + idx);
529 /** Return the code point of character C in CHARSET. If CHARSET does not
530 contain C, return MCHAR_INVALID_CODE. */
/* NOTE(review): several short lines (return statements, braces, the
   declaration of I) are not visible in this listing.  */
533 mcharset__encode_char (MCharset *charset, int c)
/* Finish the deferred setup of the charset on first use.  */
535 if (! charset->fully_loaded
536 && load_charset_fully (charset) < 0)
537 MERROR (MERROR_CHARSET, MCHAR_INVALID_CODE);
/* Subset: encode through the parent, shift the result up by
   subset_offset, and accept it only if it lands inside this charset's
   min_code..max_code.  */
539 if (charset->method == Msubset)
541 MCharset *parent = charset->parents[0];
542 unsigned code = ENCODE_CHAR (parent, c);
544 if (code == MCHAR_INVALID_CODE)
546 code += charset->subset_offset;
547 if (code >= charset->min_code && code <= charset->max_code)
549 return MCHAR_INVALID_CODE;
/* Superset: try the parents in order; the character is invalid when no
   parent yields a valid code-point.  */
552 if (charset->method == Msuperset)
556 for (i = 0; i < charset->nparents; i++)
558 MCharset *parent = charset->parents[i];
559 unsigned code = ENCODE_CHAR (parent, c);
561 if (code != MCHAR_INVALID_CODE)
564 return MCHAR_INVALID_CODE;
/* The remaining methods only cover min_char..max_char.  */
567 if (c < charset->min_char || c > charset->max_char)
568 return MCHAR_INVALID_CODE;
570 if (charset->method == Mmap)
571 return (unsigned) mchartable_lookup (charset->encoder, c);
573 if (charset->method == Munify)
/* Characters above unified_max are the ones that
   mcharset__decode_char () builds as unified_max + 1 + index, so the
   index is recovered by subtracting unified_max + 1.  All other
   characters go through the encoder table.  */
575 if (c > charset->unified_max)
577 c -= charset->unified_max + 1;
578 return INDEX_TO_CODE_POINT (charset, c);
580 return (unsigned) mchartable_lookup (charset->encoder, c);
583 /* Now charset->method should be Moffset */
/* Offset: the index is the distance from min_char.  */
584 c -= charset->min_char;
585 return INDEX_TO_CODE_POINT (charset, c);
/* Load the "charset-list" entry of the database and process every
   definition in it: store the definition in charset_definition_list,
   define the charset, and register a charset coding when the
   definition asks for one.  A malformed entry goes through
   MERROR (MERROR_CHARSET, -1).
   NOTE(review): some declarations, braces and the final return are
   not visible in this listing.  */
589 mcharset__load_from_database ()
591 MDatabase *mdb = mdatabase_find (msymbol ("charset-list"), Mnil, Mnil, Mnil);
592 MPlist *def_list, *plist;
593 MPlist *definitions = charset_definition_list;
594 int mdebug_mask = MDEBUG_CHARSET;
599 def_list = (MPlist *) mdatabase_load (mdb);
600 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to load data."));
606 MPLIST_DO (plist, def_list)
/* Each element must be a plist whose first element is the charset
   name (a symbol); the rest of that plist is the definition.  */
611 if (! MPLIST_PLIST_P (plist))
612 MERROR (MERROR_CHARSET, -1);
613 pl = MPLIST_PLIST (plist);
614 if (! MPLIST_SYMBOL_P (pl))
615 MERROR (MERROR_CHARSET, -1);
616 name = MPLIST_SYMBOL (pl);
617 pl = MPLIST_NEXT (pl);
/* Store the definition under NAME and take a reference on it;
   mcharset__fini () drops that reference.  */
618 definitions = mplist_add (definitions, name, pl);
619 M17N_OBJECT_REF (pl);
/* Define the charset from a parameter plist built from the
   definition; the temporary plist is released afterwards.  */
620 p = mplist__from_plist (pl);
621 mchar_define_charset (MSYMBOL_NAME (name), p);
622 M17N_OBJECT_UNREF (p);
/* Register a charset coding for NAME when the definition contains
   Mdefine_coding as a value and the element after it has the value
   Mt.  */
623 if ((pl = mplist_find_by_value (pl, Mdefine_coding))
624 && (MSymbol) MPLIST_VAL (MPLIST_NEXT (pl)) == Mt)
625 mconv__register_charset_coding (name);
628 M17N_OBJECT_UNREF (def_list);
629 MDEBUG_PRINT_TIME ("CHARSET", (stderr, " to parse the loaded data."));
635 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
640 /*** @addtogroup m17nCharset */
646 @brief Invalid code-point.
648 The macro #MCHAR_INVALID_CODE gives the invalid code-point. */
651 @brief ̵¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È
653 ¥Þ¥¯¥í #MCHAR_INVALID_CODE ¤Ï̵¸ú¤Ê¥³¡¼¥É¥Ý¥¤¥ó¥È¤òÍ¿¤¨¤ë¡£ */
655 #define MCHAR_INVALID_CODE
659 @brief The symbol @c Mcharset.
661 Any decoded M-text has a text property whose key is the predefined
662 symbol @c Mcharset. The name of @c Mcharset is
663 <tt>"charset"</tt>. */
666 @brief ¥·¥ó¥Ü¥ë @c Mcharset
668 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¡¢¥¡¼¤¬ @c Mcharset ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È
669 ¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£¥·¥ó¥Ü¥ë @c Mcharset ¤Ï <tt>"charset"</tt> ¤È¤¤
676 @name Variables: Symbols representing a charset.
678 Each of the following symbols represents a predefined charset. */
681 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ëÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë
683 °Ê²¼¤Î³Æ¥·¥ó¥Ü¥ë¤Ï¡¢ÄêµÁºÑ¤ßʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£ */
688 @brief Symbol representing the charset ASCII.
690 The symbol #Mcharset_ascii has name <tt>"ascii"</tt> and represents
691 the charset ISO 646, USA Version X3.4-1968 (ISO-IR-6). */
693 @brief ASCII ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë
695 ¥·¥ó¥Ü¥ë #Mcharset_ascii ¤Ï <tt>"ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
696 ISO 646, USA Version X3.4-1968 (ISO-IR-6) ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£
699 MSymbol Mcharset_ascii;
703 @brief Symbol representing the charset ISO/IEC 8859/1.
705 The symbol #Mcharset_iso_8859_1 has name <tt>"iso-8859-1"</tt>
706 and represents the charset ISO/IEC 8859-1:1998. */
708 @brief ISO/IEC 8859-1:1998 ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë
710 ¥·¥ó¥Ü¥ë #Mcharset_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt> ¤È¤¤¤¦Ì¾
711 Á°¤ò»ý¤Á¡¢ISO/IEC 8859-1:1998 ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£
714 MSymbol Mcharset_iso_8859_1;
717 @brief Symbol representing the charset Unicode.
719 The symbol #Mcharset_unicode has name <tt>"unicode"</tt> and
720 represents the charset Unicode. */
722 @brief Unicode ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë
724 ¥·¥ó¥Ü¥ë #Mcharset_unicode ¤Ï <tt>"unicode"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý
725 ¤Á¡¢Unicode ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£ */
727 MSymbol Mcharset_unicode;
731 @brief Symbol representing the largest charset.
733 The symbol #Mcharset_m17n has name <tt>"m17n"</tt> and
734 represents the charset that contains all characters supported by
737 @brief Á´Ê¸»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë
739 ¥·¥ó¥Ü¥ë #Mcharset_m17n ¤Ï <tt>"m17n"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
740 m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò´Þ¤àʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£ */
742 MSymbol Mcharset_m17n;
746 @brief Symbol representing the charset for ill-decoded characters.
748 The symbol #Mcharset_binary has name <tt>"binary"</tt> and
749 represents the fake charset which the decoding functions put to an
750 M-text as a text property when they encounter an invalid byte
751 (sequence). See @ref m17nConv @latexonly
752 (P.\pageref{group__m17nConv}) @endlatexonly for more detail. */
755 @brief ¥Ç¥³¡¼¥É¤Ç¤¤Ê¤¤Ê¸»ú¤Îʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¥·¥ó¥Ü¥ë
757 ¥·¥ó¥Ü¥ë #Mcharset_binary ¤Ï <tt>"binary"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
758 µ¶¤Î (fake) ʸ»ú¥»¥Ã¥È¤òɽ¸½¤¹¤ë¡£¥Ç¥³¡¼¥É´Ø¿ô¤Ï¡¢M-text ¤Î¥Æ¥¥¹
759 ¥È¥×¥í¥Ñ¥Æ¥£¤È¤·¤Æ¡¢Ìµ¸ú¤Ê¥Ð¥¤¥È¡Ê¥·¡¼¥¯¥¨¥ó¥¹¡Ë¤ËÁø¶ø¤·¤¿°ÌÃÖ¤òÉղ乤롣
761 ¾ÜºÙ¤Ï @ref m17nConv @latexonly
762 (P.\pageref{group__m17nConv}) @endlatexonly »²¾È¤Î¤³¤È¡£ */
764 MSymbol Mcharset_binary;
771 @name Variables: Parameter keys for mchar_define_charset ().
773 These are the predefined symbols to use as parameter keys for the
774 function mchar_define_charset () (which see). */
777 @name ÊÑ¿ô: mchar_define_charset ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼
779 ¤³¤ì¤é¤Ï¡¢´Ø¿ô mchar_define_charset () ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼¤È¤·¤Æ
780 »È¤ï¤ì¤ë¥·¥ó¥Ü¥ë¤Ç¤¢¤ë¡£ ¾Ü¤·¤¯¤Ï¤³¤Î´Ø¿ô¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£*/
785 Parameter key for mchar_define_charset () (which see). */
788 ´Ø¿ô mchar_define_charset () ÍѤΥѥé¥á¡¼¥¿¡¦¥¡¼. */
796 MSymbol Mascii_compatible;
802 MSymbol Msubset_offset;
803 MSymbol Mdefine_coding;
810 @name Variables: Symbols representing charset methods.
812 These are the predefined symbols that can be a value of the
813 #Mmethod parameter of a charset used in an argument to the
814 mchar_define_charset () function.
816 A method specifies how code-points and character codes are
817 converted. See the documentation of the mchar_define_charset ()
818 function for the details. */
821 @name ÊÑ¿ô: ʸ»ú¥»¥Ã¥È¤Î¥á¥½¥Ã¥É»ØÄê¤Ë»È¤ï¤ì¤ë¥·¥ó¥Ü¥ë
823 ¤³¤ì¤é¤Ï¡¢Ê¸»ú¥»¥Ã¥È¤Î @e ¥á¥½¥Ã¥É ¤ò»ØÄꤹ¤ë¤¿¤á¤ÎÄêµÁºÑ¤ß¥·¥ó¥Ü
824 ¥ë¤Ç¤¢¤ê¡¢Ê¸»ú¥»¥Ã¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤʤ뤳¤È¤¬¤Ç¤¤ë¡£
825 ¤³¤ÎÃͤϴؿô mchar_define_charset () ¤Î°ú¿ô¤È¤·¤Æ»È¤ï¤ì¤ë
827 ¥á¥½¥Ã¥É¤È¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥³¡¼¥É¤òÁê¸ßÊÑ´¹¤¹¤ëºÝ¤ÎÊý¼°¤Î¤³
828 ¤È¤Ç¤¢¤ë¡£¾Ü¤·¤¯¤Ï´Ø¿ô mchar_define_charset () ¤Î²òÀâ¤ò»²¾È¤Î¤³¤È¡£ */
832 @brief Symbol for the offset type method of charset.
834 The symbol #Moffset has the name <tt>"offset"</tt> and, when used
835 as a value of #Mmethod parameter of a charset, it means that the
836 conversion of code-points and character codes of the charset is
837 done by this calculation:
840 CHARACTER-CODE = CODE-POINT - MIN-CODE + MIN-CHAR
843 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
844 and MIN-CHAR is a value of #Mmin_char parameter. */
847 @brief ¥ª¥Õ¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
849 ¥·¥ó¥Ü¥ë #Moffset ¤Ï <tt>"offset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã
850 ¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó
851 ¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤¬°Ê²¼¤Î¼°¤Ë½¾¤Ã¤Æ¹Ô¤ï¤ì¤ë¤³¤È
855 ʸ»ú¥³¡¼¥É = ¥³¡¼¥É¥Ý¥¤¥ó¥È - MIN-CODE + MIN-CHAR
858 ¤³¤³¤Ç¡¢MIN-CODE ¤Ïʸ»ú¥»¥Ã¥È¤Î #Mmin_code ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤǤ¢¤ê¡¢MIN-CHAR ¤Ï
859 #Mmin_char ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤǤ¢¤ë¡£ */
864 /***en @brief Symbol for the map type method of charset.
866 The symbol #Mmap has the name <tt>"map"</tt> and, when used as a
867 value of #Mmethod parameter of a charset, it means that the
868 conversion of code-points and character codes of the charset is
869 done by map looking up. The map must be given by #Mmapfile
872 /***ja @brief ¥Þ¥Ã¥×·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
874 ¥·¥ó¥Ü¥ë #Mmap ¤Ï <tt>"map"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã¥È¤Î
875 #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤È
876 ʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤¬
877 ¥Þ¥Ã¥×¤ò»²¾È¤¹¤ë¤³¤È¤Ë¤è¤Ã¤Æ¹Ô¤ï¤ì¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£¥Þ¥Ã¥×¤Ï
878 #Mmapfile ¥Ñ¥é¥á¡¼¥¿¤È¤·¤ÆÍ¿¤¨¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ */
883 /***en @brief Symbol for the unify type method of charset.
885 The symbol #Munify has the name <tt>"unify"</tt> and, when used as
886 a value of #Mmethod parameter of a charset, it means that the
887 conversion of code-points and character codes of the charset is
888 done by map looking up and offsetting. The map must be given by
889 #Mmapfile parameter. For this kind of charset, a unique
890 continuous character code space for all characters is assigned.
891 If the map has an entry for a code-point, the conversion is done
892 by looking up the map. Otherwise, the conversion is done by this
896 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
899 where, MIN-CODE is a value of #Mmin_code parameter of the charset,
900 and LOWEST-CHAR-CODE is the lowest character code of the assigned
903 /***ja @brief ¥æ¥Ë¥Õ¥¡¥¤·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
905 ¥·¥ó¥Ü¥ë #Munify ¤Ï <tt>"unify"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã
906 ¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¥³¡¼¥É¥Ý¥¤¥ó
907 ¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤¬¡¢¥Þ¥Ã¥×¤Î»²¾È¤È¥ª¥Õ¥»¥Ã¥È¤Î
908 ÁȤ߹ç¤ï¤»¤Ë¤è¤Ã¤Æ¹Ô¤ï¤ì¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£¥Þ¥Ã¥×¤Ï#Mmapfile ¥Ñ¥é¥á¡¼
909 ¥¿¤È¤·¤ÆÍ¿¤¨¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£¤³¤Î¼ï¤Î³Æʸ»ú¥»¥Ã¥È¤Ë¤Ï¡¢Á´Ê¸»ú¤ËÂФ·
910 ¤ÆϢ³¤¹¤ë¥³¡¼¥É¥¹¥Ú¡¼¥¹¤¬¤½¤ì¤¾¤ì³ä¤êÅö¤Æ¤é¤ì¤ë¡£
912 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤¬¥Þ¥Ã¥×¤Ë´Þ¤Þ¤ì¤Æ¤¤¤ì¤Ð¡¢ÊÑ´¹¤Ï¥Þ¥Ã¥×»²¾È¤Ë¤è¤Ã¤Æ¹Ô
913 ¤ï¤ì¤ë¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢°Ê²¼¤Î¼°¤Ë½¾¤¦¡£
916 CHARACTER-CODE = CODE-POINT - MIN-CODE + LOWEST-CHAR-CODE
919 ¤³¤³¤Ç¡¢MIN-CODE ¤Ïʸ»ú¥»¥Ã¥È¤Î #Mmin_code ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤǤ¢¤ê¡¢
920 LOWEST-CHAR-CODE ¤Ï³ä¤êÅö¤Æ¤é¤ì¤¿¥³¡¼¥É¥¹¥Ú¡¼¥¹¤ÎºÇ¤â¾®¤µ¤¤Ê¸»ú¥³¡¼
928 @brief Symbol for the subset type method of charset.
930 The symbol #Msubset has the name <tt>"subset"</tt> and, when used
931 as a value of #Mmethod parameter of a charset, it means that the
932 charset is a subset of a parent charset. The parent charset must
933 be given by #Mparents parameter. The conversion of code-points
934 and character codes of the charset is done conceptually by this
938 CHARACTER-CODE = PARENT-CODE (CODE-POINT - SUBSET-OFFSET)
941 where, PARENT-CODE is a pseudo function that returns the character
942 code of a given code-point in the parent charset, and SUBSET-OFFSET is a
943 value given by #Msubset_offset parameter. */
945 /***ja @brief ¥µ¥Ö¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
947 ¥·¥ó¥Ü¥ë #Msubset ¤Ï <tt>"subset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú¥»¥Ã
948 ¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¤³¤Îʸ»ú¥»¥Ã
949 ¥È¤¬Ê̤Îʸ»ú¥»¥Ã¥È¡Ê¿Æʸ»ú¥»¥Ã¥È¡Ë¤ÎÉôʬ½¸¹ç¤Ç¤¢¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
950 ¿Æʸ»ú¥»¥Ã¥È¤Ï #Mparents ¥Ñ¥é¥á¡¼¥¿¤Ë¤è¤Ã¤ÆÍ¿¤¨¤é¤ì¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
951 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤Èʸ»ú¥»¥Ã¥È¤Îʸ»ú¥³¡¼¥É¤Î´Ö¤ÎÊÑ´¹¤Ï¡¢³µÇ°Åª¤Ë¤Ï
955 CHARACTER-CODE = PARENT-CODE (CODE-POINT) + SUBSET-OFFSET
958 ¤³¤³¤Ç PARENT-CODE ¤Ï CODE-POINT ¤Î¿Æʸ»ú¥»¥Ã¥ÈÃæ¤Ç¤Îʸ»ú¥³¡¼¥É¤ò
959 ÊÖ¤¹µ¼´Ø¿ô¤Ç¤¢¤ê¡¢SUBSET-OFFSET ¤Ï #Msubset_offset parameter ¤ÇÍ¿
967 @brief Symbol for the superset type method of charset.
969 The symbol #Msuperset has the name <tt>"superset"</tt> and, when
970 used as a value of #Mmethod parameter of a charset, it means that
971 the charset is a superset of parent charsets. The parent charsets
972 must be given by #Mparents parameter. */
975 @brief ¥¹¡¼¥Ñ¡¼¥»¥Ã¥È·¿¤Î¥á¥½¥Ã¥É¤ò¼¨¤¹¥·¥ó¥Ü¥ë
977 ¥·¥ó¥Ü¥ë #Msuperset ¤Ï <tt>"superset"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢Ê¸»ú
978 ¥»¥Ã¥È¤Î #Mmethod ¥Ñ¥é¥á¡¼¥¿¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤¿¾ì¹ç¤Ë¤Ï¡¢¤³¤Îʸ»ú
979 ¥»¥Ã¥È¤¬Ê̤Îʸ»ú¥»¥Ã¥È¡Ê¿Æʸ»ú¥»¥Ã¥È¡Ë¤Î¾å°Ì½¸¹ç¤Ç¤¢¤ë¤³¤È¤ò°ÕÌ£¤¹
980 ¤ë¡£¿Æʸ»ú¥»¥Ã¥È¤Ï #Mparents ¥Ñ¥é¥á¡¼¥¿¤Ë¤è¤Ã¤ÆÍ¿¤¨¤é¤ì¤Ê¤¯¤Æ¤Ï¤Ê
989 @brief Define a charset.
991 The mchar_define_charset () function defines a new charset and
992 makes it accessible via a symbol whose name is $NAME. $PLIST
993 specifies parameters of the charset as below:
997 <li> Key is #Mmethod, value is a symbol.
999 The value specifies the method for decoding/encoding code-points
1000 in the charset. It must be #Moffset, #Mmap (default), #Munify,
1001 #Msubset, or #Msuperset.
1003 <li> Key is #Mdimension, value is an integer
1005 The value specifies the dimension of code-points of the charset.
1006 It must be 1 (default), 2, 3, or 4.
1008 <li> Key is #Mmin_range, value is an unsigned integer
1010 The value specifies the minimum range of a code-point, which means
1011 that the Nth byte of the value is the minimum Nth byte of
1012 code-points of the charset. The default value is 0.
1014 <li> Key is #Mmax_range, value is an unsigned integer
1016 The value specifies the maximum range of a code-point, which means
1017 that the Nth byte of the value is the maximum Nth byte of
1018 code-points of the charset. The default value is 0xFF, 0xFFFF,
1019 0xFFFFFF, or 0xFFFFFFFF if the dimension is 1, 2, 3, or 4
1022 <li> Key is #Mmin_code, value is an unsigned integer
1024 The value specifies the minimum code-point of
1025 the charset. The default value is the minimum range.
1027 <li> Key is #Mmax_code, value is an unsigned integer
1029 The value specifies the maximum code-point of
1030 the charset. The default value is the maximum range.
1032 <li> Key is #Mascii_compatible, value is a symbol
1034 The value specifies whether the charset is ASCII compatible or
1035 not. If the value is #Mnil (default), it is not ASCII
1036 compatible, else compatible.
1038 <li> Key is #Mfinal_byte, value is an integer
1040 The value specifies the @e final @e byte of the charset registered
1041 in The International Registry. It must be 0 (default) or 48..127.
1042 The value 0 means that the charset is not in the registry.
1044 <li> Key is #Mrevision, value is an integer
1046 The value specifies the @e revision @e number of the charset
1047 registered in The International Registry. It must be 0..127. If
1048 the charset is not in The International Registry, the value is
1049 ignored. The value 0 means that the charset has no revision
1052 <li> Key is #Mmin_char, value is an integer
1054 The value specifies the minimum character code of the charset.
1055 The default value is 0.
1057 <li> Key is #Mmapfile, value is an M-text
1059 If the method is #Mmap or #Munify, data that contains
1060 mapping information is added to the m17n database by calling
1061 mdatabase_define () with the value as an argument $EXTRA_INFO,
1062 i.e. the value is used as a file name of the data.
1064 Otherwise, this parameter is ignored.
1066 <li> Key is #Mparents, value is a plist
1068 If the method is #Msubset, the value must be a plist of length
1069 1, and the value of the plist must be a symbol representing a
1072 If the method is #Msuperset, the value must be a plist of length
1073 less than 9, and the values of the plist must be symbols
1074 representing subset charsets.
1076 Otherwise, this parameter is ignored.
1078 <li> Key is #Mdefine_coding, value is a symbol
1080 If the dimension of the charset is 1, the value specifies whether
1081 or not to define a coding system of the same name whose type is
1082 #Mcharset. A coding system is defined if the value is not #Mnil.
1084 Otherwise, this parameter is ignored.
1089 If the operation was successful, mchar_define_charset () returns a
1090 symbol whose name is $NAME. Otherwise it returns #Mnil and
1091 assigns an error code to the external variable #merror_code. */
1094 @brief ʸ»ú¥»¥Ã¥È¤òÄêµÁ¤¹¤ë.
1096 ´Ø¿ô mchar_define_charset () ¤Ï¿·¤·¤¤Ê¸»ú¥»¥Ã¥È¤òÄêµÁ¤·¡¢¤½¤ì¤ò
1097 $NAME ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£
1098 $PLIST ¤ÏÄêµÁ¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î¥Ñ¥é¥á¡¼¥¿¤ò°Ê²¼¤Î¤è¤¦¤Ë»ØÄꤹ¤ë¡£
1102 <li> ¥¡¼¤¬ #Mmethod ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
1104 Ãͤϡ¢#Moffset, #Mmap (¥Ç¥Õ¥©¥ë¥ÈÃÍ), #Munify, #Msubset,
1105 #Msuperset ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ê¡¢Ê¸»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤ò¥Ç¥³¡¼¥É¡¿
1106 ¥¨¥ó¥³¡¼¥É¤¹¤ëºÝ¤Î¥á¥½¥Ã¥É¤ò»ØÄꤹ¤ë¡£
1108 <li> ¥¡¼¤¬ #Mdimension ¤ÇÃͤ¬À°¿ôÃͤλþ
1110 Ãͤϡ¢1 (¥Ç¥Õ¥©¥ë¥ÈÃÍ), 2, 3, 4 ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ê¡¢Ê¸»ú¥»¥Ã¥È¤Î¥³¡¼
1111 ¥É¥Ý¥¤¥ó¥È¤Î¼¡¸µ¤Ç¤¢¤ë¡£
1113 <li> ¥¡¼¤¬ #Mmin_range ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1115 Ãͤϥ³¡¼¥É¥Ý¥¤¥ó¥È¤ÎºÇ¾®¤ÎÃͤǤ¢¤ë¡£¤¹¤Ê¤ï¤Á¡¢¤³¤ÎÃͤΠN ÈÖÌܤΥÐ
1116 ¥¤¥È¤Ï¤³¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î N ÈÖÌܤΥХ¤¥È¤ÎºÇ¾®¤Î¤â¤Î
1117 ¤È¤Ê¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃÍ¤Ï 0 ¡£
1119 <li> ¥¡¼¤¬ #Mmax_range ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1121 Ãͤϥ³¡¼¥É¥Ý¥¤¥ó¥È¤ÎºÇÂç¤ÎÃͤǤ¢¤ë¡£¤¹¤Ê¤ï¤Á¡¢¤³¤ÎÃͤΠN ÈÖÌܤΥÐ
1122 ¥¤¥È¤Ï¤³¤Îʸ»ú¥»¥Ã¥È¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î N ÈÖÌܤΥХ¤¥È¤ÎºÇÂç¤Î¤â¤Î
1123 ¤È¤Ê¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃͤϡ¢¥³¡¼¥É¥Ý¥¤¥ó¥È¤Î¼¡¸µ¤¬ 1, 2, 3, 4 ¤Î»þ¡¢¤½
1124 ¤ì¤¾¤ì 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF ¡£
1126 <li> ¥¡¼¤¬ #Mmin_code ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1128 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇ¾®¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ç¤¢¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃͤÏ
1131 <li> ¥¡¼¤¬ #Mmax_code ¤ÇÃͤ¬ÈóÉéÀ°¿ôÃͤλþ
1133 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇÂç¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ç¤¢¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃͤÏ
1136 <li> ¥¡¼¤¬ #Mascii_compatible ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
1138 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤¬ ASCII ¸ß´¹¤Ç¤¢¤ë¤«¡¹¤«¤ò¼¨¤¹¡£¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
1139 #Mnil ¤Ç¤¢¤ì¤Ð¸ß´¹¤Ç¤Ï¤Ê¤¯¡¢¤½¤ì°Ê³°¤Î¾ì¹ç¤Ï¸ß´¹¤Ç¤¢¤ë¡£
1141 <li> ¥¡¼¤¬ #Mfinal_byte ¤ÇÃͤ¬À°¿ôÃͤλþ
1143 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤Î The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë @e
1144 ½ªÃ¼¥Ð¥¤¥È¤Ç¤¢¤ê¡¢0 (¥Ç¥Õ¥©¥ë¥ÈÃÍ) ¤Ç¤¢¤ë¤« 32..127 ¤Ç¤¢¤ë¡£0 ¤ÏÅÐ
1145 Ï¿¤µ¤ì¤Æ¤¤¤Ê¤¤¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
1147 <li> ¥¡¼¤¬ #Mrevision ¤ÇÃͤ¬À°¿ôÃͤλþ
1149 ÃÍ¤Ï The International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ë @e revision @e
1150 number ¤Ç¤¢¤ê¡¢0..127 ¤Ç¤¢¤ë¡£Ê¸»ú¥»¥Ã¥È¤¬ÅÐÏ¿¤µ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï
1151 ¤³¤ÎÃͤÏ̵»ë¤µ¤ì¤ë¡£ 0 ¤Ï revision number ¤¬Â¸ºß¤·¤Ê¤¤¤³¤È¤ò°ÕÌ£¤¹
1154 <li> ¥¡¼¤¬ #Mmin_char ¤ÇÃͤ¬À°¿ôÃͤλþ
1156 ÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤ÎºÇ¾®¤Îʸ»ú¥³¡¼¥É¤Ç¤¢¤ë¡£¥Ç¥Õ¥©¥ë¥ÈÃÍ¤Ï 0 ¡£
1158 <li> ¥¡¼¤¬ #Mmapfile ¤ÇÃͤ¬ M-text ¤Î»þ
1160 ¥á¥½¥Ã¥É¤¬ #Mmap ¤« #Munify ¤Î»þ¡¢´Ø¿ô mdatabase_define () ¤ò¤³¤Î
1161 Ãͤò°ú¿ô $EXTRA_INFO ¤È¤·¤Æ¸Æ¤Ö¤³¤È¤Ë¤è¤Ã¤Æ¡¢¥Þ¥Ã¥Ô¥ó¥°¤Ë´Ø¤¹¤ë¥Ç¡¼
1162 ¥¿¤¬ m17n ¥Ç¡¼¥¿¥Ù¡¼¥¹¤ËÄɲ䵤ì¤ë¡£¤¹¤Ê¤ï¤Á¡¢¤³¤ÎÃͤϥǡ¼¥¿¥Õ¥¡¥¤
1165 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢¤³¤Î¥Ñ¥é¥á¡¼¥¿¤Ï̵»ë¤µ¤ì¤ë¡£
1167 <li> ¥¡¼¤¬ #Mparents ¤ÇÃͤ¬ plist ¤Î»þ
1169 ¥á¥½¥Ã¥É¤¬ #Msubset ¤Ê¤é¤Ð¡¢ÃͤÏŤµ 1 ¤Î plist ¤Ç¤¢¤ê¡¢¤½¤ÎÃͤϤ³
1170 ¤Îʸ»ú¥»¥Ã¥È¤Î¾å°Ì½¸¹ç¤È¤Ê¤ëʸ»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤Ç¤¢¤ë¡£
1172 ¥á¥½¥Ã¥É¤¬ #Msuperset ¤Ê¤é¤Ð¡¢ÃͤÏŤµ 8 °Ê²¼¤Î plist ¤Ç¤¢¤ê¡¢¤½¤ì
1173 ¤é¤ÎÃͤϤ³¤Îʸ»ú¥»¥Ã¥È¤Î²¼°Ì½¸¹ç¤Ç¤¢¤ëʸ»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤Ç¤¢
1176 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢¤³¤Î¥Ñ¥é¥á¡¼¥¿¤Ï̵»ë¤µ¤ì¤ë¡£
1178 <li> ¥¡¼¤¬ #Mdefine_coding ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
1180 ʸ»ú¥»¥Ã¥È¤Î¼¡¸µ¤¬ 1 ¤Ê¤é¤Ð¡¢ÃÍ¤Ï #Mcharset ¥¿¥¤¥×¤ÇƱ¤¸Ì¾Á°¤Î¥³¡¼
1181 ¥É·Ï¤òÄêµÁ¤¹¤ë¤«¤É¤¦¤«¤ò»ØÄꤹ¤ë¡£Ãͤ¬ #Mnil °Ê³°¤Î¾ì¹ç¤ËÄêµÁ¤¹¤ë¡£
1183 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢¤³¤Î¥Ñ¥é¥á¡¼¥¿¤Ï̵»ë¤µ¤ì¤ë¡£
1187 @return ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mchar_define_charset() ¤Ï $NAME ¤È¤¤¤¦Ì¾
1188 Á°¤Î¥· ¥ó¥Ü¥ë¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð #Mnil ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
1189 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£*/
1193 @c MERROR_CHARSET */
1196 mchar_define_charset (char *name, MPlist *plist)
1198 MSymbol sym = msymbol (name);
1201 unsigned min_range, max_range;
1203 MText *mapfile = (MText *) mplist_get (plist, Mmapfile);
1205 MSTRUCT_CALLOC (charset, MERROR_CHARSET);
1206 charset->name = sym;
1207 charset->method = (MSymbol) mplist_get (plist, Mmethod);
1208 if (! charset->method)
1211 charset->method = Mmap;
1213 charset->method = Moffset;
1215 if (charset->method == Mmap || charset->method == Munify)
1218 MERROR (MERROR_CHARSET, Mnil);
1219 mdatabase_define (Mcharset, sym, Mnil, Mnil, NULL, mapfile->data);
1221 if (! (charset->dimension = (int) mplist_get (plist, Mdimension)))
1222 charset->dimension = 1;
1224 min_range = (unsigned) mplist_get (plist, Mmin_range);
1225 if ((pl = mplist_find_by_key (plist, Mmax_range)))
1227 max_range = (unsigned) MPLIST_VAL (pl);
1228 if (max_range >= 0x1000000)
1229 charset->dimension = 4;
1230 else if (max_range >= 0x10000 && charset->dimension < 3)
1231 charset->dimension = 3;
1232 else if (max_range >= 0x100 && charset->dimension < 2)
1233 charset->dimension = 2;
1235 else if (charset->dimension == 1)
1237 else if (charset->dimension == 2)
1239 else if (charset->dimension == 3)
1240 max_range = 0xFFFFFF;
1242 max_range = 0xFFFFFFFF;
1244 memset (charset->code_range, 0, sizeof charset->code_range);
1245 for (i = 0; i < charset->dimension; i++, min_range >>= 8, max_range >>= 8)
1247 charset->code_range[i * 4] = min_range & 0xFF;
1248 charset->code_range[i * 4 + 1] = max_range & 0xFF;
1250 if ((charset->min_code = (int) mplist_get (plist, Mmin_code)) < min_range)
1251 charset->min_code = min_range;
1252 if ((charset->max_code = (int) mplist_get (plist, Mmax_code)) > max_range)
1253 charset->max_code = max_range;
1254 charset->ascii_compatible
1255 = (MSymbol) mplist_get (plist, Mascii_compatible) != Mnil;
1256 charset->final_byte = (int) mplist_get (plist, Mfinal_byte);
1257 charset->revision = (int) mplist_get (plist, Mrevision);
1258 charset->min_char = (int) mplist_get (plist, Mmin_char);
1259 pl = (MPlist *) mplist_get (plist, Mparents);
1260 charset->nparents = pl ? mplist_length (pl) : 0;
1261 if (charset->nparents > 8)
1262 charset->nparents = 8;
1263 for (i = 0; i < charset->nparents; i++, pl = MPLIST_NEXT (pl))
1265 MSymbol parent_name;
1267 if (MPLIST_KEY (pl) != Msymbol)
1268 MERROR (MERROR_CHARSET, Mnil);
1269 parent_name = MPLIST_SYMBOL (pl);
1270 if (! (charset->parents[i] = MCHARSET (parent_name)))
1271 MERROR (MERROR_CHARSET, Mnil);
1274 charset->subset_offset = (int) mplist_get (plist, Msubset_offset);
1276 msymbol_put (sym, Mcharset, charset);
1277 charset = make_charset (charset);
1280 msymbol_put (msymbol__canonicalize (sym), Mcharset, charset);
1282 for (pl = (MPlist *) mplist_get (plist, Maliases);
1283 pl && MPLIST_KEY (pl) == Msymbol;
1284 pl = MPLIST_NEXT (pl))
1286 MSymbol alias = MPLIST_SYMBOL (pl);
1288 msymbol_put (alias, Mcharset, charset);
1289 msymbol_put (msymbol__canonicalize (alias), Mcharset, charset);
1292 if (mplist_get (plist, Mdefine_coding)
1293 && charset->dimension == 1
1294 && charset->code_range[0] == 0 && charset->code_range[1] == 255)
1295 mconv__register_charset_coding (sym);
1302 @brief Resolve charset name.
    The mchar_resolve_charset () function returns $SYMBOL if it
    represents a charset.  Otherwise, it canonicalizes $SYMBOL as a
    charset name and, if the canonicalized name represents a charset,
    returns that name.  Otherwise, it returns #Mnil.  */
1310 @brief ʸ»ú¥»¥Ã¥È̾¤ò²ò·è¤¹¤ë
1312 ´Ø¿ô mchar_resolve_charset () ¤Ï $SYMBOL ¤¬Ê¸»ú¥»¥Ã¥È¤ò¼¨¤·¤Æ¤¤¤ì
1315 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢$SYMBOL ¤òʸ»ú¥»¥Ã¥È̾¤È¤·¤ÆÀµµ¬²½¤·¡¢¤½¤ì¤¬Ê¸»ú¥»¥Ã
1316 ¥È¤ò¼¨¤·¤Æ¤¤¤Æ¤¤¤ì¤ÐÀµµ¬²½¤·¤¿¤â¤Î¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¡¢#Mnil ¤ò
1320 mchar_resolve_charset (MSymbol symbol)
1322 MCharset *charset = (MCharset *) msymbol_get (symbol, Mcharset);
1326 symbol = msymbol__canonicalize (symbol);
1327 charset = (MCharset *) msymbol_get (symbol, Mcharset);
1330 return (charset ? charset->name : Mnil);
1336 @brief List symbols representing a charset.
    The mchar_list_charset () function makes an array of symbols, each
    representing a charset, stores a pointer to the array in the place
    pointed to by $SYMBOLS, and returns the length of the array.  */
1343 @brief ʸ»ú¥»¥Ã¥È¤òɽ¤ï¤¹¥·¥ó¥Ü¥ë¤ÎÎóµó
1345 ´Ø¿ô mchar_list_charsets () ¤Ï¡¢Ê¸»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤òʤ٤¿ÇÛ
1346 Îó¤òºî¤ê¡¢$SYMBOLS ¤Ç¥Ý¥¤¥ó¥È¤µ¤ì¤¿¾ì½ê¤Ë¤³¤ÎÇÛÎó¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÃÖ
1347 ¤¡¢ÇÛÎó¤ÎŤµ¤òÊÖ¤¹¡£ */
1350 mchar_list_charset (MSymbol **symbols)
1354 MTABLE_MALLOC ((*symbols), charset_list.used, MERROR_CHARSET);
1355 for (i = 0; i < charset_list.used; i++)
1356 (*symbols)[i] = charset_list.charsets[i]->name;
1363 @brief Decode a code-point.
1365 The mchar_decode () function decodes code-point $CODE in the
1366 charset represented by the symbol $CHARSET_NAME to get a character
1370 If decoding was successful, mchar_decode () returns the decoded
1371 character code. Otherwise it returns -1. */
1374 @brief ¥³¡¼¥É¥Ý¥¤¥ó¥È¤ò¥Ç¥³¡¼¥É¤¹¤ë
1376 ´Ø¿ô mchar_decode () ¤Ï¡¢¥·¥ó¥Ü¥ë $CHARSET_NAME ¤Ç¼¨¤µ¤ì¤ëʸ»ú¥»¥Ã
1377 ¥ÈÆâ¤Î $CODE ¤È¤¤¤¦¥³¡¼¥É¥Ý¥¤¥ó¥È¤ò¥Ç¥³¡¼¥É¤·¤Æʸ»ú¥³¡¼¥É¤òÆÀ¤ë¡£
1380 ¥Ç¥³¡¼¥É¤¬À®¸ù¤¹¤ì¤Ð¡¢mchar_decode () ¤Ï¥Ç¥³¡¼¥É¤µ¤ì¤¿Ê¸»ú¥³¡¼¥É¤ò
1381 ÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤¹¡£ */
1388 mchar_decode (MSymbol charset_name, unsigned code)
1390 MCharset *charset = MCHARSET (charset_name);
1393 return MCHAR_INVALID_CODE;
1394 return DECODE_CHAR (charset, code);
1400 @brief Encode a character code.
1402 The mchar_encode () function encodes character code $C to get a
1403 code-point in the charset represented by the symbol $CHARSET_NAME.
1406 If encoding was successful, mchar_encode () returns the encoded
1407 code-point. Otherwise it returns #MCHAR_INVALID_CODE. */
1410 @brief ʸ»ú¥³¡¼¥É¤ò¥¨¥ó¥³¡¼¥É¤¹¤ë
1412 ´Ø¿ô mchar_encode () ¤Ï¡¢Ê¸»ú¥³¡¼¥É $C ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¥·¥ó¥Ü¥ë
1413 $CHARSET_NAME ¤Ç¼¨¤µ¤ì¤ëʸ»ú¥»¥Ã¥ÈÆâ¤Ë¤ª¤±¤ë¥³¡¼¥É¥Ý¥¤¥ó¥È¤òÆÀ¤ë¡£
1416 ¥¨¥ó¥³¡¼¥É¤¬À®¸ù¤¹¤ì¤Ð¡¢mchar_encode () ¤Ï¥¨¥ó¡¼¥É¤µ¤ì¤¿¥³¡¼¥É¥Ý¥¤
1417 ¥ó¥È¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð #MCHAR_INVALID_CODE ¤òÊÖ¤¹¡£ */
1424 mchar_encode (MSymbol charset_name, int c)
1426 MCharset *charset = MCHARSET (charset_name);
1429 return MCHAR_INVALID_CODE;
1430 return ENCODE_CHAR (charset, c);
1436 @brief Call a function for all the characters in a specified charset.
    The mchar_map_charset () function calls $FUNC for all the
1439 characters in the charset named $CHARSET_NAME. A call is done for
1440 a chunk of consecutive characters rather than character by
1443 $FUNC receives three arguments: $FROM, $TO, and $ARG. $FROM and
1444 $TO specify the range of character codes in $CHARSET. $ARG is the
    If the operation was successful, mchar_map_charset () returns 0.
1449 Otherwise, it returns -1 and assigns an error code to the external
1450 variable #merror_code. */
1453 @brief »ØÄꤷ¤¿Ê¸»ú¥»¥Ã¥È¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ËÂФ·¤Æ´Ø¿ô¤ò¸Æ¤Ö
1455 ´Ø¿ô mcharset_map_chars () ¤Ï $CHARSET_NAME ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Äʸ»ú¥»¥Ã
1456 ¥ÈÃæ¤Î¤¹¤Ù¤Æ¤Îʸ»ú¤ËÂФ·¤Æ $FUNC ¤ò¸Æ¤Ö¡£¸Æ¤Ó½Ð¤·¤Ï°ìʸ»úËè¤Ç¤Ï¤Ê
1457 ¤¯¡¢Ï¢Â³¤·¤¿Ê¸»ú¤Î¤Þ¤È¤Þ¤êñ°Ì¤Ç¹Ô¤Ê¤ï¤ì¤ë¡£
1459 ´Ø¿ô $FUNC ¤Ë¤Ï$FROM, $TO, $ARG ¤Î£³°ú¿ô¤¬ÅϤµ¤ì¤ë¡£$FROM ¤È $TO
1460 ¤Ï $CHARSET Ãæ¤Îʸ»ú¥³¡¼¥É¤ÎÈϰϤò»ØÄꤹ¤ë¡£$ARG ¤Ï $FUNC_ARG ¤ÈƱ
1464 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mcharset_map_chars () ¤Ï 0 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð
1465 -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
1469 @c MERROR_CHARSET */
1472 mchar_map_charset (MSymbol charset_name,
1473 void (*func) (int from, int to, void *arg),
1478 charset = MCHARSET (charset_name);
1480 MERROR (MERROR_CHARSET, -1);
1482 if (charset->encoder)
1484 int c = charset->min_char;
1487 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) < 0)
1489 while (c <= charset->max_char)
1491 if ((int) mchartable__lookup (charset->encoder, c, &next_c, 1) >= 0)
1492 (*func) (c, next_c - 1, func_arg);
1497 (*func) (charset->min_char, charset->max_char, func_arg);