1 /* character.c -- character module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 @addtogroup m17nCharacter
25 @brief Character objects and API for them.
27 The m17n library represents a @e character by a character code (an
28 integer). The minimum character code is @c 0. The maximum
29 character code is defined by the macro #MCHAR_MAX. It is
30 assured that #MCHAR_MAX is not smaller than @c 0x3FFFFF (22
33 Characters @c 0 to @c 0x10FFFF are equivalent to the Unicode
34 characters of the same code values.
36 A character can have zero or more properties called @e character
37 @e properties. A character property consists of a @e key and a
38 @e value, where key is a symbol and value is anything that can be
39 cast to <tt>(void *)</tt>. "The character property that belongs
40 to character C and whose key is K" may be shortened to "the K
44 @addtogroup m17nCharacter
45 @brief 文字オブジェクトとそれに関する API.
47 m17n ライブラリは @e 文字 を文字コード（整数）で表現する。
48 最小の文字コードは @c 0 であり、最大の文字コードはマクロ #MCHAR_MAX
49 によって定義されている。#MCHAR_MAX は @c 0x3FFFFF（22ビット）
50 以上であることが保証されている。
52 @c 0 から @c 0x10FFFF までの文字は、それと同じ値を持つ Unicode
53 の文字に割り当てられている。
55 各文字は @e 文字プロパティ と呼ぶプロパティを 0 個以上持つことができる。
56 文字プロパティは @e キー と @e 値 からなる。
57 キーはシンボルであり、値は <tt>(void *)</tt> 型にキャストできるものなら何でもよい。
58 「文字 C の文字プロパティのうちキーが K であるもの」を簡単に「文字 C
59 の K プロパティ」と呼ぶことがある。 */
62 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
63 /*** @addtogroup m17nInternal
73 #include "m17n-core.h"
74 #include "m17n-misc.h"
84 static MPlist *char_prop_list;
87 free_string (int from, int to, void *str, void *arg)
98 char_prop_list = mplist ();
101 = mchar_define_property ("name", Mstring);
103 = mchar_define_property ("category", Msymbol);
105 = mchar_define_property ("combining-class", Minteger);
107 = mchar_define_property ("bidirectional-category", Msymbol);
109 = mchar_define_property ("simple-case-folding", Minteger);
110 Mcomplicated_case_folding
111 = mchar_define_property ("complicated-case-folding", Mtext);
113 = mchar_define_property ("script", Msymbol);
123 for (p = char_prop_list; mplist_key (p) != Mnil; p = mplist_next (p))
125 MCharPropRecord *record = mplist_value (p);
129 if (record->type == Mstring)
130 mchartable_map (record->table, NULL, free_string, NULL);
131 M17N_OBJECT_UNREF (record->table);
135 M17N_OBJECT_UNREF (char_prop_list);
139 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
143 /*** @addtogroup m17nCharacter */
149 @brief Maximum character code.
151 The macro #MCHAR_MAX gives the maximum character code. */
154 @brief 文字コードの最大値.
156 マクロ #MCHAR_MAX は文字コードの最大値を表す。 */
160 #endif /* FOR_DOXYGEN */
163 @ingroup m17nCharacter
164 @name Variables: Keys of character properties
166 These symbols are used as keys of character properties. */
169 @name 変数: 文字プロパティのキー
171 これらのシンボルは文字プロパティのキーとして使われる。*/
176 @brief Key for script.
178 The symbol #Mscript has the name <tt>"script"</tt> and is used as the key
179 of a character property. The value of such a property is a symbol
180 representing the script to which the character belongs.
182 Each symbol that represents a script has one of the names listed in
183 the <em>Unicode Technical Report #24</em>. */
186 @brief スクリプトを表わすキー.
188 シンボル #Mscript は <tt>"script"</tt>
189 という名前を持ち、文字プロパティのキーとして使われる。
190 このプロパティの値は、この文字の属するスクリプトを表わすシンボルである。
192 スクリプトを表わすシンボルの名前は、<em>Unicode Technical Report
193 #24</em> にリストされているもののいずれかである。 */
200 @brief Key for character name.
202 The symbol #Mname has the name <tt>"name"</tt> and is used as
203 the key of a character property. The value of such a property is a
204 C-string representing the name of the character. */
207 @brief 名前を表わすキー.
209 シンボル #Mname は <tt>"name"</tt>
210 という名前を持ち、文字プロパティのキーとして使われる。
211 このプロパティの値はその文字の名前を表わす C の文字列である。 */
218 @brief Key for general category.
220 The symbol #Mcategory has the name <tt>"category"</tt> and is
221 used as the key of a character property. The value of such a
222 property is a symbol representing the <em>general category</em> of
225 Each symbol that represents a general category has one of the
226 names listed as abbreviations for <em>General Category</em> in
230 @brief 一般カテゴリを表わすキー.
232 シンボル #Mcategory は <tt>"category"</tt>
233 という名前を持ち、文字プロパティのキーとして使われる。
234 このプロパティの値は、対応する <em>一般カテゴリ</em> を表わすシンボルである。
236 一般カテゴリを表わすシンボルの名前は、<em>General Category</em>
237 の省略形として Unicode に定義されているものである。 */
244 @brief Key for canonical combining class.
246 The symbol #Mcombining_class has the name
247 <tt>"combining-class"</tt> and is used as the key of a character
248 property. The value of such a property is an integer that
249 represents the <em>canonical combining class</em> of the character.
251 The meaning of each integer that represents a canonical combining
252 class is identical to the one defined in Unicode. */
255 @brief 標準結合クラスを表わすキー.
257 シンボル #Mcombining_class は <tt>"combining-class"</tt>
258 という名前を持ち、文字プロパティのキーとして使われる。
259 このプロパティの値は、対応する @e 標準結合クラス を表わす整数である。
261 標準結合クラスを表わす整数の意味は、Unicode
262 に定義されているものと同じである。 */
264 MSymbol Mcombining_class;
268 @brief Key for bidi category.
270 The symbol #Mbidi_category has the name <tt>"bidirectional-category"</tt>
271 and is used as the key of a character property. The value of such
272 a property is a symbol that represents the <em>bidirectional
273 category</em> of the character.
275 Each symbol that represents a bidirectional category has one of
276 the names listed as types of <em>Bidirectional Category</em> in
280 @brief 双方向カテゴリを表わすキー.
282 シンボル #Mbidi_category は <tt>"bidirectional-category"</tt>
283 という名前を持ち、文字プロパティのキーとして使われる。
284 このプロパティの値は、対応する @e 双方向カテゴリ を表わすシンボルである。
286 双方向カテゴリを表わすシンボルの名前は、<em>Bidirectional
287 Category</em> の型として Unicode に定義されているものである。 */
289 MSymbol Mbidi_category;
293 @brief Key for corresponding single lowercase character.
295 The symbol #Msimple_case_folding has the name
296 <tt>"simple-case-folding"</tt> and is used as the key of a
297 character property. The value of such a property is the
298 corresponding single lowercase character that is used when
299 comparing M-texts ignoring case.
301 If a character requires a complicated comparison (i.e. cannot be
302 compared by simply mapping to another single character), the value
303 of such a property is @c 0xFFFF. In this case, the character has
304 another property whose key is #Mcomplicated_case_folding. */
307 @brief 対応する小文字一文字を表わすキー.
309 シンボル #Msimple_case_folding は <tt>"simple-case-folding"</tt>
310 という名前を持ち、文字プロパティのキーとして使われる。
311 このプロパティの値は、対応する小文字一文字であり、大文字／小文字の区別を無視した文字列比較の際に使われる。
313 複雑な比較方法を必要とする文字であった場合
314 （別の一文字と対応付けることによって比較できない場合）、このプロパティの値は
315 @c 0xFFFF になる。この場合その文字は、#Mcomplicated_case_folding
316 というキーの文字プロパティを持つ。 */
318 MSymbol Msimple_case_folding;
320 @brief Key for corresponding multiple lowercase characters.
322 The symbol #Mcomplicated_case_folding has the name
323 <tt>"complicated-case-folding"</tt> and is used as the key of a
324 character property. The value of such a property is the
325 corresponding M-text that contains a sequence of lowercase
326 characters to be used for comparing M-texts ignoring case. */
329 @brief 対応する小文字の列を表わすキー.
331 シンボル #Mcomplicated_case_folding は
332 <tt>"complicated-case-folding"</tt>
333 という名前を持ち、文字プロパティのキーとして使われる。
334 このプロパティの値は、対応する小文字列からなる M-text であり、大文字／小文字の区別を無視した文字列比較の際に使
338 MSymbol Mcomplicated_case_folding;
344 @brief Define a character property.
346 The mchar_define_property () function searches the m17n database
347 for data whose tags are \<#Mchar_table, $TYPE, $SYM \>.
348 Here, $SYM is a symbol whose name is $NAME. $TYPE must be
349 #Mstring, #Mtext, #Msymbol, #Minteger, or #Mplist.
352 If the operation was successful, mchar_define_property () returns
353 $SYM. Otherwise it returns #Mnil. */
356 @brief 文字プロパティを定義する.
358 関数 mchar_define_property () は、 \<#Mchar_table, $TYPE, $SYM \>
359 というタグを持ったデータベースを m17n 言語情報ベースから探す。
360 ここで $SYM は $NAME という名前のシンボルである。$TYPE は#Mstring,
361 #Mtext, #Msymbol, #Minteger, #Mplist のいずれかでなければならない。
364 処理に成功すれば mchar_define_property () は$SYM を返す。
365 失敗した場合は #Mnil を返す。 */
372 mchar_get_prop (), mchar_put_prop () */
375 mchar_define_property (const char *name, MSymbol type)
/* Register the character property named NAME, whose values have type
   TYPE, in char_prop_list under the symbol made from NAME.  */
377 MSymbol key = msymbol (name);
378 MCharPropRecord *record;
/* Look for a record already registered under this key.  */
380 record = mplist_get (char_prop_list, key);
/* Release the reference to the table held by the record.
   NOTE(review): the condition selecting this path is not visible in
   this excerpt; presumably it runs when KEY was already defined.  */
384 M17N_OBJECT_UNREF (record->table);
/* Allocate a record (MERROR_CHAR is the error code handed to the
   allocation macro) and register it under KEY.
   NOTE(review): presumably the alternative path, taken when no record
   existed yet -- confirm against the full source.  */
388 MSTRUCT_CALLOC (record, MERROR_CHAR);
389 mplist_put (char_prop_list, key, record);
/* If a database finder is installed and it returns a database for the
   tags <Mchar_table, TYPE, KEY, Mnil>, remember that database in the
   record and leave the table unset.  mchar_get_prop () and
   mchar_put_prop () later fill the table from record->mdb.  */
393 if (mdatabase__finder
394 && (record->mdb = (*mdatabase__finder) (Mchar_table, type, key, Mnil)))
396 record->table = NULL;
/* Create an empty table directly.  Integer properties use -1 as the
   default value; every other type uses NULL.
   NOTE(review): presumably the else branch of the test above; the
   branch structure is not visible in this excerpt.  */
400 void *default_value = NULL;
403 if (type == Minteger)
404 default_value = (void *) -1;
405 record->table = mchartable (type, default_value);
414 @brief Get the value of a character property.
416 The mchar_get_prop () function searches character $C for the
417 character property whose key is $KEY.
420 If the operation was successful, mchar_get_prop () returns the
421 value of the character property. Otherwise it returns @c
425 @brief 文字プロパティの値を得る.
427 関数 mchar_get_prop () は、文字 $C の文字プロパティのうちキーが
428 $KEY であるものを探す。
431 処理が成功すれば mchar_get_prop () は見つかったプロパティの値を返す。
432 失敗した場合は @c NULL を返す。
434 @latexonly \IPAlabel{mchar_get_prop} @endlatexonly
438 @c MERROR_SYMBOL, @c MERROR_DB
441 mchar_define_property (), mchar_put_prop () */
444 mchar_get_prop (int c, MSymbol key)
/* Return the value stored for character C in the table of the
   character property registered under KEY.  */
446 MCharPropRecord *record;
/* Find the record registered for KEY in the list of defined
   character properties.  */
448 record = mplist_get (char_prop_list, key);
/* Fill in the table by running the database loader on the record's
   database.
   NOTE(review): the condition guarding this load is not visible in
   this excerpt; presumably it runs only while the table is unset.  */
453 record->table = (*mdatabase__loader) (record->mdb);
/* Report MERROR_DB with NULL as the result value.
   NOTE(review): presumably taken when the load above fails; the test
   itself is not visible here.  */
455 MERROR (MERROR_DB, NULL);
/* Look up the entry for C in the property's table.  */
458 return mchartable_lookup (record->table, c);
464 @brief Set the value of a character property.
466 The mchar_put_prop () function searches character $C for the
467 character property whose key is $KEY and assigns $VAL to the value
468 of the found property.
471 If the operation was successful, mchar_put_prop () returns 0.
472 Otherwise, it returns -1. */
474 @brief 文字プロパティの値を設定する.
476 関数 mchar_put_prop () は、文字 $C の文字プロパティのうちキーが $KEY
477 であるものを探し、その値として $VAL を設定する。
480 処理が成功すれば mchar_put_prop () は0を返す。失敗した場合は-1を返す。 */
483 @c MERROR_SYMBOL, @c MERROR_DB
486 mchar_define_property (), mchar_get_prop () */
489 mchar_put_prop (int c, MSymbol key, void *val)
/* Store VAL as the value for character C in the table of the
   character property registered under KEY.  */
491 MCharPropRecord *record;
/* Find the record registered for KEY in the list of defined
   character properties.  */
493 record = mplist_get (char_prop_list, key);
/* Fill in the table by running the database loader on the record's
   database.
   NOTE(review): the condition guarding this load is not visible in
   this excerpt; presumably it runs only while the table is unset.  */
498 record->table = (*mdatabase__loader) (record->mdb);
/* Report MERROR_DB with -1 as the result value.
   NOTE(review): presumably taken when the load above fails; the test
   itself is not visible here.  */
500 MERROR (MERROR_DB, -1);
/* Set the entry for C in the property's table and pass back the
   result of mchartable_set ().  */
503 return mchartable_set (record->table, c, val);