1 /* language.c -- language (and script) module.
2 Copyright (C) 2003, 2004, 2006
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
28 #include "m17n-misc.h"
35 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
/* Plist assigned by init_language_list ().  */
37 static MPlist *language_list;
/* Plist assigned by init_script_list ().  */
38 static MPlist *script_list;
/* Plist onto which mlanguage_name_list () pushes one loaded name
   table per TARGET language symbol.  */
39 static MPlist *langname_list;
/* Find the database identified by TAG0..TAG3 with mdatabase_find ()
   and load it with mdatabase_load ().  The loaded plist is then
   filtered: an element is dropped with mplist__pop_unref () unless it
   is itself a plist whose first element is a symbol.
   NOTE(review): this listing omits some original lines (braces, the
   first half of the load check, the return statements), so the
   failure path and the returned value are not visible here.  */
42 load_lang_script_list (MSymbol tag0, MSymbol tag1, MSymbol tag2, MSymbol tag3)
44 MDatabase *mdb = mdatabase_find (tag0, tag1, tag2, tag3);
45 MPlist *plist, *pl, *p;
48 || ! (plist = mdatabase_load (mdb)))
50 /* Check at least if the plist is ((SYMBOL ...) ...). */
/* The loop has no increment clause; PL is advanced by the explicit
   MPLIST_NEXT assignment below.  Presumably mplist__pop_unref ()
   leaves PL referring to the following element -- confirm.  */
51 for (pl = plist; ! MPLIST_TAIL_P (pl);)
53 if (! MPLIST_PLIST_P (pl))
54 mplist__pop_unref (pl);
/* P is the nested plist; the test below inspects its first element.  */
57 p = MPLIST_PLIST (pl);
58 if (! MPLIST_SYMBOL_P (p))
59 mplist__pop_unref (pl);
61 pl = MPLIST_NEXT (pl);
/* Assign language_list from the database tagged
   (standard, Mlanguage, iso639, Mnil).  Callers in this file treat a
   negative result as failure.
   NOTE(review): the condition guarding the fallback below is not
   visible in this listing; presumably it is taken when the load
   yields nothing -- confirm.  */
68 init_language_list (void)
70 language_list = load_lang_script_list (msymbol ("standard"), Mlanguage,
71 msymbol ("iso639"), Mnil);
/* Fallback: install an empty plist, then signal MERROR_DB.  */
74 language_list = mplist ();
75 MERROR (MERROR_DB, -1);
/* Assign script_list from the database tagged
   (standard, Mscript, unicode, Mnil).  Callers in this file treat a
   negative result as failure.
   NOTE(review): the condition guarding the fallback below is not
   visible in this listing; presumably it is taken when the load
   yields nothing -- confirm.  */
82 init_script_list (void)
84 script_list = load_lang_script_list (msymbol ("standard"), Mscript,
85 msymbol ("unicode"), Mnil);
/* Fallback: install an empty plist, then signal MERROR_DB.  */
88 script_list = mplist ();
89 MERROR (MERROR_DB, -1);
/* Module set-up statements (the enclosing function header is not
   visible in this listing).  Register msymbol__serializer and
   msymbol__deserializer as the text-property serializer and
   deserializer functions of Mlanguage.  */
100 msymbol_put_func (Mlanguage, Mtext_prop_serializer,
101 M17N_FUNC (msymbol__serializer));
102 msymbol_put_func (Mlanguage, Mtext_prop_deserializer,
103 M17N_FUNC (msymbol__deserializer));
/* Create the symbols exported as Miso639_2 and Miso639_1.  */
104 Miso639_2 = msymbol ("iso639-2");
105 Miso639_1 = msymbol ("iso639-1");
/* The three lists start out NULL; they are assigned later by
   init_language_list (), init_script_list () and
   mlanguage_name_list () respectively.  */
107 language_list = script_list = langname_list = NULL;
/* Module tear-down statements (the enclosing function header is not
   visible in this listing).  Drop the references held on the
   language and script lists.  */
116 M17N_OBJECT_UNREF (language_list);
117 M17N_OBJECT_UNREF (script_list);
/* Each value of langname_list is unreferenced before the list
   itself; the values are the per-target plists pushed by
   mlanguage_name_list ().  */
119 MPLIST_DO (pl, langname_list)
120 M17N_OBJECT_UNREF (MPLIST_VAL (pl));
121 M17N_OBJECT_UNREF (langname_list);
127 @brief Get information about a language.
129 The mlanguage__info () function returns a well-formed @e plist that
130 contains information about $LANGUAGE. $LANGUAGE is a symbol whose
131 name is an ISO639-2 3-letter language code, an ISO639-1 2-letter
132 language code, or an English word.
134 The format of the plist is:
137 (ISO639-2 [ISO639-1 | nil] ENGLISH-NAME ["NATIVE-NAME" | nil]
138 ["REPRESENTATIVE-CHARACTERS"])
141 where, ISO639-2 is a symbol whose name is 3-letter language code
142 of ISO639-2, ISO639-1 is a symbol whose name is 2-letter language
143 code of ISO639-1, ENGLISH-NAME is a symbol whose name is the
144 English name of the language, "NATIVE-NAME" is an M-text written
145 in the most natural way in the language, and
146 "REPRESENTATIVE-CHARACTERS" is an M-text that contains
147 representative characters used by the language.
149 It is assured that the formats of both M-texts are
153 If the information is available, this function returns a plist
154 that should not be modified nor freed. Otherwise, it returns
/* Search language_list for the entry describing LANGUAGE.  An entry
   is compared against LANGUAGE through its first element (a symbol),
   its second element when that is a symbol, and -- byte for byte --
   the M-text stored as its third element.  The second- and
   third-element matches return the entry's plist.
   NOTE(review): this listing omits some original lines, including
   the statements guarded by the first-element test and by the
   MPLIST_TAIL_P checks, and the not-found return.  */
161 mlanguage__info (MSymbol language)
/* A negative result from init_language_list () is treated as
   failure.  */
166 && init_language_list () < 0)
169 MPLIST_DO (plist, language_list)
171 MPlist *pl = MPLIST_PLIST (plist);
/* First element of the entry.  */
173 if (MPLIST_SYMBOL (pl) == language)
175 if (MPLIST_TAIL_P (pl))
/* Second element of the entry.  */
177 pl = MPLIST_NEXT (pl);
178 if (MPLIST_SYMBOL_P (pl) && MPLIST_SYMBOL (pl) == language)
179 return MPLIST_PLIST (plist);
180 if (MPLIST_TAIL_P (pl))
/* Third element of the entry: matched only when it is an M-text
   whose byte length and bytes equal LANGUAGE's symbol name.  */
182 pl = MPLIST_NEXT (pl);
183 if (MPLIST_MTEXT_P (pl))
185 MText *mt = MPLIST_MTEXT (pl);
187 if (mtext_nbytes (mt) == MSYMBOL_NAMELEN (language)
188 && memcmp (MTEXT_DATA (MPLIST_MTEXT (pl)),
189 MSYMBOL_NAME (language),
190 MSYMBOL_NAMELEN (language)) == 0)
191 return MPLIST_PLIST (plist);
/* Search script_list for the entry whose first element is the symbol
   SCRIPT.
   NOTE(review): the statement guarded by the match test and the
   not-found return are not visible in this listing.  */
198 mscript__info (MSymbol script)
/* A negative result from init_script_list () is treated as
   failure.  */
203 && init_script_list () < 0)
205 MPLIST_DO (plist, script_list)
207 MPlist *pl = MPLIST_PLIST (plist);
209 if (MPLIST_SYMBOL (pl) == script)
/* Return the third element of the mscript__info () entry for NAME
   when that element is a plist (the character list).
   NOTE(review): the fall-through return is not visible in this
   listing.  */
216 mscript__char_list (MSymbol name)
218 MPlist *plist = mscript__info (name);
/* Step through the entry one element at a time, stopping early when
   the entry is missing or too short.  */
220 if (plist /* script name */
221 && (plist = MPLIST_NEXT (plist)) /* language list */
222 && ! MPLIST_TAIL_P (plist)
223 && (plist = MPLIST_NEXT (plist)) /* char list */
224 && MPLIST_PLIST_P (plist))
225 return MPLIST_PLIST (plist);
/* Return the fourth element of the mscript__info () entry for SCRIPT
   when that element is a symbol (the OTF tag).
   NOTE(review): the fall-through return is not visible in this
   listing.  */
230 mscript__otf_tag (MSymbol script)
232 MPlist *plist = mscript__info (script);
/* Step through the entry one element at a time, stopping early when
   the entry is missing or too short.  */
234 if (plist /* script name */
235 && (plist = MPLIST_NEXT (plist)) /* language list */
236 && ! MPLIST_TAIL_P (plist)
237 && (plist = MPLIST_NEXT (plist)) /* char list */
238 && ! MPLIST_TAIL_P (plist)
239 && (plist = MPLIST_NEXT (plist)) /* otf tag */
240 && MPLIST_SYMBOL_P (plist))
241 return MPLIST_SYMBOL (plist);
/* Map OTF_TAG back to a script name: scan script_list for an entry
   whose fourth element is the symbol OTF_TAG and record that entry's
   first element in SCRIPT.  The last queried tag and its result are
   kept in function-local statics.
   NOTE(review): several lines (the guard around the cache reset, the
   statement taken on a cache hit, the returns) are not visible in
   this listing.  */
246 mscript__from_otf_tag (MSymbol otf_tag)
249 /* As it is expected that this function is called in a sequence with
250 the same argument, we use a cache. */
251 static MSymbol last_otf_tag, script;
/* The cache is cleared immediately before init_script_list () is
   called; a negative result from that call is treated as failure.  */
255 last_otf_tag = script = Mnil;
256 if (init_script_list () < 0)
/* Cache hit test; presumably the omitted statement returns the
   cached SCRIPT -- confirm.  */
259 if (otf_tag == last_otf_tag)
261 last_otf_tag = otf_tag;
263 MPLIST_DO (plist, script_list)
265 MPlist *pl = MPLIST_PLIST (plist), *p;
/* P walks the entry while PL stays on its first element, so the
   script name can still be read from PL after a match.  */
267 if (pl /* script name */
268 && (p = MPLIST_NEXT (pl)) /* language tag */
269 && ! MPLIST_TAIL_P (p)
270 && (p = MPLIST_NEXT (p)) /* char list */
271 && ! MPLIST_TAIL_P (p)
272 && (p = MPLIST_NEXT (p)) /* otf tag */
273 && MPLIST_SYMBOL_P (p)
274 && otf_tag == MPLIST_SYMBOL (p))
276 script = MPLIST_SYMBOL (pl);
283 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
/* Symbols named "iso639-1" and "iso639-2"; both are assigned by the
   module set-up code in this file.  */
288 MSymbol Miso639_1, Miso639_2;
293 @brief List 3-letter language codes.
295 The mlanguage_list () function returns a well-formed plist whose
296 keys are #Msymbol and values are symbols whose names are ISO639-2
297 3-letter language codes.
300 This function returns a plist. The caller should free it by
301 m17n_object_unref ().
307 @brief 3文字言語コードをリストする.
309 関数 mlanguage_list () は、整形式 (well-formed) plist を返す。各キー
310 は #Msymbol であり、個々の値は ISO639-2 に定められた3文字言語コー
311 ドを名前とするシンボルである。
314 この関数が返す plist は、呼び出し側が m17n_object_unref () を使っ
315 て解放する必要がある。
/* Build a new plist holding, for every entry of language_list, the
   entry's first value under the key Msymbol.
   NOTE(review): the first half of the initialization check and the
   return statements are not visible in this listing.  */
321 mlanguage_list (void)
323 MPlist *plist, *pl, *p, *p0;
/* A negative result from init_language_list () is treated as
   failure.  */
326 && init_language_list () < 0)
/* PLIST keeps the head of the new list while PL receives the result
   of each mplist_add () call.  */
328 plist = pl = mplist ();
329 MPLIST_DO (p, language_list)
331 p0 = MPLIST_PLIST (p);
332 pl = mplist_add (pl, Msymbol, MPLIST_VAL (p0));
340 @brief Get a language code.
342 The mlanguage_code () function returns a symbol whose name is the
343 ISO639 language code of $LANGUAGE. $LANGUAGE is a symbol whose
344 name is an ISO639-2 3-letter language code, an ISO639-1 2-letter
345 language code, or an English word.
347 $LEN specifies the type of the returned language code. If it is
348 3, an ISO639-2 3-letter language code is returned. If it is 2, an
349 ISO639-1 2-letter language code is returned when defined;
350 otherwise #Mnil is returned. If it is 0, a 2-letter code is
351 returned when defined; otherwise a 3-letter code is returned.
354 If the information is available, this function returns a non-#Mnil
355 symbol. Otherwise, it returns #Mnil.
358 mlanguage_name_list (), mlanguage_text (). */
361 @brief 言語コードを得る.
363 関数 mlanguage_code () は、$LANGUAGE に対応した ISO-639 言語コード
364 が名前であるようなシンボルを返す。$LANGUAGE はシンボルであり、その
365 名前は、ISO639-2 3文字言語コード、ISO639-1 2文字言語コード、英語名、
368 $LEN は返される言語コードの種類を決定する。$LEN が3の場合は
369 ISO639-2 3文字言語コードが返される。2の場合は、もし定義されていれ
370 ば ISO639-1 2文字言語コードが、そうでなければ #Mnil が返される。0
371 の場合は、もし定義されていれば2文字コードが、そうでなければ3文字コー
375 もし情報が得られれば、この関数は #Mnil 以外のシンボルを返す。そう
376 でなければ #Mnil を返す。
379 mlanguage_name (), mlanguage_text (). */
/* Return a language code for LANGUAGE taken from its
   mlanguage__info () entry.  CODE is the entry's first element; the
   final expression yields the second element when it is a non-Mnil
   symbol, otherwise CODE if LEN is 0 and Mnil if it is not.
   NOTE(review): the lines handling a missing entry, and whatever
   happens between setting CODE and advancing PLIST (presumably the
   LEN == 3 case), are not visible in this listing -- confirm.  */
382 mlanguage_code (MSymbol language, int len)
384 MPlist *plist = mlanguage__info (language);
389 if (! MPLIST_SYMBOL_P (plist))
391 code = MPLIST_SYMBOL (plist);
/* Move on to the second element of the entry.  */
394 plist = MPLIST_NEXT (plist);
395 return ((MPLIST_SYMBOL_P (plist) && MPLIST_SYMBOL (plist) != Mnil)
396 ? MPLIST_SYMBOL (plist)
397 : len == 0 ? code : Mnil);
403 @brief Return the language names written in the specified language.
405 The mlanguage_name_list () function returns a plist of LANGUAGE's
406 names written in TARGET language.
408 LANGUAGE and TARGET must be a symbol whose name is an ISO639-2
409 3-letter language code or an ISO639-1 2-letter language code.
410 TARGET may be #Mnil, in which case, the language of the current
411 locale is used. If locale is not set or is C, English is used.
414 If the information is available, this function returns a non-empty
415 plist whose keys are #Mtext and values are M-texts of the
416 translated language names. Otherwise, @c NULL is returned.
417 The returned plist should not be modified nor freed.
420 mlanguage_code (), mlanguage_text (). */
/* Look up the names of LANGUAGE written in TARGET.  Both arguments
   are first replaced by the first value of their mlanguage__info ()
   entries; the name table for TARGET is then taken from
   langname_list or loaded from the (Mlanguage, Mname, TARGET, Mnil)
   database and stored there, and finally LANGUAGE is looked up in
   that table.
   NOTE(review): this listing omits the conditions that select between
   the branches below and all return statements.  */
423 mlanguage_name_list (MSymbol language, MSymbol target)
/* Normalize LANGUAGE through its info entry.  */
427 plist = mlanguage__info (language);
430 language = mplist_value (plist);
/* Normalize TARGET the same way.  */
433 plist = mlanguage__info (target);
436 target = mplist_value (plist);
/* Locale-based choice of TARGET: either the literal "eng" or the
   Mlanguage property of the LC_MESSAGES locale (the selecting
   condition is not visible here).  */
440 MLocale *locale = mlocale_set (LC_MESSAGES, NULL);
443 target = msymbol ("eng");
446 target = mlocale_get_prop (locale, Mlanguage);
447 plist = mlanguage__info (target);
450 target = mplist_value (plist);
453 /* Now both LANGUAGE and TARGET are 3-letter codes. */
/* Look for an already stored table for TARGET.  */
456 plist = mplist_get (langname_list, target);
/* langname_list is created here, on first use.  */
458 langname_list = mplist (), plist = NULL;
461 MDatabase *mdb = mdatabase_find (Mlanguage, Mname, target, Mnil);
464 || ! (plist = mdatabase_load (mdb)))
/* Drops the first element of the loaded table before it is stored;
   presumably that element is not a name entry -- confirm.  */
467 mplist__pop_unref (plist);
468 langname_list = mplist_push (langname_list, target, plist);
469 MPLIST_SET_NESTED_P (langname_list);
471 /* PLIST == ((LANGUAGE TRANSLATED) ...) */
472 plist = mplist__assq (plist, language);
473 if (! plist || MPLIST_TAIL_P (plist))
/* Enter the matched (LANGUAGE TRANSLATED ...) element and skip past
   the LANGUAGE symbol.  */
475 plist = MPLIST_PLIST (plist);
476 plist = MPLIST_NEXT (plist);
483 @brief Return the language name written in that language.
485 The mlanguage_text () function returns, in the form of M-text, the
486 language name of $LANGUAGE written in $LANGUAGE. If the
487 representative characters of the language are known, the
488 characters of the returned M-text have a text property whose key is
489 #Mtext and whose value is an M-text that contains the
490 representative characters.
493 If the information is available, this function returns an M-text
494 that should not be modified nor freed. Otherwise, it returns @c
498 mlanguage_code (), mlanguage_name (). */
501 @brief 与えられた言語自身で書かれた言語名を返す.
503 関数 mlanguage_text () は、言語 $LANGUAGE で書かれた $LANGUAGE の
504 名前を M-text の形式で返す。その言語の代表的な文字がわかっている場
505 合は、返される M-text の各文字に、キーが #Mtext で値がその代表的な
506 文字を含む M-text であるようなテキストプロパティが付加される。
509 求める情報が得られた場合、この関数が返す M-text を変更したり解放し
510 たりしてはいけない。情報が得られなかった場合は @c NULL が返される。
513 mlanguage_code (), mlanguage_name (). */
/* Fetch the fourth element of LANGUAGE's mlanguage__info () entry,
   which must be an M-text, and, when the fifth element is also an
   M-text, attach it to the fourth as an Mtext text property.
   NOTE(review): the statements guarded by the checks below and the
   final return are not visible in this listing.  */
516 mlanguage_text (MSymbol language)
518 MPlist *plist = mlanguage__info (language);
/* Three MPLIST_NEXT steps move from the first to the fourth element,
   checking for a short entry along the way.  */
523 plist = MPLIST_NEXT (plist);
524 if (MPLIST_TAIL_P (plist))
526 plist = MPLIST_NEXT (plist);
527 if (MPLIST_TAIL_P (plist))
529 plist = MPLIST_NEXT (plist);
530 if (! MPLIST_MTEXT_P (plist))
532 mt = MPLIST_MTEXT (plist);
533 if (mtext_nchars (mt) == 0)
/* Fifth element: put it on the whole of MT only when MT has no Mtext
   property at position 0 yet, so the property is not put again on a
   later call.  */
535 plist = MPLIST_NEXT (plist);
536 if (MPLIST_MTEXT_P (plist)
537 && ! mtext_get_prop (mt, 0, Mtext))
538 mtext_put_prop (mt, 0, mtext_nchars (mt), Mtext, MPLIST_MTEXT (plist));
543 @brief List script names.
545 The mscript_list () function returns a well-formed plist whose
546 keys are #Msymbol and values are symbols whose names are script
550 This function returns a plist. The caller should free it by
551 m17n_object_unref ().
554 mscript_language_list (), mlanguage_list (). */
557 @brief スクリプト名をリストする.
559 関数 mscript_list () は、整形式 (well-formed) plist を返す。各キー
560 は #Msymbol であり、個々の値はスクリプト名を名前とするシンボルであ
564 この関数が返す plist は、呼び出し側が m17n_object_unref () を使っ
565 て解放する必要がある。
568 mscript_language_list (), mlanguage_list (). */
/* Body of the script-listing function (its header line is not visible
   in this listing): build a new plist holding, for every entry of
   script_list, the entry's first value under the key Msymbol.
   NOTE(review): the first half of the initialization check and the
   return statements are not visible here.  */
573 MPlist *plist, *pl, *p, *p0;
/* A negative result from init_script_list () is treated as
   failure.  */
576 && init_script_list () < 0)
/* PLIST keeps the head of the new list while PL receives the result
   of each mplist_add () call.  */
578 plist = pl = mplist ();
579 MPLIST_DO (p, script_list)
581 p0 = MPLIST_PLIST (p);
582 pl = mplist_add (pl, Msymbol, MPLIST_VAL (p0));
590 @brief List languages that use a specified script.
592 The mscript_language_list () function lists languages that use
593 $SCRIPT. $SCRIPT is a symbol whose name is the lower-cased
594 version of a script name that appears in the Unicode Character
599 This function returns a well-formed plist whose keys are #Msymbol
600 and values are symbols whose names are ISO639-1 2-letter codes (or
601 ISO639-2 3-letter codes, if the former is not available). The
602 caller should not modify nor free it. If the m17n library does
603 not know about $SCRIPT, it returns @c NULL.
606 mscript_list (), mlanguage_list (). */
609 @brief 与えられたスクリプトを用いる言語をリストする.
611 関数 mscript_language_list () は、$SCRIPT を用いる言語をリストする。
612 $SCRIPT はシンボルで、その名前は Unicode Character Database に示さ
613 れているスクリプト名をすべて小文字にしたものである。
615 @return この関数は、整形式 (well-formed) plist を返す。各キーは
616 #Msymbol であり、個々の値は ISO639-1 に定められた2文字言語コード
617 (定義されていない場合は ISO639-2 に定められた3文字言語コード) を名
618 前とするシンボルである。返される plist は変更したり解放したりして
619 はならない。$SCRIPT が未知の場合は @c NULL が返される。
622 mscript_list (), mlanguage_list (). */
/* Return the second element of the mscript__info () entry for SCRIPT
   when that element is a plist (the language list).
   NOTE(review): the fall-through return is not visible in this
   listing.  */
626 mscript_language_list (MSymbol script)
628 MPlist *plist = mscript__info (script);
630 if (plist /* script name */
631 && (plist = MPLIST_NEXT (plist)) /* language list */
632 && MPLIST_PLIST_P (plist))
633 return MPLIST_PLIST (plist);
640 @name Obsolete functions
643 @name Obsolete な関数
648 @brief Get an English language name.
650 This function is obsolete. Use mlanguage_name_list () instead.
652 The mlanguage_name () function returns a symbol whose name is an
653 English name of $LANGUAGE. $LANGUAGE is a symbol whose name is an
654 ISO639-2 3-letter language code, an ISO639-1 2-letter language
655 code, or an English word.
658 If the information is available, this function returns a non-#Mnil
659 symbol. Otherwise, it returns #Mnil.
662 mlanguage_code (), mlanguage_text (). */
665 @brief 言語の英語名を得る.
667 関数 mlanguage_name () は、$LANGUAGE の英語名を名前とするようなシ
668 ンボルを返す。$LANGUAGE はシンボルであり、その名前は、ISO639-2 3文
669 字言語コード、ISO639-1 2文字言語コード、英語名、のいずれかである。
672 求めている情報が得られるなら、この関数は #Mnil 以外のシンボルを返
673 す。そうでなければ #Mnil を返す。
676 mlanguage_code (), mlanguage_text (). */
679 mlanguage_name (MSymbol language)
681 MPlist *plist = mlanguage__info (language);
684 if (! plist) /* 3-letter code */
686 plist = MPLIST_NEXT (plist); /* 2-letter code */
687 if (MPLIST_TAIL_P (plist))
689 plist = MPLIST_NEXT (plist); /* english name */
690 if (MPLIST_MTEXT_P (plist))
692 mt = MPLIST_MTEXT (plist);
693 if (mtext_nbytes (mt) != MSYMBOL_NAMELEN (language)
694 || memcmp (MTEXT_DATA (MPLIST_MTEXT (plist)),
695 MSYMBOL_NAME (language),
696 MSYMBOL_NAMELEN (language)))