1 /* language.c -- language (and script) module.
2 Copyright (C) 2003, 2004, 2006
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
28 #include "m17n-misc.h"
35 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
/* Module-level plists.  language_list and script_list are assigned from
   load_lang_script_list () by init_language_list () and
   init_script_list () respectively; langname_list is used by
   mlanguage_name_list () as a per-target-language cache of loaded
   name tables.  */
37 static MPlist *language_list;
38 static MPlist *script_list;
39 static MPlist *langname_list;
/* Look up the database identified by TAG0..TAG3 with mdatabase_find ()
   and load it with mdatabase_load ().  The loaded plist is then
   filtered in place: an element that is not itself a plist, or whose
   plist does not start with a symbol, is dropped with
   mplist__pop_unref ().
   NOTE(review): the failure path and the final return statement are
   not visible in this view.  */
42 load_lang_script_list (MSymbol tag0, MSymbol tag1, MSymbol tag2, MSymbol tag3)
44 MDatabase *mdb = mdatabase_find (tag0, tag1, tag2, tag3);
45 MPlist *plist, *pl, *p;
48 || ! (plist = mdatabase_load (mdb)))
50 /* Check at least if the plist is ((SYMBOL ...) ...). */
/* The loop has no increment clause; PL is advanced explicitly below
   (presumably only when the current element is kept -- confirm
   against the full source).  */
51 for (pl = plist; ! MPLIST_TAIL_P (pl);)
53 if (! MPLIST_PLIST_P (pl))
54 mplist__pop_unref (pl);
57 p = MPLIST_PLIST (pl);
58 if (! MPLIST_SYMBOL_P (p))
59 mplist__pop_unref (pl);
61 pl = MPLIST_NEXT (pl);
/* Fill language_list from the database tagged
   (standard, Mlanguage, iso639, Mnil) via load_lang_script_list ().
   NOTE(review): the condition guarding the fallback below is not
   visible here; presumably it is taken when the load yields nothing,
   in which case language_list becomes an empty plist and
   MERROR (MERROR_DB, -1) is invoked.  */
68 init_language_list (void)
70 language_list = load_lang_script_list (msymbol ("standard"), Mlanguage,
71 msymbol ("iso639"), Mnil);
74 language_list = mplist ();
75 MERROR (MERROR_DB, -1);
/* Fill script_list from the database tagged
   (standard, Mscript, unicode, Mnil) via load_lang_script_list ().
   NOTE(review): the condition guarding the fallback below is not
   visible here; presumably it is taken when the load yields nothing,
   in which case script_list becomes an empty plist and
   MERROR (MERROR_DB, -1) is invoked.  */
82 init_script_list (void)
84 script_list = load_lang_script_list (msymbol ("standard"), Mscript,
85 msymbol ("unicode"), Mnil);
88 script_list = mplist ();
89 MERROR (MERROR_DB, -1);
100 msymbol_put_func (Mlanguage, Mtext_prop_serializer,
101 M17N_FUNC (msymbol__serializer));
102 msymbol_put_func (Mlanguage, Mtext_prop_deserializer,
103 M17N_FUNC (msymbol__deserializer));
104 Miso639_2 = msymbol ("iso639-2");
105 Miso639_1 = msymbol ("iso639-1");
107 language_list = script_list = langname_list = NULL;
114 M17N_OBJECT_UNREF (language_list);
115 M17N_OBJECT_UNREF (script_list);
116 M17N_OBJECT_UNREF (langname_list);
122 @brief Get information about a language.
124 The mlanguage_info () function returns a well-formed @e plist that
125 contains information about $LANGUAGE. $LANGUAGE is a symbol whose
126 name is an ISO639-2 3-letter language code, an ISO639-1 2-letter
127 language code, or an English word.
129 The format of the plist is:
132 (ISO639-2 [ISO639-1 | nil] ENGLISH-NAME ["NATIVE-NAME" | nil]
133 ["REPRESENTATIVE-CHARACTERS"])
136 where ISO639-2 is a symbol whose name is the 3-letter language code
137 of ISO639-2, ISO639-1 is a symbol whose name is the 2-letter language
138 code of ISO639-1, ENGLISH-NAME is a symbol whose name is the
139 English name of the language, "NATIVE-NAME" is an M-text written
140 in the most natural way for the language, and
141 "REPRESENTATIVE-CHARACTERS" is an M-text that contains
142 representative characters used by the language.
144 It is assured that the formats of both M-texts are
148 If the information is available, this function returns a plist
149 that should not be modified nor freed. Otherwise, it returns
/* Search language_list for the entry that describes LANGUAGE and
   return that entry's plist.  init_language_list () is called from
   the guard below (the first half of that condition is not visible in
   this view).  An entry matches when LANGUAGE equals its first
   element, equals its second element when that is a symbol, or has
   the same name bytes as its third element when that is an M-text.
   NOTE(review): the return for the first match and the no-match
   return are not visible here.  */
156 mlanguage__info (MSymbol language)
161 && init_language_list () < 0)
164 MPLIST_DO (plist, language_list)
166 MPlist *pl = MPLIST_PLIST (plist);
/* First element of the entry.  */
168 if (MPLIST_SYMBOL (pl) == language)
170 if (MPLIST_TAIL_P (pl))
172 pl = MPLIST_NEXT (pl);
/* Second element, compared only when it is a symbol.  */
173 if (MPLIST_SYMBOL_P (pl) && MPLIST_SYMBOL (pl) == language)
174 return MPLIST_PLIST (plist);
175 if (MPLIST_TAIL_P (pl))
177 pl = MPLIST_NEXT (pl);
/* Third element: when it is an M-text, compare its bytes with the
   name of LANGUAGE (equal byte length, then memcmp).  */
178 if (MPLIST_MTEXT_P (pl))
180 MText *mt = MPLIST_MTEXT (pl);
182 if (mtext_nbytes (mt) == MSYMBOL_NAMELEN (language)
183 && memcmp (MTEXT_DATA (MPLIST_MTEXT (pl)),
184 MSYMBOL_NAME (language),
185 MSYMBOL_NAMELEN (language)) == 0)
186 return MPLIST_PLIST (plist);
/* Search script_list for the entry whose first element is the symbol
   SCRIPT.  init_script_list () is called from the guard below (the
   first half of that condition is not visible in this view).
   NOTE(review): the return statements are not visible here;
   callers in this file treat the result as the matching entry's
   plist or a null pointer.  */
193 mscript__info (MSymbol script)
198 && init_script_list () < 0)
200 MPLIST_DO (plist, script_list)
202 MPlist *pl = MPLIST_PLIST (plist);
204 if (MPLIST_SYMBOL (pl) == script)
/* Return the character list of the script NAME: the third element of
   the plist obtained from mscript__info (), provided that element is
   itself a plist.  The chain below steps over the script name and the
   language list and stops early if the info plist is too short.
   NOTE(review): the fallback return is not visible in this view.  */
211 mscript__char_list (MSymbol name)
213 MPlist *plist = mscript__info (name);
215 if (plist /* script name */
216 && (plist = MPLIST_NEXT (plist)) /* language list */
217 && ! MPLIST_TAIL_P (plist)
218 && (plist = MPLIST_NEXT (plist)) /* char list */
219 && MPLIST_PLIST_P (plist))
220 return MPLIST_PLIST (plist);
/* Return the OTF tag of SCRIPT: the fourth element of the plist
   obtained from mscript__info (), provided that element is a symbol.
   The chain below steps over the script name, the language list and
   the char list, and stops early if the info plist is too short.
   NOTE(review): the fallback return is not visible in this view.  */
225 mscript__otf_tag (MSymbol script)
227 MPlist *plist = mscript__info (script);
229 if (plist /* script name */
230 && (plist = MPLIST_NEXT (plist)) /* language list */
231 && ! MPLIST_TAIL_P (plist)
232 && (plist = MPLIST_NEXT (plist)) /* char list */
233 && ! MPLIST_TAIL_P (plist)
234 && (plist = MPLIST_NEXT (plist)) /* otf tag */
235 && MPLIST_SYMBOL_P (plist))
236 return MPLIST_SYMBOL (plist);
/* Reverse lookup: find the script whose entry in script_list carries
   OTF_TAG as its fourth element (a symbol), and record that entry's
   first element (the script name) in the static SCRIPT.  The last
   queried tag and its result are kept in function-local statics.
   NOTE(review): the return statements and the guard around the cache
   reset / init_script_list () call are not visible in this view.  */
241 mscript__from_otf_tag (MSymbol otf_tag)
244 /* As it is expected that this function is called in a sequence with
245 the same argument, we use a cache. */
246 static MSymbol last_otf_tag, script;
/* Cache reset and script-list initialization.  */
250 last_otf_tag = script = Mnil;
251 if (init_script_list () < 0)
/* Same tag as the previous call: the cached result applies.  */
254 if (otf_tag == last_otf_tag)
256 last_otf_tag = otf_tag;
258 MPLIST_DO (plist, script_list)
260 MPlist *pl = MPLIST_PLIST (plist), *p;
262 if (pl /* script name */
263 && (p = MPLIST_NEXT (pl)) /* language tag */
264 && ! MPLIST_TAIL_P (p)
265 && (p = MPLIST_NEXT (p)) /* char list */
266 && ! MPLIST_TAIL_P (p)
267 && (p = MPLIST_NEXT (p)) /* otf tag */
268 && MPLIST_SYMBOL_P (p)
269 && otf_tag == MPLIST_SYMBOL (p))
271 script = MPLIST_SYMBOL (pl);
278 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
/* Symbols named "iso639-1" and "iso639-2"; both are assigned with
   msymbol () in this file's initialization code.  */
283 MSymbol Miso639_1, Miso639_2;
288 @brief List 3-letter language codes.
290 The mlanguage_list () function returns a well-formed plist whose
291 keys are #Msymbol and values are symbols whose names are ISO639-2
292 3-letter language codes.
295 This function returns a plist. The caller should free it by
296 m17n_object_unref ().
302 @brief 3文字言語コードをリストする.
304 \e$B4X?t
\e(B mlanguage_list ()
\e$B$O!"@07A<0
\e(B (well-formed) plist
\e$B$rJV$9!#3F%-!<
\e(B
305 \e$B$O
\e(B #Msymbol
\e$B$G$"$j!"8D!9$NCM$O
\e(B ISO639-2
\e$B$KDj$a$i$l$?
\e(B3
\e$BJ8;z8@8l%3!<
\e(B
306 \e$B%I$rL>A0$H$9$k%7%s%\%k$G$"$k!#
\e(B
309 \e$B$3$N4X?t$,JV$9
\e(B plist
\e$B$O!"8F$S=P$7B&$,
\e(B m17n_object_unref ()
\e$B$r;H$C
\e(B
310 \e$B$F2rJ|$9$kI,MW$,$"$k!#
\e(B
/* Build a new plist with one (Msymbol, value) pair per entry of
   language_list, where the value is the first element of that entry.
   init_language_list () is called from the guard below (the first
   half of that condition is not visible in this view).
   NOTE(review): the return statements are not visible here.  */
316 mlanguage_list (void)
318 MPlist *plist, *pl, *p, *p0;
321 && init_language_list () < 0)
/* PLIST keeps the head of the new list; PL is reassigned to the value
   returned by each mplist_add () call.  */
323 plist = pl = mplist ();
324 MPLIST_DO (p, language_list)
326 p0 = MPLIST_PLIST (p);
327 pl = mplist_add (pl, Msymbol, MPLIST_VAL (p0));
335 @brief Get a language code.
337 The mlanguage_code () function returns a symbol whose name is the
338 ISO639 language code of $LANGUAGE. $LANGUAGE is a symbol whose
339 name is an ISO639-2 3-letter language code, an ISO639-1 2-letter
340 language code, or an English word.
342 $LEN specifies the type of the returned language code. If it is
343 3, an ISO639-2 3-letter language code is returned. If it is 2, an
344 ISO639-1 2-letter language code is returned when defined;
345 otherwise #Mnil is returned. If it is 0, a 2-letter code is
346 returned when defined; otherwise a 3-letter code is returned.
349 If the information is available, this function returns a non-#Mnil
350 symbol. Otherwise, it returns #Mnil.
353 mlanguage_name (), mlanguage_text (). */
356 @brief 言語コードを得る.
358 \e$B4X?t
\e(B mlanguage_code ()
\e$B$O!"
\e(B$LANGUAGE
\e$B$KBP1~$7$?
\e(B ISO-639
\e$B8@8l%3!<%I
\e(B
359 \e$B$,L>A0$G$"$k$h$&$J%7%s%\%k$rJV$9!#
\e(B$LANGUAGE
\e$B$O%7%s%\%k$G$"$j!"$=$N
\e(B
360 \e$BL>A0$O!"
\e(BISO639-2 3
\e$BJ8;z8@8l%3!<%I!"
\e(BISO639-1 2
\e$BJ8;z8@8l%3!<%I!"1Q8lL>!"
\e(B
361 \e$B$N$$$:$l$+$G$"$k!#
\e(B
363 $LEN
\e$B$OJV$5$l$k8@8l%3!<%I$N<oN`$r7hDj$9$k!#
\e(B$LEN
\e$B$,
\e(B3
\e$B$N>l9g$O
\e(B
364 ISO639-2 3
\e$BJ8;z8@8l%3!<%I$,JV$5$l$k!#
\e(B2
\e$B$N>l9g$O!"$b$7Dj5A$5$l$F$$$l
\e(B
365 \e$B$P
\e(B ISO639-1 2
\e$BJ8;z8@8l%3!<%I$,!"$=$&$G$J$1$l$P
\e(B #Mnil
\e$B$,JV$5$l$k!#
\e(B0
366 \e$B$N>l9g$O!"$b$7Dj5A$5$l$F$$$l$P
\e(B2
\e$BJ8;z%3!<%I$,!"$=$&$G$J$1$l$P
\e(B3
\e$BJ8;z%3!<
\e(B
367 \e$B%I$,JV$5$l$k!#
\e(B
370 \e$B$b$7>pJs$,F@$i$l$l$P!"$3$N4X?t$O
\e(B #Mnil
\e$B0J30$N%7%s%\%k$rJV$9!#$=$&
\e(B
371 \e$B$G$J$1$l$P
\e(B #Mnil
\e$B$rJV$9!#
\e(B
374 mlanguage_name (), mlanguage_text (). */
/* Return a language code for LANGUAGE, using the plist obtained from
   mlanguage__info ().  CODE is the first element of that plist (it
   must be a symbol).  The final expression looks at the second
   element: when it is a non-Mnil symbol it is returned; otherwise
   CODE is returned if LEN is 0, and Mnil if not.
   NOTE(review): the handling of a missing info plist and of LEN == 3
   is not visible in this view.  */
377 mlanguage_code (MSymbol language, int len)
379 MPlist *plist = mlanguage__info (language);
384 if (! MPLIST_SYMBOL_P (plist))
386 code = MPLIST_SYMBOL (plist);
389 plist = MPLIST_NEXT (plist);
390 return ((MPLIST_SYMBOL_P (plist) && MPLIST_SYMBOL (plist) != Mnil)
391 ? MPLIST_SYMBOL (plist)
392 : len == 0 ? code : Mnil);
398 @brief Return the language names written in the specified language.
400 The mlanguage_name_list () function returns a plist of LANGUAGE's
401 names written in TARGET language.
403 LANGUAGE and TARGET must be a symbol whose name is an ISO639-2
404 3-letter language code or an ISO639-1 2-letter language code.
405 TARGET may be #Mnil, in which case, the language of the current
406 locale is used. If locale is not set or is C, English is used.
409 If the information is available, this function returns a non-empty
410 plist whose keys are #Mtext and values are M-texts of the
411 translated language names. Otherwise, @c NULL is returned.
412 The returned plist should not be modified nor freed.
415 mlanguage_code (), mlanguage_text (). */
/* Look up the names of LANGUAGE written in the TARGET language.
   Both arguments are first normalized through mlanguage__info () and
   mplist_value (); the existing comment below states that the result
   is a 3-letter code.  The name table for TARGET is then fetched from
   the langname_list cache or loaded from the
   (Mlanguage, Mname, TARGET, Mnil) database, and the entry for
   LANGUAGE is located in it with mplist__assq ().
   NOTE(review): the branch conditions, error returns and the final
   return are not visible in this view.  */
418 mlanguage_name_list (MSymbol language, MSymbol target)
422 plist = mlanguage__info (language);
425 language = mplist_value (plist);
428 plist = mlanguage__info (target);
431 target = mplist_value (plist);
/* Locale-based selection of TARGET: either the symbol "eng" or the
   Mlanguage property of the LC_MESSAGES locale (the conditions that
   choose between them are not visible here).  */
435 MLocale *locale = mlocale_set (LC_MESSAGES, NULL);
438 target = msymbol ("eng");
441 target = mlocale_get_prop (locale, Mlanguage);
442 plist = mlanguage__info (target);
445 target = mplist_value (plist);
448 /* Now both LANGUAGE and TARGET are 3-letter codes. */
/* Cache lookup keyed by TARGET; langname_list is created on demand.  */
451 plist = mplist_get (langname_list, target);
453 langname_list = mplist (), plist = NULL;
/* Cache miss path: load the name table, pop its first element, and
   push the rest onto langname_list under the key TARGET.  */
456 MDatabase *mdb = mdatabase_find (Mlanguage, Mname, target, Mnil);
459 || ! (plist = mdatabase_load (mdb)))
462 mplist__pop_unref (plist);
463 langname_list = mplist_push (langname_list, target, plist);
464 MPLIST_SET_NESTED_P (langname_list);
466 /* PLIST == ((LANGUAGE TRANSLATED) ...) */
467 plist = mplist__assq (plist, language);
468 if (! plist || MPLIST_TAIL_P (plist))
/* Step past the LANGUAGE symbol to the translated names.  */
470 plist = MPLIST_PLIST (plist);
471 plist = MPLIST_NEXT (plist);
478 @brief Return the language name written in that language.
480 The mlanguage_text () function returns, in the form of M-text, the
481 language name of $LANGUAGE written in $LANGUAGE. If the
482 representative characters of the language are known, the
483 characters of the returned M-text have a text property whose key is
484 #Mtext and whose value is an M-text that contains the
485 representative characters.
488 If the information is available, this function returns an M-text
489 that should not be modified nor freed. Otherwise, it returns @c
493 mlanguage_code (), mlanguage_name (). */
496 @brief 与えられた言語自身で書かれた言語名を返す.
498 \e$B4X?t
\e(B mlanguage_text ()
\e$B$O!"8@8l
\e(B $LANGUAGE
\e$B$G=q$+$l$?
\e(B $LANGUAGE
\e$B$N
\e(B
499 \e$BL>A0$r
\e(B M-text
\e$B$N7A<0$GJV$9!#$=$N8@8l$NBeI=E*$JJ8;z$,$o$+$C$F$$$k>l
\e(B
500 \e$B9g$O!"JV$5$l$k
\e(B M-text
\e$B$N3FJ8;z$K!"%-!<$,
\e(B #Mtext
\e$B$GCM$,$=$NBeI=E*$J
\e(B
501 \e$BJ8;z$r4^$`
\e(B M-text
\e$B$G$"$k$h$&$J%F%-%9%H%W%m%Q%F%#$,IU2C$5$l$k!#
\e(B
504 \e$B5a$a$k>pJs$,F@$i$l$?>l9g!"$3$N4X?t$,JV$9
\e(B M-text
\e$B$rJQ99$7$?$j2rJ|$7
\e(B
505 \e$B$?$j$7$F$O$$$1$J$$!#>pJs$,F@$i$l$J$+$C$?>l9g$O
\e(B @c NULL
\e$B$,JV$5$l$k!#
\e(B
508 mlanguage_code (), mlanguage_name (). */
/* Fetch the M-text stored as the fourth element of the plist obtained
   from mlanguage__info () for LANGUAGE.  The fourth element must be
   an M-text and is checked for zero length.  When the fifth element
   is also an M-text and the first character of MT has no Mtext
   property yet, that fifth M-text is attached to the whole of MT as
   an Mtext text property.
   NOTE(review): the early-exit returns and the final return are not
   visible in this view.  */
511 mlanguage_text (MSymbol language)
513 MPlist *plist = mlanguage__info (language);
/* Three MPLIST_NEXT steps, with tail checks before the second and
   third, move from the first element to the fourth.  */
518 plist = MPLIST_NEXT (plist);
519 if (MPLIST_TAIL_P (plist))
521 plist = MPLIST_NEXT (plist);
522 if (MPLIST_TAIL_P (plist))
524 plist = MPLIST_NEXT (plist);
525 if (! MPLIST_MTEXT_P (plist))
527 mt = MPLIST_MTEXT (plist);
528 if (mtext_nchars (mt) == 0)
/* Fifth element: attach it as a property only if none is present at
   position 0.  */
530 plist = MPLIST_NEXT (plist);
531 if (MPLIST_MTEXT_P (plist)
532 && ! mtext_get_prop (mt, 0, Mtext))
533 mtext_put_prop (mt, 0, mtext_nchars (mt), Mtext, MPLIST_MTEXT (plist));
538 @brief List script names.
540 The mscript_list () function returns a well-formed plist whose
541 keys are #Msymbol and values are symbols whose names are script
545 This function returns a plist. The caller should free it by
546 m17n_object_unref ().
549 mscript_language_list (), mlanguage_list (). */
552 @brief スクリプト名をリストする.
554 \e$B4X?t
\e(B mscript_list ()
\e$B$O!"@07A<0
\e(B (well-formed) plist
\e$B$rJV$9!#3F%-!<
\e(B
555 \e$B$O
\e(B #Msymbol
\e$B$G$"$j!"8D!9$NCM$O%9%/%j%W%HL>$rL>A0$H$9$k%7%s%\%k$G$"
\e(B
559 \e$B$3$N4X?t$,JV$9
\e(B plist
\e$B$O!"8F$S=P$7B&$,
\e(B m17n_object_unref ()
\e$B$r;H$C
\e(B
560 \e$B$F2rJ|$9$kI,MW$,$"$k!#
\e(B
563 mscript_language_list (), mlanguage_list (). */
568 MPlist *plist, *pl, *p, *p0;
571 && init_script_list () < 0)
573 plist = pl = mplist ();
574 MPLIST_DO (p, script_list)
576 p0 = MPLIST_PLIST (p);
577 pl = mplist_add (pl, Msymbol, MPLIST_VAL (p0));
585 @brief List languages that use a specified script.
587 The mscript_language_list () function lists languages that use
588 $SCRIPT. $SCRIPT is a symbol whose name is the lower-cased
589 version of a script name that appears in the Unicode Character
594 This function returns a well-formed plist whose keys are #Msymbol
595 and values are symbols whose names are ISO639-1 2-letter codes (or
596 ISO639-2 3-letter codes, if the former is not available). The
597 caller should not modify nor free it. If the m17n library does
598 not know about $SCRIPT, it returns @c NULL.
601 mscript_list (), mlanguage_list (). */
604 @brief 与えられたスクリプトを用いる言語をリストする.
606 \e$B4X?t
\e(B mscript_language_list ()
\e$B$O!"
\e(B$SCRIPT
\e$B$rMQ$$$k8@8l$r%j%9%H$9$k!#
\e(B
607 $SCRIPT
\e$B$O%7%s%\%k$G!"$=$NL>A0$O
\e(B Unicode Character Database
\e$B$K<($5
\e(B
608 \e$B$l$F$$$k%9%/%j%W%HL>$r$9$Y$F>.J8;z$K$7$?$b$N$G$"$k!#
\e(B
610 @return
\e$B$3$N4X?t$O!"@07A<0
\e(B (well-formed) plist
\e$B$rJV$9!#3F%-!<$O
\e(B
611 #Msymbol
\e$B$G$"$j!"8D!9$NCM$O
\e(B ISO639-1
\e$B$KDj$a$i$l$?
\e(B2
\e$BJ8;z8@8l%3!<%I
\e(B
612 (
\e$BDj5A$5$l$F$$$J$$>l9g$O
\e(B ISO639-2
\e$B$KDj$a$i$l$?
\e(B3
\e$BJ8;z8@8l%3!<%I
\e(B)
\e$B$rL>
\e(B
613 \e$BA0$H$9$k%7%s%\%k$G$"$k!#JV$5$l$k
\e(B plist
\e$B$OJQ99$7$?$j2rJ|$7$?$j$7$F
\e(B
614 \e$B$O$J$i$J$$!#
\e(B$SCRIPT
\e$B$,L$CN$N>l9g$O
\e(B @c NULL
\e$B$,JV$5$l$k!#
\e(B
617 mscript_list (), mlanguage_list (). */
/* Return the language list of SCRIPT: the second element of the plist
   obtained from mscript__info (), provided that element is itself a
   plist.
   NOTE(review): the fallback return is not visible in this view.  */
621 mscript_language_list (MSymbol script)
623 MPlist *plist = mscript__info (script);
625 if (plist /* script name */
626 && (plist = MPLIST_NEXT (plist)) /* language list */
627 && MPLIST_PLIST_P (plist))
628 return MPLIST_PLIST (plist);
635 @name Obsolete functions
638 @name Obsolete な関数
643 @brief Get an English language name.
645 This function is obsolete. Use mlanguage_name_list () instead.
647 The mlanguage_name () function returns a symbol whose name is an
648 English name of $LANGUAGE. $LANGUAGE is a symbol whose name is an
649 ISO639-2 3-letter language code, an ISO639-1 2-letter language
650 code, or an English word.
653 If the information is available, this function returns a non-#Mnil
654 symbol. Otherwise, it returns #Mnil.
657 mlanguage_code (), mlanguage_text (). */
660 @brief 言語の英語名を得る.
662 \e$B4X?t
\e(B mlanguage_name ()
\e$B$O!"
\e(B$LANGUAGE
\e$B$N1Q8lL>$rL>A0$H$9$k$h$&$J%7
\e(B
663 \e$B%s%\%k$rJV$9!#
\e(B$LANGUAGE
\e$B$O%7%s%\%k$G$"$j!"$=$NL>A0$O!"
\e(BISO639-2 3
\e$BJ8
\e(B
664 \e$B;z8@8l%3!<%I!"
\e(BISO639-1 2
\e$BJ8;z8@8l%3!<%I!"1Q8lL>!"$N$$$:$l$+$G$"$k!#
\e(B
667 \e$B5a$a$F$$$k>pJs$,F@$i$l$k$J$i!"$3$N4X?t$O
\e(B #Mnil
\e$B0J30$N%7%s%\%k$rJV
\e(B
668 \e$B$9!#$=$&$G$J$1$l$P
\e(B #Mnil
\e$B$rJV$9!#
\e(B
671 mlanguage_code (), mlanguage_text (). */
674 mlanguage_name (MSymbol language)
676 MPlist *plist = mlanguage__info (language);
679 if (! plist) /* 3-letter code */
681 plist = MPLIST_NEXT (plist); /* 2-letter code */
682 if (MPLIST_TAIL_P (plist))
684 plist = MPLIST_NEXT (plist); /* english name */
685 if (MPLIST_MTEXT_P (plist))
687 mt = MPLIST_MTEXT (plist);
688 if (mtext_nbytes (mt) != MSYMBOL_NAMELEN (language)
689 || memcmp (MTEXT_DATA (MPLIST_MTEXT (plist)),
690 MSYMBOL_NAME (language),
691 MSYMBOL_NAMELEN (language)))