1 /* language.c -- language (and script) module.
2 Copyright (C) 2003, 2004, 2006
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 #include "m17n-misc.h"
33 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
/* Module-level cache of the language database.  It is assigned the
   result of load_lang_script_list () in init_language_list (), and
   the lookup functions in this file iterate over it with MPLIST_DO.  */
35 static MPlist *language_list;
/* Module-level cache of the script database.  It is assigned the
   result of load_lang_script_list () in init_script_list (), and the
   script lookup functions in this file iterate over it.  */
36 static MPlist *script_list;
/* Find the database identified by the four tags TAG0..TAG3 with
   mdatabase_find () and load it with mdatabase_load ().  The loaded
   plist is then sanity-checked element by element; when the check
   does not reach the tail, the plist is released with
   M17N_OBJECT_UNREF ().

   NOTE(review): the return type and the return statements are not
   visible in this excerpt.  Callers assign the result to
   language_list / script_list, so it is presumably an MPlist
   pointer (NULL on failure) -- confirm against the full file.  */
39 load_lang_script_list (MSymbol tag0, MSymbol tag1, MSymbol tag2, MSymbol tag3)
41 MDatabase *mdb = mdatabase_find (tag0, tag1, tag2, tag3);
42 MPlist *plist, *pl, *p;
/* Second half of the failure test: the database could not be loaded.
   (The first half of the condition is not visible here.)  */
45 || ! (plist = mdatabase_load (mdb)))
47 /* Check at least if the plist is ((SYMBOL ...) ...). */
/* Each top-level element must itself be a plist ...  */
50 if (! MPLIST_PLIST_P (pl))
52 p = MPLIST_PLIST (pl);
/* ... whose first element is a symbol.  */
53 if (! MPLIST_SYMBOL_P (p))
/* PL not being the tail presumably means the check above stopped on a
   malformed element; the loaded plist is dropped in that case.  */
56 if (! MPLIST_TAIL_P (pl))
58 M17N_OBJECT_UNREF (plist);
/* Fill language_list from the database tagged
   <"standard", Mlanguage, "iso639", Mnil>.  Callers in this file
   treat a negative return value as failure.  */
65 init_language_list (void)
67 language_list = load_lang_script_list (msymbol ("standard"), Mlanguage,
68 msymbol ("iso639"), Mnil);
/* NOTE(review): the guard around the next two lines is not visible in
   this excerpt; presumably they run only when loading failed.  An
   empty plist is installed as language_list and MERROR_DB is
   reported with -1.  */
71 language_list = mplist ();
72 MERROR (MERROR_DB, -1);
/* Fill script_list from the database tagged
   <"standard", Mscript, "unicode", Mnil>.  Callers in this file
   treat a negative return value as failure.  */
79 init_script_list (void)
81 script_list = load_lang_script_list (msymbol ("standard"), Mscript,
82 msymbol ("unicode"), Mnil);
/* NOTE(review): the guard around the next two lines is not visible in
   this excerpt; presumably they run only when loading failed.  An
   empty plist is installed as script_list and MERROR_DB is reported
   with -1.  */
85 script_list = mplist ();
86 MERROR (MERROR_DB, -1);
/* NOTE(review): the header of the enclosing function is not visible
   in this excerpt; these statements look like the module's
   initialisation body.  */
/* Register msymbol__serializer and msymbol__deserializer as the
   Mtext_prop_serializer / Mtext_prop_deserializer properties of the
   symbol Mlanguage.  */
97 msymbol_put (Mlanguage, Mtext_prop_serializer,
98 (void *) msymbol__serializer);
99 msymbol_put (Mlanguage, Mtext_prop_deserializer,
100 (void *) msymbol__deserializer);
/* Create the symbols named "iso639-2" and "iso639-1".  */
101 Miso639_2 = msymbol ("iso639-2");
102 Miso639_1 = msymbol ("iso639-1");
/* Both cached lists start out NULL; the lookup functions call
   init_language_list () / init_script_list () themselves.  */
104 language_list = script_list = NULL;
/* NOTE(review): the header of the enclosing function is not visible
   in this excerpt; these statements look like the module's
   finalisation body.  Both cached lists are released, and
   language_list is reset to NULL afterwards.  */
111 M17N_OBJECT_UNREF (language_list);
112 language_list = NULL;
113 M17N_OBJECT_UNREF (script_list);
120 @brief Get information about a language.
122 The mlanguage_info () function returns a well-formed @e plist that
123 contains information about $LANGUAGE. $LANGUAGE is a symbol whose
124 name is an ISO639-2 3-letter language code, an ISO639-1 2-letter
125 language code, or an English word.
127 The format of the plist is:
130 (ISO639-2 [ISO639-1 | nil] ENGLISH-NAME ["NATIVE-NAME" | nil]
131 ["REPRESENTATIVE-CHARACTERS"])
134 where ISO639-2 is a symbol whose name is the 3-letter language code
135 of ISO639-2, ISO639-1 is a symbol whose name is the 2-letter language
136 code of ISO639-1, ENGLISH-NAME is a symbol whose name is the
137 English name of the language, "NATIVE-NAME" is an M-text written
138 in the most natural way in the language, and
139 "REPRESENTATIVE-CHARACTERS" is an M-text that contains
140 representative characters used by the language.
142 It is assured that the formats of both M-texts are
146 If the information is available, this function returns a plist
147 that should not be modified nor freed. Otherwise, it returns
/* Search language_list for the entry describing LANGUAGE and return
   that entry's plist.  LANGUAGE is compared against up to the first
   three elements of each entry, which the format comment above lists
   as ISO639-2, ISO639-1 and ENGLISH-NAME.  */
154 mlanguage__info (MSymbol language)
/* A negative result from init_language_list () is the failure case.
   (The first half of this condition is not visible here.)  */
159 && init_language_list () < 0)
162 MPLIST_DO (plist, language_list)
164 MPlist *pl = MPLIST_PLIST (plist);
/* First element.  */
166 if (MPLIST_SYMBOL_P (pl) && MPLIST_SYMBOL (pl) == language)
167 return MPLIST_PLIST (plist);
168 if (! MPLIST_TAIL_P (pl))
/* Second element.  */
170 pl = MPLIST_NEXT (pl);
171 if (MPLIST_SYMBOL_P (pl) && MPLIST_SYMBOL (pl) == language)
172 return MPLIST_PLIST (plist);
173 if (! MPLIST_TAIL_P (pl))
/* Third element.  */
175 pl = MPLIST_NEXT (pl);
176 if (MPLIST_SYMBOL_P (pl) && MPLIST_SYMBOL (pl) == language)
177 return MPLIST_PLIST (plist);
/* Search script_list for the entry whose first element is the symbol
   SCRIPT.  NOTE(review): the return statements are not visible in
   this excerpt; callers in this file use the result as the matching
   entry's plist and test it for NULL.  */
185 mscript__info (MSymbol script)
/* A negative result from init_script_list () is the failure case.
   (The first half of this condition is not visible here.)  */
190 && init_script_list () < 0)
192 MPLIST_DO (plist, script_list)
194 MPlist *pl = MPLIST_PLIST (plist);
/* Only the first element of each entry is compared.  */
196 if (MPLIST_SYMBOL (pl) == script)
/* Return the nested plist stored as the third element of the info
   plist of the script NAME (the element labelled "char list" below).
   It is returned only when the info plist exists, has that many
   elements and the third one is a plist.  NOTE(review): the fallback
   return is not visible in this excerpt.  */
203 mscript__char_list (MSymbol name)
205 MPlist *plist = mscript__info (name);
207 if (plist /* script name */
208 && (plist = MPLIST_NEXT (plist)) /* language list */
209 && ! MPLIST_TAIL_P (plist)
210 && (plist = MPLIST_NEXT (plist)) /* char list */
211 && MPLIST_PLIST_P (plist))
212 return MPLIST_PLIST (plist);
/* Return the symbol stored as the fourth element of the info plist of
   SCRIPT (the element labelled "otf tag" below).  It is returned
   only when the info plist exists, has that many elements and the
   fourth one is a symbol.  NOTE(review): the fallback return is not
   visible in this excerpt.  */
217 mscript__otf_tag (MSymbol script)
219 MPlist *plist = mscript__info (script);
221 if (plist /* script name */
222 && (plist = MPLIST_NEXT (plist)) /* language list */
223 && ! MPLIST_TAIL_P (plist)
224 && (plist = MPLIST_NEXT (plist)) /* char list */
225 && ! MPLIST_TAIL_P (plist)
226 && (plist = MPLIST_NEXT (plist)) /* otf tag */
227 && MPLIST_SYMBOL_P (plist))
228 return MPLIST_SYMBOL (plist);
/* Reverse lookup of mscript__otf_tag (): scan script_list for an
   entry whose fourth element is the symbol OTF_TAG and record the
   symbol of that entry's first element in the static SCRIPT.  The
   most recent OTF_TAG is remembered in LAST_OTF_TAG.  */
233 mscript__from_otf_tag (MSymbol otf_tag)
236 /* As it is expected that this function is called in a sequence with
237 the same argument, we use a cache. */
238 static MSymbol last_otf_tag, script;
/* Clear the cache and load the script list.  NOTE(review): the guard
   around these lines is not visible in this excerpt; presumably they
   run only while script_list has not been loaded yet.  */
242 last_otf_tag = script = Mnil;
243 if (init_script_list () < 0)
/* Same tag as the previous call: the cached result applies.  */
246 if (otf_tag == last_otf_tag)
248 last_otf_tag = otf_tag;
250 MPLIST_DO (plist, script_list)
252 MPlist *pl = MPLIST_PLIST (plist), *p;
/* Walk to the fourth element of the entry, stopping early when the
   entry is too short.  */
254 if (pl /* script name */
255 && (p = MPLIST_NEXT (pl)) /* language tag */
256 && ! MPLIST_TAIL_P (p)
257 && (p = MPLIST_NEXT (p)) /* char list */
258 && ! MPLIST_TAIL_P (p)
259 && (p = MPLIST_NEXT (p)) /* otf tag */
260 && MPLIST_SYMBOL_P (p)
261 && otf_tag == MPLIST_SYMBOL (p))
/* Match: the first element of the entry is the script name.  */
263 script = MPLIST_SYMBOL (pl);
270 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
/* Symbols named "iso639-1" and "iso639-2".  They are assigned with
   msymbol () in the same place where the Mlanguage serializer
   properties are registered.  */
275 MSymbol Miso639_1, Miso639_2;
280 @brief List 3-letter language codes.
282 The mlanguage_list () function returns a well-formed plist whose
283 keys are #Msymbol and values are symbols whose names are ISO639-2
284 3-letter language codes.
287 This function returns a plist. The caller should free it by
288 m17n_object_unref ().
294 @brief 3
\e$BJ8;z8@8l%3!<%I$r%j%9%H$9$k
\e(B.
296 \e$B4X?t
\e(B mlanguage_list ()
\e$B$O!"@07A<0
\e(B (well-formed) plist
\e$B$rJV$9!#3F%-!<
\e(B
297 \e$B$O
\e(B #Msymbol
\e$B$G$"$j!"8D!9$NCM$O
\e(B ISO639-2
\e$B$KDj$a$i$l$?
\e(B3
\e$BJ8;z8@8l%3!<
\e(B
298 \e$B%I$rL>A0$H$9$k%7%s%\%k$G$"$k!#
\e(B
301 \e$B$3$N4X?t$,JV$9
\e(B plist
\e$B$O!"8F$S=P$7B&$,
\e(B m17n_object_unref ()
\e$B$r;H$C
\e(B
302 \e$B$F2rJ|$9$kI,MW$,$"$k!#
\e(B
/* Build a new plist with one element per entry of language_list.
   Each new element has the key Msymbol and, as its value, the value
   of the first element of the corresponding entry.  */
308 mlanguage_list (void)
310 MPlist *plist, *pl, *p, *p0;
/* A negative result from init_language_list () is the failure case.
   (The first half of this condition is not visible here.)  */
313 && init_language_list () < 0)
/* PLIST keeps the head of the new list; PL is updated with the result
   of each mplist_add () call and passed to the next one.  */
315 plist = pl = mplist ();
316 MPLIST_DO (p, language_list)
318 p0 = MPLIST_PLIST (p);
319 pl = mplist_add (pl, Msymbol, MPLIST_VAL (p0));
327 @brief Get a language code.
329 The mlanguage_code () function returns a symbol whose name is the
330 ISO639 language code of $LANGUAGE. $LANGUAGE is a symbol whose
331 name is an ISO639-2 3-letter language code, an ISO639-1 2-letter
332 language code, or an English word.
334 $LEN specifies the type of the returned language code. If it is
335 3, an ISO639-2 3-letter language code is returned. If it is 2, an
336 ISO639-1 2-letter language code is returned when defined;
337 otherwise #Mnil is returned. If it is 0, a 2-letter code is
338 returned when defined; otherwise a 3-letter code is returned.
341 If the information is available, this function returns a non-#Mnil
342 symbol. Otherwise, it returns #Mnil.
345 mlanguage_name (), mlanguage_text (). */
348 @brief
\e$B8@8l%3!<%I$rF@$k
\e(B.
350 \e$B4X?t
\e(B mlanguage_code ()
\e$B$O!"
\e(B$LANGUAGE
\e$B$KBP1~$7$?
\e(B ISO-639
\e$B8@8l%3!<%I
\e(B
351 \e$B$,L>A0$G$"$k$h$&$J%7%s%\%k$rJV$9!#
\e(B$LANGUAGE
\e$B$O%7%s%\%k$G$"$j!"$=$N
\e(B
352 \e$BL>A0$O!"
\e(BISO639-2 3
\e$BJ8;z8@8l%3!<%I!"
\e(BISO639-1 2
\e$BJ8;z8@8l%3!<%I!"1Q8lL>!"
\e(B
353 \e$B$N$$$:$l$+$G$"$k!#
\e(B
355 $LEN
\e$B$OJV$5$l$k8@8l%3!<%I$N<oN`$r7hDj$9$k!#
\e(B$LEN
\e$B$,
\e(B3
\e$B$N>l9g$O
\e(B
356 ISO639-2 3
\e$BJ8;z8@8l%3!<%I$,JV$5$l$k!#
\e(B2
\e$B$N>l9g$O!"$b$7Dj5A$5$l$F$$$l
\e(B
357 \e$B$P
\e(B ISO639-1 2
\e$BJ8;z8@8l%3!<%I$,!"$=$&$G$J$1$l$P
\e(B #Mnil
\e$B$,JV$5$l$k!#
\e(B0
358 \e$B$N>l9g$O!"$b$7Dj5A$5$l$F$$$l$P
\e(B2
\e$BJ8;z%3!<%I$,!"$=$&$G$J$1$l$P
\e(B3
\e$BJ8;z%3!<
\e(B
359 \e$B%I$,JV$5$l$k!#
\e(B
362 \e$B$b$7>pJs$,F@$i$l$l$P!"$3$N4X?t$O
\e(B #Mnil
\e$B0J30$N%7%s%\%k$rJV$9!#$=$&
\e(B
363 \e$B$G$J$1$l$P
\e(B #Mnil
\e$B$rJV$9!#
\e(B
366 mlanguage_name (), mlanguage_text (). */
/* Return a language code symbol for LANGUAGE, chosen according to
   LEN, from the info plist returned by mlanguage__info ().
   NOTE(review): several lines (including the handling of a missing
   info plist and of the LEN value that selects the first element)
   are not visible in this excerpt.  */
369 mlanguage_code (MSymbol language, int len)
371 MPlist *plist = mlanguage__info (language);
/* The first element of the info plist must be a symbol; it is kept in
   CODE.  */
376 if (! MPLIST_SYMBOL_P (plist))
378 code = MPLIST_SYMBOL (plist);
/* Second element: returned when it is a symbol other than Mnil.
   Otherwise the result is CODE when LEN is 0, and Mnil for any other
   LEN that reaches this point.  */
381 plist = MPLIST_NEXT (plist);
382 return ((MPLIST_SYMBOL_P (plist) && MPLIST_SYMBOL (plist) != Mnil)
383 ? MPLIST_SYMBOL (plist)
384 : len == 0 ? code : Mnil);
390 @brief Get an English language name.
392 The mlanguage_name () function returns a symbol whose name is an
393 English name of $LANGUAGE. $LANGUAGE is a symbol whose name is an
394 ISO639-2 3-letter language code, an ISO639-1 2-letter language
395 code, or an English word.
398 If the information is available, this function returns a non-#Mnil
399 symbol. Otherwise, it returns #Mnil.
402 mlanguage_code (), mlanguage_text (). */
405 @brief
\e$B8@8l$N1Q8lL>$rF@$k
\e(B.
407 \e$B4X?t
\e(B mlanguage_name ()
\e$B$O!"
\e(B$LANGUAGE
\e$B$N1Q8lL>$rL>A0$H$9$k$h$&$J%7
\e(B
408 \e$B%s%\%k$rJV$9!#
\e(B$LANGUAGE
\e$B$O%7%s%\%k$G$"$j!"$=$NL>A0$O!"
\e(BISO639-2 3
\e$BJ8
\e(B
409 \e$B;z8@8l%3!<%I!"
\e(BISO639-1 2
\e$BJ8;z8@8l%3!<%I!"1Q8lL>!"$N$$$:$l$+$G$"$k!#
\e(B
412 \e$B5a$a$F$$$k>pJs$,F@$i$l$k$J$i!"$3$N4X?t$O
\e(B #Mnil
\e$B0J30$N%7%s%\%k$rJV
\e(B
413 \e$B$9!#$=$&$G$J$1$l$P
\e(B #Mnil
\e$B$rJV$9!#
\e(B
416 mlanguage_code (), mlanguage_text (). */
/* Return the symbol stored as the third element of the info plist of
   LANGUAGE (labelled "english name" below).  NOTE(review): the
   return statements of the failure branches are not visible in this
   excerpt.  */
419 mlanguage_name (MSymbol language)
421 MPlist *plist = mlanguage__info (language);
/* No information about LANGUAGE.  */
423 if (! plist) /* 3-letter code */
425 plist = MPLIST_NEXT (plist); /* 2-letter code */
/* The info plist ends after its first element.  */
426 if (MPLIST_TAIL_P (plist))
428 plist = MPLIST_NEXT (plist); /* english name */
/* The third element is missing or is not a symbol.  */
429 if (! MPLIST_SYMBOL_P (plist))
431 return MPLIST_SYMBOL (plist);
437 @brief Return the language name written in that language.
439 The mlanguage_text () function returns, in the form of M-text, the
440 language name of $LANGUAGE written in $LANGUAGE. If the
441 representative characters of the language are known, the
442 characters of the returned M-text have a text property whose key is
443 #Mtext and whose value is an M-text that contains the
444 representative characters.
447 If the information is available, this function returns an M-text
448 that should not be modified nor freed. Otherwise, it returns @c
452 mlanguage_code (), mlanguage_name (). */
455 @brief
\e$BM?$($i$l$?8@8l<+?H$G=q$+$l$?8@8lL>$rJV$9
\e(B.
457 \e$B4X?t
\e(B mlanguage_text ()
\e$B$O!"8@8l
\e(B $LANGUAGE
\e$B$G=q$+$l$?
\e(B $LANGUAGE
\e$B$N
\e(B
458 \e$BL>A0$r
\e(B M-text
\e$B$N7A<0$GJV$9!#$=$N8@8l$NBeI=E*$JJ8;z$,$o$+$C$F$$$k>l
\e(B
459 \e$B9g$O!"JV$5$l$k
\e(B M-text
\e$B$N3FJ8;z$K!"%-!<$,
\e(B #Mtext
\e$B$GCM$,$=$NBeI=E*$J
\e(B
460 \e$BJ8;z$r4^$`
\e(B M-text
\e$B$G$"$k$h$&$J%F%-%9%H%W%m%Q%F%#$,IU2C$5$l$k!#
\e(B
463 \e$B5a$a$k>pJs$,F@$i$l$?>l9g!"$3$N4X?t$,JV$9
\e(B M-text
\e$B$rJQ99$7$?$j2rJ|$7
\e(B
464 \e$B$?$j$7$F$O$$$1$J$$!#>pJs$,F@$i$l$J$+$C$?>l9g$O
\e(B @c NULL
\e$B$,JV$5$l$k!#
\e(B
467 mlanguage_code (), mlanguage_name (). */
/* Fetch the M-text stored as the fourth element of the info plist of
   LANGUAGE.  When the fifth element is also an M-text, it is attached
   to the fourth one as an Mtext text property.  NOTE(review): the
   declaration of MT, the check for a missing info plist and all
   return statements are not visible in this excerpt.  */
470 mlanguage_text (MSymbol language)
472 MPlist *plist = mlanguage__info (language);
/* Step to the second element.  */
477 plist = MPLIST_NEXT (plist);
478 if (MPLIST_TAIL_P (plist))
/* Step to the third element.  */
480 plist = MPLIST_NEXT (plist);
481 if (MPLIST_TAIL_P (plist))
/* Step to the fourth element, which must be an M-text.  */
483 plist = MPLIST_NEXT (plist);
484 if (! MPLIST_MTEXT_P (plist))
486 mt = MPLIST_MTEXT (plist);
/* An M-text with no characters is handled separately.  */
487 if (mtext_nchars (mt) == 0)
/* Fifth element: when it is an M-text and MT has no Mtext property at
   position 0 yet, put it on the whole of MT.  MT is the object held
   in the info plist, so the property is added to that object.  */
489 plist = MPLIST_NEXT (plist);
490 if (MPLIST_MTEXT_P (plist)
491 && ! mtext_get_prop (mt, 0, Mtext))
492 mtext_put_prop (mt, 0, mtext_nchars (mt), Mtext, MPLIST_MTEXT (plist));
497 @brief List script names.
499 The mscript_list () function returns a well-formed plist whose
500 keys are #Msymbol and values are symbols whose names are script
504 This function returns a plist. The caller should free it by
505 m17n_object_unref ().
508 mscript_language_list (), mlanguage_list (). */
511 @brief
\e$B%9%/%j%W%HL>$r%j%9%H$9$k
\e(B.
513 \e$B4X?t
\e(B mscript_list ()
\e$B$O!"@07A<0
\e(B (well-formed) plist
\e$B$rJV$9!#3F%-!<
\e(B
514 \e$B$O
\e(B #Msymbol
\e$B$G$"$j!"8D!9$NCM$O%9%/%j%W%HL>$rL>A0$H$9$k%7%s%\%k$G$"
\e(B
518 \e$B$3$N4X?t$,JV$9
\e(B plist
\e$B$O!"8F$S=P$7B&$,
\e(B m17n_object_unref ()
\e$B$r;H$C
\e(B
519 \e$B$F2rJ|$9$kI,MW$,$"$k!#
\e(B
522 mscript_language_list (), mlanguage_list (). */
/* NOTE(review): the function header is not visible in this excerpt;
   the preceding documentation describes mscript_list ().  The body
   builds a new plist with one element per entry of script_list.
   Each new element has the key Msymbol and, as its value, the value
   of the first element of the corresponding entry.  */
527 MPlist *plist, *pl, *p, *p0;
/* A negative result from init_script_list () is the failure case.
   (The first half of this condition is not visible here.)  */
530 && init_script_list () < 0)
/* PLIST keeps the head of the new list; PL is updated with the result
   of each mplist_add () call and passed to the next one.  */
532 plist = pl = mplist ();
533 MPLIST_DO (p, script_list)
535 p0 = MPLIST_PLIST (p);
536 pl = mplist_add (pl, Msymbol, MPLIST_VAL (p0));
544 @brief List languages that use a specified script.
546 The mscript_language_list () function lists languages that use
547 $SCRIPT. $SCRIPT is a symbol whose name is the lower-cased
548 version of a script name that appears in the Unicode Character
553 This function returns a well-formed plist whose keys are #Msymbol
554 and values are symbols whose names are ISO639-1 2-letter codes (or
555 ISO639-2 3-letter codes, if the former is not available). The
556 caller should not modify nor free it. If the m17n library does
557 not know about $SCRIPT, it returns @c NULL.
560 mscript_list (), mlanguage_list (). */
563 @brief
\e$BM?$($i$l$?%9%/%j%W%H$rMQ$$$k8@8l$r%j%9%H$9$k
\e(B.
565 \e$B4X?t
\e(B mscript_language_list ()
\e$B$O!"
\e(B$SCRIPT
\e$B$rMQ$$$k8@8l$r%j%9%H$9$k!#
\e(B
566 $SCRIPT
\e$B$O%7%s%\%k$G!"$=$NL>A0$O
\e(B Unicode Character Database
\e$B$K<($5
\e(B
567 \e$B$l$F$$$k%9%/%j%W%HL>$r$9$Y$F>.J8;z$K$7$?$b$N$G$"$k!#
\e(B
569 @return
\e$B$3$N4X?t$O!"@07A<0
\e(B (well-formed) plist
\e$B$rJV$9!#3F%-!<$O
\e(B
570 #Msymbol
\e$B$G$"$j!"8D!9$NCM$O
\e(B ISO639-1
\e$B$KDj$a$i$l$?
\e(B2
\e$BJ8;z8@8l%3!<%I
\e(B
571 (
\e$BDj5A$5$l$F$$$J$$>l9g$O
\e(B ISO639-2
\e$B$KDj$a$i$l$?
\e(B3
\e$BJ8;z8@8l%3!<%I
\e(B)
\e$B$rL>
\e(B
572 \e$BA0$H$9$k%7%s%\%k$G$"$k!#JV$5$l$k
\e(B plist
\e$B$OJQ99$7$?$j2rJ|$7$?$j$7$F
\e(B
573 \e$B$O$J$i$J$$!#
\e(B$SCRIPT
\e$B$,L$CN$N>l9g$O
\e(B @c NULL
\e$B$,JV$5$l$k!#
\e(B
576 mscript_list (), mlanguage_list (). */
/* Return the nested plist stored as the second element of the info
   plist of SCRIPT (labelled "language list" below).  It is returned
   only when the info plist exists and its second element is a plist.
   NOTE(review): the fallback return is not visible in this
   excerpt.  */
580 mscript_language_list (MSymbol script)
582 MPlist *plist = mscript__info (script);
584 if (plist /* script name */
585 && (plist = MPLIST_NEXT (plist)) /* language list */
586 && MPLIST_PLIST_P (plist))
587 return MPLIST_PLIST (plist);