From f8cb23d378c88ac09d72e6cef1dbc0ae014c2841 Mon Sep 17 00:00:00 2001 From: handa Date: Thu, 10 May 2007 02:25:34 +0000 Subject: [PATCH] New file. --- LANGDATA/ISO-639-2.txt | 484 ++++++++++++++++++++++++++++++++++++++++++++++++ LANGDATA/native.txt | 366 ++++++++++++++++++++++++++++++++++++ LANGNAME.awk | 130 +++++++++++++ LANGUAGE.awk | 98 ++++++++++ 4 files changed, 1078 insertions(+) create mode 100644 LANGDATA/ISO-639-2.txt create mode 100644 LANGDATA/native.txt create mode 100644 LANGNAME.awk create mode 100644 LANGUAGE.awk diff --git a/LANGDATA/ISO-639-2.txt b/LANGDATA/ISO-639-2.txt new file mode 100644 index 0000000..f1bf7fb --- /dev/null +++ b/LANGDATA/ISO-639-2.txt @@ -0,0 +1,484 @@ +aar||aa|Afar|afar +abk||ab|Abkhazian|abkhaze +ace|||Achinese|aceh +ach|||Acoli|acoli +ada|||Adangme|adangme +ady|||Adyghe; Adygei|adyghé +afa|||Afro-Asiatic (Other)|afro-asiatiques, autres langues +afh|||Afrihili|afrihili +afr||af|Afrikaans|afrikaans +ain|||Ainu|aïnou +aka||ak|Akan|akan +akk|||Akkadian|akkadien +alb|sqi|sq|Albanian|albanais +ale|||Aleut|aléoute +alg|||Algonquian languages|algonquines, langues +alt|||Southern Altai|altai du Sud +amh||am|Amharic|amharique +ang|||English, Old (ca.450-1100)|anglo-saxon (ca.450-1100) +anp|||Angika|angika +apa|||Apache languages|apache +ara||ar|Arabic|arabe +arc|||Aramaic|araméen +arg||an|Aragonese|aragonais +arm|hye|hy|Armenian|arménien +arn|||Mapudungun; Mapuche|mapudungun; mapuche; mapuce +arp|||Arapaho|arapaho +art|||Artificial (Other)|artificielles, autres langues +arw|||Arawak|arawak +asm||as|Assamese|assamais +ast|||Asturian; Bable|asturien; bable +ath|||Athapascan languages|athapascanes, langues +aus|||Australian languages|australiennes, langues +ava||av|Avaric|avar +ave||ae|Avestan|avestique +awa|||Awadhi|awadhi +aym||ay|Aymara|aymara +aze||az|Azerbaijani|azéri +bad|||Banda languages|banda, langues +bai|||Bamileke languages|bamilékés, langues +bak||ba|Bashkir|bachkir +bal|||Baluchi|baloutchi +bam||bm|Bambara|bambara 
+ban|||Balinese|balinais +baq|eus|eu|Basque|basque +bas|||Basa|basa +bat|||Baltic (Other)|baltiques, autres langues +bej|||Beja|bedja +bel||be|Belarusian|biélorusse +bem|||Bemba|bemba +ben||bn|Bengali|bengali +ber|||Berber (Other)|berbères, autres langues +bho|||Bhojpuri|bhojpuri +bih||bh|Bihari|bihari +bik|||Bikol|bikol +bin|||Bini; Edo|bini; edo +bis||bi|Bislama|bichlamar +bla|||Siksika|blackfoot +bnt|||Bantu (Other)|bantoues, autres langues +bos||bs|Bosnian|bosniaque +bra|||Braj|braj +bre||br|Breton|breton +btk|||Batak languages|batak, langues +bua|||Buriat|bouriate +bug|||Buginese|bugi +bul||bg|Bulgarian|bulgare +bur|mya|my|Burmese|birman +byn|||Blin; Bilin|blin; bilen +cad|||Caddo|caddo +cai|||Central American Indian (Other)|indiennes d'Amérique centrale, autres langues +car|||Galibi Carib|karib; galibi; carib +cat||ca|Catalan; Valencian|catalan; valencien +cau|||Caucasian (Other)|caucasiennes, autres langues +ceb|||Cebuano|cebuano +cel|||Celtic (Other)|celtiques, autres langues +cha||ch|Chamorro|chamorro +chb|||Chibcha|chibcha +che||ce|Chechen|tchétchène +chg|||Chagatai|djaghataï +chi|zho|zh|Chinese|chinois +chk|||Chuukese|chuuk +chm|||Mari|mari +chn|||Chinook jargon|chinook, jargon +cho|||Choctaw|choctaw +chp|||Chipewyan|chipewyan +chr|||Cherokee|cherokee +chu||cu|Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic|slavon d'église; vieux slave; slavon liturgique; vieux bulgare +chv||cv|Chuvash|tchouvache +chy|||Cheyenne|cheyenne +cmc|||Chamic languages|chames, langues +cop|||Coptic|copte +cor||kw|Cornish|cornique +cos||co|Corsican|corse +cpe|||Creoles and pidgins, English based (Other)|créoles et pidgins anglais, autres +cpf|||Creoles and pidgins, French-based (Other)|créoles et pidgins français, autres +cpp|||Creoles and pidgins, Portuguese-based (Other)|créoles et pidgins portugais, autres +cre||cr|Cree|cree +crh|||Crimean Tatar; Crimean Turkish|tatar de Crimé +crp|||Creoles and pidgins (Other)|créoles et pidgins divers 
+csb|||Kashubian|kachoube +cus|||Cushitic (Other)|couchitiques, autres langues +cze|ces|cs|Czech|tchèque +dak|||Dakota|dakota +dan||da|Danish|danois +dar|||Dargwa|dargwa +day|||Land Dayak languages|dayak, langues +del|||Delaware|delaware +den|||Slave (Athapascan)|esclave (athapascan) +dgr|||Dogrib|dogrib +din|||Dinka|dinka +div||dv|Divehi; Dhivehi; Maldivian|maldivien +doi|||Dogri|dogri +dra|||Dravidian (Other)|dravidiennes, autres langues +dsb|||Lower Sorbian|bas-sorabe +dua|||Duala|douala +dum|||Dutch, Middle (ca.1050-1350)|néerlandais moyen (ca. 1050-1350) +dut|nld|nl|Dutch; Flemish|néerlandais; flamand +dyu|||Dyula|dioula +dzo||dz|Dzongkha|dzongkha +efi|||Efik|efik +egy|||Egyptian (Ancient)|égyptien +eka|||Ekajuk|ekajuk +elx|||Elamite|élamite +eng||en|English|anglais +enm|||English, Middle (1100-1500)|anglais moyen (1100-1500) +epo||eo|Esperanto|espéranto +est||et|Estonian|estonien +ewe||ee|Ewe|éwé +ewo|||Ewondo|éwondo +fan|||Fang|fang +fao||fo|Faroese|féroïen +fat|||Fanti|fanti +fij||fj|Fijian|fidjien +fil|||Filipino; Pilipino|filipino; pilipino +fin||fi|Finnish|finnois +fiu|||Finno-Ugrian (Other)|finno-ougriennes, autres langues +fon|||Fon|fon +fre|fra|fr|French|français +frm|||French, Middle (ca.1400-1600)|français moyen (1400-1600) +fro|||French, Old (842-ca.1400)|français ancien (842-ca.1400) +frr|||Northern Frisian|frison septentrional +frs|||Eastern Frisian|frison oriental +fry||fy|Western Frisian|frison occidental +ful||ff|Fulah|peul +fur|||Friulian|frioulan +gaa|||Ga|ga +gay|||Gayo|gayo +gba|||Gbaya|gbaya +gem|||Germanic (Other)|germaniques, autres langues +geo|kat|ka|Georgian|géorgien +ger|deu|de|German|allemand +gez|||Geez|guèze +gil|||Gilbertese|kiribati +gla||gd|Gaelic; Scottish Gaelic|gaélique; gaélique écossais +gle||ga|Irish|irlandais +glg||gl|Galician|galicien +glv||gv|Manx|manx; mannois +gmh|||German, Middle High (ca.1050-1500)|allemand, moyen haut (ca. 1050-1500) +goh|||German, Old High (ca.750-1050)|allemand, vieux haut (ca. 
750-1050) +gon|||Gondi|gond +gor|||Gorontalo|gorontalo +got|||Gothic|gothique +grb|||Grebo|grebo +grc|||Greek, Ancient (to 1453)|grec ancien (jusqu'à 1453) +gre|ell|el|Greek, Modern (1453-)|grec moderne (après 1453) +grn||gn|Guarani|guarani +gsw|||Swiss German; Alemannic|alémanique +guj||gu|Gujarati|goudjrati +gwi|||Gwich'in|gwich'in +hai|||Haida|haida +hat||ht|Haitian; Haitian Creole|haïtien; créole haïtien +hau||ha|Hausa|haoussa +haw|||Hawaiian|hawaïen +heb||he|Hebrew|hébreu +her||hz|Herero|herero +hil|||Hiligaynon|hiligaynon +him|||Himachali|himachali +hin||hi|Hindi|hindi +hit|||Hittite|hittite +hmn|||Hmong|hmong +hmo||ho|Hiri Motu|hiri motu +hsb|||Upper Sorbian|haut-sorabe +hun||hu|Hungarian|hongrois +hup|||Hupa|hupa +iba|||Iban|iban +ibo||ig|Igbo|igbo +ice|isl|is|Icelandic|islandais +ido||io|Ido|ido +iii||ii|Sichuan Yi|yi de Sichuan +ijo|||Ijo languages|ijo, langues +iku||iu|Inuktitut|inuktitut +ile||ie|Interlingue|interlingue +ilo|||Iloko|ilocano +ina||ia|Interlingua (International Auxiliary Language Association)|interlingua (langue auxiliaire internationale) +inc|||Indic (Other)|indo-aryennes, autres langues +ind||id|Indonesian|indonésien +ine|||Indo-European (Other)|indo-européennes, autres langues +inh|||Ingush|ingouche +ipk||ik|Inupiaq|inupiaq +ira|||Iranian (Other)|iraniennes, autres langues +iro|||Iroquoian languages|iroquoises, langues (famille) +ita||it|Italian|italien +jav||jv|Javanese|javanais +jbo|||Lojban|lojban +jpn||ja|Japanese|japonais +jpr|||Judeo-Persian|judéo-persan +jrb|||Judeo-Arabic|judéo-arabe +kaa|||Kara-Kalpak|karakalpak +kab|||Kabyle|kabyle +kac|||Kachin; Jingpho|kachin; jingpho +kal||kl|Kalaallisut; Greenlandic|groenlandais +kam|||Kamba|kamba +kan||kn|Kannada|kannada +kar|||Karen languages|karen, langues +kas||ks|Kashmiri|kashmiri +kau||kr|Kanuri|kanouri +kaw|||Kawi|kawi +kaz||kk|Kazakh|kazakh +kbd|||Kabardian|kabardien +kha|||Khasi|khasi +khi|||Khoisan (Other)|khoisan, autres langues +khm||km|Central Khmer|khmer central 
+kho|||Khotanese|khotanais +kik||ki|Kikuyu; Gikuyu|kikuyu +kin||rw|Kinyarwanda|rwanda +kir||ky|Kirghiz; Kyrgyz|kirghiz +kmb|||Kimbundu|kimbundu +kok|||Konkani|konkani +kom||kv|Komi|kom +kon||kg|Kongo|kongo +kor||ko|Korean|coréen +kos|||Kosraean|kosrae +kpe|||Kpelle|kpellé +krc|||Karachay-Balkar|karatchai balkar +krl|||Karelian|carélien +kro|||Kru languages|krou, langues +kru|||Kurukh|kurukh +kua||kj|Kuanyama; Kwanyama|kuanyama; kwanyama +kum|||Kumyk|koumyk +kur||ku|Kurdish|kurde +kut|||Kutenai|kutenai +lad|||Ladino|judéo-espagnol +lah|||Lahnda|lahnda +lam|||Lamba|lamba +lao||lo|Lao|lao +lat||la|Latin|latin +lav||lv|Latvian|letton +lez|||Lezghian|lezghien +lim||li|Limburgan; Limburger; Limburgish|limbourgeois +lin||ln|Lingala|lingala +lit||lt|Lithuanian|lituanien +lol|||Mongo|mongo +loz|||Lozi|lozi +ltz||lb|Luxembourgish; Letzeburgesch|luxembourgeois +lua|||Luba-Lulua|luba-lulua +lub||lu|Luba-Katanga|luba-katanga +lug||lg|Ganda|ganda +lui|||Luiseno|luiseno +lun|||Lunda|lunda +luo|||Luo (Kenya and Tanzania)|luo (Kenya et Tanzanie) +lus|||Lushai|lushai +mac|mkd|mk|Macedonian|macédonien +mad|||Madurese|madourais +mag|||Magahi|magahi +mah||mh|Marshallese|marshall +mai|||Maithili|maithili +mak|||Makasar|makassar +mal||ml|Malayalam|malayalam +man|||Mandingo|mandingue +mao|mri|mi|Maori|maori +map|||Austronesian (Other)|malayo-polynésiennes, autres langues +mar||mr|Marathi|marathe +mas|||Masai|massaï +may|msa|ms|Malay|malais +mdf|||Moksha|moksa +mdr|||Mandar|mandar +men|||Mende|mendé +mga|||Irish, Middle (900-1200)|irlandais moyen (900-1200) +mic|||Mi'kmaq; Micmac|mi'kmaq; micmac +min|||Minangkabau|minangkabau +mis|||Miscellaneous languages|diverses, langues +mkh|||Mon-Khmer (Other)|môn-khmer, autres langues +mlg||mg|Malagasy|malgache +mlt||mt|Maltese|maltais +mnc|||Manchu|mandchou +mni|||Manipuri|manipuri +mno|||Manobo languages|manobo, langues +moh|||Mohawk|mohawk +mol||mo|Moldavian|moldave +mon||mn|Mongolian|mongol +mos|||Mossi|moré +mul|||Multiple languages|multilingue 
+mun|||Munda languages|mounda, langues +mus|||Creek|muskogee +mwl|||Mirandese|mirandais +mwr|||Marwari|marvari +myn|||Mayan languages|maya, langues +myv|||Erzya|erza +nah|||Nahuatl languages|nahuatl, langues +nai|||North American Indian|indiennes d'Amérique du Nord, autres langues +nap|||Neapolitan|napolitain +nau||na|Nauru|nauruan +nav||nv|Navajo; Navaho|navaho +nbl||nr|Ndebele, South; South Ndebele|ndébélé du Sud +nde||nd|Ndebele, North; North Ndebele|ndébélé du Nord +ndo||ng|Ndonga|ndonga +nds|||Low German; Low Saxon; German, Low; Saxon, Low|bas allemand; bas saxon; allemand, bas; saxon, bas +nep||ne|Nepali|népalais +new|||Nepal Bhasa; Newari|nepal bhasa; newari +nia|||Nias|nias +nic|||Niger-Kordofanian (Other)|nigéro-congolaises, autres langues +niu|||Niuean|niué +nno||nn|Norwegian Nynorsk; Nynorsk, Norwegian|norvégien nynorsk; nynorsk, norvégien +nob||nb|Bokmål, Norwegian; Norwegian Bokmål|norvégien bokmål +nog|||Nogai|nogaï; nogay +non|||Norse, Old|norrois, vieux +nor||no|Norwegian|norvégien +nso|||Pedi; Sepedi; Northern Sotho|pedi; sepedi; sotho du Nord +nub|||Nubian languages|nubiennes, langues +nwc|||Classical Newari; Old Newari; Classical Nepal Bhasa|newari classique +nya||ny|Chichewa; Chewa; Nyanja|chichewa; chewa; nyanja +nym|||Nyamwezi|nyamwezi +nyn|||Nyankole|nyankolé +nyo|||Nyoro|nyoro +nzi|||Nzima|nzema +oci||oc|Occitan (post 1500); Provençal|occitan (après 1500); provençal +oji||oj|Ojibwa|ojibwa +ori||or|Oriya|oriya +orm||om|Oromo|galla +osa|||Osage|osage +oss||os|Ossetian; Ossetic|ossète +ota|||Turkish, Ottoman (1500-1928)|turc ottoman (1500-1928) +oto|||Otomian languages|otomangue, langues +paa|||Papuan (Other)|papoues, autres langues +pag|||Pangasinan|pangasinan +pal|||Pahlavi|pahlavi +pam|||Pampanga|pampangan +pan||pa|Panjabi; Punjabi|pendjabi +pap|||Papiamento|papiamento +pau|||Palauan|palau +peo|||Persian, Old (ca.600-400 B.C.)|perse, vieux (ca. 600-400 av. J.-C.) 
+per|fas|fa|Persian|persan +phi|||Philippine (Other)|philippines, autres langues +phn|||Phoenician|phénicien +pli||pi|Pali|pali +pol||pl|Polish|polonais +pon|||Pohnpeian|pohnpei +por||pt|Portuguese|portugais +pra|||Prakrit languages|prâkrit +pro|||Provençal, Old (to 1500)|provençal ancien (jusqu'à 1500) +pus||ps|Pushto|pachto +qaa-qtz|||Reserved for local use|réservée à l'usage local +que||qu|Quechua|quechua +raj|||Rajasthani|rajasthani +rap|||Rapanui|rapanui +rar|||Rarotongan; Cook Islands Maori|rarotonga; maori des îles Cook +roa|||Romance (Other)|romanes, autres langues +roh||rm|Romansh|romanche +rom|||Romany|tsigane +rum|ron|ro|Romanian|roumain +run||rn|Rundi|rundi +rup|||Aromanian; Arumanian; Macedo-Romanian|aroumain; macédo-roumain +rus||ru|Russian|russe +sad|||Sandawe|sandawe +sag||sg|Sango|sango +sah|||Yakut|iakoute +sai|||South American Indian (Other)|indiennes d'Amérique du Sud, autres langues +sal|||Salishan languages|salish, langues +sam|||Samaritan Aramaic|samaritain +san||sa|Sanskrit|sanskrit +sas|||Sasak|sasak +sat|||Santali|santal +scc|srp|sr|Serbian|serbe +scn|||Sicilian|sicilien +sco|||Scots|écossais +scr|hrv|hr|Croatian|croate +sel|||Selkup|selkoupe +sem|||Semitic (Other)|sémitiques, autres langues +sga|||Irish, Old (to 900)|irlandais ancien (jusqu'à 900) +sgn|||Sign Languages|langues des signes +shn|||Shan|chan +sid|||Sidamo|sidamo +sin||si|Sinhala; Sinhalese|singhalais +sio|||Siouan languages|sioux, langues +sit|||Sino-Tibetan (Other)|sino-tibétaines, autres langues +sla|||Slavic (Other)|slaves, autres langues +slo|slk|sk|Slovak|slovaque +slv||sl|Slovenian|slovène +sma|||Southern Sami|sami du Sud +sme||se|Northern Sami|sami du Nord +smi|||Sami languages (Other)|sami, autres langues +smj|||Lule Sami|sami de Lule +smn|||Inari Sami|sami d'Inari +smo||sm|Samoan|samoan +sms|||Skolt Sami|sami skolt +sna||sn|Shona|shona +snd||sd|Sindhi|sindhi +snk|||Soninke|soninké +sog|||Sogdian|sogdien +som||so|Somali|somali +son|||Songhai languages|songhai, langues 
+sot||st|Sotho, Southern|sotho du Sud +spa||es|Spanish; Castilian|espagnol; castillan +srd||sc|Sardinian|sarde +srn|||Sranan Tongo|sranan tongo +srr|||Serer|sérère +ssa|||Nilo-Saharan (Other)|nilo-sahariennes, autres langues +ssw||ss|Swati|swati +suk|||Sukuma|sukuma +sun||su|Sundanese|soundanais +sus|||Susu|soussou +sux|||Sumerian|sumérien +swa||sw|Swahili|swahili +swe||sv|Swedish|suédois +syr|||Syriac|syriaque +tah||ty|Tahitian|tahitien +tai|||Tai (Other)|thaïes, autres langues +tam||ta|Tamil|tamoul +tat||tt|Tatar|tatar +tel||te|Telugu|télougou +tem|||Timne|temne +ter|||Tereno|tereno +tet|||Tetum|tetum +tgk||tg|Tajik|tadjik +tgl||tl|Tagalog|tagalog +tha||th|Thai|thaï +tib|bod|bo|Tibetan|tibétain +tig|||Tigre|tigré +tir||ti|Tigrinya|tigrigna +tiv|||Tiv|tiv +tkl|||Tokelau|tokelau +tlh|||Klingon; tlhIngan-Hol|klingon +tli|||Tlingit|tlingit +tmh|||Tamashek|tamacheq +tog|||Tonga (Nyasa)|tonga (Nyasa) +ton||to|Tonga (Tonga Islands)|tongan (Îles Tonga) +tpi|||Tok Pisin|tok pisin +tsi|||Tsimshian|tsimshian +tsn||tn|Tswana|tswana +tso||ts|Tsonga|tsonga +tuk||tk|Turkmen|turkmène +tum|||Tumbuka|tumbuka +tup|||Tupi languages|tupi, langues +tur||tr|Turkish|turc +tut|||Altaic (Other)|altaïques, autres langues +tvl|||Tuvalu|tuvalu +twi||tw|Twi|twi +tyv|||Tuvinian|touva +udm|||Udmurt|oudmourte +uga|||Ugaritic|ougaritique +uig||ug|Uighur; Uyghur|ouïgour +ukr||uk|Ukrainian|ukrainien +umb|||Umbundu|umbundu +und|||Undetermined|indéterminée +urd||ur|Urdu|ourdou +uzb||uz|Uzbek|ouszbek +vai|||Vai|vaï +ven||ve|Venda|venda +vie||vi|Vietnamese|vietnamien +vol||vo|Volapük|volapük +vot|||Votic|vote +wak|||Wakashan languages|wakashennes, langues +wal|||Walamo|walamo +war|||Waray|waray +was|||Washo|washo +wel|cym|cy|Welsh|gallois +wen|||Sorbian languages|sorabes, langues +wln||wa|Walloon|wallon +wol||wo|Wolof|wolof +xal|||Kalmyk; Oirat|kalmouk; oïrat +xho||xh|Xhosa|xhosa +yao|||Yao|yao +yap|||Yapese|yapois +yid||yi|Yiddish|yiddish +yor||yo|Yoruba|yoruba +ypk|||Yupik languages|yupik, langues 
+zap|||Zapotec|zapotèque +zen|||Zenaga|zenaga +zha||za|Zhuang; Chuang|zhuang; chuang +znd|||Zande languages|zandé, langues +zul||zu|Zulu|zoulou +zun|||Zuni|zuni +zxx|||No linguistic content|pas de contenu linguistique +nqo||NULL|N'Ko|n'ko +zza|NULL||Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki|zaza; dimili; dimli; kirdki; kirmanjki; zazaki diff --git a/LANGDATA/native.txt b/LANGDATA/native.txt new file mode 100644 index 0000000..77bf209 --- /dev/null +++ b/LANGDATA/native.txt @@ -0,0 +1,366 @@ +## Copyright (C) 2007 -*- coding:utf-8; -*- +## National Institute of Advanced Industrial Science and Technology (AIST) +## Registration Number H15PRO112 + +## This file is part of the m17n database; a sub-part of the m17n +## library. + +## The m17n library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public License +## as published by the Free Software Foundation; either version 2.1 of +## the License, or (at your option) any later version. + +## The m17n library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. + +## You should have received a copy of the GNU Lesser General Public +## License along with the m17n library; if not, write to the Free +## Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +## Boston, MA 02110-1301, USA. +## +## Each line of this file has this format: +## ISO639-2 | NATIVE-NAME [ | EXTRA-CHARS ] +## ISO639-2: 3-letter language code of ISO639-2. +;; NATIVE-NAME: Native name of the language extracted from: +;; EXTRA-CHARS: Extra characters that uniquifies the language. 
+;; +;; NATIVE-NAME is mainly got from these sites: +;; http://www.unicode.org/Public/cldr/cldr1.2.zip +;; http://www.geonames.de/ +;; http://www.omniglot.com/language/names.htm +;; http://www.mediaglyphs.org/mg/p/langnames.html +;; Other sources of NATIVE-NAME are: +;; herero, kara-kalpak, kanuri, kuanyama, luba-katanga, ganda, ndonga, +;; Southern/Nothern/LulæInari/Skolt Sami, Sundanese, Twi +;; from http://www.rtt.org/ISO/TC37/SC2/WG1/639/ISO639-identifiers.html +;; +;; cebuano, ewondo, panpangan, rarotongan, zapotec, kikuyu, chuukese +;; chinook, hiligaynon, iloko, khasi, kosraean, kru, lamba, mossi, +;; neapolitan, +;; from http://www.christusrex.org/www1/pater/ +;; +;; lojban from http://www.lojban.org/ +;; interlingue from http://www.interlingue.org/ +;; interlingua from http://www.interlingua.com/ +;; afrihili from http://www.langmaker.com +;; fula from http://people.w3.org/rishida/names/languages.html +;; klingon from http://www.geocities.com/hippietrail/langtable.html +;; kachin from http://kachinnews.com +;; mirandese from http://mirandes.no.sapo.pt/ +;; masai from http://darkwing.uoregon.edu/~dlpayne/maasai/maling.htm +;; nias from +;; http://www.rosettaproject.org:8080/emeldbase/NIP/reply_html?comid=131 +;; osage from http://www.osage-ncoa.org/pages/language.shtml +;; yao from http://www.sim.org/PG.asp?pgID=67&fun=1 + +aar|Qafar +abk|Абхазо +ady|адыгэбзэ +afh|El-Afrihili +afr|Afrikaans +aka|akana +akk|akkadû +alb|shqipe +sqi|shqipe +alt|алтай тил +amh|አማርኛ +ara|العربية +arc|בְּאַרָמִית +arg|aragonés +arm|Հայերէն +hye|Հայերէն +arn|Araucano +asm|অসমীয়া +ast|Asturianu +ava|магIарул мацI +aym|Aymará +aze|азәрбајҹанҹа +bak|башҡортса +bal|بلچي +bam|Bamanankan +ban|basa Bali +baq|euskara +eus|euskara +bel|Беларускі +bem|chiBemba +ben|বাংলা +bho|भोजपुरी +bis|Bislama +bos|Bošnjački +bre|ar brezhoneg +bua|буряад хэлэн +bul|Български +bur|ဴမန္မာစာ +mya|ဴမန္မာစာ +byn|ብሊን +cat|català +ceb|Bisayocebuano +cha|Chamoru +che|Нохчийн +chi|中文|ㄅㄆㄇ +zho|中文|ㄅㄆㄇ +chk|Chuk 
+chm|марий +chn|Chinook Wawa +chp|ᑌᓀᓲᒢᕄᓀ +chr|ᏣᎳᎩ +chu|ѩзыкъ словѣньскъ +chv|чӑваш чӗлхи +chy|Tsétsêhéstâhese +cop|μετν̀ρεμν̀χημι +cor|kernewek +cos|Corsu +cre|ᓀᐦᐃᔭᐍᐏᐣ +crh|Къырым татар +csb|kaszëbsczi jãzëk +cze|Čeština +ces|Čeština +dan|Dansk +dar|дарган мез +del|Lënape +din|Thuɔŋjäŋ +div|ދިވެހިބަސް +doi|डोगरी +dsb|dolnoserbski +dut|Nederlands +nld|Nederlands +dzo|རྫོང་ཁ +eng|English +epo|esperanto +est|Eesti keel +ewe|Ɛʋɛ +ewo|Ewondo +fao|Føroyska +fij|vosa Vakaviti +fil|Filipino +fin|suomi +fon|fongbe +fre|français +fra|français +fry|Frysk +ful|Fulfulde +fur|Furlan +geo|ქართული +kat|ქართული +ger|Deutsch +deu|Deutsch +gez|ግዕዝኛ +gil|Ikiribati +gla|Gàidhlig +gle|Gaeilge +glg|galego +glv|Gaelg +got|𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰 +gre|Ελληνικά +ell|Ελληνικά +grn|ava ñe'ê +guj|ગુજરાતી +hai|Xaad Kil +hat|kreyòl ayisyen +hau|هَوُسَ +haw|ʻōlelo Hawaiʻi +heb|עברית +her|otshiherero +hil|Ilongo +hin|हिंदी +hmn|Kuv tsis has lug Moob tau +hmo|Hiri Motu +hsb|hornjoserbsce +hun|magyar +ibo|Igbo +ice|Íslenska +isl|Íslenska +iku|ᐃᓄᒃᑎᑐᑦ +ile|Interlingue +ilo|Ilocano +ina|interlingua +ind|Bahasa Indonesia +inh|ГІалгІай +ipk|Ieupiatun +ita|italiano +jav|basa Jawa +jbo|lojban +jpn|日本語|あアー、。 +kaa|ҝараҝалпаҝ +kac|Kachin +kal|kalaallisut +kan|ಕನ್ನಡ +kas|कश्मीरी +kau|kanuri +kaz|Қазақ +kbd|къэбэрдеибзэ +kha|Khasi +khm|ភាសាខ្មែរ +kik|Gikũyũ +kin|kinyaRwanda +kir|Кыргыз +kok|कोंकणी +kom|коми кыв +kon|kiKongo +kor|한국어 +kos|Kosrae +krc|тилкъарачай-малкъар къарачай-малкъар +kro|Kru +kua|oshikwanyama +kum|къумукъ тил +kur|كوردي +lad|ladino +lam|ChiLamba +lao|ລາວ +lat|Latine +lav|latviešu valoda +lez|лезги чІал +lin|lingara +lit|Lietuvių kalba +loz|siLozi +ltz|Lëtzebuergesch +lub|tshiluba +lug|luganda +lun|chiLunda +luo|Dholuo +mac|македонски +mkd|македонски +mah|Ebon +mai|मैथिली +mal|മലയാളം +mao|te reo Māori +mri|te reo Māori +mar|मराठी +mas|Maa +may|Bahasa Melayu +msa|Bahasa Melayu +mic|Míkmaw +mlg|Malagasy +mlt|Malti +mni|बिष्नुप्रीय मणिपुरी +moh|Kanien'keha +mol|молдовеняскэлимба лимба +mon|Монгол хэл 
+mos|Mòoré +mwl|Mirandesa +myv|эрзянь кель +nah|nahuatlahtolli +nap|Napolitano +nau|Ekakairũ Naoero +nav|Diné +nbl|isiNdebele +nde|isiNdebele +ndo|oshindonga +nds|Plattdüütsch +nep|नेपाली +nia|Li Niha +niu|faka-Niue +nno|norsk nynorsk +nob|norsk bokmål +nog|ногай тили +non|norskr +nor|norsk +nso|sePedi +nya|Chicewa +oci|Occitan +oji|ᐊᓂᔑᓇᐯ +ori|ଓଡ଼ିଆ +orm|Oromoo +osa|Wazhazhe +oss|ирон ӕвзаг +pam|Panpango +pan|ਪੰਜਾਬੀ +pap|Papiamentu +pau|tekoi ra Belau +per|فارسی +fas|فارسی +pli|पािळ +pol|polski +por|português +pro|prouvençau +pus|پښتو +que|Runa Simi +rar|Rarotongan +roh|Rumantsch +rom|Romani šib +rum|Limba Română +ron|Limba Română +run|kiRubdi +rus|Русский +sag|sängö +sah|саха тыла +san|संस्कृत +sat|संथाली +scc|Српски +srp|Српски +scn|Siculo +sco|Lallans +scr|hrvatski +hrv|hrvatski +sel|селькуп +sid|Sidaamu Afo +sin|සිංහල +slo|slovenčina +slk|slovenčina +slv|slovenščina +sma|åarjelsaemiengïele +sme|davvisámegiella +smj|julevusámegiella +smn|aanaar kielâ +smo|le gagana Samoa +sms|sää'mǩiõll +sna|chiShona +snd|سنڌي +som|Soomaaliga +sot|seSotho +spa|español +srd|Susardu +ssw|siSwati +sun|bahasa Sunda +swa|Kiswahili +swe|svenska +syr|ܣܘܪܝܝܐ +tah|te reo Tahiti +tam|தமிழ் +tat|Татар +tel|తెలుగు +tet|tetun +tgk|Таҷикй +tgl|Tagalog +tha|ไทย +tib|བོད་སྐད་ +bod|བོད་སྐད་ +tig|ትግረ +tir|ትግርኛ +tlh|  +tmh|تَمَاشَقْ +tog|chiTonga +ton|faka-Tonga +tpi|Tok Pisin +tsn|seTswana +tso|xiTsonga +tuk|türkmençe +tur|Türkçe +tvl|'gana Tuvalu +twi|twi +tyv|тыва дыл +udm|удмурт кыл +uig|ﺋۇيغۇر +ukr|Українська +urd|اردو +uzb|Ўзбек +ven|tshiVenda +vie|Tiếng Việt +vol|Volapük +vot|vad'd'a tšeeli +wal|ወላይታቱ +wel|Cymraeg +cym|Cymraeg +wln|Walon +wol|Wolof +xal|хальмг келн +xho|isiXhosa +yao|chiYao +yid|ייִדיש +yor|Yorùbá +zap|Zapoteco +zha|Saw cuengh +znd|paZande +zul|isiZulu diff --git a/LANGNAME.awk b/LANGNAME.awk new file mode 100644 index 0000000..530ef1a --- /dev/null +++ b/LANGNAME.awk @@ -0,0 +1,130 @@ +# LANGNAME.awk -- awk script to generate LANGNAME.en and LANGNAME.fr +# 
Copyright (C) 2007
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H15PRO112
+
+# This file is part of the m17n database; a sub-part of the m17n
+# library.
+
+# The m17n library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+
+# The m17n library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the m17n library; if not, write to the Free
+# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+
+# Write the header comment of LANGNAME.<LANG>.  LANG has to be preset by
+# the caller (e.g. -v LANG=en); any value other than "en" selects French.
+BEGIN {
+  if (FS == " ") FS = "|";  # input is '|'-separated unless -F says otherwise
+  if (LANG == "en") {
+    Language = "English";
+    LANGUAGE = "ENGLISH";
+  } else {
+    Language = "French";
+    LANGUAGE = "FRENCH";
+  }
+  printf ";; LANGNAME.%s -- %s language names -*- mode:lisp; coding:utf-8; -*-\n", LANG, Language;
+  print ";; Copyright (C) 2007";
+  print ";; National Institute of Advanced Industrial Science and Technology (AIST)";
+  print ";; Registration Number H15PRO112";
+  print "";
+  print ";; This file is part of the m17n database; a sub-part of the m17n";
+  print ";; library.";
+  print "";
+  print ";; The m17n library is free software; you can redistribute it and/or";
+  print ";; modify it under the terms of the GNU Lesser General Public License";
+  print ";; as published by the Free Software Foundation; either version 2.1 of";
+  print ";; the License, or (at your option) any later version.";
+  print "";
+  print ";; The m17n library is distributed in the hope that it will be useful,";
+  print ";; but WITHOUT ANY WARRANTY; without even the implied warranty of";
+  print ";; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU";
+  print ";; Lesser General Public License for more details.";
+  print "";
+  print ";; You should have received a copy of the GNU Lesser General Public";
+  print ";; License along with the m17n library; if not, write to the Free";
+  print ";; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,";
+  print ";; Boston, MA 02110-1301, USA.";
+  print ";;";
+  print ";; Each line of this file has this form:";
+  printf ";; (ISO639-2 \"%s-NAME\" ...)\n", LANGUAGE;
+  print ";; ISO639-2: 3-letter language code of ISO639-2.";
+  printf ";; %s-NAME: %s name of the language\n", LANGUAGE, Language;
+  print ";; in ISO639-2 with the following modifications:";
+  if (LANG == "en") {
+    print ";; 'Greek, Modern (1453-)' is changed to 'Greek'";
+    print ";; Tailing ' (Other)' is deleted.";
+    print ";; Tailing ' languages' is deleted.";
+    print ";; Tailing part specifying the date (e.g. ' (ca.450-1100)') is deleted.";
+    print ";; Reorder, for instance, 'English, Old' to 'Old English'.";
+  } else {
+    print ";; 'grec moderne (après 1453)' is changed to 'grec'";
+    print ";; Tailing ', autres langues' is deleted.";
+    print ";; Tailing ', autres' is deleted.";
+    print ";; Tailing ', langues' is deleted.";
+    print ";; Tailing ', langues (famille)' is deleted.";
+    print ";; Tailing part specifying the date (e.g. ' (ca.450-1100)') is deleted.";
+    print ";; Reorder, for instance, 'saxon, bas' to 'bas saxon'.";
+  }
+  print ";; Delete duplicated names after the above modifications.";
+  print "";
+}
+
+# Return NAME as it should appear in the output: the trailing category
+# note ("(Other)", "languages", ", autres langues", ...) and a trailing
+# parenthesized date are removed and "X, Y" is reordered to "Y X".
+# FRENCH is true when NAME comes from the French column.  Regexp
+# constants are used because "\(" in a string constant is not portable.
+function simplify(name, french,    parts) {
+  if (french) {
+    # Modern Greek is special-cased to the bare language name.
+    if (name == "grec moderne (après 1453)")
+      return "grec";
+    sub(/, autres langues$/, "", name);
+    sub(/, autres$/, "", name);
+    sub(/, langues$/, "", name);
+    sub(/, langues \(famille\)$/, "", name);
+  } else {
+    if (name == "Greek, Modern (1453-)")
+      return "Greek";
+    sub(/ \(Other\)$/, "", name);
+    sub(/ languages$/, "", name);
+  }
+  if (name ~ /\(.*[0-9].*\)$/)
+    sub(/ \([^)]*\)$/, "", name);
+  if (name ~ /, /) {
+    split(name, parts, ", ");
+    name = parts[2] " " parts[1];
+  }
+  return name;
+}
+
+# For every record print (CODE "NAME" ...), keeping the listed order and
+# skipping names that became duplicates after simplification.
+{
+  for (name in seen)
+    delete seen[name];
+  count = split(LANG == "en" ? $4 : $5, listed, "; ");
+  printf "(%s", $1;
+  for (i = 1; i <= count; i++) {
+    name = simplify(listed[i], LANG != "en");
+    if (!(name in seen)) {
+      seen[name] = 1;
+      printf " \"%s\"", name;
+    }
+  }
+  printf ")\n";
+}
+
+# Local Variables:
+# coding: utf-8
+# End:
diff --git a/LANGUAGE.awk b/LANGUAGE.awk
new file mode 100644
index 0000000..07c9302
--- /dev/null
+++ b/LANGUAGE.awk
@@ -0,0 +1,98 @@
+# LANGUAGE.awk -- awk script to generate LANGUAGE.tbl
+# Copyright (C) 2007
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H15PRO112
+
+# This file is part of the m17n database; a sub-part of the m17n
+# library.
+
+# The m17n library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+
+# The m17n library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the m17n library; if not, write to the Free
+# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+
+# Print the LANGUAGE.tbl header, then index LANGDATA/native.txt by code.
+BEGIN {
+  if (FS == " ") FS = "|";  # input is '|'-separated unless -F says otherwise
+  print ";; LANGUAGE.tbl -- ISO639 Language Code -*- mode:lisp; coding:utf-8; -*-";
+  print ";; Copyright (C) 2007";
+  print ";; National Institute of Advanced Industrial Science and Technology (AIST)";
+  print ";; Registration Number H15PRO112";
+  print "";
+  print ";; This file is part of the m17n database; a sub-part of the m17n";
+  print ";; library.";
+  print "";
+  print ";; The m17n library is free software; you can redistribute it and/or";
+  print ";; modify it under the terms of the GNU Lesser General Public License";
+  print ";; as published by the Free Software Foundation; either version 2.1 of";
+  print ";; the License, or (at your option) any later version.";
+  print "";
+  print ";; The m17n library is distributed in the hope that it will be useful,";
+  print ";; but WITHOUT ANY WARRANTY; without even the implied warranty of";
+  print ";; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU";
+  print ";; Lesser General Public License for more details.";
+  print "";
+  print ";; You should have received a copy of the GNU Lesser General Public";
+  print ";; License along with the m17n library; if not, write to the Free";
+  print ";; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,";
+  print ";; Boston, MA 02110-1301, USA.";
+  print ";;";
+  print ";; The file format is this:";
+  print ";; (ISO639-2 ISO639-1 \"ENGLISH-NAME\" [ \"NATIVE-NAME\" [ \"EXTRA-CHARS\" ]]) ...";
+  print ";; ISO639-2: 3-letter language code of ISO639-2.";
+  print ";; ISO639-1: 2-letter language code of ISO639-1, or nil if it doesn't exist.";
+  print ";; ENGLISH-NAME: English name of the language. The first name listed";
+  print ";; in ISO639-2 with the following modifications:";
+  print ";; 'Greek, Modern (1453-)' is changed to 'Greek'";
+  print ";; Tailing ' (Other)' is deleted.";
+  print ";; Tailing ' languages' is deleted.";
+  print ";; Tailing part specifying the date (e.g. ' (ca.450-1100)') is deleted.";
+  print ";; Reorder, for instance, 'English, Old' to 'Old English'.";
+  while ((getline < "LANGDATA/native.txt") > 0) {  # -1 (unreadable) ends it too
+    if ($0 ~ /^[a-z]/) {
+      NATIVE[$1] = $2;  # second column: native name
+      if ($3 != "")
+        CHARS[$1] = $3;  # optional third column: extra characters
+    } else if ($0 ~ /^;;/) {
+      print;  # ";;" comment lines are copied into the output
+    }
+  }
+  close("LANGDATA/native.txt");
+}
+
+# Emit one table entry per record whose first field is a 3-letter code.
+/^[a-z][a-z][a-z]\|/ {
+  native = NATIVE[$1];
+  chars = CHARS[$1];
+  two_letter = ($3 == "" || $3 == "NULL") ? "nil" : $3;
+  name = $4;
+  if (name == "Greek, Modern (1453-)")
+    name = "Greek";
+  else {
+    sub(/; .*/, "", name);  # keep only the first listed name
+    sub(/ \(Other\)$/, "", name);  # regexp constant: "\(" in a string is not portable
+    sub(/ languages$/, "", name);
+    if (name ~ /\(.*[0-9].*\)$/)  # trailing parenthesized note holding a digit
+      sub(/ \([^)]*\)$/, "", name);
+    if (name ~ /, /) {  # "English, Old" -> "Old English"
+      split(name, array, ", ");
+      name = array[2] " " array[1];
+    }
+  }
+  printf "(%s %-3s \"%s\"", $1, two_letter, name;
+  if (native != "" || chars != "")  # nil holds the native slot when only chars exist
+    printf " %s", ((native != "") ? "\"" native "\"" : "nil");
+  if (chars != "")
+    printf " \"%s\"", chars;
+  printf ")\n";
+}
-- 
1.7.10.4