From: ntakahas
Date: Mon, 5 Sep 2005 12:53:59 +0000 (+0000)
Subject: New file.
X-Git-Tag: REL-1-3-0~131
X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=676fe1d34b0ae408c5d16b4fb618b3bd0b943bd8;p=m17n%2Fm17n-db.git

New file.
---

diff --git a/CASE-MAPPING.awk b/CASE-MAPPING.awk
new file mode 100644
index 0000000..fdb2dcf
--- /dev/null
+++ b/CASE-MAPPING.awk
@@ -0,0 +1,84 @@
+# CASE-MAPPING.awk -- awk script to generate CASE-MAPPING.tab
+# Copyright (C) 2005
+#   National Institute of Advanced Industrial Science and Technology (AIST)
+#   Registration Number H15PRO112
+
+# This file is part of the m17n database; a sub-part of the m17n
+# library.
+
+# The m17n library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+
+# The m17n library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the m17n library; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# Reads two ";"-separated tables, one after the other:
+#   1. records "CODE; M1; M2; M3; # comment" (exactly five fields), each
+#      Mn being a space-separated list of hexadecimal code points --
+#      presumably Unicode's SpecialCasing.txt (lower; title; upper),
+#      to be confirmed against the rule that runs this script;
+#   2. a table that starts with a "0000;" record and has single code
+#      points in fields 13, 14 and 15, which in UnicodeData.txt are the
+#      simple uppercase, lowercase and titlecase mappings.
+# Each mapped character is printed as
+#   0xCODE ( "M1" "M2" "M3" )
+# with code points written as \uXXXX escapes; table 2 entries are
+# printed in the order field 14, field 15, field 13, and are skipped
+# for characters already printed from table 1.
+#
+# User-defined functions are called without a space before "(":
+# POSIX awk and gawk do not accept "mtext ($2)".
+
+BEGIN {
+  FS = ";";
+  FILE = 1;                     # number of the table being read
+}
+
+# Print STR, a space-separated list of hexadecimal code points, as one
+# double-quoted string of \uXXXX escapes followed by a space.  A field
+# without code points yields "" rather than a dangling "\u" escape.
+# PARTS, N and I are locals (extra-parameter idiom); pass STR only.
+function mtext(str, parts, n, i) {
+  n = split (str, parts, " ");
+  printf ("\"");
+  for (i = 1; i <= n; i++)
+    printf ("\\u%s", parts[i]);
+  printf ("\" ");
+}
+
+# Table 1.  Remember each code point so that its table 2 entry is not
+# printed as well.
+FILE == 1 && /^[^#]/ && NF == 5 {
+  printf ("0x%s ( ", $1);
+  mtext($2);
+  mtext($3);
+  mtext($4);
+  printf (")\n");
+  X[$1] = 1;
+}
+
+# The "0000;" record marks the start of table 2.
+/^0000;/ {
+  FILE = 2;
+}
+
+# Table 2.  The fields are compared with "" rather than tested for
+# truth: awk reads a field such as "00E0" as the number 0e0, i.e. as
+# false, which would drop e.g. U+00C0 (lowercase 00E0) from the output.
+# A missing mapping defaults to the character itself.
+FILE == 2 && /^[^#]/ && ($13 != "" || $14 != "" || $15 != "") && ! ($1 in X) {
+  upper = ($13 != "") ? $13 : $1;
+  lower = ($14 != "") ? $14 : $1;
+  title = ($15 != "") ? $15 : $1;
+  printf ("0x%s ( \"\\u%s\" \"\\u%s\" \"\\u%s\" )\n",
+          $1, lower, title, upper);
+}
diff --git a/CASED.awk b/CASED.awk
new file mode 100644
index 0000000..e0ecb78
--- /dev/null
+++ b/CASED.awk
@@ -0,0 +1,104 @@
+# CASED.awk -- awk script to generate CASED.tab
+# Copyright (C) 2005
+#   National Institute of Advanced Industrial Science and Technology (AIST)
+#   Registration Number H15PRO112
+
+# This file is part of the m17n database; a sub-part of the m17n
+# library.
+
+# The m17n library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+
+# The m17n library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the m17n library; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# Input: a ";"-separated table with a two-letter category code in field
+# 3 (the layout of UnicodeData.txt), then PropList and WordBreakProperty
+# data, each announced by its "# NAME-VERSION.txt" header line.  Output:
+# one "0xCODE FLAGS" line per flagged code point, in unspecified order:
+#   1  Ll, Lt, Lu, Other_Uppercase or Other_Lowercase
+#   2  Mn, Me, Cf, Lm, Sk or MidLetter        (3 = both)
+# User-defined functions are called with no space before "(", as POSIX
+# awk and gawk require.
+
+BEGIN {
+  FILE = 1;                     # number of the table being read
+  FS = ";";
+}
+
+# Return the value of the hexadecimal number that starts at character
+# IDX of STR and ends before the first non-hex character.
+# N, LEN, I and DIGIT are locals; callers pass STR and IDX only.
+function decode_hex(str, idx, n, len, i, digit) {
+  n = 0;
+  len = length (str);
+  for (i = idx; i <= len; i++)
+    {
+      # index () is 1-based and returns 0 for a non-hex character.
+      digit = index ("0123456789ABCDEF", toupper (substr (str, i, 1)));
+      if (digit == 0)
+        break;
+      n = n * 16 + digit - 1;
+    }
+  return n;
+}
+
+# Set BIT (1 or 2) in cased[CP].  Testing the bit itself, rather than
+# cased[CP] != BIT, keeps a value of 3 from growing to 4 or 5.
+function set_bit(cp, bit) {
+  if (int (cased[cp] / bit) % 2 == 0)
+    cased[cp] += bit;
+}
+
+# Flag the single code point written in hexadecimal at the start of STR.
+function single(str, bit) {
+  set_bit(decode_hex(str, 1), bit);
+}
+
+# Flag every code point of the range "FIRST..LAST" at the start of STR.
+# Without ".." STR is taken as a one-element range.
+function range(str, bit, dots, cp, last) {
+  dots = index (str, "..");
+  last = decode_hex(str, dots ? dots + 2 : 1);
+  for (cp = decode_hex(str, 1); cp <= last; cp++)
+    set_bit(cp, bit);
+}
+
+# Table 1.  $1 is one code point, never a range, so both classes go
+# through single(); only tables 2 and 3 are checked for "FIRST..LAST".
+FILE == 1 && /^[^#]/ {
+  if ($3 ~ /L[ltu]/) single($1, 1);
+  else if ($3 ~ /Mn|Me|Cf|Lm|Sk/) single($1, 2);
+}
+
+/^# PropList-.+\.txt/ {
+  FILE = 2;
+}
+
+FILE == 2 && /^[^#;]+; *Other_(Upp|Low)ercase/ {
+  if (index ($1, ".")) range($1, 1);
+  else single($1, 1);
+}
+
+/^# WordBreakProperty-.+\.txt/ {
+  FILE = 3;
+}
+
+FILE == 3 && /^[^#;]+; *MidLetter/ {
+  if (index ($1, ".")) range($1, 2);
+  else single($1, 2);
+}
+
+END {
+  for (i in cased)
+    printf ("0x%X %d\n", i, cased[i]);
+}