1 # SCRIPT.awk -- awk script to generate SCRIPT.tab
3 # National Institute of Advanced Industrial Science and Technology (AIST)
4 # Registration Number H15PRO112
6 # This file is part of the m17n database; a sub-part of the m17n
9 # The m17n library is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU Lesser General Public License
11 # as published by the Free Software Foundation; either version 2.1 of
12 # the License, or (at your option) any later version.
14 # The m17n library is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # Lesser General Public License for more details.
19 # You should have received a copy of the GNU Lesser General Public
20 # License along with the m17n library; if not, write to the Free
21 # Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 # Boston, MA 02110-1301, USA.
48 FS = "[ \t]*[;#][ \t]*";
54 function decode_hex(str, idx) {
57 for (i = idx; i <= len; i++) {
58 c = tohex[substr(str, i, 1)];
66 function initialize() {
68 while (getline line < "UNIDATA/UnicodeData.txt" > 0) {
69 if (line ~ /^[0-9A-F][0-9A-F]*/) {
70 last = decode_hex(line, 1);
72 range[range_index++] = first;
73 range[range_index++] = last;
75 } else if (line ~ /First>/) {
84 function char_exist_p(c) {
87 for (i = 0; i < range_index; i += 2)
# range[i] and range[i + 1] hold the first and last code of one block
# (stored in that order by initialize()), so c is covered by the block
# when first <= c <= last.
if (range[i] <= c && c <= range[i + 1])
93 function maybe_emit(ch1, ch2, this_script) {
94 if (initialized == 0) {
97 print "# Ranges may contain non-existing character codes.";
98 print "0x0-0x3FFFFF common";
101 script = this_script;
103 if (script == this_script) {
104 for (j = last + 1; j < ch1; j++)
112 if (script != "Common") {
114 printf "0x%04X %s\n", first, tolower(script);
116 printf "0x%04X-0x%04X %s\n", first, last, tolower(script);
120 script = this_script;
125 maybe_emit(decode_hex($0, 1), decode_hex($0, match($0, "\\.\\.") + 2), $2);
130 ch = decode_hex($0, 1);
131 maybe_emit(ch, ch, $2);
136 if (script != "Common") {
138 printf "0x%04X %s\n", first, tolower (script);
140 printf "0x%04X-0x%04X %s\n", first, last, tolower (script);
142 while (getline < "SCRIPT.ext" > 0) {
143 if ($0 ~ /^[0-9A-F][0-9A-F]*/) {