1 # gb18030-4.awk -- awk script to make a charset map for 4-byte part of GB18030
3 # National Institute of Advanced Industrial Science and Technology (AIST)
4 # Registration Number H15PRO112
6 # This file is part of the m17n database, a sub-part of the m17n library.
8 # The m17n library is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License as
10 # published by the Free Software Foundation; either version 2, or (at
11 # your option) any later version.
13 # The m17n library is distributed in the hope that it will be
14 # useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15 # of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with the m17n database; see the file COPYING. If not, write
20 # to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 # Boston, MA 02111-1307, USA.
# decode_hex(str): fold the characters of STR into the number n, treating
# each character as one hexadecimal digit (n is multiplied by 16 per step).
# NOTE(review): the lines that initialise n and len, the keyword that
# separates the two accumulation branches, and the return statement are not
# visible in this excerpt; presumably n starts at 0, len is length(str) and
# n is returned -- confirm against the full file.
# NOTE(review): only STR is declared as a parameter, so i, c, n and len are
# global awk variables; a caller that uses i as its own loop counter will
# see it overwritten by a call to this function.
38 function decode_hex(str) {
# Visit the characters of STR from left to right (awk strings are 1-based).
41 for (i = 1; i <= len; i++)
43 c = substr (str, i, 1);
# Decimal digit: subtracting the strings converts both to numbers, giving
# the digit's value.
44 if (c >= "0" && c <= "9")
45 n = n * 16 + (c - "0");
# Any other character is looked up in tohex[], which is defined outside
# this excerpt (presumably it maps the letters A-F/a-f to 10-15 -- confirm).
47 n = n * 16 + tohex[c];
# gb_to_index(gb): compute a linear index idx from the two values b0 and b1.
# NOTE(review): the lines that derive b0 and b1 from GB, anything that
# adjusts idx after the formula, and the return statement are not visible
# in this excerpt; presumably b0 and b1 are the high and low bytes of GB
# and idx is returned -- confirm against the full file.
# NOTE(review): b0, b1 and idx are not declared as parameters, so they are
# global awk variables.
52 function gb_to_index(gb) {
# b0 is counted from 129 (0x81) with a stride of 191 per step; b1 is
# counted from 64 (0x40).
55 idx = (((b0 - 129)) * 191 + b1 - 64);
# index_to_gb(idx): return the four values b0, b1, b2, b3 formatted as one
# string of upper-case hexadecimal digits (at least two digits per value).
# NOTE(review): the assignments to b1 and b3 and any intermediate updates
# of IDX between the visible lines are not shown in this excerpt, so the
# exact relation between the argument and b0/b2 cannot be stated from here
# -- confirm against the full file.
# NOTE(review): b0..b3 are not declared as parameters, so they are global
# awk variables.
61 function index_to_gb(idx) {
# b2 takes one of 126 values starting at 129 (0x81).
64 b2 = (idx % 126) + 129;
# b0 starts at 129 (0x81) and grows by one for every 10 units of the value
# idx holds at this point.
67 b0 = int(idx / 10) + 129;
# Zero-padded, upper-case hex, b0 first.
68 return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
# Range entry: read two 4-hex-digit codes from field 1 (characters 3-6 and
# 10-13) and convert each to a linear index, then read a 4-hex-digit value
# from field 2 (characters 3-6) into unicode.
# NOTE(review): presumably field 1 has the shape 0xXXXX-0xXXXX and field 2
# the shape 0xXXXX; the pattern guarding this action and the body of the
# while loop are not visible in this excerpt -- confirm against the full
# file.
77 gb_from = gb_to_index(decode_hex(substr($1, 3, 4)));
78 gb_to = gb_to_index(decode_hex(substr($1, 10, 4)));
79 unicode = decode_hex(substr($2, 3, 4));
# Walk the range inclusively, from gb_from up to and including gb_to.
80 while (gb_from <= gb_to)
# Single entry: decode the 4 hex digits at characters 3-6 of field 1 into
# gb and the 4 hex digits at characters 3-6 of field 2 into unicode.
# Unlike the range case, gb is kept as the decoded number here and is not
# passed through gb_to_index() on these lines.
# NOTE(review): the pattern guarding this action and the statements that
# consume gb and unicode are not visible in this excerpt.
89 gb = decode_hex(substr($1, 3, 4));
90 unicode = decode_hex(substr($2, 3, 4));
# Output pass: step i through 128..65536 inclusive and print pending runs.
# NOTE(review): the enclosing rule (presumably END) and most of the loop
# body, including where from_gb, to_gb and from_i are set, are not visible
# in this excerpt -- confirm against the full file.
99 for (i = 128; i <= 65536; i++)
# True for every i outside 55296..57343 (0xD800-0xDFFF, the UTF-16
# surrogate range).
103 if (i < 55296 || i >= 57344)
# A non-negative from_gb means there is a run waiting to be printed.
113 else if (from_gb >= 0)
# The half-open run [from_gb, to_gb) holds exactly one code: print that
# code alone, two tabs, then from_i as at least four upper-case hex digits.
115 if (from_gb + 1 == to_gb)
116 printf "0x%s\t\t0x%04X\n",
117 index_to_gb(from_gb), from_i;
# Range form: first code, a dash, last code (to_gb - 1, inclusive), one
# tab, then from_i.  The keyword separating this branch from the previous
# one is not visible in this excerpt.
119 printf "0x%s-0x%s\t0x%04X\n",
120 index_to_gb(from_gb), index_to_gb(to_gb - 1), from_i;