1 # gb18030-2.awk -- awk script to make a charset map for 2-byte part of GB18030
3 # National Institute of Advanced Industrial Science and Technology (AIST)
4 # Registration Number H15PRO112
6 # This file is part of the m17n database, a sub-part of the m17n library.
8 # The m17n library is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License as
10 # published by the Free Software Foundation; either version 2, or (at
11 # your option) any later version.
13 # The m17n library is distributed in the hope that it will be
14 # useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15 # of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with the m17n database; see the file COPYING. If not, write
20 # to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 # Boston, MA 02111-1307, USA.
# decode_hex(str) -- fold the hexadecimal digits of STR into the number n,
# most significant digit first (n = n * 16 + digit for each character).
#
# Characters "0".."9" contribute their numeric value: `c - "0"' makes awk
# coerce both operands to numbers.  Any other character is looked up in
# the tohex[] array, which is defined outside this excerpt (presumably it
# maps the hex letters to 10..15 -- confirm).
#
# Only STR is declared as a parameter, so i, c, n and len are global awk
# variables, not locals.
#
# NOTE(review): several lines of this function are not visible here
# (presumably the initialisation of n and len, the loop braces, the
# `else' and the return statement) -- confirm against the full file.
42 function decode_hex(str) {
# Walk STR one character at a time; awk's substr() positions start at 1.
45 for (i = 1; i <= len; i++)
47 c = substr (str, i, 1);
48 if (c >= "0" && c <= "9")
49 n = n * 16 + (c - "0");
51 n = n * 16 + tohex[c];
# gb_to_index(gb) -- compute a linear index from the byte pair b0, b1:
#     idx = (b0 - 129) * 191 + (b1 - 64)
# i.e. 191 slots per b0 value, with b0 counted from 129 (0x81) and b1
# counted from 64 (0x40).  The same constants appear in index_to_gb().
#
# NOTE(review): the lines that set b0 and b1 and the return statement are
# not visible in this excerpt; presumably b0 and b1 are the high and low
# bytes of GB -- confirm.  Only GB is declared as a parameter, so b0, b1
# and idx are global awk variables.
56 function gb_to_index(gb) {
59 idx = (((b0 - 129)) * 191 + b1 - 64);
# index_to_gb(idx) -- turn a linear index back into a two-byte code.
# IDX is split with base 191: the quotient, offset by 129 (0x81), becomes
# b0 and the remainder, offset by 64 (0x40), becomes b1.  The result is
# b0 * 256 + b1, i.e. b0 sits in the high byte of a 16-bit value.
#
# Only IDX is declared as a parameter, so b0 and b1 are global awk
# variables.
#
# NOTE(review): two original lines between the b1 computation and the
# return are not visible in this excerpt; they may adjust b0/b1 before
# the value is returned -- confirm against the full file.
65 function index_to_gb(idx) {
# int() truncates the quotient, since awk division is floating point.
66 b0 = int(idx / 191) + 129;
67 b1 = (idx % 191) + 64;
70 return (b0 * 256 + b1);
# Per-record processing and final flush.
#
# For each input record, the four characters starting at position 3 of
# field 1 and of field 2 are decoded as hexadecimal (the first two
# characters of each field are skipped -- presumably a "0x" prefix,
# confirm against the input format).  The first value is additionally
# converted to a linear index with gb_to_index().
#
# The script tracks a run of consecutive mappings in from_gb..to_gb and
# from_unicode..to_unicode.  A record whose index and Unicode value are
# both exactly one past the current run end satisfies the `if' below.
# The printf statements emit either a single mapping
#     "0x%04X 0x%04X\n"
# or, when from_gb < to_gb, a range
#     "0x%04X-0x%04X 0x%04X\n"
# with the indices converted back to two-byte codes by index_to_gb().
#
# NOTE(review): the pattern/action headers, braces, the body of the first
# `if', the condition guarding the single-mapping printf and the updates
# of from_gb/to_gb are not visible in this excerpt -- confirm against the
# full file before relying on this description.
79 gb = gb_to_index(decode_hex(substr($1, 3, 4)));
80 unicode = decode_hex(substr($2, 3, 4));
81 if ((gb == to_gb + 1) && (unicode == to_unicode + 1))
89 printf "0x%04X 0x%04X\n", index_to_gb(from_gb), from_unicode;
90 else if (from_gb < to_gb)
91 printf "0x%04X-0x%04X 0x%04X\n",
92 index_to_gb(from_gb), index_to_gb(to_gb), from_unicode;
# Start a new run at the current record's Unicode value.
94 from_unicode = to_unicode = unicode;
# Range output for the run still pending; presumably this sits in an END
# block that flushes the last run -- confirm.
100 printf "0x%04X-0x%04X 0x%04X\n",
101 index_to_gb(from_gb), index_to_gb(to_gb), from_unicode;