# You should have received a copy of the GNU Lesser General Public
# License along with the m17n library; if not, write to the Free
-# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-# 02111-1307, USA.
+# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
BEGIN {  # one-time setup, run before any input record is read
tohex["0"] = 1;  # tohex[] stores digit value + 1, so a missing key (0/empty) means "not a hex digit"
tohex["e"] = 15;
tohex["f"] = 16;
- FILE = 1;
- FS = ";";
+ FS = "[ \t]*[;#][ \t]*";  # split fields on ';' or '#', swallowing spaces/tabs around the separator
}
function decode_hex(str, idx) {  # parse the hex digits of str starting at position idx and return their value
len = length(str);  # len, i, c and n are not parameters, so in awk they are globals
for (i = idx; i <= len; i++)
{
- c = tohex[substr (str, i, 1)];
+ c = tohex[substr(str, i, 1)];
if (c == 0)  # character has no tohex[] entry: stop scanning
break;
n = n * 16 + c - 1;  # c is digit value + 1, hence the "- 1"
return n;  # NOTE(review): no reset of n is visible in this excerpt -- confirm it is zeroed before the loop
}
-function single (str, bit) {
-i = decode_hex (str, 1);
-if (cased[i] != bit)
- cased[i] += bit;
- }
-
-function range (str, bit) {
-end = decode_hex (str, index (str, "..") + 2);
-for (i = decode_hex (str, 1); i <= end; i++)
- if (cased[i] != bit)
- cased[i] += bit;
- }
-
-FILE == 1 && /^[^#]/ {
- if ($3 ~ /L[ltu]/) single ($1, 1);
- else if ($3 ~ /Mn|Me|Cf|Lm|Sk/) range ($1, 2);
- }
-
-/^# PropList-.+\.txt/ {
-FILE = 2;
+function single(str, bit) {  # add bit to cased[] for the one code point written in hex at the start of str
+ i = decode_hex(str, 1);  # i is the global index also used by range() and END
+ if (cased[i] != bit)  # no-op when the entry already equals bit
+ cased[i] += bit;
}
-FILE == 2 && /^[^#;]+; *Other_(Upp|Low)ercase/ {
-if (index ($1, ".")) range ($1, 1);
-else single ($1, 1);
+function range(str, bit) {  # add bit to cased[] for every code point in a "START..END" hex range
+ end = decode_hex(str, index(str, "..") + 2);  # upper bound: hex digits right after the ".."
+ for (i = decode_hex(str, 1); i <= end; i++)  # lower bound: hex digits at the start of str
+ if (cased[i] != bit)  # no-op when the entry already equals bit
+ cased[i] += bit;
}
-/^# WordBreakProperty-.+\.txt/ {
-FILE = 3;
-}
+/^[^\#]/ {  # runs for every non-empty line whose first character is not '#'
+ if (FILENAME == "UNIDATA/UnicodeData.txt") {  # dispatch on the input file currently being read
+ if ($3 ~ /L[ltu]/)  # field 3 matches Ll, Lt or Lu: record bit 1
+ single($1, 1);
+ else if ($3 ~ /Mn|Me|Cf|Lm|Sk/)  # field 3 matches one of these values: record bit 2
+ single($1, 2);
+ }
-FILE == 3 && /^[^#;]+; *MidLetter/ {
-if (index ($1, ".")) range ($1, 2);
-else single ($1, 2);
+ else if (FILENAME == "UNIDATA/PropList.txt") {
+ if ($2 ~ /Other_(Upp|Low)ercase/) {  # Other_Uppercase or Other_Lowercase: record bit 1
+ if (index($1, "."))  # field 1 contains a '.', so it is treated as a START..END range
+ range($1, 1);
+ else
+ single($1, 1);
+ }
+ }
+
+ else { # any other FILENAME; presumably WordBreakProperty.txt -- verify against the invoking command line
+ if ($2 == "MidLetter") {  # exact match works because FS strips blanks and the trailing '#' comment
+ if (index($1, "."))  # field 1 contains a '.', so it is treated as a START..END range
+ range($1, 2);
+ else
+ single($1, 2);
+ }
+ }
}
END {  # after all input has been read, dump the accumulated table
-for (i in cased)
- printf ("0x%X %d\n", i, cased[i]);
- }
+ for (i in cased)  # awk does not define the iteration order of for-in
+ printf ("0x%X %d\n", i, cased[i]);  # one "0xHEX value" line per recorded code point
+}