non-jisx0208 start
authorakr <akr>
Fri, 7 Apr 2000 12:53:06 +0000 (12:53 +0000)
committerakr <akr>
Fri, 7 Apr 2000 12:53:06 +0000 (12:53 +0000)
ChangeLog [new file with mode: 0644]
Makefile [new file with mode: 0644]
README [new file with mode: 0644]
add [new file with mode: 0755]
decode_euc [new file with mode: 0755]
decode_sjis [new file with mode: 0755]
extract [new file with mode: 0755]
non-jisx0201 [new file with mode: 0644]
non-jisx0208-template [new file with mode: 0644]
non-jisx0208.el [new file with mode: 0644]
remove [new file with mode: 0755]

diff --git a/ChangeLog b/ChangeLog
new file mode 100644 (file)
index 0000000..1fd1f0b
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,15 @@
+2000-04-07  Tanaka Akira  <akr@m17n.org>
+
+       * Set up `standard-translation-table-for-decode'.
+
+1999-02-13  Tanaka Akira  <akr@jaist.ac.jp>
+
+       * decode_sjis, decode_euc: Use `/usr/local/bin/perl'.
+
+1999-02-13  Tanaka Akira  <akr@jaist.ac.jp>
+
+       * Makefile: New variable `STRICT_TO_1983'.
+
+1999-02-13  Tanaka Akira  <akr@jaist.ac.jp>
+
+       * ChangeLog: New file.
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..a1d309f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,76 @@
+EMACS=emacs
+UNICODE_MAPPINGS=ftp://ftp.unicode.org/Public/MAPPINGS
+# MAINTAINER is "yes" when non-jisx0208.el is missing; := runs the test once, at parse time.
+MAINTAINER:=$(shell test -f non-jisx0208.el || echo yes)
+# All three variables can be overridden on the command line, e.g.:
+# make UNICODE_MAPPINGS=ftp://ftp.jaist.ac.jp/pub/misc/character/ftp.unicode.org/Public/MAPPINGS MAINTAINER=yes
+
+# all, clean and maintainer-clean are commands, not files; .PHONY keeps a same-named file from masking them.
+.PHONY: all clean maintainer-clean
+all: non-jisx0208.elc
+# clean removes intermediates, build outputs and downloads; maintainer-clean also removes the generated non-jisx0208.el.
+clean:
+       -rm -f _* non-jisx0208 non-jisx0208.elc
+       -rm -f JIS0208.TXT JIS0212.TXT KSC5601.TXT GB2312.TXT CP932.TXT JAPANESE.TXT
+
+maintainer-clean: clean
+       -rm -f non-jisx0208.el
+
+non-jisx0208.elc: non-jisx0208.el
+       $(EMACS) -batch -f batch-byte-compile $<
+# Only in maintainer mode: assemble the .el from the two template halves and the two table fragments.
+ifeq "$(MAINTAINER)" "yes"
+non-jisx0208.el: _non-jisx0208-prefix non-jisx0201 non-jisx0208 _non-jisx0208-suffix
+       cat $^ > $@
+endif
+# Template lines before the first line containing TABLE.
+_non-jisx0208-prefix: non-jisx0208-template
+       awk '/TABLE/ {exit} {print}' $< > $@
+# Template lines after the first line containing TABLE.
+_non-jisx0208-suffix: non-jisx0208-template
+       awk '{if(f) print} /TABLE/ {f=1}' $< > $@
+
+non-jisx0208: _WIN+MAC JIS0212.TXT _KSC5601.TXT GB2312.TXT
+       ./extract japanese-jisx0208 _WIN+MAC japanese-jisx0212 JIS0212.TXT korean-ksc5601 _KSC5601.TXT chinese-gb2312 GB2312.TXT > $@
+
+_WIN+MAC: _WIN-JISX0208 _MAC-JISX0208
+       ./add $^ > $@
+
+_WIN-JISX0208: _JIS0208.TXT _CP932.TXT
+       ./remove $^ > $@
+
+_MAC-JISX0208: _JIS0208.TXT _JAPANESE.TXT
+       ./remove $^ > $@
+
+_CP932.TXT: CP932.TXT
+       ./decode_sjis $< | cut -f 1,3,4- > $@
+
+_JAPANESE.TXT: JAPANESE.TXT
+       ./decode_sjis $< | cut -f 1,3,4- > $@
+
+# Drops codes 0x7425 and 0x7426; set STRICT_TO_1983=cat to keep them.  grep -E replaces the obsolescent egrep.
+STRICT_TO_1983=grep -E -v '^0x742[56]'
+_JIS0208.TXT: JIS0208.TXT
+       ./decode_sjis JIS0208.TXT | $(STRICT_TO_1983) | cut -f 1,5- > _JIS0208.TXT
+
+_KSC5601.TXT: KSC5601.TXT
+       ./decode_euc $< | cut -f 1,3,4- > $@
+# Mapping tables are fetched from below UNICODE_MAPPINGS; each target is named after the remote file.
+JIS0208.TXT:
+       wget $(UNICODE_MAPPINGS)/EASTASIA/JIS/$@
+
+JIS0212.TXT:
+       wget $(UNICODE_MAPPINGS)/EASTASIA/JIS/$@
+
+KSC5601.TXT:
+       wget $(UNICODE_MAPPINGS)/EASTASIA/KSC/$@
+
+GB2312.TXT:
+       wget $(UNICODE_MAPPINGS)/EASTASIA/GB/$@
+
+CP932.TXT:
+       wget $(UNICODE_MAPPINGS)/VENDORS/MICSFT/WINDOWS/$@
+
+JAPANESE.TXT:
+       wget $(UNICODE_MAPPINGS)/VENDORS/APPLE/$@
+
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..1421c60
--- /dev/null
+++ b/README
@@ -0,0 +1,32 @@
+Requirements:
+
+  GNU Emacs 20.3 or later.
+  GNU make
+
+Installation:
+
+  % cd /usr/local/share/emacs/site-lisp
+  % tar xfz <somewhere>/non-jisx0208.tar.gz
+  % cd non-jisx0208
+  % make
+
+Set up your environment:
+
+  Add the following code to ~/.emacs
+
+(require 'non-jisx0208)
+(setq standard-translation-table-for-decode
+      (get 'non-jisx0208-translation-table 'translation-table))
+
+Test:
+
+  Evaluate the following expression in *scratch*.
+
+  (decode-coding-string "\e$B-!\e(B" 'iso-2022-jp)
+
+  If installation and setup succeeded, you'll see
+  a circled digit one.
+
+Author:
+  Tanaka Akira <akr@jaist.ac.jp>
+
diff --git a/add b/add
new file mode 100755 (executable)
index 0000000..78ddc0f
--- /dev/null
+++ b/add
@@ -0,0 +1,14 @@
+#!/usr/local/bin/perl
+# Concatenate the named mapping files, printing a "0xHHHH ..." line only the
+# first time its code is seen (codes are compared as written, case included).
+foreach $name (@ARGV) {
+  open(A, $name) or die "cannot open $name: $!\n";
+  while(defined($line = <A>)) {
+    if ($line =~ /^0x([0-9A-Fa-f]{4})\s/) {
+      $code = $1;
+      # The first file to mention a code wins; later duplicates are dropped.
+      print $line unless $set{$code}++;
+    }
+  }
+  close(A);
+}
diff --git a/decode_euc b/decode_euc
new file mode 100755 (executable)
index 0000000..40d956c
--- /dev/null
@@ -0,0 +1,9 @@
+#!/usr/local/bin/perl
+# For each "0xHHHH ..." line whose code is two bytes in 0xA1-0xFE, print the line prefixed by that code with bit 7 of each byte cleared.
+while(<>) {
+  next unless /^0x([0-9A-F]{4})\s/;
+  $data = pack('H*', $1);
+  next unless $data =~ /[\xa1-\xfe][\xa1-\xfe]/;
+  $data =~ tr/\x80-\xff/\x00-\x7f/;
+  printf "0x%s\t%s", unpack('H*', $data), $_;  # line goes in as an argument so a '%' in it is never read as a directive
+}
diff --git a/decode_sjis b/decode_sjis
new file mode 100755 (executable)
index 0000000..3f60a88
--- /dev/null
@@ -0,0 +1,12 @@
+#!/usr/local/bin/perl
+# For each "0xHHHH ..." line holding a two-byte Shift_JIS code, print the line prefixed by that code converted to EUC with bit 7 cleared.
+require 'jcode.pl';
+
+while(<>) {
+  next unless /^0x([0-9A-F]{4})\s/;
+  $data = pack('H*', $1);
+  next unless $data =~ /[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]/;
+  &jcode::convert(\$data, 'euc', 'sjis');
+  $data =~ tr/\x80-\xff/\x00-\x7f/;
+  printf "0x%s\t%s", unpack('H*', $data), $_;  # line goes in as an argument so a '%' in it is never read as a directive
+}
diff --git a/extract b/extract
new file mode 100755 (executable)
index 0000000..0b76a96
--- /dev/null
+++ b/extract
@@ -0,0 +1,28 @@
+#!/usr/local/bin/perl
+# usage: extract SRC-CHARSET SRC-FILE [DST-CHARSET DST-FILE]...
+# Prints one "(src . dst)" make-char pair per line for codes that share a second-column value.
+$src_charset = shift;
+$src_file = shift;
+
+# Index the source table: second-column value -> list of make-char forms for it.
+open(F, $src_file) or die "cannot open $src_file: $!\n";
+while(defined($line = <F>)) {
+  next unless $line =~ /^0x([0-9a-fA-F]{2})([0-9a-fA-F]{2})\s+0x([0-9a-fA-F]{4})\s/;
+  push @{$set{$3}}, ",(make-char '$src_charset ?\\x$1 ?\\x$2)";
+}
+close(F);
+
+while(@ARGV) {
+  $dst_charset = shift;
+  $dst_file = shift;
+  open(F, $dst_file) or die "cannot open $dst_file: $!\n";
+  while(defined($line = <F>)) {
+    next unless $line =~ /^0x([0-9a-fA-F]{2})([0-9a-fA-F]{2})\s+0x([0-9a-fA-F]{4})\s/;
+    ($key, $rest) = ($3, $');
+    # delete hands the list back once, so a later line or table cannot pair with this value again.
+    next unless defined($srcs = delete $set{$key});
+    $dst = ",(make-char '$dst_charset ?\\x$1 ?\\x$2)";
+    foreach $src (@$srcs) { print "($src . $dst); $key $rest"; }
+  }
+  close(F);
+}
diff --git a/non-jisx0201 b/non-jisx0201
new file mode 100644 (file)
index 0000000..1758f85
--- /dev/null
@@ -0,0 +1,12 @@
+,@(let (i l)
+    (setq i 33)
+    (while (<= i 91)
+      (setq l (cons (cons (make-char 'latin-jisx0201 i)
+                         (make-char 'ascii i)) l)
+           i (1+ i)))
+    (setq i 93)
+    (while (<= i 125)
+      (setq l (cons (cons (make-char 'latin-jisx0201 i)
+                         (make-char 'ascii i)) l)
+           i (1+ i)))
+    l)
diff --git a/non-jisx0208-template b/non-jisx0208-template
new file mode 100644 (file)
index 0000000..5f5c39c
--- /dev/null
@@ -0,0 +1,11 @@
+(define-translation-table
+  'non-jisx0208-translation-table
+  (eval-when-compile
+    `(
+      TABLE
+      )))
+
+(setq standard-translation-table-for-decode
+      (get 'non-jisx0208-translation-table 'translation-table))
+
+(provide 'non-jisx0208)
diff --git a/non-jisx0208.el b/non-jisx0208.el
new file mode 100644 (file)
index 0000000..3c35030
--- /dev/null
@@ -0,0 +1,22 @@
+(define-translation-table
+  'non-jisx0208-translation-table
+  (eval-when-compile
+    `(
+,@(let (i l)
+    (setq i 33)
+    (while (<= i 91)
+      (setq l (cons (cons (make-char 'latin-jisx0201 i)
+                         (make-char 'ascii i)) l)
+           i (1+ i)))
+    (setq i 93)
+    (while (<= i 125)
+      (setq l (cons (cons (make-char 'latin-jisx0201 i)
+                         (make-char 'ascii i)) l)
+           i (1+ i)))
+    l)
+      )))
+
+(setq standard-translation-table-for-decode
+      (get 'non-jisx0208-translation-table 'translation-table))
+
+(provide 'non-jisx0208)
diff --git a/remove b/remove
new file mode 100755 (executable)
index 0000000..1959b6c
--- /dev/null
+++ b/remove
@@ -0,0 +1,18 @@
+#!/usr/local/bin/perl
+# usage: remove A B
+# Prints the "0xHHHH 0xHHHH ..." lines of B whose first code does not begin any line of A.
+open(A, $ARGV[0]) or die "cannot open $ARGV[0]: $!\n";
+while(defined($line = <A>)) {
+  # Remember every code that begins a line of the first file.
+  $set{$1} = 1 if $line =~ /^0x([0-9A-Fa-f]{4})\s/;
+}
+close(A);
+
+open(B, $ARGV[1]) or die "cannot open $ARGV[1]: $!\n";
+while(defined($line = <B>)) {
+  # Keep only well-formed code pairs whose first code was not remembered.
+  print $line if $line =~ /^0x([0-9A-Fa-f]{4})\s+0x[0-9A-Fa-f]{4}\s/ && !$set{$1};
+}
+close(B);
+
+