From 57553d71dbd449ab5c9297b5e153f8917046f1d1 Mon Sep 17 00:00:00 2001
From: akr
Date: Fri, 7 Apr 2000 12:53:06 +0000
Subject: [PATCH 1/1] non-jisx0208

---
 ChangeLog             | 15 ++++++++
 Makefile              | 92 +++++++++++++++++++++++++++++++++++++++++++++++++
 README                | 32 +++++++++++++++++
 add                   | 17 +++++++++
 decode_euc            | 12 +++++++
 decode_sjis           | 15 ++++++++
 extract               | 32 +++++++++++++++++
 non-jisx0201          | 12 +++++++
 non-jisx0208-template | 11 ++++++
 non-jisx0208.el       | 22 ++++++++++++
 remove                | 21 +++++++++++
 11 files changed, 281 insertions(+)
 create mode 100644 ChangeLog
 create mode 100644 Makefile
 create mode 100644 README
 create mode 100755 add
 create mode 100755 decode_euc
 create mode 100755 decode_sjis
 create mode 100755 extract
 create mode 100644 non-jisx0201
 create mode 100644 non-jisx0208-template
 create mode 100644 non-jisx0208.el
 create mode 100755 remove

diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..1fd1f0b
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,15 @@
+2000-04-07  Tanaka Akira
+
+	* Set up `standard-translation-table-for-decode'.
+
+1999-02-13  Tanaka Akira
+
+	* decode_sjis, decode_euc: Use `/usr/local/bin/perl'.
+
+1999-02-13  Tanaka Akira
+
+	* Makefile: New variable `STRICT_TO_1983'.
+
+1999-02-13  Tanaka Akira
+
+	* ChangeLog: New file.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a1d309f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,92 @@
+# Builds non-jisx0208.elc.  The table data is derived from the Unicode
+# mapping files, which are fetched with wget when they are missing.
+
+EMACS=emacs
+UNICODE_MAPPINGS=ftp://ftp.unicode.org/Public/MAPPINGS
+
+# "yes" when non-jisx0208.el does not exist; this enables the rule below
+# that regenerates it.  := runs the shell test once, at parse time.
+MAINTAINER:=$(shell test -f non-jisx0208.el || echo yes)
+
+# Example: rebuild everything from a mirror of the mapping files.
+# make UNICODE_MAPPINGS=ftp://ftp.jaist.ac.jp/pub/misc/character/ftp.unicode.org/Public/MAPPINGS MAINTAINER=yes
+
+# These goals name commands, not files.
+.PHONY: all clean maintainer-clean
+
+# Recipes write their target through a shell redirection; delete a partial
+# target when its recipe fails so it is not taken for an up-to-date file.
+.DELETE_ON_ERROR:
+
+all: non-jisx0208.elc
+
+clean:
+	-rm -f _*
+	-rm -f non-jisx0208
+	-rm -f non-jisx0208.elc
+	-rm -f JIS0208.TXT JIS0212.TXT KSC5601.TXT GB2312.TXT CP932.TXT JAPANESE.TXT
+
+maintainer-clean: clean
+	-rm -f non-jisx0208.el
+
+non-jisx0208.elc: non-jisx0208.el
+	$(EMACS) -batch -f batch-byte-compile non-jisx0208.el
+
+# non-jisx0208.el = template text before the TABLE line + both table
+# fragments + template text after the TABLE line.
+ifeq "$(MAINTAINER)" "yes"
+non-jisx0208.el: _non-jisx0208-prefix non-jisx0201 non-jisx0208 _non-jisx0208-suffix
+	cat _non-jisx0208-prefix non-jisx0201 non-jisx0208 _non-jisx0208-suffix > non-jisx0208.el
+endif
+
+_non-jisx0208-prefix: non-jisx0208-template
+	awk '/TABLE/ {exit} {print}' non-jisx0208-template > _non-jisx0208-prefix
+
+_non-jisx0208-suffix: non-jisx0208-template
+	awk '{if(f) print} /TABLE/ {f=1}' non-jisx0208-template > _non-jisx0208-suffix
+
+non-jisx0208: _WIN+MAC JIS0212.TXT _KSC5601.TXT GB2312.TXT
+	./extract japanese-jisx0208 _WIN+MAC japanese-jisx0212 JIS0212.TXT korean-ksc5601 _KSC5601.TXT chinese-gb2312 GB2312.TXT > non-jisx0208
+
+_WIN+MAC: _WIN-JISX0208 _MAC-JISX0208
+	./add _WIN-JISX0208 _MAC-JISX0208 > _WIN+MAC
+
+_WIN-JISX0208: _JIS0208.TXT _CP932.TXT
+	./remove _JIS0208.TXT _CP932.TXT > _WIN-JISX0208
+
+_MAC-JISX0208: _JIS0208.TXT _JAPANESE.TXT
+	./remove _JIS0208.TXT _JAPANESE.TXT > _MAC-JISX0208
+
+_CP932.TXT: CP932.TXT
+	./decode_sjis CP932.TXT | cut -f 1,3,4- > _CP932.TXT
+
+_JAPANESE.TXT: JAPANESE.TXT
+	./decode_sjis JAPANESE.TXT | cut -f 1,3,4- > _JAPANESE.TXT
+
+# Filter applied to the decoded JIS0208 table; use cat to keep every row.
+#STRICT_TO_1983=cat
+STRICT_TO_1983=grep -E -v '^0x742[56]'
+_JIS0208.TXT: JIS0208.TXT
+	./decode_sjis JIS0208.TXT | $(STRICT_TO_1983) | cut -f 1,5- > _JIS0208.TXT
+
+_KSC5601.TXT: KSC5601.TXT
+	./decode_euc KSC5601.TXT | cut -f 1,3,4- > _KSC5601.TXT
+
+JIS0208.TXT:
+	wget $(UNICODE_MAPPINGS)/EASTASIA/JIS/JIS0208.TXT
+
+JIS0212.TXT:
+	wget $(UNICODE_MAPPINGS)/EASTASIA/JIS/JIS0212.TXT
+
+KSC5601.TXT:
+	wget $(UNICODE_MAPPINGS)/EASTASIA/KSC/KSC5601.TXT
+
+GB2312.TXT:
+	wget $(UNICODE_MAPPINGS)/EASTASIA/GB/GB2312.TXT
+
+CP932.TXT:
+	wget $(UNICODE_MAPPINGS)/VENDORS/MICSFT/WINDOWS/CP932.TXT
+
+JAPANESE.TXT:
+	wget $(UNICODE_MAPPINGS)/VENDORS/APPLE/JAPANESE.TXT
+
diff --git a/README b/README
new file mode 100644
index 0000000..1421c60
--- /dev/null
+++ b/README
@@ -0,0 +1,32 @@
+Requirement:
+
+  GNU Emacs 20.3 or later.
+  GNU make
+
+Installation:
+
+  % cd /usr/local/share/emacs/site-lisp
+  % tar xfz /non-jisx0208.tar.gz
+  % cd non-jisx0208
+  % make
+
+Set up your environment:
+
+  Add the following code to ~/.emacs
+
+(require 'non-jisx0208)
+(setq standard-translation-table-for-decode
+      (get 'non-jisx0208-translation-table 'translation-table))
+
+Test:
+
+  Evaluate the following expression in *scratch*.
+
+  (decode-coding-string "\e$B-!\e(B" 'iso-2022-jp)
+
+  If installation and setup succeeded, you'll see
+  a circled digit one.
+
+Author:
+  Tanaka Akira
+
diff --git a/add b/add
new file mode 100755
index 0000000..78ddc0f
--- /dev/null
+++ b/add
@@ -0,0 +1,17 @@
+#!/usr/local/bin/perl
+# usage: add FILE...
+# Prints each line that starts with a 4-hex-digit code (0xXXXX), keeping
+# only the first line seen for any given code across all FILEs.
+
+while(@ARGV) {
+    $name = shift;
+
+    open(A, $name) || die "cannot open $name: $!\n";
+    while(<A>) {
+        next unless /^0x([0-9A-Fa-f]{4})\s/;
+        next if $set{$1};
+        $set{$1} = 1;
+        print;
+    }
+    close(A);
+}
diff --git a/decode_euc b/decode_euc
new file mode 100755
index 0000000..40d956c
--- /dev/null
+++ b/decode_euc
@@ -0,0 +1,12 @@
+#!/usr/local/bin/perl
+# usage: decode_euc [FILE...]
+# For each line starting with a 4-hex-digit code whose two bytes are both in
+# 0xA1-0xFE, prints the code with the high bits cleared, a tab, and the line.
+
+while(<>) {
+    next unless /^0x([0-9A-F]{4})\s/;
+    $data = pack('H*', $1);
+    next unless $data =~ /[\xa1-\xfe][\xa1-\xfe]/;
+    $data =~ tr/\x80-\xff/\x00-\x7f/;
+    printf "0x%s\t$_", unpack('H*', $data);
+}
diff --git a/decode_sjis b/decode_sjis
new file mode 100755
index 0000000..3f60a88
--- /dev/null
+++ b/decode_sjis
@@ -0,0 +1,15 @@
+#!/usr/local/bin/perl
+# usage: decode_sjis [FILE...]
+# For each line starting with a 4-hex-digit Shift_JIS double-byte code, prints
+# the code converted to EUC with the high bits cleared, a tab, and the line.
+
+require 'jcode.pl';
+
+while(<>) {
+    next unless /^0x([0-9A-F]{4})\s/;
+    $data = pack('H*', $1);
+    next unless $data =~ /[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]/;
+    &jcode::convert(\$data, 'euc', 'sjis');
+    $data =~ tr/\x80-\xff/\x00-\x7f/;
+    printf "0x%s\t$_", unpack('H*', $data);
+}
diff --git a/extract b/extract
new file mode 100755
index 0000000..0b76a96
--- /dev/null
+++ b/extract
@@ -0,0 +1,32 @@
+#!/usr/local/bin/perl
+# usage: extract SRC_CHARSET SRC_FILE [DST_CHARSET DST_FILE]...
+# Prints one "(SRC . DST)" pair of make-char forms for every source
+# character whose second-column code also appears in a DST_FILE; a code is
+# dropped after its first match, so earlier DST_FILEs take precedence.
+
+$src_charset = shift;
+$src_file = shift;
+
+open(F, $src_file) || die "cannot open $src_file: $!\n";
+while(<F>) {
+    next unless /^0x([0-9a-fA-F]{2})([0-9a-fA-F]{2})\s+0x([0-9a-fA-F]{4})\s/;
+    $set{$3} = [] unless defined $set{$3};
+    push @{$set{$3}}, ",(make-char '$src_charset ?\\x$1 ?\\x$2)";
+}
+close(F);
+
+while(@ARGV) {
+    $dst_charset = shift;
+    $dst_file = shift;
+    open(F, $dst_file) || die "cannot open $dst_file: $!\n";
+    while(<F>) {
+        next unless /^0x([0-9a-fA-F]{2})([0-9a-fA-F]{2})\s+0x([0-9a-fA-F]{4})\s/;
+        next unless defined $set{$3};
+        $dst = ",(make-char '$dst_charset ?\\x$1 ?\\x$2)";
+        foreach $src (@{$set{$3}}) {
+            print "($src . $dst); $3 $'";
+        }
+        delete $set{$3};
+    }
+    close(F);
+}
diff --git a/non-jisx0201 b/non-jisx0201
new file mode 100644
index 0000000..1758f85
--- /dev/null
+++ b/non-jisx0201
@@ -0,0 +1,12 @@
+,@(let (i l)
+    (setq i 33)
+    (while (<= i 91)
+      (setq l (cons (cons (make-char 'latin-jisx0201 i)
+                          (make-char 'ascii i)) l)
+            i (1+ i)))
+    (setq i 93)
+    (while (<= i 125)
+      (setq l (cons (cons (make-char 'latin-jisx0201 i)
+                          (make-char 'ascii i)) l)
+            i (1+ i)))
+    l)
diff --git a/non-jisx0208-template b/non-jisx0208-template
new file mode 100644
index 0000000..5f5c39c
--- /dev/null
+++ b/non-jisx0208-template
@@ -0,0 +1,11 @@
+(define-translation-table
+  'non-jisx0208-translation-table
+  (eval-when-compile
+    `(
+      TABLE
+      )))
+
+(setq standard-translation-table-for-decode
+      (get 'non-jisx0208-translation-table 'translation-table))
+
+(provide 'non-jisx0208)
diff --git a/non-jisx0208.el b/non-jisx0208.el
new file mode 100644
index 0000000..3c35030
--- /dev/null
+++ b/non-jisx0208.el
@@ -0,0 +1,22 @@
+(define-translation-table
+  'non-jisx0208-translation-table
+  (eval-when-compile
+    `(
+,@(let (i l)
+    (setq i 33)
+    (while (<= i 91)
+      (setq l (cons (cons (make-char 'latin-jisx0201 i)
+                          (make-char 'ascii i)) l)
+            i (1+ i)))
+    (setq i 93)
+    (while (<= i 125)
+      (setq l (cons (cons (make-char 'latin-jisx0201 i)
+                          (make-char 'ascii i)) l)
+            i (1+ i)))
+    l)
+      )))
+
+(setq standard-translation-table-for-decode
+      (get 'non-jisx0208-translation-table 'translation-table))
+
+(provide 'non-jisx0208)
diff --git a/remove b/remove
new file mode 100755
index 0000000..1959b6c
--- /dev/null
+++ b/remove
@@ -0,0 +1,21 @@
+#!/usr/local/bin/perl
+# usage: remove BASE_FILE FILE
+# Prints the "0xXXXX 0xXXXX ..." lines of FILE whose first code does not
+# start any line of BASE_FILE.
+
+open(A, $ARGV[0]) || die "cannot open $ARGV[0]: $!\n";
+while(<A>) {
+    next unless /^0x([0-9A-Fa-f]{4})\s/;
+    $set{$1} = 1;
+}
+close(A);
+
+open(B, $ARGV[1]) || die "cannot open $ARGV[1]: $!\n";
+while(<B>) {
+    next unless /^0x([0-9A-Fa-f]{4})\s+0x[0-9A-Fa-f]{4}\s/;
+    next if $set{$1};
+    print;
+}
+close(B);
+
+
-- 
1.7.10.4