From: eto Date: Tue, 25 Feb 2003 08:10:06 +0000 (+0000) Subject: change JoyoList class X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7de8e8557686e33c77def7482e2d61d74d182286;p=chise%2Fruby.git change JoyoList class --- diff --git a/src/chise.rb b/src/chise.rb index 2b2fade..9ca61fe 100755 --- a/src/chise.rb +++ b/src/chise.rb @@ -227,7 +227,6 @@ module CHISE class Character #=============================================================== 文字オブジェクト BASIC_KANJI = "人子女母父王口耳手足力目首毛心犬牛鳥貝角羽虫馬魚羊肉皮米竹木麦豆山川雨風水土石金田穴日月火音糸刀舟門戸衣矢弓車皿一二三四五六七八九十百千万寸尺上中下本玉立回食行止交向歩考入示走生出来書言大小白青多少高長" - COMPOSIT_KANJI = "鳴名加品古知問間聞取兄見切分粉貧林森校東明住位好岩砂里男畑習休短空坂島倉美孝赤看光初努協解新歌語話張強忘悲答晴現正字安守灰秋秒困国医包同合舌居右左受友反道返迷花菜集机主太氷州点店庫仕帳幼防引配早直班筆重番北化比死夏後進酒福私家世内谷半原前寺思電雲気布旅衆泣" def is_basic_kanji? BASIC_KANJI.include?(self.to_s) end @@ -238,6 +237,7 @@ module CHISE @check_all_database = false end attr_reader :char_id + def to_i() @char_id end def mcs_utf8() Character.u4itou8(@char_id) end def mcs_hex() sprintf("%x", @char_id) end @@ -347,7 +347,7 @@ module CHISE def ucs() #p 'ucs' #ar = %w{ucs ucs-big5 ucs-cdp ucs-cns ucs-jis ucs-ks =>ucs =>ucs* =>ucs-jis} #ar = %w{ucs ucs-jis ucs-big5 ucs-cdp ucs-cns ucs-ks =>ucs =>ucs* =>ucs-jis} - ar = %w{ucs ucs-jis =>ucs-jis} + ar = %w{ucs-jis ucs =>ucs-jis} #並び順は恣意的で、ucs-jisを先に出している。本来はこれも指定できるようにするべき。 ar.each {|a| #p [a] u = get_char_attribute(a) @@ -360,12 +360,52 @@ module CHISE def to_utf8() Uconv.u4tou8(Character.u4itou4(ucs())) end #UTF8文字列を返す #alias to_s to_utf8 alias to_s mcs_utf8 + def map_utf8() + u = ucs() + if u.nil? || 0xffff < u + return to_er() + else + return to_utf8() + end + end + def map_ucs_er() + u = ucs() + if u.nil? || 0xffff < u + return to_er() + else + return Character.get(u).to_er() + end + end + def to_euc() + u = ucs() + return "" if u.nil? || 0xffff < u + Uconv.u16toeuc(Uconv.u4tou16(Character.u4itou4(ucs()))) + end + def map_euc() + e = to_euc() + return e if e != "" + return to_er() + end + def to_sjis() + u = ucs() + return "" if u.nil? || 0xffff < u + Uconv.u16tosjis(Uconv.u4tou16(Character.u4itou4(ucs()))) + end + def map_sjis() + e = to_sjis() + return e if e != "" + return to_er() + end #---------------------------------------------------------------------- def to_er(codesys=nil) #実体参照を返す、希望するcodesysが引数(未実装) return "" if @char_id == nil - return sprintf("&U+%04X;", @char_id) if @char_id <= 0xffff - return sprintf("&U-%05X;", @char_id) if @char_id <= 0xfffff +# return sprintf("&U+%04X;", @char_id) if @char_id <= 0xffff + return sprintf("&#x%04x;", @char_id) if @char_id <= 0xffff +# return sprintf("&#%05d;", @char_id) if @char_id <= 0xffff +# return sprintf("&U-%05X;", @char_id) if @char_id <= 0xfffff + return sprintf("&#x%05x;", @char_id) if @char_id <= 0xfffff +# return sprintf("&#%06d;", @char_id) if @char_id <= 0xfffff EntityReference.each_codesys {|codesys, er_prefix, keta, numtype| code = self[codesys] next if code == nil @@ -407,13 +447,13 @@ module CHISE def inspect_ids(hex_flag=false) ids = decompose ar = [] - ar << (hex_flag ? mcs_hex : to_utf8) + ar << (hex_flag ? "x"+mcs_hex : to_utf8) if to_s != ids #idsが部品そのものだったら部品追加はしない ids.each_char {|ch| char = ch.char next if char.is_ids? if hex_flag then - ar << char.mcs_hex + ar << "x"+char.mcs_hex else u = char.to_utf8 if u != "" @@ -1291,6 +1331,8 @@ iso-10646-comment include Singleton #JP_JOYO_FILE = DB_DIR+"/../jp-joyo.txt" #EUC-jisx0213 JP_JOYO_FILE = DB_DIR+"/../joyo-ucs.txt" #UCS + COMPOSIT_KANJI = "鳴名加品古知問間聞取兄見切分粉貧林森校東明住位好岩砂里男畑習休短空坂島倉美孝赤看光初努協解新歌語話張強忘悲答晴現正字安守灰秋秒困国医包同合舌居右左受友反道返迷花菜集机主太氷州点店庫仕帳幼防引配早直班筆重番北化比死夏後進酒福私家世内谷半原前寺思電雲気布旅衆泣" +# COMPOSIT_KANJI = "鳴名加品古" def initialize @nchars = [] read_file @@ -1307,15 +1349,9 @@ iso-10646-comment end } end - def each_char - @nchars.each {|ch| - yield(ch) - } - end - def dump_all_ids(hex_flag=false) - each_char {|ch| + def dump_ids(ar) + ar.each {|ch| char = ch.char -# print char.inspect_ids(hex_flag), "\n" print char.inspect_ids(true), "\t;", char.inspect_ids(false), "\n" } end