From: eto Date: Wed, 5 Mar 2003 14:10:29 +0000 (+0000) Subject: - add method_missing to String class only if the char_length == 1. X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=545951c3f879a03a861ddb6ebaa643c0f1a05b19;p=chise%2Fruby.git - add method_missing to String class only if the char_length == 1. - map_ to_ from_ attribute alias added. --- diff --git a/src/chise.rb b/src/chise.rb index f1f991d..cb547a5 100755 --- a/src/chise.rb +++ b/src/chise.rb @@ -10,22 +10,36 @@ $debug = true #これはテスト用 #$stdout.binmode if $debug $stdout.sync = true if $debug -class String +class String #====================================================================== def to_a() self.split(//) end #$KCODEが設定されているので、UTF-8的な一文字づつがchにはいる def each_char() to_a.each {|ch| yield ch } end + def each_character() to_a.each {|ch| yield ch.char } end def char_length() to_a.length end def char_at(n) to_a()[n] end def char() Character.get(to_a[0]) end -# def method_missing(mid, *args) char.method_missing(mid, *args) end + #alias to_c char #悩み中 def char_id() char.char_id() end def get_char_attribute(a) char.get_char_attribute(a) end - def ucs() char.ucs() end + #def ucs() char.ucs() end def to_utf8() return to_a.map {|ch| ch.char.to_utf8 }.join('') end + def method_missing(mid, *args) + if char_length == 1 #省略形が有効なのは、一文字の時だけ + char.method_missing(mid, *args) + else + raise NameError, "undefined method `#{mid.id2name}'", caller(1) + end + end + + def map_utf8() map_char {|ch| ch.char.map_utf8 } end + alias map_ucs map_utf8 + def map_ucs_er() map_char {|ch| ch.char.map_ucs_er } end + def to_er() map_char {|ch| ch.char.to_er } end + #put関係、[]関係は用意しないことにした。 def de_er!() #EntityReferenceを取り除く return self unless self =~ Regexp.new(EntityReference::REGEXP_PART) #それらしいのが無ければ何もしない @@ -49,7 +63,7 @@ class String def decompose() map_char {|ch| ch.char.decompose } end def decompose!() self.replace(self.decompose); self; end - def decompose_all(level=nil) + def 
decompose_all_nu(level=nil) level = 0 if level.nil? if 10 < level p ['too many recursive', self] @@ -59,6 +73,7 @@ class String return de.decompose_all(level+1) if de != self #なにか変化があったから再帰 de #もうこれ以上変化は無さそうだぞと。 end + def decompose_all() map_char {|ch| ch.char.decompose_all } end def decompose_all!() self.replace(self.decompose_all); self; end def find() #"日雲"→"曇"とかいう感じの操作 @@ -114,7 +129,7 @@ class String end end -module CHISE +module CHISE #====================================================================== def windows?() (RUBY_PLATFORM =~ /cygwin/ || RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/) end @@ -159,7 +174,7 @@ module CHISE er = $1 #ついでに中身の部分を取り出す return $1.hex if er =~ /^MCS-([0-9A-Fa-f]+)/ #MCS # if er =~ /^U[-+]?([0-9A-Fa-f]+)/ #Unicode直打ち - if er =~ /^U[-+]?([0-9A-Fa-f]+)/ || er =~ /^#([0-9A-Fa-f]+)/ #Unicode直打ち + if er =~ /^U[-+]?([0-9A-Fa-f]+)/ || er =~ /^#x([0-9A-Fa-f]+)/ #Unicode直打ち return $1.hex end @@ -245,17 +260,26 @@ module CHISE def self.get(char_id) CharacterFactory.instance.get(char_id) end #flyweightパターン #---------------------------------------------------------------------- - def get_char_attribute(a) # XEmacs UTF-2000互換API群 + def normalize_attribute_name(b) + a = b.dup a.gsub!(/_/, '-') #underlineは-に置換 + a.sub!(/^map-/, '=>') + a.sub!(/^to-/, '->') + a.sub!(/^from-/, '<-') + a + end + def get_char_attribute(b) # XEmacs UTF-2000互換API群 + a = normalize_attribute_name(b) + #p [a, b] atr = @attributes[a] return atr if atr != nil atr = check_database(a) @attributes[a] = atr if atr != nil - return get_char_attribute("=jis-x0208") if a =~ /jisx0208/ + return get_char_attribute("=jis-x0208") if a =~ /jisx0208/ #ここだけ特殊形 return @attributes[a] end - def put_char_attribute(a,v) - a.gsub!(/_/, '-') #underlineは-に置換 + def put_char_attribute(b,v) + a = normalize_attribute_name(b) @attributes[a] = v; CharDB.instance.put(a, mcs_utf8(), v) end @@ -294,11 +318,17 @@ module CHISE char_id = 0x80000000 + char_id if char_id < 0 #補数表現 return 
char_id.to_i elsif char_id.is_a?(String) - return char_id.to_i if char_id =~ /^\d+$/ #文字列による数字だったら数値化してreturn + return char_id.to_i if char_id =~ /^\d+$/ && 1 < char_id.length #文字列による数字だったら数値化してreturn return EntityReference.parse(char_id) if char_id =~ Regexp.new(EntityReference::REGEXP_ALL) #実体参照? char_id.sub!(/^\?/, '') if char_id =~ /^\?/ #もし先頭に?がついていたら削除 #このへん本当はもっとちゃんとチェックするべし - u4 = Uconv.u8tou4(char_id) #UCS-4文字列に変換 + begin + u4 = Uconv.u8tou4(char_id) #UCS-4文字列に変換 + rescue + p $! + p char_id + return 0 + end return Character.u4tou4i(u4) #UCS-4数値にしてreturn else raise ArgumentError, "unknown object for char_id", caller(1) @@ -309,7 +339,7 @@ module CHISE return (u4[3] << 24 | u4[2] << 16 | u4[1] << 8 | u4[0]) #UCS-4数値にしてreturn end def self.u4itou4(num) - return "" if num == nil + return "" unless num.is_a?(Integer) return sprintf("%c%c%c%c", num&0xff, (num >> 8)&0xff, (num >> 16)&0xff, (num >> 24)&0xff) #UCS-4数値を文字列にしてreturn end def self.u4itou8(char_id) #ucsの数値を受けとり、UTF-8の文字一文字を返す @@ -368,6 +398,7 @@ module CHISE return to_utf8() end end + alias map_ucs map_utf8 def map_ucs_er() u = ucs() if u.nil? 
|| 0xffff < u @@ -400,12 +431,8 @@ module CHISE #---------------------------------------------------------------------- def to_er(codesys=nil) #実体参照を返す、希望するcodesysが引数(未実装) return "" if @char_id == nil -# return sprintf("&U+%04X;", @char_id) if @char_id <= 0xffff return sprintf("&#x%04x;", @char_id) if @char_id <= 0xffff -# return sprintf("&#%05d;", @char_id) if @char_id <= 0xffff -# return sprintf("&U-%05X;", @char_id) if @char_id <= 0xfffff return sprintf("&#x%05x;", @char_id) if @char_id <= 0xfffff -# return sprintf("&#%06d;", @char_id) if @char_id <= 0xfffff EntityReference.each_codesys {|codesys, er_prefix, keta, numtype| code = self[codesys] next if code == nil @@ -428,7 +455,7 @@ module CHISE "<"+ar.join(',')+">" end alias inspect inspect_x - def inspect_all_codesys() + def inspect_all_codesys() #未完成 #to_erを全てのcodesysにおいて実行する。その結果をコンパクトにまとめる end def inspect_all() @@ -459,7 +486,6 @@ module CHISE if u != "" ar << u else -# ar << char.to_s ar << char.to_er end end @@ -473,8 +499,12 @@ module CHISE k = self.to_s # idss = self['ids'] # return idss if idss - return k if self.is_basic_kanji? #基本漢字はstop kanjiとするぞと。 +# return k if self.is_basic_kanji? #基本漢字はstop kanjiとするぞと。 + return self['ids-represent'] if self['ids-represent'] #ids_representを持っている場合はその値とする。 + return self['ids-element'] if self['ids-element'] #ids_elementを持っている場合はその値とする。 + idss = self['ids-meaning'] + return idss if idss != nil && 0 < idss.length && k != idss idss = self['ids-aggregated'] return idss if idss != nil && 0 < idss.length && k != idss idss = self['ids'] @@ -493,6 +523,29 @@ module CHISE # end # return idss end + def decompose_all + pde = "" + de = self.decompose #出発点 + level = 0 + while true + pde = de + de = pde.decompose #もう一度分解をしてみる。 + break if pde == de #ループを抜けだす + exit if 10 < level #p ['too many recursive', self] + level += 1 + end + return de + end + def decompose_all_nu(level=nil) + level = 0 if level.nil? 
+ if 10 < level + p ['too many recursive', self] + exit + end + de = self.decompose + return de.decompose_all(level+1) if de != self #なにか変化があったから再帰 + return de #もうこれ以上変化は無さそうだぞと。 + end def is_ids?() 0x2ff0 <= @char_id && @char_id <= 0x2fff end def ids_operator_argc() return 0 unless is_ids? @@ -501,7 +554,7 @@ module CHISE end end - class DBS #======================================================================複数のDBを集めたclass + class DBS #======================================================================複数のDBを集めたclass、未完成 end class ADB < BDB::Hash #======================================================================一つのDB @@ -563,7 +616,6 @@ module CHISE unix.gsub!(%r|\+|, '*') return unix end -# def windows?() DB.windows?() end def get_filename(t) return @pre + DB.unix_to_win(t) + @post if windows? return @pre + t + @post @@ -589,7 +641,6 @@ module CHISE keys << t } return keys - #return keys.sort end def close_db(t) db = get(t) @@ -1327,7 +1378,7 @@ iso-10646-comment end end - class JoyoList + class JoyoList #====================================================================== include Singleton #JP_JOYO_FILE = DB_DIR+"/../jp-joyo.txt" #EUC-jisx0213 JP_JOYO_FILE = DB_DIR+"/../joyo-ucs.txt" #UCS diff --git a/t/tc_char.rb b/t/tc_char.rb index 3ca5ca7..ca0d7d8 100755 --- a/t/tc_char.rb +++ b/t/tc_char.rb @@ -23,6 +23,7 @@ class TC_Character < Test::Unit::TestCase test_char(Character.get("字")) test_char(Character.new("字")) test_char("字".char) + test_char("字") end def test_create assert_equal(23383, Character.parse_char_id("字")) @@ -108,7 +109,31 @@ class TC_Character < Test::Unit::TestCase assert_equal("字", "&U5B57;".de_er) assert_equal("字", "&U-5B57;".de_er) assert_equal("字", "&U+5B57;".de_er) - assert_equal("字", "&#5B57;".de_er) + assert_equal("字", "&#x5B57;".de_er) + end + def test_alias + assert_raises(NameError, message=""){ + t = "字字".ucs + } + assert_equal(23383, "字".ucs) + assert_equal(0x5B57, "字".ucs) + assert_equal(0x5B57, @char.ucs) + + assert_equal(0xfa55, 
"突".char['=>ucs-jis']) + assert_equal(0xfa55, "突".map_ucs_jis) + + char1 = Character.get("23383") + char2 = Character.get(23383) + assert_equal(char1, char2) + + char1 = Character.get("2") + char2 = Character.get(2) + assert_not_equal(char1, char2) + + assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (ucs . 65297)))", "1".char['->fullwidth']) + assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (ucs . 65297)))", "1".char.to_fullwidth) + assert_equal("(((name . \"DIGIT ONE\") (ucs . 49)))", "1".char['<-fullwidth']) + assert_equal("(((name . \"DIGIT ONE\") (ucs . 49)))", "1".char.from_fullwidth) end def teardown() @char = nil end end diff --git a/t/tc_str.rb b/t/tc_str.rb index 4cddd0d..009b354 100755 --- a/t/tc_str.rb +++ b/t/tc_str.rb @@ -23,9 +23,9 @@ class TC_String < Test::Unit::TestCase end def test_attributes assert_equal(23383, "字".ucs) - assert_equal(23383, "字字".ucs) + #assert_equal(23383, "字字".ucs) assert_equal(25991, "文".ucs) - assert_equal(25991, @str.ucs) + #assert_equal(25991, @str.ucs) end def test_er @char = @str.char_at(1)