From 1fcb1467ee6ef47f5948268ebdfd0a57037d9f4c Mon Sep 17 00:00:00 2001 From: eto Date: Mon, 10 Nov 2003 08:10:12 +0000 Subject: [PATCH] change directory --- doc/index.html | 4 +- src/chise.rb | 627 +------------------------------------------------------- t/tc_ids.rb | 92 +++++++++ 3 files changed, 103 insertions(+), 620 deletions(-) diff --git a/doc/index.html b/doc/index.html index 0f19b59..ddf1d66 100755 --- a/doc/index.html +++ b/doc/index.html @@ -46,13 +46,13 @@ Rubyへ移植することを試みたモジュールである。
  • 2003-0312 ruby-chise-20030312.tar.bz2
    様々なライブラリを追加。
  • 2003-1004 ruby-chise-20031004.tar.bz2
    -新しい素性名に対応した。Copyright noticeをきちんといれた。 +新しい素性名に対応した。Copyright noticeをいれた。

    ■CVS access


    ■ライセンス

    -GPLです。付属のCOPYINGをごらん下さい。 +

    GPLです。COPYINGをごらんください。

    diff --git a/src/chise.rb b/src/chise.rb index f2ef8da..28b0b98 100755 --- a/src/chise.rb +++ b/src/chise.rb @@ -1,619 +1,10 @@ #!/usr/bin/env ruby -# -# Ruby/CHISE module by eto 2002-1114 -# Copyright (C) 2002-2003 Kouichirou Eto -# All rights reserved. -# This is free software with ABSOLUTELY NO WARRANTY. -# -# You can redistribute it and/or modify it under the terms of -# the GNU General Public License version 2. -# - -require 'bdb' -require 'uconv' -require 'singleton' -require 'rbchise' -require 'db' -require 'ids' - -$KCODE = 'u' #今のところこれ以外では動かない。String.splitが影響大。inspectも影響。 -$debug = false #これはテスト用 -$debug = true #これはテスト用 -$stdout.binmode if $debug -$stdout.sync = true if $debug - -class String #====================================================================== - def to_a() self.split(//) end #$KCODEが設定されているので、UTF-8的な一文字づつがchにはいる - def each_char() to_a.each {|ch| yield ch } end - def each_character() to_a.each {|ch| yield ch.char } end - def char_length() to_a.length end - def char_at(n) to_a()[n] end - def char() Character.get(to_a[0]) end - #alias to_c char #悩み中 - #def char_id() char.char_id() end #なんとなく廃止 - #def get_char_attribute(a) char.get_char_attribute(a) end #なんとなく廃止 - #def ucs() char.ucs() end - def to_utf8() - return to_a.map {|ch| - ch.char.to_utf8 - }.join('') - end - - def map_char(block = Proc.new) - return unless block_given? - return self.to_a.map {|ch| (block.call(ch)).to_s }.join("") - end - def map_char!(block = Proc.new) - return unless block_given? - self.replace(self.map_char {|ch| block.call(ch)}) - end - def map_character(block = Proc.new) - return unless block_given? - return self.to_a.map {|ch| (block.call(ch.char)).to_s }.join("") - end - def map_character!(block = Proc.new) - return unless block_given? - self.replace(self.map_char {|ch| block.call(ch.char)}) - end - - def method_missing(mid, *args) - if char_length == 1 #省略形が有効なのは、一文字の時だけ - char.method_missing(mid, *args) - else - raise NameError, "undefined method `#{mid.id2name}'", caller(1) - end - end - - def map_utf8() map_char {|ch| ch.char.map_utf8 } end - alias map_ucs map_utf8 - def map_ucs_er() map_char {|ch| ch.char.map_ucs_er } end - def to_er() map_char {|ch| ch.char.to_er } end - - #put関係、[]関係は用意しないことにした。 - def de_er!() #EntityReferenceを取り除く - return self unless self =~ Regexp.new(EntityReference::REGEXP_PART) #それらしいのが無ければ何もしない - er = "&"+$1+";" - self.sub!(Regexp.new(Regexp.escape(er)), Character.new(er).mcs_utf8) #変換自体はCharacterにまかせる - return self.de_er! if self =~ Regexp.new(EntityReference::REGEXP_PART) #まだあったら再帰 - return self - end - def de_er() return self.dup.de_er!; end - - def inspect_all() map_char {|ch| ch.char.inspect_all } end - def inspect_x() map_char {|ch| ch.char.inspect_x } end - - def to_euc() map_char {|ch| ch.char.to_euc } end - def map_euc() map_char {|ch| ch.char.map_euc } end - def to_sjis() map_char {|ch| ch.char.to_sjis } end - def map_sjis() map_char {|ch| ch.char.map_sjis } end - - def decompose() map_char {|ch| ch.char.decompose } end - def decompose!() self.replace(self.decompose); self; end - def decompose_all_nu(level=nil) - level = 0 if level.nil? - if 10 < level - p ['too many recursive', self] - exit - end - de = self.decompose - return de.decompose_all(level+1) if de != self #なにか変化があったから再帰 - de #もうこれ以上変化は無さそうだぞと。 - end - def decompose_all() map_char {|ch| ch.char.decompose_all } end - def decompose_all!() self.replace(self.decompose_all); self; end - - def find() #"日雲"→"曇"とかいう感じの操作 - ar = [] - length = char_length() - each_char {|ch| - char = ch.char - ar << char.ids_contained #その文字を含んでいる漢字のリスト - } - h = Hash.new(0) - ar.each {|list| - next if list.nil? - list.each_char {|ch| - h[ch] += 1 - } - } - str = "" - h.each {|k, v| - # p [k, v] - if length == v #全部に顔を出していたら - str += k - end - } - # p str - str - end - def compose() - db = CodesysDB.instance - composed = db.get('ids', self) - return "" if composed.nil? #なかったよと。 - return "" if composed.char_length == 0 #なにごと? - return composed if composed.char_length == 1 - composed.each_char {|ch| - char = ch.char - return ch if char.has_attribute? #とりあえず最初にみつかったものを返すというヌルい仕様 - } - return "" #attributeを持つものが一つも無かったら、""にする - end - def aggregate() - #selfである文字列をIDSだと仮定し、それを完全にcomposeしきらないで、 - #その部分集合だけをとりだして、compose可能であればできるだけcomposeする。 - tree = IDS_Tree.new(self) - return self if tree.depth <= 1 #sub_nodesが無い場合はここでさよなら - tree.sub_nodes.each {|node| - c = node.compose - next if c.nil? || c == "" - # print "#{self} #{node} #{c}\n" - # p [self, node, c] - n = self.gsub(node, c) - return n.aggregate - } - return self #おきかえられるものがまったくなかったら、自分をかえす。 - end -end - -module CHISE #====================================================================== - def windows?() - (RUBY_PLATFORM =~ /cygwin/ || RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/) - end - module_function :windows? - if windows?() - DB_DIR = 'd:/work/chise/char-db' #この後に/sysmtem-char-id/ucsという感じに続く - IDS_DB_DIR = 'd:/work/chise/ids/' #この後にIDS-JIS-X0208-1990.txtという感じに続く - else - DB_DIR = '/usr/local/lib/xemacs-21.4.12/i686-pc-linux/char-db' #この後に/sysmtem-char-id/ucsという感じに続く - IDS_DB_DIR = '/home/eto/work/chise/ids/' #この後にIDS-JIS-X0208-1990.txtという感じに続く - end - - class EntityReference #====================================================================== - #状況によってどのERに変換するかが異なる可能性があるので、普通のclassとして実装したほうがいい? -# CODESYS_TABLE = [ -# %w( chinese-big5-cdp CDP- 4 X), -# %w( ideograph-daikanwa M- 5 d), -# %w( ideograph-cbeta CB 5 d), -# %w( ideograph-gt GT- 5 d), -# %w( ideograph-gt-k GT-K 5 d), -# %w( japanese-jisx0208-1990 J90- 4 X), -# %w( japanese-jisx0208 J83- 4 X), -# %w( japanese-jisx0213-1 JX1- 4 X), -# %w( japanese-jisx0213-2 JX2- 4 X), -# %w( japanese-jisx0212 JSP- 4 X), -# %w( japanese-jisx0208-1978 J78- 4 X), -# %w( chinese-cns11643-1 C1- 4 X), -# %w( chinese-cns11643-2 C2- 4 X), -# %w( chinese-cns11643-3 C3- 4 X), -# %w( chinese-cns11643-4 C4- 4 X), -# %w( chinese-cns11643-5 C5- 4 X), -# %w( chinese-cns11643-6 C6- 4 X), -# %w( chinese-cns11643-7 C7- 4 X), -# %w( korean-ksc5601 K0- 4 X), -# ] -# CODESYS_ORDER = %w(japanese chinese korean ideograph) - CODESYS_TABLE = [ - %w( =jis-x0208-1990 J90- 4 X), - %w( =jis-x0208-1983 J83- 4 X), - %w( =jis-x0208-1978 J78- 4 X), - %w( =jis-x0208 J90- 4 X), #継承のアドホックな実装 - %w( =jis-x0208 J83- 4 X), #継承のアドホックな実装 - %w( =jis-x0208 J78- 4 X), #継承のアドホックな実装 - %w( =jis-x0213-1-2000 JX1- 4 X), - %w( =jis-x0213-2-2000 JX2- 4 X), - %w( =jis-x0212 JSP- 4 X), - %w( =big5-cdp CDP- 4 X), - %w( =cns11643-1 C1- 4 X), - %w( =cns11643-2 C2- 4 X), - %w( =cns11643-3 C3- 4 X), - %w( =cns11643-4 C4- 4 X), - %w( =cns11643-5 C5- 4 X), - %w( =cns11643-6 C6- 4 X), - %w( =cns11643-7 C7- 4 X), - %w( =ks-x1001 K0- 4 X), - %w( =daikanwa M- 5 d), - %w( =cbeta CB 5 d), - %w( =gt GT- 5 d), - %w( =gt-k GT-K 5 d), - ] - REGEXP_PART = "&([-+0-9A-Za-z#]+);" - REGEXP_ALL = "^#{REGEXP_PART}$" - - def self.match?(er) (er =~ Regexp.new(REGEXP_PART)) != nil end - - def self.parse(er) #char_idをFIXNUMで返す - return "" unless er =~ Regexp.new(REGEXP_ALL) #なんか、間違ってる? - er = $1 #ついでに中身の部分を取り出す - return $1.hex if er =~ /^MCS-([0-9A-Fa-f]+)/ #MCS - # if er =~ /^U[-+]?([0-9A-Fa-f]+)/ #Unicode直打ち - if er =~ /^U[-+]?([0-9A-Fa-f]+)/ || er =~ /^#x([0-9A-Fa-f]+)/ #Unicode直打ち - return $1.hex - end - - er.sub!(/^I-/, '') if er =~ /^I-/ #I-がついてるとどう違うのかはよくわからない - each_codesys {|codesys, er_prefix, keta, numtype| #p [codesys, er_prefix, keta, numtype] - numtyperegex = '\d' #if numtype == 'd' - numtyperegex = '[0-9A-Fa-f]' if numtype == 'X' - regexpstr = "^#{er_prefix}(#{numtyperegex}{#{keta},#{keta}})$" - if er =~ Regexp.new(regexpstr) - codestr = $1 - code = codestr.to_i #if numtype == 'd' - code = codestr.hex if numtype == 'X' - char_id_u8 = EntityReference.get_database(codesys, code) - char_id_num = Character.parse_char_id(char_id_u8) - next if char_id_num == nil - return char_id_num - end - } - return "" - end - - def self.each_codesys() -# CODESYS_ORDER.each {|lang| -# CODESYS_TABLE.each {|codesys, er_prefix, keta, numtype| #普通こういう書き方はしない。ループ一個にする。 -# next unless codesys =~ lang -# yield(codesys, er_prefix, keta, numtype) -# } -# } - CODESYS_TABLE.each {|codesys, er_prefix, keta, numtype| - yield(codesys, er_prefix, keta, numtype) - } - end - def self.get_database(codesys, code) - c = CodesysDB.instance.get(codesys, code) - return c if c != nil -# if codesys =~ /-jisx0208/ -# #return self.get_database("=jis-x0208", code) #再帰でどうだ? -# c = CodesysDB.instance.get("=jis-x0208", code) -# return c -# end - return nil - end - end - - class CharacterFactory #============================================文字オブジェクトの生成、cache - include Singleton - MAX = 10000 - def initialize - @max = MAX - reset() - end - def get(char_id) - check_max() - n = Character.parse_char_id(char_id) - c = @chars[n] - @chars[n] = Character.new(n) if @chars[n] == nil - return @chars[n] - end - def reset() - @chars = nil - @chars = Hash.new - GC.start #ガーベージコレクション - end - def length() @chars.length; end - def check_max() - reset if @max < @chars.length #MAXを超えたらresetしてしまう。乱暴じゃがcacheなのでこれでいいのだ。 - end - end - - class Character #=============================================================== 文字オブジェクト - def initialize(char_id=nil) - @char_id = Character.parse_char_id(char_id) - @attributes = Hash.new - @check_all_database = false - end - attr_reader :char_id - def to_i() @char_id end - def mcs_utf8() Character.u4itou8(@char_id) end - def mcs_hex() sprintf("%x", @char_id) end - - #---------------------------------------------------------------------- - def self.get(char_id) CharacterFactory.instance.get(char_id) end #flyweightパターン - - #---------------------------------------------------------------------- - def normalize_attribute_name(b) - a = b.dup - a.gsub!(/_/, '-') #underlineは-に置換 - a.sub!(/-at-/, '@') - a.sub!(/^map-/, '=>') - a.sub!(/^to-/, '->') - a.sub!(/^from-/, '<-') - a - end - def get_char_attribute(b) # XEmacs UTF-2000互換API群 - a = normalize_attribute_name(b) - #p [a, b] - atr = @attributes[a] - return atr if atr - atr = check_database(a) - if atr - @attributes[a] = atr - return atr - end - return get_char_attribute("="+a) unless a =~ /^=/ #頭に=がついてない場合はそれが省略されていることを仮定して、再帰する - nil - end - def put_char_attribute(b,v) - a = normalize_attribute_name(b) - @attributes[a] = v; - CharDB.instance.put(a, mcs_utf8(), v) - end - def char_attribute_alist() check_all_database(); @attributes; end - def char_attribute_list() check_all_database(); @attributes.keys; end - alias [] get_char_attribute #その略称 - alias []= put_char_attribute - alias alist char_attribute_alist - alias list char_attribute_list - - def method_missing(mid, *args) #参考:ostruct.rb - mname = mid.id2name - return get_char_attribute(mname) if args.length == 0 - put_char_attribute(mname.chop, args[0]) if mname =~ /=$/ #代入 - end - - def has_attribute?() #意味のあるattributeを持ってますか? - keys = list - keys.delete_if {|k| - k =~ /ids/ - } - return (keys.length != 0) - end - - #---------------------------------------------------------------------- - def ==(ch) - return false if ch == nil - return false unless ch.is_a? Character - self.char_id == ch.char_id - end - - #---------------------------------------------------------------------- - def self.parse_char_id(char_id) #FIXNUMを返す - return nil if char_id == nil - if char_id.is_a?(Numeric) #p [char_id] - char_id = 0x80000000 + char_id if char_id < 0 #補数表現 - return char_id.to_i - elsif char_id.is_a?(String) - return char_id.to_i if char_id =~ /^\d+$/ && 1 < char_id.length #文字列による数字だったら数値化してreturn - return EntityReference.parse(char_id) if char_id =~ Regexp.new(EntityReference::REGEXP_ALL) #実体参照? - char_id.sub!(/^\?/, '') if char_id =~ /^\?/ #もし先頭に?がついていたら削除 - #このへん本当はもっとちゃんとチェックするべし - begin - u4 = Uconv.u8tou4(char_id) #UCS-4文字列に変換 - rescue - p $! - p char_id - return 0 - end - return Character.u4tou4i(u4) #UCS-4数値にしてreturn - else - raise ArgumentError, "unknown object for char_id", caller(1) - end - end - def self.u4tou4i(u4) - return 0 if u4 == nil || u4 == "" - return (u4[3] << 24 | u4[2] << 16 | u4[1] << 8 | u4[0]) #UCS-4数値にしてreturn - end - def self.u4itou4(num) - return "" unless num.is_a?(Integer) - return sprintf("%c%c%c%c", num&0xff, (num >> 8)&0xff, (num >> 16)&0xff, (num >> 24)&0xff) #UCS-4数値を文字列にしてreturn - end - def self.u4itou8(char_id) #ucsの数値を受けとり、UTF-8の文字一文字を返す - begin - u4 = Character.u4itou4(char_id) - u8 = Uconv.u4tou8(u4) - return u8 - rescue - #raise ArgumentError, "invalid char_id (#{char_id})", caller(1) - #print "error\n" - return "" - end - end - - #---------------------------------------------------------------------- - def check_database(a) - db = CharDB.instance - u8 = mcs_utf8() - v = db.get(a, u8) #u8で表される文字のaアトリビュートを調べる。 - return v - end - def check_all_database() #現在の@char_idから、文字データベースを参照する - return if @check_all_database - return if @char_id == nil - db = CharDB.instance - u8 = mcs_utf8() - atrs = db.get_all(u8) #u8で表される文字のアトリビュートを全部持ってこい - atrs.each {|a,v| - @attributes[a] = v #とかいう感じで代入するのでええかな? - } - @check_all_database = true #重い処理なので一応checkする - end - - #---------------------------------------------------------------------- - def ucs() #p 'ucs' - #ar = %w{ucs ucs-big5 ucs-cdp ucs-cns ucs-jis ucs-ks =>ucs =>ucs* =>ucs-jis} - #ar = %w{ucs ucs-jis ucs-big5 ucs-cdp ucs-cns ucs-ks =>ucs =>ucs* =>ucs-jis} - ar = %w{ucs-jis ucs =>ucs-jis} - #並び順は恣意的で、ucs-jisを先に出している。本来はこれも指定できるようにするべき。 - ar.each {|a| #p [a] - u = get_char_attribute(a) - return u if u != nil - } - return nil - end - - #----------------------------------------------------------------------CCS関係 - def to_utf8() Uconv.u4tou8(Character.u4itou4(ucs())) end #UTF8文字列を返す - #alias to_s to_utf8 - alias to_s mcs_utf8 - def map_utf8() - u = ucs() - if u.nil? || 0xffff < u - return to_er() - else - return to_utf8() - end - end - alias map_ucs map_utf8 - def map_ucs_er() - u = ucs() - if u.nil? || 0xffff < u - return to_er() - else - return Character.get(u).to_er() - end - end - def to_euc() - u = ucs() - return "" if u.nil? || 0xffff < u - Uconv.u16toeuc(Uconv.u4tou16(Character.u4itou4(ucs()))) - end - def map_euc() - e = to_euc() - return e if e != "" - return to_er() - end - def to_sjis() - u = ucs() - return "" if u.nil? || 0xffff < u - Uconv.u16tosjis(Uconv.u4tou16(Character.u4itou4(ucs()))) - end - def map_sjis() - e = to_sjis() - return e if e != "" - return to_er() - end - - #---------------------------------------------------------------------- - def to_er(codesys=nil) #実体参照を返す、希望するcodesysが引数(未実装) - return "" if @char_id == nil - return sprintf("&#x%04x;", @char_id) if @char_id <= 0xffff - return sprintf("&#x%05x;", @char_id) if @char_id <= 0xfffff - EntityReference.each_codesys {|codesys, er_prefix, keta, numtype| - code = self[codesys] - next if code == nil - return sprintf("&#{er_prefix}%0#{keta}#{numtype};", code) - } - return sprintf("&MCS-%08X;", @char_id) #本当はこれは無しにしたい - end - def to_er_list() - ar = [] - EntityReference.each_codesys {|codesys, er_prefix, keta, numtype| - er = to_er(codesys) - ar << er if er != nil - } - ar - end - - def inspect_x() - return "<>" if @char_id == nil - ar = [to_utf8(), to_er().sub(/^&/,'').chop] - "<"+ar.join(',')+">" - end - alias inspect inspect_x - def inspect_all_codesys() #未完成 - #to_erを全てのcodesysにおいて実行する。その結果をコンパクトにまとめる - end - def inspect_all() - ar = [inspect.chop] - alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" } - return ar.join(',')+">" - end - def dump_all() - ar = [inspect] - alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" } - return ar.join('\n')+'\n' - end - def get_attributes() - str = "" - alist.to_a.sort.each {|a, v| - str += "#{a}: #{v}\n" - } - str - end - - def inspect_ids(hex_flag=false) - ids = decompose - ar = [] - ar << (hex_flag ? "x"+mcs_hex : to_utf8) - if to_s != ids #idsが部品そのものだったら部品追加はしない - ids.each_char {|ch| - char = ch.char - next if char.is_ids? - if hex_flag then - ar << "x"+char.mcs_hex - else - u = char.to_utf8 - if u != "" - ar << u - else - ar << char.to_er - end - end - } - end - return "("+ar.join("\t")+")" - end - - #----------------------------------------------------------------------IDS関係 - def decompose - k = self.to_s - # idss = self['ids'] - # return idss if idss - # return k if self.is_basic_kanji? #基本漢字はstop kanjiとするぞと。 - return self['ids-represent'] if self['ids-represent'] #ids_representを持っている場合はその値とする。 - return self['ids-element'] if self['ids-element'] #ids_elementを持っている場合はその値とする。 - - idss = self['ids-meaning'] - return idss if idss != nil && 0 < idss.length && k != idss - idss = self['ids-aggregated'] - return idss if idss != nil && 0 < idss.length && k != idss - idss = self['ids'] - return idss if idss != nil && 0 < idss.length && k != idss - return k - # return k if idss.nil? || idss.length == 0 || k == idss - # if idss.char_length == 2 - # p ['What???', k, idss, k.inspect_all] - # #return idssx[1] #二個目だけ返すとか? - # return k #IDSに展開する方法が無いと。 - # end - # return k if k == idss - # if idss.include?(k) #この二文字のBUG対策 - # #return idss.sub(k, '') - # return k #IDSに展開する方法が無いと。 - # end - # return idss - end - def decompose_all - pde = "" - de = self.decompose #出発点 - level = 0 - while true - pde = de - de = pde.decompose #もう一度分解をしてみる。 - break if pde == de #ループを抜けだす - exit if 10 < level #p ['too many recursive', self] - level += 1 - end - return de - end - def decompose_all_nu(level=nil) - level = 0 if level.nil? - if 10 < level - p ['too many recursive', self] - exit - end - de = self.decompose - return de.decompose_all(level+1) if de != self #なにか変化があったから再帰 - return de #もうこれ以上変化は無さそうだぞと。 - end - def is_ids?() 0x2ff0 <= @char_id && @char_id <= 0x2fff end - def ids_operator_argc() - return 0 unless is_ids? - return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3 - return 2 - end - end - -end - -#----------------------------------------------------------------------終了 +# $Id: chise.rb,v 1.18 2003-11-10 08:10:12 eto Exp $ +# Copyright (C) 2002-2003 Kouichirou Eto, All rights reserved. +# This is free software with ABSOLUTELY NO WARRANTY. +# You can redistribute it and/or modify it under the terms of the GNU GPL2. + +require 'chise/char' +require 'chise/rbchise' +require 'chise/db' +require 'chise/ids' diff --git a/t/tc_ids.rb b/t/tc_ids.rb index 288e513..884472d 100755 --- a/t/tc_ids.rb +++ b/t/tc_ids.rb @@ -171,6 +171,98 @@ class TC_IDS < Test::Unit::TestCase assert_equal("⿰木木", "⿰木木".compose.decompose) assert_equal("林".ucs, "⿰木木".compose.ucs) end + def test_idc_shortcut + assert_equal(IDC_LR, "林".decompose.first_char) + assert_equal(IDC_LR+"木木", "林".decompose) + + assert_equal(IDC_AB, "森".decompose.first_char) + assert_equal(IDC_AB+"木林", "森".decompose) + assert_equal(IDC_AB+"火火", "炎".decompose) + + assert_equal(IDC_LMR, "班".decompose.first_char) + assert_equal(IDC_LMR+"彳"+IDC_AB+"山王"+"攵", "徴".decompose) #meaning? + + assert_equal(IDC_AMB, "鼻".decompose.first_char) + assert_equal(IDC_AMB+"自田廾", "鼻".decompose) + assert_equal(IDC_AMB+"士冖匕", "壱".decompose) + assert_equal(IDC_AMB+"穴厶心", "窓".decompose) + assert_equal(IDC_AMB+"丗冖巾", "帯".decompose) + + assert_equal(IDC_FS, "囲".decompose.first_char) + assert_equal(IDC_FS+"囗井", "囲".decompose) + assert_equal(IDC_FS+"行韋", "衛".decompose) + assert_equal(IDC_FS+"行圭", "街".decompose) + assert_equal(IDC_FS+"行重", "衝".decompose) + assert_equal(IDC_FS+IDC_AB+"一凵田", "画".decompose) + + assert_equal(IDC_FA, "問".decompose.first_char) + assert_equal(IDC_FA+"門口", "問".decompose) + assert_equal(IDC_FA+"門"+IDC_LR+"豆寸", "闘".decompose) + assert_equal(IDC_FA+"戌女", "威".decompose) + assert_equal(IDC_FA+"茂臣", "蔵".decompose) + assert_equal(IDC_FA+"尺旦", "昼".decompose) + assert_equal(IDC_FA+"冂入", "内".decompose) + assert_equal(IDC_FA+"几丶", "凡".decompose) + assert_equal(IDC_FA+"几"+IDC_AB+"丿虫", "風".decompose) + + assert_equal(IDC_FB, "凶".decompose.first_char) + assert_equal(IDC_AB+"止"+IDC_FB+"凵米", "歯".decompose) + + assert_equal(IDC_FL, "匠".decompose.first_char) + assert_equal(IDC_FL+"匚斤", "匠".decompose) + assert_equal(IDC_FL+"匚矢", "医".decompose) + assert_equal(IDC_FL+"匚若", "匿".decompose) + assert_equal(IDC_FL+"匚儿", "匹".decompose) + + assert_equal(IDC_FUL, "庁".decompose.first_char) + assert_equal(IDC_FUL+"广丁", "庁".decompose) + assert_equal(IDC_FUL+"歹匕", "死".decompose) + assert_equal(IDC_FUL+"尹口", "君".decompose) + assert_equal(IDC_FUL+"麻鬼", "魔".decompose) + assert_equal(IDC_FUL+"府肉", "腐".decompose) + assert_equal(IDC_FUL+"麻手", "摩".decompose) + assert_equal(IDC_FUL+"虍思", "慮".decompose) + assert_equal(IDC_FUL+"食口", "倉".decompose) + assert_equal(IDC_AB+"日"+IDC_FUL+"耳又", "最".decompose) + assert_equal(IDC_FUL+"手目", "看".decompose) #meaning + assert_equal(IDC_FUL+"辰口", "唇".decompose) #? + + assert_equal(IDC_FUR, "句".decompose.first_char) + assert_equal(IDC_FUR+"勹口", "句".decompose) + assert_equal(IDC_FUR+"勹丶", "勺".decompose) + assert_equal(IDC_FUR+"勹日", "旬".decompose) + assert_equal(IDC_FUR+"戈廾", "戒".decompose) + assert_equal(IDC_FUR+"弋工", "式".decompose) + assert_equal(IDC_FUR+"刀丿", "刃".decompose) + assert_equal(IDC_FUR+"鳥山", "島".decompose) #meaning + + assert_equal(IDC_FLL, "通".decompose.first_char) + assert_equal(IDC_FLL+"廴聿", "建".decompose) + assert_equal(IDC_FLL+"走戉", "越".decompose) + assert_equal(IDC_FLL+"走巳", "起".decompose) + assert_equal(IDC_FLL+"走取", "趣".decompose) + assert_equal(IDC_FLL+"走召", "超".decompose) + assert_equal(IDC_FLL+"是頁", "題".decompose) + assert_equal(IDC_FLL+"免力", "勉".decompose) + assert_equal(IDC_FLL+"鬼未", "魅".decompose) + assert_equal(IDC_FLL+"黒犬", "黙".decompose) + + assert_equal(IDC_O, "太".decompose.first_char) + assert_equal(IDC_O+"大丶", "太".decompose) + assert_equal(IDC_O+"衣中", "衷".decompose) + assert_equal(IDC_O+"衣里", "裏".decompose) + assert_equal(IDC_O+"勹巳", "包".decompose) + assert_equal(IDC_O+"勹乂", "匁".decompose) + assert_equal(IDC_O+"木日", "東".decompose) + assert_equal(IDC_O+"弍一", "弐".decompose) + assert_equal(IDC_O+"衣保", "褒".decompose) + end + def test_glyph_decompose + assert_equal("音", "音".decompose) +# assert_equal(IDC_AB+"立日", "音".glyph_decompose) + assert_equal(IDC_FLL, "世".decompose.first_char) + assert_equal("世", "世".glyph_decompose) + end def test_find # p "日雲".find #"曇" assert(4 <= "日雲".find .char_length) #"曇" -- 1.7.10.4