#!/usr/bin/env ruby
-#
-# Ruby/CHISE module by eto 2002-1114
-# Copyright (C) 2002-2003 Kouichirou Eto
-# All rights reserved.
-# This is free software with ABSOLUTELY NO WARRANTY.
-#
-# You can redistribute it and/or modify it under the terms of
-# the GNU General Public License version 2.
-#
-
-require 'bdb'
-require 'uconv'
-require 'singleton'
-require 'rbchise'
-require 'db'
-require 'ids'
-
-$KCODE = 'u' # For now nothing works with any other setting. It strongly affects String#split, and also inspect.
-$debug = false # for testing
-$debug = true # for testing; NOTE(review): overrides the assignment on the previous line, so $debug is always true here
-$stdout.binmode if $debug
-$stdout.sync = true if $debug
-
-class String #====================================================================== per-character helpers added to the core String class
- def to_a() self.split(//) end # because $KCODE is set, each element is one UTF-8 character
- def each_char() to_a.each {|ch| yield ch } end # yields each element of to_a (a one-character String)
- def each_character() to_a.each {|ch| yield ch.char } end # yields ch.char (see #char) for each element of to_a
- def char_length() to_a.length end # length in characters, i.e. the size of to_a
- def char_at(n) to_a()[n] end # n-th element of to_a
- def char() Character.get(to_a[0]) end # result of Character.get for the first character of self
- #alias to_c char # still undecided
- #def char_id() char.char_id() end # dropped, for no particular reason
- #def get_char_attribute(a) char.get_char_attribute(a) end # dropped, for no particular reason
- #def ucs() char.ucs() end
- def to_utf8() # concatenation of ch.char.to_utf8 for every character
- return to_a.map {|ch|
- ch.char.to_utf8
- }.join('')
- end
-
- def map_char(block = Proc.new) # calls the block with each character and joins the results (via to_s) into a new String
- return unless block_given? # the guard returns nil when block_given? is false
- return self.to_a.map {|ch| (block.call(ch)).to_s }.join("")
- end
- def map_char!(block = Proc.new) # in-place variant: replaces self with the result of map_char
- return unless block_given?
- self.replace(self.map_char {|ch| block.call(ch)})
- end
- def map_character(block = Proc.new) # like map_char, but the block receives ch.char instead of the character string
- return unless block_given?
- return self.to_a.map {|ch| (block.call(ch.char)).to_s }.join("")
- end
- def map_character!(block = Proc.new) # in-place variant of map_character, implemented through map_char
- return unless block_given?
- self.replace(self.map_char {|ch| block.call(ch.char)})
- end
-
- def method_missing(mid, *args) # forwards unknown methods to char.method_missing when self is a single character
- if char_length == 1 # the shorthand is only valid for a single-character string
- char.method_missing(mid, *args)
- else
- raise NameError, "undefined method `#{mid.id2name}'", caller(1)
- end
- end
-
- def map_utf8() map_char {|ch| ch.char.map_utf8 } end # per-character map_utf8, joined
- alias map_ucs map_utf8
- def map_ucs_er() map_char {|ch| ch.char.map_ucs_er } end # per-character map_ucs_er, joined
- def to_er() map_char {|ch| ch.char.to_er } end # per-character to_er, joined
-
- # Decided not to provide the put-related and []-related methods here.
- def de_er!() # removes entity references in place, substituting Character.new(er).mcs_utf8 for each
- return self unless self =~ Regexp.new(EntityReference::REGEXP_PART) # do nothing if nothing looks like an entity reference
- er = "&"+$1+";" # rebuild the matched reference text from capture group 1
- self.sub!(Regexp.new(Regexp.escape(er)), Character.new(er).mcs_utf8) # the conversion itself is left to Character
- return self.de_er! if self =~ Regexp.new(EntityReference::REGEXP_PART) # recurse if any remain
- return self
- end
- def de_er() return self.dup.de_er!; end # non-destructive variant: runs de_er! on a copy
-
- def inspect_all() map_char {|ch| ch.char.inspect_all } end # per-character inspect_all, joined
- def inspect_x() map_char {|ch| ch.char.inspect_x } end # per-character inspect_x, joined
-
- def to_euc() map_char {|ch| ch.char.to_euc } end # per-character to_euc, joined
- def map_euc() map_char {|ch| ch.char.map_euc } end # per-character map_euc, joined
- def to_sjis() map_char {|ch| ch.char.to_sjis } end # per-character to_sjis, joined
- def map_sjis() map_char {|ch| ch.char.map_sjis } end # per-character map_sjis, joined
-
- def decompose() map_char {|ch| ch.char.decompose } end # one decomposition step applied to every character
- def decompose!() self.replace(self.decompose); self; end # in-place decompose; returns self
- def decompose_all_nu(level=nil) # recursive variant; NOTE(review): passes an argument to decompose_all, which is defined below with no parameters
- level = 0 if level.nil?
- if 10 < level
- p ['too many recursive', self]
- exit
- end
- de = self.decompose
- return de.decompose_all(level+1) if de != self # something changed, so recurse
- de # no further change seems possible
- end
- def decompose_all() map_char {|ch| ch.char.decompose_all } end # per-character decompose_all, joined
- def decompose_all!() self.replace(self.decompose_all); self; end # in-place decompose_all; returns self
-
- def find() # an operation along the lines of "日雲" -> "曇"
- ar = []
- length = char_length()
- each_char {|ch|
- char = ch.char
- ar << char.ids_contained # list of kanji that contain this character
- }
- h = Hash.new(0) # counts, per candidate character, how many of the lists it appears in
- ar.each {|list|
- next if list.nil?
- list.each_char {|ch|
- h[ch] += 1
- }
- }
- str = ""
- h.each {|k, v|
- # p [k, v]
- if length == v # if it showed up in every list
- str += k
- end
- }
- # p str
- str
- end
- def compose() # looks self up under 'ids' in CodesysDB and returns one composed character, or "" when there is none
- db = CodesysDB.instance
- composed = db.get('ids', self)
- return "" if composed.nil? # not found
- return "" if composed.char_length == 0 # unexpected: empty result
- return composed if composed.char_length == 1
- composed.each_char {|ch|
- char = ch.char
- return ch if char.has_attribute? # lax spec: for now, return the first one found
- }
- return "" # if none of them has an attribute, return ""
- end
- def aggregate()
- # Treats the string self as an IDS and, instead of composing it completely,
- # takes its sub-parts and composes as many of them as can be composed.
- tree = IDS_Tree.new(self)
- return self if tree.depth <= 1 # leave here when there are no sub_nodes
- tree.sub_nodes.each {|node|
- c = node.compose
- next if c.nil? || c == ""
- # print "#{self} #{node} #{c}\n"
- # p [self, node, c]
- n = self.gsub(node, c)
- return n.aggregate
- }
- return self # if nothing could be replaced, return self
- end
-end
-
-module CHISE #======================================================================
- def windows?() # truthy when RUBY_PLATFORM matches cygwin, mswin32 or mingw32; nil otherwise
- (RUBY_PLATFORM =~ /cygwin/ || RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/)
- end
- module_function :windows? # makes windows? callable on the module itself, as done just below
- if windows?() # choose hard-coded database locations by platform
- DB_DIR = 'd:/work/chise/char-db' # followed by something like /system-char-id/ucs
- IDS_DB_DIR = 'd:/work/chise/ids/' # followed by something like IDS-JIS-X0208-1990.txt
- else
- DB_DIR = '/usr/local/lib/xemacs-21.4.12/i686-pc-linux/char-db' # followed by something like /system-char-id/ucs
- IDS_DB_DIR = '/home/eto/work/chise/ids/' # followed by something like IDS-JIS-X0208-1990.txt
- end
-
- class EntityReference #====================================================================== parsing of "&...;" entity references into char_ids
- # Which ER to convert to may depend on the situation, so would it be better to implement this as an ordinary class?
-# CODESYS_TABLE = [
-# %w( chinese-big5-cdp CDP- 4 X),
-# %w( ideograph-daikanwa M- 5 d),
-# %w( ideograph-cbeta CB 5 d),
-# %w( ideograph-gt GT- 5 d),
-# %w( ideograph-gt-k GT-K 5 d),
-# %w( japanese-jisx0208-1990 J90- 4 X),
-# %w( japanese-jisx0208 J83- 4 X),
-# %w( japanese-jisx0213-1 JX1- 4 X),
-# %w( japanese-jisx0213-2 JX2- 4 X),
-# %w( japanese-jisx0212 JSP- 4 X),
-# %w( japanese-jisx0208-1978 J78- 4 X),
-# %w( chinese-cns11643-1 C1- 4 X),
-# %w( chinese-cns11643-2 C2- 4 X),
-# %w( chinese-cns11643-3 C3- 4 X),
-# %w( chinese-cns11643-4 C4- 4 X),
-# %w( chinese-cns11643-5 C5- 4 X),
-# %w( chinese-cns11643-6 C6- 4 X),
-# %w( chinese-cns11643-7 C7- 4 X),
-# %w( korean-ksc5601 K0- 4 X),
-# ]
-# CODESYS_ORDER = %w(japanese chinese korean ideograph)
- CODESYS_TABLE = [ # each row: codesys name, ER prefix, digit count (keta), number type ('X' = hex, 'd' = decimal)
- %w( =jis-x0208-1990 J90- 4 X),
- %w( =jis-x0208-1983 J83- 4 X),
- %w( =jis-x0208-1978 J78- 4 X),
- %w( =jis-x0208 J90- 4 X), # ad-hoc implementation of inheritance
- %w( =jis-x0208 J83- 4 X), # ad-hoc implementation of inheritance
- %w( =jis-x0208 J78- 4 X), # ad-hoc implementation of inheritance
- %w( =jis-x0213-1-2000 JX1- 4 X),
- %w( =jis-x0213-2-2000 JX2- 4 X),
- %w( =jis-x0212 JSP- 4 X),
- %w( =big5-cdp CDP- 4 X),
- %w( =cns11643-1 C1- 4 X),
- %w( =cns11643-2 C2- 4 X),
- %w( =cns11643-3 C3- 4 X),
- %w( =cns11643-4 C4- 4 X),
- %w( =cns11643-5 C5- 4 X),
- %w( =cns11643-6 C6- 4 X),
- %w( =cns11643-7 C7- 4 X),
- %w( =ks-x1001 K0- 4 X),
- %w( =daikanwa M- 5 d),
- %w( =cbeta CB 5 d),
- %w( =gt GT- 5 d),
- %w( =gt-k GT-K 5 d),
- ]
- REGEXP_PART = "&([-+0-9A-Za-z#]+);"
- REGEXP_ALL = "^#{REGEXP_PART}$"
-
- def self.match?(er) (er =~ Regexp.new(REGEXP_PART)) != nil end # true when er contains something shaped like an entity reference
-
- def self.parse(er) # returns the char_id as a Fixnum; returns "" when er is not recognised
- return "" unless er =~ Regexp.new(REGEXP_ALL) # looks malformed?
- er = $1 # while we are at it, take out the inner part
- return $1.hex if er =~ /^MCS-([0-9A-Fa-f]+)/ #MCS
- # if er =~ /^U[-+]?([0-9A-Fa-f]+)/ # Unicode code point given directly
- if er =~ /^U[-+]?([0-9A-Fa-f]+)/ || er =~ /^#x([0-9A-Fa-f]+)/ # Unicode code point given directly
- return $1.hex
- end
-
- er.sub!(/^I-/, '') if er =~ /^I-/ # not sure what difference the I- prefix makes
- each_codesys {|codesys, er_prefix, keta, numtype| #p [codesys, er_prefix, keta, numtype]
- numtyperegex = '\d' #if numtype == 'd'
- numtyperegex = '[0-9A-Fa-f]' if numtype == 'X'
- regexpstr = "^#{er_prefix}(#{numtyperegex}{#{keta},#{keta}})$"
- if er =~ Regexp.new(regexpstr)
- codestr = $1
- code = codestr.to_i #if numtype == 'd'
- code = codestr.hex if numtype == 'X'
- char_id_u8 = EntityReference.get_database(codesys, code)
- char_id_num = Character.parse_char_id(char_id_u8)
- next if char_id_num == nil # no entry under this codesys; try the next table row
- return char_id_num
- end
- }
- return ""
- end
-
- def self.each_codesys() # yields (codesys, er_prefix, keta, numtype) for every CODESYS_TABLE row, in table order
-# CODESYS_ORDER.each {|lang|
-# CODESYS_TABLE.each {|codesys, er_prefix, keta, numtype| # not normally written this way; make it a single loop.
-# next unless codesys =~ lang
-# yield(codesys, er_prefix, keta, numtype)
-# }
-# }
- CODESYS_TABLE.each {|codesys, er_prefix, keta, numtype|
- yield(codesys, er_prefix, keta, numtype)
- }
- end
- def self.get_database(codesys, code) # returns CodesysDB.instance.get(codesys, code), or nil when that is nil
- c = CodesysDB.instance.get(codesys, code)
- return c if c != nil
-# if codesys =~ /-jisx0208/
-# #return self.get_database("=jis-x0208", code) # how about recursing?
-# c = CodesysDB.instance.get("=jis-x0208", code)
-# return c
-# end
- return nil
- end
- end
-
- class CharacterFactory #============================================ creates character objects and caches them
- include Singleton
- MAX = 10000
- def initialize
- @max = MAX
- reset()
- end
- def get(char_id) # returns the cached Character for char_id, creating it on first use
- check_max()
- n = Character.parse_char_id(char_id)
- c = @chars[n] # NOTE(review): c is never used afterwards
- @chars[n] = Character.new(n) if @chars[n] == nil
- return @chars[n]
- end
- def reset() # drops every cached Character
- @chars = nil
- @chars = Hash.new
- GC.start # garbage collection
- end
- def length() @chars.length; end # number of cached characters
- def check_max()
- reset if @max < @chars.length # once MAX is exceeded, just reset. Crude, but it is only a cache, so this is fine.
- end
- end
-
- class Character #=============================================================== character object
- def initialize(char_id=nil) # char_id may be anything accepted by Character.parse_char_id
- @char_id = Character.parse_char_id(char_id)
- @attributes = Hash.new
- @check_all_database = false
- end
- attr_reader :char_id
- def to_i() @char_id end
- def mcs_utf8() Character.u4itou8(@char_id) end # @char_id converted by Character.u4itou8
- def mcs_hex() sprintf("%x", @char_id) end # @char_id as lowercase hexadecimal text
-
- #----------------------------------------------------------------------
- def self.get(char_id) CharacterFactory.instance.get(char_id) end # flyweight pattern
-
- #----------------------------------------------------------------------
- def normalize_attribute_name(b) # maps a method-style name onto an attribute name; works on a copy of b
- a = b.dup
- a.gsub!(/_/, '-') # replace underscores with -
- a.sub!(/-at-/, '@')
- a.sub!(/^map-/, '=>')
- a.sub!(/^to-/, '->')
- a.sub!(/^from-/, '<-')
- a
- end
- def get_char_attribute(b) # group of XEmacs UTF-2000 compatible APIs
- a = normalize_attribute_name(b)
- #p [a, b]
- atr = @attributes[a]
- return atr if atr
- atr = check_database(a)
- if atr
- @attributes[a] = atr
- return atr
- end
- return get_char_attribute("="+a) unless a =~ /^=/ # if there is no leading =, assume it was omitted and recurse
- nil
- end
- def put_char_attribute(b,v) # stores v in @attributes and writes it through CharDB.instance.put
- a = normalize_attribute_name(b)
- @attributes[a] = v;
- CharDB.instance.put(a, mcs_utf8(), v)
- end
- def char_attribute_alist() check_all_database(); @attributes; end # runs check_all_database, then returns the attribute Hash
- def char_attribute_list() check_all_database(); @attributes.keys; end # runs check_all_database, then returns the attribute names
- alias [] get_char_attribute # shorthand for it
- alias []= put_char_attribute
- alias alist char_attribute_alist
- alias list char_attribute_list
-
- def method_missing(mid, *args) # reference: ostruct.rb
- mname = mid.id2name
- return get_char_attribute(mname) if args.length == 0 # no arguments: read the attribute named after the method
- put_char_attribute(mname.chop, args[0]) if mname =~ /=$/ # assignment
- end
-
- def has_attribute?() # does it have any meaningful attribute?
- keys = list
- keys.delete_if {|k|
- k =~ /ids/ # attribute names matching /ids/ are not counted
- }
- return (keys.length != 0)
- end
-
- #----------------------------------------------------------------------
- def ==(ch) # equal when ch is a Character with the same char_id
- return false if ch == nil
- return false unless ch.is_a? Character
- self.char_id == ch.char_id
- end
-
- #----------------------------------------------------------------------
- def self.parse_char_id(char_id) # returns a Fixnum
- return nil if char_id == nil
- if char_id.is_a?(Numeric) #p [char_id]
- char_id = 0x80000000 + char_id if char_id < 0 # complement representation
- return char_id.to_i
- elsif char_id.is_a?(String)
- return char_id.to_i if char_id =~ /^\d+$/ && 1 < char_id.length # if it is a number written as a string, convert it and return
- return EntityReference.parse(char_id) if char_id =~ Regexp.new(EntityReference::REGEXP_ALL) # entity reference?
- char_id.sub!(/^\?/, '') if char_id =~ /^\?/ # if there is a leading ?, remove it; NOTE(review): sub! modifies the caller's String
- # this area should really be checked more thoroughly
- begin
- u4 = Uconv.u8tou4(char_id) # convert to a UCS-4 string
- rescue
- p $!
- p char_id
- return 0
- end
- return Character.u4tou4i(u4) # turn into a UCS-4 number and return
- else
- raise ArgumentError, "unknown object for char_id", caller(1)
- end
- end
- def self.u4tou4i(u4) # combines u4[0]..u4[3] with u4[0] as the lowest byte; assumes u4[i] yields an Integer (Ruby 1.8 String#[]) - TODO confirm
- return 0 if u4 == nil || u4 == ""
- return (u4[3] << 24 | u4[2] << 16 | u4[1] << 8 | u4[0]) # turn into a UCS-4 number and return
- end
- def self.u4itou4(num) # inverse of u4tou4i; returns "" for a non-Integer
- return "" unless num.is_a?(Integer)
- return sprintf("%c%c%c%c", num&0xff, (num >> 8)&0xff, (num >> 16)&0xff, (num >> 24)&0xff) # turn the UCS-4 number into a string and return
- end
- def self.u4itou8(char_id) # takes a UCS number and returns a single UTF-8 character
- begin
- u4 = Character.u4itou4(char_id)
- u8 = Uconv.u4tou8(u4)
- return u8
- rescue
- #raise ArgumentError, "invalid char_id (#{char_id})", caller(1)
- #print "error\n"
- return "" # any error during conversion yields ""
- end
- end
-
- #----------------------------------------------------------------------
- def check_database(a) # returns CharDB.instance.get(a, mcs_utf8)
- db = CharDB.instance
- u8 = mcs_utf8()
- v = db.get(a, u8) # look up attribute a of the character represented by u8
- return v
- end
- def check_all_database() # consults the character database using the current @char_id
- return if @check_all_database
- return if @char_id == nil
- db = CharDB.instance
- u8 = mcs_utf8()
- atrs = db.get_all(u8) # fetch all attributes of the character represented by u8
- atrs.each {|a,v|
- @attributes[a] = v # is assigning like this good enough?
- }
- @check_all_database = true # heavy operation, so remember that it has been done
- end
-
- #----------------------------------------------------------------------
- def ucs() #p 'ucs'
- #ar = %w{ucs ucs-big5 ucs-cdp ucs-cns ucs-jis ucs-ks =>ucs =>ucs* =>ucs-jis}
- #ar = %w{ucs ucs-jis ucs-big5 ucs-cdp ucs-cns ucs-ks =>ucs =>ucs* =>ucs-jis}
- ar = %w{ucs-jis ucs =>ucs-jis}
- # The order is arbitrary; ucs-jis is put first. Ideally this should also be configurable.
- ar.each {|a| #p [a]
- u = get_char_attribute(a)
- return u if u != nil
- }
- return nil
- end
-
- #---------------------------------------------------------------------- CCS related
- def to_utf8() Uconv.u4tou8(Character.u4itou4(ucs())) end # returns a UTF-8 string
- #alias to_s to_utf8
- alias to_s mcs_utf8
- def map_utf8() # to_utf8 when ucs is set and at most 0xffff, otherwise the entity reference
- u = ucs()
- if u.nil? || 0xffff < u
- return to_er()
- else
- return to_utf8()
- end
- end
- alias map_ucs map_utf8
- def map_ucs_er() # entity reference of the ucs character when ucs is set and at most 0xffff, otherwise of self
- u = ucs()
- if u.nil? || 0xffff < u
- return to_er()
- else
- return Character.get(u).to_er()
- end
- end
- def to_euc() # converted through Uconv.u16toeuc; "" when ucs is nil or above 0xffff
- u = ucs()
- return "" if u.nil? || 0xffff < u
- Uconv.u16toeuc(Uconv.u4tou16(Character.u4itou4(ucs())))
- end
- def map_euc() # to_euc, falling back to the entity reference when that is ""
- e = to_euc()
- return e if e != ""
- return to_er()
- end
- def to_sjis() # converted through Uconv.u16tosjis; "" when ucs is nil or above 0xffff
- u = ucs()
- return "" if u.nil? || 0xffff < u
- Uconv.u16tosjis(Uconv.u4tou16(Character.u4itou4(ucs())))
- end
- def map_sjis() # to_sjis, falling back to the entity reference when that is ""
- e = to_sjis()
- return e if e != ""
- return to_er()
- end
-
- #----------------------------------------------------------------------
- def to_er(codesys=nil) # returns an entity reference; the argument is the desired codesys (not implemented)
- return "" if @char_id == nil
- return sprintf("&#x%04x;", @char_id) if @char_id <= 0xffff
- return sprintf("&#x%05x;", @char_id) if @char_id <= 0xfffff
- EntityReference.each_codesys {|codesys, er_prefix, keta, numtype|
- code = self[codesys]
- next if code == nil
- return sprintf("&#{er_prefix}%0#{keta}#{numtype};", code)
- }
- return sprintf("&MCS-%08X;", @char_id) # would really like to get rid of this
- end
- def to_er_list() # NOTE(review): to_er never reads its codesys argument, so every element is the same string
- ar = []
- EntityReference.each_codesys {|codesys, er_prefix, keta, numtype|
- er = to_er(codesys)
- ar << er if er != nil
- }
- ar
- end
-
- def inspect_x() # "<utf8,er>" where er is to_er without its leading & and trailing ;
- return "<>" if @char_id == nil
- ar = [to_utf8(), to_er().sub(/^&/,'').chop]
- "<"+ar.join(',')+">"
- end
- alias inspect inspect_x
- def inspect_all_codesys() # unfinished
- # run to_er for every codesys and summarize the results compactly
- end
- def inspect_all() # inspect output extended with every "name:value" attribute pair, sorted
- ar = [inspect.chop]
- alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" }
- return ar.join(',')+">"
- end
- def dump_all()
- ar = [inspect]
- alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" }
- return ar.join('\n')+'\n' # NOTE(review): single-quoted '\n' is a backslash followed by n, not a newline
- end
- def get_attributes() # every attribute as "name: value" lines, sorted
- str = ""
- alist.to_a.sort.each {|a, v|
- str += "#{a}: #{v}\n"
- }
- str
- end
-
- def inspect_ids(hex_flag=false) # "(char<TAB>part<TAB>...)"; with hex_flag each entry is "x" + mcs_hex
- ids = decompose
- ar = []
- ar << (hex_flag ? "x"+mcs_hex : to_utf8)
- if to_s != ids # if the ids is the component itself, do not add components
- ids.each_char {|ch|
- char = ch.char
- next if char.is_ids? # skip characters for which is_ids? is true
- if hex_flag then
- ar << "x"+char.mcs_hex
- else
- u = char.to_utf8
- if u != ""
- ar << u
- else
- ar << char.to_er
- end
- end
- }
- end
- return "("+ar.join("\t")+")"
- end
-
- #---------------------------------------------------------------------- IDS related
- def decompose # one decomposition step: the first usable ids-* attribute, otherwise the character itself
- k = self.to_s
- # idss = self['ids']
- # return idss if idss
- # return k if self.is_basic_kanji? # treat basic kanji as stop kanji.
- return self['ids-represent'] if self['ids-represent'] # if it has ids_represent, use that value.
- return self['ids-element'] if self['ids-element'] # if it has ids_element, use that value.
-
- idss = self['ids-meaning']
- return idss if idss != nil && 0 < idss.length && k != idss
- idss = self['ids-aggregated']
- return idss if idss != nil && 0 < idss.length && k != idss
- idss = self['ids']
- return idss if idss != nil && 0 < idss.length && k != idss
- return k
- # return k if idss.nil? || idss.length == 0 || k == idss
- # if idss.char_length == 2
- # p ['What???', k, idss, k.inspect_all]
- # #return idssx[1] # return only the second one, perhaps?
- # return k # there is no way to expand it into an IDS.
- # end
- # return k if k == idss
- # if idss.include?(k) # workaround for a bug with these two characters: <C5-4C4D><C6-4A37>
- # #return idss.sub(k, '')
- # return k # there is no way to expand it into an IDS.
- # end
- # return idss
- end
- def decompose_all # repeats decompose until the result stops changing
- pde = ""
- de = self.decompose # starting point
- level = 0
- while true
- pde = de
- de = pde.decompose # try decomposing once more.
- break if pde == de # leave the loop
- exit if 10 < level #p ['too many recursive', self]
- level += 1
- end
- return de
- end
- def decompose_all_nu(level=nil) # recursive variant; NOTE(review): passes an argument to decompose_all, which this class defines with no parameters
- level = 0 if level.nil?
- if 10 < level
- p ['too many recursive', self]
- exit
- end
- de = self.decompose
- return de.decompose_all(level+1) if de != self # something changed, so recurse
- return de # no further change seems possible
- end
- def is_ids?() 0x2ff0 <= @char_id && @char_id <= 0x2fff end # true when @char_id lies in 0x2ff0..0x2fff
- def ids_operator_argc() # operand count: 0 when is_ids? is false, 3 for 0x2ff2 and 0x2ff3, otherwise 2
- return 0 unless is_ids?
- return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3
- return 2
- end
- end
-
-end
-
-#----------------------------------------------------------------------終了
+# $Id: chise.rb,v 1.18 2003-11-10 08:10:12 eto Exp $
+# Copyright (C) 2002-2003 Kouichirou Eto, All rights reserved.
+# This is free software with ABSOLUTELY NO WARRANTY.
+# You can redistribute it and/or modify it under the terms of the GNU GPL2.
+
+require 'chise/char'
+require 'chise/rbchise'
+require 'chise/db'
+require 'chise/ids'
assert_equal("⿰木木", "⿰木木".compose.decompose)
assert_equal("林".ucs, "⿰木木".compose.ucs)
end
+ def test_idc_shortcut # checks decompose results against the IDC_* constants (presumably the ideographic description characters, defined elsewhere - confirm)
+ assert_equal(IDC_LR, "林".decompose.first_char)
+ assert_equal(IDC_LR+"木木", "林".decompose)
+
+ assert_equal(IDC_AB, "森".decompose.first_char)
+ assert_equal(IDC_AB+"木林", "森".decompose)
+ assert_equal(IDC_AB+"火火", "炎".decompose)
+
+ assert_equal(IDC_LMR, "班".decompose.first_char)
+ assert_equal(IDC_LMR+"彳"+IDC_AB+"山王"+"攵", "徴".decompose) #meaning?
+
+ assert_equal(IDC_AMB, "鼻".decompose.first_char)
+ assert_equal(IDC_AMB+"自田廾", "鼻".decompose)
+ assert_equal(IDC_AMB+"士冖匕", "壱".decompose)
+ assert_equal(IDC_AMB+"穴厶心", "窓".decompose)
+ assert_equal(IDC_AMB+"丗冖巾", "帯".decompose)
+
+ assert_equal(IDC_FS, "囲".decompose.first_char)
+ assert_equal(IDC_FS+"囗井", "囲".decompose)
+ assert_equal(IDC_FS+"行韋", "衛".decompose)
+ assert_equal(IDC_FS+"行圭", "街".decompose)
+ assert_equal(IDC_FS+"行重", "衝".decompose)
+ assert_equal(IDC_FS+IDC_AB+"一凵田", "画".decompose)
+
+ assert_equal(IDC_FA, "問".decompose.first_char)
+ assert_equal(IDC_FA+"門口", "問".decompose)
+ assert_equal(IDC_FA+"門"+IDC_LR+"豆寸", "闘".decompose)
+ assert_equal(IDC_FA+"戌女", "威".decompose)
+ assert_equal(IDC_FA+"茂臣", "蔵".decompose)
+ assert_equal(IDC_FA+"尺旦", "昼".decompose)
+ assert_equal(IDC_FA+"冂入", "内".decompose)
+ assert_equal(IDC_FA+"几丶", "凡".decompose)
+ assert_equal(IDC_FA+"几"+IDC_AB+"丿虫", "風".decompose)
+
+ assert_equal(IDC_FB, "凶".decompose.first_char)
+ assert_equal(IDC_AB+"止"+IDC_FB+"凵米", "歯".decompose)
+
+ assert_equal(IDC_FL, "匠".decompose.first_char)
+ assert_equal(IDC_FL+"匚斤", "匠".decompose)
+ assert_equal(IDC_FL+"匚矢", "医".decompose)
+ assert_equal(IDC_FL+"匚若", "匿".decompose)
+ assert_equal(IDC_FL+"匚儿", "匹".decompose)
+
+ assert_equal(IDC_FUL, "庁".decompose.first_char)
+ assert_equal(IDC_FUL+"广丁", "庁".decompose)
+ assert_equal(IDC_FUL+"歹匕", "死".decompose)
+ assert_equal(IDC_FUL+"尹口", "君".decompose)
+ assert_equal(IDC_FUL+"麻鬼", "魔".decompose)
+ assert_equal(IDC_FUL+"府肉", "腐".decompose)
+ assert_equal(IDC_FUL+"麻手", "摩".decompose)
+ assert_equal(IDC_FUL+"虍思", "慮".decompose)
+ assert_equal(IDC_FUL+"食口", "倉".decompose)
+ assert_equal(IDC_AB+"日"+IDC_FUL+"耳又", "最".decompose)
+ assert_equal(IDC_FUL+"手目", "看".decompose) #meaning
+ assert_equal(IDC_FUL+"辰口", "唇".decompose) #?
+
+ assert_equal(IDC_FUR, "句".decompose.first_char)
+ assert_equal(IDC_FUR+"勹口", "句".decompose)
+ assert_equal(IDC_FUR+"勹丶", "勺".decompose)
+ assert_equal(IDC_FUR+"勹日", "旬".decompose)
+ assert_equal(IDC_FUR+"戈廾", "戒".decompose)
+ assert_equal(IDC_FUR+"弋工", "式".decompose)
+ assert_equal(IDC_FUR+"刀丿", "刃".decompose)
+ assert_equal(IDC_FUR+"鳥山", "島".decompose) #meaning
+
+ assert_equal(IDC_FLL, "通".decompose.first_char)
+ assert_equal(IDC_FLL+"廴聿", "建".decompose)
+ assert_equal(IDC_FLL+"走戉", "越".decompose)
+ assert_equal(IDC_FLL+"走巳", "起".decompose)
+ assert_equal(IDC_FLL+"走取", "趣".decompose)
+ assert_equal(IDC_FLL+"走召", "超".decompose)
+ assert_equal(IDC_FLL+"是頁", "題".decompose)
+ assert_equal(IDC_FLL+"免力", "勉".decompose)
+ assert_equal(IDC_FLL+"鬼未", "魅".decompose)
+ assert_equal(IDC_FLL+"黒犬", "黙".decompose)
+
+ assert_equal(IDC_O, "太".decompose.first_char)
+ assert_equal(IDC_O+"大丶", "太".decompose)
+ assert_equal(IDC_O+"衣中", "衷".decompose)
+ assert_equal(IDC_O+"衣里", "裏".decompose)
+ assert_equal(IDC_O+"勹巳", "包".decompose)
+ assert_equal(IDC_O+"勹乂", "匁".decompose)
+ assert_equal(IDC_O+"木日", "東".decompose)
+ assert_equal(IDC_O+"弍一", "弐".decompose)
+ assert_equal(IDC_O+"衣保", "褒".decompose)
+ end
+ def test_glyph_decompose # compares decompose with glyph_decompose on characters where the two are expected to differ
+ assert_equal("音", "音".decompose) # decompose leaves this character unchanged
+# assert_equal(IDC_AB+"立日", "音".glyph_decompose)
+ assert_equal(IDC_FLL, "世".decompose.first_char)
+ assert_equal("世", "世".glyph_decompose) # glyph_decompose leaves this character unchanged
+ end
def test_find
# p "日雲".find #"曇"
assert(4 <= "日雲".find .char_length) #"曇"