#$stdout.binmode if $debug
$stdout.sync = true if $debug # when $debug is set, turn off $stdout buffering so output appears immediately
-class String
+class String #======================================================================
def to_a() self.split(//) end # Splits on the empty regexp into an array of characters; $KCODE is set, so each element is one UTF-8 character
def each_char() to_a.each {|ch| yield ch } end # Yields each one-character string from to_a
+ def each_character() to_a.each {|ch| yield ch.char } end # Like each_char, but yields ch.char (the result of String#char) instead of the raw string
def char_length() to_a.length end # Length in characters, i.e. the number of elements of to_a
def char_at(n) to_a()[n] end # n-th element of to_a
def char() Character.get(to_a[0]) end # Looks up the first character of the string through Character.get
-# def method_missing(mid, *args) char.method_missing(mid, *args) end
+ #alias to_c char #悩み中
def char_id() char.char_id() end # Delegates to char_id of the object returned by char (first character only)
def get_char_attribute(a) char.get_char_attribute(a) end # Delegates the attribute lookup to the object returned by char
- def ucs() char.ucs() end
+ #def ucs() char.ucs() end
def to_utf8() # Maps every character through ch.char.to_utf8 and joins the pieces into one string
return to_a.map {|ch|
ch.char.to_utf8
}.join('')
end
+ def method_missing(mid, *args) # Forwards unknown methods to the character object, so a one-character string can be used as shorthand for it
+ if char_length == 1 # the shorthand is only valid for a string of exactly one character
+ char.method_missing(mid, *args) # NOTE(review): no respond_to? counterpart is visible here, so respond_to? will not reflect these forwarded methods
+ else # empty or multi-character string
+ raise NameError, "undefined method `#{mid.id2name}'", caller(1) # caller(1) is passed as the backtrace, so the error starts at the call site rather than in this method
+ end
+ end
+
+ def map_utf8() map_char {|ch| ch.char.map_utf8 } end # Per-character map_utf8, combined by map_char (map_char is defined outside this excerpt)
+ alias map_ucs map_utf8 # map_ucs is a second name for map_utf8
+ def map_ucs_er() map_char {|ch| ch.char.map_ucs_er } end # Per-character map_ucs_er, combined by map_char
+ def to_er() map_char {|ch| ch.char.to_er } end # Per-character to_er (entity-reference form), combined by map_char
+
#put関係、[]関係は用意しないことにした。
def de_er!() #EntityReferenceを取り除く
return self unless self =~ Regexp.new(EntityReference::REGEXP_PART) #それらしいのが無ければ何もしない
def decompose() map_char {|ch| ch.char.decompose } end # Per-character decompose, combined by map_char (defined outside this excerpt)
def decompose!() self.replace(self.decompose); self; end # In-place variant: replaces the receiver's contents with decompose and returns self
- def decompose_all(level=nil)
+ def decompose_all_nu(level=nil)
level = 0 if level.nil?
if 10 < level
p ['too many recursive', self]
return de.decompose_all(level+1) if de != self #なにか変化があったから再帰
de #もうこれ以上変化は無さそうだぞと。
end
+ def decompose_all() map_char {|ch| ch.char.decompose_all } end # Per-character decompose_all, combined by map_char; takes no arguments
def decompose_all!() self.replace(self.decompose_all); self; end # In-place variant: replaces the receiver's contents with decompose_all and returns self
def find() #"日雲"→"曇"とかいう感じの操作
end
end
-module CHISE
+module CHISE #======================================================================
def windows?() # Returns the match index (truthy) when RUBY_PLATFORM contains cygwin, mswin32 or mingw32; otherwise nil
(RUBY_PLATFORM =~ /cygwin/ || RUBY_PLATFORM =~ /mswin32/ || RUBY_PLATFORM =~ /mingw32/)
end
er = $1 #ついでに中身の部分を取り出す
return $1.hex if er =~ /^MCS-([0-9A-Fa-f]+)/ #MCS
# if er =~ /^U[-+]?([0-9A-Fa-f]+)/ #Unicode直打ち
- if er =~ /^U[-+]?([0-9A-Fa-f]+)/ || er =~ /^#([0-9A-Fa-f]+)/ #Unicode直打ち
+ if er =~ /^U[-+]?([0-9A-Fa-f]+)/ || er =~ /^#x([0-9A-Fa-f]+)/ #Unicode直打ち
return $1.hex
end
def self.get(char_id) CharacterFactory.instance.get(char_id) end # Flyweight pattern: characters are obtained through CharacterFactory.instance rather than constructed directly
#----------------------------------------------------------------------
- def get_char_attribute(a) # XEmacs UTF-2000互換API群
+ def normalize_attribute_name(b) # Returns a normalized copy of the attribute name b
+ a = b.dup # work on a copy so the destructive calls below do not touch the caller's string
a.gsub!(/_/, '-') # replace every underscore with a hyphen
+ a.sub!(/^map-/, '=>') # a leading "map-" becomes "=>"
+ a.sub!(/^to-/, '->') # a leading "to-" becomes "->"
+ a.sub!(/^from-/, '<-') # a leading "from-" becomes "<-"
+ a
+ end
+ def get_char_attribute(b) # Part of the XEmacs UTF-2000 compatible API: returns the value stored for attribute b
+ a = normalize_attribute_name(b) # look up under the normalized name
+ #p [a, b]
atr = @attributes[a] # value already held in @attributes, if any
return atr if atr != nil
atr = check_database(a) # not held yet: ask check_database (defined outside this excerpt)
@attributes[a] = atr if atr != nil # remember only non-nil results
- return get_char_attribute("=jis-x0208") if a =~ /jisx0208/
+ return get_char_attribute("=jis-x0208") if a =~ /jisx0208/ # the only special case: a name containing "jisx0208" is looked up again as "=jis-x0208"
return @attributes[a]
end
- def put_char_attribute(a,v)
- a.gsub!(/_/, '-') # replace every underscore with a hyphen
+ def put_char_attribute(b,v) # Stores v for attribute b under its normalized name
+ a = normalize_attribute_name(b) # normalizes a copy, so b itself is left unmodified
@attributes[a] = v;
CharDB.instance.put(a, mcs_utf8(), v) # also handed to CharDB.instance.put together with mcs_utf8() (defined outside this excerpt)
end
char_id = 0x80000000 + char_id if char_id < 0 #補数表現
return char_id.to_i
elsif char_id.is_a?(String)
- return char_id.to_i if char_id =~ /^\d+$/ #文字列による数字だったら数値化してreturn
+ return char_id.to_i if char_id =~ /^\d+$/ && 1 < char_id.length #文字列による数字だったら数値化してreturn
return EntityReference.parse(char_id) if char_id =~ Regexp.new(EntityReference::REGEXP_ALL) #実体参照?
char_id.sub!(/^\?/, '') if char_id =~ /^\?/ #もし先頭に?がついていたら削除
#このへん本当はもっとちゃんとチェックするべし
- u4 = Uconv.u8tou4(char_id) #UCS-4文字列に変換
+ begin
+ u4 = Uconv.u8tou4(char_id) #UCS-4文字列に変換
+ rescue
+ p $!
+ p char_id
+ return 0
+ end
return Character.u4tou4i(u4) #UCS-4数値にしてreturn
else
raise ArgumentError, "unknown object for char_id", caller(1)
return (u4[3] << 24 | u4[2] << 16 | u4[1] << 8 | u4[0]) #UCS-4数値にしてreturn
end
def self.u4itou4(num) # Formats an integer as a four-character string, lowest byte first
- return "" if num == nil
+ return "" unless num.is_a?(Integer) # anything that is not an Integer (nil included) yields the empty string
return sprintf("%c%c%c%c", num&0xff, (num >> 8)&0xff, (num >> 16)&0xff, (num >> 24)&0xff) # UCS-4 number to string; NOTE(review): %c with values above 0x7f is encoding-dependent on Ruby 1.9+ — confirm the target Ruby version
end
def self.u4itou8(char_id) #ucsの数値を受けとり、UTF-8の文字一文字を返す
return to_utf8()
end
end
+ alias map_ucs map_utf8
def map_ucs_er()
u = ucs()
if u.nil? || 0xffff < u
#----------------------------------------------------------------------
def to_er(codesys=nil) #実体参照を返す、希望するcodesysが引数(未実装)
return "" if @char_id == nil
-# return sprintf("&U+%04X;", @char_id) if @char_id <= 0xffff
return sprintf("&#x%04x;", @char_id) if @char_id <= 0xffff
-# return sprintf("&#%05d;", @char_id) if @char_id <= 0xffff
-# return sprintf("&U-%05X;", @char_id) if @char_id <= 0xfffff
return sprintf("&#x%05x;", @char_id) if @char_id <= 0xfffff
-# return sprintf("&#%06d;", @char_id) if @char_id <= 0xfffff
EntityReference.each_codesys {|codesys, er_prefix, keta, numtype|
code = self[codesys]
next if code == nil
"<"+ar.join(',')+">"
end
alias inspect inspect_x
- def inspect_all_codesys()
+ def inspect_all_codesys() # unfinished
# Intended to run to_er for every codesys and summarize the results compactly; no body is visible in this excerpt
end
def inspect_all()
if u != ""
ar << u
else
-# ar << char.to_s
ar << char.to_er
end
end
k = self.to_s
# idss = self['ids']
# return idss if idss
- return k if self.is_basic_kanji? #基本漢字はstop kanjiとするぞと。
+# return k if self.is_basic_kanji? #基本漢字はstop kanjiとするぞと。
+ return self['ids-represent'] if self['ids-represent'] #ids_representを持っている場合はその値とする。
+ return self['ids-element'] if self['ids-element'] #ids_elementを持っている場合はその値とする。
+ idss = self['ids-meaning']
+ return idss if idss != nil && 0 < idss.length && k != idss
idss = self['ids-aggregated']
return idss if idss != nil && 0 < idss.length && k != idss
idss = self['ids']
# end
# return idss
end
+ def decompose_all
+ pde = ""
+ de = self.decompose #出発点
+ level = 0
+ while true
+ pde = de
+ de = pde.decompose #もう一度分解をしてみる。
+ break if pde == de #ループを抜けだす
+ exit if 10 < level #p ['too many recursive', self]
+ level += 1
+ end
+ return de
+ end
+ def decompose_all_nu(level=nil)
+ level = 0 if level.nil?
+ if 10 < level
+ p ['too many recursive', self]
+ exit
+ end
+ de = self.decompose
+ return de.decompose_all(level+1) if de != self #なにか変化があったから再帰
+ return de #もうこれ以上変化は無さそうだぞと。
+ end
def is_ids?() 0x2ff0 <= @char_id && @char_id <= 0x2fff end # True when @char_id lies in 0x2FF0..0x2FFF (the Unicode Ideographic Description Characters block)
def ids_operator_argc()
return 0 unless is_ids?
end
end
- class DBS #======================================================================複数のDBを集めたclass
+ class DBS #======================================================================複数のDBを集めたclass、未完成
end
class ADB < BDB::Hash #======================================================================一つのDB
unix.gsub!(%r|\+|, '*')
return unix
end
-# def windows?() DB.windows?() end
def get_filename(t)
return @pre + DB.unix_to_win(t) + @post if windows?
return @pre + t + @post
keys << t
}
return keys
- #return keys.sort
end
def close_db(t)
db = get(t)
end
end
- class JoyoList
+ class JoyoList #======================================================================
include Singleton
#JP_JOYO_FILE = DB_DIR+"/../jp-joyo.txt" #EUC-jisx0213
JP_JOYO_FILE = DB_DIR+"/../joyo-ucs.txt" #UCS
test_char(Character.get("字"))
test_char(Character.new("字"))
test_char("字".char)
+ test_char("字")
end
def test_create
assert_equal(23383, Character.parse_char_id("字"))
assert_equal("字", "&U5B57;".de_er)
assert_equal("字", "&U-5B57;".de_er)
assert_equal("字", "&U+5B57;".de_er)
- assert_equal("字", "B57;".de_er)
+ assert_equal("字", "字".de_er)
+ end
+ def test_alias # Exercises the one-character String shorthand and the map_/to_/from_ attribute-name aliases
+ assert_raises(NameError, message=""){ # the shorthand must raise for a two-character string; NOTE(review): message="" assigns a local and passes "" as an extra argument
+ t = "字字".ucs
+ }
+ assert_equal(23383, "字".ucs) # 23383 and 0x5B57 below are the same value in decimal and hex
+ assert_equal(0x5B57, "字".ucs)
+ assert_equal(0x5B57, @char.ucs)
+
+ assert_equal(0xfa55, "突".char['=>ucs-jis'])
+ assert_equal(0xfa55, "突".map_ucs_jis) # shorthand: map_ucs_jis normalizes to the attribute name "=>ucs-jis"
+
+ char1 = Character.get("23383") # a string of two or more digits is taken as a numeric char_id
+ char2 = Character.get(23383)
+ assert_equal(char1, char2)
+
+ char1 = Character.get("2") # a single digit is not taken as a number, so this differs from char_id 2
+ char2 = Character.get(2)
+ assert_not_equal(char1, char2)
+
+ assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (ucs . 65297)))", "1".char['->fullwidth'])
+ assert_equal("(((name . \"FULLWIDTH DIGIT ONE\") (ucs . 65297)))", "1".char.to_fullwidth) # to_fullwidth normalizes to "->fullwidth"
+ assert_equal("(((name . \"DIGIT ONE\") (ucs . 49)))", "1".char['<-fullwidth'])
+ assert_equal("(((name . \"DIGIT ONE\") (ucs . 49)))", "1".char.from_fullwidth) # from_fullwidth normalizes to "<-fullwidth"
end
def teardown() @char = nil end # Clears the @char fixture; presumably the test framework's per-test teardown hook — the enclosing class is not visible here
end