class Character #=============================================================== 文字オブジェクト
BASIC_KANJI = "人子女母父王口耳手足力目首毛心犬牛鳥貝角羽虫馬魚羊肉皮米竹木麦豆山川雨風水土石金田穴日月火音糸刀舟門戸衣矢弓車皿一二三四五六七八九十百千万寸尺上中下本玉立回食行止交向歩考入示走生出来書言大小白青多少高長"
- COMPOSIT_KANJI = "鳴名加品古知問間聞取兄見切分粉貧林森校東明住位好岩砂里男畑習休短空坂島倉美孝赤看光初努協解新歌語話張強忘悲答晴現正字安守灰秋秒困国医包同合舌居右左受友反道返迷花菜集机主太氷州点店庫仕帳幼防引配早直班筆重番北化比死夏後進酒福私家世内谷半原前寺思電雲気布旅衆泣"
# Reports whether the BASIC_KANJI string contains this object's to_s string.
# NOTE(review): String#include? is a substring test, so an empty to_s would
# also answer true -- confirm callers never reach here with an empty string.
def is_basic_kanji?
BASIC_KANJI.include?(self.to_s)
end
@check_all_database = false
end
attr_reader :char_id # read accessor for @char_id
+ def to_i() @char_id end # Returns @char_id unchanged.
def mcs_utf8() Character.u4itou8(@char_id) end # Hands @char_id to Character.u4itou8; presumably the UTF-8 string for that code -- confirm against u4itou8.
def mcs_hex() sprintf("%x", @char_id) end # @char_id formatted as lowercase hexadecimal, without prefix or padding.
def ucs() #p 'ucs'
#ar = %w{ucs ucs-big5 ucs-cdp ucs-cns ucs-jis ucs-ks =>ucs =>ucs* =>ucs-jis}
#ar = %w{ucs ucs-jis ucs-big5 ucs-cdp ucs-cns ucs-ks =>ucs =>ucs* =>ucs-jis}
- ar = %w{ucs ucs-jis =>ucs-jis}
+ ar = %w{ucs-jis ucs =>ucs-jis}
#並び順は恣意的で、ucs-jisを先に出している。本来はこれも指定できるようにするべき。
ar.each {|a| #p [a]
u = get_char_attribute(a)
def to_utf8() Uconv.u4tou8(Character.u4itou4(ucs())) end # Returns a UTF-8 string: ucs() is passed through Character.u4itou4 and then Uconv.u4tou8.
#alias to_s to_utf8
alias to_s mcs_utf8
+ def map_utf8()
+ u = ucs()
+ if u.nil? || 0xffff < u
+ return to_er()
+ else
+ return to_utf8()
+ end
+ end
+ def map_ucs_er()
+ u = ucs()
+ if u.nil? || 0xffff < u
+ return to_er()
+ else
+ return Character.get(u).to_er()
+ end
+ end
+ def to_euc()
+ u = ucs()
+ return "" if u.nil? || 0xffff < u
+ Uconv.u16toeuc(Uconv.u4tou16(Character.u4itou4(ucs())))
+ end
+ def map_euc()
+ e = to_euc()
+ return e if e != ""
+ return to_er()
+ end
+ def to_sjis()
+ u = ucs()
+ return "" if u.nil? || 0xffff < u
+ Uconv.u16tosjis(Uconv.u4tou16(Character.u4itou4(ucs())))
+ end
+ def map_sjis()
+ e = to_sjis()
+ return e if e != ""
+ return to_er()
+ end
#----------------------------------------------------------------------
def to_er(codesys=nil) #実体参照を返す、希望するcodesysが引数(未実装)
return "" if @char_id == nil
- return sprintf("&U+%04X;", @char_id) if @char_id <= 0xffff
- return sprintf("&U-%05X;", @char_id) if @char_id <= 0xfffff
+# return sprintf("&U+%04X;", @char_id) if @char_id <= 0xffff
+ return sprintf("&#x%04x;", @char_id) if @char_id <= 0xffff
+# return sprintf("&#%05d;", @char_id) if @char_id <= 0xffff
+# return sprintf("&U-%05X;", @char_id) if @char_id <= 0xfffff
+ return sprintf("&#x%05x;", @char_id) if @char_id <= 0xfffff
+# return sprintf("&#%06d;", @char_id) if @char_id <= 0xfffff
EntityReference.each_codesys {|codesys, er_prefix, keta, numtype|
code = self[codesys]
next if code == nil
def inspect_ids(hex_flag=false)
ids = decompose
ar = []
- ar << (hex_flag ? mcs_hex : to_utf8)
+ ar << (hex_flag ? "x"+mcs_hex : to_utf8)
if to_s != ids #idsが部品そのものだったら部品追加はしない
ids.each_char {|ch|
char = ch.char
next if char.is_ids?
if hex_flag then
- ar << char.mcs_hex
+ ar << "x"+char.mcs_hex
else
u = char.to_utf8
if u != ""
include Singleton
#JP_JOYO_FILE = DB_DIR+"/../jp-joyo.txt" #EUC-jisx0213
JP_JOYO_FILE = DB_DIR+"/../joyo-ucs.txt" #UCS
+ COMPOSIT_KANJI = "鳴名加品古知問間聞取兄見切分粉貧林森校東明住位好岩砂里男畑習休短空坂島倉美孝赤看光初努協解新歌語話張強忘悲答晴現正字安守灰秋秒困国医包同合舌居右左受友反道返迷花菜集机主太氷州点店庫仕帳幼防引配早直班筆重番北化比死夏後進酒福私家世内谷半原前寺思電雲気布旅衆泣"
+# COMPOSIT_KANJI = "鳴名加品古"
# Starts with an empty @nchars array, then calls read_file.
# NOTE(review): read_file presumably fills @nchars from JP_JOYO_FILE -- confirm
# against its definition, which is not visible here.
def initialize
@nchars = []
read_file
end
}
end
- def each_char
- @nchars.each {|ch|
- yield(ch)
- }
- end
- def dump_all_ids(hex_flag=false)
- each_char {|ch|
+ def dump_ids(ar) # Prints one line per element of ar: inspect_ids(true), a tab and ";", inspect_ids(false), newline.
+ ar.each {|ch|
char = ch.char # presumably ch is a String whose .char gives its Character object -- confirm
-# print char.inspect_ids(hex_flag), "\n"
print char.inspect_ids(true), "\t;", char.inspect_ids(false), "\n"
}
end