1 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
7 class CharacterFactory # generate Character object and cache them
10 MAX_CACHE_CHARACTER = 10000
14 @parser = CharacterParser.new
23 n = @parser.parse(char_id)
24 @chars[n] = Character.new(n) if @chars[n].nil?
29 clear if MAX_CACHE_CHARACTER < @chars.length # clear all caches.
38 def initialize(char_id=nil)
39 @parser = CharacterParser.new
40 @char_id = @parser.parse(char_id)
41 @attributes = Hash.new
42 @check_all_database = false
# Returns the stored character id (@char_id) unchanged.
46 def to_i() @char_id end
# Returns the result of Character.u4itou8 applied to this character's id.
47 def mcs_utf8() Character.u4itou8(@char_id) end
# Formats @char_id with "%x": lowercase hexadecimal digits, no prefix, no padding.
48 def mcs_hex() sprintf("%x", @char_id) end
50 def self.get(char_id) # flyweight pattern
51 CharacterFactory.instance.get(char_id)
54 def normalize_attribute_name(b)
56 a.gsub!(/_/, "-") #underline
# Replace each underscore with a hyphen ("-").
60 a.sub!(/^from-/, "<-")
64 def get_char_attribute(b) # XEmacs UTF-2000
# Group of APIs compatible with XEmacs UTF-2000.
65 a = normalize_attribute_name(b)
69 atr = check_database(a)
74 return get_char_attribute("="+a) unless a =~ /^=/ #
# If the name has no leading "=", assume the "=" was omitted and recurse.
78 def put_char_attribute(b,v)
79 a = normalize_attribute_name(b)
81 CharDB.instance.put(a, mcs_utf8(), v)
# Runs check_all_database, then returns the @attributes object itself (not a copy).
84 def char_attribute_alist() check_all_database(); @attributes; end
# Runs check_all_database, then returns the keys of @attributes.
85 def char_attribute_list() check_all_database(); @attributes.keys; end
86 alias [] get_char_attribute #
# Abbreviated names for the methods above.
87 alias []= put_char_attribute
88 alias alist char_attribute_alist
89 alias list char_attribute_list
91 def method_missing(mid, *args) # ref. ostruct.rb
93 return get_char_attribute(mname) if args.length == 0
94 put_char_attribute(mname.chop, args[0]) if mname =~ /=$/ #
# Assignment.
97 def has_attribute?() #
# Does this character have any meaningful attribute?
102 return (keys.length != 0)
106 return false if ch.nil?
107 return false unless ch.is_a? Character
108 self.char_id == ch.char_id
111 def self.u4itou4(num)
112 return "" unless num.is_a?(Integer)
113 return sprintf("%c%c%c%c", num&0xff, (num >> 8)&0xff, (num >> 16)&0xff, (num >> 24)&0xff) #UCS-4
# Turn the UCS-4 numeric value into a string and return it.
116 def self.u4itou8(char_id) #ucs
# Takes a UCS numeric value and returns the corresponding single UTF-8 character.
118 u4 = Character.u4itou4(char_id)
119 u8 = Uconv.u4tou8(u4)
122 #raise ArgumentError, "invalid char_id (#{char_id})", caller(1)
128 def check_database(a)
131 v = db.get(a, u8) # u8
# Look up attribute a of the character represented by u8.
135 def check_all_database() #
# Consult the character database based on the current @char_id.
136 return if @check_all_database
137 return if @char_id.nil?
140 atrs = db.get_all(u8) #u8
# Fetch all attributes of the character represented by u8.
142 @attributes[a] = v #
# Is assigning it like this good enough?
144 @check_all_database = true #
# This is a heavy operation, so do the check just in case.
149 #ar = %w{ucs ucs-big5 ucs-cdp ucs-cns ucs-jis ucs-ks =>ucs =>ucs* =>ucs-jis}
150 #ar = %w{ucs ucs-jis ucs-big5 ucs-cdp ucs-cns ucs-ks =>ucs =>ucs* =>ucs-jis}
151 ar = %w{ucs-jis ucs =>ucs-jis}
152 #
# The order is arbitrary; ucs-jis is put first. Ideally this should be configurable as well.
154 u = get_char_attribute(a)
160 #-------------------------------------------------------------------CCS
# CCS-related methods.
# Returns the UTF-8 string for ucs(): the value is packed by Character.u4itou4
# and the result is converted with Uconv.u4tou8.
161 def to_utf8() Uconv.u4tou8(Character.u4itou4(ucs())) end # returns a UTF-8 string
167 if u.nil? || 0xffff < u
173 alias map_ucs map_utf8
177 if u.nil? || 0xffff < u
180 return Character.get(u).to_er()
186 return "" if u.nil? || 0xffff < u
187 Uconv.u16toeuc(Uconv.u4tou16(Character.u4itou4(ucs())))
198 return "" if u.nil? || 0xffff < u
199 Uconv.u16tosjis(Uconv.u4tou16(Character.u4itou4(ucs())))
208 def to_er(codesys=nil) #
# Returns an entity reference; the preferred codesys is given as the argument (not implemented yet).
209 return "" if @char_id.nil?
210 return sprintf("&#x%04x;", @char_id) if @char_id <= 0xffff
211 return sprintf("&#x%05x;", @char_id) if @char_id <= 0xfffff
212 EntityReference.each_codesys {|codesys, er_prefix, keta, numtype|
215 return sprintf("&#{er_prefix}%0#{keta}#{numtype};", code)
217 return sprintf("&MCS-%08X;", @char_id) #
# Ideally we would do without this.
222 EntityReference.each_codesys {|codesys, er_prefix, keta, numtype|
230 return "<>" if @char_id.nil?
231 ar = [to_utf8(), to_er().sub(/^&/,"").chop]
234 alias inspect inspect_x
236 def inspect_all_codesys() #
# Unfinished.
237 #to_er
# Run to_er for every codesys and summarize the results compactly.
242 alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" }
243 return ar.join(",")+">"
248 alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" }
249 return ar.join('\n')+'\n'
254 alist.to_a.sort.each {|a, v|
255 str += "#{a}: #{v}\n"
260 def inspect_ids(hex_flag=false)
263 ar << (hex_flag ? "x"+mcs_hex : to_utf8)
264 if to_s != ids #ids
# If ids is the component itself, do not add components.
269 ar << "x"+char.mcs_hex
280 return "("+ar.join("\t")+")"
283 #--------------------------------------------------------------------IDS
# IDS-related methods.
# Delegates to do_decompose with check_meaning = false.
284 def glyph_decompose() do_decompose(false) end
# Delegates to do_decompose with check_meaning = true.
285 def decompose() do_decompose(true) end
286 def do_decompose(check_meaning = true)
289 # return idss if idss
290 # return k if self.is_basic_kanji? #
# Basic kanji are to be treated as stop kanji.
292 return self["ids-represent"] if self["ids-represent"] #ids_represent
# If the character has ids_represent, use that value.
293 return self["ids-element"] if self["ids-element"] #ids_element
# If the character has ids_element, use that value.
294 idss = self["ids-meaning"]
295 return idss if idss && 0 < idss.length && k != idss
297 idss = self["ids-aggregated"]
298 return idss if idss && 0 < idss.length && k != idss
300 return idss if idss && 0 < idss.length && k != idss
302 # return k if idss.nil? || idss.length == 0 || k == idss
303 # if idss.char_length == 2
304 # p ["What???", k, idss, k.inspect_all]
305 # #return idssx[1] #
# Return only the second one, perhaps?
306 # return k #IDS
# There is no way to expand this into an IDS.
308 # return k if k == idss
309 # if idss.include?(k) #<C5-4C4D><C6-4A37>
# Workaround for a bug affecting these two characters.
310 # #return idss.sub(k, "")
311 # return k #IDS
# There is no way to expand this into an IDS.
318 de = self.decompose #
# Starting point.
322 de = pde.decompose #
# Try decomposing once more.
323 break if pde == de #
# Break out of the loop.
324 exit if 10 < level #p ["too many recursive", self]
330 def decompose_all_nu(level=nil)
331 level = 0 if level.nil?
333 p ["too many recursive", self]
337 return de.decompose_all(level+1) if de != self #
# Something changed, so recurse.
338 return de #
# No further change seems likely.
# True when @char_id lies in 0x2ff0..0x2fff (inclusive).
# NOTE(review): presumably the Unicode Ideographic Description Characters block — confirm.
# NOTE(review): a nil @char_id would make the comparison raise — confirm callers never reach this with nil.
341 def is_ids?() 0x2ff0 <= @char_id && @char_id <= 0x2fff end
343 def ids_operator_argc()
344 return 0 unless is_ids?
345 return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3