1 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
7 class CharacterFactory # generate Character object and cache them
10 MAX_CACHE_CHARACTER = 10000
14 @parser = CharacterParser.new
23 mcs = @parser.parse(char_id)
24 @chars[mcs] = Character.new(mcs) if @chars[mcs].nil?
29 clear if MAX_CACHE_CHARACTER < @chars.length # clear all caches.
34 def initialize(char_id=nil)
37 @check_all_database = false
46 def to_i() @char_id end
47 def mcs_utf8() Character.u4itou8(@char_id) end
48 def mcs_hex() sprintf("%x", @char_id) end
50 def self.get(char_id) # flyweight pattern
51 CharacterFactory.instance.get(char_id)
54 def normalize_attribute_name(b)
56 a.gsub!(/_/, "-") # replace each underscore with "-"
60 a.sub!(/^from-/, "<-")
64 def get_char_attribute(b) # XEmacs CHISE compatible API
65 a = normalize_attribute_name(b)
69 atr = check_database(a)
74 return get_char_attribute("="+a) unless a =~ /^=/
75 # If the name does not start with "=", assume the "=" was omitted and recurse.
79 def put_char_attribute(b,v)
80 a = normalize_attribute_name(b)
82 CharDB.instance.put(a, mcs_utf8(), v)
85 def char_attribute_alist() check_all_database(); @attributes; end
86 def char_attribute_list() check_all_database(); @attributes.keys; end
87 alias [] get_char_attribute # its shorthand form
88 alias []= put_char_attribute
89 alias alist char_attribute_alist
90 alias list char_attribute_list
92 def method_missing(mid, *args) # ref. ostruct.rb
94 return get_char_attribute(mname) if args.length == 0
95 put_char_attribute(mname.chop, args[0]) if mname =~ /=$/ # assignment
98 def has_attribute?() # does this character have any meaningful attribute?
103 return (keys.length != 0)
107 return false if ch.nil?
108 return false unless ch.is_a? Character
109 self.char_id == ch.char_id
112 def self.u4itou4(num)
113 return "" unless num.is_a?(Integer)
114 return sprintf("%c%c%c%c", num&0xff, (num >> 8)&0xff, (num >> 16)&0xff, (num >> 24)&0xff) # turn the UCS-4 number into a string and return it
117 def self.u4itou8(char_id) # takes a UCS number and returns the single UTF-8 character for it
119 u4 = Character.u4itou4(char_id)
120 u8 = Uconv.u4tou8(u4)
123 #raise ArgumentError, "invalid char_id (#{char_id})", caller(1)
129 def check_database(a)
132 v = db.get(a, u8) # look up attribute a of the character represented by u8.
136 def check_all_database() # consult the character database starting from the current @char_id
137 return if @check_all_database
138 return if @char_id.nil?
141 atrs = db.get_all(u8) # fetch every attribute of the character represented by u8
143 @attributes[a] = v # is it OK to just assign it like this?
145 @check_all_database = true # heavy operation, so note that it has been checked, just in case
150 #ar = %w{ucs ucs-big5 ucs-cdp ucs-cns ucs-jis ucs-ks =>ucs =>ucs* =>ucs-jis}
151 #ar = %w{ucs ucs-jis ucs-big5 ucs-cdp ucs-cns ucs-ks =>ucs =>ucs* =>ucs-jis}
152 ar = %w{ucs-jis ucs =>ucs-jis}
153 # The order is arbitrary, with ucs-jis listed first. Ideally this should be configurable as well.
155 u = get_char_attribute(a)
161 #-------------------------------------------------------------------CCS related
162 def to_utf8() Uconv.u4tou8(Character.u4itou4(ucs())) end # returns a UTF8 string
168 if u.nil? || 0xffff < u
174 alias map_ucs map_utf8
178 if u.nil? || 0xffff < u
181 return Character.get(u).to_er()
187 return "" if u.nil? || 0xffff < u
188 Uconv.u16toeuc(Uconv.u4tou16(Character.u4itou4(ucs())))
199 return "" if u.nil? || 0xffff < u
200 Uconv.u16tosjis(Uconv.u4tou16(Character.u4itou4(ucs())))
209 def to_er(codesys=nil) # returns an entity reference; the desired codesys is the argument (not implemented)
210 return "" if @char_id.nil?
211 return sprintf("&#x%04x;", @char_id) if @char_id <= 0xffff
212 return sprintf("&#x%05x;", @char_id) if @char_id <= 0xfffff
213 EntityReference.each_codesys {|codesys, er_prefix, keta, numtype|
216 return sprintf("&#{er_prefix}%0#{keta}#{numtype};", code)
218 return sprintf("&MCS-%08X;", @char_id) # I would really like to get rid of this
223 EntityReference.each_codesys {|codesys, er_prefix, keta, numtype|
231 return "<>" if @char_id.nil?
232 ar = [to_utf8(), to_er().sub(/^&/,"").chop]
235 alias inspect inspect_x
237 def inspect_all_codesys() # unfinished
238 # Run to_er for every codesys, then gather the results into a compact form.
243 alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" }
244 return ar.join(",")+">"
249 alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" }
250 return ar.join('\n')+'\n'
255 alist.to_a.sort.each {|a, v|
256 str += "#{a}: #{v}\n"
261 def inspect_ids(hex_flag=false)
264 ar << (hex_flag ? "x"+mcs_hex : to_utf8)
265 if to_s != ids # if ids is the component itself, do not add it as a component
270 ar << "x"+char.mcs_hex
281 return "("+ar.join("\t")+")"
284 #--------------------------------------------------------------------IDS related
285 def glyph_decompose() do_decompose(false) end
286 def decompose() do_decompose(true) end
287 def do_decompose(check_meaning = true)
290 # return idss if idss
291 # return k if self.is_basic_kanji? # basic kanji are treated as stop kanji.
293 return self["ids-represent"] if self["ids-represent"] # if it has ids_represent, use that value.
294 return self["ids-element"] if self["ids-element"] # if it has ids_element, use that value.
295 idss = self["ids-meaning"]
296 return idss if idss && 0 < idss.length && k != idss
298 idss = self["ids-aggregated"]
299 return idss if idss && 0 < idss.length && k != idss
301 return idss if idss && 0 < idss.length && k != idss
303 # return k if idss.nil? || idss.length == 0 || k == idss
304 # if idss.char_length == 2
305 # p ["What???", k, idss, k.inspect_all]
306 # #return idssx[1] # return only the second one, perhaps?
307 # return k # there is no way to expand this into an IDS.
309 # return k if k == idss
310 # if idss.include?(k) #<C5-4C4D><C6-4A37> workaround for a BUG with these two characters
311 # #return idss.sub(k, "")
312 # return k # there is no way to expand this into an IDS.
319 de = self.decompose # starting point
323 de = pde.decompose # try decomposing once more.
324 break if pde == de # break out of the loop
325 exit if 10 < level #p ["too many recursive", self]
331 def decompose_all_nu(level=nil)
332 level = 0 if level.nil?
334 p ["too many recursive", self]
338 return de.decompose_all(level+1) if de != self # something changed, so recurse
339 return de # looks like nothing will change any further.
342 def is_ids?() 0x2ff0 <= @char_id && @char_id <= 0x2fff end
344 def ids_operator_argc()
345 return 0 unless is_ids?
346 return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3