update.
[chise/ruby.git] / chise / character.rb
1 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
2
3 require "singleton"
4 require "chise/parser"
5 require "chise/chisedb"
6 require "chise/iconv"
7 require "chise/utf8"
8 require "chise/ids"
9
10 module CHISE
11   class CharacterFactory # generate Character object and cache them
12     include Singleton
13
14     MAX_CACHE_CHARACTER = 10000
15
16     def initialize
17       clear
18       @parser = CharacterParser.new
19     end
20
21     def clear
22       @chars = {}
23     end
24
25     def get(s)
26       check_max
27       mcs = @parser.parse(s)
28       @chars[mcs] = Character.new(mcs) if @chars[mcs].nil?
29       @chars[mcs]
30     end
31
32     def check_max
33       clear if MAX_CACHE_CHARACTER < @chars.length # clear all cache
34     end
35   end
36
37   class Character
38     include UTF8Value
39     include CharacterIDC
40     include CharacterIDS
41
42     def initialize(char_id)
43       raise if char_id.nil?
44       raise unless char_id.kind_of?(Integer) # make sure char_id is Integer.
45       raise if char_id < 0 # make sure char_id is positive.
46       @char_id = char_id
47       @char_id.freeze
48       # @utf8_mcs = CHISE.i_tou8(@char_id)
49       @utf8_mcs = itou8(@char_id)
50       @utf8_mcs.freeze
51       @feature = {}
52       # @check_all_done = nil
53     end
54     attr_reader :char_id
55     attr_reader :utf8_mcs
56
57     def self.get(s)
58       CharacterFactory.instance.get(s)
59     end
60
61     def inspect
62       sprintf("Char:%x", @char_id)
63     end
64
65     def to_s()  @utf8_mcs;      end
66
67     def [](f)
68       f = normalize_feature_name(f)
69
70       v = @feature[f]
71       return v if v
72       v = @feature["="+f]
73       return v if v
74
75       v = get_feature(f)
76       if v
77         @feature[f] = v
78         return v
79       end
80
81       v = get_feature("="+f)
82       if v
83         @feature["="+f] = v
84         return v
85       end
86
87       nil
88     end
89
90     def []=(k,v)
91       f = normalize_feature_name(k)
92       cd = ChiseDB.instance
93       ft = cd.get_feature(f)
94       ft.set_value(@char_id, v)
95       @feature[f] = v;
96     end
97
98     def method_missing(mid, *args) # ref. ostruct.rb
99       mname = mid.id2name
100
101       return self[mname] if args.empty? # get
102
103       if args.length == 1 && /=\Z/ =~ mname # put
104         self[mname.chop] = args.shift
105         return
106       end
107
108       raise "error"
109     end
110
111     def to_er
112       en = EntityReferenceEncoder.new
113       en.to_er(self)
114     end
115
116     def each_feature
117       cd = ChiseDB.instance
118       cd.each_feature {|f|
119         ft = cd.get_feature(f)
120         begin
121           v = ft.get_value(@char_id)
122           next if v.nil?
123           yield(f, v)
124         ensure
125           ft.close
126         end
127       }
128     end
129
130     def hash_feature
131       h = {}
132       each_feature {|k, v|
133         h[k] = v
134       }
135       h
136     end
137
138     private
139
140     def get_feature(f)
141       cd = ChiseDB.instance
142       cd.load_feature(f, @char_id)
143     end
144
145     def normalize_feature_name(a)
146       a = a.gsub(/_/, "-") #underlineは-に置換
147       a = a.sub(/-at-/,    "@")
148       a = a.sub(/-plus-/,  "+")
149       a = a.sub(/\Amap-/,  "=>")
150       a = a.sub(/\Ato-/,   "->")
151       a = a.sub(/\Afrom-/, "<-")
152       a
153     end
154
155   end
156 end