f3ac1429bfad76ae7a4752e9aa1ffc029ca1f951
[chise/ruby.git] / chise / character.rb
1 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
2
3 require "singleton"
4 require "chise/parser"
5 require "chise/chisedb"
6 require "chise/iconv"
7 require "chise/utf8"
8 require "chise/ids"
9
10 module CHISE
11   class CharacterFactory # generate Character object and cache them
12     include Singleton
13
14     MAX_CACHE_CHARACTER = 10000
15
16     def initialize
17       clear
18       @parser = CharacterParser.new
19     end
20
21     def clear
22       @chars = {}
23     end
24
25     def get(s)
26       check_max
27       mcs = @parser.parse(s)
28       @chars[mcs] = Character.new(mcs) if @chars[mcs].nil?
29       @chars[mcs]
30     end
31
32     def check_max
33       clear if MAX_CACHE_CHARACTER < @chars.length # clear all cache
34     end
35   end
36
37   class Character
38     include UTF8Value
39     include CharacterIDC
40     include CharacterIDS
41
42     def initialize(char_id)
43       raise if char_id.nil?
44       raise unless char_id.kind_of?(Integer) # make sure char_id is Integer.
45       raise if char_id < 0 # make sure char_id is positive.
46       @char_id = char_id
47       @char_id.freeze
48       @utf8_mcs = itou8(@char_id)
49       @utf8_mcs.freeze
50       @feature = {}
51     end
52     attr_reader :char_id
53     attr_reader :utf8_mcs
54
55     def self.get(s)
56       CharacterFactory.instance.get(s)
57     end
58
59     def inspect
60       sprintf("Char:%x", @char_id)
61     end
62
63     def to_s()  @utf8_mcs;      end
64
65     def [](f)
66       f = normalize_feature_name(f)
67
68       v = @feature[f]
69       return v if v
70       v = @feature["="+f]
71       return v if v
72
73       v = get_feature(f)
74       if v
75         @feature[f] = v
76         return v
77       end
78
79       v = get_feature("="+f)
80       if v
81         @feature["="+f] = v
82         return v
83       end
84
85       nil
86     end
87
88     def []=(k,v)
89       f = normalize_feature_name(k)
90       cd = ChiseDB.instance
91       feature = cd.get_feature(f)
92       feature.set_value(@char_id, v)
93       @feature[f] = v;
94     end
95
96     def method_missing(mid, *args) # ref. ostruct.rb
97       mname = mid.id2name
98
99       return self[mname] if args.empty? # get
100
101       if args.length == 1 && /=\Z/ =~ mname # put
102         self[mname.chop] = args.shift
103         return
104       end
105
106       raise "error"
107     end
108
109     def to_er
110       en = EntityReferenceEncoder.new
111       en.to_er(self)
112     end
113
114     def each_feature
115       cd = ChiseDB.instance
116       cd.each_feature_name {|f|
117         feature = cd.get_feature(f)
118         begin
119           v = feature.get_value(@char_id)
120           next if v.nil?
121           yield(f, v)
122         ensure
123           feature.close # important
124         end
125       }
126     end
127
128     def hash_feature
129       h = {}
130       each_feature {|k, v|
131         h[k] = v
132       }
133       h
134     end
135
136     private
137
138     def get_feature(f)
139       cd = ChiseDB.instance
140       cd.load_feature(@char_id, f)
141     end
142
143     def normalize_feature_name(a)
144       a = a.gsub(/_/, "-") #underlineは-に置換
145       a = a.sub(/-at-/,    "@")
146       a = a.sub(/-plus-/,  "+")
147       a = a.sub(/\Amap-/,  "=>")
148       a = a.sub(/\Ato-/,   "->")
149       a = a.sub(/\Afrom-/, "<-")
150       a
151     end
152
153   end
154 end