2 # CHISE.pm by Shigeki Moro
3 # $Id: CHISE.pm,v 1.5 2003-08-10 15:52:15 moro Exp $
14 overload::constant ( 'qr' => \&ChiseLikeRegex )
18 overload::remove_constant
21 our ($EXCLUSIVE, $HAVE_INTERSECTION, $PROPER_SUBSET, $PROPER_SUPERSET, $EQSET);
22 ($EXCLUSIVE, $HAVE_INTERSECTION, $PROPER_SUBSET, $PROPER_SUPERSET, $EQSET)
25 # データベースの場を指。いずれはlibchiseに...
26 # データベースの全ファイルをchownしないとえないかも
28 if (-e '/usr/local/lib/chise/char-db') {
29 $DB_HOME = '/usr/local/lib/chise/char-db';
30 } elsif (-e '/sw/lib/xemacs-21.4.11/powerpc-apple-darwin6.6/char-db') {
31 $DB_HOME = '/sw/lib/xemacs-21.4.11/powerpc-apple-darwin6.6/char-db';
32 } elsif (-e '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db') {
33 $DB_HOME = '/usr/local/lib/xemacs-21.4.11/i686-pc-linux/char-db';
34 } elsif (-e '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db') {
35 $DB_HOME = '/usr/local/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db';
36 } elsif (-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db'){
37 $DB_HOME = '/usr/local/xemacs-utf2000/lib/xemacs-21.4.11/powerpc-apple-darwin6.4/char-db';
38 } elsif (-e '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db') {
39 $DB_HOME = '/usr/local/lib/xemacs-21.4.10/i686-pc-linux/char-db';
40 } elsif (-e '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db') {
41 $DB_HOME = '/usr/local/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db';
42 } elsif (-e '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db'){
43 $DB_HOME = '/usr/local/xemacs-utf2000/lib/xemacs-21.4.10/powerpc-apple-darwin6.4/char-db';
44 } elsif (-e 'd:/work/chise/char-db'){
45 $DB_HOME = 'd:/work/chise/char-db';
47 print STDERR "CHISE.pm: No database found.\n";
48 print STDERR "CHISE.pm: Please set \$DB_HOME to CHISE.pm.\n";
52 #my $cache_size = 1024 * 1024 * 64;
55 my %alias_reverse = ();
56 for (glob "$DB_HOME/system-char-id/*") {
59 s/^\->/to_/; # Perlではリファレンスに
60 s/^<\-/from_/; # "-" などがえないため、しておく。
65 $alias_reverse{$i} = $_;
69 'radical','ideographic-radical',
70 'strokes','total-strokes',
73 #--- exportする予の関数 -------------------------------------#
78 my $class = ref($invocant) || $invocant;
81 my $key = ($self->chars)[0]; # 要エラーチェック
83 for my $dbname (glob "$DB_HOME/system-char-id/*") {
84 $value = &getvalue($dbname, $key);
86 $dbname =~ s/^.*\/([^\/]+)$/$1/;
87 $result->{$alias_reverse{$dbname}} = $value;
90 return bless $result, $class;
96 my $class = ref($invocant) || $invocant;
99 for my $i (keys %$self) {
100 $result->{$alias_reverse{$alias{$i}}} = $$self{$i};
102 return bless $result, $class;
106 # 字オブジェクトが持っている全属性をprint
108 for my $i (keys %$self) {
109 print "$i => $$self{$i}\n";
116 my $self = $model->define_char(%$model, @_);
123 delete $$model{$_} foreach (@_);
124 my $self = $model->define_char(%$model);
129 # 例: $s->morohashi_daikanwa
130 # cf. get_char_attribute
131 for my $attrname (keys %alias) {
132 my $slot = __PACKAGE__ . "::$attrname";
136 my $dbname = exists $alias{$attrname}
137 ? $alias_reverse{$alias{$attrname}}
139 return $self->{$dbname};
142 #for my $attrname (keys %alias) {
143 # my $slot = __PACKAGE__ . "::$attrname";
148 # for my $i ($self->chars) {
149 # my $j = &getvalue("$DB_HOME/system-char-id/$alias{$attrname}", $i);
150 # push @result, $j if $j;
160 my($aonly, $bonly, $common) = (0, 0, 0);
161 for my $i (&cup(keys %$a, keys %$b)) {
162 if (exists $a->{$i} and exists $b->{$i}) {
163 if ($a->{$i} eq $b->{$i}) {
166 $aonly++ if (exists $a->{$i});
167 $bonly++ if (exists $b->{$i});
172 return $EXCLUSIVE; # 他
173 } elsif ($aonly == 0 and $bonly == 0) {
175 } elsif ($aonly == 0) {
176 return $PROPER_SUBSET; # $aは$bの合
177 } elsif ($bonly == 0) {
178 return $PROPER_SUPERSET; # $bは$aの合
180 return $HAVE_INTERSECTION;
184 sub rate_of_coincidence {
185 # 字オブジェクトどうしの属性の一致を出す。
187 my($all_attr, $common_attr) = (0, 0);
188 for my $i (&cup(keys %$a, keys %$b)) {
189 if ($a->{$i} eq $b->{$i}) {
193 $all_attr++ if (exists $a->{$i});
194 $all_attr++ if (exists $b->{$i});
197 return $all_attr ? ($common_attr / $all_attr) : 0;
204 'C1','chinese-cns11643-1',
205 'C2','chinese-cns11643-2',
206 'C3','chinese-cns11643-3',
207 'C4','chinese-cns11643-4',
208 'C5','chinese-cns11643-5',
209 'C6','chinese-cns11643-6',
210 'C7','chinese-cns11643-7',
211 'CB','ideograph-cbeta',
212 'CDP','chinese-big5-cdp',
214 'GT-K','ideograph-gt',
215 'HZK1','ideograph-hanziku-1',
216 'HZK2','ideograph-hanziku-2',
217 'HZK3','ideograph-hanziku-3',
218 'HZK4','ideograph-hanziku-4',
219 'HZK5','ideograph-hanziku-5',
220 'HZK6','ideograph-hanziku-6',
221 'HZK7','ideograph-hanziku-7',
222 'HZK8','ideograph-hanziku-8',
223 'HZK9','ideograph-hanziku-9',
224 'HZK10','ideograph-hanziku-10',
225 'HZK11','ideograph-hanziku-11',
226 'HZK12','ideograph-hanziku-12',
227 'J78','japanese-jisx0208-1978',
228 'J83','japanese-jisx0208',
229 'J90','japanese-jisx0208-1990',
230 'JSP','japanese-jisx0212',
231 'JX1','japanese-jisx0213-1',
232 'JX2','japanese-jisx0213-2',
233 'K0','korean-ksc5601',
234 'M','ideograph-daikanwa',
236 my $keys = join '|', sort keys %alias;
237 my($phname, $phvalue) = ($ph =~ /^\&(?:I\-)?($keys)\-?([0-9a-f]+);$/i);
238 if (exists $alias{$phname}) {
239 if ($alias{$phname} =~ /daikanwa|gt/) {
242 $phvalue = "0x$phvalue";
244 tie my %h, "BerkeleyDB::Hash",
245 -Filename => "$DB_HOME/$alias{$phname}/system-char-id"
246 or die "Cannot open file $alias{$phname}: $! $BerkeleyDB::Error\n";
247 if (exists $h{$phvalue}) {
250 #print STDERR "\tCan't convert $phname - $phvalue (no value in db).\n";
255 #print STDERR "\tCan't convert $phname - $phvalue.\n";
260 #--- 正規表現のCHISE的拡張 ------------------------------------#
262 sub ChiseLikeRegex ($) {
263 my ($RegexLiteral) = @_;
264 #print STDERR "BEFORE: $RegexLiteral\n"; # for debug
265 $RegexLiteral =~ s/\\same_strokes_(\d)/(??{CHISE->same_strokes(\$$1)})/g;
266 #$RegexLiteral =~ s/\\same_strokes_(\d)/[川山三]/g;
267 #print STDERR "AFTER: $RegexLiteral\n"; # for debug
268 return $RegexLiteral;
273 my $backtrace = shift;
274 my $db = "$DB_HOME/system-char-id/total-strokes";
275 my $temp = &getvalue($db, "?$backtrace");
277 for my $i (&getkeys($db, $temp)) {
281 #print STDERR $result, "\n";
285 #--- モジュール内のみでう予の関数 ----------------------#
291 for my $attrname (keys %$self) {
293 my $dbname = exists $alias{$attrname} ? $alias{$attrname} : $attrname;
294 if (-f "$DB_HOME/$dbname/system-char-id") {
295 @tmp = (&getvalue("$DB_HOME/$dbname/system-char-id", $self->{$attrname}));
296 @result = &cap(\@result, \@tmp);
297 } elsif (-f "$DB_HOME/system-char-id/$dbname") {
298 @tmp = &getkeys("$DB_HOME/system-char-id/$dbname", $self->{$attrname});
299 @result = &cap(\@result, \@tmp);
301 die "cannot find $attrname: $! $BerkeleyDB::Error\n";
311 for my $i ($self->chars) {
313 # To Do: 私用域のはさないようにしないと。
321 my($dbname, $key) = @_;
323 my $db = new BerkeleyDB::Hash
324 #tie my %h, "BerkeleyDB::Hash",
325 -Filename => $dbname;
328 $db->db_get($key, $value);
335 my($dbname, $value) = @_;
336 my $db = new BerkeleyDB::Hash
337 #tie my %h, "BerkeleyDB::Hash",
338 -Filename => $dbname;
340 my ($k, $v) = ("", "") ;
341 my $cursor = $db->db_cursor() ;
342 #for my $k (keys %h) {
343 while ($cursor->c_get($k, $v, DB_NEXT) == 0) {
344 push @keys, $k if ($v eq $value);
345 #push @keys, $key if ($h{$key} eq $value);
358 } elsif ($$a[0] eq '*') { # '*'は全体合
360 } elsif ($$b[0] eq '*') {
365 for my $i (@$a, @$b) {
367 push @result, $i if ($result{$i} == 2);
380 for my $i (@_) { $result{$i}++; }
384 #--------------------------------------------------------#