1 #-----------------------------------------------------------------------------
3 binmode STDOUT, ":utf8";
4 require 'chiseperl.pl';
20 #$graphviz = '"C:\Program Files\ATT\Graphviz\bin\dot.exe"';
21 $graphviz = '/usr/bin/dot';
23 $fontGB = "serif, SimSun, 'AR_PL_ShanHeiSun_Uni', 'AR_PL_SungtiL_GB', '文鼎PL简报宋', STSong";
24 $fontJP = "'MS 明朝', 'IPAMincho', 'Sazanami Mincho', 'ヒラギノ明朝 Pro W3', HiraMinPro-W3";
25 $fontUCS = "'MS 明朝', 'IPAMincho', 'Sazanami Mincho', SimSun, 'SimSun (Founder Extended)', 'AR_PL_ShanHeiSun_Uni', 'AR_PL_Mingti2L_Big5', 'AR_PL_SungtiL_GB', '文鼎PL简报宋', '文鼎PL細上海宋', UnBatang, 'Baekmuk Batang', '은 신문', 'ヒラギノ明朝 Pro W3', STSong, 'LiSong Pro', AppleMyungjo";
26 $fontBIG5 = "MingLiU, 'AR_PL_Mingti2L_Big5', '文鼎PL細上海宋', 'LiSong Pro'";
27 $fontKS = "BatangChe, UnBatang, 'Baekmuk Batang', '은 신문', AppleMyungjo";
29 #-----------------------------------------------------------------------------
32 foreach(@chise_feature){
38 &process(eval('0x'.$ARGV[0]));
40 #-----------------------------------------------------------------------------
43 open FH, ">:utf8", "$file.$txt";
44 print FH "digraph g{\r\n";
45 print FH "\trankdir = \"LR\";\r\n";
46 $be_child = ",".join(",", values(%family)).",";
47 foreach(sort(keys %family)){
48 if($be_child =~ m/,$_,/){
53 foreach(sort(keys %char)){
54 printf FH "\t\"%X\" [ label = \"$char{$_}\", shape = \"record\" ];\r\n", $_;
55 #printf FH "\t\"%X\" [ label = \"$char{$_}\", shape = \"record\", fontname = \"simhei.ttf\" ];\r\n", $_;
57 foreach(sort(keys %link)){
58 @temp = split(/-/, $_);
59 printf FH "\t\"%X\" -> \"%X\" [ label = \"$link{$_}\" ];\r\n", $temp[0], $temp[1];
65 $dummy = `$graphviz -Tpng -o$file.$png $file.$txt`;
68 $dummy = `$graphviz -Tsvg -o$file.$svg $file.$txt`;
70 #<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="ja" lang="ja">
73 open FH, "<:utf8", "$file.$svg";
74 open FH2, ">:utf8", "$file.$html";
76 <html xmlns="http://www.w3.org/1999/xhtml">
78 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
81 <img src="$file.$png" usemap="#object" border="0">
84 if(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=cns11643\&\#45\;([1-9])\:([0-9A-F]{4})/){
85 $left = int($1 / $rate) + $x;
86 $top = int($2 / $rate) + $y;
87 print FH2 "<div style=\"position: absolute; top: $top; left: $left;\">";
88 print FH2 "<img src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/CNS$3/$4.gif\">\r\n";
90 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=jef\&\#45\;china3\:([0-9A-F]{4})/){
91 $left = int($1 / $rate) + $x;
92 $top = int($2 / $rate) + $y;
94 $target =~ tr/A-F/a-f/;
95 print FH2 "<div style=\"position: absolute; top: $top; left: $left;\">";
96 print FH2 "<img src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/$target.gif\" width=\"40\" height=\"40\">\r\n";
98 #} elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=gb2312\:([0-9A-F]{4})/){
99 # $left = int($1 / $rate) + $x;
100 # $top = int($2 / $rate) + $y;
101 # print FH2 "<div style=\"position: absolute; top: $top; left: $left;\">";
102 # $ku = int(eval('0x'.$3) / 0x100) - 0x20;
103 # $ten = sprintf "%02d", eval('0x'.$3) % 0x100 - 0x20;
104 # print FH2 "<img src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/GB0/$ku-$ten.gif\">\r\n";
105 # print FH2 "</div>";
106 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=gb2312\:([0-9A-F]{4})/){
107 $left = int($1 / $rate) + $x;
108 $top = int($2 / $rate) + $y;
109 print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: $fontGB; font-size: 40px;\">";
111 $char =~ s/([0-9A-F]{2})/pack('H2', $1)/eg;
112 Encode::from_to($char, 'gb2312-raw', 'utf-8');
114 print FH2 "$char\r\n";
116 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=jis\&\#45\;x0208\@19(78|83|90|97)\:([0-9A-F]{4})/){
117 $left = int($1 / $rate) + $x;
118 $top = int($2 / $rate) + $y;
119 print FH2 "<div style=\"position: absolute; top: $top; left: $left;\">";
120 $ku = int(eval('0x'.$4) / 0x100) - 0x20;
121 $ten = sprintf "%02d", eval('0x'.$4) % 0x100 - 0x20;
122 print FH2 "<img src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/JIS-$3/$ku-$ten.gif\">\r\n";
124 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=jis\&\#45\;x0212\:([0-9A-F]{4})/){
125 $left = int($1 / $rate) + $x;
126 $top = int($2 / $rate) + $y;
127 print FH2 "<div style=\"position: absolute; top: $top; left: $left;\">";
128 $ku = int(eval('0x'.$3) / 0x100) - 0x20;
129 $ten = sprintf "%02d", eval('0x'.$3) % 0x100 - 0x20;
130 print FH2 "<img src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/JIS-SP/$ku-$ten.gif\">\r\n";
132 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=zinbun\&\#45;oracle\:([0-9A-F]{1,4})/){
133 $left = int($1 / $rate) + $x - 20;
134 $top = int($2 / $rate) + $y;
135 my $number = sprintf "%04d", eval('0x'.$3);
136 print FH2 "<img style=\"position: absolute; top: $top; left: $left;\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/ZOB-1968/$number.png\" width=\"80\" height=\"80\">";
138 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=ucs\:([0-9A-F]{4,6})/){
139 $left = int($1 / $rate) + $x - 20;
140 $top = int($2 / $rate) + $y;
141 print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: $fontUCS; font-size: 40px;\">";
142 print FH2 "[".pack('U',eval('0x'.$3))."]\r\n";
144 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=big5\:([0-9A-F]{4,6})/){
145 $left = int($1 / $rate) + $x;
146 $top = int($2 / $rate) + $y;
147 print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: $fontBIG5; font-size: 40px;\">";
149 $char =~ s/([0-9A-F]{2})/pack('H2', $1)/eg;
150 Encode::from_to($char, 'big5', 'utf-8');
152 print FH2 "$char\r\n";
154 #} elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=hanziku\&\#45\;([0-9]{1,2})\:([0-9A-F]{4})/){
155 # $left = int($1 / $rate) + $x;
156 # $top = int($2 / $rate) + $y;
157 # #$number = sprintf "%02d", $3;
160 # $numberk =~ tr/12345678/一二三四五六七八/;
161 # #print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: 'hzk${number}u'; font-size: 40px;\">";
162 # print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: '細明體外字集$numberk'; font-size: 40px;\">";
163 # #printf FH2 pack('U', chise::chise_ds_decode_char($chise_ds, chise_tools::get_uchar("=hanziku-$number"), eval('0x'.$4)));
164 # #printf FH2 pack('U', chise::chise_ds_decode_char($chise_ds, chise_tools::get_uchar("=big5"), eval('0x'.$4)));
166 # $char =~ s/([0-9A-F]{2})/pack('H2', $1)/eg;
167 # Encode::from_to($char, 'big5', 'utf-8');
168 # utf8::decode($char);
169 # print FH2 "$char\r\n";
170 # print FH2 "</div>";
171 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=big5\&\#45\;cdp\:([0-9A-F]{4,6})/){
172 $left = int($1 / $rate) + $x;
173 $top = int($2 / $rate) + $y;
174 print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: EUDC; font-size: 40px;\">";
175 printf FH2 pack('U', chise::chise_ds_decode_char($chise_ds, chise_tools::get_uchar('=big5-pua'), eval('0x'.$3)));
177 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=jis\&\#45\;x0208\:([0-9A-F]{4,6})/){
178 $left = int($1 / $rate) + $x;
179 $top = int($2 / $rate) + $y;
180 print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: $fontJP; font-size: 40px;\">";
182 $char =~ s/([0-9A-F]{2})/pack('H2', $1)/eg;
183 Encode::from_to($char, 'jis0208-raw', 'utf-8');
185 print FH2 "$char\r\n";
187 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=ks\&\#45\;x1001\:([0-9A-F]{4,6})/){
188 $left = int($1 / $rate) + $x;
189 $top = int($2 / $rate) + $y;
190 print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: $fontKS; font-size: 40px;\">";
192 $char =~ s/([0-9A-F]{2})/pack('H2', $1)/eg;
193 Encode::from_to($char, 'ksc5601-raw', 'utf-8');
195 print FH2 "$char\r\n";
197 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=gt\&\#45\;pj\&\#45\;([0-9]{1,2}):([0-9A-F]{4})/){
198 $left = int($1 / $rate) + $x;
199 $top = int($2 / $rate) + $y;
200 $number = sprintf("%02d", $3);
201 print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: GT2000-$number; font-size: 40px;\">";
203 $char =~ s/([0-9A-F]{2})/pack('H2', $1)/eg;
204 Encode::from_to($char, 'jis0208-raw', 'utf-8');
206 print FH2 "$char\r\n";
208 } elsif(m/x\=\"([0-9]+)\" y\=\"([0-9]+)\"\>\=gt\&\#45\;pj\&\#45\;(k[12]):([0-9A-F]{4})/){
209 $left = int($1 / $rate) + $x;
210 $top = int($2 / $rate) + $y;
211 print FH2 "<div style=\"position: absolute; top: $top; left: $left; font-family: GT2000-$3; font-size: 40px;\">";
213 $char =~ s/([0-9A-F]{2})/pack('H2', $1)/eg;
214 Encode::from_to($char, 'jis0208-raw', 'utf-8');
216 print FH2 "$char\r\n";
221 print FH2 "<map name=\"object\">\n";
226 m/points=\"[0-9]+,[0-9]+ ([0-9]+),([0-9]+) [0-9]+,[0-9]+ ([0-9]+),([0-9]+) /;
227 print FH2 "<area href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=%26u-$target%3B\" shape=\"rect\" coords=\"".int($3 / $rate).", ".int($4 / $rate).", ".int($1 / $rate).", ".int($2 / $rate)."\">\n";
230 elsif(m/^<g id=\"node[0-9]+\" class=\"node\"><title>(2{0,1}[0-9A-F]{4})<\/title>\n$/){
235 print FH2 "</map>\n";
246 #-----------------------------------------------------------------------------
248 my ($buffer, @buffer, $feature);
249 if(exists($char{$_[0]})){
254 if($_[0] =~ m/^\(\(/){
255 $char{$_[0]} .= "$_[0]\\n";
261 if((substr($_,0,1) eq '=' && substr($_,1,1) ne '>') || $_ eq "morohashi-daikanwa"){
262 $buffer = get_feature_value($_, $_[0]);
264 if($_ eq "=daikanwa" || $_ eq "=gt" || $_ eq "=gt-k"){
265 $char{$_[0]} .= sprintf("$_:%05d\\n", $buffer);
266 #} elsif($_ =~ m/^\=(gt-pj-[0-9]{1,2}|gt-pj-k[12]|big5|big5-cdp|jis-x0208|ks-x1001|ucs|cns11643-[1-9]|gb2312|jis-x0208\@1978|jis-x0208\@1983|jis-x0208\@1990|jis-x0208\@1997|jis-x0212|hanziku-[1-8]|jef-china3)$/){
267 } elsif($_ =~ m/^\=(gt-pj-[0-9]{1,2}|gt-pj-k[12]|big5|big5-cdp|jis-x0208|ks-x1001|ucs|cns11643-[1-9]|gb2312|jis-x0208\@1978|jis-x0208\@1983|jis-x0208\@1990|jis-x0208\@1997|jis-x0212|jef-china3)$/){
268 $char{$_[0]} .= sprintf("$_:%X\\n \\n \\n", $buffer);
269 } elsif($_ =~ m/^\=(zinbun-oracle)$/){
270 $char{$_[0]} .= sprintf("$_:%X\\n \\n \\n \\n \\n", $buffer);
271 } elsif($_ eq "morohashi-daikanwa"){
272 $char{$_[0]} .= "$_:$buffer\\n";
274 $char{$_[0]} .= sprintf("$_:%X\\n", $buffer);
281 if(substr($_,0,2) eq '->'){
282 $buffer = get_feature_value($_, $_[0]);
285 @buffer = &parse($buffer);
286 if(scalar(@buffer) == 0){
290 if($feature eq "->subsumptive" || $feature eq "->denotational"){
291 $family{$_[0]} .= "$_,";
295 $link{$_[0]."-".$_} .= $feature."\\n";
304 if(substr($_,0,2) eq '<-'){
305 $buffer = get_feature_value($_, $_[0]);
308 @buffer = &parse($buffer);
309 if(scalar(@buffer) == 0){
313 #$link{$_."-".$_[0]} .= $feature."\\n";
321 if(substr($_,0,2) eq '=>'){
322 $buffer = get_feature_value($_, $_[0]);
325 @buffer = &parse($buffer);
326 if(scalar(@buffer) == 0){
330 $link{$_."-".$_[0]} .= $feature."\\n";
341 utf8::decode($target);
342 if(substr($target,0,1) eq '('){
343 if(substr($target,0,2) eq '(('){ # feature is S-expression
344 push(@temp2, substr($target,1,length($substr)-1));
346 @temp = split(/ /, substr($target, 1, length($target) - 2));
348 push(@temp2, unpack('U*', substr($_, 1)));
357 my @temp = split(/,/, $family{$_[0]});
358 printf FH "\t"x$_[1]."subgraph cluster%X\ {\n", $_[0];
360 printf FH "\t\t\"%X\" [ color = white ];\n", $_[0];
362 printf FH "\t"x($_[1] + 1)."style = filled; color = gray%d;\n", 100 - ($_[1] - 1) * 20;
363 printf FH "\t"x($_[1] + 1)."\"%X\" [ color = gray%d ];\n", $_[0], 100 - ($_[1] - 1) * 20;
366 if(exists($family{$_})){
369 printf FH "\t"x($_[1] + 1)."\"%X\";\n", $_;
372 print FH "\t"x$_[1]."}\n";
374 #-----------------------------------------------------------------------------