From: eto Date: Wed, 9 Jun 2004 17:34:55 +0000 (+0000) Subject: i X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e0d31160ff3741273147db91f6d5109d13acbff0;p=chise%2Fruby.git i --- diff --git a/0ext/sample.rb b/0ext/sample.rb new file mode 100755 index 0000000..b4f98f5 --- /dev/null +++ b/0ext/sample.rb @@ -0,0 +1,18 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "chise.so" +$KCODE = "u" + +db_dir = "/cygdrive/c/chise/char-db" +ds = CHISE::DataSource.new(CHISE::DataSource::Berkeley_DB, db_dir) + +dt = ds.open_decoding_table("=daikanwa") +char_id = dt.get_char(364) # get a character by Daikanwa number 364. + +ft = ds.open_feature_table("ideographic-structure") +value = ft.get_value(char_id) +printf("#x%X => %s\n", char_id, value.to_s) +ft.close + +ds.close diff --git a/chise/char.rb b/chise/char.rb new file mode 100755 index 0000000..473ba0d --- /dev/null +++ b/chise/char.rb @@ -0,0 +1,15 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "chise/db" +require "chise/config" +require "chise/iconv" +require "chise/uconv" +require "chise/util" + +require "chise/string" +require "chise/character" + +$debug = false #これはテスト用 +$debug = true #これはテスト用 +$stdout.binmode if $debug +$stdout.sync = true if $debug diff --git a/chise/character.rb b/chise/character.rb new file mode 100755 index 0000000..b2d0972 --- /dev/null +++ b/chise/character.rb @@ -0,0 +1,349 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "singleton" +require "chise/parser" + +module CHISE + class CharacterFactory # generate Character object and cache them + include Singleton + + MAX_CACHE_CHARACTER = 10000 + + def initialize + clear + @parser = CharacterParser.new + end + + def clear + @chars = {} + end + + def get(char_id) + check_max + n = @parser.parse(char_id) + @chars[n] = Character.new(n) if @chars[n].nil? + @chars[n] + end + + def check_max + clear if MAX_CACHE_CHARACTER < @chars.length # clear all caches. + end + + def nu_length + @chars.length + end + end + + class Character + def initialize(char_id=nil) + @parser = CharacterParser.new + @char_id = @parser.parse(char_id) + @attributes = Hash.new + @check_all_database = false + end + attr_reader :char_id + + def to_i() @char_id end + def mcs_utf8() Character.u4itou8(@char_id) end + def mcs_hex() sprintf("%x", @char_id) end + + def self.get(char_id) # flyweight pattern + CharacterFactory.instance.get(char_id) + end + + def normalize_attribute_name(b) + a = b.dup + a.gsub!(/_/, "-") #underline‚Í-‚É’uŠ· + a.sub!(/-at-/, "@") + a.sub!(/^map-/, "=>") + a.sub!(/^to-/, "->") + a.sub!(/^from-/, "<-") + a + end + + def get_char_attribute(b) # XEmacs UTF-2000ŒÝŠ·APIŒQ + a = normalize_attribute_name(b) + #p [a, b] + atr = @attributes[a] + return atr if atr + atr = check_database(a) + if atr + @attributes[a] = atr + return atr + end + return get_char_attribute("="+a) unless a =~ /^=/ #“ª‚É=‚ª‚‚¢‚Ä‚È‚¢ê‡‚Í‚»‚ꂪÈ—ª‚³‚ê‚Ä‚¢‚邱‚Æ‚ð‰¼’è‚µ‚āAÄ‹A‚·‚é + nil + end + + def put_char_attribute(b,v) + a = normalize_attribute_name(b) + @attributes[a] = v; + CharDB.instance.put(a, mcs_utf8(), v) + end + + def char_attribute_alist() check_all_database(); @attributes; end + def char_attribute_list() check_all_database(); @attributes.keys; end + alias [] get_char_attribute #‚»‚Ì—ªÌ + alias []= put_char_attribute + alias alist char_attribute_alist + alias list char_attribute_list + + def method_missing(mid, *args) # ref. ostruct.rb + mname = mid.id2name + return get_char_attribute(mname) if args.length == 0 + put_char_attribute(mname.chop, args[0]) if mname =~ /=$/ #‘ã“ü + end + + def has_attribute?() # ˆÓ–¡‚Ì‚ ‚éattribute‚ðŽ‚Á‚Ä‚Ü‚·‚©? + keys = list + keys.delete_if {|k| + k =~ /ids/ + } + return (keys.length != 0) + end + + def ==(ch) + return false if ch.nil? + return false unless ch.is_a? Character + self.char_id == ch.char_id + end + + def self.u4itou4(num) + return "" unless num.is_a?(Integer) + return sprintf("%c%c%c%c", num&0xff, (num >> 8)&0xff, (num >> 16)&0xff, (num >> 24)&0xff) #UCS-4”’l‚𕶎š—ñ‚É‚µ‚Äreturn + end + + def self.u4itou8(char_id) #ucs‚̐”’l‚ðŽó‚¯‚Æ‚èAUTF-8‚Ì•¶Žšˆê•¶Žš‚ð•Ô‚· + begin + u4 = Character.u4itou4(char_id) + u8 = Uconv.u4tou8(u4) + return u8 + rescue + #raise ArgumentError, "invalid char_id (#{char_id})", caller(1) + #print "error\n" + return "" + end + end + + def check_database(a) + db = CharDB.instance + u8 = mcs_utf8() + v = db.get(a, u8) # u8‚Å•\‚³‚ê‚镶Žš‚ÌaƒAƒgƒŠƒrƒ…[ƒg‚𒲂ׂéB + v + end + + def check_all_database() # Œ»Ý‚Ì@char_id‚©‚çA•¶Žšƒf[ƒ^ƒx[ƒX‚ðŽQÆ‚·‚é + return if @check_all_database + return if @char_id.nil? + db = CharDB.instance + u8 = mcs_utf8() + atrs = db.get_all(u8) #u8‚Å•\‚³‚ê‚镶Žš‚̃AƒgƒŠƒrƒ…[ƒg‚ð‘S•”Ž‚Á‚Ä‚±‚¢ + atrs.each {|a,v| + @attributes[a] = v #‚Æ‚©‚¢‚¤Š´‚¶‚Å‘ã“ü‚·‚é‚Ì‚Å‚¦‚¦‚©‚È? + } + @check_all_database = true #d‚¢ˆ—‚Ȃ̂ňꉞcheck‚·‚é + end + + def ucs() + #p "ucs" + #ar = %w{ucs ucs-big5 ucs-cdp ucs-cns ucs-jis ucs-ks =>ucs =>ucs* =>ucs-jis} + #ar = %w{ucs ucs-jis ucs-big5 ucs-cdp ucs-cns ucs-ks =>ucs =>ucs* =>ucs-jis} + ar = %w{ucs-jis ucs =>ucs-jis} + #•À‚я‡‚Íœ“ˆÓ“I‚ŁAucs-jis‚ðæ‚ɏo‚µ‚Ä‚¢‚éB–{—ˆ‚Í‚±‚ê‚àŽw’è‚Å‚«‚é‚悤‚É‚·‚é‚ׂ«B + ar.each {|a| #p [a] + u = get_char_attribute(a) + return u if u + } + nil + end + + #-------------------------------------------------------------------CCSŠÖŒW + def to_utf8() Uconv.u4tou8(Character.u4itou4(ucs())) end #UTF8•¶Žš—ñ‚ð•Ô‚· + #alias to_s to_utf8 + alias to_s mcs_utf8 + + def map_utf8() + u = ucs() + if u.nil? || 0xffff < u + return to_er() + else + return to_utf8() + end + end + alias map_ucs map_utf8 + + def map_ucs_er() + u = ucs() + if u.nil? || 0xffff < u + return to_er() + else + return Character.get(u).to_er() + end + end + + def to_euc() + u = ucs() + return "" if u.nil? || 0xffff < u + Uconv.u16toeuc(Uconv.u4tou16(Character.u4itou4(ucs()))) + end + + def map_euc() + e = to_euc() + return e if e != "" + return to_er() + end + + def to_sjis() + u = ucs() + return "" if u.nil? || 0xffff < u + Uconv.u16tosjis(Uconv.u4tou16(Character.u4itou4(ucs()))) + end + + def map_sjis() + e = to_sjis() + return e if e != "" + return to_er() + end + + def to_er(codesys=nil) #ŽÀ‘ÌŽQÆ‚ð•Ô‚·AŠó–]‚·‚écodesys‚ªˆø”(–¢ŽÀ‘•) + return "" if @char_id.nil? + return sprintf("&#x%04x;", @char_id) if @char_id <= 0xffff + return sprintf("&#x%05x;", @char_id) if @char_id <= 0xfffff + EntityReference.each_codesys {|codesys, er_prefix, keta, numtype| + code = self[codesys] + next if code.nil? + return sprintf("&#{er_prefix}%0#{keta}#{numtype};", code) + } + return sprintf("&MCS-%08X;", @char_id) #–{“–‚Í‚±‚ê‚Í–³‚µ‚É‚µ‚½‚¢ + end + + def to_er_list() + ar = [] + EntityReference.each_codesys {|codesys, er_prefix, keta, numtype| + er = to_er(codesys) + ar << er if er + } + ar + end + + def inspect_x() + return "<>" if @char_id.nil? + ar = [to_utf8(), to_er().sub(/^&/,"").chop] + "<"+ar.join(",")+">" + end + alias inspect inspect_x + + def inspect_all_codesys() #–¢Š®¬ + #to_er‚ð‘S‚Ä‚Ìcodesys‚É‚¨‚¢‚ÄŽÀs‚·‚éB‚»‚ÌŒ‹‰Ê‚ðƒRƒ“ƒpƒNƒg‚É‚Ü‚Æ‚ß‚é + end + + def inspect_all() + ar = [inspect.chop] + alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" } + return ar.join(",")+">" + end + + def dump_all() + ar = [inspect] + alist.to_a.sort.each {|a, v| ar << "#{a}:#{v}" } + return ar.join('\n')+'\n' + end + + def get_attributes() + str = "" + alist.to_a.sort.each {|a, v| + str += "#{a}: #{v}\n" + } + str + end + + def inspect_ids(hex_flag=false) + ids = decompose + ar = [] + ar << (hex_flag ? "x"+mcs_hex : to_utf8) + if to_s != ids #ids‚ª•”•i‚»‚Ì‚à‚Ì‚¾‚Á‚½‚ç•”•i’ljÁ‚Í‚µ‚È‚¢ + ids.each_char {|ch| + char = ch.char + next if char.is_ids? + if hex_flag then + ar << "x"+char.mcs_hex + else + u = char.to_utf8 + if u != "" + ar << u + else + ar << char.to_er + end + end + } + end + return "("+ar.join("\t")+")" + end + + #--------------------------------------------------------------------IDSŠÖŒW + def glyph_decompose() do_decompose(false) end + def decompose() do_decompose(true) end + def do_decompose(check_meaning = true) + k = self.to_s + # idss = self["ids"] + # return idss if idss + # return k if self.is_basic_kanji? #Šî–{Š¿Žš‚Ístop kanji‚Æ‚·‚邼‚ƁB + if check_meaning + return self["ids-represent"] if self["ids-represent"] #ids_represent‚ðŽ‚Á‚Ä‚¢‚éê‡‚Í‚»‚Ì’l‚Æ‚·‚éB + return self["ids-element"] if self["ids-element"] #ids_element‚ðŽ‚Á‚Ä‚¢‚éê‡‚Í‚»‚Ì’l‚Æ‚·‚éB + idss = self["ids-meaning"] + return idss if idss && 0 < idss.length && k != idss + end + idss = self["ids-aggregated"] + return idss if idss && 0 < idss.length && k != idss + idss = self["ids"] + return idss if idss && 0 < idss.length && k != idss + return k + # return k if idss.nil? || idss.length == 0 || k == idss + # if idss.char_length == 2 + # p ["What???", k, idss, k.inspect_all] + # #return idssx[1] #“ñŒÂ–Ú‚¾‚¯•Ô‚·‚Æ‚©? + # return k #IDS‚É“WŠJ‚·‚é•û–@‚ª–³‚¢‚ƁB + # end + # return k if k == idss + # if idss.include?(k) #‚±‚Ì“ñ•¶Žš‚ÌBUG‘΍ô + # #return idss.sub(k, "") + # return k #IDS‚É“WŠJ‚·‚é•û–@‚ª–³‚¢‚ƁB + # end + # return idss + end + + def decompose_all + pde = "" + de = self.decompose #o”­“_ + level = 0 + while true + pde = de + de = pde.decompose #‚à‚¤ˆê“x•ª‰ð‚ð‚µ‚Ä‚Ý‚éB + break if pde == de #ƒ‹[ƒv‚𔲂¯‚¾‚· + exit if 10 < level #p ["too many recursive", self] + level += 1 + end + return de + end + + def decompose_all_nu(level=nil) + level = 0 if level.nil? + if 10 < level + p ["too many recursive", self] + exit + end + de = self.decompose + return de.decompose_all(level+1) if de != self #‚È‚É‚©•Ï‰»‚ª‚ ‚Á‚½‚©‚çÄ‹A + return de #‚à‚¤‚±‚êˆÈã•Ï‰»‚Í–³‚³‚»‚¤‚¾‚¼‚ƁB + end + + def is_ids?() 0x2ff0 <= @char_id && @char_id <= 0x2fff end + + def ids_operator_argc() + return 0 unless is_ids? + return 3 if @char_id == 0x2ff2 || @char_id == 0x2ff3 + return 2 + end + end +end diff --git a/chise/codeviewer.rb b/chise/codeviewer.rb new file mode 100755 index 0000000..0c70df4 --- /dev/null +++ b/chise/codeviewer.rb @@ -0,0 +1,165 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "stroke" + +module StrokeFont + class CodeViewer + WIDTH, HEIGHT = 256, 256 + SCALE = 2 + def initialize(cx=0, cy=0) + @cx, @cy = cx, cy + @s = SCALE + @x1, @y1 = @cx-@s*WIDTH/2, @cy-@s*HEIGHT/2 + @x2, @y2 = @cx+@s*WIDTH/2, @cy+@s*HEIGHT/2 + @px, @py = @cx, @cy #とりあえず中心が開始点 + @pw, @ph, @pr = 30, 30, 10 + @dragging = false + @onkeydown = false + @code = 0 + calc_code + end + attr_reader :code + def draw + colorHSV(0, 0, 100, 10) #まずは下敷きになる枠を書きます。 + rect(@x1, @y1, @x2, @y2) + lineWidth(1) + colorHSV(0, 0, 100, 50) + b = 8; s = @s*WIDTH/b + (0..b).each {|n| + line(@x1, @y1+n*s, @x2, @y1+n*s) + line(@x1+n*s, @y1, @x1+n*s, @y2) + } + colorHSV(0, 100, 100, 100) # 次にポインターを書きます + circle(@px, @py, @pr) + line(@px-@pw/2, @py, @px+@pw/2, @py) + line(@px, @py-@ph/2, @px, @py+@ph/2) + end + def onMouse(x, y) + if @onkeydown + x, y = @px, @py + end + if @dragging || @onkeydown + @onkeydown = false + @px, @py = x, y #p [x, y] + @px = @x1 if @px < @x1 + @py = @y1 if @py < @y1 + @px = @x2-1 if @x2-1 < @px + @py = @y2-1 if @y2-1 < @py + return calc_code + else + return false + end + end + def calc_code() + x = ((@px - @x1)/@s).to_i + y = HEIGHT-1 - ((@py - @y1)/@s).to_i + code = x + y*WIDTH + if @code != code + @code = code + return true #changed + #p [x, y, code] + printf("%02x %02x %04x\n", x, y, @code) + else + return false + end + end + def show_list(list) + colorHSV(0, 100, 100, 100) + list.each {|code| + x, y = code_to_xy(code) + rect(x, y, x+2, y-2) + } + end + def code_to_xy(code) + cx = code % WIDTH + cy = HEIGHT - (code / WIDTH) #intになる? + x = cx * SCALE + @x1 + y = cy * SCALE + @y1 + return x, y + end + def length(x, y) Math.sqrt(x*x + y*y) end + def onMouseDown(x, y) + if length(@px-x, @py-y) < @pr + @dragging = true + end + end + def onMouseUp(x, y) @dragging = false end + def onKeyDown(key) + @onkeydown = true + case key + when 273 + @py += @s + when 274 + @py -= @s + when 276 + @px -= @s + when 275 + @px += @s + end + end + end +end + +if $0 == __FILE__ + $LOAD_PATH << "../../src" + require "chise" + include CHISE + require "chise/stroke" + include StrokeFont + + def setup + useSmooth() + window(-300,-300,300,300) + background 0,0,20 + useFramerate(30) + @cs = CodeViewer.new + @csf1 = CSFFont.new() #普通の文字 + @csf2 = CSFFont.new(CSF_KOUKOTSU_FILE) #甲骨文字 + @key = 1 + @kage = KageFont.new() + @changed = nil + end + + def display + @changed = @cs.onMouse(mouseX, mouseY) #変化があったか? + @cs.draw + @cs.show_list(@kage.cache_list) + code = @cs.code + + push + scale 0.2 + translate -500,-500 + lineWidth(2) + draw_kage(code) + draw_csf(code) + pop + end + + def draw_kage(code) + char = Character.get(code) + return if char.nil? + @kage.init(code) if @changed + @kage.print(code) if @changed + @kage.draw(code) + end + + def draw_csf(ucs) + char = Character.get(ucs) + return if char.nil? + j = char.japanese_jisx0208 + return if j.nil? + code = j + csf = @key == 1 ? @csf1 : @csf2 + csf.init(code) if @changed + csf.print(code) if @changed + csf.draw(code) + end + + def onMouseDown(x, y) @cs.onMouseDown(x, y)end + def onMouseUp(x, y) @cs.onMouseUp(x, y)end + def onKeyDown(key) + @key = key + @cs.onKeyDown(key) + end + mainloop +end diff --git a/chise/config.rb b/chise/config.rb new file mode 100755 index 0000000..1494d39 --- /dev/null +++ b/chise/config.rb @@ -0,0 +1,33 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "singleton" + +module CHISE + def windows?() + rp = RUBY_PLATFORM + rp =~ /cygwin/ || rp =~ /mswin32/ || rp =~ /mingw32/ + end + module_function :windows? + + class Config + include Singleton + def initialize + @base_dir = File.expand_path(File.dirname(__FILE__)+"/../..") +# @base_dir = "c:/chise" +# qp @base_dir + + if CHISE.windows?() + @db_dir = @base_dir+"/char-db" # /sysmtem-char-id/ucs + @ids_dir = @base_dir+"/ids" # /IDS-JIS-X0208-1990.txt + else + @db_dir = "/usr/local/lib/xemacs-21.4.12/i686-pc-linux/char-db" + @ids_dir = "/home/eto/chise/ids" + end + + @csf_dir = @base_dir+"/csf" + instance_variables.each {|name| + self.class.class_eval { attr_reader name.delete("@") } + } + end + end +end diff --git a/chise/csf.rb b/chise/csf.rb new file mode 100755 index 0000000..e29c219 --- /dev/null +++ b/chise/csf.rb @@ -0,0 +1,119 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "sgl" +#require "kconv" +require "uconv" +require "chise/config" + +module StrokeFont + CSF_FONT_DIR = Config.instance.csf_dir+"/" + CSF_DEFAULT_FILE = "KST32B.CSF1" + CSF_KOUKOTSU_FILE = "KST32ZX.CSF1" + + class CSFStrokeMaker + DEST_WIDTH = 1000 #‘å‚«‚³‚ð‚¨‚¨‚æ‚»1000x1000‚ɐ³‹K‰»‚·‚éB + ORG_WIDTH = 32 #Œ³‚̃TƒCƒY‚́A‰¡30~c32 + def initialize + @x, @y, @nx, @ny = 0, 0, 0, 0 + @strokes = Strokes.new + end + attr_reader :strokes + def move_to_x(x) @x = x; @nx = x; end + def draw_to_x(x) @nx = x; drawline; @x = @nx; @y = @ny; end + def next_x_to(x) @nx = x; end + def move_to_y(y) @y = y; @ny = y; end + def draw_to_y(y) @ny = y; drawline; @x = @nx; @y = @ny; end + def drawline() + @strokes.add_line(t(@x), t(@y), t(@nx), t(@ny)) + end + def t(a) a*DEST_WIDTH/ORG_WIDTH; end + end + + class CSFParser + def self.parse(str) #Strokes‚ð•Ô‚· + return Strokes.new if str == nil + sm = CSFStrokeMaker.new + (0...str.length).each {|i| + n = str[i] + if 0x21 <= n && n <= 0x26 + sm.move_to_x(n - 0x21) + elsif 0x28 <= n && n <= 0x3f + sm.move_to_x(n - 0x28 + 6) + elsif 0x40 <= n && n <= 0x5b + sm.draw_to_x(n - 0x40) + elsif 0x5e <= n && n <= 0x5f + sm.draw_to_x(n - 0x5e + 28) + elsif 0x60 <= n && n <= 0x7d + sm.next_x_to(n - 0x60) + elsif 0x7e == n + sm.move_to_y(n - 0x7e) + elsif 0xa1 <= n && n <= 0xbf + sm.move_to_y(n - 0xa1 + 1) + elsif 0xc0 <= n && n <= 0xdf + sm.draw_to_y(n - 0xc0) + end + } + return sm.strokes + end + end + + class CSFGlyph + def initialize(code, stroke) + @code = code + @stroke_str = stroke + @strokes = nil + end + attr_reader :strokes + def parse() + return if @strokes + @strokes = CSFParser.parse(@stroke_str) + end + def init + parse if @strokes.nil? + end + end + + class CSFFont + def initialize(file=CSF_DEFAULT_FILE) + @file = CSF_FONT_DIR + file + @glyphs = [] + read_file + @rend = nil + @rend = StrokesRenderer.new + @rend.hsv = [50, 100, 100] + end + def read_file() + open(@file) {|f| + while(line = f.gets) + next if line =~ /^\*/ + c, s = line.split + code = c.hex #JIS‚Ì’l‚ª”’l‚Å‚Í‚¢‚é + @glyphs[code] = CSFGlyph.new(code, s) + end + } + end + def init(code) + glyph = @glyphs[code] + return if glyph == nil + glyph.init() + glyph.parse + @rend.set_strokes(glyph.strokes) + end + def draw(code) #ˆø”‚É‚ÍJIS‚𐔒l‰»‚µ‚½‚à‚Ì‚ª‚Í‚¢‚é + glyph = @glyphs[code] + return if glyph == nil + @rend.draw + end + def print(code) + jis = JISX0208.new + char = jis.get_char(code) + printf("[%s][%04x]\n", char.nil? ? "nil" : char.map_sjis, code) + end + def ucs_to_jis(ucs) + char = Character.get(ucs) + j = char.japanese_jisx0208 + return j + end + end + +end diff --git a/chise/db.rb b/chise/db.rb new file mode 100755 index 0000000..2d82242 --- /dev/null +++ b/chise/db.rb @@ -0,0 +1,434 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "singleton" +require "bdb" +require "chise/config" +require "chise/util" +require "chise/rbchise" + +module CHISE + + class DBS # collection of DBs. not yet + end + + class ADB < BDB::Hash # A DataBase. + def initialize(*args) + super + @filename = args[0] + @modified = false + at_exit { +# p ["at_exit", @filename, @modified] + if @modified +# p ["close", @filename, @modified] + self.close #これがないと、うまくデータベースがセーブされないのです。 + end + } + end + + def self.open_create(filename) + ADB.open(filename, nil, BDB::CREATE | BDB::EXCL) #上書きはしない + end + + def mykey(key) + if key.is_a?(String) + if key.char_length == 1 + return "?"+key #Stringだったら引く前に?を足す + end + end + #key = key.to_s if key.is_a?(Numeric) #NumberだったらStringにする。 + #ここで && key ! =~ /^\?/ をいれると、?自身を検索できなくなってしまう。 + return key + end + + def myvalue(v) + return v if v == nil + return v.to_i if v =~ /^\d+$/ #数字だったらここで変換しておく + return v.sub(/^\?/, "") if v =~ /^\?/ #冒頭の?は取り除く + return $1 if v =~ /^"(.+)"$/ #最初と最後に"がついていたら、取り除く + #p ["get", v, t, key, db] + #return parse_sexp(v) if v =~ /^\(.+\)$/ #最初と最後が()の時は、S式にparseする + return v #それ以外って何? + end + + def myget(key) #keyキーを引いて返す + key = mykey(key) + v = get(key) #存在しなかったらnilを返すことになる + return myvalue(v) + end + + def myput(key, v) #keyにvをいれる + key = mykey(key) + put(key, v) #putする + @modified = true + end + end + + class DB # abstract class for DataBase + # translate file name for deal with Windows file system. + def self.unix_to_win(unix_path) + win = unix_path.gsub(//, ")") + win = win.gsub(/\*/, "+") + win = win.gsub(/\?/, "!") + win + end + + def self.win_to_unix(win_path) + unix = win_path.gsub(/\)/, ">") + unix = unix.gsub(/\(/, "<") + unix = unix.gsub(/\!/, "?") + unix = unix.gsub(/\+/, "*") + unix + end + + def get_filename(t) + return @pre + DB.unix_to_win(t) + @post if CHISE.windows? + return @pre + t + @post + end + + def get_dirname(t) File.dirname(get_filename(t)) end + + def open_dbs() + @dbs = Hash.new + keys = find_keys() + keys.each {|key| open_db(key) } + end + + def find_keys() + files = [] + Dir.glob(@glob){|f| + next if ! File.file?(f) + next if f =~ /.txt$/ + files << f + } + keys = [] + files.each {|f| + t = DB.win_to_unix(f) + t.sub!(%r|^#{@pre}|, "") + t.sub!(%r|#{@post}$|, "") if @post != "" + keys << t + } + return keys + end + + def close_db(t) + db = get(t) + return nil if db.nil? + db.close + @dbs.delete(t) + end + + def open_db(t) + return nil if get(t) #すでにopenしていたら再openはしない。 + begin + bdb = ADB.open(get_filename(t), nil, 0) + @dbs[t] = bdb if bdb != nil + rescue + p ["open error", get_filename(t)]; return nil + end + true + end + + def make_db(t, h=nil) #tという名前でhという中身のデータベースを作る + return nil if get(t) #すでにある場合はreturn + Dir.mkdir(get_dirname(t)) unless FileTest.exist?(get_dirname(t)) + db = nil + begin + db = ADB.open_create(get_filename(t)) #上書きはしない + if h != nil + h.each {|k, v| + k = "?"+k if k.is_a?(String) + db[k] = v + } + end + db.close + rescue + p ["make error", get_filename(t)]; return nil + end + true + end + + def make_db_no_question_mark(t, h=nil) #tという名前でhという中身のデータベースを作る + return nil if get(t) #すでにある場合はreturn + Dir.mkdir(get_dirname(t)) unless FileTest.exist?(get_dirname(t)) + db = nil + begin + db = ADB.open_create(get_filename(t)) #上書きはしない + if h != nil + h.each {|k, v| + # k = "?"+k if k.is_a?(String) + db[k] = v + } + end + db.close + rescue + p ["make error", get_filename(t)]; return nil + end + true + end + + def remove_db(t) #tという名前のデータベースを消去する + db = get(t) + if db + db.close + @dbs.delete(t) + end + begin + File.unlink(get_filename(t)) if FileTest.file?(get_filename(t)) + rescue + p ["unlink error", get_filename(t)]; return nil + end + dn = get_dirname(t) + Dir.rmdir(dn) if FileTest.directory?(dn) && Dir.entries(dn).length <= 2 #空directoryだったら消す + true + end + + def to_num(s) + return s.to_i if s =~ /^\d+$/ + s + end + + def dump_db(t) + db = get(t) + return nil unless db + file = get_filename(t) + open("#{file}.txt", "w"){|out| + # out.binmode.sync = true + ar = db.to_a + ar.map! {|k, v| [to_num(k), to_num(v)] } + ar.sort.each {|k, v| + out.printf("%s\t%s\n", k, v) + } + } + true + end + + def each_db() @dbs.to_a.sort.each {|t, db| yield(t, db) } end + def dump_all() each_db {|t, db| dump_db(t) } end + def close_all() each_db {|t, db| db.close } end + def keys() @dbs.keys end + + def each(t) + return unless block_given? + db = @dbs[t] + return nil unless db + db.each {|k, v| + k = to_num(k) + v = to_num(v) + k.sub!(/^\?/, "") if k =~ /^\?/ #冒頭の?は取り除く + vv = get(t, k) #p ["each", t, k, v, vv] + yield(k, vv) + } + end + + def each_sort(t) + return unless block_given? + db = @dbs[t] + return nil unless db + ar = db.to_a + ar.map! {|k, v| [to_num(k), to_num(v)] } + ar.sort.each {|k, v| + k.sub!(/^\?/, "") if k =~ /^\?/ #冒頭の?は取り除く + vv = get(t, k) #p ["each", t, k, v, vv] + yield(k, vv) + } + end + + def get(t, key=nil) #tというデータベースのkeyキーを引いて返す + db = @dbs[t] + return db if key.nil? + return nil unless db + db.myget(key) + end + + def put(t, key, v) #tというデータベースのkeyにvをいれる + db = @dbs[t] + if db == nil + db = make_db(t) + db = open_db(t) + db = @dbs[t] + end + db.myput(key, v) #putする + end + end + + class CharDB < DB # An Attribute DataBase. Key is in UTF8-MCS. + include Singleton + + def initialize() + super + dir = Config.instance.db_dir + @glob, @pre, @post = "#{dir}/system-char-id/*", "#{dir}/system-char-id/", "" + open_dbs() + end + + def get_all(u8) #全データベースのu8キーを引いてHashにまとめて返す + atrs = Hash.new + @dbs.each {|t, db| + v = get(t, u8) + atrs[t] = v if v != nil + } + atrs + end + end + + class CodesysDB < DB # A CodeSystem DataBase. + include Singleton + + def initialize() + super + dir = Config.instance.db_dir + @glob, @pre, @post = "#{dir}/*/system-char-id", "#{dir}/", "/system-char-id" + open_dbs() + end + + #def keys() @dbs.keys.sort end #どんなCodesysの情報を持っているかの一覧 + def keys() @dbs.keys end #どんなCodesysの情報を持っているかの一覧 + + def get_codesys(t) + db = get(t) + return nil unless db + return Codesys.new(t) + end + end + + class Codesys < DB + def initialize(name) + #super + @name = name + @dbs = CodesysDB.instance + end + + def keys() #どんなコードポイントの情報を持っているかの一覧 + ks = @dbs.get(@name).keys +# if @name =~ /jisx0208/ #特別処理 +# n = @dbs.get("=jis-x0208").keys +# # p ["keys", @name, ks, n] +# ks += n +# end + ks.map! {|k| to_num(k) } + ks + end + + def get(key) + v = @dbs.get(@name, key) + return v if v +# if @name =~ /jisx0208/ #jisx0208が含まれている場合だけ特別処理する +# return @dbs.get("=jis-x0208", key) +# end + return nil + end + + def each() + return unless block_given? + db = @dbs.get(@name) + return nil unless db + db.each {|k, v| + k = to_num(k) + v = to_num(v) + k.sub!(/^\?/, "") if k =~ /^\?/ #冒頭の?は取り除く + vv = @dbs.get(@name, k) #p ["each", t, k, v, vv] + yield(k, vv) + } + end + + def each_sort() + return unless block_given? + db = @dbs.get(@name) + return nil unless db + ar = db.to_a + ar.map! {|k, v| [to_num(k), to_num(v)] } + ar.sort.each {|k, v| + k.sub!(/^\?/, "") if k =~ /^\?/ #冒頭の?は取り除く + vv = @dbs.get(@name, k) #p ["each", t, k, v, vv] + yield(k, vv) + } + end + end + + class JISX0208 + def initialize + db = CodesysDB.instance + @common = db.get_codesys("=jis-x0208") + @newest = db.get_codesys("japanese-jisx0208-1990") + end + def get_char(code) + char = @common.get(code) + return char unless char.nil? + char = @newest.get(code) + return char unless char.nil? + return nil + end + end + + class DBS_Management # DataBase file management + OBSOLETE_ATTRIBUTES = " +cns-radical +cns-radical? +kangxi-radical +daikanwa-radical +unicode-radical + +cns-strokes +kangxi-strokes +daikanwa-strokes +shinjigen-1-radical +gb-original-radical +japanese-strokes +jis-strokes-a +jis-strokes-b +jisx0208-strokes +jis-x0213-strokes +jisx0213-strokes +unicode-strokes + +totalstrokes +cns-total-strokes +jis-total-strokes-b + +non-morohashi + +=>ucs* +#=>mojikyo +#=mojikyo +->identical + +ancient-ideograph-of +ancient-char-of-shinjigen-1 +original-ideograph-of +original-char-of-shinjigen-1 +simplified-ideograph-of +vulgar-ideograph-of +vulgar-char-of-shinjigen-1 +ideograph= +ideographic-variants +variant-of-shinjigen-1 + +iso-10646-comment +".split + + def initialize + dir = Config.instance.db_dir + @odir = dir+"/system-char-id/obsolete" #直打ちしている。 + end + + def move_obsolete_files # move obsolete BDB files to obsolete directory + db = CharDB.instance + db.close_all + Dir.mkdir(@odir) unless FileTest.directory? @odir + OBSOLETE_ATTRIBUTES.each {|attr| + next if attr =~ /^#/ + filename = db.get_filename(attr) + move_to_obsolete(filename) + move_to_obsolete(filename+".txt") + } + end + + def move_to_obsolete(file) + cmd = "mv \"#{file}\" #{@odir}" + # p cmd + system cmd + end + + end +end diff --git a/chise/defkanji.rb b/chise/defkanji.rb new file mode 100755 index 0000000..e2ea84c --- /dev/null +++ b/chise/defkanji.rb @@ -0,0 +1,168 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "singleton" + +module CHISE + class IDS_Definition + include Singleton + + #●ids-element: stop-character。これ以上分割しない、基本要素文字。 + #その文字自身を値として持つ。それ以外の値を持つ場合は、それを代りの値として使うことを意味する。 + #つまり、101字の基本漢字は最小構成要素なので、それ以上分解しませんよ、ということ。 + IDS_ELEMENT = "六百回交向歩考出書多少高可" + BASIC_KANJI = "人子女母父王口耳手足力目首毛心犬牛鳥貝角羽虫馬魚羊肉皮米竹木麦豆山川雨風水土石金田穴日月火音糸刀舟門戸衣矢弓車皿一二三四五六七八九十百千万寸尺上中下本玉立回食行止交向歩考入示走生出来書言大小白青多少高長" + + AWASE_KANJI = "鳴名加品古知問間聞取兄見切分粉貧林森校東明住位好岩砂里男畑習休短空坂島倉美孝赤看光初努協解新歌語話張強忘悲答晴現正字安守灰秋秒困国医包同合舌居右左受友反道返迷花菜集机主太氷州点店庫仕帳幼防引配早直班筆重番北化比死夏後進酒福私家世内谷半原前寺思電雲気布旅衆泣" + + #●ids-meaning: 意味的に考えるところのIDS。形としてはつながらないが、意味としてはこう分割できるというものを表す。 + IDS_MEANING = " +見 ⿱目儿 +東 ⿻木日 +里 ⿱田土 +赤 ⿱大火 +舌 ⿱千口 +太 ⿻大丶 +#州 ⿻砂川 +#重 ⿱?土 +世 ⿺十⿰十十 +内 ⿵冂入 +è°· ⿱⿱八&CDP-8B5D;口 +半 ⿱八牛 +孝 ⿱老子 +島 ⿹鳥山 +美 ⿱羊大 +看 ⿸手目 +倉 ⿸食口 +å¼· ⿰弓⿱厶虫 +乑 ⿲人人人 +果 ⿱田木 +比 ⿰匕匕 +鼓 â¿°壴支 +#承 +é¼» ⿳自田廾 +風 ⿵几⿱丿虫 +#夜 +&CDP-8C47; ⿰亡月 +黙 ⿺黒犬 +".split("\n") + + #●ids-represent: これを持つもつ字は、それ以外の字を意味するということを意味する。 + #例えば人偏は、人を意味する。これは再帰的に摘要する。 + IDS_REPRESENT = " +亻 人 +&CDP-8B5D; 人 +衤 è¡£ +𠂆 厂 +𠂇 厂 +⺡ æ°´ +氵 æ°´ +&CDP-8CCC; 羊 +𦍌 羊 +&CDP-8B52; 老 +&CDP-8CBB; é³¥ +&CDP-8BEA; 刀 +刂 刀 +朩 木 +⺩ 王 +土 土 +𠤎 匕 +&CDP-8BC4; 泉 +⻗ 雨 +&CDP-8BE8; 申 +糹 糸 +⻊ 足 +&J90-5469; 夂 +&CDP-8BD5; 西 +&CDP-8BA9; 牛 +⺮ 竹 +黑 黒 +&CDP-8971; 冫 +".split("\n") + + #●ids-link: 横列びの関係があるもの + IDS_LINK = " +艮 &CDP-8B7C; +&CDP-8B48; 四 +寸 才 +㐅 ç±³ +#この部品はいろいろあるのだけど…。 +".split("\n") + + def check + #show_ids(IDS_ELEMENT) #ar = BASIC_KANJI+IDS_ELEMENT + #show_ids(IDS_MEANING.join()) + #show_ids(KanjiList.instance.joyo) + #show_ids(KanjiList.instance.awase(0)) + #show_ids(open("0208-shell.txt").read) + #p "&U+2ff2;".de_er + #p "休".decompose.to_er + eval_print_loop + end + def show_ids(str) + str.de_er.to_a.sort.uniq.each {|ch| + char = ch.char + ids = char.decompose_all + print "#{char} #{ids.map_ucs}\n" #できるだけucsに正規化するべし。 + } + end + def eval_print_loop + while true + line = STDIN.gets + line.chomp! + exit if line.nil? || line =~ /end/ + print "#{line.decompose} #{line.decompose.to_er}\n" + print "#{line.decompose_all} #{line.decompose_all.to_er}\n" + end + end + + def define_all + define_ids_element + define_ids_meaning + define_ids_represent + define_ids_link + end + def define_ids_element + IDS_ELEMENT.each_char {|ch| + char = ch.char + char.ids_element = ch + } + end + def define_ids_meaning + IDS_MEANING.each {|line| + next if line.nil? || line == "" || line =~ /^#/ + ch, ids = line.split + char = ch.de_er.char + char.ids_meaning = ids.de_er + print "#{ch} #{ids}\n" + } + end + def define_ids_represent + IDS_REPRESENT.each {|line| + next if line.nil? || line == "" || line =~ /^#/ + ch, rep = line.split + char = ch.de_er.char + char.ids_represent = rep.de_er + print "#{char.map_ucs} #{rep}\n" + } + end + def define_ids_link + IDS_LINK.each {|line| + next if line.nil? || line == "" || line =~ /^#/ + ch, rep = line.split + char = ch.de_er.char + char.ids_link = rep.de_er + print "#{char.map_ucs} #{rep}\n" + } + end + + end +end + +if $0 == __FILE__ + $LOAD_PATH << "../../src" + require "chise" + include CHISE + ide = CHISE::IDS_Definition.instance +# ide.go + ide.define_all +end diff --git a/chise/graphviz.rb b/chise/graphviz.rb new file mode 100755 index 0000000..415c1c9 --- /dev/null +++ b/chise/graphviz.rb @@ -0,0 +1,138 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +class DotElement + def initialize() + @attr = Hash.new + end + def method_missing(mid, *args) #ŽQl:ostruct.rb + mname = mid.id2name + return @attr[mname] if args.length == 0 + @attr[mname.chop] = args[0] if mname =~ /=$/ #‘ã“ü + end + def to_s + str = " #{mainstr()} " + str << "[" + @attr.map {|k, v| %Q|#{k}="#{v}"| }.join(" ") + "]" if 0 < @attr.length + str << ";\n" + str + end +end + +class DotPage < DotElement + def initialize() + super() + @nodes = [] + @edges = [] + end + def mainstr() @name.to_s; end + def add_node(node) + return unless node.is_a? DotNode + @nodes << node + end + def add_edge(edge) + return unless edge.is_a? DotEdge + @edges << edge + end + def to_s() + str = "digraph G {\n" +# str << %Q| size="6, 6"\n| + str << @attr.map {|k, v| %Q| #{k}="#{v}"\n| }.join("") + @nodes.each {|node| + str << node.to_s + } + @edges.each {|edge| + str << edge.to_s + } + str << "}\n" + str + end +end + +class DotNode < DotElement + def initialize(name) + @name = name + super() + end + def mainstr() @name.to_s; end +end + +class DotEdge < DotElement + def initialize(from, to) + @from, @to = from, to + super() + end + def mainstr() "#{@from.to_s} -> #{@to.to_s}"; end +end + +class Graphviz # abstract class + DOT = 0 + TWOPI = 1 + NEATO = 2 + + def initialize(type=DOT) + @type = type + @codepage = nil + @target = nil + @in = nil + @out = nil + end + attr_accessor :type, :codepage, :target, :in, :out + + def post_process() #“¯‚¶ƒtƒ@ƒCƒ‹‚ɏ㏑‚«‚É‚·‚éB + str = open(@out).read + str.gsub!(/&#x/, "&#x") + #str.gsub!(/font-family:MS Gothic;/, "") + #str.gsub!(/font-family:MS Gothic;/, "font-family:MS-Mincho;") + str.gsub!(/font-family:Times New Roman;/, "") + open(@out, "w"){|out| out.print str} + end +end + +class GraphvizCLI < Graphviz +# COMMAND_DIR = "c:\Program Files\ATT\Graphviz\bin" + COMMAND_DIR = "" #PATH‚ª‚Æ‚¨‚Á‚Ä‚È‚¢‚Æ‚¾‚ß‚Ý‚½‚¢B + NAMES = "dot twopi neato".split + def generate(debug=false) + ar = [] + #ar << COMMAND_DIR + NAMES[@type] + ".exe" + ar << COMMAND_DIR + NAMES[@type] + #ar << "-V" + #ar << "-Gpack" + ar << "-T#{@target}" + ar << "-o #{@out}" + ar << @in + cmd = ar.join(" ") + print cmd,"\n" + system cmd + if @target =~ /svg/i + post_process + end + end +end + +class GraphvizOLE < Graphviz + #NAMES = "DOT TWOPI NEATO".split + def generate(debug=false) + require "win32ole" + names = "DOT TWOPI NEATO".split + @ole = WIN32OLE.new("Wingraphviz." + names[@type]) + @ole.codepage = cp if @codepage #neato.codepage = 65001 #codepage: 65001 Unicode UTF-8 + @instr = open(@in).read + if @target =~ /svg/i + result = @ole.toSVG(@instr) + open(@out, "w"){|out| out.print result} + post_process + elsif @target =~ /svg/i + result = @ole.toPNG(@dot) + result.save(@out) + end + end + def nu_to_png(filename=nil) + return "" unless @dot.is_a?(String) + png = @ole.toPNG(@dot) + png.save(filename) unless filename.nil? + return png + end + #p neato.validate(str) + #ps = neato.toPS(str) + #open("test.ps", "w"){|out| out.print ps } +end diff --git a/chise/iconv.rb b/chise/iconv.rb new file mode 100755 index 0000000..e6f97cb --- /dev/null +++ b/chise/iconv.rb @@ -0,0 +1,86 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "iconv" + +class Iconv + def self.iconv_to_utf8(from, str) + iconv = Iconv.new(from, "UTF-8") + out = "" + begin + out << iconv.iconv(str) + rescue Iconv::IllegalSequence => e + out << e.success + ch, str = e.failed.split(//u, 2) + out << if respond_to?(:unknown_unicode_handler) + u = ch.unpack("U").first + unknown_unicode_handler(u) + else + "?" + end + retry + end + out + end + + def self.unknown_unicode_handler (u) + sprintf("&#x%04x;", u) + end + + def self.iconv_to_from(to, from, str) + iconv = Iconv.new(to, from) + out = "" + begin + out << iconv.iconv(str) + rescue Iconv::IllegalSequence => e + out << e.success + ch, str = e.failed.split(//u, 2) + out << "?" + retry + rescue Iconv::InvalidCharacter => e + out << e.success + ch, str = e.failed.split(//u, 2) + out << "?" + retry + end + out + end +end + +class String + def euctou8() Iconv.iconv_to_from("UTF-8", "EUC-JP", self) end + def u8toeuc() Iconv.iconv_to_from("EUC-JP", "UTF-8", self) end + def sjistou8() Iconv.iconv_to_from("UTF-8", "Shift_JIS", self) end + def u8tosjis() Iconv.iconv_to_from("Shift_JIS", "UTF-8", self) end + def jistou8() Iconv.iconv_to_from("UTF-8", "ISO-2022-JP", self) end + + def u8tojis() + i = Iconv.new("ISO-2022-JP", "UTF-8") + i.iconv(self)+i.close + end + + def u8tou16 + Iconv.iconv_to_from("UTF-16", "UTF-8", self).sub(/\A\376\377/, "") + end + + def u8tou32 + Iconv.iconv_to_from("UTF-32", "UTF-8", self).sub(/\A\0\0\376\377/, "") + end + + def u32tou8 + Iconv.iconv_to_from("UTF-8", "UTF-32", self) + end + + def u32tou16 + Iconv.iconv_to_from("UTF-16", "UTF-32", self).sub(/\A\376\377/, "") + end + + def u16toeuc() Iconv.iconv_to_from("EUC-JP", "UTF-16", self) end + def u16tosjis() Iconv.iconv_to_from("Shift_JIS", "UTF-16", self) end + + def u32to_i + return 0 if length == 0 + s = self +# return (s[3] << 24 | s[2] << 16 | s[1] << 8 | s[0]) + return (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]) + end +end diff --git a/chise/ids.rb b/chise/ids.rb new file mode 100755 index 0000000..46e039c --- /dev/null +++ b/chise/ids.rb @@ -0,0 +1,508 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +$LOAD_PATH << "../../lib" if $0 == __FILE__ +require "chise/db" + +module CHISE +# IDC_LEFT_TO_RIGHT = "â¿°" +# IDC_ABOVE_TO_BELOW = "⿱" +# IDC_LEFT_TO_MIDDLE_AND_RIGHT = "⿲" +# IDC_ABOVE_TO_MIDDLE_AND_BELOW = "⿳" +# IDC_FULL_SURROUND = "â¿´" +# IDC_SURROUND_FROM_ABOVE = "⿵" +# IDC_SURROUND_FROM_BELOW = "⿶" +# IDC_SURROUND_FROM_LEFT = "â¿·" +# IDC_SURROUND_FROM_UPPER_LEFT = "⿸" +# IDC_SURROUND_FROM_UPPER_RIGHT = "⿹" +# IDC_SURROUND_FROM_LOWER_LEFT = "⿺" +# IDC_OVERLAID = "â¿»" + + IDC_LEFT_TO_RIGHT = "\342\277\260" #2FF0 + IDC_ABOVE_TO_BELOW = "\342\277\261" + IDC_LEFT_TO_MIDDLE_AND_RIGHT = "\342\277\262" + IDC_ABOVE_TO_MIDDLE_AND_BELOW = "\342\277\263" + IDC_FULL_SURROUND = "\342\277\264" #2FF4 + IDC_SURROUND_FROM_ABOVE = "\342\277\265" + IDC_SURROUND_FROM_BELOW = "\342\277\266" + IDC_SURROUND_FROM_LEFT = "\342\277\267" + IDC_SURROUND_FROM_UPPER_LEFT = "\342\277\270" + IDC_SURROUND_FROM_UPPER_RIGHT = "\342\277\271" + IDC_SURROUND_FROM_LOWER_LEFT = "\342\277\272" + IDC_OVERLAID = "\342\277\273" + + IDC_LR = IDC_LEFT_TO_RIGHT + IDC_AB = IDC_ABOVE_TO_BELOW + IDC_LM = IDC_LEFT_TO_MIDDLE_AND_RIGHT + IDC_AM = IDC_ABOVE_TO_MIDDLE_AND_BELOW + IDC_FS = IDC_FULL_SURROUND + IDC_FA = IDC_SURROUND_FROM_ABOVE + IDC_FB = IDC_SURROUND_FROM_BELOW + IDC_FL = IDC_SURROUND_FROM_LEFT + IDC_UL = IDC_SURROUND_FROM_UPPER_LEFT + IDC_UR = IDC_SURROUND_FROM_UPPER_RIGHT + IDC_LL = IDC_SURROUND_FROM_LOWER_LEFT + IDC_OV = IDC_OVERLAID + + IDC_LMR = IDC_LM + IDC_AMB = IDC_AM + IDC_FUL = IDC_UL + IDC_FUR = IDC_UR + IDC_FLL = IDC_LL + IDC_O = IDC_OV + + class IDS_TEXT_DB < DB #====================================================================== + include Singleton + IDS_LIST = " +IDS-UCS-Basic.txt +#IDS-UCS-Compat-Supplement.txt +#IDS-UCS-Compat.txt +IDS-UCS-Ext-A.txt +IDS-UCS-Ext-B-1.txt +IDS-UCS-Ext-B-2.txt +IDS-UCS-Ext-B-3.txt +IDS-UCS-Ext-B-4.txt +IDS-UCS-Ext-B-5.txt +IDS-UCS-Ext-B-6.txt +IDS-JIS-X0208-1990.txt +IDS-Daikanwa-01.txt +IDS-Daikanwa-02.txt +IDS-Daikanwa-03.txt +IDS-Daikanwa-04.txt +IDS-Daikanwa-05.txt +IDS-Daikanwa-06.txt +IDS-Daikanwa-07.txt +IDS-Daikanwa-08.txt +IDS-Daikanwa-09.txt +IDS-Daikanwa-10.txt +IDS-Daikanwa-11.txt +IDS-Daikanwa-12.txt +IDS-Daikanwa-dx.txt +IDS-Daikanwa-ho.txt +IDS-CBETA.txt +".split + def initialize() + super + @ids_list = IDS_LIST + @chars = [] + + @dir = Config.instance.ids_dir + + @glob, @pre, @post = "#{@dir}/db/*", "#{@dir}/db/", "" + dir = File.dirname(@pre) + Dir.mkdir(dir) unless FileTest.exist?(dir) + open_dbs() + end + + def each_file() + return unless block_given? + @ids_list.each {|file| + next if file =~ /^#/ + yield(@dir+file) + } + end + + def each_line(file) + open(file){|f| + while line = f.gets + next if line =~ /^;/ #コメントはとばす + line.chomp! + code, char, ids = line.split + yield(code, char, ids) + end + } + end + def dump_text_all + each_file {|file| + dir = File.dirname(file) + "/../ids-new/" + Dir.mkdir(dir) if ! FileTest.directory?(dir) + newfile = dir + File.basename(file) + p [file, newfile] + open(newfile, "w"){|out| + out.binmode.sync = true + each_line(file){|code, ch, ids| + char = Character.get(ch) + ids = char.decompose + out.print "#{code} #{ch} #{ids}\n" + } + } + } + end + def make_ids_error + each_file {|file| + dir = File.dirname(file) + "/../ids-error" + Dir.mkdir(dir) unless FileTest.exist?(dir) + errfile = dir + "/" + File.basename(file) + # p [file, errfile] + open(errfile, "w"){|out| + out.binmode.sync = true + each_line(file){|code, ch, ids| + char = Character.get(ch) + ids_error = char["ids-error"] + next if ids_error.nil? + out.print "#{code} #{ch} #{ids} #{ids_error}\n" + } + } + } + end + end + + class IDS_DB < DB # BDB化したIDS DBを扱う + include Singleton + def initialize + @dbs = CharDB.instance + end + def make_ids_db + db = IDS_TEXT_DB.instance + db.each_file {|file| + @char_counter = 0 + @same_ids_counter = 0 + @good_ids_counter = 0 + @conflict_ids_counter = 0 + db.each_line(file){|code, ch, ids| + @char_counter += 1 + + ids = "" if ids == nil + next if ids == "" #IDSが定義されていない場合は、さっくりと無視するべしよ。 + + charimg = Character.get(ch) #実体参照である可能性がある + + next if code =~ /'$/ || code =~ /"$/ #大漢和番号のダッシュ付きは無視する + char = Character.get("&"+code+";") #code表記を元に実体参照を作って解釈する + if char.nil? || char.to_s == "" #うまく文字にならなかった + print "char == null #{char.inspect} #{code} #{ch} #{ids}\n" unless code =~ /^M-/ || code =~ /^CB/ + #大漢和、CBETA以外の場合は、エラーメッセージ。 + next + end + if char != charimg #code表記と文字が一致していない? + unless code =~ /^M-/ || code =~ /^MH-/ || code =~ /^CB/ #食い違っていて当然であるので何もしない + print "unknown char #{char.inspect} #{code} #{ch} #{ids}\n" + next #それ以外の場合はエラーメッセージをだして、次へ。 + end + end + #next if !char.has_attribute? #isolated characterはまぎれこませない。 + + ids.de_er! #実体参照を解除する + next if ids == char.to_s #もし文字とまったく一緒なら、意味が無いので情報を持たない + next if ids.char_length == 1 + + idstree = IDS_Tree.new(ids) + c = idstree.check_integrity + c = "contains self" if ids.include?(char.to_s) + if c #ちょっとでもエラーがある場合は、 + char["ids-error"] = c #エラーを記録して、データとしては保持しない + next + end + + if char["ids"].nil? || char["ids"] == "" #元々IDSが無かった場合は、 + char["ids"] = ids #普通に代入すればそれでいいです。 + @good_ids_counter += 1 + else #しかしいままでにすでにIDSが定義されていた場合は? + if char["ids"] == ids #新しいIDSと古いIDSが完全に一致するなら無視しましょう。 + @same_ids_counter += 1 + else #しかしいままでのIDSと新しいIDSが食い違った場合は? + @conflict_ids_counter += 1 + # print "conflict #{char.inspect} #{code} #{ids} #{char["ids"]}\n" + end + end + } + print "#{file} #{@char_counter} #{@same_ids_counter} #{@conflict_ids_counter} #{@good_ids_counter}\n" + CharacterFactory.instance.reset() + } + @dbs.dump_db("ids-error") #テキスト化する + @dbs.dump_db("ids") #テキスト化する + end + def make_ids_reverse + h = Hash.new + @dbs.each("ids") {|k, v| + char = k.char + ids = char.decompose + h[ids] = "" if h[ids].nil? + h[ids] += k #追加する + } + h.each {|k, v| + h[k] = char_sort(v) #文字の順番を、よく使うっぽいものからの順番にする + } + h.delete_if {|k, v| #h[k]が""になる可能性もあるが、それはkeyとして入れないことにする。 + v == "" + } + print "length #{h.length}\n" + cdb = CodesysDB.instance + cdb.make_db_no_question_mark("ids", h) + cdb.open_db("ids") #これが無いと、dump_dbされません。 + cdb.dump_db("ids") + end + def char_sort(composed) + return composed if composed.char_length == 1 + ar = composed.to_a + arorg = ar.dup + ar2 = [] + ar.dup.each {|ch| + char = ch.char + if char.char_id < 0xfffff #Unicodeっぽい? + ar2 << ch + ar.delete(ch) + end + } + if 0 < ar.length + EntityReference.each_codesys{|codesys, er_prefix, keta, numtype| + ar.each {|ch| + char = ch.char + v = char[codesys] + # p [codesys, v] if v + if v #EntityReferenceの順番に準拠する。 + ar2 << ch + ar.delete(ch) + end + } + } + end + if 0 < ar.length + # p ["yokuwakaran character", ar, ar[0].inspect_all, arorg] + EntityReference.each_codesys{|codesys, er_prefix, keta, numtype| + ar.dup.each {|ch| + char = ch.char + v = char[codesys] + # p [codesys, v] if v + } + } + end + return ar2.join("") + end + def dump_ids_duplicated + open("ids-duplicated.txt", "w"){|out| + #out.binmode + CodesysDB.instance.each("ids") {|k, v| + if v.nil? + out.print "nil #{k} #{v}\n" + next + end + n = v.char_length + next if n == 1 + out.print "#{n} #{k} #{v}" + v.each_char {|ch| + char = ch.char + out.print " #{char.inspect}" + } + out.print "\n" + } + } + end + def make_ids_aggregated + @dbs.each("ids") {|k, v| + char = k.char + ids = char.decompose + ag = ids.aggregate + char["ids-aggregated"] = ag + } + @dbs.dump_db("ids-aggregated") + end + def dump_ids_aggregated + open("ids-aggregated.txt", "w"){|out| + #out.binmode + @dbs.each("ids") {|k, v| + char = k.char + ids = char["ids"] + ag = char["ids-aggregated"] + out.print "#{char.to_s} #{ag} #{ids}\n" if ids != ag + } + } + end + def make_ids_parts + @dbs.each("ids") {|k, v| + char = k.char + pids = char.to_s + ar = [] + counter = 0 + loop { + ids = pids.decompose + break if ids == pids #これ以上分割できないようだったら終了〜。 + ar += ids.to_a + counter += 1 + p [char.to_s, pids, ids, ar] if 10 < counter #これは何かおかしいぞと + pids = ids + } + ar.sort! + ar.uniq! + #やっぱりIDS文字も加えることにする. by eto 2003-02-05 + # ar.delete_if {|ch| + # ch.char.is_ids? #IDS文字はまぎれこませない。 + # } + str = ar.join("") + char["ids-parts"] = str + } + @dbs.dump_db("ids-parts") + end + def make_ids_contained + h = Hash.new + @dbs.each("ids-parts") {|k, v| + char = k.char + parts = char.ids_parts + parts.each_char {|ch| + # part = ch.char + h[ch] = [] if h[ch].nil? + h[ch] << k + # h[ch] += k + # part["ids-contained"] = "" if part["ids-contained"].nil? + # part["ids-contained"] += k + } + } + h.each {|k, v| + char = k.char + v.sort! + char["ids-contained"] = v.join("") + + } + @dbs.dump_db("ids-contained") + end + def make_ids_decomposed + @dbs.each("ids") {|k, v| + char = k.char + de= char.decompose_all + char["ids-decomposed"] = de + } + @dbs.dump_db("ids-decomposed") + end + end + + class Node < Array #==================================木構造の中の一つの枝 + def initialize(nodeleaf=nil, nodenum=nil) + super() + @nodeleaf = nodeleaf + @nodenum = nodenum + if @nodeleaf + original_add(@nodeleaf) + end + end + attr_reader :nodenum + alias original_add << + private :original_add + def <<(obj) + original_add(obj) + @nodenum -= 1 if @nodenum + end + def nodes + ar = [] + ar << self.to_s + self.each {|n| + ar += n.nodes if n.is_a? Node + } + return ar + end + end + + class Tree #==================================================木構造を扱う + def initialize() + @root = Node.new() + @stack = [@root] + @leafnum = 0 + @depth = 1 #stackの深さが最大になったところの値、木構造が無いときは1となる + end + def depth() @depth - 1 end + def add_node(nodeleaf=nil, nodenum=nil) #枝を追加 + new_node = Node.new(nodeleaf, nodenum) + @stack.last << new_node + @stack << new_node + if @depth < @stack.length + @depth = @stack.length + end + self + end + def end_node() #この枝は終り + @stack.pop + self + end + def add_leaf(a) #葉を追加 + @stack.last << a + end_check() + self + end + def end_check() + n = @stack.last.nodenum + if n && n == 0 + end_node() + end_check() #再帰 + end + end + def check_integrity + n = @stack.last.nodenum + return nil if @root.length == 0 #no tree is good tree + return "unmatch leaves" if n && n != 0 + return "extra nodes" if @root.first.is_a?(Node) && @root.length != 1 + return "extra leaves" if @root.length != 1 + return nil + end + def nodes + r = @root.nodes + r.shift + r + end + def sub_nodes + r = nodes + r.shift + r + end + def to_s() @root.to_s end + def inspect() @root.inspect end + end + + class IDS_Tree < Tree + def initialize(str) + @str = str + super() + parse() + end + def parse() + @str.each_char {|ch| + char = Character.new(ch) + if is_ids?(char) + add_node(char, ids_operator_argc(char)) + else + add_leaf(char) + end + } + end + def is_ids?(obj) + return true if "+*".include?(obj.to_s) #テスト用ですかね + return true if obj.is_ids? + return false + end + def ids_operator_argc(obj) + return obj.ids_operator_argc if 0 < obj.ids_operator_argc + return 2 #テスト用ってことで + end + def check_integrity + r = super + return r if r #不完全がすでにわかっているならreturn + return "contains ques" if @str =~ /\?/ #?が含まれている? + return nil + end + end + + class IDS #=========================================IDSそのものを扱うclass + def initialize(str) #IDS文字列をうけとる。 + @str = str + end + def parse + end + def parse_x #柔軟型のParse. IDSキャラクターが前にきてなくてもよい。などなど。 + end + end + + class Counter + #使い方 + #counter = Counter.new(50) { exit } + #counter.count + def initialize(max) + @max = max + @count = 0 + @proc = proc + end + def count + @count += 1 + if @max <= @count + @proc.call + end + end + end + +end diff --git a/chise/kage.rb b/chise/kage.rb new file mode 100755 index 0000000..46c0cbc --- /dev/null +++ b/chise/kage.rb @@ -0,0 +1,167 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "sgl" +require "singleton" +$LOAD_PATH << "../../lib" if $0 == __FILE__ +require "chise/kageserver" + +#こんな感じのフォーマットになっている。 +# +# +# +# +# +# + +module StrokeFont + class QuadraticPath #========================動的な分割に対応できるようにする + DEFAULT_DIVIDE = 4 + def initialize(p1, p2, p3) + @p1, @p2, @p3 = p1, p2, p3 + @num = DEFAULT_DIVIDE + end + def divide_adaptic #適応的分割数をする。 + end + def divide() + divide_num(@num) + end + def divide_num(num) #分割数を指定できる + #p [num] + x1, y1 = @p1 + x2, y2 = @p2 + x3, y3 = @p3 + #2次のbezier曲線の計算式、 P(t) = (1-t)^2*P1 + 2*t*(1-t)*P2 + t^2*P3 + curve = [] + (num+1).times {|i| #ここで最後の点を含めないのがポイント。これによって次の曲線との重複が無いようにしている。 + t = (i.to_f)/num + x = (1-t)*(1-t)*x1 + 2*t*(1-t)*x2 + t*t*x3 + y = (1-t)*(1-t)*y1 + 2*t*(1-t)*y2 + t*t*y3 + curve << [x,y] + } + #p curve + return curve + end + end + + class PathResolver + #M 50,540 950,255 + #M 330,50 330,900 M 330,900 Q 330,950 380,950 M 380,950 840,950 M 840,950 Q 890,950 915,850 + def initialize + reset + end + def reset + @lines = [] + @px, @py = -1, -1 + end + def parse(str) + reset + cmd = [] + str.split.each {|par| + if par.length == 1 #コマンドである + exec_cmd(cmd) if 0 < cmd.length #前のコマンドが残っていたら実行 + cmd = [par] #cmdを新規生成 + elsif par =~ /[,0-9]+/ #座標値である + sx, sy = par.split(/,/) + x, y = sx.to_i, sy.to_i + cmd << [x, y] #cmdへのargを追加 + end + } + exec_cmd(cmd) if 0 < cmd.length #前のコマンドが残っていたら実行 + @lines + end + def exec_cmd(cmd) + c = cmd.shift #先頭をとる + case c + when "M" + cmd.each {|x, y| moveto(x, y) } + when "Q" + quadratic([@px, @py], cmd.shift, cmd.shift) + end + end + def moveto(x, y) + @lines << [@px, @py, x, y] if @px != -1 + @px, @py = x, y + end + def quadratic(p1, p2, p3) + #p ["quadratic", p1, p2, p3] + qp = QuadraticPath.new(p1, p2, p3) + curve = qp.divide + curve.each {|x, y| + moveto(x, y) + } + end + end + + class KageParser + def self.parse(svg) + @strokes = Strokes.new + pr = PathResolver.new + lines = svg.split(/\n/) + lines.each {|line| + next unless line =~ /^/) + end + def read_list + h = {} + open("kage-list.txt"){|f| + while line = f.gets + if line =~ /u([0-9a-f]+)\.skeleton/ + code = $1 + num = code.hex + error = false + error = true if line =~ /error/ + h[num] = error + end + end + } + return h + end + def get_all + #error_h = read_list + STDOUT.binmode + @kn = KanjiNetwork.new + @kl = KanjiList.instance + #list = @kl.awase(0) + #list = @kl.awase(1) + #list = @kl.joyo + #list = open("../../jis.txt").read + list = @kl.jisx0208() + @kn.make_network(list) + nodes, edges = @kn.nodes_and_edges + ar = [] + nodes.each {|ch| + char = ch.char + num = char.to_i + next if 0xffff < num + next if num == 0x3561 + next if num == 0x4fdb + next if num == 0x58d1 + next if num == 0x891d + next if num == 0x8902 + next if num < 0x8900 + ar << num + } + get_ar(ar) + end + def get_ar(ar) + ar.each {|num| #intの数列 + char = Character.get(num) + ch = char.to_s + er = char.to_er + #TYPES.each_with_index {|type, index| + #(0..2).each {|index| + (0..0).each {|index| + result = get(num, index) #cacheに保存するべしと。 + next if result + err = "error" + print "#{er} #{ch} #{err}\n" + } + } + end + def test_kanji + char = "&CDP-8BA5;".de_er + #p char.inspect_all + #str = (char.to_s+"真") + str = (char.to_s+"直") + p str.find + end +end + +if $0 == __FILE__ + ks = KageServer.instance + #print ks.get(0x4e03) + ks.get_all +end diff --git a/chise/kanjilist.rb b/chise/kanjilist.rb new file mode 100755 index 0000000..362e564 --- /dev/null +++ b/chise/kanjilist.rb @@ -0,0 +1,84 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "singleton" + +module CHISE + class KanjiList + include Singleton + + BASIC_KANJI = "人子女母父王口耳手足力目首毛心犬牛鳥貝角羽虫馬魚羊肉皮米竹木麦豆山川雨風水土石金田穴日月火音糸刀舟門戸衣矢弓車皿一二三四五六七八九十百千万寸尺上中下本玉立回食行止交向歩考入示走生出来書言大小白青多少高長" + + AWASE_KANJI_LIST = "鳴名加品古知問間聞取兄見切分粉貧林森校東明住位好岩砂里男畑習休短空坂島倉美孝赤看光初努協解新歌語話張強忘悲答晴現正字安守灰秋秒困国医包同合舌居右左受友反道返迷花菜集机主太氷州点店庫仕帳幼防引配早直班筆重番北化比死夏後進酒福私家世内谷半原前寺思電雲気布旅衆泣" + + TEST_KANJI_LIST = "鳴名 +鳴名加知医短男畑秋居古灰 +加知医短男畑秋居古灰 +品問間聞取兄見切分粉貧林森校東明住位好岩砂里習休空坂島 +倉美孝赤看光初努協解新歌語話張強忘悲答晴現正字安守秒困国包同合舌 +右左受友反道返迷花菜集机主太氷州点店庫仕帳幼防引配早直班筆重番北化比 +死夏後進酒福私家世内谷半原前寺思電雲気布旅衆泣".split + + KYOIKU_KANJI_LIST = "愛悪圧安暗案以位囲委意易異移胃衣遺医域育一印員因引飲院右宇羽雨運雲営映栄永泳英衛液益駅円園延沿演遠塩央往応横王黄億屋恩温音下化仮何価加可夏家科果歌河火花荷課貨過我画芽賀会解回快改械海灰界絵開階貝外害街各拡格確覚角閣革学楽額割活株寒刊巻完官干幹感慣漢看管簡観間関館丸岸眼岩顔願危喜器基寄希揮机旗期機帰気汽季紀規記貴起技疑義議客逆久休吸宮弓急救求泣球究級給旧牛去居挙許漁魚京供競共協境強教橋胸興郷鏡業局曲極玉勤均禁筋近金銀九句区苦具空君訓群軍郡係兄型形径敬景系経計警軽芸劇激欠決潔穴結血月件健券建憲検権犬研絹県見険験元原厳減源現言限個古呼固己庫戸故湖五午後語誤護交候光公功効厚口向后好孝工幸広康校構港皇紅耕考航行講鉱鋼降高号合刻告国穀黒骨今困根混左差査砂座再最妻才採済災祭細菜裁際在材罪財坂作昨策桜冊刷察札殺雑皿三参山散産算蚕賛酸残仕使司史四士始姉姿子市師志思指支枝止死氏私糸紙至視詞詩試誌資飼歯事似児字寺持時次治磁示耳自辞式識七失室質実舎写射捨社者謝車借尺若弱主取守手種酒首受授樹収周宗就州修拾秋終習衆週集住十従縦重宿祝縮熟出術述春準純順処初所暑署書諸助女序除傷勝商唱将小少承招昭松消焼照省章笑証象賞障 上乗城場常情条状蒸植織職色食信心新森深申真神臣親身進針人仁図垂推水数寸世制勢性成政整星晴正清生盛精聖声製西誠青静税席昔石積績責赤切接折設節説雪絶舌先千宣専川戦泉浅洗染線船選銭前善然全祖素組創倉奏層想操早巣争相窓総草装走送像増臓蔵造側則息束測足速属族続卒存孫尊損村他多太打体対帯待態貸退隊代台大第題宅達谷単担探炭短誕団断暖段男談値知地池置築竹茶着中仲宙忠昼柱注虫著貯丁兆帳庁張朝潮町腸調長頂鳥直賃追痛通低停定底庭弟提程敵的笛適鉄典天展店転点伝田電徒登都努度土党冬刀島投東湯灯当等答糖統討豆頭働動同堂導童道銅得徳特毒独読届内南難二肉日乳入任認熱年念燃納能脳農波派破馬俳拝敗背肺配倍梅買売博白麦箱畑八発判半反板版犯班飯晩番否悲批比皮秘肥費非飛備美鼻必筆百俵標氷票表評病秒品貧不付夫婦富布府父負武部風副復服福腹複仏物分奮粉文聞兵平並閉陛米別変片編辺返便勉弁保歩補墓暮母包報宝放方法訪豊亡忘暴望棒貿防北牧本妹枚毎幕末万満味未密脈民務夢無名命明盟迷鳴綿面模毛木目問門夜野矢役約薬訳油輸優勇友有由遊郵夕予余預幼容曜様洋用羊葉要陽養欲浴翌来落乱卵覧利理裏里陸律率立略流留旅両料良量領力緑林臨輪類令例冷礼歴列練連路労朗老六録論和話" + + KYOIKU_GAKUNEN_LIST = "一右雨円王音下火花学気休金九空月犬見五口校左三山四子糸字耳七車手十出女小上森人水正生青石赤先千川早足村大男中虫町天田土二日入年白八百文本名木目夕立力林六 +引雲遠黄何夏家科歌画会回海絵貝外楽間顔帰汽記牛魚京強教玉近形計元原古戸午後語交光工広考行高合国黒今才作算市思止紙寺時自室社弱首秋春書少場色食心新親図数星晴声西切雪船前組草走多太体台谷知地池竹茶昼朝長鳥通弟店点電冬刀東当答頭同道読南馬買売麦半番父風分聞米歩母方北妹毎明鳴毛門夜野友曜用来理里話 +悪安暗意医育員飲院運泳駅園横屋温化荷界開階角活寒感館岸岩期起客宮急球究級去橋業局曲銀苦具君係兄軽決血研県言庫湖公向幸港号根祭細坂仕使始指死詩歯事持次式実写者主取守酒受州拾終習週集住重所暑助勝商昭消章乗植深申神身進世整線全送息族他打対待代第題炭短着柱注丁帳調直追定庭鉄転登都度島投湯等動童内肉農波配畑発反板悲皮美鼻氷表病秒品負部服福物平返勉放万味命面問役薬油有由遊予様洋葉陽落流旅両緑礼列路和 +愛案以位囲委胃衣印栄英塩央億加課貨芽改械害各覚完官漢管観関願喜器希旗機季紀議救求給挙漁競共協鏡極区軍郡型景芸欠結健建験固候功康航告差最菜材昨刷察殺参散産残司史士姉氏試治辞失借種周宿順初唱焼照省象賞信真臣勢成清静席積折節説戦浅選然倉想争相側速続卒孫帯隊達単談置貯腸低停底的典伝徒努灯働堂毒熱念敗倍博飯費飛必筆標票不付夫府副粉兵別変辺便包法望牧末満脈民約勇要養浴利陸料良量輪類令例冷歴練連労老録 +圧易移因営永衛液益演往応恩仮価果河過賀解快格確額刊幹慣歓眼基寄規技義逆久旧居許境興均禁句訓群経潔件券検絹険減現限個故護効厚構耕講鉱混査再妻採災際在罪財雑蚕賛酸師志支資似児示識質舎謝授収修衆祝術述準序除承招称証常情条状織職制性政精製税績責接設絶舌銭善祖素総像増造則測属損態貸退団断築張提程敵適統導銅得徳特独任燃能破判版犯比肥非備俵評貧婦富布武復複仏編弁保墓報豊暴貿防未務無迷綿輸余預容率略留領 +異遺域壱宇羽映延沿可我灰街拡閣革割株勧巻干看簡丸危揮机貴疑吸弓泣供胸郷勤筋径敬系警劇穴兼憲権厳源呼己誤后好孝皇紅鋼降刻穀骨困砂座済裁策冊姿私至視詞誌磁射捨尺釈若樹需宗就従縦縮熟純処署諸傷将笑障城蒸針仁垂推寸是聖誠宣専泉洗染創奏層操窓臓蔵俗存尊宅担探暖段値仲宙忠著兆庁潮頂賃痛展党糖討届難弐乳認納脳派俳拝背肺班晩否批秘腹奮閉陛片補宝訪亡忘棒枚幕密盟模矢訳優郵幼羊欲翌乱卵覧裏律臨朗論".split + + SHOUKEI = 0 #象形文字 + SHIJI = 1 #指事文字 + KAII = 2 #会意文字 + KEISEI = 3 #形声文字 + + #0文字の行もあるため、漢字列の最後に-を入れた。利用するときにはchopすること。 + KYOIKU_KUBUN_LIST = " +右雨王音火貝九玉金月犬見口左山子糸耳車手十女人水夕石川早足大竹虫天田土日年白文木目立力六- +一二三四五下七小上生中入八本- +円休出森正赤千男町名林- +花学気空校字青先草村百- +羽雲夏画回会外角弓牛魚京兄原戸古午工交行高黄才止矢自首心西長鳥弟刀東肉馬米歩母方北万毛門用来- +- +科楽岩顔汽教計公谷黒今思春少声雪走多太台直電内売半番父明鳴友里- +引園遠何家歌海絵活間丸記帰強近形元言後語広光考合国細作算市姉紙寺時室社弱配秋週書場色食新親図数星晴切船線前組体地池茶昼朝通店点冬当答頭同道読南買麦風分聞毎妹夜野曜理話- +向皿主州章申身丁豆平面由予羊両業曲- +- +悪安飲運央屋化開客究級宮去橋区血決県庫幸港祭仕死使者守取酒集重宿暑助乗植神真深世整全想息族他対第調庭投農畑皮悲美氷秒品福物辺勉牧命役有遊葉陽流旅- +晴医委育員院泳駅横温荷界階寒感漢館岸起期急球局銀苦具君係軽研湖号根始指歯詩次事持式実写受拾終習住所昭消商勝進昔相送速打待代題炭短談着注柱帳追定笛鉄転都度島湯登等動童波配倍箱発反坂板鼻筆表病負部服味問薬油洋様落録礼列練路和- +衣栄果求士氏臣巣束単兆飛必不包未無要良老- +卒末- +以位胃印加官希器共競好告最刷殺参史児祝初信折争倉孫帯典働毒夫付兵別脈民利料令連労- +愛案囲英塩億貨課芽改械害街各覚完管関観願季紀喜旗機議泣救給挙漁協鏡極訓軍郡径型景芸欠結建健験固功候航康差菜材昨札察産散残司試治辞失借種周順松笑唱焼照賞成省清静席積節説浅戦選然側続隊達置仲貯帳低底停的伝徒努灯堂特得熱念敗梅博飯費票標府副粉辺変便法望牧満約勇養浴陸量輸類冷例歴録- +易益再率- +久支非- +因永衛採示授制性製則断燃弁保綿- +圧移営液演応往桜恩可仮価河過賀快解格確額刊幹慣眼基寄規技義逆旧居許境均禁句群経潔件券検限現減個護効厚耕鉱構興講混査災妻際在財罪雑酸賛志枝師資飼似識質舎謝政勢精税責績接設舌絶銭祖素総造像増測属損退貸態団築張提程適敵統銅導徳独任能破犯判版比肥備俵評貧布婦富武復複仏編墓報豊防貿暴務夢迷輸余預容略留領- +異我革系千己后冊至尺泉片卵並- +寸- +域看危郷筋孝皇降刻穀骨困砂座 裁策蚕私姿射宗衆従仁染善奏尊段難乳脳拝班奮亡牧郵幼- +遺宇映延沿灰拡閣割株巻簡机揮貴疑吸供胸勤敬警劇激穴絹権憲源厳呼誤紅鋼済視詞誌磁捨若樹収就縦縮熟純処署諸除将傷障城蒸針垂推盛聖誠宣専洗窓創装層操蔵臓存宅担探誕暖値宙忠著庁頂潮賃痛展討党糖届認納派背肺俳晩否批秘腹陛閉補暮宝訪忘棒幕密盟模訳優欲翌乱覧裏律臨朗論- +".split + + JOYO_KANJI_LIST = "亜哀愛悪握圧扱安暗案以位依偉囲委威尉意慰易為異移維緯胃衣違遺医井域育一壱逸稲芋印員因姻引飲院陰隠韻右宇羽雨渦浦運雲営影映栄永泳英衛詠鋭液疫益駅悦謁越閲円園宴延援沿演炎煙猿縁遠鉛塩汚凹央奥往応押横欧殴王翁黄沖億屋憶乙卸恩温穏音下化仮何価佳加可夏嫁家寡科暇果架歌河火禍稼箇花荷華菓課貨過蚊我画芽賀雅餓介会解回塊壊快怪悔懐戒拐改械海灰界皆絵開階貝劾外害慨概涯街該垣嚇各拡格核殻獲確穫覚角較郭閣隔革学岳楽額掛潟割喝括活渇滑褐轄且株刈乾冠寒刊勘勧巻喚堪完官寛干幹患感慣憾換敢棺款歓汗漢環甘監看管簡緩缶肝艦観貫還鑑間閑関陥館丸含岸眼岩頑顔願企危喜器基奇寄岐希幾忌揮机旗既期棋棄機帰気汽祈季紀規記貴起軌輝飢騎鬼偽儀宜戯技擬欺犠疑義議菊吉喫詰却客脚虐逆丘久休及吸宮弓急救朽求泣球究窮級糾給旧牛去居巨拒拠挙虚許距漁魚享京供競共凶協叫境峡強恐恭挟教橋況狂狭矯胸脅興郷鏡響驚仰凝暁業局曲極玉勤均斤琴禁筋緊菌襟謹近金吟銀九句区苦駆具愚虞空偶遇隅屈掘靴繰桑勲君薫訓群軍郡係傾刑兄啓型契形径恵慶憩掲携敬景渓系経継茎蛍計警軽鶏芸迎鯨劇撃激傑欠決潔穴結血月件倹健兼券剣圏堅嫌建憲懸検権犬献研絹県肩見謙賢軒遣険顕験元原厳幻弦減源玄現言限個古呼固孤己庫弧戸故枯湖誇雇顧鼓五互午呉娯後御悟碁語誤護交侯候光公功効厚口向后坑好孔孝工巧幸広康恒慌抗拘控攻更校構江洪港溝甲皇硬稿紅絞綱耕考肯航荒行衡講貢購郊酵鉱鋼降項香高剛号合拷豪克刻告国穀酷黒獄腰骨込今困墾婚恨懇昆根混紺魂佐唆左差査砂詐鎖座債催再最妻宰彩才採栽歳済災砕祭斎細菜裁載際剤在材罪財坂咲崎作削搾昨策索錯桜冊刷察撮擦札殺雑皿三傘参山惨散桟産算蚕賛酸暫残仕伺使刺司史嗣四士始姉姿子市師志思指支施旨枝止死氏祉私糸紙紫肢脂至視詞詩試誌諮資賜雌飼歯事似侍児字寺慈持時次滋治璽磁示耳自辞式識軸七執失室湿漆疾質実芝舎写射捨赦斜煮社者謝車遮蛇邪借勺尺爵酌釈若寂弱主取守手朱殊狩珠種趣酒首儒受寿授樹需囚収周宗就州修愁拾秀秋終習臭舟衆襲週酬集醜住充十従柔汁渋獣縦重銃叔宿淑祝縮粛塾熟出術述俊春瞬准循旬殉準潤盾純巡遵順処初所暑庶緒署書諸助叙女序徐除傷償勝匠升召商唱奨宵将小少尚床彰承抄招掌昇昭晶松沼消渉焼焦照症省硝礁祥称章笑粧紹肖衝訟証詔詳象賞鐘障上丈乗冗剰城場壌嬢常情条浄状畳蒸譲醸錠嘱飾植殖織職色触食辱伸信侵唇娠寝審心慎振新森浸深申真神紳臣薪親診身辛進針震人仁刃尋甚尽迅陣酢図吹垂帥推水炊睡粋衰遂酔錘随髄崇数枢据杉澄寸世瀬畝是制勢姓征性成政整星晴正清牲生盛精聖声製西誠誓請逝青静斉税隻席惜斥昔析石積籍績責赤跡切拙接摂折設窃節説雪絶舌仙先千占宣専川戦扇栓泉浅洗染潜旋線繊船薦践選遷銭銑鮮前善漸然全禅繕塑措疎礎祖租粗素組訴阻僧創双倉喪壮奏層想捜掃挿操早曹巣槽燥争相窓総草荘葬藻装走送遭霜騒像増憎臓蔵贈造促側則即息束測足速俗属賊族続卒存孫尊損村他多太堕妥惰打駄体対耐帯待怠態替泰滞胎袋貸退逮隊代台大第題滝卓宅択拓沢濯託濁諾但達奪脱棚谷丹単嘆担探淡炭短端胆誕鍛団壇弾断暖段男談値知地恥池痴稚置致遅築畜竹蓄逐秩窒茶嫡着中仲宙忠抽昼柱注虫衷鋳駐著貯丁兆帳庁弔張彫徴懲挑朝潮町眺聴脹腸調超跳長頂鳥勅直朕沈珍賃鎮陳津墜追痛通塚漬坪釣亭低停偵貞呈堤定帝底庭廷弟抵提程締艇訂逓邸泥摘敵滴的笛適哲徹撤迭鉄典天展店添転点伝殿田電吐塗徒斗渡登途都努度土奴怒倒党冬凍刀唐塔島悼投搭東桃棟盗湯灯当痘等答筒糖統到討謄豆踏逃透陶頭騰闘働動同堂導洞童胴道銅峠匿得徳特督篤毒独読凸突届屯豚曇鈍内縄南軟難二尼弐肉日乳入如尿任妊忍認寧猫熱年念燃粘悩濃納能脳農把覇波派破婆馬俳廃拝排敗杯背肺輩配倍培媒梅買売賠陪伯博拍泊白舶薄迫漠爆縛麦箱肌畑八鉢発髪伐罰抜閥伴判半反帆搬板版犯班畔繁般藩販範煩頒飯晩番盤蛮卑否妃彼悲扉批披比泌疲皮碑秘罷肥被費避非飛備尾微美鼻匹必筆姫百俵標氷漂票表評描病秒苗品浜貧賓頻敏瓶不付夫婦富布府怖扶敷普浮父符腐膚譜負賦赴附侮武舞部封風伏副復幅服福腹複覆払沸仏物分噴墳憤奮粉紛雰文聞丙併兵塀幣平弊柄並閉陛米壁癖別偏変片編辺返遍便勉弁保舗捕歩補穂募墓慕暮母簿倣俸包報奉宝峰崩抱放方法泡砲縫胞芳褒訪豊邦飽乏亡傍剖坊妨帽忘忙房暴望某棒冒紡肪膨謀貿防北僕墨撲朴牧没堀奔本翻凡盆摩磨魔麻埋妹枚毎幕膜又抹末繭万慢満漫味未魅岬密脈妙民眠務夢無矛霧婿娘名命明盟迷銘鳴滅免綿面模茂妄毛猛盲網耗木黙目戻問紋門匁夜野矢厄役約薬訳躍柳愉油癒諭輸唯優勇友幽悠憂有猶由裕誘遊郵雄融夕予余与誉預幼容庸揚揺擁曜様洋溶用窯羊葉要謡踊陽養抑欲浴翌翼羅裸来頼雷絡落酪乱卵欄濫覧利吏履理痢裏里離陸律率立略流留硫粒隆竜慮旅虜了僚両寮料涼猟療糧良量陵領力緑倫厘林臨輪隣塁涙累類令例冷励礼鈴隷零霊麗齢暦歴列劣烈裂廉恋練連錬炉路露労廊朗楼浪漏老郎六録論和話賄惑枠湾腕" + + JINMEI_KANJI_LIST = "丑丞乃之也亘亥亦亨亮伊伍伎伶伽佑侃侑倖倭偲允冴冶凌凜凪凱勁匡卯叡只叶吾呂哉唄啄喬嘉圭尭奈奎媛嬉孟宏宥寅峻崚嵐嵩嵯嶺巌巳巴巽庄弘弥彗彦彪彬怜恕悌惇惟惣慧憧拳捷捺敦斐於旦旭旺昂昌昴晃晋晏晟晨智暉暢曙朋朔李杏杜柊柚柾栗栞桂桐梓梢梧梨椋椎椰椿楊楓楠榛槙槻樺橘檀欣欽毅毬汀汐汰沙洲洵洸浩淳渚渥湧滉漱澪煕熊燎燦燿爽爾猪玖玲琢琳瑚瑛瑞瑠瑶瑳璃甫皐皓眉眸睦瞭瞳矩碧碩磯祐禄禎秦稀稔稜穣竣笙笹紗紘紬絃絢綜綸綺綾緋翔翠耀耶聡肇胡胤脩舜艶芙芹苑茉茄茅茜莉莞菖菫萌萩葵蒔蒼蓉蓮蔦蕉蕗藍藤蘭虎虹蝶衿袈裟詢誼諄諒赳輔辰迪遥遼邑那郁酉醇釆錦鎌阿隼雛霞靖鞠須頌颯馨駒駿魁鮎鯉鯛鳩鳳鴻鵬鶴鷹鹿麟麿黎黛亀" + + JISX0208_KANJI_LIST = "亜唖娃阿哀愛挨姶逢葵茜穐悪握渥旭葦芦鯵梓圧斡扱宛姐虻飴絢綾鮎或粟袷安庵按暗案闇鞍杏以伊位依偉囲夷委威尉惟意慰易椅為畏異移維緯胃萎衣謂違遺医井亥域育郁磯一壱溢逸稲茨芋鰯允印咽員因姻引飲淫胤蔭院陰隠韻吋右宇烏羽迂雨卯鵜窺丑碓臼渦嘘唄欝蔚鰻姥厩浦瓜閏噂云運雲荏餌叡営嬰影映曳栄永泳洩瑛盈穎頴英衛詠鋭液疫益駅悦謁越閲榎厭円園堰奄宴延怨掩援沿演炎焔煙燕猿縁艶苑薗遠鉛鴛塩於汚甥凹央奥往応押旺横欧殴王翁襖鴬鴎黄岡沖荻億屋憶臆桶牡乙俺卸恩温穏音下化仮何伽価佳加可嘉夏嫁家寡科暇果架歌河火珂禍禾稼箇花苛茄荷華菓蝦課嘩貨迦過霞蚊俄峨我牙画臥芽蛾賀雅餓駕介会解回塊壊廻快怪悔恢懐戒拐改魁晦械海灰界皆絵芥蟹開階貝凱劾外咳害崖慨概涯碍蓋街該鎧骸浬馨蛙垣柿蛎鈎劃嚇各廓拡撹格核殻獲確穫覚角赫較郭閣隔革学岳楽額顎掛笠樫橿梶鰍潟割喝恰括活渇滑葛褐轄且鰹叶椛樺鞄株兜竃蒲釜鎌噛鴨栢茅萱粥刈苅瓦乾侃冠寒刊勘勧巻喚堪姦完官寛干幹患感慣憾換敢柑桓棺款歓汗漢澗潅環甘監看竿管簡緩缶翰肝艦莞観諌貫還鑑間閑関陥韓館舘丸含岸巌玩癌眼岩翫贋雁頑顔願企伎危喜器基奇嬉寄岐希幾忌揮机旗既期棋棄機帰毅気汽畿祈季稀紀徽規記貴起軌輝飢騎鬼亀偽儀妓宜戯技擬欺犠疑祇義蟻誼議掬菊鞠吉吃喫桔橘詰砧杵黍却客脚虐逆丘久仇休及吸宮弓急救朽求汲泣灸球究窮笈級糾給旧牛去居巨拒拠挙渠虚許距鋸漁禦魚亨享京供侠僑兇競共凶協匡卿叫喬境峡強彊怯恐恭挟教橋況狂狭矯胸脅興蕎郷鏡響饗驚仰凝尭暁業局曲極玉桐粁僅勤均巾錦斤欣欽琴禁禽筋緊芹菌衿襟謹近金吟銀九倶句区狗玖矩苦躯駆駈駒具愚虞喰空偶寓遇隅串櫛釧屑屈掘窟沓靴轡窪熊隈粂栗繰桑鍬勲君薫訓群軍郡卦袈祁係傾刑兄啓圭珪型契形径恵慶慧憩掲携敬景桂渓畦稽系経継繋罫茎荊蛍計詣警軽頚鶏芸迎鯨劇戟撃激隙桁傑欠決潔穴結血訣月件倹倦健兼券剣喧圏堅嫌建憲懸拳捲検権牽犬献研硯絹県肩見謙賢軒遣鍵険顕験鹸元原厳幻弦減源玄現絃舷言諺限乎個古呼固姑孤己庫弧戸故枯湖狐糊袴股胡菰虎誇跨鈷雇顧鼓五互伍午呉吾娯後御悟梧檎瑚碁語誤護醐乞鯉交佼侯候倖光公功効勾厚口向后喉坑垢好孔孝宏工巧巷幸広庚康弘恒慌抗拘控攻昂晃更杭校梗構江洪浩港溝甲皇硬稿糠紅紘絞綱耕考肯肱腔膏航荒行衡講貢購郊酵鉱砿鋼閤降項香高鴻剛劫号合壕拷濠豪轟麹克刻告国穀酷鵠黒獄漉腰甑忽惚骨狛込此頃今困坤墾婚恨懇昏昆根梱混痕紺艮魂些佐叉唆嵯左差査沙瑳砂詐鎖裟坐座挫債催再最哉塞妻宰彩才採栽歳済災采犀砕砦祭斎細菜裁載際剤在材罪財冴坂阪堺榊肴咲崎埼碕鷺作削咋搾昨朔柵窄策索錯桜鮭笹匙冊刷察拶撮擦札殺薩雑皐鯖捌錆鮫皿晒三傘参山惨撒散桟燦珊産算纂蚕讃賛酸餐斬暫残仕仔伺使刺司史嗣四士始姉姿子屍市師志思指支孜斯施旨枝止死氏獅祉私糸紙紫肢脂至視詞詩試誌諮資賜雌飼歯事似侍児字寺慈持時次滋治爾璽痔磁示而耳自蒔辞汐鹿式識鴫竺軸宍雫七叱執失嫉室悉湿漆疾質実蔀篠偲柴芝屡蕊縞舎写射捨赦斜煮社紗者謝車遮蛇邪借勺尺杓灼爵酌釈錫若寂弱惹主取守手朱殊狩珠種腫趣酒首儒受呪寿授樹綬需囚収周宗就州修愁拾洲秀秋終繍習臭舟蒐衆襲讐蹴輯週酋酬集醜什住充十従戎柔汁渋獣縦重銃叔夙宿淑祝縮粛塾熟出術述俊峻春瞬竣舜駿准循旬楯殉淳準潤盾純巡遵醇順処初所暑曙渚庶緒署書薯藷諸助叙女序徐恕鋤除傷償勝匠升召哨商唱嘗奨妾娼宵将小少尚庄床廠彰承抄招掌捷昇昌昭晶松梢樟樵沼消渉湘焼焦照症省硝礁祥称章笑粧紹肖菖蒋蕉衝裳訟証詔詳象賞醤鉦鍾鐘障鞘上丈丞乗冗剰城場壌嬢常情擾条杖浄状畳穣蒸譲醸錠嘱埴飾拭植殖燭織職色触食蝕辱尻伸信侵唇娠寝審心慎振新晋森榛浸深申疹真神秦紳臣芯薪親診身辛進針震人仁刃塵壬尋甚尽腎訊迅陣靭笥諏須酢図厨逗吹垂帥推水炊睡粋翠衰遂酔錐錘随瑞髄崇嵩数枢趨雛据杉椙菅頗雀裾澄摺寸世瀬畝是凄制勢姓征性成政整星晴棲栖正清牲生盛精聖声製西誠誓請逝醒青静斉税脆隻席惜戚斥昔析石積籍績脊責赤跡蹟碩切拙接摂折設窃節説雪絶舌蝉仙先千占宣専尖川戦扇撰栓栴泉浅洗染潜煎煽旋穿箭線繊羨腺舛船薦詮賎践選遷銭銑閃鮮前善漸然全禅繕膳糎噌塑岨措曾曽楚狙疏疎礎祖租粗素組蘇訴阻遡鼠僧創双叢倉喪壮奏爽宋層匝惣想捜掃挿掻操早曹巣槍槽漕燥争痩相窓糟総綜聡草荘葬蒼藻装走送遭鎗霜騒像増憎臓蔵贈造促側則即息捉束測足速俗属賊族続卒袖其揃存孫尊損村遜他多太汰詑唾堕妥惰打柁舵楕陀駄騨体堆対耐岱帯待怠態戴替泰滞胎腿苔袋貸退逮隊黛鯛代台大第醍題鷹滝瀧卓啄宅托択拓沢濯琢託鐸濁諾茸凧蛸只叩但達辰奪脱巽竪辿棚谷狸鱈樽誰丹単嘆坦担探旦歎淡湛炭短端箪綻耽胆蛋誕鍛団壇弾断暖檀段男談値知地弛恥智池痴稚置致蜘遅馳築畜竹筑蓄逐秩窒茶嫡着中仲宙忠抽昼柱注虫衷註酎鋳駐樗瀦猪苧著貯丁兆凋喋寵帖帳庁弔張彫徴懲挑暢朝潮牒町眺聴脹腸蝶調諜超跳銚長頂鳥勅捗直朕沈珍賃鎮陳津墜椎槌追鎚痛通塚栂掴槻佃漬柘辻蔦綴鍔椿潰坪壷嬬紬爪吊釣鶴亭低停偵剃貞呈堤定帝底庭廷弟悌抵挺提梯汀碇禎程締艇訂諦蹄逓邸鄭釘鼎泥摘擢敵滴的笛適鏑溺哲徹撤轍迭鉄典填天展店添纏甜貼転顛点伝殿澱田電兎吐堵塗妬屠徒斗杜渡登菟賭途都鍍砥砺努度土奴怒倒党冬凍刀唐塔塘套宕島嶋悼投搭東桃梼棟盗淘湯涛灯燈当痘祷等答筒糖統到董蕩藤討謄豆踏逃透鐙陶頭騰闘働動同堂導憧撞洞瞳童胴萄道銅峠鴇匿得徳涜特督禿篤毒独読栃橡凸突椴届鳶苫寅酉瀞噸屯惇敦沌豚遁頓呑曇鈍奈那内乍凪薙謎灘捺鍋楢馴縄畷南楠軟難汝二尼弐迩匂賑肉虹廿日乳入如尿韮任妊忍認濡禰祢寧葱猫熱年念捻撚燃粘乃廼之埜嚢悩濃納能脳膿農覗蚤巴把播覇杷波派琶破婆罵芭馬俳廃拝排敗杯盃牌背肺輩配倍培媒梅楳煤狽買売賠陪這蝿秤矧萩伯剥博拍柏泊白箔粕舶薄迫曝漠爆縛莫駁麦函箱硲箸肇筈櫨幡肌畑畠八鉢溌発醗髪伐罰抜筏閥鳩噺塙蛤隼伴判半反叛帆搬斑板氾汎版犯班畔繁般藩販範釆煩頒飯挽晩番盤磐蕃蛮匪卑否妃庇彼悲扉批披斐比泌疲皮碑秘緋罷肥被誹費避非飛樋簸備尾微枇毘琵眉美鼻柊稗匹疋髭彦膝菱肘弼必畢筆逼桧姫媛紐百謬俵彪標氷漂瓢票表評豹廟描病秒苗錨鋲蒜蛭鰭品彬斌浜瀕貧賓頻敏瓶不付埠夫婦富冨布府怖扶敷斧普浮父符腐膚芙譜負賦赴阜附侮撫武舞葡蕪部封楓風葺蕗伏副復幅服福腹複覆淵弗払沸仏物鮒分吻噴墳憤扮焚奮粉糞紛雰文聞丙併兵塀幣平弊柄並蔽閉陛米頁僻壁癖碧別瞥蔑箆偏変片篇編辺返遍便勉娩弁鞭保舗鋪圃捕歩甫補輔穂募墓慕戊暮母簿菩倣俸包呆報奉宝峰峯崩庖抱捧放方朋法泡烹砲縫胞芳萌蓬蜂褒訪豊邦鋒飽鳳鵬乏亡傍剖坊妨帽忘忙房暴望某棒冒紡肪膨謀貌貿鉾防吠頬北僕卜墨撲朴牧睦穆釦勃没殆堀幌奔本翻凡盆摩磨魔麻埋妹昧枚毎哩槙幕膜枕鮪柾鱒桝亦俣又抹末沫迄侭繭麿万慢満漫蔓味未魅巳箕岬密蜜湊蓑稔脈妙粍民眠務夢無牟矛霧鵡椋婿娘冥名命明盟迷銘鳴姪牝滅免棉綿緬面麺摸模茂妄孟毛猛盲網耗蒙儲木黙目杢勿餅尤戻籾貰問悶紋門匁也冶夜爺耶野弥矢厄役約薬訳躍靖柳薮鑓愉愈油癒諭輸唯佑優勇友宥幽悠憂揖有柚湧涌猶猷由祐裕誘遊邑郵雄融夕予余与誉輿預傭幼妖容庸揚揺擁曜楊様洋溶熔用窯羊耀葉蓉要謡踊遥陽養慾抑欲沃浴翌翼淀羅螺裸来莱頼雷洛絡落酪乱卵嵐欄濫藍蘭覧利吏履李梨理璃痢裏裡里離陸律率立葎掠略劉流溜琉留硫粒隆竜龍侶慮旅虜了亮僚両凌寮料梁涼猟療瞭稜糧良諒遼量陵領力緑倫厘林淋燐琳臨輪隣鱗麟瑠塁涙累類令伶例冷励嶺怜玲礼苓鈴隷零霊麗齢暦歴列劣烈裂廉恋憐漣煉簾練聯蓮連錬呂魯櫓炉賂路露労婁廊弄朗楼榔浪漏牢狼篭老聾蝋郎六麓禄肋録論倭和話歪賄脇惑枠鷲亙亘鰐詫藁蕨椀湾碗腕弌丐丕个丱丶丼丿乂乖乘亂亅豫亊舒弍于亞亟亠亢亰亳亶从仍仄仆仂仗仞仭仟价伉佚估佛佝佗佇佶侈侏侘佻佩佰侑佯來侖儘俔俟俎俘俛俑俚俐俤俥倚倨倔倪倥倅伜俶倡倩倬俾俯們倆偃假會偕偐偈做偖偬偸傀傚傅傴傲僉僊傳僂僖僞僥僭僣僮價僵儉儁儂儖儕儔儚儡儺儷儼儻儿兀兒兌兔兢竸兩兪兮冀冂囘册冉冏冑冓冕冖冤冦冢冩冪冫决冱冲冰况冽凅凉凛几處凩凭凰凵凾刄刋刔刎刧刪刮刳刹剏剄剋剌剞剔剪剴剩剳剿剽劍劔劒剱劈劑辨辧劬劭劼劵勁勍勗勞勣勦飭勠勳勵勸勹匆匈甸匍匐匏匕匚匣匯匱匳匸區卆卅丗卉卍凖卞卩卮夘卻卷厂厖厠厦厥厮厰厶參簒雙叟曼燮叮叨叭叺吁吽呀听吭吼吮吶吩吝呎咏呵咎呟呱呷呰咒呻咀呶咄咐咆哇咢咸咥咬哄哈咨咫哂咤咾咼哘哥哦唏唔哽哮哭哺哢唹啀啣啌售啜啅啖啗唸唳啝喙喀咯喊喟啻啾喘喞單啼喃喩喇喨嗚嗅嗟嗄嗜嗤嗔嘔嗷嘖嗾嗽嘛嗹噎噐營嘴嘶嘲嘸噫噤嘯噬噪嚆嚀嚊嚠嚔嚏嚥嚮嚶嚴囂嚼囁囃囀囈囎囑囓囗囮囹圀囿圄圉圈國圍圓團圖嗇圜圦圷圸坎圻址坏坩埀垈坡坿垉垓垠垳垤垪垰埃埆埔埒埓堊埖埣堋堙堝塲堡塢塋塰毀塒堽塹墅墹墟墫墺壞墻墸墮壅壓壑壗壙壘壥壜壤壟壯壺壹壻壼壽夂夊夐夛梦夥夬夭夲夸夾竒奕奐奎奚奘奢奠奧奬奩奸妁妝佞侫妣妲姆姨姜妍姙姚娥娟娑娜娉娚婀婬婉娵娶婢婪媚媼媾嫋嫂媽嫣嫗嫦嫩嫖嫺嫻嬌嬋嬖嬲嫐嬪嬶嬾孃孅孀孑孕孚孛孥孩孰孳孵學斈孺宀它宦宸寃寇寉寔寐寤實寢寞寥寫寰寶寳尅將專對尓尠尢尨尸尹屁屆屎屓屐屏孱屬屮乢屶屹岌岑岔妛岫岻岶岼岷峅岾峇峙峩峽峺峭嶌峪崋崕崗嵜崟崛崑崔崢崚崙崘嵌嵒嵎嵋嵬嵳嵶嶇嶄嶂嶢嶝嶬嶮嶽嶐嶷嶼巉巍巓巒巖巛巫已巵帋帚帙帑帛帶帷幄幃幀幎幗幔幟幢幤幇幵并幺麼广庠廁廂廈廐廏廖廣廝廚廛廢廡廨廩廬廱廳廰廴廸廾弃弉彝彜弋弑弖弩弭弸彁彈彌彎弯彑彖彗彙彡彭彳彷徃徂彿徊很徑徇從徙徘徠徨徭徼忖忻忤忸忱忝悳忿怡恠怙怐怩怎怱怛怕怫怦怏怺恚恁恪恷恟恊恆恍恣恃恤恂恬恫恙悁悍惧悃悚悄悛悖悗悒悧悋惡悸惠惓悴忰悽惆悵惘慍愕愆惶惷愀惴惺愃愡惻惱愍愎慇愾愨愧慊愿愼愬愴愽慂慄慳慷慘慙慚慫慴慯慥慱慟慝慓慵憙憖憇憬憔憚憊憑憫憮懌懊應懷懈懃懆憺懋罹懍懦懣懶懺懴懿懽懼懾戀戈戉戍戌戔戛戞戡截戮戰戲戳扁扎扞扣扛扠扨扼抂抉找抒抓抖拔抃抔拗拑抻拏拿拆擔拈拜拌拊拂拇抛拉挌拮拱挧挂挈拯拵捐挾捍搜捏掖掎掀掫捶掣掏掉掟掵捫捩掾揩揀揆揣揉插揶揄搖搴搆搓搦搶攝搗搨搏摧摯摶摎攪撕撓撥撩撈撼據擒擅擇撻擘擂擱擧舉擠擡抬擣擯攬擶擴擲擺攀擽攘攜攅攤攣攫攴攵攷收攸畋效敖敕敍敘敞敝敲數斂斃變斛斟斫斷旃旆旁旄旌旒旛旙无旡旱杲昊昃旻杳昵昶昴昜晏晄晉晁晞晝晤晧晨晟晢晰暃暈暎暉暄暘暝曁暹曉暾暼曄暸曖曚曠昿曦曩曰曵曷朏朖朞朦朧霸朮朿朶杁朸朷杆杞杠杙杣杤枉杰枩杼杪枌枋枦枡枅枷柯枴柬枳柩枸柤柞柝柢柮枹柎柆柧檜栞框栩桀桍栲桎梳栫桙档桷桿梟梏梭梔條梛梃檮梹桴梵梠梺椏梍桾椁棊椈棘椢椦棡椌棍棔棧棕椶椒椄棗棣椥棹棠棯椨椪椚椣椡棆楹楷楜楸楫楔楾楮椹楴椽楙椰楡楞楝榁楪榲榮槐榿槁槓榾槎寨槊槝榻槃榧樮榑榠榜榕榴槞槨樂樛槿權槹槲槧樅榱樞槭樔槫樊樒櫁樣樓橄樌橲樶橸橇橢橙橦橈樸樢檐檍檠檄檢檣檗蘗檻櫃櫂檸檳檬櫞櫑櫟檪櫚櫪櫻欅蘖櫺欒欖鬱欟欸欷盜欹飮歇歃歉歐歙歔歛歟歡歸歹歿殀殄殃殍殘殕殞殤殪殫殯殲殱殳殷殼毆毋毓毟毬毫毳毯麾氈氓气氛氤氣汞汕汢汪沂沍沚沁沛汾汨汳沒沐泄泱泓沽泗泅泝沮沱沾沺泛泯泙泪洟衍洶洫洽洸洙洵洳洒洌浣涓浤浚浹浙涎涕濤涅淹渕渊涵淇淦涸淆淬淞淌淨淒淅淺淙淤淕淪淮渭湮渮渙湲湟渾渣湫渫湶湍渟湃渺湎渤滿渝游溂溪溘滉溷滓溽溯滄溲滔滕溏溥滂溟潁漑灌滬滸滾漿滲漱滯漲滌漾漓滷澆潺潸澁澀潯潛濳潭澂潼潘澎澑濂潦澳澣澡澤澹濆澪濟濕濬濔濘濱濮濛瀉瀋濺瀑瀁瀏濾瀛瀚潴瀝瀘瀟瀰瀾瀲灑灣炙炒炯烱炬炸炳炮烟烋烝烙焉烽焜焙煥煕熈煦煢煌煖煬熏燻熄熕熨熬燗熹熾燒燉燔燎燠燬燧燵燼燹燿爍爐爛爨爭爬爰爲爻爼爿牀牆牋牘牴牾犂犁犇犒犖犢犧犹犲狃狆狄狎狒狢狠狡狹狷倏猗猊猜猖猝猴猯猩猥猾獎獏默獗獪獨獰獸獵獻獺珈玳珎玻珀珥珮珞璢琅瑯琥珸琲琺瑕琿瑟瑙瑁瑜瑩瑰瑣瑪瑶瑾璋璞璧瓊瓏瓔珱瓠瓣瓧瓩瓮瓲瓰瓱瓸瓷甄甃甅甌甎甍甕甓甞甦甬甼畄畍畊畉畛畆畚畩畤畧畫畭畸當疆疇畴疊疉疂疔疚疝疥疣痂疳痃疵疽疸疼疱痍痊痒痙痣痞痾痿痼瘁痰痺痲痳瘋瘍瘉瘟瘧瘠瘡瘢瘤瘴瘰瘻癇癈癆癜癘癡癢癨癩癪癧癬癰癲癶癸發皀皃皈皋皎皖皓皙皚皰皴皸皹皺盂盍盖盒盞盡盥盧盪蘯盻眈眇眄眩眤眞眥眦眛眷眸睇睚睨睫睛睥睿睾睹瞎瞋瞑瞠瞞瞰瞶瞹瞿瞼瞽瞻矇矍矗矚矜矣矮矼砌砒礦砠礪硅碎硴碆硼碚碌碣碵碪碯磑磆磋磔碾碼磅磊磬磧磚磽磴礇礒礑礙礬礫祀祠祗祟祚祕祓祺祿禊禝禧齋禪禮禳禹禺秉秕秧秬秡秣稈稍稘稙稠稟禀稱稻稾稷穃穗穉穡穢穩龝穰穹穽窈窗窕窘窖窩竈窰窶竅竄窿邃竇竊竍竏竕竓站竚竝竡竢竦竭竰笂笏笊笆笳笘笙笞笵笨笶筐筺笄筍笋筌筅筵筥筴筧筰筱筬筮箝箘箟箍箜箚箋箒箏筝箙篋篁篌篏箴篆篝篩簑簔篦篥籠簀簇簓篳篷簗簍篶簣簧簪簟簷簫簽籌籃籔籏籀籐籘籟籤籖籥籬籵粃粐粤粭粢粫粡粨粳粲粱粮粹粽糀糅糂糘糒糜糢鬻糯糲糴糶糺紆紂紜紕紊絅絋紮紲紿紵絆絳絖絎絲絨絮絏絣經綉絛綏絽綛綺綮綣綵緇綽綫總綢綯緜綸綟綰緘緝緤緞緻緲緡縅縊縣縡縒縱縟縉縋縢繆繦縻縵縹繃縷縲縺繧繝繖繞繙繚繹繪繩繼繻纃緕繽辮繿纈纉續纒纐纓纔纖纎纛纜缸缺罅罌罍罎罐网罕罔罘罟罠罨罩罧罸羂羆羃羈羇羌羔羞羝羚羣羯羲羹羮羶羸譱翅翆翊翕翔翡翦翩翳翹飜耆耄耋耒耘耙耜耡耨耿耻聊聆聒聘聚聟聢聨聳聲聰聶聹聽聿肄肆肅肛肓肚肭冐肬胛胥胙胝胄胚胖脉胯胱脛脩脣脯腋隋腆脾腓腑胼腱腮腥腦腴膃膈膊膀膂膠膕膤膣腟膓膩膰膵膾膸膽臀臂膺臉臍臑臙臘臈臚臟臠臧臺臻臾舁舂舅與舊舍舐舖舩舫舸舳艀艙艘艝艚艟艤艢艨艪艫舮艱艷艸艾芍芒芫芟芻芬苡苣苟苒苴苳苺莓范苻苹苞茆苜茉苙茵茴茖茲茱荀茹荐荅茯茫茗茘莅莚莪莟莢莖茣莎莇莊荼莵荳荵莠莉莨菴萓菫菎菽萃菘萋菁菷萇菠菲萍萢萠莽萸蔆菻葭萪萼蕚蒄葷葫蒭葮蒂葩葆萬葯葹萵蓊葢蒹蒿蒟蓙蓍蒻蓚蓐蓁蓆蓖蒡蔡蓿蓴蔗蔘蔬蔟蔕蔔蓼蕀蕣蕘蕈蕁蘂蕋蕕薀薤薈薑薊薨蕭薔薛藪薇薜蕷蕾薐藉薺藏薹藐藕藝藥藜藹蘊蘓蘋藾藺蘆蘢蘚蘰蘿虍乕虔號虧虱蚓蚣蚩蚪蚋蚌蚶蚯蛄蛆蚰蛉蠣蚫蛔蛞蛩蛬蛟蛛蛯蜒蜆蜈蜀蜃蛻蜑蜉蜍蛹蜊蜴蜿蜷蜻蜥蜩蜚蝠蝟蝸蝌蝎蝴蝗蝨蝮蝙蝓蝣蝪蠅螢螟螂螯蟋螽蟀蟐雖螫蟄螳蟇蟆螻蟯蟲蟠蠏蠍蟾蟶蟷蠎蟒蠑蠖蠕蠢蠡蠱蠶蠹蠧蠻衄衂衒衙衞衢衫袁衾袞衵衽袵衲袂袗袒袮袙袢袍袤袰袿袱裃裄裔裘裙裝裹褂裼裴裨裲褄褌褊褓襃褞褥褪褫襁襄褻褶褸襌褝襠襞襦襤襭襪襯襴襷襾覃覈覊覓覘覡覩覦覬覯覲覺覽覿觀觚觜觝觧觴觸訃訖訐訌訛訝訥訶詁詛詒詆詈詼詭詬詢誅誂誄誨誡誑誥誦誚誣諄諍諂諚諫諳諧諤諱謔諠諢諷諞諛謌謇謚諡謖謐謗謠謳鞫謦謫謾謨譁譌譏譎證譖譛譚譫譟譬譯譴譽讀讌讎讒讓讖讙讚谺豁谿豈豌豎豐豕豢豬豸豺貂貉貅貊貍貎貔豼貘戝貭貪貽貲貳貮貶賈賁賤賣賚賽賺賻贄贅贊贇贏贍贐齎贓賍贔贖赧赭赱赳趁趙跂趾趺跏跚跖跌跛跋跪跫跟跣跼踈踉跿踝踞踐踟蹂踵踰踴蹊蹇蹉蹌蹐蹈蹙蹤蹠踪蹣蹕蹶蹲蹼躁躇躅躄躋躊躓躑躔躙躪躡躬躰軆躱躾軅軈軋軛軣軼軻軫軾輊輅輕輒輙輓輜輟輛輌輦輳輻輹轅轂輾轌轉轆轎轗轜轢轣轤辜辟辣辭辯辷迚迥迢迪迯邇迴逅迹迺逑逕逡逍逞逖逋逧逶逵逹迸遏遐遑遒逎遉逾遖遘遞遨遯遶隨遲邂遽邁邀邊邉邏邨邯邱邵郢郤扈郛鄂鄒鄙鄲鄰酊酖酘酣酥酩酳酲醋醉醂醢醫醯醪醵醴醺釀釁釉釋釐釖釟釡釛釼釵釶鈞釿鈔鈬鈕鈑鉞鉗鉅鉉鉤鉈銕鈿鉋鉐銜銖銓銛鉚鋏銹銷鋩錏鋺鍄錮錙錢錚錣錺錵錻鍜鍠鍼鍮鍖鎰鎬鎭鎔鎹鏖鏗鏨鏥鏘鏃鏝鏐鏈鏤鐚鐔鐓鐃鐇鐐鐶鐫鐵鐡鐺鑁鑒鑄鑛鑠鑢鑞鑪鈩鑰鑵鑷鑽鑚鑼鑾钁鑿閂閇閊閔閖閘閙閠閨閧閭閼閻閹閾闊濶闃闍闌闕闔闖關闡闥闢阡阨阮阯陂陌陏陋陷陜陞陝陟陦陲陬隍隘隕隗險隧隱隲隰隴隶隸隹雎雋雉雍襍雜霍雕雹霄霆霈霓霎霑霏霖霙霤霪霰霹霽霾靄靆靈靂靉靜靠靤靦靨勒靫靱靹鞅靼鞁靺鞆鞋鞏鞐鞜鞨鞦鞣鞳鞴韃韆韈韋韜韭齏韲竟韶韵頏頌頸頤頡頷頽顆顏顋顫顯顰顱顴顳颪颯颱颶飄飃飆飩飫餃餉餒餔餘餡餝餞餤餠餬餮餽餾饂饉饅饐饋饑饒饌饕馗馘馥馭馮馼駟駛駝駘駑駭駮駱駲駻駸騁騏騅駢騙騫騷驅驂驀驃騾驕驍驛驗驟驢驥驤驩驫驪骭骰骼髀髏髑髓體髞髟髢髣髦髯髫髮髴髱髷髻鬆鬘鬚鬟鬢鬣鬥鬧鬨鬩鬪鬮鬯鬲魄魃魏魍魎魑魘魴鮓鮃鮑鮖鮗鮟鮠鮨鮴鯀鯊鮹鯆鯏鯑鯒鯣鯢鯤鯔鯡鰺鯲鯱鯰鰕鰔鰉鰓鰌鰆鰈鰒鰊鰄鰮鰛鰥鰤鰡鰰鱇鰲鱆鰾鱚鱠鱧鱶鱸鳧鳬鳰鴉鴈鳫鴃鴆鴪鴦鶯鴣鴟鵄鴕鴒鵁鴿鴾鵆鵈鵝鵞鵤鵑鵐鵙鵲鶉鶇鶫鵯鵺鶚鶤鶩鶲鷄鷁鶻鶸鶺鷆鷏鷂鷙鷓鷸鷦鷭鷯鷽鸚鸛鸞鹵鹹鹽麁麈麋麌麒麕麑麝麥麩麸麪麭靡黌黎黏黐黔黜點黝黠黥黨黯黴黶黷黹黻黼黽鼇鼈皷鼕鼡鼬鼾齊齒齔齣齟齠齡齦齧齬齪齷齲齶龕龜龠堯槇遙瑤凜熙" + + def basic() BASIC_KANJI_LIST; end + def awase() AWASE_KANJI_LIST; end + def test(num=0) TEST_KANJI_LIST[num]; end + def kyoiku(gakunen=nil, kubun=nil) + return KYOIKU_KANJI_LIST if gakunen.nil? + return gakunen.map {|nen| kyoiku(nen) }.join("") if gakunen.is_a?(Range) && kubun.nil? + return KYOIKU_GAKUNEN_LIST[gakunen-1] if gakunen.is_a?(Integer) && kubun.nil? #1引いてます + return gakunen.map {|nen| kyoiku(nen, kubun) }.join("") if gakunen.is_a?(Range) && (! kubun.nil?) + return KYOIKU_KUBUN_LIST[(gakunen-1)*4+kubun].chop if gakunen.is_a?(Integer) && (! kubun.nil?) + return "" + end + def joyo() JOYO_KANJI_LIST; end + def jinmei() JINMEI_KANJI_LIST; end + def jisx0208() JISX0208_KANJI_LIST; end + end +end diff --git a/chise/makegraph.rb b/chise/makegraph.rb new file mode 100755 index 0000000..b289295 --- /dev/null +++ b/chise/makegraph.rb @@ -0,0 +1,122 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +$LOAD_PATH << "../../lib" if $0 == __FILE__ +require "chise" +include CHISE +require "chise/network" + +module CHISE + class MakeGraph #==================================実務的な処理を引き受ける。 + #GRAPHVIZ_CMD = 0 # CLI + GRAPHVIZ_CMD = 1 # OLE + GRAPHVIZ_TYPE = Graphviz::TWOPI + #GRAPHVIZ_TYPE = Graphviz::NEATO + START_GAKUNEN = 1 + END_GAKUNEN = 6 + + def initialize() + @kl = KanjiList.instance + @hn = KanjiNetwork.new + end + + def go() + make_min + #make_html_all + #make_svg_all + end + + def make_html_all() + open("gakunen.html", "w"){|out| + out.print %Q|
\n|
+	  (1..2).each {|type|
+	  typestr = type.to_s
+	  out.print %Q|#{typestr}グラフ\n|
+	    (1..6).each {|gakunen|
+	    out.print %Q|小学#{gakunen}年生 |
+	      base = "han-#{type}-#{gakunen}-all"
+	    out.print %Q|all |
+	      (0..3).each {|kubun|
+	      base = "han-#{type}-#{gakunen}-#{kubun}"
+	      out.print %Q|#{kubun} |
+	    }
+	    out.print %Q|\n|
+	  }
+	}
+	out.print %Q|
\n| + } + end + + def make_min + @hn.reset() + @hn.make_network(@kl.awase(0)) + # @hn.make_network(@kl.awase(1)) + @hn.out("min.dot") #途中状態を保存 + #graphviz(Graphviz::NEATO, "min.dot", "min.svg") + graphviz(GRAPHVIZ_TYPE, "min.dot", "min.svg") + end + + def make_svg_all() + (START_GAKUNEN..END_GAKUNEN).each {|gakunen| + (0..3).each {|kubun| + make_dot(gakunen, kubun) + (2..2).each {|type| + make_svg(type, gakunen, kubun) + } + } + make_dot(gakunen, nil) + (2..2).each {|type| + unless type == 2 && 5 <= gakunen + make_svg(type, gakunen, nil) + end + } + } + end + + def gaku_name(gakunen, kubun=nil) + return "#{gakunen}nen-all" if kubun.nil? + return "#{gakunen}nen-#{kubun}" + end + + def make_dot(gakunen, kubun=nil) #Graphvizのtype, 学年, 象形、指示などの区分 + @hn.reset() + list = @kl.kyoiku(1..gakunen, kubun) + @hn.make_network(list) + dotf = "list-"+gaku_name(gakunen, kubun)+".dot" + @hn.out(dotf) #途中状態を保存する + p [dotf, "done"] + end + + def make_svg(type, gakunen, kubun=nil) #Graphvizのtype, 学年, 象形、指示などの区分 + g = gaku_name(gakunen, kubun) + dotf = "list-"+g+".dot" + svgf = "han-"+g+"-"+GraphvizCLI::NAMES[type]+".svg" + graphviz(type, dotf, svgf) + end + + def dot2svg(dir) + Dir.chdir(dir) + Dir.glob("*.dot").each {|inf| + out = inf.sub(/.dot$/, ".svg") + type = 1 if inf =~ /^han-1/ + type = 2 if inf =~ /^han-2/ + graphviz(type, inf, out) + } + end + + def graphviz(type, inf, out) + gv = GraphvizCLI.new() if GRAPHVIZ_CMD == 0 + gv = GraphvizOLE.new() if GRAPHVIZ_CMD == 1 + gv.type = type + gv.target = "svg" + gv.in = inf + gv.out = out + gv.generate() + end + + end +end + +if $0 == __FILE__ + mg = MakeGraph.new + mg.go +end diff --git a/chise/network.rb b/chise/network.rb new file mode 100755 index 0000000..18bb9a7 --- /dev/null +++ b/chise/network.rb @@ -0,0 +1,147 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +$LOAD_PATH << "../../lib" if $0 == __FILE__ +require "chise" +include CHISE +require "chise/kanjilist" +require "chise/defkanji" +require "chise/graphviz" + +module CHISE + class KanjiNetwork #=========================漢字間のネットワークを計算する。 + def initialize() + reset() + @formatter = GraphvizFormatter.new(self) + # @formatter = XSpringiesFormatter.new(self) + end + attr_reader :h, :list + def reset + @h = Hash.new + @done = Hash.new + @list = [] + end + def make_network(list) #@h, @listに結果を入れていく。 + list.each_char {|ch| + make_network_one(ch) + } + end + + def make_network_one(ch) #実質的にこれが本体。再帰で呼ばれる。 + return if @done[ch] + + @h[ch] = [] + @list << ch #登場文字リストに追加 + + char = ch.char + ids = char.decompose + if ids.char_length == 1 #分割できない文字は、リンクを探す。 + link = char.ids_represent if char.ids_represent + link = char.ids_link if char.ids_link + return if link.nil? + @h[ch] << link #親字に追加する。 + @list << link + return + end + + ids.each_char {|idsch| + idschar = idsch.char + next if idschar.is_ids? + @h[ch] << idsch #親字に追加する。 + @list << idsch + make_network_one(idsch) #再帰する。 + } + @done[ch] = true + end + + def nodes_and_edges() #これを二つのarrayで返す + nodear = [] + nodeh = Hash.new + @list.sort.uniq.each_with_index {|ch, index| + nodear[index] = ch + nodeh[ch] = index + } + edgear = [] + @h.each {|ch, ar| + ar.each {|idsch| + edgear << [nodeh[ch], nodeh[idsch]] + } + } + return nodear, edgear + end + + def out(filename) + open(filename, "w"){|out| out.print to_s } + end + def to_s() @formatter.to_s; end + end + + class GraphvizFormatter #======================================== Graphviz関係 + def initialize(network) + @network = network + end + def to_s() #Graphvizのフォーマット、dotフォーマットに変換する。 + page = DotPage.new + page.size = "5.5, 5.5" + #page.pack = "true" + #page.epsilon = 0.001 + #page.epsilon = 0.01 + #page.overlap = false + #page.overlap = scale + #page.spline = true + + #p @network.list + @network.list.sort.uniq.each {|ch| + char = ch.char + node = DotNode.new(char.to_i) + node.label = char.map_ucs_er #node.label = char.to_utf8 + node.fontsize = (node.label =~ /^&#/) ? 12 : 6 + #node.fontsize = (node.label =~ /^&#/) ? 24 : 6 + node.shape = "plaintext" + node.fontname = "MS-Mincho" #呪われてるがしかたがない + #node.fontname = "Arial-Unicode-MS" #ダメ + page.add_node(node) + } + + @network.h.each {|ch, ar| + char = ch.char + ar.each {|idsch| + idschar = idsch.char + edge = DotEdge.new(char.to_i, idschar.to_i) + #edge.len = 4 + #edge.len = 2 + #edge.color = "gray" + edge.color = "lightgray" + page.add_edge(edge) + } + } + return page.to_s + end + end + + class XSpringiesFormatter #================================================= + def initialize(network) + @network = network + end + + def to_s # #1.0 *** XSpringies data file + str = "" + @network.list.sort.uniq.each {|ch| + char = ch.char + num = char.to_i + label = char.map_ucs_er + str << "mass #{num}\n" + } + @network.h.each {|ch, ar| + char = ch.char + ar.each {|idsch| + idschar = idsch.char + from = char.to_i + to = idschar.to_i + str << "spng #{from} #{to}\n" + } + } + str + end + + end +end diff --git a/chise/parser.rb b/chise/parser.rb new file mode 100755 index 0000000..6402fca --- /dev/null +++ b/chise/parser.rb @@ -0,0 +1,120 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +module CHISE + class CharacterParser + def parse(c) # parse a value and return a number + return 0 if c.nil? # what? should raise exception? + + if c.kind_of?(Numeric) + c = 0x80000000 + c if c < 0 # negative value + return c.to_i + end + + raise "unknown object" unless c.kind_of?(String) + + return c.to_i if /^\d+$/ =~ c # only numbers? + + return parse_er(c) if is_er?(c) # ER? + + c = c.sub(/\A\?/, "") if /\A\?/ =~ c # remove a "?" in the head + + u4 = c.u8tou32 # translate from UTF-8 to UTF-32 + u4.u32to_i # translate UTF-32 to UCS number + end + + PART = "&([-+0-9A-Za-z#]+);" + ALL = "\\A#{PART}\\Z" + PART_RE = Regexp.new(PART) + ALL_RE = Regexp.new(ALL) + + def contain_er?(s) (PART_RE =~ s) != nil; end + def is_er?(s) (ALL_RE =~ s) != nil; end + + # the order is important. The primary sharset should be selectable. + CODESYS_TABLE = [ + %w( =jis-x0208-1990 J90- 4 X), + %w( =jis-x0208-1983 J83- 4 X), + %w( =jis-x0208-1978 J78- 4 X), + %w( =jis-x0208 J90- 4 X), # Œp³‚̃AƒhƒzƒbƒN‚ÈŽÀ‘• + %w( =jis-x0208 J83- 4 X), # Œp³‚̃AƒhƒzƒbƒN‚ÈŽÀ‘• + %w( =jis-x0208 J78- 4 X), # Œp³‚̃AƒhƒzƒbƒN‚ÈŽÀ‘• + %w( =jis-x0213-1-2000 JX1- 4 X), + %w( =jis-x0213-2-2000 JX2- 4 X), + %w( =jis-x0212 JSP- 4 X), + %w( =big5-cdp CDP- 4 X), + %w( =cns11643-1 C1- 4 X), + %w( =cns11643-2 C2- 4 X), + %w( =cns11643-3 C3- 4 X), + %w( =cns11643-4 C4- 4 X), + %w( =cns11643-5 C5- 4 X), + %w( =cns11643-6 C6- 4 X), + %w( =cns11643-7 C7- 4 X), + %w( =ks-x1001 K0- 4 X), + %w( =daikanwa M- 5 d), + %w( =cbeta CB 5 d), + %w( =gt GT- 5 d), + %w( =gt-k GT-K 5 d), + ] + PRIVATE_USE_AREA = 0xe000 + + def parse_er(s) # parse a ER and return a number (FIXNUM) + unless ALL_RE =~ s # I do not use is_er? to get $1. + raise "wrong ER." + end + + s = $1 # extract the part of ER + + if s =~ /\AMCS-([0-9A-Fa-f]+)\Z/ # MCS. It's a mystery. + return $1.hex + end + + if s =~ /\AU[-+]?([0-9A-Fa-f]+)\Z/ || + s =~ /\A#x([0-9A-Fa-f]+)\Z/ # Unicode code point in Hex. + return $1.hex + end + + if s =~ /\A#([0-9]+)\Z/ # Unicode code point in Decimal. + return $1.to_i + end + + if s =~ /\Amy-([0-9]+)\Z/ # my own code point. It's a secret. + return PRIVATE_USE_AREA + $1.to_i # private use area of Unicode. + end + + if s =~ /\AI-/ # I- stands for Isolated character. It's a wonder. + s = s.sub(/\AI-/, "") + end + + CODESYS_TABLE.each {|codesys, er_prefix, keta, numtype| + if numtype == "d" + nre = '\d' + elsif numtype == "X" + nre = "[0-9A-Fa-f]" + else + next + end + + re = "\\A#{er_prefix}(#{nre}{#{keta},#{keta}})\\Z" + next unless Regexp.new(re) =~ s + + codestr = $1 + if numtype == "d" + code = codestr.to_i + else + code = codestr.hex + end + + u8 = CodesysDB.instance.get(codesys, code) + next if u8.nil? + + num = parse(u8) + next if num.nil? + + return num + } + + raise "unknown Entity Reference" + end + + end +end diff --git a/chise/qp.rb b/chise/qp.rb new file mode 100755 index 0000000..e397d01 --- /dev/null +++ b/chise/qp.rb @@ -0,0 +1,38 @@ +# Copyright (C) 2003-2004 Kouichirou Eto, All rights reserved. +# This is free software with ABSOLUTELY NO WARRANTY. +# You can redistribute it and/or modify it under the terms of the GNU GPL2. + +module QP + def caller_msg(ca) + file, linenum, msg = ca.first.split(/:([0-9]+):/) + msg = $1 if msg =~ /^in `(.+)'$/ + File.basename(file)+":"+linenum+":"+msg + end + + def ar_inspect(ar) + "["+ar.map {|arg| arg.inspect }.join(", ")+"]" + end + + def runtime() + $__qp_t__ = Time.now unless defined? $__qp_t__ + sprintf("%.1f", Time.now - $__qp_t__) + end + + def qp(ca, *args) + QP.caller_msg(ca)+"\t"+QP.ar_inspect(args) + end + + def tp(ca, *args) + QP.runtime()+":"+QP.qp(ca, *args) + end + + module_function :caller_msg, :ar_inspect, :runtime, :qp, :tp +end + +def qp(*args) + print QP.qp(caller(1), *args)+"\n" +end + +def tp(*args) + print QP.tp(caller(1), *args)+"\n" +end diff --git a/chise/rbchise.rb b/chise/rbchise.rb new file mode 100755 index 0000000..8d86213 --- /dev/null +++ b/chise/rbchise.rb @@ -0,0 +1,66 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. +# "rbchise.so" ext compatible library by eto 2003-0317 + +require "bdb" +require "chise/config" + +module CHISE + class DataSource + NONE = 0 + Berkeley_DB = 1 + + def initialize(type = Berkeley_DB, location = nil) + @type, @location = type, location + @location = Config.instance.db_dir if @location.nil? + @dbs = {} + at_exit { + @dbs.each {|k, db| + db.close + } + } + end + + def open_decoding_table(ccs) + db = open(ccs, "system-char-id") + DecodingTable.new(ccs, db) + end + + def open_feature_table(feature) + db = open("system-char-id", feature) + FeatureTable.new(feature, db) + end + + def open(from, to) # real_subtpe, accessmask, modemask + name = from+"/"+to + return @dbs[name] if @dbs[name] + file = @location+"/"+name + @dbs[name] = BDB::Hash.open(file, nil, 0) + end + end + + class AttributeTable # abstract class + end + + class DecodingTable < AttributeTable + def initialize(ccs, db) + @ccs, @db = ccs, db + end + def get_char(code_point) + @db.get(code_point) + end + def put_char(code_point, cid) + @db.put(code_point, cid) + end + end + + class FeatureTable < AttributeTable + def initialize(feature, db) + @feature, @db = feature, db + end + def get_value(char_id) + @db.get(char_id) + end + def each + end + end +end diff --git a/chise/string.rb b/chise/string.rb new file mode 100755 index 0000000..aba2ce6 --- /dev/null +++ b/chise/string.rb @@ -0,0 +1,146 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +class String + def to_a() + ar = self.split(//u) # split self to chars as UTF-8 + ar + end + + def each_char() to_a.each {|ch| yield ch } end + def each_character() to_a.each {|ch| yield ch.char } end + def char_length() to_a.length end + def char_at(n) to_a()[n] end + def first_char() to_a[0] end + def char() CHISE::Character.get(to_a[0]) end + def to_utf8() + return to_a.map {|ch| + ch.char.to_utf8 + }.join("") + end + + def map_char(block = Proc.new) + return unless block_given? + return self.to_a.map {|ch| (block.call(ch)).to_s }.join("") + end + + def map_char!(block = Proc.new) + return unless block_given? + self.replace(self.map_char {|ch| block.call(ch)}) + end + + def map_character(block = Proc.new) + return unless block_given? + return self.to_a.map {|ch| (block.call(ch.char)).to_s }.join("") + end + + def map_character!(block = Proc.new) + return unless block_given? + self.replace(self.map_char {|ch| block.call(ch.char)}) + end + + def method_missing(mid, *args) + if char_length == 1 #È—ªŒ`‚ª—LŒø‚Ȃ̂́Aˆê•¶Žš‚ÌŽž‚¾‚¯ + char.method_missing(mid, *args) + else + raise NameError, "undefined method `#{mid.id2name}'", caller(1) + end + end + + def map_utf8() map_char {|ch| ch.char.map_utf8 } end + alias map_ucs map_utf8 + + def map_ucs_er() map_char {|ch| ch.char.map_ucs_er } end + def to_er() map_char {|ch| ch.char.to_er } end + + #putŠÖŒWA[]ŠÖŒW‚Í—pˆÓ‚µ‚È‚¢‚±‚Æ‚É‚µ‚½B + def de_er!() #EntityReference‚ðŽæ‚菜‚­ + return self unless self =~ Regexp.new(EntityReference::REGEXP_PART) #‚»‚ê‚炵‚¢‚Ì‚ª–³‚¯‚ê‚Ή½‚à‚µ‚È‚¢ + er = "&"+$1+";" + self.sub!(Regexp.new(Regexp.escape(er)), Character.new(er).mcs_utf8) #•ÏŠ·Ž©‘Ì‚ÍCharacter‚É‚Ü‚©‚¹‚é + return self.de_er! if self =~ Regexp.new(EntityReference::REGEXP_PART) #‚Ü‚¾‚ ‚Á‚½‚çÄ‹A + return self + end + + def de_er() return self.dup.de_er!; end + + def inspect_all() map_char {|ch| ch.char.inspect_all } end + def inspect_x() map_char {|ch| ch.char.inspect_x } end + +# def to_euc() map_char {|ch| ch.char.to_euc } end + def map_euc() map_char {|ch| ch.char.map_euc } end +# def to_sjis() map_char {|ch| ch.char.to_sjis } end + def map_sjis() map_char {|ch| ch.char.map_sjis } end + + def glyph_decompose() map_char {|ch| ch.char.glyph_decompose } end + def decompose() map_char {|ch| ch.char.decompose } end + def decompose!() self.replace(self.decompose); self; end + + def nu_decompose_all(level=nil) + level = 0 if level.nil? + if 10 < level + p ["too many recursive", self] + exit + end + de = self.decompose + return de.decompose_all(level+1) if de != self #‚È‚É‚©•Ï‰»‚ª‚ ‚Á‚½‚©‚çÄ‹A + de #‚à‚¤‚±‚êˆÈã•Ï‰»‚Í–³‚³‚»‚¤‚¾‚¼‚ƁB + end + + def decompose_all() map_char {|ch| ch.char.decompose_all } end + def decompose_all!() self.replace(self.decompose_all); self; end + + def find() #"“ú‰_"¨"“Ü"‚Æ‚©‚¢‚¤Š´‚¶‚Ì‘€ì + ar = [] + length = char_length() + each_char {|ch| + char = ch.char + ar << char.ids_contained #‚»‚Ì•¶Žš‚ðŠÜ‚ñ‚Å‚¢‚銿Žš‚̃ŠƒXƒg + } + h = Hash.new(0) + ar.each {|list| + next if list.nil? + list.each_char {|ch| + h[ch] += 1 + } + } + str = "" + h.each {|k, v| + # p [k, v] + if length == v #‘S•”‚ÉŠç‚ðo‚µ‚Ä‚¢‚½‚ç + str += k + end + } + # p str + str + end + + def compose() + db = CHISE::CodesysDB.instance + composed = db.get("ids", self) + return "" if composed.nil? #‚È‚©‚Á‚½‚æ‚ƁB + return "" if composed.char_length == 0 #‚È‚É‚²‚Æ? + return composed if composed.char_length == 1 + composed.each_char {|ch| + char = ch.char + return ch if char.has_attribute? #‚Æ‚è‚ ‚¦‚¸Å‰‚ɂ݂‚©‚Á‚½‚à‚Ì‚ð•Ô‚·‚Æ‚¢‚¤ƒkƒ‹‚¢Žd—l + } + return "" #attribute‚ðŽ‚Â‚à‚Ì‚ªˆê‚Â‚à–³‚©‚Á‚½‚çA""‚É‚·‚é + end + + def aggregate() + #self‚Å‚ ‚镶Žš—ñ‚ðIDS‚¾‚Ɖ¼’肵A‚»‚ê‚ðŠ®‘S‚Écompose‚µ‚«‚ç‚È‚¢‚ŁA + #‚»‚Ì•”•ªW‡‚¾‚¯‚ð‚Ƃ肾‚µ‚āAcompose‰Â”\‚Å‚ ‚ê‚΂ł«‚邾‚¯compose‚·‚éB + tree = CHISE::IDS_Tree.new(self) + return self if tree.depth <= 1 #sub_nodes‚ª–³‚¢ê‡‚Í‚±‚±‚Å‚³‚æ‚È‚ç + tree.sub_nodes.each {|node| + c = node.compose + next if c.nil? || c == "" + # print "#{self} #{node} #{c}\n" + # p [self, node, c] + n = self.gsub(node, c) + return n.aggregate + } + return self #‚¨‚«‚©‚¦‚ç‚ê‚é‚à‚Ì‚ª‚Ü‚Á‚½‚­‚È‚©‚Á‚½‚çAŽ©•ª‚ð‚©‚¦‚·B + end + +end diff --git a/chise/stroke.rb b/chise/stroke.rb new file mode 100755 index 0000000..1504a7d --- /dev/null +++ b/chise/stroke.rb @@ -0,0 +1,101 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. +# StrokeFont library by eto 2003-0311 + +require "sgl" +$LOAD_PATH << "../../lib" if $0 == __FILE__ +require "chise/kage" +require "chise/csf" + +module StrokeFont + class StrokesRenderer + def initialize + @start_time = nil + @strokes = nil +# @hsv = [0, 0, 100] #白 + @hsv = [0, 0, 0] #黒 + init + end + attr_accessor :hsv + + def init() @start_time = Time.now; end + + def set_strokes(strokes) + @strokes = strokes + init + end + + def draw + return if @strokes.nil? + @strokes.strokes.each_with_index {|stroke, index| + #draw_delay(stroke, index) + draw_alpha(stroke, 100) + } + end + + def draw_alpha(stroke, time) + px, py = 0, 0 + span = 0.1 + time += span*2 + stroke.points.each {|x, y| + a = time / span + colorHSV(@hsv[0], @hsv[1], @hsv[2], a*100.0) + line(px, py, x, y) if (px != 0 || py != 0) #最初の点ではない + px, py = x, y + time -= span + } + end + + def draw_delay(stroke, index) + now = Time.now + @start_time = Time.now if @start_time == nil + diff = now - @start_time #開始からの秒数がはいる + draw_alpha(stroke, diff - index*0.3) + end + end + + class Stroke #====================================================== 一本の線 + def initialize + @points = [] + @length = nil + end + attr_reader :points + + def add_point(x, y) + @points << [x, y] + end + + def length #未チェック + return @length if @length + len = 0.0 + px, py = -1, -1 + @points.each {|x, y| + if px != -1 + len += Math.sqrt((x-px)*(x-px)+(y-py)*(y-py)) + end + px, py = x, y + } + @length = len + return @length + end + end + + class Strokes #===================================================== 複数の線 + def initialize + @strokes = [] + @px1, @py1, @px2, @py2 = 0, 0, 0, 0 + @x1, @y1, @x2, @y2 = 0, 0, 0, 0 + @px, @py = -1, -1 + end + attr_reader :strokes + + def add_line(x1, y1, x2, y2) + if (@px != x1 || @py != y1) #以前の点とつながっていなかったら、 + @strokes << Stroke.new + @strokes.last.add_point(x1, y1) + end + @strokes.last.add_point(x2, y2) + @px, @py = x2, y2 + end + + end +end diff --git a/chise/uconv.rb b/chise/uconv.rb new file mode 100755 index 0000000..4da600c --- /dev/null +++ b/chise/uconv.rb @@ -0,0 +1,11 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +require "chise/iconv" + +class Uconv + def self.u8tou4(s) s.u8tou32; end + def self.u4tou8(s) s.u32tou8; end + def self.u4tou16(s) s.u32tou16; end + def self.u16toeuc(s) s.u16toeuc; end + def self.u16tosjis(s) s.u16tosjis; end +end diff --git a/lib/chise.rb b/lib/chise.rb deleted file mode 100755 index e6699c3..0000000 --- a/lib/chise.rb +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env ruby -# $Id: chise.rb,v 1.1 2003-11-10 08:32:10 eto Exp $ -# Copyright (C) 2002-2003 Kouichirou Eto, All rights reserved. -# This is free software with ABSOLUTELY NO WARRANTY. -# You can redistribute it and/or modify it under the terms of the GNU GPL2. - -require 'chise/char' -require 'chise/rbchise' -require 'chise/db' -require 'chise/ids' diff --git a/sample/sample-lc2003.rb b/sample/sample-lc2003.rb new file mode 100755 index 0000000..be7d06f --- /dev/null +++ b/sample/sample-lc2003.rb @@ -0,0 +1,91 @@ +#!/usr/bin/env ruby + +$LOAD_PATH.unshift("..") +require "chise/char" + +def test1 +p "字" # "字" +p "字".ucs # 23383 +p "字".total_strokes # 6 +p "字".inspect_all # 保有する素性情報を全て出力する +p "字".daikanwa # 6942(大漢和番号) +end + +def test2 +p "字字".ucs # 例外 +end + +def test3 +char = "字".char # Character classのインスタンスを生成 +char = Character.get("字") # 上記とまったく同じ +end + +def test4 +# 数値参照化する +p "字". to_er # "字" + +#数値参照を復号 +p "字".de_er # "字" + +# JIS X 0208-1990 による実体参照を復号 +p "&J90-3B7A;".de_er + +# 大漢和番号による実体参照を復号 +p "&M-06942;".de_er +end + +def test5 + str = "文字列" + str.each_character{|c| + p c + } + str2 = str.map_character{|c| + c.to_er + } + p str2 + p str.char_length + p str.to_er + p str2.de_er +end + +def test6 +"木".mydepth = 1 +"林".mydepth = 2 +"森".mydepth = 3 +end + +def test7 +p "木".mydepth +p "林".mydepth +p "森".mydepth +end + +def test8 +p "字".decompose +p "字".decompose_all +p "榊".decompose +p "榊".decompose_all +p "終了".decompose +p "終了".decompose_all +p "鬱".decompose +p "鬱".decompose_all +end + +def test9 +p "#x2ff0;木木".compose +end + +def test10 +p "日雲".find +end + +test1 +#test2 +test3 +test4 +test5 +test6 +test7 +test8 +test9 +test10 diff --git a/sample/sample1.rb b/sample/sample1.rb new file mode 100755 index 0000000..9b32d00 --- /dev/null +++ b/sample/sample1.rb @@ -0,0 +1,18 @@ +#!/usr/bin/env ruby + +$LOAD_PATH.unshift("..") +require "chise/char" + +str = "字" #Stringを拡張している。UTF8で与えること。 +p str.ucs #とすると、その文字のucsの値が表示される +p str.total_strokes #画数が表示される +p str.gb2312 #などなど +str.char.alist.each {|a, v| #こんな感じで全属性を表示できる + print a, ': ', v, "\n" +} +p str.inspect_x #Characterについての情報が表示される。 +p str.inspect_all #持っている属性情報を全て表示する。 + +str = "文字列" #もちろん一文字でなく文字列も扱える。UTF-8で与える。 +p str.inspect_x #各文字の情報が表示される。 +p str.inspect_all #各文字の属性情報を全て表示する。 diff --git a/sample/t.html b/sample/t.html new file mode 100755 index 0000000..a6689a4 --- /dev/null +++ b/sample/t.html @@ -0,0 +1,21 @@ + + + +Ruby/CHISE + + + + + +

+ +"<衝,#x885d,=cns11643-1:28269,=daikanwa:34069,=gb12345:13157,=gt:45946,=gt-pj-1:15959,=jis-x0208:15959,=ks-x1001:30074,=ucs:34909,ideographic-radical:144,ideographic-strokes:9,ids:⿴行重,ids-aggregated:⿴行重,ids-contained:?,ids-decomposed:⿴行重,ids-parts:⿴行重,shinjigen-2:7330,total-strokes:15>" + + +"<行,#x884c,=cns11643-1:18535,=daikanwa:34029,=gb2312:20560,=gt:45899,=gt-k:1612,=gt-pj-1:14676,=jis-x0208:14676,=ks-x1001:31292,=ucs:34892,ideographic-radical:144,ideographic-strokes:0, + +shinjigen-2:7321,total-strokes:6>" + + + + diff --git a/sample/t.txt b/sample/t.txt new file mode 100644 index 0000000..3e069d2 --- /dev/null +++ b/sample/t.txt @@ -0,0 +1,2 @@ +"<衝,#x885d,=cns11643-1:28269,=daikanwa:34069,=gb12345:13157,=gt:45946,=gt-pj-1:15959,=jis-x0208:15959,=ks-x1001:30074,=ucs:34909,ideographic-radical:144,ideographic-strokes:9,ids:⿴行重,ids-aggregated:⿴行重,ids-contained:𧁬,ids-decomposed:⿴行重,ids-parts:⿴行重,shinjigen-2:7330,total-strokes:15>" +"<行,#x884c,=cns11643-1:18535,=daikanwa:34029,=gb2312:20560,=gt:45899,=gt-k:1612,=gt-pj-1:14676,=jis-x0208:14676,=ks-x1001:31292,=ucs:34892,ideographic-radical:144,ideographic-strokes:0,ids-contained:㗸㘅㤚㦣䀪䓷䕔䘕䘖䘗䘙䚘䟰䡓䯒䰢䲗哘垳愆桁椼洐烆珩筕絎绗胻荇葕蘅衍衎衏衐衑衒術衔衕衖街衘衙衚衛衜衝衞衟衠衡衢裄讆讏躛銜餰鴴鸻𠒣𠾑𡆚𡭑𢔖𢔬𢔮𢕁𢕅𢕋𢕥𢕵𢖅𢖋𢖍𢖨𢙡𢫱𢯼𣆯𣟉𣻚𣽣𤀵𤜂𤫄𥞧𥲋𥶽𦌫𦨵𦸇𧁬𧄇𧊔𧊽𧍢𧎘𧗝𧗞𧗟𧗠𧗡𧗢𧗣𧗤𧗥𧗦𧗧𧗨𧗩𧗪𧗫𧗬𧗭𧗯𧗰𧗱𧗲𧗳𧗴𧗶𧗷𧗸𧗹𧗺𧗻𧗼𧗽𧗿𧘀𧘁𧘂𧘃𧘄𧘅𧘆𧲔𧲝𧲞𧻥𧾦𨇙𨴠𩇐𩜾ø»Š†”ø»”·œø»–Š‘ø½‹¡³ø½‹¢‹,shinjigen-2:7321,total-strokes:6>" diff --git a/sample/t1.rb b/sample/t1.rb new file mode 100755 index 0000000..e6f3771 --- /dev/null +++ b/sample/t1.rb @@ -0,0 +1,9 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH.unshift("..") +require "chise/char" +p "字" # "字" +p "字".ucs # 23383 +p "字".daikanwa # 6942(大漢和番号) +p "字".total_strokes # 6 +p "字".inspect_all # 保有する素性情報を全て出力する diff --git a/sample/t10.rb b/sample/t10.rb new file mode 100755 index 0000000..7e31279 --- /dev/null +++ b/sample/t10.rb @@ -0,0 +1,8 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH << '../lib' +require 'chise' +include CHISE + +p "衝".inspect_all +p "行".inspect_all diff --git a/sample/t14.rb b/sample/t14.rb new file mode 100755 index 0000000..339c2d0 --- /dev/null +++ b/sample/t14.rb @@ -0,0 +1,19 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH << '../lib' +require 'chise' +include CHISE + +p "木".inspect_all +exit + +#str = "門火" +str = "木" +p str.find +str.find.each_character{|c| + puts c.ids + puts c.inspect_all +} + +#p "日雲".find.inspect_all +#p "木缶".find.inspect_all diff --git a/sample/t15.rb b/sample/t15.rb new file mode 100755 index 0000000..4325092 --- /dev/null +++ b/sample/t15.rb @@ -0,0 +1,19 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH << '../lib' +require 'chise' +include CHISE + +puts "鬼".find.split(//u).sort.join +exit + +#str = "門火" +str = "木" +p str.find +str.find.each_character{|c| + puts c.ids + puts c.inspect_all +} + +#p "日雲".find.inspect_all +#p "木缶".find.inspect_all diff --git a/sample/t2.rb b/sample/t2.rb new file mode 100755 index 0000000..741e361 --- /dev/null +++ b/sample/t2.rb @@ -0,0 +1,11 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH.unshift("..") +require "chise/char" +p "字".decompose # "+宀子" +p "榊".decompose +p "榊".decompose_all +p "鬱".decompose # "木缶木冖鬯彡" +p "鬱".decompose_all # 再帰的分解 +p "終了".decompose +p "終了".decompose_all diff --git a/sample/t3.rb b/sample/t3.rb new file mode 100755 index 0000000..8e4f4ac --- /dev/null +++ b/sample/t3.rb @@ -0,0 +1,6 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH.unshift("..") +require "chise/char" +p "⿰木木".compose +p "⿱木⿰木木".de_er.compose diff --git a/sample/t4.rb b/sample/t4.rb new file mode 100755 index 0000000..3547fb6 --- /dev/null +++ b/sample/t4.rb @@ -0,0 +1,11 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH.unshift("..") +require "chise/char" +p "日雲".find +"日雲".find.each_character{|c| + puts c.ids +} + +#p "日雲".find.inspect_all +#p "木缶".find.inspect_all diff --git a/sample/t5.rb b/sample/t5.rb new file mode 100755 index 0000000..1a676f1 --- /dev/null +++ b/sample/t5.rb @@ -0,0 +1,8 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH.unshift("..") +require "chise/char" + +ki = Uconv.sjistou8("–Ø") +res = (("\xE2\xBF\xB0"+ki+ki).compose) +puts Uconv.u8tosjis(res) diff --git a/sample/t6.rb b/sample/t6.rb new file mode 100755 index 0000000..1a0a34c --- /dev/null +++ b/sample/t6.rb @@ -0,0 +1,9 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH.unshift("..") +require "chise/char" + +(0x2ff0..0x2ffb).each {|i| + char = Character.get(i) + p [char.name, char] +} diff --git a/sample/t7.rb b/sample/t7.rb new file mode 100755 index 0000000..0e99c82 --- /dev/null +++ b/sample/t7.rb @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby +$KCODE = "u" +$LOAD_PATH.unshift("..") +require "chise/char" +require "chise/kanjilist" + +def atom_list(list) + list.map_character {|char| + char.decompose.char_length == 1 ? char.decompose : "" + } +end + +puts atom_list(KanjiList::JOYO_KANJI_LIST) +puts atom_list(KanjiList::JISX0208_KANJI_LIST) diff --git a/sample/t8.rb b/sample/t8.rb new file mode 100755 index 0000000..8920735 --- /dev/null +++ b/sample/t8.rb @@ -0,0 +1,15 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH << '../lib' +require 'chise' +require 'chise/kanjilist' +include CHISE + +[IDC_LR, IDC_AB, IDC_LMR, IDC_AMB, IDC_FS, IDC_FA, IDC_FB, IDC_FL, IDC_FUL, IDC_FUR, IDC_FLL, IDC_O].each {|idc| + p idc + KanjiList::JOYO_KANJI_LIST.each_character {|char| +# d = char.decompose + d = char.glyph_decompose + p [char, d] if d.include?(idc) + } +} diff --git a/sample/t9.rb b/sample/t9.rb new file mode 100755 index 0000000..772a305 --- /dev/null +++ b/sample/t9.rb @@ -0,0 +1,28 @@ +#!/usr/bin/env ruby +$KCODE = 'u' +$LOAD_PATH << '../lib' +require 'chise' +require 'chise/kanjilist' +include CHISE + +def atom_list(list) + list.map_character {|char| + d = yield(char) + d.char_length == 1 ? d : "" + } +end + +def check_list(list) + d = atom_list(list){|char| char.decompose } + g = atom_list(list){|char| char.glyph_decompose } + da = d.to_a + ga = g.to_a + wa = da & ga + dd = (da - wa).join("") + gg = (ga - wa).join("") + puts "意味で分解できない文字: "+d, "これだけに含まれる文字: "+dd + puts "形で分解できない文字: "+g, "これだけに含まれる文字: "+gg +end + +check_list(KanjiList::JOYO_KANJI_LIST) +check_list(KanjiList::JISX0208_KANJI_LIST) diff --git a/sample/test1.rb b/sample/test1.rb new file mode 100755 index 0000000..b6645a3 --- /dev/null +++ b/sample/test1.rb @@ -0,0 +1,21 @@ +#!/usr/bin/env ruby +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +$LOAD_PATH.unshift("..") +require "chise" +include CHISE +$KCODE = "u" + +str = "字" #Stringを拡張している。UTF8で与えること。 +p str.ucs #とすると、その文字のucsの値が表示される +p str.total_strokes #画数が表示される +p str.chinese_gb2312 #などなど +str.char.alist.each {|a, v| #こんな感じで全属性を表示できる + print a, ": ", v, "\n" +} +p str.inspect_x #Characterについての情報が表示される。 +p str.inspect_all #持っている属性情報を全て表示する。 + +str = "文字列" #もちろん一文字でなく文字列も扱える。UTF-8で与える。 +p str.inspect_x #各文字の情報が表示される。 +p str.inspect_all #各文字の属性情報を全て表示する。 diff --git a/test/common.rb b/test/common.rb new file mode 100755 index 0000000..faa4295 --- /dev/null +++ b/test/common.rb @@ -0,0 +1,13 @@ +# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. + +$VERBOSE = true + +$LOAD_PATH.unshift("..") +require "test/unit" +require "chise/char" +require "chise/qp" +#include CHISE + +class String + alias su sjistou8 +end diff --git a/test/test1.rb b/test/test1.rb deleted file mode 100755 index b6645a3..0000000 --- a/test/test1.rb +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env ruby -# Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved. - -$LOAD_PATH.unshift("..") -require "chise" -include CHISE -$KCODE = "u" - -str = "字" #Stringを拡張している。UTF8で与えること。 -p str.ucs #とすると、その文字のucsの値が表示される -p str.total_strokes #画数が表示される -p str.chinese_gb2312 #などなど -str.char.alist.each {|a, v| #こんな感じで全属性を表示できる - print a, ": ", v, "\n" -} -p str.inspect_x #Characterについての情報が表示される。 -p str.inspect_all #持っている属性情報を全て表示する。 - -str = "文字列" #もちろん一文字でなく文字列も扱える。UTF-8で与える。 -p str.inspect_x #各文字の情報が表示される。 -p str.inspect_all #各文字の属性情報を全て表示する。 diff --git a/tools/sample-lc2003.rb b/tools/sample-lc2003.rb deleted file mode 100755 index be7d06f..0000000 --- a/tools/sample-lc2003.rb +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env ruby - -$LOAD_PATH.unshift("..") -require "chise/char" - -def test1 -p "字" # "字" -p "字".ucs # 23383 -p "字".total_strokes # 6 -p "字".inspect_all # 保有する素性情報を全て出力する -p "字".daikanwa # 6942(大漢和番号) -end - -def test2 -p "字字".ucs # 例外 -end - -def test3 -char = "字".char # Character classのインスタンスを生成 -char = Character.get("字") # 上記とまったく同じ -end - -def test4 -# 数値参照化する -p "字". to_er # "字" - -#数値参照を復号 -p "字".de_er # "字" - -# JIS X 0208-1990 による実体参照を復号 -p "&J90-3B7A;".de_er - -# 大漢和番号による実体参照を復号 -p "&M-06942;".de_er -end - -def test5 - str = "文字列" - str.each_character{|c| - p c - } - str2 = str.map_character{|c| - c.to_er - } - p str2 - p str.char_length - p str.to_er - p str2.de_er -end - -def test6 -"木".mydepth = 1 -"林".mydepth = 2 -"森".mydepth = 3 -end - -def test7 -p "木".mydepth -p "林".mydepth -p "森".mydepth -end - -def test8 -p "字".decompose -p "字".decompose_all -p "榊".decompose -p "榊".decompose_all -p "終了".decompose -p "終了".decompose_all -p "鬱".decompose -p "鬱".decompose_all -end - -def test9 -p "#x2ff0;木木".compose -end - -def test10 -p "日雲".find -end - -test1 -#test2 -test3 -test4 -test5 -test6 -test7 -test8 -test9 -test10 diff --git a/tools/sample1.rb b/tools/sample1.rb deleted file mode 100755 index 9b32d00..0000000 --- a/tools/sample1.rb +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env ruby - -$LOAD_PATH.unshift("..") -require "chise/char" - -str = "字" #Stringを拡張している。UTF8で与えること。 -p str.ucs #とすると、その文字のucsの値が表示される -p str.total_strokes #画数が表示される -p str.gb2312 #などなど -str.char.alist.each {|a, v| #こんな感じで全属性を表示できる - print a, ': ', v, "\n" -} -p str.inspect_x #Characterについての情報が表示される。 -p str.inspect_all #持っている属性情報を全て表示する。 - -str = "文字列" #もちろん一文字でなく文字列も扱える。UTF-8で与える。 -p str.inspect_x #各文字の情報が表示される。 -p str.inspect_all #各文字の属性情報を全て表示する。 diff --git a/tools/t1.rb b/tools/t1.rb deleted file mode 100755 index e6f3771..0000000 --- a/tools/t1.rb +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH.unshift("..") -require "chise/char" -p "字" # "字" -p "字".ucs # 23383 -p "字".daikanwa # 6942(大漢和番号) -p "字".total_strokes # 6 -p "字".inspect_all # 保有する素性情報を全て出力する diff --git a/tools/t2.rb b/tools/t2.rb deleted file mode 100755 index 741e361..0000000 --- a/tools/t2.rb +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH.unshift("..") -require "chise/char" -p "字".decompose # "+宀子" -p "榊".decompose -p "榊".decompose_all -p "鬱".decompose # "木缶木冖鬯彡" -p "鬱".decompose_all # 再帰的分解 -p "終了".decompose -p "終了".decompose_all diff --git a/tools/t3.rb b/tools/t3.rb deleted file mode 100755 index 8e4f4ac..0000000 --- a/tools/t3.rb +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH.unshift("..") -require "chise/char" -p "⿰木木".compose -p "⿱木⿰木木".de_er.compose diff --git a/tools/t4.rb b/tools/t4.rb deleted file mode 100755 index 3547fb6..0000000 --- a/tools/t4.rb +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH.unshift("..") -require "chise/char" -p "日雲".find -"日雲".find.each_character{|c| - puts c.ids -} - -#p "日雲".find.inspect_all -#p "木缶".find.inspect_all diff --git a/tools/t5.rb b/tools/t5.rb deleted file mode 100755 index 1a676f1..0000000 --- a/tools/t5.rb +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH.unshift("..") -require "chise/char" - -ki = Uconv.sjistou8("–Ø") -res = (("\xE2\xBF\xB0"+ki+ki).compose) -puts Uconv.u8tosjis(res) diff --git a/tools/t6.rb b/tools/t6.rb deleted file mode 100755 index 1a0a34c..0000000 --- a/tools/t6.rb +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH.unshift("..") -require "chise/char" - -(0x2ff0..0x2ffb).each {|i| - char = Character.get(i) - p [char.name, char] -} diff --git a/tools/t7.rb b/tools/t7.rb deleted file mode 100755 index 0e99c82..0000000 --- a/tools/t7.rb +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby -$KCODE = "u" -$LOAD_PATH.unshift("..") -require "chise/char" -require "chise/kanjilist" - -def atom_list(list) - list.map_character {|char| - char.decompose.char_length == 1 ? char.decompose : "" - } -end - -puts atom_list(KanjiList::JOYO_KANJI_LIST) -puts atom_list(KanjiList::JISX0208_KANJI_LIST) diff --git a/tools/t8.rb b/tools/t8.rb deleted file mode 100755 index 8920735..0000000 --- a/tools/t8.rb +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH << '../lib' -require 'chise' -require 'chise/kanjilist' -include CHISE - -[IDC_LR, IDC_AB, IDC_LMR, IDC_AMB, IDC_FS, IDC_FA, IDC_FB, IDC_FL, IDC_FUL, IDC_FUR, IDC_FLL, IDC_O].each {|idc| - p idc - KanjiList::JOYO_KANJI_LIST.each_character {|char| -# d = char.decompose - d = char.glyph_decompose - p [char, d] if d.include?(idc) - } -} diff --git a/tools/t9.rb b/tools/t9.rb deleted file mode 100755 index 772a305..0000000 --- a/tools/t9.rb +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env ruby -$KCODE = 'u' -$LOAD_PATH << '../lib' -require 'chise' -require 'chise/kanjilist' -include CHISE - -def atom_list(list) - list.map_character {|char| - d = yield(char) - d.char_length == 1 ? d : "" - } -end - -def check_list(list) - d = atom_list(list){|char| char.decompose } - g = atom_list(list){|char| char.glyph_decompose } - da = d.to_a - ga = g.to_a - wa = da & ga - dd = (da - wa).join("") - gg = (ga - wa).join("") - puts "意味で分解できない文字: "+d, "これだけに含まれる文字: "+dd - puts "形で分解できない文字: "+g, "これだけに含まれる文字: "+gg -end - -check_list(KanjiList::JOYO_KANJI_LIST) -check_list(KanjiList::JISX0208_KANJI_LIST)