From 588a5e863bc585dea37d996a0c1bd3ced8c6dd69 Mon Sep 17 00:00:00 2001 From: handa Date: Thu, 16 Jul 2009 11:41:07 +0000 Subject: [PATCH] *** empty log message *** --- MCharTable.cs | 122 +++++++++++++- MDatabase.cs | 28 ++-- MPlist.cs | 495 +++++++++++++++++++++++++++++++++++---------------------- MSymbol.cs | 1 + MText.cs | 7 + chartab.cs | 15 +- database.cs | 17 +- temp.plist | 8 +- 8 files changed, 466 insertions(+), 227 deletions(-) diff --git a/MCharTable.cs b/MCharTable.cs index 39e0714..1d9adc7 100644 --- a/MCharTable.cs +++ b/MCharTable.cs @@ -1,6 +1,7 @@ using System; using System.Collections; using System.Collections.Generic; +using System.IO; using M17N; using M17N.Core; @@ -78,8 +79,11 @@ namespace M17N.Core public override string ToString () { - return String.Format ("[U+{0:X}..U+{1:X} {2}]", from, to, - value == null ? "null" : value); + return ((from == to) + ? String.Format ("[U+{0:X} {1}]", from, + value == null ? "null" : value) + : String.Format ("[U+{0:X}..U+{1:X} {2}]", from, to, + value == null ? "null" : value)); } } @@ -176,8 +180,14 @@ namespace M17N.Core { set { MCharRange.CheckChar (from); - MCharRange.CheckChar (to); - set_range (from, to, value); + + if (from == to) + Set (from, value); + else + { + MCharRange.CheckChar (to); + set_range (from, to, value); + } } } @@ -290,4 +300,108 @@ namespace M17N.Core public void Dispose () {} } } + + public class MCharProp : MCharTable + { + private static Dictionary char_prop + = new Dictionary (); + + public static void Define (MSymbol prop, MDatabase mdb) + { + char_prop[prop] = mdb; + } + + public MCharProp (MSymbol prop) + { + MDatabase mdb; + + if (! char_prop.TryGetValue (prop, out mdb)) + throw new Exception ("Undefined character property: " + prop); + mdb.Load (this); + } + } + + public partial class MDatabase : IComparable + { + private bool read_range (MStreamReader mst, out int from, out int to) + { + if (! mst.ReadInteger (out from)) + { + to = from; + return false; + } + to = mst.Read (); + if (to < 0) + return false; + if (to != '-') + { + to = from; + return true; + } + return mst.ReadInteger (out to); + } + + private MCharTable load_char_table (MCharTable table) + { + MSymbol type = tag[1]; + + using (FileStream stream = FileInfo.OpenRead ()) + { + MStreamReader mst = new MStreamReader (stream, ';', true); + int c, from, to; + + while ((c = mst.Peek ()) >= 0) + { + if (c != '#' + && read_range (mst, out from, out to) + && mst.SkipSpace (out c)) + { + object value = null; + + if (type == MSymbol.integer) + { + int i; + if (mst.ReadInteger (out i)) + value = i; + } + else if (type == MSymbol.symbol) + { + MSymbol sym; + if (mst.ReadSymbol (out sym, -1)) + value = sym; + } + else if (type == MSymbol.mtext) + { + MText mt; + if (mst.ReadMText (out mt)) + value = mt; + } + else if (type == MSymbol.plist) + { + value = new MPlist (mst); + } + else if (type == MSymbol.mstring) + { + string str; + if (mst.ReadString (out str)) + value = str; + } + if (value != null) + table[from, to] = value; + } + mst.ForwardLine (); + } + } + return table; + } + + public object Load (MCharTable table) + { + if (loader != null || Info.Format != Mchar_table) + throw new ArgumentException ("Not a database of CharTable type"); + if (! update_status ()) + throw new Exception ("Database invalid"); + return load_char_table (table); + } + } } diff --git a/MDatabase.cs b/MDatabase.cs index 32de922..e6cbd8b 100644 --- a/MDatabase.cs +++ b/MDatabase.cs @@ -137,7 +137,7 @@ namespace M17N.Core } } - public class MDatabase : IComparable + public partial class MDatabase : IComparable { /// Identifier of a MDatabase. public struct Tag : IEquatable @@ -616,14 +616,16 @@ namespace M17N.Core this.tag = tag; this.Info = info; DBType = this.tag.HasWildcard ? MDBType.WILDCARD : MDBType.AUTO; - if (this.tag[0] == Mchar_table || this.tag[0] == Mcharset) - Info.Format = this.tag[0]; + if (tag[0] == Mchar_table || tag[0] == Mcharset) + Info.Format = tag[0]; ListIndex = list_idx; DirIndex = dir_idx; if (Path.IsPathRooted (Info.Filename)) DBStatus = MDBStatus.READY; else DBStatus = MDBStatus.NOT_READY; + if (Info.Format == Mchar_table) + MCharProp.Define (tag[2], this); } public override String ToString () { @@ -867,11 +869,12 @@ namespace M17N.Core { if (loader != null) return loader (tag, ExtraInfo); - if (Info.Format == Mchar_table) - return load_char_table (); + throw new Exception ("Use Load (MCharTable) to load this database"); if (Info.Format == Mcharset) - return load_charset (); + throw new Exception ("Use Load (MCharset) to load this database"); + if (! update_status ()) + throw new Exception ("Database invalid"); MPlist plist = null; using (FileStream stream = File.OpenRead (FileInfo.FullName)) @@ -884,23 +887,14 @@ namespace M17N.Core if (loader != null || Info.Format != MSymbol.plist) throw new ArgumentException ("Key can't be specified for loading this database"); - + if (! update_status ()) + throw new Exception ("Database invalid"); MPlist plist = null; using (FileStream stream = File.OpenRead (FileInfo.FullName)) plist = new MPlist (stream, key, stop); return plist; } - private object load_charset () - { - return null; - } - - private object load_char_table () - { - return null; - } - /// Return a list of currently available database /// directory names. public static string[] DirectoryList () diff --git a/MPlist.cs b/MPlist.cs index 3c9b140..480c9d2 100644 --- a/MPlist.cs +++ b/MPlist.cs @@ -80,7 +80,7 @@ namespace M17N.Core } } - private MPlist (MStreamReader reader) + internal MPlist (MStreamReader reader) { MSymbol key; object val; @@ -363,220 +363,331 @@ namespace M17N.Core return (! current.IsEmpty); } } + } - private class MStreamReader : StreamReader - { - private static char[] escaped_char = new char[128]; - private static int[] hexadecimal = new int[128]; - - public MStreamReader (Stream stream) : base (stream) - { - } - - static MStreamReader () - { - for (int i = 0; i < 128; i++) - escaped_char[i] = (char) i; - escaped_char['e'] = (char) 27; - escaped_char['b'] = '\b'; - escaped_char['f'] = '\f'; - escaped_char['n'] = '\n'; - escaped_char['r'] = '\r'; - escaped_char['t'] = '\t'; - escaped_char['\\'] = '\\'; - for (int i = 0; i < 128; i++) - hexadecimal[i] = -1; - for (int i = '0'; i <= '9'; i++) - hexadecimal[i] = i - '0'; - for (int i = 'A'; i <= 'F'; i++) - hexadecimal[i] = hexadecimal[i + 'a' - 'A'] = i -'A' + 10; - } + public class MStreamReader : StreamReader + { + private static char[] escaped_char = new char[128]; + private static int[] hexadecimal = new int[128]; + private char comment_start; + private bool line_oriented; - private int PeekChar () + public MStreamReader (Stream stream) : base (stream) { - bool comment = false; - int c; - - while ((c = Peek ()) != -1) - { - if (comment) - { - if ((c = Read ()) == '\n') - comment = false; - } - else - { - if (c == ';') - comment = true; - else if (c != ' ' && c != '\t' && c != '\n') - return c; - Read (); - } - } - return c; + comment_start = ';'; + line_oriented = false; } - private int ReadHexadecimal () + public MStreamReader (Stream stream, char comment_start, + bool line_oriented) : base (stream) { - int i = 0, c; - - while ((c = Peek ()) >= 0 && c < 128 && (c = hexadecimal[c]) >= 0) - { - Read (); - i = (i * 16) + c; - } - return i; + this.comment_start = comment_start; + this.line_oriented = line_oriented; } - private int ReadInteger () + static MStreamReader () { - int i = 0, c; - - while ((c = Peek ()) >= '0' && c <= '9') - i = (i * 10) + (Read () - '0'); - return i; + for (int i = 0; i < 128; i++) + escaped_char[i] = (char) i; + escaped_char['0'] = (char) 0; + escaped_char['e'] = (char) 27; + escaped_char['a'] = '\a'; + escaped_char['b'] = '\b'; + escaped_char['f'] = '\f'; + escaped_char['n'] = '\n'; + escaped_char['r'] = '\r'; + escaped_char['t'] = '\t'; + escaped_char['v'] = '\v'; + for (int i = 0; i < 128; i++) + hexadecimal[i] = -1; + for (int i = '0'; i <= '9'; i++) + hexadecimal[i] = i - '0'; + for (int i = 'A'; i <= 'F'; i++) + hexadecimal[i] = hexadecimal[i + 'a' - 'A'] = i -'A' + 10; } - private int ReadChar () - { - int c = Read (); + private int ReadHexadecimal (int max) + { + int i = 0, c; - if (c == '\\') - { - c = Read (); - if (c == -1) - return -1; - if (c == 'x' || c == 'u') - return ReadHexadecimal (); - if (c < 128) - c = escaped_char[c]; - } - return c; - } + while ((c = Peek ()) >= 0 && c < 128 && (c = hexadecimal[c]) >= 0) + { + if (max >= 0 && (i * 16) + c >= max) + break; + Read (); + i = (i * 16) + c; + } + return i; + } - private MText ReadMtext () - { - MText mt = new MText (); - int c; + public bool ForwardLine () + { + int c; + while ((c = Read ()) >=0 && c != '\n'); + return (c == '\n'); + } - while ((c = Peek ()) != -1 && c != '"') - { - if (c == '\\') - { - c = ReadChar (); - if (Peek () == '\n') - { - ReadChar (); - continue; - } - if (c == -1) - { - mt.Cat ('\\'); - break; - } - mt.Cat (c); - } - else - mt.Cat (Read ()); - } - if (c == '"') + public bool SkipSpace (out int c) + { + while ((c = Peek ()) == ' ' && c == '\t' && c == '\f') + Read (); + return (c >= 0); + } + + public bool PeekChar (out int c) + { + while ((c = Peek ()) != -1) + { + if (c == comment_start) + ForwardLine (); + else if (c != ' ' && c != '\t' && c != '\n') + return true; + else if (c == '\n' && line_oriented) + return false; + else + Read (); + } + return false; + } + + public bool ReadInteger (out int i) + { + int c = Peek (); + + i = 0; + if (c < 0) + return false; + if (c == '0') + { Read (); - return mt; - } + c = Peek (); + if (c == 'x') + { + Read (); + i = ReadHexadecimal (-1); + return true; + } + } + while ((c = Peek ()) >= '0' && c <= '9') + i = (i * 10) + (Read () - '0'); + return true; + } - private string ReadSymbolName () - { - int c = Peek (); + public bool ReadChar (out int c) + { + c = Read (); + if (c < 0 || (line_oriented && c == '\n')) + return false; + if (c == '\\') + { + c = Read (); + if (c == '\n') + return ReadChar (out c); + if (c < 0) + c = '\\'; + else if (c == 'x' || c == 'u' || c == 'U') + c = ReadHexadecimal (0x10FFFF); + else if (c < 128) + c = escaped_char[c]; + } + return true; + } - if (c == -1 || c == '(' || c == ')' || c == ' ' || c == '\n' || c == '"') - return ""; + private bool read_string (out string str, int prefix, bool for_symbol) + { + char[] buf = new char[256]; + int c; + int i = 0; + + str = null; + if (prefix >= 0) + buf[i++] = (char) prefix; + while ((c = Peek ()) >= 0 + && c != '\"' + && (! for_symbol + || (c != '(' && c != ')' && c != ' ' && c != '\t' && c != '\n'))) + { + if (! ReadChar (out c)) + break; + if (c < 0x10000) + { + buf[i++] = (char) c; + } + else + { + buf[i++] = (char) (0xD800 + ((c - 0x10000) >> 10)); + buf[i++] = (char) (0xDC00 + ((c - 0x10000) & 0x3FF)); + } + if (i >= 255) + { + if (str == null) + str = new string (buf, 0, i); + else + str += new string (buf, 0, i); + i = 0; + } + } + if (c == '\"' && ! for_symbol) Read (); - if (c == '\\') - { - c = Read (); - if (c == -1) - c = '\\'; - } - return (char) c + ReadSymbolName (); - } + if (i > 0) + { + if (str == null) + str = new string (buf, 0, i); + else + str += new string (buf, 0, i); + } + return (str != null); + } - public bool ReadElement (out MSymbol key, out object val) - { - int c = PeekChar (); + public bool ReadString (out string str) + { + return read_string (out str, -1, false); + } - if (c == '(') - { - Read (); - val = new MPlist (this); - key = MSymbol.plist; - } - else if (c == '"') - { - Read (); - val = ReadMtext (); - key = MSymbol.mtext; - } - else if (c >= '0' && c <= '9') - { - int i = ReadInteger (); + public bool ReadMText (out MText mt) + { + int c = Peek (); - val = i; - key = MSymbol.integer; - } - else if (c == '-') - { - Read (); - c = Peek (); - if (c >= '0' && c <= '9') - { - int i = ReadInteger (); - val = - i; - key = MSymbol.integer; - } - else - { - string str = ReadSymbolName (); + if (c == '"') + { + string str; - val = MSymbol.Of ("-" + str); - key = MSymbol.symbol; - } - } - else if (c == '?') - { - Read (); - val = ReadChar (); - key = MSymbol.integer; - } - else if (c == '#') - { - Read (); - if ((c = Peek ()) == 'x' || c == 'u') - { - Read (); - val = ReadHexadecimal (); - key = MSymbol.integer; - } - else - { - val = MSymbol.Of ("#" + (char) c + ReadSymbolName ()); - key = MSymbol.symbol; - } - } - else if (c == -1 || c == ')') - { - if (c == ')') + Read (); + if (read_string (out str, -1, false)) + mt = new MText (str); + else + mt = new MText (); + return true; + } + mt = new MText (); + if (c == '\\') + { + while ((c = Peek ()) == '\\') + { Read (); - val = null; - key = MSymbol.nil; - return false; - } - else - { - val = MSymbol.Of (ReadSymbolName ()); - key = MSymbol.symbol; - } - return true; - } + c = Peek (); + if (c != 'x') + break; + Read (); + mt.Cat (ReadHexadecimal (0x10FFFF)); + } + return true; + } + return false; + } + + public bool ReadSymbol (out MSymbol sym, int prefix) + { + string str; + + if (read_string (out str, prefix, true)) + { + sym = MSymbol.Of (str); + return true; + } + sym = MSymbol.nil; + return false; + } + + internal bool ReadElement (out MSymbol key, out object val) + { + int c; + + if (! PeekChar (out c)) + { + val = null; + key = MSymbol.nil; + return false; + } + + if (c == '(') + { + Read (); + val = new MPlist (this); + key = MSymbol.plist; + } + else if (c == '"' || c == '\\') + { + MText mt; + ReadMText (out mt); + val = mt; + key = MSymbol.mtext; + } + else if (c >= '0' && c <= '9') + { + int i; + ReadInteger (out i); + val = i; + key = MSymbol.integer; + } + else if (c == '-') + { + Read (); + c = Peek (); + if (c >= '0' && c <= '9') + { + int i; + ReadInteger (out i); + val = - i; + key = MSymbol.integer; + } + else + { + MSymbol sym; + + ReadSymbol (out sym, '-'); + val = sym; + key = MSymbol.symbol; + } + } + else if (c == '?') + { + Read (); + if (ReadChar (out c)) + { + val = c; + key = MSymbol.integer; + } + else + { + val = null; + key = MSymbol.nil; + } + } + else if (c == '#') + { + Read (); + if ((c = Peek ()) == 'x' || c == 'u') + { + Read (); + val = ReadHexadecimal (-1); + key = MSymbol.integer; + } + else + { + MSymbol sym; + + ReadSymbol (out sym, '#'); + val = sym; + key = MSymbol.symbol; + } + } + else if (c == ')') + { + Read (); + val = null; + key = MSymbol.nil; + return false; + } + else + { + MSymbol sym; + + ReadSymbol (out sym, -1); + val = sym; + key = MSymbol.symbol; + } + return true; } } } diff --git a/MSymbol.cs b/MSymbol.cs index 0fbadab..39f9958 100644 --- a/MSymbol.cs +++ b/MSymbol.cs @@ -17,6 +17,7 @@ namespace M17N.Core public static MSymbol t = MSymbol.Of ("t"); public static MSymbol symbol = MSymbol.Of ("symbol"); public static MSymbol mtext = MSymbol.Of ("mtext"); + public static MSymbol mstring = MSymbol.Of ("string"); public static MSymbol plist = MSymbol.Of ("plist"); public static MSymbol integer = MSymbol.Of ("integer"); diff --git a/MText.cs b/MText.cs index 7f6de04..05e6fcf 100644 --- a/MText.cs +++ b/MText.cs @@ -152,6 +152,13 @@ namespace M17N.Core intervals = new MPlist (); } + public MText (byte[] str, int offset, int length) + { + sb = new StringBuilder (utf8.GetString (str, offset, length)); + nchars = count_chars (sb); + intervals = new MPlist (); + } + public MText (String str) { sb = new StringBuilder (str); diff --git a/chartab.cs b/chartab.cs index 02c7485..7262d93 100644 --- a/chartab.cs +++ b/chartab.cs @@ -30,7 +30,20 @@ public class Test Console.WriteLine ("# Using MCharRange from U+3000 to U+0"); MCharRange r = new MCharRange (0x3000, tbl); do { - Console.WriteLine (r); + if (r.Value != null) + Console.WriteLine (r); } while (r.Prev ()); + + Console.WriteLine ("# Using MCharProp of BIDI"); + MDatabase.ApplicationDir = "/usr/local/share/m17n"; + MCharProp bidi = new MCharProp (MSymbol.Of ("bidirectional-category")); + r = new MCharRange (0, bidi); + while (r.To < 0x800) + { + if (r.Value != null) + Console.WriteLine (r); + r.Next (); + } + Console.WriteLine ("..."); } } diff --git a/database.cs b/database.cs index ccd5127..5247bb5 100644 --- a/database.cs +++ b/database.cs @@ -10,20 +10,15 @@ public class Test { //M17n.debug = true; MDatabase.ApplicationDir = "/usr/local/share/m17n"; - MDatabase.Tag tag = new MDatabase.Tag (MSymbol.Of ("input-method"), - MSymbol.t, - MSymbol.Of ("unicode")); + MDatabase.Tag tag + = new MDatabase.Tag (MSymbol.Of ("standard"), + MSymbol.Of ("script"), + MSymbol.Of ("unicode")); foreach (string dirname in MDatabase.DirectoryList ()) Console.WriteLine (dirname); - while (true) - { - Console.Write ("mdb> "); - string str = Console.ReadLine (); - if (str == null || str == "quit" || str == "exit") - break; - Console.WriteLine (MDatabase.Find (tag)); - } + MDatabase mdb = MDatabase.Find (tag); + Console.WriteLine (mdb.Load ()); } } diff --git a/temp.plist b/temp.plist index 3689780..d1d9645 100644 --- a/temp.plist +++ b/temp.plist @@ -1,4 +1,8 @@ "\xC0" -(a 123) +(a 123 0x123) -10 -?\xC0 \ No newline at end of file +?\xC0 +"\ +continuous line" +"漢字𐀀\x1000000k" +\x6f22\x1000000k -- 1.7.10.4