using System; using System.Text; using System.Collections; using System.Collections.Generic; using M17N.Core; namespace M17N.Core { #if false public enum MTextFormat { MTEXT_FORMAT_US_ASCII, MTEXT_FORMAT_UTF_8, MTEXT_FORMAT_UTF_16BE, MTEXT_FORMAT_UTF_16LE, MTEXT_FORMAT_UTF_32BE, MTEXT_FORMAT_UTF_32LE, } #endif public class MTextProperty { internal MSymbol key; internal object val; [FlagsAttribute] internal enum Flag : byte { None = 0, FrontSticky = 1, RearSticky = 2, Sensitive = 4 }; internal Flag flags; public MSymbol Key { get { return key; } } public object Val { get { return val; } } public bool FrontSticky { get { return (flags & Flag.FrontSticky) != Flag.None; } } public bool RearSticky { get { return (flags & Flag.RearSticky) != Flag.None; } } public bool Sensitive { get { return (flags & Flag.Sensitive) != Flag.None; } } public MTextProperty (MSymbol key, object val) { this.key = key; this.val = val; flags |= Flag.RearSticky; } public MTextProperty (MSymbol key, object val, bool front_sticky, bool rear_sticky, bool sensitive) { this.key = key; this.val = val; if (front_sticky) flags |= Flag.FrontSticky; if (rear_sticky) flags |= Flag.RearSticky; if (sensitive) flags |= Flag.Sensitive; } public override string ToString () { return key.ToString () + ":" + val; } } public class MText : IEnumerable, IEquatable, IComparable { #if false public enum MTextFormat format; #endif private StringBuilder sb; private int nchars; private int cache_pos; private int cache_idx; private MPlist intervals; private MPlist default_property; private bool read_only; private static UTF8Encoding utf8 = new UTF8Encoding (); private static int count_chars (String str) { int len = str.Length, n = 0; for (int i = 0; i < len; i++, n++) if (surrogate_high_p (str[i])) i++; return n; } private static int count_chars (StringBuilder str) { int len = str.Length, n = 0; for (int i = 0; i < len; i++, n++) if (surrogate_high_p (str[i])) i++; return n; } public MText () { sb = new StringBuilder (); intervals = new MPlist (); } public MText (byte[] str) { sb = new StringBuilder (utf8.GetString (str)); nchars = count_chars (sb); intervals = new MPlist (); } public MText (String str) { sb = new StringBuilder (str); nchars = count_chars (str); intervals = new MPlist (); } public MText (StringBuilder str) { sb = str; nchars = count_chars (str); intervals = new MPlist (); } public static MText operator+ (MText mt1, MText mt2) { MText mt = new MText (); mt.sb.Append (mt1.sb); mt.sb.Append (mt2.sb); mt.nchars = mt1.nchars + mt2.nchars; return mt; } // Public properties public bool ReadOnly { get { return read_only; } } public int Length { get { return nchars; } } // Public methods // for IEnumerable interface public IEnumerator GetEnumerator() { return new MTextEnum (this); } // for IEquatable interface public bool Equals (MText other) { return this.sb.Equals (other.sb); } // for IComparable interface public int CompareTo (MText other) { return this.sb.ToString ().CompareTo (other.sb.ToString ()); } public override String ToString () { return "\"" + sb.ToString () + "\""; } private static bool surrogate_high_p (char c) { return (c >= 0xD800 && c < 0xDC00); } private static bool surrogate_low_p (char c) { return (c >= 0xDC00 && c < 0xE000); } private static int inc_idx (StringBuilder sb, int i) { return (i + (surrogate_high_p (sb[i]) ? 2 : 1)); } private static int dec_idx (StringBuilder sb, int i) { return (i - (surrogate_low_p (sb[i - 1]) ? 2 : 1)); } private static int pos_to_idx (MText mt, int pos) { if (pos == mt.cache_pos) return mt.cache_idx; int p, i; bool forward; if (pos < mt.cache_pos) { if (mt.cache_pos == mt.cache_idx) return mt.cache_idx; if (pos < mt.cache_pos - pos) { p = i = 0; forward = true; } else { p = mt.cache_pos; i = mt.cache_idx; forward = false; } } else { if (mt.nchars - mt.cache_pos == mt.sb.Length - mt.cache_idx) return (mt.cache_idx + pos - mt.cache_pos); if (pos - mt.cache_pos < mt.nchars - pos) { p = mt.cache_pos; i = mt.cache_idx; forward = true; } else { p = mt.nchars; i = mt.sb.Length; forward = false; } } if (forward) for (; p < pos; i = inc_idx (mt.sb, i), p++); else for (; p > pos; i = dec_idx (mt.sb, i), p--); mt.cache_pos = p; mt.cache_idx = i; return i; } private void check_pos (int pos, bool tail_ok) { if (pos < 0 || (tail_ok ? pos > nchars : pos >= nchars)) throw new Exception ("Invalid MText position:" + pos); } private void check_range (int from, int to, bool zero_ok) { if (from < 0 || (zero_ok ? from > to : from >= to) || to > nchars) throw new Exception ("Invalid MText range"); } private void insert (int pos, MText mt2, int from, int to) { check_pos (pos, true); int pos_idx = pos_to_idx (this, pos); int from_idx = pos_to_idx (mt2, from); int to_idx = pos_to_idx (mt2, to); sb.Insert (pos_idx, mt2.sb.ToString (from_idx, to_idx - from_idx)); nchars += to - from; foreach (MPlist plist in mt2.intervals) if (intervals.Find (plist.Key) == null) intervals.Push (plist.Key, new MInterval (plist.Key, this)); foreach (MPlist plist in intervals) { MPlist p = mt2.intervals.Find (plist.Key); MInterval interval; if (p.IsEmpty) interval = new MInterval (plist.Key, to - from); else interval = ((MInterval) p.Val).copy (from, to); ((MInterval) plist.Val).Insert (pos, interval); } } public int this[int i] { set { i = pos_to_idx (this, i); if (value < 0x10000) { if (surrogate_high_p (sb[i])) sb.Remove (i, 1); sb[i] = (char) value; } else { char high = (char) (0xD800 + ((value - 0x10000) >> 10)); char low = (char) (0xDC00 + ((value - 0x10000) & 0x3FF)); if (! surrogate_high_p (sb[i])) sb.Insert (i, 0); sb[i] = high; sb[i + 1] = low; } } get { i = pos_to_idx (this, i); return (surrogate_high_p (sb[i]) ? ((sb[i] - 0xD800) << 10) + (sb[i + 1] - 0xDC00) + 0x10000 : sb[i]); } } public MText Dup () { return (new MText (sb.ToString ())); } public MText Ins (int pos, MText mt) { insert (pos, mt, 0, mt.nchars); return this; } public MText Ins (int pos, MText mt, int from, int to) { insert (pos, mt, from, to); return this; } public MText Del (int from, int to) { check_range (from, to, true); sb.Remove (from, pos_to_idx (this, to) - pos_to_idx (this, from)); nchars -= to - from; if (nchars > 0) foreach (MPlist plist in intervals) ((MInterval) plist.Val).Delete (from, to); else intervals = new MPlist (); return this; } public object GetProp (int pos, MSymbol key) { check_pos (pos, false); MInterval i = (MInterval) intervals.Find (key).Val; if (i == null) return null; MTextProperty prop = i.Get (pos); return (prop != null ? prop.Val : null); } public object GetProp (int pos, MSymbol key, out MTextProperty prop) { check_pos (pos, false); MInterval i = (MInterval) intervals.Find (key).Val; if (i == null) return (prop = null); prop = i.Get (pos); return (prop != null ? prop.Val : null); } public object GetProp (int pos, MSymbol key, out MTextProperty[] array) { check_pos (pos, false); MInterval i = (MInterval) intervals.Find (key).Val; if (i == null) return (array = null); MTextProperty prop = i.Get (pos, out array); return (prop != null ? prop.Val : null); } public void PushProp (int from, int to, MSymbol key, object val) { check_range (from, to, false); PushProp (from, to, new MTextProperty (key, val)); } public void PushProp (int from, int to, MTextProperty prop) { if (from < 0) { if (default_property == null) default_property = new MPlist (); default_property.Push (prop.key, prop.val); } else { MInterval root = (MInterval) intervals.Find (prop.key).Val; if (root == null) { root = new MInterval (prop.key, this); intervals.Push (prop.key, root); } root.Push (from, to, prop); } } public void DumpProp () { Console.Write ("("); foreach (MPlist p in intervals) ((MInterval) p.Val).Dump (); Console.WriteLine (")"); } private class MInterval { // position: 0 1 2 3 4 5 6 7 // | A | B | C | D | E F | G | // interval |---|---|---|<->|-------|---| // |---|<->|---| |<----->|---| // |<->| |<->| |<->| // // [7 (3 4)] // [3 (1 2)] [3 (4 6)] // [1 (0 1)] [2 (2 3)] [1 (6 7)] // private static int count = 0; private int id; private int total_length; private int from, to; private MSymbol key; private MPlist stack; private MInterval left, right, parent; private MText mtext; public MInterval (MSymbol key, int length) { if (length <= 0) throw new Exception ("Invalid interval length"); this.key = key; total_length = length; stack = new MPlist (); id = count++; } public MInterval (MSymbol key, MText mt) { this.key = key; mtext = mt; total_length = mt.sb.Length; from = 0; to = total_length; stack = new MPlist (); id = count++; } public MTextProperty Get (int pos) { MInterval i = find (pos); return (i.stack.IsEmpty ? null : (MTextProperty) i.stack.Val); } public MTextProperty Get (int pos, out MTextProperty[] array) { MInterval i = find (pos); if (i.stack.IsEmpty) { array = null; return null; } array = new MTextProperty[i.stack.Count]; int idx; MPlist p; for (idx = 0, p = i.stack; ! p.IsEmpty; idx++, p = p.Next) array[idx] = (MTextProperty) p.Val; return array[idx - 1]; } private MInterval (MSymbol key, int length, MPlist stack) { this.key = key; total_length = length; from = 0; to = total_length; this.stack = stack.Clone (); id = count++; } private void update_from_to () { if (parent == null) { from = LeftLength; to = total_length - RightLength; } else if (parent.left == this) { from = parent.from - total_length + LeftLength; to = parent.from - RightLength; } else { from = parent.to + LeftLength; to = parent.to + total_length - RightLength; } } private int LeftLength { get { return (left == null ? 0 : left.total_length); } } private int RightLength { get { return (right == null ? 0 : right.total_length); } } private MInterval left_most_node { get { return (left == null ? this : left.left_most_node); } } private MInterval right_most_node { get { return (right == null ? this : right.right_most_node); } } private MInterval prev { get { MInterval i; if (left != null) for (i = left; i.right != null; i = i.right); else for (i = parent; i != null && i.left == null; i = i.parent); return i; } } private MInterval next { get { MInterval i; if (right != null) for (i = right; i.left != null; i = i.left); else for (i = parent; i != null && i.right == null; i = i.parent); return i; } } private MInterval find (int pos) { update_from_to (); if (pos < from) return left.find (pos); if (pos >= to) return right.find (pos); return this; } // p-. or .-p p-. or .-p // .-this-. .-right-. // left .-right-. -> .-this-. c2 // c1 c2 left c1 private MInterval promote_right () { int right_length = right.total_length; MInterval c1; if (parent == null) mtext.intervals.Put (key, right); else if (parent.left == this) parent.left = right; else parent.right = right; right.parent = parent; c1 = right.left; right.left = this; parent = right; right = c1; parent.total_length += total_length; total_length -= right_length; if (c1 != null) { c1.parent = this; parent.total_length -= c1.total_length; total_length += c1.total_length; } return parent; } // p-. or .-p p-. or .-p // .-this-. .-left-. // .-left-. .-right-. -> c1 .-this-. // c1 c2 c2 right private MInterval promote_left () { int left_length = left.total_length; MInterval c1; if (parent == null) mtext.intervals.Put (key, left); else if (parent.left == this) parent.left = left; else parent.right = left; left.parent = parent; c1 = left.left; left.right = this; parent = left; left = c1; parent.total_length += total_length; total_length -= left_length; if (c1 != null) { c1.parent = this; parent.total_length -= c1.total_length; total_length += c1.total_length; } return parent; } private MInterval balance () { MInterval i = this; while (true) { // .-this-. // .-left-. .-right-. // c1 c2 c3 c4 int diff = i.LeftLength - i.RightLength; int new_diff; if (diff > 0) { new_diff = (i.total_length - i.LeftLength + i.left.RightLength - i.left.LeftLength); if (Math.Abs (new_diff) >= diff) break; i = i.promote_left (); i.right.balance (); } else if (diff < 0) { new_diff = (i.total_length - i.RightLength + i.right.LeftLength - i.right.RightLength); if (Math.Abs (new_diff) >= diff) break; i = i.promote_right (); i.left.balance (); } } return i; } public MInterval copy (int start, int end) { MInterval this_copy, left_copy = null, right_copy = null; update_from_to (); if (start < from) { if (end <= from) return left.copy (start, end); left_copy = left.copy (start, from); } else if (end > to) { if (start >= to) return right.copy (start, end); right_copy = right.copy (to, end); } this_copy = new MInterval (key, end - start, stack); this_copy.left = left_copy; this_copy.right = right_copy; return this_copy; } // this-. ==> this-. // right newright-. // right private MInterval divide_right (int pos) { MInterval interval = new MInterval (key, to - pos, stack); Console.Write ("divide-right({0}) at ", pos); DumpOne (false, true); to = pos; if (right != null) { interval.right = right; right.parent = interval; interval.total_length += right.total_length; } interval.parent = this; right = interval; return interval; } // .-this ==> .-this // left .-newleft // left private MInterval divide_left (int pos) { MInterval interval = new MInterval (key, pos - from, stack); Console.Write ("divide-reft({0}) at ", pos); DumpOne (false, true); from = pos; if (left != null) { interval.left = left; left.parent = interval; interval.total_length += left.total_length; } interval.parent = this; left = interval; return interval; } private void remove_properties (MTextProperty.Flag flags) { for (MPlist p = stack; ! p.IsEmpty;) { MTextProperty prop = (MTextProperty) p.Val; if ((prop.flags & flags) == flags) p.Pop (); else p = p.Next; } } private void merge_front_properties (MPlist plist) { for (MInterval i = left_most_node; i != null; i = i.next) { if (! stack.IsEmpty) break; for (MPlist p = plist; ! p.IsEmpty; p = p.Next) { MTextProperty prop = (MTextProperty) p.Val; if ((prop.flags & MTextProperty.Flag.RearSticky) == MTextProperty.Flag.RearSticky) i.stack.Push (prop.key, prop); } } } private void merge_rear_properties (MPlist plist) { for (MInterval i = right_most_node; i != null; i = i.prev) { if (! stack.IsEmpty) break; for (MPlist p = plist; ! p.IsEmpty; p = p.Next) { MTextProperty prop = (MTextProperty) p.Val; if ((prop.flags & MTextProperty.Flag.FrontSticky) == MTextProperty.Flag.FrontSticky) i.stack.Push (prop.key, prop); } } } public void Insert (int pos, MInterval interval) { update_from_to (); Console.Write ("insert({0}) at ", pos); DumpOne (false, true); if (pos < from || (pos == from && left == null && pos > 0)) { prev.Insert (pos, interval); return; } if (pos > to || (pos == to && right == null && next != null)) { next.Insert (pos, interval); return; } if (pos > from && pos < to) { remove_properties (MTextProperty.Flag.Sensitive); divide_right (pos).Insert (pos, interval); return; } if (pos == from) { if (pos > 0) { prev.remove_properties (MTextProperty.Flag.Sensitive|MTextProperty.Flag.RearSticky); interval.merge_front_properties (prev.stack); } remove_properties (MTextProperty.Flag.Sensitive|MTextProperty.Flag.FrontSticky); interval.merge_rear_properties (stack); // INTERVAL is ready to insert. // // .-this-. ==> .-this-. // left-. left-. // child .-interval // child if (pos > 0) { MInterval i = left.right_most_node; i.left = interval; interval.parent = i; for (; i != null; i = i.parent) i.total_length += interval.total_length; } else { left = interval; for (MInterval i = this; i != null; i = i.parent) i.total_length += interval.total_length; } } else // pos == to { if (right != null) { MInterval left_most = right.left_most_node; left_most.remove_properties (MTextProperty.Flag.Sensitive|MTextProperty.Flag.FrontSticky); interval.merge_rear_properties (left_most.stack); } remove_properties (MTextProperty.Flag.Sensitive|MTextProperty.Flag.RearSticky); interval.merge_front_properties (stack); // INTERVAL is ready to insert. // // .-this-. ==> .-this-. // .-right .-right // child interval-. // child if (right != null) { MInterval i = right.left_most_node; i.left = interval; interval.parent = i; for (; i != null; i = i.parent) i.total_length += interval.total_length; } else { right = interval; for (MInterval i = this; i != null; i = i.parent) i.total_length += interval.total_length; } } } private void vacate_node (MInterval interval) { Console.WriteLine ("vacate #{0} to #{1}", id, interval.id); if (interval != null) interval.parent = parent; if (parent == null) { mtext.intervals.Put (key, interval); } else { if (this == parent.right) parent.right = interval; else parent.left = interval; int diff = total_length; if (interval != null) diff -= interval.total_length; for (MInterval i = parent; i != null; i = i.parent) i.total_length -= diff; } } public void Delete (int start, int end) { update_from_to (); Console.Write ("delete({0} {1}) at ", start, end); DumpOne (false, true); if (start < from) { if (end <= from) { left.Delete (start, end); return; } left.Delete (start, from); to -= from - start; end -= from - start; from = start; } if (end > to) { if (start >= to) { right.Delete (start, end); return; } right.Delete (to, end); end = to; } if (start == from && end == to) { if (right == null) { vacate_node (left); } else { if (left != null) { MInterval i; for (i = right; i.left != null; i = i.left) i.total_length += left.total_length; i.total_length += left.total_length; i.left = left; left.parent = i; } vacate_node (right); } } else { int len = end - start; for (MInterval i = this; i != null; i = i.parent) i.total_length -= len; } } public void Push (int start, int end, MTextProperty prop) { update_from_to (); Console.Write ("push({0} {1}) at ", start, end); DumpOne (false, true); if (start < from) { if (end <= from) { left.Push (start, end, prop); return; } left.Push (start, from, prop); start = from; } if (end > to) { if (start >= to) { right.Push (start, end, prop); return; } right.Push (to, end, prop); end = to; } if (start > from) divide_left (start); if (end < to) divide_right (end); stack.Push (prop.key, prop); } private void DumpOne (bool with_prop, bool newline) { Console.Write ("#{0}({1} {2} {3}", id, total_length, from, to); if (with_prop) foreach (MPlist p in stack) Console.Write (" " + p.Val); Console.Write (")"); if (newline) Console.WriteLine (); } public void Dump () { update_from_to (); if (left != null) left.Dump (); if (from > 0) Console.Write (" "); DumpOne (true, false); if (right != null) right.Dump (); } } private class MTextEnum : IEnumerator { private MText mt; private int pos = -1; public MTextEnum (MText mt) { this.mt = mt; } public bool MoveNext () { pos++; return (pos < mt.nchars); } public void Reset () { pos = -1; } public object Current { get { //if (pos < 0 || pos >= mt.nchars) //throw new InvalidOperationException (); return mt[pos]; } } } } }