(G0-3947): Fix `total-strokes'.

[chise/xemacs-chise.git] / src / mule-charset.c
diff --git a/src/mule-charset.c b/src/mule-charset.c

index 24a74c7..5a6fc4c 100644 (file)
--- a/src/mule-charset.c
+++ b/src/mule-charset.c
@@ -1,7 +1,7 @@
  /* Functions to handle multilingual characters.
     Copyright (C) 1992, 1995 Free Software Foundation, Inc.
     Copyright (C) 1995 Sun Microsystems, Inc.
-   Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
+   Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
  
  This file is part of XEmacs.
  
@@ -77,7 +77,7 @@ Lisp_Object Vcharset_latin_tcvn5712;
  Lisp_Object Vcharset_latin_viscii_lower;
  Lisp_Object Vcharset_latin_viscii_upper;
  Lisp_Object Vcharset_chinese_big5;
-Lisp_Object Vcharset_chinese_big5_cdp;
+/* Lisp_Object Vcharset_chinese_big5_cdp; */
  Lisp_Object Vcharset_ideograph_hanziku_1;
  Lisp_Object Vcharset_ideograph_hanziku_2;
  Lisp_Object Vcharset_ideograph_hanziku_3;
@@ -176,20 +176,6 @@ const Bytecount rep_bytes_by_first_byte[0xA0] =
  
  #ifdef UTF2000
  
-INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
-INLINE_HEADER int
-CHARSET_BYTE_SIZE (Lisp_Charset* cs)
-{
-  /* ad-hoc method for `ascii' */
-  if ((CHARSET_CHARS (cs) == 94) &&
-      (CHARSET_BYTE_OFFSET (cs) != 33))
-    return 128 - CHARSET_BYTE_OFFSET (cs);
-  else
-    return CHARSET_CHARS (cs);
-}
-
-#define XCHARSET_BYTE_SIZE(ccs)        CHARSET_BYTE_SIZE (XCHARSET (ccs))
-
  int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
  int
  decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
@@ -218,58 +204,6 @@ decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
    return 0;
  }
  
-INLINE_HEADER void
-decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
-                           int code_point);
-INLINE_HEADER void
-decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
-                           int code_point)
-{
-  int i = -1;
-
-  while (dim > 0)
-    {
-      Lisp_Object nv;
-
-      dim--;
-      i = ((code_point >> (8 * dim)) & 255) - byte_offset;
-      nv = XVECTOR_DATA(v)[i];
-      if (!VECTORP (nv))
-       break;
-      v = nv;
-    }
-  if (i >= 0)
-    XVECTOR_DATA(v)[i] = Qnil;
-}
-
-INLINE_HEADER void
-decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
-                        int code_point, Lisp_Object character);
-INLINE_HEADER void
-decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
-                        int code_point, Lisp_Object character)
-{
-  int i = -1;
-  Lisp_Object nv;
-  int ccs_len = XVECTOR_LENGTH (v);
-
-  while (dim > 0)
-    {
-      dim--;
-      i = ((code_point >> (8 * dim)) & 255) - byte_offset;
-      nv = XVECTOR_DATA(v)[i];
-      if (dim > 0)
-       {
-         if (!VECTORP (nv))
-           nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
-         v = nv;
-       }
-      else
-       break;
-    }
-  XVECTOR_DATA(v)[i] = character;
-}
-
  Lisp_Object
  put_char_ccs_code_point (Lisp_Object character,
                          Lisp_Object ccs, Lisp_Object value)
@@ -279,9 +213,6 @@ put_char_ccs_code_point (Lisp_Object character,
        || (XCHAR (character) != XINT (value)))
      {
        Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
-      int dim = XCHARSET_DIMENSION (ccs);
-      int ccs_len = XCHARSET_BYTE_SIZE (ccs);
-      int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
        int code_point;
  
        if (CONSP (value))
@@ -331,16 +262,10 @@ put_char_ccs_code_point (Lisp_Object character,
           Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
           if (INTP (cpos))
             {
-             decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
+             decoding_table_remove_char (ccs, XINT (cpos));
             }
         }
-      else
-       {
-         XCHARSET_DECODING_TABLE (ccs)
-           = v = make_vector (ccs_len, Qnil);
-       }
-
-      decoding_table_put_char (v, dim, byte_offset, code_point, character);
+      decoding_table_put_char (ccs, code_point, character);
      }
    return value;
  }
@@ -357,15 +282,12 @@ remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
  
        if (!NILP (cpos))
         {
-         decoding_table_remove_char (decoding_table,
-                                     XCHARSET_DIMENSION (ccs),
-                                     XCHARSET_BYTE_OFFSET (ccs),
-                                     XINT (cpos));
+         decoding_table_remove_char (ccs, XINT (cpos));
         }
      }
    if (CHAR_TABLEP (encoding_table))
      {
-      put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
+      put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
      }
    return Qt;
  }
@@ -428,7 +350,7 @@ Lisp_Object Qascii,
    Qvietnamese_viscii_lower,
    Qvietnamese_viscii_upper,
    Qchinese_big5,
-  Qchinese_big5_cdp,
+  /*  Qchinese_big5_cdp, */
    Qideograph_hanziku_1,
    Qideograph_hanziku_2,
    Qideograph_hanziku_3,
@@ -895,7 +817,7 @@ make_charset (Charset_ID id, Lisp_Object name,
    CHARSET_CCL_PROGRAM  (cs) = Qnil;
    CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
  #ifdef UTF2000
-  CHARSET_DECODING_TABLE(cs) = Qnil;
+  CHARSET_DECODING_TABLE(cs) = Qunbound;
    CHARSET_MIN_CODE     (cs) = min_code;
    CHARSET_MAX_CODE     (cs) = max_code;
    CHARSET_CODE_OFFSET  (cs) = code_offset;
@@ -984,12 +906,44 @@ get_unallocated_leading_byte (int dimension)
  #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
  
  Emchar
+decode_defined_char (Lisp_Object ccs, int code_point) /* Decode CODE_POINT in charset CCS through its decoding table, falling back to the mother charset. */
+{ /* Returns the character found, or -1 when no character is found. */
+  int dim = XCHARSET_DIMENSION (ccs); /* number of octets still to be consumed */
+  Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
+  Emchar char_id = -1; /* NOTE(review): never reassigned, so the `char_id >= 0' test below is always false */
+  Lisp_Object mother;
+
+  while (dim > 0) /* one lookup per octet, most significant octet first */
+    {
+      dim--;
+      decoding_table
+       = get_ccs_octet_table (decoding_table, ccs,
+                              (code_point >> (dim * 8)) & 255); /* presumably yields the sub-table or leaf for this octet -- TODO confirm */
+    }
+  if (CHARP (decoding_table)) /* the final lookup produced a character: found */
+    return XCHAR (decoding_table);
+  if (char_id >= 0)
+    return char_id;
+  else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) ) /* no direct entry: consult the mother charset, if there is one */
+    {
+      if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL ) /* the unchanged code point is handed to the mother only for this conversion */
+       {
+         if ( EQ (mother, Vcharset_ucs) ) /* a UCS mother goes through DECODE_CHAR instead of recursing */
+           return DECODE_CHAR (mother, code_point);
+         else
+           return decode_defined_char (mother, code_point);
+       }
+    }
+  return -1; /* nothing found here, and no usable mother fallback */
+}
+
+Emchar
  decode_builtin_char (Lisp_Object charset, int code_point)
  {
    Lisp_Object mother = XCHARSET_MOTHER (charset);
    int final;
  
-  if ( CHARSETP (mother) )
+  if ( CHARSETP (mother) && (XCHARSET_MAX_CODE (charset) > 0) )
      {
        int code = code_point;
  
@@ -1028,8 +982,10 @@ decode_builtin_char (Lisp_Object charset, int code_point)
               + (row - (18 + 32)) * 94
               + cell - 33;
         }
-      return DECODE_CHAR (mother, code + XCHARSET_CODE_OFFSET(charset));
+      return
+       decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
      }
+#if 0
    else if (EQ (charset, Vcharset_chinese_big5))
      {
        int c1 = code_point >> 8;
@@ -1056,6 +1012,7 @@ decode_builtin_char (Lisp_Object charset, int code_point)
           code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
         }
      }
+#endif
    if ((final = XCHARSET_FINAL (charset)) >= '0')
      {
        if (XCHARSET_DIMENSION (charset) == 1)
@@ -1134,7 +1091,8 @@ charset_code_point (Lisp_Object charset, Emchar ch)
         code = charset_code_point (mother, ch);
        else
         code = ch;
-      if ( (min <= code) && (code <= max) )
+      if ( ((max == 0) && CHARSETP (mother)) ||
+          ((min <= code) && (code <= max)) )
         {
           int d = code - XCHARSET_CODE_OFFSET (charset);
  
@@ -1493,6 +1451,7 @@ character set.  Recognized properties are:
                 this character set.
  'dimension     Number of octets used to index a character in this charset.
                 Either 1 or 2.  Defaults to 1.
+               If the UTF-2000 feature is enabled, 3 and 4 are also allowed.
  'columns       Number of columns used to display a character in this charset.
                 Only used in TTY mode. (Under X, the actual width of a
                 character can be derived from the font used to display the
@@ -1501,6 +1460,7 @@ character set.  Recognized properties are:
  'chars         Number of characters in each dimension (94 or 96).
                 Defaults to 94.  Note that if the dimension is 2, the
                 character set thus described is 94x94 or 96x96.
+               If the UTF-2000 feature is enabled, 128 and 256 are also allowed.
  'final         Final byte of ISO 2022 escape sequence.  Must be
                 supplied.  Each combination of (DIMENSION, CHARS) defines a
                 separate namespace for final bytes.  Note that ISO
@@ -1525,6 +1485,13 @@ character set.  Recognized properties are:
                 is passed the octets of the character, with the high
                 bit cleared and set depending upon whether the value
                 of the 'graphic property is 0 or 1.
+'mother                [UTF-2000 only] Base coded-charset.
+'code-min      [UTF-2000 only] Minimum code-point of a base coded-charset.
+'code-max      [UTF-2000 only] Maximum code-point of a base coded-charset.
+'code-offset   [UTF-2000 only] Offset for a code-point of a base
+               coded-charset.
+'conversion    [UTF-2000 only] Conversion for a code-point of a base
+               coded-charset (94x60 or 94x94x60).
  */
         (name, doc_string, props))
  {
@@ -2117,7 +2084,7 @@ If corresponding character is not found, nil is returned.
    if (NILP (defined_only))
      c = DECODE_CHAR (charset, c);
    else
-    c = DECODE_DEFINED_CHAR (charset, c);
+    c = decode_defined_char (charset, c);
    return c >= 0 ? make_char (c) : Qnil;
  }
  
@@ -2518,7 +2485,7 @@ syms_of_mule_charset (void)
    defsymbol (&Qideograph_daikanwa_2,   "ideograph-daikanwa-2");
    defsymbol (&Qideograph_daikanwa,     "ideograph-daikanwa");
    defsymbol (&Qchinese_big5,           "chinese-big5");
-  defsymbol (&Qchinese_big5_cdp,       "chinese-big5-cdp");
+  /*  defsymbol (&Qchinese_big5_cdp,   "chinese-big5-cdp"); */
    defsymbol (&Qideograph_hanziku_1,    "ideograph-hanziku-1");
    defsymbol (&Qideograph_hanziku_2,    "ideograph-hanziku-2");
    defsymbol (&Qideograph_hanziku_3,    "ideograph-hanziku-3");
@@ -2612,7 +2579,7 @@ complex_vars_of_mule_charset (void)
                   build_string ("UCS"),
                   build_string ("ISO/IEC 10646"),
                   build_string (""),
-                 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
+                 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
    staticpro (&Vcharset_ucs_bmp);
    Vcharset_ucs_bmp =
      make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
@@ -2651,17 +2618,17 @@ complex_vars_of_mule_charset (void)
                   build_string ("UCS for CNS 11643"),
                   build_string ("ISO/IEC 10646 for CNS 11643"),
                   build_string (""),
-                 Qnil, 0, 0, 0, 0,
-                 Qnil, CONVERSION_IDENTICAL);
+                 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
    staticpro (&Vcharset_ucs_jis);
    Vcharset_ucs_jis =
      make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
                   2, 2, 0, CHARSET_LEFT_TO_RIGHT,
                   build_string ("UCS for JIS"),
                   build_string ("UCS for JIS X 0208, 0212 and 0213"),
-                 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
+                 build_string
+                 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
                   build_string (""),
-                 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
+                 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
    staticpro (&Vcharset_ucs_ks);
    Vcharset_ucs_ks =
      make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
@@ -2670,7 +2637,7 @@ complex_vars_of_mule_charset (void)
                   build_string ("UCS for CCS defined by KS"),
                   build_string ("ISO/IEC 10646 for Korean Standards"),
                   build_string (""),
-                 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
+                 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
    staticpro (&Vcharset_ucs_big5);
    Vcharset_ucs_big5 =
      make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
@@ -2679,7 +2646,7 @@ complex_vars_of_mule_charset (void)
                   build_string ("UCS for Big5"),
                   build_string ("ISO/IEC 10646 for Big5"),
                   build_string (""),
-                 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
+                 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
  #else
  # define MIN_CHAR_THAI 0
  # define MAX_CHAR_THAI 0
@@ -2955,11 +2922,11 @@ complex_vars_of_mule_charset (void)
                   build_string ("Big5"),
                   build_string ("Big5"),
                   build_string ("Big5 Chinese traditional"),
-                 build_string ("big5"),
+                 build_string ("big5-0"),
                   Qnil,
-                 0 /* MIN_CHAR_BIG5_CDP */,
-                 0 /* MAX_CHAR_BIG5_CDP */, 0, 0,
-                 Qnil, CONVERSION_IDENTICAL);
+                 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
+                 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
+#if 0
    staticpro (&Vcharset_chinese_big5_cdp);
    Vcharset_chinese_big5_cdp =
      make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
@@ -2970,6 +2937,7 @@ complex_vars_of_mule_charset (void)
                   build_string ("big5\\.cdp-0"),
                   Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
                   MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
+#endif
  #define DEF_HANZIKU(n)                                                 \
    staticpro (&Vcharset_ideograph_hanziku_##n);                         \
    Vcharset_ideograph_hanziku_##n =                                     \