Sync up with XEmacs 21.4.17.
[chise/xemacs-chise.git.1] / src / mule-charset.c
index 6c07d63..e32fcf2 100644 (file)
@@ -176,14 +176,101 @@ decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
   return 0;
 }
 
+void
+decoding_table_put_char (Lisp_Object ccs,
+                        int code_point, Lisp_Object character)
+{ /* Record CHARACTER at CODE_POINT in the decoding table of CCS. */
+#if 1 /* live branch: one nested octet table per dimension of CCS */
+  Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
+  int dim = XCHARSET_DIMENSION (ccs);
+
+  if (dim == 1) /* CODE_POINT is passed as-is to the top-level table */
+    XCHARSET_DECODING_TABLE (ccs)
+      = put_ccs_octet_table (table1, ccs, code_point, character);
+  else if (dim == 2)
+    { /* high byte selects the sub-table, low byte the slot inside it */
+      Lisp_Object table2
+       = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
+
+      table2 = put_ccs_octet_table (table2, ccs,
+                                   (unsigned char)code_point, character);
+      XCHARSET_DECODING_TABLE (ccs) /* store the updated sub-table back */
+       = put_ccs_octet_table (table1, ccs,
+                              (unsigned char)(code_point >> 8), table2);
+    }
+  else if (dim == 3)
+    { /* fetch two levels down, then store back up starting at the leaf */
+      Lisp_Object table2
+       = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
+      Lisp_Object table3
+       = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >>  8));
+
+      table3 = put_ccs_octet_table (table3, ccs,
+                                   (unsigned char)code_point, character);
+      table2 = put_ccs_octet_table (table2, ccs,
+                                   (unsigned char)(code_point >> 8), table3);
+      XCHARSET_DECODING_TABLE (ccs)
+       = put_ccs_octet_table (table1, ccs,
+                              (unsigned char)(code_point >> 16), table2);
+    }
+  else /* if (dim == 4) */
+    { /* reached for every dim other than 1, 2 or 3; three nested levels */
+      Lisp_Object table2
+       = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
+      Lisp_Object table3
+       = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
+      Lisp_Object table4
+       = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >>  8));
+
+      table4 = put_ccs_octet_table (table4, ccs,
+                                   (unsigned char)code_point, character);
+      table3 = put_ccs_octet_table (table3, ccs,
+                                   (unsigned char)(code_point >>  8), table4);
+      table2 = put_ccs_octet_table (table2, ccs,
+                                   (unsigned char)(code_point >> 16), table3);
+      XCHARSET_DECODING_TABLE (ccs)
+       = put_ccs_octet_table (table1, ccs,
+                              (unsigned char)(code_point >> 24), table2);
+    }
+#else /* compiled out by the #if 1 above: walks nested Lisp vectors */
+  Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
+  int dim = XCHARSET_DIMENSION (ccs);
+  int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
+  int i = -1;
+  Lisp_Object nv;
+  int ccs_len = XVECTOR_LENGTH (v);
+
+  while (dim > 0)
+    {
+      dim--;
+      i = ((code_point >> (8 * dim)) & 255) - byte_offset; /* this level */
+      nv = XVECTOR_DATA(v)[i];
+      if (dim > 0)
+       {
+         if (!VECTORP (nv))
+           {
+             if (EQ (nv, character)) /* slot already holds CHARACTER */
+               return;
+             else /* put a fresh sub-vector in the non-vector slot */
+               nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
+           }
+         v = nv;
+       }
+      else
+       break;
+    }
+  XVECTOR_DATA(v)[i] = character; /* slot at the last level */
+#endif
+}
+
 Lisp_Object
 put_char_ccs_code_point (Lisp_Object character,
                         Lisp_Object ccs, Lisp_Object value)
 {
-  if ( !(EQ (XCHARSET_NAME (ccs), Qmap_ucs)
-        && INTP (value) && (XINT (value) < 0xF0000))
-       || !INTP (value)
-       /* || (XCHAR (character) != XINT (value)) */ )
+  if ( !( EQ (XCHARSET_NAME (ccs), Qmap_ucs)
+         && INTP (value) && (XINT (value) < 0xF0000)
+         && XCHAR (character) == XINT (value) )
+       || !INTP (value) )
     {
       Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
       int code_point;
@@ -821,15 +908,22 @@ get_unallocated_leading_byte (int dimension)
 #else
   if (dimension == 1)
     {
-      if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
+      if (chlook->next_allocated_1_byte_leading_byte >
+         MAX_LEADING_BYTE_PRIVATE_1)
        lb = 0;
       else
        lb = chlook->next_allocated_1_byte_leading_byte++;
     }
   else
     {
-      if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
-       lb = 0;
+      /* awfully fragile, but correct */
+#if MAX_LEADING_BYTE_PRIVATE_2 == 255
+      if (chlook->next_allocated_2_byte_leading_byte == 0)
+#else
+      if (chlook->next_allocated_2_byte_leading_byte >
+         MAX_LEADING_BYTE_PRIVATE_2)
+#endif
+        lb = 0;
       else
        lb = chlook->next_allocated_2_byte_leading_byte++;
     }
@@ -970,7 +1064,7 @@ decode_builtin_char (Lisp_Object charset, int code_point)
     {
       if ( CHARSETP (mother) )
        {
-         int code
+         EMACS_INT code
            = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
                                     code_point);
 
@@ -1223,6 +1317,55 @@ charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
 }
 
 int
+encode_char_2 (Emchar ch, Lisp_Object* charset)
+{ /* Look up a code point for CH; *CHARSET is set to each charset tried. */
+  Lisp_Object charsets = Vdefault_coded_charset_priority_list;
+  int code_point;
+
+  while (!NILP (charsets)) /* pass 1: direct lookup in list order */
+    {
+      *charset = Ffind_charset (Fcar (charsets));
+      if ( !NILP (*charset)
+          && (XCHARSET_DIMENSION (*charset) <= 2) ) /* skip dimension > 2 */
+       {
+         code_point = charset_code_point (*charset, ch, 0);
+         if (code_point >= 0) /* a non-negative result is returned as-is */
+           return code_point;
+
+         if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
+              NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
+           { /* inheritance on, hierarchy order off: search children now */
+             code_point = encode_char_2_search_children (ch, charset);
+             if (code_point >= 0)
+               return code_point;
+           }
+       }
+      charsets = Fcdr (charsets);            
+    }
+  
+  if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
+       !NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) )
+    { /* pass 2: both flags on; only the children search, in list order */
+      charsets = Vdefault_coded_charset_priority_list;
+      while (!NILP (charsets))
+       {
+         *charset = Ffind_charset (Fcar (charsets));
+         if ( !NILP (*charset)
+              && (XCHARSET_DIMENSION (*charset) <= 2) )
+           {
+             code_point = encode_char_2_search_children (ch, charset);
+             if (code_point >= 0)
+               return code_point;
+           }
+         charsets = Fcdr (charsets);         
+       }
+    }
+
+  /* otherwise --- maybe for bootstrap */
+  return encode_builtin_char_1 (ch, charset);
+}
+
+int
 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
 {
   if (c <= MAX_CHAR_BASIC_LATIN)
@@ -1344,6 +1487,8 @@ encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
 }
 
 Lisp_Object Vdefault_coded_charset_priority_list;
+Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
+Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
 #endif
 
 \f
@@ -2363,6 +2508,170 @@ load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
   return -1;
 #endif /* not HAVE_LIBCHISE */
 }
+
+#ifdef HAVE_LIBCHISE
+DEFUN ("save-charset-properties", Fsave_charset_properties, 1, 1, 0, /*
+Save properties of CHARSET.
+*/
+       (charset))
+{ /* Writes the properties of CHARSET into default_chise_data_source. */
+  struct Lisp_Charset *cs;
+  CHISE_Property property;
+  Lisp_Object ccs;
+  unsigned char* feature_name;
+
+  ccs = Fget_charset (charset);
+  cs = XCHARSET (ccs);
+
+  if ( open_chise_data_source_maybe () ) /* non-zero: give up early */
+    return make_int (-1); /* was a bare C -1, which is not a Lisp_Object */
+
+  if ( SYMBOLP (charset) && !EQ (charset, XCHARSET_NAME (ccs)) )
+    { /* CHARSET is a symbol other than the charset's own name */
+      property = chise_ds_get_property (default_chise_data_source,
+                                       "true-name");
+      feature_name = XSTRING_DATA (Fsymbol_name (charset));
+      chise_feature_set_property_value
+       (chise_ds_get_feature (default_chise_data_source, feature_name),
+        property, XSTRING_DATA (Fprin1_to_string (CHARSET_NAME (cs),
+                                                  Qnil)));
+      chise_property_sync (property);
+    }
+  charset = XCHARSET_NAME (ccs); /* from here on use the charset's own name */
+  feature_name = XSTRING_DATA (Fsymbol_name (charset));
+
+  property = chise_ds_get_property (default_chise_data_source,
+                                   "description");
+  chise_feature_set_property_value
+    (chise_ds_get_feature (default_chise_data_source, feature_name),
+     property, XSTRING_DATA (Fprin1_to_string
+                            (CHARSET_DOC_STRING (cs), Qnil)));
+  chise_property_sync (property);
+
+  property = chise_ds_get_property (default_chise_data_source, "type");
+  chise_feature_set_property_value
+    (chise_ds_get_feature (default_chise_data_source, feature_name),
+     property, "CCS");
+  chise_property_sync (property);
+
+  property = chise_ds_get_property (default_chise_data_source, "chars");
+  chise_feature_set_property_value
+    (chise_ds_get_feature (default_chise_data_source, feature_name),
+     property, XSTRING_DATA (Fprin1_to_string (make_int
+                                              (CHARSET_CHARS (cs)),
+                                              Qnil)));
+  chise_property_sync (property);
+
+  property = chise_ds_get_property (default_chise_data_source, "dimension");
+  chise_feature_set_property_value
+    (chise_ds_get_feature (default_chise_data_source, feature_name),
+     property, XSTRING_DATA (Fprin1_to_string (make_int
+                                              (CHARSET_DIMENSION (cs)),
+                                              Qnil)));
+  chise_property_sync (property);
+
+  if ( CHARSET_FINAL (cs) != 0 ) /* "final-byte" only when non-zero */
+    {
+      property = chise_ds_get_property (default_chise_data_source,
+                                       "final-byte");
+      chise_feature_set_property_value
+       (chise_ds_get_feature (default_chise_data_source, feature_name),
+        property, XSTRING_DATA (Fprin1_to_string (make_int
+                                                  (CHARSET_FINAL (cs)),
+                                                  Qnil)));
+      chise_property_sync (property);
+    }
+
+  if ( !NILP (CHARSET_MOTHER (cs)) ) /* "mother" only when non-nil */
+    {
+      Lisp_Object mother = CHARSET_MOTHER (cs);
+
+      if ( CHARSETP (mother) )
+       mother = XCHARSET_NAME (mother); /* print the name, not the object */
+
+      property = chise_ds_get_property (default_chise_data_source,
+                                       "mother");
+      chise_feature_set_property_value
+       (chise_ds_get_feature (default_chise_data_source, feature_name),
+        property, XSTRING_DATA (Fprin1_to_string (mother, Qnil)));
+      chise_property_sync (property);
+    }
+
+  if ( CHARSET_MAX_CODE (cs) != 0 ) /* guards the whole mother-code-* group */
+    {
+      char str[16]; /* scratch buffer for the "#x%X" strings below */
+
+      property = chise_ds_get_property (default_chise_data_source,
+                                       "mother-code-min");
+      if ( CHARSET_MIN_CODE (cs) == 0 )
+       chise_feature_set_property_value
+         (chise_ds_get_feature (default_chise_data_source, feature_name),
+          property, "0");
+      else
+       {
+         sprintf (str, "#x%X", CHARSET_MIN_CODE (cs));
+         chise_feature_set_property_value
+           (chise_ds_get_feature (default_chise_data_source, feature_name),
+            property, str);
+       }
+      chise_property_sync (property);
+
+      property = chise_ds_get_property (default_chise_data_source,
+                                       "mother-code-max");
+      sprintf (str, "#x%X", CHARSET_MAX_CODE (cs));
+      chise_feature_set_property_value
+       (chise_ds_get_feature (default_chise_data_source, feature_name),
+        property, str);
+      chise_property_sync (property);
+
+      property = chise_ds_get_property (default_chise_data_source,
+                                       "mother-code-offset");
+      if ( CHARSET_CODE_OFFSET (cs) == 0 )
+       chise_feature_set_property_value
+         (chise_ds_get_feature (default_chise_data_source, feature_name),
+          property, "0");
+      else
+       {
+         sprintf (str, "#x%X", CHARSET_CODE_OFFSET (cs));
+         chise_feature_set_property_value
+           (chise_ds_get_feature (default_chise_data_source, feature_name),
+            property, str);
+       }
+      chise_property_sync (property);
+
+      property = chise_ds_get_property (default_chise_data_source,
+                                       "mother-code-conversion");
+      if ( CHARSET_CONVERSION (cs) == CONVERSION_IDENTICAL )
+       chise_feature_set_property_value
+         (chise_ds_get_feature (default_chise_data_source, feature_name),
+          property, "identical");
+      else
+       {
+         Lisp_Object sym = Qnil; /* stays nil for an unlisted conversion */
+
+         if ( CHARSET_CONVERSION (cs) == CONVERSION_94x60 )
+           sym = Q94x60;
+         else if ( CHARSET_CONVERSION (cs) == CONVERSION_94x94x60 )
+           sym = Q94x94x60;
+         else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_1 )
+           sym = Qbig5_1;
+         else if ( CHARSET_CONVERSION (cs) == CONVERSION_BIG5_2 )
+           sym = Qbig5_2;
+         if ( !NILP (sym) )
+           chise_feature_set_property_value
+             (chise_ds_get_feature (default_chise_data_source, feature_name),
+              property, XSTRING_DATA (Fprin1_to_string (sym, Qnil)));
+         else
+           chise_feature_set_property_value
+             (chise_ds_get_feature (default_chise_data_source, feature_name),
+              property, "unknown");
+       }
+      chise_property_sync (property);
+    }
+  return Qnil;
+}
+#endif /* HAVE_LIBCHISE */
+
 #endif /* HAVE_CHISE */
 #endif /* UTF2000 */
 
@@ -2399,7 +2708,8 @@ Make a builtin character from CHARSET and code-point CODE.
 */
        (charset, code))
 {
-  int c;
+  EMACS_INT c;
+  Emchar ch;
 
   charset = Fget_charset (charset);
   CHECK_INT (code);
@@ -2433,9 +2743,9 @@ Make a builtin character from CHARSET and code-point CODE.
   if (XCHARSET_GRAPHIC (charset) == 1)
     c &= 0x7F7F7F7F;
 #endif
-  c = decode_builtin_char (charset, c);
+  ch = decode_builtin_char (charset, c);
   return
-    c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil, Qnil);
+    ch >= 0 ? make_char (ch) : Fdecode_char (charset, code, Qnil, Qnil);
 }
 #endif
 
@@ -2704,6 +3014,9 @@ syms_of_mule_charset (void)
 #ifdef HAVE_CHISE
   DEFSUBR (Fsave_charset_mapping_table);
   DEFSUBR (Freset_charset_mapping_table);
+#ifdef HAVE_LIBCHISE
+  DEFSUBR (Fsave_charset_properties);
+#endif /* HAVE_LIBCHISE */
 #endif /* HAVE_CHISE */
   DEFSUBR (Fdecode_char);
   DEFSUBR (Fdecode_builtin_char);
@@ -2760,10 +3073,10 @@ syms_of_mule_charset (void)
   defsymbol (&Qlatin_jisx0201,         "latin-jisx0201");
   defsymbol (&Qcyrillic_iso8859_5,     "cyrillic-iso8859-5");
   defsymbol (&Qlatin_iso8859_9,                "latin-iso8859-9");
-  defsymbol (&Qmap_jis_x0208_1978,     "=jis-x0208-1978");
+  defsymbol (&Qmap_jis_x0208_1978,     "=jis-x0208@1978");
   defsymbol (&Qmap_gb2312,             "=gb2312");
   defsymbol (&Qmap_gb12345,            "=gb12345");
-  defsymbol (&Qmap_jis_x0208_1983,     "=jis-x0208-1983");
+  defsymbol (&Qmap_jis_x0208_1983,     "=jis-x0208@1983");
   defsymbol (&Qmap_ks_x1001,           "=ks-x1001");
   defsymbol (&Qmap_jis_x0212,          "=jis-x0212");
   defsymbol (&Qmap_cns11643_1,         "=cns11643-1");
@@ -2782,7 +3095,7 @@ syms_of_mule_charset (void)
   defsymbol (&Qvietnamese_viscii_lower,        "vietnamese-viscii-lower");
   defsymbol (&Qvietnamese_viscii_upper,        "vietnamese-viscii-upper");
   defsymbol (&Qmap_jis_x0208,          "=jis-x0208");
-  defsymbol (&Qmap_jis_x0208_1990,     "=jis-x0208-1990");
+  defsymbol (&Qmap_jis_x0208_1990,     "=jis-x0208@1990");
   defsymbol (&Qmap_big5,               "=big5");
   defsymbol (&Qethiopic_ucs,           "ethiopic-ucs");
 #endif
@@ -2841,6 +3154,16 @@ Leading-code of private TYPE9N charset of column-width 1.
               &Vdefault_coded_charset_priority_list /*
 Default order of preferred coded-character-sets.
 */ );
+  Vdisplay_coded_charset_priority_use_inheritance = Qt;
+  DEFVAR_LISP ("display-coded-charset-priority-use-inheritance",
+              &Vdisplay_coded_charset_priority_use_inheritance /*
+If non-nil, use character inheritance.
+*/ );
+  Vdisplay_coded_charset_priority_use_hierarchy_order = Qt;
+  DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order",
+              &Vdisplay_coded_charset_priority_use_hierarchy_order /*
+If non-nil, prefer nearest character in hierarchy order.
+*/ );
 #endif
 }
 
@@ -2881,7 +3204,7 @@ complex_vars_of_mule_charset (void)
                  build_string ("UCS-BMP"),
                  build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
                  build_string
-                 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
+                 ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
                  Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
   staticpro (&Vcharset_ucs_smp);
   Vcharset_ucs_smp =