+2004-12-14 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+
+ * char-ucs.h (decoding_table_put_char): Moved to mule-charset.c;
+ changed to normal function.
+ (encode_char_2): Likewise.
+
+ * mule-charset.c (decoding_table_put_char): Copied from
+ char-ucs.h; changed to normal function.
+ (encode_char_2): Likewise.
+
+2004-12-13 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+
+ * char-ucs.h (Vdisplay_coded_charset_priority_use_inheritance):
+ New extern.
+ (Vdisplay_coded_charset_priority_use_hierarchy_order): Likewise.
+ (encode_char_2_search_children): New inline function.
+ (encode_char_2): Refer to
+ `Vdisplay_coded_charset_priority_use_{inheritance|hierarchy_order}'
+ and use `encode_char_2_search_children'.
+
+2004-12-04 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+
+ * mule-charset.c
+ (Vdisplay_coded_charset_priority_use_inheritance): New variable.
+ (Vdisplay_coded_charset_priority_use_hierarchy_order): Likewise.
+ (vars_of_mule_charset): Add new variables
+ `display-coded-charset-priority-use-inheritance' and
+ `display-coded-charset-priority-use-hierarchy-order'.
+
+2004-11-20 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+
+ * char-ucs.h (Q_subsumptive): New extern.
+ (Q_denotational): Likewise.
+ (encode_char_2): Search children specified by `->subsumptive' and
+ `->denotational'.
+
+2004-11-08 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+
+ * mule-charset.c (complex_vars_of_mule_charset): Modify the
+ X-registry of `ucs-bmp'.
+
+2004-10-30 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+
+ * chartab.c (char-variants): Refer to `->subsumptive',
+ `->denotational' and `->identical' in the same way as `->ucs-unified'.
+
+2004-10-14 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+
+ * chartab.c (Fput_char_attribute): Convert char-specs in value of
+ `{<-|->}{fullwidth|halfwidth}[^*]*' to characters and put reverse
+ links.
+
+2004-09-14 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+
+ * text-coding.c: Add EXFUN for `Fregexp_quote'.
+ (decode_add_er_char): Fix infinite loop when a CCS specified in
+ `coded-charset-entity-reference-alist' does not exist.
+
+2004-09-09 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+
+ * mule-charset.c (syms_of_mule_charset): Rename
+ `=jis-x0208-{1978|1983|1990}' to `=jis-x0208@{1978|1983|1990}'.
+
2004-08-28 MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
* fns.c (simplify_char_spec): Don't allocate new character-object
extern Lisp_Object Qsystem_char_id;
extern Lisp_Object Qmap_ucs, Qucs;
+extern Lisp_Object Q_subsumptive, Q_denotational;
Lisp_Object put_char_ccs_code_point (Lisp_Object character,
Lisp_Object ccs, Lisp_Object value);
}
}
-INLINE_HEADER void
+void
decoding_table_put_char (Lisp_Object ccs,
int code_point, Lisp_Object character);
-INLINE_HEADER void
-decoding_table_put_char (Lisp_Object ccs,
- int code_point, Lisp_Object character)
-{
-#if 1
- Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
- int dim = XCHARSET_DIMENSION (ccs);
-
- if (dim == 1)
- XCHARSET_DECODING_TABLE (ccs)
- = put_ccs_octet_table (table1, ccs, code_point, character);
- else if (dim == 2)
- {
- Lisp_Object table2
- = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
-
- table2 = put_ccs_octet_table (table2, ccs,
- (unsigned char)code_point, character);
- XCHARSET_DECODING_TABLE (ccs)
- = put_ccs_octet_table (table1, ccs,
- (unsigned char)(code_point >> 8), table2);
- }
- else if (dim == 3)
- {
- Lisp_Object table2
- = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
- Lisp_Object table3
- = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
-
- table3 = put_ccs_octet_table (table3, ccs,
- (unsigned char)code_point, character);
- table2 = put_ccs_octet_table (table2, ccs,
- (unsigned char)(code_point >> 8), table3);
- XCHARSET_DECODING_TABLE (ccs)
- = put_ccs_octet_table (table1, ccs,
- (unsigned char)(code_point >> 16), table2);
- }
- else /* if (dim == 4) */
- {
- Lisp_Object table2
- = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
- Lisp_Object table3
- = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
- Lisp_Object table4
- = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
-
- table4 = put_ccs_octet_table (table4, ccs,
- (unsigned char)code_point, character);
- table3 = put_ccs_octet_table (table3, ccs,
- (unsigned char)(code_point >> 8), table4);
- table2 = put_ccs_octet_table (table2, ccs,
- (unsigned char)(code_point >> 16), table3);
- XCHARSET_DECODING_TABLE (ccs)
- = put_ccs_octet_table (table1, ccs,
- (unsigned char)(code_point >> 24), table2);
- }
-#else
- Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
- int dim = XCHARSET_DIMENSION (ccs);
- int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
- int i = -1;
- Lisp_Object nv;
- int ccs_len = XVECTOR_LENGTH (v);
-
- while (dim > 0)
- {
- dim--;
- i = ((code_point >> (8 * dim)) & 255) - byte_offset;
- nv = XVECTOR_DATA(v)[i];
- if (dim > 0)
- {
- if (!VECTORP (nv))
- {
- if (EQ (nv, character))
- return;
- else
- nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
- }
- v = nv;
- }
- else
- break;
- }
- XVECTOR_DATA(v)[i] = character;
-#endif
-}
INLINE_HEADER void
decoding_table_remove_char (Lisp_Object ccs, int code_point);
extern Lisp_Object Vcharacter_attribute_table;
+int encode_char_2 (Emchar ch, Lisp_Object* charset);
int encode_builtin_char_1 (Emchar c, Lisp_Object* charset);
int charset_code_point (Lisp_Object charset, Emchar ch, int defined_only);
int range_charset_code_point (Lisp_Object charset, Emchar ch);
extern Lisp_Object Vdefault_coded_charset_priority_list;
+extern Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
+extern Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
+
EXFUN (Ffind_charset, 1);
INLINE_HEADER int encode_char_1 (Emchar ch, Lisp_Object* charset);
return encode_builtin_char_1 (ch, charset);
}
-INLINE_HEADER int encode_char_2 (Emchar ch, Lisp_Object* charset);
INLINE_HEADER int
-encode_char_2 (Emchar ch, Lisp_Object* charset)
+encode_char_2_search_children (Emchar ch, Lisp_Object* charset);
+INLINE_HEADER int
+encode_char_2_search_children (Emchar ch, Lisp_Object* charset) /* Try the
+   characters listed in CH's Q_subsumptive attribute, then those in its
+   Q_denotational attribute, and return the first value >= 0 that
+   charset_code_point reports for one of them in *CHARSET; return -1 when
+   none has one.  *CHARSET is only read here, never assigned.
+   NOTE(review): XCAR/XCDR/XCHAR are applied without CONSP/CHARP checks,
+   so both attribute values are assumed to be proper lists of
+   characters -- confirm.  */
{
- Lisp_Object charsets = Vdefault_coded_charset_priority_list;
+ int code_point;
+ Lisp_Object rest;
- while (!NILP (charsets))
+ rest = Fget_char_attribute (make_char (ch), Q_subsumptive, Qnil); /* first pass: Q_subsumptive list */
+ for ( ; !NILP (rest); rest = XCDR (rest) )
{
- *charset = Ffind_charset (Fcar (charsets));
- if ( !NILP (*charset)
- && (XCHARSET_DIMENSION (*charset) <= 2) )
- {
- int code_point = charset_code_point (*charset, ch, 0);
+ Lisp_Object c = XCAR (rest);
- if (code_point >= 0)
- return code_point;
- }
- charsets = Fcdr (charsets);
+ code_point = charset_code_point (*charset, XCHAR (c), 0);
+ if (code_point >= 0)
+ return code_point;
}
-
- /* otherwise --- maybe for bootstrap */
- return encode_builtin_char_1 (ch, charset);
+ rest = Fget_char_attribute (make_char (ch), Q_denotational, Qnil); /* second pass: Q_denotational list */
+ for ( ; !NILP (rest); rest = XCDR (rest) )
+ {
+ Lisp_Object c = XCAR (rest);
+
+ code_point = charset_code_point (*charset, XCHAR (c), 0);
+ if (code_point >= 0)
+ return code_point;
+ }
+ return -1; /* no listed character has a code point in *CHARSET */
}
#define ENCODE_CHAR(ch, charset) encode_char_1 (ch, &(charset))
*/
(character))
{
- Lisp_Object ret;
-
CHECK_CHAR (character);
- ret = Fchar_feature (character, Q_ucs_unified, Qnil,
- Qnil, Qnil);
- if (CONSP (ret))
- return Fcopy_list (ret);
- else
- return Qnil;
+ return
+ nconc2
+ (Fcopy_list (Fget_char_attribute (character, Q_subsumptive, Qnil)),
+ (nconc2
+ (Fcopy_list (Fget_char_attribute (character, Q_denotational, Qnil)),
+ (nconc2
+ (Fcopy_list (Fget_char_attribute (character, Q_identical, Qnil)),
+ Fcopy_list (Fchar_feature (character, Q_ucs_unified, Qnil,
+ Qnil, Qnil)))))));
}
#endif
EQ (attribute, Q_component) ||
EQ (attribute, Q_component_of) ||
!NILP (Fstring_match
- (build_string ("^\\(<-\\|->\\)\\(simplified"
- "\\|same\\|vulgar\\|wrong"
- "\\|original\\|ancient"
+ (build_string ("^\\(<-\\|->\\)\\("
+ "fullwidth\\|halfwidth"
+ "\\|simplified\\|vulgar\\|wrong"
+ "\\|same\\|original\\|ancient"
"\\)[^*]*$"),
Fsymbol_name (attribute),
Qnil, Qnil)) )
return 0;
}
+void
+decoding_table_put_char (Lisp_Object ccs,
+ int code_point, Lisp_Object character) /* Store CHARACTER under
+   CODE_POINT in the decoding table of CCS.  The number of table levels
+   walked is XCHARSET_DIMENSION (ccs); for dimensions above 1 each
+   level is indexed by one octet of CODE_POINT, most significant octet
+   first.  The value returned by the outermost put_ccs_octet_table call
+   is written back to XCHARSET_DECODING_TABLE (ccs).  */
+{
+#if 1 /* active implementation, built on get/put_ccs_octet_table */
+ Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
+ int dim = XCHARSET_DIMENSION (ccs);
+
+ if (dim == 1) /* single level: CODE_POINT is passed as the index unmasked */
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs, code_point, character);
+ else if (dim == 2) /* high octet selects the sub-table, low octet the entry */
+ {
+ Lisp_Object table2
+ = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
+
+ table2 = put_ccs_octet_table (table2, ccs,
+ (unsigned char)code_point, character); /* the returned table replaces table2 */
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs,
+ (unsigned char)(code_point >> 8), table2); /* put the sub-table back under the high octet */
+ }
+ else if (dim == 3) /* same scheme with three levels */
+ {
+ Lisp_Object table2
+ = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
+ Lisp_Object table3
+ = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
+
+ table3 = put_ccs_octet_table (table3, ccs,
+ (unsigned char)code_point, character);
+ table2 = put_ccs_octet_table (table2, ccs,
+ (unsigned char)(code_point >> 8), table3);
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs,
+ (unsigned char)(code_point >> 16), table2);
+ }
+ else /* if (dim == 4) */ /* NOTE(review): also taken for any dimension other than 1, 2 or 3 */
+ {
+ Lisp_Object table2
+ = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
+ Lisp_Object table3
+ = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
+ Lisp_Object table4
+ = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
+
+ table4 = put_ccs_octet_table (table4, ccs,
+ (unsigned char)code_point, character);
+ table3 = put_ccs_octet_table (table3, ccs,
+ (unsigned char)(code_point >> 8), table4);
+ table2 = put_ccs_octet_table (table2, ccs,
+ (unsigned char)(code_point >> 16), table3);
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs,
+ (unsigned char)(code_point >> 24), table2);
+ }
+#else /* alternative that indexes nested Lisp vectors directly; not compiled while the "#if 1" above stands */
+ Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
+ int dim = XCHARSET_DIMENSION (ccs);
+ int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
+ int i = -1;
+ Lisp_Object nv;
+ int ccs_len = XVECTOR_LENGTH (v);
+
+ while (dim > 0)
+ {
+ dim--;
+ i = ((code_point >> (8 * dim)) & 255) - byte_offset; /* this level's octet, shifted down by the charset byte offset */
+ nv = XVECTOR_DATA(v)[i];
+ if (dim > 0)
+ {
+ if (!VECTORP (nv))
+ {
+ if (EQ (nv, character))
+ return; /* slot already holds CHARACTER itself: nothing to do */
+ else
+ nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil)); /* create the missing sub-vector */
+ }
+ v = nv;
+ }
+ else
+ break;
+ }
+ XVECTOR_DATA(v)[i] = character; /* innermost slot receives CHARACTER */
+#endif
+}
+
Lisp_Object
put_char_ccs_code_point (Lisp_Object character,
Lisp_Object ccs, Lisp_Object value)
}
int
+encode_char_2 (Emchar ch, Lisp_Object* charset) /* Look up a code point
+   for CH by trying the entries of Vdefault_coded_charset_priority_list
+   in list order.  Entries for which Ffind_charset returns nil, or whose
+   XCHARSET_DIMENSION exceeds 2, are skipped.  *CHARSET is overwritten
+   with each candidate as it is tried; as soon as a lookup yields a
+   value >= 0 that value is returned, leaving *CHARSET at the charset
+   that produced it.  When nothing matches, the result of
+   encode_builtin_char_1 (ch, charset) is returned instead.  */
+{
+ Lisp_Object charsets = Vdefault_coded_charset_priority_list;
+ int code_point;
+
+ while (!NILP (charsets))
+ {
+ *charset = Ffind_charset (Fcar (charsets));
+ if ( !NILP (*charset)
+ && (XCHARSET_DIMENSION (*charset) <= 2) )
+ {
+ code_point = charset_code_point (*charset, ch, 0); /* direct lookup of CH itself */
+ if (code_point >= 0)
+ return code_point;
+
+ if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
+ NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) ) /* inheritance on, hierarchy order off: try CH's children in this charset before moving to the next one */
+ {
+ code_point = encode_char_2_search_children (ch, charset);
+ if (code_point >= 0)
+ return code_point;
+ }
+ }
+ charsets = Fcdr (charsets);
+ }
+
+ if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) &&
+ !NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) ) /* both on: children are consulted only now, after the direct lookup failed in every charset */
+ {
+ charsets = Vdefault_coded_charset_priority_list; /* restart from the head of the priority list */
+ while (!NILP (charsets))
+ {
+ *charset = Ffind_charset (Fcar (charsets));
+ if ( !NILP (*charset)
+ && (XCHARSET_DIMENSION (*charset) <= 2) )
+ {
+ code_point = encode_char_2_search_children (ch, charset);
+ if (code_point >= 0)
+ return code_point;
+ }
+ charsets = Fcdr (charsets);
+ }
+ }
+
+ /* otherwise --- maybe for bootstrap */
+ return encode_builtin_char_1 (ch, charset);
+}
+
+int
encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
{
if (c <= MAX_CHAR_BASIC_LATIN)
}
Lisp_Object Vdefault_coded_charset_priority_list;
+Lisp_Object Vdisplay_coded_charset_priority_use_inheritance;
+Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order;
#endif
\f
defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
- defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978");
+ defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208@1978");
defsymbol (&Qmap_gb2312, "=gb2312");
defsymbol (&Qmap_gb12345, "=gb12345");
- defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
+ defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208@1983");
defsymbol (&Qmap_ks_x1001, "=ks-x1001");
defsymbol (&Qmap_jis_x0212, "=jis-x0212");
defsymbol (&Qmap_cns11643_1, "=cns11643-1");
defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
defsymbol (&Qmap_jis_x0208, "=jis-x0208");
- defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
+ defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208@1990");
defsymbol (&Qmap_big5, "=big5");
defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
#endif
&Vdefault_coded_charset_priority_list /*
Default order of preferred coded-character-sets.
*/ );
+ Vdisplay_coded_charset_priority_use_inheritance = Qt;
+ DEFVAR_LISP ("display-coded-charset-priority-use-inheritance",
+ &Vdisplay_coded_charset_priority_use_inheritance /*
+If non-nil, use character inheritance.
+*/ );
+ Vdisplay_coded_charset_priority_use_hierarchy_order = Qt;
+ DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order",
+ &Vdisplay_coded_charset_priority_use_hierarchy_order /*
+If non-nil, prefer nearest character in hierarchy order.
+*/ );
#endif
}
build_string ("UCS-BMP"),
build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
build_string
- ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
+ ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_smp);
Vcharset_ucs_smp =
}
}
+EXFUN (Fregexp_quote, 1);
+
void decode_add_er_char (struct decoding_stream *str, Emchar character,
unsigned_char_dynarr* dst);
void
{
Lisp_Object string = make_string (str->er_buf,
str->er_counter);
- Lisp_Object rest = Vcoded_charset_entity_reference_alist;
+ Lisp_Object rest;
Lisp_Object cell;
Lisp_Object ret;
Lisp_Object pat;
Lisp_Object char_type;
int base;
- while (!NILP (rest))
+ for ( rest = Vcoded_charset_entity_reference_alist;
+ !NILP (rest); rest = Fcdr (rest) )
{
cell = Fcar (rest);
ccs = Fcar (cell);
DECODE_ADD_UCS_CHAR (chr, dst);
goto decoded;
}
- rest = Fcdr (rest);
}
if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"),
string, Qnil, Qnil)))