From 3bb53dc907705a4fa7ebc520abf3e4dc93199779 Mon Sep 17 00:00:00 2001 From: tomo Date: Fri, 28 Jan 2005 15:43:14 +0000 Subject: [PATCH] Sync up with r21-4-15-chise-0_21-32. --- src/ChangeLog | 64 +++++++++++++++++++++ src/char-ucs.h | 130 +++++++++---------------------------------- src/chartab.c | 24 ++++---- src/mule-charset.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++-- src/text-coding.c | 8 ++- 5 files changed, 261 insertions(+), 121 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 1faef7e..7952c0e 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,67 @@ +2004-12-14 MORIOKA Tomohiko + + * char-ucs.h (decoding_table_put_char): Moved to mule-charset.c; + changed to normal function. + (encode_char_2): Likewise. + + * mule-charset.c (decoding_table_put_char): Copied from + char-ucs.h; changed to normal function. + (encode_char_2): Likewise. + +2004-12-13 MORIOKA Tomohiko + + * char-ucs.h (Vdisplay_coded_charset_priority_use_inheritance): + New extern. + (Vdisplay_coded_charset_priority_use_hierarchy_order): Likewise. + (encode_char_2_search_children): New inline function. + (encode_char_2): Refer to + `Vdisplay_coded_charset_priority_use_{inheritance|hierarchy_order}' + and use `encode_char_2_search_children'. + +2004-12-04 MORIOKA Tomohiko + + * mule-charset.c + (Vdisplay_coded_charset_priority_use_inheritance): New variable. + (Vdisplay_coded_charset_priority_use_hierarchy_order): Likewise. + (vars_of_mule_charset): Add new variables + `display-coded-charset-priority-use-inheritance' and + `display-coded-charset-priority-use-hierarchy-order'. + +2004-11-20 MORIOKA Tomohiko + + * char-ucs.h (Q_subsumptive): New extern. + (Q_denotational): Likewise. + (encode_char_2): Search children specified by `->subsumptive' and + `->denotational'. + +2004-11-08 MORIOKA Tomohiko + + * mule-charset.c (complex_vars_of_mule_charset): Modify the + X-registry of `ucs-bmp'. 
+ +2004-10-30 MORIOKA Tomohiko + + * chartab.c (char-variants): Refer to `->subsumptive', + `->denotational' and `->identical' in the same way as `->ucs-unified'. + +2004-10-14 MORIOKA Tomohiko + + * chartab.c (Fput_char_attribute): Convert char-specs in value of + `{<-|->}{fullwidth|halfwidth}[^*]*' to characters and put reverse + links. + +2004-09-14 MORIOKA Tomohiko + + * text-coding.c: Add EXFUN for `Fregexp_quote'. + (decode_add_er_char): Fix an infinite loop when a CCS + specified in `coded-charset-entity-reference-alist' does not + exist. + +2004-09-09 MORIOKA Tomohiko + + * mule-charset.c (syms_of_mule_charset): Rename + `=jis-x0208-{1978|1983|1990}' to `=jis-x0208@{1978|1983|1990}'. + 2004-08-28 MORIOKA Tomohiko * fns.c (simplify_char_spec): Don't allocate new character-object diff --git a/src/char-ucs.h b/src/char-ucs.h index 078dcea..c6e68b3 100644 --- a/src/char-ucs.h +++ b/src/char-ucs.h @@ -50,6 +50,7 @@ EXFUN (Fget_charset, 1); extern Lisp_Object Qsystem_char_id; extern Lisp_Object Qmap_ucs, Qucs; +extern Lisp_Object Q_subsumptive, Q_denotational; Lisp_Object put_char_ccs_code_point (Lisp_Object character, Lisp_Object ccs, Lisp_Object value); @@ -542,95 +543,9 @@ put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code, } } -INLINE_HEADER void +void decoding_table_put_char (Lisp_Object ccs, int code_point, Lisp_Object character); -INLINE_HEADER void -decoding_table_put_char (Lisp_Object ccs, - int code_point, Lisp_Object character) -{ -#if 1 - Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs); - int dim = XCHARSET_DIMENSION (ccs); - - if (dim == 1) - XCHARSET_DECODING_TABLE (ccs) - = put_ccs_octet_table (table1, ccs, code_point, character); - else if (dim == 2) - { - Lisp_Object table2 - = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8)); - - table2 = put_ccs_octet_table (table2, ccs, - (unsigned char)code_point, character); - XCHARSET_DECODING_TABLE (ccs) - = put_ccs_octet_table (table1, ccs, - (unsigned char)(code_point 
>> 8), table2); - } - else if (dim == 3) - { - Lisp_Object table2 - = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16)); - Lisp_Object table3 - = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8)); - - table3 = put_ccs_octet_table (table3, ccs, - (unsigned char)code_point, character); - table2 = put_ccs_octet_table (table2, ccs, - (unsigned char)(code_point >> 8), table3); - XCHARSET_DECODING_TABLE (ccs) - = put_ccs_octet_table (table1, ccs, - (unsigned char)(code_point >> 16), table2); - } - else /* if (dim == 4) */ - { - Lisp_Object table2 - = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24)); - Lisp_Object table3 - = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16)); - Lisp_Object table4 - = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8)); - - table4 = put_ccs_octet_table (table4, ccs, - (unsigned char)code_point, character); - table3 = put_ccs_octet_table (table3, ccs, - (unsigned char)(code_point >> 8), table4); - table2 = put_ccs_octet_table (table2, ccs, - (unsigned char)(code_point >> 16), table3); - XCHARSET_DECODING_TABLE (ccs) - = put_ccs_octet_table (table1, ccs, - (unsigned char)(code_point >> 24), table2); - } -#else - Lisp_Object v = XCHARSET_DECODING_TABLE (ccs); - int dim = XCHARSET_DIMENSION (ccs); - int byte_offset = XCHARSET_BYTE_OFFSET (ccs); - int i = -1; - Lisp_Object nv; - int ccs_len = XVECTOR_LENGTH (v); - - while (dim > 0) - { - dim--; - i = ((code_point >> (8 * dim)) & 255) - byte_offset; - nv = XVECTOR_DATA(v)[i]; - if (dim > 0) - { - if (!VECTORP (nv)) - { - if (EQ (nv, character)) - return; - else - nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil)); - } - v = nv; - } - else - break; - } - XVECTOR_DATA(v)[i] = character; -#endif -} INLINE_HEADER void decoding_table_remove_char (Lisp_Object ccs, int code_point); @@ -676,11 +591,15 @@ MAKE_CHAR (Lisp_Object charset, int c1, int c2) extern Lisp_Object Vcharacter_attribute_table; +int 
encode_char_2 (Emchar ch, Lisp_Object* charset); int encode_builtin_char_1 (Emchar c, Lisp_Object* charset); int charset_code_point (Lisp_Object charset, Emchar ch, int defined_only); int range_charset_code_point (Lisp_Object charset, Emchar ch); extern Lisp_Object Vdefault_coded_charset_priority_list; +extern Lisp_Object Vdisplay_coded_charset_priority_use_inheritance; +extern Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order; + EXFUN (Ffind_charset, 1); INLINE_HEADER int encode_char_1 (Emchar ch, Lisp_Object* charset); @@ -706,28 +625,33 @@ encode_char_1 (Emchar ch, Lisp_Object* charset) return encode_builtin_char_1 (ch, charset); } -INLINE_HEADER int encode_char_2 (Emchar ch, Lisp_Object* charset); INLINE_HEADER int -encode_char_2 (Emchar ch, Lisp_Object* charset) +encode_char_2_search_children (Emchar ch, Lisp_Object* charset); +INLINE_HEADER int +encode_char_2_search_children (Emchar ch, Lisp_Object* charset) { - Lisp_Object charsets = Vdefault_coded_charset_priority_list; + int code_point; + Lisp_Object rest; - while (!NILP (charsets)) + rest = Fget_char_attribute (make_char (ch), Q_subsumptive, Qnil); + for ( ; !NILP (rest); rest = XCDR (rest) ) { - *charset = Ffind_charset (Fcar (charsets)); - if ( !NILP (*charset) - && (XCHARSET_DIMENSION (*charset) <= 2) ) - { - int code_point = charset_code_point (*charset, ch, 0); + Lisp_Object c = XCAR (rest); - if (code_point >= 0) - return code_point; - } - charsets = Fcdr (charsets); + code_point = charset_code_point (*charset, XCHAR (c), 0); + if (code_point >= 0) + return code_point; } - - /* otherwise --- maybe for bootstrap */ - return encode_builtin_char_1 (ch, charset); + rest = Fget_char_attribute (make_char (ch), Q_denotational, Qnil); + for ( ; !NILP (rest); rest = XCDR (rest) ) + { + Lisp_Object c = XCAR (rest); + + code_point = charset_code_point (*charset, XCHAR (c), 0); + if (code_point >= 0) + return code_point; + } + return -1; } #define ENCODE_CHAR(ch, charset) encode_char_1 (ch, 
&(charset)) diff --git a/src/chartab.c b/src/chartab.c index 02ef2df..33f1ee2 100644 --- a/src/chartab.c +++ b/src/chartab.c @@ -1235,15 +1235,16 @@ Return variants of CHARACTER. */ (character)) { - Lisp_Object ret; - CHECK_CHAR (character); - ret = Fchar_feature (character, Q_ucs_unified, Qnil, - Qnil, Qnil); - if (CONSP (ret)) - return Fcopy_list (ret); - else - return Qnil; + return + nconc2 + (Fcopy_list (Fget_char_attribute (character, Q_subsumptive, Qnil)), + (nconc2 + (Fcopy_list (Fget_char_attribute (character, Q_denotational, Qnil)), + (nconc2 + (Fcopy_list (Fget_char_attribute (character, Q_identical, Qnil)), + Fcopy_list (Fchar_feature (character, Q_ucs_unified, Qnil, + Qnil, Qnil))))))); } #endif @@ -3508,9 +3509,10 @@ Store CHARACTER's ATTRIBUTE with VALUE. EQ (attribute, Q_component) || EQ (attribute, Q_component_of) || !NILP (Fstring_match - (build_string ("^\\(<-\\|->\\)\\(simplified" - "\\|same\\|vulgar\\|wrong" - "\\|original\\|ancient" + (build_string ("^\\(<-\\|->\\)\\(" + "fullwidth\\|halfwidth" + "\\|simplified\\|vulgar\\|wrong" + "\\|same\\|original\\|ancient" "\\)[^*]*$"), Fsymbol_name (attribute), Qnil, Qnil)) ) diff --git a/src/mule-charset.c b/src/mule-charset.c index bcfe600..30b720a 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -176,6 +176,93 @@ decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len) return 0; } +void +decoding_table_put_char (Lisp_Object ccs, + int code_point, Lisp_Object character) +{ +#if 1 + Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs); + int dim = XCHARSET_DIMENSION (ccs); + + if (dim == 1) + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, code_point, character); + else if (dim == 2) + { + Lisp_Object table2 + = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8)); + + table2 = put_ccs_octet_table (table2, ccs, + (unsigned char)code_point, character); + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, + (unsigned char)(code_point 
>> 8), table2); + } + else if (dim == 3) + { + Lisp_Object table2 + = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16)); + Lisp_Object table3 + = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8)); + + table3 = put_ccs_octet_table (table3, ccs, + (unsigned char)code_point, character); + table2 = put_ccs_octet_table (table2, ccs, + (unsigned char)(code_point >> 8), table3); + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, + (unsigned char)(code_point >> 16), table2); + } + else /* if (dim == 4) */ + { + Lisp_Object table2 + = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24)); + Lisp_Object table3 + = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16)); + Lisp_Object table4 + = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8)); + + table4 = put_ccs_octet_table (table4, ccs, + (unsigned char)code_point, character); + table3 = put_ccs_octet_table (table3, ccs, + (unsigned char)(code_point >> 8), table4); + table2 = put_ccs_octet_table (table2, ccs, + (unsigned char)(code_point >> 16), table3); + XCHARSET_DECODING_TABLE (ccs) + = put_ccs_octet_table (table1, ccs, + (unsigned char)(code_point >> 24), table2); + } +#else + Lisp_Object v = XCHARSET_DECODING_TABLE (ccs); + int dim = XCHARSET_DIMENSION (ccs); + int byte_offset = XCHARSET_BYTE_OFFSET (ccs); + int i = -1; + Lisp_Object nv; + int ccs_len = XVECTOR_LENGTH (v); + + while (dim > 0) + { + dim--; + i = ((code_point >> (8 * dim)) & 255) - byte_offset; + nv = XVECTOR_DATA(v)[i]; + if (dim > 0) + { + if (!VECTORP (nv)) + { + if (EQ (nv, character)) + return; + else + nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil)); + } + v = nv; + } + else + break; + } + XVECTOR_DATA(v)[i] = character; +#endif +} + Lisp_Object put_char_ccs_code_point (Lisp_Object character, Lisp_Object ccs, Lisp_Object value) @@ -1223,6 +1310,55 @@ charset_code_point (Lisp_Object charset, Emchar ch, int defined_only) } int 
+encode_char_2 (Emchar ch, Lisp_Object* charset) +{ + Lisp_Object charsets = Vdefault_coded_charset_priority_list; + int code_point; + + while (!NILP (charsets)) + { + *charset = Ffind_charset (Fcar (charsets)); + if ( !NILP (*charset) + && (XCHARSET_DIMENSION (*charset) <= 2) ) + { + code_point = charset_code_point (*charset, ch, 0); + if (code_point >= 0) + return code_point; + + if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) && + NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) ) + { + code_point = encode_char_2_search_children (ch, charset); + if (code_point >= 0) + return code_point; + } + } + charsets = Fcdr (charsets); + } + + if ( !NILP (Vdisplay_coded_charset_priority_use_inheritance) && + !NILP (Vdisplay_coded_charset_priority_use_hierarchy_order) ) + { + charsets = Vdefault_coded_charset_priority_list; + while (!NILP (charsets)) + { + *charset = Ffind_charset (Fcar (charsets)); + if ( !NILP (*charset) + && (XCHARSET_DIMENSION (*charset) <= 2) ) + { + code_point = encode_char_2_search_children (ch, charset); + if (code_point >= 0) + return code_point; + } + charsets = Fcdr (charsets); + } + } + + /* otherwise --- maybe for bootstrap */ + return encode_builtin_char_1 (ch, charset); +} + +int encode_builtin_char_1 (Emchar c, Lisp_Object* charset) { if (c <= MAX_CHAR_BASIC_LATIN) @@ -1344,6 +1480,8 @@ encode_builtin_char_1 (Emchar c, Lisp_Object* charset) } Lisp_Object Vdefault_coded_charset_priority_list; +Lisp_Object Vdisplay_coded_charset_priority_use_inheritance; +Lisp_Object Vdisplay_coded_charset_priority_use_hierarchy_order; #endif @@ -2928,10 +3066,10 @@ syms_of_mule_charset (void) defsymbol (&Qlatin_jisx0201, "latin-jisx0201"); defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5"); defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9"); - defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978"); + defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208@1978"); defsymbol (&Qmap_gb2312, "=gb2312"); defsymbol (&Qmap_gb12345, "=gb12345"); - defsymbol 
(&Qmap_jis_x0208_1983, "=jis-x0208-1983"); + defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208@1983"); defsymbol (&Qmap_ks_x1001, "=ks-x1001"); defsymbol (&Qmap_jis_x0212, "=jis-x0212"); defsymbol (&Qmap_cns11643_1, "=cns11643-1"); @@ -2950,7 +3088,7 @@ syms_of_mule_charset (void) defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower"); defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper"); defsymbol (&Qmap_jis_x0208, "=jis-x0208"); - defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990"); + defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208@1990"); defsymbol (&Qmap_big5, "=big5"); defsymbol (&Qethiopic_ucs, "ethiopic-ucs"); #endif @@ -3009,6 +3147,16 @@ Leading-code of private TYPE9N charset of column-width 1. &Vdefault_coded_charset_priority_list /* Default order of preferred coded-character-sets. */ ); + Vdisplay_coded_charset_priority_use_inheritance = Qt; + DEFVAR_LISP ("display-coded-charset-priority-use-inheritance", + &Vdisplay_coded_charset_priority_use_inheritance /* +If non-nil, use character inheritance. +*/ ); + Vdisplay_coded_charset_priority_use_hierarchy_order = Qt; + DEFVAR_LISP ("display-coded-charset-priority-use-hierarchy-order", + &Vdisplay_coded_charset_priority_use_hierarchy_order /* +If non-nil, prefer nearest character in hierarchy order. 
+*/ ); #endif } @@ -3049,7 +3197,7 @@ complex_vars_of_mule_charset (void) build_string ("UCS-BMP"), build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"), build_string - ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"), + ("\\(ISO10646\\(\\.[0-9]+\\)?-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"), Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL); staticpro (&Vcharset_ucs_smp); Vcharset_ucs_smp = diff --git a/src/text-coding.c b/src/text-coding.c index 9b6e7dc..b3f80ff 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -3246,6 +3246,8 @@ decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst) } } +EXFUN (Fregexp_quote, 1); + void decode_add_er_char (struct decoding_stream *str, Emchar character, unsigned_char_dynarr* dst); void @@ -3267,7 +3269,7 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, { Lisp_Object string = make_string (str->er_buf, str->er_counter); - Lisp_Object rest = Vcoded_charset_entity_reference_alist; + Lisp_Object rest; Lisp_Object cell; Lisp_Object ret; Lisp_Object pat; @@ -3275,7 +3277,8 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, Lisp_Object char_type; int base; - while (!NILP (rest)) + for ( rest = Vcoded_charset_entity_reference_alist; + !NILP (rest); rest = Fcdr (rest) ) { cell = Fcar (rest); ccs = Fcar (cell); @@ -3337,7 +3340,6 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, DECODE_ADD_UCS_CHAR (chr, dst); goto decoded; } - rest = Fcdr (rest); } if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"), string, Qnil, Qnil))) -- 1.7.10.4