X-Git-Url: http://git.chise.org/gitweb/?p=chise%2Fxemacs-chise.git.1;a=blobdiff_plain;f=src%2Ftext-coding.c;h=7071934bff39be37ac2b7e861aea76dedd2ff5bc;hp=4b8f2d2bd057adb056acf8797b95c923aa275976;hb=7b241b273a632ab80d7c620b5add28d5f11b0fd3;hpb=0cbb0d1c451c164beae371763b59ce64347f5ce5 diff --git a/src/text-coding.c b/src/text-coding.c index 4b8f2d2..7071934 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -1,7 +1,8 @@ /* Code conversion functions. Copyright (C) 1991, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. - Copyright (C) 1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko + Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2011, + 2012, 2013, 2016 MORIOKA Tomohiko This file is part of XEmacs. @@ -37,6 +38,9 @@ Boston, MA 02111-1307, USA. */ #include "mule-ccl.h" #include "chartab.h" #endif +#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE +#include +#endif #include "file-coding.h" Lisp_Object Qcoding_system_error; @@ -106,9 +110,11 @@ Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift; #endif #ifdef UTF2000 Lisp_Object Qutf_8_mcs; -Lisp_Object Qdisable_composition; +Lisp_Object Qdisable_composition, Qenable_decomposition; +Lisp_Object Qccs_priority_list; Lisp_Object Quse_entity_reference; Lisp_Object Qd, Qx, QX; +Lisp_Object Vdecomposition_feature_list; #endif Lisp_Object Qencode, Qdecode; @@ -846,8 +852,10 @@ character set. Recognized properties are: `coding-system-property' will return nil.) 'disable-composition - If non-nil, composition/decomposition for combining characters - are disabled. + If non-nil, composition for combining characters is disabled. + +'enable-decomposition + If non-nil, decomposition for combining characters is enabled. 'use-entity-reference If non-nil, SGML style entity-reference is used for non-system-characters. 
@@ -1004,6 +1012,8 @@ if TYPE is 'ccl: #ifdef UTF2000 else if (EQ (key, Qdisable_composition)) CODING_SYSTEM_DISABLE_COMPOSITION (codesys) = !NILP (value); + else if (EQ (key, Qenable_decomposition)) + CODING_SYSTEM_ENABLE_DECOMPOSITION (codesys) = !NILP (value); else if (EQ (key, Quse_entity_reference)) CODING_SYSTEM_USE_ENTITY_REFERENCE (codesys) = !NILP (value); #endif @@ -1052,6 +1062,12 @@ if TYPE is 'ccl: parse_charset_conversion_specs (codesys->iso2022.output_conv, value); } +#ifdef UTF2000 + else if (EQ (key, Qccs_priority_list)) + { + codesys->ccs_priority_list = value; + } +#endif else signal_simple_error ("Unrecognized property", key); } @@ -1063,6 +1079,8 @@ if TYPE is 'ccl: CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, 1) = value; else if (EQ (key, Qcharset_g2)) CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, 2) = value; + else if (EQ (key, Qcharset_g3)) + CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, 3) = value; else signal_simple_error ("Unrecognized property", key); } @@ -1482,8 +1500,12 @@ Return the PROP property of CODING-SYSTEM. #ifdef UTF2000 else if (EQ (prop, Qdisable_composition)) return XCODING_SYSTEM_DISABLE_COMPOSITION (coding_system) ? Qt : Qnil; + else if (EQ (prop, Qenable_decomposition)) + return XCODING_SYSTEM_ENABLE_DECOMPOSITION (coding_system) ? Qt : Qnil; else if (EQ (prop, Quse_entity_reference)) return XCODING_SYSTEM_USE_ENTITY_REFERENCE (coding_system) ? 
Qt : Qnil; + else if (EQ (prop, Qccs_priority_list)) + return XCODING_SYSTEM_CCS_PRIORITY_LIST (coding_system); #endif else if (type == CODESYS_ISO2022) { @@ -2186,8 +2208,8 @@ do { \ } \ } while (0) -INLINE_HEADER void DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst); -INLINE_HEADER void +static void DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst); +static void DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst) { if ( c <= 0x7f ) @@ -2311,12 +2333,17 @@ struct decoding_stream unsigned char counter; #endif #ifdef UTF2000 + char bom_flag; unsigned char er_counter; unsigned char er_buf[ER_BUF_SIZE]; unsigned combined_char_count; Emchar combined_chars[16]; +#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE + COS_object combining_table; +#else Lisp_Object combining_table; +#endif /* USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE */ #endif struct detection_state decst; }; @@ -2455,9 +2482,14 @@ reset_decoding_stream (struct decoding_stream *str) str->counter = 0; #endif /* MULE */ #ifdef UTF2000 + str->bom_flag = 0; str->er_counter = 0; str->combined_char_count = 0; +#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE + str->combining_table = COS_NIL; +#else str->combining_table = Qnil; +#endif /* USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE */ #endif if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT || CODING_SYSTEM_EOL_TYPE (str->codesys) == EOL_AUTODETECT) @@ -3246,6 +3278,8 @@ decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst) } } +EXFUN (Fregexp_quote, 1); + void decode_add_er_char (struct decoding_stream *str, Emchar character, unsigned_char_dynarr* dst); void @@ -3267,7 +3301,7 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, { Lisp_Object string = make_string (str->er_buf, str->er_counter); - Lisp_Object rest = Vcoded_charset_entity_reference_alist; + Lisp_Object rest; Lisp_Object cell; Lisp_Object ret; Lisp_Object pat; @@ -3275,7 +3309,8 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, Lisp_Object 
char_type; int base; - while (!NILP (rest)) + for ( rest = Vcoded_charset_entity_reference_alist; + !NILP (rest); rest = Fcdr (rest) ) { cell = Fcar (rest); ccs = Fcar (cell); @@ -3295,6 +3330,7 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, pat = ret; else continue; + pat = Fregexp_quote (pat); cell = Fcdr (cell); cell = Fcdr (cell); @@ -3333,10 +3369,16 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, ? DECODE_CHAR (ccs, code, 0) : decode_builtin_char (ccs, code); - DECODE_ADD_UCS_CHAR (chr, dst); + if ( chr >= 0 ) + DECODE_ADD_UCS_CHAR (chr, dst); + else + { + Dynarr_add_many (dst, str->er_buf, str->er_counter); + Dynarr_add (dst, ';'); + } + goto decoded; } - rest = Fcdr (rest); } if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"), string, Qnil, Qnil))) @@ -3392,11 +3434,22 @@ char_encode_as_entity_reference (Emchar ch, char* buf) char_type = Qnil; if (!NILP (ccs = Ffind_charset (ccs))) { - int code_point = charset_code_point (ccs, ch, 0); + int code_point + = charset_code_point (ccs, ch, + NILP (char_type) ? 
+ CHAR_ALL : CHAR_ISOLATED_ONLY ); if ( (code_point >= 0) - && (NILP (char_type) - || DECODE_CHAR (ccs, code_point, 0) != ch) ) + && ( NILP (char_type) +#if 1 + || ( charset_code_point (ccs, ch, CHAR_DEFINED_ONLY) + == -1 ) +#endif +#if 0 + || ( DECODE_CHAR (ccs, code_point, 0) != ch ) +#endif + ) + ) { Lisp_Object ret; @@ -3455,7 +3508,7 @@ char_encode_as_entity_reference (Emchar ch, char* buf) /************************************************************************/ /* character composition */ /************************************************************************/ -extern Lisp_Object Qcomposition; +extern Lisp_Object Qcomposition, Qrep_decomposition; INLINE_HEADER void COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst); @@ -3467,9 +3520,27 @@ COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst) for (i = 0; i < str->combined_char_count; i++) decode_add_er_char (str, str->combined_chars[i], dst); str->combined_char_count = 0; +#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE + str->combining_table = COS_NIL; +#else str->combining_table = Qnil; +#endif /* USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE */ } +extern CONCORD_DS concord_current_env; + +#if 0 +static int +concord_setup_env_maybe () +{ + if (concord_current_env == NULL) + { + concord_open_env ("/usr/local/share/chise/1.0/db/"); + } + return 0; +} +#endif + void COMPOSE_ADD_CHAR (struct decoding_stream *str, Emchar character, unsigned_char_dynarr* dst); void @@ -3478,6 +3549,72 @@ COMPOSE_ADD_CHAR (struct decoding_stream *str, { if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys)) decode_add_er_char (str, character, dst); +#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE + else if (!cos_cons_p (str->combining_table)) + { + COS_object ret; + + /* concord_setup_env_maybe (); */ + open_chise_data_source_maybe (); + ret = concord_object_get_attribute (cos_make_char (character), + COS_COMPOSITION); + + if (!cos_cons_p (ret)) + decode_add_er_char (str, character, dst); + else + 
{ + str->combined_chars[0] = character; + str->combined_char_count = 1; + str->combining_table = ret; + } + } + else + { + COS_object ret + = cos_cdr (cos_assoc (cos_make_char (character), + str->combining_table)); + + if (cos_char_p (ret)) + { + Emchar char2 = cos_char_id (ret); + COS_object ret2; + + /* concord_setup_env_maybe (); */ + open_chise_data_source_maybe (); + ret2 = concord_object_get_attribute (ret, COS_COMPOSITION); + + if (!cos_cons_p (ret2)) + { + decode_add_er_char (str, char2, dst); + str->combined_char_count = 0; + str->combining_table = COS_NIL; + } + else + { + str->combined_chars[0] = char2; + str->combined_char_count = 1; + str->combining_table = ret2; + } + } + else + { + /* concord_setup_env_maybe (); */ + open_chise_data_source_maybe (); + ret = concord_object_get_attribute (cos_make_char (character), + COS_COMPOSITION); + + COMPOSE_FLUSH_CHARS (str, dst); + if (!cos_cons_p (ret)) + decode_add_er_char (str, character, dst); + else + { + str->combined_chars[0] = character; + str->combined_char_count = 1; + str->combining_table = ret; + } + } + } +#else else if (!CONSP (str->combining_table)) { Lisp_Object ret @@ -3533,6 +3670,7 @@ COMPOSE_ADD_CHAR (struct decoding_stream *str, } } } +#endif /* USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE */ } #else /* not UTF2000 */ #define COMPOSE_FLUSH_CHARS(str, dst) @@ -4485,6 +4623,7 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, eol_type_t eol_type = str->eol_type; unsigned char counter = str->counter; #ifdef UTF2000 + int bom_flag = str->bom_flag; Lisp_Object ccs = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (DECODING_STREAM_DATA (decoding)->codesys, 0); @@ -4500,11 +4639,20 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, COMPOSE_FLUSH_CHARS (str, dst); decode_flush_er_chars (str, dst); DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); + + if ( bom_flag == 0 ) + bom_flag = -1; + DECODE_ADD_UCS_CHAR (c, dst); } else if ( c < 0xC0 ) - /* decode_add_er_char (str, c, dst); */ - 
COMPOSE_ADD_CHAR (str, c, dst); + { + if ( bom_flag == 0 ) + bom_flag = -1; + + /* decode_add_er_char (str, c, dst); */ + COMPOSE_ADD_CHAR (str, c, dst); + } else { /* decode_flush_er_chars (str, dst); */ @@ -4542,6 +4690,17 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, { Emchar char_id; + if ( bom_flag == 0 ) + { + if ( cpos == 0xFEFF ) + { + bom_flag = 1; + goto decoded; + } + else + bom_flag = -1; + } + if (!NILP (ccs)) { char_id = decode_defined_char (ccs, cpos, 0); @@ -4552,6 +4711,7 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, else char_id = cpos; COMPOSE_ADD_CHAR (str, char_id, dst); + decoded: cpos = 0; counter = 0; } @@ -4584,6 +4744,9 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, str->flags = flags; str->cpos = cpos; str->counter = counter; +#ifdef UTF2000 + str->bom_flag = bom_flag; +#endif } void @@ -4611,10 +4774,47 @@ char_encode_utf8 (struct encoding_stream *str, Emchar ch, if ( (code_point < 0) || (code_point > 0xEFFFF) ) { - Lisp_Object map - = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 1); - Lisp_Object ret; + Lisp_Object map, ret; + if ( CODING_SYSTEM_ENABLE_DECOMPOSITION (str->codesys) ) + { + Lisp_Object rest = Vdecomposition_feature_list; + Lisp_Object decomp_f; + Lisp_Object seq = Qnil; + struct gcpro gcpro1; + + while ( CONSP (rest) ) + { + decomp_f = XCAR (rest); + GCPRO1 (rest); + seq = Fchar_feature (make_char (ch), decomp_f, Qnil, + Qnil, Qnil); + UNGCPRO; + if ( !NILP (seq) ) + break; + rest = XCDR (rest); + } + + if ( CONSP (seq) ) + { + Lisp_Object base = Fcar (seq); + + seq = Fcdr (seq); + if ( CHARP (base) && CONSP (seq) ) + { + Lisp_Object comb = Fcar (seq); + + if ( CHARP (comb) ) + { + char_encode_utf8 (str, XCHAR (base), dst, flags); + char_encode_utf8 (str, XCHAR (comb), dst, flags); + return; + } + } + } + } + + map = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 1); if ( !NILP (map) && INTP (ret = Fchar_feature (make_char (ch), map, Qnil, @@ -4627,6 +4827,13 @@ 
char_encode_utf8 (struct encoding_stream *str, Emchar ch, map, Qnil, Qnil, Qnil)) ) code_point = XINT (ret); + else if ( !NILP (map = + CODING_SYSTEM_ISO2022_INITIAL_CHARSET + (str->codesys, 3)) + && INTP (ret = Fchar_feature (make_char (ch), + map, Qnil, + Qnil, Qnil)) ) + code_point = XINT (ret); else if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys)) { char buf[18]; @@ -5970,7 +6177,19 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch, { Lisp_Object original_default_coded_charset_priority_list = Vdefault_coded_charset_priority_list; - + Vdefault_coded_charset_priority_list + = CODING_SYSTEM_CCS_PRIORITY_LIST (codesys); + while (!EQ (Vdefault_coded_charset_priority_list, Qnil)) + { + code_point = ENCODE_CHAR (ch, charset); + if (XCHARSET_FINAL (charset)) + goto found; + Vdefault_coded_charset_priority_list + = Fcdr (Fmemq (XCHARSET_NAME (charset), + Vdefault_coded_charset_priority_list)); + } + Vdefault_coded_charset_priority_list + = original_default_coded_charset_priority_list; while (!EQ (Vdefault_coded_charset_priority_list, Qnil)) { code_point = ENCODE_CHAR (ch, charset); @@ -6347,6 +6566,8 @@ syms_of_file_coding (void) #ifdef UTF2000 defsymbol (&Qutf_8_mcs, "utf-8-mcs"); defsymbol (&Qdisable_composition, "disable-composition"); + defsymbol (&Qenable_decomposition, "enable-decomposition"); + defsymbol (&Qccs_priority_list, "ccs-priority-list"); defsymbol (&Quse_entity_reference, "use-entity-reference"); defsymbol (&Qd, "d"); defsymbol (&Qx, "x"); @@ -6475,6 +6696,14 @@ and behaviors of various editing commands. Setting this to nil does not do anything. */ ); enable_multibyte_characters = 1; + +#ifdef UTF2000 + DEFVAR_LISP ("decomposition-feature-list", + &Vdecomposition_feature_list /* +List of `=decomposition@FOO' features used to encode characters as IVS. 
+*/ ); + Vdecomposition_feature_list = Qnil; +#endif } void @@ -6520,11 +6749,15 @@ complex_vars_of_file_coding (void) DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qescape_quoted); DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qinput_charset_conversion); DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qoutput_charset_conversion); +#ifdef UTF2000 + DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qccs_priority_list); +#endif DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qencode); DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qdecode); #ifdef UTF2000 DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK, Qdisable_composition); + DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK, Qenable_decomposition); DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK, Quse_entity_reference); #endif #endif /* MULE */