X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Ftext-coding.c;h=8015b7cae2d67c0521aa008207d02d096f7a1708;hb=99acb31ad5095183b2c9c7c45093edf5cd378890;hp=b28c2d73260341713cc872b6ae93ad802c0559d7;hpb=6f39425d9242fa7af570309aef32cae510bfad44;p=chise%2Fxemacs-chise.git.1 diff --git a/src/text-coding.c b/src/text-coding.c index b28c2d7..8015b7c 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -1,7 +1,8 @@ /* Code conversion functions. Copyright (C) 1991, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. - Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2008 MORIOKA Tomohiko + Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2011 + MORIOKA Tomohiko This file is part of XEmacs. @@ -110,6 +111,7 @@ Lisp_Object Qdisable_composition; Lisp_Object Qccs_priority_list; Lisp_Object Quse_entity_reference; Lisp_Object Qd, Qx, QX; +Lisp_Object Vdecomposition_feature_list; #endif Lisp_Object Qencode, Qdecode; @@ -2320,6 +2322,7 @@ struct decoding_stream unsigned char counter; #endif #ifdef UTF2000 + char bom_flag; unsigned char er_counter; unsigned char er_buf[ER_BUF_SIZE]; @@ -2464,6 +2467,7 @@ reset_decoding_stream (struct decoding_stream *str) str->counter = 0; #endif /* MULE */ #ifdef UTF2000 + str->bom_flag = 0; str->er_counter = 0; str->combined_char_count = 0; str->combining_table = Qnil; @@ -3411,11 +3415,22 @@ char_encode_as_entity_reference (Emchar ch, char* buf) char_type = Qnil; if (!NILP (ccs = Ffind_charset (ccs))) { - int code_point = charset_code_point (ccs, ch, 0); + int code_point + = charset_code_point (ccs, ch, + NILP (char_type) ? 
+ CHAR_ALL : CHAR_ISOLATED_ONLY ); if ( (code_point >= 0) - && (NILP (char_type) - || DECODE_CHAR (ccs, code_point, 0) != ch) ) + && ( NILP (char_type) +#if 1 + || ( charset_code_point (ccs, ch, CHAR_DEFINED_ONLY) + == -1 ) +#endif +#if 0 + || ( DECODE_CHAR (ccs, code_point, 0) != ch ) +#endif + ) + ) { Lisp_Object ret; @@ -3474,7 +3489,7 @@ char_encode_as_entity_reference (Emchar ch, char* buf) /************************************************************************/ /* character composition */ /************************************************************************/ -extern Lisp_Object Qcomposition; +extern Lisp_Object Qcomposition, Qrep_decomposition; INLINE_HEADER void COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst); @@ -4504,6 +4519,7 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, eol_type_t eol_type = str->eol_type; unsigned char counter = str->counter; #ifdef UTF2000 + int bom_flag = str->bom_flag; Lisp_Object ccs = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (DECODING_STREAM_DATA (decoding)->codesys, 0); @@ -4519,11 +4535,20 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, COMPOSE_FLUSH_CHARS (str, dst); decode_flush_er_chars (str, dst); DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); + + if ( bom_flag == 0 ) + bom_flag = -1; + DECODE_ADD_UCS_CHAR (c, dst); } else if ( c < 0xC0 ) - /* decode_add_er_char (str, c, dst); */ - COMPOSE_ADD_CHAR (str, c, dst); + { + if ( bom_flag == 0 ) + bom_flag = -1; + + /* decode_add_er_char (str, c, dst); */ + COMPOSE_ADD_CHAR (str, c, dst); + } else { /* decode_flush_er_chars (str, dst); */ @@ -4561,6 +4586,17 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, { Emchar char_id; + if ( bom_flag == 0 ) + { + if ( cpos == 0xFEFF ) + { + bom_flag = 1; + goto decoded; + } + else + bom_flag = -1; + } + if (!NILP (ccs)) { char_id = decode_defined_char (ccs, cpos, 0); @@ -4571,6 +4607,7 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, else char_id = 
cpos; COMPOSE_ADD_CHAR (str, char_id, dst); + decoded: cpos = 0; counter = 0; } @@ -4603,6 +4640,9 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, str->flags = flags; str->cpos = cpos; str->counter = counter; +#ifdef UTF2000 + str->bom_flag = bom_flag; +#endif } void @@ -4630,10 +4670,43 @@ char_encode_utf8 (struct encoding_stream *str, Emchar ch, if ( (code_point < 0) || (code_point > 0xEFFFF) ) { - Lisp_Object map - = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 1); - Lisp_Object ret; + Lisp_Object rest = Vdecomposition_feature_list; + Lisp_Object decomp_f; + Lisp_Object seq = Qnil; + Lisp_Object map, ret; + struct gcpro gcpro1; + while ( CONSP (rest) ) + { + decomp_f = XCAR (rest); + GCPRO1 (rest); + seq = Fchar_feature (make_char (ch), decomp_f, Qnil, + Qnil, Qnil); + UNGCPRO; + if ( !NILP (seq) ) + break; + rest = XCDR (rest); + } + + if ( CONSP (seq) ) + { + Lisp_Object base = Fcar (seq); + + seq = Fcdr (seq); + if ( CHARP (base) && CONSP (seq) ) + { + Lisp_Object comb = Fcar (seq); + + if ( CHARP (comb) ) + { + char_encode_utf8 (str, XCHAR (base), dst, flags); + char_encode_utf8 (str, XCHAR (comb), dst, flags); + return; + } + } + } + + map = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 1); if ( !NILP (map) && INTP (ret = Fchar_feature (make_char (ch), map, Qnil, @@ -6507,6 +6580,14 @@ and behaviors of various editing commands. Setting this to nil does not do anything. */ ); enable_multibyte_characters = 1; + +#ifdef UTF2000 + DEFVAR_LISP ("decomposition-feature-list", + &Vdecomposition_feature_list /* +List of `=decomposition@FOO' features used to encode characters as IVS. +*/ ); + Vdecomposition_feature_list = Qnil; +#endif } void