X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Ftext-coding.c;h=79c9dcb158a01a9e40bcec3b7bf6ca8debc9b54b;hb=27d956d0eb66388b5ba8c31e1764c625fdd36b7b;hp=7be2d3eb7ea1d25ad15567fea542977d8d2a6c19;hpb=4ba75b78e2d9780611ceea1aa95f0ac18fc6a29d;p=chise%2Fxemacs-chise.git.1 diff --git a/src/text-coding.c b/src/text-coding.c index 7be2d3e..79c9dcb 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -1,7 +1,7 @@ /* Code conversion functions. Copyright (C) 1991, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. - Copyright (C) 1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko + Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2008 MORIOKA Tomohiko This file is part of XEmacs. @@ -107,6 +107,7 @@ Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift; #ifdef UTF2000 Lisp_Object Qutf_8_mcs; Lisp_Object Qdisable_composition; +Lisp_Object Qccs_priority_list; Lisp_Object Quse_entity_reference; Lisp_Object Qd, Qx, QX; #endif @@ -1052,6 +1053,12 @@ if TYPE is 'ccl: parse_charset_conversion_specs (codesys->iso2022.output_conv, value); } +#ifdef UTF2000 + else if (EQ (key, Qccs_priority_list)) + { + codesys->ccs_priority_list = value; + } +#endif else signal_simple_error ("Unrecognized property", key); } @@ -1484,6 +1491,8 @@ Return the PROP property of CODING-SYSTEM. return XCODING_SYSTEM_DISABLE_COMPOSITION (coding_system) ? Qt : Qnil; else if (EQ (prop, Quse_entity_reference)) return XCODING_SYSTEM_USE_ENTITY_REFERENCE (coding_system) ? Qt : Qnil; + else if (EQ (prop, Qccs_priority_list)) + return XCODING_SYSTEM_CCS_PRIORITY_LIST (coding_system); #endif else if (type == CODESYS_ISO2022) { @@ -2311,6 +2320,7 @@ struct decoding_stream unsigned char counter; #endif #ifdef UTF2000 + char bom_flag; unsigned char er_counter; unsigned char er_buf[ER_BUF_SIZE]; @@ -2455,6 +2465,7 @@ reset_decoding_stream (struct decoding_stream *str) str->counter = 0; #endif /* MULE */ #ifdef UTF2000 + str->bom_flag = 0; str->er_counter = 0; str->combined_char_count = 0; str->combining_table = Qnil; @@ -3246,6 +3257,8 @@ decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst) } } +EXFUN (Fregexp_quote, 1); + void decode_add_er_char (struct decoding_stream *str, Emchar character, unsigned_char_dynarr* dst); void @@ -3267,7 +3280,7 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, { Lisp_Object string = make_string (str->er_buf, str->er_counter); - Lisp_Object rest = Vcoded_charset_entity_reference_alist; + Lisp_Object rest; Lisp_Object cell; Lisp_Object ret; Lisp_Object pat; @@ -3275,7 +3288,8 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, Lisp_Object char_type; int base; - while (!NILP (rest)) + for ( rest = Vcoded_charset_entity_reference_alist; + !NILP (rest); rest = Fcdr (rest) ) { cell = Fcar (rest); ccs = Fcar (cell); @@ -3295,6 +3309,7 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, pat = ret; else continue; + pat = Fregexp_quote (pat); cell = Fcdr (cell); cell = Fcdr (cell); @@ -3333,10 +3348,16 @@ decode_add_er_char (struct decoding_stream *str, Emchar c, ? DECODE_CHAR (ccs, code, 0) : decode_builtin_char (ccs, code); - DECODE_ADD_UCS_CHAR (chr, dst); + if ( chr >= 0 ) + DECODE_ADD_UCS_CHAR (chr, dst); + else + { + Dynarr_add_many (dst, str->er_buf, str->er_counter); + Dynarr_add (dst, ';'); + } + goto decoded; } - rest = Fcdr (rest); } if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"), string, Qnil, Qnil))) @@ -4485,6 +4506,7 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, eol_type_t eol_type = str->eol_type; unsigned char counter = str->counter; #ifdef UTF2000 + int bom_flag = str->bom_flag; Lisp_Object ccs = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (DECODING_STREAM_DATA (decoding)->codesys, 0); @@ -4500,11 +4522,20 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, COMPOSE_FLUSH_CHARS (str, dst); decode_flush_er_chars (str, dst); DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); + + if ( bom_flag == 0 ) + bom_flag = -1; + DECODE_ADD_UCS_CHAR (c, dst); } else if ( c < 0xC0 ) - /* decode_add_er_char (str, c, dst); */ - COMPOSE_ADD_CHAR (str, c, dst); + { + if ( bom_flag == 0 ) + bom_flag = -1; + + /* decode_add_er_char (str, c, dst); */ + COMPOSE_ADD_CHAR (str, c, dst); + } else { /* decode_flush_er_chars (str, dst); */ @@ -4542,6 +4573,17 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, { Emchar char_id; + if ( bom_flag == 0 ) + { + if ( cpos == 0xFEFF ) + { + bom_flag = 1; + goto decoded; + } + else + bom_flag = -1; + } + if (!NILP (ccs)) { char_id = decode_defined_char (ccs, cpos, 0); @@ -4552,6 +4594,7 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, else char_id = cpos; COMPOSE_ADD_CHAR (str, char_id, dst); + decoded: cpos = 0; counter = 0; } @@ -4584,6 +4627,9 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, str->flags = flags; str->cpos = cpos; str->counter = counter; +#ifdef UTF2000 + str->bom_flag = bom_flag; +#endif } void @@ -4609,7 +4655,7 @@ char_encode_utf8 (struct encoding_stream *str, Emchar ch, = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 0); int code_point = charset_code_point (ucs_ccs, ch, 0); - if ( (code_point < 0) || (code_point > 0x10FFFF) ) + if ( (code_point < 0) || (code_point > 0xEFFFF) ) { Lisp_Object map = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 1); @@ -5970,7 +6016,19 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch, { Lisp_Object original_default_coded_charset_priority_list = Vdefault_coded_charset_priority_list; - + Vdefault_coded_charset_priority_list + = CODING_SYSTEM_CCS_PRIORITY_LIST (codesys); + while (!EQ (Vdefault_coded_charset_priority_list, Qnil)) + { + code_point = ENCODE_CHAR (ch, charset); + if (XCHARSET_FINAL (charset)) + goto found; + Vdefault_coded_charset_priority_list + = Fcdr (Fmemq (XCHARSET_NAME (charset), + Vdefault_coded_charset_priority_list)); + } + Vdefault_coded_charset_priority_list + = original_default_coded_charset_priority_list; while (!EQ (Vdefault_coded_charset_priority_list, Qnil)) { code_point = ENCODE_CHAR (ch, charset); @@ -6347,6 +6405,7 @@ syms_of_file_coding (void) #ifdef UTF2000 defsymbol (&Qutf_8_mcs, "utf-8-mcs"); defsymbol (&Qdisable_composition, "disable-composition"); + defsymbol (&Qccs_priority_list, "ccs-priority-list"); defsymbol (&Quse_entity_reference, "use-entity-reference"); defsymbol (&Qd, "d"); defsymbol (&Qx, "x"); @@ -6520,6 +6579,9 @@ complex_vars_of_file_coding (void) DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qescape_quoted); DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qinput_charset_conversion); DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qoutput_charset_conversion); +#ifdef UTF2000 + DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qccs_priority_list); +#endif DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qencode); DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qdecode);