X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Ftext-coding.c;h=e9749b131195b742fd930598d163f8345ed79ccd;hb=a7018caf28cfca1998adae78b74f2d513da83838;hp=b0f987625e1e680b17f60a4349b684fcbc2dbcdf;hpb=7c800d4821261afacd6326a55941484246ad6de6;p=chise%2Fxemacs-chise.git diff --git a/src/text-coding.c b/src/text-coding.c index b0f9876..e9749b1 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -1,7 +1,7 @@ /* Code conversion functions. Copyright (C) 1991, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. - Copyright (C) 1999,2000 MORIOKA Tomohiko + Copyright (C) 1999,2000,2001 MORIOKA Tomohiko This file is part of XEmacs. @@ -47,17 +47,17 @@ Lisp_Object Vcoding_system_for_write; Lisp_Object Vfile_name_coding_system; /* Table of symbols identifying each coding category. */ -Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1]; +Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST]; struct file_coding_dump { /* Coding system currently associated with each coding category. */ - Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1]; + Lisp_Object coding_category_system[CODING_CATEGORY_LAST]; /* Table of all coding categories in decreasing order of priority. This describes a permutation of the possible coding categories. */ - int coding_category_by_priority[CODING_CATEGORY_LAST + 1]; + int coding_category_by_priority[CODING_CATEGORY_LAST]; #if defined(MULE) && !defined(UTF2000) Lisp_Object ucs_to_mule_table[65536]; @@ -65,7 +65,7 @@ struct file_coding_dump { } *fcd; static const struct lrecord_description fcd_description_1[] = { - { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST + 1 }, + { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST }, #if defined(MULE) && !defined(UTF2000) { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, ucs_to_mule_table), countof (fcd->ucs_to_mule_table) }, #endif @@ -197,8 +197,10 @@ static int detect_coding_big5 (struct detection_state *st, const Extbyte *src, size_t n); static void decode_coding_big5 (Lstream *decoding, const Extbyte *src, unsigned_char_dynarr *dst, size_t n); -static void encode_coding_big5 (Lstream *encoding, const Bufbyte *src, - unsigned_char_dynarr *dst, size_t n); +void char_encode_big5 (struct encoding_stream *str, Emchar c, + unsigned_char_dynarr *dst, unsigned int *flags); +void char_finish_big5 (struct encoding_stream *str, + unsigned_char_dynarr *dst, unsigned int *flags); static int detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, size_t n); @@ -887,7 +889,6 @@ if TYPE is 'ccl: (name, type, doc_string, props)) { Lisp_Coding_System *codesys; - Lisp_Object rest, key, value; enum coding_system_type ty; int need_to_setup_eol_systems = 1; @@ -919,122 +920,124 @@ if TYPE is 'ccl: CHECK_STRING (doc_string); CODING_SYSTEM_DOC_STRING (codesys) = doc_string; - EXTERNAL_PROPERTY_LIST_LOOP (rest, key, value, props) - { - if (EQ (key, Qmnemonic)) - { - if (!NILP (value)) - CHECK_STRING (value); - CODING_SYSTEM_MNEMONIC (codesys) = value; - } + { + EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props) + { + if (EQ (key, Qmnemonic)) + { + if (!NILP (value)) + CHECK_STRING (value); + CODING_SYSTEM_MNEMONIC (codesys) = value; + } - else if (EQ (key, Qeol_type)) - { - need_to_setup_eol_systems = NILP (value); - if (EQ (value, Qt)) - value = Qnil; - CODING_SYSTEM_EOL_TYPE (codesys) = symbol_to_eol_type (value); - } + else if (EQ (key, Qeol_type)) + { + need_to_setup_eol_systems = NILP (value); + if (EQ (value, Qt)) + value = Qnil; + CODING_SYSTEM_EOL_TYPE (codesys) = symbol_to_eol_type (value); + } - else if (EQ (key, Qpost_read_conversion)) - CODING_SYSTEM_POST_READ_CONVERSION (codesys) = value; - else if (EQ (key, Qpre_write_conversion)) - CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = value; + else if (EQ (key, Qpost_read_conversion)) + CODING_SYSTEM_POST_READ_CONVERSION (codesys) = value; + else if (EQ (key, Qpre_write_conversion)) + CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = value; #ifdef UTF2000 - else if (EQ (key, Qdisable_composition)) - CODING_SYSTEM_DISABLE_COMPOSITION (codesys) = !NILP (value); + else if (EQ (key, Qdisable_composition)) + CODING_SYSTEM_DISABLE_COMPOSITION (codesys) = !NILP (value); #endif #ifdef MULE - else if (ty == CODESYS_ISO2022) - { + else if (ty == CODESYS_ISO2022) + { #define FROB_INITIAL_CHARSET(charset_num) \ CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, charset_num) = \ ((EQ (value, Qt) || EQ (value, Qnil)) ? value : Fget_charset (value)) - if (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0); - else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1); - else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2); - else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3); + if (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0); + else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1); + else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2); + else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3); #define FROB_FORCE_CHARSET(charset_num) \ CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (codesys, charset_num) = !NILP (value) - else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0); - else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1); - else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2); - else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3); + else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0); + else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1); + else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2); + else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3); #define FROB_BOOLEAN_PROPERTY(prop) \ CODING_SYSTEM_ISO2022_##prop (codesys) = !NILP (value) - else if (EQ (key, Qshort)) FROB_BOOLEAN_PROPERTY (SHORT); - else if (EQ (key, Qno_ascii_eol)) FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL); - else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL); - else if (EQ (key, Qseven)) FROB_BOOLEAN_PROPERTY (SEVEN); - else if (EQ (key, Qlock_shift)) FROB_BOOLEAN_PROPERTY (LOCK_SHIFT); - else if (EQ (key, Qno_iso6429)) FROB_BOOLEAN_PROPERTY (NO_ISO6429); - else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED); + else if (EQ (key, Qshort)) FROB_BOOLEAN_PROPERTY (SHORT); + else if (EQ (key, Qno_ascii_eol)) FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL); + else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL); + else if (EQ (key, Qseven)) FROB_BOOLEAN_PROPERTY (SEVEN); + else if (EQ (key, Qlock_shift)) FROB_BOOLEAN_PROPERTY (LOCK_SHIFT); + else if (EQ (key, Qno_iso6429)) FROB_BOOLEAN_PROPERTY (NO_ISO6429); + else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED); - else if (EQ (key, Qinput_charset_conversion)) - { - codesys->iso2022.input_conv = - Dynarr_new (charset_conversion_spec); - parse_charset_conversion_specs (codesys->iso2022.input_conv, - value); - } - else if (EQ (key, Qoutput_charset_conversion)) - { - codesys->iso2022.output_conv = - Dynarr_new (charset_conversion_spec); - parse_charset_conversion_specs (codesys->iso2022.output_conv, - value); - } - else - signal_simple_error ("Unrecognized property", key); - } - else if (EQ (type, Qccl)) - { - Lisp_Object sym; - struct ccl_program test_ccl; - Extbyte *suffix; - - /* Check key first. */ - if (EQ (key, Qdecode)) - suffix = "-ccl-decode"; - else if (EQ (key, Qencode)) - suffix = "-ccl-encode"; - else - signal_simple_error ("Unrecognized property", key); + else if (EQ (key, Qinput_charset_conversion)) + { + codesys->iso2022.input_conv = + Dynarr_new (charset_conversion_spec); + parse_charset_conversion_specs (codesys->iso2022.input_conv, + value); + } + else if (EQ (key, Qoutput_charset_conversion)) + { + codesys->iso2022.output_conv = + Dynarr_new (charset_conversion_spec); + parse_charset_conversion_specs (codesys->iso2022.output_conv, + value); + } + else + signal_simple_error ("Unrecognized property", key); + } + else if (EQ (type, Qccl)) + { + Lisp_Object sym; + struct ccl_program test_ccl; + Extbyte *suffix; + + /* Check key first. */ + if (EQ (key, Qdecode)) + suffix = "-ccl-decode"; + else if (EQ (key, Qencode)) + suffix = "-ccl-encode"; + else + signal_simple_error ("Unrecognized property", key); - /* If value is vector, register it as a ccl program - associated with an newly created symbol for - backward compatibility. */ - if (VECTORP (value)) - { - sym = Fintern (concat2 (Fsymbol_name (name), - build_string (suffix)), - Qnil); - Fregister_ccl_program (sym, value); - } - else - { - CHECK_SYMBOL (value); - sym = value; - } - /* check if the given ccl programs are valid. */ - if (setup_ccl_program (&test_ccl, sym) < 0) - signal_simple_error ("Invalid CCL program", value); + /* If value is vector, register it as a ccl program + associated with an newly created symbol for + backward compatibility. */ + if (VECTORP (value)) + { + sym = Fintern (concat2 (Fsymbol_name (name), + build_string (suffix)), + Qnil); + Fregister_ccl_program (sym, value); + } + else + { + CHECK_SYMBOL (value); + sym = value; + } + /* check if the given ccl programs are valid. */ + if (setup_ccl_program (&test_ccl, sym) < 0) + signal_simple_error ("Invalid CCL program", value); - if (EQ (key, Qdecode)) - CODING_SYSTEM_CCL_DECODE (codesys) = sym; - else if (EQ (key, Qencode)) - CODING_SYSTEM_CCL_ENCODE (codesys) = sym; + if (EQ (key, Qdecode)) + CODING_SYSTEM_CCL_DECODE (codesys) = sym; + else if (EQ (key, Qencode)) + CODING_SYSTEM_CCL_ENCODE (codesys) = sym; - } + } #endif /* MULE */ - else - signal_simple_error ("Unrecognized property", key); - } + else + signal_simple_error ("Unrecognized property", key); + } + } if (need_to_setup_eol_systems) setup_eol_coding_systems (codesys); @@ -1464,7 +1467,7 @@ decode_coding_category (Lisp_Object symbol) int i; CHECK_SYMBOL (symbol); - for (i = 0; i <= CODING_CATEGORY_LAST; i++) + for (i = 0; i < CODING_CATEGORY_LAST; i++) if (EQ (coding_category_symbol[i], symbol)) return i; @@ -1480,7 +1483,7 @@ Return a list of all recognized coding categories. int i; Lisp_Object list = Qnil; - for (i = CODING_CATEGORY_LAST; i >= 0; i--) + for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--) list = Fcons (coding_category_symbol[i], list); return list; } @@ -1494,13 +1497,13 @@ previously. */ (list)) { - int category_to_priority[CODING_CATEGORY_LAST + 1]; + int category_to_priority[CODING_CATEGORY_LAST]; int i, j; Lisp_Object rest; /* First generate a list that maps coding categories to priorities. */ - for (i = 0; i <= CODING_CATEGORY_LAST; i++) + for (i = 0; i < CODING_CATEGORY_LAST; i++) category_to_priority[i] = -1; /* Highest priority comes from the specified list. */ @@ -1517,7 +1520,7 @@ previously. /* Now go through the existing categories by priority to retrieve the categories not yet specified and preserve their priority order. */ - for (j = 0; j <= CODING_CATEGORY_LAST; j++) + for (j = 0; j < CODING_CATEGORY_LAST; j++) { int cat = fcd->coding_category_by_priority[j]; if (category_to_priority[cat] < 0) @@ -1527,7 +1530,7 @@ previously. /* Now we need to construct the inverse of the mapping we just constructed. */ - for (i = 0; i <= CODING_CATEGORY_LAST; i++) + for (i = 0; i < CODING_CATEGORY_LAST; i++) fcd->coding_category_by_priority[category_to_priority[i]] = i; /* Phew! That was confusing. */ @@ -1542,7 +1545,7 @@ Return a list of coding categories in descending order of priority. int i; Lisp_Object list = Qnil; - for (i = CODING_CATEGORY_LAST; i >= 0; i--) + for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--) list = Fcons (coding_category_symbol[fcd->coding_category_by_priority[i]], list); return list; @@ -1792,7 +1795,7 @@ coding_system_from_mask (int mask) #endif /* Look through the coding categories by priority and find the first one that is allowed. */ - for (i = 0; i <= CODING_CATEGORY_LAST; i++) + for (i = 0; i < CODING_CATEGORY_LAST; i++) { cat = fcd->coding_category_by_priority[i]; if ((mask & (1 << cat)) && @@ -1990,7 +1993,7 @@ type. Optional arg BUFFER defaults to the current buffer. #ifdef MULE decst.mask = postprocess_iso2022_mask (decst.mask); #endif - for (i = CODING_CATEGORY_LAST; i >= 0; i--) + for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--) { int sys = fcd->coding_category_by_priority[i]; if (decst.mask & (1 << sys)) @@ -2226,7 +2229,8 @@ COMPOSE_ADD_CHAR(struct decoding_stream *str, else if (!CHAR_ID_TABLE_P (str->combining_table)) { Lisp_Object ret - = get_char_id_table (character, Vcharacter_composition_table); + = get_char_id_table (XCHAR_ID_TABLE(Vcharacter_composition_table), + character); if (NILP (ret)) DECODE_ADD_UCS_CHAR (character, dst); @@ -2240,12 +2244,15 @@ COMPOSE_ADD_CHAR(struct decoding_stream *str, else { Lisp_Object ret - = get_char_id_table (character, str->combining_table); + = get_char_id_table (XCHAR_ID_TABLE(str->combining_table), + character); if (CHARP (ret)) { Emchar char2 = XCHARVAL (ret); - ret = get_char_id_table (char2, Vcharacter_composition_table); + ret = + get_char_id_table (XCHAR_ID_TABLE(Vcharacter_composition_table), + char2); if (NILP (ret)) { DECODE_ADD_UCS_CHAR (char2, dst); @@ -2900,6 +2907,10 @@ reset_encoding_stream (struct encoding_stream *str) str->encode_char = &char_encode_shift_jis; str->finish = &char_finish_shift_jis; break; + case CODESYS_BIG5: + str->encode_char = &char_encode_big5; + str->finish = &char_finish_big5; + break; default: break; } @@ -3015,9 +3026,6 @@ mule_encode (Lstream *encoding, const Bufbyte *src, encode_coding_no_conversion (encoding, src, dst, n); break; #ifdef MULE - case CODESYS_BIG5: - encode_coding_big5 (encoding, src, dst, n); - break; case CODESYS_CCL: str->ccl.last_block = str->flags & CODING_STATE_END; /* When applying ccl program to stream, MUST NOT set NULL @@ -3445,8 +3453,13 @@ Return the corresponding character code in SHIFT-JIS as a cons of two bytes. contains frequently used characters and the latter contains less frequently used characters. */ +#ifdef UTF2000 +#define BYTE_BIG5_TWO_BYTE_1_P(c) \ + ((c) >= 0x81 && (c) <= 0xFE) +#else #define BYTE_BIG5_TWO_BYTE_1_P(c) \ ((c) >= 0xA1 && (c) <= 0xFE) +#endif /* Is this the second byte of a Shift-JIS two-byte char? */ @@ -3532,8 +3545,11 @@ detect_coding_big5 (struct detection_state *st, const Extbyte *src, size_t n) while (n--) { unsigned char c = *(unsigned char *)src++; - if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO || - (c >= 0x80 && c <= 0xA0)) + if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO +#ifndef UTF2000 + || (c >= 0x80 && c <= 0xA0) +#endif + ) return 0; if (st->big5.in_second_byte) { @@ -3541,7 +3557,13 @@ detect_coding_big5 (struct detection_state *st, const Extbyte *src, size_t n) if (c < 0x40 || (c >= 0x80 && c <= 0xA0)) return 0; } - else if (c >= 0xA1) + else if ( +#ifdef UTF2000 + c >= 0x81 +#else + c >= 0xA1 +#endif + ) st->big5.in_second_byte = 1; } return CODING_CATEGORY_BIG5_MASK; @@ -3604,66 +3626,71 @@ decode_coding_big5 (Lstream *decoding, const Extbyte *src, /* Convert internally-formatted data to Big5. */ -static void -encode_coding_big5 (Lstream *encoding, const Bufbyte *src, - unsigned_char_dynarr *dst, size_t n) +void +char_encode_big5 (struct encoding_stream *str, Emchar ch, + unsigned_char_dynarr *dst, unsigned int *flags) { -#ifndef UTF2000 - unsigned char c; - struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); - unsigned int flags = str->flags; - unsigned int ch = str->ch; eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); - while (n--) + if (ch == '\n') { - c = *src++; - if (c == '\n') - { - if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) - Dynarr_add (dst, '\r'); - if (eol_type != EOL_CR) - Dynarr_add (dst, '\n'); - } - else if (BYTE_ASCII_P (c)) - { - /* ASCII. */ - Dynarr_add (dst, c); - } - else if (BUFBYTE_LEADING_BYTE_P (c)) + if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) + Dynarr_add (dst, '\r'); + if (eol_type != EOL_CR) + Dynarr_add (dst, ch); + } + else + { +#ifdef UTF2000 + int code_point; + + if ((code_point = charset_code_point (Vcharset_ascii, ch)) >= 0) + Dynarr_add (dst, code_point); + else if ((code_point + = charset_code_point (Vcharset_chinese_big5, ch)) >= 0) { - if (c == LEADING_BYTE_CHINESE_BIG5_1 || - c == LEADING_BYTE_CHINESE_BIG5_2) - { - /* A recognized leading byte. */ - ch = c; - continue; /* not done with this character. */ - } - /* otherwise just ignore this character. */ + Dynarr_add (dst, code_point >> 8); + Dynarr_add (dst, code_point & 0xFF); } - else if (ch == LEADING_BYTE_CHINESE_BIG5_1 || - ch == LEADING_BYTE_CHINESE_BIG5_2) + else if ((code_point + = charset_code_point (Vcharset_chinese_big5_1, ch)) >= 0) { - /* Previous char was a recognized leading byte. */ - ch = (ch << 8) | c; - continue; /* not done with this character. */ + unsigned int I + = ((code_point >> 8) - 33) * (0xFF - 0xA1) + + ((code_point & 0xFF) - 33); + unsigned char b1 = I / BIG5_SAME_ROW + 0xA1; + unsigned char b2 = I % BIG5_SAME_ROW; + + b2 += b2 < 0x3F ? 0x40 : 0x62; + Dynarr_add (dst, b1); + Dynarr_add (dst, b2); } - else if (ch) + else if ((code_point + = charset_code_point (Vcharset_chinese_big5_2, ch)) >= 0) { - /* Encountering second byte of a Big5 character. */ + unsigned int I + = ((code_point >> 8) - 33) * (0xFF - 0xA1) + + ((code_point & 0xFF) - 33); unsigned char b1, b2; - ENCODE_BIG5 (ch >> 8, ch & 0xFF, c, b1, b2); + I += BIG5_SAME_ROW * (0xC9 - 0xA1); + b1 = I / BIG5_SAME_ROW + 0xA1; + b2 = I % BIG5_SAME_ROW; + b2 += b2 < 0x3F ? 0x40 : 0x62; Dynarr_add (dst, b1); Dynarr_add (dst, b2); } - - ch = 0; + else + Dynarr_add (dst, '?'); +#else +#endif } +} - str->flags = flags; - str->ch = ch; -#endif +void +char_finish_big5 (struct encoding_stream *str, unsigned_char_dynarr *dst, + unsigned int *flags) +{ } @@ -3837,6 +3864,95 @@ detect_coding_utf8 (struct detection_state *st, const Extbyte *src, size_t n) } static void +decode_output_utf8_partial_char (unsigned char counter, + unsigned int cpos, + unsigned_char_dynarr *dst) +{ + if (counter == 5) + DECODE_ADD_BINARY_CHAR ( (cpos|0xFC), dst); + else if (counter == 4) + { + if (cpos < (1 << 6)) + DECODE_ADD_BINARY_CHAR ( (cpos|0xF8), dst); + else + { + DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xFC), dst); + DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst); + } + } + else if (counter == 3) + { + if (cpos < (1 << 6)) + DECODE_ADD_BINARY_CHAR ( (cpos|0xF0), dst); + else if (cpos < (1 << 12)) + { + DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xF8), dst); + DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst); + } + else + { + DECODE_ADD_BINARY_CHAR ( ( (cpos >> 12)|0xFC), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); + } + } + else if (counter == 2) + { + if (cpos < (1 << 6)) + DECODE_ADD_BINARY_CHAR ( (cpos|0xE0), dst); + else if (cpos < (1 << 12)) + { + DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xF0), dst); + DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst); + } + else if (cpos < (1 << 18)) + { + DECODE_ADD_BINARY_CHAR ( ( (cpos >> 12)|0xF8), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); + } + else + { + DECODE_ADD_BINARY_CHAR ( ( (cpos >> 18)|0xFC), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 12)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); + } + } + else + { + if (cpos < (1 << 6)) + DECODE_ADD_BINARY_CHAR ( (cpos|0xC0), dst); + else if (cpos < (1 << 12)) + { + DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xE0), dst); + DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst); + } + else if (cpos < (1 << 18)) + { + DECODE_ADD_BINARY_CHAR ( ( (cpos >> 12)|0xF0), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); + } + else if (cpos < (1 << 24)) + { + DECODE_ADD_BINARY_CHAR ( ( (cpos >> 18)|0xF8), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 12)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); + } + else + { + DECODE_ADD_BINARY_CHAR ( ( (cpos >> 24)|0xFC), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 18)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 12)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); + DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); + } + } +} + +static void decode_coding_utf8 (Lstream *decoding, const Extbyte *src, unsigned_char_dynarr *dst, size_t n) { @@ -3896,88 +4012,7 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, } else { - if (counter == 5) - DECODE_ADD_BINARY_CHAR ( (cpos|0xFC), dst); - else if (counter == 4) - { - if (cpos < (1 << 6)) - DECODE_ADD_BINARY_CHAR ( (cpos|0xF8), dst); - else - { - DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xFC), dst); - DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst); - } - } - else if (counter == 3) - { - if (cpos < (1 << 6)) - DECODE_ADD_BINARY_CHAR ( (cpos|0xF0), dst); - else if (cpos < (1 << 12)) - { - DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xF8), dst); - DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst); - } - else - { - DECODE_ADD_BINARY_CHAR ( ( (cpos >> 12)|0xFC), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); - } - } - else if (counter == 2) - { - if (cpos < (1 << 6)) - DECODE_ADD_BINARY_CHAR ( (cpos|0xE0), dst); - else if (cpos < (1 << 12)) - { - DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xF0), dst); - DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst); - } - else if (cpos < (1 << 18)) - { - DECODE_ADD_BINARY_CHAR ( ( (cpos >> 12)|0xF8), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); - } - else - { - DECODE_ADD_BINARY_CHAR ( ( (cpos >> 18)|0xFC), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 12)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); - } - } - else - { - if (cpos < (1 << 6)) - DECODE_ADD_BINARY_CHAR ( (cpos|0xC0), dst); - else if (cpos < (1 << 12)) - { - DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xE0), dst); - DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst); - } - else if (cpos < (1 << 18)) - { - DECODE_ADD_BINARY_CHAR ( ( (cpos >> 12)|0xF0), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); - } - else if (cpos < (1 << 24)) - { - DECODE_ADD_BINARY_CHAR ( ( (cpos >> 18)|0xF8), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 12)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); - } - else - { - DECODE_ADD_BINARY_CHAR ( ( (cpos >> 24)|0xFC), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 18)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 12)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst); - DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst); - } - } + decode_output_utf8_partial_char (counter, cpos, dst); DECODE_ADD_BINARY_CHAR (c, dst); cpos = 0; counter = 0; @@ -3988,7 +4023,7 @@ decode_coding_utf8 (Lstream *decoding, const Extbyte *src, if (flags & CODING_STATE_END) if (counter > 0) { - DECODE_ADD_BINARY_CHAR (cpos, dst); + decode_output_utf8_partial_char (counter, cpos, dst); cpos = 0; counter = 0; } @@ -5764,10 +5799,10 @@ vars_of_file_coding (void) int i; fcd = xnew (struct file_coding_dump); - dumpstruct (&fcd, &fcd_description); + dump_add_root_struct_ptr (&fcd, &fcd_description); /* Initialize to something reasonable ... */ - for (i = 0; i <= CODING_CATEGORY_LAST; i++) + for (i = 0; i < CODING_CATEGORY_LAST; i++) { fcd->coding_category_system[i] = Qnil; fcd->coding_category_by_priority[i] = i; @@ -5831,7 +5866,7 @@ complex_vars_of_file_coding (void) make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); the_codesys_prop_dynarr = Dynarr_new (codesys_prop); - dumpstruct (&the_codesys_prop_dynarr, &codesys_prop_dynarr_description); + dump_add_root_struct_ptr (&the_codesys_prop_dynarr, &codesys_prop_dynarr_description); #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \ { \ @@ -5907,7 +5942,7 @@ complex_vars_of_file_coding (void) #if defined(MULE) && !defined(UTF2000) { - unsigned int i; + size_t i; for (i = 0; i < countof (fcd->ucs_to_mule_table); i++) fcd->ucs_to_mule_table[i] = Qnil;