/* Code conversion functions.
Copyright (C) 1991, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
- Copyright (C) 1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2011
+ MORIOKA Tomohiko
This file is part of XEmacs.
#ifdef UTF2000
Lisp_Object Qutf_8_mcs;
Lisp_Object Qdisable_composition;
+Lisp_Object Qccs_priority_list; /* symbol `ccs-priority-list', used as a coding-system property key */
Lisp_Object Quse_entity_reference;
Lisp_Object Qd, Qx, QX;
#endif
parse_charset_conversion_specs (codesys->iso2022.output_conv,
value);
}
+#ifdef UTF2000
+ else if (EQ (key, Qccs_priority_list)) /* `ccs-priority-list' property */
+ {
+ codesys->ccs_priority_list = value; /* stored as given.  NOTE(review): no type check on VALUE is visible here, while the encoder later walks this slot with Fcdr/Fmemq -- confirm whether a list check is wanted */
+ }
+#endif
else
signal_simple_error ("Unrecognized property", key);
}
return XCODING_SYSTEM_DISABLE_COMPOSITION (coding_system) ? Qt : Qnil;
else if (EQ (prop, Quse_entity_reference))
return XCODING_SYSTEM_USE_ENTITY_REFERENCE (coding_system) ? Qt : Qnil;
+ else if (EQ (prop, Qccs_priority_list)) /* `ccs-priority-list' property */
+ return XCODING_SYSTEM_CCS_PRIORITY_LIST (coding_system); /* the stored object itself, not a Qt/Qnil flag like the two properties above */
#endif
else if (type == CODESYS_ISO2022)
{
unsigned char counter;
#endif
#ifdef UTF2000
+ char bom_flag; /* BOM state: 0 = nothing decoded yet, 1 = a leading U+FEFF was seen and skipped, -1 = the stream began with something else.  NOTE(review): plain `char' may be unsigned; the visible code only tests `== 0', so storing -1 is harmless there -- confirm no other reader compares against -1 */
unsigned char er_counter;
unsigned char er_buf[ER_BUF_SIZE];
str->counter = 0;
#endif /* MULE */
#ifdef UTF2000
+ str->bom_flag = 0; /* 0 = undecided: the UTF-8 decoder compares the first multibyte character with 0xFEFF while this is still 0 */
str->er_counter = 0;
str->combined_char_count = 0;
str->combining_table = Qnil;
}
}
+EXFUN (Fregexp_quote, 1); /* declaration for the `pat = Fregexp_quote (pat)' call further down */
+
void decode_add_er_char (struct decoding_stream *str, Emchar character,
unsigned_char_dynarr* dst);
void
{
Lisp_Object string = make_string (str->er_buf,
str->er_counter);
- Lisp_Object rest = Vcoded_charset_entity_reference_alist;
+ Lisp_Object rest; /* initialised in the for-header below */
Lisp_Object cell;
Lisp_Object ret;
Lisp_Object pat;
Lisp_Object char_type;
int base;
- while (!NILP (rest))
+ for ( rest = Vcoded_charset_entity_reference_alist; /* a `for' so that the `continue' below still advances REST; */
+ !NILP (rest); rest = Fcdr (rest) ) /* the removed `while' only stepped REST at the end of its body */
{
cell = Fcar (rest);
ccs = Fcar (cell);
pat = ret;
else
continue;
+ pat = Fregexp_quote (pat); /* presumably so PAT is matched literally when used in a regexp -- the use site is not visible in this excerpt */
cell = Fcdr (cell);
cell = Fcdr (cell);
? DECODE_CHAR (ccs, code, 0)
: decode_builtin_char (ccs, code);
- DECODE_ADD_UCS_CHAR (chr, dst);
+ if ( chr >= 0 ) /* only a non-negative CHR is emitted as a character */
+ DECODE_ADD_UCS_CHAR (chr, dst);
+ else
+ { /* otherwise pass the buffered er_buf bytes through unchanged, followed by ';' */
+ Dynarr_add_many (dst, str->er_buf, str->er_counter);
+ Dynarr_add (dst, ';');
+ }
+
goto decoded;
}
- rest = Fcdr (rest);
}
if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"),
string, Qnil, Qnil)))
/************************************************************************/
/* character composition */
/************************************************************************/
-extern Lisp_Object Qcomposition;
+extern Lisp_Object Qcomposition, Qrep_decomposition; /* Qrep_decomposition is the feature passed to Fchar_feature ahead of the char_encode_utf8 calls further down */
INLINE_HEADER void
COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst);
eol_type_t eol_type = str->eol_type;
unsigned char counter = str->counter;
#ifdef UTF2000
+ int bom_flag = str->bom_flag; /* local copy of the BOM state; written back to STR before returning */
Lisp_Object ccs
= CODING_SYSTEM_ISO2022_INITIAL_CHARSET (DECODING_STREAM_DATA
(decoding)->codesys, 0);
COMPOSE_FLUSH_CHARS (str, dst);
decode_flush_er_chars (str, dst);
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+
+ if ( bom_flag == 0 ) /* something other than a BOM came first: stop looking for one */
+ bom_flag = -1;
+
DECODE_ADD_UCS_CHAR (c, dst);
}
else if ( c < 0xC0 )
- /* decode_add_er_char (str, c, dst); */
- COMPOSE_ADD_CHAR (str, c, dst);
+ {
+ if ( bom_flag == 0 ) /* same as above: the first unit was not a BOM */
+ bom_flag = -1;
+
+ /* decode_add_er_char (str, c, dst); */
+ COMPOSE_ADD_CHAR (str, c, dst);
+ }
else
{
/* decode_flush_er_chars (str, dst); */
{
Emchar char_id;
+ if ( bom_flag == 0 ) /* still undecided: this is the first completed character */
+ {
+ if ( cpos == 0xFEFF ) /* leading U+FEFF: record it and skip COMPOSE_ADD_CHAR */
+ {
+ bom_flag = 1;
+ goto decoded;
+ }
+ else
+ bom_flag = -1; /* first character is ordinary text */
+ }
+
if (!NILP (ccs))
{
char_id = decode_defined_char (ccs, cpos, 0);
else
char_id = cpos;
COMPOSE_ADD_CHAR (str, char_id, dst);
+ decoded: /* shared tail: clear cpos and counter for the next sequence */
cpos = 0;
counter = 0;
}
str->flags = flags;
str->cpos = cpos;
str->counter = counter;
+#ifdef UTF2000
+ str->bom_flag = bom_flag; /* persist the BOM state across calls */
+#endif
}
void
= CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 0);
int code_point = charset_code_point (ucs_ccs, ch, 0);
- if ( (code_point < 0) || (code_point > 0x10FFFF) )
+ if ( (code_point < 0) || (code_point > 0xEFFFF) ) /* upper bound lowered from 0x10FFFF.  NOTE(review): presumably keeps planes 15-16 (private use) out of direct output -- confirm */
{
- Lisp_Object map
- = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 1);
- Lisp_Object ret;
+ Lisp_Object seq = Fchar_feature (make_char (ch), /* look up CH's Qrep_decomposition feature first */
+ Qrep_decomposition, Qnil,
+ Qnil, Qnil);
+ Lisp_Object map, ret;
+
+ if ( CONSP (seq) )
+ {
+ Lisp_Object base = Fcar (seq);
+ seq = Fcdr (seq);
+ if ( CHARP (base) && CONSP (seq) ) /* needs a first character plus at least one more element */
+ {
+ Lisp_Object comb = Fcar (seq);
+
+ if ( CHARP (comb) ) /* only the first two elements are used; any further elements of SEQ are ignored */
+ {
+ char_encode_utf8 (str, XCHAR (base), dst, flags); /* NOTE(review): if this code sits inside char_encode_utf8 these calls recurse -- confirm the data cannot map a character to itself */
+ char_encode_utf8 (str, XCHAR (comb), dst, flags);
+ return;
+ }
+ }
+ }
+
+ map = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 1); /* reached only when the decomposition path above did not return */
if ( !NILP (map)
&& INTP (ret = Fchar_feature (make_char (ch),
map, Qnil,
{
Lisp_Object original_default_coded_charset_priority_list
= Vdefault_coded_charset_priority_list;
-
+ Vdefault_coded_charset_priority_list /* first pass: temporarily install the coding system's own priority list as the global default */
+ = CODING_SYSTEM_CCS_PRIORITY_LIST (codesys);
+ while (!EQ (Vdefault_coded_charset_priority_list, Qnil))
+ {
+ code_point = ENCODE_CHAR (ch, charset);
+ if (XCHARSET_FINAL (charset)) /* accept a charset whose FINAL value is non-zero */
+ goto found; /* NOTE(review): the global still holds the coding system's list at this jump; the restore is not visible in this excerpt -- confirm `found:' puts the original back */
+ Vdefault_coded_charset_priority_list /* drop everything up to and including the charset just tried */
+ = Fcdr (Fmemq (XCHARSET_NAME (charset),
+ Vdefault_coded_charset_priority_list));
+ }
+ Vdefault_coded_charset_priority_list /* first pass found nothing: restore the saved list for the pre-existing loop below */
+ = original_default_coded_charset_priority_list;
while (!EQ (Vdefault_coded_charset_priority_list, Qnil))
{
code_point = ENCODE_CHAR (ch, charset);
#ifdef UTF2000
defsymbol (&Qutf_8_mcs, "utf-8-mcs");
defsymbol (&Qdisable_composition, "disable-composition");
+ defsymbol (&Qccs_priority_list, "ccs-priority-list"); /* name of the property key compared against Qccs_priority_list in the property set/get code */
defsymbol (&Quse_entity_reference, "use-entity-reference");
defsymbol (&Qd, "d");
defsymbol (&Qx, "x");
DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qescape_quoted);
DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qinput_charset_conversion);
DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qoutput_charset_conversion);
+#ifdef UTF2000
+ DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qccs_priority_list); /* registered under CODESYS_PROP_ISO2022, like the properties just above */
+#endif
DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qencode);
DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qdecode);