/* Code conversion functions.
Copyright (C) 1991, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
- Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2011,
+ 2012, 2013, 2016 MORIOKA Tomohiko
This file is part of XEmacs.
#include "mule-ccl.h"
#include "chartab.h"
#endif
+#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE
+#include <cos.h>
+#endif
#include "file-coding.h"
Lisp_Object Qcoding_system_error;
#endif
#ifdef UTF2000
Lisp_Object Qutf_8_mcs;
-Lisp_Object Qdisable_composition;
+Lisp_Object Qdisable_composition, Qenable_decomposition;
+Lisp_Object Qccs_priority_list;
Lisp_Object Quse_entity_reference;
Lisp_Object Qd, Qx, QX;
+Lisp_Object Vdecomposition_feature_list;
#endif
Lisp_Object Qencode, Qdecode;
`coding-system-property' will return nil.)
'disable-composition
- If non-nil, composition/decomposition for combining characters
- are disabled.
+ If non-nil, composition for combining characters is disabled.
+
+'enable-decomposition
+ If non-nil, decomposition for combining characters is enabled.
'use-entity-reference
If non-nil, SGML style entity-reference is used for non-system-characters.
#ifdef UTF2000
else if (EQ (key, Qdisable_composition))
CODING_SYSTEM_DISABLE_COMPOSITION (codesys) = !NILP (value);
+ else if (EQ (key, Qenable_decomposition))
+ CODING_SYSTEM_ENABLE_DECOMPOSITION (codesys) = !NILP (value);
else if (EQ (key, Quse_entity_reference))
CODING_SYSTEM_USE_ENTITY_REFERENCE (codesys) = !NILP (value);
#endif
parse_charset_conversion_specs (codesys->iso2022.output_conv,
value);
}
+#ifdef UTF2000
+ else if (EQ (key, Qccs_priority_list))
+ {
+ codesys->ccs_priority_list = value;
+ }
+#endif
else
signal_simple_error ("Unrecognized property", key);
}
CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, 1) = value;
else if (EQ (key, Qcharset_g2))
CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, 2) = value;
+ else if (EQ (key, Qcharset_g3))
+ CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, 3) = value;
else
signal_simple_error ("Unrecognized property", key);
}
#ifdef UTF2000
else if (EQ (prop, Qdisable_composition))
return XCODING_SYSTEM_DISABLE_COMPOSITION (coding_system) ? Qt : Qnil;
+ else if (EQ (prop, Qenable_decomposition))
+ return XCODING_SYSTEM_ENABLE_DECOMPOSITION (coding_system) ? Qt : Qnil;
else if (EQ (prop, Quse_entity_reference))
return XCODING_SYSTEM_USE_ENTITY_REFERENCE (coding_system) ? Qt : Qnil;
+ else if (EQ (prop, Qccs_priority_list))
+ return XCODING_SYSTEM_CCS_PRIORITY_LIST (coding_system);
#endif
else if (type == CODESYS_ISO2022)
{
} \
} while (0)
-INLINE_HEADER void DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst);
-INLINE_HEADER void
+static void DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst);
+static void
DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst)
{
if ( c <= 0x7f )
#define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding)
+#define ER_BUF_SIZE 24 /* Size of decoding_stream.er_buf, the limit tested
+   against er_counter before er_buf is flushed, and the size of the local
+   `format' buffer used when building an entity reference. */
+
struct decoding_stream
{
/* Coding system that governs the conversion. */
unsigned char counter;
#endif
#ifdef UTF2000
+ char bom_flag; /* 0 = nothing decoded yet,
+   1 = the first decoded code point was U+FEFF (it is dropped),
+   -1 = the input began with anything else.
+   NOTE(review): -1 is stored in a plain char, whose signedness is
+   implementation-defined -- confirm this is intended. */
unsigned char er_counter;
- unsigned char er_buf[16];
+ unsigned char er_buf[ER_BUF_SIZE]; /* Bytes of a pending entity reference;
+   er_counter is the number of bytes currently in use. */
unsigned combined_char_count;
Emchar combined_chars[16];
+#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE
+ COS_object combining_table; /* Composition attribute of the pending
+   character; a non-cons value means no composition is in progress. */
+#else
Lisp_Object combining_table;
+#endif /* USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE */
#endif
struct detection_state decst;
};
str->counter = 0;
#endif /* MULE */
#ifdef UTF2000
+ str->bom_flag = 0;
str->er_counter = 0;
str->combined_char_count = 0;
+#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE
+ str->combining_table = COS_NIL;
+#else
str->combining_table = Qnil;
+#endif /* USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE */
#endif
if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT
|| CODING_SYSTEM_EOL_TYPE (str->codesys) == EOL_AUTODETECT)
}
}
+EXFUN (Fregexp_quote, 1); /* Declared here for the Fregexp_quote (pat) call
+   made while matching entity references below. */
+
void decode_add_er_char (struct decoding_stream *str, Emchar character,
unsigned_char_dynarr* dst);
void
{
Lisp_Object string = make_string (str->er_buf,
str->er_counter);
- Lisp_Object rest = Vcoded_charset_entity_reference_alist;
+ Lisp_Object rest;
Lisp_Object cell;
Lisp_Object ret;
Lisp_Object pat;
Lisp_Object char_type;
int base;
- while (!NILP (rest))
+ for ( rest = Vcoded_charset_entity_reference_alist;
+ !NILP (rest); rest = Fcdr (rest) )
{
cell = Fcar (rest);
ccs = Fcar (cell);
pat = ret;
else
continue;
+ pat = Fregexp_quote (pat);
cell = Fcdr (cell);
cell = Fcdr (cell);
? DECODE_CHAR (ccs, code, 0)
: decode_builtin_char (ccs, code);
- DECODE_ADD_UCS_CHAR (chr, dst);
+ if ( chr >= 0 )
+ DECODE_ADD_UCS_CHAR (chr, dst);
+ else
+ {
+ Dynarr_add_many (dst, str->er_buf, str->er_counter);
+ Dynarr_add (dst, ';');
+ }
+
goto decoded;
}
- rest = Fcdr (rest);
}
if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"),
string, Qnil, Qnil)))
decoded:
str->er_counter = 0;
}
- else if ( (str->er_counter >= 16) || (c >= 0x7F) )
+ else if ( (str->er_counter >= ER_BUF_SIZE) || (c >= 0x7F) )
{
Dynarr_add_many (dst, str->er_buf, str->er_counter);
str->er_counter = 0;
Lisp_Object ccs;
Lisp_Object char_type;
int format_columns, idx;
- char format[18];
+ char format[ER_BUF_SIZE];
while (!NILP (rest))
{
char_type = Qnil;
if (!NILP (ccs = Ffind_charset (ccs)))
{
- int code_point = charset_code_point (ccs, ch, 0);
+ int code_point
+ = charset_code_point (ccs, ch,
+ NILP (char_type) ?
+ CHAR_ALL : CHAR_ISOLATED_ONLY );
if ( (code_point >= 0)
- && (NILP (char_type)
- || DECODE_CHAR (ccs, code_point, 0) != ch) )
+ && ( NILP (char_type)
+#if 1
+ || ( charset_code_point (ccs, ch, CHAR_DEFINED_ONLY)
+ == -1 )
+#endif
+#if 0
+ || ( DECODE_CHAR (ccs, code_point, 0) != ch )
+#endif
+ )
+ )
{
Lisp_Object ret;
cell = Fcdr (cell);
ret = Fcar (cell);
- if (STRINGP (ret) && ((idx = XSTRING_LENGTH (ret)) <= 6))
+ if ( STRINGP (ret) &&
+ ( (idx = XSTRING_LENGTH (ret)) <= (ER_BUF_SIZE - 4) ) )
{
format[0] = '&';
strncpy (&format[1], XSTRING_DATA (ret), idx);
idx++;
}
else
- continue;
+ goto try_next;
cell = Fcdr (cell);
ret = Fcar (cell);
{
format[idx++] = '%';
format_columns = XINT (ret);
- if ( (2 <= format_columns) && (format_columns <= 8) )
+ if ( (2 <= format_columns) && (format_columns <= 8)
+ && (idx + format_columns <= ER_BUF_SIZE - 1) )
{
format [idx++] = '0';
format [idx++] = '0' + format_columns;
}
}
+ else
+ goto try_next;
cell = Fcdr (cell);
ret = Fcar (cell);
else if (EQ (ret, QX))
format [idx++] = 'X';
else
- continue;
+ goto try_next;
format [idx++] = ';';
format [idx++] = 0;
return;
}
}
+ try_next:
rest = Fcdr (rest);
}
sprintf (buf, "&MCS-%08X;", ch);
/************************************************************************/
/* character composition */
/************************************************************************/
-extern Lisp_Object Qcomposition;
+extern Lisp_Object Qcomposition, Qrep_decomposition;
INLINE_HEADER void
COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst);
for (i = 0; i < str->combined_char_count; i++)
decode_add_er_char (str, str->combined_chars[i], dst);
str->combined_char_count = 0;
+#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE
+ str->combining_table = COS_NIL;
+#else
str->combining_table = Qnil;
+#endif /* USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE */
}
+extern CONCORD_DS concord_current_env; /* NOTE(review): declared outside any
+   USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE guard, while <cos.h> is included only
+   under that guard -- confirm CONCORD_DS is visible in every build. */
+
+#if 0 /* Disabled helper.  It opens the Concord environment at a hard-coded
+   path when concord_current_env is NULL and always returns 0.  The call
+   sites below keep it only as a commented-out call and invoke
+   open_chise_data_source_maybe () instead. */
+static int
+concord_setup_env_maybe ()
+{
+ if (concord_current_env == NULL)
+ {
+ concord_open_env ("/usr/local/share/chise/1.0/db/");
+ }
+ return 0;
+}
+#endif
+
void COMPOSE_ADD_CHAR (struct decoding_stream *str, Emchar character,
unsigned_char_dynarr* dst);
void
{
if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys))
decode_add_er_char (str, character, dst);
+#ifdef USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE
+ else if (!cos_cons_p (str->combining_table))
+ {
+ COS_object ret;
+
+ /* concord_setup_env_maybe (); */
+ open_chise_data_source_maybe ();
+ ret = concord_object_get_attribute (cos_make_char (character),
+ COS_COMPOSITION);
+
+ if (!cos_cons_p (ret))
+ decode_add_er_char (str, character, dst);
+ else
+ {
+ str->combined_chars[0] = character;
+ str->combined_char_count = 1;
+ str->combining_table = ret;
+ }
+ }
+ else
+ {
+ COS_object ret
+ = cos_cdr (cos_assoc (cos_make_char (character),
+ str->combining_table));
+
+ if (cos_char_p (ret))
+ {
+ Emchar char2 = cos_char_id (ret);
+ COS_object ret2;
+
+ /* concord_setup_env_maybe (); */
+ open_chise_data_source_maybe ();
+ ret2 = concord_object_get_attribute (ret, COS_COMPOSITION);
+
+ if (!cos_cons_p (ret2))
+ {
+ decode_add_er_char (str, char2, dst);
+ str->combined_char_count = 0;
+ str->combining_table = COS_NIL;
+ }
+ else
+ {
+ str->combined_chars[0] = char2;
+ str->combined_char_count = 1;
+ str->combining_table = ret2;
+ }
+ }
+ else
+ {
+ /* concord_setup_env_maybe (); */
+ open_chise_data_source_maybe ();
+ ret = concord_object_get_attribute (cos_make_char (character),
+ COS_COMPOSITION);
+
+ COMPOSE_FLUSH_CHARS (str, dst);
+ if (!cos_cons_p (ret))
+ decode_add_er_char (str, character, dst);
+ else
+ {
+ str->combined_chars[0] = character;
+ str->combined_char_count = 1;
+ str->combining_table = ret;
+ }
+ }
+ }
+#else
else if (!CONSP (str->combining_table))
{
Lisp_Object ret
- = Fget_char_attribute (make_char (character), Qcomposition, Qnil);
+ = Fchar_feature (make_char (character), Qcomposition, Qnil,
+ Qnil, Qnil);
if (NILP (ret))
decode_add_er_char (str, character, dst);
if (CHARP (ret))
{
Emchar char2 = XCHARVAL (ret);
- Lisp_Object ret2 = Fget_char_attribute (ret, Qcomposition, Qnil);
+ Lisp_Object ret2 = Fchar_feature (ret, Qcomposition, Qnil,
+ Qnil, Qnil);
if (NILP (ret2))
{
}
else
{
- ret = Fget_char_attribute (make_char (character), Qcomposition,
- Qnil);
+ ret = Fchar_feature (make_char (character), Qcomposition, Qnil,
+ Qnil, Qnil);
COMPOSE_FLUSH_CHARS (str, dst);
if (NILP (ret))
}
}
}
+#endif /* USE_CONCORD_OBJECT_SYSTEM_TO_COMPOSE */
}
#else /* not UTF2000 */
#define COMPOSE_FLUSH_CHARS(str, dst)
eol_type_t eol_type = str->eol_type;
unsigned char counter = str->counter;
#ifdef UTF2000
+ int bom_flag = str->bom_flag;
Lisp_Object ccs
= CODING_SYSTEM_ISO2022_INITIAL_CHARSET (DECODING_STREAM_DATA
(decoding)->codesys, 0);
COMPOSE_FLUSH_CHARS (str, dst);
decode_flush_er_chars (str, dst);
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+
+ if ( bom_flag == 0 )
+ bom_flag = -1;
+
DECODE_ADD_UCS_CHAR (c, dst);
}
else if ( c < 0xC0 )
- /* decode_add_er_char (str, c, dst); */
- COMPOSE_ADD_CHAR (str, c, dst);
+ {
+ if ( bom_flag == 0 )
+ bom_flag = -1;
+
+ /* decode_add_er_char (str, c, dst); */
+ COMPOSE_ADD_CHAR (str, c, dst);
+ }
else
{
/* decode_flush_er_chars (str, dst); */
{
Emchar char_id;
+ if ( bom_flag == 0 )
+ {
+ if ( cpos == 0xFEFF )
+ {
+ bom_flag = 1;
+ goto decoded;
+ }
+ else
+ bom_flag = -1;
+ }
+
if (!NILP (ccs))
{
char_id = decode_defined_char (ccs, cpos, 0);
else
char_id = cpos;
COMPOSE_ADD_CHAR (str, char_id, dst);
+ decoded:
cpos = 0;
counter = 0;
}
str->flags = flags;
str->cpos = cpos;
str->counter = counter;
+#ifdef UTF2000
+ str->bom_flag = bom_flag;
+#endif
}
void
= CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 0);
int code_point = charset_code_point (ucs_ccs, ch, 0);
- if ( (code_point < 0) || (code_point > 0x10FFFF) )
+ if ( (code_point < 0) || (code_point > 0xEFFFF) )
{
- Lisp_Object map
- = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 1);
- Lisp_Object ret;
+ Lisp_Object map, ret;
+
+ if ( CODING_SYSTEM_ENABLE_DECOMPOSITION (str->codesys) )
+ {
+ Lisp_Object rest = Vdecomposition_feature_list;
+ Lisp_Object decomp_f;
+ Lisp_Object seq = Qnil;
+ struct gcpro gcpro1;
+
+ while ( CONSP (rest) )
+ {
+ decomp_f = XCAR (rest);
+ GCPRO1 (rest);
+ seq = Fchar_feature (make_char (ch), decomp_f, Qnil,
+ Qnil, Qnil);
+ UNGCPRO;
+ if ( !NILP (seq) )
+ break;
+ rest = XCDR (rest);
+ }
+
+ if ( CONSP (seq) )
+ {
+ Lisp_Object base = Fcar (seq);
+ seq = Fcdr (seq);
+ if ( CHARP (base) && CONSP (seq) )
+ {
+ Lisp_Object comb = Fcar (seq);
+
+ if ( CHARP (comb) )
+ {
+ char_encode_utf8 (str, XCHAR (base), dst, flags);
+ char_encode_utf8 (str, XCHAR (comb), dst, flags);
+ return;
+ }
+ }
+ }
+ }
+
+ map = CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, 1);
if ( !NILP (map)
- && INTP (ret = Fget_char_attribute (make_char (ch),
- map, Qnil)) )
+ && INTP (ret = Fchar_feature (make_char (ch),
+ map, Qnil,
+ Qnil, Qnil)) )
code_point = XINT (ret);
else if ( !NILP (map =
CODING_SYSTEM_ISO2022_INITIAL_CHARSET
(str->codesys, 2))
- && INTP (ret = Fget_char_attribute (make_char (ch),
- map, Qnil)) )
+ && INTP (ret = Fchar_feature (make_char (ch),
+ map, Qnil,
+ Qnil, Qnil)) )
+ code_point = XINT (ret);
+ else if ( !NILP (map =
+ CODING_SYSTEM_ISO2022_INITIAL_CHARSET
+ (str->codesys, 3))
+ && INTP (ret = Fchar_feature (make_char (ch),
+ map, Qnil,
+ Qnil, Qnil)) )
code_point = XINT (ret);
else if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys))
{
{
Lisp_Object original_default_coded_charset_priority_list
= Vdefault_coded_charset_priority_list;
-
+ Vdefault_coded_charset_priority_list
+ = CODING_SYSTEM_CCS_PRIORITY_LIST (codesys);
+ while (!EQ (Vdefault_coded_charset_priority_list, Qnil))
+ {
+ code_point = ENCODE_CHAR (ch, charset);
+ if (XCHARSET_FINAL (charset))
+ goto found;
+ Vdefault_coded_charset_priority_list
+ = Fcdr (Fmemq (XCHARSET_NAME (charset),
+ Vdefault_coded_charset_priority_list));
+ }
+ Vdefault_coded_charset_priority_list
+ = original_default_coded_charset_priority_list;
while (!EQ (Vdefault_coded_charset_priority_list, Qnil))
{
code_point = ENCODE_CHAR (ch, charset);
#ifdef UTF2000
defsymbol (&Qutf_8_mcs, "utf-8-mcs");
defsymbol (&Qdisable_composition, "disable-composition");
+ defsymbol (&Qenable_decomposition, "enable-decomposition");
+ defsymbol (&Qccs_priority_list, "ccs-priority-list");
defsymbol (&Quse_entity_reference, "use-entity-reference");
defsymbol (&Qd, "d");
defsymbol (&Qx, "x");
Setting this to nil does not do anything.
*/ );
enable_multibyte_characters = 1;
+
+#ifdef UTF2000
+ DEFVAR_LISP ("decomposition-feature-list",
+ &Vdecomposition_feature_list /*
+List of `=decomposition@FOO' features used to encode characters as IVS.
+*/ );
+ Vdecomposition_feature_list = Qnil;
+#endif
}
void
DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qescape_quoted);
DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qinput_charset_conversion);
DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qoutput_charset_conversion);
+#ifdef UTF2000
+ DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qccs_priority_list);
+#endif
DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qencode);
DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qdecode);
#ifdef UTF2000
DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK, Qdisable_composition);
+ DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK, Qenable_decomposition);
DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK, Quse_entity_reference);
#endif
#endif /* MULE */