/* Code conversion functions.
Copyright (C) 1991, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
- Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
+ Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
This file is part of XEmacs.
/* Synched up with: Mule 2.3. Not in FSF. */
/* Rewritten by Ben Wing <ben@xemacs.org>. */
+/* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
#include <config.h>
#include "lisp.h"
};
#ifdef UTF2000
-extern Lisp_Object Vcharacter_composition_table;
+extern Lisp_Object Qcomposition;
+
+INLINE_HEADER void
+decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst);
+
+/* Copy whatever is currently held in STR's entity-reference buffer
+   (str->er_buf, str->er_counter bytes) to DST unchanged and mark the
+   buffer as empty.  Nothing is written when no bytes are pending. */
+INLINE_HEADER void
+decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst)
+{
+  if (str->er_counter <= 0)
+    return;
+
+  Dynarr_add_many (dst, str->er_buf, str->er_counter);
+  str->er_counter = 0;
+}
+
+void decode_add_er_char (struct decoding_stream *str, Emchar character,
+                         unsigned_char_dynarr* dst);
+
+/* Feed one decoded character C through the entity-reference recognizer
+   of STR, writing output to DST.
+
+   State is kept in str->er_buf / str->er_counter:
+
+   - Nothing pending (er_counter == 0): if the coding system has entity
+     references enabled and C is '&', start buffering; otherwise C is
+     emitted directly with DECODE_ADD_UCS_CHAR.
+
+   - Pending and C is ';': the buffered text is matched against every
+     entry of Vcoded_charset_entity_reference_alist.  For each entry this
+     function reads the first element (a charset designator handed to
+     Ffind_charset), the second element (a string spliced into the
+     regexp after "^&") and the fourth element (Qd, Qx or QX, selecting
+     decimal, lower-case hex or upper-case hex digits).  On a match the
+     numeric part is decoded with DECODE_CHAR in that charset.  If no
+     entry matches, "&MCS-<HEX>" is tried and its value emitted as is.
+     Failing that, the buffered bytes and the ';' are emitted verbatim.
+
+   - Pending and the buffer already holds 16 bytes, or C >= 0x7F: the
+     buffered bytes are emitted verbatim, followed by C.
+
+   - Otherwise C is appended to the buffer.  */
+void
+decode_add_er_char (struct decoding_stream *str, Emchar c,
+                    unsigned_char_dynarr* dst)
+{
+  if (str->er_counter == 0)
+    {
+      if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys)
+          && (c == '&') )
+        {
+          str->er_buf[0] = '&';
+          str->er_counter++;
+        }
+      else
+        DECODE_ADD_UCS_CHAR (c, dst);
+    }
+  else if (c == ';')
+    {
+      Lisp_Object string = make_string (str->er_buf,
+                                        str->er_counter);
+      Lisp_Object rest;
+      Lisp_Object cell;
+      Lisp_Object ret;
+      Lisp_Object pat;
+      Lisp_Object ccs;
+      int base;
+
+      /* The list pointer is advanced in the increment clause so that
+         every `continue' below moves on to the next entry.  Advancing
+         only at the bottom of the body would make an entry that is
+         skipped (unknown charset, non-string pattern, unknown radix
+         symbol) be re-examined forever.  */
+      for (rest = Vcoded_charset_entity_reference_alist;
+           !NILP (rest);
+           rest = Fcdr (rest))
+        {
+          cell = Fcar (rest);
+          ccs = Fcar (cell);
+          if (NILP (ccs = Ffind_charset (ccs)))
+            continue;
+
+          cell = Fcdr (cell);
+          ret = Fcar (cell);
+          if (STRINGP (ret))
+            pat = ret;
+          else
+            continue;
+
+          /* Step over the third element; only the fourth is read here.  */
+          cell = Fcdr (cell);
+          cell = Fcdr (cell);
+          ret = Fcar (cell);
+          if (EQ (ret, Qd))
+            {
+              pat = concat3 (build_string ("^&"),
+                             pat, build_string ("\\([0-9]+\\)$"));
+              base = 10;
+            }
+          else if (EQ (ret, Qx))
+            {
+              pat = concat3 (build_string ("^&"),
+                             pat, build_string ("\\([0-9a-f]+\\)$"));
+              base = 16;
+            }
+          else if (EQ (ret, QX))
+            {
+              pat = concat3 (build_string ("^&"),
+                             pat, build_string ("\\([0-9A-F]+\\)$"));
+              base = 16;
+            }
+          else
+            continue;
+
+          if (!NILP (Fstring_match (pat, string, Qnil, Qnil)))
+            {
+              /* Group 1 of the regexp is the digit run.  */
+              int code
+                = XINT (Fstring_to_number
+                        (Fsubstring (string,
+                                     Fmatch_beginning (make_int (1)),
+                                     Fmatch_end (make_int (1))),
+                         make_int (base)));
+
+              DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst);
+              goto decoded;
+            }
+        }
+
+      /* No charset-specific form matched: try the generic MCS form.  */
+      if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"),
+                                string, Qnil, Qnil)))
+        {
+          int code
+            = XINT (Fstring_to_number
+                    (Fsubstring (string,
+                                 Fmatch_beginning (make_int (1)),
+                                 Fmatch_end (make_int (1))),
+                     make_int (16)));
+
+          DECODE_ADD_UCS_CHAR (code, dst);
+        }
+      else
+        {
+          /* Not a recognised reference: pass the text through as is.  */
+          Dynarr_add_many (dst, str->er_buf, str->er_counter);
+          Dynarr_add (dst, ';');
+        }
+    decoded:
+      str->er_counter = 0;
+    }
+  else if ( (str->er_counter >= 16) || (c >= 0x7F) )
+    {
+      /* Too long, or a character outside the range accepted for
+         buffering: give up and emit what was collected, then C.  */
+      Dynarr_add_many (dst, str->er_buf, str->er_counter);
+      str->er_counter = 0;
+      DECODE_ADD_UCS_CHAR (c, dst);
+    }
+  else
+    str->er_buf[str->er_counter++] = c;
+}
INLINE_HEADER void
COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst);
unsigned i;
for (i = 0; i < str->combined_char_count; i++)
- DECODE_ADD_UCS_CHAR (str->combined_chars[i], dst);
+ decode_add_er_char (str, str->combined_chars[i], dst);
str->combined_char_count = 0;
str->combining_table = Qnil;
}
-void COMPOSE_ADD_CHAR(struct decoding_stream *str, Emchar character,
- unsigned_char_dynarr* dst);
+void COMPOSE_ADD_CHAR (struct decoding_stream *str, Emchar character,
+ unsigned_char_dynarr* dst);
void
-COMPOSE_ADD_CHAR(struct decoding_stream *str,
- Emchar character, unsigned_char_dynarr* dst)
+COMPOSE_ADD_CHAR (struct decoding_stream *str,
+ Emchar character, unsigned_char_dynarr* dst)
{
if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys))
- DECODE_ADD_UCS_CHAR (character, dst);
- else if (!CHAR_TABLEP (str->combining_table))
+ decode_add_er_char (str, character, dst);
+ else if (!CONSP (str->combining_table))
{
Lisp_Object ret
- = get_char_id_table (XCHAR_TABLE(Vcharacter_composition_table),
- character);
+ = Fget_char_attribute (make_char (character), Qcomposition, Qnil);
if (NILP (ret))
- DECODE_ADD_UCS_CHAR (character, dst);
+ decode_add_er_char (str, character, dst);
else
{
str->combined_chars[0] = character;
}
else
{
- Lisp_Object ret
- = get_char_id_table (XCHAR_TABLE(str->combining_table),
- character);
+ Lisp_Object ret = Fcdr (Fassq (make_char (character), str->combining_table));
if (CHARP (ret))
{
Emchar char2 = XCHARVAL (ret);
- ret =
- get_char_id_table (XCHAR_TABLE(Vcharacter_composition_table),
- char2);
+ ret = Fget_char_attribute (make_char (character), Qcomposition, Qnil);
if (NILP (ret))
{
- DECODE_ADD_UCS_CHAR (char2, dst);
+ decode_add_er_char (str, character, dst);
str->combined_char_count = 0;
str->combining_table = Qnil;
}
str->combining_table = ret;
}
}
- else if (CHAR_TABLEP (ret))
- {
- str->combined_chars[str->combined_char_count++] = character;
- str->combining_table = ret;
- }
else
{
COMPOSE_FLUSH_CHARS (str, dst);
- DECODE_ADD_UCS_CHAR (character, dst);
+ decode_add_er_char (str, character, dst);
}
}
}
{
#ifdef UTF2000
int code_point = (cpos << 8) | c;
- Emchar char_id = DECODE_DEFINED_CHAR (ccs, code_point);
+ Emchar char_id = decode_defined_char (ccs, code_point);
if (char_id < 0)
char_id = DECODE_CHAR (Vcharset_chinese_big5, code_point);
unsigned int cpos = str->cpos;
eol_type_t eol_type = str->eol_type;
unsigned char counter = str->counter;
- unsigned char er_counter = str->er_counter;
while (n--)
{
unsigned char c = *(unsigned char *)src++;
if (counter == 0)
{
- if ( c < 0xC0 )
+ if ( c < ' ' )
{
- if (!CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys))
- {
- DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
- DECODE_ADD_UCS_CHAR (c, dst);
- }
- else if (er_counter == 0)
- {
- if (c == '&')
- {
- str->er_buf[0] = '&';
- er_counter++;
- }
- else
- {
- DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
- DECODE_ADD_UCS_CHAR (c, dst);
- }
- }
- else if (c == ';')
- {
- Lisp_Object string = make_string (str->er_buf, er_counter);
- Lisp_Object rest = Vcoded_charset_entity_reference_alist;
- Lisp_Object cell;
- Lisp_Object ret;
- Lisp_Object pat;
- Lisp_Object ccs;
- int base;
-
- while (!NILP (rest))
- {
- cell = Fcar (rest);
- ccs = Fcar (cell);
- if (NILP (ccs = Ffind_charset (ccs)))
- continue;
-
- cell = Fcdr (cell);
- ret = Fcar (cell);
- if (STRINGP (ret))
- pat = ret;
- else
- continue;
-
- cell = Fcdr (cell);
- cell = Fcdr (cell);
- ret = Fcar (cell);
- if (EQ (ret, Qd))
- {
- pat = concat3 (build_string ("^&"),
- pat, build_string ("\\([0-9]+\\)$"));
- base = 10;
- }
- else if (EQ (ret, Qx))
- {
- pat = concat3 (build_string ("^&"),
- pat,
- build_string ("\\([0-9a-f]+\\)$"));
- base = 16;
- }
- else if (EQ (ret, QX))
- {
- pat = concat3 (build_string ("^&"),
- pat,
- build_string ("\\([0-9A-F]+\\)$"));
- base = 16;
- }
- else
- continue;
-
- if (!NILP (Fstring_match (pat, string, Qnil, Qnil)))
- {
- int code
- = XINT (Fstring_to_number
- (Fsubstring (string,
- Fmatch_beginning
- (make_int (1)),
- Fmatch_end (make_int (1))),
- make_int (base)));
-
- DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst);
- goto decoded;
- }
- rest = Fcdr (rest);
- }
- if (!NILP (Fstring_match
- (build_string ("^&MCS-\\([0-9A-F]+\\)$"),
- string, Qnil, Qnil)))
- {
- int code
- = XINT (Fstring_to_number
- (Fsubstring (string,
- Fmatch_beginning
- (make_int (1)),
- Fmatch_end (make_int (1))),
- make_int (16)));
-
- DECODE_ADD_UCS_CHAR (code, dst);
- }
- else
- {
- Dynarr_add_many (dst, str->er_buf, er_counter);
- Dynarr_add (dst, ';');
- }
- decoded:
- er_counter = 0;
- }
- else if ( (er_counter >= 16) || (c <= ' ') || (c >= 0x7F) )
- {
- Dynarr_add_many (dst, str->er_buf, er_counter);
- er_counter = 0;
- DECODE_ADD_UCS_CHAR (c, dst);
- }
- else
- str->er_buf[er_counter++] = c;
+ COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
+ DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+ DECODE_ADD_UCS_CHAR (c, dst);
}
+ else if ( c < 0xC0 )
+ /* decode_add_er_char (str, c, dst); */
+ COMPOSE_ADD_CHAR (str, c, dst);
else
{
- Dynarr_add_many (dst, str->er_buf, er_counter);
- er_counter = 0;
+ /* decode_flush_er_chars (str, dst); */
if ( c < 0xE0 )
{
cpos = c & 0x1f;
cpos = ( cpos << 6 ) | ( c & 0x3f );
if (counter == 1)
{
- DECODE_ADD_UCS_CHAR (cpos, dst);
+ /* DECODE_ADD_UCS_CHAR (cpos, dst); */
+ COMPOSE_ADD_CHAR (str, cpos, dst);
cpos = 0;
counter = 0;
}
}
else
{
+ COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
decode_output_utf8_partial_char (counter, cpos, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
cpos = 0;
if (flags & CODING_STATE_END)
{
+ COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
if (counter > 0)
{
decode_output_utf8_partial_char (counter, cpos, dst);
cpos = 0;
counter = 0;
}
- else if ( er_counter > 0)
- {
- Dynarr_add_many (dst, str->er_buf, er_counter);
- er_counter = 0;
- }
}
str->flags = flags;
str->cpos = cpos;
str->counter = counter;
- str->er_counter = er_counter;
}
void
case ISO_ESC_LITERAL:
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
break;
/* Output the (possibly invalid) sequence */
int i;
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
for (i = 0; i < str->iso2022.esc_bytes_index; i++)
DECODE_ADD_BINARY_CHAR (str->iso2022.esc_bytes[i], dst);
flags &= CODING_STATE_ISO2022_LOCK;
escape sequence; it could mess things up anyway.
Just add it now. */
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
}
}
if (counter)
{
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
while (counter > 0)
{
counter--;
if (flags & CODING_STATE_SS2)
{
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst);
flags &= ~CODING_STATE_SS2;
}
if (flags & CODING_STATE_SS3)
{
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst);
flags &= ~CODING_STATE_SS3;
}
if (c == '\r')
{
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
if (eol_type == EOL_CR)
Dynarr_add (dst, '\n');
else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR)
if (!parse_iso2022_esc (coding_system, &str->iso2022, c, &flags, 1))
{
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
}
}
if (c == '\r')
{
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
if (eol_type == EOL_CR)
Dynarr_add (dst, '\n');
else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR)
to preserve it for the output. */
{
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
while (counter > 0)
{
counter--;
if (flags & CODING_STATE_END)
{
COMPOSE_FLUSH_CHARS (str, dst);
+ decode_flush_er_chars (str, dst);
DECODE_OUTPUT_PARTIAL_CHAR (cpos);
}
str->flags = flags;