/* Code conversion functions.
Copyright (C) 1991, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
+ Copyright (C) 1999,2000 MORIOKA Tomohiko
This file is part of XEmacs.
Lisp_Object Qctext, Qescape_quoted;
Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
#endif
+#ifdef UTF2000
+Lisp_Object Qdisable_composition;
+#endif
Lisp_Object Qencode, Qdecode;
Lisp_Object Vcoding_system_hash_table;
signal_simple_error ("Invalid charset conversion spec", car);
from = Fget_charset (XCAR (car));
to = Fget_charset (XCAR (XCDR (car)));
- if (XCHARSET_TYPE (from) != XCHARSET_TYPE (to))
+ if ( (XCHARSET_CHARS (from) != XCHARSET_CHARS (to)) ||
+ (XCHARSET_DIMENSION (from) != XCHARSET_DIMENSION (to)) )
signal_simple_error_2
("Attempted conversion between different charset types",
from, to);
converted to nil when stored internally, and
`coding-system-property' will return nil.)
+'disable-composition
+ If non-nil, composition/decomposition of combining characters
+ is disabled.
+
'post-read-conversion
Function called after a file has been read in, to perform the
decoding. Called with two arguments, BEG and END, denoting
CODING_SYSTEM_EOL_TYPE (codesys) = symbol_to_eol_type (value);
}
- else if (EQ (key, Qpost_read_conversion)) CODING_SYSTEM_POST_READ_CONVERSION (codesys) = value;
- else if (EQ (key, Qpre_write_conversion)) CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = value;
+ else if (EQ (key, Qpost_read_conversion))
+ CODING_SYSTEM_POST_READ_CONVERSION (codesys) = value;
+ else if (EQ (key, Qpre_write_conversion))
+ CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = value;
+#ifdef UTF2000
+ else if (EQ (key, Qdisable_composition))
+ CODING_SYSTEM_DISABLE_COMPOSITION (codesys) = !NILP (value);
+#endif
#ifdef MULE
else if (ty == CODESYS_ISO2022)
{
/* counter for UTF-8 or UCS-4 */
unsigned char counter;
#endif
+#ifdef UTF2000
+ unsigned combined_char_count;
+ Emchar combined_chars[16];
+ Lisp_Object combining_table;
+#endif
struct detection_state decst;
};
+#ifdef UTF2000
+extern Lisp_Object Vcharacter_composition_table;
+
+/* Emit every character still pending in STR's composition buffer to
+   DST, in the order it was stored, then return STR to the "no sequence
+   in progress" state (empty buffer, combining table set to Qnil).  */
+INLINE void
+COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst)
+{
+  unsigned idx = 0;
+
+  while (idx < str->combined_char_count)
+    {
+      DECODE_ADD_UCS_CHAR (str->combined_chars[idx], dst);
+      idx++;
+    }
+
+  /* Nothing is pending any more.  */
+  str->combining_table = Qnil;
+  str->combined_char_count = 0;
+}
+
+/* Look CHARACTER up in Vcharacter_composition_table.  If it has an
+   entry, make CHARACTER the first pending character of STR and remember
+   that entry as the current combining table; otherwise add CHARACTER to
+   DST immediately.  The caller must already have flushed or reset any
+   previously pending sequence in STR.  */
+static void
+compose_start_or_emit (struct decoding_stream *str,
+                       Emchar character, unsigned_char_dynarr *dst)
+{
+  Lisp_Object ret
+    = get_char_code_table (character, Vcharacter_composition_table);
+
+  if (NILP (ret))
+    DECODE_ADD_UCS_CHAR (character, dst);
+  else
+    {
+      str->combined_chars[0] = character;
+      str->combined_char_count = 1;
+      str->combining_table = ret;
+    }
+}
+
+/* Feed one decoded CHARACTER through the composition machinery of STR,
+   appending whatever becomes final to DST.
+
+   - If the coding system disables composition, CHARACTER is emitted
+     unchanged.
+   - If no sequence is in progress, CHARACTER either starts one (when it
+     has an entry in Vcharacter_composition_table) or is emitted.
+   - If a sequence is in progress, CHARACTER is looked up in the current
+     combining table:
+       * a character result replaces the whole pending sequence, and
+         may itself start a new sequence;
+       * a table result means the sequence continues, so CHARACTER is
+         appended to the pending buffer;
+       * anything else ends the sequence: the pending characters are
+         flushed and CHARACTER is then treated as a fresh character, so
+         that it can start a sequence of its own.  */
+void
+COMPOSE_ADD_CHAR (struct decoding_stream *str,
+                  Emchar character, unsigned_char_dynarr *dst)
+{
+  /* Number of slots in the fixed-size pending buffer.  */
+  const unsigned capacity
+    = (unsigned) (sizeof (str->combined_chars)
+                  / sizeof (str->combined_chars[0]));
+
+  if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys))
+    DECODE_ADD_UCS_CHAR (character, dst);
+  else if (!CHAR_CODE_TABLE_P (str->combining_table))
+    compose_start_or_emit (str, character, dst);
+  else
+    {
+      Lisp_Object ret
+        = get_char_code_table (character, str->combining_table);
+
+      if (CHARP (ret))
+        {
+          /* The pending sequence plus CHARACTER composes to a single
+             character; drop the pending state before handling it.  */
+          str->combined_char_count = 0;
+          str->combining_table = Qnil;
+          compose_start_or_emit (str, XCHARVAL (ret), dst);
+        }
+      else if (CHAR_CODE_TABLE_P (ret)
+               && str->combined_char_count < capacity)
+        {
+          str->combined_chars[str->combined_char_count++] = character;
+          str->combining_table = ret;
+        }
+      else
+        {
+          /* Either CHARACTER does not continue the sequence, or the
+             pending buffer is full and storing it would write past the
+             end of combined_chars.  Give up on the sequence, then let
+             CHARACTER begin a new one if it can.  */
+          COMPOSE_FLUSH_CHARS (str, dst);
+          compose_start_or_emit (str, character, dst);
+        }
+    }
+}
+#else /* not UTF2000 */
+#define COMPOSE_FLUSH_CHARS(str, dst) /* expands to nothing in non-UTF2000 builds */
+#define COMPOSE_ADD_CHAR(str, ch, dst) DECODE_ADD_UCS_CHAR (ch, dst) /* emit CH directly */
+#endif /* UTF2000 */
+
static ssize_t decoding_reader (Lstream *stream,
unsigned char *data, size_t size);
static ssize_t decoding_writer (Lstream *stream,
}
str->counter = 0;
#endif /* MULE */
+#ifdef UTF2000
+ str->combined_char_count = 0;
+ str->combining_table = Qnil;
+#endif
str->flags = str->ch = 0;
}
Lisp_Object charset;
unsigned int c1, c2, s1, s2;
#ifdef UTF2000
- Lisp_Object value = charset_code_point (Vcharset_latin_jisx0201, ch);
- Lisp_Object ret = Fcar (value);
+ int code_point = charset_code_point (Vcharset_latin_jisx0201, ch);
- if (INTP (ret))
+ if (code_point >= 0)
{
charset = Vcharset_latin_jisx0201;
- c1 = XINT (ret);
+ c1 = code_point;
c2 = 0;
}
else
#endif /* ENABLE_COMPOSITE_CHARS */
case ISO_ESC_LITERAL:
+ COMPOSE_FLUSH_CHARS (str, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
break;
{
/* Output the (possibly invalid) sequence */
int i;
+ COMPOSE_FLUSH_CHARS (str, dst);
for (i = 0; i < str->iso2022.esc_bytes_index; i++)
DECODE_ADD_BINARY_CHAR (str->iso2022.esc_bytes[i], dst);
flags &= CODING_STATE_ISO2022_LOCK;
/* No sense in reprocessing the final byte of the
escape sequence; it could mess things up anyway.
Just add it now. */
+ COMPOSE_FLUSH_CHARS (str, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
}
}
/* If we were in the middle of a character, dump out the
partial character. */
- DECODE_OUTPUT_PARTIAL_CHAR (ch);
+ if (ch)
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ DECODE_ADD_BINARY_CHAR (ch, dst);
+ ch = 0;
+ }
/* If we just saw a single-shift character, dump it out.
This may dump out the wrong sort of single-shift character,
wrong. */
if (flags & CODING_STATE_SS2)
{
+ COMPOSE_FLUSH_CHARS (str, dst);
DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst);
flags &= ~CODING_STATE_SS2;
}
if (flags & CODING_STATE_SS3)
{
+ COMPOSE_FLUSH_CHARS (str, dst);
DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst);
flags &= ~CODING_STATE_SS3;
}
/***** Now handle the control characters. *****/
/* Handle CR/LF */
+#ifdef UTF2000
+ if (c == '\r')
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ if (eol_type == EOL_CR)
+ Dynarr_add (dst, '\n');
+ else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR)
+ Dynarr_add (dst, c);
+ else
+ flags |= CODING_STATE_CR;
+ goto label_continue_loop;
+ }
+ else if (flags & CODING_STATE_CR)
+ { /* eol_type == CODING_SYSTEM_EOL_CRLF */
+ if (c != '\n')
+ Dynarr_add (dst, '\r');
+ flags &= ~CODING_STATE_CR;
+ }
+#else
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+#endif
flags &= CODING_STATE_ISO2022_LOCK;
if (!parse_iso2022_esc (coding_system, &str->iso2022, c, &flags, 1))
- DECODE_ADD_BINARY_CHAR (c, dst);
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ DECODE_ADD_BINARY_CHAR (c, dst);
+ }
}
else
{ /* Graphic characters */
#endif
int reg;
+#ifdef UTF2000
+ if (c == '\r')
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ if (eol_type == EOL_CR)
+ Dynarr_add (dst, '\n');
+ else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR)
+ Dynarr_add (dst, c);
+ else
+ flags |= CODING_STATE_CR;
+ goto label_continue_loop;
+ }
+ else if (flags & CODING_STATE_CR)
+ { /* eol_type == CODING_SYSTEM_EOL_CRLF */
+ if (c != '\n')
+ Dynarr_add (dst, '\r');
+ flags &= ~CODING_STATE_CR;
+ }
+#else
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+#endif
/* Now determine the charset. */
reg = ((flags & CODING_STATE_SS2) ? 2
outside the range of the charset. Insert that char literally
to preserve it for the output. */
{
+ COMPOSE_FLUSH_CHARS (str, dst);
DECODE_OUTPUT_PARTIAL_CHAR (ch);
DECODE_ADD_BINARY_CHAR (c, dst);
}
#ifdef UTF2000
if (XCHARSET_DIMENSION (charset) == 1)
{
- DECODE_OUTPUT_PARTIAL_CHAR (ch);
- DECODE_ADD_UCS_CHAR
- (MAKE_CHAR (charset, c & 0x7F, 0), dst);
+ if (ch)
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ DECODE_ADD_BINARY_CHAR (ch, dst);
+ ch = 0;
+ }
+ COMPOSE_ADD_CHAR (str,
+ MAKE_CHAR (charset, c & 0x7F, 0), dst);
}
else if (ch)
{
- DECODE_ADD_UCS_CHAR
- (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst);
+ COMPOSE_ADD_CHAR (str,
+ MAKE_CHAR (charset, ch & 0x7F, c & 0x7F),
+ dst);
ch = 0;
}
else
}
if (flags & CODING_STATE_END)
- DECODE_OUTPUT_PARTIAL_CHAR (ch);
-
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ DECODE_OUTPUT_PARTIAL_CHAR (ch);
+ }
str->flags = flags;
str->ch = ch;
}
{
static CONST char inter94[] = "()*+";
static CONST char inter96[] = ",-./";
- unsigned int type;
+ unsigned short chars;
+ unsigned char dimension;
unsigned char final;
Lisp_Object old_charset = str->iso2022.charset[reg];
if (!CHARSETP (charset))
/* charset might be an initial nil or t. */
return;
- type = XCHARSET_TYPE (charset);
+ chars = XCHARSET_CHARS (charset);
+ dimension = XCHARSET_DIMENSION (charset);
final = XCHARSET_FINAL (charset);
if (!str->iso2022.force_charset_on_output[reg] &&
CHARSETP (old_charset) &&
- XCHARSET_TYPE (old_charset) == type &&
+ XCHARSET_CHARS (old_charset) == chars &&
+ XCHARSET_DIMENSION (old_charset) == dimension &&
XCHARSET_FINAL (old_charset) == final)
return;
}
Dynarr_add (dst, ISO_CODE_ESC);
- switch (type)
+ switch (chars)
{
- case CHARSET_TYPE_94:
- Dynarr_add (dst, inter94[reg]);
- break;
- case CHARSET_TYPE_96:
- Dynarr_add (dst, inter96[reg]);
- break;
- case CHARSET_TYPE_94X94:
- Dynarr_add (dst, '$');
- if (reg != 0
- || !(CODING_SYSTEM_ISO2022_SHORT (str->codesys))
- || final < '@'
- || final > 'B')
+ case 94:
+ if (dimension == 1)
Dynarr_add (dst, inter94[reg]);
+ else
+ {
+ Dynarr_add (dst, '$');
+ if (reg != 0
+ || !(CODING_SYSTEM_ISO2022_SHORT (str->codesys))
+ || final < '@'
+ || final > 'B')
+ Dynarr_add (dst, inter94[reg]);
+ }
break;
- case CHARSET_TYPE_96X96:
- Dynarr_add (dst, '$');
- Dynarr_add (dst, inter96[reg]);
+ case 96:
+ if (dimension == 1)
+ Dynarr_add (dst, inter96[reg]);
+ else
+ {
+ Dynarr_add (dst, '$');
+ Dynarr_add (dst, inter96[reg]);
+ }
break;
}
Dynarr_add (dst, final);
reg = -1;
for (i = 0; i < 4; i++)
{
- Lisp_Object code_point;
+ int code_point;
if ((CHARSETP (charset = str->iso2022.charset[i])
- && !EQ (code_point = charset_code_point (charset, ch), Qnil))
+ && ((code_point = charset_code_point (charset, ch)) >= 0))
||
(CHARSETP
(charset
= CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i))
- && !EQ (code_point = charset_code_point (charset, ch), Qnil)))
+ && ((code_point = charset_code_point (charset, ch)) >= 0)))
{
- Lisp_Object ret = Fcar (code_point);
-
- if (INTP (ret))
+ if (XCHARSET_DIMENSION (charset) == 1)
{
- byte1 = XINT (ret);
- ret = Fcar (Fcdr (code_point));
- if (INTP (ret))
- byte2 = XINT (ret);
- else
- byte2 = 0;
+ byte1 = code_point;
+ byte2 = 0;
}
- else
+ else /* if (XCHARSET_DIMENSION (charset) == 2) */
{
- byte1 = 0;
- byte2 = 0;
+ byte1 = code_point >> 8;
+ byte2 = code_point & 255;
}
reg = i;
break;
defsymbol (&Qlock_shift, "lock-shift");
defsymbol (&Qescape_quoted, "escape-quoted");
#endif /* MULE */
+#ifdef UTF2000
+ defsymbol (&Qdisable_composition, "disable-composition");
+#endif
defsymbol (&Qencode, "encode");
defsymbol (&Qdecode, "decode");