/* Code conversion functions.
Copyright (C) 1991, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
- Copyright (C) 1999,2000 MORIOKA Tomohiko
+ Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
This file is part of XEmacs.
#include "elhash.h"
#include "insdel.h"
#include "lstream.h"
+#include "opaque.h"
#ifdef MULE
#include "mule-ccl.h"
#include "chartab.h"
This describes a permutation of the possible coding categories. */
int coding_category_by_priority[CODING_CATEGORY_LAST + 1];
-#ifndef UTF2000
+#if defined(MULE) && !defined(UTF2000)
Lisp_Object ucs_to_mule_table[65536];
#endif
} *fcd;
static const struct lrecord_description fcd_description_1[] = {
- { XD_LISP_OBJECT, offsetof(struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST + 1 },
-#ifndef UTF2000
- { XD_LISP_OBJECT, offsetof(struct file_coding_dump, ucs_to_mule_table), 65536 },
+ { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST + 1 },
+#if defined(MULE) && !defined(UTF2000)
+ { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, ucs_to_mule_table), countof (fcd->ucs_to_mule_table) },
#endif
{ XD_END }
};
static const struct struct_description fcd_description = {
- sizeof(struct file_coding_dump),
+ sizeof (struct file_coding_dump),
fcd_description_1
};
Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output;
Lisp_Object Qno_iso6429;
Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion;
-Lisp_Object Qctext, Qescape_quoted;
+Lisp_Object Qescape_quoted;
Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
#endif
+#ifdef UTF2000
+Lisp_Object Qdisable_composition;
+#endif
Lisp_Object Qencode, Qdecode;
Lisp_Object Vcoding_system_hash_table;
struct detection_state;
static void
-text_encode_generic (Lstream *encoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n);
+text_encode_generic (Lstream *encoding, const Bufbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
static int detect_coding_sjis (struct detection_state *st,
- CONST unsigned char *src,
- unsigned int n);
-static void decode_coding_sjis (Lstream *decoding,
- CONST unsigned char *src,
- unsigned_char_dynarr *dst,
- unsigned int n);
+ const Extbyte *src, size_t n);
+static void decode_coding_sjis (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
void char_encode_shift_jis (struct encoding_stream *str, Emchar c,
unsigned_char_dynarr *dst, unsigned int *flags);
void char_finish_shift_jis (struct encoding_stream *str,
unsigned_char_dynarr *dst, unsigned int *flags);
static int detect_coding_big5 (struct detection_state *st,
- CONST unsigned char *src,
- unsigned int n);
-static void decode_coding_big5 (Lstream *decoding,
- CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n);
-static void encode_coding_big5 (Lstream *encoding,
- CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n);
+ const Extbyte *src, size_t n);
+static void decode_coding_big5 (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
+void char_encode_big5 (struct encoding_stream *str, Emchar c,
+ unsigned_char_dynarr *dst, unsigned int *flags);
+void char_finish_big5 (struct encoding_stream *str,
+ unsigned_char_dynarr *dst, unsigned int *flags);
+
static int detect_coding_ucs4 (struct detection_state *st,
- CONST unsigned char *src,
- unsigned int n);
-static void decode_coding_ucs4 (Lstream *decoding,
- CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n);
+ const Extbyte *src, size_t n);
+static void decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
void char_encode_ucs4 (struct encoding_stream *str, Emchar c,
unsigned_char_dynarr *dst, unsigned int *flags);
void char_finish_ucs4 (struct encoding_stream *str,
unsigned_char_dynarr *dst, unsigned int *flags);
static int detect_coding_utf8 (struct detection_state *st,
- CONST unsigned char *src,
- unsigned int n);
-static void decode_coding_utf8 (Lstream *decoding,
- CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n);
+ const Extbyte *src, size_t n);
+static void decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
void char_encode_utf8 (struct encoding_stream *str, Emchar c,
unsigned_char_dynarr *dst, unsigned int *flags);
void char_finish_utf8 (struct encoding_stream *str,
static void reset_iso2022 (Lisp_Object coding_system,
struct iso2022_decoder *iso);
static int detect_coding_iso2022 (struct detection_state *st,
- CONST unsigned char *src,
- unsigned int n);
-static void decode_coding_iso2022 (Lstream *decoding,
- CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n);
+ const Extbyte *src, size_t n);
+static void decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
void char_encode_iso2022 (struct encoding_stream *str, Emchar c,
unsigned_char_dynarr *dst, unsigned int *flags);
void char_finish_iso2022 (struct encoding_stream *str,
unsigned_char_dynarr *dst, unsigned int *flags);
#endif /* MULE */
-static void decode_coding_no_conversion (Lstream *decoding,
- CONST unsigned char *src,
- unsigned_char_dynarr *dst,
- unsigned int n);
-static void encode_coding_no_conversion (Lstream *encoding,
- CONST unsigned char *src,
- unsigned_char_dynarr *dst,
- unsigned int n);
-static void mule_decode (Lstream *decoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n);
-static void mule_encode (Lstream *encoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n);
+static void decode_coding_no_conversion (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
+static void encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
+static void mule_decode (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
+static void mule_encode (Lstream *encoding, const Bufbyte *src,
+ unsigned_char_dynarr *dst, size_t n);
typedef struct codesys_prop codesys_prop;
struct codesys_prop
} codesys_prop_dynarr;
static const struct lrecord_description codesys_prop_description_1[] = {
- { XD_LISP_OBJECT, offsetof(codesys_prop, sym), 1 },
+ { XD_LISP_OBJECT, offsetof (codesys_prop, sym) },
{ XD_END }
};
static const struct struct_description codesys_prop_description = {
- sizeof(codesys_prop),
+ sizeof (codesys_prop),
codesys_prop_description_1
};
static const struct lrecord_description codesys_prop_dynarr_description_1[] = {
- XD_DYNARR_DESC(codesys_prop_dynarr, &codesys_prop_description),
+ XD_DYNARR_DESC (codesys_prop_dynarr, &codesys_prop_description),
{ XD_END }
};
static const struct struct_description codesys_prop_dynarr_description = {
- sizeof(codesys_prop_dynarr),
+ sizeof (codesys_prop_dynarr),
codesys_prop_dynarr_description_1
};
#ifdef MULE
static const struct lrecord_description ccs_description_1[] = {
- { XD_LISP_OBJECT, offsetof(charset_conversion_spec, from_charset), 2 },
+ { XD_LISP_OBJECT, offsetof (charset_conversion_spec, from_charset) },
+ { XD_LISP_OBJECT, offsetof (charset_conversion_spec, to_charset) },
{ XD_END }
};
static const struct struct_description ccs_description = {
- sizeof(charset_conversion_spec),
+ sizeof (charset_conversion_spec),
ccs_description_1
};
static const struct lrecord_description ccsd_description_1[] = {
- XD_DYNARR_DESC(charset_conversion_spec_dynarr, &ccs_description),
+ XD_DYNARR_DESC (charset_conversion_spec_dynarr, &ccs_description),
{ XD_END }
};
static const struct struct_description ccsd_description = {
- sizeof(charset_conversion_spec_dynarr),
+ sizeof (charset_conversion_spec_dynarr),
ccsd_description_1
};
#endif
static const struct lrecord_description coding_system_description[] = {
- { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, name), 2 },
- { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, mnemonic), 3 },
- { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, eol_lf), 3 },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, name) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, doc_string) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, mnemonic) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, post_read_conversion) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, pre_write_conversion) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, eol_lf) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, eol_crlf) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, eol_cr) },
#ifdef MULE
- { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, iso2022.initial_charset), 4 },
- { XD_STRUCT_PTR, offsetof(struct Lisp_Coding_System, iso2022.input_conv), 1, &ccsd_description },
- { XD_STRUCT_PTR, offsetof(struct Lisp_Coding_System, iso2022.output_conv), 1, &ccsd_description },
- { XD_LISP_OBJECT, offsetof(struct Lisp_Coding_System, ccl.decode), 2 },
+ { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, iso2022.initial_charset), 4 },
+ { XD_STRUCT_PTR, offsetof (Lisp_Coding_System, iso2022.input_conv), 1, &ccsd_description },
+ { XD_STRUCT_PTR, offsetof (Lisp_Coding_System, iso2022.output_conv), 1, &ccsd_description },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, ccl.decode) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, ccl.encode) },
#endif
{ XD_END }
};
mark_coding_system, print_coding_system,
finalize_coding_system,
0, 0, coding_system_description,
- struct Lisp_Coding_System);
+ Lisp_Coding_System);
static Lisp_Object
mark_coding_system (Lisp_Object obj)
}
}
-static enum eol_type
+static eol_type_t
symbol_to_eol_type (Lisp_Object symbol)
{
CHECK_SYMBOL (symbol);
}
static Lisp_Object
-eol_type_to_symbol (enum eol_type type)
+eol_type_to_symbol (eol_type_t type)
{
switch (type)
{
*/
(coding_system_or_name))
{
- if (CODING_SYSTEMP (coding_system_or_name))
- return coding_system_or_name;
-
if (NILP (coding_system_or_name))
coding_system_or_name = Qbinary;
+ else if (CODING_SYSTEMP (coding_system_or_name))
+ return coding_system_or_name;
else
CHECK_SYMBOL (coding_system_or_name);
- return Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
+ while (1)
+ {
+ coding_system_or_name =
+ Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
+
+ if (CODING_SYSTEMP (coding_system_or_name) || NILP (coding_system_or_name))
+ return coding_system_or_name;
+ }
}
DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
JIS (the Japanese encoding commonly used for e-mail), EUC (the
standard Unix encoding for Japanese and other languages), and
Compound Text (the encoding used in X11). You can specify more
- specific information about the conversion with the FLAGS argument.
+ specific information about the conversion with the PROPS argument.
'big5
Big5 (the encoding commonly used for Taiwanese).
'ccl
converted to nil when stored internally, and
`coding-system-property' will return nil.)
+'disable-composition
+ If non-nil, composition/decomposition for combining characters
+ are disabled.
+
'post-read-conversion
Function called after a file has been read in, to perform the
- decoding. Called with two arguments, BEG and END, denoting
+ decoding. Called with two arguments, START and END, denoting
a region of the current buffer to be decoded.
'pre-write-conversion
Function called before a file is written out, to perform the
- encoding. Called with two arguments, BEG and END, denoting
+ encoding. Called with two arguments, START and END, denoting
a region of the current buffer to be encoded.
(name, type, doc_string, props))
{
Lisp_Coding_System *codesys;
- Lisp_Object rest, key, value;
enum coding_system_type ty;
int need_to_setup_eol_systems = 1;
CHECK_STRING (doc_string);
CODING_SYSTEM_DOC_STRING (codesys) = doc_string;
- EXTERNAL_PROPERTY_LIST_LOOP (rest, key, value, props)
- {
- if (EQ (key, Qmnemonic))
- {
- if (!NILP (value))
- CHECK_STRING (value);
- CODING_SYSTEM_MNEMONIC (codesys) = value;
- }
+ {
+ EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props)
+ {
+ if (EQ (key, Qmnemonic))
+ {
+ if (!NILP (value))
+ CHECK_STRING (value);
+ CODING_SYSTEM_MNEMONIC (codesys) = value;
+ }
- else if (EQ (key, Qeol_type))
- {
- need_to_setup_eol_systems = NILP (value);
- if (EQ (value, Qt))
- value = Qnil;
- CODING_SYSTEM_EOL_TYPE (codesys) = symbol_to_eol_type (value);
- }
+ else if (EQ (key, Qeol_type))
+ {
+ need_to_setup_eol_systems = NILP (value);
+ if (EQ (value, Qt))
+ value = Qnil;
+ CODING_SYSTEM_EOL_TYPE (codesys) = symbol_to_eol_type (value);
+ }
- else if (EQ (key, Qpost_read_conversion)) CODING_SYSTEM_POST_READ_CONVERSION (codesys) = value;
- else if (EQ (key, Qpre_write_conversion)) CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = value;
+ else if (EQ (key, Qpost_read_conversion))
+ CODING_SYSTEM_POST_READ_CONVERSION (codesys) = value;
+ else if (EQ (key, Qpre_write_conversion))
+ CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = value;
+#ifdef UTF2000
+ else if (EQ (key, Qdisable_composition))
+ CODING_SYSTEM_DISABLE_COMPOSITION (codesys) = !NILP (value);
+#endif
#ifdef MULE
- else if (ty == CODESYS_ISO2022)
- {
+ else if (ty == CODESYS_ISO2022)
+ {
#define FROB_INITIAL_CHARSET(charset_num) \
CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, charset_num) = \
((EQ (value, Qt) || EQ (value, Qnil)) ? value : Fget_charset (value))
- if (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0);
- else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1);
- else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2);
- else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3);
+ if (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0);
+ else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1);
+ else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2);
+ else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3);
#define FROB_FORCE_CHARSET(charset_num) \
CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (codesys, charset_num) = !NILP (value)
- else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0);
- else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1);
- else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2);
- else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3);
+ else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0);
+ else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1);
+ else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2);
+ else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3);
#define FROB_BOOLEAN_PROPERTY(prop) \
CODING_SYSTEM_ISO2022_##prop (codesys) = !NILP (value)
- else if (EQ (key, Qshort)) FROB_BOOLEAN_PROPERTY (SHORT);
- else if (EQ (key, Qno_ascii_eol)) FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL);
- else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL);
- else if (EQ (key, Qseven)) FROB_BOOLEAN_PROPERTY (SEVEN);
- else if (EQ (key, Qlock_shift)) FROB_BOOLEAN_PROPERTY (LOCK_SHIFT);
- else if (EQ (key, Qno_iso6429)) FROB_BOOLEAN_PROPERTY (NO_ISO6429);
- else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED);
+ else if (EQ (key, Qshort)) FROB_BOOLEAN_PROPERTY (SHORT);
+ else if (EQ (key, Qno_ascii_eol)) FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL);
+ else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL);
+ else if (EQ (key, Qseven)) FROB_BOOLEAN_PROPERTY (SEVEN);
+ else if (EQ (key, Qlock_shift)) FROB_BOOLEAN_PROPERTY (LOCK_SHIFT);
+ else if (EQ (key, Qno_iso6429)) FROB_BOOLEAN_PROPERTY (NO_ISO6429);
+ else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED);
- else if (EQ (key, Qinput_charset_conversion))
- {
- codesys->iso2022.input_conv =
- Dynarr_new (charset_conversion_spec);
- parse_charset_conversion_specs (codesys->iso2022.input_conv,
- value);
- }
- else if (EQ (key, Qoutput_charset_conversion))
- {
- codesys->iso2022.output_conv =
- Dynarr_new (charset_conversion_spec);
- parse_charset_conversion_specs (codesys->iso2022.output_conv,
- value);
- }
- else
- signal_simple_error ("Unrecognized property", key);
- }
- else if (EQ (type, Qccl))
- {
- if (EQ (key, Qdecode))
- {
- CHECK_VECTOR (value);
- CODING_SYSTEM_CCL_DECODE (codesys) = value;
- }
- else if (EQ (key, Qencode))
- {
- CHECK_VECTOR (value);
- CODING_SYSTEM_CCL_ENCODE (codesys) = value;
- }
- else
- signal_simple_error ("Unrecognized property", key);
- }
+ else if (EQ (key, Qinput_charset_conversion))
+ {
+ codesys->iso2022.input_conv =
+ Dynarr_new (charset_conversion_spec);
+ parse_charset_conversion_specs (codesys->iso2022.input_conv,
+ value);
+ }
+ else if (EQ (key, Qoutput_charset_conversion))
+ {
+ codesys->iso2022.output_conv =
+ Dynarr_new (charset_conversion_spec);
+ parse_charset_conversion_specs (codesys->iso2022.output_conv,
+ value);
+ }
+ else
+ signal_simple_error ("Unrecognized property", key);
+ }
+ else if (EQ (type, Qccl))
+ {
+ Lisp_Object sym;
+ struct ccl_program test_ccl;
+ Extbyte *suffix;
+
+ /* Check key first. */
+ if (EQ (key, Qdecode))
+ suffix = "-ccl-decode";
+ else if (EQ (key, Qencode))
+ suffix = "-ccl-encode";
+ else
+ signal_simple_error ("Unrecognized property", key);
+
+ /* If value is a vector, register it as a ccl program
+ associated with a newly created symbol for
+ backward compatibility. */
+ if (VECTORP (value))
+ {
+ sym = Fintern (concat2 (Fsymbol_name (name),
+ build_string (suffix)),
+ Qnil);
+ Fregister_ccl_program (sym, value);
+ }
+ else
+ {
+ CHECK_SYMBOL (value);
+ sym = value;
+ }
+ /* check if the given ccl programs are valid. */
+ if (setup_ccl_program (&test_ccl, sym) < 0)
+ signal_simple_error ("Invalid CCL program", value);
+
+ if (EQ (key, Qdecode))
+ CODING_SYSTEM_CCL_DECODE (codesys) = sym;
+ else if (EQ (key, Qencode))
+ CODING_SYSTEM_CCL_ENCODE (codesys) = sym;
+
+ }
#endif /* MULE */
- else
- signal_simple_error ("Unrecognized property", key);
- }
+ else
+ signal_simple_error ("Unrecognized property", key);
+ }
+ }
if (need_to_setup_eol_systems)
setup_eol_coding_systems (codesys);
return new_coding_system;
}
+DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, 1, 1, 0, /*
+Return t if OBJECT names a coding system, and is not a coding system alias.
+*/
+       (object))
+{
+  /* Look OBJECT up in the coding system table; only an entry whose
+     value is itself a coding system object counts as canonical.  A
+     missing entry yields Qnil from the lookup and so answers nil.  */
+  Lisp_Object entry = Fgethash (object, Vcoding_system_hash_table, Qnil);
+
+  if (CODING_SYSTEMP (entry))
+    return Qt;
+  return Qnil;
+}
+
+DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /*
+Return t if OBJECT is a coding system alias.
+All coding system aliases are created by `define-coding-system-alias'.
+*/
+       (object))
+{
+  /* An alias is a table entry whose value is a symbol (the aliasee).
+     Qzero is passed as the miss value, presumably so that an absent
+     entry cannot be mistaken for a symbol-valued one.  */
+  Lisp_Object entry = Fgethash (object, Vcoding_system_hash_table, Qzero);
+
+  if (!SYMBOLP (entry))
+    return Qnil;
+  return Qt;
+}
+
+DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /*
+Return the coding-system symbol for which symbol ALIAS is an alias.
+*/
+       (alias))
+{
+  /* Use Qzero, not Qnil, as the "no entry" value.  With Qnil a name
+     that has no entry at all would pass the SYMBOLP test below and be
+     returned as if nil were its aliasee, instead of signaling.  This
+     matches the lookups in `coding-system-alias-p' and in the alias
+     loop check of `define-coding-system-alias'.  */
+  Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qzero);
+
+  if (SYMBOLP (aliasee))
+    return aliasee;
+
+  /* Either ALIAS has no entry, or it is the canonical name of a
+     coding system (its entry is a coding system object).  */
+  signal_simple_error ("Symbol is not a coding system alias", alias);
+  return Qnil; /* To keep the compiler happy */
+}
+
+/* Return the interned symbol whose name is the name of SYMBOL
+   followed by ASCII_STRING (e.g. a "-unix" style suffix).  */
+static Lisp_Object
+append_suffix_to_symbol (Lisp_Object symbol, const char *ascii_string)
+{
+  Lisp_Object suffixed_name =
+    concat2 (Fsymbol_name (symbol), build_string (ascii_string));
+
+  return Fintern (suffixed_name, Qnil);
+}
+
+/* A maphash function, for removing dangling coding system aliases.
+   An entry is dangling when its value ALIASEE is a symbol that has no
+   entry of its own in the coding system table.  For such an entry the
+   int counter pointed to by DANGLING_ALIASES is incremented and 1 is
+   returned; otherwise 0 is returned and the counter is untouched.  */
+static int
+dangling_coding_system_alias_p (Lisp_Object alias,
+                                Lisp_Object aliasee,
+                                void *dangling_aliases)
+{
+  int *dangling_count = (int *) dangling_aliases;
+
+  /* Entries holding real coding system objects are never dangling.  */
+  if (!SYMBOLP (aliasee))
+    return 0;
+
+  /* The aliasee still resolves to something: keep the alias.  */
+  if (!NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil)))
+    return 0;
+
+  (*dangling_count)++;
+  return 1;
+}
+
DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
-Define symbol ALIAS as an alias for coding system CODING-SYSTEM.
+Define symbol ALIAS as an alias for coding system ALIASEE.
+
+You can use this function to redefine an alias that has already been defined,
+but you cannot redefine a name which is the canonical name for a coding system.
+\(a canonical name of a coding system is what is returned when you call
+`coding-system-name' on a coding system).
+
+ALIASEE itself can be an alias, which allows you to define nested aliases.
+
+You are forbidden, however, from creating alias loops or `dangling' aliases.
+These will be detected, and an error will be signaled if you attempt to do so.
+
+If ALIASEE is nil, then ALIAS will simply be undefined.
+
+See also `coding-system-alias-p', `coding-system-aliasee',
+and `coding-system-canonical-name-p'.
*/
- (alias, coding_system))
+ (alias, aliasee))
{
+ Lisp_Object real_coding_system, probe;
+
CHECK_SYMBOL (alias);
- if (!NILP (Ffind_coding_system (alias)))
- signal_simple_error ("Symbol already names a coding system", alias);
- coding_system = Fget_coding_system (coding_system);
- Fputhash (alias, coding_system, Vcoding_system_hash_table);
- /* Set up aliases for subsidiaries. */
- if (XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
+ if (!NILP (Fcoding_system_canonical_name_p (alias)))
+ signal_simple_error
+ ("Symbol is the canonical name of a coding system and cannot be redefined",
+ alias);
+
+ if (NILP (aliasee))
{
- Lisp_Object str;
- XSETSTRING (str, symbol_name (XSYMBOL (alias)));
-#define FROB(type, name) \
- do { \
- Lisp_Object subsidiary = XCODING_SYSTEM_EOL_##type (coding_system); \
- if (!NILP (subsidiary)) \
- Fdefine_coding_system_alias \
- (Fintern (concat2 (str, build_string (name)), Qnil), subsidiary); \
- } while (0)
- FROB (LF, "-unix");
- FROB (CRLF, "-dos");
- FROB (CR, "-mac");
-#undef FROB
+ Lisp_Object subsidiary_unix = append_suffix_to_symbol (alias, "-unix");
+ Lisp_Object subsidiary_dos = append_suffix_to_symbol (alias, "-dos");
+ Lisp_Object subsidiary_mac = append_suffix_to_symbol (alias, "-mac");
+
+ Fremhash (alias, Vcoding_system_hash_table);
+
+ /* Undefine subsidiary aliases,
+ presumably created by a previous call to this function */
+ if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) &&
+ ! NILP (Fcoding_system_alias_p (subsidiary_dos)) &&
+ ! NILP (Fcoding_system_alias_p (subsidiary_mac)))
+ {
+ Fdefine_coding_system_alias (subsidiary_unix, Qnil);
+ Fdefine_coding_system_alias (subsidiary_dos, Qnil);
+ Fdefine_coding_system_alias (subsidiary_mac, Qnil);
+ }
+
+ /* Undefine dangling coding system aliases. */
+ {
+ int dangling_aliases;
+
+ do {
+ dangling_aliases = 0;
+ elisp_map_remhash (dangling_coding_system_alias_p,
+ Vcoding_system_hash_table,
+ &dangling_aliases);
+ } while (dangling_aliases > 0);
+ }
+
+ return Qnil;
}
+
+ if (CODING_SYSTEMP (aliasee))
+ aliasee = XCODING_SYSTEM_NAME (aliasee);
+
+ /* Checks that aliasee names a coding-system */
+ real_coding_system = Fget_coding_system (aliasee);
+
+ /* Check for coding system alias loops */
+ if (EQ (alias, aliasee))
+ alias_loop: signal_simple_error_2
+ ("Attempt to create a coding system alias loop", alias, aliasee);
+
+ for (probe = aliasee;
+ SYMBOLP (probe);
+ probe = Fgethash (probe, Vcoding_system_hash_table, Qzero))
+ {
+ if (EQ (probe, alias))
+ goto alias_loop;
+ }
+
+ Fputhash (alias, aliasee, Vcoding_system_hash_table);
+
+ /* Set up aliases for subsidiaries.
+ #### There must be a better way to handle subsidiary coding systems. */
+ {
+ static const char *suffixes[] = { "-unix", "-dos", "-mac" };
+ int i;
+ for (i = 0; i < countof (suffixes); i++)
+ {
+ Lisp_Object alias_subsidiary =
+ append_suffix_to_symbol (alias, suffixes[i]);
+ Lisp_Object aliasee_subsidiary =
+ append_suffix_to_symbol (aliasee, suffixes[i]);
+
+ if (! NILP (Ffind_coding_system (aliasee_subsidiary)))
+ Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary);
+ }
+ }
/* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac],
but it doesn't look intentional, so I'd rather return something
meaningful or nothing at all. */
}
static Lisp_Object
-subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
+subsidiary_coding_system (Lisp_Object coding_system, eol_type_t type)
{
Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
Lisp_Object new_coding_system;
case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break;
case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break;
case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break;
- default: abort ();
+ default: abort (); return Qnil;
}
return NILP (new_coding_system) ? coding_system : new_coding_system;
struct detection_state
{
- enum eol_type eol_type;
+ eol_type_t eol_type;
int seen_non_ascii;
int mask;
#ifdef MULE
return (mask & (mask - 1)) == 0;
}
-static enum eol_type
-detect_eol_type (struct detection_state *st, CONST unsigned char *src,
- unsigned int n)
+static eol_type_t
+detect_eol_type (struct detection_state *st, const Extbyte *src,
+ size_t n)
{
- int c;
-
while (n--)
{
- c = *src++;
+ unsigned char c = *(unsigned char *)src++;
if (c == '\n')
{
if (st->eol.just_saw_cr)
*/
static int
-detect_coding_type (struct detection_state *st, CONST Extbyte *src,
- unsigned int n, int just_do_eol)
+detect_coding_type (struct detection_state *st, const Extbyte *src,
+ size_t n, int just_do_eol)
{
- int c;
-
if (st->eol_type == EOL_AUTODETECT)
st->eol_type = detect_eol_type (st, src, n);
{
for (; n; n--, src++)
{
- c = *src;
+ unsigned char c = *(unsigned char *) src;
if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80)
{
st->seen_non_ascii = 1;
void
determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
- enum eol_type *eol_type_in_out)
+ eol_type_t *eol_type_in_out)
{
struct detection_state decst;
DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /*
Detect coding system of the text in the region between START and END.
-Returned a list of possible coding systems ordered by priority.
-If only ASCII characters are found, it returns 'undecided or one of
+Return a list of possible coding systems ordered by priority.
+If only ASCII characters are found, return 'undecided or one of
its subsidiary coding systems according to a detected end-of-line
type. Optional arg BUFFER defaults to the current buffer.
*/
decst.mask = ~0;
while (1)
{
- unsigned char random_buffer[4096];
+ Extbyte random_buffer[4096];
ssize_t nread = Lstream_read (istr, random_buffer, sizeof (random_buffer));
if (!nread)
} \
} while (0)
-INLINE void
+INLINE_HEADER void DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst);
+INLINE_HEADER void
DECODE_ADD_UCS_CHAR(Emchar c, unsigned_char_dynarr* dst)
{
if ( c <= 0x7f )
Some of these flags are dependent on the coding system. */
unsigned int flags;
- /* CH holds a partially built-up character. Since we only deal
- with one- and two-byte characters at the moment, we only use
- this to store the first byte of a two-byte character. */
- unsigned int ch;
+ /* CPOS holds the partially built-up code point of a character. */
+ unsigned int cpos;
/* EOL_TYPE specifies the type of end-of-line conversion that
currently applies. We need to keep this separate from the
EOL type stored in CODESYS because the latter might indicate
automatic EOL-type detection while the former will always
indicate a particular EOL type. */
- enum eol_type eol_type;
+ eol_type_t eol_type;
#ifdef MULE
/* Additional ISO2022 information. We define the structure above
because it's also needed by the detection routines. */
/* counter for UTF-8 or UCS-4 */
unsigned char counter;
#endif
+#ifdef UTF2000
+ unsigned combined_char_count;
+ Emchar combined_chars[16];
+ Lisp_Object combining_table;
+#endif
struct detection_state decst;
};
+#ifdef UTF2000
+extern Lisp_Object Vcharacter_composition_table;
+
+INLINE_HEADER void
+COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst);
+/* Emit, uncomposed and in order, every character currently held in
+   STR's combined_chars buffer to DST, then reset the composition
+   state (empty buffer, no combining table).  */
+INLINE_HEADER void
+COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst)
+{
+  unsigned pending = str->combined_char_count;
+  unsigned idx = 0;
+
+  while (idx < pending)
+    {
+      DECODE_ADD_UCS_CHAR (str->combined_chars[idx], dst);
+      idx++;
+    }
+
+  str->combining_table = Qnil;
+  str->combined_char_count = 0;
+}
+
+/* Feed CHARACTER through the composition state kept in STR, writing
+   any finished output to DST.
+
+   - If composition is disabled for the coding system, CHARACTER is
+     emitted directly.
+   - If no composition is in progress (combining_table is not a char-id
+     table), CHARACTER either starts one (when it has an entry in
+     Vcharacter_composition_table) or is emitted directly.
+   - Otherwise CHARACTER is looked up in the current combining table:
+     a character result replaces the pending sequence with the composed
+     character; a table result extends the pending sequence; anything
+     else flushes the pending characters and emits CHARACTER.  */
+void COMPOSE_ADD_CHAR(struct decoding_stream *str, Emchar character,
+                      unsigned_char_dynarr* dst);
+void
+COMPOSE_ADD_CHAR(struct decoding_stream *str,
+                 Emchar character, unsigned_char_dynarr* dst)
+{
+  if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys))
+    DECODE_ADD_UCS_CHAR (character, dst);
+  else if (!CHAR_ID_TABLE_P (str->combining_table))
+    {
+      Lisp_Object ret
+        = get_char_id_table (character, Vcharacter_composition_table);
+
+      if (NILP (ret))
+        DECODE_ADD_UCS_CHAR (character, dst);
+      else
+        {
+          /* CHARACTER can begin a composition: hold it back.  */
+          str->combined_chars[0] = character;
+          str->combined_char_count = 1;
+          str->combining_table = ret;
+        }
+    }
+  else
+    {
+      Lisp_Object ret
+        = get_char_id_table (character, str->combining_table);
+
+      if (CHARP (ret))
+        {
+          /* The pending sequence plus CHARACTER composes to CHAR2;
+             CHAR2 may itself begin a further composition.  */
+          Emchar char2 = XCHARVAL (ret);
+          ret = get_char_id_table (char2, Vcharacter_composition_table);
+          if (NILP (ret))
+            {
+              DECODE_ADD_UCS_CHAR (char2, dst);
+              str->combined_char_count = 0;
+              str->combining_table = Qnil;
+            }
+          else
+            {
+              str->combined_chars[0] = char2;
+              str->combined_char_count = 1;
+              str->combining_table = ret;
+            }
+        }
+      else if (CHAR_ID_TABLE_P (ret))
+        {
+          if (str->combined_char_count >= countof (str->combined_chars))
+            {
+              /* The pending buffer is full.  Storing another character
+                 would write past the end of combined_chars, so give up
+                 on this composition: emit what is pending, then
+                 CHARACTER itself.  */
+              COMPOSE_FLUSH_CHARS (str, dst);
+              DECODE_ADD_UCS_CHAR (character, dst);
+            }
+          else
+            {
+              str->combined_chars[str->combined_char_count++] = character;
+              str->combining_table = ret;
+            }
+        }
+      else
+        {
+          /* CHARACTER does not continue the pending sequence.  */
+          COMPOSE_FLUSH_CHARS (str, dst);
+          DECODE_ADD_UCS_CHAR (character, dst);
+        }
+    }
+}
+#else /* not UTF2000 */
+#define COMPOSE_FLUSH_CHARS(str, dst)
+#define COMPOSE_ADD_CHAR(str, ch, dst) DECODE_ADD_UCS_CHAR (ch, dst)
+#endif /* UTF2000 */
+
static ssize_t decoding_reader (Lstream *stream,
unsigned char *data, size_t size);
static ssize_t decoding_writer (Lstream *stream,
- CONST unsigned char *data, size_t size);
+ const unsigned char *data, size_t size);
static int decoding_rewinder (Lstream *stream);
static int decoding_seekable_p (Lstream *stream);
static int decoding_flusher (Lstream *stream);
/* There might be some more end data produced in the translation.
See the comment above. */
str->flags |= CODING_STATE_END;
- mule_decode (stream, data, str->runoff, read_size);
+ mule_decode (stream, (Extbyte *) data, str->runoff, read_size);
}
if (data - orig_data == 0)
}
static ssize_t
-decoding_writer (Lstream *stream, CONST unsigned char *data, size_t size)
+decoding_writer (Lstream *stream, const unsigned char *data, size_t size)
{
struct decoding_stream *str = DECODING_STREAM_DATA (stream);
ssize_t retval;
/* Decode all our data into the runoff, and then attempt to write
it all out to the other end. Remove whatever chunk we succeeded
in writing. */
- mule_decode (stream, data, str->runoff, size);
+ mule_decode (stream, (Extbyte *) data, str->runoff, size);
retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0),
Dynarr_length (str->runoff));
if (retval > 0)
}
str->counter = 0;
#endif /* MULE */
- str->flags = str->ch = 0;
+#ifdef UTF2000
+ str->combined_char_count = 0;
+ str->combining_table = Qnil;
+#endif
+ str->flags = str->cpos = 0;
}
static int
static Lisp_Object
make_decoding_stream_1 (Lstream *stream, Lisp_Object codesys,
- CONST char *mode)
+ const char *mode)
{
Lstream *lstr = Lstream_new (lstream_decoding, mode);
struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
be used for both reading and writing. */
static void
-mule_decode (Lstream *decoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+mule_decode (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
break;
case CODESYS_CCL:
str->ccl.last_block = str->flags & CODING_STATE_END;
- ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING);
+ /* When applying ccl program to stream, MUST NOT set NULL
+ pointer to src. */
+ ccl_driver (&str->ccl, (src ? (unsigned char *)src : (unsigned char*)""),
+ dst, n, 0, CCL_MODE_DECODING);
break;
case CODESYS_ISO2022:
decode_coding_iso2022 (decoding, src, dst, n);
};
static ssize_t encoding_reader (Lstream *stream, unsigned char *data, size_t size);
-static ssize_t encoding_writer (Lstream *stream, CONST unsigned char *data,
+static ssize_t encoding_writer (Lstream *stream, const unsigned char *data,
size_t size);
static int encoding_rewinder (Lstream *stream);
static int encoding_seekable_p (Lstream *stream);
}
static ssize_t
-encoding_writer (Lstream *stream, CONST unsigned char *data, size_t size)
+encoding_writer (Lstream *stream, const unsigned char *data, size_t size)
{
struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
ssize_t retval;
str->encode_char = &char_encode_shift_jis;
str->finish = &char_finish_shift_jis;
break;
+ case CODESYS_BIG5:
+ str->encode_char = &char_encode_big5;
+ str->finish = &char_finish_big5;
+ break;
default:
break;
}
static Lisp_Object
make_encoding_stream_1 (Lstream *stream, Lisp_Object codesys,
- CONST char *mode)
+ const char *mode)
{
Lstream *lstr = Lstream_new (lstream_encoding, mode);
struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
Store the encoded data into DST. */
static void
-mule_encode (Lstream *encoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+mule_encode (Lstream *encoding, const Bufbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
encode_coding_no_conversion (encoding, src, dst, n);
break;
#ifdef MULE
- case CODESYS_BIG5:
- encode_coding_big5 (encoding, src, dst, n);
- break;
case CODESYS_CCL:
str->ccl.last_block = str->flags & CODING_STATE_END;
- ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING);
+ /* When applying ccl program to stream, MUST NOT set NULL
+ pointer to src. */
+ ccl_driver (&str->ccl, ((src) ? src : (unsigned char*)""),
+ dst, n, 0, CCL_MODE_ENCODING);
break;
#endif /* MULE */
default:
#ifdef MULE
\f
static void
-text_encode_generic (Lstream *encoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+text_encode_generic (Lstream *encoding, const Bufbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
unsigned char c;
unsigned char char_boundary;
((c) >= 0xA1 && (c) <= 0xDF)
static int
-detect_coding_sjis (struct detection_state *st, CONST unsigned char *src,
- unsigned int n)
+detect_coding_sjis (struct detection_state *st, const Extbyte *src, size_t n)
{
- int c;
-
while (n--)
{
- c = *src++;
+ unsigned char c = *(unsigned char *)src++;
if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
return 0;
if (st->shift_jis.in_second_byte)
/* Convert Shift-JIS data to internal format. */
static void
-decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+decode_coding_sjis (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
- unsigned char c;
struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
unsigned int flags = str->flags;
- unsigned int ch = str->ch;
+ unsigned int cpos = str->cpos;
eol_type_t eol_type = str->eol_type;
while (n--)
{
- c = *src++;
+ unsigned char c = *(unsigned char *)src++;
- if (ch)
+ if (cpos)
{
/* Previous character was first byte of Shift-JIS Kanji char. */
if (BYTE_SJIS_TWO_BYTE_2_P (c))
{
unsigned char e1, e2;
- DECODE_SJIS (ch, c, e1, e2);
+ DECODE_SJIS (cpos, c, e1, e2);
#ifdef UTF2000
DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_japanese_jisx0208,
e1 & 0x7F,
}
else
{
- DECODE_ADD_BINARY_CHAR (ch, dst);
+ DECODE_ADD_BINARY_CHAR (cpos, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
}
- ch = 0;
+ cpos = 0;
}
else
{
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
if (BYTE_SJIS_TWO_BYTE_1_P (c))
- ch = c;
+ cpos = c;
else if (BYTE_SJIS_KATAKANA_P (c))
{
#ifdef UTF2000
label_continue_loop:;
}
- DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+ DECODE_HANDLE_END_OF_CONVERSION (flags, cpos, dst);
str->flags = flags;
- str->ch = ch;
+ str->cpos = cpos;
}
/* Convert internal character representation to Shift_JIS. */
}
else
{
- Lisp_Object charset;
- unsigned int c1, c2, s1, s2;
+ unsigned int s1, s2;
#ifdef UTF2000
int code_point = charset_code_point (Vcharset_latin_jisx0201, ch);
if (code_point >= 0)
+ Dynarr_add (dst, code_point);
+ else if ((code_point
+ = charset_code_point (Vcharset_japanese_jisx0208_1990, ch))
+ >= 0)
{
- charset = Vcharset_latin_jisx0201;
- c1 = code_point;
- c2 = 0;
+ ENCODE_SJIS ((code_point >> 8) | 0x80,
+ (code_point & 0xFF) | 0x80, s1, s2);
+ Dynarr_add (dst, s1);
+ Dynarr_add (dst, s2);
}
+ else if ((code_point
+ = charset_code_point (Vcharset_katakana_jisx0201, ch))
+ >= 0)
+ Dynarr_add (dst, code_point | 0x80);
+ else if ((code_point
+ = charset_code_point (Vcharset_japanese_jisx0208, ch))
+ >= 0)
+ {
+ ENCODE_SJIS ((code_point >> 8) | 0x80,
+ (code_point & 0xFF) | 0x80, s1, s2);
+ Dynarr_add (dst, s1);
+ Dynarr_add (dst, s2);
+ }
+ else if ((code_point = charset_code_point (Vcharset_ascii, ch))
+ >= 0)
+ Dynarr_add (dst, code_point);
else
-#endif
- BREAKUP_CHAR (ch, charset, c1, c2);
+ Dynarr_add (dst, '?');
+#else
+ Lisp_Object charset;
+ unsigned int c1, c2;
+
+ BREAKUP_CHAR (ch, charset, c1, c2);
if (EQ(charset, Vcharset_katakana_jisx0201))
{
}
else
Dynarr_add (dst, '?');
+#endif
}
}
}
DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /*
-Encode a JISX0208 character CHAR to SHIFT-JIS coding-system.
+Encode a JISX0208 character CHARACTER to SHIFT-JIS coding-system.
Return the corresponding character code in SHIFT-JIS as a cons of two bytes.
*/
- (ch))
+ (character))
{
Lisp_Object charset;
int c1, c2, s1, s2;
- CHECK_CHAR_COERCE_INT (ch);
- BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
+ CHECK_CHAR_COERCE_INT (character);
+ BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
if (EQ (charset, Vcharset_japanese_jisx0208))
{
ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
contains frequently used characters and the latter contains less
frequently used characters. */
+#ifdef UTF2000
+#define BYTE_BIG5_TWO_BYTE_1_P(c) \
+ ((c) >= 0x81 && (c) <= 0xFE)
+#else
#define BYTE_BIG5_TWO_BYTE_1_P(c) \
((c) >= 0xA1 && (c) <= 0xFE)
+#endif
-/* Is this the second byte of a Shift-JIS two-byte char? */
+/* Is this the second byte of a Big5 two-byte char? */
} while (0)
static int
-detect_coding_big5 (struct detection_state *st, CONST unsigned char *src,
- unsigned int n)
+detect_coding_big5 (struct detection_state *st, const Extbyte *src, size_t n)
{
- int c;
-
while (n--)
{
- c = *src++;
- if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO ||
- (c >= 0x80 && c <= 0xA0))
+ unsigned char c = *(unsigned char *)src++;
+ if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO
+#ifndef UTF2000
+ || (c >= 0x80 && c <= 0xA0)
+#endif
+ )
return 0;
if (st->big5.in_second_byte)
{
if (c < 0x40 || (c >= 0x80 && c <= 0xA0))
return 0;
}
- else if (c >= 0xA1)
+ else if (
+#ifdef UTF2000
+ c >= 0x81
+#else
+ c >= 0xA1
+#endif
+ )
st->big5.in_second_byte = 1;
}
return CODING_CATEGORY_BIG5_MASK;
/* Convert Big5 data to internal format. */
static void
-decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+decode_coding_big5 (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
- unsigned char c;
struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
unsigned int flags = str->flags;
- unsigned int ch = str->ch;
+ unsigned int cpos = str->cpos;
eol_type_t eol_type = str->eol_type;
while (n--)
{
- c = *src++;
- if (ch)
+ unsigned char c = *(unsigned char *)src++;
+ if (cpos)
{
/* Previous character was first byte of Big5 char. */
if (BYTE_BIG5_TWO_BYTE_2_P (c))
{
+#ifdef UTF2000
+ DECODE_ADD_UCS_CHAR
+ (DECODE_CHAR (Vcharset_chinese_big5, (cpos << 8) | c),
+ dst);
+#else
unsigned char b1, b2, b3;
- DECODE_BIG5 (ch, c, b1, b2, b3);
+ DECODE_BIG5 (cpos, c, b1, b2, b3);
Dynarr_add (dst, b1);
Dynarr_add (dst, b2);
Dynarr_add (dst, b3);
+#endif
}
else
{
- DECODE_ADD_BINARY_CHAR (ch, dst);
+ DECODE_ADD_BINARY_CHAR (cpos, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
}
- ch = 0;
+ cpos = 0;
}
else
{
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
if (BYTE_BIG5_TWO_BYTE_1_P (c))
- ch = c;
+ cpos = c;
else
DECODE_ADD_BINARY_CHAR (c, dst);
}
label_continue_loop:;
}
- DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+ DECODE_HANDLE_END_OF_CONVERSION (flags, cpos, dst);
str->flags = flags;
- str->ch = ch;
+ str->cpos = cpos;
}
/* Convert internally-formatted data to Big5. */
-static void
-encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+void
+char_encode_big5 (struct encoding_stream *str, Emchar ch,
+ unsigned_char_dynarr *dst, unsigned int *flags)
{
-#ifndef UTF2000
- unsigned char c;
- struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
- unsigned int flags = str->flags;
- unsigned int ch = str->ch;
eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
- while (n--)
+ if (ch == '\n')
{
- c = *src++;
- if (c == '\n')
- {
- if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
- Dynarr_add (dst, '\r');
- if (eol_type != EOL_CR)
- Dynarr_add (dst, '\n');
- }
- else if (BYTE_ASCII_P (c))
- {
- /* ASCII. */
- Dynarr_add (dst, c);
- }
- else if (BUFBYTE_LEADING_BYTE_P (c))
+ if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+ Dynarr_add (dst, '\r');
+ if (eol_type != EOL_CR)
+ Dynarr_add (dst, ch);
+ }
+ else
+ {
+#ifdef UTF2000
+ int code_point;
+
+ if ((code_point = charset_code_point (Vcharset_ascii, ch)) >= 0)
+ Dynarr_add (dst, code_point);
+ else if ((code_point
+ = charset_code_point (Vcharset_chinese_big5, ch)) >= 0)
{
- if (c == LEADING_BYTE_CHINESE_BIG5_1 ||
- c == LEADING_BYTE_CHINESE_BIG5_2)
- {
- /* A recognized leading byte. */
- ch = c;
- continue; /* not done with this character. */
- }
- /* otherwise just ignore this character. */
+ Dynarr_add (dst, code_point >> 8);
+ Dynarr_add (dst, code_point & 0xFF);
}
- else if (ch == LEADING_BYTE_CHINESE_BIG5_1 ||
- ch == LEADING_BYTE_CHINESE_BIG5_2)
+ else if ((code_point
+ = charset_code_point (Vcharset_chinese_big5_1, ch)) >= 0)
{
- /* Previous char was a recognized leading byte. */
- ch = (ch << 8) | c;
- continue; /* not done with this character. */
+ unsigned int I
+ = ((code_point >> 8) - 33) * (0xFF - 0xA1)
+ + ((code_point & 0xFF) - 33);
+ unsigned char b1 = I / BIG5_SAME_ROW + 0xA1;
+ unsigned char b2 = I % BIG5_SAME_ROW;
+
+ b2 += b2 < 0x3F ? 0x40 : 0x62;
+ Dynarr_add (dst, b1);
+ Dynarr_add (dst, b2);
}
- else if (ch)
+ else if ((code_point
+ = charset_code_point (Vcharset_chinese_big5_2, ch)) >= 0)
{
- /* Encountering second byte of a Big5 character. */
+ unsigned int I
+ = ((code_point >> 8) - 33) * (0xFF - 0xA1)
+ + ((code_point & 0xFF) - 33);
unsigned char b1, b2;
- ENCODE_BIG5 (ch >> 8, ch & 0xFF, c, b1, b2);
+ I += BIG5_SAME_ROW * (0xC9 - 0xA1);
+ b1 = I / BIG5_SAME_ROW + 0xA1;
+ b2 = I % BIG5_SAME_ROW;
+ b2 += b2 < 0x3F ? 0x40 : 0x62;
Dynarr_add (dst, b1);
Dynarr_add (dst, b2);
}
-
- ch = 0;
+ else
+ Dynarr_add (dst, '?');
+#else
+#endif
}
+}
- str->flags = flags;
- str->ch = ch;
-#endif
+void
+char_finish_big5 (struct encoding_stream *str, unsigned_char_dynarr *dst,
+ unsigned int *flags)
+{ /* Installed as the `finish' hook for CODESYS_BIG5 encoding streams.
+     The body is empty: nothing is appended to DST, and STR and FLAGS
+     are left untouched.  */
}
}
DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /*
-Encode the Big5 character CH to BIG5 coding-system.
+Encode the Big5 character CHARACTER in the BIG5 coding-system.
Return the corresponding character code in Big5.
*/
- (ch))
+ (character))
{
Lisp_Object charset;
int c1, c2, b1, b2;
- CHECK_CHAR_COERCE_INT (ch);
- BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
+ CHECK_CHAR_COERCE_INT (character);
+ BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
if (EQ (charset, Vcharset_chinese_big5_1) ||
EQ (charset, Vcharset_chinese_big5_2))
{
/************************************************************************/
static int
-detect_coding_ucs4 (struct detection_state *st, CONST unsigned char *src,
- unsigned int n)
+detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, size_t n)
{
while (n--)
{
- int c = *src++;
+ unsigned char c = *(unsigned char *)src++;
switch (st->ucs4.in_byte)
{
case 0:
}
static void
-decode_coding_ucs4 (Lstream *decoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
unsigned int flags = str->flags;
- unsigned int ch = str->ch;
+ unsigned int cpos = str->cpos;
unsigned char counter = str->counter;
while (n--)
{
- unsigned char c = *src++;
+ unsigned char c = *(unsigned char *)src++;
switch (counter)
{
case 0:
- ch = c;
+ cpos = c;
counter = 3;
break;
case 1:
- DECODE_ADD_UCS_CHAR ((ch << 8) | c, dst);
- ch = 0;
+ DECODE_ADD_UCS_CHAR ((cpos << 8) | c, dst);
+ cpos = 0;
counter = 0;
break;
default:
- ch = ( ch << 8 ) | c;
+ cpos = ( cpos << 8 ) | c;
counter--;
}
}
if (counter & CODING_STATE_END)
- DECODE_OUTPUT_PARTIAL_CHAR (ch);
+ DECODE_OUTPUT_PARTIAL_CHAR (cpos);
- str->flags = flags;
- str->ch = ch;
- str->counter = counter;
+ str->flags = flags;
+ str->cpos = cpos;
+ str->counter = counter;
}
void
/************************************************************************/
static int
-detect_coding_utf8 (struct detection_state *st, CONST unsigned char *src,
- unsigned int n)
+detect_coding_utf8 (struct detection_state *st, const Extbyte *src, size_t n)
{
while (n--)
{
- unsigned char c = *src++;
+ unsigned char c = *(unsigned char *)src++;
switch (st->utf8.in_byte)
{
case 0:
}
static void
-decode_coding_utf8 (Lstream *decoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+decode_output_utf8_partial_char (unsigned char counter,
+ unsigned int cpos,
+ unsigned_char_dynarr *dst)
+{ /* Re-emit the bytes of an unfinished UTF-8 sequence as binary
+     characters (via DECODE_ADD_BINARY_CHAR on DST).  COUNTER is the
+     number of continuation bytes that were still expected
+     (decode_coding_utf8 sets it to 1..5 from the lead byte and
+     decrements it per continuation byte); CPOS holds the payload bits
+     gathered so far.  Only COUNTER and CPOS are known here, so the
+     number of bytes already consumed is inferred from the magnitude of
+     CPOS, and the lead byte marker (0xC0 .. 0xFC) is chosen so that
+     bytes emitted + COUNTER equals the length of that sequence form.
+     NOTE(review): a longer sequence whose leading payload bits are all
+     zero yields the same CPOS as a shorter one, so the bytes emitted may
+     differ from the bytes originally read -- confirm this is acceptable.  */
+ if (counter == 5) /* only the lead byte of a 6-byte form was read */
+ DECODE_ADD_BINARY_CHAR ( (cpos|0xFC), dst);
+ else if (counter == 4) /* 1 byte of a 5-byte form, or 2 of a 6-byte form */
+ {
+ if (cpos < (1 << 6))
+ DECODE_ADD_BINARY_CHAR ( (cpos|0xF8), dst);
+ else
+ {
+ DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xFC), dst);
+ DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst);
+ }
+ }
+ else if (counter == 3) /* 1, 2 or 3 bytes were read */
+ {
+ if (cpos < (1 << 6))
+ DECODE_ADD_BINARY_CHAR ( (cpos|0xF0), dst);
+ else if (cpos < (1 << 12))
+ {
+ DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xF8), dst);
+ DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst);
+ }
+ else
+ {
+ DECODE_ADD_BINARY_CHAR ( ( (cpos >> 12)|0xFC), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst);
+ }
+ }
+ else if (counter == 2) /* 1 to 4 bytes were read */
+ {
+ if (cpos < (1 << 6))
+ DECODE_ADD_BINARY_CHAR ( (cpos|0xE0), dst);
+ else if (cpos < (1 << 12))
+ {
+ DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xF0), dst);
+ DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst);
+ }
+ else if (cpos < (1 << 18))
+ {
+ DECODE_ADD_BINARY_CHAR ( ( (cpos >> 12)|0xF8), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst);
+ }
+ else
+ {
+ DECODE_ADD_BINARY_CHAR ( ( (cpos >> 18)|0xFC), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 12)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst);
+ }
+ }
+ else /* remaining case; the callers in this file pass counter == 1 here: 1 to 5 bytes were read */
+ {
+ if (cpos < (1 << 6))
+ DECODE_ADD_BINARY_CHAR ( (cpos|0xC0), dst);
+ else if (cpos < (1 << 12))
+ {
+ DECODE_ADD_BINARY_CHAR ( ((cpos >> 6)|0xE0), dst);
+ DECODE_ADD_BINARY_CHAR ( ((cpos&0x3F)|0x80), dst);
+ }
+ else if (cpos < (1 << 18))
+ {
+ DECODE_ADD_BINARY_CHAR ( ( (cpos >> 12)|0xF0), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst);
+ }
+ else if (cpos < (1 << 24))
+ {
+ DECODE_ADD_BINARY_CHAR ( ( (cpos >> 18)|0xF8), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 12)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst);
+ }
+ else
+ {
+ DECODE_ADD_BINARY_CHAR ( ( (cpos >> 24)|0xFC), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 18)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 12)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( (((cpos >> 6)&0x3F)|0x80), dst);
+ DECODE_ADD_BINARY_CHAR ( ( (cpos &0x3F)|0x80), dst);
+ }
+ }
+}
+
+static void
+decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
- unsigned int flags = str->flags;
- unsigned int ch = str->ch;
- eol_type_t eol_type = str->eol_type;
- unsigned char counter = str->counter;
+ unsigned int flags = str->flags; /* Decode N bytes of UTF-8 from SRC,
+     handing each completed character to DECODE_ADD_UCS_CHAR with DST.
+     The state (FLAGS, CPOS, COUNTER) is loaded from STR here and
+     written back before returning, so a multi-byte sequence may
+     straddle two calls.  */
+ unsigned int cpos = str->cpos;
+ eol_type_t eol_type = str->eol_type;
+ unsigned char counter = str->counter; /* continuation bytes still expected */
while (n--)
{
- unsigned char c = *src++;
- switch (counter)
+ unsigned char c = *(unsigned char *)src++;
+ if (counter == 0) /* not inside a multi-byte sequence */
{
- case 0:
- if ( c >= 0xfc )
+ if ( c < 0xC0 ) /* 0x00..0xBF: handled as a one-byte character */
{
- ch = c & 0x01;
- counter = 5;
+ DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+ DECODE_ADD_UCS_CHAR (c, dst);
}
- else if ( c >= 0xf8 )
+ else if ( c < 0xE0 ) /* lead byte of a 2-byte sequence */
{
- ch = c & 0x03;
- counter = 4;
+ cpos = c & 0x1f;
+ counter = 1;
}
- else if ( c >= 0xf0 )
+ else if ( c < 0xF0 ) /* lead byte of a 3-byte sequence */
{
- ch = c & 0x07;
- counter = 3;
+ cpos = c & 0x0f;
+ counter = 2;
}
- else if ( c >= 0xe0 )
+ else if ( c < 0xF8 ) /* lead byte of a 4-byte sequence */
{
- ch = c & 0x0f;
- counter = 2;
+ cpos = c & 0x07;
+ counter = 3;
}
- else if ( c >= 0xc0 )
+ else if ( c < 0xFC ) /* lead byte of a 5-byte sequence */
{
- ch = c & 0x1f;
- counter = 1;
+ cpos = c & 0x03;
+ counter = 4;
}
else
{
- DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
- DECODE_ADD_UCS_CHAR (c, dst);
+ cpos = c & 0x01; /* 0xFC..0xFF: treated as lead byte of a 6-byte sequence */
+ counter = 5;
}
- break;
- case 1:
- ch = ( ch << 6 ) | ( c & 0x3f );
- DECODE_ADD_UCS_CHAR (ch, dst);
- ch = 0;
+ }
+ else if ( (c & 0xC0) == 0x80 ) /* continuation byte: shift in its low 6 bits */
+ {
+ cpos = ( cpos << 6 ) | ( c & 0x3f );
+ if (counter == 1) /* that was the last expected byte */
+ {
+ DECODE_ADD_UCS_CHAR (cpos, dst);
+ cpos = 0;
+ counter = 0;
+ }
+ else
+ counter--;
+ }
+ else /* sequence interrupted: flush what was read, then C itself, as binary */
+ {
+ decode_output_utf8_partial_char (counter, cpos, dst);
+ DECODE_ADD_BINARY_CHAR (c, dst);
+ cpos = 0;
counter = 0;
- break;
- default:
- ch = ( ch << 6 ) | ( c & 0x3f );
- counter--;
}
label_continue_loop:;
}
if (flags & CODING_STATE_END)
- DECODE_OUTPUT_PARTIAL_CHAR (ch);
-
- str->flags = flags;
- str->ch = ch;
- str->counter = counter;
+ if (counter > 0) /* the stream ended in the middle of a sequence */
+ {
+ decode_output_utf8_partial_char (counter, cpos, dst);
+ cpos = 0;
+ counter = 0;
+ }
+ str->flags = flags;
+ str->cpos = cpos;
+ str->counter = counter;
}
void
}
if (0x40 <= c && c <= 0x42)
{
- cs = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, c,
+ /* 94^n-set */
+ cs = CHARSET_BY_ATTRIBUTES (94, -1, c,
*flags & CODING_STATE_R2L ?
CHARSET_RIGHT_TO_LEFT :
CHARSET_LEFT_TO_RIGHT);
default:
{
- int type =-1;
+ int chars = 0;
+ int single = 0;
if (c < '0' || c > '~')
return 0; /* bad final byte */
if (iso->esc >= ISO_ESC_2_8 &&
iso->esc <= ISO_ESC_2_15)
{
- type = ((iso->esc >= ISO_ESC_2_12) ?
- CHARSET_TYPE_96 : CHARSET_TYPE_94);
+ chars = (iso->esc >= ISO_ESC_2_12) ? 96 : 94;
+ single = 1; /* single-byte */
reg = (iso->esc - ISO_ESC_2_8) & 3;
}
else if (iso->esc >= ISO_ESC_2_4_8 &&
iso->esc <= ISO_ESC_2_4_15)
{
- type = ((iso->esc >= ISO_ESC_2_4_12) ?
- CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94);
+ chars = (iso->esc >= ISO_ESC_2_4_12) ? 96 : 94;
+ single = -1; /* multi-byte */
reg = (iso->esc - ISO_ESC_2_4_8) & 3;
}
else
abort();
}
- cs = CHARSET_BY_ATTRIBUTES (type, c,
+ cs = CHARSET_BY_ATTRIBUTES (chars, single, c,
*flags & CODING_STATE_R2L ?
CHARSET_RIGHT_TO_LEFT :
CHARSET_LEFT_TO_RIGHT);
}
static int
-detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
- unsigned int n)
+detect_coding_iso2022 (struct detection_state *st, const Extbyte *src, size_t n)
{
int mask;
while (n--)
{
- int c = *src++;
+ unsigned char c = *(unsigned char *)src++;
if (c >= 0xA0)
{
mask &= ~CODING_CATEGORY_ISO_7_MASK;
/* Convert ISO2022-format data to internal format. */
static void
-decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
- unsigned int flags = str->flags;
- unsigned int ch = str->ch;
- eol_type_t eol_type = str->eol_type;
+ unsigned int flags = str->flags;
+ unsigned int cpos = str->cpos;
+ unsigned char counter = str->counter;
+ eol_type_t eol_type = str->eol_type;
#ifdef ENABLE_COMPOSITE_CHARS
unsigned_char_dynarr *real_dst = dst;
#endif
while (n--)
{
- unsigned char c = *src++;
+ unsigned char c = *(unsigned char *)src++;
if (flags & CODING_STATE_ESCAPE)
{ /* Within ESC sequence */
int retval = parse_iso2022_esc (coding_system, &str->iso2022,
#endif /* ENABLE_COMPOSITE_CHARS */
case ISO_ESC_LITERAL:
+ COMPOSE_FLUSH_CHARS (str, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
break;
{
/* Output the (possibly invalid) sequence */
int i;
+ COMPOSE_FLUSH_CHARS (str, dst);
for (i = 0; i < str->iso2022.esc_bytes_index; i++)
DECODE_ADD_BINARY_CHAR (str->iso2022.esc_bytes[i], dst);
flags &= CODING_STATE_ISO2022_LOCK;
/* No sense in reprocessing the final byte of the
escape sequence; it could mess things up anyway.
Just add it now. */
+ COMPOSE_FLUSH_CHARS (str, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
}
}
- ch = 0;
+ cpos = 0;
+ counter = 0;
}
else if (BYTE_C0_P (c) || BYTE_C1_P (c))
{ /* Control characters */
/* If we were in the middle of a character, dump out the
partial character. */
- DECODE_OUTPUT_PARTIAL_CHAR (ch);
+ if (counter)
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ while (counter > 0)
+ {
+ counter--;
+ DECODE_ADD_BINARY_CHAR
+ ((unsigned char)(cpos >> (counter * 8)), dst);
+ }
+ cpos = 0;
+ }
/* If we just saw a single-shift character, dump it out.
This may dump out the wrong sort of single-shift character,
wrong. */
if (flags & CODING_STATE_SS2)
{
+ COMPOSE_FLUSH_CHARS (str, dst);
DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst);
flags &= ~CODING_STATE_SS2;
}
if (flags & CODING_STATE_SS3)
{
+ COMPOSE_FLUSH_CHARS (str, dst);
DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst);
flags &= ~CODING_STATE_SS3;
}
/***** Now handle the control characters. *****/
/* Handle CR/LF */
+#ifdef UTF2000
+ if (c == '\r')
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ if (eol_type == EOL_CR)
+ Dynarr_add (dst, '\n');
+ else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR)
+ Dynarr_add (dst, c);
+ else
+ flags |= CODING_STATE_CR;
+ goto label_continue_loop;
+ }
+ else if (flags & CODING_STATE_CR)
+ { /* eol_type == CODING_SYSTEM_EOL_CRLF */
+ if (c != '\n')
+ Dynarr_add (dst, '\r');
+ flags &= ~CODING_STATE_CR;
+ }
+#else
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+#endif
flags &= CODING_STATE_ISO2022_LOCK;
if (!parse_iso2022_esc (coding_system, &str->iso2022, c, &flags, 1))
- DECODE_ADD_BINARY_CHAR (c, dst);
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ DECODE_ADD_BINARY_CHAR (c, dst);
+ }
}
else
{ /* Graphic characters */
#endif
int reg;
+#ifdef UTF2000
+ if (c == '\r')
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ if (eol_type == EOL_CR)
+ Dynarr_add (dst, '\n');
+ else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR)
+ Dynarr_add (dst, c);
+ else
+ flags |= CODING_STATE_CR;
+ goto label_continue_loop;
+ }
+ else if (flags & CODING_STATE_CR)
+ { /* eol_type == CODING_SYSTEM_EOL_CRLF */
+ if (c != '\n')
+ Dynarr_add (dst, '\r');
+ flags &= ~CODING_STATE_CR;
+ }
+#else
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
+#endif
/* Now determine the charset. */
reg = ((flags & CODING_STATE_SS2) ? 2
outside the range of the charset. Insert that char literally
to preserve it for the output. */
{
- DECODE_OUTPUT_PARTIAL_CHAR (ch);
+ COMPOSE_FLUSH_CHARS (str, dst);
+ while (counter > 0)
+ {
+ counter--;
+ DECODE_ADD_BINARY_CHAR
+ ((unsigned char)(cpos >> (counter * 8)), dst);
+ }
+ cpos = 0;
DECODE_ADD_BINARY_CHAR (c, dst);
}
}
#ifdef UTF2000
- if (XCHARSET_DIMENSION (charset) == 1)
- {
- DECODE_OUTPUT_PARTIAL_CHAR (ch);
- DECODE_ADD_UCS_CHAR
- (MAKE_CHAR (charset, c & 0x7F, 0), dst);
- }
- else if (ch)
+ counter++;
+ if (XCHARSET_DIMENSION (charset) == counter)
{
- DECODE_ADD_UCS_CHAR
- (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst);
- ch = 0;
+ COMPOSE_ADD_CHAR (str,
+ DECODE_CHAR (charset,
+ ((cpos & 0x7F7F7F) << 8)
+ | (c & 0x7F)),
+ dst);
+ cpos = 0;
+ counter = 0;
}
else
- ch = c;
+ cpos = (cpos << 8) | c;
#else
lb = XCHARSET_LEADING_BYTE (charset);
switch (XCHARSET_REP_BYTES (charset))
#endif
}
- if (!ch)
+ if (!cpos)
flags &= CODING_STATE_ISO2022_LOCK;
}
}
if (flags & CODING_STATE_END)
- DECODE_OUTPUT_PARTIAL_CHAR (ch);
-
- str->flags = flags;
- str->ch = ch;
+ {
+ COMPOSE_FLUSH_CHARS (str, dst);
+ DECODE_OUTPUT_PARTIAL_CHAR (cpos);
+ }
+ str->flags = flags;
+ str->cpos = cpos;
+ str->counter = counter;
}
iso2022_designate (Lisp_Object charset, unsigned char reg,
struct encoding_stream *str, unsigned_char_dynarr *dst)
{
- static CONST char inter94[] = "()*+";
- static CONST char inter96[] = ",-./";
+ static const char inter94[] = "()*+";
+ static const char inter96[] = ",-./";
unsigned short chars;
unsigned char dimension;
unsigned char final;
int i;
Lisp_Object charset = str->iso2022.current_charset;
int half = str->iso2022.current_half;
- unsigned int byte1, byte2;
+ int code_point = -1;
if (ch <= 0x7F)
{
reg = -1;
for (i = 0; i < 4; i++)
{
- int code_point;
-
if ((CHARSETP (charset = str->iso2022.charset[i])
&& ((code_point = charset_code_point (charset, ch)) >= 0))
||
= CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i))
&& ((code_point = charset_code_point (charset, ch)) >= 0)))
{
- if (XCHARSET_DIMENSION (charset) == 1)
- {
- byte1 = code_point;
- byte2 = 0;
- }
- else /* if (XCHARSET_DIMENSION (charset) == 2) */
- {
- byte1 = code_point >> 8;
- byte2 = code_point & 255;
- }
reg = i;
break;
}
while (!EQ (Vdefault_coded_charset_priority_list, Qnil))
{
- BREAKUP_CHAR (ch, charset, byte1, byte2);
+ code_point = ENCODE_CHAR (ch, charset);
if (XCHARSET_FINAL (charset))
goto found;
Vdefault_coded_charset_priority_list
= Fcdr (Fmemq (XCHARSET_NAME (charset),
Vdefault_coded_charset_priority_list));
}
- BREAKUP_CHAR (ch, charset, byte1, byte2);
+ code_point = ENCODE_CHAR (ch, charset);
if (!XCHARSET_FINAL (charset))
{
charset = Vcharset_ascii;
- byte1 = '~';
+ code_point = '~';
}
found:
Vdefault_coded_charset_priority_list
switch (XCHARSET_DIMENSION (charset))
{
case 1:
- Dynarr_add (dst, byte1 | charmask);
+ Dynarr_add (dst, (code_point & 0xFF) | charmask);
break;
case 2:
- Dynarr_add (dst, byte1 | charmask);
- Dynarr_add (dst, byte2 | charmask);
+ Dynarr_add (dst, ((code_point >> 8) & 0xFF) | charmask);
+ Dynarr_add (dst, ( code_point & 0xFF) | charmask);
+ break;
+ case 3:
+ Dynarr_add (dst, ((code_point >> 16) & 0xFF) | charmask);
+ Dynarr_add (dst, ((code_point >> 8) & 0xFF) | charmask);
+ Dynarr_add (dst, ( code_point & 0xFF) | charmask);
+ break;
+ case 4:
+ Dynarr_add (dst, ((code_point >> 24) & 0xFF) | charmask);
+ Dynarr_add (dst, ((code_point >> 16) & 0xFF) | charmask);
+ Dynarr_add (dst, ((code_point >> 8) & 0xFF) | charmask);
+ Dynarr_add (dst, ( code_point & 0xFF) | charmask);
break;
default:
abort ();
contain all 256 possible byte values and that are not to be
interpreted as being in any particular decoding. */
static void
-decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+decode_coding_no_conversion (Lstream *decoding, const Extbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
- unsigned char c;
struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
unsigned int flags = str->flags;
- unsigned int ch = str->ch;
+ unsigned int cpos = str->cpos;
eol_type_t eol_type = str->eol_type;
while (n--)
{
- c = *src++;
+ unsigned char c = *(unsigned char *)src++;
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
DECODE_ADD_BINARY_CHAR (c, dst);
label_continue_loop:;
}
- DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+ DECODE_HANDLE_END_OF_CONVERSION (flags, cpos, dst);
str->flags = flags;
- str->ch = ch;
+ str->cpos = cpos;
}
static void
-encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
- unsigned_char_dynarr *dst, unsigned int n)
+encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src,
+ unsigned_char_dynarr *dst, size_t n)
{
unsigned char c;
struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
}
\f
-/************************************************************************/
-/* Simple internal/external functions */
-/************************************************************************/
-
-static Extbyte_dynarr *conversion_out_dynarr;
-static Bufbyte_dynarr *conversion_in_dynarr;
-
-/* Determine coding system from coding format */
-
-/* #### not correct for all values of `fmt'! */
-static Lisp_Object
-external_data_format_to_coding_system (enum external_data_format fmt)
-{
- switch (fmt)
- {
- case FORMAT_FILENAME:
- case FORMAT_TERMINAL:
- if (EQ (Vfile_name_coding_system, Qnil) ||
- EQ (Vfile_name_coding_system, Qbinary))
- return Qnil;
- else
- return Fget_coding_system (Vfile_name_coding_system);
-#ifdef MULE
- case FORMAT_CTEXT:
- return Fget_coding_system (Qctext);
-#endif
- default:
- return Qnil;
- }
-}
-
-Extbyte *
-convert_to_external_format (CONST Bufbyte *ptr,
- Bytecount len,
- Extcount *len_out,
- enum external_data_format fmt)
-{
- Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
-
- if (!conversion_out_dynarr)
- conversion_out_dynarr = Dynarr_new (Extbyte);
- else
- Dynarr_reset (conversion_out_dynarr);
-
- if (NILP (coding_system))
- {
- CONST Bufbyte *end = ptr + len;
-
- for (; ptr < end;)
- {
-#ifdef UTF2000
- Bufbyte c =
- (*ptr < 0xc0) ? *ptr :
- ((*ptr & 0x1f) << 6) | (*(ptr+1) & 0x3f);
-#else
- Bufbyte c =
- (BYTE_ASCII_P (*ptr)) ? *ptr :
- (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) :
- (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
- '~';
-#endif
- Dynarr_add (conversion_out_dynarr, (Extbyte) c);
- INC_CHARPTR (ptr);
- }
-
-#ifdef ERROR_CHECK_BUFPOS
- assert (ptr == end);
-#endif
- }
- else
- {
- Lisp_Object instream, outstream, da_outstream;
- Lstream *istr, *ostr;
- struct gcpro gcpro1, gcpro2, gcpro3;
- char tempbuf[1024]; /* some random amount */
-
- instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
- da_outstream = make_dynarr_output_stream
- ((unsigned_char_dynarr *) conversion_out_dynarr);
- outstream =
- make_encoding_output_stream (XLSTREAM (da_outstream), coding_system);
- istr = XLSTREAM (instream);
- ostr = XLSTREAM (outstream);
- GCPRO3 (instream, outstream, da_outstream);
- while (1)
- {
- int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
- if (!size_in_bytes)
- break;
- Lstream_write (ostr, tempbuf, size_in_bytes);
- }
- Lstream_close (istr);
- Lstream_close (ostr);
- UNGCPRO;
- Lstream_delete (istr);
- Lstream_delete (ostr);
- Lstream_delete (XLSTREAM (da_outstream));
- }
- *len_out = Dynarr_length (conversion_out_dynarr);
- Dynarr_add (conversion_out_dynarr, 0); /* remember to zero-terminate! */
- return Dynarr_atp (conversion_out_dynarr, 0);
-}
-
-Bufbyte *
-convert_from_external_format (CONST Extbyte *ptr,
- Extcount len,
- Bytecount *len_out,
- enum external_data_format fmt)
-{
- Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
-
- if (!conversion_in_dynarr)
- conversion_in_dynarr = Dynarr_new (Bufbyte);
- else
- Dynarr_reset (conversion_in_dynarr);
-
- if (NILP (coding_system))
- {
- CONST Extbyte *end = ptr + len;
- for (; ptr < end; ptr++)
- {
- Extbyte c = *ptr;
- DECODE_ADD_BINARY_CHAR (c, conversion_in_dynarr);
- }
- }
- else
- {
- Lisp_Object instream, outstream, da_outstream;
- Lstream *istr, *ostr;
- struct gcpro gcpro1, gcpro2, gcpro3;
- char tempbuf[1024]; /* some random amount */
-
- instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
- da_outstream = make_dynarr_output_stream
- ((unsigned_char_dynarr *) conversion_in_dynarr);
- outstream =
- make_decoding_output_stream (XLSTREAM (da_outstream), coding_system);
- istr = XLSTREAM (instream);
- ostr = XLSTREAM (outstream);
- GCPRO3 (instream, outstream, da_outstream);
- while (1)
- {
- ssize_t size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
- if (!size_in_bytes)
- break;
- Lstream_write (ostr, tempbuf, size_in_bytes);
- }
- Lstream_close (istr);
- Lstream_close (ostr);
- UNGCPRO;
- Lstream_delete (istr);
- Lstream_delete (ostr);
- Lstream_delete (XLSTREAM (da_outstream));
- }
-
- *len_out = Dynarr_length (conversion_in_dynarr);
- Dynarr_add (conversion_in_dynarr, 0); /* remember to zero-terminate! */
- return Dynarr_atp (conversion_in_dynarr, 0);
-}
-
-\f
/************************************************************************/
/* Initialization */
/************************************************************************/
void
syms_of_file_coding (void)
{
+ INIT_LRECORD_IMPLEMENTATION (coding_system);
+
deferror (&Qcoding_system_error, "coding-system-error",
"Coding-system error", Qio_error);
DEFSUBR (Fcoding_system_name);
DEFSUBR (Fmake_coding_system);
DEFSUBR (Fcopy_coding_system);
+ DEFSUBR (Fcoding_system_canonical_name_p);
+ DEFSUBR (Fcoding_system_alias_p);
+ DEFSUBR (Fcoding_system_aliasee);
DEFSUBR (Fdefine_coding_system_alias);
DEFSUBR (Fsubsidiary_coding_system);
defsymbol (&Qlock_shift, "lock-shift");
defsymbol (&Qescape_quoted, "escape-quoted");
#endif /* MULE */
+#ifdef UTF2000
+ defsymbol (&Qdisable_composition, "disable-composition");
+#endif
defsymbol (&Qencode, "encode");
defsymbol (&Qdecode, "decode");
#ifdef MULE
- defsymbol (&Qctext, "ctext");
defsymbol (&coding_category_symbol[CODING_CATEGORY_SHIFT_JIS],
"shift-jis");
defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
Vterminal_coding_system = Qnil;
DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /*
-Overriding coding system used when writing a file or process.
-You should *bind* this, not set it. If this is non-nil, it specifies
-the coding system that will be used when a file or process is read
-in, and overrides `buffer-file-coding-system-for-read',
+Overriding coding system used when reading from a file or process.
+You should bind this variable with `let', but do not set it globally.
+If this is non-nil, it specifies the coding system that will be used
+to decode input on read operations, such as from a file or process.
+It overrides `buffer-file-coding-system-for-read',
`insert-file-contents-pre-hook', etc. Use those variables instead of
-this one for permanent changes to the environment.
-*/ );
+this one for permanent changes to the environment. */ );
Vcoding_system_for_read = Qnil;
DEFVAR_LISP ("coding-system-for-write",
&Vcoding_system_for_write /*
-Overriding coding system used when writing a file or process.
-You should *bind* this, not set it. If this is non-nil, it specifies
-the coding system that will be used when a file or process is wrote
-in, and overrides `buffer-file-coding-system',
-`write-region-pre-hook', etc. Use those variables instead of this one
-for permanent changes to the environment.
-*/ );
+Overriding coding system used when writing to a file or process.
+You should bind this variable with `let', but do not set it globally.
+If this is non-nil, it specifies the coding system that will be used
+to encode output for write operations, such as to a file or process.
+It overrides `buffer-file-coding-system', `write-region-pre-hook', etc.
+Use those variables instead of this one for permanent changes to the
+environment. */ );
Vcoding_system_for_write = Qnil;
DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /*
Fdefine_coding_system_alias (Qno_conversion, Qraw_text);
+ Fdefine_coding_system_alias (Qfile_name, Qbinary);
+
+ Fdefine_coding_system_alias (Qterminal, Qbinary);
+ Fdefine_coding_system_alias (Qkeyboard, Qbinary);
+
/* Need this for bootstrapping */
fcd->coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
Fget_coding_system (Qraw_text);
fcd->coding_category_system[CODING_CATEGORY_UTF8]
= Fget_coding_system (Qutf8);
#endif
+
+#if defined(MULE) && !defined(UTF2000)
+ { /* Reset the UCS->Mule lookup array: every slot of fcd->ucs_to_mule_table is set to Qnil. */
+ unsigned int i; /* index into fcd->ucs_to_mule_table */
+
+ for (i = 0; i < countof (fcd->ucs_to_mule_table); i++) /* bound is taken from the array itself, not a hard-coded size */
+ fcd->ucs_to_mule_table[i] = Qnil;
+ }
+ staticpro (&mule_to_ucs_table); /* NOTE(review): presumably registers the variable as a GC root before it is assigned -- confirm */
+ mule_to_ucs_table = Fmake_char_table(Qgeneric); /* reverse-direction table, created as a char table with the Qgeneric type argument */
+#endif /* defined(MULE) && !defined(UTF2000) */
}