+1999-09-14 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * mule/mule-charset.el (default-coded-charset-priority-list): Add
+ thai-tis620 and hebrew-iso8859-8; namely all BMP are covered.
+
+1999-09-13 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * mule/mule-charset.el (default-coded-charset-priority-list): Add
+ `cyrillic-iso8859-5' and `greek-iso8859-7'.
+
+1999-09-13 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * mule/mule-charset.el (default-coded-charset-priority-list):
+ Setup in UTF-2000.
+
1999-09-09 MORIOKA Tomohiko <tomo@etl.go.jp>
* mule/viet-chars.el: Don't define charset
(custom-add-loads 'fill '("simple" "fill"))
(custom-add-loads 'custom-magic-faces '("cus-edit"))
(custom-add-loads 'display '("modeline" "toolbar" "scrollbar" "auto-show"))
-(custom-add-loads 'faces '("faces" "font" "cus-edit" "font-lock" "hyper-apropos" "info" "wid-edit"))
+(custom-add-loads 'faces '("faces" "cus-edit" "font-lock" "font" "hyper-apropos" "info" "wid-edit"))
(custom-add-loads 'emacs '("faces" "help" "files" "cus-edit" "package-get"))
(custom-add-loads 'processes '("process" "gnuserv"))
(custom-add-loads 'hyper-apropos '("hyper-apropos"))
;; Copyright (C) 1992 Free Software Foundation, Inc.
;; Copyright (C) 1995 Amdahl Corporation.
;; Copyright (C) 1996 Sun Microsystems.
+;; Copyright (C) 1999 Electrotechnical Laboratory, JAPAN.
+;; Licensed to the Free Software Foundation.
;; Author: Unknown
;; Keywords: i18n, mule, internal
(put-char-table (car l) t auto-fill-chars)
(setq l (cdr l))))
+
+;;; @ Coded character set
+;;;
+
+(when (featurep 'utf-2000)
+ (setq default-coded-charset-priority-list
+ '(ascii
+ control-1
+ latin-iso8859-1
+ latin-iso8859-2
+ latin-iso8859-3
+ latin-iso8859-4
+ latin-iso8859-9
+ latin-jisx0201
+ cyrillic-iso8859-5
+ greek-iso8859-7
+ thai-tis620
+ hebrew-iso8859-8
+ vietnamese-viscii-lower
+ vietnamese-viscii-upper)))
+
;;; mule-charset.el ends here
+1999-09-14 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * char-ucs.h (breakup_char_1): Use
+ `Vdefault_coded_charset_priority_list' for hebrew-iso8859-8,
+ thai-tis620 and katakana-jisx0201 area.
+
+1999-09-13 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * char-ucs.h (breakup_char_1): Use
+ `Vdefault_coded_charset_priority_list' for cyrillic-iso8859-5
+ area.
+
+ * text-coding.c (reset_encoding_stream): Fixed.
+ (char_encode_ucs4): Delete `& 255'.
+
+ * char-ucs.h (breakup_char_1): Use
+ `Vdefault_coded_charset_priority_list' for greek-iso8859-7 area.
+
+1999-09-13 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * file-coding.c (Fmake_coding_system): Don't set up
+ `codesys->fixed.size'.
+ (encode_coding_no_conversion): Don't refer to
+ `str->codesys->fixed.size'.
+
+1999-09-13 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * mule-charset.c, char-ucs.h (latin_a_char_to_charset): Deleted.
+ (latin_a_char_to_byte1): Deleted.
+ (latin_a_char_to_byte2): Deleted.
+
+1999-09-13 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * mule-charset.c (make_charset): Add new argument `ucs_min',
+ `ucs_max' and `code_offset'.
+ (charset_get_byte1): New implementation [delete implementations
+ that depend on specific charsets].
+ (Fmake_charset): Modify for `make_charset'.
+ (Fmake_reverse_direction_charset): Likewise.
+ (complex_vars_of_mule_charset): Likewise.
+
+ * char-ucs.h (struct Lisp_Charset): Add `ucs_min', `ucs_max' and
+ `code_offset'.
+ (CHARSET_UCS_MIN): New macro.
+ (CHARSET_UCS_MAX): New macro.
+ (CHARSET_CODE_OFFSET): New macro.
+ (MAKE_CHAR): Delete charset-dependent definitions [except
+ katakana-jisx0201].
+
+1999-09-13 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * char-ucs.h (breakup_char_1): Use
+ `Vdefault_coded_charset_priority_list' for C0-Controls,
+ Basic-Latin, C1-Controls and Latin-1-Supplement area.
+
+1999-09-13 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * char-ucs.h (charset_get_byte1): New function.
+ (XCHARSET_GET_BYTE1): Deleted.
+ (charset_get_byte2): New function.
+ (XCHARSET_GET_BYTE2): Deleted.
+ (Vdefault_coded_charset_priority_list): New external variable.
+ (breakup_char_1): Use `charset_get_byte1', `charset_get_byte2' and
+ `Vdefault_coded_charset_priority_list'.
+
+ * mule-charset.c (charset_get_byte1): New function.
+ (charset_get_byte2): New function.
+ (Vdefault_coded_charset_priority_list): New variable.
+ (vars_of_mule_charset): Add new variable
+ `default-coded-charset-priority-list'.
+
+1999-09-12 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * char-ucs.h (XCHARSET_GET_BYTE1): New inline function.
+ (XCHARSET_GET_BYTE2): New inline function.
+ (breakup_char_1): Use `XCHARSET_GET_BYTE1' and
+ `XCHARSET_GET_BYTE2'.
+
+1999-09-12 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * mule-charset.c (make_charset): Initialize
+ `CHARSET_TO_BYTE1_TABLE(cs)' and `CHARSET_TO_BYTE2_TABLE(cs)' to
+ NULL if the table is not defined.
+
+1999-09-11 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * text-coding.c (char_encode_shift_jis): Use
+ `XCHARSET_TO_BYTE1_TABLE' for `Vcharset_latin_jisx0201' instead of
+ `ucs_to_latin_jisx0201'.
+
+ * mule-charset.c (ucs_to_latin_jisx0201): Deleted.
+ (ucs_to_latin_iso8859_2): Deleted.
+ (ucs_to_latin_iso8859_3): Deleted.
+ (ucs_to_latin_iso8859_4): Deleted.
+ (ucs_to_latin_iso8859_9): Deleted.
+ (ucs_to_latin_viscii_lower): Deleted.
+ (ucs_to_latin_viscii_upper): Deleted.
+ (ucs_to_latin_tcvn5712): Deleted.
+ (make_charset): Add new argument `decoding_table'; set up
+ `CHARSET_DECODING_TABLE(cs)' in UTF-2000; set up
+ `CHARSET_TO_BYTE1_TABLE(cs)' for 94-set and 96-set if
+ `decoding_table' is defined in UTF-2000.
+ (Fmake_charset): Modify for `make_charset'.
+ (Fmake_reverse_direction_charset): Likewise.
+ (complex_vars_of_mule_charset): Likewise; delete `GENERATE_94_SET'
+ and `GENERATE_96_SET'.
+
+ * char-ucs.h (latin_jisx0201_to_ucs): Deleted.
+ (ucs_to_latin_jisx0201): Deleted.
+ (latin_iso8859_2_to_ucs): Deleted.
+ (ucs_to_latin_iso8859_2): Deleted.
+ (latin_iso8859_3_to_ucs): Deleted.
+ (ucs_to_latin_iso8859_3): Deleted.
+ (latin_iso8859_4_to_ucs): Deleted.
+ (ucs_to_latin_iso8859_4): Deleted.
+ (latin_iso8859_9_to_ucs): Deleted.
+ (ucs_to_latin_iso8859_9): Deleted.
+ (latin_viscii_lower_to_ucs): Deleted.
+ (ucs_to_latin_viscii_lower): Deleted.
+ (latin_viscii_upper_to_ucs): Deleted.
+ (ucs_to_latin_viscii_upper): Deleted.
+ (struct Lisp_Charset): Renamed `encoding_table' to
+ `to_byte1_table'; add `to_byte2_table'.
+ (CHARSET_DECODING_TABLE): New macro.
+ (CHARSET_TO_BYTE1_TABLE): New macro.
+ (CHARSET_TO_BYTE2_TABLE): New macro.
+ (XCHARSET_DECODING_TABLE): New macro.
+ (XCHARSET_TO_BYTE1_TABLE): New macro.
+ (XCHARSET_TO_BYTE2_TABLE): New macro.
+ (MAKE_CHAR): Use `XCHARSET_DECODING_TABLE'; don't use `*_to_ucs'
+ tables.
+ (breakup_char_1): Use `XCHARSET_TO_BYTE1_TABLE' if it is defined;
+ don't use `ucs_to_*' tables.
+
+1999-09-11 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * text-coding.c (Fmake_coding_system): Don't set up
+ `codesys->fixed.size'.
+ (encode_coding_no_conversion): Use `if' instead of `switch'.
+
+ * file-coding.h (struct Lisp_Coding_System): Delete `fixed.size'.
+
+1999-09-11 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * mule-charset.c (make_charset): Delete argument `rep_bytes'.
+ (Fmake_charset): Modify for `make_charset'.
+ (Fmake_reverse_direction_charset): Likewise.
+ (complex_vars_of_mule_charset): Likewise.
+
+1999-09-11 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * text-coding.c (char_encode_shift_jis): Use table
+ `ucs_to_latin_jisx0201' and BREAKUP_CHAR.
+
+1999-09-11 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * text-coding.c (text_encode_generic): Use `if' instead of
+ `switch'.
+ (decode_coding_sjis): Use `MAKE_CHAR' and `DECODE_ADD_UCS_CHAR' to
+ decode JIS-Latin.
+
+1999-09-10 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * text-coding.c (encode_coding_sjis): Deleted.
+ (char_encode_shift_jis): New function.
+ (char_finish_shift_jis): New function.
+ (reset_encoding_stream): Set up `encode_char' and `finish' for
+ `CODESYS_UCS4' and `CODESYS_SHIFT_JIS'.
+ (mule_encode): Use generic encoder for `CODESYS_SHIFT_JIS'.
+ (char_encode_utf8): Treat `eol_type'.
+
+1999-09-10 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * file-coding.c (decode_coding_iso2022): Use
+ `DECODE_ADD_UCS_CHAR'; don't use `XCHARSET_REP_BYTES'.
+
+1999-09-10 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * mule-charset.c (vars_of_mule_charset): Update `utf-2000-version'
+ to 0.7 (Hirano).
+
+1999-09-10 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * char-lb.h (CHAR_COLUMNS): New macro.
+
+1999-09-09 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * text-coding.c (char_encode_ucs4): New function.
+ (char_finish_ucs4): New function.
+ (encode_coding_ucs4): Deleted.
+ (mule_encode): Use generic encoder for `CODESYS_UCS4'.
+ (text_encode_generic): Delete local variables `charset' and `half'.
+ (ucs_to_mule_table): Deleted.
+ (mule_to_ucs_table): Deleted.
+ (Fset_ucs_char): Deleted.
+ (ucs_to_char): Deleted.
+ (Fucs_char): Deleted.
+ (Fset_char_ucs): Deleted.
+ (Fchar_ucs): Deleted.
+ (decode_ucs4): Deleted.
+ (mule_char_to_ucs4): Deleted.
+ (encode_ucs4): Deleted.
+ (decode_coding_ucs4): Use `DECODE_ADD_UCS_CHAR'.
+ (decode_coding_utf8): Likewise.
+ (decode_coding_iso2022): Likewise; don't use `XCHARSET_REP_BYTES'.
+ (char_encode_iso2022): Fixed.
+ (syms_of_file_coding): Delete `Fset_ucs_char', `Fucs_char',
+ `Fset_char_ucs' and `Fchar_ucs'.
+ (complex_vars_of_file_coding): Don't initialize
+ `ucs_to_mule_table'.
+
+ * objects-tty.c (tty_initialize_font_instance): Don't use
+ `XCHARSET_COLUMNS'.
+
+ * mule-charset.c (make_charset): Don't set up CHARSET_REP_BYTES in
+ UTF-2000.
+
+ * redisplay-tty.c (tty_output_display_block): Use `CHAR_COLUMNS'
+ instead of `XCHARSET_COLUMNS' and `CHAR_CHARSET'.
+
+ * insdel.c (bufbyte_string_displayed_columns): Use `CHAR_COLUMNS'
+ instead of `XCHARSET_COLUMNS' and `CHAR_CHARSET'.
+ (emchar_string_displayed_columns): Likewise.
+
+ * indent.c (column_at_point): Use `CHAR_COLUMNS' instead of
+ `XCHARSET_COLUMNS' and `CHAR_CHARSET'.
+ (string_column_at_point): Likewise.
+ (Fmove_to_column): Likewise.
+
+ * char-ucs.h (struct Lisp_Charset): Delete `rep_bytes'; add
+ `encoding_table' and `decoding_table'.
+ (CHARSET_REP_BYTES): Deleted.
+ (XCHARSET_REP_BYTES): Deleted.
+ (XCHARSET_COLUMNS): Deleted.
+ (CHAR_COLUMNS): New macro.
+ (lookup_composite_char): Deleted unconditionally.
+ (composite_char_string): Likewise.
+
1999-09-09 MORIOKA Tomohiko <tomo@etl.go.jp>
* char-ucs.h (Emchar_to_byte_table): New type.
return ((unsigned int) (ch) <= 0xff) || non_ascii_valid_char_p (ch);
}
+#define CHAR_COLUMNS(c) (XCHARSET_COLUMNS(CHAR_CHARSET(c)))
+
#endif /* _XEMACS_CHAR_LB_H */
#define CHAR_ASCII_P(ch) ((ch) <= 0x7F)
\f
-unsigned char
-get_byte_from_character_table (Emchar ch,
- Emchar* table, size_t size, unsigned char offset);
+int
+get_byte_from_character_table (Emchar ch, Lisp_Object ccs);
extern Lisp_Object Vcharset_ucs_bmp;
-
extern Lisp_Object Vcharset_latin_jisx0201;
-extern Emchar latin_jisx0201_to_ucs[94];
-
extern Lisp_Object Vcharset_latin_iso8859_2;
-extern Emchar latin_iso8859_2_to_ucs[96];
-
extern Lisp_Object Vcharset_latin_iso8859_3;
-extern Emchar latin_iso8859_3_to_ucs[96];
-
extern Lisp_Object Vcharset_latin_iso8859_4;
-extern Emchar latin_iso8859_4_to_ucs[96];
-
extern Lisp_Object Vcharset_latin_iso8859_9;
-extern Emchar latin_iso8859_9_to_ucs[96];
-
extern Lisp_Object Vcharset_latin_viscii_lower;
-extern Emchar latin_viscii_lower_to_ucs[96];
-
extern Lisp_Object Vcharset_latin_viscii_upper;
-extern Emchar latin_viscii_upper_to_ucs[96];
\f
/************************************************************************/
/* Final byte of this character set in ISO2022 designating escape sequence */
Bufbyte final;
- /* Number of bytes (1 - 4) required in the internal representation
- for characters in this character set. This is *not* the
- same as the dimension of the character set). */
- unsigned int rep_bytes;
-
/* Number of columns a character in this charset takes up, on TTY
devices. Not used for X devices. */
unsigned int columns;
/* Which half of font to be used to display this character set */
unsigned int graphic;
+
+ /* Byte->character mapping table */
+ Emchar* decoding_table;
+
+ /* Range of character code */
+ Emchar ucs_min, ucs_max;
+
+ /* Offset for external representation */
+ Emchar code_offset;
};
typedef struct Lisp_Charset Lisp_Charset;
#define CHARSET_NAME(cs) ((cs)->name)
#define CHARSET_SHORT_NAME(cs) ((cs)->short_name)
#define CHARSET_LONG_NAME(cs) ((cs)->long_name)
-#define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes)
#define CHARSET_COLUMNS(cs) ((cs)->columns)
#define CHARSET_GRAPHIC(cs) ((cs)->graphic)
#define CHARSET_TYPE(cs) ((cs)->type)
#define CHARSET_DIMENSION(cs) ((cs)->dimension)
#define CHARSET_CHARS(cs) ((cs)->chars)
#define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
+#define CHARSET_DECODING_TABLE(cs) ((cs)->decoding_table)
+#define CHARSET_UCS_MIN(cs) ((cs)->ucs_min)
+#define CHARSET_UCS_MAX(cs) ((cs)->ucs_max)
+#define CHARSET_CODE_OFFSET(cs) ((cs)->code_offset)
#define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs))
#define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs))
#define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs))
#define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs))
-#define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs))
-#define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs))
#define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs))
#define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs))
#define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs))
#define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs))
#define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
+#define XCHARSET_DECODING_TABLE(cs) CHARSET_DECODING_TABLE(XCHARSET(cs))
struct charset_lookup {
/* Table of charsets indexed by (leading byte - MIN_LEADING_BYTE). */
INLINE_HEADER Emchar
MAKE_CHAR (Lisp_Object charset, int c1, int c2)
{
- if (EQ (charset, Vcharset_ascii))
- return c1;
- else if (EQ (charset, Vcharset_control_1))
- return c1 | 0x80;
- else if (EQ (charset, Vcharset_ucs_bmp))
- return (c1 << 8) | c2;
- else if (EQ (charset, Vcharset_latin_iso8859_1))
- return c1 | 0x80;
- else if (EQ (charset, Vcharset_latin_iso8859_2))
- return latin_iso8859_2_to_ucs[c1 - 32];
- else if (EQ (charset, Vcharset_latin_iso8859_3))
- return latin_iso8859_3_to_ucs[c1 - 32];
- else if (EQ (charset, Vcharset_latin_iso8859_4))
- return latin_iso8859_4_to_ucs[c1 - 32];
- else if (EQ (charset, Vcharset_cyrillic_iso8859_5))
- return c1 + MIN_CHAR_CYRILLIC - 0x20;
- else if (EQ (charset, Vcharset_greek_iso8859_7))
- return c1 + MIN_CHAR_GREEK - 0x20;
- else if (EQ (charset, Vcharset_hebrew_iso8859_8))
- return c1 + MIN_CHAR_HEBREW - 0x20;
- else if (EQ (charset, Vcharset_latin_iso8859_9))
- return latin_iso8859_9_to_ucs[c1 - 32];
- else if (EQ (charset, Vcharset_thai_tis620))
- return c1 + MIN_CHAR_THAI - 0x20;
+ Emchar* decoding_table;
+
+ if ((decoding_table = XCHARSET_DECODING_TABLE (charset)) != NULL)
+ return decoding_table[c1 - (XCHARSET_CHARS (charset) == 94 ? 33 : 32)];
else if (EQ (charset, Vcharset_katakana_jisx0201))
if (c1 < 0x60)
return c1 + MIN_CHAR_HALFWIDTH_KATAKANA - 0x20;
else
- return 32;
- else if (EQ (charset, Vcharset_latin_jisx0201))
- return latin_jisx0201_to_ucs[c1 - 33];
- else if (EQ (charset, Vcharset_latin_viscii_lower))
- return latin_viscii_lower_to_ucs[c1 - 32];
- else if (EQ (charset, Vcharset_latin_viscii_upper))
- return latin_viscii_upper_to_ucs[c1 - 32];
+ /* return MIN_CHAR_94 + ('I' - '0') * 94 + (c1 - 33); */
+ return ' ';
+ else if (CHARSET_UCS_MAX (XCHARSET (charset)))
+ return c1 - CHARSET_CODE_OFFSET (XCHARSET (charset))
+ + CHARSET_UCS_MIN (XCHARSET (charset));
else if (XCHARSET_DIMENSION (charset) == 1)
{
switch (XCHARSET_CHARS (charset))
}
}
-extern Charset_ID latin_a_char_to_charset[128];
-extern unsigned char latin_a_char_to_byte1[128];
-extern unsigned char latin_a_char_to_byte2[128];
+unsigned char charset_get_byte1 (Lisp_Object charset, Emchar ch);
+unsigned char charset_get_byte2 (Lisp_Object charset, Emchar ch);
+
+extern Lisp_Object Vdefault_coded_charset_priority_list;
+EXFUN (Ffind_charset, 1);
INLINE_HEADER void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2);
INLINE_HEADER void
breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2)
{
- if (c <= MAX_CHAR_BASIC_LATIN)
- {
- *charset = Vcharset_ascii;
- *c1 = c;
- *c2 = 0;
- }
- else if (c < 0xA0)
- {
- *charset = Vcharset_control_1;
- *c1 = c & 0x7f;
- *c2 = 0;
- }
- else if (c <= 0xff)
- {
- *charset = Vcharset_latin_iso8859_1;
- *c1 = c & 0x7f;
- *c2 = 0;
- }
- else if (c <= 0x17f)
- {
- *charset
- = CHARSET_BY_LEADING_BYTE (latin_a_char_to_charset[c - 0x100]);
-
- if (EQ (*charset, Vcharset_latin_iso8859_2))
- {
- *c1 = get_byte_from_character_table (c, latin_iso8859_2_to_ucs, 96, 32);
- *c2 = 0;
- }
- else if (EQ (*charset, Vcharset_latin_iso8859_3))
- {
- *c1 = get_byte_from_character_table (c, latin_iso8859_3_to_ucs, 96, 32);
- *c2 = 0;
- }
- else if (EQ (*charset, Vcharset_latin_iso8859_4))
- {
- *c1 = get_byte_from_character_table (c, latin_iso8859_4_to_ucs, 96, 32);
- *c2 = 0;
- }
- else if (EQ (*charset, Vcharset_latin_iso8859_9))
- {
- *c1 = get_byte_from_character_table (c, latin_iso8859_9_to_ucs, 96, 32);
- *c2 = 0;
- }
- else
- {
- *c1 = latin_a_char_to_byte1[c - 0x100];
- *c2 = latin_a_char_to_byte2[c - 0x100];
- }
- }
- else if (c < MIN_CHAR_GREEK)
+ if (c < MIN_CHAR_94)
{
- if ( (*c1 = get_byte_from_character_table (c, latin_iso8859_2_to_ucs,
- 96, 32)) )
+ Lisp_Object charsets = Vdefault_coded_charset_priority_list;
+ while (!EQ (charsets, Qnil))
{
- *charset = Vcharset_latin_iso8859_2;
- *c2 = 0;
+ *charset = Ffind_charset (Fcar (charsets));
+ if (!EQ (*charset, Qnil)
+ && (*c1 = charset_get_byte1 (*charset, c)) )
+ {
+ *c2 = charset_get_byte2 (*charset, c);
+ return;
+ }
+ charsets = Fcdr (charsets);
}
- else if ( (*c1 =
- get_byte_from_character_table (c, latin_iso8859_3_to_ucs,
- 96, 32)) )
+ /* otherwise --- maybe for bootstrap */
+ if (c <= MAX_CHAR_BASIC_LATIN)
{
- *charset = Vcharset_latin_iso8859_3;
- *c2 = 0;
+ *charset = Vcharset_ascii;
+ *c1 = charset_get_byte1 (*charset, c);
+ *c2 = charset_get_byte2 (*charset, c);
}
- else if ( (*c1 =
- get_byte_from_character_table (c, latin_iso8859_4_to_ucs,
- 96, 32)) )
+ else if (c < 0xA0)
{
- *charset = Vcharset_latin_iso8859_4;
- *c2 = 0;
+ *charset = Vcharset_control_1;
+ *c1 = charset_get_byte1 (*charset, c);
+ *c2 = charset_get_byte2 (*charset, c);
}
- else if ( (*c1 =
- get_byte_from_character_table (c, latin_iso8859_9_to_ucs,
- 96, 32)) )
+ else if (c <= 0xff)
{
- *charset = Vcharset_latin_iso8859_9;
- *c2 = 0;
+ *charset = Vcharset_latin_iso8859_1;
+ *c1 = charset_get_byte1 (*charset, c);
+ *c2 = charset_get_byte2 (*charset, c);
}
- else if ( (*c1 =
- get_byte_from_character_table (c, latin_viscii_lower_to_ucs,
- 96, 32)) )
+ else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
{
- *charset = Vcharset_latin_viscii_lower;
+ *charset = Vcharset_greek_iso8859_7;
+ *c1 = c - MIN_CHAR_GREEK + 0x20;
*c2 = 0;
}
- else if ( (*c1 =
- get_byte_from_character_table (c, latin_viscii_upper_to_ucs,
- 96, 32)) )
+ else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
{
- *charset = Vcharset_latin_viscii_upper;
+ *charset = Vcharset_cyrillic_iso8859_5;
+ *c1 = c - MIN_CHAR_CYRILLIC + 0x20;
*c2 = 0;
}
- else
+ else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
{
- *charset = Vcharset_ucs_bmp;
- *c1 = c >> 8;
- *c2 = c & 0xff;
- }
- }
- else if (c <= MAX_CHAR_GREEK)
- {
- *charset = Vcharset_greek_iso8859_7;
- *c1 = c - MIN_CHAR_GREEK + 0x20;
- *c2 = 0;
- }
- else if (c < MIN_CHAR_CYRILLIC)
- {
- *charset = Vcharset_ucs_bmp;
- *c1 = c >> 8;
- *c2 = c & 0xff;
- }
- else if (c <= MAX_CHAR_CYRILLIC)
- {
- *charset = Vcharset_cyrillic_iso8859_5;
- *c1 = c - MIN_CHAR_CYRILLIC + 0x20;
- *c2 = 0;
- }
- else if (c < MIN_CHAR_HEBREW)
- {
- *charset = Vcharset_ucs_bmp;
- *c1 = c >> 8;
- *c2 = c & 0xff;
- }
- else if (c <= MAX_CHAR_HEBREW)
- {
- *charset = Vcharset_hebrew_iso8859_8;
- *c1 = c - MIN_CHAR_HEBREW + 0x20;
- *c2 = 0;
- }
- else if (c < MIN_CHAR_THAI)
- {
- *charset = Vcharset_ucs_bmp;
- *c1 = c >> 8;
- *c2 = c & 0xff;
- }
- else if (c <= MAX_CHAR_THAI)
- {
- *charset = Vcharset_thai_tis620;
- *c1 = c - MIN_CHAR_THAI + 0x20;
- *c2 = 0;
- }
- else if (c < MIN_CHAR_HALFWIDTH_KATAKANA)
- {
- if ( (*c1 = get_byte_from_character_table (c, latin_jisx0201_to_ucs,
- 94, 33)) )
- {
- *charset = Vcharset_latin_jisx0201;
+ *charset = Vcharset_hebrew_iso8859_8;
+ *c1 = c - MIN_CHAR_HEBREW + 0x20;
*c2 = 0;
}
- else if ( (*c1 = get_byte_from_character_table (c,
- latin_viscii_lower_to_ucs,
- 96, 32)) )
+ else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
{
- *charset = Vcharset_latin_viscii_lower;
+ *charset = Vcharset_thai_tis620;
+ *c1 = c - MIN_CHAR_THAI + 0x20;
*c2 = 0;
}
- else if ( (*c1 = get_byte_from_character_table (c,
- latin_viscii_upper_to_ucs,
- 96, 32)) )
+ else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
+ && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
{
- *charset = Vcharset_latin_viscii_upper;
+ *charset = Vcharset_katakana_jisx0201;
+ *c1 = c - MIN_CHAR_HALFWIDTH_KATAKANA + 0x20;
*c2 = 0;
}
else
*c2 = c & 0xff;
}
}
- else if (c <= MAX_CHAR_HALFWIDTH_KATAKANA)
- {
- *charset = Vcharset_katakana_jisx0201;
- *c1 = c - MIN_CHAR_HALFWIDTH_KATAKANA + 0x20;
- *c2 = 0;
- }
else if (c <= MAX_CHAR_94)
{
*charset
#define CHAR_LEADING_BYTE(c) (XCHARSET_LEADING_BYTE(CHAR_CHARSET(c)))
-\f
-#ifdef ENABLE_COMPOSITE_CHARS
-/************************************************************************/
-/* Composite characters */
-/************************************************************************/
-
-Emchar lookup_composite_char (Bufbyte *str, int len);
-Lisp_Object composite_char_string (Emchar ch);
-#endif /* ENABLE_COMPOSITE_CHARS */
+#define CHAR_COLUMNS(c) (CHARSET_COLUMNS(XCHARSET(CHAR_CHARSET(c))))
\f
/************************************************************************/
/* Exported functions */
/************************************************************************/
-EXFUN (Ffind_charset, 1);
EXFUN (Fget_charset, 1);
extern Lisp_Object Vcharset_chinese_big5_1;
termcap.o: $(LISP_H) conslots.h console.h device.h
terminfo.o: config.h
tests.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h lstream.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-charset.h multibyte.h opaque.h
+text-coding.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h elhash.h file-coding.h insdel.h lstream.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-ccl.h mule-charset.h multibyte.h opaque.h
toolbar.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h conslots.h console.h device.h frame.h frameslots.h glyphs.h gui.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-charset.h multibyte.h redisplay.h scrollbar.h specifier.h toolbar.h window.h winslots.h
tooltalk.o: $(LISP_H) buffer.h bufslots.h casetab.h char-1byte.h char-lb.h char-ucs.h character.h chartab.h elhash.h mb-1byte.h mb-lb.h mb-multibyte.h mb-utf-8.h mule-charset.h multibyte.h process.h syssignal.h tooltalk.h
tparam.o: config.h
CHECK_STRING (doc_string);
CODING_SYSTEM_DOC_STRING (codesys) = doc_string;
-#ifdef UTF2000
- if (ty == CODESYS_NO_CONVERSION)
- codesys->fixed.size = 1;
-#endif
{
EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props)
{
charset = new_charset;
}
-#ifndef UTF2000
+#ifdef UTF2000
+ if (XCHARSET_DIMENSION (charset) == 1)
+ {
+ DECODE_OUTPUT_PARTIAL_CHAR (ch);
+ DECODE_ADD_UCS_CHAR
+ (MAKE_CHAR (charset, c & 0x7F, 0), dst);
+ }
+ else if (ch)
+ {
+ DECODE_ADD_UCS_CHAR
+ (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst);
+ ch = 0;
+ }
+ else
+ ch = c;
+#else
lb = XCHARSET_LEADING_BYTE (charset);
-#endif
switch (XCHARSET_REP_BYTES (charset))
{
case 1: /* ASCII */
case 2: /* one-byte official */
DECODE_OUTPUT_PARTIAL_CHAR (ch);
-#ifdef UTF2000
- DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), dst);
-#else
Dynarr_add (dst, lb);
Dynarr_add (dst, c | 0x80);
-#endif
break;
case 3: /* one-byte private or two-byte official */
-#ifdef UTF2000
- if (XCHARSET_DIMENSION (charset) == 1)
-#else
if (XCHARSET_PRIVATE_P (charset))
-#endif
{
DECODE_OUTPUT_PARTIAL_CHAR (ch);
-#ifdef UTF2000
- DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0),
- dst);
-#else
Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1);
Dynarr_add (dst, lb);
Dynarr_add (dst, c | 0x80);
-#endif
}
else
{
if (ch)
{
-#ifdef UTF2000
- DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
- ch & 0x7F,
- c & 0x7F), dst);
-#else
Dynarr_add (dst, lb);
Dynarr_add (dst, ch | 0x80);
Dynarr_add (dst, c | 0x80);
-#endif
ch = 0;
}
else
default: /* two-byte private */
if (ch)
{
-#ifdef UTF2000
- DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
- ch & 0x7F,
- c & 0x7F), dst);
-#else
Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2);
Dynarr_add (dst, lb);
Dynarr_add (dst, ch | 0x80);
Dynarr_add (dst, c | 0x80);
-#endif
ch = 0;
}
else
ch = c;
}
+#endif
}
if (!ch)
break;
case 1:
ch = ( ch << 6 ) | ( c & 0x3f );
- switch ( str->codesys->fixed.size )
- {
- case 1:
- Dynarr_add (dst, ch & 0xff);
- break;
- case 2:
- Dynarr_add (dst, (ch >> 8) & 0xff);
- Dynarr_add (dst, ch & 0xff);
- break;
- case 3:
- Dynarr_add (dst, (ch >> 16) & 0xff);
- Dynarr_add (dst, (ch >> 8) & 0xff);
- Dynarr_add (dst, ch & 0xff);
- break;
- case 4:
- Dynarr_add (dst, (ch >> 24) & 0xff);
- Dynarr_add (dst, (ch >> 16) & 0xff);
- Dynarr_add (dst, (ch >> 8) & 0xff);
- Dynarr_add (dst, ch & 0xff);
- break;
- default:
- fprintf(stderr, "It seems %d bytes stream.\n",
- str->codesys->fixed.size);
- abort ();
- }
+ Dynarr_add (dst, ch & 0xff);
char_boundary = 0;
break;
default:
} iso2022;
struct
{
- unsigned char size;
- } fixed;
- struct
- {
/* For a CCL coding system, these specify the CCL programs used for
decoding (input) and encoding (output). */
Lisp_Object decode;
+ displayed_glyphs->end_columns));
#else /* XEmacs */
#ifdef MULE
- col += XCHARSET_COLUMNS (CHAR_CHARSET (c));
+ col += CHAR_COLUMNS (c);
#else
col ++;
#endif /* MULE */
break;
else
#ifdef MULE
- col += XCHARSET_COLUMNS (CHAR_CHARSET (c));
+ col += CHAR_COLUMNS (c);
#else
col ++;
#endif /* MULE */
+ displayed_glyphs->end_columns));
#else /* XEmacs */
#ifdef MULE
- col += XCHARSET_COLUMNS (CHAR_CHARSET (c));
+ col += CHAR_COLUMNS (c);
#else
col ++;
#endif /* MULE */
{
#ifdef MULE
Emchar ch = charptr_emchar (str);
- cols += XCHARSET_COLUMNS (CHAR_CHARSET (ch));
+ cols += CHAR_COLUMNS (ch);
#else
cols++;
#endif
int i;
for (i = 0; i < len; i++)
- cols += XCHARSET_COLUMNS (CHAR_CHARSET (str[i]));
+ cols += CHAR_COLUMNS (str[i]);
return cols;
#else /* not MULE */
#endif
#ifdef UTF2000
-unsigned char
-get_byte_from_character_table (Emchar ch,
- Emchar* table, size_t size, unsigned char offset)
+int
+get_byte_from_character_table (Emchar ch, Lisp_Object ccs)
{
- size_t i;
+ Lisp_Charset* cs = XCHARSET(ccs);
- for (i = 0; i < size; i++)
+ if (CHARSET_DIMENSION (cs) == 1)
{
- if (table[i] == ch)
- return i + offset;
+ Emchar* table = CHARSET_DECODING_TABLE (cs);
+ size_t size = CHARSET_CHARS (cs);
+ unsigned char offset = CHARSET_CODE_OFFSET (cs);
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ {
+ if (table[i] == ch)
+ return i + offset;
+ }
}
- return 0;
+ return -1;
}
#define CHAR96(ft,b) (MIN_CHAR_96 + (ft - '0') * 96 + (b & 0x7f) - 32)
0x203E /* 0x7E OVERLINE */
};
-
Emchar latin_iso8859_2_to_ucs[96] =
{
0x00A0 /* 0xA0 NO-BREAK SPACE */,
CHAR96('2', 0x7f)
};
-
Emchar latin_tcvn5712_to_ucs[96] =
{
0x00A0 /* 0xA0 NO-BREAK SPACE */,
0x1ED0 /* 0xFF LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */
};
-Charset_ID latin_a_char_to_charset[128] = {
- /* U+0100 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0101 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0102 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0103 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0104 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0105 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0106 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0107 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0108 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0109 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+010A */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+010B */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+010C */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+010D */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+010E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+010F */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0110 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0111 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0112 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0113 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0114 */ LEADING_BYTE_UCS_BMP,
- /* U+0115 */ LEADING_BYTE_UCS_BMP,
- /* U+0116 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0117 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0118 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0119 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+011A */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+011B */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+011C */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+011D */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+011E */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+011F */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0120 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0121 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0122 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0123 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0124 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0125 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0126 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0127 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0128 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0129 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+012A */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+012B */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+012C */ LEADING_BYTE_UCS_BMP,
- /* U+012D */ LEADING_BYTE_UCS_BMP,
- /* U+012E */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+012F */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0130 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0131 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0132 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0133 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0134 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0135 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0136 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0137 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0138 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0139 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+013A */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+013B */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+013C */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+013D */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+013E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+013F */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0140 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0141 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0142 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0143 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0144 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0145 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0146 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0147 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0148 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0149 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+014A */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+014B */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+014C */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+014D */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+014E */ LEADING_BYTE_UCS_BMP,
- /* U+014F */ LEADING_BYTE_UCS_BMP,
- /* U+0150 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0151 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0152 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0153 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0154 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0155 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0156 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0157 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0158 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0159 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+015A */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+015B */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+015C */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+015D */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+015E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+015F */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0160 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0161 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0162 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0163 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0164 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0165 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0166 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0167 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0168 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0169 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+016A */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+016B */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+016C */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+016D */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+016E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+016F */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0170 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0171 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0172 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0173 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0174 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0175 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0176 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0177 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0178 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0179 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017A */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017B */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017C */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017D */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017F */ LEADING_BYTE_UCS_BMP
-};
-
-unsigned char latin_a_char_to_byte1[128] = {
- /* U+0100 */ 0xC0 - 0x80,
- /* U+0101 */ 0xE0 - 0x80,
- /* U+0102 */ 0xC3 - 0x80,
- /* U+0103 */ 0xE3 - 0x80,
- /* U+0104 */ 0xA1 - 0x80,
- /* U+0105 */ 0xB1 - 0x80,
- /* U+0106 */ 0xC6 - 0x80,
- /* U+0107 */ 0xE6 - 0x80,
- /* U+0108 */ 0xC6 - 0x80,
- /* U+0109 */ 0xE6 - 0x80,
- /* U+010A */ 0xC5 - 0x80,
- /* U+010B */ 0xE5 - 0x80,
- /* U+010C */ 0xC8 - 0x80,
- /* U+010D */ 0xE8 - 0x80,
- /* U+010E */ 0xCF - 0x80,
- /* U+010F */ 0xEF - 0x80,
- /* U+0110 */ 0xD0 - 0x80,
- /* U+0111 */ 0xF0 - 0x80,
- /* U+0112 */ 0xAA - 0x80,
- /* U+0113 */ 0xBA - 0x80,
- /* U+0114 */ 0x01,
- /* U+0115 */ 0x01,
- /* U+0116 */ 0xCC - 0x80,
- /* U+0117 */ 0xEC - 0x80,
- /* U+0118 */ 0xCA - 0x80,
- /* U+0119 */ 0xEA - 0x80,
- /* U+011A */ 0xCC - 0x80,
- /* U+011B */ 0xEC - 0x80,
- /* U+011C */ 0xD8 - 0x80,
- /* U+011D */ 0xF8 - 0x80,
- /* U+011E */ 0xAB - 0x80,
- /* U+011F */ 0xBB - 0x80,
- /* U+0120 */ 0xD5 - 0x80,
- /* U+0121 */ 0xF5 - 0x80,
- /* U+0122 */ 0xAB - 0x80,
- /* U+0123 */ 0xBB - 0x80,
- /* U+0124 */ 0xA6 - 0x80,
- /* U+0125 */ 0xB6 - 0x80,
- /* U+0126 */ 0xA1 - 0x80,
- /* U+0127 */ 0xB1 - 0x80,
- /* U+0128 */ 0xA5 - 0x80,
- /* U+0129 */ 0xB5 - 0x80,
- /* U+012A */ 0xCF - 0x80,
- /* U+012B */ 0xEF - 0x80,
- /* U+012C */ 0x01,
- /* U+012D */ 0x01,
- /* U+012E */ 0xC7 - 0x80,
- /* U+012F */ 0xE7 - 0x80,
- /* U+0130 */ 0xA9 - 0x80,
- /* U+0131 */ 0xB9 - 0x80,
- /* U+0132 */ 0x29,
- /* U+0133 */ 0x29,
- /* U+0134 */ 0xAC - 0x80,
- /* U+0135 */ 0xBC - 0x80,
- /* U+0136 */ 0xD3 - 0x80,
- /* U+0137 */ 0xF3 - 0x80,
- /* U+0138 */ 0xA2 - 0x80,
- /* U+0139 */ 0xC5 - 0x80,
- /* U+013A */ 0xE5 - 0x80,
- /* U+013B */ 0xA6 - 0x80,
- /* U+013C */ 0xB6 - 0x80,
- /* U+013D */ 0xA5 - 0x80,
- /* U+013E */ 0xB5 - 0x80,
- /* U+013F */ 0x29,
- /* U+0140 */ 0x29,
- /* U+0141 */ 0xA3 - 0x80,
- /* U+0142 */ 0xB3 - 0x80,
- /* U+0143 */ 0xD1 - 0x80,
- /* U+0144 */ 0xF1 - 0x80,
- /* U+0145 */ 0xD1 - 0x80,
- /* U+0146 */ 0xF1 - 0x80,
- /* U+0147 */ 0xD2 - 0x80,
- /* U+0148 */ 0xF2 - 0x80,
- /* U+0149 */ 0x29,
- /* U+014A */ 0xBD - 0x80,
- /* U+014B */ 0xBF - 0x80,
- /* U+014C */ 0xD2 - 0x80,
- /* U+014D */ 0xF2 - 0x80,
- /* U+014E */ 0x01,
- /* U+014F */ 0x01,
- /* U+0150 */ 0xD5 - 0x80,
- /* U+0151 */ 0xF5 - 0x80,
- /* U+0152 */ 0x29,
- /* U+0153 */ 0x29,
- /* U+0154 */ 0xC0 - 0x80,
- /* U+0155 */ 0xE0 - 0x80,
- /* U+0156 */ 0xA3 - 0x80,
- /* U+0157 */ 0xB3 - 0x80,
- /* U+0158 */ 0xD8 - 0x80,
- /* U+0159 */ 0xF8 - 0x80,
- /* U+015A */ 0xA6 - 0x80,
- /* U+015B */ 0xB6 - 0x80,
- /* U+015C */ 0xDE - 0x80,
- /* U+015D */ 0xFE - 0x80,
- /* U+015E */ 0xAA - 0x80,
- /* U+015F */ 0xBA - 0x80,
- /* U+0160 */ 0xA9 - 0x80,
- /* U+0161 */ 0xB9 - 0x80,
- /* U+0162 */ 0xDE - 0x80,
- /* U+0163 */ 0xFE - 0x80,
- /* U+0164 */ 0xAB - 0x80,
- /* U+0165 */ 0xBB - 0x80,
- /* U+0166 */ 0xAC - 0x80,
- /* U+0167 */ 0xBC - 0x80,
- /* U+0168 */ 0xDD - 0x80,
- /* U+0169 */ 0xFD - 0x80,
- /* U+016A */ 0xDE - 0x80,
- /* U+016B */ 0xFE - 0x80,
- /* U+016C */ 0xDD - 0x80,
- /* U+016D */ 0xFD - 0x80,
- /* U+016E */ 0xD9 - 0x80,
- /* U+016F */ 0xF9 - 0x80,
- /* U+0170 */ 0xDB - 0x80,
- /* U+0171 */ 0xFB - 0x80,
- /* U+0172 */ 0xD9 - 0x80,
- /* U+0173 */ 0xF9 - 0x80,
- /* U+0174 */ 0x2A,
- /* U+0175 */ 0x2B,
- /* U+0176 */ 0x2A,
- /* U+0177 */ 0x2B,
- /* U+0178 */ 0x2A,
- /* U+0179 */ 0xAC - 0x80,
- /* U+017A */ 0xBC - 0x80,
- /* U+017B */ 0xAF - 0x80,
- /* U+017C */ 0xBF - 0x80,
- /* U+017D */ 0xAE - 0x80,
- /* U+017E */ 0xBE - 0x80,
- /* U+017F */ 0x01
-};
-
-unsigned char latin_a_char_to_byte2[128] = {
- /* U+0100 */ 0x00,
- /* U+0101 */ 0x00,
- /* U+0102 */ 0x00,
- /* U+0103 */ 0x00,
- /* U+0104 */ 0x00,
- /* U+0105 */ 0x00,
- /* U+0106 */ 0x00,
- /* U+0107 */ 0x00,
- /* U+0108 */ 0x00,
- /* U+0109 */ 0x00,
- /* U+010A */ 0x00,
- /* U+010B */ 0x00,
- /* U+010C */ 0x00,
- /* U+010D */ 0x00,
- /* U+010E */ 0x00,
- /* U+010F */ 0x00,
- /* U+0110 */ 0x00,
- /* U+0111 */ 0x00,
- /* U+0112 */ 0x00,
- /* U+0113 */ 0x00,
- /* U+0114 */ 0x14,
- /* U+0115 */ 0x15,
- /* U+0116 */ 0x00,
- /* U+0117 */ 0x00,
- /* U+0118 */ 0x00,
- /* U+0119 */ 0x00,
- /* U+011A */ 0x00,
- /* U+011B */ 0x00,
- /* U+011C */ 0x00,
- /* U+011D */ 0x00,
- /* U+011E */ 0x00,
- /* U+011F */ 0x00,
- /* U+0120 */ 0x00,
- /* U+0121 */ 0x00,
- /* U+0122 */ 0x00,
- /* U+0123 */ 0x00,
- /* U+0124 */ 0x00,
- /* U+0125 */ 0x00,
- /* U+0126 */ 0x00,
- /* U+0127 */ 0x00,
- /* U+0128 */ 0x00,
- /* U+0129 */ 0x00,
- /* U+012A */ 0x00,
- /* U+012B */ 0x00,
- /* U+012C */ 0x2C,
- /* U+012D */ 0x2D,
- /* U+012E */ 0x00,
- /* U+012F */ 0x00,
- /* U+0130 */ 0x00,
- /* U+0131 */ 0x00,
- /* U+0132 */ 0x26,
- /* U+0133 */ 0x46,
- /* U+0134 */ 0x00,
- /* U+0135 */ 0x00,
- /* U+0136 */ 0x00,
- /* U+0137 */ 0x00,
- /* U+0138 */ 0x00,
- /* U+0139 */ 0x00,
- /* U+013A */ 0x00,
- /* U+013B */ 0x00,
- /* U+013C */ 0x00,
- /* U+013D */ 0x00,
- /* U+013E */ 0x00,
- /* U+013F */ 0x29,
- /* U+0140 */ 0x49,
- /* U+0141 */ 0x00,
- /* U+0142 */ 0x00,
- /* U+0143 */ 0x00,
- /* U+0144 */ 0x00,
- /* U+0145 */ 0x00,
- /* U+0146 */ 0x00,
- /* U+0147 */ 0x00,
- /* U+0148 */ 0x00,
- /* U+0149 */ 0x4A,
- /* U+014A */ 0x00,
- /* U+014B */ 0x00,
- /* U+014C */ 0x00,
- /* U+014D */ 0x00,
- /* U+014E */ 0x4E,
- /* U+014F */ 0x4F,
- /* U+0150 */ 0x00,
- /* U+0151 */ 0x00,
- /* U+0152 */ 0x2D,
- /* U+0153 */ 0x4D,
- /* U+0154 */ 0x00,
- /* U+0155 */ 0x00,
- /* U+0156 */ 0x00,
- /* U+0157 */ 0x00,
- /* U+0158 */ 0x00,
- /* U+0159 */ 0x00,
- /* U+015A */ 0x00,
- /* U+015B */ 0x00,
- /* U+015C */ 0x00,
- /* U+015D */ 0x00,
- /* U+015E */ 0x00,
- /* U+015F */ 0x00,
- /* U+0160 */ 0x00,
- /* U+0161 */ 0x00,
- /* U+0162 */ 0x00,
- /* U+0163 */ 0x00,
- /* U+0164 */ 0x00,
- /* U+0165 */ 0x00,
- /* U+0166 */ 0x00,
- /* U+0167 */ 0x00,
- /* U+0168 */ 0x00,
- /* U+0169 */ 0x00,
- /* U+016A */ 0x00,
- /* U+016B */ 0x00,
- /* U+016C */ 0x00,
- /* U+016D */ 0x00,
- /* U+016E */ 0x00,
- /* U+016F */ 0x00,
- /* U+0170 */ 0x00,
- /* U+0171 */ 0x00,
- /* U+0172 */ 0x00,
- /* U+0173 */ 0x00,
- /* U+0174 */ 0x71,
- /* U+0175 */ 0x71,
- /* U+0176 */ 0x74,
- /* U+0177 */ 0x74,
- /* U+0178 */ 0x73,
- /* U+0179 */ 0x00,
- /* U+017A */ 0x00,
- /* U+017B */ 0x00,
- /* U+017C */ 0x00,
- /* U+017D */ 0x00,
- /* U+017E */ 0x00,
- /* U+017F */ 0x7F
-};
-
Lisp_Object Vutf_2000_version;
#endif
};
DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
- mark_charset, print_charset, 0, 0, 0, charset_description,
+ mark_charset, print_charset, 0, 0, 0,
+ charset_description,
Lisp_Charset);
/* Make a new charset. */
/* #### SJT Should generic properties be allowed? */
static Lisp_Object
-make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
+make_charset (Charset_ID id, Lisp_Object name,
unsigned char type, unsigned char columns, unsigned char graphic,
- Bufbyte final, unsigned char direction, Lisp_Object short_name,
+ Bufbyte final, unsigned char direction, Lisp_Object short_name,
Lisp_Object long_name, Lisp_Object doc,
- Lisp_Object reg)
+ Lisp_Object reg,
+ Emchar* decoding_table,
+ Emchar ucs_min, Emchar ucs_max, Emchar code_offset)
{
Lisp_Object obj;
Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
CHARSET_NAME (cs) = name;
CHARSET_SHORT_NAME (cs) = short_name;
CHARSET_LONG_NAME (cs) = long_name;
- CHARSET_REP_BYTES (cs) = rep_bytes;
CHARSET_DIRECTION (cs) = direction;
CHARSET_TYPE (cs) = type;
CHARSET_COLUMNS (cs) = columns;
CHARSET_REGISTRY (cs) = reg;
CHARSET_CCL_PROGRAM (cs) = Qnil;
CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
-
+#ifdef UTF2000
+ CHARSET_DECODING_TABLE(cs) = decoding_table;
+ CHARSET_UCS_MIN(cs) = ucs_min;
+ CHARSET_UCS_MAX(cs) = ucs_max;
+ CHARSET_CODE_OFFSET(cs) = code_offset;
+#endif
+
switch ( CHARSET_TYPE (cs) )
{
case CHARSET_TYPE_94:
#endif
}
+#ifndef UTF2000
+ if (id == LEADING_BYTE_ASCII)
+ CHARSET_REP_BYTES (cs) = 1;
+ else if (id < 0xA0)
+ CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
+ else
+ CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
+#endif
+
if (final)
{
/* some charsets do not have final characters. This includes
assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
+#ifndef UTF2000
+ if (id < 0xA0)
+ /* official leading byte */
+ rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
+#endif
/* Some charsets are "faux" and don't have names or really exist at
all except in the leading-byte table. */
return lb;
}
+#ifdef UTF2000
+/* Return the first position byte of character CH in the coded character
+   set CHARSET, or 0 when none of the mappings below covers CH.
+
+   The mappings are tried in this order:
+   1. the decoding table, for one-dimensional charsets that have one;
+   2. the charset's [UCS_MIN, UCS_MAX] range, biased by its code offset;
+   3. the block selected by the charset's final byte inside the
+      MIN_CHAR_94 / MIN_CHAR_96 / MIN_CHAR_94x94 / MIN_CHAR_96x96 areas.
+
+   The computed value is converted to `unsigned char' on return.  */
+unsigned char
+charset_get_byte1 (Lisp_Object charset, Emchar ch)
+{
+  Emchar* table = XCHARSET_DECODING_TABLE (charset);
+  int d;
+
+  if ( (table != NULL) &&
+       (XCHARSET_DIMENSION (charset) == 1) &&
+       ( (d = get_byte_from_character_table (ch, charset)) >= 0) )
+    return d;
+  else if ((CHARSET_UCS_MIN (XCHARSET (charset)) <= ch)
+           && (ch <= CHARSET_UCS_MAX (XCHARSET (charset))))
+    return ch - CHARSET_UCS_MIN (XCHARSET (charset))
+      +  CHARSET_CODE_OFFSET (XCHARSET (charset));
+  else if (XCHARSET_DIMENSION (charset) == 1)
+    {
+      /* Position bytes start at 33 for 94-character sets and at 32 for
+         96-character sets, matching the two-dimensional cases below.  */
+      if (XCHARSET_CHARS (charset) == 94)
+        {
+          if (((d = ch - (MIN_CHAR_94
+                          + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
+              && (d < 94))
+            return d + 33;
+        }
+      else if (XCHARSET_CHARS (charset) == 96)
+        {
+          if (((d = ch - (MIN_CHAR_96
+                          + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
+              && (d < 96))
+            return d + 32;
+        }
+      else
+        return 0;
+    }
+  else if (XCHARSET_DIMENSION (charset) == 2)
+    {
+      /* For two-dimensional sets the first byte is the row index.  */
+      if (XCHARSET_CHARS (charset) == 94)
+        {
+          if (((d = ch - (MIN_CHAR_94x94
+                          + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) >= 0)
+              && (d < 94 * 94))
+            return (d / 94) + 33;
+        }
+      else if (XCHARSET_CHARS (charset) == 96)
+        {
+          if (((d = ch - (MIN_CHAR_96x96
+                          + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) >= 0)
+              && (d < 96 * 96))
+            return (d / 96) + 32;
+        }
+    }
+  /* CH is not representable in CHARSET by any of the mappings above.  */
+  return 0;
+}
+
+/* Return the second position byte of character CH in the coded
+   character set CHARSET.  The result is 0 for one-dimensional charsets
+   and for characters outside the block selected by the charset's final
+   byte; for Vcharset_ucs_bmp it is bits 8-15 of CH.  */
+unsigned char
+charset_get_byte2 (Lisp_Object charset, Emchar ch)
+{
+  /* One-dimensional charsets have no second byte.  */
+  if (XCHARSET_DIMENSION (charset) == 1)
+    return 0;
+
+  if (EQ (charset, Vcharset_ucs_bmp))
+    return (ch >> 8) & 0xff;
+
+  if (XCHARSET_CHARS (charset) == 94)
+    {
+      if ((MIN_CHAR_94x94
+           + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch)
+          && (ch < MIN_CHAR_94x94
+              + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94))
+        return ((ch - MIN_CHAR_94x94) % 94) + 33;
+      return 0;
+    }
+
+  /* Every remaining charset is treated as a 96x96 set.  */
+  if ((MIN_CHAR_96x96
+       + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch)
+      && (ch < MIN_CHAR_96x96
+          + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96))
+    return ((ch - MIN_CHAR_96x96) % 96) + 32;
+  return 0;
+}
+
+Lisp_Object Vdefault_coded_charset_priority_list;
+#endif
+
\f
/************************************************************************/
/* Basic charset Lisp functions */
if (columns == -1)
columns = dimension;
- charset = make_charset (id, name, dimension + 2, type, columns, graphic,
- final, direction, short_name, long_name, doc_string, registry);
+ charset = make_charset (id, name, type, columns, graphic,
+ final, direction, short_name, long_name,
+ doc_string, registry,
+ NULL,
+ 0, 0, 0);
if (!NILP (ccl_program))
XCHARSET_CCL_PROGRAM (charset) = ccl_program;
return charset;
long_name = CHARSET_LONG_NAME (cs);
registry = CHARSET_REGISTRY (cs);
- new_charset = make_charset (id, new_name, dimension + 2, type, columns,
+ new_charset = make_charset (id, new_name, type, columns,
graphic, final, direction, short_name, long_name,
- doc_string, registry);
+ doc_string, registry,
+ NULL,
+ 0, 0, 0);
CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
#endif
#ifdef UTF2000
- Vutf_2000_version = build_string("0.6 (Tōbushijō-mae)");
+ Vutf_2000_version = build_string("0.7 (Hirano)");
DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
Version number of UTF-2000.
*/ );
+
+ Vdefault_coded_charset_priority_list = Qnil;
+ DEFVAR_LISP ("default-coded-charset-priority-list",
+ &Vdefault_coded_charset_priority_list /*
+Default order of preferred coded-character-set.
+*/ );
#endif
}
#ifdef UTF2000
staticpro (&Vcharset_ucs_bmp);
Vcharset_ucs_bmp =
- make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 1,
+ make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
CHARSET_TYPE_256X256, 1, 0, 0,
CHARSET_LEFT_TO_RIGHT,
build_string ("BMP"),
build_string ("BMP"),
build_string ("BMP"),
- build_string (""));
+ build_string (""),
+ NULL, 0, 0xFFFF, 0);
#endif
staticpro (&Vcharset_ascii);
Vcharset_ascii =
- make_charset (LEADING_BYTE_ASCII, Qascii, 1,
+ make_charset (LEADING_BYTE_ASCII, Qascii,
CHARSET_TYPE_94, 1, 0, 'B',
CHARSET_LEFT_TO_RIGHT,
build_string ("ASCII"),
build_string ("ASCII)"),
build_string ("ASCII (ISO646 IRV)"),
- build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
+ build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
+ NULL, 0, 0x7F, 0);
staticpro (&Vcharset_control_1);
Vcharset_control_1 =
- make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
+ make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
CHARSET_TYPE_94, 1, 1, 0,
CHARSET_LEFT_TO_RIGHT,
build_string ("C1"),
build_string ("Control characters"),
build_string ("Control characters 128-191"),
- build_string (""));
+ build_string (""),
+ NULL, 0x80, 0x9F, 0);
staticpro (&Vcharset_latin_iso8859_1);
Vcharset_latin_iso8859_1 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
CHARSET_TYPE_96, 1, 1, 'A',
CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-1"),
build_string ("ISO8859-1 (Latin-1)"),
build_string ("ISO8859-1 (Latin-1)"),
- build_string ("iso8859-1"));
+ build_string ("iso8859-1"),
+ NULL, 0xA0, 0xFF, 32);
staticpro (&Vcharset_latin_iso8859_2);
Vcharset_latin_iso8859_2 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
CHARSET_TYPE_96, 1, 1, 'B',
CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-2"),
build_string ("ISO8859-2 (Latin-2)"),
build_string ("ISO8859-2 (Latin-2)"),
- build_string ("iso8859-2"));
+ build_string ("iso8859-2"),
+ latin_iso8859_2_to_ucs, 0, 0, 32);
staticpro (&Vcharset_latin_iso8859_3);
Vcharset_latin_iso8859_3 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
CHARSET_TYPE_96, 1, 1, 'C',
CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-3"),
build_string ("ISO8859-3 (Latin-3)"),
build_string ("ISO8859-3 (Latin-3)"),
- build_string ("iso8859-3"));
+ build_string ("iso8859-3"),
+ latin_iso8859_3_to_ucs, 0, 0, 32);
staticpro (&Vcharset_latin_iso8859_4);
Vcharset_latin_iso8859_4 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
CHARSET_TYPE_96, 1, 1, 'D',
CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-4"),
build_string ("ISO8859-4 (Latin-4)"),
build_string ("ISO8859-4 (Latin-4)"),
- build_string ("iso8859-4"));
+ build_string ("iso8859-4"),
+ latin_iso8859_4_to_ucs, 0, 0, 32);
staticpro (&Vcharset_thai_tis620);
Vcharset_thai_tis620 =
- make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
+ make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
CHARSET_TYPE_96, 1, 1, 'T',
CHARSET_LEFT_TO_RIGHT,
build_string ("TIS620"),
build_string ("TIS620 (Thai)"),
build_string ("TIS620.2529 (Thai)"),
- build_string ("tis620"));
+ build_string ("tis620"),
+ NULL, MIN_CHAR_THAI, MAX_CHAR_THAI, 32);
staticpro (&Vcharset_greek_iso8859_7);
Vcharset_greek_iso8859_7 =
- make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
+ make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
CHARSET_TYPE_96, 1, 1, 'F',
CHARSET_LEFT_TO_RIGHT,
build_string ("ISO8859-7"),
build_string ("ISO8859-7 (Greek)"),
build_string ("ISO8859-7 (Greek)"),
- build_string ("iso8859-7"));
+ build_string ("iso8859-7"),
+ NULL, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 32);
staticpro (&Vcharset_arabic_iso8859_6);
Vcharset_arabic_iso8859_6 =
- make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
+ make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
CHARSET_TYPE_96, 1, 1, 'G',
CHARSET_RIGHT_TO_LEFT,
build_string ("ISO8859-6"),
build_string ("ISO8859-6 (Arabic)"),
build_string ("ISO8859-6 (Arabic)"),
- build_string ("iso8859-6"));
+ build_string ("iso8859-6"),
+ NULL, 0, 0, 32);
staticpro (&Vcharset_hebrew_iso8859_8);
Vcharset_hebrew_iso8859_8 =
- make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
+ make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
CHARSET_TYPE_96, 1, 1, 'H',
CHARSET_RIGHT_TO_LEFT,
build_string ("ISO8859-8"),
build_string ("ISO8859-8 (Hebrew)"),
build_string ("ISO8859-8 (Hebrew)"),
- build_string ("iso8859-8"));
+ build_string ("iso8859-8"),
+ NULL, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 32);
staticpro (&Vcharset_katakana_jisx0201);
Vcharset_katakana_jisx0201 =
- make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
+ make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
CHARSET_TYPE_94, 1, 1, 'I',
CHARSET_LEFT_TO_RIGHT,
build_string ("JISX0201 Kana"),
build_string ("JISX0201.1976 (Japanese Kana)"),
build_string ("JISX0201.1976 Japanese Kana"),
- build_string ("jisx0201.1976"));
+ build_string ("jisx0201.1976"),
+ NULL,
+ MIN_CHAR_HALFWIDTH_KATAKANA,
+ MAX_CHAR_HALFWIDTH_KATAKANA, 33);
staticpro (&Vcharset_latin_jisx0201);
Vcharset_latin_jisx0201 =
- make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
+ make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
CHARSET_TYPE_94, 1, 0, 'J',
CHARSET_LEFT_TO_RIGHT,
build_string ("JISX0201 Roman"),
build_string ("JISX0201.1976 (Japanese Roman)"),
build_string ("JISX0201.1976 Japanese Roman"),
- build_string ("jisx0201.1976"));
+ build_string ("jisx0201.1976"),
+ latin_jisx0201_to_ucs, 0, 0, 33);
staticpro (&Vcharset_cyrillic_iso8859_5);
Vcharset_cyrillic_iso8859_5 =
- make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
+ make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
CHARSET_TYPE_96, 1, 1, 'L',
CHARSET_LEFT_TO_RIGHT,
build_string ("ISO8859-5"),
build_string ("ISO8859-5 (Cyrillic)"),
build_string ("ISO8859-5 (Cyrillic)"),
- build_string ("iso8859-5"));
+ build_string ("iso8859-5"),
+ NULL, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 32);
staticpro (&Vcharset_latin_iso8859_9);
Vcharset_latin_iso8859_9 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
CHARSET_TYPE_96, 1, 1, 'M',
CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-5"),
build_string ("ISO8859-9 (Latin-5)"),
build_string ("ISO8859-9 (Latin-5)"),
- build_string ("iso8859-9"));
+ build_string ("iso8859-9"),
+ latin_iso8859_9_to_ucs, 0, 0, 32);
staticpro (&Vcharset_japanese_jisx0208_1978);
Vcharset_japanese_jisx0208_1978 =
- make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
+ make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
CHARSET_TYPE_94X94, 2, 0, '@',
CHARSET_LEFT_TO_RIGHT,
build_string ("JISX0208.1978"),
build_string ("JISX0208.1978 (Japanese)"),
build_string
("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
- build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
+ build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_chinese_gb2312);
Vcharset_chinese_gb2312 =
- make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
+ make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
CHARSET_TYPE_94X94, 2, 0, 'A',
CHARSET_LEFT_TO_RIGHT,
build_string ("GB2312"),
build_string ("GB2312)"),
build_string ("GB2312 Chinese simplified"),
- build_string ("gb2312"));
+ build_string ("gb2312"),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_japanese_jisx0208);
Vcharset_japanese_jisx0208 =
- make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
+ make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
CHARSET_TYPE_94X94, 2, 0, 'B',
CHARSET_LEFT_TO_RIGHT,
build_string ("JISX0208"),
build_string ("JISX0208.1983/1990 (Japanese)"),
build_string ("JISX0208.1983/1990 Japanese Kanji"),
- build_string ("jisx0208.19\\(83\\|90\\)"));
+ build_string ("jisx0208.19\\(83\\|90\\)"),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_korean_ksc5601);
Vcharset_korean_ksc5601 =
- make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
+ make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
CHARSET_TYPE_94X94, 2, 0, 'C',
CHARSET_LEFT_TO_RIGHT,
build_string ("KSC5601"),
build_string ("KSC5601 (Korean"),
build_string ("KSC5601 Korean Hangul and Hanja"),
- build_string ("ksc5601"));
+ build_string ("ksc5601"),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_japanese_jisx0212);
Vcharset_japanese_jisx0212 =
- make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
+ make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
CHARSET_TYPE_94X94, 2, 0, 'D',
CHARSET_LEFT_TO_RIGHT,
build_string ("JISX0212"),
build_string ("JISX0212 (Japanese)"),
build_string ("JISX0212 Japanese Supplement"),
- build_string ("jisx0212"));
+ build_string ("jisx0212"),
+ NULL, 0, 0, 33);
#define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
staticpro (&Vcharset_chinese_cns11643_1);
Vcharset_chinese_cns11643_1 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
CHARSET_TYPE_94X94, 2, 0, 'G',
CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-1"),
build_string ("CNS11643-1 (Chinese traditional)"),
build_string
("CNS 11643 Plane 1 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("1")));
+ build_string (CHINESE_CNS_PLANE_RE("1")),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_chinese_cns11643_2);
Vcharset_chinese_cns11643_2 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
CHARSET_TYPE_94X94, 2, 0, 'H',
CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-2"),
build_string ("CNS11643-2 (Chinese traditional)"),
build_string
("CNS 11643 Plane 2 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("2")));
+ build_string (CHINESE_CNS_PLANE_RE("2")),
+ NULL, 0, 0, 33);
#ifdef UTF2000
staticpro (&Vcharset_chinese_cns11643_3);
Vcharset_chinese_cns11643_3 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3,
CHARSET_TYPE_94X94, 2, 0, 'I',
CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-3"),
build_string ("CNS11643-3 (Chinese traditional)"),
build_string
("CNS 11643 Plane 3 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("3")));
+ build_string (CHINESE_CNS_PLANE_RE("3")),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_chinese_cns11643_4);
Vcharset_chinese_cns11643_4 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4,
CHARSET_TYPE_94X94, 2, 0, 'J',
CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-4"),
build_string ("CNS11643-4 (Chinese traditional)"),
build_string
("CNS 11643 Plane 4 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("4")));
+ build_string (CHINESE_CNS_PLANE_RE("4")),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_chinese_cns11643_5);
Vcharset_chinese_cns11643_5 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5,
CHARSET_TYPE_94X94, 2, 0, 'K',
CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-5"),
build_string ("CNS11643-5 (Chinese traditional)"),
build_string
("CNS 11643 Plane 5 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("5")));
+ build_string (CHINESE_CNS_PLANE_RE("5")),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_chinese_cns11643_6);
Vcharset_chinese_cns11643_6 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6,
CHARSET_TYPE_94X94, 2, 0, 'L',
CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-6"),
build_string ("CNS11643-6 (Chinese traditional)"),
build_string
("CNS 11643 Plane 6 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("6")));
+ build_string (CHINESE_CNS_PLANE_RE("6")),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_chinese_cns11643_7);
Vcharset_chinese_cns11643_7 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7,
CHARSET_TYPE_94X94, 2, 0, 'M',
CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-7"),
build_string ("CNS11643-7 (Chinese traditional)"),
build_string
("CNS 11643 Plane 7 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("7")));
+ build_string (CHINESE_CNS_PLANE_RE("7")),
+ NULL, 0, 0, 33);
+ staticpro (&Vcharset_latin_viscii_lower);
Vcharset_latin_viscii_lower =
- make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 2,
+ make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
CHARSET_TYPE_96, 1, 1, '1',
CHARSET_LEFT_TO_RIGHT,
build_string ("VISCII lower"),
build_string ("VISCII lower (Vietnamese)"),
build_string ("VISCII lower (Vietnamese)"),
- build_string ("VISCII1.1"));
+ build_string ("VISCII1.1"),
+ latin_viscii_lower_to_ucs, 0, 0, 32);
+ staticpro (&Vcharset_latin_viscii_upper);
Vcharset_latin_viscii_upper =
- make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 2,
+ make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
CHARSET_TYPE_96, 1, 1, '2',
CHARSET_LEFT_TO_RIGHT,
build_string ("VISCII upper"),
build_string ("VISCII upper (Vietnamese)"),
build_string ("VISCII upper (Vietnamese)"),
- build_string ("VISCII1.1"));
+ build_string ("VISCII1.1"),
+ latin_viscii_upper_to_ucs, 0, 0, 32);
#endif
staticpro (&Vcharset_chinese_big5_1);
Vcharset_chinese_big5_1 =
- make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
+ make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
CHARSET_TYPE_94X94, 2, 0, '0',
CHARSET_LEFT_TO_RIGHT,
build_string ("Big5"),
build_string ("Big5 (Level-1)"),
build_string
("Big5 Level-1 Chinese traditional"),
- build_string ("big5"));
+ build_string ("big5"),
+ NULL, 0, 0, 33);
staticpro (&Vcharset_chinese_big5_2);
Vcharset_chinese_big5_2 =
- make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
+ make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
CHARSET_TYPE_94X94, 2, 0, '1',
CHARSET_LEFT_TO_RIGHT,
build_string ("Big5"),
build_string ("Big5 (Level-2)"),
build_string
("Big5 Level-2 Chinese traditional"),
- build_string ("big5"));
+ build_string ("big5"),
+ NULL, 0, 0, 33);
#ifdef ENABLE_COMPOSITE_CHARS
/* #### For simplicity, we put composite chars into a 96x96 charset.
room, esp. as we don't yet recycle numbers. */
staticpro (&Vcharset_composite);
Vcharset_composite =
- make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
+ make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
CHARSET_TYPE_96X96, 2, 0, 0,
CHARSET_LEFT_TO_RIGHT,
build_string ("Composite"),
FONT_INSTANCE_TTY_CHARSET (f) = charset;
#ifdef MULE
if (CHARSETP (charset))
- f->width = XCHARSET_COLUMNS (charset);
+ f->width = CHARSET_COLUMNS (XCHARSET (charset));
else
#endif
f->width = 1;
const Extbyte *src, Lstream_data_count n);
static void decode_coding_sjis (Lstream *decoding, const Extbyte *src,
unsigned_char_dynarr *dst, Lstream_data_count n);
-static void encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
- unsigned_char_dynarr *dst, Lstream_data_count n);
+void char_encode_shift_jis (struct encoding_stream *str, Emchar c,
+ unsigned_char_dynarr *dst, unsigned int *flags);
+void char_finish_shift_jis (struct encoding_stream *str,
+ unsigned_char_dynarr *dst, unsigned int *flags);
+
static int detect_coding_big5 (struct detection_state *st,
const Extbyte *src, Lstream_data_count n);
static void decode_coding_big5 (Lstream *decoding, const Extbyte *src,
const Extbyte *src, Lstream_data_count n);
static void decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
unsigned_char_dynarr *dst, Lstream_data_count n);
-static void encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
- unsigned_char_dynarr *dst, Lstream_data_count n);
+void char_encode_ucs4 (struct encoding_stream *str, Emchar c,
+ unsigned_char_dynarr *dst, unsigned int *flags);
+void char_finish_ucs4 (struct encoding_stream *str,
+ unsigned_char_dynarr *dst, unsigned int *flags);
+
static int detect_coding_utf8 (struct detection_state *st,
const Extbyte *src, Lstream_data_count n);
static void decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
CHECK_STRING (doc_string);
CODING_SYSTEM_DOC_STRING (codesys) = doc_string;
-#ifdef UTF2000
- if (ty == CODESYS_NO_CONVERSION)
- codesys->fixed.size = 1;
-#endif
{
EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props)
{
case CODESYS_UTF8:
str->encode_char = &char_encode_utf8;
str->finish = &char_finish_utf8;
+ break;
+ case CODESYS_UCS4:
+ str->encode_char = &char_encode_ucs4;
+ str->finish = &char_finish_ucs4;
+ break;
+ case CODESYS_SHIFT_JIS:
+ str->encode_char = &char_encode_shift_jis;
+ str->finish = &char_finish_shift_jis;
+ break;
default:
break;
}
encode_coding_no_conversion (encoding, src, dst, n);
break;
#ifdef MULE
- case CODESYS_SHIFT_JIS:
- encode_coding_sjis (encoding, src, dst, n);
- break;
case CODESYS_BIG5:
encode_coding_big5 (encoding, src, dst, n);
break;
- case CODESYS_UCS4:
- encode_coding_ucs4 (encoding, src, dst, n);
- break;
case CODESYS_CCL:
str->ccl.last_block = str->flags & CODING_STATE_END;
/* When applying ccl program to stream, MUST NOT set NULL
struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
unsigned int flags = str->flags;
Emchar ch = str->ch;
- Lisp_Object charset;
- int half;
char_boundary = str->iso2022.current_char_boundary;
- charset = str->iso2022.current_charset;
- half = str->iso2022.current_half;
while (n--)
{
c = *src++;
- switch (char_boundary)
+ if (char_boundary == 0)
{
- case 0:
- if ( c >= 0xfc )
+ if (c >= 0xfc)
{
ch = c & 0x01;
char_boundary = 5;
}
- else if ( c >= 0xf8 )
+ else if (c >= 0xf8)
{
ch = c & 0x03;
char_boundary = 4;
}
- else if ( c >= 0xf0 )
+ else if (c >= 0xf0)
{
ch = c & 0x07;
char_boundary = 3;
}
- else if ( c >= 0xe0 )
+ else if (c >= 0xe0)
{
ch = c & 0x0f;
char_boundary = 2;
}
- else if ( c >= 0xc0 )
+ else if (c >= 0xc0)
{
ch = c & 0x1f;
char_boundary = 1;
}
else
- {
- (*str->encode_char) (str, c, dst, &flags);
- ch = 0;
- char_boundary = 0;
- }
- break;
- case 1:
+ (*str->encode_char) (str, c, dst, &flags);
+ }
+ else if (char_boundary == 1)
+ {
(*str->encode_char) (str, (ch << 6) | (c & 0x3f), dst, &flags);
ch =0;
char_boundary = 0;
- break;
- default:
- ch = ( ch << 6 ) | ( c & 0x3f );
+ }
+ else
+ {
+ ch = (ch << 6) | (c & 0x3f);
char_boundary--;
}
}
- if ( (char_boundary == 0) && flags & CODING_STATE_END)
+ if ((char_boundary == 0) && (flags & CODING_STATE_END))
{
(*str->finish) (str, dst, &flags);
}
str->flags = flags;
str->ch = ch;
str->iso2022.current_char_boundary = char_boundary;
- str->iso2022.current_charset = charset;
- str->iso2022.current_half = half;
-
- /* Verbum caro factum est! */
}
\f
Dynarr_add (dst, c);
#endif
}
+#ifdef UTF2000
+ else if (c > 32)
+ DECODE_ADD_UCS_CHAR(MAKE_CHAR(Vcharset_latin_jisx0201,
+ c, 0), dst);
+#endif
else
DECODE_ADD_BINARY_CHAR (c, dst);
}
str->ch = ch;
}
-/* Convert internally-formatted data to Shift-JIS. */
+/* Convert one character from the internal representation to Shift_JIS.
+
+ STR supplies the coding system (only its EOL type is read here), CH is
+ the character to encode and the resulting bytes are appended to DST.
+ FLAGS is accepted for the common encode_char signature and is not used.
+
+ '\n' is expanded according to the EOL type: a '\r' is emitted first
+ unless the type is EOL_LF or EOL_AUTODETECT, and the '\n' itself is
+ emitted unless the type is EOL_CR. Any other character is written as
+ one byte (JIS X 0201 Katakana with the high bit set, or any charset
+ whose second position code is 0), as two bytes produced by ENCODE_SJIS
+ (JIS X 0208), or as '?' when it belongs to no charset handled here. */
-static void
-encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
- unsigned_char_dynarr *dst, Lstream_data_count n)
+void
+char_encode_shift_jis (struct encoding_stream *str, Emchar ch,
+ unsigned_char_dynarr *dst, unsigned int *flags)
{
- struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
- unsigned int flags = str->flags;
- unsigned int ch = str->ch;
eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
-#ifdef UTF2000
- unsigned char char_boundary = str->iso2022.current_char_boundary;
-#endif
- while (n--)
+ if (ch == '\n')
{
- Bufbyte c = *src++;
+ if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+ Dynarr_add (dst, '\r');
+ if (eol_type != EOL_CR)
+ Dynarr_add (dst, ch);
+ }
+ else
+ {
+ Lisp_Object charset;
+ unsigned int c1, c2, s1, s2;
+#ifdef UTF2000
+ /* Keep the lookup result in a signed variable. Stored directly in
+ the unsigned c1 it can never compare below zero, so the "found"
+ test would always succeed and BREAKUP_CHAR would be unreachable.
+ NOTE(review): assumes get_byte_from_character_table returns a
+ negative int when CH has no code point in the charset -- confirm
+ against its declaration. */
+ int jisx0201_byte =
+ get_byte_from_character_table (ch, Vcharset_latin_jisx0201);
+#endif
+
#ifdef UTF2000
- switch (char_boundary)
+ if (jisx0201_byte >= 0)
{
- case 0:
- if ( c >= 0xfc )
- {
- ch = c & 0x01;
- char_boundary = 5;
- }
- else if ( c >= 0xf8 )
- {
- ch = c & 0x03;
- char_boundary = 4;
- }
- else if ( c >= 0xf0 )
- {
- ch = c & 0x07;
- char_boundary = 3;
- }
- else if ( c >= 0xe0 )
- {
- ch = c & 0x0f;
- char_boundary = 2;
- }
- else if ( c >= 0xc0 )
- {
- ch = c & 0x1f;
- char_boundary = 1;
- }
- else
- {
- ch = 0;
- if (c == '\n')
- {
- if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
- Dynarr_add (dst, '\r');
- if (eol_type != EOL_CR)
- Dynarr_add (dst, c);
- }
- else
- Dynarr_add (dst, c);
- char_boundary = 0;
- }
- break;
- case 1:
- ch = ( ch << 6 ) | ( c & 0x3f );
- {
- Lisp_Object charset;
- unsigned int c1, c2, s1, s2;
-
- BREAKUP_CHAR (ch, charset, c1, c2);
- if (EQ(charset, Vcharset_katakana_jisx0201))
- {
- Dynarr_add (dst, c1 | 0x80);
- }
- else if (EQ(charset, Vcharset_japanese_jisx0208))
- {
- ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
- Dynarr_add (dst, s1);
- Dynarr_add (dst, s2);
- }
- }
- char_boundary = 0;
- break;
- default:
- ch = ( ch << 6 ) | ( c & 0x3f );
- char_boundary--;
+ charset = Vcharset_latin_jisx0201;
+ c1 = jisx0201_byte;
+ c2 = 0;
}
-#else
- if (c == '\n')
+ else
+#endif
+ BREAKUP_CHAR (ch, charset, c1, c2);
+
+ if (EQ(charset, Vcharset_katakana_jisx0201))
{
- if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
- Dynarr_add (dst, '\r');
- if (eol_type != EOL_CR)
- Dynarr_add (dst, '\n');
- ch = 0;
+ Dynarr_add (dst, c1 | 0x80);
}
- else if (BYTE_ASCII_P (c))
+ else if (c2 == 0)
{
- Dynarr_add (dst, c);
- ch = 0;
+ Dynarr_add (dst, c1);
}
- else if (BUFBYTE_LEADING_BYTE_P (c))
- ch = (c == LEADING_BYTE_KATAKANA_JISX0201 ||
- c == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
- c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0;
- else if (ch)
+ else if (EQ(charset, Vcharset_japanese_jisx0208))
{
- if (ch == LEADING_BYTE_KATAKANA_JISX0201)
- {
- Dynarr_add (dst, c);
- ch = 0;
- }
- else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
- ch == LEADING_BYTE_JAPANESE_JISX0208)
- ch = c;
- else
- {
- unsigned char j1, j2;
- ENCODE_SJIS (ch, c, j1, j2);
- Dynarr_add (dst, j1);
- Dynarr_add (dst, j2);
- ch = 0;
- }
+ ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
+ Dynarr_add (dst, s1);
+ Dynarr_add (dst, s2);
}
-#endif
+ else
+ Dynarr_add (dst, '?');
}
+}
- str->flags = flags;
- str->ch = ch;
-#ifdef UTF2000
- str->iso2022.current_char_boundary = char_boundary;
-#endif
+/* Finish hook for the Shift_JIS encoder. The body is intentionally
+ empty: nothing is appended to DST and neither STR nor FLAGS is
+ touched. */
+void
+char_finish_shift_jis (struct encoding_stream *str, unsigned_char_dynarr *dst,
+ unsigned int *flags)
+{
}
DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
\f
/************************************************************************/
/* UCS-4 methods */
-/* */
-/* UCS-4 character codes are implemented as nonnegative integers. */
-/* */
/************************************************************************/
-
-DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /*
-Map UCS-4 code CODE to Mule character CHARACTER.
-
-Return T on success, NIL on failure.
-*/
- (code, character))
-{
- size_t c;
-
- CHECK_CHAR (character);
- CHECK_NATNUM (code);
- c = XINT (code);
-
- if (c < countof (fcd->ucs_to_mule_table))
- {
- fcd->ucs_to_mule_table[c] = character;
- return Qt;
- }
- else
- return Qnil;
-}
-
-static Lisp_Object
-ucs_to_char (unsigned long code)
-{
- if (code < countof (fcd->ucs_to_mule_table))
- {
- return fcd->ucs_to_mule_table[code];
- }
- else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14))
- {
- unsigned int c;
-
- code -= 0xe00000;
- c = code % (94 * 94);
- return make_char
- (MAKE_CHAR (CHARSET_BY_ATTRIBUTES
- (CHARSET_TYPE_94X94, code / (94 * 94) + '@',
- CHARSET_LEFT_TO_RIGHT),
- c / 94 + 33, c % 94 + 33));
- }
- else
- return Qnil;
-}
-
-DEFUN ("ucs-char", Fucs_char, 1, 1, 0, /*
-Return Mule character corresponding to UCS code CODE (a positive integer).
-*/
- (code))
-{
- CHECK_NATNUM (code);
- return ucs_to_char (XINT (code));
-}
-
-DEFUN ("set-char-ucs", Fset_char_ucs, 2, 2, 0, /*
-Map Mule character CHARACTER to UCS code CODE (a positive integer).
-*/
- (character, code))
-{
- /* #### Isn't this gilding the lily? Fput_char_table checks its args.
- Fset_char_ucs is more restrictive on index arg, but should
- check code arg in a char_table method. */
- CHECK_CHAR (character);
- CHECK_NATNUM (code);
- return Fput_char_table (character, code, mule_to_ucs_table);
-}
-
-DEFUN ("char-ucs", Fchar_ucs, 1, 1, 0, /*
-Return the UCS code (a positive integer) corresponding to CHARACTER.
-*/
- (character))
-{
- return Fget_char_table (character, mule_to_ucs_table);
-}
-
-#ifdef UTF2000
-#define decode_ucs4 DECODE_ADD_UCS_CHAR
-#else
-/* Decode a UCS-4 character into a buffer. If the lookup fails, use
- <GETA MARK> (U+3013) of JIS X 0208, which means correct character
- is not found, instead.
- #### do something more appropriate (use blob?)
- Danger, Will Robinson! Data loss. Should we signal user? */
-static void
-decode_ucs4 (unsigned long ch, unsigned_char_dynarr *dst)
-{
- Lisp_Object chr = ucs_to_char (ch);
-
- if (! NILP (chr))
- {
- Bufbyte work[MAX_EMCHAR_LEN];
- int len;
-
- ch = XCHAR (chr);
- len = (ch < 128) ?
- simple_set_charptr_emchar (work, ch) :
- non_ascii_set_charptr_emchar (work, ch);
- Dynarr_add_many (dst, work, len);
- }
- else
- {
- Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
- Dynarr_add (dst, 34 + 128);
- Dynarr_add (dst, 46 + 128);
- }
-}
-#endif
-
-static unsigned long
-mule_char_to_ucs4 (Lisp_Object charset,
- unsigned char h, unsigned char l)
-{
- Lisp_Object code
- = Fget_char_table (make_char (MAKE_CHAR (charset, h & 127, l & 127)),
- mule_to_ucs_table);
-
- if (INTP (code))
- {
- return XINT (code);
- }
- else if ( (XCHARSET_DIMENSION (charset) == 2) &&
- (XCHARSET_CHARS (charset) == 94) )
- {
- unsigned char final = XCHARSET_FINAL (charset);
-
- if ( ('@' <= final) && (final < 0x7f) )
- {
- return 0xe00000 + (final - '@') * 94 * 94
- + ((h & 127) - 33) * 94 + (l & 127) - 33;
- }
- else
- {
- return '?';
- }
- }
- else
- {
- return '?';
- }
-}
-
-static void
-encode_ucs4 (Lisp_Object charset,
- unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
-{
- unsigned long code = mule_char_to_ucs4 (charset, h, l);
- Dynarr_add (dst, code >> 24);
- Dynarr_add (dst, (code >> 16) & 255);
- Dynarr_add (dst, (code >> 8) & 255);
- Dynarr_add (dst, code & 255);
-}
-
static int
detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, Lstream_data_count n)
{
counter = 3;
break;
case 1:
- decode_ucs4 ( ( ch << 8 ) | c, dst);
+ DECODE_ADD_UCS_CHAR ((ch << 8) | c, dst);
ch = 0;
counter = 0;
break;
str->counter = counter;
}
-static void
-encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
- unsigned_char_dynarr *dst, Lstream_data_count n)
+/* Encode one character as UCS-4.
+
+ Appends four values derived from CH to DST, most significant byte
+ first: CH >> 24, CH >> 16, CH >> 8 and CH. '\n' is written like any
+ other character; no end-of-line conversion is applied here. STR and
+ FLAGS are accepted for the common encode_char signature and are not
+ used.
+ NOTE(review): relies on DST keeping only the low 8 bits of each value
+ passed to Dynarr_add -- confirm against the dynarr element type. */
+void
+char_encode_ucs4 (struct encoding_stream *str, Emchar ch,
+ unsigned_char_dynarr *dst, unsigned int *flags)
{
-#ifndef UTF2000
- struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
- unsigned int flags = str->flags;
- unsigned int ch = str->ch;
- unsigned char char_boundary = str->iso2022.current_char_boundary;
- Lisp_Object charset = str->iso2022.current_charset;
-
-#ifdef ENABLE_COMPOSITE_CHARS
- /* flags for handling composite chars. We do a little switcharoo
- on the source while we're outputting the composite char. */
- unsigned int saved_n = 0;
- const unsigned char *saved_src = NULL;
- int in_composite = 0;
-
- back_to_square_n:
-#endif
-
- while (n--)
- {
- unsigned char c = *src++;
-
- if (BYTE_ASCII_P (c))
- { /* Processing ASCII character */
- ch = 0;
- encode_ucs4 (Vcharset_ascii, c, 0, dst);
- char_boundary = 1;
- }
- else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
- { /* Processing Leading Byte */
- ch = 0;
- charset = CHARSET_BY_LEADING_BYTE (c);
- if (LEADING_BYTE_PREFIX_P(c))
- ch = c;
- char_boundary = 0;
- }
- else
- { /* Processing Non-ASCII character */
- char_boundary = 1;
- if (EQ (charset, Vcharset_control_1))
- {
- encode_ucs4 (Vcharset_control_1, c, 0, dst);
- }
- else
- {
- switch (XCHARSET_REP_BYTES (charset))
- {
- case 2:
- encode_ucs4 (charset, c, 0, dst);
- break;
- case 3:
- if (XCHARSET_PRIVATE_P (charset))
- {
- encode_ucs4 (charset, c, 0, dst);
- ch = 0;
- }
- else if (ch)
- {
-#ifdef ENABLE_COMPOSITE_CHARS
- if (EQ (charset, Vcharset_composite))
- {
- if (in_composite)
- {
- /* #### Bother! We don't know how to
- handle this yet. */
- Dynarr_add (dst, '\0');
- Dynarr_add (dst, '\0');
- Dynarr_add (dst, '\0');
- Dynarr_add (dst, '~');
- }
- else
- {
- Emchar emch = MAKE_CHAR (Vcharset_composite,
- ch & 0x7F, c & 0x7F);
- Lisp_Object lstr = composite_char_string (emch);
- saved_n = n;
- saved_src = src;
- in_composite = 1;
- src = XSTRING_DATA (lstr);
- n = XSTRING_LENGTH (lstr);
- }
- }
- else
-#endif /* ENABLE_COMPOSITE_CHARS */
- {
- encode_ucs4(charset, ch, c, dst);
- }
- ch = 0;
- }
- else
- {
- ch = c;
- char_boundary = 0;
- }
- break;
- case 4:
- if (ch)
- {
- encode_ucs4 (charset, ch, c, dst);
- ch = 0;
- }
- else
- {
- ch = c;
- char_boundary = 0;
- }
- break;
- default:
- abort ();
- }
- }
- }
- }
-
-#ifdef ENABLE_COMPOSITE_CHARS
- if (in_composite)
- {
- n = saved_n;
- src = saved_src;
- in_composite = 0;
- goto back_to_square_n; /* Wheeeeeeeee ..... */
- }
-#endif /* ENABLE_COMPOSITE_CHARS */
-
- str->flags = flags;
- str->ch = ch;
- str->iso2022.current_char_boundary = char_boundary;
- str->iso2022.current_charset = charset;
+ Dynarr_add (dst, ch >> 24);
+ Dynarr_add (dst, ch >> 16);
+ Dynarr_add (dst, ch >> 8);
+ Dynarr_add (dst, ch );
+}
- /* Verbum caro factum est! */
-#endif
+/* Finish hook for the UCS-4 encoder. The body is intentionally empty:
+ nothing is appended to DST and neither STR nor FLAGS is touched. */
+void
+char_finish_ucs4 (struct encoding_stream *str, unsigned_char_dynarr *dst,
+ unsigned int *flags)
+{
}
\f
else
{
DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
- decode_ucs4 (c, dst);
+ DECODE_ADD_UCS_CHAR (c, dst);
}
break;
case 1:
ch = ( ch << 6 ) | ( c & 0x3f );
- decode_ucs4 (ch, dst);
+ DECODE_ADD_UCS_CHAR (ch, dst);
ch = 0;
counter = 0;
break;
}
+/* Encode one character as UTF-8 and append the bytes to DST.
+
+ STR supplies the coding system (only its EOL type is read here) and
+ CH is the character to encode. FLAGS is accepted for the common
+ encode_char signature and is not used.
+
+ '\n' is expanded according to the EOL type: a '\r' is emitted first
+ unless the type is EOL_LF or EOL_AUTODETECT, and the '\n' itself is
+ emitted unless the type is EOL_CR. Every other character is written
+ with the shortest form its value fits: one byte up to 0x7f, two up
+ to 0x7ff, three up to 0xffff, four up to 0x1fffff, five up to
+ 0x3ffffff and six bytes above that. */
void
-char_encode_utf8 (struct encoding_stream *str, Emchar code,
+char_encode_utf8 (struct encoding_stream *str, Emchar ch,
unsigned_char_dynarr *dst, unsigned int *flags)
{
- if ( code <= 0x7f )
+ eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
+
+ if (ch == '\n')
+ {
+ if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+ Dynarr_add (dst, '\r');
+ if (eol_type != EOL_CR)
+ Dynarr_add (dst, ch);
+ }
+ else if (ch <= 0x7f)
{
- Dynarr_add (dst, code);
+ Dynarr_add (dst, ch);
}
- else if ( code <= 0x7ff )
+ else if (ch <= 0x7ff)
{
- Dynarr_add (dst, (code >> 6) | 0xc0);
- Dynarr_add (dst, (code & 0x3f) | 0x80);
+ Dynarr_add (dst, (ch >> 6) | 0xc0);
+ Dynarr_add (dst, (ch & 0x3f) | 0x80);
}
- else if ( code <= 0xffff )
+ else if (ch <= 0xffff)
{
- Dynarr_add (dst, (code >> 12) | 0xe0);
- Dynarr_add (dst, ((code >> 6) & 0x3f) | 0x80);
- Dynarr_add (dst, (code & 0x3f) | 0x80);
+ Dynarr_add (dst, (ch >> 12) | 0xe0);
+ Dynarr_add (dst, ((ch >> 6) & 0x3f) | 0x80);
+ Dynarr_add (dst, (ch & 0x3f) | 0x80);
}
- else if ( code <= 0x1fffff )
+ else if (ch <= 0x1fffff)
{
- Dynarr_add (dst, (code >> 18) | 0xf0);
- Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
- Dynarr_add (dst, ((code >> 6) & 0x3f) | 0x80);
- Dynarr_add (dst, (code & 0x3f) | 0x80);
+ Dynarr_add (dst, (ch >> 18) | 0xf0);
+ Dynarr_add (dst, ((ch >> 12) & 0x3f) | 0x80);
+ Dynarr_add (dst, ((ch >> 6) & 0x3f) | 0x80);
+ Dynarr_add (dst, (ch & 0x3f) | 0x80);
}
- else if ( code <= 0x3ffffff )
+ else if (ch <= 0x3ffffff)
{
- Dynarr_add (dst, (code >> 24) | 0xf8);
- Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80);
- Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
- Dynarr_add (dst, ((code >> 6) & 0x3f) | 0x80);
- Dynarr_add (dst, (code & 0x3f) | 0x80);
+ Dynarr_add (dst, (ch >> 24) | 0xf8);
+ Dynarr_add (dst, ((ch >> 18) & 0x3f) | 0x80);
+ Dynarr_add (dst, ((ch >> 12) & 0x3f) | 0x80);
+ Dynarr_add (dst, ((ch >> 6) & 0x3f) | 0x80);
+ Dynarr_add (dst, (ch & 0x3f) | 0x80);
}
else
{
- Dynarr_add (dst, (code >> 30) | 0xfc);
- Dynarr_add (dst, ((code >> 24) & 0x3f) | 0x80);
- Dynarr_add (dst, ((code >> 18) & 0x3f) | 0x80);
- Dynarr_add (dst, ((code >> 12) & 0x3f) | 0x80);
- Dynarr_add (dst, ((code >> 6) & 0x3f) | 0x80);
- Dynarr_add (dst, (code & 0x3f) | 0x80);
+ Dynarr_add (dst, (ch >> 30) | 0xfc);
+ Dynarr_add (dst, ((ch >> 24) & 0x3f) | 0x80);
+ Dynarr_add (dst, ((ch >> 18) & 0x3f) | 0x80);
+ Dynarr_add (dst, ((ch >> 12) & 0x3f) | 0x80);
+ Dynarr_add (dst, ((ch >> 6) & 0x3f) | 0x80);
+ Dynarr_add (dst, (ch & 0x3f) | 0x80);
}
}
charset = new_charset;
}
-#ifndef UTF2000
+#ifdef UTF2000
+ if (XCHARSET_DIMENSION (charset) == 1)
+ {
+ DECODE_OUTPUT_PARTIAL_CHAR (ch);
+ DECODE_ADD_UCS_CHAR
+ (MAKE_CHAR (charset, c & 0x7F, 0), dst);
+ }
+ else if (ch)
+ {
+ DECODE_ADD_UCS_CHAR
+ (MAKE_CHAR (charset, ch & 0x7F, c & 0x7F), dst);
+ ch = 0;
+ }
+ else
+ ch = c;
+#else
lb = XCHARSET_LEADING_BYTE (charset);
-#endif
switch (XCHARSET_REP_BYTES (charset))
{
case 1: /* ASCII */
case 2: /* one-byte official */
DECODE_OUTPUT_PARTIAL_CHAR (ch);
-#ifdef UTF2000
- DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0), dst);
-#else
Dynarr_add (dst, lb);
Dynarr_add (dst, c | 0x80);
-#endif
break;
case 3: /* one-byte private or two-byte official */
-#ifdef UTF2000
- if (XCHARSET_DIMENSION (charset) == 1)
-#else
if (XCHARSET_PRIVATE_P (charset))
-#endif
{
DECODE_OUTPUT_PARTIAL_CHAR (ch);
-#ifdef UTF2000
- DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset, c & 0x7F, 0),
- dst);
-#else
Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1);
Dynarr_add (dst, lb);
Dynarr_add (dst, c | 0x80);
-#endif
}
else
{
if (ch)
{
-#ifdef UTF2000
- DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
- ch & 0x7F,
- c & 0x7F), dst);
-#else
Dynarr_add (dst, lb);
Dynarr_add (dst, ch | 0x80);
Dynarr_add (dst, c | 0x80);
-#endif
ch = 0;
}
else
default: /* two-byte private */
if (ch)
{
-#ifdef UTF2000
- DECODE_ADD_UCS_CHAR(MAKE_CHAR(charset,
- ch & 0x7F,
- c & 0x7F), dst);
-#else
Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2);
Dynarr_add (dst, lb);
Dynarr_add (dst, ch | 0x80);
Dynarr_add (dst, c | 0x80);
-#endif
ch = 0;
}
else
ch = c;
}
+#endif
}
if (!ch)
Lisp_Coding_System* codesys = str->codesys;
eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
int i;
- Lisp_Object charset;
- int half;
+ Lisp_Object charset = str->iso2022.current_charset;
+ int half = str->iso2022.current_half;
unsigned int byte1, byte2;
if (ch <= 0x7F)
abort ();
}
}
+ str->iso2022.current_charset = charset;
+ str->iso2022.current_half = half;
}
void
{
c = *src++;
#ifdef UTF2000
- switch (char_boundary)
+ if (char_boundary == 0)
+ if ( c >= 0xfc )
+ {
+ ch = c & 0x01;
+ char_boundary = 5;
+ }
+ else if ( c >= 0xf8 )
+ {
+ ch = c & 0x03;
+ char_boundary = 4;
+ }
+ else if ( c >= 0xf0 )
+ {
+ ch = c & 0x07;
+ char_boundary = 3;
+ }
+ else if ( c >= 0xe0 )
+ {
+ ch = c & 0x0f;
+ char_boundary = 2;
+ }
+ else if ( c >= 0xc0 )
+ {
+ ch = c & 0x1f;
+ char_boundary = 1;
+ }
+ else
+ {
+ ch = 0;
+ if (c == '\n')
+ {
+ if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
+ Dynarr_add (dst, '\r');
+ if (eol_type != EOL_CR)
+ Dynarr_add (dst, c);
+ }
+ else
+ Dynarr_add (dst, c);
+ char_boundary = 0;
+ }
+ else if (char_boundary == 1)
{
- case 0:
- if ( c >= 0xfc )
- {
- ch = c & 0x01;
- char_boundary = 5;
- }
- else if ( c >= 0xf8 )
- {
- ch = c & 0x03;
- char_boundary = 4;
- }
- else if ( c >= 0xf0 )
- {
- ch = c & 0x07;
- char_boundary = 3;
- }
- else if ( c >= 0xe0 )
- {
- ch = c & 0x0f;
- char_boundary = 2;
- }
- else if ( c >= 0xc0 )
- {
- ch = c & 0x1f;
- char_boundary = 1;
- }
- else
- {
- ch = 0;
-
- if (c == '\n')
- {
- if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
- Dynarr_add (dst, '\r');
- if (eol_type != EOL_CR)
- Dynarr_add (dst, c);
- }
- else
- Dynarr_add (dst, c);
- char_boundary = 0;
- }
- break;
- case 1:
ch = ( ch << 6 ) | ( c & 0x3f );
- switch ( str->codesys->fixed.size )
- {
- case 1:
- Dynarr_add (dst, ch & 0xff);
- break;
- case 2:
- Dynarr_add (dst, (ch >> 8) & 0xff);
- Dynarr_add (dst, ch & 0xff);
- break;
- case 3:
- Dynarr_add (dst, (ch >> 16) & 0xff);
- Dynarr_add (dst, (ch >> 8) & 0xff);
- Dynarr_add (dst, ch & 0xff);
- break;
- case 4:
- Dynarr_add (dst, (ch >> 24) & 0xff);
- Dynarr_add (dst, (ch >> 16) & 0xff);
- Dynarr_add (dst, (ch >> 8) & 0xff);
- Dynarr_add (dst, ch & 0xff);
- break;
- default:
- fprintf(stderr, "It seems %d bytes stream.\n",
- str->codesys->fixed.size);
- abort ();
- }
+ Dynarr_add (dst, ch & 0xff);
char_boundary = 0;
- break;
- default:
+ }
+ else
+ {
ch = ( ch << 6 ) | ( c & 0x3f );
char_boundary--;
}
DEFSUBR (Fencode_shift_jis_char);
DEFSUBR (Fdecode_big5_char);
DEFSUBR (Fencode_big5_char);
- DEFSUBR (Fset_ucs_char);
- DEFSUBR (Fucs_char);
- DEFSUBR (Fset_char_ucs);
- DEFSUBR (Fchar_ucs);
#endif /* MULE */
defsymbol (&Qcoding_systemp, "coding-system-p");
defsymbol (&Qno_conversion, "no-conversion");
fcd->coding_category_system[CODING_CATEGORY_UTF8]
= Fget_coding_system (Qutf8);
#endif
-
-#if defined(MULE) && !defined(UTF2000)
- {
- size_t i;
-
- for (i = 0; i < countof (fcd->ucs_to_mule_table); i++)
- fcd->ucs_to_mule_table[i] = Qnil;
- }
- staticpro (&mule_to_ucs_table);
- mule_to_ucs_table = Fmake_char_table(Qgeneric);
-#endif /* defined(MULE) && !defined(UTF2000) */
}