/* Header for UCS-4 character representation.
- Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
+ Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
This file is part of XEmacs.
#define CHAR_ASCII_P(ch) ((ch) <= 0x7F)
+\f
+/************************************************************************/
+/* Exported functions */
+/************************************************************************/
+
+extern Lisp_Object Vcharset_ucs;
extern Lisp_Object Vcharset_latin_jisx0201;
+extern Lisp_Object Vcharset_chinese_big5;
+extern Lisp_Object Vcharset_chinese_big5_1;
+extern Lisp_Object Vcharset_chinese_big5_2;
+extern Lisp_Object Vcharset_japanese_jisx0208;
+extern Lisp_Object Vcharset_japanese_jisx0208_1990;
+extern Lisp_Object Vcharset_japanese_jisx0212;
+
+EXFUN (Fget_charset, 1);
+
+extern Lisp_Object Qucs;
+
+Lisp_Object put_char_ccs_code_point (Lisp_Object character,
+ Lisp_Object ccs, Lisp_Object value);
+Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
\f
/************************************************************************/
typedef short Charset_ID;
#define MIN_LEADING_BYTE -0x200
-#define CHARSET_ID_OFFSET 0x00
/* ISO/IEC 10646 */
-#define LEADING_BYTE_UCS (CHARSET_ID_OFFSET - 1)
+#define LEADING_BYTE_UCS (MIN_LEADING_BYTE + 1)
/* represent normal 80-9F */
-#define LEADING_BYTE_CONTROL_1 (CHARSET_ID_OFFSET - 2)
+#define LEADING_BYTE_CONTROL_1 (MIN_LEADING_BYTE + 2)
/* ISO/IEC 10646 BMP */
-#define LEADING_BYTE_UCS_BMP (CHARSET_ID_OFFSET - 3)
+#define LEADING_BYTE_UCS_BMP (MIN_LEADING_BYTE + 3)
/* ISO/IEC 10646 SMP */
-#define LEADING_BYTE_UCS_SMP (CHARSET_ID_OFFSET - 4)
+#define LEADING_BYTE_UCS_SMP (MIN_LEADING_BYTE + 4)
/* ISO/IEC 10646 SIP */
-#define LEADING_BYTE_UCS_SIP (CHARSET_ID_OFFSET - 5)
+#define LEADING_BYTE_UCS_SIP (MIN_LEADING_BYTE + 5)
/* ISO/IEC 10646 for CNS */
-#define LEADING_BYTE_UCS_CNS (CHARSET_ID_OFFSET - 6)
+#define LEADING_BYTE_UCS_CNS (MIN_LEADING_BYTE + 6)
/* ISO/IEC 10646 for JIS */
-#define LEADING_BYTE_UCS_JIS (CHARSET_ID_OFFSET - 7)
+#define LEADING_BYTE_UCS_JIS (MIN_LEADING_BYTE + 7)
/* ISO/IEC 10646 for KS */
-#define LEADING_BYTE_UCS_KS (CHARSET_ID_OFFSET - 8)
+#define LEADING_BYTE_UCS_KS (MIN_LEADING_BYTE + 8)
/* ISO/IEC 10646 for Big5 */
-#define LEADING_BYTE_UCS_BIG5 (CHARSET_ID_OFFSET - 9)
+#define LEADING_BYTE_UCS_BIG5 (MIN_LEADING_BYTE + 9)
/* Japanese JIS X0208-1990 2/4 2/{(8),9,10,11} 4/2 (B) */
-#define LEADING_BYTE_JAPANESE_JISX0208_1990 (CHARSET_ID_OFFSET - 10)
+#define LEADING_BYTE_JAPANESE_JISX0208_1990 (MIN_LEADING_BYTE + 10)
/* Chinese GB 12345-1990 */
-#define LEADING_BYTE_CHINESE_GB12345 (CHARSET_ID_OFFSET - 11)
+#define LEADING_BYTE_CHINESE_GB12345 (MIN_LEADING_BYTE + 11)
-#define LEADING_BYTE_CHINESE_BIG5 (CHARSET_ID_OFFSET - 20)
+#define LEADING_BYTE_CHINESE_BIG5 (MIN_LEADING_BYTE + 20)
/* Big5 Level 1 2/4 2/{(8),9,10,11} 4/0 '0' */
-#define LEADING_BYTE_CHINESE_BIG5_1 (CHARSET_ID_OFFSET - 21)
+#define LEADING_BYTE_CHINESE_BIG5_1 (MIN_LEADING_BYTE + 21)
/* Big5 Level 2 2/4 2/{(8),9,10,11} 4/0 '1' */
-#define LEADING_BYTE_CHINESE_BIG5_2 (CHARSET_ID_OFFSET - 22)
+#define LEADING_BYTE_CHINESE_BIG5_2 (MIN_LEADING_BYTE + 22)
/* VISCII 1.1 */
-#define LEADING_BYTE_LATIN_VISCII (CHARSET_ID_OFFSET - 24)
+#define LEADING_BYTE_LATIN_VISCII (MIN_LEADING_BYTE + 24)
/* MULE VISCII-LOWER (CHARSET_ID_OFFSET_96 + '1') */
-#define LEADING_BYTE_LATIN_VISCII_LOWER (CHARSET_ID_OFFSET - 25)
+#define LEADING_BYTE_LATIN_VISCII_LOWER (MIN_LEADING_BYTE + 25)
/* MULE VISCII-UPPER (CHARSET_ID_OFFSET_96 + '2') */
-#define LEADING_BYTE_LATIN_VISCII_UPPER (CHARSET_ID_OFFSET - 26)
-
-#define LEADING_BYTE_ETHIOPIC_UCS (CHARSET_ID_OFFSET - 27)
-
-#define LEADING_BYTE_DAIKANWA_0 (CHARSET_ID_OFFSET - 28)
-#define LEADING_BYTE_DAIKANWA_1 (CHARSET_ID_OFFSET - 29)
-#define LEADING_BYTE_DAIKANWA_2 (CHARSET_ID_OFFSET - 30)
-#define LEADING_BYTE_DAIKANWA_3 (CHARSET_ID_OFFSET - 31)
-
-#define LEADING_BYTE_GT (CHARSET_ID_OFFSET - 40)
-#define LEADING_BYTE_GT_PJ_1 (CHARSET_ID_OFFSET - 41)
-#define LEADING_BYTE_GT_PJ_2 (CHARSET_ID_OFFSET - 42)
-#define LEADING_BYTE_GT_PJ_3 (CHARSET_ID_OFFSET - 43)
-#define LEADING_BYTE_GT_PJ_4 (CHARSET_ID_OFFSET - 44)
-#define LEADING_BYTE_GT_PJ_5 (CHARSET_ID_OFFSET - 45)
-#define LEADING_BYTE_GT_PJ_6 (CHARSET_ID_OFFSET - 46)
-#define LEADING_BYTE_GT_PJ_7 (CHARSET_ID_OFFSET - 47)
-#define LEADING_BYTE_GT_PJ_8 (CHARSET_ID_OFFSET - 48)
-#define LEADING_BYTE_GT_PJ_9 (CHARSET_ID_OFFSET - 49)
-#define LEADING_BYTE_GT_PJ_10 (CHARSET_ID_OFFSET - 50)
-#define LEADING_BYTE_GT_PJ_11 (CHARSET_ID_OFFSET - 51)
-
-#define LEADING_BYTE_CHINA3_JEF (CHARSET_ID_OFFSET - 82)
-#define LEADING_BYTE_CBETA (CHARSET_ID_OFFSET - 83)
-#define LEADING_BYTE_CHINESE_BIG5_CDP (CHARSET_ID_OFFSET - 84)
-#define LEADING_BYTE_HANZIKU_1 (CHARSET_ID_OFFSET - 85)
-#define LEADING_BYTE_HANZIKU_2 (CHARSET_ID_OFFSET - 86)
-#define LEADING_BYTE_HANZIKU_3 (CHARSET_ID_OFFSET - 87)
-#define LEADING_BYTE_HANZIKU_4 (CHARSET_ID_OFFSET - 88)
-#define LEADING_BYTE_HANZIKU_5 (CHARSET_ID_OFFSET - 89)
-#define LEADING_BYTE_HANZIKU_6 (CHARSET_ID_OFFSET - 90)
-#define LEADING_BYTE_HANZIKU_7 (CHARSET_ID_OFFSET - 91)
-#define LEADING_BYTE_HANZIKU_8 (CHARSET_ID_OFFSET - 92)
-#define LEADING_BYTE_HANZIKU_9 (CHARSET_ID_OFFSET - 93)
-#define LEADING_BYTE_HANZIKU_10 (CHARSET_ID_OFFSET - 94)
-#define LEADING_BYTE_HANZIKU_11 (CHARSET_ID_OFFSET - 95)
-#define LEADING_BYTE_HANZIKU_12 (CHARSET_ID_OFFSET - 96)
-
-#define MIN_LEADING_BYTE_PRIVATE MIN_LEADING_BYTE
-#define MAX_LEADING_BYTE_PRIVATE (CHARSET_ID_OFFSET - 97)
-
-
-/* #define CHARSET_ID_OFFSET_94 (CHARSET_ID_OFFSET - '0') */
-
-/* #define MIN_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '0') */
-/* #define MAX_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '?') */
+#define LEADING_BYTE_LATIN_VISCII_UPPER (MIN_LEADING_BYTE + 26)
+
+#define LEADING_BYTE_ETHIOPIC_UCS (MIN_LEADING_BYTE + 27)
+
+#define LEADING_BYTE_DAIKANWA_0 (MIN_LEADING_BYTE + 28)
+#define LEADING_BYTE_DAIKANWA_1 (MIN_LEADING_BYTE + 29)
+#define LEADING_BYTE_DAIKANWA_2 (MIN_LEADING_BYTE + 30)
+#define LEADING_BYTE_DAIKANWA_3 (MIN_LEADING_BYTE + 31)
+
+#define LEADING_BYTE_GT (MIN_LEADING_BYTE + 40)
+#define LEADING_BYTE_GT_PJ_1 (MIN_LEADING_BYTE + 41)
+#define LEADING_BYTE_GT_PJ_2 (MIN_LEADING_BYTE + 42)
+#define LEADING_BYTE_GT_PJ_3 (MIN_LEADING_BYTE + 43)
+#define LEADING_BYTE_GT_PJ_4 (MIN_LEADING_BYTE + 44)
+#define LEADING_BYTE_GT_PJ_5 (MIN_LEADING_BYTE + 45)
+#define LEADING_BYTE_GT_PJ_6 (MIN_LEADING_BYTE + 46)
+#define LEADING_BYTE_GT_PJ_7 (MIN_LEADING_BYTE + 47)
+#define LEADING_BYTE_GT_PJ_8 (MIN_LEADING_BYTE + 48)
+#define LEADING_BYTE_GT_PJ_9 (MIN_LEADING_BYTE + 49)
+#define LEADING_BYTE_GT_PJ_10 (MIN_LEADING_BYTE + 50)
+#define LEADING_BYTE_GT_PJ_11 (MIN_LEADING_BYTE + 51)
+
+#define LEADING_BYTE_CHINA3_JEF (MIN_LEADING_BYTE + 82)
+#define LEADING_BYTE_CBETA (MIN_LEADING_BYTE + 83)
+/* #define LEADING_BYTE_CHINESE_BIG5_CDP (MIN_LEADING_BYTE + 84) */
+#define LEADING_BYTE_HANZIKU_1 (MIN_LEADING_BYTE + 85)
+#define LEADING_BYTE_HANZIKU_2 (MIN_LEADING_BYTE + 86)
+#define LEADING_BYTE_HANZIKU_3 (MIN_LEADING_BYTE + 87)
+#define LEADING_BYTE_HANZIKU_4 (MIN_LEADING_BYTE + 88)
+#define LEADING_BYTE_HANZIKU_5 (MIN_LEADING_BYTE + 89)
+#define LEADING_BYTE_HANZIKU_6 (MIN_LEADING_BYTE + 90)
+#define LEADING_BYTE_HANZIKU_7 (MIN_LEADING_BYTE + 91)
+#define LEADING_BYTE_HANZIKU_8 (MIN_LEADING_BYTE + 92)
+#define LEADING_BYTE_HANZIKU_9 (MIN_LEADING_BYTE + 93)
+#define LEADING_BYTE_HANZIKU_10 (MIN_LEADING_BYTE + 94)
+#define LEADING_BYTE_HANZIKU_11 (MIN_LEADING_BYTE + 95)
+#define LEADING_BYTE_HANZIKU_12 (MIN_LEADING_BYTE + 96)
+
+/* The named charsets above take IDs no higher than MIN_LEADING_BYTE + 96;
+   the private range runs from MIN_LEADING_BYTE + 97 up to -1.  The
+   negative constant is parenthesized like every other ID macro here so
+   that it expands safely inside any expression.  */
+#define MIN_LEADING_BYTE_PRIVATE (MIN_LEADING_BYTE + 97)
+#define MAX_LEADING_BYTE_PRIVATE (-1)
+
/* ISO 646 IRV */
#define LEADING_BYTE_ASCII 6 /* (CHARSET_ID_OFFSET_94 + 'B') */
#define CHARSET_MOTHER(cs) ((cs)->mother)
#define CHARSET_CONVERSION(cs) ((cs)->conversion)
+INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
+INLINE_HEADER int
+CHARSET_BYTE_SIZE (Lisp_Charset* cs)
+{
+ /* ad-hoc method for `ascii': a 94-character set whose byte offset is
+    not 33 reports 128 - offset instead of its character count, so the
+    returned size covers every byte from the offset up to 127.  All other
+    sets report CHARSET_CHARS.  XCHARSET_BYTE_SIZE is used as the length
+    of the per-octet vectors made in put_ccs_octet_table.  */
+ if ((CHARSET_CHARS (cs) == 94) &&
+ (CHARSET_BYTE_OFFSET (cs) != 33))
+ return 128 - CHARSET_BYTE_OFFSET (cs);
+ else
+ return CHARSET_CHARS (cs);
+}
+
INLINE_HEADER Lisp_Object CHARSET_ENCODING_TABLE (Lisp_Charset* cs);
INLINE_HEADER Lisp_Object
CHARSET_ENCODING_TABLE (Lisp_Charset* cs)
#define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs))
#define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs))
#define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs))
+#define XCHARSET_BYTE_SIZE(cs) CHARSET_BYTE_SIZE (XCHARSET (cs))
#define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
#define XCHARSET_DECODING_TABLE(cs) CHARSET_DECODING_TABLE(XCHARSET(cs))
Emchar decode_builtin_char (Lisp_Object charset, int code_point);
-extern Lisp_Object Vcharset_chinese_big5;
-extern Lisp_Object Vcharset_chinese_big5_1;
-extern Lisp_Object Vcharset_chinese_big5_2;
-
-INLINE_HEADER Emchar
-DECODE_DEFINED_CHAR (Lisp_Object charset, int code_point);
-INLINE_HEADER Emchar
-DECODE_DEFINED_CHAR (Lisp_Object charset, int code_point)
+/* Look up octet CODE of charset CCS in TABLE.
+   If TABLE is a vector, return the element at CODE minus the byte offset
+   of CCS, or Qunbound when that index lies outside the vector (below 0
+   or past its end).  A non-vector TABLE is returned as-is;
+   put_ccs_octet_table uses such a value as the fill for every slot when
+   it later makes a vector.  */
+INLINE_HEADER Lisp_Object
+get_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code);
+INLINE_HEADER Lisp_Object
+get_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code)
{
- int dim = XCHARSET_DIMENSION (charset);
- Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (charset);
- int idx;
- Lisp_Object ch;
+ int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
- while (dim > 0)
+ if (VECTORP (table))
{
- dim--;
- if ( VECTORP (decoding_table)
- && ( 0 <= (idx = ((code_point >> (dim * 8))
- & 255) - XCHARSET_BYTE_OFFSET (charset)) )
- && ( idx < XVECTOR_LENGTH (decoding_table) )
- && !NILP (ch = XVECTOR_DATA(decoding_table)[idx]) )
- {
- if (CHARP (ch))
- return XCHAR (ch);
- else
- decoding_table = ch;
- }
+ int idx = code - byte_offset;
+
+ /* CODE may be smaller than the byte offset, so a negative index must
+    be rejected as well as one past the end.  */
+ if (idx >= 0 && idx < XVECTOR_LENGTH(table))
+ return XVECTOR_DATA(table)[idx];
else
- break;
+ return Qunbound;
}
- return -1;
+ else
+ return table;
}
+/* Store VALUE in the slot of TABLE selected by octet CODE of charset CCS
+   and return the table the caller must keep.
+
+   - If TABLE is a vector, the slot at CODE minus the byte offset of CCS
+     is overwritten and TABLE itself is returned.
+   - If TABLE is not a vector and is EQ to VALUE, nothing needs to be
+     stored and TABLE is returned.
+   - Otherwise a new vector of XCHARSET_BYTE_SIZE (ccs) slots is made,
+     every slot filled with the old TABLE value, VALUE is stored in the
+     selected slot, and the new vector is returned.
+
+   An octet that falls outside the table (below the byte offset or past
+   the last slot) is ignored and TABLE is returned unchanged, instead of
+   writing outside the vector.  */
+INLINE_HEADER Lisp_Object
+put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code,
+                     Lisp_Object value);
+INLINE_HEADER Lisp_Object
+put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code,
+                     Lisp_Object value)
+{
+  int idx = code - XCHARSET_BYTE_OFFSET (ccs);
+
+  if (VECTORP (table))
+    {
+      /* Only touch slots that really exist in this vector.  */
+      if (idx >= 0 && idx < XVECTOR_LENGTH (table))
+        XVECTOR_DATA(table)[idx] = value;
+      return table;
+    }
+  else if (EQ (table, value))
+    return table;
+  else
+    {
+      int ccs_len = XCHARSET_BYTE_SIZE (ccs);
+
+      /* Do not allocate a vector for an octet it could not hold.  */
+      if (idx < 0 || idx >= ccs_len)
+        return table;
+
+      table = make_vector (ccs_len, table);
+      XVECTOR_DATA(table)[idx] = value;
+      return table;
+    }
+}
+/* Record CHARACTER as the entry for CODE_POINT in the decoding table of
+   charset CCS.  The table is handled as nested per-octet tables, one
+   level per dimension of CCS: the inner tables for CODE_POINT are
+   fetched with get_ccs_octet_table, the innermost one is updated with
+   put_ccs_octet_table, and each updated level is then stored back into
+   its parent, outward to the root.  Storing back is needed because
+   put_ccs_octet_table may return a newly made vector rather than the
+   table it was given.  The root is finally assigned back to
+   XCHARSET_DECODING_TABLE (ccs).  Octets are taken from CODE_POINT most
+   significant first by shifting and casting to unsigned char; for
+   dimension 1 CODE_POINT is passed through without a cast.  */
+INLINE_HEADER void
+decoding_table_put_char (Lisp_Object ccs,
+ int code_point, Lisp_Object character);
+INLINE_HEADER void
+decoding_table_put_char (Lisp_Object ccs,
+ int code_point, Lisp_Object character)
+{
+ Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
+ int dim = XCHARSET_DIMENSION (ccs);
+
+ if (dim == 1)
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs, code_point, character);
+ else if (dim == 2)
+ {
+ Lisp_Object table2
+ = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
+
+ table2 = put_ccs_octet_table (table2, ccs,
+ (unsigned char)code_point, character);
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs,
+ (unsigned char)(code_point >> 8), table2);
+ }
+ else if (dim == 3)
+ {
+ Lisp_Object table2
+ = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
+ Lisp_Object table3
+ = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
+
+ table3 = put_ccs_octet_table (table3, ccs,
+ (unsigned char)code_point, character);
+ table2 = put_ccs_octet_table (table2, ccs,
+ (unsigned char)(code_point >> 8), table3);
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs,
+ (unsigned char)(code_point >> 16), table2);
+ }
+ else /* if (dim == 4); any dimension other than 1, 2 or 3 also lands
+    here and is treated as 4 */
+ {
+ Lisp_Object table2
+ = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
+ Lisp_Object table3
+ = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
+ Lisp_Object table4
+ = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
+
+ table4 = put_ccs_octet_table (table4, ccs,
+ (unsigned char)code_point, character);
+ table3 = put_ccs_octet_table (table3, ccs,
+ (unsigned char)(code_point >> 8), table4);
+ table2 = put_ccs_octet_table (table2, ccs,
+ (unsigned char)(code_point >> 16), table3);
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs,
+ (unsigned char)(code_point >> 24), table2);
+ }
+}
+/* Drop the decoding-table entry of charset CCS for CODE_POINT.  This is
+   done by storing Qunbound at that code point through
+   decoding_table_put_char; no table is freed or shrunk here.  */
+INLINE_HEADER void
+decoding_table_remove_char (Lisp_Object ccs, int code_point);
+INLINE_HEADER void
+decoding_table_remove_char (Lisp_Object ccs, int code_point)
+{
+ decoding_table_put_char (ccs, code_point, Qunbound);
+}
+
+Emchar decode_defined_char (Lisp_Object charset, int code_point);
+
INLINE_HEADER Emchar DECODE_CHAR (Lisp_Object charset, int code_point);
INLINE_HEADER Emchar
DECODE_CHAR (Lisp_Object charset, int code_point)
{
- Emchar char_id = DECODE_DEFINED_CHAR (charset, code_point);
+ Emchar char_id = decode_defined_char (charset, code_point);
if (char_id >= 0)
return char_id;
return cc;
}
-\f
-/************************************************************************/
-/* Exported functions */
-/************************************************************************/
-
-EXFUN (Fget_charset, 1);
-
-extern Lisp_Object Qucs;
-
-extern Lisp_Object Vcharset_japanese_jisx0208;
-extern Lisp_Object Vcharset_japanese_jisx0208_1990;
-extern Lisp_Object Vcharset_japanese_jisx0212;
-
-Lisp_Object put_char_ccs_code_point (Lisp_Object character,
- Lisp_Object ccs, Lisp_Object value);
-Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
-
#endif /* INCLUDED_char_ucs_h_ */