/* Header for UCS-4 character representation.
- Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
+ Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
This file is part of XEmacs.
#define CHAR_ASCII_P(ch) ((ch) <= 0x7F)
+\f
+/************************************************************************/
+/* Exported functions */
+/************************************************************************/
+
+extern Lisp_Object Vcharset_ucs;
extern Lisp_Object Vcharset_latin_jisx0201;
+extern Lisp_Object Vcharset_chinese_big5;
+extern Lisp_Object Vcharset_chinese_big5_1;
+extern Lisp_Object Vcharset_chinese_big5_2;
+extern Lisp_Object Vcharset_japanese_jisx0208;
+extern Lisp_Object Vcharset_japanese_jisx0208_1990;
+extern Lisp_Object Vcharset_japanese_jisx0212;
+
+EXFUN (Fget_charset, 1);
+
+extern Lisp_Object Qucs;
+
+Lisp_Object put_char_ccs_code_point (Lisp_Object character,
+ Lisp_Object ccs, Lisp_Object value);
+Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
\f
/************************************************************************/
/* ISO/IEC 10646 SIP */
#define LEADING_BYTE_UCS_SIP (MIN_LEADING_BYTE + 5)
+/* ISO/IEC 10646 for GB */
+#define LEADING_BYTE_UCS_GB (MIN_LEADING_BYTE + 6)
+
/* ISO/IEC 10646 for CNS */
-#define LEADING_BYTE_UCS_CNS (MIN_LEADING_BYTE + 6)
+#define LEADING_BYTE_UCS_CNS (MIN_LEADING_BYTE + 7)
/* ISO/IEC 10646 for JIS */
-#define LEADING_BYTE_UCS_JIS (MIN_LEADING_BYTE + 7)
+#define LEADING_BYTE_UCS_JIS (MIN_LEADING_BYTE + 8)
/* ISO/IEC 10646 for KS */
-#define LEADING_BYTE_UCS_KS (MIN_LEADING_BYTE + 8)
+#define LEADING_BYTE_UCS_KS (MIN_LEADING_BYTE + 9)
-/* ISO/IEC 10646 for Big5 */
-#define LEADING_BYTE_UCS_BIG5 (MIN_LEADING_BYTE + 9)
+/* Japanese JIS X0208 Common 2/4 2/{(8),9,10,11} 4/2 (B) */
+#define LEADING_BYTE_JIS_X0208 (MIN_LEADING_BYTE + 11)
/* Japanese JIS X0208-1990 2/4 2/{(8),9,10,11} 4/2 (B) */
-#define LEADING_BYTE_JAPANESE_JISX0208_1990 (MIN_LEADING_BYTE + 10)
+#define LEADING_BYTE_JAPANESE_JISX0208_1990 (MIN_LEADING_BYTE + 12)
/* Chinese GB 12345-1990 */
-#define LEADING_BYTE_CHINESE_GB12345 (MIN_LEADING_BYTE + 11)
+#define LEADING_BYTE_CHINESE_GB12345 (MIN_LEADING_BYTE + 13)
#define LEADING_BYTE_CHINESE_BIG5 (MIN_LEADING_BYTE + 20)
#define LEADING_BYTE_DAIKANWA_2 (MIN_LEADING_BYTE + 30)
#define LEADING_BYTE_DAIKANWA_3 (MIN_LEADING_BYTE + 31)
-#define LEADING_BYTE_GT (MIN_LEADING_BYTE + 40)
#define LEADING_BYTE_GT_PJ_1 (MIN_LEADING_BYTE + 41)
#define LEADING_BYTE_GT_PJ_2 (MIN_LEADING_BYTE + 42)
#define LEADING_BYTE_GT_PJ_3 (MIN_LEADING_BYTE + 43)
#define LEADING_BYTE_GT_PJ_10 (MIN_LEADING_BYTE + 50)
#define LEADING_BYTE_GT_PJ_11 (MIN_LEADING_BYTE + 51)
-#define LEADING_BYTE_CHINA3_JEF (MIN_LEADING_BYTE + 82)
-#define LEADING_BYTE_CBETA (MIN_LEADING_BYTE + 83)
/* #define LEADING_BYTE_CHINESE_BIG5_CDP (MIN_LEADING_BYTE + 84) */
#define LEADING_BYTE_HANZIKU_1 (MIN_LEADING_BYTE + 85)
#define LEADING_BYTE_HANZIKU_2 (MIN_LEADING_BYTE + 86)
#define XCHARSET(x) XRECORD (x, charset, Lisp_Charset)
#define XSETCHARSET(x, p) XSETRECORD (x, p, charset)
#define CHARSETP(x) RECORDP (x, charset)
-#define GC_CHARSETP(x) GC_RECORDP (x, charset)
#define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
#define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)
#define CHARSET_MOTHER(cs) ((cs)->mother)
#define CHARSET_CONVERSION(cs) ((cs)->conversion)
+INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
+/* Return the number of octet values one table level of CS has to cover.
+   This is CHARSET_CHARS (cs), except for a 94-character set whose byte
+   offset is not 33: there the size is counted from the byte offset up
+   to 128 (an ad-hoc rule introduced for `ascii').  */
+INLINE_HEADER int
+CHARSET_BYTE_SIZE (Lisp_Charset* cs)
+{
+  /* The ordinary case: anything that is not a 94-set, and any 94-set
+     that starts at the usual offset 33.  */
+  if (CHARSET_CHARS (cs) != 94 || CHARSET_BYTE_OFFSET (cs) == 33)
+    return CHARSET_CHARS (cs);
+
+  /* ad-hoc method for `ascii' */
+  return 128 - CHARSET_BYTE_OFFSET (cs);
+}
+
+
INLINE_HEADER Lisp_Object CHARSET_ENCODING_TABLE (Lisp_Charset* cs);
INLINE_HEADER Lisp_Object
CHARSET_ENCODING_TABLE (Lisp_Charset* cs)
}
#define CONVERSION_IDENTICAL 0
-#define CONVERSION_94x60 1
-#define CONVERSION_94x94x60 2
+#define CONVERSION_94 1 /* presumably the values held in CHARSET_CONVERSION (cs) -- TODO confirm */
+#define CONVERSION_96 2
+#define CONVERSION_94x60 3 /* NOTE(review): was 1 before this change; confirm nothing relies on the old number */
+#define CONVERSION_94x94 4
+#define CONVERSION_96x96 5
+#define CONVERSION_94x94x60 6 /* NOTE(review): was 2 before this change; confirm nothing relies on the old number */
+#define CONVERSION_94x94x94 7
+#define CONVERSION_96x96x96 8
+#define CONVERSION_94x94x94x60 9
+#define CONVERSION_94x94x94x94 10
+#define CONVERSION_96x96x96x96 11
#define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs))
#define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs))
#define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs))
#define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs))
#define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs))
+#define XCHARSET_BYTE_SIZE(cs) CHARSET_BYTE_SIZE (XCHARSET (cs))
#define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
#define XCHARSET_DECODING_TABLE(cs) CHARSET_DECODING_TABLE(XCHARSET(cs))
#define MIN_CHAR_DAIKANWA 0x00E00000
#define MAX_CHAR_DAIKANWA (MIN_CHAR_DAIKANWA + 50100) /* 0xE0FFFF */
+/*
#define MIN_CHAR_CBETA 0x00E20000
#define MAX_CHAR_CBETA 0x00E2FFFF
+
#define MIN_CHAR_CHINA3_JEF 0x00E80000
#define MAX_CHAR_CHINA3_JEF 0x00E8FFFF
+*/
#define MIN_CHAR_94 0x00E90940
#define MAX_CHAR_94 (MIN_CHAR_94 + 94 * 80 - 1)
#define MIN_CHAR_96x96 0xF4C000
#define MAX_CHAR_96x96 (MIN_CHAR_96x96 + 96 * 96 * 80 - 1)
+/*
#define MIN_CHAR_GT 0x61000000
#define MAX_CHAR_GT (MIN_CHAR_GT + 66773)
+*/
#define MIN_CHAR_BIG5_CDP 0x62000000
#define MAX_CHAR_BIG5_CDP 0x6200FFFF
#define MIN_CHAR_HANZIKU_1 (0x62000000 + 65536 * 1)
Emchar decode_builtin_char (Lisp_Object charset, int code_point);
-extern Lisp_Object Vcharset_chinese_big5;
-extern Lisp_Object Vcharset_chinese_big5_1;
-extern Lisp_Object Vcharset_chinese_big5_2;
-
-INLINE_HEADER Emchar
-DECODE_DEFINED_CHAR (Lisp_Object charset, int code_point);
-INLINE_HEADER Emchar
-DECODE_DEFINED_CHAR (Lisp_Object charset, int code_point)
+INLINE_HEADER Lisp_Object
+get_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code);
+/* Look up one octet level of a decoding table.
+
+   TABLE is the table for this level, CCS the charset it belongs to and
+   CODE the octet to look up.  If TABLE is a vector, the element at index
+   CODE - XCHARSET_BYTE_OFFSET (CCS) is returned, or Qunbound when that
+   index lies outside the vector (below the first slot or past the last).
+   If TABLE is not a vector it is returned as is, i.e. a non-vector value
+   stands for every octet of the level.  */
+INLINE_HEADER Lisp_Object
+get_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code)
{
- int dim = XCHARSET_DIMENSION (charset);
- Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (charset);
- int idx;
- Lisp_Object ch;
+ int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
- while (dim > 0)
+ if (VECTORP (table))
{
- dim--;
- if ( VECTORP (decoding_table)
- && ( 0 <= (idx = ((code_point >> (dim * 8))
- & 255) - XCHARSET_BYTE_OFFSET (charset)) )
- && ( idx < XVECTOR_LENGTH (decoding_table) )
- && !NILP (ch = XVECTOR_DATA(decoding_table)[idx]) )
- {
- if (CHARP (ch))
- return XCHAR (ch);
- else
- decoding_table = ch;
- }
+ int idx = code - byte_offset;
+
+ /* CODE may be smaller than the byte offset, which makes idx negative;
+ reject that as well as indices past the end, so that the vector is
+ never read out of bounds.  */
+ if (idx >= 0 && idx < XVECTOR_LENGTH(table))
+ return XVECTOR_DATA(table)[idx];
else
- break;
+ return Qunbound;
+ }
+ else
+ return table;
+}
+
+INLINE_HEADER Lisp_Object
+put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code,
+		     Lisp_Object value);
+/* Store VALUE for octet CODE in one level of a decoding table of CCS and
+   return the table to use from now on.
+
+   A vector TABLE is modified in place and returned.  A non-vector TABLE
+   that is already EQ to VALUE is returned unchanged.  Any other
+   non-vector TABLE is replaced by the result of
+   make_vector (XCHARSET_BYTE_SIZE (CCS), TABLE), into which VALUE is
+   then stored.  Callers must keep the returned object, since it can
+   differ from TABLE.
+
+   NOTE(review): the slot index CODE - XCHARSET_BYTE_OFFSET (CCS) is not
+   range-checked before the store; this assumes callers only pass octets
+   that are valid for CCS -- confirm.  */
+INLINE_HEADER Lisp_Object
+put_ccs_octet_table (Lisp_Object table, Lisp_Object ccs, int code,
+		     Lisp_Object value)
+{
+  int slot = code - XCHARSET_BYTE_OFFSET (ccs);
+
+  if (!VECTORP (table))
+    {
+      /* Nothing to record when the level already stands for VALUE.  */
+      if (EQ (table, value))
+	return table;
+
+      /* Expand the single value into a full level before storing.  */
+      table = make_vector (XCHARSET_BYTE_SIZE (ccs), table);
+    }
+
+  XVECTOR_DATA(table)[slot] = value;
+  return table;
+}
+
+INLINE_HEADER void
+decoding_table_put_char (Lisp_Object ccs,
+ int code_point, Lisp_Object character);
+INLINE_HEADER void /* Store CHARACTER at CODE_POINT in the decoding table of charset CCS. */
+decoding_table_put_char (Lisp_Object ccs,
+ int code_point, Lisp_Object character)
+{ /* The table has one level per charset dimension; each level is indexed by one octet of CODE_POINT, highest octet first. */
+ Lisp_Object table1 = XCHARSET_DECODING_TABLE (ccs);
+ int dim = XCHARSET_DIMENSION (ccs);
+
+ if (dim == 1) /* single level: CODE_POINT is passed on as is, without masking to one octet */
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs, code_point, character);
+ else if (dim == 2)
+ {
+ Lisp_Object table2 /* sub-table selected by the high octet */
+ = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 8));
+
+ table2 = put_ccs_octet_table (table2, ccs, /* may return a different object than the table2 passed in */
+ (unsigned char)code_point, character);
+ XCHARSET_DECODING_TABLE (ccs) /* so the result is stored back into the parent level */
+ = put_ccs_octet_table (table1, ccs,
+ (unsigned char)(code_point >> 8), table2);
}
- return -1;
+ else if (dim == 3)
+ {
+ Lisp_Object table2 /* first fetch the sub-tables from the top level downwards ... */
+ = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 16));
+ Lisp_Object table3
+ = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 8));
+
+ table3 = put_ccs_octet_table (table3, ccs, /* ... then store from the lowest level upwards */
+ (unsigned char)code_point, character);
+ table2 = put_ccs_octet_table (table2, ccs,
+ (unsigned char)(code_point >> 8), table3);
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs,
+ (unsigned char)(code_point >> 16), table2);
+ }
+ else /* if (dim == 4) */ /* every dimension other than 1, 2 or 3 takes this four-level path */
+ {
+ Lisp_Object table2 /* same scheme as above, with one more level */
+ = get_ccs_octet_table (table1, ccs, (unsigned char)(code_point >> 24));
+ Lisp_Object table3
+ = get_ccs_octet_table (table2, ccs, (unsigned char)(code_point >> 16));
+ Lisp_Object table4
+ = get_ccs_octet_table (table3, ccs, (unsigned char)(code_point >> 8));
+
+ table4 = put_ccs_octet_table (table4, ccs,
+ (unsigned char)code_point, character);
+ table3 = put_ccs_octet_table (table3, ccs,
+ (unsigned char)(code_point >> 8), table4);
+ table2 = put_ccs_octet_table (table2, ccs,
+ (unsigned char)(code_point >> 16), table3);
+ XCHARSET_DECODING_TABLE (ccs)
+ = put_ccs_octet_table (table1, ccs,
+ (unsigned char)(code_point >> 24), table2);
+ }
+}
+
+INLINE_HEADER void
+decoding_table_remove_char (Lisp_Object ccs, int code_point);
+INLINE_HEADER void /* Clear the decoding-table entry of charset CCS at CODE_POINT. */
+decoding_table_remove_char (Lisp_Object ccs, int code_point)
+{
+ decoding_table_put_char (ccs, code_point, Qunbound); /* removal is a store of Qunbound */
}
+Emchar decode_defined_char (Lisp_Object charset, int code_point);
+
INLINE_HEADER Emchar DECODE_CHAR (Lisp_Object charset, int code_point);
INLINE_HEADER Emchar
DECODE_CHAR (Lisp_Object charset, int code_point)
{
- Emchar char_id = DECODE_DEFINED_CHAR (charset, code_point);
+ Emchar char_id = decode_defined_char (charset, code_point);
if (char_id >= 0)
return char_id;
extern Lisp_Object Vcharacter_attribute_table;
int encode_builtin_char_1 (Emchar c, Lisp_Object* charset);
-int charset_code_point (Lisp_Object charset, Emchar ch);
+int charset_code_point (Lisp_Object charset, Emchar ch, int defined_only);
int range_charset_code_point (Lisp_Object charset, Emchar ch);
extern Lisp_Object Vdefault_coded_charset_priority_list;
*charset = Ffind_charset (Fcar (charsets));
if (!NILP (*charset))
{
- int code_point = charset_code_point (*charset, ch);
+ int code_point = charset_code_point (*charset, ch, 0);
if (code_point >= 0)
return code_point;
return cc;
}
-\f
-/************************************************************************/
-/* Exported functions */
-/************************************************************************/
-
-EXFUN (Fget_charset, 1);
-
-extern Lisp_Object Qucs;
-
-extern Lisp_Object Vcharset_japanese_jisx0208;
-extern Lisp_Object Vcharset_japanese_jisx0208_1990;
-extern Lisp_Object Vcharset_japanese_jisx0212;
-
-Lisp_Object put_char_ccs_code_point (Lisp_Object character,
- Lisp_Object ccs, Lisp_Object value);
-Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
-
#endif /* INCLUDED_char_ucs_h_ */