#include <config.h>
#include "lisp.h"
-#include <stddef.h>
#include "buffer.h"
#include "chartab.h"
#include "lstream.h"
#include "device.h"
#include "faces.h"
+#include "mule-ccl.h"
/* The various pre-defined charsets. */
Lisp_Object Vcharset_japanese_jisx0212;
Lisp_Object Vcharset_chinese_cns11643_1;
Lisp_Object Vcharset_chinese_cns11643_2;
+#ifdef UTF2000
+Lisp_Object Vcharset_chinese_cns11643_3;
+Lisp_Object Vcharset_chinese_cns11643_4;
+Lisp_Object Vcharset_chinese_cns11643_5;
+Lisp_Object Vcharset_chinese_cns11643_6;
+Lisp_Object Vcharset_chinese_cns11643_7;
+Lisp_Object Vcharset_ucs_bmp;
+#endif
Lisp_Object Vcharset_chinese_big5_1;
Lisp_Object Vcharset_chinese_big5_2;
#endif /* ENABLE_COMPOSITE_CHARS */
/* Table of charsets indexed by leading byte. */
-Lisp_Object charset_by_leading_byte[128];
+Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
/* Table of charsets indexed by type/final-byte/direction. */
+#ifdef UTF2000
+Lisp_Object charset_by_attributes[4][128];
+#else
Lisp_Object charset_by_attributes[4][128][2];
+#endif
+#ifndef UTF2000
/* Table of number of bytes in the string representation of a character
indexed by the first byte of that representation.
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x80 - 0x8f are for Dimension-1 official charsets */
+#ifdef CHAR_IS_UCS4
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
+#else
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+#endif
/* 0x90 - 0x9d are for Dimension-2 official charsets */
/* 0x9e is for Dimension-1 private charsets */
/* 0x9f is for Dimension-2 private charsets */
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
};
+#endif
+
+#ifdef UTF2000
+Charset_ID latin_a_char_to_charset[128] = { /* Charset chosen for each code point
+     U+0100..U+017F: 128 entries in code-point order, each row labelled with
+     its code point.  Every value is a LEADING_BYTE_* charset ID: one of
+     ISO 8859-2/-3/-4, JIS X 0212, or UCS-BMP.  Row i lines up with row i of
+     latin_a_char_to_byte1 and latin_a_char_to_byte2.
+     NOTE(review): presumably indexed by (code point - 0x100); the lookup
+     code is not in this chunk -- confirm. */
+ /* U+0100 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0101 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0102 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0103 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0104 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0105 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0106 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0107 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0108 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0109 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+010A */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+010B */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+010C */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+010D */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+010E */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+010F */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0110 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0111 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0112 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0113 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0114 */ LEADING_BYTE_UCS_BMP,
+ /* U+0115 */ LEADING_BYTE_UCS_BMP,
+ /* U+0116 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0117 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0118 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0119 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+011A */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+011B */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+011C */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+011D */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+011E */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+011F */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0120 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0121 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0122 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0123 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0124 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0125 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0126 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0127 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0128 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0129 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+012A */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+012B */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+012C */ LEADING_BYTE_UCS_BMP,
+ /* U+012D */ LEADING_BYTE_UCS_BMP,
+ /* U+012E */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+012F */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0130 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0131 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0132 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0133 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0134 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0135 */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+0136 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0137 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0138 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0139 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+013A */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+013B */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+013C */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+013D */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+013E */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+013F */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0140 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0141 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0142 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0143 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0144 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0145 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0146 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0147 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0148 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0149 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+014A */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+014B */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+014C */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+014D */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+014E */ LEADING_BYTE_UCS_BMP,
+ /* U+014F */ LEADING_BYTE_UCS_BMP,
+ /* U+0150 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0151 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0152 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0153 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0154 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0155 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0156 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0157 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0158 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0159 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+015A */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+015B */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+015C */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+015D */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+015E */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+015F */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0160 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0161 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0162 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0163 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0164 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0165 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0166 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0167 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0168 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0169 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+016A */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+016B */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+016C */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+016D */ LEADING_BYTE_LATIN_ISO8859_3,
+ /* U+016E */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+016F */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0170 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0171 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+0172 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0173 */ LEADING_BYTE_LATIN_ISO8859_4,
+ /* U+0174 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0175 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0176 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0177 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0178 */ LEADING_BYTE_JAPANESE_JISX0212,
+ /* U+0179 */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+017A */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+017B */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+017C */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+017D */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+017E */ LEADING_BYTE_LATIN_ISO8859_2,
+ /* U+017F */ LEADING_BYTE_UCS_BMP
+};
+
+unsigned char latin_a_char_to_byte1[128] = { /* First byte value for each code
+     point U+0100..U+017F, row for row with latin_a_char_to_charset.
+     ISO 8859 rows are spelled as (8-bit code - 0x80), i.e. with the high bit
+     removed; UCS-BMP rows hold 0x01; JIS X 0212 rows hold 0x29, 0x2A or
+     0x2B. */
+ /* U+0100 */ 0xC0 - 0x80,
+ /* U+0101 */ 0xE0 - 0x80,
+ /* U+0102 */ 0xC3 - 0x80,
+ /* U+0103 */ 0xE3 - 0x80,
+ /* U+0104 */ 0xA1 - 0x80,
+ /* U+0105 */ 0xB1 - 0x80,
+ /* U+0106 */ 0xC6 - 0x80,
+ /* U+0107 */ 0xE6 - 0x80,
+ /* U+0108 */ 0xC6 - 0x80,
+ /* U+0109 */ 0xE6 - 0x80,
+ /* U+010A */ 0xC5 - 0x80,
+ /* U+010B */ 0xE5 - 0x80,
+ /* U+010C */ 0xC8 - 0x80,
+ /* U+010D */ 0xE8 - 0x80,
+ /* U+010E */ 0xCF - 0x80,
+ /* U+010F */ 0xEF - 0x80,
+ /* U+0110 */ 0xD0 - 0x80,
+ /* U+0111 */ 0xF0 - 0x80,
+ /* U+0112 */ 0xAA - 0x80,
+ /* U+0113 */ 0xBA - 0x80,
+ /* U+0114 */ 0x01,
+ /* U+0115 */ 0x01,
+ /* U+0116 */ 0xCC - 0x80,
+ /* U+0117 */ 0xEC - 0x80,
+ /* U+0118 */ 0xCA - 0x80,
+ /* U+0119 */ 0xEA - 0x80,
+ /* U+011A */ 0xCC - 0x80,
+ /* U+011B */ 0xEC - 0x80,
+ /* U+011C */ 0xD8 - 0x80,
+ /* U+011D */ 0xF8 - 0x80,
+ /* U+011E */ 0xAB - 0x80,
+ /* U+011F */ 0xBB - 0x80,
+ /* U+0120 */ 0xD5 - 0x80,
+ /* U+0121 */ 0xF5 - 0x80,
+ /* U+0122 */ 0xAB - 0x80,
+ /* U+0123 */ 0xBB - 0x80,
+ /* U+0124 */ 0xA6 - 0x80,
+ /* U+0125 */ 0xB6 - 0x80,
+ /* U+0126 */ 0xA1 - 0x80,
+ /* U+0127 */ 0xB1 - 0x80,
+ /* U+0128 */ 0xA5 - 0x80,
+ /* U+0129 */ 0xB5 - 0x80,
+ /* U+012A */ 0xCF - 0x80,
+ /* U+012B */ 0xEF - 0x80,
+ /* U+012C */ 0x01,
+ /* U+012D */ 0x01,
+ /* U+012E */ 0xC7 - 0x80,
+ /* U+012F */ 0xE7 - 0x80,
+ /* U+0130 */ 0xA9 - 0x80,
+ /* U+0131 */ 0xB9 - 0x80,
+ /* U+0132 */ 0x29,
+ /* U+0133 */ 0x29,
+ /* U+0134 */ 0xAC - 0x80,
+ /* U+0135 */ 0xBC - 0x80,
+ /* U+0136 */ 0xD3 - 0x80,
+ /* U+0137 */ 0xF3 - 0x80,
+ /* U+0138 */ 0xA2 - 0x80,
+ /* U+0139 */ 0xC5 - 0x80,
+ /* U+013A */ 0xE5 - 0x80,
+ /* U+013B */ 0xA6 - 0x80,
+ /* U+013C */ 0xB6 - 0x80,
+ /* U+013D */ 0xA5 - 0x80,
+ /* U+013E */ 0xB5 - 0x80,
+ /* U+013F */ 0x29,
+ /* U+0140 */ 0x29,
+ /* U+0141 */ 0xA3 - 0x80,
+ /* U+0142 */ 0xB3 - 0x80,
+ /* U+0143 */ 0xD1 - 0x80,
+ /* U+0144 */ 0xF1 - 0x80,
+ /* U+0145 */ 0xD1 - 0x80,
+ /* U+0146 */ 0xF1 - 0x80,
+ /* U+0147 */ 0xD2 - 0x80,
+ /* U+0148 */ 0xF2 - 0x80,
+ /* U+0149 */ 0x29,
+ /* U+014A */ 0xBD - 0x80,
+ /* U+014B */ 0xBF - 0x80,
+ /* U+014C */ 0xD2 - 0x80,
+ /* U+014D */ 0xF2 - 0x80,
+ /* U+014E */ 0x01,
+ /* U+014F */ 0x01,
+ /* U+0150 */ 0xD5 - 0x80,
+ /* U+0151 */ 0xF5 - 0x80,
+ /* U+0152 */ 0x29,
+ /* U+0153 */ 0x29,
+ /* U+0154 */ 0xC0 - 0x80,
+ /* U+0155 */ 0xE0 - 0x80,
+ /* U+0156 */ 0xA3 - 0x80,
+ /* U+0157 */ 0xB3 - 0x80,
+ /* U+0158 */ 0xD8 - 0x80,
+ /* U+0159 */ 0xF8 - 0x80,
+ /* U+015A */ 0xA6 - 0x80,
+ /* U+015B */ 0xB6 - 0x80,
+ /* U+015C */ 0xDE - 0x80,
+ /* U+015D */ 0xFE - 0x80,
+ /* U+015E */ 0xAA - 0x80,
+ /* U+015F */ 0xBA - 0x80,
+ /* U+0160 */ 0xA9 - 0x80,
+ /* U+0161 */ 0xB9 - 0x80,
+ /* U+0162 */ 0xDE - 0x80,
+ /* U+0163 */ 0xFE - 0x80,
+ /* U+0164 */ 0xAB - 0x80,
+ /* U+0165 */ 0xBB - 0x80,
+ /* U+0166 */ 0xAC - 0x80,
+ /* U+0167 */ 0xBC - 0x80,
+ /* U+0168 */ 0xDD - 0x80,
+ /* U+0169 */ 0xFD - 0x80,
+ /* U+016A */ 0xDE - 0x80,
+ /* U+016B */ 0xFE - 0x80,
+ /* U+016C */ 0xDD - 0x80,
+ /* U+016D */ 0xFD - 0x80,
+ /* U+016E */ 0xD9 - 0x80,
+ /* U+016F */ 0xF9 - 0x80,
+ /* U+0170 */ 0xDB - 0x80,
+ /* U+0171 */ 0xFB - 0x80,
+ /* U+0172 */ 0xD9 - 0x80,
+ /* U+0173 */ 0xF9 - 0x80,
+ /* U+0174 */ 0x2A,
+ /* U+0175 */ 0x2B,
+ /* U+0176 */ 0x2A,
+ /* U+0177 */ 0x2B,
+ /* U+0178 */ 0x2A,
+ /* U+0179 */ 0xAC - 0x80,
+ /* U+017A */ 0xBC - 0x80,
+ /* U+017B */ 0xAF - 0x80,
+ /* U+017C */ 0xBF - 0x80,
+ /* U+017D */ 0xAE - 0x80,
+ /* U+017E */ 0xBE - 0x80,
+ /* U+017F */ 0x01
+};
+
+unsigned char latin_a_char_to_byte2[128] = { /* Second byte value for each code
+     point U+0100..U+017F, row for row with latin_a_char_to_charset and
+     latin_a_char_to_byte1.  Rows whose charset is ISO 8859-x hold 0x00;
+     UCS-BMP rows hold the low byte of the code point (0x14 for U+0114);
+     JIS X 0212 rows hold a non-zero second byte. */
+ /* U+0100 */ 0x00,
+ /* U+0101 */ 0x00,
+ /* U+0102 */ 0x00,
+ /* U+0103 */ 0x00,
+ /* U+0104 */ 0x00,
+ /* U+0105 */ 0x00,
+ /* U+0106 */ 0x00,
+ /* U+0107 */ 0x00,
+ /* U+0108 */ 0x00,
+ /* U+0109 */ 0x00,
+ /* U+010A */ 0x00,
+ /* U+010B */ 0x00,
+ /* U+010C */ 0x00,
+ /* U+010D */ 0x00,
+ /* U+010E */ 0x00,
+ /* U+010F */ 0x00,
+ /* U+0110 */ 0x00,
+ /* U+0111 */ 0x00,
+ /* U+0112 */ 0x00,
+ /* U+0113 */ 0x00,
+ /* U+0114 */ 0x14,
+ /* U+0115 */ 0x15,
+ /* U+0116 */ 0x00,
+ /* U+0117 */ 0x00,
+ /* U+0118 */ 0x00,
+ /* U+0119 */ 0x00,
+ /* U+011A */ 0x00,
+ /* U+011B */ 0x00,
+ /* U+011C */ 0x00,
+ /* U+011D */ 0x00,
+ /* U+011E */ 0x00,
+ /* U+011F */ 0x00,
+ /* U+0120 */ 0x00,
+ /* U+0121 */ 0x00,
+ /* U+0122 */ 0x00,
+ /* U+0123 */ 0x00,
+ /* U+0124 */ 0x00,
+ /* U+0125 */ 0x00,
+ /* U+0126 */ 0x00,
+ /* U+0127 */ 0x00,
+ /* U+0128 */ 0x00,
+ /* U+0129 */ 0x00,
+ /* U+012A */ 0x00,
+ /* U+012B */ 0x00,
+ /* U+012C */ 0x2C,
+ /* U+012D */ 0x2D,
+ /* U+012E */ 0x00,
+ /* U+012F */ 0x00,
+ /* U+0130 */ 0x00,
+ /* U+0131 */ 0x00,
+ /* U+0132 */ 0x26,
+ /* U+0133 */ 0x46,
+ /* U+0134 */ 0x00,
+ /* U+0135 */ 0x00,
+ /* U+0136 */ 0x00,
+ /* U+0137 */ 0x00,
+ /* U+0138 */ 0x00,
+ /* U+0139 */ 0x00,
+ /* U+013A */ 0x00,
+ /* U+013B */ 0x00,
+ /* U+013C */ 0x00,
+ /* U+013D */ 0x00,
+ /* U+013E */ 0x00,
+ /* U+013F */ 0x29,
+ /* U+0140 */ 0x49,
+ /* U+0141 */ 0x00,
+ /* U+0142 */ 0x00,
+ /* U+0143 */ 0x00,
+ /* U+0144 */ 0x00,
+ /* U+0145 */ 0x00,
+ /* U+0146 */ 0x00,
+ /* U+0147 */ 0x00,
+ /* U+0148 */ 0x00,
+ /* U+0149 */ 0x4A,
+ /* U+014A */ 0x00,
+ /* U+014B */ 0x00,
+ /* U+014C */ 0x00,
+ /* U+014D */ 0x00,
+ /* U+014E */ 0x4E,
+ /* U+014F */ 0x4F,
+ /* U+0150 */ 0x00,
+ /* U+0151 */ 0x00,
+ /* U+0152 */ 0x2D,
+ /* U+0153 */ 0x4D,
+ /* U+0154 */ 0x00,
+ /* U+0155 */ 0x00,
+ /* U+0156 */ 0x00,
+ /* U+0157 */ 0x00,
+ /* U+0158 */ 0x00,
+ /* U+0159 */ 0x00,
+ /* U+015A */ 0x00,
+ /* U+015B */ 0x00,
+ /* U+015C */ 0x00,
+ /* U+015D */ 0x00,
+ /* U+015E */ 0x00,
+ /* U+015F */ 0x00,
+ /* U+0160 */ 0x00,
+ /* U+0161 */ 0x00,
+ /* U+0162 */ 0x00,
+ /* U+0163 */ 0x00,
+ /* U+0164 */ 0x00,
+ /* U+0165 */ 0x00,
+ /* U+0166 */ 0x00,
+ /* U+0167 */ 0x00,
+ /* U+0168 */ 0x00,
+ /* U+0169 */ 0x00,
+ /* U+016A */ 0x00,
+ /* U+016B */ 0x00,
+ /* U+016C */ 0x00,
+ /* U+016D */ 0x00,
+ /* U+016E */ 0x00,
+ /* U+016F */ 0x00,
+ /* U+0170 */ 0x00,
+ /* U+0171 */ 0x00,
+ /* U+0172 */ 0x00,
+ /* U+0173 */ 0x00,
+ /* U+0174 */ 0x71,
+ /* U+0175 */ 0x71,
+ /* U+0176 */ 0x74,
+ /* U+0177 */ 0x74,
+ /* U+0178 */ 0x73,
+ /* U+0179 */ 0x00,
+ /* U+017A */ 0x00,
+ /* U+017B */ 0x00,
+ /* U+017C */ 0x00,
+ /* U+017D */ 0x00,
+ /* U+017E */ 0x00,
+ /* U+017F */ 0x7F
+};
+
+Lisp_Object Vutf_2000_version;
+#endif
+
+#ifndef UTF2000
+int leading_code_private_11;
+#endif
Lisp_Object Qcharsetp;
Lisp_Object Qregistry, Qfinal, Qgraphic;
Lisp_Object Qdirection;
Lisp_Object Qreverse_direction_charset;
-Lisp_Object Qccl_program;
Lisp_Object Qleading_byte;
Lisp_Object Qshort_name, Qlong_name;
Qjapanese_jisx0212,
Qchinese_cns11643_1,
Qchinese_cns11643_2,
+#ifdef UTF2000
+ Qchinese_cns11643_3,
+ Qchinese_cns11643_4,
+ Qchinese_cns11643_5,
+ Qchinese_cns11643_6,
+ Qchinese_cns11643_7,
+ Qucs_bmp,
+#endif
Qchinese_big5_1,
Qchinese_big5_2,
Qcomposite;
Lisp_Object Vcharset_hash_table;
-static Bufbyte next_allocated_1_byte_leading_byte;
-static Bufbyte next_allocated_2_byte_leading_byte;
+static Charset_ID next_allocated_1_byte_leading_byte;
+static Charset_ID next_allocated_2_byte_leading_byte;
/* Composite characters are characters constructed by overstriking two
or more regular characters.
non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
{
Bufbyte *p;
- Bufbyte lb;
+#ifndef UTF2000 /* these locals are only used by the legacy (non-UTF2000) branch */
+ Charset_ID lb;
int c1, c2;
Lisp_Object charset;
+#endif
p = str;
+#ifdef UTF2000 /* UTF-2000 build: write C at STR as a UTF-8-style sequence of
+     1 to 6 bytes, picking the length from the magnitude of C.  The lead
+     byte carries the length marker plus the top bits of C; every following
+     byte is 10xxxxxx with the next 6 bits.  The number of bytes written is
+     returned at the end as (p - str).  No bound on STR is checked here. */
+ if ( c <= 0x7f ) /* 1 byte: stored as is */
+ {
+ *p++ = c;
+ }
+ else if ( c <= 0x7ff ) /* 2 bytes: 110xxxxx 10xxxxxx */
+ {
+ *p++ = (c >> 6) | 0xc0;
+ *p++ = (c & 0x3f) | 0x80;
+ }
+ else if ( c <= 0xffff ) /* 3 bytes: 1110xxxx + 2 trailing bytes */
+ {
+ *p++ = (c >> 12) | 0xe0;
+ *p++ = ((c >> 6) & 0x3f) | 0x80;
+ *p++ = (c & 0x3f) | 0x80;
+ }
+ else if ( c <= 0x1fffff ) /* 4 bytes: 11110xxx + 3 trailing bytes */
+ {
+ *p++ = (c >> 18) | 0xf0;
+ *p++ = ((c >> 12) & 0x3f) | 0x80;
+ *p++ = ((c >> 6) & 0x3f) | 0x80;
+ *p++ = (c & 0x3f) | 0x80;
+ }
+ else if ( c <= 0x3ffffff ) /* 5 bytes: 111110xx + 4 trailing bytes */
+ {
+ *p++ = (c >> 24) | 0xf8;
+ *p++ = ((c >> 18) & 0x3f) | 0x80;
+ *p++ = ((c >> 12) & 0x3f) | 0x80;
+ *p++ = ((c >> 6) & 0x3f) | 0x80;
+ *p++ = (c & 0x3f) | 0x80;
+ }
+ else /* everything larger: 6 bytes, 1111110x + 5 trailing bytes */
+ {
+ *p++ = (c >> 30) | 0xfc;
+ *p++ = ((c >> 24) & 0x3f) | 0x80;
+ *p++ = ((c >> 18) & 0x3f) | 0x80;
+ *p++ = ((c >> 12) & 0x3f) | 0x80;
+ *p++ = ((c >> 6) & 0x3f) | 0x80;
+ *p++ = (c & 0x3f) | 0x80;
+ }
+#else /* legacy leading-byte representation (partly elided in this view) */
BREAKUP_CHAR (c, charset, c1, c2);
lb = CHAR_LEADING_BYTE (c);
if (LEADING_BYTE_PRIVATE_P (lb))
*p++ = c1 | 0x80;
if (c2)
*p++ = c2 | 0x80;
-
+#endif
return (p - str);
}
Emchar
non_ascii_charptr_emchar (CONST Bufbyte *str)
{
+#ifdef UTF2000 /* UTF-2000 build: decode one UTF-8-style sequence starting at
+     STR.  The first byte selects how many further bytes are read (len) and
+     supplies the top bits of the result; each further byte supplies its low
+     6 bits.  The further bytes are not checked for the 10xxxxxx form, so
+     the caller must pass a well-formed sequence. */
+ Bufbyte b;
+ Emchar ch;
+ int len;
+
+ b = *str++;
+ if ( b >= 0xfc ) /* 1111110x: 5 more bytes */
+ {
+ ch = (b & 0x01);
+ len = 5;
+ }
+ else if ( b >= 0xf8 ) /* 111110xx: 4 more bytes */
+ {
+ ch = b & 0x03;
+ len = 4;
+ }
+ else if ( b >= 0xf0 ) /* 11110xxx: 3 more bytes */
+ {
+ ch = b & 0x07;
+ len = 3;
+ }
+ else if ( b >= 0xe0 ) /* 1110xxxx: 2 more bytes */
+ {
+ ch = b & 0x0f;
+ len = 2;
+ }
+ else if ( b >= 0xc0 ) /* 110xxxxx: 1 more byte */
+ {
+ ch = b & 0x1f;
+ len = 1;
+ }
+ else /* any byte below 0xc0 is returned unchanged as the character */
+ {
+ ch = b;
+ len = 0;
+ }
+ for( ; len > 0; len-- ) /* append 6 bits per remaining byte */
+ {
+ b = *str++;
+ ch = ( ch << 6 ) | ( b & 0x3f );
+ }
+ return ch;
+#else /* legacy leading-byte representation (partly elided in this view) */
Bufbyte i0 = *str, i1, i2 = 0;
Lisp_Object charset;
i2 = *++str & 0x7F;
return MAKE_CHAR (charset, i1, i2);
+#endif
}
/* Return whether CH is a valid Emchar, assuming it's non-ASCII.
Do not call this directly. Use the macro valid_char_p() instead. */
+#ifndef UTF2000
int
non_ascii_valid_char_p (Emchar ch)
{
return (XCHARSET_CHARS (charset) == 96);
}
}
+#endif
\f
/************************************************************************/
switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
{
/* Notice fallthrough. */
+#ifdef UTF2000
+ case 6: *++strptr = *ptr++;
+ case 5: *++strptr = *ptr++;
+#endif
case 4: *++strptr = *ptr++;
case 3: *++strptr = *ptr++;
case 2: *++strptr = *ptr;
switch (REP_BYTES_BY_FIRST_BYTE (ch))
{
/* Notice fallthrough. */
+#ifdef UTF2000
+ case 6:
+ ch = Lstream_getc (stream);
+ assert (ch >= 0);
+ *++strptr = (Bufbyte) ch;
+ case 5:
+ ch = Lstream_getc (stream);
+ assert (ch >= 0);
+ *++strptr = (Bufbyte) ch;
+#endif
case 4:
ch = Lstream_getc (stream);
assert (ch >= 0);
/* Make a new charset. */
static Lisp_Object
-make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
+make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
unsigned char type, unsigned char columns, unsigned char graphic,
Bufbyte final, unsigned char direction, Lisp_Object short_name,
Lisp_Object long_name, Lisp_Object doc,
CHARSET_CCL_PROGRAM (cs) = Qnil;
CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
- CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
- CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
- CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
- CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
+ switch ( CHARSET_TYPE (cs) )
+ {
+ case CHARSET_TYPE_94:
+ CHARSET_DIMENSION (cs) = 1;
+ CHARSET_CHARS (cs) = 94;
+ break;
+ case CHARSET_TYPE_96:
+ CHARSET_DIMENSION (cs) = 1;
+ CHARSET_CHARS (cs) = 96;
+ break;
+ case CHARSET_TYPE_94X94:
+ CHARSET_DIMENSION (cs) = 2;
+ CHARSET_CHARS (cs) = 94;
+ break;
+ case CHARSET_TYPE_96X96:
+ CHARSET_DIMENSION (cs) = 2;
+ CHARSET_CHARS (cs) = 96;
+ break;
+#ifdef UTF2000
+ case CHARSET_TYPE_128X128:
+ CHARSET_DIMENSION (cs) = 2;
+ CHARSET_CHARS (cs) = 128;
+ break;
+ case CHARSET_TYPE_256X256:
+ CHARSET_DIMENSION (cs) = 2;
+ CHARSET_CHARS (cs) = 256;
+ break;
+#endif
+ }
if (final)
{
/* some charsets do not have final characters. This includes
ASCII, Control-1, Composite, and the two faux private
charsets. */
+#ifdef UTF2000
+ /* UTF-2000 build: charset_by_attributes has no direction axis, so the
+    slot is keyed by type and final byte only.  Tested with #ifdef, like
+    every other UTF2000 conditional in this file, so the build does not
+    depend on the macro expanding to a non-empty value. */
+ assert (NILP (charset_by_attributes[type][final]));
+ charset_by_attributes[type][final] = obj;
+#else
assert (NILP (charset_by_attributes[type][final][direction]));
charset_by_attributes[type][final][direction] = obj;
+#endif
}
- assert (NILP (charset_by_leading_byte[id - 128]));
- charset_by_leading_byte[id - 128] = obj;
+ assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
+ charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
+#ifndef UTF2000
if (id < 0xA0)
/* official leading byte */
rep_bytes_by_first_byte[id] = rep_bytes;
+#endif
/* Some charsets are "faux" and don't have names or really exist at
all except in the leading-byte table. */
static int
get_unallocated_leading_byte (int dimension)
{
- int lb;
+ Charset_ID lb;
if (dimension == 1)
{
error
("Character set already defined for this DIMENSION/CHARS/FINAL combo");
+#ifdef UTF2000
+ if (dimension == 1)
+ {
+ if (chars == 94)
+ {
+ /* id = CHARSET_ID_OFFSET_94 + final; */
+ id = get_unallocated_leading_byte (dimension);
+ }
+ else if (chars == 96)
+ {
+ id = get_unallocated_leading_byte (dimension);
+ }
+ else
+ {
+ abort ();
+ }
+ }
+ else if (dimension == 2)
+ {
+ if (chars == 94)
+ {
+ id = get_unallocated_leading_byte (dimension);
+ }
+ else if (chars == 96)
+ {
+ id = get_unallocated_leading_byte (dimension);
+ }
+ else
+ {
+ abort ();
+ }
+ }
+ else
+ {
+ abort ();
+ }
+#else
id = get_unallocated_leading_byte (dimension);
+#endif
if (NILP (doc_string))
doc_string = build_string ("");
{
CHECK_CHAR_COERCE_INT (ch);
- return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
- (CHAR_LEADING_BYTE (XCHAR (ch))));
+ return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
}
DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
defsymbol (&Qgraphic, "graphic");
defsymbol (&Qdirection, "direction");
defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
- defsymbol (&Qccl_program, "ccl-program");
defsymbol (&Qshort_name, "short-name");
defsymbol (&Qlong_name, "long-name");
defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
+#ifdef UTF2000
+ defsymbol (&Qchinese_cns11643_3, "chinese-cns11643-3");
+ defsymbol (&Qchinese_cns11643_4, "chinese-cns11643-4");
+ defsymbol (&Qchinese_cns11643_5, "chinese-cns11643-5");
+ defsymbol (&Qchinese_cns11643_6, "chinese-cns11643-6");
+ defsymbol (&Qchinese_cns11643_7, "chinese-cns11643-7");
+ defsymbol (&Qucs_bmp, "ucs-bmp");
+#endif
defsymbol (&Qchinese_big5_1, "chinese-big5-1");
defsymbol (&Qchinese_big5_2, "chinese-big5-2");
void
vars_of_mule_charset (void)
{
- int i, j, k;
+ int i, j;
+#ifndef UTF2000
+ int k;
+#endif
/* Table of charsets indexed by leading byte. */
for (i = 0; i < countof (charset_by_leading_byte); i++)
charset_by_leading_byte[i] = Qnil;
+#ifdef UTF2000
+ /* Table of charsets indexed by type/final-byte. */
+ for (i = 0; i < countof (charset_by_attributes); i++)
+ for (j = 0; j < countof (charset_by_attributes[0]); j++)
+ charset_by_attributes[i][j] = Qnil;
+#else
/* Table of charsets indexed by type/final-byte/direction. */
for (i = 0; i < countof (charset_by_attributes); i++)
for (j = 0; j < countof (charset_by_attributes[0]); j++)
for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
charset_by_attributes[i][j][k] = Qnil;
+#endif
next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
+#ifdef UTF2000
+ next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
+#else
next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
+#endif
+
+#ifndef UTF2000
+ leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
+ DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
+Leading-code of private TYPE9N charset of column-width 1.
+*/ );
+ leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
+#endif
+
+#ifdef UTF2000
+ Vutf_2000_version = build_string("0.5 (TennÅji)");
+ DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
+Version number of UTF-2000.
+*/ );
+#endif
}
void
/* Predefined character sets. We store them into variables for
ease of access. */
+#ifdef UTF2000
+ Vcharset_ucs_bmp =
+ make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 1,
+ CHARSET_TYPE_256X256, 1, 0, 0,
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("BMP"),
+ build_string ("BMP"),
+ build_string ("BMP"),
+ build_string (""));
+#endif
Vcharset_ascii =
make_charset (LEADING_BYTE_ASCII, Qascii, 1,
CHARSET_TYPE_94, 1, 0, 'B',
build_string
("CNS 11643 Plane 2 Chinese traditional"),
build_string (CHINESE_CNS_PLANE_RE("2")));
+#ifdef UTF2000
+ Vcharset_chinese_cns11643_3 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'I',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-3"),
+ build_string ("CNS11643-3 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 3 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("3")));
+ Vcharset_chinese_cns11643_4 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'J',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-4"),
+ build_string ("CNS11643-4 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 4 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("4")));
+ Vcharset_chinese_cns11643_5 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'K',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-5"),
+ build_string ("CNS11643-5 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 5 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("5")));
+ Vcharset_chinese_cns11643_6 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'L',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-6"),
+ build_string ("CNS11643-6 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 6 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("6")));
+ Vcharset_chinese_cns11643_7 =
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3,
+ CHARSET_TYPE_94X94, 2, 0, 'M',
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("CNS11643-7"),
+ build_string ("CNS11643-7 (Chinese traditional)"),
+ build_string
+ ("CNS 11643 Plane 7 Chinese traditional"),
+ build_string (CHINESE_CNS_PLANE_RE("7")));
+#endif
Vcharset_chinese_big5_1 =
make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
CHARSET_TYPE_94X94, 2, 0, '0',