From 7913ad8d670fc3edc59c86727a15c4acdd501b65 Mon Sep 17 00:00:00 2001 From: tomo Date: Wed, 1 Sep 1999 17:52:24 +0000 Subject: [PATCH] (Charset_ID): New type. (LEADING_BYTE_UCS_BMP): New macro. (LEADING_BYTE_CONTROL_1): Changed from 0x8F to 0x81. (CHARSET_ID_OFFSET_94): New macro. (MIN_CHARSET_ID_PRIVATE_94): New macro. (MAX_CHARSET_ID_PRIVATE_94): New macro. (LEADING_BYTE_ASCII): Changed to use CHARSET_ID_OFFSET_94 and final-byte. (LEADING_BYTE_KATAKANA_JISX0201): Likewise. (LEADING_BYTE_LATIN_JISX0201): Likewise. (CHARSET_ID_OFFSET_96): New macro. (LEADING_BYTE_LATIN_ISO8859_1): Changed to use CHARSET_ID_OFFSET_96 and final-byte. (LEADING_BYTE_LATIN_ISO8859_2): Likewise. (LEADING_BYTE_LATIN_ISO8859_3): Likewise. (LEADING_BYTE_LATIN_ISO8859_4): Likewise. (LEADING_BYTE_GREEK_ISO8859_7): Likewise. (LEADING_BYTE_ARABIC_ISO8859_6): Likewise. (LEADING_BYTE_HEBREW_ISO8859_8): Likewise. (LEADING_BYTE_CYRILLIC_ISO8859_5): Likewise. (LEADING_BYTE_LATIN_ISO8859_9): Likewise. (LEADING_BYTE_THAI_TIS620): Likewise. (MIN_LEADING_BYTE_PRIVATE_1): Changed from 0x0D0 to 0xD0. (MAX_LEADING_BYTE_PRIVATE_1): Changed from 0x11f to 0xDF. (CHARSET_ID_OFFSET_94x94): New macro. (LEADING_BYTE_CHINESE_BIG5_1): Changed to use CHARSET_ID_OFFSET_94x94 and final-byte. (LEADING_BYTE_CHINESE_BIG5_2): Likewise. (MIN_LEADING_BYTE_PRIVATE_2): Likewise. (MAX_LEADING_BYTE_PRIVATE_2): Likewise. (LEADING_BYTE_JAPANESE_JISX0208_1978): Likewise. (LEADING_BYTE_CHINESE_GB2312): Likewise. (LEADING_BYTE_JAPANESE_JISX0208): Likewise. (LEADING_BYTE_KOREAN_KSC5601): Likewise. (LEADING_BYTE_JAPANESE_JISX0212): Likewise. (LEADING_BYTE_CHINESE_CCITT_GB): Likewise. (LEADING_BYTE_CHINESE_CNS11643_*): Likewise. (LEADING_BYTE_KOREAN_KPS9566): Likewise. (CHARSET_TYPE_128X128): New macro. (CHARSET_TYPE_256X256): New macro. (XCHARSET_PRIVATE_P): Delete unconditionally. (charset_by_attributes): Delete array about direction. (CHARSET_BY_LEADING_BYTE): Use `Charset_ID' instead of `int'. (CHARSET_BY_ATTRIBUTES): Modify for `charset_by_attributes'. (MIN_CHAR_94): New macro. (MAX_CHAR_94): New macro. (MIN_CHAR_96): New macro. (MAX_CHAR_96): New macro. (MIN_CHAR_94x94): New macro. (MAX_CHAR_94x94): New macro. (MIN_CHAR_96x96): New macro. (MAX_CHAR_96x96): New macro. (FIELD1_TO_PRIVATE_LEADING_BYTE): Use `CHARSET_ID_OFFSET_94x94'. (FIELD1_TO_OFFICIAL_LEADING_BYTE): Likewise. (FIELD2_TO_PRIVATE_LEADING_BYTE): Use `(MIN_LEADING_BYTE_PRIVATE_1 - 32)'. (FIELD2_TO_OFFICIAL_LEADING_BYTE): Use `LEADING_BYTE_ASCII'. (MIN_CHAR_FIELD2_OFFICIAL): Deleted. (MAX_CHAR_FIELD2_OFFICIAL): Deleted. (MIN_CHAR_OFFICIAL_TYPE9N): Deleted. (MAX_CHAR_PRIVATE_TYPE9N): Changed. (MAKE_CHAR): Use `XCHARSET_FINAL' instead of `XCHARSET_LEADING_BYTE' to make code-point. (latin_a_char_to_charset): New variable. (latin_a_char_to_byte1): New variable. (latin_a_char_to_byte2): New variable. (breakup_char_1): Use `latin_a_char_to_{charset|byte1|byte2}' for Latin Extended-A; use `CHARSET_BY_ATTRIBUTES' instead of `CHARSET_BY_LEADING_BYTE' to get charset for ISO-2022 characters. (Vcharset_japanese_jisx0212): New variable definition. --- src/character.h | 308 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 212 insertions(+), 96 deletions(-) diff --git a/src/character.h b/src/character.h index db8df92..e2b4591 100644 --- a/src/character.h +++ b/src/character.h @@ -32,61 +32,122 @@ Boston, MA 02111-1307, USA. */ /* Definition of leading bytes */ /************************************************************************/ +typedef int Charset_ID; + #define MIN_LEADING_BYTE 0x80 -/* These need special treatment in a string and/or character */ -#define LEADING_BYTE_ASCII 0x8E /* Omitted in a buffer */ -#define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ - -/** The following are for 1-byte characters in an official charset. **/ - -#define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ -#define LEADING_BYTE_LATIN_ISO8859_2 0x82 /* Right half of ISO 8859-2 */ -#define LEADING_BYTE_LATIN_ISO8859_3 0x83 /* Right half of ISO 8859-3 */ -#define LEADING_BYTE_LATIN_ISO8859_4 0x84 /* Right half of ISO 8859-4 */ -#define LEADING_BYTE_THAI_TIS620 0x85 /* TIS620-2533 */ -#define LEADING_BYTE_GREEK_ISO8859_7 0x86 /* Right half of ISO 8859-7 */ -#define LEADING_BYTE_ARABIC_ISO8859_6 0x87 /* Right half of ISO 8859-6 */ -#define LEADING_BYTE_HEBREW_ISO8859_8 0x88 /* Right half of ISO 8859-8 */ -#define LEADING_BYTE_KATAKANA_JISX0201 0x89 /* Right half of JIS X0201-1976 */ -#define LEADING_BYTE_LATIN_JISX0201 0x8A /* Left half of JIS X0201-1976 */ -#define LEADING_BYTE_CYRILLIC_ISO8859_5 0x8C /* Right half of ISO 8859-5 */ -#define LEADING_BYTE_LATIN_ISO8859_9 0x8D /* Right half of ISO 8859-9 */ - -#define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1 -#define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9 - -#define LEADING_BYTE_CHINESE_BIG5_1 0xB0 /* Big5 Level 1 */ -#define LEADING_BYTE_CHINESE_BIG5_2 0xB1 /* Big5 Level 2 */ -#define MIN_LEADING_BYTE_PRIVATE_2 0xB0 -#define MAX_LEADING_BYTE_PRIVATE_2 0xBF - -/** The following are for 2-byte characters in an official charset. **/ - -#define LEADING_BYTE_JAPANESE_JISX0208_1978 0xC0/* Japanese JIS X0208-1978 */ -#define LEADING_BYTE_CHINESE_GB2312 0xC1 /* Chinese Hanzi GB2312-1980 */ -#define LEADING_BYTE_JAPANESE_JISX0208 0xC2 /* Japanese JIS X0208-1983 */ -#define LEADING_BYTE_KOREAN_KSC5601 0xC3 /* Hangul KS C5601-1987 */ -#define LEADING_BYTE_JAPANESE_JISX0212 0xC4 /* Japanese JIS X0212-1990 */ -#define LEADING_BYTE_CHINESE_CCITT_GB 0xC5 /* CCITT Extended GB */ -#define LEADING_BYTE_CHINESE_CNS11643_1 0xC7 /* Chinese CNS11643 Set 1 */ -#define LEADING_BYTE_CHINESE_CNS11643_2 0xC8 /* Chinese CNS11643 Set 2 */ -#define LEADING_BYTE_CHINESE_CNS11643_3 0xC9 /* Chinese CNS11643 Set 3 */ -#define LEADING_BYTE_CHINESE_CNS11643_4 0xCA /* Chinese CNS11643 Set 4 */ -#define LEADING_BYTE_CHINESE_CNS11643_5 0xCB /* Chinese CNS11643 Set 5 */ -#define LEADING_BYTE_CHINESE_CNS11643_6 0xCC /* Chinese CNS11643 Set 6 */ -#define LEADING_BYTE_CHINESE_CNS11643_7 0xCD /* Chinese CNS11643 Set 7 */ -#define LEADING_BYTE_KOREAN_KPS9566 0xCE /* DPRK Hangul KPS 9566-1997 */ -#define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978 -#define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_KOREAN_KPS9566 +#define LEADING_BYTE_UCS_BMP 0x80 +#define LEADING_BYTE_CONTROL_1 0x81 /* represent normal 80-9F */ + + +#define CHARSET_ID_OFFSET_94 0x60 + +#define MIN_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '0') +#define MAX_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '?') + +/* ISO 646 IRV */ +#define LEADING_BYTE_ASCII (CHARSET_ID_OFFSET_94 + 'B') + +/* Right half of JIS X0201-1976 */ +#define LEADING_BYTE_KATAKANA_JISX0201 (CHARSET_ID_OFFSET_94 + 'I') + +/* Left half of JIS X0201-1976 */ +#define LEADING_BYTE_LATIN_JISX0201 (CHARSET_ID_OFFSET_94 + 'J') + + +#define CHARSET_ID_OFFSET_96 0x70 + +/* Right half of ISO 8859-1 */ +#define LEADING_BYTE_LATIN_ISO8859_1 (CHARSET_ID_OFFSET_96 + 'A') + +/* Right half of ISO 8859-2 */ +#define LEADING_BYTE_LATIN_ISO8859_2 (CHARSET_ID_OFFSET_96 + 'B') + +/* Right half of ISO 8859-3 */ +#define LEADING_BYTE_LATIN_ISO8859_3 (CHARSET_ID_OFFSET_96 + 'C') + +/* Right half of ISO 8859-4 */ +#define LEADING_BYTE_LATIN_ISO8859_4 (CHARSET_ID_OFFSET_96 + 'D') + +/* Right half of ISO 8859-7 */ +#define LEADING_BYTE_GREEK_ISO8859_7 (CHARSET_ID_OFFSET_96 + 'F') + +/* Right half of ISO 8859-6 */ +#define LEADING_BYTE_ARABIC_ISO8859_6 (CHARSET_ID_OFFSET_96 + 'G') + +/* Right half of ISO 8859-8 */ +#define LEADING_BYTE_HEBREW_ISO8859_8 (CHARSET_ID_OFFSET_96 + 'H') + +/* Right half of ISO 8859-5 */ +#define LEADING_BYTE_CYRILLIC_ISO8859_5 (CHARSET_ID_OFFSET_96 + 'L') + +/* Right half of ISO 8859-9 */ +#define LEADING_BYTE_LATIN_ISO8859_9 (CHARSET_ID_OFFSET_96 + 'M') + +/* TIS620-2533 */ +#define LEADING_BYTE_THAI_TIS620 (CHARSET_ID_OFFSET_96 + 'T') + + +#define MIN_LEADING_BYTE_PRIVATE_1 0xD0 +#define MAX_LEADING_BYTE_PRIVATE_1 0xDF + + +#define CHARSET_ID_OFFSET_94x94 0xB0 + +/* Big5 Level 1 */ +#define LEADING_BYTE_CHINESE_BIG5_1 ('0' + CHARSET_ID_OFFSET_94x94) + +/* Big5 Level 2 */ +#define LEADING_BYTE_CHINESE_BIG5_2 ('1' + CHARSET_ID_OFFSET_94x94) + +#define MIN_LEADING_BYTE_PRIVATE_2 ('0' + CHARSET_ID_OFFSET_94x94) +#define MAX_LEADING_BYTE_PRIVATE_2 ('?' + CHARSET_ID_OFFSET_94x94) + +/* Japanese JIS X0208-1978 */ +#define LEADING_BYTE_JAPANESE_JISX0208_1978 \ + ('@' + CHARSET_ID_OFFSET_94x94) + +/* Chinese Hanzi GB2312-1980 */ +#define LEADING_BYTE_CHINESE_GB2312 ('A' + CHARSET_ID_OFFSET_94x94) + +/* Japanese JIS X0208-1983 */ +#define LEADING_BYTE_JAPANESE_JISX0208 ('B' + CHARSET_ID_OFFSET_94x94) + +/* Hangul KS C5601-1987 */ +#define LEADING_BYTE_KOREAN_KSC5601 ('C' + CHARSET_ID_OFFSET_94x94) + +/* Japanese JIS X0212-1990 */ +#define LEADING_BYTE_JAPANESE_JISX0212 ('D' + CHARSET_ID_OFFSET_94x94) -/** The following are for 1- and 2-byte characters in a private charset. **/ +/* CCITT Extended GB */ +#define LEADING_BYTE_CHINESE_CCITT_GB ('E' + CHARSET_ID_OFFSET_94x94) -#define PRE_LEADING_BYTE_PRIVATE_1 0x120 /* 1-byte char-set */ -#define PRE_LEADING_BYTE_PRIVATE_2 0x121 /* 2-byte char-set */ +/* Chinese CNS11643 Set 1 */ +#define LEADING_BYTE_CHINESE_CNS11643_1 ('G' + CHARSET_ID_OFFSET_94x94) -#define MIN_LEADING_BYTE_PRIVATE_1 0x0D0 -#define MAX_LEADING_BYTE_PRIVATE_1 0x11f +/* Chinese CNS11643 Set 2 */ +#define LEADING_BYTE_CHINESE_CNS11643_2 ('H' + CHARSET_ID_OFFSET_94x94) + +/* Chinese CNS11643 Set 3 */ +#define LEADING_BYTE_CHINESE_CNS11643_3 ('I' + CHARSET_ID_OFFSET_94x94) + +/* Chinese CNS11643 Set 4 */ +#define LEADING_BYTE_CHINESE_CNS11643_4 ('J' + CHARSET_ID_OFFSET_94x94) + +/* Chinese CNS11643 Set 5 */ +#define LEADING_BYTE_CHINESE_CNS11643_5 ('K' + CHARSET_ID_OFFSET_94x94) + +/* Chinese CNS11643 Set 6 */ +#define LEADING_BYTE_CHINESE_CNS11643_6 ('L' + CHARSET_ID_OFFSET_94x94) + +/* Chinese CNS11643 Set 7 */ +#define LEADING_BYTE_CHINESE_CNS11643_7 ('M' + CHARSET_ID_OFFSET_94x94) + +/* DPRK Hangul KPS 9566-1997 */ +#define LEADING_BYTE_KOREAN_KPS9566 ('N' + CHARSET_ID_OFFSET_94x94) + +#define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978 +#define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_KOREAN_KPS9566 #define NUM_LEADING_BYTES 256 @@ -171,10 +232,12 @@ DECLARE_LRECORD (charset, struct Lisp_Charset); #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) -#define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ -#define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ -#define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */ -#define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ +#define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ +#define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ +#define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */ +#define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ +#define CHARSET_TYPE_128X128 4 /* This charset includes 128x128 characters. */ +#define CHARSET_TYPE_256X256 5 /* This charset includes 256x256 characters. */ #define CHARSET_LEFT_TO_RIGHT 0 #define CHARSET_RIGHT_TO_LEFT 1 @@ -215,25 +278,22 @@ DECLARE_LRECORD (charset, struct Lisp_Charset); #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs)) #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs)) #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) -#ifdef CHARSET_PRIVATE_P -#define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) -#endif #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) /* Table of charsets indexed by (leading byte - 128). */ extern Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES]; -/* Table of charsets indexed by type/final-byte/direction. */ -extern Lisp_Object charset_by_attributes[4][128][2]; +/* Table of charsets indexed by type/final-byte. */ +extern Lisp_Object charset_by_attributes[4][128]; #ifdef ERROR_CHECK_TYPECHECK /* int not Bufbyte even though that is the actual type of a leading byte. This way, out-ot-range values will get caught rather than automatically truncated. */ -INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (int lb); +INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (Charset_ID lb); INLINE Lisp_Object -CHARSET_BY_LEADING_BYTE (int lb) +CHARSET_BY_LEADING_BYTE (Charset_ID lb) { assert (lb >= MIN_LEADING_BYTE && lb < (MIN_LEADING_BYTE + NUM_LEADING_BYTES)); @@ -248,7 +308,7 @@ CHARSET_BY_LEADING_BYTE (int lb) #endif #define CHARSET_BY_ATTRIBUTES(type, final, dir) \ - (charset_by_attributes[type][final][dir]) + (charset_by_attributes[type][final]) INLINE int REP_BYTES_BY_FIRST_BYTE (int fb); INLINE int @@ -301,6 +361,16 @@ REP_BYTES_BY_FIRST_BYTE (int fb) #define MIN_CHAR_HALFWIDTH_KATAKANA 0xFF60 #define MAX_CHAR_HALFWIDTH_KATAKANA 0xFF9F +#define MIN_CHAR_94 0xE90940 +#define MAX_CHAR_94 (MIN_CHAR_94 + 94 * 80 - 1) +#define MIN_CHAR_96 (MIN_CHAR_94 + 94 * 80) +#define MAX_CHAR_96 (MIN_CHAR_96 + 96 * 80 - 1) + +#define MIN_CHAR_94x94 0xE9F6C0 +#define MAX_CHAR_94x94 (MIN_CHAR_94x94 + 94 * 94 * 80 - 1) +#define MIN_CHAR_96x96 0xF4C000 +#define MAX_CHAR_96x96 (MIN_CHAR_96x96 + 96 * 96 * 80 - 1) + /* Macros to access each field of a character code of C. */ #define CHAR_FIELD1(c) (((c) & CHAR_FIELD1_MASK) >> 14) @@ -313,18 +383,13 @@ REP_BYTES_BY_FIRST_BYTE (int fb) /* Converting between field values and leading bytes. */ -#define FIELD1_TO_PRIVATE_LEADING_BYTE 0x80 -#define FIELD1_TO_OFFICIAL_LEADING_BYTE 0x80 -#define FIELD2_TO_PRIVATE_LEADING_BYTE 0xb0 -#define FIELD2_TO_OFFICIAL_LEADING_BYTE 0x80 +#define FIELD1_TO_PRIVATE_LEADING_BYTE CHARSET_ID_OFFSET_94x94 +#define FIELD1_TO_OFFICIAL_LEADING_BYTE CHARSET_ID_OFFSET_94x94 +#define FIELD2_TO_PRIVATE_LEADING_BYTE (MIN_LEADING_BYTE_PRIVATE_1 - 32) +#define FIELD2_TO_OFFICIAL_LEADING_BYTE LEADING_BYTE_ASCII /* Minimum and maximum allowed values for the fields. */ -#define MIN_CHAR_FIELD2_OFFICIAL \ - (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE) -#define MAX_CHAR_FIELD2_OFFICIAL \ - (MAX_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE) - #define MIN_CHAR_FIELD1_OFFICIAL \ (MIN_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE) #define MAX_CHAR_FIELD1_OFFICIAL \ @@ -344,10 +409,10 @@ REP_BYTES_BY_FIRST_BYTE (int fb) #define MULE_CHAR_PRIVATE_OFFSET (0xe0 << 16) -#define MIN_CHAR_OFFICIAL_TYPE9N \ - (MULE_CHAR_PRIVATE_OFFSET | (MIN_CHAR_FIELD2_OFFICIAL << 7)) #define MIN_CHAR_PRIVATE_TYPE9N \ (MULE_CHAR_PRIVATE_OFFSET | (MIN_CHAR_FIELD2_PRIVATE << 7)) +#define MAX_CHAR_PRIVATE_TYPE9N \ + (MULE_CHAR_PRIVATE_OFFSET | (MAX_CHAR_FIELD2_PRIVATE << 7) | 0x7f) #define MIN_CHAR_PRIVATE_TYPE9NX9N \ (MULE_CHAR_PRIVATE_OFFSET | (MIN_CHAR_FIELD1_PRIVATE << 14)) #define MIN_CHAR_OFFICIAL_TYPE9NX9N \ @@ -386,20 +451,42 @@ MAKE_CHAR (Lisp_Object charset, int c1, int c2) return c1 + MIN_CHAR_HALFWIDTH_KATAKANA - 0x20; else return 32; -#ifdef ENABLE_COMPOSITE_CHARS - else if (EQ (charset, Vcharset_composite)) - return (0x1F << 14) | ((c1) << 7) | (c2); -#endif else if (XCHARSET_DIMENSION (charset) == 1) - return MULE_CHAR_PRIVATE_OFFSET - | ((XCHARSET_LEADING_BYTE (charset) - - FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1); + { + switch (XCHARSET_CHARS (charset)) + { + case 94: + return MIN_CHAR_94 + + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33); + case 96: + return MIN_CHAR_96 + + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32); + default: + abort (); + } + } else - return MULE_CHAR_PRIVATE_OFFSET - | ((XCHARSET_LEADING_BYTE (charset) - - FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); + { + switch (XCHARSET_CHARS (charset)) + { + case 94: + return MIN_CHAR_94x94 + + (XCHARSET_FINAL (charset) - '0') * 94 * 94 + + (c1 - 33) * 94 + (c2 - 33); + case 96: + return MIN_CHAR_96x96 + + (XCHARSET_FINAL (charset) - '0') * 96 * 96 + + (c1 - 32) * 96 + (c2 - 32); + default: + abort (); + } + } } +extern Charset_ID latin_a_char_to_charset[128]; +extern unsigned char latin_a_char_to_byte1[128]; +extern unsigned char latin_a_char_to_byte2[128]; + INLINE void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2); INLINE void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) @@ -422,8 +509,16 @@ breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) *c1 = CHAR_FIELD3_INTERNAL (c); *c2 = 0; } + else if (c <= 0x17f) + { + *charset + = CHARSET_BY_LEADING_BYTE (latin_a_char_to_charset[c - 0x100]); + *c1 = latin_a_char_to_byte1[c - 0x100]; + *c2 = latin_a_char_to_byte2[c - 0x100]; + } else if (c < MIN_CHAR_GREEK) { + printf("not break up u+%x", c); abort (); } else if (c <= MAX_CHAR_GREEK) @@ -434,6 +529,7 @@ breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) } else if (c < MIN_CHAR_CYRILLIC) { + printf("not break up u+%x", c); abort (); } else if (c <= MAX_CHAR_CYRILLIC) @@ -444,6 +540,7 @@ breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) } else if (c < MIN_CHAR_HEBREW) { + printf("not break up u+%x", c); abort (); } else if (c <= MAX_CHAR_HEBREW) @@ -454,6 +551,7 @@ breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) } else if (c < MIN_CHAR_THAI) { + printf("not break up u+%x", c); abort (); } else if (c <= MAX_CHAR_THAI) @@ -464,6 +562,7 @@ breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) } else if (c < MIN_CHAR_HALFWIDTH_KATAKANA) { + printf("not break up u+%x", c); abort (); } else if (c <= MAX_CHAR_HALFWIDTH_KATAKANA) @@ -472,30 +571,46 @@ breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) *c1 = c - MIN_CHAR_HALFWIDTH_KATAKANA + 0x20; *c2 = 0; } - else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N) + else if (c <= MAX_CHAR_94) + { + *charset + = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94, + ((c - MIN_CHAR_94) / 94) + '0', + CHARSET_LEFT_TO_RIGHT); + *c1 = ((c - MIN_CHAR_94) % 94) + 33; + *c2 = 0; + } + else if (c <= MAX_CHAR_96) { *charset - = CHARSET_BY_LEADING_BYTE (CHAR_FIELD2_INTERNAL (c) + - FIELD2_TO_OFFICIAL_LEADING_BYTE); - *c1 = CHAR_FIELD3_INTERNAL(c); + = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96, + ((c - MIN_CHAR_96) / 96) + '0', + CHARSET_LEFT_TO_RIGHT); + *c1 = ((c - MIN_CHAR_96) % 96) + 32; *c2 = 0; } - else if (c < MIN_CHAR_COMPOSITION) + else if (c <= MAX_CHAR_94x94) + { + *charset + = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, + ((c - MIN_CHAR_94x94) / (94 * 94)) + '0', + CHARSET_LEFT_TO_RIGHT); + *c1 = (((c - MIN_CHAR_94x94) / 94) % 94) + 33; + *c2 = ((c - MIN_CHAR_94x94) % 94) + 33; + } + else if (c <= MAX_CHAR_96x96) { *charset - = CHARSET_BY_LEADING_BYTE (CHAR_FIELD1 (c) + - FIELD1_TO_OFFICIAL_LEADING_BYTE); - *c1 = CHAR_FIELD2_INTERNAL(c); - *c2 = CHAR_FIELD3_INTERNAL(c); + = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96, + ((c - MIN_CHAR_96x96) / (96 * 96)) + '0', + CHARSET_LEFT_TO_RIGHT); + *c1 = (((c - MIN_CHAR_96x96) / 96) % 96) + 32; + *c2 = ((c - MIN_CHAR_96x96) % 96) + 32; } else { -#ifdef ENABLE_COMPOSITE_CHARS - return LEADING_BYTE_COMPOSITE; -#else printf("u+%x", c); abort(); -#endif /* ENABLE_COMPOSITE_CHARS */ } } @@ -536,6 +651,7 @@ EXFUN (Fget_charset, 1); extern Lisp_Object Vcharset_chinese_big5_1; extern Lisp_Object Vcharset_chinese_big5_2; extern Lisp_Object Vcharset_japanese_jisx0208; +extern Lisp_Object Vcharset_japanese_jisx0212; Emchar Lstream_get_emchar_1 (Lstream *stream, int first_char); int Lstream_fput_emchar (Lstream *stream, Emchar ch); -- 1.7.10.4