X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmule-charset.h;h=a8dcad911dffaa0c66b4e770c5b3e551b183f7e0;hb=313d1c4d8bd1f94564e5edda76ee6aac3ecb70fd;hp=0efe357b42b06800bc82aad3860e54090acf74ae;hpb=8cfc1839650550b300c1ef2873cf6f3741392375;p=chise%2Fxemacs-chise.git- diff --git a/src/mule-charset.h b/src/mule-charset.h index 0efe357..a8dcad9 100644 --- a/src/mule-charset.h +++ b/src/mule-charset.h @@ -324,6 +324,8 @@ Boston, MA 02111-1307, USA. */ /* Definition of leading bytes */ /************************************************************************/ +typedef int Charset_ID; + #define MIN_LEADING_BYTE 0x80 /* These need special treatment in a string and/or character */ #define LEADING_BYTE_ASCII 0x8E /* Omitted in a buffer */ @@ -357,17 +359,15 @@ Boston, MA 02111-1307, USA. */ #define LEADING_BYTE_JAPANESE_JISX0208 0x92 /* Japanese JIS X0208-1983 */ #define LEADING_BYTE_KOREAN_KSC5601 0x93 /* Hangul KS C5601-1987 */ #define LEADING_BYTE_JAPANESE_JISX0212 0x94 /* Japanese JIS X0212-1990 */ -#define LEADING_BYTE_CHINESE_CCITT_GB 0x95 /* CCITT Extended GB */ -#define LEADING_BYTE_CHINESE_BIG5_1 0x96 /* Big5 Level 1 */ -#define LEADING_BYTE_CHINESE_CNS11643_1 0x97 /* Chinese CNS11643 Set 1 */ -#define LEADING_BYTE_CHINESE_CNS11643_2 0x98 /* Chinese CNS11643 Set 2 */ -#define LEADING_BYTE_CHINESE_CNS11643_3 0x99 /* Chinese CNS11643 Set 3 */ -#define LEADING_BYTE_CHINESE_CNS11643_4 0x9A /* Chinese CNS11643 Set 4 */ -#define LEADING_BYTE_CHINESE_CNS11643_5 0x9B /* Chinese CNS11643 Set 5 */ -#define LEADING_BYTE_CHINESE_CNS11643_6 0x9C /* Chinese CNS11643 Set 6 */ -#define LEADING_BYTE_CHINESE_CNS11643_7 0x9D /* Chinese CNS11643 Set 7 */ -#define LEADING_BYTE_CHINESE_BIG5_2 0x9D /* Big5 Level 2 */ -#define LEADING_BYTE_KOREAN_KPS9566 0x9E /* DPRK Hangul KPS 9566-1997 */ +#define LEADING_BYTE_CHINESE_CNS11643_1 0x95 /* Chinese CNS11643 Set 1 */ +#define LEADING_BYTE_CHINESE_CNS11643_2 0x96 /* Chinese CNS11643 Set 2 */ +#define LEADING_BYTE_CHINESE_BIG5_1 0x97 /* Big5 Level 1 */ +#define LEADING_BYTE_CHINESE_BIG5_2 0x98 /* Big5 Level 2 */ + /* 0x99 unused */ + /* 0x9A unused */ + /* 0x9B unused */ + /* 0x9C unused */ + /* 0x9D unused */ #define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978 #define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_CHINESE_BIG5_2 @@ -543,7 +543,7 @@ DECLARE_LRECORD (charset, struct Lisp_Charset); CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) /* Table of charsets indexed by (leading byte - 128). */ -extern Lisp_Object charset_by_leading_byte[128]; +extern Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES]; /* Table of charsets indexed by type/final-byte/direction. */ extern Lisp_Object charset_by_attributes[4][128][2]; @@ -567,13 +567,15 @@ INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (int lb); INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (int lb) { - assert (lb >= 0x80 && lb <= 0xFF); - return charset_by_leading_byte[lb - 128]; + assert (lb >= MIN_LEADING_BYTE && + lb < (MIN_LEADING_BYTE + NUM_LEADING_BYTES)); + return charset_by_leading_byte[lb - MIN_LEADING_BYTE]; } #else -#define CHARSET_BY_LEADING_BYTE(lb) (charset_by_leading_byte[(lb) - 128]) +#define CHARSET_BY_LEADING_BYTE(lb) \ + (charset_by_leading_byte[(lb) - MIN_LEADING_BYTE]) #endif @@ -600,39 +602,20 @@ REP_BYTES_BY_FIRST_BYTE (int fb) /* Dealing with characters */ /************************************************************************/ -/* Is this character represented by more than one byte in a string? */ - -#define CHAR_MULTIBYTE_P(c) ((c) >= 0x80) - -#define CHAR_ASCII_P(c) (!CHAR_MULTIBYTE_P (c)) +#define CHAR_ASCII_P(ch) ((ch) <= 0x7F) /* The bit fields of character are divided into 3 parts: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits) */ -#define CHAR_FIELD1_MASK (0x7F << 14) +#define CHAR_FIELD1_MASK (0x1F << 14) #define CHAR_FIELD2_MASK (0x7F << 7) #define CHAR_FIELD3_MASK 0x7F -#define MIN_CHAR_GREEK 0x0370 -#define MAX_CHAR_GREEK 0x03CF - -#define MIN_CHAR_CYRILLIC 0x0400 -#define MAX_CHAR_CYRILLIC 0x045F - -#define MIN_CHAR_HEBREW 0x0590 -#define MAX_CHAR_HEBREW 0x05EF - -#define MIN_CHAR_THAI 0x0E00 -#define MAX_CHAR_THAI 0x0E5F - -#define MIN_CHAR_HALFWIDTH_KATAKANA 0xFF60 -#define MAX_CHAR_HALFWIDTH_KATAKANA 0xFF9F - /* Macros to access each field of a character code of C. */ #define CHAR_FIELD1(c) (((c) & CHAR_FIELD1_MASK) >> 14) -#define CHAR_FIELD2_INTERNAL(c) (((c) & CHAR_FIELD2_MASK) >> 7) -#define CHAR_FIELD3_INTERNAL(c) ((c) & CHAR_FIELD3_MASK) +#define CHAR_FIELD2(c) (((c) & CHAR_FIELD2_MASK) >> 7) +#define CHAR_FIELD3(c) ((c) & CHAR_FIELD3_MASK) /* Field 1, if non-zero, usually holds a leading byte for a dimension-2 charset. Field 2, if non-zero, usually holds a leading @@ -643,49 +626,8 @@ REP_BYTES_BY_FIRST_BYTE (int fb) #define FIELD2_TO_OFFICIAL_LEADING_BYTE 0x80 #define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80 -#define FIELD1_TO_PRIVATE_LEADING_BYTE 0xc0 -#define FIELD1_TO_OFFICIAL_LEADING_BYTE 0x50 - -INLINE Emchar -CHAR_FIELD2 (Emchar c) -{ - if( (MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK) ) - return LEADING_BYTE_GREEK_ISO8859_7 - - FIELD2_TO_OFFICIAL_LEADING_BYTE; - else if( (MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC) ) - return LEADING_BYTE_CYRILLIC_ISO8859_5 - - FIELD2_TO_OFFICIAL_LEADING_BYTE; - else if( (MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW) ) - return LEADING_BYTE_HEBREW_ISO8859_8 - - FIELD2_TO_OFFICIAL_LEADING_BYTE; - else if( (MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI) ) - return LEADING_BYTE_THAI_TIS620 - - FIELD2_TO_OFFICIAL_LEADING_BYTE; - else if( (MIN_CHAR_HALFWIDTH_KATAKANA <= c) - && (c <= MAX_CHAR_HALFWIDTH_KATAKANA) ) - return LEADING_BYTE_KATAKANA_JISX0201 - - FIELD2_TO_OFFICIAL_LEADING_BYTE; - else - return CHAR_FIELD2_INTERNAL(c); -} - -INLINE Emchar -CHAR_FIELD3 (Emchar c) -{ - if( (MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK) ) - return c - MIN_CHAR_GREEK + 0x20; - else if( (MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC) ) - return c - MIN_CHAR_CYRILLIC + 0x20; - else if( (MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW) ) - return c - MIN_CHAR_HEBREW + 0x20; - else if( (MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI) ) - return c - MIN_CHAR_THAI + 0x20; - else if( (MIN_CHAR_HALFWIDTH_KATAKANA <= c) - && (c <= MAX_CHAR_HALFWIDTH_KATAKANA) ) - return c - MIN_CHAR_HALFWIDTH_KATAKANA + 0x20; - else - return CHAR_FIELD3_INTERNAL(c); -} +#define FIELD1_TO_OFFICIAL_LEADING_BYTE 0x8F +#define FIELD1_TO_PRIVATE_LEADING_BYTE 0xE1 /* Minimum and maximum allowed values for the fields. */ @@ -711,18 +653,11 @@ CHAR_FIELD3 (Emchar c) /* Minimum character code of each character. */ -#define MULE_CHAR_PRIVATE_OFFSET (0xe0 << 16) - -#define MIN_CHAR_OFFICIAL_TYPE9N \ - (MULE_CHAR_PRIVATE_OFFSET | (MIN_CHAR_FIELD2_OFFICIAL << 7)) -#define MIN_CHAR_PRIVATE_TYPE9N \ - (MULE_CHAR_PRIVATE_OFFSET | (MIN_CHAR_FIELD2_PRIVATE << 7)) -#define MIN_CHAR_PRIVATE_TYPE9NX9N \ - (MULE_CHAR_PRIVATE_OFFSET | (MIN_CHAR_FIELD1_PRIVATE << 14)) -#define MIN_CHAR_OFFICIAL_TYPE9NX9N \ - (MULE_CHAR_PRIVATE_OFFSET | (MIN_CHAR_FIELD1_OFFICIAL << 14)) -#define MIN_CHAR_COMPOSITION \ - (MULE_CHAR_PRIVATE_OFFSET | (0x7f << 14)) +#define MIN_CHAR_OFFICIAL_TYPE9N (MIN_CHAR_FIELD2_OFFICIAL << 7) +#define MIN_CHAR_PRIVATE_TYPE9N (MIN_CHAR_FIELD2_PRIVATE << 7) +#define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_CHAR_FIELD1_OFFICIAL << 14) +#define MIN_CHAR_PRIVATE_TYPE9NX9N (MIN_CHAR_FIELD1_PRIVATE << 14) +#define MIN_CHAR_COMPOSITION (0x1F << 14) /* Leading byte of a character. @@ -739,24 +674,12 @@ CHAR_LEADING_BYTE (Emchar c) return LEADING_BYTE_ASCII; else if (c < 0xA0) return LEADING_BYTE_CONTROL_1; - else if (c <= 0xff) - return LEADING_BYTE_LATIN_ISO8859_1; - else if (c <= MAX_CHAR_GREEK) - return LEADING_BYTE_GREEK_ISO8859_7; - else if (c <= MAX_CHAR_CYRILLIC) - return LEADING_BYTE_CYRILLIC_ISO8859_5; - else if (c <= MAX_CHAR_HEBREW) - return LEADING_BYTE_HEBREW_ISO8859_8; - else if (c <= MAX_CHAR_THAI) - return LEADING_BYTE_THAI_TIS620; - else if (c <= MAX_CHAR_HALFWIDTH_KATAKANA) - return LEADING_BYTE_KATAKANA_JISX0201; - else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N) - return CHAR_FIELD2 (c) + FIELD2_TO_OFFICIAL_LEADING_BYTE; else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N) - return CHAR_FIELD1 (c) + FIELD1_TO_PRIVATE_LEADING_BYTE; - else if (c < MIN_CHAR_COMPOSITION) + return CHAR_FIELD2 (c) + FIELD2_TO_OFFICIAL_LEADING_BYTE; + else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N) return CHAR_FIELD1 (c) + FIELD1_TO_OFFICIAL_LEADING_BYTE; + else if (c < MIN_CHAR_COMPOSITION) + return CHAR_FIELD1 (c) + FIELD1_TO_PRIVATE_LEADING_BYTE; else { #ifdef ENABLE_COMPOSITE_CHARS @@ -786,37 +709,19 @@ MAKE_CHAR (Lisp_Object charset, int c1, int c2) return c1; else if (EQ (charset, Vcharset_control_1)) return c1 | 0x80; - else if (EQ (charset, Vcharset_latin_iso8859_1)) - return c1 | 0x80; - else if (EQ (charset, Vcharset_greek_iso8859_7)) - return c1 + MIN_CHAR_GREEK - 0x20; - else if (EQ (charset, Vcharset_cyrillic_iso8859_5)) - return c1 + MIN_CHAR_CYRILLIC - 0x20; - else if (EQ (charset, Vcharset_hebrew_iso8859_8)) - return c1 + MIN_CHAR_HEBREW - 0x20; - else if (EQ (charset, Vcharset_thai_tis620)) - return c1 + MIN_CHAR_THAI - 0x20; - else if (EQ (charset, Vcharset_katakana_jisx0201)) - if (c1 < 0x60) - return c1 + MIN_CHAR_HALFWIDTH_KATAKANA - 0x20; - else - return 32; #ifdef ENABLE_COMPOSITE_CHARS else if (EQ (charset, Vcharset_composite)) return (0x1F << 14) | ((c1) << 7) | (c2); #endif else if (XCHARSET_DIMENSION (charset) == 1) - return MULE_CHAR_PRIVATE_OFFSET - | ((XCHARSET_LEADING_BYTE (charset) - - FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1); + return ((XCHARSET_LEADING_BYTE (charset) - + FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1); else if (!XCHARSET_PRIVATE_P (charset)) - return MULE_CHAR_PRIVATE_OFFSET - | ((XCHARSET_LEADING_BYTE (charset) - - FIELD1_TO_OFFICIAL_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); + return ((XCHARSET_LEADING_BYTE (charset) - + FIELD1_TO_OFFICIAL_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); else - return MULE_CHAR_PRIVATE_OFFSET - | ((XCHARSET_LEADING_BYTE (charset) - - FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); + return ((XCHARSET_LEADING_BYTE (charset) - + FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); } /* The charset of character C is set to CHARSET, and the