X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmule-charset.c;h=7959a90263da74c4ce9955490e145d3b24bb9fc6;hb=1fcdc2830c4efbf90ddb9cba59205468c431d6a4;hp=0a4b0ed313fea25047574b7731c5b37273bb4260;hpb=937bb3ce20f4819a75e8234cb91a1acaa19847f8;p=chise%2Fxemacs-chise.git diff --git a/src/mule-charset.c b/src/mule-charset.c index 0a4b0ed..7959a90 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -32,6 +32,7 @@ Boston, MA 02111-1307, USA. */ #include "lstream.h" #include "device.h" #include "faces.h" +#include "mule-ccl.h" /* The various pre-defined charsets. */ @@ -74,11 +75,12 @@ static int composite_char_col_next; #endif /* ENABLE_COMPOSITE_CHARS */ /* Table of charsets indexed by leading byte. */ -Lisp_Object charset_by_leading_byte[128]; +Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES]; /* Table of charsets indexed by type/final-byte/direction. */ Lisp_Object charset_by_attributes[4][128][2]; +#ifndef UTF2000 /* Table of number of bytes in the string representation of a character indexed by the first byte of that representation. @@ -98,12 +100,21 @@ Bytecount rep_bytes_by_first_byte[0xA0] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 - 0x8f are for Dimension-1 official charsets */ +#ifdef CHAR_IS_UCS4 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, +#else 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +#endif /* 0x90 - 0x9d are for Dimension-2 official charsets */ /* 0x9e is for Dimension-1 private charsets */ /* 0x9f is for Dimension-2 private charsets */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 }; +#endif + +Lisp_Object Vutf_2000_version; + +int leading_code_private_11; Lisp_Object Qcharsetp; @@ -111,7 +122,6 @@ Lisp_Object Qcharsetp; Lisp_Object Qregistry, Qfinal, Qgraphic; Lisp_Object Qdirection; Lisp_Object Qreverse_direction_charset; -Lisp_Object Qccl_program; Lisp_Object Qleading_byte; Lisp_Object Qshort_name, Qlong_name; @@ -190,11 +200,54 @@ Bytecount non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c) { Bufbyte *p; +#ifndef UTF2000 Bufbyte lb; int c1, c2; Lisp_Object charset; +#endif p = str; +#ifdef UTF2000 + if ( c <= 0x7f ) + { + *p++ = c; + } + else if ( c <= 0x7ff ) + { + *p++ = (c >> 6) | 0xc0; + *p++ = (c & 0x3f) | 0x80; + } + else if ( c <= 0xffff ) + { + *p++ = (c >> 12) | 0xe0; + *p++ = ((c >> 6) & 0x3f) | 0x80; + *p++ = (c & 0x3f) | 0x80; + } + else if ( c <= 0x1fffff ) + { + *p++ = (c >> 18) | 0xf0; + *p++ = ((c >> 12) & 0x3f) | 0x80; + *p++ = ((c >> 6) & 0x3f) | 0x80; + *p++ = (c & 0x3f) | 0x80; + } + else if ( c <= 0x3ffffff ) + { + *p++ = (c >> 24) | 0xf8; + *p++ = ((c >> 18) & 0x3f) | 0x80; + *p++ = ((c >> 12) & 0x3f) | 0x80; + *p++ = ((c >> 6) & 0x3f) | 0x80; + *p++ = (c & 0x3f) | 0x80; + } + else + { + *p++ = (c >> 30) | 0xfc; + *p++ = ((c >> 24) & 0x3f) | 0x80; + *p++ = ((c >> 18) & 0x3f) | 0x80; + *p++ = ((c >> 12) & 0x3f) | 0x80; + *p++ = ((c >> 6) & 0x3f) | 0x80; + *p++ = (c & 0x3f) | 0x80; + } +#else BREAKUP_CHAR (c, charset, c1, c2); lb = CHAR_LEADING_BYTE (c); if (LEADING_BYTE_PRIVATE_P (lb)) @@ -205,7 +258,7 @@ non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c) *p++ = c1 | 0x80; if (c2) *p++ = c2 | 0x80; - +#endif return (p - str); } @@ -216,6 +269,49 @@ non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c) Emchar non_ascii_charptr_emchar (CONST Bufbyte *str) { +#ifdef UTF2000 + Bufbyte b; + Emchar ch; + int len; + + b = *str++; + if ( b >= 0xfc ) + { + ch = (b & 0x01); + len = 5; + } + else if ( b >= 0xf8 ) + { + ch = b & 0x03; + len = 4; + } + else if ( b >= 0xf0 ) + { + ch = b & 0x07; + len = 3; + } + else if ( b >= 0xe0 ) + { + ch = b & 0x0f; + len = 2; + } + else if ( b >= 0xc0 ) + { + ch = b & 0x1f; + len = 1; + } + else + { + ch = b; + len = 0; + } + for( ; len > 0; len-- ) + { + b = *str++; + ch = ( ch << 6 ) | ( b & 0x3f ); + } + return ch; +#else Bufbyte i0 = *str, i1, i2 = 0; Lisp_Object charset; @@ -232,11 +328,13 @@ non_ascii_charptr_emchar (CONST Bufbyte *str) i2 = *++str & 0x7F; return MAKE_CHAR (charset, i1, i2); +#endif } /* Return whether CH is a valid Emchar, assuming it's non-ASCII. Do not call this directly. Use the macro valid_char_p() instead. */ +#ifndef UTF2000 int non_ascii_valid_char_p (Emchar ch) { @@ -307,6 +405,7 @@ non_ascii_valid_char_p (Emchar ch) return (XCHARSET_CHARS (charset) == 96); } } +#endif /************************************************************************/ @@ -325,6 +424,10 @@ non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str) switch (REP_BYTES_BY_FIRST_BYTE (*strptr)) { /* Notice fallthrough. */ +#ifdef UTF2000 + case 6: *++strptr = *ptr++; + case 5: *++strptr = *ptr++; +#endif case 4: *++strptr = *ptr++; case 3: *++strptr = *ptr++; case 2: *++strptr = *ptr; @@ -354,6 +457,16 @@ Lstream_get_emchar_1 (Lstream *stream, int ch) switch (REP_BYTES_BY_FIRST_BYTE (ch)) { /* Notice fallthrough. */ +#ifdef UTF2000 + case 6: + ch = Lstream_getc (stream); + assert (ch >= 0); + *++strptr = (Bufbyte) ch; + case 5: + ch = Lstream_getc (stream); + assert (ch >= 0); + *++strptr = (Bufbyte) ch; +#endif case 4: ch = Lstream_getc (stream); assert (ch >= 0); @@ -492,11 +605,13 @@ make_charset (int id, Lisp_Object name, unsigned char rep_bytes, charset_by_attributes[type][final][direction] = obj; } - assert (NILP (charset_by_leading_byte[id - 128])); - charset_by_leading_byte[id - 128] = obj; + assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE])); + charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj; +#ifndef UTF2000 if (id < 0xA0) /* official leading byte */ rep_bytes_by_first_byte[id] = rep_bytes; +#endif /* Some charsets are "faux" and don't have names or really exist at all except in the leading-byte table. */ @@ -1254,7 +1369,6 @@ syms_of_mule_charset (void) defsymbol (&Qgraphic, "graphic"); defsymbol (&Qdirection, "direction"); defsymbol (&Qreverse_direction_charset, "reverse-direction-charset"); - defsymbol (&Qccl_program, "ccl-program"); defsymbol (&Qshort_name, "short-name"); defsymbol (&Qlong_name, "long-name"); @@ -1288,6 +1402,13 @@ syms_of_mule_charset (void) defsymbol (&Qchinese_big5_2, "chinese-big5-2"); defsymbol (&Qcomposite, "composite"); + +#ifdef UTF2000 + Vutf_2000_version = build_string("0.4 (Shin-Imamiya)"); + DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /* +Version number of UTF-2000. +*/ ); +#endif } void @@ -1307,6 +1428,12 @@ vars_of_mule_charset (void) next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; + + leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1; + DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /* +Leading-code of private TYPE9N charset of column-width 1. +*/ ); + leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1; } void