#include "lstream.h"
#include "device.h"
#include "faces.h"
+#include "mule-ccl.h"
/* The various pre-defined charsets. */
#endif /* ENABLE_COMPOSITE_CHARS */
/* Table of charsets indexed by leading byte. */
-Lisp_Object charset_by_leading_byte[128];
+Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
/* Table of charsets indexed by type/final-byte/direction. */
Lisp_Object charset_by_attributes[4][128][2];
+#ifndef UTF2000
/* Table of number of bytes in the string representation of a character
indexed by the first byte of that representation.
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 0x80 - 0x8f are for Dimension-1 official charsets */
+#ifdef CHAR_IS_UCS4
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
+#else
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+#endif
/* 0x90 - 0x9d are for Dimension-2 official charsets */
/* 0x9e is for Dimension-1 private charsets */
/* 0x9f is for Dimension-2 private charsets */
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
};
+#endif
+
+Lisp_Object Vutf_2000_version; /* storage registered as the Lisp variable "utf-2000-version" */
+
+int leading_code_private_11; /* storage registered as the Lisp variable "leading-code-private-11" */
Lisp_Object Qcharsetp;
Lisp_Object Qregistry, Qfinal, Qgraphic;
Lisp_Object Qdirection;
Lisp_Object Qreverse_direction_charset;
-Lisp_Object Qccl_program;
Lisp_Object Qleading_byte;
Lisp_Object Qshort_name, Qlong_name;
non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
{
Bufbyte *p;
+#ifndef UTF2000 /* these locals are referenced only by the non-UTF2000 path below */
Bufbyte lb;
int c1, c2;
Lisp_Object charset;
+#endif /* !UTF2000 */
p = str;
+#ifdef UTF2000 /* UTF-2000 build: store C at STR as a 1- to 6-byte UTF-8-style sequence */
+ if ( c <= 0x7f ) /* fits in 7 bits: a single byte, stored unchanged */
+ {
+ *p++ = c;
+ }
+ else if ( c <= 0x7ff ) /* up to 11 bits: lead byte 110xxxxx plus 1 continuation byte */
+ {
+ *p++ = (c >> 6) | 0xc0;
+ *p++ = (c & 0x3f) | 0x80; /* continuation bytes are 10xxxxxx, 6 payload bits each */
+ }
+ else if ( c <= 0xffff ) /* up to 16 bits: lead byte 1110xxxx plus 2 continuation bytes */
+ {
+ *p++ = (c >> 12) | 0xe0;
+ *p++ = ((c >> 6) & 0x3f) | 0x80;
+ *p++ = (c & 0x3f) | 0x80;
+ }
+ else if ( c <= 0x1fffff ) /* up to 21 bits: lead byte 11110xxx plus 3 continuation bytes */
+ {
+ *p++ = (c >> 18) | 0xf0;
+ *p++ = ((c >> 12) & 0x3f) | 0x80;
+ *p++ = ((c >> 6) & 0x3f) | 0x80;
+ *p++ = (c & 0x3f) | 0x80;
+ }
+ else if ( c <= 0x3ffffff ) /* up to 26 bits: lead byte 111110xx plus 4 continuation bytes */
+ {
+ *p++ = (c >> 24) | 0xf8;
+ *p++ = ((c >> 18) & 0x3f) | 0x80;
+ *p++ = ((c >> 12) & 0x3f) | 0x80;
+ *p++ = ((c >> 6) & 0x3f) | 0x80;
+ *p++ = (c & 0x3f) | 0x80;
+ }
+ else /* everything larger: lead byte 1111110x plus 5 continuation bytes */
+ {
+ *p++ = (c >> 30) | 0xfc;
+ *p++ = ((c >> 24) & 0x3f) | 0x80;
+ *p++ = ((c >> 18) & 0x3f) | 0x80;
+ *p++ = ((c >> 12) & 0x3f) | 0x80;
+ *p++ = ((c >> 6) & 0x3f) | 0x80;
+ *p++ = (c & 0x3f) | 0x80;
+ }
+#else /* !UTF2000: bytes are derived from BREAKUP_CHAR and the character's leading byte */
BREAKUP_CHAR (c, charset, c1, c2);
lb = CHAR_LEADING_BYTE (c);
if (LEADING_BYTE_PRIVATE_P (lb))
*p++ = c1 | 0x80;
if (c2)
*p++ = c2 | 0x80;
-
+#endif /* UTF2000; the return value below is the number of bytes stored at STR */
return (p - str);
}
Emchar
non_ascii_charptr_emchar (CONST Bufbyte *str)
{
+#ifdef UTF2000 /* UTF-2000 build: decode one 1- to 6-byte UTF-8-style sequence starting at STR */
+ Bufbyte b;
+ Emchar ch;
+ int len; /* number of continuation bytes still to be consumed */
+
+ b = *str++;
+ if ( b >= 0xfc ) /* lead byte 1111110x: 1 payload bit, 5 continuation bytes follow */
+ {
+ ch = (b & 0x01);
+ len = 5;
+ }
+ else if ( b >= 0xf8 ) /* lead byte 111110xx: 2 payload bits, 4 continuation bytes */
+ {
+ ch = b & 0x03;
+ len = 4;
+ }
+ else if ( b >= 0xf0 ) /* lead byte 11110xxx: 3 payload bits, 3 continuation bytes */
+ {
+ ch = b & 0x07;
+ len = 3;
+ }
+ else if ( b >= 0xe0 ) /* lead byte 1110xxxx: 4 payload bits, 2 continuation bytes */
+ {
+ ch = b & 0x0f;
+ len = 2;
+ }
+ else if ( b >= 0xc0 ) /* lead byte 110xxxxx: 5 payload bits, 1 continuation byte */
+ {
+ ch = b & 0x1f;
+ len = 1;
+ }
+ else /* b < 0xc0: the byte itself is the character value, nothing follows */
+ {
+ ch = b;
+ len = 0;
+ }
+ for( ; len > 0; len-- ) /* append the low 6 bits of each following byte */
+ {
+ b = *str++;
+ ch = ( ch << 6 ) | ( b & 0x3f ); /* the top two bits are masked off, not checked */
+ }
+ return ch;
+#else /* !UTF2000: character is assembled with MAKE_CHAR from charset and position bytes */
Bufbyte i0 = *str, i1, i2 = 0;
Lisp_Object charset;
i2 = *++str & 0x7F;
return MAKE_CHAR (charset, i1, i2);
+#endif /* UTF2000 */
}
/* Return whether CH is a valid Emchar, assuming it's non-ASCII.
Do not call this directly. Use the macro valid_char_p() instead. */
+#ifndef UTF2000 /* non_ascii_valid_char_p is compiled only when UTF2000 is not defined */
int
non_ascii_valid_char_p (Emchar ch)
{
return (XCHARSET_CHARS (charset) == 96);
}
}
+#endif /* !UTF2000 */
\f
/************************************************************************/
switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
{
/* Notice fallthrough. */
+#ifdef UTF2000
+ case 6: *++strptr = *ptr++;
+ case 5: *++strptr = *ptr++;
+#endif
case 4: *++strptr = *ptr++;
case 3: *++strptr = *ptr++;
case 2: *++strptr = *ptr;
switch (REP_BYTES_BY_FIRST_BYTE (ch))
{
/* Notice fallthrough. */
+#ifdef UTF2000
+ case 6:
+ ch = Lstream_getc (stream);
+ assert (ch >= 0);
+ *++strptr = (Bufbyte) ch;
+ case 5:
+ ch = Lstream_getc (stream);
+ assert (ch >= 0);
+ *++strptr = (Bufbyte) ch;
+#endif
case 4:
ch = Lstream_getc (stream);
assert (ch >= 0);
write_c_string (buf, printcharfun);
}
+static const struct lrecord_description charset_description[] = { /* description table handed to DEFINE_LRECORD_IMPLEMENTATION for "charset" below */
+ { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 }, /* Lisp_Object data starting at the `name' member; the 7 is presumably a count of consecutive slots -- TODO confirm against the lrecord_description definition */
+ { XD_END } /* final entry of the table */
+};
+
DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
- mark_charset, print_charset, 0, 0, 0,
+ mark_charset, print_charset, 0, 0, 0, charset_description, /* new argument: the table defined above */
struct Lisp_Charset);
/* Make a new charset. */
charset_by_attributes[type][final][direction] = obj;
}
- assert (NILP (charset_by_leading_byte[id - 128]));
- charset_by_leading_byte[id - 128] = obj;
+ assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
+ charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
+#ifndef UTF2000
if (id < 0xA0)
/* official leading byte */
rep_bytes_by_first_byte[id] = rep_bytes;
+#endif
/* Some charsets are "faux" and don't have names or really exist at
all except in the leading-byte table. */
/************************************************************************/
DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
-Make a multi-byte character from CHARSET and octets ARG1 and ARG2.
+Make a character from CHARSET and octets ARG1 and ARG2.
+ARG2 is required only for characters from two-dimensional charsets.
+For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
+character s with caron.
*/
(charset, arg1, arg2))
{
(CHAR_LEADING_BYTE (XCHAR (ch))));
}
-DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
-Return the octet numbered N (should be 0 or 1) of char CH.
-N defaults to 0 if omitted.
+DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
+Return list of charset and one or two position-codes of CHARACTER.
+Two position-codes are returned only when the charset's dimension is 2.
*/
- (ch, n))
+ (character))
{
- Lisp_Object charset;
- int c1, c2, int_n;
+ /* This function can GC */
+ struct gcpro gcpro1, gcpro2;
+ Lisp_Object charset = Qnil;
+ Lisp_Object rc = Qnil; /* the list that is eventually returned */
+ int c1, c2;
- CHECK_CHAR_COERCE_INT (ch);
- if (NILP (n))
- int_n = 0;
+ GCPRO2 (charset, rc); /* protect both Lisp values while the list is built; paired with UNGCPRO below */
+ CHECK_CHAR_COERCE_INT (character);
+
+ BREAKUP_CHAR (XCHAR (character), charset, c1, c2); /* yields the charset and position codes used below */
+
+ if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2) /* dimension 2: report both position codes */
+ {
+ rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
+ }
else
{
- CHECK_INT (n);
- int_n = XINT (n);
- if (int_n != 0 && int_n != 1)
- signal_simple_error ("Octet number must be 0 or 1", n);
+ rc = list2 (XCHARSET_NAME (charset), make_int (c1)); /* otherwise only the first position code is reported */
}
- BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
- return make_int (int_n == 0 ? c1 : c2);
+ UNGCPRO;
+
+ return rc;
}
\f
DEFSUBR (Fmake_char);
DEFSUBR (Fchar_charset);
- DEFSUBR (Fchar_octet);
+ DEFSUBR (Fsplit_char);
#ifdef ENABLE_COMPOSITE_CHARS
DEFSUBR (Fmake_composite_char);
defsymbol (&Qgraphic, "graphic");
defsymbol (&Qdirection, "direction");
defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
- defsymbol (&Qccl_program, "ccl-program");
defsymbol (&Qshort_name, "short-name");
defsymbol (&Qlong_name, "long-name");
defsymbol (&Qchinese_big5_2, "chinese-big5-2");
defsymbol (&Qcomposite, "composite");
+
+#ifdef UTF2000
+ Vutf_2000_version = build_string("0.4 (Shin-Imamiya)");
+ DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
+Version number of UTF-2000.
+*/ );
+#endif
+
+ leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
+ DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
+Leading-code of private TYPE9N charset of column-width 1.
+*/ );
+ leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
}
void