X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmule-charset.c;h=c54162853b7f9cc07194751bd2ea52d13d827ae0;hb=63a686a3d18465a8c96b8cc4a273c295f8a5a379;hp=b0a60fa830416f93ac39e8d00a05f3dc01374dcf;hpb=afa9772e3fcbb4e80e3e4cfd1a40b4fccc6d08b8;p=chise%2Fxemacs-chise.git.1 diff --git a/src/mule-charset.c b/src/mule-charset.c index b0a60fa..c541628 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -77,12 +77,12 @@ static int composite_char_col_next; struct charset_lookup *chlook; static const struct lrecord_description charset_lookup_description_1[] = { - { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte), 128+4*128*2 }, + { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 }, { XD_END } }; static const struct struct_description charset_lookup_description = { - sizeof(struct charset_lookup), + sizeof (struct charset_lookup), charset_lookup_description_1 }; @@ -92,9 +92,9 @@ static const struct struct_description charset_lookup_description = { rep_bytes_by_first_byte(c) is more efficient than the equivalent canonical computation: - (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */ + XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */ -Bytecount rep_bytes_by_first_byte[0xA0] = +const Bytecount rep_bytes_by_first_byte[0xA0] = { /* 0x00 - 0x7f are for straight ASCII */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -150,9 +150,6 @@ Lisp_Object Ql2r, Qr2l; Lisp_Object Vcharset_hash_table; -static Bufbyte next_allocated_1_byte_leading_byte; -static Bufbyte next_allocated_2_byte_leading_byte; - /* Composite characters are characters constructed by overstriking two or more regular characters. @@ -220,7 +217,7 @@ non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c) Use the macro charptr_emchar() instead. */ Emchar -non_ascii_charptr_emchar (CONST Bufbyte *str) +non_ascii_charptr_emchar (const Bufbyte *str) { Bufbyte i0 = *str, i1, i2 = 0; Lisp_Object charset; @@ -325,26 +322,18 @@ non_ascii_valid_char_p (Emchar ch) /* Basic string functions */ /************************************************************************/ -/* Copy the character pointed to by PTR into STR, assuming it's - non-ASCII. Do not call this directly. Use the macro - charptr_copy_char() instead. */ +/* Copy the character pointed to by SRC into DST. Do not call this + directly. Use the macro charptr_copy_char() instead. + Return the number of bytes copied. */ Bytecount -non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str) +non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst) { - Bufbyte *strptr = str; - *strptr = *ptr++; - switch (REP_BYTES_BY_FIRST_BYTE (*strptr)) - { - /* Notice fallthrough. */ - case 4: *++strptr = *ptr++; - case 3: *++strptr = *ptr++; - case 2: *++strptr = *ptr; - break; - default: - abort (); - } - return strptr + 1 - str; + unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src); + unsigned int i; + for (i = bytes; i; i--, dst++, src++) + *dst = *src; + return bytes; } @@ -361,26 +350,15 @@ Lstream_get_emchar_1 (Lstream *stream, int ch) { Bufbyte str[MAX_EMCHAR_LEN]; Bufbyte *strptr = str; + unsigned int bytes; str[0] = (Bufbyte) ch; - switch (REP_BYTES_BY_FIRST_BYTE (ch)) + + for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--) { - /* Notice fallthrough. */ - case 4: - ch = Lstream_getc (stream); - assert (ch >= 0); - *++strptr = (Bufbyte) ch; - case 3: - ch = Lstream_getc (stream); - assert (ch >= 0); - *++strptr = (Bufbyte) ch; - case 2: - ch = Lstream_getc (stream); - assert (ch >= 0); - *++strptr = (Bufbyte) ch; - break; - default: - abort (); + int c = Lstream_getc (stream); + bufpos_checking_assert (c >= 0); + *++strptr = (Bufbyte) c; } return charptr_emchar (str); } @@ -409,7 +387,7 @@ Lstream_funget_emchar (Lstream *stream, Emchar ch) static Lisp_Object mark_charset (Lisp_Object obj) { - struct Lisp_Charset *cs = XCHARSET (obj); + Lisp_Charset *cs = XCHARSET (obj); mark_object (cs->short_name); mark_object (cs->long_name); @@ -422,7 +400,7 @@ mark_charset (Lisp_Object obj) static void print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) { - struct Lisp_Charset *cs = XCHARSET (obj); + Lisp_Charset *cs = XCHARSET (obj); char buf[200]; if (print_readably) @@ -454,15 +432,22 @@ print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) } static const struct lrecord_description charset_description[] = { - { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) }, { XD_END } }; DEFINE_LRECORD_IMPLEMENTATION ("charset", charset, mark_charset, print_charset, 0, 0, 0, charset_description, - struct Lisp_Charset); -/* Make a new charset. */ + Lisp_Charset); +/* Make a new charset. */ +/* #### SJT Should generic properties be allowed? */ static Lisp_Object make_charset (int id, Lisp_Object name, unsigned char rep_bytes, unsigned char type, unsigned char columns, unsigned char graphic, @@ -471,8 +456,10 @@ make_charset (int id, Lisp_Object name, unsigned char rep_bytes, Lisp_Object reg) { Lisp_Object obj; - struct Lisp_Charset *cs = - alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset); + Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset); + + zero_lcrecord (cs); + XSETCHARSET (obj, cs); CHARSET_ID (cs) = id; @@ -506,9 +493,6 @@ make_charset (int id, Lisp_Object name, unsigned char rep_bytes, assert (NILP (chlook->charset_by_leading_byte[id - 128])); chlook->charset_by_leading_byte[id - 128] = obj; - if (id < 0xA0) - /* official leading byte */ - rep_bytes_by_first_byte[id] = rep_bytes; /* Some charsets are "faux" and don't have names or really exist at all except in the leading-byte table. */ @@ -524,17 +508,24 @@ get_unallocated_leading_byte (int dimension) if (dimension == 1) { - if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1) + if (chlook->next_allocated_1_byte_leading_byte > + MAX_LEADING_BYTE_PRIVATE_1) lb = 0; else - lb = next_allocated_1_byte_leading_byte++; + lb = chlook->next_allocated_1_byte_leading_byte++; } else { - if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2) - lb = 0; + /* awfully fragile, but correct */ +#if MAX_LEADING_BYTE_PRIVATE_2 == 255 + if (chlook->next_allocated_2_byte_leading_byte == 0) +#else + if (chlook->next_allocated_2_byte_leading_byte > + MAX_LEADING_BYTE_PRIVATE_2) +#endif + lb = 0; else - lb = next_allocated_2_byte_leading_byte++; + lb = chlook->next_allocated_2_byte_leading_byte++; } if (!lb) @@ -627,13 +618,14 @@ Return a list of the names of all defined charsets. } DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /* -Return the name of the given charset. +Return the name of charset CHARSET. */ (charset)) { return XCHARSET_NAME (Fget_charset (charset)); } +/* #### SJT Should generic properties be allowed? */ DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /* Define a new character set. This function is for use with Mule support. @@ -688,7 +680,6 @@ character set. Recognized properties are: int type; Lisp_Object registry = Qnil; Lisp_Object charset; - Lisp_Object rest, keyword, value; Lisp_Object ccl_program = Qnil; Lisp_Object short_name = Qnil, long_name = Qnil; @@ -700,85 +691,90 @@ character set. Recognized properties are: if (!NILP (charset)) signal_simple_error ("Cannot redefine existing charset", name); - EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props) - { - if (EQ (keyword, Qshort_name)) - { - CHECK_STRING (value); - short_name = value; - } - - if (EQ (keyword, Qlong_name)) - { - CHECK_STRING (value); - long_name = value; - } - - else if (EQ (keyword, Qdimension)) - { - CHECK_INT (value); - dimension = XINT (value); - if (dimension < 1 || dimension > 2) - signal_simple_error ("Invalid value for 'dimension", value); - } - - else if (EQ (keyword, Qchars)) - { - CHECK_INT (value); - chars = XINT (value); - if (chars != 94 && chars != 96) - signal_simple_error ("Invalid value for 'chars", value); - } - - else if (EQ (keyword, Qcolumns)) - { - CHECK_INT (value); - columns = XINT (value); - if (columns != 1 && columns != 2) - signal_simple_error ("Invalid value for 'columns", value); - } - - else if (EQ (keyword, Qgraphic)) - { - CHECK_INT (value); - graphic = XINT (value); - if (graphic < 0 || graphic > 1) - signal_simple_error ("Invalid value for 'graphic", value); - } - - else if (EQ (keyword, Qregistry)) - { - CHECK_STRING (value); - registry = value; - } - - else if (EQ (keyword, Qdirection)) - { - if (EQ (value, Ql2r)) - direction = CHARSET_LEFT_TO_RIGHT; - else if (EQ (value, Qr2l)) - direction = CHARSET_RIGHT_TO_LEFT; - else - signal_simple_error ("Invalid value for 'direction", value); - } - - else if (EQ (keyword, Qfinal)) - { - CHECK_CHAR_COERCE_INT (value); - final = XCHAR (value); - if (final < '0' || final > '~') - signal_simple_error ("Invalid value for 'final", value); - } - - else if (EQ (keyword, Qccl_program)) - { - CHECK_VECTOR (value); - ccl_program = value; - } - - else - signal_simple_error ("Unrecognized property", keyword); - } + { + EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props) + { + if (EQ (keyword, Qshort_name)) + { + CHECK_STRING (value); + short_name = value; + } + + else if (EQ (keyword, Qlong_name)) + { + CHECK_STRING (value); + long_name = value; + } + + else if (EQ (keyword, Qdimension)) + { + CHECK_INT (value); + dimension = XINT (value); + if (dimension < 1 || dimension > 2) + signal_simple_error ("Invalid value for 'dimension", value); + } + + else if (EQ (keyword, Qchars)) + { + CHECK_INT (value); + chars = XINT (value); + if (chars != 94 && chars != 96) + signal_simple_error ("Invalid value for 'chars", value); + } + + else if (EQ (keyword, Qcolumns)) + { + CHECK_INT (value); + columns = XINT (value); + if (columns != 1 && columns != 2) + signal_simple_error ("Invalid value for 'columns", value); + } + + else if (EQ (keyword, Qgraphic)) + { + CHECK_INT (value); + graphic = XINT (value); + if (graphic < 0 || graphic > 1) + signal_simple_error ("Invalid value for 'graphic", value); + } + + else if (EQ (keyword, Qregistry)) + { + CHECK_STRING (value); + registry = value; + } + + else if (EQ (keyword, Qdirection)) + { + if (EQ (value, Ql2r)) + direction = CHARSET_LEFT_TO_RIGHT; + else if (EQ (value, Qr2l)) + direction = CHARSET_RIGHT_TO_LEFT; + else + signal_simple_error ("Invalid value for 'direction", value); + } + + else if (EQ (keyword, Qfinal)) + { + CHECK_CHAR_COERCE_INT (value); + final = XCHAR (value); + if (final < '0' || final > '~') + signal_simple_error ("Invalid value for 'final", value); + } + + else if (EQ (keyword, Qccl_program)) + { + struct ccl_program test_ccl; + + if (setup_ccl_program (&test_ccl, value) < 0) + signal_simple_error ("Invalid value for 'ccl-program", value); + ccl_program = value; + } + + else + signal_simple_error ("Unrecognized property", keyword); + } + } if (!final) error ("'final must be specified"); @@ -831,7 +827,7 @@ NEW-NAME is the name of the new charset. Return the new charset. int id, dimension, columns, graphic, final; int direction, type; Lisp_Object registry, doc_string, short_name, long_name; - struct Lisp_Charset *cs; + Lisp_Charset *cs; charset = Fget_charset (charset); if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset))) @@ -974,13 +970,13 @@ Return dimension of CHARSET. } DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /* -Return property PROP of CHARSET. +Return property PROP of CHARSET, a charset object or symbol naming a charset. Recognized properties are those listed in `make-charset', as well as 'name and 'doc-string. */ (charset, prop)) { - struct Lisp_Charset *cs; + Lisp_Charset *cs; charset = Fget_charset (charset); cs = XCHARSET (charset); @@ -1002,10 +998,8 @@ Recognized properties are those listed in `make-charset', as well as if (EQ (prop, Qreverse_direction_charset)) { Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs); - if (NILP (obj)) - return Qnil; - else - return XCHARSET_NAME (obj); + /* #### Is this translation OK? If so, error checking sufficient? */ + return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj; } signal_simple_error ("Unrecognized charset property name", prop); return Qnil; /* not reached */ @@ -1027,8 +1021,11 @@ Set the 'ccl-program property of CHARSET to CCL-PROGRAM. */ (charset, ccl_program)) { + struct ccl_program test_ccl; + charset = Fget_charset (charset); - CHECK_VECTOR (ccl_program); + if (setup_ccl_program (&test_ccl, ccl_program) < 0) + signal_simple_error ("Invalid ccl-program", ccl_program); XCHARSET_CCL_PROGRAM (charset) = ccl_program; return Qnil; } @@ -1074,7 +1071,7 @@ character s with caron. */ (charset, arg1, arg2)) { - struct Lisp_Charset *cs; + Lisp_Charset *cs; int a1, a2; int lowlim, highlim; @@ -1088,7 +1085,7 @@ character s with caron. CHECK_INT (arg1); /* It is useful (and safe, according to Olivier Galibert) to strip - the 8th bit off ARG1 and ARG2 becaue it allows programmers to + the 8th bit off ARG1 and ARG2 because it allows programmers to write (make-char 'latin-iso8859-2 CODE) where code is the actual Latin 2 code of the character. */ a1 = XINT (arg1) & 0x7f; @@ -1112,18 +1109,39 @@ character s with caron. } DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /* -Return the character set of char CH. +Return the character set of CHARACTER. */ - (ch)) + (character)) { - CHECK_CHAR_COERCE_INT (ch); + CHECK_CHAR_COERCE_INT (character); return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE - (CHAR_LEADING_BYTE (XCHAR (ch)))); + (CHAR_LEADING_BYTE (XCHAR (character)))); +} + +DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /* +Return the octet numbered N (should be 0 or 1) of CHARACTER. +N defaults to 0 if omitted. +*/ + (character, n)) +{ + Lisp_Object charset; + int octet0, octet1; + + CHECK_CHAR_COERCE_INT (character); + + BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1); + + if (NILP (n) || EQ (n, Qzero)) + return make_int (octet0); + else if (EQ (n, make_int (1))) + return make_int (octet1); + else + signal_simple_error ("Octet number must be 0 or 1", n); } DEFUN ("split-char", Fsplit_char, 1, 1, 0, /* -Return list of charset and one or two position-codes of CHAR. +Return list of charset and one or two position-codes of CHARACTER. */ (character)) { @@ -1233,6 +1251,8 @@ Return a string of the characters comprising a composite character. void syms_of_mule_charset (void) { + INIT_LRECORD_IMPLEMENTATION (charset); + DEFSUBR (Fcharsetp); DEFSUBR (Ffind_charset); DEFSUBR (Fget_charset); @@ -1253,6 +1273,7 @@ syms_of_mule_charset (void) DEFSUBR (Fmake_char); DEFSUBR (Fchar_charset); + DEFSUBR (Fchar_octet); DEFSUBR (Fsplit_char); #ifdef ENABLE_COMPOSITE_CHARS @@ -1306,8 +1327,8 @@ vars_of_mule_charset (void) { int i, j, k; - chlook = xnew (struct charset_lookup); - dumpstruct (&chlook, &charset_lookup_description); + chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */ + dump_add_root_struct_ptr (&chlook, &charset_lookup_description); /* Table of charsets indexed by leading byte. */ for (i = 0; i < countof (chlook->charset_by_leading_byte); i++) @@ -1319,8 +1340,8 @@ vars_of_mule_charset (void) for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++) chlook->charset_by_attributes[i][j][k] = Qnil; - next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; - next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; + chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; + chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; } void