X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmule-charset.c;h=324a6100c11fd6df1bc446118f4a6b9c7c2e971e;hb=e66d0a27c2fd4cdf439328ae014419a38fb99152;hp=db6b2b8d97f7e77868b225969a3059a16507e67b;hpb=37b3644d18eac02385208725715ab8e9af354b74;p=chise%2Fxemacs-chise.git- diff --git a/src/mule-charset.c b/src/mule-charset.c index db6b2b8..324a610 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -59,6 +59,7 @@ Lisp_Object Vcharset_chinese_cns11643_1; Lisp_Object Vcharset_chinese_cns11643_2; #ifdef UTF2000 Lisp_Object Vcharset_ucs_bmp; +Lisp_Object Vcharset_latin_viscii; Lisp_Object Vcharset_latin_viscii_lower; Lisp_Object Vcharset_latin_viscii_upper; Lisp_Object Vcharset_hiragana_jisx0208; @@ -124,77 +125,428 @@ Bytecount rep_bytes_by_first_byte[0xA0] = #endif #ifdef UTF2000 -Emchar_to_byte_table* -make_byte_from_character_table () +static Lisp_Object +mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object)) { - Emchar_to_byte_table* table - = (Emchar_to_byte_table*) xmalloc (sizeof (Emchar_to_byte_table)); + struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj); + int i; - table->base = NULL; - return table; + for (i = 0; i < 256; i++) + { + markobj (cte->property[i]); + } + return Qnil; } -#define destroy_byte_from_character_table(table) xfree(table) +static int +char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth) +{ + struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1); + struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2); + int i; + + for (i = 0; i < 256; i++) + if (CHAR_BYTE_TABLE_P (cte1->property[i])) + { + if (CHAR_BYTE_TABLE_P (cte2->property[i])) + { + if (!char_byte_table_equal (cte1->property[i], + cte2->property[i], depth + 1)) + return 0; + } + else + return 0; + } + else + if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1)) + return 0; + return 1; +} -void -put_byte_from_character_table (Emchar ch, unsigned char val, - Emchar_to_byte_table* table) +static unsigned long +char_byte_table_hash (Lisp_Object obj, int depth) { - if (table->base == NULL) + struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj); + + return internal_array_hash (cte->property, 256, depth); +} + +static const struct lrecord_description char_byte_table_description[] = { + { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 }, + { XD_END } +}; + +DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table, + mark_char_byte_table, + internal_object_printer, + 0, char_byte_table_equal, + char_byte_table_hash, + char_byte_table_description, + struct Lisp_Char_Byte_Table); + + +static Lisp_Object +make_char_byte_table (Lisp_Object initval) +{ + Lisp_Object obj; + int i; + struct Lisp_Char_Byte_Table *cte = + alloc_lcrecord_type (struct Lisp_Char_Byte_Table, + &lrecord_char_byte_table); + + for (i = 0; i < 256; i++) + cte->property[i] = initval; + + XSETCHAR_BYTE_TABLE (obj, cte); + return obj; +} + +static Lisp_Object +copy_char_byte_table (Lisp_Object entry) +{ + struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry); + Lisp_Object obj; + int i; + struct Lisp_Char_Byte_Table *ctenew = + alloc_lcrecord_type (struct Lisp_Char_Byte_Table, + &lrecord_char_byte_table); + + for (i = 0; i < 256; i++) { - table->base = xmalloc (256); - table->offset = ch - (ch % 256); - table->size = 256; - table->base[ch - table->offset] = val; + Lisp_Object new = cte->property[i]; + if (CHAR_BYTE_TABLE_P (new)) + ctenew->property[i] = copy_char_byte_table (new); + else + ctenew->property[i] = new; } + + XSETCHAR_BYTE_TABLE (obj, ctenew); + return obj; +} + +#define make_char_code_table(initval) make_char_byte_table(initval) + +Lisp_Object +get_char_code_table (Emchar ch, Lisp_Object table) +{ + struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table); + Lisp_Object ret = cpt->property [ch >> 24]; + + if (CHAR_BYTE_TABLE_P (ret)) + cpt = XCHAR_BYTE_TABLE (ret); else - { - int i = ch - table->offset; + return ret; + + ret = cpt->property [(unsigned char) (ch >> 16)]; + if (CHAR_BYTE_TABLE_P (ret)) + cpt = XCHAR_BYTE_TABLE (ret); + else + return ret; - if (i < 0) + ret = cpt->property [(unsigned char) (ch >> 8)]; + if (CHAR_BYTE_TABLE_P (ret)) + cpt = XCHAR_BYTE_TABLE (ret); + else + return ret; + + return cpt->property [(unsigned char) ch]; +} + +void +put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table) +{ + struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table); + Lisp_Object ret = cpt1->property[ch >> 24]; + + if (CHAR_BYTE_TABLE_P (ret)) + { + struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret); + + ret = cpt2->property[(unsigned char)(ch >> 16)]; + if (CHAR_BYTE_TABLE_P (ret)) { - size_t new_size = table->size - i; - size_t j; - - new_size += 256 - (new_size % 256); - table->base = xrealloc (table->base, new_size); - memmove (table->base + (new_size - table->size), table->base, - table->size); - for (j = 0; j < (new_size - table->size); j++) - table->base[j] = 0; - table->offset -= (new_size - table->size); - table->base[ch - table->offset] = val; - table->size = new_size; + struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret); + + ret = cpt3->property[(unsigned char)(ch >> 8)]; + if (CHAR_BYTE_TABLE_P (ret)) + { + struct Lisp_Char_Byte_Table* cpt4 + = XCHAR_BYTE_TABLE (ret); + + cpt4->property[(unsigned char)ch] = value; + } + else if (!EQ (ret, value)) + { + Lisp_Object cpt4 = make_char_byte_table (ret); + + XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value; + cpt3->property[(unsigned char)(ch >> 8)] = cpt4; + } } - else if (i >= table->size) + else if (!EQ (ret, value)) + { + Lisp_Object cpt3 = make_char_byte_table (ret); + Lisp_Object cpt4 = make_char_byte_table (ret); + + XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value; + XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] + = cpt4; + cpt2->property[(unsigned char)(ch >> 16)] = cpt3; + } + } + else if (!EQ (ret, value)) + { + Lisp_Object cpt2 = make_char_byte_table (ret); + Lisp_Object cpt3 = make_char_byte_table (ret); + Lisp_Object cpt4 = make_char_byte_table (ret); + + XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value; + XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4; + XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3; + cpt1->property[(unsigned char)(ch >> 24)] = cpt2; + } +} + + +Lisp_Object Vcharacter_attribute_table; + +DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /* +Return the alist of attributes of CHARACTER. +*/ + (character)) +{ + return get_char_code_table (XCHAR (character), Vcharacter_attribute_table); +} + +DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /* +Return the value of CHARACTER's ATTRIBUTE. +*/ + (character, attribute)) +{ + Lisp_Object ret + = get_char_code_table (XCHAR (character), Vcharacter_attribute_table); + Lisp_Object ccs; + + if (EQ (ret, Qnil)) + return Qnil; + + if (!NILP (ccs = Ffind_charset (attribute))) + attribute = ccs; + + return Fcdr (Fassq (attribute, ret)); +} + +Lisp_Object +put_char_attribute (Lisp_Object character, Lisp_Object attribute, + Lisp_Object value) +{ + Emchar char_code = XCHAR (character); + Lisp_Object ret + = get_char_code_table (char_code, Vcharacter_attribute_table); + Lisp_Object cell; + + cell = Fassq (attribute, ret); + + if (NILP (cell)) + { + ret = Fcons (Fcons (attribute, value), ret); + } + else if (!EQ (Fcdr (cell), value)) + { + Fsetcdr (cell, value); + } + put_char_code_table (char_code, ret, Vcharacter_attribute_table); + return ret; +} + +DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /* +Store CHARACTER's ATTRIBUTE with VALUE. +*/ + (character, attribute, value)) +{ + Lisp_Object ccs; + + ccs = Ffind_charset (attribute); + if (!NILP (ccs)) + { + Lisp_Object rest; + Lisp_Object v = XCHARSET_DECODING_TABLE (ccs); + Lisp_Object nv; + int i = -1; + int ccs_len; + + /* ad-hoc method for `ascii' */ + if ((XCHARSET_CHARS (ccs) == 94) && + (XCHARSET_BYTE_OFFSET (ccs) != 33)) + ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs); + else + ccs_len = XCHARSET_CHARS (ccs); + + if (!CONSP (value)) + signal_simple_error ("Invalid value for coded-charset", + value); + + attribute = ccs; + rest = Fget_char_attribute (character, attribute); + if (VECTORP (v)) { - size_t new_size = i + 1; - size_t j; - - new_size += 256 - (new_size % 256); - table->base = xrealloc (table->base, new_size); - for (j = table->size; j < new_size; j++) - table->base[j] = 0; - table->base[i] = val; - table->size = new_size; + if (!NILP (rest)) + { + while (!NILP (rest)) + { + Lisp_Object ei = Fcar (rest); + + i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs); + nv = XVECTOR_DATA(v)[i]; + if (!VECTORP (nv)) + break; + v = nv; + rest = Fcdr (rest); + } + if (i >= 0) + XVECTOR_DATA(v)[i] = Qnil; + v = XCHARSET_DECODING_TABLE (ccs); + } } else { - table->base[i] = val; + XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil); + } + + rest = value; + i = -1; + while (CONSP (rest)) + { + Lisp_Object ei = Fcar (rest); + + if (!INTP (ei)) + signal_simple_error ("Invalid value for coded-charset", + value); + i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs); + nv = XVECTOR_DATA(v)[i]; + rest = Fcdr (rest); + if (CONSP (rest)) + { + if (!VECTORP (nv)) + { + nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil)); + } + v = nv; + } + else + break; } + XVECTOR_DATA(v)[i] = character; } + return put_char_attribute (character, attribute, value); } -unsigned char -get_byte_from_character_table (Emchar ch, Emchar_to_byte_table* table) +Lisp_Object Qucs; + +DEFUN ("define-char", Fdefine_char, 1, 1, 0, /* +Store character's ATTRIBUTES. +*/ + (attributes)) { - size_t i = ch - table->offset; - if (i < table->size) - return table->base[i]; + Lisp_Object rest = attributes; + Lisp_Object code = Fcdr (Fassq (Qucs, attributes)); + Lisp_Object character; + + if (NILP (code)) + { + while (CONSP (rest)) + { + Lisp_Object cell = Fcar (rest); + Lisp_Object ccs; + + if (!LISTP (cell)) + signal_simple_error ("Invalid argument", attributes); + if (!NILP (ccs = Ffind_charset (Fcar (cell))) + && XCHARSET_FINAL (ccs)) + { + Emchar code; + + if (XCHARSET_DIMENSION (ccs) == 1) + { + Lisp_Object eb1 = Fcar (Fcdr (cell)); + int b1; + + if (!INTP (eb1)) + signal_simple_error ("Invalid argument", attributes); + b1 = XINT (eb1); + switch (XCHARSET_CHARS (ccs)) + { + case 94: + code = MIN_CHAR_94 + + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33); + break; + case 96: + code = MIN_CHAR_96 + + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32); + break; + default: + abort (); + } + } + else if (XCHARSET_DIMENSION (ccs) == 2) + { + Lisp_Object eb1 = Fcar (Fcdr (cell)); + Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell))); + int b1, b2; + + if (!INTP (eb1)) + signal_simple_error ("Invalid argument", attributes); + b1 = XINT (eb1); + if (!INTP (eb2)) + signal_simple_error ("Invalid argument", attributes); + b2 = XINT (eb2); + switch (XCHARSET_CHARS (ccs)) + { + case 94: + code = MIN_CHAR_94x94 + + (XCHARSET_FINAL (ccs) - '0') * 94 * 94 + + (b1 - 33) * 94 + (b2 - 33); + break; + case 96: + code = MIN_CHAR_96x96 + + (XCHARSET_FINAL (ccs) - '0') * 96 * 96 + + (b1 - 32) * 96 + (b2 - 32); + break; + default: + abort (); + } + } + else + { + rest = Fcdr (rest); + continue; + } + character = make_char (code); + goto setup_attributes; + } + rest = Fcdr (rest); + } + return Qnil; + } + else if (!INTP (code)) + signal_simple_error ("Invalid argument", attributes); else - return 0; -} + character = make_char (XINT (code)); + + setup_attributes: + rest = attributes; + while (CONSP (rest)) + { + Lisp_Object cell = Fcar (rest); + if (!LISTP (cell)) + signal_simple_error ("Invalid argument", attributes); + Fput_char_attribute (character, Fcar (cell), Fcdr (cell)); + rest = Fcdr (rest); + } + return + get_char_code_table (XCHAR (character), Vcharacter_attribute_table); +} Lisp_Object Vutf_2000_version; #endif @@ -235,6 +587,7 @@ Lisp_Object Qascii, Qchinese_cns11643_2, #ifdef UTF2000 Qucs_bmp, + Qlatin_viscii, Qlatin_viscii_lower, Qlatin_viscii_upper, Qvietnamese_viscii_lower, @@ -250,8 +603,12 @@ Lisp_Object Ql2r, Qr2l; Lisp_Object Vcharset_hash_table; +#ifdef UTF2000 +static Charset_ID next_allocated_leading_byte; +#else static Charset_ID next_allocated_1_byte_leading_byte; static Charset_ID next_allocated_2_byte_leading_byte; +#endif /* Composite characters are characters constructed by overstriking two or more regular characters. @@ -655,6 +1012,9 @@ print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) static const struct lrecord_description charset_description[] = { { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 }, +#ifdef UTF2000 + { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 }, +#endif { XD_END } }; @@ -662,6 +1022,7 @@ DEFINE_LRECORD_IMPLEMENTATION ("charset", charset, mark_charset, print_charset, 0, 0, 0, charset_description, struct Lisp_Charset); + /* Make a new charset. */ static Lisp_Object @@ -693,91 +1054,47 @@ make_charset (Charset_ID id, Lisp_Object name, CHARSET_CCL_PROGRAM (cs) = Qnil; CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; #ifdef UTF2000 - CHARSET_DECODING_TABLE(cs) = decoding_table; + CHARSET_DECODING_TABLE(cs) = Qnil; CHARSET_UCS_MIN(cs) = ucs_min; CHARSET_UCS_MAX(cs) = ucs_max; CHARSET_CODE_OFFSET(cs) = code_offset; CHARSET_BYTE_OFFSET(cs) = byte_offset; #endif - - switch ( CHARSET_TYPE (cs) ) + + switch (CHARSET_TYPE (cs)) { case CHARSET_TYPE_94: CHARSET_DIMENSION (cs) = 1; CHARSET_CHARS (cs) = 94; -#ifdef UTF2000 - if (!EQ (decoding_table, Qnil)) - { - size_t i; - CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table(); - for (i = 0; i < 94; i++) - { - Lisp_Object c = XVECTOR_DATA(decoding_table)[i]; - - if (!EQ (c, Qnil)) - put_byte_from_character_table (XCHAR (c), i + 33, - CHARSET_TO_BYTE1_TABLE(cs)); - } - } - else - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; -#endif break; case CHARSET_TYPE_96: CHARSET_DIMENSION (cs) = 1; CHARSET_CHARS (cs) = 96; -#ifdef UTF2000 - if (!EQ (decoding_table, Qnil)) - { - size_t i; - CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table(); - for (i = 0; i < 96; i++) - { - Lisp_Object c = XVECTOR_DATA(decoding_table)[i]; - - if (!EQ (c, Qnil)) - put_byte_from_character_table (XCHAR (c), i + 32, - CHARSET_TO_BYTE1_TABLE(cs)); - } - } - else - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; -#endif break; case CHARSET_TYPE_94X94: CHARSET_DIMENSION (cs) = 2; CHARSET_CHARS (cs) = 94; -#ifdef UTF2000 - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; -#endif break; case CHARSET_TYPE_96X96: CHARSET_DIMENSION (cs) = 2; CHARSET_CHARS (cs) = 96; -#ifdef UTF2000 - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; -#endif break; #ifdef UTF2000 + case CHARSET_TYPE_128: + CHARSET_DIMENSION (cs) = 1; + CHARSET_CHARS (cs) = 128; + break; case CHARSET_TYPE_128X128: CHARSET_DIMENSION (cs) = 2; CHARSET_CHARS (cs) = 128; -#ifdef UTF2000 - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; -#endif + break; + case CHARSET_TYPE_256: + CHARSET_DIMENSION (cs) = 1; + CHARSET_CHARS (cs) = 256; break; case CHARSET_TYPE_256X256: CHARSET_DIMENSION (cs) = 2; CHARSET_CHARS (cs) = 256; -#ifdef UTF2000 - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; -#endif break; #endif } @@ -828,6 +1145,12 @@ get_unallocated_leading_byte (int dimension) { Charset_ID lb; +#ifdef UTF2000 + if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE) + lb = 0; + else + lb = next_allocated_leading_byte++; +#else if (dimension == 1) { if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1) @@ -842,6 +1165,7 @@ get_unallocated_leading_byte (int dimension) else lb = next_allocated_2_byte_leading_byte++; } +#endif if (!lb) signal_simple_error @@ -852,30 +1176,47 @@ get_unallocated_leading_byte (int dimension) } #ifdef UTF2000 -unsigned char -charset_get_byte1 (Lisp_Object charset, Emchar ch) +Lisp_Object +range_charset_code_point (Lisp_Object charset, Emchar ch) { - Emchar_to_byte_table* table; int d; - if ((table = XCHARSET_TO_BYTE1_TABLE (charset)) != NULL) - return get_byte_from_character_table (ch, table); - else if ((XCHARSET_UCS_MIN (charset) <= ch) - && (ch <= XCHARSET_UCS_MAX (charset))) - return (ch - XCHARSET_UCS_MIN (charset) - + XCHARSET_CODE_OFFSET (charset)) - / (XCHARSET_DIMENSION (charset) == 1 ? - 1 - : - XCHARSET_DIMENSION (charset) == 2 ? - XCHARSET_CHARS (charset) - : - XCHARSET_DIMENSION (charset) == 3 ? - XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset) - : - XCHARSET_CHARS (charset) - * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)) - + XCHARSET_BYTE_OFFSET (charset); + if ((XCHARSET_UCS_MIN (charset) <= ch) + && (ch <= XCHARSET_UCS_MAX (charset))) + { + d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset); + + if (XCHARSET_DIMENSION (charset) == 1) + return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset))); + else if (XCHARSET_DIMENSION (charset) == 2) + return list2 (make_int (d / XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset))); + else if (XCHARSET_DIMENSION (charset) == 3) + return list3 (make_int (d / (XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset)) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d / XCHARSET_CHARS (charset) + % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset))); + else /* if (XCHARSET_DIMENSION (charset) == 4) */ + return list4 (make_int (d / (XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset)) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d / (XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset)) + % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d / XCHARSET_CHARS (charset) + % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset))); + } else if (XCHARSET_CODE_OFFSET (charset) == 0) { if (XCHARSET_DIMENSION (charset) == 1) @@ -885,17 +1226,17 @@ charset_get_byte1 (Lisp_Object charset, Emchar ch) if (((d = ch - (MIN_CHAR_94 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0) && (d < 94)) - return d + 33; + return list1 (make_int (d + 33)); } else if (XCHARSET_CHARS (charset) == 96) { if (((d = ch - (MIN_CHAR_96 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0) && (d < 96)) - return d + 32; + return list1 (make_int (d + 32)); } else - return 0; + return Qnil; } else if (XCHARSET_DIMENSION (charset) == 2) { @@ -905,7 +1246,7 @@ charset_get_byte1 (Lisp_Object charset, Emchar ch) + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) >= 0) && (d < 94 * 94)) - return (d / 94) + 33; + return list2 ((d / 94) + 33, d % 94 + 33); } else if (XCHARSET_CHARS (charset) == 96) { @@ -913,50 +1254,26 @@ charset_get_byte1 (Lisp_Object charset, Emchar ch) + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) >= 0) && (d < 96 * 96)) - return (d / 96) + 32; + return list2 ((d / 96) + 32, d % 96 + 32); } } } - return 0; + return Qnil; } -unsigned char -charset_get_byte2 (Lisp_Object charset, Emchar ch) +Lisp_Object +charset_code_point (Lisp_Object charset, Emchar ch) { - if (XCHARSET_DIMENSION (charset) == 1) - return 0; - else + Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table); + + if (!EQ (cdef, Qnil)) { - Emchar_to_byte_table* table; - - if ((table = XCHARSET_TO_BYTE2_TABLE (charset)) != NULL) - return get_byte_from_character_table (ch, table); - else if ((XCHARSET_UCS_MIN (charset) <= ch) - && (ch <= XCHARSET_UCS_MAX (charset))) - return ((ch - XCHARSET_UCS_MIN (charset) - + XCHARSET_CODE_OFFSET (charset)) - / (XCHARSET_DIMENSION (charset) == 2 ? - 1 - : - XCHARSET_DIMENSION (charset) == 3 ? - XCHARSET_CHARS (charset) - : - XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))) - % XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset); - else if (XCHARSET_CHARS (charset) == 94) - return (MIN_CHAR_94x94 - + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch) - && (ch < MIN_CHAR_94x94 - + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ? - ((ch - MIN_CHAR_94x94) % 94) + 33 : 0; - else /* if (XCHARSET_CHARS (charset) == 96) */ - return (MIN_CHAR_96x96 - + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch) - && (ch < MIN_CHAR_96x96 - + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ? - ((ch - MIN_CHAR_96x96) % 96) + 32 : 0; + Lisp_Object field = Fassq (charset, cdef); + + if (!EQ (field, Qnil)) + return Fcdr (field); } + return range_charset_code_point (charset, ch); } Lisp_Object Vdefault_coded_charset_priority_list; @@ -1108,6 +1425,7 @@ character set. Recognized properties are: Lisp_Object rest, keyword, value; Lisp_Object ccl_program = Qnil; Lisp_Object short_name = Qnil, long_name = Qnil; + unsigned char byte_offset = 0; CHECK_SYMBOL (name); if (!NILP (doc_string)) @@ -1159,7 +1477,11 @@ character set. Recognized properties are: { CHECK_INT (value); graphic = XINT (value); +#ifdef UTF2000 + if (graphic < 0 || graphic > 2) +#else if (graphic < 0 || graphic > 1) +#endif signal_simple_error ("Invalid value for 'graphic", value); } @@ -1214,45 +1536,7 @@ character set. Recognized properties are: error ("Character set already defined for this DIMENSION/CHARS/FINAL combo"); -#ifdef UTF2000 - if (dimension == 1) - { - if (chars == 94) - { - /* id = CHARSET_ID_OFFSET_94 + final; */ - id = get_unallocated_leading_byte (dimension); - } - else if (chars == 96) - { - id = get_unallocated_leading_byte (dimension); - } - else - { - abort (); - } - } - else if (dimension == 2) - { - if (chars == 94) - { - id = get_unallocated_leading_byte (dimension); - } - else if (chars == 96) - { - id = get_unallocated_leading_byte (dimension); - } - else - { - abort (); - } - } - else - { - abort (); - } -#else id = get_unallocated_leading_byte (dimension); -#endif if (NILP (doc_string)) doc_string = build_string (""); @@ -1271,7 +1555,7 @@ character set. Recognized properties are: charset = make_charset (id, name, type, columns, graphic, final, direction, short_name, long_name, doc_string, registry, - Qnil, 0, 0, 0, 0); + Qnil, 0, 0, 0, byte_offset); if (!NILP (ccl_program)) XCHARSET_CCL_PROGRAM (charset) = ccl_program; return charset; @@ -1552,114 +1836,77 @@ Set mapping-table of CHARSET to TABLE. (charset, table)) { struct Lisp_Charset *cs; - Emchar_to_byte_table* old_byte1_table; - Emchar_to_byte_table* old_byte2_table; + Lisp_Object old_table; + size_t i; charset = Fget_charset (charset); - CHECK_VECTOR (table); - cs = XCHARSET (charset); - CHARSET_DECODING_TABLE(cs) = table; - old_byte1_table = CHARSET_TO_BYTE1_TABLE(cs); - old_byte2_table = CHARSET_TO_BYTE2_TABLE(cs); - switch (CHARSET_TYPE (cs)) + + if (EQ (table, Qnil)) { - case CHARSET_TYPE_94: - if (!EQ (table, Qnil)) - { - size_t i; - CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table(); - for (i = 0; i < 94; i++) - { - Lisp_Object c = XVECTOR_DATA(table)[i]; + CHARSET_DECODING_TABLE(cs) = table; + return table; + } + else if (VECTORP (table)) + { + if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs)) + args_out_of_range (table, make_int (CHARSET_CHARS (cs))); + old_table = CHARSET_DECODING_TABLE(cs); + CHARSET_DECODING_TABLE(cs) = table; + } + else + signal_error (Qwrong_type_argument, + list2 (build_translated_string ("vector-or-nil-p"), + table)); + /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */ - if (!EQ (c, Qnil)) - put_byte_from_character_table (XCHAR (c), i + 33, - CHARSET_TO_BYTE1_TABLE(cs)); - } - } - else - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; - break; - case CHARSET_TYPE_96: - if (!EQ (table, Qnil)) + switch (CHARSET_DIMENSION (cs)) + { + case 1: + for (i = 0; i < XVECTOR_LENGTH (table); i++) { - size_t i; - CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table(); - for (i = 0; i < 96; i++) - { - Lisp_Object c = XVECTOR_DATA(table)[i]; + Lisp_Object c = XVECTOR_DATA(table)[i]; - if (!EQ (c, Qnil)) - put_byte_from_character_table (XCHAR (c), i + 32, - CHARSET_TO_BYTE1_TABLE(cs)); - } + if (CHARP (c)) + put_char_attribute + (c, charset, + list1 (make_int (i + CHARSET_BYTE_OFFSET (cs)))); } - else - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; break; - case CHARSET_TYPE_94X94: - if (!EQ (table, Qnil)) + case 2: + for (i = 0; i < XVECTOR_LENGTH (table); i++) { - size_t i; + Lisp_Object v = XVECTOR_DATA(table)[i]; - CHARSET_TO_BYTE1_TABLE(cs) = make_byte_from_character_table(); - CHARSET_TO_BYTE2_TABLE(cs) = make_byte_from_character_table(); - for (i = 0; i < XVECTOR_LENGTH (table); i++) + if (VECTORP (v)) { - Lisp_Object v = XVECTOR_DATA(table)[i]; + size_t j; - if (VECTORP (v)) + if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs)) { - size_t j; - - for (j = 0; j < XVECTOR_LENGTH (v); j++) - { - Lisp_Object c = XVECTOR_DATA(v)[j]; - - if (!EQ (c, Qnil)) - { - put_byte_from_character_table - (XCHAR (c), i + 33, CHARSET_TO_BYTE1_TABLE(cs)); - put_byte_from_character_table - (XCHAR (c), j + 33, CHARSET_TO_BYTE2_TABLE(cs)); - } - } + CHARSET_DECODING_TABLE(cs) = old_table; + args_out_of_range (v, make_int (CHARSET_CHARS (cs))); + } + for (j = 0; j < XVECTOR_LENGTH (v); j++) + { + Lisp_Object c = XVECTOR_DATA(v)[j]; + + if (CHARP (c)) + put_char_attribute (c, charset, + list2 + (make_int + (i + CHARSET_BYTE_OFFSET (cs)), + make_int + (j + CHARSET_BYTE_OFFSET (cs)))); } - else if (CHARP (v)) - put_byte_from_character_table - (XCHAR (v), i + 33, CHARSET_TO_BYTE1_TABLE(cs)); } + else if (CHARP (v)) + put_char_attribute (v, charset, + list1 + (make_int (i + CHARSET_BYTE_OFFSET (cs)))); } - else - { - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; - } - break; - case CHARSET_TYPE_96X96: - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; - break; - case CHARSET_TYPE_128X128: - CHARSET_DIMENSION (cs) = 2; - CHARSET_CHARS (cs) = 128; - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; - break; - case CHARSET_TYPE_256X256: - CHARSET_DIMENSION (cs) = 2; - CHARSET_CHARS (cs) = 256; - CHARSET_TO_BYTE1_TABLE(cs) = NULL; - CHARSET_TO_BYTE2_TABLE(cs) = NULL; break; } - if (old_byte1_table != NULL) - destroy_byte_from_character_table (old_byte1_table); - if (old_byte2_table != NULL) - destroy_byte_from_character_table (old_byte2_table); return table; } #endif @@ -1869,6 +2116,10 @@ syms_of_mule_charset (void) DEFSUBR (Fset_charset_ccl_program); DEFSUBR (Fset_charset_registry); #ifdef UTF2000 + DEFSUBR (Fchar_attribute_alist); + DEFSUBR (Fget_char_attribute); + DEFSUBR (Fput_char_attribute); + DEFSUBR (Fdefine_char); DEFSUBR (Fcharset_mapping_table); DEFSUBR (Fset_charset_mapping_table); #endif @@ -1918,7 +2169,9 @@ syms_of_mule_charset (void) defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1"); defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2"); #ifdef UTF2000 + defsymbol (&Qucs, "ucs"); defsymbol (&Qucs_bmp, "ucs-bmp"); + defsymbol (&Qlatin_viscii, "latin-viscii"); defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower"); defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper"); defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower"); @@ -1957,10 +2210,10 @@ vars_of_mule_charset (void) charset_by_attributes[i][j][k] = Qnil; #endif - next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; #ifdef UTF2000 - next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1; + next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE; #else + next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; #endif @@ -1973,15 +2226,18 @@ Leading-code of private TYPE9N charset of column-width 1. #endif #ifdef UTF2000 - Vutf_2000_version = build_string("0.8 (Kami)"); + Vutf_2000_version = build_string("0.11 (Shiki)"); DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /* Version number of UTF-2000. */ ); + staticpro (&Vcharacter_attribute_table); + Vcharacter_attribute_table = make_char_code_table (Qnil); + Vdefault_coded_charset_priority_list = Qnil; DEFVAR_LISP ("default-coded-charset-priority-list", &Vdefault_coded_charset_priority_list /* -Default order of preferred coded-character-set. +Default order of preferred coded-character-sets. */ ); #endif } @@ -1999,12 +2255,12 @@ complex_vars_of_mule_charset (void) #ifdef UTF2000 Vcharset_ucs_bmp = make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, - CHARSET_TYPE_256X256, 1, 0, 0, + CHARSET_TYPE_256X256, 1, 2, 0, CHARSET_LEFT_TO_RIGHT, build_string ("BMP"), build_string ("BMP"), - build_string ("BMP"), - build_string (""), + build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"), + build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"), Qnil, 0, 0xFFFF, 0, 0); #else # define MIN_CHAR_THAI 0 @@ -2222,7 +2478,7 @@ complex_vars_of_mule_charset (void) build_string ("VISCII lower"), build_string ("VISCII lower (Vietnamese)"), build_string ("VISCII lower (Vietnamese)"), - build_string ("VISCII1\\.1"), + build_string ("MULEVISCII-LOWER"), Qnil, 0, 0, 0, 32); Vcharset_latin_viscii_upper = make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, @@ -2231,8 +2487,17 @@ complex_vars_of_mule_charset (void) build_string ("VISCII upper"), build_string ("VISCII upper (Vietnamese)"), build_string ("VISCII upper (Vietnamese)"), - build_string ("VISCII1\\.1"), + build_string ("MULEVISCII-UPPER"), Qnil, 0, 0, 0, 32); + Vcharset_latin_viscii = + make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, + CHARSET_TYPE_256, 1, 2, 0, + CHARSET_LEFT_TO_RIGHT, + build_string ("VISCII"), + build_string ("VISCII 1.1 (Vietnamese)"), + build_string ("VISCII 1.1 (Vietnamese)"), + build_string ("VISCII1\\.1"), + Qnil, 0, 0, 0, 0); Vcharset_hiragana_jisx0208 = make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208, CHARSET_TYPE_94X94, 2, 0, 'B',