X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fmule-charset.c;h=29c60069811604de8a75120ec2f99caa540348dc;hb=313d1c4d8bd1f94564e5edda76ee6aac3ecb70fd;hp=ee0ce80731771a5d27f76ff42eb4ab4b2108b963;hpb=d4fd14df47400ce4a63d921ec1a4c9de649622dc;p=chise%2Fxemacs-chise.git- diff --git a/src/mule-charset.c b/src/mule-charset.c index ee0ce80..29c6006 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -62,8 +62,8 @@ Lisp_Object Vcharset_ucs_bmp; Lisp_Object Vcharset_latin_viscii; Lisp_Object Vcharset_latin_viscii_lower; Lisp_Object Vcharset_latin_viscii_upper; -Lisp_Object Vcharset_hiragana_jisx0208; -Lisp_Object Vcharset_katakana_jisx0208; +Lisp_Object Vcharset_ideograph_daikanwa; +Lisp_Object Vcharset_ethiopic_ucs; #endif Lisp_Object Vcharset_chinese_big5_1; Lisp_Object Vcharset_chinese_big5_2; @@ -125,6 +125,7 @@ Bytecount rep_bytes_by_first_byte[0xA0] = #endif #ifdef UTF2000 + static Lisp_Object mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object)) { @@ -176,7 +177,7 @@ static const struct lrecord_description char_byte_table_description[] = { { XD_END } }; -DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table, +DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table, mark_char_byte_table, internal_object_printer, 0, char_byte_table_equal, @@ -184,7 +185,6 @@ DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table, char_byte_table_description, struct Lisp_Char_Byte_Table); - static Lisp_Object make_char_byte_table (Lisp_Object initval) { @@ -224,63 +224,133 @@ copy_char_byte_table (Lisp_Object entry) return obj; } -#define make_char_code_table(initval) make_char_byte_table(initval) + +static Lisp_Object +mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object)) +{ + struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj); + + return cte->table; +} + +static int +char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth) +{ + struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1); + struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2); + + return char_byte_table_equal (cte1->table, cte2->table, depth + 1); +} + +static unsigned long +char_code_table_hash (Lisp_Object obj, int depth) +{ + struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj); + + return char_code_table_hash (cte->table, depth + 1); +} + +static const struct lrecord_description char_code_table_description[] = { + { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 }, + { XD_END } +}; + +DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table, + mark_char_code_table, + internal_object_printer, + 0, char_code_table_equal, + char_code_table_hash, + char_code_table_description, + struct Lisp_Char_Code_Table); + +static Lisp_Object +make_char_code_table (Lisp_Object initval) +{ + Lisp_Object obj; + struct Lisp_Char_Code_Table *cte = + alloc_lcrecord_type (struct Lisp_Char_Code_Table, + &lrecord_char_code_table); + + cte->table = make_char_byte_table (initval); + + XSETCHAR_CODE_TABLE (obj, cte); + return obj; +} + +static Lisp_Object +copy_char_code_table (Lisp_Object entry) +{ + struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry); + Lisp_Object obj; + struct Lisp_Char_Code_Table *ctenew = + alloc_lcrecord_type (struct Lisp_Char_Code_Table, + &lrecord_char_code_table); + + ctenew->table = copy_char_byte_table (cte->table); + XSETCHAR_CODE_TABLE (obj, ctenew); + return obj; +} + Lisp_Object get_char_code_table (Emchar ch, Lisp_Object table) { - struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table); - Lisp_Object ret = cpt->property [ch >> 24]; + unsigned int code = ch; + struct Lisp_Char_Byte_Table* cpt + = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table); + Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)]; if (CHAR_BYTE_TABLE_P (ret)) cpt = XCHAR_BYTE_TABLE (ret); else return ret; - ret = cpt->property [(unsigned char) (ch >> 16)]; + ret = cpt->property [(unsigned char) (code >> 16)]; if (CHAR_BYTE_TABLE_P (ret)) cpt = XCHAR_BYTE_TABLE (ret); else return ret; - ret = cpt->property [(unsigned char) (ch >> 8)]; + ret = cpt->property [(unsigned char) (code >> 8)]; if (CHAR_BYTE_TABLE_P (ret)) cpt = XCHAR_BYTE_TABLE (ret); else return ret; - return cpt->property [(unsigned char) ch]; + return cpt->property [(unsigned char) code]; } void put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table) { - struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table); - Lisp_Object ret = cpt1->property[ch >> 24]; + unsigned int code = ch; + struct Lisp_Char_Byte_Table* cpt1 + = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table); + Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)]; if (CHAR_BYTE_TABLE_P (ret)) { struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret); - ret = cpt2->property[(unsigned char)(ch >> 16)]; + ret = cpt2->property[(unsigned char)(code >> 16)]; if (CHAR_BYTE_TABLE_P (ret)) { struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret); - ret = cpt3->property[(unsigned char)(ch >> 8)]; + ret = cpt3->property[(unsigned char)(code >> 8)]; if (CHAR_BYTE_TABLE_P (ret)) { struct Lisp_Char_Byte_Table* cpt4 = XCHAR_BYTE_TABLE (ret); - cpt4->property[(unsigned char)ch] = value; + cpt4->property[(unsigned char)code] = value; } else if (!EQ (ret, value)) { Lisp_Object cpt4 = make_char_byte_table (ret); - XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value; - cpt3->property[(unsigned char)(ch >> 8)] = cpt4; + XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value; + cpt3->property[(unsigned char)(code >> 8)] = cpt4; } } else if (!EQ (ret, value)) @@ -288,10 +358,10 @@ put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table) Lisp_Object cpt3 = make_char_byte_table (ret); Lisp_Object cpt4 = make_char_byte_table (ret); - XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value; - XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] + XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value; + XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4; - cpt2->property[(unsigned char)(ch >> 16)] = cpt3; + cpt2->property[(unsigned char)(code >> 16)] = cpt3; } } else if (!EQ (ret, value)) @@ -300,13 +370,406 @@ put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table) Lisp_Object cpt3 = make_char_byte_table (ret); Lisp_Object cpt4 = make_char_byte_table (ret); - XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value; - XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4; - XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3; - cpt1->property[(unsigned char)(ch >> 24)] = cpt2; + XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value; + XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4; + XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3; + cpt1->property[(unsigned char)(code >> 24)] = cpt2; + } +} + + +Lisp_Object Vcharacter_attribute_table; +Lisp_Object Vcharacter_composition_table; +Lisp_Object Vcharacter_variant_table; + +Lisp_Object Q_decomposition; +Lisp_Object Q_ucs; +Lisp_Object Qcompat; +Lisp_Object QnoBreak; +Lisp_Object Qfraction; +Lisp_Object Qsuper; +Lisp_Object Qsub; +Lisp_Object Qcircle; +Lisp_Object Qsquare; +Lisp_Object Qwide; +Lisp_Object Qnarrow; +Lisp_Object Qfont; + +Emchar +to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg) +{ + if (INTP (v)) + return XINT (v); + if (CHARP (v)) + return XCHAR (v); + else if (EQ (v, Qcompat)) + return -1; + else if (EQ (v, QnoBreak)) + return -2; + else if (EQ (v, Qfraction)) + return -3; + else if (EQ (v, Qsuper)) + return -4; + else if (EQ (v, Qsub)) + return -5; + else if (EQ (v, Qcircle)) + return -6; + else if (EQ (v, Qsquare)) + return -7; + else if (EQ (v, Qwide)) + return -8; + else if (EQ (v, Qnarrow)) + return -9; + else if (EQ (v, Qfont)) + return -10; + else + signal_simple_error (err_msg, err_arg); +} + +DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /* +Return character corresponding with list. +*/ + (list)) +{ + Lisp_Object table = Vcharacter_composition_table; + Lisp_Object rest = list; + + while (CONSP (rest)) + { + Lisp_Object v = Fcar (rest); + Lisp_Object ret; + Emchar c = to_char_code (v, "Invalid value for composition", list); + + ret = get_char_code_table (c, table); + + rest = Fcdr (rest); + if (NILP (rest)) + { + if (!CHAR_CODE_TABLE_P (ret)) + return ret; + else + return Qt; + } + else if (!CONSP (rest)) + break; + else if (CHAR_CODE_TABLE_P (ret)) + table = ret; + else + signal_simple_error ("Invalid table is found with", list); + } + signal_simple_error ("Invalid value for composition", list); +} + +DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /* +Return variants of CHARACTER. +*/ + (character)) +{ + CHECK_CHAR (character); + return Fcopy_list (get_char_code_table (XCHAR (character), + Vcharacter_variant_table)); +} + +DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /* +Return the alist of attributes of CHARACTER. +*/ + (character)) +{ + CHECK_CHAR (character); + return Fcopy_alist (get_char_code_table (XCHAR (character), + Vcharacter_attribute_table)); +} + +DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /* +Return the value of CHARACTER's ATTRIBUTE. +*/ + (character, attribute)) +{ + Lisp_Object ret + = get_char_code_table (XCHAR (character), Vcharacter_attribute_table); + Lisp_Object ccs; + + if (EQ (ret, Qnil)) + return Qnil; + + if (!NILP (ccs = Ffind_charset (attribute))) + attribute = ccs; + + return Fcdr (Fassq (attribute, ret)); +} + +Lisp_Object +put_char_attribute (Lisp_Object character, Lisp_Object attribute, + Lisp_Object value) +{ + Emchar char_code = XCHAR (character); + Lisp_Object ret + = get_char_code_table (char_code, Vcharacter_attribute_table); + Lisp_Object cell; + + cell = Fassq (attribute, ret); + + if (NILP (cell)) + { + ret = Fcons (Fcons (attribute, value), ret); + } + else if (!EQ (Fcdr (cell), value)) + { + Fsetcdr (cell, value); + } + put_char_code_table (char_code, ret, Vcharacter_attribute_table); + return ret; +} + +DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /* +Store CHARACTER's ATTRIBUTE with VALUE. +*/ + (character, attribute, value)) +{ + Lisp_Object ccs; + + CHECK_CHAR (character); + ccs = Ffind_charset (attribute); + if (!NILP (ccs)) + { + Lisp_Object rest; + Lisp_Object v = XCHARSET_DECODING_TABLE (ccs); + Lisp_Object nv; + int i = -1; + int ccs_len; + + /* ad-hoc method for `ascii' */ + if ((XCHARSET_CHARS (ccs) == 94) && + (XCHARSET_BYTE_OFFSET (ccs) != 33)) + ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs); + else + ccs_len = XCHARSET_CHARS (ccs); + + if (!CONSP (value)) + signal_simple_error ("Invalid value for coded-charset", + value); + + attribute = ccs; + rest = Fget_char_attribute (character, attribute); + if (VECTORP (v)) + { + if (!NILP (rest)) + { + while (!NILP (rest)) + { + Lisp_Object ei = Fcar (rest); + + i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs); + nv = XVECTOR_DATA(v)[i]; + if (!VECTORP (nv)) + break; + v = nv; + rest = Fcdr (rest); + } + if (i >= 0) + XVECTOR_DATA(v)[i] = Qnil; + v = XCHARSET_DECODING_TABLE (ccs); + } + } + else + { + XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil); + } + + if (XCHARSET_GRAPHIC (ccs) == 1) + value = Fcopy_list (value); + rest = value; + i = -1; + while (CONSP (rest)) + { + Lisp_Object ei = Fcar (rest); + + if (!INTP (ei)) + signal_simple_error ("Invalid value for coded-charset", value); + i = XINT (ei); + if ((i < 0) || (255 < i)) + signal_simple_error ("Invalid value for coded-charset", value); + if (XCHARSET_GRAPHIC (ccs) == 1) + { + i &= 0x7F; + Fsetcar (rest, make_int (i)); + } + i -= XCHARSET_BYTE_OFFSET (ccs); + nv = XVECTOR_DATA(v)[i]; + rest = Fcdr (rest); + if (CONSP (rest)) + { + if (!VECTORP (nv)) + { + nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil)); + } + v = nv; + } + else + break; + } + XVECTOR_DATA(v)[i] = character; } + else if (EQ (attribute, Q_decomposition)) + { + Lisp_Object rest = value; + Lisp_Object table = Vcharacter_composition_table; + + if (!CONSP (value)) + signal_simple_error ("Invalid value for ->decomposition", + value); + + while (CONSP (rest)) + { + Lisp_Object v = Fcar (rest); + Lisp_Object ntable; + Emchar c + = to_char_code (v, "Invalid value for ->decomposition", value); + + rest = Fcdr (rest); + if (!CONSP (rest)) + { + put_char_code_table (c, character, table); + break; + } + else + { + ntable = get_char_code_table (c, table); + if (!CHAR_CODE_TABLE_P (ntable)) + { + ntable = make_char_code_table (Qnil); + put_char_code_table (c, ntable, table); + } + table = ntable; + } + } + } + else if (EQ (attribute, Q_ucs)) + { + Lisp_Object ret; + Emchar c; + + if (!INTP (value)) + signal_simple_error ("Invalid value for ->ucs", value); + + c = XINT (value); + + ret = get_char_code_table (c, Vcharacter_variant_table); + if (NILP (Fmemq (character, ret))) + { + put_char_code_table (c, Fcons (character, ret), + Vcharacter_variant_table); + } + } + return put_char_attribute (character, attribute, value); } +Lisp_Object Qucs; + +DEFUN ("define-char", Fdefine_char, 1, 1, 0, /* +Store character's ATTRIBUTES. +*/ + (attributes)) +{ + Lisp_Object rest = attributes; + Lisp_Object code = Fcdr (Fassq (Qucs, attributes)); + Lisp_Object character; + + if (NILP (code)) + { + while (CONSP (rest)) + { + Lisp_Object cell = Fcar (rest); + Lisp_Object ccs; + + if (!LISTP (cell)) + signal_simple_error ("Invalid argument", attributes); + if (!NILP (ccs = Ffind_charset (Fcar (cell))) + && XCHARSET_FINAL (ccs)) + { + Emchar code; + + if (XCHARSET_DIMENSION (ccs) == 1) + { + Lisp_Object eb1 = Fcar (Fcdr (cell)); + int b1; + + if (!INTP (eb1)) + signal_simple_error ("Invalid argument", attributes); + b1 = XINT (eb1); + switch (XCHARSET_CHARS (ccs)) + { + case 94: + code = MIN_CHAR_94 + + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33); + break; + case 96: + code = MIN_CHAR_96 + + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32); + break; + default: + abort (); + } + } + else if (XCHARSET_DIMENSION (ccs) == 2) + { + Lisp_Object eb1 = Fcar (Fcdr (cell)); + Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell))); + int b1, b2; + + if (!INTP (eb1)) + signal_simple_error ("Invalid argument", attributes); + b1 = XINT (eb1); + if (!INTP (eb2)) + signal_simple_error ("Invalid argument", attributes); + b2 = XINT (eb2); + switch (XCHARSET_CHARS (ccs)) + { + case 94: + code = MIN_CHAR_94x94 + + (XCHARSET_FINAL (ccs) - '0') * 94 * 94 + + (b1 - 33) * 94 + (b2 - 33); + break; + case 96: + code = MIN_CHAR_96x96 + + (XCHARSET_FINAL (ccs) - '0') * 96 * 96 + + (b1 - 32) * 96 + (b2 - 32); + break; + default: + abort (); + } + } + else + { + rest = Fcdr (rest); + continue; + } + character = make_char (code); + goto setup_attributes; + } + rest = Fcdr (rest); + } + return Qnil; + } + else if (!INTP (code)) + signal_simple_error ("Invalid argument", attributes); + else + character = make_char (XINT (code)); + + setup_attributes: + rest = attributes; + while (CONSP (rest)) + { + Lisp_Object cell = Fcar (rest); + + if (!LISTP (cell)) + signal_simple_error ("Invalid argument", attributes); + Fput_char_attribute (character, Fcar (cell), Fcdr (cell)); + rest = Fcdr (rest); + } + return + get_char_code_table (XCHAR (character), Vcharacter_attribute_table); +} Lisp_Object Vutf_2000_version; #endif @@ -352,8 +815,8 @@ Lisp_Object Qascii, Qlatin_viscii_upper, Qvietnamese_viscii_lower, Qvietnamese_viscii_upper, - Qhiragana_jisx0208, - Qkatakana_jisx0208, + Qideograph_daikanwa, + Qethiopic_ucs, #endif Qchinese_big5_1, Qchinese_big5_2, @@ -363,8 +826,12 @@ Lisp_Object Ql2r, Qr2l; Lisp_Object Vcharset_hash_table; +#ifdef UTF2000 +static Charset_ID next_allocated_leading_byte; +#else static Charset_ID next_allocated_1_byte_leading_byte; static Charset_ID next_allocated_2_byte_leading_byte; +#endif /* Composite characters are characters constructed by overstriking two or more regular characters. @@ -728,7 +1195,6 @@ mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object)) markobj (cs->ccl_program); #ifdef UTF2000 markobj (cs->decoding_table); - markobj (cs->encoding_table); #endif return cs->name; } @@ -812,7 +1278,6 @@ make_charset (Charset_ID id, Lisp_Object name, CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; #ifdef UTF2000 CHARSET_DECODING_TABLE(cs) = Qnil; - CHARSET_ENCODING_TABLE(cs) = Qnil; CHARSET_UCS_MIN(cs) = ucs_min; CHARSET_UCS_MAX(cs) = ucs_max; CHARSET_CODE_OFFSET(cs) = code_offset; @@ -903,6 +1368,12 @@ get_unallocated_leading_byte (int dimension) { Charset_ID lb; +#ifdef UTF2000 + if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE) + lb = 0; + else + lb = next_allocated_leading_byte++; +#else if (dimension == 1) { if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1) @@ -917,6 +1388,7 @@ get_unallocated_leading_byte (int dimension) else lb = next_allocated_2_byte_leading_byte++; } +#endif if (!lb) signal_simple_error @@ -927,46 +1399,47 @@ get_unallocated_leading_byte (int dimension) } #ifdef UTF2000 -unsigned char -charset_get_byte1 (Lisp_Object charset, Emchar ch) +Lisp_Object +range_charset_code_point (Lisp_Object charset, Emchar ch) { - Lisp_Object table; int d; - if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil)) - { - Lisp_Object value = get_char_code_table (ch, table); - - if (INTP (value)) - { - Emchar code = XINT (value); - - if (code < (1 << 8)) - return code; - else if (code < (1 << 16)) - return code >> 8; - else if (code < (1 << 24)) - return code >> 16; - else - return code >> 24; - } - } if ((XCHARSET_UCS_MIN (charset) <= ch) && (ch <= XCHARSET_UCS_MAX (charset))) - return (ch - XCHARSET_UCS_MIN (charset) - + XCHARSET_CODE_OFFSET (charset)) - / (XCHARSET_DIMENSION (charset) == 1 ? - 1 - : - XCHARSET_DIMENSION (charset) == 2 ? - XCHARSET_CHARS (charset) - : - XCHARSET_DIMENSION (charset) == 3 ? - XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset) - : - XCHARSET_CHARS (charset) - * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset)) - + XCHARSET_BYTE_OFFSET (charset); + { + d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset); + + if (XCHARSET_DIMENSION (charset) == 1) + return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset))); + else if (XCHARSET_DIMENSION (charset) == 2) + return list2 (make_int (d / XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset))); + else if (XCHARSET_DIMENSION (charset) == 3) + return list3 (make_int (d / (XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset)) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d / XCHARSET_CHARS (charset) + % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset))); + else /* if (XCHARSET_DIMENSION (charset) == 4) */ + return list4 (make_int (d / (XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset)) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d / (XCHARSET_CHARS (charset) + * XCHARSET_CHARS (charset)) + % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d / XCHARSET_CHARS (charset) + % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset)), + make_int (d % XCHARSET_CHARS (charset) + + XCHARSET_BYTE_OFFSET (charset))); + } else if (XCHARSET_CODE_OFFSET (charset) == 0) { if (XCHARSET_DIMENSION (charset) == 1) @@ -976,17 +1449,17 @@ charset_get_byte1 (Lisp_Object charset, Emchar ch) if (((d = ch - (MIN_CHAR_94 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0) && (d < 94)) - return d + 33; + return list1 (make_int (d + 33)); } else if (XCHARSET_CHARS (charset) == 96) { if (((d = ch - (MIN_CHAR_96 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0) && (d < 96)) - return d + 32; + return list1 (make_int (d + 32)); } else - return 0; + return Qnil; } else if (XCHARSET_DIMENSION (charset) == 2) { @@ -996,7 +1469,8 @@ charset_get_byte1 (Lisp_Object charset, Emchar ch) + (XCHARSET_FINAL (charset) - '0') * 94 * 94)) >= 0) && (d < 94 * 94)) - return (d / 94) + 33; + return list2 (make_int ((d / 94) + 33), + make_int (d % 94 + 33)); } else if (XCHARSET_CHARS (charset) == 96) { @@ -1004,64 +1478,129 @@ charset_get_byte1 (Lisp_Object charset, Emchar ch) + (XCHARSET_FINAL (charset) - '0') * 96 * 96)) >= 0) && (d < 96 * 96)) - return (d / 96) + 32; + return list2 (make_int ((d / 96) + 32), + make_int (d % 96 + 32)); } } } - return 0; + return Qnil; } -unsigned char -charset_get_byte2 (Lisp_Object charset, Emchar ch) +Lisp_Object +split_builtin_char (Emchar c) { - if (XCHARSET_DIMENSION (charset) == 1) - return 0; + if (c < MIN_CHAR_OBS_94x94) + { + if (c <= MAX_CHAR_BASIC_LATIN) + { + return list2 (Vcharset_ascii, make_int (c)); + } + else if (c < 0xA0) + { + return list2 (Vcharset_control_1, make_int (c & 0x7F)); + } + else if (c <= 0xff) + { + return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F)); + } + else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK)) + { + return list2 (Vcharset_greek_iso8859_7, + make_int (c - MIN_CHAR_GREEK + 0x20)); + } + else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC)) + { + return list2 (Vcharset_cyrillic_iso8859_5, + make_int (c - MIN_CHAR_CYRILLIC + 0x20)); + } + else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW)) + { + return list2 (Vcharset_hebrew_iso8859_8, + make_int (c - MIN_CHAR_HEBREW + 0x20)); + } + else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI)) + { + return list2 (Vcharset_thai_tis620, + make_int (c - MIN_CHAR_THAI + 0x20)); + } + else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c) + && (c <= MAX_CHAR_HALFWIDTH_KATAKANA)) + { + return list2 (Vcharset_katakana_jisx0201, + make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33)); + } + else + { + return list3 (Vcharset_ucs_bmp, + make_int (c >> 8), make_int (c & 0xff)); + } + } + else if (c <= MAX_CHAR_OBS_94x94) + { + return list3 (CHARSET_BY_ATTRIBUTES + (CHARSET_TYPE_94X94, + ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@', + CHARSET_LEFT_TO_RIGHT), + make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33), + make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33)); + } + else if (c <= MAX_CHAR_DAIKANWA) + { + return list3 (Vcharset_ideograph_daikanwa, + make_int ((c - MIN_CHAR_DAIKANWA) >> 8), + make_int ((c - MIN_CHAR_DAIKANWA) & 255)); + } + else if (c <= MAX_CHAR_94) + { + return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94, + ((c - MIN_CHAR_94) / 94) + '0', + CHARSET_LEFT_TO_RIGHT), + make_int (((c - MIN_CHAR_94) % 94) + 33)); + } + else if (c <= MAX_CHAR_96) + { + return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96, + ((c - MIN_CHAR_96) / 96) + '0', + CHARSET_LEFT_TO_RIGHT), + make_int (((c - MIN_CHAR_96) % 96) + 32)); + } + else if (c <= MAX_CHAR_94x94) + { + return list3 (CHARSET_BY_ATTRIBUTES + (CHARSET_TYPE_94X94, + ((c - MIN_CHAR_94x94) / (94 * 94)) + '0', + CHARSET_LEFT_TO_RIGHT), + make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33), + make_int (((c - MIN_CHAR_94x94) % 94) + 33)); + } + else if (c <= MAX_CHAR_96x96) + { + return list3 (CHARSET_BY_ATTRIBUTES + (CHARSET_TYPE_96X96, + ((c - MIN_CHAR_96x96) / (96 * 96)) + '0', + CHARSET_LEFT_TO_RIGHT), + make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32), + make_int (((c - MIN_CHAR_96x96) % 96) + 32)); + } else { - Lisp_Object table; + return Qnil; + } +} - if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil)) - { - Lisp_Object value = get_char_code_table (ch, table); - - if (INTP (value)) - { - Emchar code = XINT (value); +Lisp_Object +charset_code_point (Lisp_Object charset, Emchar ch) +{ + Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table); - if (code < (1 << 16)) - return (unsigned char)code; - else if (code < (1 << 24)) - return (unsigned char)(code >> 16); - else - return (unsigned char)(code >> 24); - } - } - if ((XCHARSET_UCS_MIN (charset) <= ch) - && (ch <= XCHARSET_UCS_MAX (charset))) - return ((ch - XCHARSET_UCS_MIN (charset) - + XCHARSET_CODE_OFFSET (charset)) - / (XCHARSET_DIMENSION (charset) == 2 ? - 1 - : - XCHARSET_DIMENSION (charset) == 3 ? - XCHARSET_CHARS (charset) - : - XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))) - % XCHARSET_CHARS (charset) - + XCHARSET_BYTE_OFFSET (charset); - else if (XCHARSET_CHARS (charset) == 94) - return (MIN_CHAR_94x94 - + (XCHARSET_FINAL (charset) - '0') * 94 * 94 <= ch) - && (ch < MIN_CHAR_94x94 - + (XCHARSET_FINAL (charset) - '0' + 1) * 94 * 94) ? - ((ch - MIN_CHAR_94x94) % 94) + 33 : 0; - else /* if (XCHARSET_CHARS (charset) == 96) */ - return (MIN_CHAR_96x96 - + (XCHARSET_FINAL (charset) - '0') * 96 * 96 <= ch) - && (ch < MIN_CHAR_96x96 - + (XCHARSET_FINAL (charset) - '0' + 1) * 96 * 96) ? - ((ch - MIN_CHAR_96x96) % 96) + 32 : 0; + if (!EQ (cdef, Qnil)) + { + Lisp_Object field = Fassq (charset, cdef); + + if (!EQ (field, Qnil)) + return Fcdr (field); } + return range_charset_code_point (charset, ch); } Lisp_Object Vdefault_coded_charset_priority_list; @@ -1213,9 +1752,7 @@ character set. Recognized properties are: Lisp_Object rest, keyword, value; Lisp_Object ccl_program = Qnil; Lisp_Object short_name = Qnil, long_name = Qnil; -#ifdef UTF2000 - unsigned char byte_offset = 0; -#endif + int byte_offset = -1; CHECK_SYMBOL (name); if (!NILP (doc_string)) @@ -1326,52 +1863,7 @@ character set. Recognized properties are: error ("Character set already defined for this DIMENSION/CHARS/FINAL combo"); -#ifdef UTF2000 - if (dimension == 1) - { - if (chars == 94) - { - /* id = CHARSET_ID_OFFSET_94 + final; */ - id = get_unallocated_leading_byte (dimension); - } - else if (chars == 96) - { - id = get_unallocated_leading_byte (dimension); - } - else - { - abort (); - } - } - else if (dimension == 2) - { - if (chars == 94) - { - id = get_unallocated_leading_byte (dimension); - } - else if (chars == 96) - { - id = get_unallocated_leading_byte (dimension); - } - else - { - abort (); - } - } - else - { - abort (); - } - if (final) - { - if (chars == 94) - byte_offset = 33; - else if (chars == 96) - byte_offset = 32; - } -#else id = get_unallocated_leading_byte (dimension); -#endif if (NILP (doc_string)) doc_string = build_string (""); @@ -1387,6 +1879,17 @@ character set. Recognized properties are: if (columns == -1) columns = dimension; + + if (byte_offset < 0) + { + if (chars == 94) + byte_offset = 33; + else if (chars == 96) + byte_offset = 32; + else + byte_offset = 0; + } + charset = make_charset (id, name, type, columns, graphic, final, direction, short_name, long_name, doc_string, registry, @@ -1680,14 +2183,22 @@ Set mapping-table of CHARSET to TABLE. if (EQ (table, Qnil)) { CHARSET_DECODING_TABLE(cs) = table; - CHARSET_ENCODING_TABLE(cs) = Qnil; return table; } else if (VECTORP (table)) { - if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs)) + int ccs_len; + + /* ad-hoc method for `ascii' */ + if ((CHARSET_CHARS (cs) == 94) && + (CHARSET_BYTE_OFFSET (cs) != 33)) + ccs_len = 128 - CHARSET_BYTE_OFFSET (cs); + else + ccs_len = CHARSET_CHARS (cs); + + if (XVECTOR_LENGTH (table) > ccs_len) args_out_of_range (table, make_int (CHARSET_CHARS (cs))); - old_table = CHARSET_ENCODING_TABLE(cs); + old_table = CHARSET_DECODING_TABLE(cs); CHARSET_DECODING_TABLE(cs) = table; } else @@ -1699,19 +2210,17 @@ Set mapping-table of CHARSET to TABLE. switch (CHARSET_DIMENSION (cs)) { case 1: - CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil); for (i = 0; i < XVECTOR_LENGTH (table); i++) { Lisp_Object c = XVECTOR_DATA(table)[i]; if (CHARP (c)) - put_char_code_table (XCHAR (c), - make_int (i + CHARSET_BYTE_OFFSET (cs)), - CHARSET_ENCODING_TABLE(cs)); + put_char_attribute + (c, charset, + list1 (make_int (i + CHARSET_BYTE_OFFSET (cs)))); } break; case 2: - CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil); for (i = 0; i < XVECTOR_LENGTH (table); i++) { Lisp_Object v = XVECTOR_DATA(table)[i]; @@ -1730,17 +2239,18 @@ Set mapping-table of CHARSET to TABLE. Lisp_Object c = XVECTOR_DATA(v)[j]; if (CHARP (c)) - put_char_code_table - (XCHAR (c), - make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8) - | (j + CHARSET_BYTE_OFFSET (cs))), - CHARSET_ENCODING_TABLE(cs)); + put_char_attribute (c, charset, + list2 + (make_int + (i + CHARSET_BYTE_OFFSET (cs)), + make_int + (j + CHARSET_BYTE_OFFSET (cs)))); } } else if (CHARP (v)) - put_char_code_table (XCHAR (v), - make_int (i + CHARSET_BYTE_OFFSET (cs)), - CHARSET_ENCODING_TABLE(cs)); + put_char_attribute (v, charset, + list1 + (make_int (i + CHARSET_BYTE_OFFSET (cs)))); } break; } @@ -1828,6 +2338,18 @@ Return list of charset and one or two position-codes of CHAR. */ (character)) { +#ifdef UTF2000 + Lisp_Object ret; + Lisp_Object charset; + + CHECK_CHAR_COERCE_INT (character); + ret = SPLIT_CHAR (XCHAR (character)); + charset = Fcar (ret); + if (CHARSETP (charset)) + return Fcons (XCHARSET_NAME (charset), Fcopy_list (Fcdr (ret))); + else + return ret; +#else /* This function can GC */ struct gcpro gcpro1, gcpro2; Lisp_Object charset = Qnil; @@ -1848,8 +2370,8 @@ Return list of charset and one or two position-codes of CHAR. rc = list2 (XCHARSET_NAME (charset), make_int (c1)); } UNGCPRO; - return rc; +#endif } @@ -1953,6 +2475,12 @@ syms_of_mule_charset (void) DEFSUBR (Fset_charset_ccl_program); DEFSUBR (Fset_charset_registry); #ifdef UTF2000 + DEFSUBR (Fchar_attribute_alist); + DEFSUBR (Fget_char_attribute); + DEFSUBR (Fput_char_attribute); + DEFSUBR (Fdefine_char); + DEFSUBR (Fchar_variants); + DEFSUBR (Fget_composite_char); DEFSUBR (Fcharset_mapping_table); DEFSUBR (Fset_charset_mapping_table); #endif @@ -2002,14 +2530,27 @@ syms_of_mule_charset (void) defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1"); defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2"); #ifdef UTF2000 + defsymbol (&Q_ucs, "->ucs"); + defsymbol (&Q_decomposition, "->decomposition"); + defsymbol (&Qcompat, "compat"); + defsymbol (&QnoBreak, "noBreak"); + defsymbol (&Qfraction, "fraction"); + defsymbol (&Qsuper, "super"); + defsymbol (&Qsub, "sub"); + defsymbol (&Qcircle, "circle"); + defsymbol (&Qsquare, "square"); + defsymbol (&Qwide, "wide"); + defsymbol (&Qnarrow, "narrow"); + defsymbol (&Qfont, "font"); + defsymbol (&Qucs, "ucs"); defsymbol (&Qucs_bmp, "ucs-bmp"); defsymbol (&Qlatin_viscii, "latin-viscii"); defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower"); defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper"); defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower"); defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper"); - defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208"); - defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208"); + defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa"); + defsymbol (&Qethiopic_ucs, "ethiopic-ucs"); #endif defsymbol (&Qchinese_big5_1, "chinese-big5-1"); defsymbol (&Qchinese_big5_2, "chinese-big5-2"); @@ -2042,10 +2583,10 @@ vars_of_mule_charset (void) charset_by_attributes[i][j][k] = Qnil; #endif - next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; #ifdef UTF2000 - next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1; + next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE; #else + next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; #endif @@ -2058,15 +2599,24 @@ Leading-code of private TYPE9N charset of column-width 1. #endif #ifdef UTF2000 - Vutf_2000_version = build_string("0.8 (Kami)"); + Vutf_2000_version = build_string("0.12 (Kashiwara)"); DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /* Version number of UTF-2000. */ ); + staticpro (&Vcharacter_attribute_table); + Vcharacter_attribute_table = make_char_code_table (Qnil); + + staticpro (&Vcharacter_composition_table); + Vcharacter_composition_table = make_char_code_table (Qnil); + + staticpro (&Vcharacter_variant_table); + Vcharacter_variant_table = make_char_code_table (Qnil); + Vdefault_coded_charset_priority_list = Qnil; DEFVAR_LISP ("default-coded-charset-priority-list", &Vdefault_coded_charset_priority_list /* -Default order of preferred coded-character-set. +Default order of preferred coded-character-sets. */ ); #endif } @@ -2327,26 +2877,24 @@ complex_vars_of_mule_charset (void) build_string ("VISCII 1.1 (Vietnamese)"), build_string ("VISCII1\\.1"), Qnil, 0, 0, 0, 0); - Vcharset_hiragana_jisx0208 = - make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208, - CHARSET_TYPE_94X94, 2, 0, 'B', + Vcharset_ideograph_daikanwa = + make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, + CHARSET_TYPE_256X256, 2, 2, 0, CHARSET_LEFT_TO_RIGHT, - build_string ("Hiragana"), - build_string ("Hiragana of JIS X0208"), - build_string ("Japanese Hiragana of JIS X0208"), - build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"), - Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA, - (0x24 - 33) * 94 + (0x21 - 33), 33); - Vcharset_katakana_jisx0208 = - make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208, - CHARSET_TYPE_94X94, 2, 0, 'B', + build_string ("Daikanwa"), + build_string ("Morohashi's Daikanwa"), + build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"), + build_string ("Daikanwa"), + Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0); + Vcharset_ethiopic_ucs = + make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, + CHARSET_TYPE_256X256, 2, 2, 0, CHARSET_LEFT_TO_RIGHT, - build_string ("Katakana"), - build_string ("Katakana of JIS X0208"), - build_string ("Japanese Katakana of JIS X0208"), - build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"), - Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA, - (0x25 - 33) * 94 + (0x21 - 33), 33); + build_string ("Ethiopic (UCS)"), + build_string ("Ethiopic (UCS)"), + build_string ("Ethiopic of UCS"), + build_string ("Ethiopic-Unicode"), + Qnil, 0x1200, 0x137F, 0x1200, 0); #endif Vcharset_chinese_big5_1 = make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,