X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;ds=inline;f=src%2Fmule-charset.c;h=285fa6990057a4e20524d4061ac06d707881d63d;hb=716cfba952c1dc0d2cf5c968971f3780ba728a89;hp=67f10498a2db71a20740c2517c2ed4067b6fd9b3;hpb=113b194be934327de99a168d809271db252c07c4;p=chise%2Fxemacs-chise.git.1 diff --git a/src/mule-charset.c b/src/mule-charset.c index 67f1049..285fa69 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -32,6 +32,7 @@ Boston, MA 02111-1307, USA. */ #include "lstream.h" #include "device.h" #include "faces.h" +#include "mule-ccl.h" /* The various pre-defined charsets. */ @@ -73,11 +74,17 @@ static int composite_char_col_next; #endif /* ENABLE_COMPOSITE_CHARS */ -/* Table of charsets indexed by leading byte. */ -Lisp_Object charset_by_leading_byte[128]; +struct charset_lookup *chlook; -/* Table of charsets indexed by type/final-byte/direction. */ -Lisp_Object charset_by_attributes[4][128][2]; +static const struct lrecord_description charset_lookup_description_1[] = { + { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 }, + { XD_END } +}; + +static const struct struct_description charset_lookup_description = { + sizeof (struct charset_lookup), + charset_lookup_description_1 +}; /* Table of number of bytes in the string representation of a character indexed by the first byte of that representation. @@ -85,9 +92,9 @@ Lisp_Object charset_by_attributes[4][128][2]; rep_bytes_by_first_byte(c) is more efficient than the equivalent canonical computation: - (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */ + XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */ -Bytecount rep_bytes_by_first_byte[0xA0] = +const Bytecount rep_bytes_by_first_byte[0xA0] = { /* 0x00 - 0x7f are for straight ASCII */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -111,7 +118,6 @@ Lisp_Object Qcharsetp; Lisp_Object Qregistry, Qfinal, Qgraphic; Lisp_Object Qdirection; Lisp_Object Qreverse_direction_charset; -Lisp_Object Qccl_program; Lisp_Object Qleading_byte; Lisp_Object Qshort_name, Qlong_name; @@ -261,7 +267,8 @@ non_ascii_valid_char_p (Emchar ch) if (f3 < 0x20) return 0; - if (f3 != 0x20 && f3 != 0x7F) + if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE && + f2 <= MAX_CHAR_FIELD2_PRIVATE)) return 1; /* @@ -270,6 +277,8 @@ non_ascii_valid_char_p (Emchar ch) FIELD2_TO_PRIVATE_LEADING_BYTE are the same. */ charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE); + if (EQ (charset, Qnil)) + return 0; return (XCHARSET_CHARS (charset) == 96); } else @@ -294,7 +303,8 @@ non_ascii_valid_char_p (Emchar ch) } #endif /* ENABLE_COMPOSITE_CHARS */ - if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F) + if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F + && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE)) return 1; if (f1 <= MAX_CHAR_FIELD1_OFFICIAL) @@ -304,6 +314,8 @@ non_ascii_valid_char_p (Emchar ch) charset = CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); + if (EQ (charset, Qnil)) + return 0; return (XCHARSET_CHARS (charset) == 96); } } @@ -395,22 +407,22 @@ Lstream_funget_emchar (Lstream *stream, Emchar ch) /************************************************************************/ static Lisp_Object -mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object)) +mark_charset (Lisp_Object obj) { - struct Lisp_Charset *cs = XCHARSET (obj); + Lisp_Charset *cs = XCHARSET (obj); - markobj (cs->short_name); - markobj (cs->long_name); - markobj (cs->doc_string); - markobj (cs->registry); - markobj (cs->ccl_program); + mark_object (cs->short_name); + mark_object (cs->long_name); + mark_object (cs->doc_string); + mark_object (cs->registry); + mark_object (cs->ccl_program); return cs->name; } static void print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) { - struct Lisp_Charset *cs = XCHARSET (obj); + Lisp_Charset *cs = XCHARSET (obj); char buf[200]; if (print_readably) @@ -441,9 +453,20 @@ print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) write_c_string (buf, printcharfun); } +static const struct lrecord_description charset_description[] = { + { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) }, + { XD_END } +}; + DEFINE_LRECORD_IMPLEMENTATION ("charset", charset, - mark_charset, print_charset, 0, 0, 0, - struct Lisp_Charset); + mark_charset, print_charset, 0, 0, 0, charset_description, + Lisp_Charset); /* Make a new charset. */ static Lisp_Object @@ -454,8 +477,10 @@ make_charset (int id, Lisp_Object name, unsigned char rep_bytes, Lisp_Object reg) { Lisp_Object obj; - struct Lisp_Charset *cs = - alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset); + Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset); + + zero_lcrecord (cs); + XSETCHARSET (obj, cs); CHARSET_ID (cs) = id; @@ -483,15 +508,12 @@ make_charset (int id, Lisp_Object name, unsigned char rep_bytes, /* some charsets do not have final characters. This includes ASCII, Control-1, Composite, and the two faux private charsets. */ - assert (NILP (charset_by_attributes[type][final][direction])); - charset_by_attributes[type][final][direction] = obj; + assert (NILP (chlook->charset_by_attributes[type][final][direction])); + chlook->charset_by_attributes[type][final][direction] = obj; } - assert (NILP (charset_by_leading_byte[id - 128])); - charset_by_leading_byte[id - 128] = obj; - if (id < 0xA0) - /* official leading byte */ - rep_bytes_by_first_byte[id] = rep_bytes; + assert (NILP (chlook->charset_by_leading_byte[id - 128])); + chlook->charset_by_leading_byte[id - 128] = obj; /* Some charsets are "faux" and don't have names or really exist at all except in the leading-byte table. */ @@ -814,7 +836,7 @@ NEW-NAME is the name of the new charset. Return the new charset. int id, dimension, columns, graphic, final; int direction, type; Lisp_Object registry, doc_string, short_name, long_name; - struct Lisp_Charset *cs; + Lisp_Charset *cs; charset = Fget_charset (charset); if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset))) @@ -963,7 +985,7 @@ Recognized properties are those listed in `make-charset', as well as */ (charset, prop)) { - struct Lisp_Charset *cs; + Lisp_Charset *cs; charset = Fget_charset (charset); cs = XCHARSET (charset); @@ -1050,11 +1072,14 @@ Set the 'registry property of CHARSET to REGISTRY. /************************************************************************/ DEFUN ("make-char", Fmake_char, 2, 3, 0, /* -Make a multi-byte character from CHARSET and octets ARG1 and ARG2. +Make a character from CHARSET and octets ARG1 and ARG2. +ARG2 is required only for characters from two-dimensional charsets. +For example, (make-char 'latin-iso8859-2 185) will return the Latin 2 +character s with caron. */ (charset, arg1, arg2)) { - struct Lisp_Charset *cs; + Lisp_Charset *cs; int a1, a2; int lowlim, highlim; @@ -1109,20 +1134,47 @@ N defaults to 0 if omitted. (ch, n)) { Lisp_Object charset; - int c1, c2, int_n; + int octet0, octet1; CHECK_CHAR_COERCE_INT (ch); - if (NILP (n)) - int_n = 0; + + BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1); + + if (NILP (n) || EQ (n, Qzero)) + return make_int (octet0); + else if (EQ (n, make_int (1))) + return make_int (octet1); + else + signal_simple_error ("Octet number must be 0 or 1", n); +} + +DEFUN ("split-char", Fsplit_char, 1, 1, 0, /* +Return list of charset and one or two position-codes of CHAR. +*/ + (character)) +{ + /* This function can GC */ + struct gcpro gcpro1, gcpro2; + Lisp_Object charset = Qnil; + Lisp_Object rc = Qnil; + int c1, c2; + + GCPRO2 (charset, rc); + CHECK_CHAR_COERCE_INT (character); + + BREAKUP_CHAR (XCHAR (character), charset, c1, c2); + + if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2) + { + rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2)); + } else { - CHECK_INT (n); - int_n = XINT (n); - if (int_n != 0 && int_n != 1) - signal_simple_error ("Octet number must be 0 or 1", n); + rc = list2 (XCHARSET_NAME (charset), make_int (c1)); } - BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); - return make_int (int_n == 0 ? c1 : c2); + UNGCPRO; + + return rc; } @@ -1228,6 +1280,7 @@ syms_of_mule_charset (void) DEFSUBR (Fmake_char); DEFSUBR (Fchar_charset); DEFSUBR (Fchar_octet); + DEFSUBR (Fsplit_char); #ifdef ENABLE_COMPOSITE_CHARS DEFSUBR (Fmake_composite_char); @@ -1240,7 +1293,6 @@ syms_of_mule_charset (void) defsymbol (&Qgraphic, "graphic"); defsymbol (&Qdirection, "direction"); defsymbol (&Qreverse_direction_charset, "reverse-direction-charset"); - defsymbol (&Qccl_program, "ccl-program"); defsymbol (&Qshort_name, "short-name"); defsymbol (&Qlong_name, "long-name"); @@ -1281,15 +1333,18 @@ vars_of_mule_charset (void) { int i, j, k; + chlook = xnew (struct charset_lookup); + dumpstruct (&chlook, &charset_lookup_description); + /* Table of charsets indexed by leading byte. */ - for (i = 0; i < countof (charset_by_leading_byte); i++) - charset_by_leading_byte[i] = Qnil; + for (i = 0; i < countof (chlook->charset_by_leading_byte); i++) + chlook->charset_by_leading_byte[i] = Qnil; /* Table of charsets indexed by type/final-byte/direction. */ - for (i = 0; i < countof (charset_by_attributes); i++) - for (j = 0; j < countof (charset_by_attributes[0]); j++) - for (k = 0; k < countof (charset_by_attributes[0][0]); k++) - charset_by_attributes[i][j][k] = Qnil; + for (i = 0; i < countof (chlook->charset_by_attributes); i++) + for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++) + for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++) + chlook->charset_by_attributes[i][j][k] = Qnil; next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; @@ -1305,6 +1360,7 @@ complex_vars_of_mule_charset (void) /* Predefined character sets. We store them into variables for ease of access. */ + staticpro (&Vcharset_ascii); Vcharset_ascii = make_charset (LEADING_BYTE_ASCII, Qascii, 1, CHARSET_TYPE_94, 1, 0, 'B', @@ -1313,6 +1369,7 @@ complex_vars_of_mule_charset (void) build_string ("ASCII)"), build_string ("ASCII (ISO646 IRV)"), build_string ("\\(iso8859-[0-9]*\\|-ascii\\)")); + staticpro (&Vcharset_control_1); Vcharset_control_1 = make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, CHARSET_TYPE_94, 1, 1, 0, @@ -1321,6 +1378,7 @@ complex_vars_of_mule_charset (void) build_string ("Control characters"), build_string ("Control characters 128-191"), build_string ("")); + staticpro (&Vcharset_latin_iso8859_1); Vcharset_latin_iso8859_1 = make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, CHARSET_TYPE_96, 1, 1, 'A', @@ -1329,6 +1387,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-1 (Latin-1)"), build_string ("ISO8859-1 (Latin-1)"), build_string ("iso8859-1")); + staticpro (&Vcharset_latin_iso8859_2); Vcharset_latin_iso8859_2 = make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, CHARSET_TYPE_96, 1, 1, 'B', @@ -1337,6 +1396,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-2 (Latin-2)"), build_string ("ISO8859-2 (Latin-2)"), build_string ("iso8859-2")); + staticpro (&Vcharset_latin_iso8859_3); Vcharset_latin_iso8859_3 = make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, CHARSET_TYPE_96, 1, 1, 'C', @@ -1345,6 +1405,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-3 (Latin-3)"), build_string ("ISO8859-3 (Latin-3)"), build_string ("iso8859-3")); + staticpro (&Vcharset_latin_iso8859_4); Vcharset_latin_iso8859_4 = make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, CHARSET_TYPE_96, 1, 1, 'D', @@ -1353,6 +1414,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-4 (Latin-4)"), build_string ("ISO8859-4 (Latin-4)"), build_string ("iso8859-4")); + staticpro (&Vcharset_thai_tis620); Vcharset_thai_tis620 = make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, CHARSET_TYPE_96, 1, 1, 'T', @@ -1361,6 +1423,7 @@ complex_vars_of_mule_charset (void) build_string ("TIS620 (Thai)"), build_string ("TIS620.2529 (Thai)"), build_string ("tis620")); + staticpro (&Vcharset_greek_iso8859_7); Vcharset_greek_iso8859_7 = make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, CHARSET_TYPE_96, 1, 1, 'F', @@ -1369,6 +1432,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-7 (Greek)"), build_string ("ISO8859-7 (Greek)"), build_string ("iso8859-7")); + staticpro (&Vcharset_arabic_iso8859_6); Vcharset_arabic_iso8859_6 = make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, CHARSET_TYPE_96, 1, 1, 'G', @@ -1377,6 +1441,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-6 (Arabic)"), build_string ("ISO8859-6 (Arabic)"), build_string ("iso8859-6")); + staticpro (&Vcharset_hebrew_iso8859_8); Vcharset_hebrew_iso8859_8 = make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, CHARSET_TYPE_96, 1, 1, 'H', @@ -1385,6 +1450,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-8 (Hebrew)"), build_string ("ISO8859-8 (Hebrew)"), build_string ("iso8859-8")); + staticpro (&Vcharset_katakana_jisx0201); Vcharset_katakana_jisx0201 = make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, CHARSET_TYPE_94, 1, 1, 'I', @@ -1393,6 +1459,7 @@ complex_vars_of_mule_charset (void) build_string ("JISX0201.1976 (Japanese Kana)"), build_string ("JISX0201.1976 Japanese Kana"), build_string ("jisx0201.1976")); + staticpro (&Vcharset_latin_jisx0201); Vcharset_latin_jisx0201 = make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, CHARSET_TYPE_94, 1, 0, 'J', @@ -1401,6 +1468,7 @@ complex_vars_of_mule_charset (void) build_string ("JISX0201.1976 (Japanese Roman)"), build_string ("JISX0201.1976 Japanese Roman"), build_string ("jisx0201.1976")); + staticpro (&Vcharset_cyrillic_iso8859_5); Vcharset_cyrillic_iso8859_5 = make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, CHARSET_TYPE_96, 1, 1, 'L', @@ -1409,6 +1477,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-5 (Cyrillic)"), build_string ("ISO8859-5 (Cyrillic)"), build_string ("iso8859-5")); + staticpro (&Vcharset_latin_iso8859_9); Vcharset_latin_iso8859_9 = make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, CHARSET_TYPE_96, 1, 1, 'M', @@ -1417,6 +1486,7 @@ complex_vars_of_mule_charset (void) build_string ("ISO8859-9 (Latin-5)"), build_string ("ISO8859-9 (Latin-5)"), build_string ("iso8859-9")); + staticpro (&Vcharset_japanese_jisx0208_1978); Vcharset_japanese_jisx0208_1978 = make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, CHARSET_TYPE_94X94, 2, 0, '@', @@ -1426,6 +1496,7 @@ complex_vars_of_mule_charset (void) build_string ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), build_string ("\\(jisx0208\\|jisc6226\\)\\.1978")); + staticpro (&Vcharset_chinese_gb2312); Vcharset_chinese_gb2312 = make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, CHARSET_TYPE_94X94, 2, 0, 'A', @@ -1434,6 +1505,7 @@ complex_vars_of_mule_charset (void) build_string ("GB2312)"), build_string ("GB2312 Chinese simplified"), build_string ("gb2312")); + staticpro (&Vcharset_japanese_jisx0208); Vcharset_japanese_jisx0208 = make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, CHARSET_TYPE_94X94, 2, 0, 'B', @@ -1442,6 +1514,7 @@ complex_vars_of_mule_charset (void) build_string ("JISX0208.1983/1990 (Japanese)"), build_string ("JISX0208.1983/1990 Japanese Kanji"), build_string ("jisx0208.19\\(83\\|90\\)")); + staticpro (&Vcharset_korean_ksc5601); Vcharset_korean_ksc5601 = make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, CHARSET_TYPE_94X94, 2, 0, 'C', @@ -1450,6 +1523,7 @@ complex_vars_of_mule_charset (void) build_string ("KSC5601 (Korean"), build_string ("KSC5601 Korean Hangul and Hanja"), build_string ("ksc5601")); + staticpro (&Vcharset_japanese_jisx0212); Vcharset_japanese_jisx0212 = make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, CHARSET_TYPE_94X94, 2, 0, 'D', @@ -1460,6 +1534,7 @@ complex_vars_of_mule_charset (void) build_string ("jisx0212")); #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$" + staticpro (&Vcharset_chinese_cns11643_1); Vcharset_chinese_cns11643_1 = make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3, CHARSET_TYPE_94X94, 2, 0, 'G', @@ -1469,6 +1544,7 @@ complex_vars_of_mule_charset (void) build_string ("CNS 11643 Plane 1 Chinese traditional"), build_string (CHINESE_CNS_PLANE_RE("1"))); + staticpro (&Vcharset_chinese_cns11643_2); Vcharset_chinese_cns11643_2 = make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, CHARSET_TYPE_94X94, 2, 0, 'H', @@ -1478,6 +1554,7 @@ complex_vars_of_mule_charset (void) build_string ("CNS 11643 Plane 2 Chinese traditional"), build_string (CHINESE_CNS_PLANE_RE("2"))); + staticpro (&Vcharset_chinese_big5_1); Vcharset_chinese_big5_1 = make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, CHARSET_TYPE_94X94, 2, 0, '0', @@ -1487,6 +1564,7 @@ complex_vars_of_mule_charset (void) build_string ("Big5 Level-1 Chinese traditional"), build_string ("big5")); + staticpro (&Vcharset_chinese_big5_2); Vcharset_chinese_big5_2 = make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, CHARSET_TYPE_94X94, 2, 0, '1', @@ -1502,6 +1580,7 @@ complex_vars_of_mule_charset (void) /* #### For simplicity, we put composite chars into a 96x96 charset. This is going to lead to problems because you can run out of room, esp. as we don't yet recycle numbers. */ + staticpro (&Vcharset_composite); Vcharset_composite = make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3, CHARSET_TYPE_96X96, 2, 0, 0, @@ -1511,6 +1590,7 @@ complex_vars_of_mule_charset (void) build_string ("Composite characters"), build_string ("")); + /* #### not dumped properly */ composite_char_row_next = 32; composite_char_col_next = 32;