/* Functions to handle multilingual characters.
Copyright (C) 1992, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
- Copyright (C) 1999,2000 MORIOKA Tomohiko
+ Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
This file is part of XEmacs.
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
-/* Synched up with: FSF 20.3. Not in FSF. */
-
/* Rewritten by Ben Wing <ben@xemacs.org>. */
+/* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
+
#include <config.h>
#ifdef UTF2000
#include <limits.h>
Lisp_Object Vcharset_latin_iso8859_9;
Lisp_Object Vcharset_japanese_jisx0208_1978;
Lisp_Object Vcharset_chinese_gb2312;
+Lisp_Object Vcharset_chinese_gb12345;
Lisp_Object Vcharset_japanese_jisx0208;
Lisp_Object Vcharset_japanese_jisx0208_1990;
Lisp_Object Vcharset_korean_ksc5601;
Lisp_Object Vcharset_ucs;
Lisp_Object Vcharset_ucs_bmp;
Lisp_Object Vcharset_ucs_cns;
+Lisp_Object Vcharset_ucs_big5;
Lisp_Object Vcharset_latin_viscii;
Lisp_Object Vcharset_latin_tcvn5712;
Lisp_Object Vcharset_latin_viscii_lower;
Lisp_Object Qideograph_daikanwa;
Lisp_Object Q_decomposition;
Lisp_Object Qucs;
+Lisp_Object Qto_ucs;
Lisp_Object Q_ucs;
Lisp_Object Qcompat;
Lisp_Object Qisolated;
}
value = seq;
}
- else if (EQ (attribute, Q_ucs))
+ else if (EQ (attribute, Qto_ucs) || EQ (attribute, Q_ucs))
{
Lisp_Object ret;
Emchar c;
put_char_id_table (c, Fcons (character, ret),
Vcharacter_variant_table);
}
+#if 0
+ if (EQ (attribute, Q_ucs))
+ attribute = Qto_ucs;
+#endif
}
{
Lisp_Object table = Fgethash (attribute,
character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
else
character = Fdecode_char (ccs, cell);
- goto setup_attributes;
+ if (!NILP (character))
+ goto setup_attributes;
}
rest = Fcdr (rest);
}
- if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
+ if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
+ (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
+
{
if (!INTP (code))
signal_simple_error ("Invalid argument", attributes);
Qlatin_iso8859_9,
Qjapanese_jisx0208_1978,
Qchinese_gb2312,
+ Qchinese_gb12345,
Qjapanese_jisx0208,
Qjapanese_jisx0208_1990,
Qkorean_ksc5601,
#ifdef UTF2000
Qucs_bmp,
Qucs_cns,
+ Qucs_big5,
Qlatin_viscii,
Qlatin_tcvn5712,
Qlatin_viscii_lower,
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
Emchar
-make_builtin_char (Lisp_Object charset, int c1, int c2)
+decode_builtin_char (Lisp_Object charset, int code_point) /* Compute the character for CODE_POINT in CHARSET arithmetically; returns -1 when no built-in mapping applies. */
{
- if (XCHARSET_UCS_MAX (charset))
- {
- Emchar code
- = (XCHARSET_DIMENSION (charset) == 1
- ?
- c1 - XCHARSET_BYTE_OFFSET (charset)
- :
- (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
- + c2 - XCHARSET_BYTE_OFFSET (charset))
- - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
- if ((code < XCHARSET_UCS_MIN (charset))
- || (XCHARSET_UCS_MAX (charset) < code))
- signal_simple_error ("Arguments makes invalid character",
- make_char (code));
- return code;
- }
- else if (XCHARSET_DIMENSION (charset) == 1)
- {
- switch (XCHARSET_CHARS (charset))
- {
- case 94:
- return MIN_CHAR_94
- + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
- case 96:
- return MIN_CHAR_96
- + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
- default:
- abort ();
- }
- }
- else
+ int final;
+
+ if (EQ (charset, Vcharset_chinese_big5)) /* Big5 input is first re-expressed as a two-byte code in one of the chinese-big5-N charsets. */
{
- if (EQ (charset, Vcharset_chinese_big5))
+ int c1 = code_point >> 8; /* bits above the low 8: lead byte */
+ int c2 = code_point & 0xFF; /* low 8 bits: trail byte */
+ unsigned int I;
+
+ if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
+ &&
+ ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
+ ((0xA1 <= c2) && (c2 <= 0xFE)) ) ) /* when either byte is out of range, charset and code_point are left as passed in */
{
- int B1 = c1, B2 = c2;
- unsigned int I
- = (B1 - 0xA1) * BIG5_SAME_ROW
- + B2 - (B2 < 0x7F ? 0x40 : 0x62);
+ I = (c1 - 0xA1) * BIG5_SAME_ROW
+ + c2 - (c2 < 0x7F ? 0x40 : 0x62); /* linear index; trail ranges 0x40-0x7E and 0xA1-0xFE map to 0-62 and 63-156 */
- if (B1 < 0xC9)
+ if (c1 < 0xC9) /* lead bytes below 0xC9 select chinese-big5-1 */
{
charset = Vcharset_chinese_big5_1;
}
charset = Vcharset_chinese_big5_2;
I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
}
- c1 = I / 94 + 33;
- c2 = I % 94 + 33;
+ code_point = ((I / 94 + 33) << 8) | (I % 94 + 33); /* repack the index as two bytes, 94 values per row, each offset by 33 */
+ }
+ }
+ if ((final = XCHARSET_FINAL (charset)) >= '0') /* final byte of at least '0': result is an offset from one of the MIN_CHAR_* bases */
+ {
+ if (XCHARSET_DIMENSION (charset) == 1)
+ {
+ switch (XCHARSET_CHARS (charset))
+ {
+ case 94:
+ return MIN_CHAR_94
+ + (final - '0') * 94 + ((code_point & 0x7F) - 33); /* only the low 7 bits of the byte are used */
+ case 96:
+ return MIN_CHAR_96
+ + (final - '0') * 96 + ((code_point & 0x7F) - 32);
+ default:
+ abort ();
+ return -1; /* NOTE(review): follows abort (); presumably only there so every path returns a value */
+ }
}
- switch (XCHARSET_CHARS (charset))
+ else
{
- case 94:
- return MIN_CHAR_94x94
- + (XCHARSET_FINAL (charset) - '0') * 94 * 94
- + (c1 - 33) * 94 + (c2 - 33);
- case 96:
- return MIN_CHAR_96x96
- + (XCHARSET_FINAL (charset) - '0') * 96 * 96
- + (c1 - 32) * 96 + (c2 - 32);
- default:
- abort ();
+ switch (XCHARSET_CHARS (charset))
+ {
+ case 94:
+ return MIN_CHAR_94x94
+ + (final - '0') * 94 * 94
+ + (((code_point >> 8) & 0x7F) - 33) * 94 /* row taken from the second-lowest byte */
+ + ((code_point & 0x7F) - 33); /* column taken from the lowest byte */
+ case 96:
+ return MIN_CHAR_96x96
+ + (final - '0') * 96 * 96
+ + (((code_point >> 8) & 0x7F) - 32) * 96
+ + ((code_point & 0x7F) - 32);
+ default:
+ abort ();
+ return -1;
+ }
}
}
+ else if (XCHARSET_UCS_MAX (charset)) /* final byte below '0' but a nonzero UCS maximum: map linearly into [ucs-min, ucs-max] */
+ {
+ Emchar cid
+ = (XCHARSET_DIMENSION (charset) == 1
+ ?
+ code_point - XCHARSET_BYTE_OFFSET (charset)
+ :
+ ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
+ * XCHARSET_CHARS (charset)
+ + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
+ - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
+ if ((cid < XCHARSET_UCS_MIN (charset))
+ || (XCHARSET_UCS_MAX (charset) < cid))
+ return -1; /* computed value lies outside the charset's UCS range */
+ return cid;
+ }
+ else
+ return -1; /* neither a final byte >= '0' nor a UCS range */
}
int
c = XINT (code);
if (XCHARSET_GRAPHIC (charset) == 1)
c &= 0x7F7F7F7F;
- return make_char (DECODE_CHAR (charset, c));
+ c = DECODE_CHAR (charset, c);
+ return c >= 0 ? make_char (c) : Qnil;
}
DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
(charset, code))
{
int c;
- int final;
charset = Fget_charset (charset);
CHECK_INT (code);
- c = XINT (code);
-
- if ((final = XCHARSET_FINAL (charset)) >= '0')
+ if (EQ (charset, Vcharset_latin_viscii))
{
- if (XCHARSET_DIMENSION (charset) == 1)
+ Lisp_Object chr = Fdecode_char (charset, code);
+ Lisp_Object ret;
+
+ if (!NILP (chr))
{
- switch (XCHARSET_CHARS (charset))
+ if (!NILP
+ (ret = Fget_char_attribute (chr,
+ Vcharset_latin_viscii_lower,
+ Qnil)))
{
- case 94:
- return
- make_char (MIN_CHAR_94 + (final - '0') * 94
- + ((c & 0x7F) - 33));
- case 96:
- return
- make_char (MIN_CHAR_96 + (final - '0') * 96
- + ((c & 0x7F) - 32));
- default:
- return Fdecode_char (charset, code);
+ charset = Vcharset_latin_viscii_lower;
+ code = ret;
}
- }
- else
- {
- switch (XCHARSET_CHARS (charset))
+ else if (!NILP
+ (ret = Fget_char_attribute (chr,
+ Vcharset_latin_viscii_upper,
+ Qnil)))
{
- case 94:
- return
- make_char (MIN_CHAR_94x94
- + (final - '0') * 94 * 94
- + (((c >> 8) & 0x7F) - 33) * 94
- + ((c & 0x7F) - 33));
- case 96:
- return
- make_char (MIN_CHAR_96x96
- + (final - '0') * 96 * 96
- + (((c >> 8) & 0x7F) - 32) * 96
- + ((c & 0x7F) - 32));
- default:
- return Fdecode_char (charset, code);
+ charset = Vcharset_latin_viscii_upper;
+ code = ret;
}
}
}
- else if (XCHARSET_UCS_MAX (charset))
- {
- Emchar cid
- = (XCHARSET_DIMENSION (charset) == 1
- ?
- c - XCHARSET_BYTE_OFFSET (charset)
- :
- ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
- * XCHARSET_CHARS (charset)
- + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
- - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
- if ((cid < XCHARSET_UCS_MIN (charset))
- || (XCHARSET_UCS_MAX (charset) < cid))
- return Fdecode_char (charset, code);
- return make_char (cid);
- }
- else
- return Fdecode_char (charset, code);
+ c = XINT (code);
+#if 0
+ if (XCHARSET_GRAPHIC (charset) == 1)
+ c &= 0x7F7F7F7F;
+#endif
+ c = decode_builtin_char (charset, c);
+ return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
}
#endif
defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
defsymbol (&Qchinese_gb2312, "chinese-gb2312");
+ defsymbol (&Qchinese_gb12345, "chinese-gb12345");
defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
#ifdef UTF2000
+ defsymbol (&Qto_ucs, "=>ucs");
defsymbol (&Q_ucs, "->ucs");
defsymbol (&Q_decomposition, "->decomposition");
defsymbol (&Qcompat, "compat");
defsymbol (&Qucs, "ucs");
defsymbol (&Qucs_bmp, "ucs-bmp");
defsymbol (&Qucs_cns, "ucs-cns");
+ defsymbol (&Qucs_big5, "ucs-big5");
defsymbol (&Qlatin_viscii, "latin-viscii");
defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
Qnil, 0, 0xFFFF, 0, 0);
staticpro (&Vcharset_ucs_cns);
Vcharset_ucs_cns =
- make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 4,
+ make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
1, 2, 0, CHARSET_LEFT_TO_RIGHT,
build_string ("UCS for CNS"),
build_string ("UCS for CNS 11643"),
build_string ("ISO/IEC 10646 for CNS 11643"),
build_string (""),
- Qnil, 0, 0xFFFFFFF, 0, 0);
+ Qnil, 0, 0, 0, 0);
+ staticpro (&Vcharset_ucs_big5); /* ucs-big5: created with the same numeric arguments as the ucs-cns charset; only the leading byte, symbol and descriptive strings differ */
+ Vcharset_ucs_big5 =
+ make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
+ 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("UCS for Big5"),
+ build_string ("UCS for Big5"),
+ build_string ("ISO/IEC 10646 for Big5"),
+ build_string (""),
+ Qnil, 0, 0, 0, 0);
#else
# define MIN_CHAR_THAI 0
# define MAX_CHAR_THAI 0
build_string ("GB2312 Chinese simplified"),
build_string ("gb2312"),
Qnil, 0, 0, 0, 33);
+ staticpro (&Vcharset_chinese_gb12345); /* register the chinese-gb12345 charset (GB 12345-1990) */
+ Vcharset_chinese_gb12345 =
+ make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
+ 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("G1"),
+ build_string ("GB 12345"),
+ build_string ("GB 12345-1990"),
+ build_string ("GB12345\\(\\.1990\\)?-0"),
+ Qnil, 0, 0, 0, 33);
staticpro (&Vcharset_japanese_jisx0208);
Vcharset_japanese_jisx0208 =
make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
build_string ("TCVN 5712"),
build_string ("TCVN 5712 (VSCII-2)"),
build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
- build_string ("tcvn5712-1"),
+ build_string ("tcvn5712\\(\\.1993\\)?-1"),
Qnil, 0, 0, 0, 32);
staticpro (&Vcharset_latin_viscii_lower);
Vcharset_latin_viscii_lower =