/* Functions to handle multilingual characters.
Copyright (C) 1992, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
- Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
+ Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
This file is part of XEmacs.
Lisp_Object Vcharset_latin_viscii_lower;
Lisp_Object Vcharset_latin_viscii_upper;
Lisp_Object Vcharset_chinese_big5;
-Lisp_Object Vcharset_chinese_big5_cdp;
+/* Lisp_Object Vcharset_chinese_big5_cdp; */
Lisp_Object Vcharset_ideograph_hanziku_1;
Lisp_Object Vcharset_ideograph_hanziku_2;
Lisp_Object Vcharset_ideograph_hanziku_3;
#ifdef UTF2000
-INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
-INLINE_HEADER int
-CHARSET_BYTE_SIZE (Lisp_Charset* cs)
-{
- /* ad-hoc method for `ascii' */
- if ((CHARSET_CHARS (cs) == 94) &&
- (CHARSET_BYTE_OFFSET (cs) != 33))
- return 128 - CHARSET_BYTE_OFFSET (cs);
- else
- return CHARSET_CHARS (cs);
-}
-
-#define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
-
int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
int
decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
return 0;
}
-INLINE_HEADER void
-decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
- int code_point);
-INLINE_HEADER void
-decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
- int code_point)
-{
- int i = -1;
-
- while (dim > 0)
- {
- Lisp_Object nv;
-
- dim--;
- i = ((code_point >> (8 * dim)) & 255) - byte_offset;
- nv = XVECTOR_DATA(v)[i];
- if (!VECTORP (nv))
- break;
- v = nv;
- }
- if (i >= 0)
- XVECTOR_DATA(v)[i] = Qnil;
-}
-
-INLINE_HEADER void
-decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
- int code_point, Lisp_Object character);
-INLINE_HEADER void
-decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
- int code_point, Lisp_Object character)
-{
- int i = -1;
- Lisp_Object nv;
- int ccs_len = XVECTOR_LENGTH (v);
-
- while (dim > 0)
- {
- dim--;
- i = ((code_point >> (8 * dim)) & 255) - byte_offset;
- nv = XVECTOR_DATA(v)[i];
- if (dim > 0)
- {
- if (!VECTORP (nv))
- nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
- v = nv;
- }
- else
- break;
- }
- XVECTOR_DATA(v)[i] = character;
-}
-
Lisp_Object
put_char_ccs_code_point (Lisp_Object character,
Lisp_Object ccs, Lisp_Object value)
|| (XCHAR (character) != XINT (value)))
{
Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
- int dim = XCHARSET_DIMENSION (ccs);
- int ccs_len = XCHARSET_BYTE_SIZE (ccs);
- int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
int code_point;
if (CONSP (value))
Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
if (INTP (cpos))
{
- decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
+ decoding_table_remove_char (ccs, XINT (cpos));
}
}
- else
- {
- XCHARSET_DECODING_TABLE (ccs)
- = v = make_vector (ccs_len, Qnil);
- }
-
- decoding_table_put_char (v, dim, byte_offset, code_point, character);
+ decoding_table_put_char (ccs, code_point, character);
}
return value;
}
if (!NILP (cpos))
{
- decoding_table_remove_char (decoding_table,
- XCHARSET_DIMENSION (ccs),
- XCHARSET_BYTE_OFFSET (ccs),
- XINT (cpos));
+ decoding_table_remove_char (ccs, XINT (cpos));
}
}
if (CHAR_TABLEP (encoding_table))
{
- put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
+ put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
}
return Qt;
}
Qvietnamese_viscii_lower,
Qvietnamese_viscii_upper,
Qchinese_big5,
- Qchinese_big5_cdp,
+ /* Qchinese_big5_cdp, */
Qideograph_hanziku_1,
Qideograph_hanziku_2,
Qideograph_hanziku_3,
CHARSET_CCL_PROGRAM (cs) = Qnil;
CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
#ifdef UTF2000
- CHARSET_DECODING_TABLE(cs) = Qnil;
+ CHARSET_DECODING_TABLE(cs) = Qunbound;
CHARSET_MIN_CODE (cs) = min_code;
CHARSET_MAX_CODE (cs) = max_code;
CHARSET_CODE_OFFSET (cs) = code_offset;
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
Emchar
+decode_defined_char (Lisp_Object ccs, int code_point)
+{
+ int dim = XCHARSET_DIMENSION (ccs);
+ Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
+ Emchar char_id = -1;
+ Lisp_Object mother;
+
+ while (dim > 0)
+ {
+ dim--;
+ decoding_table
+ = get_ccs_octet_table (decoding_table, ccs,
+ (code_point >> (dim * 8)) & 255);
+ }
+ if (CHARP (decoding_table))
+ return XCHAR (decoding_table);
+ if (char_id >= 0)
+ return char_id;
+ else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
+ {
+ if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
+ {
+ if ( EQ (mother, Vcharset_ucs) )
+ return DECODE_CHAR (mother, code_point);
+ else
+ return decode_defined_char (mother, code_point);
+ }
+ }
+ return -1;
+}
+
+Emchar
decode_builtin_char (Lisp_Object charset, int code_point)
{
Lisp_Object mother = XCHARSET_MOTHER (charset);
int final;
- if ( CHARSETP (mother) )
+ if ( CHARSETP (mother) && (XCHARSET_MAX_CODE (charset) > 0) )
{
int code = code_point;
+ (row - (18 + 32)) * 94
+ cell - 33;
}
- return DECODE_CHAR (mother, code + XCHARSET_CODE_OFFSET(charset));
+ return
+ decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
}
+#if 0
else if (EQ (charset, Vcharset_chinese_big5))
{
int c1 = code_point >> 8;
code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
}
}
+#endif
if ((final = XCHARSET_FINAL (charset)) >= '0')
{
if (XCHARSET_DIMENSION (charset) == 1)
code = charset_code_point (mother, ch);
else
code = ch;
- if ( (min <= code) && (code <= max) )
+ if ( ((max == 0) && CHARSETP (mother)) ||
+ ((min <= code) && (code <= max)) )
{
int d = code - XCHARSET_CODE_OFFSET (charset);
this character set.
'dimension Number of octets used to index a character in this charset.
Either 1 or 2. Defaults to 1.
+ If the UTF-2000 feature is enabled, 3 or 4 are also available.
'columns Number of columns used to display a character in this charset.
Only used in TTY mode. (Under X, the actual width of a
character can be derived from the font used to display the
'chars Number of characters in each dimension (94 or 96).
Defaults to 94. Note that if the dimension is 2, the
character set thus described is 94x94 or 96x96.
+ If the UTF-2000 feature is enabled, 128 or 256 are also available.
'final Final byte of ISO 2022 escape sequence. Must be
supplied. Each combination of (DIMENSION, CHARS) defines a
separate namespace for final bytes. Note that ISO
is passed the octets of the character, with the high
bit cleared and set depending upon whether the value
of the 'graphic property is 0 or 1.
+'mother [UTF-2000 only] Base coded-charset.
+'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
+'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
+'code-offset [UTF-2000 only] Offset for a code-point of a base
+ coded-charset.
+'conversion [UTF-2000 only] Conversion for a code-point of a base
+ coded-charset (94x60 or 94x94x60).
*/
(name, doc_string, props))
{
if (NILP (defined_only))
c = DECODE_CHAR (charset, c);
else
- c = DECODE_DEFINED_CHAR (charset, c);
+ c = decode_defined_char (charset, c);
return c >= 0 ? make_char (c) : Qnil;
}
defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
defsymbol (&Qchinese_big5, "chinese-big5");
- defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
+ /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
build_string ("UCS"),
build_string ("ISO/IEC 10646"),
build_string (""),
- Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_bmp);
Vcharset_ucs_bmp =
make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
build_string ("UCS for CNS 11643"),
build_string ("ISO/IEC 10646 for CNS 11643"),
build_string (""),
- Qnil, 0, 0, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_jis);
Vcharset_ucs_jis =
make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2, 2, 0, CHARSET_LEFT_TO_RIGHT,
build_string ("UCS for JIS"),
build_string ("UCS for JIS X 0208, 0212 and 0213"),
- build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
+ build_string
+ ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
build_string (""),
- Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_ks);
Vcharset_ucs_ks =
make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
build_string ("UCS for CCS defined by KS"),
build_string ("ISO/IEC 10646 for Korean Standards"),
build_string (""),
- Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_big5);
Vcharset_ucs_big5 =
make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
build_string ("UCS for Big5"),
build_string ("ISO/IEC 10646 for Big5"),
build_string (""),
- Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
#else
# define MIN_CHAR_THAI 0
# define MAX_CHAR_THAI 0
build_string ("Big5"),
build_string ("Big5"),
build_string ("Big5 Chinese traditional"),
- build_string ("big5"),
+ build_string ("big5-0"),
Qnil,
- 0 /* MIN_CHAR_BIG5_CDP */,
- 0 /* MAX_CHAR_BIG5_CDP */, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
+ MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
+#if 0
staticpro (&Vcharset_chinese_big5_cdp);
Vcharset_chinese_big5_cdp =
make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
build_string ("big5\\.cdp-0"),
Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
+#endif
#define DEF_HANZIKU(n) \
staticpro (&Vcharset_ideograph_hanziku_##n); \
Vcharset_ideograph_hanziku_##n = \