Lisp_Object Qleading_byte;
Lisp_Object Qshort_name, Qlong_name;
#ifdef UTF2000
-Lisp_Object Qmin_code, Qmax_code;
-Lisp_Object Qmother, Qconversion, Q94x60;
+Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
+Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
#endif
Lisp_Object Qascii,
Emchar
decode_builtin_char (Lisp_Object charset, int code_point)
{
+ Lisp_Object mother = XCHARSET_MOTHER (charset);
int final;
- if (EQ (charset, Vcharset_chinese_big5))
+ if ( CHARSETP (mother) )
+ {
+ int code = code_point;
+
+ if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
+ {
+ int row = code_point >> 8;
+ int cell = code_point & 255;
+
+ if (row < 16 + 32)
+ return -1;
+ else if (row < 16 + 32 + 30)
+ code = (row - (16 + 32)) * 94 + cell - 33;
+ else if (row < 18 + 32 + 30)
+ return -1;
+ else if (row < 18 + 32 + 60)
+ code = (row - (18 + 32)) * 94 + cell - 33;
+ }
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
+ {
+ int plane = code_point >> 16;
+ int row = (code_point >> 8) & 255;
+ int cell = code_point & 255;
+
+ if (row < 16 + 32)
+ return -1;
+ else if (row < 16 + 32 + 30)
+ code
+ = (plane - 33) * 94 * 60
+ + (row - (16 + 32)) * 94
+ + cell - 33;
+ else if (row < 18 + 32 + 30)
+ return -1;
+ else if (row < 18 + 32 + 60)
+ code
+ = (plane - 33) * 94 * 60
+ + (row - (18 + 32)) * 94
+ + cell - 33;
+ }
+ return DECODE_CHAR (mother, code + XCHARSET_CODE_OFFSET(charset));
+ }
+ else if (EQ (charset, Vcharset_chinese_big5))
{
int c1 = code_point >> 8;
int c2 = code_point & 0xFF;
((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
* XCHARSET_CHARS (charset)
+ (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
- - XCHARSET_CODE_OFFSET (charset) + XCHARSET_MIN_CODE (charset);
+ + XCHARSET_CODE_OFFSET (charset);
if ((cid < XCHARSET_MIN_CODE (charset))
|| (XCHARSET_MAX_CODE (charset) < cid))
return -1;
else
{
Lisp_Object mother = XCHARSET_MOTHER (charset);
+ int min = XCHARSET_MIN_CODE (charset);
+ int max = XCHARSET_MAX_CODE (charset);
+ int code;
if ( CHARSETP (mother) )
+ code = charset_code_point (mother, ch);
+ else
+ code = ch;
+ if ( (min <= code) && (code <= max) )
{
- int min = XCHARSET_MIN_CODE (charset);
- int max = XCHARSET_MAX_CODE (charset);
- int code = charset_code_point (mother, ch);
+ int d = code - XCHARSET_CODE_OFFSET (charset);
- if ( (min <= code) && (code <= max) )
+ if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
{
- if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
- {
- int m = code - min;
- int row = m / 94;
- int cell = m % 94 + 33;
-
- if (row < 30)
- row += 16 + 32;
- else
- row += 18 + 32;
- return (row << 8) | cell;
- }
+ int row = d / 94;
+ int cell = d % 94 + 33;
+
+ if (row < 30)
+ row += 16 + 32;
else
- return code - min + XCHARSET_CODE_OFFSET (charset);
+ row += 18 + 32;
+ return (row << 8) | cell;
}
- }
- }
- return range_charset_code_point (charset, ch);
-}
-
-int
-range_charset_code_point (Lisp_Object charset, Emchar ch)
-{
- int d;
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
+ {
+ int plane = d / (94 * 60) + 33;
+ int row = (d % (94 * 60)) / 94;
+ int cell = d % 94 + 33;
- if ((XCHARSET_MIN_CODE (charset) <= ch)
- && (ch <= XCHARSET_MAX_CODE (charset)))
- {
- d = ch - XCHARSET_MIN_CODE (charset) + XCHARSET_CODE_OFFSET (charset);
-
- if (XCHARSET_CHARS (charset) == 256)
- return d;
- else if (XCHARSET_DIMENSION (charset) == 1)
- return d + XCHARSET_BYTE_OFFSET (charset);
- else if (XCHARSET_DIMENSION (charset) == 2)
- return
- ((d / XCHARSET_CHARS (charset)
- + XCHARSET_BYTE_OFFSET (charset)) << 8)
- | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
- else if (XCHARSET_DIMENSION (charset) == 3)
- return
- ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
- + XCHARSET_BYTE_OFFSET (charset)) << 16)
- | ((d / XCHARSET_CHARS (charset)
- % XCHARSET_CHARS (charset)
- + XCHARSET_BYTE_OFFSET (charset)) << 8)
- | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
- else /* if (XCHARSET_DIMENSION (charset) == 4) */
- return
- ((d / (XCHARSET_CHARS (charset)
- * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
- + XCHARSET_BYTE_OFFSET (charset)) << 24)
- | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
- % XCHARSET_CHARS (charset)
- + XCHARSET_BYTE_OFFSET (charset)) << 16)
- | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
- + XCHARSET_BYTE_OFFSET (charset)) << 8)
- | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
- }
- else if (XCHARSET_CODE_OFFSET (charset) == 0)
- {
- if (XCHARSET_DIMENSION (charset) == 1)
- {
- if (XCHARSET_CHARS (charset) == 94)
+ if (row < 30)
+ row += 16 + 32;
+ else
+ row += 18 + 32;
+ return (plane << 16) | (row << 8) | cell;
+ }
+ else if (XCHARSET_CHARS (charset) == 94)
{
- if (((d = ch - (MIN_CHAR_94
- + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
- && (d < 94))
+ if (XCHARSET_DIMENSION (charset) == 1)
return d + 33;
+ else if (XCHARSET_DIMENSION (charset) == 2)
+ return ((d / 94 + 33) << 8) | (d % 94 + 33);
+ else if (XCHARSET_DIMENSION (charset) == 3)
+ return
+ ( (d / (94 * 94) + 33) << 16)
+ | ((d / 94 % 94 + 33) << 8)
+ | (d % 94 + 33);
+ else /* if (XCHARSET_DIMENSION (charset) == 4) */
+ return
+ ( (d / (94 * 94 * 94) + 33) << 24)
+ | ((d / (94 * 94) % 94 + 33) << 16)
+ | ((d / 94 % 94 + 33) << 8)
+ | (d % 94 + 33);
}
else if (XCHARSET_CHARS (charset) == 96)
{
- if (((d = ch - (MIN_CHAR_96
- + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
- && (d < 96))
+ if (XCHARSET_DIMENSION (charset) == 1)
return d + 32;
+ else if (XCHARSET_DIMENSION (charset) == 2)
+ return ((d / 96 + 32) << 8) | (d % 96 + 32);
+ else if (XCHARSET_DIMENSION (charset) == 3)
+ return
+ ( (d / (96 * 96) + 32) << 16)
+ | ((d / 96 % 96 + 32) << 8)
+ | (d % 96 + 32);
+ else /* if (XCHARSET_DIMENSION (charset) == 4) */
+ return
+ ( (d / (96 * 96 * 96) + 32) << 24)
+ | ((d / (96 * 96) % 96 + 32) << 16)
+ | ((d / 96 % 96 + 32) << 8)
+ | (d % 96 + 32);
}
else
- return -1;
+ return code - XCHARSET_CODE_OFFSET (charset);
}
- else if (XCHARSET_DIMENSION (charset) == 2)
+ else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
+ (XCHARSET_CODE_OFFSET (charset)
+ == XCHARSET_MIN_CODE (charset)) )
{
- if (XCHARSET_CHARS (charset) == 94)
+ int d;
+
+ if (XCHARSET_DIMENSION (charset) == 1)
{
- if (((d = ch - (MIN_CHAR_94x94
- + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
- >= 0)
- && (d < 94 * 94))
- return (((d / 94) + 33) << 8) | (d % 94 + 33);
+ if (XCHARSET_CHARS (charset) == 94)
+ {
+ if (((d = ch - (MIN_CHAR_94
+ + (XCHARSET_FINAL (charset) - '0') * 94))
+ >= 0)
+ && (d < 94))
+ return d + 33;
+ }
+ else if (XCHARSET_CHARS (charset) == 96)
+ {
+ if (((d = ch - (MIN_CHAR_96
+ + (XCHARSET_FINAL (charset) - '0') * 96))
+ >= 0)
+ && (d < 96))
+ return d + 32;
+ }
+ else
+ return -1;
}
- else if (XCHARSET_CHARS (charset) == 96)
+ else if (XCHARSET_DIMENSION (charset) == 2)
{
- if (((d = ch - (MIN_CHAR_96x96
- + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
- >= 0)
- && (d < 96 * 96))
- return (((d / 96) + 32) << 8) | (d % 96 + 32);
+ if (XCHARSET_CHARS (charset) == 94)
+ {
+ if (((d = ch - (MIN_CHAR_94x94
+ +
+ (XCHARSET_FINAL (charset) - '0') * 94 * 94))
+ >= 0)
+ && (d < 94 * 94))
+ return (((d / 94) + 33) << 8) | (d % 94 + 33);
+ }
+ else if (XCHARSET_CHARS (charset) == 96)
+ {
+ if (((d = ch - (MIN_CHAR_96x96
+ +
+ (XCHARSET_FINAL (charset) - '0') * 96 * 96))
+ >= 0)
+ && (d < 96 * 96))
+ return (((d / 96) + 32) << 8) | (d % 96 + 32);
+ }
+ else
+ return -1;
}
- else
- return -1;
}
}
return -1;
this character set.
'dimension Number of octets used to index a character in this charset.
Either 1 or 2. Defaults to 1.
+ If the UTF-2000 feature is enabled, 3 and 4 are also allowed.
'columns Number of columns used to display a character in this charset.
Only used in TTY mode. (Under X, the actual width of a
character can be derived from the font used to display the
'chars Number of characters in each dimension (94 or 96).
Defaults to 94. Note that if the dimension is 2, the
character set thus described is 94x94 or 96x96.
+ If the UTF-2000 feature is enabled, 128 and 256 are also allowed.
'final Final byte of ISO 2022 escape sequence. Must be
supplied. Each combination of (DIMENSION, CHARS) defines a
separate namespace for final bytes. Note that ISO
is passed the octets of the character, with the high
bit cleared and set depending upon whether the value
of the 'graphic property is 0 or 1.
+'mother [UTF-2000 only] Base coded-charset.
+'min-code [UTF-2000 only] Minimum code-point of a base coded-charset.
+'max-code [UTF-2000 only] Maximum code-point of a base coded-charset.
+'code-offset [UTF-2000 only] Offset for a code-point of a base
+ coded-charset.
+'conversion [UTF-2000 only] Conversion for a code-point of a base
+ coded-charset (94x60 or 94x94x60).
*/
(name, doc_string, props))
{
Lisp_Object charset;
Lisp_Object ccl_program = Qnil;
Lisp_Object short_name = Qnil, long_name = Qnil;
- int min_code = 0, max_code = 0;
Lisp_Object mother = Qnil;
+ int min_code = 0, max_code = 0, code_offset = 0;
int byte_offset = -1;
int conversion = 0;
}
#ifdef UTF2000
+ else if (EQ (keyword, Qmother))
+ {
+ mother = Fget_charset (value);
+ }
+
else if (EQ (keyword, Qmin_code))
{
CHECK_INT (value);
- min_code = XINT (value);
- if (min_code < 0)
- {
- min_code = (~(-1 - min_code)) & 0x7FFFFFFF;
- }
+ min_code = XUINT (value);
}
else if (EQ (keyword, Qmax_code))
{
CHECK_INT (value);
- max_code = XINT (value);
- if (max_code < 0)
- {
- max_code = (~(-1 - max_code)) & 0x7FFFFFFF;
- }
+ max_code = XUINT (value);
}
- else if (EQ (keyword, Qmother))
+ else if (EQ (keyword, Qcode_offset))
{
- mother = Fget_charset (value);
+ CHECK_INT (value);
+ code_offset = XUINT (value);
}
else if (EQ (keyword, Qconversion))
{
if (EQ (value, Q94x60))
conversion = CONVERSION_94x60;
+ else if (EQ (value, Q94x94x60))
+ conversion = CONVERSION_94x94x60;
+ else
+ signal_simple_error ("Unrecognized conversion", value);
}
#endif
charset = make_charset (id, name, chars, dimension, columns, graphic,
final, direction, short_name, long_name,
doc_string, registry,
- Qnil, min_code, max_code, 0, byte_offset,
+ Qnil, min_code, max_code, code_offset, byte_offset,
mother, conversion);
if (!NILP (ccl_program))
XCHARSET_CCL_PROGRAM (charset) = ccl_program;
DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
Return code-point of CHARACTER in specified CHARSET.
*/
- (charset, character))
+ (character, charset))
{
int code_point;
defsymbol (&Qshort_name, "short-name");
defsymbol (&Qlong_name, "long-name");
#ifdef UTF2000
+ defsymbol (&Qmother, "mother");
defsymbol (&Qmin_code, "min-code");
defsymbol (&Qmax_code, "max-code");
- defsymbol (&Qmother, "mother");
+ defsymbol (&Qcode_offset, "code-offset");
defsymbol (&Qconversion, "conversion");
defsymbol (&Q94x60, "94x60");
+ defsymbol (&Q94x94x60, "94x94x60");
#endif
defsymbol (&Ql2r, "l2r");
build_string ("UCS-SMP"),
build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
build_string ("UCS00-1"),
- Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
+ MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_sip);
Vcharset_ucs_sip =
make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
build_string ("UCS-SIP"),
build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
- Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
+ MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_cns);
Vcharset_ucs_cns =
make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
build_string ("Control characters"),
build_string ("Control characters 128-191"),
build_string (""),
- Qnil, 0x80, 0x9F, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_latin_iso8859_1);
Vcharset_latin_iso8859_1 =
make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
build_string ("ISO8859-1 (Latin-1)"),
build_string ("ISO8859-1 (Latin-1)"),
build_string ("iso8859-1"),
- Qnil, 0xA0, 0xFF, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_latin_iso8859_2);
Vcharset_latin_iso8859_2 =
make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
build_string ("TIS620 (Thai)"),
build_string ("TIS620.2529 (Thai)"),
build_string ("tis620"),
- Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
+ MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_greek_iso8859_7);
Vcharset_greek_iso8859_7 =
make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
build_string ("jisx0208\\.1990"),
Qnil,
MIN_CHAR_JIS_X0208_1990,
- MAX_CHAR_JIS_X0208_1990, 0, 33,
+ MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
Qnil, CONVERSION_IDENTICAL);
#endif
staticpro (&Vcharset_korean_ksc5601);
build_string ("Big5 + CDP extension"),
build_string ("Big5 with CDP extension"),
build_string ("big5\\.cdp-0"),
- Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
+ MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
#define DEF_HANZIKU(n) \
staticpro (&Vcharset_ideograph_hanziku_##n); \
Vcharset_ideograph_hanziku_##n = \
build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
build_string \
("hanziku-"#n"$"), \
- Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0, \
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
+ MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
DEF_HANZIKU (1);
DEF_HANZIKU (2);
DEF_HANZIKU (3);
build_string ("JEF + CHINA3"),
build_string ("JEF + CHINA3 private characters"),
build_string ("china3jef-0"),
- Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
+ MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ideograph_cbeta);
Vcharset_ideograph_cbeta =
make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
build_string ("CBETA"),
build_string ("CBETA private characters"),
build_string ("cbeta-0"),
- Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
+ MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ideograph_gt);
Vcharset_ideograph_gt =
make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
build_string ("GT"),
build_string ("GT"),
build_string (""),
- Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
+ MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
#define DEF_GT_PJ(n) \
staticpro (&Vcharset_ideograph_gt_pj_##n); \
Vcharset_ideograph_gt_pj_##n = \
build_string
("Daikanwa dictionary (second revised version)"),
build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
- Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
+ MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ethiopic_ucs);
Vcharset_ethiopic_ucs =
build_string ("Ethiopic (UCS)"),
build_string ("Ethiopic of UCS"),
build_string ("Ethiopic-Unicode"),
- Qnil, 0x1200, 0x137F, 0x1200, 0,
+ Qnil, 0x1200, 0x137F, 0, 0,
Qnil, CONVERSION_IDENTICAL);
#endif
staticpro (&Vcharset_chinese_big5_1);