Lisp_Object Qleading_byte;
Lisp_Object Qshort_name, Qlong_name;
#ifdef UTF2000
-Lisp_Object Qmin_code, Qmax_code;
-Lisp_Object Qmother, Qconversion, Q94x60;
+Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
+Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
#endif
Lisp_Object Qascii,
Emchar
decode_builtin_char (Lisp_Object charset, int code_point)
{
+ Lisp_Object mother = XCHARSET_MOTHER (charset);
int final;
- if (EQ (charset, Vcharset_chinese_big5))
+ if ( CHARSETP (mother) ) /* charset has a mother charset: convert the code point and decode it there */
+ {
+ int code = code_point; /* default: pass the code point through unconverted */
+
+ if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
+ {
+ int row = code_point >> 8; /* row: bits 8 and up */
+ int cell = code_point & 255; /* cell: low 8 bits */
+
+ if (row < 16 + 32) /* rows below 48 are rejected */
+ return -1;
+ else if (row < 16 + 32 + 30) /* rows 48..77 map to row indices 0..29 */
+ code = (row - (16 + 32)) * 94 + cell - 33; /* 94 cells per row, cell biased by 33 */
+ else if (row < 18 + 32 + 30) /* rows 78 and 79 are rejected */
+ return -1;
+ else if (row < 18 + 32 + 60) /* rows 80..109 map to row indices 30..59 */
+ code = (row - (18 + 32)) * 94 + cell - 33;
+ } /* NOTE(review): rows >= 110 match no branch, so code stays equal to code_point; cell is not range-checked - confirm intended */
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
+ {
+ int plane = code_point >> 16; /* plane: bits 16 and up */
+ int row = (code_point >> 8) & 255; /* row: bits 8..15 */
+ int cell = code_point & 255; /* cell: low 8 bits */
+
+ if (row < 16 + 32) /* same row windows as the 94x60 case above */
+ return -1;
+ else if (row < 16 + 32 + 30)
+ code
+ = (plane - 33) * 94 * 60 /* each plane spans 94 * 60 codes; plane biased by 33 */
+ + (row - (16 + 32)) * 94
+ + cell - 33;
+ else if (row < 18 + 32 + 30)
+ return -1;
+ else if (row < 18 + 32 + 60)
+ code
+ = (plane - 33) * 94 * 60
+ + (row - (18 + 32)) * 94
+ + cell - 33;
+ } /* NOTE(review): as above, rows >= 110 leave code equal to code_point; plane is not range-checked */
+ return DECODE_CHAR (mother, code + XCHARSET_CODE_OFFSET(charset)); /* add this charset's code offset, then decode in the mother charset */
+ }
+ else if (EQ (charset, Vcharset_chinese_big5))
{
int c1 = code_point >> 8;
int c2 = code_point & 0xFF;
((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
* XCHARSET_CHARS (charset)
+ (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
- - XCHARSET_CODE_OFFSET (charset) + XCHARSET_MIN_CODE (charset);
+ + XCHARSET_CODE_OFFSET (charset);
if ((cid < XCHARSET_MIN_CODE (charset))
|| (XCHARSET_MAX_CODE (charset) < cid))
return -1;
else
{
Lisp_Object mother = XCHARSET_MOTHER (charset);
+ int min = XCHARSET_MIN_CODE (charset);
+ int max = XCHARSET_MAX_CODE (charset);
+ int code;
if ( CHARSETP (mother) )
+ code = charset_code_point (mother, ch);
+ else
+ code = ch;
+ if ( (min <= code) && (code <= max) )
{
- int min = XCHARSET_MIN_CODE (charset);
- int max = XCHARSET_MAX_CODE (charset);
- int code = charset_code_point (mother, ch);
+ int d = code - XCHARSET_CODE_OFFSET (charset);
- if ( (min <= code) && (code <= max) )
+ if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
{
- if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
- {
- int m = code - min;
- int row = m / 94;
- int cell = m % 94 + 33;
-
- if (row < 30)
- row += 16 + 32;
- else
- row += 18 + 32;
- return (row << 8) | cell;
- }
+ int row = d / 94;
+ int cell = d % 94 + 33;
+
+ if (row < 30)
+ row += 16 + 32;
else
- return code;
+ row += 18 + 32;
+ return (row << 8) | cell;
}
- }
- }
- return range_charset_code_point (charset, ch);
-}
-
-int
-range_charset_code_point (Lisp_Object charset, Emchar ch)
-{
- int d;
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
+ {
+ int plane = d / (94 * 60) + 33;
+ int row = (d % (94 * 60)) / 94;
+ int cell = d % 94 + 33;
- if ((XCHARSET_MIN_CODE (charset) <= ch)
- && (ch <= XCHARSET_MAX_CODE (charset)))
- {
- d = ch - XCHARSET_MIN_CODE (charset) + XCHARSET_CODE_OFFSET (charset);
-
- if (XCHARSET_CHARS (charset) == 256)
- return d;
- else if (XCHARSET_DIMENSION (charset) == 1)
- return d + XCHARSET_BYTE_OFFSET (charset);
- else if (XCHARSET_DIMENSION (charset) == 2)
- return
- ((d / XCHARSET_CHARS (charset)
- + XCHARSET_BYTE_OFFSET (charset)) << 8)
- | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
- else if (XCHARSET_DIMENSION (charset) == 3)
- return
- ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
- + XCHARSET_BYTE_OFFSET (charset)) << 16)
- | ((d / XCHARSET_CHARS (charset)
- % XCHARSET_CHARS (charset)
- + XCHARSET_BYTE_OFFSET (charset)) << 8)
- | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
- else /* if (XCHARSET_DIMENSION (charset) == 4) */
- return
- ((d / (XCHARSET_CHARS (charset)
- * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
- + XCHARSET_BYTE_OFFSET (charset)) << 24)
- | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
- % XCHARSET_CHARS (charset)
- + XCHARSET_BYTE_OFFSET (charset)) << 16)
- | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
- + XCHARSET_BYTE_OFFSET (charset)) << 8)
- | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
- }
- else if (XCHARSET_CODE_OFFSET (charset) == 0)
- {
- if (XCHARSET_DIMENSION (charset) == 1)
- {
- if (XCHARSET_CHARS (charset) == 94)
+ if (row < 30)
+ row += 16 + 32;
+ else
+ row += 18 + 32;
+ return (plane << 16) | (row << 8) | cell;
+ }
+ else if (XCHARSET_CHARS (charset) == 94) /* 94-character sets: one byte per dimension, each a base-94 digit of d plus 33 */
{
- if (((d = ch - (MIN_CHAR_94
- + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
- && (d < 94))
+ if (XCHARSET_DIMENSION (charset) == 1)
return d + 33;
+ else if (XCHARSET_DIMENSION (charset) == 2)
+ return ((d / 94 + 33) << 8) | (d % 94 + 33); /* high digit in bits 8..15, low digit in bits 0..7 */
+ else if (XCHARSET_DIMENSION (charset) == 3)
+ return
+ ( (d / (94 * 94) + 33) << 16) /* most significant digit goes in the highest byte */
+ | ((d / 94 % 94 + 33) << 8)
+ | (d % 94 + 33);
+ else /* if (XCHARSET_DIMENSION (charset) == 4) */
+ return
+ ( (d / (94 * 94 * 94) + 33) << 24)
+ | ((d / (94 * 94) % 94 + 33) << 16)
+ | ((d / 94 % 94 + 33) << 8)
+ | (d % 94 + 33);
}
else if (XCHARSET_CHARS (charset) == 96)
{
- if (((d = ch - (MIN_CHAR_96
- + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
- && (d < 96))
+ if (XCHARSET_DIMENSION (charset) == 1) /* 96-character sets: same packing, base-96 digits plus 32 */
return d + 32;
+ else if (XCHARSET_DIMENSION (charset) == 2)
+ return ((d / 96 + 32) << 8) | (d % 96 + 32);
+ else if (XCHARSET_DIMENSION (charset) == 3)
+ return
+ ( (d / (96 * 96) + 32) << 16)
+ | ((d / 96 % 96 + 32) << 8)
+ | (d % 96 + 32);
+ else /* if (XCHARSET_DIMENSION (charset) == 4) */
+ return
+ ( (d / (96 * 96 * 96) + 32) << 24)
+ | ((d / (96 * 96) % 96 + 32) << 16)
+ | ((d / 96 % 96 + 32) << 8)
+ | (d % 96 + 32);
}
else
- return -1;
+ return code - XCHARSET_CODE_OFFSET (charset); /* any other charset size: return the code relative to the code offset, not split into bytes */
}
- else if (XCHARSET_DIMENSION (charset) == 2)
+ else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
+ (XCHARSET_CODE_OFFSET (charset)
+ == XCHARSET_MIN_CODE (charset)) )
{
- if (XCHARSET_CHARS (charset) == 94)
+ int d;
+
+ if (XCHARSET_DIMENSION (charset) == 1)
{
- if (((d = ch - (MIN_CHAR_94x94
- + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
- >= 0)
- && (d < 94 * 94))
- return (((d / 94) + 33) << 8) | (d % 94 + 33);
+ if (XCHARSET_CHARS (charset) == 94)
+ {
+ if (((d = ch - (MIN_CHAR_94
+ + (XCHARSET_FINAL (charset) - '0') * 94))
+ >= 0)
+ && (d < 94))
+ return d + 33;
+ }
+ else if (XCHARSET_CHARS (charset) == 96)
+ {
+ if (((d = ch - (MIN_CHAR_96
+ + (XCHARSET_FINAL (charset) - '0') * 96))
+ >= 0)
+ && (d < 96))
+ return d + 32;
+ }
+ else
+ return -1;
}
- else if (XCHARSET_CHARS (charset) == 96)
+ else if (XCHARSET_DIMENSION (charset) == 2)
{
- if (((d = ch - (MIN_CHAR_96x96
- + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
- >= 0)
- && (d < 96 * 96))
- return (((d / 96) + 32) << 8) | (d % 96 + 32);
+ if (XCHARSET_CHARS (charset) == 94)
+ {
+ if (((d = ch - (MIN_CHAR_94x94
+ +
+ (XCHARSET_FINAL (charset) - '0') * 94 * 94))
+ >= 0)
+ && (d < 94 * 94))
+ return (((d / 94) + 33) << 8) | (d % 94 + 33);
+ }
+ else if (XCHARSET_CHARS (charset) == 96)
+ {
+ if (((d = ch - (MIN_CHAR_96x96
+ +
+ (XCHARSET_FINAL (charset) - '0') * 96 * 96))
+ >= 0)
+ && (d < 96 * 96))
+ return (((d / 96) + 32) << 8) | (d % 96 + 32);
+ }
+ else
+ return -1;
}
- else
- return -1;
}
}
return -1;
Lisp_Object charset;
Lisp_Object ccl_program = Qnil;
Lisp_Object short_name = Qnil, long_name = Qnil;
- int min_code = 0, max_code = 0;
Lisp_Object mother = Qnil;
+ int min_code = 0, max_code = 0, code_offset = 0;
int byte_offset = -1;
int conversion = 0;
}
#ifdef UTF2000
+ else if (EQ (keyword, Qmother))
+ {
+ mother = Fget_charset (value);
+ }
+
else if (EQ (keyword, Qmin_code))
{
CHECK_INT (value);
- min_code = XINT (value);
- if (min_code < 0)
- {
- min_code = (~(-1 - min_code)) & 0x7FFFFFFF;
- }
+ min_code = XUINT (value);
}
else if (EQ (keyword, Qmax_code))
{
CHECK_INT (value);
- max_code = XINT (value);
- if (max_code < 0)
- {
- max_code = (~(-1 - max_code)) & 0x7FFFFFFF;
- }
+ max_code = XUINT (value);
}
- else if (EQ (keyword, Qmother))
+ else if (EQ (keyword, Qcode_offset))
{
- mother = Fget_charset (value);
+ CHECK_INT (value);
+ code_offset = XUINT (value);
}
else if (EQ (keyword, Qconversion))
{
if (EQ (value, Q94x60))
conversion = CONVERSION_94x60;
+ else if (EQ (value, Q94x94x60)) /* newly accepted conversion symbol */
+ conversion = CONVERSION_94x94x60;
+ else /* any other value is now reported via signal_simple_error; the old code had no else branch here */
+ signal_simple_error ("Unrecognized conversion", value);
}
#endif
charset = make_charset (id, name, chars, dimension, columns, graphic,
final, direction, short_name, long_name,
doc_string, registry,
- Qnil, min_code, max_code, 0, byte_offset,
+ Qnil, min_code, max_code, code_offset, byte_offset,
mother, conversion);
if (!NILP (ccl_program))
XCHARSET_CCL_PROGRAM (charset) = ccl_program;
DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
Return code-point of CHARACTER in specified CHARSET.
*/
- (charset, character))
+ (character, charset))
{
int code_point;
defsymbol (&Qshort_name, "short-name");
defsymbol (&Qlong_name, "long-name");
#ifdef UTF2000
+ defsymbol (&Qmother, "mother");
defsymbol (&Qmin_code, "min-code");
defsymbol (&Qmax_code, "max-code");
- defsymbol (&Qmother, "mother");
+ defsymbol (&Qcode_offset, "code-offset");
defsymbol (&Qconversion, "conversion");
defsymbol (&Q94x60, "94x60");
+ defsymbol (&Q94x94x60, "94x94x60");
#endif
defsymbol (&Ql2r, "l2r");
build_string ("UCS-SMP"),
build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
build_string ("UCS00-1"),
- Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
+ MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_sip);
Vcharset_ucs_sip =
make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
build_string ("UCS-SIP"),
build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
- Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
+ MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_cns);
Vcharset_ucs_cns =
make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
build_string ("Control characters"),
build_string ("Control characters 128-191"),
build_string (""),
- Qnil, 0x80, 0x9F, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_latin_iso8859_1);
Vcharset_latin_iso8859_1 =
make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
build_string ("ISO8859-1 (Latin-1)"),
build_string ("ISO8859-1 (Latin-1)"),
build_string ("iso8859-1"),
- Qnil, 0xA0, 0xFF, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_latin_iso8859_2);
Vcharset_latin_iso8859_2 =
make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
build_string ("TIS620 (Thai)"),
build_string ("TIS620.2529 (Thai)"),
build_string ("tis620"),
- Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
+ MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_greek_iso8859_7);
Vcharset_greek_iso8859_7 =
make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
build_string ("jisx0208\\.1990"),
Qnil,
MIN_CHAR_JIS_X0208_1990,
- MAX_CHAR_JIS_X0208_1990, 0, 33,
+ MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
Qnil, CONVERSION_IDENTICAL);
#endif
staticpro (&Vcharset_korean_ksc5601);
build_string ("Big5 + CDP extension"),
build_string ("Big5 with CDP extension"),
build_string ("big5\\.cdp-0"),
- Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
+ MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
#define DEF_HANZIKU(n) \
staticpro (&Vcharset_ideograph_hanziku_##n); \
Vcharset_ideograph_hanziku_##n = \
build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
build_string \
("hanziku-"#n"$"), \
- Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0, \
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
+ MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
DEF_HANZIKU (1);
DEF_HANZIKU (2);
DEF_HANZIKU (3);
build_string ("JEF + CHINA3"),
build_string ("JEF + CHINA3 private characters"),
build_string ("china3jef-0"),
- Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
+ MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ideograph_cbeta);
Vcharset_ideograph_cbeta =
make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
build_string ("CBETA"),
build_string ("CBETA private characters"),
build_string ("cbeta-0"),
- Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
+ MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ideograph_gt);
Vcharset_ideograph_gt =
make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
build_string ("GT"),
build_string ("GT"),
build_string (""),
- Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
+ MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
#define DEF_GT_PJ(n) \
staticpro (&Vcharset_ideograph_gt_pj_##n); \
Vcharset_ideograph_gt_pj_##n = \
build_string
("Daikanwa dictionary (second revised version)"),
build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
- Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0,
- Qnil, CONVERSION_IDENTICAL);
+ Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
+ MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ethiopic_ucs);
Vcharset_ethiopic_ucs =
build_string ("Ethiopic (UCS)"),
build_string ("Ethiopic of UCS"),
build_string ("Ethiopic-Unicode"),
- Qnil, 0x1200, 0x137F, 0x1200, 0,
+ Qnil, 0x1200, 0x137F, 0, 0,
Qnil, CONVERSION_IDENTICAL);
#endif
staticpro (&Vcharset_chinese_big5_1);