/* Functions to handle multilingual characters.
Copyright (C) 1992, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
- Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
+ Copyright (C) 1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko
This file is part of XEmacs.
/* Rewritten by Ben Wing <ben@xemacs.org>. */
-/* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
+/* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs CHISE. */
#include <config.h>
-#ifdef CHISE
+#ifdef HAVE_LIBCHISE
#include <chise.h>
#endif
#ifdef UTF2000
Lisp_Object Vcharset_chinese_cns11643_1;
Lisp_Object Vcharset_chinese_cns11643_2;
#ifdef UTF2000
+Lisp_Object Vcharset_system_char_id;
Lisp_Object Vcharset_ucs;
Lisp_Object Vcharset_ucs_bmp;
Lisp_Object Vcharset_ucs_smp;
put_char_ccs_code_point (Lisp_Object character,
Lisp_Object ccs, Lisp_Object value)
{
- if (!EQ (XCHARSET_NAME (ccs), Qmap_ucs)
- || !INTP (value)
- || (XCHAR (character) != XINT (value)))
+ if ( !(EQ (XCHARSET_NAME (ccs), Qmap_ucs)
+ && INTP (value) && (XINT (value) < 0xF0000))
+ || !INTP (value)
+ /* || (XCHAR (character) != XINT (value)) */ )
{
Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
int code_point;
Qmap_jis_x0208_1983,
Qmap_ks_x1001,
Qmap_jis_x0212,
- Qchinese_cns11643_1,
- Qchinese_cns11643_2,
+ Qmap_cns11643_1,
+ Qmap_cns11643_2,
#ifdef UTF2000
+ Qsystem_char_id,
Qmap_ucs, Qucs,
Qucs_bmp,
Qucs_smp,
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
+static int /* Convert CODE_POINT by the rule selected by CONV_TYPE.
+ Returns the converted code, or -1 when CONV_TYPE is none of the
+ CONVERSION_* values tested below or when the row of CODE_POINT
+ lies outside the ranges accepted below. */
+decode_ccs_conversion (int conv_type, int code_point)
+{
+ if ( conv_type == CONVERSION_IDENTICAL )
+ {
+ return code_point; /* passed through unchanged */
+ }
+ if ( conv_type == CONVERSION_94x60 )
+ {
+ int row = code_point >> 8; /* everything above the low octet */
+ int cell = code_point & 255; /* low octet */
+
+ if (row < 16 + 32) /* rows below 48 are rejected */
+ return -1;
+ else if (row < 16 + 32 + 30) /* rows 48..77 give row index 0..29 */
+ return (row - (16 + 32)) * 94 + cell - 33;
+ else if (row < 18 + 32 + 30) /* rows 78..79 are rejected */
+ return -1;
+ else if (row < 18 + 32 + 60) /* rows 80..109 give row index 30..59 */
+ return (row - (18 + 32)) * 94 + cell - 33;
+ } /* rows of 110 or more reach the final return -1 */
+ else if ( conv_type == CONVERSION_94x94x60 )
+ {
+ int plane = code_point >> 16; /* everything above the low two octets */
+ int row = (code_point >> 8) & 255; /* middle octet */
+ int cell = code_point & 255; /* low octet */
+
+ if (row < 16 + 32) /* same row windows as CONVERSION_94x60 */
+ return -1;
+ else if (row < 16 + 32 + 30)
+ return
+ (plane - 33) * 94 * 60 /* each plane spans 60 rows of 94 cells */
+ + (row - (16 + 32)) * 94
+ + cell - 33;
+ else if (row < 18 + 32 + 30)
+ return -1;
+ else if (row < 18 + 32 + 60)
+ return
+ (plane - 33) * 94 * 60
+ + (row - (18 + 32)) * 94
+ + cell - 33;
+ } /* rows of 110 or more reach the final return -1 */
+ else if ( conv_type == CONVERSION_BIG5_1 )
+ {
+ unsigned int I /* linear index in a 94-wide grid; both octets are
+ masked to 7 bits and offset by 33 */
+ = (((code_point >> 8) & 0x7F) - 33) * 94
+ + (( code_point & 0x7F) - 33);
+ unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1; /* the divisor is the same expression as BIG5_SAME_ROW (157) */
+ unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
+
+ b2 += b2 < 0x3F ? 0x40 : 0x62; /* offsets below 0x3F start at 0x40, the rest at 0xA1 */
+ return (b1 << 8) | b2; /* two-octet result: b1 high, b2 low */
+ }
+ else if ( conv_type == CONVERSION_BIG5_2 )
+ {
+ unsigned int I /* as for CONVERSION_BIG5_1, but the index is shifted by
+ (0xC9 - 0xA1) rows of BIG5_SAME_ROW, so b1 starts at 0xC9 */
+ = (((code_point >> 8) & 0x7F) - 33) * 94
+ + (( code_point & 0x7F) - 33)
+ + BIG5_SAME_ROW * (0xC9 - 0xA1);
+ unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
+ unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
+
+ b2 += b2 < 0x3F ? 0x40 : 0x62;
+ return (b1 << 8) | b2;
+ }
+ return -1; /* unhandled CONV_TYPE, or a row that matched no window */
+}
+
Emchar
-decode_defined_char (Lisp_Object ccs, int code_point)
+decode_defined_char (Lisp_Object ccs, int code_point, int without_inheritance)
{
int dim = XCHARSET_DIMENSION (ccs);
Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
}
if (CHARP (decoding_table))
return XCHAR (decoding_table);
-#ifdef HAVE_CHISE_CLIENT
+#ifdef HAVE_CHISE
if (EQ (decoding_table, Qunloaded))
{
char_id = load_char_decoding_entry_maybe (ccs, code_point);
}
-#endif
+#endif /* HAVE_CHISE */
if (char_id >= 0)
return char_id;
- else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
+ else if ( !without_inheritance
+ && CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
{
- if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
+ int code
+ = decode_ccs_conversion (XCHARSET_CONVERSION (ccs), code_point);
+
+ if (code >= 0)
{
+ code += XCHARSET_CODE_OFFSET(ccs);
if ( EQ (mother, Vcharset_ucs) )
- return DECODE_CHAR (mother, code_point);
+ return DECODE_CHAR (mother, code, without_inheritance);
else
- return decode_defined_char (mother, code_point);
- }
- else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_1 )
- {
- unsigned int I
- = (((code_point >> 8) & 0x7F) - 33) * 94
- + (( code_point & 0x7F) - 33);
- unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
- unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
-
- b2 += b2 < 0x3F ? 0x40 : 0x62;
- return decode_defined_char (mother, (b1 << 8) | b2);
- }
- else if ( XCHARSET_CONVERSION (ccs) == CONVERSION_BIG5_2 )
- {
- unsigned int I
- = (((code_point >> 8) & 0x7F) - 33) * 94
- + (( code_point & 0x7F) - 33)
- + BIG5_SAME_ROW * (0xC9 - 0xA1);
- unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
- unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
-
- b2 += b2 < 0x3F ? 0x40 : 0x62;
- return decode_defined_char (mother, (b1 << 8) | b2);
+ return decode_defined_char (mother, code,
+ without_inheritance);
}
}
return -1;
{
if ( CHARSETP (mother) )
{
- int code = code_point;
-
- if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
- {
- int row = code_point >> 8;
- int cell = code_point & 255;
+ int code
+ = decode_ccs_conversion (XCHARSET_CONVERSION (charset),
+ code_point);
- if (row < 16 + 32)
- return -1;
- else if (row < 16 + 32 + 30)
- code = (row - (16 + 32)) * 94 + cell - 33;
- else if (row < 18 + 32 + 30)
- return -1;
- else if (row < 18 + 32 + 60)
- code = (row - (18 + 32)) * 94 + cell - 33;
- }
- else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
- {
- int plane = code_point >> 16;
- int row = (code_point >> 8) & 255;
- int cell = code_point & 255;
-
- if (row < 16 + 32)
- return -1;
- else if (row < 16 + 32 + 30)
- code
- = (plane - 33) * 94 * 60
- + (row - (16 + 32)) * 94
- + cell - 33;
- else if (row < 18 + 32 + 30)
- return -1;
- else if (row < 18 + 32 + 60)
- code
- = (plane - 33) * 94 * 60
- + (row - (18 + 32)) * 94
- + cell - 33;
- }
- else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_1 )
- {
- unsigned int I
- = (((code_point >> 8) & 0x7F) - 33) * 94
- + (( code_point & 0x7F) - 33);
- unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
- unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
-
- b2 += b2 < 0x3F ? 0x40 : 0x62;
- code = (b1 << 8) | b2;
- }
- else if ( XCHARSET_CONVERSION (charset) == CONVERSION_BIG5_2 )
- {
- unsigned int I
- = (((code_point >> 8) & 0x7F) - 33) * 94
- + (( code_point & 0x7F) - 33)
- + BIG5_SAME_ROW * (0xC9 - 0xA1);
- unsigned char b1 = I / (0xFF - 0xA1 + 0x7F - 0x40) + 0xA1;
- unsigned char b2 = I % (0xFF - 0xA1 + 0x7F - 0x40);
-
- b2 += b2 < 0x3F ? 0x40 : 0x62;
- code = (b1 << 8) | b2;
- }
- return
- decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
+ if (code >= 0)
+ return
+ decode_builtin_char (mother,
+ code + XCHARSET_CODE_OFFSET(charset));
+ else
+ return -1;
}
else
{
exit (-1);
}
}
+ else if (defined_only)
+ return -1;
else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
( XCHARSET_MIN_CODE (charset) == 0 )
/*
return table;
}
-#ifdef HAVE_CHISE_CLIENT
+#ifdef HAVE_CHISE
DEFUN ("save-charset-mapping-table", Fsave_charset_mapping_table, 1, 1, 0, /*
Save mapping-table of CHARSET.
*/
{
struct Lisp_Charset *cs;
int byte_min, byte_max;
+#ifdef HAVE_LIBCHISE
+ CHISE_CCS dt_ccs;
+#else /* HAVE_LIBCHISE */
Lisp_Object db;
Lisp_Object db_file;
+#endif /* not HAVE_LIBCHISE */
charset = Fget_charset (charset);
cs = XCHARSET (charset);
+#ifdef HAVE_LIBCHISE
+ /* Open the CHISE data source and look up the CCS named after CHARSET.
+ This primitive returns a Lisp_Object, so a failure is reported as
+ Qnil; a bare C integer such as -1 is not a valid Lisp_Object. */
+ if ( open_chise_data_source_maybe () )
+ return Qnil;
+
+ dt_ccs
+ = chise_ds_get_ccs (default_chise_data_source,
+ XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
+ if (dt_ccs == NULL)
+ {
+ printf ("Can't open decoding-table %s\n",
+ XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(charset))));
+ return Qnil;
+ }
+#else /* HAVE_LIBCHISE */
db_file = char_attribute_system_db_file (CHARSET_NAME (cs),
Qsystem_char_id, 1);
db = Fopen_database (db_file, Qnil, Qnil, build_string ("w+"), Qnil);
+#endif /* not HAVE_LIBCHISE */
byte_min = CHARSET_BYTE_OFFSET (cs);
byte_max = byte_min + CHARSET_BYTE_SIZE (cs);
Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
if (CHARP (c))
- Fput_database (Fprin1_to_string (make_int (cell), Qnil),
- Fprin1_to_string (c, Qnil),
- db, Qt);
+ {
+#ifdef HAVE_LIBCHISE
+ chise_ccs_set_decoded_char (dt_ccs, cell, XCHAR (c));
+#else /* HAVE_LIBCHISE */
+ Fput_database (Fprin1_to_string (make_int (cell), Qnil),
+ Fprin1_to_string (c, Qnil),
+ db, Qt);
+#endif /* not HAVE_LIBCHISE */
+ }
}
}
break;
Lisp_Object c = get_ccs_octet_table (table_c, charset, cell);
if (CHARP (c))
- Fput_database (Fprin1_to_string (make_int ((row << 8)
- | cell),
- Qnil),
- Fprin1_to_string (c, Qnil),
- db, Qt);
+ {
+#ifdef HAVE_LIBCHISE
+ chise_ccs_set_decoded_char
+ (dt_ccs,
+ (row << 8) | cell, XCHAR (c));
+#else /* HAVE_LIBCHISE */
+ Fput_database (Fprin1_to_string (make_int ((row << 8)
+ | cell),
+ Qnil),
+ Fprin1_to_string (c, Qnil),
+ db, Qt);
+#endif /* not HAVE_LIBCHISE */
+ }
}
}
}
cell);
if (CHARP (c))
- Fput_database (Fprin1_to_string (make_int ((plane << 16)
- | (row << 8)
- | cell),
- Qnil),
- Fprin1_to_string (c, Qnil),
- db, Qt);
+ {
+#ifdef HAVE_LIBCHISE
+ chise_ccs_set_decoded_char
+ (dt_ccs,
+ (plane << 16)
+ | (row << 8)
+ | cell, XCHAR (c));
+#else /* HAVE_LIBCHISE */
+ Fput_database (Fprin1_to_string
+ (make_int ((plane << 16)
+ | (row << 8)
+ | cell),
+ Qnil),
+ Fprin1_to_string (c, Qnil),
+ db, Qt);
+#endif /* not HAVE_LIBCHISE */
+ }
}
}
}
= get_ccs_octet_table (table_c, charset, cell);
if (CHARP (c))
- Fput_database (Fprin1_to_string
- (make_int (( group << 24)
- | (plane << 16)
- | (row << 8)
- | cell),
- Qnil),
- Fprin1_to_string (c, Qnil),
- db, Qt);
+ {
+#ifdef HAVE_LIBCHISE
+ chise_ccs_set_decoded_char
+ (dt_ccs,
+ ( group << 24)
+ | (plane << 16)
+ | (row << 8)
+ | cell, XCHAR (c));
+#else /* HAVE_LIBCHISE */
+ Fput_database (Fprin1_to_string
+ (make_int (( group << 24)
+ | (plane << 16)
+ | (row << 8)
+ | cell),
+ Qnil),
+ Fprin1_to_string (c, Qnil),
+ db, Qt);
+#endif /* not HAVE_LIBCHISE */
+ }
}
}
}
}
}
}
+#ifdef HAVE_LIBCHISE
+ chise_ccs_sync (dt_ccs);
+ return Qnil;
+#else /* HAVE_LIBCHISE */
return Fclose_database (db);
+#endif /* not HAVE_LIBCHISE */
}
DEFUN ("reset-charset-mapping-table", Freset_charset_mapping_table, 1, 1, 0, /*
*/
(ccs))
{
+#ifdef HAVE_LIBCHISE
+ CHISE_CCS chise_ccs;
+#else
Lisp_Object db_file;
+#endif
ccs = Fget_charset (ccs);
+
+#ifdef HAVE_LIBCHISE
+ /* This primitive returns a Lisp_Object (Qt when the table is reset), so
+ failing to open the data source yields Qnil, like the missing-CCS
+ case just below, instead of the C integer -1. */
+ if ( open_chise_data_source_maybe () )
+ return Qnil;
+
+ chise_ccs = chise_ds_get_ccs (default_chise_data_source,
+ XSTRING_DATA (Fsymbol_name
+ (XCHARSET_NAME(ccs))));
+ if (chise_ccs == NULL)
+ return Qnil;
+#else
db_file = char_attribute_system_db_file (XCHARSET_NAME(ccs),
Qsystem_char_id, 0);
+#endif
- if (!NILP (Ffile_exists_p (db_file)))
+ if (
+#ifdef HAVE_LIBCHISE
+ chise_ccs_setup_db (chise_ccs, 0) == 0
+#else
+ !NILP (Ffile_exists_p (db_file))
+#endif
+ )
{
XCHARSET_DECODING_TABLE(ccs) = Qunloaded;
return Qt;
Emchar
load_char_decoding_entry_maybe (Lisp_Object ccs, int code_point)
{
-#ifdef CHISE
- Lisp_Object db_dir = Vexec_directory;
- CHISE_DS ds;
- CHISE_Decoding_Table *dt_ccs;
- int modemask;
- int accessmask = 0;
- DBTYPE real_subtype;
- int status;
+#ifdef HAVE_LIBCHISE
CHISE_Char_ID char_id;
- if (NILP (db_dir))
- db_dir = build_string ("../lib-src");
- db_dir = Fexpand_file_name (build_string ("char-db"), db_dir);
-
- status = chise_open_data_source (&ds, CHISE_DS_Berkeley_DB,
- XSTRING_DATA (db_dir));
- if (status)
- {
- chise_close_data_source (&ds);
- return -1;
- }
-
- modemask = 0755; /* rwxr-xr-x */
- real_subtype = DB_HASH;
- accessmask = DB_RDONLY;
-
- status
- = chise_open_decoding_table (&dt_ccs, &ds,
- XSTRING_DATA (Fsymbol_name
- (XCHARSET_NAME(ccs))),
- real_subtype,
- accessmask, modemask);
- if (status)
- {
- printf ("Can't open decoding-table %s\n",
- XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(ccs))));
- chise_close_decoding_table (dt_ccs);
- chise_close_data_source (&ds);
- return -1;
- }
+ if ( open_chise_data_source_maybe () )
+ return -1;
- char_id = chise_dt_get_char (dt_ccs, code_point);
- /*
- printf ("%s's 0x%X (%d) => 0x%X\n",
- XSTRING_DATA (Fsymbol_name (XCHARSET_NAME(ccs))),
- code_point, code_point, char_id);
- */
+ char_id
+ = chise_ds_decode_char (default_chise_data_source,
+ XSTRING_DATA(Fsymbol_name (XCHARSET_NAME(ccs))),
+ code_point);
if (char_id >= 0)
decoding_table_put_char (ccs, code_point, make_char (char_id));
else
decoding_table_put_char (ccs, code_point, Qnil);
- chise_close_decoding_table (dt_ccs);
-
- chise_close_data_source (&ds);
-
+ /* chise_ccst_close (dt_ccs); */
return char_id;
-#else
+#else /* HAVE_LIBCHISE */
Lisp_Object db;
Lisp_Object db_file
= char_attribute_system_db_file (XCHARSET_NAME(ccs), Qsystem_char_id,
Fclose_database (db);
}
return -1;
-#endif
+#endif /* not HAVE_LIBCHISE */
}
-#endif /* HAVE_CHISE_CLIENT */
+#endif /* HAVE_CHISE */
#endif /* UTF2000 */
\f
/************************************************************************/
#ifdef UTF2000
-DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
+DEFUN ("decode-char", Fdecode_char, 2, 4, 0, /*
Make a character from CHARSET and code-point CODE.
If DEFINED_ONLY is non-nil, builtin character is not returned.
+If WITHOUT_INHERITANCE is non-nil, inherited character is not returned.
If corresponding character is not found, nil is returned.
*/
- (charset, code, defined_only))
+ (charset, code, defined_only, without_inheritance))
{
int c;
if (XCHARSET_GRAPHIC (charset) == 1)
c &= 0x7F7F7F7F;
if (NILP (defined_only))
- c = DECODE_CHAR (charset, c);
+ c = DECODE_CHAR (charset, c, !NILP (without_inheritance));
else
- c = decode_defined_char (charset, c);
+ c = decode_defined_char (charset, c, !NILP (without_inheritance));
return c >= 0 ? make_char (c) : Qnil;
}
CHECK_INT (code);
if (EQ (charset, Vcharset_latin_viscii))
{
- Lisp_Object chr = Fdecode_char (charset, code, Qnil);
+ Lisp_Object chr = Fdecode_char (charset, code, Qnil, Qnil);
Lisp_Object ret;
if (!NILP (chr))
c &= 0x7F7F7F7F;
#endif
c = decode_builtin_char (charset, c);
- return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
+ return
+ c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil, Qnil);
}
#endif
DEFSUBR (Fcharset_id);
DEFSUBR (Fset_charset_ccl_program);
DEFSUBR (Fset_charset_registry);
+
#ifdef UTF2000
DEFSUBR (Fcharset_mapping_table);
DEFSUBR (Fset_charset_mapping_table);
-#ifdef HAVE_CHISE_CLIENT
+#ifdef HAVE_CHISE
DEFSUBR (Fsave_charset_mapping_table);
DEFSUBR (Freset_charset_mapping_table);
-#endif
-
+#endif /* HAVE_CHISE */
DEFSUBR (Fdecode_char);
DEFSUBR (Fdecode_builtin_char);
DEFSUBR (Fencode_char);
#endif
+
DEFSUBR (Fmake_char);
DEFSUBR (Fchar_charset);
DEFSUBR (Fchar_octet);
defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
defsymbol (&Qmap_ks_x1001, "=ks-x1001");
defsymbol (&Qmap_jis_x0212, "=jis-x0212");
- defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
- defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
+ defsymbol (&Qmap_cns11643_1, "=cns11643-1");
+ defsymbol (&Qmap_cns11643_2, "=cns11643-2");
#ifdef UTF2000
+ defsymbol (&Qsystem_char_id, "system-char-id");
defsymbol (&Qmap_ucs, "=ucs");
defsymbol (&Qucs, "ucs");
defsymbol (&Qucs_bmp, "ucs-bmp");
ease of access. */
#ifdef UTF2000
+ staticpro (&Vcharset_system_char_id);
+ Vcharset_system_char_id =
+ make_charset (LEADING_BYTE_SYSTEM_CHAR_ID, Qsystem_char_id, 256, 4,
+ 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("SCID"),
+ build_string ("CHAR-ID"),
+ build_string ("System char-id"),
+ build_string (""),
+ Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs);
Vcharset_ucs =
make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
build_string ("UCS"),
build_string ("ISO/IEC 10646"),
build_string (""),
- Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ Qnil, 0, 0xEFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_ucs_bmp);
Vcharset_ucs_bmp =
make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
#define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
staticpro (&Vcharset_chinese_cns11643_1);
Vcharset_chinese_cns11643_1 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-1"),
build_string ("CNS11643-1 (Chinese traditional)"),
Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
staticpro (&Vcharset_chinese_cns11643_2);
Vcharset_chinese_cns11643_2 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-2"),
build_string ("CNS11643-2 (Chinese traditional)"),