/* Functions to handle multilingual characters.
Copyright (C) 1992, 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Sun Microsystems, Inc.
+ Copyright (C) 1999,2000,2001,2002 MORIOKA Tomohiko
This file is part of XEmacs.
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
-/* Synched up with: FSF 20.3. Not in FSF. */
-
/* Rewritten by Ben Wing <ben@xemacs.org>. */
+/* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
+
#include <config.h>
+#ifdef UTF2000
+#include <limits.h>
+#endif
#include "lisp.h"
#include "buffer.h"
Lisp_Object Vcharset_latin_iso8859_9;
Lisp_Object Vcharset_japanese_jisx0208_1978;
Lisp_Object Vcharset_chinese_gb2312;
+Lisp_Object Vcharset_chinese_gb12345;
Lisp_Object Vcharset_japanese_jisx0208;
+Lisp_Object Vcharset_japanese_jisx0208_1990;
Lisp_Object Vcharset_korean_ksc5601;
Lisp_Object Vcharset_japanese_jisx0212;
Lisp_Object Vcharset_chinese_cns11643_1;
Lisp_Object Vcharset_chinese_cns11643_2;
#ifdef UTF2000
-Lisp_Object Vcharset_chinese_cns11643_3;
-Lisp_Object Vcharset_chinese_cns11643_4;
-Lisp_Object Vcharset_chinese_cns11643_5;
-Lisp_Object Vcharset_chinese_cns11643_6;
-Lisp_Object Vcharset_chinese_cns11643_7;
+Lisp_Object Vcharset_ucs;
Lisp_Object Vcharset_ucs_bmp;
+Lisp_Object Vcharset_ucs_smp;
+Lisp_Object Vcharset_ucs_sip;
+Lisp_Object Vcharset_ucs_gb;
+Lisp_Object Vcharset_ucs_cns;
+Lisp_Object Vcharset_ucs_jis;
+Lisp_Object Vcharset_ucs_ks;
+Lisp_Object Vcharset_ucs_big5;
+Lisp_Object Vcharset_latin_viscii;
+Lisp_Object Vcharset_latin_tcvn5712;
Lisp_Object Vcharset_latin_viscii_lower;
Lisp_Object Vcharset_latin_viscii_upper;
+Lisp_Object Vcharset_jis_x0208;
+Lisp_Object Vcharset_chinese_big5;
+Lisp_Object Vcharset_ideograph_hanziku_1;
+Lisp_Object Vcharset_ideograph_hanziku_2;
+Lisp_Object Vcharset_ideograph_hanziku_3;
+Lisp_Object Vcharset_ideograph_hanziku_4;
+Lisp_Object Vcharset_ideograph_hanziku_5;
+Lisp_Object Vcharset_ideograph_hanziku_6;
+Lisp_Object Vcharset_ideograph_hanziku_7;
+Lisp_Object Vcharset_ideograph_hanziku_8;
+Lisp_Object Vcharset_ideograph_hanziku_9;
+Lisp_Object Vcharset_ideograph_hanziku_10;
+Lisp_Object Vcharset_ideograph_hanziku_11;
+Lisp_Object Vcharset_ideograph_hanziku_12;
+Lisp_Object Vcharset_ideograph_cbeta;
+Lisp_Object Vcharset_ideograph_gt;
+Lisp_Object Vcharset_ideograph_gt_pj_1;
+Lisp_Object Vcharset_ideograph_gt_pj_2;
+Lisp_Object Vcharset_ideograph_gt_pj_3;
+Lisp_Object Vcharset_ideograph_gt_pj_4;
+Lisp_Object Vcharset_ideograph_gt_pj_5;
+Lisp_Object Vcharset_ideograph_gt_pj_6;
+Lisp_Object Vcharset_ideograph_gt_pj_7;
+Lisp_Object Vcharset_ideograph_gt_pj_8;
+Lisp_Object Vcharset_ideograph_gt_pj_9;
+Lisp_Object Vcharset_ideograph_gt_pj_10;
+Lisp_Object Vcharset_ideograph_gt_pj_11;
+Lisp_Object Vcharset_ideograph_daikanwa_2;
+Lisp_Object Vcharset_ideograph_daikanwa;
+Lisp_Object Vcharset_ethiopic_ucs;
#endif
Lisp_Object Vcharset_chinese_big5_1;
Lisp_Object Vcharset_chinese_big5_2;
#endif /* ENABLE_COMPOSITE_CHARS */
-/* Table of charsets indexed by leading byte. */
-Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
+struct charset_lookup *chlook;
-/* Table of charsets indexed by type/final-byte/direction. */
+static const struct lrecord_description charset_lookup_description_1[] = { /* field
+   description for struct charset_lookup: a single XD_LISP_OBJECT_ARRAY
+   entry that starts at charset_by_leading_byte, followed by XD_END.
+   NOTE(review): the element count below is larger than the leading-byte
+   part alone, so it presumably relies on the attribute table being laid
+   out directly after charset_by_leading_byte in the struct -- confirm
+   against the struct definition.  */
+ { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
#ifdef UTF2000
-Lisp_Object charset_by_attributes[4][128];
+ 128+4*128 /* 4*128 matches the former charset_by_attributes[4][128];
+              the leading 128 is presumably the number of
+              charset_by_leading_byte slots -- TODO confirm */
#else
-Lisp_Object charset_by_attributes[4][128][2];
+ 128+4*128*2 /* 4*128*2 matches the former charset_by_attributes[4][128][2];
+                the leading 128 is presumably the number of
+                charset_by_leading_byte slots -- TODO confirm */
#endif
+ }, { XD_END }
+};
+/* Pairs sizeof (struct charset_lookup) with the field description
+   charset_lookup_description_1.  */
+static const struct struct_description charset_lookup_description = {
+ sizeof (struct charset_lookup),
+ charset_lookup_description_1
+};
#ifndef UTF2000
/* Table of number of bytes in the string representation of a character
rep_bytes_by_first_byte(c) is more efficient than the equivalent
canonical computation:
- (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
+ XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
-Bytecount rep_bytes_by_first_byte[0xA0] =
+const Bytecount rep_bytes_by_first_byte[0xA0] =
{ /* 0x00 - 0x7f are for straight ASCII */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
#endif
#ifdef UTF2000
-Emchar_to_byte_table*
-make_byte_from_character_table ()
+/* Validate the contents of the decoding-table vector V.
+
+   Returns 0 when V is acceptable, -1 when V (or a nested vector) has
+   more than CCS_LEN elements, and -2 when an element is neither nil, a
+   character, nor a vector.  Vector elements are checked recursively
+   with DIM - 1 and the same CCS_LEN; the first non-zero result from a
+   nested check is returned unchanged.
+
+   NOTE(review): DIM is only passed down to the recursive call and is
+   never tested in this function, so the nesting depth is not limited
+   by it here -- confirm whether that is intended.  */
+int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
+int
+decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
{
- Emchar_to_byte_table* table
- = (Emchar_to_byte_table*) xmalloc (sizeof (Emchar_to_byte_table));
+ int i;
- table->base = NULL;
- return table;
-}
+ if (XVECTOR_LENGTH (v) > ccs_len)
+ return -1; /* too many elements at this level */
-void
-put_byte_from_character_table (Emchar ch, unsigned char val,
- Emchar_to_byte_table* table)
-{
- if (table->base == NULL)
+ for (i = 0; i < XVECTOR_LENGTH (v); i++)
{
- table->base = xmalloc (128);
- table->offset = ch - (ch % 128);
- table->size = 128;
- table->base[ch - table->offset] = val;
- }
- else
- {
- int i = ch - table->offset;
+ Lisp_Object c = XVECTOR_DATA(v)[i];
- if (i < 0)
+ if (!NILP (c) && !CHARP (c)) /* nil and characters need no further check */
{
- size_t new_size = table->size - i;
- size_t j;
-
- new_size += 128 - (new_size % 128);
- table->base = xrealloc (table->base, new_size);
- memmove (table->base + (new_size - table->size), table->base,
- table->size);
- for (j = 0; j < (new_size - table->size); j++)
- table->base[j] = 0;
- table->offset -= (new_size - table->size);
- table->base[ch - table->offset] = val;
- table->size = new_size;
+ if (VECTORP (c))
+ {
+ int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
+ if (ret)
+ return ret; /* propagate the nested failure code as-is */
+ }
+ else
+ return -2; /* element of an unsupported type */
+ }
+ }
+ return 0;
+}
+/* Associate CHARACTER with the code point VALUE in the coded charset CCS
+   and return the (possibly normalized) VALUE.
+
+   VALUE may be an integer code point or, in the obsolete form, a list
+   of integer bytes that are combined most-significant first
+   (code_point << 8 | byte).  When XCHARSET_GRAPHIC (CCS) is 1, bit 7 of
+   every byte is cleared (0x7F per byte, 0x7F7F7F7F for an integer) and
+   the returned VALUE is the masked integer.  A list VALUE is always
+   returned converted to an integer.  Any other VALUE, a non-integer
+   list element, or an improper list signals
+   "Invalid value for coded-charset".
+
+   Nothing is changed, and VALUE is returned as given, when the name of
+   CCS is Qucs and VALUE is an integer equal to XCHAR (CHARACTER).
+   Otherwise, if the decoding table of CCS is a vector and CHARACTER
+   already has an integer attribute for CCS, that code point is removed
+   with decoding_table_remove_char before decoding_table_put_char is
+   called with the new code point.  */
+Lisp_Object
+put_char_ccs_code_point (Lisp_Object character,
+ Lisp_Object ccs, Lisp_Object value)
+{
+ if (!EQ (XCHARSET_NAME (ccs), Qucs)
+ || !INTP (value)
+ || (XCHAR (character) != XINT (value)))
+ {
+ Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
+ int code_point;
+
+ if (CONSP (value))
+ { /* obsolete representation: value must be a list of bytes */
+ Lisp_Object ret = Fcar (value);
+ Lisp_Object rest;
+
+ if (!INTP (ret))
+ signal_simple_error ("Invalid value for coded-charset", value);
+ code_point = XINT (ret);
+ if (XCHARSET_GRAPHIC (ccs) == 1)
+ code_point &= 0x7F;
+ rest = Fcdr (value);
+ while (!NILP (rest))
+ {
+ int j;
+
+ if (!CONSP (rest))
+ signal_simple_error ("Invalid value for coded-charset",
+ value);
+ ret = Fcar (rest);
+ if (!INTP (ret))
+ signal_simple_error ("Invalid value for coded-charset",
+ value);
+ j = XINT (ret);
+ if (XCHARSET_GRAPHIC (ccs) == 1)
+ j &= 0x7F;
+ code_point = (code_point << 8) | j; /* earlier bytes end up more significant */
+ rest = Fcdr (rest);
+ }
+ value = make_int (code_point); /* hand back the integer form */
}
- else if (i >= table->size)
+ else if (INTP (value))
{
- size_t new_size = i + 1;
- size_t j;
-
- new_size += 128 - (new_size % 128);
- table->base = xrealloc (table->base, new_size);
- for (j = table->size; j < new_size; j++)
- table->base[j] = 0;
- table->base[i] = val;
- table->size = new_size;
+ code_point = XINT (value);
+ if (XCHARSET_GRAPHIC (ccs) == 1)
+ {
+ code_point &= 0x7F7F7F7F; /* clear bit 7 of each of the four bytes */
+ value = make_int (code_point);
+ }
}
else
+ signal_simple_error ("Invalid value for coded-charset", value);
+
+ if (VECTORP (v))
{
- table->base[i] = val;
+ Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
+ if (INTP (cpos))
+ {
+ decoding_table_remove_char (ccs, XINT (cpos)); /* drop the code point currently recorded for CHARACTER */
+ }
}
+ decoding_table_put_char (ccs, code_point, character);
}
+ return value;
}
-unsigned char
-get_byte_from_character_table (Emchar ch, Emchar_to_byte_table* table)
-{
- size_t i = ch - table->offset;
- if (i < table->size)
- return table->base[i];
- else
- return 0;
-}
-
-#define CHAR96(ft,b) (MIN_CHAR_96 + (ft - '0') * 96 + (b & 0x7f) - 32)
-
-Emchar_to_byte_table* ucs_to_latin_jisx0201;
-
-Emchar latin_jisx0201_to_ucs[94] =
-{
- 0x0021 /* 0x21 EXCLAMATION MARK */,
- 0x0022 /* 0x22 QUOTATION MARK */,
- 0x0023 /* 0x23 NUMBER SIGN */,
- 0x0024 /* 0x24 DOLLAR SIGN */,
- 0x0025 /* 0x25 PERCENT SIGN */,
- 0x0026 /* 0x26 AMPERSAND */,
- 0x0027 /* 0x27 APOSTROPHE */,
- 0x0028 /* 0x28 LEFT PARENTHESIS */,
- 0x0029 /* 0x29 RIGHT PARENTHESIS */,
- 0x002A /* 0x2A ASTERISK */,
- 0x002B /* 0x2B PLUS SIGN */,
- 0x002C /* 0x2C COMMA */,
- 0x002D /* 0x2D HYPHEN-MINUS */,
- 0x002E /* 0x2E FULL STOP */,
- 0x002F /* 0x2F SOLIDUS */,
- 0x0030 /* 0x30 DIGIT ZERO */,
- 0x0031 /* 0x31 DIGIT ONE */,
- 0x0032 /* 0x32 DIGIT TWO */,
- 0x0033 /* 0x33 DIGIT THREE */,
- 0x0034 /* 0x34 DIGIT FOUR */,
- 0x0035 /* 0x35 DIGIT FIVE */,
- 0x0036 /* 0x36 DIGIT SIX */,
- 0x0037 /* 0x37 DIGIT SEVEN */,
- 0x0038 /* 0x38 DIGIT EIGHT */,
- 0x0039 /* 0x39 DIGIT NINE */,
- 0x003A /* 0x3A COLON */,
- 0x003B /* 0x3B SEMICOLON */,
- 0x003C /* 0x3C LESS-THAN SIGN */,
- 0x003D /* 0x3D EQUALS SIGN */,
- 0x003E /* 0x3E GREATER-THAN SIGN */,
- 0x003F /* 0x3F QUESTION MARK */,
- 0x0040 /* 0x40 COMMERCIAL AT */,
- 0x0041 /* 0x41 LATIN CAPITAL LETTER A */,
- 0x0042 /* 0x42 LATIN CAPITAL LETTER B */,
- 0x0043 /* 0x43 LATIN CAPITAL LETTER C */,
- 0x0044 /* 0x44 LATIN CAPITAL LETTER D */,
- 0x0045 /* 0x45 LATIN CAPITAL LETTER E */,
- 0x0046 /* 0x46 LATIN CAPITAL LETTER F */,
- 0x0047 /* 0x47 LATIN CAPITAL LETTER G */,
- 0x0048 /* 0x48 LATIN CAPITAL LETTER H */,
- 0x0049 /* 0x49 LATIN CAPITAL LETTER I */,
- 0x004A /* 0x4A LATIN CAPITAL LETTER J */,
- 0x004B /* 0x4B LATIN CAPITAL LETTER K */,
- 0x004C /* 0x4C LATIN CAPITAL LETTER L */,
- 0x004D /* 0x4D LATIN CAPITAL LETTER M */,
- 0x004E /* 0x4E LATIN CAPITAL LETTER N */,
- 0x004F /* 0x4F LATIN CAPITAL LETTER O */,
- 0x0050 /* 0x50 LATIN CAPITAL LETTER P */,
- 0x0051 /* 0x51 LATIN CAPITAL LETTER Q */,
- 0x0052 /* 0x52 LATIN CAPITAL LETTER R */,
- 0x0053 /* 0x53 LATIN CAPITAL LETTER S */,
- 0x0054 /* 0x54 LATIN CAPITAL LETTER T */,
- 0x0055 /* 0x55 LATIN CAPITAL LETTER U */,
- 0x0056 /* 0x56 LATIN CAPITAL LETTER V */,
- 0x0057 /* 0x57 LATIN CAPITAL LETTER W */,
- 0x0058 /* 0x58 LATIN CAPITAL LETTER X */,
- 0x0059 /* 0x59 LATIN CAPITAL LETTER Y */,
- 0x005A /* 0x5A LATIN CAPITAL LETTER Z */,
- 0x005B /* 0x5B LEFT SQUARE BRACKET */,
- 0x00A5 /* 0x5C YEN SIGN */,
- 0x005D /* 0x5D RIGHT SQUARE BRACKET */,
- 0x005E /* 0x5E CIRCUMFLEX ACCENT */,
- 0x005F /* 0x5F LOW LINE */,
- 0x0060 /* 0x60 GRAVE ACCENT */,
- 0x0061 /* 0x61 LATIN SMALL LETTER A */,
- 0x0062 /* 0x62 LATIN SMALL LETTER B */,
- 0x0063 /* 0x63 LATIN SMALL LETTER C */,
- 0x0064 /* 0x64 LATIN SMALL LETTER D */,
- 0x0065 /* 0x65 LATIN SMALL LETTER E */,
- 0x0066 /* 0x66 LATIN SMALL LETTER F */,
- 0x0067 /* 0x67 LATIN SMALL LETTER G */,
- 0x0068 /* 0x68 LATIN SMALL LETTER H */,
- 0x0069 /* 0x69 LATIN SMALL LETTER I */,
- 0x006A /* 0x6A LATIN SMALL LETTER J */,
- 0x006B /* 0x6B LATIN SMALL LETTER K */,
- 0x006C /* 0x6C LATIN SMALL LETTER L */,
- 0x006D /* 0x6D LATIN SMALL LETTER M */,
- 0x006E /* 0x6E LATIN SMALL LETTER N */,
- 0x006F /* 0x6F LATIN SMALL LETTER O */,
- 0x0070 /* 0x70 LATIN SMALL LETTER P */,
- 0x0071 /* 0x71 LATIN SMALL LETTER Q */,
- 0x0072 /* 0x72 LATIN SMALL LETTER R */,
- 0x0073 /* 0x73 LATIN SMALL LETTER S */,
- 0x0074 /* 0x74 LATIN SMALL LETTER T */,
- 0x0075 /* 0x75 LATIN SMALL LETTER U */,
- 0x0076 /* 0x76 LATIN SMALL LETTER V */,
- 0x0077 /* 0x77 LATIN SMALL LETTER W */,
- 0x0078 /* 0x78 LATIN SMALL LETTER X */,
- 0x0079 /* 0x79 LATIN SMALL LETTER Y */,
- 0x007A /* 0x7A LATIN SMALL LETTER Z */,
- 0x007B /* 0x7B LEFT CURLY BRACKET */,
- 0x007C /* 0x7C VERTICAL LINE */,
- 0x007D /* 0x7D RIGHT CURLY BRACKET */,
- 0x203E /* 0x7E OVERLINE */
-};
-
-
-Emchar_to_byte_table* ucs_to_latin_iso8859_2;
-
-Emchar latin_iso8859_2_to_ucs[96] =
-{
- 0x00A0 /* 0xA0 NO-BREAK SPACE */,
- 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
- 0x02D8 /* 0xA2 BREVE */,
- 0x0141 /* 0xA3 LATIN CAPITAL LETTER L WITH STROKE */,
- 0x00A4 /* 0xA4 CURRENCY SIGN */,
- 0x013D /* 0xA5 LATIN CAPITAL LETTER L WITH CARON */,
- 0x015A /* 0xA6 LATIN CAPITAL LETTER S WITH ACUTE */,
- 0x00A7 /* 0xA7 SECTION SIGN */,
- 0x00A8 /* 0xA8 DIAERESIS */,
- 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
- 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
- 0x0164 /* 0xAB LATIN CAPITAL LETTER T WITH CARON */,
- 0x0179 /* 0xAC LATIN CAPITAL LETTER Z WITH ACUTE */,
- 0x00AD /* 0xAD SOFT HYPHEN */,
- 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
- 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
- 0x00B0 /* 0xB0 DEGREE SIGN */,
- 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
- 0x02DB /* 0xB2 OGONEK */,
- 0x0142 /* 0xB3 LATIN SMALL LETTER L WITH STROKE */,
- 0x00B4 /* 0xB4 ACUTE ACCENT */,
- 0x013E /* 0xB5 LATIN SMALL LETTER L WITH CARON */,
- 0x015B /* 0xB6 LATIN SMALL LETTER S WITH ACUTE */,
- 0x02C7 /* 0xB7 CARON */,
- 0x00B8 /* 0xB8 CEDILLA */,
- 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
- 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
- 0x0165 /* 0xBB LATIN SMALL LETTER T WITH CARON */,
- 0x017A /* 0xBC LATIN SMALL LETTER Z WITH ACUTE */,
- 0x02DD /* 0xBD DOUBLE ACUTE ACCENT */,
- 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
- 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
- 0x0154 /* 0xC0 LATIN CAPITAL LETTER R WITH ACUTE */,
- 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
- 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
- 0x0102 /* 0xC3 LATIN CAPITAL LETTER A WITH BREVE */,
- 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
- 0x0139 /* 0xC5 LATIN CAPITAL LETTER L WITH ACUTE */,
- 0x0106 /* 0xC6 LATIN CAPITAL LETTER C WITH ACUTE */,
- 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
- 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
- 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
- 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
- 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
- 0x011A /* 0xCC LATIN CAPITAL LETTER E WITH CARON */,
- 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
- 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
- 0x010E /* 0xCF LATIN CAPITAL LETTER D WITH CARON */,
- 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
- 0x0143 /* 0xD1 LATIN CAPITAL LETTER N WITH ACUTE */,
- 0x0147 /* 0xD2 LATIN CAPITAL LETTER N WITH CARON */,
- 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
- 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
- 0x0150 /* 0xD5 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */,
- 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
- 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
- 0x0158 /* 0xD8 LATIN CAPITAL LETTER R WITH CARON */,
- 0x016E /* 0xD9 LATIN CAPITAL LETTER U WITH RING ABOVE */,
- 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
- 0x0170 /* 0xDB LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */,
- 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
- 0x00DD /* 0xDD LATIN CAPITAL LETTER Y WITH ACUTE */,
- 0x0162 /* 0xDE LATIN CAPITAL LETTER T WITH CEDILLA */,
- 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
- 0x0155 /* 0xE0 LATIN SMALL LETTER R WITH ACUTE */,
- 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
- 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
- 0x0103 /* 0xE3 LATIN SMALL LETTER A WITH BREVE */,
- 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
- 0x013A /* 0xE5 LATIN SMALL LETTER L WITH ACUTE */,
- 0x0107 /* 0xE6 LATIN SMALL LETTER C WITH ACUTE */,
- 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
- 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
- 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
- 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
- 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
- 0x011B /* 0xEC LATIN SMALL LETTER E WITH CARON */,
- 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
- 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
- 0x010F /* 0xEF LATIN SMALL LETTER D WITH CARON */,
- 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
- 0x0144 /* 0xF1 LATIN SMALL LETTER N WITH ACUTE */,
- 0x0148 /* 0xF2 LATIN SMALL LETTER N WITH CARON */,
- 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
- 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
- 0x0151 /* 0xF5 LATIN SMALL LETTER O WITH DOUBLE ACUTE */,
- 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
- 0x00F7 /* 0xF7 DIVISION SIGN */,
- 0x0159 /* 0xF8 LATIN SMALL LETTER R WITH CARON */,
- 0x016F /* 0xF9 LATIN SMALL LETTER U WITH RING ABOVE */,
- 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
- 0x0171 /* 0xFB LATIN SMALL LETTER U WITH DOUBLE ACUTE */,
- 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
- 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
- 0x0163 /* 0xFE LATIN SMALL LETTER T WITH CEDILLA */,
- 0x02D9 /* 0xFF DOT ABOVE */
-};
-
-Emchar_to_byte_table* ucs_to_latin_iso8859_3;
-
-Emchar latin_iso8859_3_to_ucs[96] =
-{
- 0x00A0 /* 0xA0 NO-BREAK SPACE */,
- 0x0126 /* 0xA1 LATIN CAPITAL LETTER H WITH STROKE */,
- 0x02D8 /* 0xA2 BREVE */,
- 0x00A3 /* 0xA3 POUND SIGN */,
- 0x00A4 /* 0xA4 CURRENCY SIGN */,
- CHAR96('C', 0xA5),
- 0x0124 /* 0xA6 LATIN CAPITAL LETTER H WITH CIRCUMFLEX */,
- 0x00A7 /* 0xA7 SECTION SIGN */,
- 0x00A8 /* 0xA8 DIAERESIS */,
- 0x0130 /* 0xA9 LATIN CAPITAL LETTER I WITH DOT ABOVE */,
- 0x015E /* 0xAA LATIN CAPITAL LETTER S WITH CEDILLA */,
- 0x011E /* 0xAB LATIN CAPITAL LETTER G WITH BREVE */,
- 0x0134 /* 0xAC LATIN CAPITAL LETTER J WITH CIRCUMFLEX */,
- 0x00AD /* 0xAD SOFT HYPHEN */,
- CHAR96('C', 0xAE),
- 0x017B /* 0xAF LATIN CAPITAL LETTER Z WITH DOT ABOVE */,
- 0x00B0 /* 0xB0 DEGREE SIGN */,
- 0x0127 /* 0xB1 LATIN SMALL LETTER H WITH STROKE */,
- 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
- 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
- 0x00B4 /* 0xB4 ACUTE ACCENT */,
- 0x00B5 /* 0xB5 MICRO SIGN */,
- 0x0125 /* 0xB6 LATIN SMALL LETTER H WITH CIRCUMFLEX */,
- 0x00B7 /* 0xB7 MIDDLE DOT */,
- 0x00B8 /* 0xB8 CEDILLA */,
- 0x0131 /* 0xB9 LATIN SMALL LETTER DOTLESS I */,
- 0x015F /* 0xBA LATIN SMALL LETTER S WITH CEDILLA */,
- 0x011F /* 0xBB LATIN SMALL LETTER G WITH BREVE */,
- 0x0135 /* 0xBC LATIN SMALL LETTER J WITH CIRCUMFLEX */,
- 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
- CHAR96('C', 0xBE),
- 0x017C /* 0xBF LATIN SMALL LETTER Z WITH DOT ABOVE */,
- 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
- 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
- 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
- CHAR96('C', 0xC3),
- 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
- 0x010A /* 0xC5 LATIN CAPITAL LETTER C WITH DOT ABOVE */,
- 0x0108 /* 0xC6 LATIN CAPITAL LETTER C WITH CIRCUMFLEX */,
- 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
- 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
- 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
- 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
- 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
- 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
- 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
- 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
- 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
- CHAR96('C', 0xD0),
- 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
- 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
- 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
- 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
- 0x0120 /* 0xD5 LATIN CAPITAL LETTER G WITH DOT ABOVE */,
- 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
- 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
- 0x011C /* 0xD8 LATIN CAPITAL LETTER G WITH CIRCUMFLEX */,
- 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
- 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
- 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
- 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
- 0x016C /* 0xDD LATIN CAPITAL LETTER U WITH BREVE */,
- 0x015C /* 0xDE LATIN CAPITAL LETTER S WITH CIRCUMFLEX */,
- 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
- 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
- 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
- 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
- CHAR96('C', 0xE3),
- 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
- 0x010B /* 0xE5 LATIN SMALL LETTER C WITH DOT ABOVE */,
- 0x0109 /* 0xE6 LATIN SMALL LETTER C WITH CIRCUMFLEX */,
- 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
- 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
- 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
- 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
- 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
- 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
- 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
- 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
- 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
- CHAR96('C', 0xF0),
- 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
- 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
- 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
- 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
- 0x0121 /* 0xF5 LATIN SMALL LETTER G WITH DOT ABOVE */,
- 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
- 0x00F7 /* 0xF7 DIVISION SIGN */,
- 0x011D /* 0xF8 LATIN SMALL LETTER G WITH CIRCUMFLEX */,
- 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
- 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
- 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
- 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
- 0x016D /* 0xFD LATIN SMALL LETTER U WITH BREVE */,
- 0x015D /* 0xFE LATIN SMALL LETTER S WITH CIRCUMFLEX */,
- 0x02D9 /* 0xFF DOT ABOVE */
-};
-
-Emchar_to_byte_table* ucs_to_latin_iso8859_4;
-
-Emchar latin_iso8859_4_to_ucs[96] =
-{
- 0x00A0 /* 0xA0 NO-BREAK SPACE */,
- 0x0104 /* 0xA1 LATIN CAPITAL LETTER A WITH OGONEK */,
- 0x0138 /* 0xA2 LATIN SMALL LETTER KRA */,
- 0x0156 /* 0xA3 LATIN CAPITAL LETTER R WITH CEDILLA */,
- 0x00A4 /* 0xA4 CURRENCY SIGN */,
- 0x0128 /* 0xA5 LATIN CAPITAL LETTER I WITH TILDE */,
- 0x013B /* 0xA6 LATIN CAPITAL LETTER L WITH CEDILLA */,
- 0x00A7 /* 0xA7 SECTION SIGN */,
- 0x00A8 /* 0xA8 DIAERESIS */,
- 0x0160 /* 0xA9 LATIN CAPITAL LETTER S WITH CARON */,
- 0x0112 /* 0xAA LATIN CAPITAL LETTER E WITH MACRON */,
- 0x0122 /* 0xAB LATIN CAPITAL LETTER G WITH CEDILLA */,
- 0x0166 /* 0xAC LATIN CAPITAL LETTER T WITH STROKE */,
- 0x00AD /* 0xAD SOFT HYPHEN */,
- 0x017D /* 0xAE LATIN CAPITAL LETTER Z WITH CARON */,
- 0x00AF /* 0xAF MACRON */,
- 0x00B0 /* 0xB0 DEGREE SIGN */,
- 0x0105 /* 0xB1 LATIN SMALL LETTER A WITH OGONEK */,
- 0x02DB /* 0xB2 OGONEK */,
- 0x0157 /* 0xB3 LATIN SMALL LETTER R WITH CEDILLA */,
- 0x00B4 /* 0xB4 ACUTE ACCENT */,
- 0x0129 /* 0xB5 LATIN SMALL LETTER I WITH TILDE */,
- 0x013C /* 0xB6 LATIN SMALL LETTER L WITH CEDILLA */,
- 0x02C7 /* 0xB7 CARON */,
- 0x00B8 /* 0xB8 CEDILLA */,
- 0x0161 /* 0xB9 LATIN SMALL LETTER S WITH CARON */,
- 0x0113 /* 0xBA LATIN SMALL LETTER E WITH MACRON */,
- 0x0123 /* 0xBB LATIN SMALL LETTER G WITH CEDILLA */,
- 0x0167 /* 0xBC LATIN SMALL LETTER T WITH STROKE */,
- 0x014A /* 0xBD LATIN CAPITAL LETTER ENG */,
- 0x017E /* 0xBE LATIN SMALL LETTER Z WITH CARON */,
- 0x014B /* 0xBF LATIN SMALL LETTER ENG */,
- 0x0100 /* 0xC0 LATIN CAPITAL LETTER A WITH MACRON */,
- 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
- 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
- 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
- 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
- 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
- 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
- 0x012E /* 0xC7 LATIN CAPITAL LETTER I WITH OGONEK */,
- 0x010C /* 0xC8 LATIN CAPITAL LETTER C WITH CARON */,
- 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
- 0x0118 /* 0xCA LATIN CAPITAL LETTER E WITH OGONEK */,
- 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
- 0x0116 /* 0xCC LATIN CAPITAL LETTER E WITH DOT ABOVE */,
- 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
- 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
- 0x012A /* 0xCF LATIN CAPITAL LETTER I WITH MACRON */,
- 0x0110 /* 0xD0 LATIN CAPITAL LETTER D WITH STROKE */,
- 0x0145 /* 0xD1 LATIN CAPITAL LETTER N WITH CEDILLA */,
- 0x014C /* 0xD2 LATIN CAPITAL LETTER O WITH MACRON */,
- 0x0136 /* 0xD3 LATIN CAPITAL LETTER K WITH CEDILLA */,
- 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
- 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
- 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
- 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
- 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
- 0x0172 /* 0xD9 LATIN CAPITAL LETTER U WITH OGONEK */,
- 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
- 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
- 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
- 0x0168 /* 0xDD LATIN CAPITAL LETTER U WITH TILDE */,
- 0x016A /* 0xDE LATIN CAPITAL LETTER U WITH MACRON */,
- 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
- 0x0101 /* 0xE0 LATIN SMALL LETTER A WITH MACRON */,
- 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
- 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
- 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
- 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
- 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
- 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
- 0x012F /* 0xE7 LATIN SMALL LETTER I WITH OGONEK */,
- 0x010D /* 0xE8 LATIN SMALL LETTER C WITH CARON */,
- 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
- 0x0119 /* 0xEA LATIN SMALL LETTER E WITH OGONEK */,
- 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
- 0x0117 /* 0xEC LATIN SMALL LETTER E WITH DOT ABOVE */,
- 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
- 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
- 0x012B /* 0xEF LATIN SMALL LETTER I WITH MACRON */,
- 0x0111 /* 0xF0 LATIN SMALL LETTER D WITH STROKE */,
- 0x0146 /* 0xF1 LATIN SMALL LETTER N WITH CEDILLA */,
- 0x014D /* 0xF2 LATIN SMALL LETTER O WITH MACRON */,
- 0x0137 /* 0xF3 LATIN SMALL LETTER K WITH CEDILLA */,
- 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
- 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
- 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
- 0x00F7 /* 0xF7 DIVISION SIGN */,
- 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
- 0x0173 /* 0xF9 LATIN SMALL LETTER U WITH OGONEK */,
- 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
- 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
- 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
- 0x0169 /* 0xFD LATIN SMALL LETTER U WITH TILDE */,
- 0x016B /* 0xFE LATIN SMALL LETTER U WITH MACRON */,
- 0x02D9 /* 0xFF DOT ABOVE */
-};
-
-Emchar_to_byte_table* ucs_to_latin_iso8859_9;
-
-Emchar latin_iso8859_9_to_ucs[96] =
-{
- 0x00A0 /* 0xA0 NO-BREAK SPACE */,
- 0x00A1 /* 0xA1 INVERTED EXCLAMATION MARK */,
- 0x00A2 /* 0xA2 CENT SIGN */,
- 0x00A3 /* 0xA3 POUND SIGN */,
- 0x00A4 /* 0xA4 CURRENCY SIGN */,
- 0x00A5 /* 0xA5 YEN SIGN */,
- 0x00A6 /* 0xA6 BROKEN BAR */,
- 0x00A7 /* 0xA7 SECTION SIGN */,
- 0x00A8 /* 0xA8 DIAERESIS */,
- 0x00A9 /* 0xA9 COPYRIGHT SIGN */,
- 0x00AA /* 0xAA FEMININE ORDINAL INDICATOR */,
- 0x00AB /* 0xAB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */,
- 0x00AC /* 0xAC NOT SIGN */,
- 0x00AD /* 0xAD SOFT HYPHEN */,
- 0x00AE /* 0xAE REGISTERED SIGN */,
- 0x00AF /* 0xAF MACRON */,
- 0x00B0 /* 0xB0 DEGREE SIGN */,
- 0x00B1 /* 0xB1 PLUS-MINUS SIGN */,
- 0x00B2 /* 0xB2 SUPERSCRIPT TWO */,
- 0x00B3 /* 0xB3 SUPERSCRIPT THREE */,
- 0x00B4 /* 0xB4 ACUTE ACCENT */,
- 0x00B5 /* 0xB5 MICRO SIGN */,
- 0x00B6 /* 0xB6 PILCROW SIGN */,
- 0x00B7 /* 0xB7 MIDDLE DOT */,
- 0x00B8 /* 0xB8 CEDILLA */,
- 0x00B9 /* 0xB9 SUPERSCRIPT ONE */,
- 0x00BA /* 0xBA MASCULINE ORDINAL INDICATOR */,
- 0x00BB /* 0xBB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */,
- 0x00BC /* 0xBC VULGAR FRACTION ONE QUARTER */,
- 0x00BD /* 0xBD VULGAR FRACTION ONE HALF */,
- 0x00BE /* 0xBE VULGAR FRACTION THREE QUARTERS */,
- 0x00BF /* 0xBF INVERTED QUESTION MARK */,
- 0x00C0 /* 0xC0 LATIN CAPITAL LETTER A WITH GRAVE */,
- 0x00C1 /* 0xC1 LATIN CAPITAL LETTER A WITH ACUTE */,
- 0x00C2 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
- 0x00C3 /* 0xC3 LATIN CAPITAL LETTER A WITH TILDE */,
- 0x00C4 /* 0xC4 LATIN CAPITAL LETTER A WITH DIAERESIS */,
- 0x00C5 /* 0xC5 LATIN CAPITAL LETTER A WITH RING ABOVE */,
- 0x00C6 /* 0xC6 LATIN CAPITAL LETTER AE */,
- 0x00C7 /* 0xC7 LATIN CAPITAL LETTER C WITH CEDILLA */,
- 0x00C8 /* 0xC8 LATIN CAPITAL LETTER E WITH GRAVE */,
- 0x00C9 /* 0xC9 LATIN CAPITAL LETTER E WITH ACUTE */,
- 0x00CA /* 0xCA LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
- 0x00CB /* 0xCB LATIN CAPITAL LETTER E WITH DIAERESIS */,
- 0x00CC /* 0xCC LATIN CAPITAL LETTER I WITH GRAVE */,
- 0x00CD /* 0xCD LATIN CAPITAL LETTER I WITH ACUTE */,
- 0x00CE /* 0xCE LATIN CAPITAL LETTER I WITH CIRCUMFLEX */,
- 0x00CF /* 0xCF LATIN CAPITAL LETTER I WITH DIAERESIS */,
- 0x011E /* 0xD0 LATIN CAPITAL LETTER G WITH BREVE */,
- 0x00D1 /* 0xD1 LATIN CAPITAL LETTER N WITH TILDE */,
- 0x00D2 /* 0xD2 LATIN CAPITAL LETTER O WITH GRAVE */,
- 0x00D3 /* 0xD3 LATIN CAPITAL LETTER O WITH ACUTE */,
- 0x00D4 /* 0xD4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
- 0x00D5 /* 0xD5 LATIN CAPITAL LETTER O WITH TILDE */,
- 0x00D6 /* 0xD6 LATIN CAPITAL LETTER O WITH DIAERESIS */,
- 0x00D7 /* 0xD7 MULTIPLICATION SIGN */,
- 0x00D8 /* 0xD8 LATIN CAPITAL LETTER O WITH STROKE */,
- 0x00D9 /* 0xD9 LATIN CAPITAL LETTER U WITH GRAVE */,
- 0x00DA /* 0xDA LATIN CAPITAL LETTER U WITH ACUTE */,
- 0x00DB /* 0xDB LATIN CAPITAL LETTER U WITH CIRCUMFLEX */,
- 0x00DC /* 0xDC LATIN CAPITAL LETTER U WITH DIAERESIS */,
- 0x0130 /* 0xDD LATIN CAPITAL LETTER I WITH DOT ABOVE */,
- 0x015E /* 0xDE LATIN CAPITAL LETTER S WITH CEDILLA */,
- 0x00DF /* 0xDF LATIN SMALL LETTER SHARP S */,
- 0x00E0 /* 0xE0 LATIN SMALL LETTER A WITH GRAVE */,
- 0x00E1 /* 0xE1 LATIN SMALL LETTER A WITH ACUTE */,
- 0x00E2 /* 0xE2 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
- 0x00E3 /* 0xE3 LATIN SMALL LETTER A WITH TILDE */,
- 0x00E4 /* 0xE4 LATIN SMALL LETTER A WITH DIAERESIS */,
- 0x00E5 /* 0xE5 LATIN SMALL LETTER A WITH RING ABOVE */,
- 0x00E6 /* 0xE6 LATIN SMALL LETTER AE */,
- 0x00E7 /* 0xE7 LATIN SMALL LETTER C WITH CEDILLA */,
- 0x00E8 /* 0xE8 LATIN SMALL LETTER E WITH GRAVE */,
- 0x00E9 /* 0xE9 LATIN SMALL LETTER E WITH ACUTE */,
- 0x00EA /* 0xEA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
- 0x00EB /* 0xEB LATIN SMALL LETTER E WITH DIAERESIS */,
- 0x00EC /* 0xEC LATIN SMALL LETTER I WITH GRAVE */,
- 0x00ED /* 0xED LATIN SMALL LETTER I WITH ACUTE */,
- 0x00EE /* 0xEE LATIN SMALL LETTER I WITH CIRCUMFLEX */,
- 0x00EF /* 0xEF LATIN SMALL LETTER I WITH DIAERESIS */,
- 0x011F /* 0xF0 LATIN SMALL LETTER G WITH BREVE */,
- 0x00F1 /* 0xF1 LATIN SMALL LETTER N WITH TILDE */,
- 0x00F2 /* 0xF2 LATIN SMALL LETTER O WITH GRAVE */,
- 0x00F3 /* 0xF3 LATIN SMALL LETTER O WITH ACUTE */,
- 0x00F4 /* 0xF4 LATIN SMALL LETTER O WITH CIRCUMFLEX */,
- 0x00F5 /* 0xF5 LATIN SMALL LETTER O WITH TILDE */,
- 0x00F6 /* 0xF6 LATIN SMALL LETTER O WITH DIAERESIS */,
- 0x00F7 /* 0xF7 DIVISION SIGN */,
- 0x00F8 /* 0xF8 LATIN SMALL LETTER O WITH STROKE */,
- 0x00F9 /* 0xF9 LATIN SMALL LETTER U WITH GRAVE */,
- 0x00FA /* 0xFA LATIN SMALL LETTER U WITH ACUTE */,
- 0x00FB /* 0xFB LATIN SMALL LETTER U WITH CIRCUMFLEX */,
- 0x00FC /* 0xFC LATIN SMALL LETTER U WITH DIAERESIS */,
- 0x0131 /* 0xFD LATIN SMALL LETTER DOTLESS I */,
- 0x015F /* 0xFE LATIN SMALL LETTER S WITH CEDILLA */,
- 0x00FF /* 0xFF LATIN SMALL LETTER Y WITH DIAERESIS */,
-};
-
-Emchar_to_byte_table* ucs_to_latin_viscii_lower;
-
-Emchar latin_viscii_lower_to_ucs[96] =
-{
- CHAR96('1', 0x20),
- 0x1eaf /* 0x21 */,
- 0x1eb1 /* 0x22 */,
- 0x1eb7 /* 0x23 */,
- 0x1ea5 /* 0x24 */,
- 0x1ea7 /* 0x25 */,
- 0x1ea9 /* 0x26 */,
- 0x1ead /* 0x27 */,
- 0x1ebd /* 0x28 */,
- 0x1eb9 /* 0x29 */,
- 0x1ebf /* 0x2a */,
- 0x1ec1 /* 0x2b */,
- 0x1ec3 /* 0x2c */,
- 0x1ec5 /* 0x2d */,
- 0x1ec7 /* 0x2e */,
- 0x1ed1 /* 0x2f */,
- 0x1ed3 /* 0x30 */,
- 0x1ed5 /* 0x31 */,
- 0x1ed7 /* 0x32 */,
- CHAR96('1', 0x33),
- CHAR96('1', 0x34),
- 0x1ed9 /* 0x35 */,
- 0x1edd /* 0x36 */,
- 0x1edf /* 0x37 */,
- 0x1ecb /* 0x38 */,
- CHAR96('1', 0x39),
- CHAR96('1', 0x3A),
- CHAR96('1', 0x3B),
- CHAR96('1', 0x3C),
- 0x01a1 /* 0x3d */,
- 0x1edb /* 0x3e */,
- CHAR96('1', 0x3F),
- CHAR96('1', 0x40),
- CHAR96('1', 0x41),
- CHAR96('1', 0x42),
- CHAR96('1', 0x43),
- CHAR96('1', 0x44),
- CHAR96('1', 0x45),
- 0x1eb3 /* 0x46 */,
- 0x1eb5 /* 0x47 */,
- CHAR96('1', 0x48),
- CHAR96('1', 0x49),
- CHAR96('1', 0x4A),
- CHAR96('1', 0x4B),
- CHAR96('1', 0x4C),
- CHAR96('1', 0x4D),
- CHAR96('1', 0x4E),
- 0x1ef3 /* 0x4f */,
- CHAR96('1', 0x50),
- 0x1ee9 /* 0x51 */,
- CHAR96('1', 0x52),
- CHAR96('1', 0x53),
- CHAR96('1', 0x54),
- 0x1ea1 /* 0x55 */,
- 0x1ef7 /* 0x56 */,
- 0x1eeb /* 0x57 */,
- 0x1eed /* 0x58 */,
- CHAR96('1', 0x59),
- CHAR96('1', 0x5A),
- 0x1ef9 /* 0x5b */,
- 0x1ef5 /* 0x5c */,
- CHAR96('1', 0x5D),
- 0x1ee1 /* 0x5e */,
- 0x01b0 /* 0x5f */,
- 0x00e0 /* 0x60 */,
- 0x00e1 /* 0x61 */,
- 0x00e2 /* 0x62 */,
- 0x00e3 /* 0x63 */,
- 0x1ea3 /* 0x64 */,
- 0x0103 /* 0x65 */,
- 0x1eef /* 0x66 */,
- 0x1eab /* 0x67 */,
- 0x00e8 /* 0x68 */,
- 0x00e9 /* 0x69 */,
- 0x00ea /* 0x6a */,
- 0x1ebb /* 0x6b */,
- 0x00ec /* 0x6c */,
- 0x00ed /* 0x6d */,
- 0x0129 /* 0x6e */,
- 0x1ec9 /* 0x6f */,
- 0x0111 /* 0x70 */,
- 0x1ef1 /* 0x71 */,
- 0x00f2 /* 0x72 */,
- 0x00f3 /* 0x73 */,
- 0x00f4 /* 0x74 */,
- 0x00f5 /* 0x75 */,
- 0x1ecf /* 0x76 */,
- 0x1ecd /* 0x77 */,
- 0x1ee5 /* 0x78 */,
- 0x00f9 /* 0x79 */,
- 0x00fa /* 0x7a */,
- 0x0169 /* 0x7b */,
- 0x1ee7 /* 0x7c */,
- 0x00fd /* 0x7d */,
- 0x1ee3 /* 0x7e */,
- CHAR96('1', 0x7F)
-};
-
-Emchar_to_byte_table* ucs_to_latin_viscii_upper;
-
-Emchar latin_viscii_upper_to_ucs[96] =
-{
- CHAR96('2', 0x20),
- 0x1eae /* 0x21 */,
- 0x1eb0 /* 0x22 */,
- 0x1eb6 /* 0x23 */,
- 0x1ea4 /* 0x24 */,
- 0x1ea6 /* 0x25 */,
- 0x1ea8 /* 0x26 */,
- 0x1eac /* 0x27 */,
- 0x1ebc /* 0x28 */,
- 0x1eb8 /* 0x29 */,
- 0x1ebe /* 0x2a */,
- 0x1ec0 /* 0x2b */,
- 0x1ec2 /* 0x2c */,
- 0x1ec4 /* 0x2d */,
- 0x1ec6 /* 0x2e */,
- 0x1ed0 /* 0x2f */,
- 0x1ed2 /* 0x30 */,
- 0x1ed4 /* 0x31 */,
- 0x1ed6 /* 0x32 */,
- CHAR96('2', 0x33),
- CHAR96('2', 0x34),
- 0x1ed8 /* 0x35 */,
- 0x1edc /* 0x36 */,
- 0x1ede /* 0x37 */,
- 0x1eca /* 0x38 */,
- CHAR96('2', 0x39),
- CHAR96('2', 0x3a),
- CHAR96('2', 0x3b),
- CHAR96('2', 0x3c),
- 0x01a0 /* 0x3d */,
- 0x1eda /* 0x3e */,
- CHAR96('2', 0x3f),
- CHAR96('2', 0x40),
- CHAR96('2', 0x41),
- CHAR96('2', 0x42),
- CHAR96('2', 0x43),
- CHAR96('2', 0x44),
- CHAR96('2', 0x45),
- 0x1eb2 /* 0x46 */,
- 0x1eb4 /* 0x47 */,
- CHAR96('2', 0x48),
- CHAR96('2', 0x49),
- CHAR96('2', 0x4a),
- CHAR96('2', 0x4b),
- CHAR96('2', 0x4c),
- CHAR96('2', 0x4d),
- CHAR96('2', 0x4e),
- 0x1ef2 /* 0x4f */,
- CHAR96('2', 0x50),
- 0x1ee8 /* 0x51 */,
- CHAR96('2', 0x52),
- CHAR96('2', 0x53),
- CHAR96('2', 0x54),
- 0x1ea0 /* 0x55 */,
- 0x1ef6 /* 0x56 */,
- 0x1eea /* 0x57 */,
- 0x1eec /* 0x58 */,
- CHAR96('2', 0x59),
- CHAR96('2', 0x5a),
- 0x1ef8 /* 0x5b */,
- 0x1ef4 /* 0x5c */,
- CHAR96('2', 0x5d),
- 0x1ee0 /* 0x5e */,
- 0x01af /* 0x5f */,
- 0x00c0 /* 0x60 */,
- 0x00c1 /* 0x61 */,
- 0x00c2 /* 0x62 */,
- 0x00c3 /* 0x63 */,
- 0x1ea2 /* 0x64 */,
- 0x0102 /* 0x65 */,
- 0x1eee /* 0x66 */,
- 0x1eaa /* 0x67 */,
- 0x00c8 /* 0x68 */,
- 0x00c9 /* 0x69 */,
- 0x00ca /* 0x6a */,
- 0x1eba /* 0x6b */,
- 0x00cc /* 0x6c */,
- 0x00cd /* 0x6d */,
- 0x0128 /* 0x6e */,
- 0x1ec8 /* 0x6f */,
- 0x0110 /* 0x70 */,
- 0x1ef0 /* 0x71 */,
- 0x00d2 /* 0x72 */,
- 0x00d3 /* 0x73 */,
- 0x00d4 /* 0x74 */,
- 0x00d5 /* 0x75 */,
- 0x1ece /* 0x76 */,
- 0x1ecc /* 0x77 */,
- 0x1ee4 /* 0x78 */,
- 0x00d9 /* 0x79 */,
- 0x00da /* 0x7a */,
- 0x0168 /* 0x7b */,
- 0x1ee6 /* 0x7c */,
- 0x00dd /* 0x7d */,
- 0x1ee2 /* 0x7e */,
- CHAR96('2', 0x7f)
-};
-
-
-Emchar_to_byte_table* ucs_to_latin_tcvn5712;
-
-Emchar latin_tcvn5712_to_ucs[96] =
+Lisp_Object
+remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
{
- 0x00A0 /* 0xA0 NO-BREAK SPACE */,
- 0x0102 /* 0xA1 LATIN CAPITAL LETTER A WITH BREVE */,
- 0x00C2 /* 0xA2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX */,
- 0x00CA /* 0xA3 LATIN CAPITAL LETTER E WITH CIRCUMFLEX */,
- 0x00D4 /* 0xA4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX */,
- 0x01A0 /* 0xA5 LATIN CAPITAL LETTER O WITH HORN */,
- 0x01AF /* 0xA6 LATIN CAPITAL LETTER U WITH HORN */,
- 0x0110 /* 0xA7 LATIN CAPITAL LETTER D WITH STROKE */,
- 0x0103 /* 0xA8 LATIN SMALL LETTER A WITH BREVE */,
- 0x00E2 /* 0xA9 LATIN SMALL LETTER A WITH CIRCUMFLEX */,
- 0x00EA /* 0xAA LATIN SMALL LETTER E WITH CIRCUMFLEX */,
- 0x00F4 /* 0xAB LATIN SMALL LETTER O WITH CIRCUMFLEX */,
- 0x01A1 /* 0xAC LATIN SMALL LETTER O WITH HORN */,
- 0x01B0 /* 0xAD LATIN SMALL LETTER U WITH HORN */,
- 0x0111 /* 0xAE LATIN SMALL LETTER D WITH STROKE */,
- 0x1EB0 /* 0xAF LATIN CAPITAL LETTER A WITH BREVE AND GRAVE */,
- 0x0300 /* 0xB0 COMBINING GRAVE ACCENT */,
- 0x0309 /* 0xB1 COMBINING HOOK ABOVE */,
- 0x0303 /* 0xB2 COMBINING TILDE */,
- 0x0301 /* 0xB3 COMBINING ACUTE ACCENT */,
- 0x0323 /* 0xB4 COMBINING DOT BELOW */,
- 0x00E0 /* 0xB5 LATIN SMALL LETTER A WITH GRAVE */,
- 0x1EA3 /* 0xB6 LATIN SMALL LETTER A WITH HOOK ABOVE */,
- 0x00E3 /* 0xB7 LATIN SMALL LETTER A WITH TILDE */,
- 0x00E1 /* 0xB8 LATIN SMALL LETTER A WITH ACUTE */,
- 0x1EA1 /* 0xB9 LATIN SMALL LETTER A WITH DOT BELOW */,
- 0x1EB2 /* 0xBA LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE */,
- 0x1EB1 /* 0xBB LATIN SMALL LETTER A WITH BREVE AND GRAVE */,
- 0x1EB3 /* 0xBC LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE */,
- 0x1EB5 /* 0xBD LATIN SMALL LETTER A WITH BREVE AND TILDE */,
- 0x1EAF /* 0xBE LATIN SMALL LETTER A WITH BREVE AND ACUTE */,
- 0x1EB4 /* 0xBF LATIN CAPITAL LETTER A WITH BREVE AND TILDE */,
- 0x1EAE /* 0xC0 LATIN CAPITAL LETTER A WITH BREVE AND ACUTE */,
- 0x1EA6 /* 0xC1 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE */,
- 0x1EA8 /* 0xC2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
- 0x1EAA /* 0xC3 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE */,
- 0x1EA4 /* 0xC4 LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE */,
- 0x1EC0 /* 0xC5 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE */,
- 0x1EB7 /* 0xC6 LATIN SMALL LETTER A WITH BREVE AND DOT BELOW */,
- 0x1EA7 /* 0xC7 LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE */,
- 0x1EA9 /* 0xC8 LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */,
- 0x1EAB /* 0xC9 LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE */,
- 0x1EA5 /* 0xCA LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE */,
- 0x1EAD /* 0xCB LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW */,
- 0x00E8 /* 0xCC LATIN SMALL LETTER E WITH GRAVE */,
- 0x1EC2 /* 0xCD LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
- 0x1EBB /* 0xCE LATIN SMALL LETTER E WITH HOOK ABOVE */,
- 0x1EBD /* 0xCF LATIN SMALL LETTER E WITH TILDE */,
- 0x00E9 /* 0xD0 LATIN SMALL LETTER E WITH ACUTE */,
- 0x1EB9 /* 0xD1 LATIN SMALL LETTER E WITH DOT BELOW */,
- 0x1EC1 /* 0xD2 LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE */,
- 0x1EC3 /* 0xD3 LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */,
- 0x1EC5 /* 0xD4 LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE */,
- 0x1EBF /* 0xD5 LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE */,
- 0x1EC7 /* 0xD6 LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW */,
- 0x00EC /* 0xD7 LATIN SMALL LETTER I WITH GRAVE */,
- 0x1EC9 /* 0xD8 LATIN SMALL LETTER I WITH HOOK ABOVE */,
- 0x1EC4 /* 0xD9 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE */,
- 0x1EBE /* 0xDA LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE */,
- 0x1ED2 /* 0xDB LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE */,
- 0x0129 /* 0xDC LATIN SMALL LETTER I WITH TILDE */,
- 0x00ED /* 0xDD LATIN SMALL LETTER I WITH ACUTE */,
- 0x1ECB /* 0xDE LATIN SMALL LETTER I WITH DOT BELOW */,
- 0x00F2 /* 0xDF LATIN SMALL LETTER O WITH GRAVE */,
- 0x1ED4 /* 0xE0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
- 0x1ECF /* 0xE1 LATIN SMALL LETTER O WITH HOOK ABOVE */,
- 0x00F5 /* 0xE2 LATIN SMALL LETTER O WITH TILDE */,
- 0x00F3 /* 0xE3 LATIN SMALL LETTER O WITH ACUTE */,
- 0x1ECD /* 0xE4 LATIN SMALL LETTER O WITH DOT BELOW */,
- 0x1ED3 /* 0xE5 LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE */,
- 0x1ED5 /* 0xE6 LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */,
- 0x1ED7 /* 0xE7 LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE */,
- 0x1ED1 /* 0xE8 LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE */,
- 0x1ED9 /* 0xE9 LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW */,
- 0x1EDD /* 0xEA LATIN SMALL LETTER O WITH HORN AND GRAVE */,
- 0x1EDF /* 0xEB LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE */,
- 0x1EE1 /* 0xEC LATIN SMALL LETTER O WITH HORN AND TILDE */,
- 0x1EDB /* 0xED LATIN SMALL LETTER O WITH HORN AND ACUTE */,
- 0x1EE3 /* 0xEE LATIN SMALL LETTER O WITH HORN AND DOT BELOW */,
- 0x00F9 /* 0xEF LATIN SMALL LETTER U WITH GRAVE */,
- 0x1ED6 /* 0xF0 LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE */,
- 0x1EE7 /* 0xF1 LATIN SMALL LETTER U WITH HOOK ABOVE */,
- 0x0169 /* 0xF2 LATIN SMALL LETTER U WITH TILDE */,
- 0x00FA /* 0xF3 LATIN SMALL LETTER U WITH ACUTE */,
- 0x1EE5 /* 0xF4 LATIN SMALL LETTER U WITH DOT BELOW */,
- 0x1EEB /* 0xF5 LATIN SMALL LETTER U WITH HORN AND GRAVE */,
- 0x1EED /* 0xF6 LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE */,
- 0x1EEF /* 0xF7 LATIN SMALL LETTER U WITH HORN AND TILDE */,
- 0x1EE9 /* 0xF8 LATIN SMALL LETTER U WITH HORN AND ACUTE */,
- 0x1EF1 /* 0xF9 LATIN SMALL LETTER U WITH HORN AND DOT BELOW */,
- 0x1EF3 /* 0xFA LATIN SMALL LETTER Y WITH GRAVE */,
- 0x1EF7 /* 0xFB LATIN SMALL LETTER Y WITH HOOK ABOVE */,
- 0x1EF9 /* 0xFC LATIN SMALL LETTER Y WITH TILDE */,
- 0x00FD /* 0xFD LATIN SMALL LETTER Y WITH ACUTE */,
- 0x1EF5 /* 0xFE LATIN SMALL LETTER Y WITH DOT BELOW */,
- 0x1ED0 /* 0xFF LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */
-};
+ Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
+ Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
-Charset_ID latin_a_char_to_charset[128] = {
- /* U+0100 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0101 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0102 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0103 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0104 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0105 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0106 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0107 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0108 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0109 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+010A */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+010B */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+010C */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+010D */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+010E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+010F */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0110 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0111 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0112 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0113 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0114 */ LEADING_BYTE_UCS_BMP,
- /* U+0115 */ LEADING_BYTE_UCS_BMP,
- /* U+0116 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0117 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0118 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0119 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+011A */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+011B */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+011C */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+011D */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+011E */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+011F */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0120 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0121 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0122 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0123 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0124 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0125 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0126 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0127 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0128 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0129 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+012A */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+012B */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+012C */ LEADING_BYTE_UCS_BMP,
- /* U+012D */ LEADING_BYTE_UCS_BMP,
- /* U+012E */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+012F */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0130 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0131 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0132 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0133 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0134 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0135 */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+0136 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0137 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0138 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0139 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+013A */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+013B */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+013C */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+013D */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+013E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+013F */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0140 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0141 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0142 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0143 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0144 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0145 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0146 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0147 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0148 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0149 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+014A */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+014B */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+014C */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+014D */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+014E */ LEADING_BYTE_UCS_BMP,
- /* U+014F */ LEADING_BYTE_UCS_BMP,
- /* U+0150 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0151 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0152 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0153 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0154 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0155 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0156 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0157 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0158 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0159 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+015A */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+015B */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+015C */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+015D */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+015E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+015F */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0160 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0161 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0162 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0163 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0164 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0165 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0166 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0167 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0168 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0169 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+016A */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+016B */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+016C */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+016D */ LEADING_BYTE_LATIN_ISO8859_3,
- /* U+016E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+016F */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0170 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0171 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+0172 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0173 */ LEADING_BYTE_LATIN_ISO8859_4,
- /* U+0174 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0175 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0176 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0177 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0178 */ LEADING_BYTE_JAPANESE_JISX0212,
- /* U+0179 */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017A */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017B */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017C */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017D */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017E */ LEADING_BYTE_LATIN_ISO8859_2,
- /* U+017F */ LEADING_BYTE_UCS_BMP
-};
-
-unsigned char latin_a_char_to_byte1[128] = {
- /* U+0100 */ 0xC0 - 0x80,
- /* U+0101 */ 0xE0 - 0x80,
- /* U+0102 */ 0xC3 - 0x80,
- /* U+0103 */ 0xE3 - 0x80,
- /* U+0104 */ 0xA1 - 0x80,
- /* U+0105 */ 0xB1 - 0x80,
- /* U+0106 */ 0xC6 - 0x80,
- /* U+0107 */ 0xE6 - 0x80,
- /* U+0108 */ 0xC6 - 0x80,
- /* U+0109 */ 0xE6 - 0x80,
- /* U+010A */ 0xC5 - 0x80,
- /* U+010B */ 0xE5 - 0x80,
- /* U+010C */ 0xC8 - 0x80,
- /* U+010D */ 0xE8 - 0x80,
- /* U+010E */ 0xCF - 0x80,
- /* U+010F */ 0xEF - 0x80,
- /* U+0110 */ 0xD0 - 0x80,
- /* U+0111 */ 0xF0 - 0x80,
- /* U+0112 */ 0xAA - 0x80,
- /* U+0113 */ 0xBA - 0x80,
- /* U+0114 */ 0x01,
- /* U+0115 */ 0x01,
- /* U+0116 */ 0xCC - 0x80,
- /* U+0117 */ 0xEC - 0x80,
- /* U+0118 */ 0xCA - 0x80,
- /* U+0119 */ 0xEA - 0x80,
- /* U+011A */ 0xCC - 0x80,
- /* U+011B */ 0xEC - 0x80,
- /* U+011C */ 0xD8 - 0x80,
- /* U+011D */ 0xF8 - 0x80,
- /* U+011E */ 0xAB - 0x80,
- /* U+011F */ 0xBB - 0x80,
- /* U+0120 */ 0xD5 - 0x80,
- /* U+0121 */ 0xF5 - 0x80,
- /* U+0122 */ 0xAB - 0x80,
- /* U+0123 */ 0xBB - 0x80,
- /* U+0124 */ 0xA6 - 0x80,
- /* U+0125 */ 0xB6 - 0x80,
- /* U+0126 */ 0xA1 - 0x80,
- /* U+0127 */ 0xB1 - 0x80,
- /* U+0128 */ 0xA5 - 0x80,
- /* U+0129 */ 0xB5 - 0x80,
- /* U+012A */ 0xCF - 0x80,
- /* U+012B */ 0xEF - 0x80,
- /* U+012C */ 0x01,
- /* U+012D */ 0x01,
- /* U+012E */ 0xC7 - 0x80,
- /* U+012F */ 0xE7 - 0x80,
- /* U+0130 */ 0xA9 - 0x80,
- /* U+0131 */ 0xB9 - 0x80,
- /* U+0132 */ 0x29,
- /* U+0133 */ 0x29,
- /* U+0134 */ 0xAC - 0x80,
- /* U+0135 */ 0xBC - 0x80,
- /* U+0136 */ 0xD3 - 0x80,
- /* U+0137 */ 0xF3 - 0x80,
- /* U+0138 */ 0xA2 - 0x80,
- /* U+0139 */ 0xC5 - 0x80,
- /* U+013A */ 0xE5 - 0x80,
- /* U+013B */ 0xA6 - 0x80,
- /* U+013C */ 0xB6 - 0x80,
- /* U+013D */ 0xA5 - 0x80,
- /* U+013E */ 0xB5 - 0x80,
- /* U+013F */ 0x29,
- /* U+0140 */ 0x29,
- /* U+0141 */ 0xA3 - 0x80,
- /* U+0142 */ 0xB3 - 0x80,
- /* U+0143 */ 0xD1 - 0x80,
- /* U+0144 */ 0xF1 - 0x80,
- /* U+0145 */ 0xD1 - 0x80,
- /* U+0146 */ 0xF1 - 0x80,
- /* U+0147 */ 0xD2 - 0x80,
- /* U+0148 */ 0xF2 - 0x80,
- /* U+0149 */ 0x29,
- /* U+014A */ 0xBD - 0x80,
- /* U+014B */ 0xBF - 0x80,
- /* U+014C */ 0xD2 - 0x80,
- /* U+014D */ 0xF2 - 0x80,
- /* U+014E */ 0x01,
- /* U+014F */ 0x01,
- /* U+0150 */ 0xD5 - 0x80,
- /* U+0151 */ 0xF5 - 0x80,
- /* U+0152 */ 0x29,
- /* U+0153 */ 0x29,
- /* U+0154 */ 0xC0 - 0x80,
- /* U+0155 */ 0xE0 - 0x80,
- /* U+0156 */ 0xA3 - 0x80,
- /* U+0157 */ 0xB3 - 0x80,
- /* U+0158 */ 0xD8 - 0x80,
- /* U+0159 */ 0xF8 - 0x80,
- /* U+015A */ 0xA6 - 0x80,
- /* U+015B */ 0xB6 - 0x80,
- /* U+015C */ 0xDE - 0x80,
- /* U+015D */ 0xFE - 0x80,
- /* U+015E */ 0xAA - 0x80,
- /* U+015F */ 0xBA - 0x80,
- /* U+0160 */ 0xA9 - 0x80,
- /* U+0161 */ 0xB9 - 0x80,
- /* U+0162 */ 0xDE - 0x80,
- /* U+0163 */ 0xFE - 0x80,
- /* U+0164 */ 0xAB - 0x80,
- /* U+0165 */ 0xBB - 0x80,
- /* U+0166 */ 0xAC - 0x80,
- /* U+0167 */ 0xBC - 0x80,
- /* U+0168 */ 0xDD - 0x80,
- /* U+0169 */ 0xFD - 0x80,
- /* U+016A */ 0xDE - 0x80,
- /* U+016B */ 0xFE - 0x80,
- /* U+016C */ 0xDD - 0x80,
- /* U+016D */ 0xFD - 0x80,
- /* U+016E */ 0xD9 - 0x80,
- /* U+016F */ 0xF9 - 0x80,
- /* U+0170 */ 0xDB - 0x80,
- /* U+0171 */ 0xFB - 0x80,
- /* U+0172 */ 0xD9 - 0x80,
- /* U+0173 */ 0xF9 - 0x80,
- /* U+0174 */ 0x2A,
- /* U+0175 */ 0x2B,
- /* U+0176 */ 0x2A,
- /* U+0177 */ 0x2B,
- /* U+0178 */ 0x2A,
- /* U+0179 */ 0xAC - 0x80,
- /* U+017A */ 0xBC - 0x80,
- /* U+017B */ 0xAF - 0x80,
- /* U+017C */ 0xBF - 0x80,
- /* U+017D */ 0xAE - 0x80,
- /* U+017E */ 0xBE - 0x80,
- /* U+017F */ 0x01
-};
+ if (VECTORP (decoding_table))
+ {
+ Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
-unsigned char latin_a_char_to_byte2[128] = {
- /* U+0100 */ 0x00,
- /* U+0101 */ 0x00,
- /* U+0102 */ 0x00,
- /* U+0103 */ 0x00,
- /* U+0104 */ 0x00,
- /* U+0105 */ 0x00,
- /* U+0106 */ 0x00,
- /* U+0107 */ 0x00,
- /* U+0108 */ 0x00,
- /* U+0109 */ 0x00,
- /* U+010A */ 0x00,
- /* U+010B */ 0x00,
- /* U+010C */ 0x00,
- /* U+010D */ 0x00,
- /* U+010E */ 0x00,
- /* U+010F */ 0x00,
- /* U+0110 */ 0x00,
- /* U+0111 */ 0x00,
- /* U+0112 */ 0x00,
- /* U+0113 */ 0x00,
- /* U+0114 */ 0x14,
- /* U+0115 */ 0x15,
- /* U+0116 */ 0x00,
- /* U+0117 */ 0x00,
- /* U+0118 */ 0x00,
- /* U+0119 */ 0x00,
- /* U+011A */ 0x00,
- /* U+011B */ 0x00,
- /* U+011C */ 0x00,
- /* U+011D */ 0x00,
- /* U+011E */ 0x00,
- /* U+011F */ 0x00,
- /* U+0120 */ 0x00,
- /* U+0121 */ 0x00,
- /* U+0122 */ 0x00,
- /* U+0123 */ 0x00,
- /* U+0124 */ 0x00,
- /* U+0125 */ 0x00,
- /* U+0126 */ 0x00,
- /* U+0127 */ 0x00,
- /* U+0128 */ 0x00,
- /* U+0129 */ 0x00,
- /* U+012A */ 0x00,
- /* U+012B */ 0x00,
- /* U+012C */ 0x2C,
- /* U+012D */ 0x2D,
- /* U+012E */ 0x00,
- /* U+012F */ 0x00,
- /* U+0130 */ 0x00,
- /* U+0131 */ 0x00,
- /* U+0132 */ 0x26,
- /* U+0133 */ 0x46,
- /* U+0134 */ 0x00,
- /* U+0135 */ 0x00,
- /* U+0136 */ 0x00,
- /* U+0137 */ 0x00,
- /* U+0138 */ 0x00,
- /* U+0139 */ 0x00,
- /* U+013A */ 0x00,
- /* U+013B */ 0x00,
- /* U+013C */ 0x00,
- /* U+013D */ 0x00,
- /* U+013E */ 0x00,
- /* U+013F */ 0x29,
- /* U+0140 */ 0x49,
- /* U+0141 */ 0x00,
- /* U+0142 */ 0x00,
- /* U+0143 */ 0x00,
- /* U+0144 */ 0x00,
- /* U+0145 */ 0x00,
- /* U+0146 */ 0x00,
- /* U+0147 */ 0x00,
- /* U+0148 */ 0x00,
- /* U+0149 */ 0x4A,
- /* U+014A */ 0x00,
- /* U+014B */ 0x00,
- /* U+014C */ 0x00,
- /* U+014D */ 0x00,
- /* U+014E */ 0x4E,
- /* U+014F */ 0x4F,
- /* U+0150 */ 0x00,
- /* U+0151 */ 0x00,
- /* U+0152 */ 0x2D,
- /* U+0153 */ 0x4D,
- /* U+0154 */ 0x00,
- /* U+0155 */ 0x00,
- /* U+0156 */ 0x00,
- /* U+0157 */ 0x00,
- /* U+0158 */ 0x00,
- /* U+0159 */ 0x00,
- /* U+015A */ 0x00,
- /* U+015B */ 0x00,
- /* U+015C */ 0x00,
- /* U+015D */ 0x00,
- /* U+015E */ 0x00,
- /* U+015F */ 0x00,
- /* U+0160 */ 0x00,
- /* U+0161 */ 0x00,
- /* U+0162 */ 0x00,
- /* U+0163 */ 0x00,
- /* U+0164 */ 0x00,
- /* U+0165 */ 0x00,
- /* U+0166 */ 0x00,
- /* U+0167 */ 0x00,
- /* U+0168 */ 0x00,
- /* U+0169 */ 0x00,
- /* U+016A */ 0x00,
- /* U+016B */ 0x00,
- /* U+016C */ 0x00,
- /* U+016D */ 0x00,
- /* U+016E */ 0x00,
- /* U+016F */ 0x00,
- /* U+0170 */ 0x00,
- /* U+0171 */ 0x00,
- /* U+0172 */ 0x00,
- /* U+0173 */ 0x00,
- /* U+0174 */ 0x71,
- /* U+0175 */ 0x71,
- /* U+0176 */ 0x74,
- /* U+0177 */ 0x74,
- /* U+0178 */ 0x73,
- /* U+0179 */ 0x00,
- /* U+017A */ 0x00,
- /* U+017B */ 0x00,
- /* U+017C */ 0x00,
- /* U+017D */ 0x00,
- /* U+017E */ 0x00,
- /* U+017F */ 0x7F
-};
+ if (!NILP (cpos))
+ {
+ decoding_table_remove_char (ccs, XINT (cpos));
+ }
+ }
+ if (CHAR_TABLEP (encoding_table))
+ {
+ put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
+ }
+ return Qt;
+}
-Lisp_Object Vutf_2000_version;
#endif
#ifndef UTF2000
Lisp_Object Qreverse_direction_charset;
Lisp_Object Qleading_byte;
Lisp_Object Qshort_name, Qlong_name;
+#ifdef UTF2000
+Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
+Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
+#endif
Lisp_Object Qascii,
Qcontrol_1,
Qlatin_iso8859_9,
Qjapanese_jisx0208_1978,
Qchinese_gb2312,
+ Qchinese_gb12345,
Qjapanese_jisx0208,
+ Qjapanese_jisx0208_1990,
Qkorean_ksc5601,
Qjapanese_jisx0212,
Qchinese_cns11643_1,
Qchinese_cns11643_2,
#ifdef UTF2000
- Qchinese_cns11643_3,
- Qchinese_cns11643_4,
- Qchinese_cns11643_5,
- Qchinese_cns11643_6,
- Qchinese_cns11643_7,
+ Qucs,
Qucs_bmp,
+ Qucs_smp,
+ Qucs_sip,
+ Qucs_gb,
+ Qucs_cns,
+ Qucs_jis,
+ Qucs_ks,
+ Qucs_big5,
+ Qlatin_viscii,
+ Qlatin_tcvn5712,
Qlatin_viscii_lower,
Qlatin_viscii_upper,
+ Qvietnamese_viscii_lower,
+ Qvietnamese_viscii_upper,
+ Qjis_x0208,
+ Qchinese_big5,
+ /* Qchinese_big5_cdp, */
+ Qideograph_hanziku_1,
+ Qideograph_hanziku_2,
+ Qideograph_hanziku_3,
+ Qideograph_hanziku_4,
+ Qideograph_hanziku_5,
+ Qideograph_hanziku_6,
+ Qideograph_hanziku_7,
+ Qideograph_hanziku_8,
+ Qideograph_hanziku_9,
+ Qideograph_hanziku_10,
+ Qideograph_hanziku_11,
+ Qideograph_hanziku_12,
+ Qideograph_cbeta,
+ Qideograph_daikanwa_2,
+ Qideograph_daikanwa,
+ Qideograph_gt,
+ Qideograph_gt_pj_1,
+ Qideograph_gt_pj_2,
+ Qideograph_gt_pj_3,
+ Qideograph_gt_pj_4,
+ Qideograph_gt_pj_5,
+ Qideograph_gt_pj_6,
+ Qideograph_gt_pj_7,
+ Qideograph_gt_pj_8,
+ Qideograph_gt_pj_9,
+ Qideograph_gt_pj_10,
+ Qideograph_gt_pj_11,
+ Qethiopic_ucs,
#endif
Qchinese_big5_1,
Qchinese_big5_2,
Lisp_Object Vcharset_hash_table;
-static Charset_ID next_allocated_1_byte_leading_byte;
-static Charset_ID next_allocated_2_byte_leading_byte;
-
/* Composite characters are characters constructed by overstriking two
or more regular characters.
Use the macro charptr_emchar() instead. */
Emchar
-non_ascii_charptr_emchar (CONST Bufbyte *str)
+non_ascii_charptr_emchar (const Bufbyte *str)
{
#ifdef UTF2000
Bufbyte b;
if (f3 < 0x20)
return 0;
- if (f3 != 0x20 && f3 != 0x7F)
+ if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
+ f2 <= MAX_CHAR_FIELD2_PRIVATE))
return 1;
/*
FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
*/
charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
+ if (EQ (charset, Qnil))
+ return 0;
return (XCHARSET_CHARS (charset) == 96);
}
else
}
#endif /* ENABLE_COMPOSITE_CHARS */
- if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
+ if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
+ && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
return 1;
if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
charset =
CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
+ if (EQ (charset, Qnil))
+ return 0;
return (XCHARSET_CHARS (charset) == 96);
}
}
/* Basic string functions */
/************************************************************************/
-/* Copy the character pointed to by PTR into STR, assuming it's
- non-ASCII. Do not call this directly. Use the macro
- charptr_copy_char() instead. */
+/* Copy the character pointed to by SRC into DST. Do not call this
+ directly. Use the macro charptr_copy_char() instead.
+ The number of bytes to copy is whatever REP_BYTES_BY_FIRST_BYTE
+ yields for the first byte of SRC; that many bytes are copied in
+ order, starting with the first byte itself. Any count is accepted:
+ there is no abort () for unexpected values, and a count of zero
+ copies nothing.
+ Return the number of bytes copied. */
Bytecount
-non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
+non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
{
- Bufbyte *strptr = str;
- *strptr = *ptr++;
- switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
- {
- /* Notice fallthrough. */
-#ifdef UTF2000
- case 6: *++strptr = *ptr++;
- case 5: *++strptr = *ptr++;
-#endif
- case 4: *++strptr = *ptr++;
- case 3: *++strptr = *ptr++;
- case 2: *++strptr = *ptr;
- break;
- default:
- abort ();
- }
- return strptr + 1 - str;
+ unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src); /* total bytes to copy, first byte included */
+ unsigned int i; /* counts down the bytes still to copy */
+ for (i = bytes; i; i--, dst++, src++) /* both pointers advance together, one byte per pass */
+ *dst = *src;
+ return bytes; /* the count is returned directly; no pointer subtraction needed */
}
\f
{
Bufbyte str[MAX_EMCHAR_LEN];
Bufbyte *strptr = str;
+ unsigned int bytes;
str[0] = (Bufbyte) ch;
- switch (REP_BYTES_BY_FIRST_BYTE (ch))
+
+ for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
{
- /* Notice fallthrough. */
-#ifdef UTF2000
- case 6:
- ch = Lstream_getc (stream);
- assert (ch >= 0);
- *++strptr = (Bufbyte) ch;
- case 5:
- ch = Lstream_getc (stream);
- assert (ch >= 0);
- *++strptr = (Bufbyte) ch;
-#endif
- case 4:
- ch = Lstream_getc (stream);
- assert (ch >= 0);
- *++strptr = (Bufbyte) ch;
- case 3:
- ch = Lstream_getc (stream);
- assert (ch >= 0);
- *++strptr = (Bufbyte) ch;
- case 2:
- ch = Lstream_getc (stream);
- assert (ch >= 0);
- *++strptr = (Bufbyte) ch;
- break;
- default:
- abort ();
+ int c = Lstream_getc (stream);
+ bufpos_checking_assert (c >= 0);
+ *++strptr = (Bufbyte) c;
}
return charptr_emchar (str);
}
/************************************************************************/
static Lisp_Object
-mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
+mark_charset (Lisp_Object obj) /* Mark the Lisp-valued slots of the charset OBJ.
+ The name slot is not marked here; it is the return value, presumably
+ so that the caller marks it (confirm against the lrecord mark
+ protocol).
+ NOTE(review): reverse_direction_charset is listed in
+ charset_description but is not marked here -- confirm that this is
+ intended. */
{
- struct Lisp_Charset *cs = XCHARSET (obj);
+ Lisp_Charset *cs = XCHARSET (obj); /* the charset record behind OBJ */
- markobj (cs->short_name);
- markobj (cs->long_name);
- markobj (cs->doc_string);
- markobj (cs->registry);
- markobj (cs->ccl_program);
+ mark_object (cs->short_name); /* slots are marked by calling mark_object () directly; no callback parameter is taken */
+ mark_object (cs->long_name);
+ mark_object (cs->doc_string);
+ mark_object (cs->registry);
+ mark_object (cs->ccl_program);
+#ifdef UTF2000
+ mark_object (cs->decoding_table); /* marked only in UTF2000 builds */
+ mark_object (cs->mother); /* marked only in UTF2000 builds */
+#endif /* UTF2000 */
return cs->name;
}
static void
print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
{
- struct Lisp_Charset *cs = XCHARSET (obj);
+ Lisp_Charset *cs = XCHARSET (obj);
char buf[200];
if (print_readably)
print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
write_c_string (" ", printcharfun);
print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
- sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
- CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
- CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
- CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
- "96x96",
+ sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
+ CHARSET_CHARS (cs),
+ CHARSET_DIMENSION (cs),
CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
CHARSET_COLUMNS (cs),
CHARSET_GRAPHIC (cs),
}
static const struct lrecord_description charset_description[] = {
- { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) }, /* seven slots are listed one entry each, instead of one entry carrying a count of 7 */
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
+#ifdef UTF2000 /* two further slots are described only in UTF2000 builds */
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
+#endif /* UTF2000 */
{ XD_END }
};
DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
- mark_charset, print_charset, 0, 0, 0, charset_description,
- struct Lisp_Charset);
-/* Make a new charset. */
+ mark_charset, print_charset, 0, 0, 0, /* mark_charset and print_charset are supplied; the next three arguments are 0 */
+ charset_description, /* the slot table defined just above */
+ Lisp_Charset);
+/* Make a new charset. */
+/* #### SJT Should generic properties be allowed? */
static Lisp_Object
-make_charset (Charset_ID id, Lisp_Object name, unsigned char rep_bytes,
- unsigned char type, unsigned char columns, unsigned char graphic,
- Bufbyte final, unsigned char direction, Lisp_Object short_name,
+make_charset (Charset_ID id, Lisp_Object name,
+ unsigned short chars, unsigned char dimension,
+ unsigned char columns, unsigned char graphic,
+ Bufbyte final, unsigned char direction, Lisp_Object short_name,
Lisp_Object long_name, Lisp_Object doc,
- Lisp_Object reg)
+ Lisp_Object reg,
+ Lisp_Object decoding_table,
+ Emchar min_code, Emchar max_code,
+ Emchar code_offset, unsigned char byte_offset,
+ Lisp_Object mother, unsigned char conversion)
{
Lisp_Object obj;
- struct Lisp_Charset *cs =
- alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
+ Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
+
+ zero_lcrecord (cs);
+
XSETCHARSET (obj, cs);
CHARSET_ID (cs) = id;
CHARSET_NAME (cs) = name;
CHARSET_SHORT_NAME (cs) = short_name;
CHARSET_LONG_NAME (cs) = long_name;
-#ifndef UTF2000
- CHARSET_REP_BYTES (cs) = rep_bytes;
-#endif
+ CHARSET_CHARS (cs) = chars;
+ CHARSET_DIMENSION (cs) = dimension;
CHARSET_DIRECTION (cs) = direction;
- CHARSET_TYPE (cs) = type;
CHARSET_COLUMNS (cs) = columns;
CHARSET_GRAPHIC (cs) = graphic;
CHARSET_FINAL (cs) = final;
CHARSET_REGISTRY (cs) = reg;
CHARSET_CCL_PROGRAM (cs) = Qnil;
CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
-
- switch ( CHARSET_TYPE (cs) )
- {
- case CHARSET_TYPE_94:
- CHARSET_DIMENSION (cs) = 1;
- CHARSET_CHARS (cs) = 94;
- break;
- case CHARSET_TYPE_96:
- CHARSET_DIMENSION (cs) = 1;
- CHARSET_CHARS (cs) = 96;
- break;
- case CHARSET_TYPE_94X94:
- CHARSET_DIMENSION (cs) = 2;
- CHARSET_CHARS (cs) = 94;
- break;
- case CHARSET_TYPE_96X96:
- CHARSET_DIMENSION (cs) = 2;
- CHARSET_CHARS (cs) = 96;
- break;
#ifdef UTF2000
- case CHARSET_TYPE_128X128:
- CHARSET_DIMENSION (cs) = 2;
- CHARSET_CHARS (cs) = 128;
- break;
- case CHARSET_TYPE_256X256:
- CHARSET_DIMENSION (cs) = 2;
- CHARSET_CHARS (cs) = 256;
- break;
+ CHARSET_DECODING_TABLE(cs) = Qunbound;
+ CHARSET_MIN_CODE (cs) = min_code;
+ CHARSET_MAX_CODE (cs) = max_code;
+ CHARSET_CODE_OFFSET (cs) = code_offset;
+ CHARSET_BYTE_OFFSET (cs) = byte_offset;
+ CHARSET_MOTHER (cs) = mother;
+ CHARSET_CONVERSION (cs) = conversion;
+#endif
+
+#ifndef UTF2000
+ if (id == LEADING_BYTE_ASCII)
+ CHARSET_REP_BYTES (cs) = 1;
+ else if (id < 0xA0)
+ CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
+ else
+ CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
#endif
- }
if (final)
{
/* some charsets do not have final characters. This includes
ASCII, Control-1, Composite, and the two faux private
charsets. */
+ unsigned char iso2022_type
+ = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
#if UTF2000
- assert (NILP (charset_by_attributes[type][final]));
- charset_by_attributes[type][final] = obj;
+ if (code_offset == 0)
+ {
+ assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
+ chlook->charset_by_attributes[iso2022_type][final] = obj;
+ }
#else
- assert (NILP (charset_by_attributes[type][final][direction]));
- charset_by_attributes[type][final][direction] = obj;
+ assert (NILP
+ (chlook->charset_by_attributes[iso2022_type][final][direction]));
+ chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
#endif
}
- assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
- charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
-#ifndef UTF2000
- if (id < 0xA0)
- /* official leading byte */
- rep_bytes_by_first_byte[id] = rep_bytes;
-#endif
+ assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
+ chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
/* Some charsets are "faux" and don't have names or really exist at
all except in the leading-byte table. */
{
Charset_ID lb;
+#ifdef UTF2000
+ if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
+ lb = 0;
+ else
+ lb = chlook->next_allocated_leading_byte++;
+#else
if (dimension == 1)
{
- if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
+ if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
lb = 0;
else
- lb = next_allocated_1_byte_leading_byte++;
+ lb = chlook->next_allocated_1_byte_leading_byte++;
}
else
{
- if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
+ if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
lb = 0;
else
- lb = next_allocated_2_byte_leading_byte++;
+ lb = chlook->next_allocated_2_byte_leading_byte++;
}
+#endif
if (!lb)
signal_simple_error
return lb;
}
+#ifdef UTF2000
+/* Number of Big5 characters which have the same code in 1st byte. */
+
+#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
+
+/* Decode CODE_POINT of coded charset CCS using the charset's decoding
+   table, falling back to the mother charset when CCS has one and its
+   conversion is CONVERSION_IDENTICAL.
+
+   Returns the decoded character, or -1 when neither the decoding table
+   nor the mother charset yields a character.  */
+Emchar
+decode_defined_char (Lisp_Object ccs, int code_point)
+{
+  int dim = XCHARSET_DIMENSION (ccs);
+  Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
+  Lisp_Object mother;
+
+  /* Look up one octet of CODE_POINT per dimension, most significant
+     octet first; each lookup result feeds the next lookup.  */
+  while (dim > 0)
+    {
+      dim--;
+      decoding_table
+        = get_ccs_octet_table (decoding_table, ccs,
+                               (code_point >> (dim * 8)) & 255);
+    }
+  if (CHARP (decoding_table))
+    return XCHAR (decoding_table);
+
+  /* No entry in this charset's own table: defer to the mother charset,
+     but only when code points are shared unchanged with it.  */
+  mother = XCHARSET_MOTHER (ccs);
+  if (CHARSETP (mother)
+      && XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL)
+    {
+      if (EQ (mother, Vcharset_ucs))
+        return DECODE_CHAR (mother, code_point);
+      return decode_defined_char (mother, code_point);
+    }
+  return -1;
+}
+
+/* Compute the character for CODE_POINT in CHARSET arithmetically, from
+ the charset's mother, conversion, offsets, code range and final byte.
+ Returns -1 when CODE_POINT falls in a rejected row, outside the
+ charset's [MIN_CODE, MAX_CODE] range, or when the charset has neither
+ a positive MAX_CODE nor a final byte >= '0'. */
+Emchar
+decode_builtin_char (Lisp_Object charset, int code_point)
+{
+ Lisp_Object mother = XCHARSET_MOTHER (charset);
+ int final;
+
+ if ( XCHARSET_MAX_CODE (charset) > 0 )
+ {
+ if ( CHARSETP (mother) )
+ {
+ /* Convert CODE_POINT to a code of the mother charset, then let
+ the mother decode it (after adding this charset's code offset). */
+ int code = code_point;
+
+ if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
+ {
+ int row = code_point >> 8;
+ int cell = code_point & 255;
+
+ /* Rows 48..77 and 80..109 are linearized as 60 consecutive rows
+ of 94 cells; rows below 48 and rows 78..79 are rejected.
+ NOTE(review): rows >= 110 match no branch, so CODE stays equal
+ to CODE_POINT -- confirm this is intended. */
+ if (row < 16 + 32)
+ return -1;
+ else if (row < 16 + 32 + 30)
+ code = (row - (16 + 32)) * 94 + cell - 33;
+ else if (row < 18 + 32 + 30)
+ return -1;
+ else if (row < 18 + 32 + 60)
+ code = (row - (18 + 32)) * 94 + cell - 33;
+ }
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
+ {
+ int plane = code_point >> 16;
+ int row = (code_point >> 8) & 255;
+ int cell = code_point & 255;
+
+ /* Same row mapping as CONVERSION_94x60, with a leading plane
+ octet contributing (plane - 33) blocks of 94 * 60 codes.
+ NOTE(review): rows >= 110 likewise leave CODE unchanged. */
+ if (row < 16 + 32)
+ return -1;
+ else if (row < 16 + 32 + 30)
+ code
+ = (plane - 33) * 94 * 60
+ + (row - (16 + 32)) * 94
+ + cell - 33;
+ else if (row < 18 + 32 + 30)
+ return -1;
+ else if (row < 18 + 32 + 60)
+ code
+ = (plane - 33) * 94 * 60
+ + (row - (18 + 32)) * 94
+ + cell - 33;
+ }
+ return
+ decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
+ }
+ else
+ {
+ /* No mother: linearize one or two octets using the byte offset
+ and CHARS, add the code offset, and accept the result only if
+ it lies within [MIN_CODE, MAX_CODE]. */
+ Emchar cid
+ = (XCHARSET_DIMENSION (charset) == 1
+ ?
+ code_point - XCHARSET_BYTE_OFFSET (charset)
+ :
+ ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
+ * XCHARSET_CHARS (charset)
+ + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
+ + XCHARSET_CODE_OFFSET (charset);
+ if ((cid < XCHARSET_MIN_CODE (charset))
+ || (XCHARSET_MAX_CODE (charset) < cid))
+ return -1;
+ return cid;
+ }
+ }
+ else if ((final = XCHARSET_FINAL (charset)) >= '0')
+ {
+ /* No explicit code range: place the charset in the region selected
+ by its dimension and CHARS (MIN_CHAR_94, MIN_CHAR_96,
+ MIN_CHAR_94x94 or MIN_CHAR_96x96), at the slot indexed by
+ (final - '0'). Octets are masked to 7 bits before use. */
+ if (XCHARSET_DIMENSION (charset) == 1)
+ {
+ switch (XCHARSET_CHARS (charset))
+ {
+ case 94:
+ return MIN_CHAR_94
+ + (final - '0') * 94 + ((code_point & 0x7F) - 33);
+ case 96:
+ return MIN_CHAR_96
+ + (final - '0') * 96 + ((code_point & 0x7F) - 32);
+ default:
+ /* Any other CHARS value with a final byte aborts. */
+ abort ();
+ return -1;
+ }
+ }
+ else
+ {
+ switch (XCHARSET_CHARS (charset))
+ {
+ case 94:
+ return MIN_CHAR_94x94
+ + (final - '0') * 94 * 94
+ + (((code_point >> 8) & 0x7F) - 33) * 94
+ + ((code_point & 0x7F) - 33);
+ case 96:
+ return MIN_CHAR_96x96
+ + (final - '0') * 96 * 96
+ + (((code_point >> 8) & 0x7F) - 32) * 96
+ + ((code_point & 0x7F) - 32);
+ default:
+ abort ();
+ return -1;
+ }
+ }
+ }
+ else
+ return -1;
+}
+
+/* Return the code point of character CH in CHARSET, or -1 if none is
+ found. The charset's encoding table is consulted first; otherwise the
+ code point is derived from the mother charset, from the charset's
+ [MIN_CODE, MAX_CODE] range, or from its final byte.
+ DEFINED_ONLY is passed on to the mother lookup; when it is nonzero
+ and CHARSET has no mother, -1 is returned instead of deriving a code
+ point from CH itself. */
+int
+charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
+{
+ Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
+ Lisp_Object ret;
+
+ if ( CHAR_TABLEP (encoding_table)
+ && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
+ ch)) )
+ return XINT (ret);
+ else
+ {
+ Lisp_Object mother = XCHARSET_MOTHER (charset);
+ int min = XCHARSET_MIN_CODE (charset);
+ int max = XCHARSET_MAX_CODE (charset);
+ int code = -1;
+
+ if ( CHARSETP (mother) )
+ {
+ /* A charset with a final byte queries its mother with
+ DEFINED_ONLY forced to 1. */
+ if (XCHARSET_FINAL (charset) >= '0')
+ code = charset_code_point (mother, ch, 1);
+ else
+ code = charset_code_point (mother, ch, defined_only);
+ }
+ else if (defined_only)
+ return -1;
+ /* NOTE(review): this branch is reached only when CHARSETP (mother)
+ is false, so the first disjunct below cannot hold; effectively
+ only the min <= ch <= max test applies. */
+ else if ( ((max == 0) && CHARSETP (mother)
+ && (XCHARSET_FINAL (charset) == 0))
+ || ((min <= ch) && (ch <= max)) )
+ code = ch;
+ if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
+ || ((min <= code) && (code <= max)) )
+ {
+ /* D is the linear offset within this charset; the conversion
+ type decides how it is split into octets. */
+ int d = code - XCHARSET_CODE_OFFSET (charset);
+
+ if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
+ return d;
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
+ return d + 33;
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
+ return d + 32;
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
+ {
+ int row = d / 94;
+ int cell = d % 94 + 33;
+
+ /* Linear rows 0..29 become rows 48..77; later rows are shifted
+ by 50, which skips two row numbers. */
+ if (row < 30)
+ row += 16 + 32;
+ else
+ row += 18 + 32;
+ return (row << 8) | cell;
+ }
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
+ return ((d / 94 + 33) << 8) | (d % 94 + 33);
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
+ return ((d / 96 + 32) << 8) | (d % 96 + 32);
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
+ {
+ int plane = d / (94 * 60) + 33;
+ int row = (d % (94 * 60)) / 94;
+ int cell = d % 94 + 33;
+
+ if (row < 30)
+ row += 16 + 32;
+ else
+ row += 18 + 32;
+ return (plane << 16) | (row << 8) | cell;
+ }
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
+ return
+ ( (d / (94 * 94) + 33) << 16)
+ | ((d / 94 % 94 + 33) << 8)
+ | (d % 94 + 33);
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
+ return
+ ( (d / (96 * 96) + 32) << 16)
+ | ((d / 96 % 96 + 32) << 8)
+ | (d % 96 + 32);
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
+ return
+ ( (d / (94 * 94 * 94) + 33) << 24)
+ | ((d / (94 * 94) % 94 + 33) << 16)
+ | ((d / 94 % 94 + 33) << 8)
+ | (d % 94 + 33);
+ else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
+ return
+ ( (d / (96 * 96 * 96) + 32) << 24)
+ | ((d / (96 * 96) % 96 + 32) << 16)
+ | ((d / 96 % 96 + 32) << 8)
+ | (d % 96 + 32);
+ else
+ {
+ /* Unknown conversion: print a message on stdout and terminate
+ the whole process. */
+ printf ("Unknown CCS-conversion %d is specified!",
+ XCHARSET_CONVERSION (charset));
+ exit (-1);
+ }
+ }
+ /* Fallback for a charset with a final byte and MIN_CODE of 0: map CH
+ out of the region selected by dimension and CHARS, at the slot
+ indexed by (final - '0'). A CH outside that slot falls through to
+ the final return -1. */
+ else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
+ ( XCHARSET_MIN_CODE (charset) == 0 )
+ /*
+ (XCHARSET_CODE_OFFSET (charset) == 0) ||
+ (XCHARSET_CODE_OFFSET (charset)
+ == XCHARSET_MIN_CODE (charset))
+ */ )
+ {
+ int d;
+
+ if (XCHARSET_DIMENSION (charset) == 1)
+ {
+ if (XCHARSET_CHARS (charset) == 94)
+ {
+ if (((d = ch - (MIN_CHAR_94
+ + (XCHARSET_FINAL (charset) - '0') * 94))
+ >= 0)
+ && (d < 94))
+ return d + 33;
+ }
+ else if (XCHARSET_CHARS (charset) == 96)
+ {
+ if (((d = ch - (MIN_CHAR_96
+ + (XCHARSET_FINAL (charset) - '0') * 96))
+ >= 0)
+ && (d < 96))
+ return d + 32;
+ }
+ else
+ return -1;
+ }
+ else if (XCHARSET_DIMENSION (charset) == 2)
+ {
+ if (XCHARSET_CHARS (charset) == 94)
+ {
+ if (((d = ch - (MIN_CHAR_94x94
+ +
+ (XCHARSET_FINAL (charset) - '0') * 94 * 94))
+ >= 0)
+ && (d < 94 * 94))
+ return (((d / 94) + 33) << 8) | (d % 94 + 33);
+ }
+ else if (XCHARSET_CHARS (charset) == 96)
+ {
+ if (((d = ch - (MIN_CHAR_96x96
+ +
+ (XCHARSET_FINAL (charset) - '0') * 96 * 96))
+ >= 0)
+ && (d < 96 * 96))
+ return (((d / 96) + 32) << 8) | (d % 96 + 32);
+ }
+ else
+ return -1;
+ }
+ }
+ }
+ return -1;
+}
+
+/* Pick a builtin coded charset for character C by testing C against
+   successive code ranges, in ascending order.  The chosen charset is
+   stored in *CHARSET and C's code point in that charset is returned.
+   Whenever no narrower charset applies, *CHARSET is set to
+   Vcharset_ucs and C itself is returned.
+
+   Hebrew (ISO 8859-8) and half-width katakana are not special-cased
+   here; such characters take the generic paths below.  */
+int
+encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
+{
+  Emchar off;
+
+  if (c <= MAX_CHAR_BASIC_LATIN)
+    {
+      *charset = Vcharset_ascii;
+      return c;
+    }
+  if (c < 0xA0)
+    {
+      *charset = Vcharset_control_1;
+      return c & 0x7F;
+    }
+  if (c <= 0xff)
+    {
+      *charset = Vcharset_latin_iso8859_1;
+      return c & 0x7F;
+    }
+  if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
+    {
+      *charset = Vcharset_thai_tis620;
+      return c - MIN_CHAR_THAI + 0x20;
+    }
+  if (c <= MAX_CHAR_BMP)
+    {
+      *charset = Vcharset_ucs_bmp;
+      return c;
+    }
+  if (c <= MAX_CHAR_SMP)
+    {
+      *charset = Vcharset_ucs_smp;
+      return c - MIN_CHAR_SMP;
+    }
+  if (c <= MAX_CHAR_SIP)
+    {
+      *charset = Vcharset_ucs_sip;
+      return c - MIN_CHAR_SIP;
+    }
+  if (c < MIN_CHAR_DAIKANWA)
+    {
+      *charset = Vcharset_ucs;
+      return c;
+    }
+  if (c <= MAX_CHAR_DAIKANWA)
+    {
+      *charset = Vcharset_ideograph_daikanwa;
+      return c - MIN_CHAR_DAIKANWA;
+    }
+  if (c < MIN_CHAR_94)
+    {
+      *charset = Vcharset_ucs;
+      return c;
+    }
+
+  /* From here on C lies in one of the regions reserved for charsets
+     identified by (CHARS, DIMENSION, final byte).  The final byte is
+     recovered from C's slot within the region; if no charset is
+     registered for it, fall back to Vcharset_ucs.  */
+  if (c <= MAX_CHAR_94)
+    {
+      off = c - MIN_CHAR_94;
+      *charset = CHARSET_BY_ATTRIBUTES (94, 1,
+                                        (off / 94) + '0',
+                                        CHARSET_LEFT_TO_RIGHT);
+      if (NILP (*charset))
+        {
+          *charset = Vcharset_ucs;
+          return c;
+        }
+      return (off % 94) + 33;
+    }
+  if (c <= MAX_CHAR_96)
+    {
+      off = c - MIN_CHAR_96;
+      *charset = CHARSET_BY_ATTRIBUTES (96, 1,
+                                        (off / 96) + '0',
+                                        CHARSET_LEFT_TO_RIGHT);
+      if (NILP (*charset))
+        {
+          *charset = Vcharset_ucs;
+          return c;
+        }
+      return (off % 96) + 32;
+    }
+  if (c <= MAX_CHAR_94x94)
+    {
+      off = c - MIN_CHAR_94x94;
+      *charset = CHARSET_BY_ATTRIBUTES (94, 2,
+                                        (off / (94 * 94)) + '0',
+                                        CHARSET_LEFT_TO_RIGHT);
+      if (NILP (*charset))
+        {
+          *charset = Vcharset_ucs;
+          return c;
+        }
+      return ((((off / 94) % 94) + 33) << 8)
+        | ((off % 94) + 33);
+    }
+  if (c <= MAX_CHAR_96x96)
+    {
+      off = c - MIN_CHAR_96x96;
+      *charset = CHARSET_BY_ATTRIBUTES (96, 2,
+                                        (off / (96 * 96)) + '0',
+                                        CHARSET_LEFT_TO_RIGHT);
+      if (NILP (*charset))
+        {
+          *charset = Vcharset_ucs;
+          return c;
+        }
+      return ((((off / 96) % 96) + 32) << 8)
+        | ((off % 96) + 32);
+    }
+
+  *charset = Vcharset_ucs;
+  return c;
+}
+
+Lisp_Object Vdefault_coded_charset_priority_list;
+#endif
+
\f
/************************************************************************/
/* Basic charset Lisp functions */
(struct charset_list_closure*) charset_list_closure;
Lisp_Object *charset_list = chcl->charset_list;
- *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
+ *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
return 0;
}
}
DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
-Return the name of the given charset.
+Return the name of charset CHARSET.
*/
(charset))
{
return XCHARSET_NAME (Fget_charset (charset));
}
+/* #### SJT Should generic properties be allowed? */
DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
Define a new character set.
This function is for use with Mule support.
this character set.
'dimension Number of octets used to index a character in this charset.
Either 1 or 2. Defaults to 1.
+ If UTF-2000 feature is enabled, 3 or 4 are also available.
'columns Number of columns used to display a character in this charset.
Only used in TTY mode. (Under X, the actual width of a
character can be derived from the font used to display the
'chars Number of characters in each dimension (94 or 96).
Defaults to 94. Note that if the dimension is 2, the
character set thus described is 94x94 or 96x96.
+ If UTF-2000 feature is enabled, 128 or 256 are also available.
'final Final byte of ISO 2022 escape sequence. Must be
supplied. Each combination of (DIMENSION, CHARS) defines a
separate namespace for final bytes. Note that ISO
is passed the octets of the character, with the high
bit cleared and set depending upon whether the value
of the 'graphic property is 0 or 1.
+'mother [UTF-2000 only] Base coded-charset.
+'min-code [UTF-2000 only] Minimum code-point of a base coded-charset.
+'max-code [UTF-2000 only] Maximum code-point of a base coded-charset.
+'code-offset [UTF-2000 only] Offset for a code-point of a base
+ coded-charset.
+'conversion [UTF-2000 only] Conversion for a code-point of a base
+ coded-charset (94x60 or 94x94x60).
*/
(name, doc_string, props))
{
int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
int direction = CHARSET_LEFT_TO_RIGHT;
- int type;
Lisp_Object registry = Qnil;
Lisp_Object charset;
- Lisp_Object rest, keyword, value;
Lisp_Object ccl_program = Qnil;
Lisp_Object short_name = Qnil, long_name = Qnil;
+ Lisp_Object mother = Qnil;
+ int min_code = 0, max_code = 0, code_offset = 0;
+ int byte_offset = -1;
+ int conversion = 0;
CHECK_SYMBOL (name);
if (!NILP (doc_string))
if (!NILP (charset))
signal_simple_error ("Cannot redefine existing charset", name);
- EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
- {
- if (EQ (keyword, Qshort_name))
- {
- CHECK_STRING (value);
- short_name = value;
- }
-
- if (EQ (keyword, Qlong_name))
- {
- CHECK_STRING (value);
- long_name = value;
- }
-
- else if (EQ (keyword, Qdimension))
- {
- CHECK_INT (value);
- dimension = XINT (value);
- if (dimension < 1 || dimension > 2)
- signal_simple_error ("Invalid value for 'dimension", value);
- }
-
- else if (EQ (keyword, Qchars))
- {
- CHECK_INT (value);
- chars = XINT (value);
- if (chars != 94 && chars != 96)
- signal_simple_error ("Invalid value for 'chars", value);
- }
-
- else if (EQ (keyword, Qcolumns))
- {
- CHECK_INT (value);
- columns = XINT (value);
- if (columns != 1 && columns != 2)
- signal_simple_error ("Invalid value for 'columns", value);
- }
-
- else if (EQ (keyword, Qgraphic))
- {
- CHECK_INT (value);
- graphic = XINT (value);
- if (graphic < 0 || graphic > 1)
- signal_simple_error ("Invalid value for 'graphic", value);
- }
-
- else if (EQ (keyword, Qregistry))
- {
- CHECK_STRING (value);
- registry = value;
- }
-
- else if (EQ (keyword, Qdirection))
- {
- if (EQ (value, Ql2r))
- direction = CHARSET_LEFT_TO_RIGHT;
- else if (EQ (value, Qr2l))
- direction = CHARSET_RIGHT_TO_LEFT;
- else
- signal_simple_error ("Invalid value for 'direction", value);
- }
-
- else if (EQ (keyword, Qfinal))
- {
- CHECK_CHAR_COERCE_INT (value);
- final = XCHAR (value);
- if (final < '0' || final > '~')
- signal_simple_error ("Invalid value for 'final", value);
- }
+ {
+ EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
+ {
+ /* 'short-name and 'long-name must be branches of the same else-if
+ chain as the other properties: with a separate `if' here, a handled
+ 'short-name fell through to the chain's final "Unrecognized
+ property" error. */
+ if (EQ (keyword, Qshort_name))
+ {
+ CHECK_STRING (value);
+ short_name = value;
+ }
+
+ else if (EQ (keyword, Qlong_name))
+ {
+ CHECK_STRING (value);
+ long_name = value;
+ }
+
+ else if (EQ (keyword, Qdimension))
+ {
+ CHECK_INT (value);
+ dimension = XINT (value);
+ if (dimension < 1 ||
+#ifdef UTF2000
+ dimension > 4
+#else
+ dimension > 2
+#endif
+ )
+ signal_simple_error ("Invalid value for 'dimension", value);
+ }
+
+ else if (EQ (keyword, Qchars))
+ {
+ CHECK_INT (value);
+ chars = XINT (value);
+ if (chars != 94 && chars != 96
+#ifdef UTF2000
+ && chars != 128 && chars != 256
+#endif
+ )
+ signal_simple_error ("Invalid value for 'chars", value);
+ }
+
+ else if (EQ (keyword, Qcolumns))
+ {
+ CHECK_INT (value);
+ columns = XINT (value);
+ if (columns != 1 && columns != 2)
+ signal_simple_error ("Invalid value for 'columns", value);
+ }
+
+ else if (EQ (keyword, Qgraphic))
+ {
+ CHECK_INT (value);
+ graphic = XINT (value);
+ if (graphic < 0 ||
+#ifdef UTF2000
+ graphic > 2
+#else
+ graphic > 1
+#endif
+ )
+ signal_simple_error ("Invalid value for 'graphic", value);
+ }
+
+ else if (EQ (keyword, Qregistry))
+ {
+ CHECK_STRING (value);
+ registry = value;
+ }
+
+ else if (EQ (keyword, Qdirection))
+ {
+ if (EQ (value, Ql2r))
+ direction = CHARSET_LEFT_TO_RIGHT;
+ else if (EQ (value, Qr2l))
+ direction = CHARSET_RIGHT_TO_LEFT;
+ else
+ signal_simple_error ("Invalid value for 'direction", value);
+ }
+
+ else if (EQ (keyword, Qfinal))
+ {
+ CHECK_CHAR_COERCE_INT (value);
+ final = XCHAR (value);
+ if (final < '0' || final > '~')
+ signal_simple_error ("Invalid value for 'final", value);
+ }
- else if (EQ (keyword, Qccl_program))
- {
- CHECK_VECTOR (value);
- ccl_program = value;
- }
+#ifdef UTF2000
+ else if (EQ (keyword, Qmother))
+ {
+ mother = Fget_charset (value);
+ }
+
+ else if (EQ (keyword, Qmin_code))
+ {
+ CHECK_INT (value);
+ min_code = XUINT (value);
+ }
+
+ else if (EQ (keyword, Qmax_code))
+ {
+ CHECK_INT (value);
+ max_code = XUINT (value);
+ }
+
+ else if (EQ (keyword, Qcode_offset))
+ {
+ CHECK_INT (value);
+ code_offset = XUINT (value);
+ }
+
+ else if (EQ (keyword, Qconversion))
+ {
+ if (EQ (value, Q94x60))
+ conversion = CONVERSION_94x60;
+ else if (EQ (value, Q94x94x60))
+ conversion = CONVERSION_94x94x60;
+ else
+ signal_simple_error ("Unrecognized conversion", value);
+ }
- else
- signal_simple_error ("Unrecognized property", keyword);
- }
+#endif
+ else if (EQ (keyword, Qccl_program))
+ {
+ struct ccl_program test_ccl;
+
+ if (setup_ccl_program (&test_ccl, value) < 0)
+ signal_simple_error ("Invalid value for 'ccl-program", value);
+ ccl_program = value;
+ }
+
+ else
+ signal_simple_error ("Unrecognized property", keyword);
+ }
+ }
+#ifndef UTF2000
if (!final)
error ("'final must be specified");
+#endif
if (dimension == 2 && final > 0x5F)
signal_simple_error
("Final must be in the range 0x30 - 0x5F for dimension == 2",
make_char (final));
- if (dimension == 1)
- type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
- else
- type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
-
- if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
- !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
+ if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
+ CHARSET_LEFT_TO_RIGHT)) ||
+ !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
+ CHARSET_RIGHT_TO_LEFT)))
error
("Character set already defined for this DIMENSION/CHARS/FINAL combo");
-#ifdef UTF2000
- if (dimension == 1)
- {
- if (chars == 94)
- {
- /* id = CHARSET_ID_OFFSET_94 + final; */
- id = get_unallocated_leading_byte (dimension);
- }
- else if (chars == 96)
- {
- id = get_unallocated_leading_byte (dimension);
- }
- else
- {
- abort ();
- }
- }
- else if (dimension == 2)
- {
- if (chars == 94)
- {
- id = get_unallocated_leading_byte (dimension);
- }
- else if (chars == 96)
- {
- id = get_unallocated_leading_byte (dimension);
- }
- else
- {
- abort ();
- }
- }
- else
- {
- abort ();
- }
-#else
id = get_unallocated_leading_byte (dimension);
-#endif
if (NILP (doc_string))
doc_string = build_string ("");
if (columns == -1)
columns = dimension;
- charset = make_charset (id, name, dimension + 2, type, columns, graphic,
- final, direction, short_name, long_name, doc_string, registry);
+
+ if (byte_offset < 0)
+ {
+ if (chars == 94)
+ byte_offset = 33;
+ else if (chars == 96)
+ byte_offset = 32;
+ else
+ byte_offset = 0;
+ }
+
+ charset = make_charset (id, name, chars, dimension, columns, graphic,
+ final, direction, short_name, long_name,
+ doc_string, registry,
+ Qnil, min_code, max_code, code_offset, byte_offset,
+ mother, conversion);
if (!NILP (ccl_program))
XCHARSET_CCL_PROGRAM (charset) = ccl_program;
return charset;
(charset, new_name))
{
Lisp_Object new_charset = Qnil;
- int id, dimension, columns, graphic, final;
- int direction, type;
+ int id, chars, dimension, columns, graphic, final;
+ int direction;
Lisp_Object registry, doc_string, short_name, long_name;
- struct Lisp_Charset *cs;
+ Lisp_Charset *cs;
charset = Fget_charset (charset);
if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
cs = XCHARSET (charset);
- type = CHARSET_TYPE (cs);
- columns = CHARSET_COLUMNS (cs);
+ chars = CHARSET_CHARS (cs);
dimension = CHARSET_DIMENSION (cs);
+ columns = CHARSET_COLUMNS (cs);
id = get_unallocated_leading_byte (dimension);
graphic = CHARSET_GRAPHIC (cs);
long_name = CHARSET_LONG_NAME (cs);
registry = CHARSET_REGISTRY (cs);
- new_charset = make_charset (id, new_name, dimension + 2, type, columns,
+ new_charset = make_charset (id, new_name, chars, dimension, columns,
graphic, final, direction, short_name, long_name,
- doc_string, registry);
+ doc_string, registry,
+#ifdef UTF2000
+ CHARSET_DECODING_TABLE(cs),
+ CHARSET_MIN_CODE(cs),
+ CHARSET_MAX_CODE(cs),
+ CHARSET_CODE_OFFSET(cs),
+ CHARSET_BYTE_OFFSET(cs),
+ CHARSET_MOTHER(cs),
+ CHARSET_CONVERSION (cs)
+#else
+ Qnil, 0, 0, 0, 0, Qnil, 0
+#endif
+);
CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
return new_charset;
}
+DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
+Define symbol ALIAS as an alias for CHARSET.
+*/
+       (alias, charset))
+{
+  Lisp_Object target;
+
+  CHECK_SYMBOL (alias);
+  /* Resolve CHARSET through Fget_charset, then file the result under
+     ALIAS in the charset hash table.  */
+  target = Fget_charset (charset);
+  return Fputhash (alias, target, Vcharset_hash_table);
+}
+
/* #### Reverse direction charsets not yet implemented. */
#if 0
DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
(dimension, chars, final, direction))
{
int dm, ch, fi, di = -1;
- int type;
Lisp_Object obj = Qnil;
CHECK_INT (dimension);
signal_simple_error
("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
- if (dm == 1)
- type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
- else
- type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
-
- if (di == -1)
+ if (di == -1)
{
- obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
+ obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
if (NILP (obj))
- obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
+ obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
}
else
- obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
+ obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
if (CHARSETP (obj))
return XCHARSET_NAME (obj);
}
DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
-Return property PROP of CHARSET.
+Return property PROP of CHARSET, a charset object or symbol naming a charset.
Recognized properties are those listed in `make-charset', as well as
'name and 'doc-string.
*/
(charset, prop))
{
- struct Lisp_Charset *cs;
+ Lisp_Charset *cs;
charset = Fget_charset (charset);
cs = XCHARSET (charset);
if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
- if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
+ if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
+ Qnil : make_char (CHARSET_FINAL (cs));
if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
if (EQ (prop, Qreverse_direction_charset))
{
Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
- if (NILP (obj))
- return Qnil;
- else
- return XCHARSET_NAME (obj);
+ /* #### Is this translation OK? If so, error checking sufficient? */
+ return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
}
+#ifdef UTF2000
+ if (EQ (prop, Qmother))
+ return CHARSET_MOTHER (cs);
+ if (EQ (prop, Qmin_code))
+ return make_int (CHARSET_MIN_CODE (cs));
+ if (EQ (prop, Qmax_code))
+ return make_int (CHARSET_MAX_CODE (cs));
+#endif
signal_simple_error ("Unrecognized charset property name", prop);
return Qnil; /* not reached */
}
*/
(charset, ccl_program))
{
+ struct ccl_program test_ccl;
+
charset = Fget_charset (charset);
- CHECK_VECTOR (ccl_program);
+ if (setup_ccl_program (&test_ccl, ccl_program) < 0)
+ signal_simple_error ("Invalid ccl-program", ccl_program);
XCHARSET_CCL_PROGRAM (charset) = ccl_program;
return Qnil;
}
}
}
-/* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
Set the 'registry property of CHARSET to REGISTRY.
*/
return Qnil;
}
+#ifdef UTF2000
+DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
+Return mapping-table of CHARSET.
+*/
+       (charset))
+{
+  /* Resolve the argument through Fget_charset before reading its
+     decoding table.  */
+  Lisp_Object resolved = Fget_charset (charset);
+
+  return XCHARSET_DECODING_TABLE (resolved);
+}
+
+DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
+Set mapping-table of CHARSET to TABLE.
+*/
+       (charset, table))
+{
+  /* TABLE must be nil or a vector.  nil just resets the decoding table.
+     A vector is validated with decoding_table_check_elements, the
+     decoding table is reset to nil, and every character found in TABLE
+     is registered through Fput_char_attribute with its code point
+     (presumably that call repopulates the decoding table -- confirm).
+     Returns TABLE.  */
+  Lisp_Charset *cs;
+  size_t i;
+  int byte_offset;
+
+  charset = Fget_charset (charset);
+  cs = XCHARSET (charset);
+
+  if (NILP (table))
+    {
+      CHARSET_DECODING_TABLE(cs) = Qnil;
+      return table;
+    }
+  else if (VECTORP (table))
+    {
+      int ccs_len = CHARSET_BYTE_SIZE (cs);
+      int ret = decoding_table_check_elements (table,
+                                               CHARSET_DIMENSION (cs),
+                                               ccs_len);
+      if (ret)
+        {
+          if (ret == -1)
+            signal_simple_error ("Too big table", table);
+          else if (ret == -2)
+            signal_simple_error ("Invalid element is found", table);
+          else
+            signal_simple_error ("Something wrong", table);
+        }
+      CHARSET_DECODING_TABLE(cs) = Qnil;
+    }
+  else
+    signal_error (Qwrong_type_argument,
+                  list2 (build_translated_string ("vector-or-nil-p"),
+                         table));
+
+  /* Vector index I corresponds to octet value I + byte_offset.  */
+  byte_offset = CHARSET_BYTE_OFFSET (cs);
+  switch (CHARSET_DIMENSION (cs))
+    {
+    case 1:
+      for (i = 0; i < XVECTOR_LENGTH (table); i++)
+        {
+          Lisp_Object c = XVECTOR_DATA(table)[i];
+
+          if (CHARP (c))
+            Fput_char_attribute (c, XCHARSET_NAME (charset),
+                                 make_int (i + byte_offset));
+        }
+      break;
+    case 2:
+      for (i = 0; i < XVECTOR_LENGTH (table); i++)
+        {
+          Lisp_Object v = XVECTOR_DATA(table)[i];
+
+          if (VECTORP (v))
+            {
+              size_t j;
+
+              /* Row vector: the first octet comes from I, the second
+                 from J.  */
+              for (j = 0; j < XVECTOR_LENGTH (v); j++)
+                {
+                  Lisp_Object c = XVECTOR_DATA(v)[j];
+
+                  if (CHARP (c))
+                    Fput_char_attribute
+                      (c, XCHARSET_NAME (charset),
+                       make_int ( ( (i + byte_offset) << 8 )
+                                  | (j + byte_offset)
+                                  ) );
+                }
+            }
+          else if (CHARP (v))
+            Fput_char_attribute (v, XCHARSET_NAME (charset),
+                                 make_int (i + byte_offset));
+        }
+      break;
+    default:
+      /* Only dimensions 1 and 2 are populated; for any other dimension
+         the decoding table is left reset to nil.  */
+      break;
+    }
+  return table;
+}
+#endif
+
\f
/************************************************************************/
/* Lisp primitives for working with characters */
/************************************************************************/
+#ifdef UTF2000
+DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
+Make a character from CHARSET and code-point CODE.
+If DEFINED_ONLY is non-nil, builtin character is not returned.
+If corresponding character is not found, nil is returned.
+*/
+       (charset, code, defined_only))
+{
+  int code_point;
+  int decoded;
+
+  charset = Fget_charset (charset);
+  CHECK_INT (code);
+  code_point = XINT (code);
+
+  /* For a charset whose 'graphic property is 1, clear the high bit of
+     every octet before decoding.  */
+  if (XCHARSET_GRAPHIC (charset) == 1)
+    code_point &= 0x7F7F7F7F;
+
+  if (!NILP (defined_only))
+    decoded = decode_defined_char (charset, code_point);
+  else
+    decoded = DECODE_CHAR (charset, code_point);
+
+  /* A negative result means no character was found.  */
+  if (decoded < 0)
+    return Qnil;
+  return make_char (decoded);
+}
+
+DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
+Make a builtin character from CHARSET and code-point CODE.
+*/
+       (charset, code))
+{
+  int builtin;
+
+  charset = Fget_charset (charset);
+  CHECK_INT (code);
+
+  /* latin-viscii is redirected: if the decoded character has a code in
+     latin-viscii-lower (tried first) or latin-viscii-upper, that
+     charset and code are used from here on.  */
+  if (EQ (charset, Vcharset_latin_viscii))
+    {
+      Lisp_Object viscii_char = Fdecode_char (charset, code, Qnil);
+
+      if (!NILP (viscii_char))
+        {
+          Lisp_Object half_code
+            = Fget_char_attribute (viscii_char,
+                                   Vcharset_latin_viscii_lower,
+                                   Qnil);
+
+          if (!NILP (half_code))
+            {
+              charset = Vcharset_latin_viscii_lower;
+              code = half_code;
+            }
+          else
+            {
+              half_code = Fget_char_attribute (viscii_char,
+                                               Vcharset_latin_viscii_upper,
+                                               Qnil);
+              if (!NILP (half_code))
+                {
+                  charset = Vcharset_latin_viscii_upper;
+                  code = half_code;
+                }
+            }
+        }
+    }
+
+  builtin = XINT (code);
+#if 0
+  if (XCHARSET_GRAPHIC (charset) == 1)
+    builtin &= 0x7F7F7F7F;
+#endif
+  builtin = decode_builtin_char (charset, builtin);
+
+  /* When no builtin character can be computed, fall back to the
+     ordinary decoding path.  */
+  if (builtin >= 0)
+    return make_char (builtin);
+  return Fdecode_char (charset, code, Qnil);
+}
+#endif
+
DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
Make a character from CHARSET and octets ARG1 and ARG2.
ARG2 is required only for characters from two-dimensional charsets.
*/
(charset, arg1, arg2))
{
- struct Lisp_Charset *cs;
+ Lisp_Charset *cs;
int a1, a2;
int lowlim, highlim;
CHECK_INT (arg1);
/* It is useful (and safe, according to Olivier Galibert) to strip
- the 8th bit off ARG1 and ARG2 becaue it allows programmers to
+ the 8th bit off ARG1 and ARG2 because it allows programmers to
write (make-char 'latin-iso8859-2 CODE) where code is the actual
Latin 2 code of the character. */
#ifdef UTF2000
}
DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
-Return the character set of char CH.
+Return the character set of CHARACTER.
*/
- (ch))
+ (character))
+{
+ CHECK_CHAR_COERCE_INT (character);
+
+ return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
+}
+
+DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
+Return the octet numbered N (should be 0 or 1) of CHARACTER.
+N defaults to 0 if omitted.
+*/
+       (character, n))
+{
+  Lisp_Object owner;
+  int first, second;
+
+  CHECK_CHAR_COERCE_INT (character);
+
+  /* Split the character into its charset and two octets; only the
+     octets are used below.  */
+  BREAKUP_CHAR (XCHAR (character), owner, first, second);
+
+  if (NILP (n) || EQ (n, Qzero))
+    return make_int (first);
+  if (EQ (n, make_int (1)))
+    return make_int (second);
+
+  signal_simple_error ("Octet number must be 0 or 1", n);
+}
+
+#ifdef UTF2000
+DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
+Return code-point of CHARACTER in specified CHARSET.
+*/
+ (character, charset, defined_only))
{
- CHECK_CHAR_COERCE_INT (ch);
+ int code_point;
- return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
+ CHECK_CHAR_COERCE_INT (character);
+ charset = Fget_charset (charset);
+ code_point = charset_code_point (charset, XCHAR (character),
+ !NILP (defined_only));
+ if (code_point >= 0)
+ return make_int (code_point);
+ else
+ return Qnil;
}
+#endif
DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
-Return list of charset and one or two position-codes of CHAR.
+Return list of charset and one or two position-codes of CHARACTER.
*/
(character))
{
struct gcpro gcpro1, gcpro2;
Lisp_Object charset = Qnil;
Lisp_Object rc = Qnil;
+#ifdef UTF2000
+ int code_point;
+ int dimension;
+#else
int c1, c2;
+#endif
GCPRO2 (charset, rc);
CHECK_CHAR_COERCE_INT (character);
+#ifdef UTF2000
+ code_point = ENCODE_CHAR (XCHAR (character), charset);
+ dimension = XCHARSET_DIMENSION (charset);
+ while (dimension > 0)
+ {
+ rc = Fcons (make_int (code_point & 255), rc);
+ code_point >>= 8;
+ dimension--;
+ }
+ rc = Fcons (XCHARSET_NAME (charset), rc);
+#else
BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
{
rc = list2 (XCHARSET_NAME (charset), make_int (c1));
}
+#endif
UNGCPRO;
return rc;
void
syms_of_mule_charset (void)
{
+ INIT_LRECORD_IMPLEMENTATION (charset);
+
DEFSUBR (Fcharsetp);
DEFSUBR (Ffind_charset);
DEFSUBR (Fget_charset);
DEFSUBR (Fmake_charset);
DEFSUBR (Fmake_reverse_direction_charset);
/* DEFSUBR (Freverse_direction_charset); */
+ DEFSUBR (Fdefine_charset_alias);
DEFSUBR (Fcharset_from_attributes);
DEFSUBR (Fcharset_short_name);
DEFSUBR (Fcharset_long_name);
DEFSUBR (Fcharset_id);
DEFSUBR (Fset_charset_ccl_program);
DEFSUBR (Fset_charset_registry);
+#ifdef UTF2000
+ DEFSUBR (Fcharset_mapping_table);
+ DEFSUBR (Fset_charset_mapping_table);
+#endif
+#ifdef UTF2000
+ DEFSUBR (Fdecode_char);
+ DEFSUBR (Fdecode_builtin_char);
+ DEFSUBR (Fencode_char);
+#endif
DEFSUBR (Fmake_char);
DEFSUBR (Fchar_charset);
+ DEFSUBR (Fchar_octet);
DEFSUBR (Fsplit_char);
#ifdef ENABLE_COMPOSITE_CHARS
defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
defsymbol (&Qshort_name, "short-name");
defsymbol (&Qlong_name, "long-name");
+#ifdef UTF2000
+ defsymbol (&Qmother, "mother");
+ defsymbol (&Qmin_code, "min-code");
+ defsymbol (&Qmax_code, "max-code");
+ defsymbol (&Qcode_offset, "code-offset");
+ defsymbol (&Qconversion, "conversion");
+ defsymbol (&Q94x60, "94x60");
+ defsymbol (&Q94x94x60, "94x94x60");
+#endif
defsymbol (&Ql2r, "l2r");
defsymbol (&Qr2l, "r2l");
defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
defsymbol (&Qchinese_gb2312, "chinese-gb2312");
+ defsymbol (&Qchinese_gb12345, "chinese-gb12345");
defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
+ defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
#ifdef UTF2000
- defsymbol (&Qchinese_cns11643_3, "chinese-cns11643-3");
- defsymbol (&Qchinese_cns11643_4, "chinese-cns11643-4");
- defsymbol (&Qchinese_cns11643_5, "chinese-cns11643-5");
- defsymbol (&Qchinese_cns11643_6, "chinese-cns11643-6");
- defsymbol (&Qchinese_cns11643_7, "chinese-cns11643-7");
+ defsymbol (&Qucs, "ucs");
defsymbol (&Qucs_bmp, "ucs-bmp");
- defsymbol (&Qlatin_viscii_lower, "vietnamese-viscii-lower");
- defsymbol (&Qlatin_viscii_upper, "vietnamese-viscii-upper");
+ defsymbol (&Qucs_smp, "ucs-smp");
+ defsymbol (&Qucs_sip, "ucs-sip");
+ defsymbol (&Qucs_gb, "ucs-gb");
+ defsymbol (&Qucs_cns, "ucs-cns");
+ defsymbol (&Qucs_jis, "ucs-jis");
+ defsymbol (&Qucs_ks, "ucs-ks");
+ defsymbol (&Qucs_big5, "ucs-big5");
+ defsymbol (&Qlatin_viscii, "latin-viscii");
+ defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
+ defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
+ defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
+ defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
+ defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
+ defsymbol (&Qjis_x0208, "=jis-x0208");
+ defsymbol (&Qideograph_gt, "ideograph-gt");
+ defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
+ defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
+ defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
+ defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
+ defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
+ defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
+ defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
+ defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
+ defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
+ defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
+ defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
+ defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
+ defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
+ defsymbol (&Qchinese_big5, "chinese-big5");
+ /* defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp"); */
+ defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
+ defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
+ defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
+ defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
+ defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
+ defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
+ defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
+ defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
+ defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
+ defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
+ defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
+ defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
+ defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
+ defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
#endif
defsymbol (&Qchinese_big5_1, "chinese-big5-1");
defsymbol (&Qchinese_big5_2, "chinese-big5-2");
int k;
#endif
+ chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
+ dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
+
/* Table of charsets indexed by leading byte. */
- for (i = 0; i < countof (charset_by_leading_byte); i++)
- charset_by_leading_byte[i] = Qnil;
+ for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
+ chlook->charset_by_leading_byte[i] = Qnil;
#ifdef UTF2000
/* Table of charsets indexed by type/final-byte. */
- for (i = 0; i < countof (charset_by_attributes); i++)
- for (j = 0; j < countof (charset_by_attributes[0]); j++)
- charset_by_attributes[i][j] = Qnil;
+ for (i = 0; i < countof (chlook->charset_by_attributes); i++)
+ for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
+ chlook->charset_by_attributes[i][j] = Qnil;
#else
/* Table of charsets indexed by type/final-byte/direction. */
- for (i = 0; i < countof (charset_by_attributes); i++)
- for (j = 0; j < countof (charset_by_attributes[0]); j++)
- for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
- charset_by_attributes[i][j][k] = Qnil;
+ for (i = 0; i < countof (chlook->charset_by_attributes); i++)
+ for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
+ for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
+ chlook->charset_by_attributes[i][j][k] = Qnil;
#endif
- next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
#ifdef UTF2000
- next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
+ chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
#else
- next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
+ chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
+ chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
#endif
#ifndef UTF2000
#endif
#ifdef UTF2000
- Vutf_2000_version = build_string("0.7 (Hirano)");
- DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
-Version number of UTF-2000.
+ Vdefault_coded_charset_priority_list = Qnil;
+ DEFVAR_LISP ("default-coded-charset-priority-list",
+ &Vdefault_coded_charset_priority_list /*
+Default order of preferred coded-character-sets.
*/ );
#endif
}
ease of access. */
#ifdef UTF2000
+ staticpro (&Vcharset_ucs);
+ Vcharset_ucs =
+ make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
+ 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("UCS"),
+ build_string ("UCS"),
+ build_string ("ISO/IEC 10646"),
+ build_string (""),
+ Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ucs_bmp);
Vcharset_ucs_bmp =
- make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 1,
- CHARSET_TYPE_256X256, 1, 0, 0,
- CHARSET_LEFT_TO_RIGHT,
- build_string ("BMP"),
+ make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
+ 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
build_string ("BMP"),
- build_string ("BMP"),
- build_string (""));
+ build_string ("UCS-BMP"),
+ build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
+ build_string
+ ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
+ Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ucs_smp);
+ Vcharset_ucs_smp =
+ make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
+ 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("SMP"),
+ build_string ("UCS-SMP"),
+ build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
+ build_string ("UCS00-1"),
+ Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
+ MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ucs_sip);
+ Vcharset_ucs_sip =
+ make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("SIP"),
+ build_string ("UCS-SIP"),
+ build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
+ build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
+ Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
+ MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ucs_gb);
+ Vcharset_ucs_gb =
+ make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("UCS for GB"),
+ build_string ("UCS for GB"),
+ build_string ("ISO/IEC 10646 for GB"),
+ build_string (""),
+ Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ucs_cns);
+ Vcharset_ucs_cns =
+ make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("UCS for CNS"),
+ build_string ("UCS for CNS 11643"),
+ build_string ("ISO/IEC 10646 for CNS 11643"),
+ build_string (""),
+ Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ucs_jis);
+ Vcharset_ucs_jis =
+ make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("UCS for JIS"),
+ build_string ("UCS for JIS X 0208, 0212 and 0213"),
+ build_string
+ ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
+ build_string (""),
+ Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ucs_ks);
+ Vcharset_ucs_ks =
+ make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("UCS for KS"),
+ build_string ("UCS for CCS defined by KS"),
+ build_string ("ISO/IEC 10646 for Korean Standards"),
+ build_string (""),
+ Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ucs_big5);
+ Vcharset_ucs_big5 =
+ make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("UCS for Big5"),
+ build_string ("UCS for Big5"),
+ build_string ("ISO/IEC 10646 for Big5"),
+ build_string (""),
+ Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
+#else
+# define MIN_CHAR_THAI 0
+# define MAX_CHAR_THAI 0
+ /* # define MIN_CHAR_HEBREW 0 */
+ /* # define MAX_CHAR_HEBREW 0 */
+# define MIN_CHAR_HALFWIDTH_KATAKANA 0
+# define MAX_CHAR_HALFWIDTH_KATAKANA 0
#endif
+ /* ASCII charset object: 94 characters, dimension 1, code range
+    0..0x7F.  The long name previously read "ASCII)" with a stray
+    closing parenthesis; it is now plain "ASCII". */
+ staticpro (&Vcharset_ascii);
Vcharset_ascii =
- make_charset (LEADING_BYTE_ASCII, Qascii, 1,
- CHARSET_TYPE_94, 1, 0, 'B',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
+ 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
build_string ("ASCII"),
- build_string ("ASCII)"),
+ build_string ("ASCII"),
build_string ("ASCII (ISO646 IRV)"),
- build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
+ build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
+ Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ /* C1 control charset object.  The code range registered below is
+    0x80..0x9F, i.e. 128-159, so the doc string now states that range
+    instead of the former "128-191". */
+ staticpro (&Vcharset_control_1);
Vcharset_control_1 =
- make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
- CHARSET_TYPE_94, 1, 1, 0,
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
+ 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
build_string ("C1"),
build_string ("Control characters"),
- build_string ("Control characters 128-191"),
+ build_string ("Control characters 128-159"),
- build_string (""));
+ build_string (""),
+ Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_latin_iso8859_1);
Vcharset_latin_iso8859_1 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
- CHARSET_TYPE_96, 1, 1, 'A',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
+ 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-1"),
build_string ("ISO8859-1 (Latin-1)"),
build_string ("ISO8859-1 (Latin-1)"),
- build_string ("iso8859-1"));
+ build_string ("iso8859-1"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_latin_iso8859_2);
Vcharset_latin_iso8859_2 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
- CHARSET_TYPE_96, 1, 1, 'B',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
+ 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-2"),
build_string ("ISO8859-2 (Latin-2)"),
build_string ("ISO8859-2 (Latin-2)"),
- build_string ("iso8859-2"));
+ build_string ("iso8859-2"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_latin_iso8859_3);
Vcharset_latin_iso8859_3 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
- CHARSET_TYPE_96, 1, 1, 'C',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
+ 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-3"),
build_string ("ISO8859-3 (Latin-3)"),
build_string ("ISO8859-3 (Latin-3)"),
- build_string ("iso8859-3"));
+ build_string ("iso8859-3"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_latin_iso8859_4);
Vcharset_latin_iso8859_4 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
- CHARSET_TYPE_96, 1, 1, 'D',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
+ 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-4"),
build_string ("ISO8859-4 (Latin-4)"),
build_string ("ISO8859-4 (Latin-4)"),
- build_string ("iso8859-4"));
+ build_string ("iso8859-4"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_thai_tis620);
Vcharset_thai_tis620 =
- make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
- CHARSET_TYPE_96, 1, 1, 'T',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
+ 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
build_string ("TIS620"),
build_string ("TIS620 (Thai)"),
build_string ("TIS620.2529 (Thai)"),
- build_string ("tis620"));
+ build_string ("tis620"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_greek_iso8859_7);
Vcharset_greek_iso8859_7 =
- make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
- CHARSET_TYPE_96, 1, 1, 'F',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
+ 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
build_string ("ISO8859-7"),
build_string ("ISO8859-7 (Greek)"),
build_string ("ISO8859-7 (Greek)"),
- build_string ("iso8859-7"));
+ build_string ("iso8859-7"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_arabic_iso8859_6);
Vcharset_arabic_iso8859_6 =
- make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
- CHARSET_TYPE_96, 1, 1, 'G',
- CHARSET_RIGHT_TO_LEFT,
+ make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
+ 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
build_string ("ISO8859-6"),
build_string ("ISO8859-6 (Arabic)"),
build_string ("ISO8859-6 (Arabic)"),
- build_string ("iso8859-6"));
+ build_string ("iso8859-6"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_hebrew_iso8859_8);
Vcharset_hebrew_iso8859_8 =
- make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
- CHARSET_TYPE_96, 1, 1, 'H',
- CHARSET_RIGHT_TO_LEFT,
+ make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
+ 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
build_string ("ISO8859-8"),
build_string ("ISO8859-8 (Hebrew)"),
build_string ("ISO8859-8 (Hebrew)"),
- build_string ("iso8859-8"));
+ build_string ("iso8859-8"),
+ Qnil,
+ 0 /* MIN_CHAR_HEBREW */,
+ 0 /* MAX_CHAR_HEBREW */, 0, 32,
+ Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_katakana_jisx0201);
Vcharset_katakana_jisx0201 =
- make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
- CHARSET_TYPE_94, 1, 1, 'I',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
+ 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
build_string ("JISX0201 Kana"),
build_string ("JISX0201.1976 (Japanese Kana)"),
build_string ("JISX0201.1976 Japanese Kana"),
- build_string ("jisx0201.1976"));
+ build_string ("jisx0201\\.1976"),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_latin_jisx0201);
Vcharset_latin_jisx0201 =
- make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
- CHARSET_TYPE_94, 1, 0, 'J',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
+ 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
build_string ("JISX0201 Roman"),
build_string ("JISX0201.1976 (Japanese Roman)"),
build_string ("JISX0201.1976 Japanese Roman"),
- build_string ("jisx0201.1976"));
+ build_string ("jisx0201\\.1976"),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_cyrillic_iso8859_5);
Vcharset_cyrillic_iso8859_5 =
- make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
- CHARSET_TYPE_96, 1, 1, 'L',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
+ 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
build_string ("ISO8859-5"),
build_string ("ISO8859-5 (Cyrillic)"),
build_string ("ISO8859-5 (Cyrillic)"),
- build_string ("iso8859-5"));
+ build_string ("iso8859-5"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_latin_iso8859_9);
Vcharset_latin_iso8859_9 =
- make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
- CHARSET_TYPE_96, 1, 1, 'M',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
+ 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
build_string ("Latin-5"),
build_string ("ISO8859-9 (Latin-5)"),
build_string ("ISO8859-9 (Latin-5)"),
- build_string ("iso8859-9"));
+ build_string ("iso8859-9"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+#ifdef UTF2000
+ staticpro (&Vcharset_jis_x0208);
+ Vcharset_jis_x0208 =
+ make_charset (LEADING_BYTE_JIS_X0208,
+ Qjis_x0208, 94, 2,
+ 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
+ build_string ("JIS X0208"),
+ build_string ("JIS X0208 Common"),
+ build_string ("JIS X0208 Common part"),
+ build_string ("jisx0208\\.1990"),
+ Qnil,
+ MIN_CHAR_JIS_X0208_1990,
+ MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
+ Qnil, CONVERSION_94x94);
+#endif
+ staticpro (&Vcharset_japanese_jisx0208_1978);
Vcharset_japanese_jisx0208_1978 =
- make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
- CHARSET_TYPE_94X94, 2, 0, '@',
- CHARSET_LEFT_TO_RIGHT,
- build_string ("JISX0208.1978"),
- build_string ("JISX0208.1978 (Japanese)"),
+ make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
+ Qjapanese_jisx0208_1978, 94, 2,
+ 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
+ build_string ("JIS X0208:1978"),
+ build_string ("JIS X0208:1978 (Japanese)"),
build_string
- ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
- build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
+ ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
+ build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
+ Qnil, 0, 0, 0, 33,
+#ifdef UTF2000
+ Vcharset_jis_x0208,
+#else
+ Qnil,
+#endif
+ CONVERSION_IDENTICAL);
+ /* GB2312 charset object: 94x94 set (94 characters, dimension 2).
+    The long name previously read "GB2312)" with a stray closing
+    parenthesis; it is now plain "GB2312". */
+ staticpro (&Vcharset_chinese_gb2312);
Vcharset_chinese_gb2312 =
- make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
- CHARSET_TYPE_94X94, 2, 0, 'A',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
+ 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
build_string ("GB2312"),
- build_string ("GB2312)"),
+ build_string ("GB2312"),
build_string ("GB2312 Chinese simplified"),
- build_string ("gb2312"));
+ build_string ("gb2312"),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
+ /* GB 12345 charset object: 94x94 set (94 characters, dimension 2).
+    The long name is "GB 12345" without the stray closing parenthesis
+    it carried before. */
+ staticpro (&Vcharset_chinese_gb12345);
+ Vcharset_chinese_gb12345 =
+ make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
+ 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("G1"),
+ build_string ("GB 12345"),
+ build_string ("GB 12345-1990"),
+ build_string ("GB12345\\(\\.1990\\)?-0"),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_japanese_jisx0208);
Vcharset_japanese_jisx0208 =
- make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
- CHARSET_TYPE_94X94, 2, 0, 'B',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
+ 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
build_string ("JISX0208"),
- build_string ("JISX0208.1983/1990 (Japanese)"),
- build_string ("JISX0208.1983/1990 Japanese Kanji"),
- build_string ("jisx0208.19\\(83\\|90\\)"));
+ build_string ("JIS X0208:1983 (Japanese)"),
+ build_string ("JIS X0208:1983 Japanese Kanji"),
+ build_string ("jisx0208\\.1983"),
+ Qnil, 0, 0, 0, 33,
+#ifdef UTF2000
+ Vcharset_jis_x0208,
+#else
+ Qnil,
+#endif
+ CONVERSION_IDENTICAL);
+#ifdef UTF2000
+ staticpro (&Vcharset_japanese_jisx0208_1990);
+ Vcharset_japanese_jisx0208_1990 =
+ make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
+ Qjapanese_jisx0208_1990, 94, 2,
+ 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("JISX0208-1990"),
+ build_string ("JIS X0208:1990 (Japanese)"),
+ build_string ("JIS X0208:1990 Japanese Kanji"),
+ build_string ("jisx0208\\.1990"),
+ Qnil,
+ 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
+ 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
+ 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
+ Vcharset_jis_x0208 /* Qnil */,
+ CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
+#endif
+ /* KSC5601 charset object: 94x94 set (94 characters, dimension 2).
+    The long name previously read "KSC5601 (Korean" with the closing
+    parenthesis missing; it is now balanced. */
+ staticpro (&Vcharset_korean_ksc5601);
Vcharset_korean_ksc5601 =
- make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
- CHARSET_TYPE_94X94, 2, 0, 'C',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
+ 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
build_string ("KSC5601"),
- build_string ("KSC5601 (Korean"),
+ build_string ("KSC5601 (Korean)"),
build_string ("KSC5601 Korean Hangul and Hanja"),
- build_string ("ksc5601"));
+ build_string ("ksc5601"),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_japanese_jisx0212);
Vcharset_japanese_jisx0212 =
- make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
- CHARSET_TYPE_94X94, 2, 0, 'D',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
+ 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
build_string ("JISX0212"),
build_string ("JISX0212 (Japanese)"),
build_string ("JISX0212 Japanese Supplement"),
- build_string ("jisx0212"));
+ build_string ("jisx0212"),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
#define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
+ staticpro (&Vcharset_chinese_cns11643_1);
Vcharset_chinese_cns11643_1 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
- CHARSET_TYPE_94X94, 2, 0, 'G',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
+ 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-1"),
build_string ("CNS11643-1 (Chinese traditional)"),
build_string
("CNS 11643 Plane 1 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("1")));
+ build_string (CHINESE_CNS_PLANE_RE("1")),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_chinese_cns11643_2);
Vcharset_chinese_cns11643_2 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
- CHARSET_TYPE_94X94, 2, 0, 'H',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
+ 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
build_string ("CNS11643-2"),
build_string ("CNS11643-2 (Chinese traditional)"),
build_string
("CNS 11643 Plane 2 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("2")));
+ build_string (CHINESE_CNS_PLANE_RE("2")),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
#ifdef UTF2000
- Vcharset_chinese_cns11643_3 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_3, Qchinese_cns11643_3, 3,
- CHARSET_TYPE_94X94, 2, 0, 'I',
- CHARSET_LEFT_TO_RIGHT,
- build_string ("CNS11643-3"),
- build_string ("CNS11643-3 (Chinese traditional)"),
- build_string
- ("CNS 11643 Plane 3 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("3")));
- Vcharset_chinese_cns11643_4 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_4, Qchinese_cns11643_4, 3,
- CHARSET_TYPE_94X94, 2, 0, 'J',
- CHARSET_LEFT_TO_RIGHT,
- build_string ("CNS11643-4"),
- build_string ("CNS11643-4 (Chinese traditional)"),
- build_string
- ("CNS 11643 Plane 4 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("4")));
- Vcharset_chinese_cns11643_5 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_5, Qchinese_cns11643_5, 3,
- CHARSET_TYPE_94X94, 2, 0, 'K',
- CHARSET_LEFT_TO_RIGHT,
- build_string ("CNS11643-5"),
- build_string ("CNS11643-5 (Chinese traditional)"),
- build_string
- ("CNS 11643 Plane 5 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("5")));
- Vcharset_chinese_cns11643_6 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_6, Qchinese_cns11643_6, 3,
- CHARSET_TYPE_94X94, 2, 0, 'L',
- CHARSET_LEFT_TO_RIGHT,
- build_string ("CNS11643-6"),
- build_string ("CNS11643-6 (Chinese traditional)"),
- build_string
- ("CNS 11643 Plane 6 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("6")));
- Vcharset_chinese_cns11643_7 =
- make_charset (LEADING_BYTE_CHINESE_CNS11643_7, Qchinese_cns11643_7, 3,
- CHARSET_TYPE_94X94, 2, 0, 'M',
- CHARSET_LEFT_TO_RIGHT,
- build_string ("CNS11643-7"),
- build_string ("CNS11643-7 (Chinese traditional)"),
- build_string
- ("CNS 11643 Plane 7 Chinese traditional"),
- build_string (CHINESE_CNS_PLANE_RE("7")));
+ /* TCVN 5712 (VSCII-2) charset object: 96 characters, dimension 1.
+    The doc string names the 1993 edition so that it agrees with the
+    registry regexp on the following line, which matches ".1993". */
+ staticpro (&Vcharset_latin_tcvn5712);
+ Vcharset_latin_tcvn5712 =
+ make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
+ 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
+ build_string ("TCVN 5712"),
+ build_string ("TCVN 5712 (VSCII-2)"),
+ build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
+ build_string ("tcvn5712\\(\\.1993\\)?-1"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_latin_viscii_lower);
Vcharset_latin_viscii_lower =
- make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 2,
- CHARSET_TYPE_96, 1, 1, '1',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
+ 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
build_string ("VISCII lower"),
build_string ("VISCII lower (Vietnamese)"),
build_string ("VISCII lower (Vietnamese)"),
- build_string ("VISCII1.1"));
+ build_string ("MULEVISCII-LOWER"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_latin_viscii_upper);
Vcharset_latin_viscii_upper =
- make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 2,
- CHARSET_TYPE_96, 1, 1, '2',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
+ 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
build_string ("VISCII upper"),
build_string ("VISCII upper (Vietnamese)"),
build_string ("VISCII upper (Vietnamese)"),
- build_string ("VISCII1.1"));
+ build_string ("MULEVISCII-UPPER"),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_latin_viscii);
+ Vcharset_latin_viscii =
+ make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
+ 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("VISCII"),
+ build_string ("VISCII 1.1 (Vietnamese)"),
+ build_string ("VISCII 1.1 (Vietnamese)"),
+ build_string ("VISCII1\\.1"),
+ Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_chinese_big5);
+ Vcharset_chinese_big5 =
+ make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("Big5"),
+ build_string ("Big5"),
+ build_string ("Big5 Chinese traditional"),
+ build_string ("big5-0"),
+ Qnil,
+ MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
+ MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
+#if 0
+ staticpro (&Vcharset_chinese_big5_cdp);
+ Vcharset_chinese_big5_cdp =
+ make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("Big5-CDP"),
+ build_string ("Big5 + CDP extension"),
+ build_string ("Big5 with CDP extension"),
+ build_string ("big5\\.cdp-0"),
+ Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
+ MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
+#endif
/* DEF_HANZIKU (n): create the charset object for HANZIKU part N.
   `##n' is pasted onto the variable, leading-byte, symbol and
   MIN_CHAR_/MAX_CHAR_ names; `#n' is stringized into the short name,
   long name, doc string and registry regexp.
   The expansion is two statements (staticpro, then the assignment) and
   is not wrapped in a block, so it must only be used where a plain
   statement sequence is valid.  It already ends in `;', so the `;'
   after each invocation below is an empty statement. */
+#define DEF_HANZIKU(n) \
+ staticpro (&Vcharset_ideograph_hanziku_##n); \
+ Vcharset_ideograph_hanziku_##n = \
+ make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
+ build_string ("HZK-"#n), \
+ build_string ("HANZIKU-"#n), \
+ build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
+ build_string \
+ ("hanziku-"#n"$"), \
+ Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
+ MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
/* Instantiate HANZIKU parts 1 through 12. */
+ DEF_HANZIKU (1);
+ DEF_HANZIKU (2);
+ DEF_HANZIKU (3);
+ DEF_HANZIKU (4);
+ DEF_HANZIKU (5);
+ DEF_HANZIKU (6);
+ DEF_HANZIKU (7);
+ DEF_HANZIKU (8);
+ DEF_HANZIKU (9);
+ DEF_HANZIKU (10);
+ DEF_HANZIKU (11);
+ DEF_HANZIKU (12);
+ staticpro (&Vcharset_ideograph_cbeta);
+ Vcharset_ideograph_cbeta =
+ make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("CB"),
+ build_string ("CBETA"),
+ build_string ("CBETA private characters"),
+ build_string ("cbeta-0"),
+ Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
+ MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ideograph_gt);
+ Vcharset_ideograph_gt =
+ make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("GT"),
+ build_string ("GT"),
+ build_string ("GT"),
+ build_string (""),
+ Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
+ MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_GT_PJ (n): create the charset object for GT "pseudo JIS" part N
   as a 94-character, dimension-2 set.  `##n' is pasted onto the
   variable, leading-byte and symbol names; `#n' is stringized into the
   names, doc string and registry regexp.
   The expansion is two statements (staticpro, then the assignment) and
   is not wrapped in a block, so it must only be used where a plain
   statement sequence is valid.  It already ends in `;', so the `;'
   after each invocation below is an empty statement. */
+#define DEF_GT_PJ(n) \
+ staticpro (&Vcharset_ideograph_gt_pj_##n); \
+ Vcharset_ideograph_gt_pj_##n = \
+ make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
+ 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
+ build_string ("GT-PJ-"#n), \
+ build_string ("GT (pseudo JIS encoding) part "#n), \
+ build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
+ build_string \
+ ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* Instantiate GT-PJ parts 1 through 11. */
+ DEF_GT_PJ (1);
+ DEF_GT_PJ (2);
+ DEF_GT_PJ (3);
+ DEF_GT_PJ (4);
+ DEF_GT_PJ (5);
+ DEF_GT_PJ (6);
+ DEF_GT_PJ (7);
+ DEF_GT_PJ (8);
+ DEF_GT_PJ (9);
+ DEF_GT_PJ (10);
+ DEF_GT_PJ (11);
+
+ staticpro (&Vcharset_ideograph_daikanwa_2);
+ Vcharset_ideograph_daikanwa_2 =
+ make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("Daikanwa Rev."),
+ build_string ("Morohashi's Daikanwa Rev."),
+ build_string
+ ("Daikanwa dictionary (revised version)"),
+ build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
+ Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_ideograph_daikanwa);
+ Vcharset_ideograph_daikanwa =
+ make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("Daikanwa"),
+ build_string ("Morohashi's Daikanwa Rev.2"),
+ build_string
+ ("Daikanwa dictionary (second revised version)"),
+ build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
+ Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
+ MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
+
+ staticpro (&Vcharset_ethiopic_ucs);
+ Vcharset_ethiopic_ucs =
+ make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
+ 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
+ build_string ("Ethiopic (UCS)"),
+ build_string ("Ethiopic (UCS)"),
+ build_string ("Ethiopic of UCS"),
+ build_string ("Ethiopic-Unicode"),
+ Qnil, 0x1200, 0x137F, 0, 0,
+ Qnil, CONVERSION_IDENTICAL);
#endif
+ staticpro (&Vcharset_chinese_big5_1);
Vcharset_chinese_big5_1 =
- make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
- CHARSET_TYPE_94X94, 2, 0, '0',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
+ 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
build_string ("Big5"),
build_string ("Big5 (Level-1)"),
build_string
("Big5 Level-1 Chinese traditional"),
- build_string ("big5"));
+ build_string ("big5"),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
+ staticpro (&Vcharset_chinese_big5_2);
Vcharset_chinese_big5_2 =
- make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
- CHARSET_TYPE_94X94, 2, 0, '1',
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
+ 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
build_string ("Big5"),
build_string ("Big5 (Level-2)"),
build_string
("Big5 Level-2 Chinese traditional"),
- build_string ("big5"));
-
-#ifdef UTF2000
-#define GENERATE_94_SET(name) \
- { \
- size_t i; \
- ucs_to_##name = make_byte_from_character_table(); \
- for (i = 0; i < 94; i++) \
- { \
- Emchar c = name##_to_ucs[i]; \
- if (c <= 0xffff) \
- put_byte_from_character_table (c, \
- i + 33, ucs_to_##name); \
- } \
- }
-#define GENERATE_96_SET(name) \
- { \
- size_t i; \
- ucs_to_##name = make_byte_from_character_table(); \
- for (i = 0; i < 96; i++) \
- { \
- Emchar c = name##_to_ucs[i]; \
- if (c <= 0xffff) \
- put_byte_from_character_table (c, \
- i + 32, ucs_to_##name); \
- } \
- }
-
- GENERATE_94_SET (latin_jisx0201);
-
- GENERATE_96_SET (latin_iso8859_2);
- GENERATE_96_SET (latin_iso8859_3);
- GENERATE_96_SET (latin_iso8859_4);
- GENERATE_96_SET (latin_iso8859_9);
- GENERATE_96_SET (latin_viscii_lower);
- GENERATE_96_SET (latin_viscii_upper);
-#endif
+ build_string ("big5"),
+ Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
#ifdef ENABLE_COMPOSITE_CHARS
/* #### For simplicity, we put composite chars into a 96x96 charset.
This is going to lead to problems because you can run out of
room, esp. as we don't yet recycle numbers. */
+ /* Composite-character charset object: 96 characters, dimension 2.
+    Every other make_charset call in this function passes seven
+    trailing arguments (decoding table, min code, max code, code
+    offset, byte offset, mother charset, conversion); this call was
+    left with the old, shorter argument list and would not compile
+    when ENABLE_COMPOSITE_CHARS is defined.  The values below follow
+    the other 96-character sets: no table, no code range, byte
+    offset 32, no mother charset, identical conversion. */
+ staticpro (&Vcharset_composite);
Vcharset_composite =
- make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
- CHARSET_TYPE_96X96, 2, 0, 0,
- CHARSET_LEFT_TO_RIGHT,
+ make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
+ 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
build_string ("Composite"),
build_string ("Composite characters"),
build_string ("Composite characters"),
- build_string (""));
+ build_string (""),
+ Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
+ /* #### not dumped properly */
composite_char_row_next = 32;
composite_char_col_next = 32;