+1999-10-10 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * mule/mule-charset.el (default-coded-charset-priority-list): Add
+ `latin-viscii'; prefer it for characters used in Vietnamese.
+
+ * utf-2000/ccs-viscii.el: Add mapping-table for `latin-viscii'.
+
+1999-10-08 Daiki Ueno <ueno@ueda.info.waseda.ac.jp>
+
+ * bytecomp.el (byte-compile-insert-header): Fix regexp.
+
+1999-10-07 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * utf-2000/ccs-viscii.el: Rename `vietnamese-viscii-*' to
+ `latin-viscii-*'.
+
+1999-10-07 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * mule/viet-chars.el (latin-viscii-lower): Renamed from charset
+ `vietnamese-viscii-lower'.
+ (latin-viscii-upper): Renamed from charset
+ `vietnamese-viscii-upper'.
+ (vietnamese-viscii-lower): New alias for charset
+ `latin-viscii-lower'.
+ (vietnamese-viscii-upper): New alias for charset
+ `latin-viscii-upper'.
+
1999-10-05 MORIOKA Tomohiko <tomo@etl.go.jp>
* mule/mule-charset.el (default-coded-charset-priority-list):
(and (eq (point) (point-max))
(not
(re-search-backward
- "\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]" nil t)))))
+ "\\\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]" nil t)))))
(setq buffer-file-coding-system 'raw-text)
(cond ((featurep 'utf-2000)
(insert "(require 'mule)\n;;;###coding system: utf-8\n")
;; katakana-jisx0208
japanese-jisx0208
hebrew-iso8859-8
+ latin-viscii
vietnamese-viscii-lower
vietnamese-viscii-upper)))
;; Vietnamese VISCII with two tables.
(unless (featurep 'utf-2000)
- (make-charset 'vietnamese-viscii-lower "VISCII lower (Vietnamese)"
+ (make-charset 'latin-viscii-lower "VISCII lower (Vietnamese)"
'(registry "VISCII1.1"
dimension 1
chars 96
graphic 1
))
- (make-charset 'vietnamese-viscii-upper "VISCII upper (Vietnamese)"
+ (make-charset 'latin-viscii-upper "VISCII upper (Vietnamese)"
'(registry "VISCII1.1"
dimension 1
chars 96
))
)
+(define-charset-alias 'vietnamese-viscii-lower 'latin-viscii-lower)
+(define-charset-alias 'vietnamese-viscii-upper 'latin-viscii-upper)
+
(modify-syntax-entry 'vietnamese-viscii-lower "w")
(modify-syntax-entry 'vietnamese-viscii-upper "w")
;;; Code:
(set-charset-mapping-table
- 'vietnamese-viscii-lower
+ 'latin-viscii
+ [?\u0000 ; 0x00
+ ?\u0001 ; 0x01
+ ?\u1EB2 ; 0x02 (\e.2\8eÆ)
+ ?\u0003 ; 0x03
+ ?\u0004 ; 0x04
+ ?\u1EB4 ; 0x05 (\8eÇ)
+ ?\u1EAA ; 0x06 (\8eç)
+ ?\u0007 ; 0x07
+ ?\u0008 ; 0x08
+ ?\u0009 ; 0x09
+ ?\u000A ; 0x0A
+ ?\u000B ; 0x0B
+ ?\u000C ; 0x0C
+ ?\u000D ; 0x0D
+ ?\u000E ; 0x0E
+ ?\u000F ; 0x0F
+ ?\u0010 ; 0x10
+ ?\u0011 ; 0x11
+ ?\u0012 ; 0x12
+ ?\u0013 ; 0x13
+ ?\u1EF6 ; 0x14 (\8eÖ)
+ ?\u0015 ; 0x15
+ ?\u0016 ; 0x16
+ ?\u0017 ; 0x17
+ ?\u0018 ; 0x18
+ ?\u1EF8 ; 0x19 (\8eÛ)
+ ?\u001A ; 0x1A
+ ?\u001B ; 0x1B
+ ?\u001C ; 0x1C
+ ?\u001D ; 0x1D
+ ?\u1EF4 ; 0x1E (\8eÜ)
+ ?\u001F ; 0x1F
+ ?\u0020 ; 0x20 ( )
+ ?\u0021 ; 0x21 (!)
+ ?\u0022 ; 0x22 (")
+ ?\u0023 ; 0x23 (#)
+ ?\u0024 ; 0x24 ($)
+ ?\u0025 ; 0x25 (%)
+ ?\u0026 ; 0x26 (&)
+ ?\u0027 ; 0x27 (')
+ ?\u0028 ; 0x28 (()
+ ?\u0029 ; 0x29 ())
+ ?\u002A ; 0x2A (*)
+ ?\u002B ; 0x2B (+)
+ ?\u002C ; 0x2C (,)
+ ?\u002D ; 0x2D (-)
+ ?\u002E ; 0x2E (.)
+ ?\u002F ; 0x2F (/)
+ ?\u0030 ; 0x30 (0)
+ ?\u0031 ; 0x31 (1)
+ ?\u0032 ; 0x32 (2)
+ ?\u0033 ; 0x33 (3)
+ ?\u0034 ; 0x34 (4)
+ ?\u0035 ; 0x35 (5)
+ ?\u0036 ; 0x36 (6)
+ ?\u0037 ; 0x37 (7)
+ ?\u0038 ; 0x38 (8)
+ ?\u0039 ; 0x39 (9)
+ ?\u003A ; 0x3A (:)
+ ?\u003B ; 0x3B (;)
+ ?\u003C ; 0x3C (<)
+ ?\u003D ; 0x3D (=)
+ ?\u003E ; 0x3E (>)
+ ?\u003F ; 0x3F (?)
+ ?\u0040 ; 0x40 (@)
+ ?\u0041 ; 0x41 (A)
+ ?\u0042 ; 0x42 (B)
+ ?\u0043 ; 0x43 (C)
+ ?\u0044 ; 0x44 (D)
+ ?\u0045 ; 0x45 (E)
+ ?\u0046 ; 0x46 (F)
+ ?\u0047 ; 0x47 (G)
+ ?\u0048 ; 0x48 (H)
+ ?\u0049 ; 0x49 (I)
+ ?\u004A ; 0x4A (J)
+ ?\u004B ; 0x4B (K)
+ ?\u004C ; 0x4C (L)
+ ?\u004D ; 0x4D (M)
+ ?\u004E ; 0x4E (N)
+ ?\u004F ; 0x4F (O)
+ ?\u0050 ; 0x50 (P)
+ ?\u0051 ; 0x51 (Q)
+ ?\u0052 ; 0x52 (R)
+ ?\u0053 ; 0x53 (S)
+ ?\u0054 ; 0x54 (T)
+ ?\u0055 ; 0x55 (U)
+ ?\u0056 ; 0x56 (V)
+ ?\u0057 ; 0x57 (W)
+ ?\u0058 ; 0x58 (X)
+ ?\u0059 ; 0x59 (Y)
+ ?\u005A ; 0x5A (Z)
+ ?\u005B ; 0x5B ([)
+ ?\u005C ; 0x5C (\)
+ ?\u005D ; 0x5D (])
+ ?\u005E ; 0x5E (^)
+ ?\u005F ; 0x5F (_)
+ ?\u0060 ; 0x60 (`)
+ ?\u0061 ; 0x61 (a)
+ ?\u0062 ; 0x62 (b)
+ ?\u0063 ; 0x63 (c)
+ ?\u0064 ; 0x64 (d)
+ ?\u0065 ; 0x65 (e)
+ ?\u0066 ; 0x66 (f)
+ ?\u0067 ; 0x67 (g)
+ ?\u0068 ; 0x68 (h)
+ ?\u0069 ; 0x69 (i)
+ ?\u006A ; 0x6A (j)
+ ?\u006B ; 0x6B (k)
+ ?\u006C ; 0x6C (l)
+ ?\u006D ; 0x6D (m)
+ ?\u006E ; 0x6E (n)
+ ?\u006F ; 0x6F (o)
+ ?\u0070 ; 0x70 (p)
+ ?\u0071 ; 0x71 (q)
+ ?\u0072 ; 0x72 (r)
+ ?\u0073 ; 0x73 (s)
+ ?\u0074 ; 0x74 (t)
+ ?\u0075 ; 0x75 (u)
+ ?\u0076 ; 0x76 (v)
+ ?\u0077 ; 0x77 (w)
+ ?\u0078 ; 0x78 (x)
+ ?\u0079 ; 0x79 (y)
+ ?\u007A ; 0x7A (z)
+ ?\u007B ; 0x7B ({)
+ ?\u007C ; 0x7C (|)
+ ?\u007D ; 0x7D (})
+ ?\u007E ; 0x7E (~)
+ ?\u007F ; 0x7F
+ ?\u1EA0 ; 0x80 (\8eÕ)
+ ?\u1EAE ; 0x81 (\8e¡)
+ ?\u1EB0 ; 0x82 (\8e¢)
+ ?\u1EB6 ; 0x83 (\8e£)
+ ?\u1EA4 ; 0x84 (\8e¤)
+ ?\u1EA6 ; 0x85 (\8e¥)
+ ?\u1EA8 ; 0x86 (\8e¦)
+ ?\u1EAC ; 0x87 (\8e§)
+ ?\u1EBC ; 0x88 (\8e¨)
+ ?\u1EB8 ; 0x89 (\8e©)
+ ?\u1EBE ; 0x8A (\8eª)
+ ?\u1EC0 ; 0x8B (\8e«)
+ ?\u1EC2 ; 0x8C (\8e¬)
+ ?\u1EC4 ; 0x8D (\8e)
+ ?\u1EC6 ; 0x8E (\8e®)
+ ?\u1ED0 ; 0x8F (\8e¯)
+ ?\u1ED2 ; 0x90 (\8e°)
+ ?\u1ED4 ; 0x91 (\8e±)
+ ?\u1ED6 ; 0x92 (\8e²)
+ ?\u1ED8 ; 0x93 (\8eµ)
+ ?\u1EE2 ; 0x94 (\8eþ)
+ ?\u1EDA ; 0x95 (\8e¾)
+ ?\u1EDC ; 0x96 (\8e¶)
+ ?\u1EDE ; 0x97 (\8e·)
+ ?\u1ECA ; 0x98 (\8e¸)
+ ?\u1ECE ; 0x99 (\8eö)
+ ?\u1ECC ; 0x9A (\8e÷)
+ ?\u1EC8 ; 0x9B (\8eï)
+ ?\u1EE6 ; 0x9C (\8eü)
+ ?\u0168 ; 0x9D (\8eû)
+ ?\u1EE4 ; 0x9E (\8eø)
+ ?\u1EF2 ; 0x9F (\8eÏ)
+ ?\u00D5 ; 0xA0 (\8eõ)
+ ?\u1EAF ; 0xA1 (\e.1\8e¡)
+ ?\u1EB1 ; 0xA2 (\8e¢)
+ ?\u1EB7 ; 0xA3 (\8e£)
+ ?\u1EA5 ; 0xA4 (\8e¤)
+ ?\u1EA7 ; 0xA5 (\8e¥)
+ ?\u1EA9 ; 0xA6 (\8e¦)
+ ?\u1EAD ; 0xA7 (\8e§)
+ ?\u1EBD ; 0xA8 (\8e¨)
+ ?\u1EB9 ; 0xA9 (\8e©)
+ ?\u1EBF ; 0xAA (\8eª)
+ ?\u1EC1 ; 0xAB (\8e«)
+ ?\u1EC3 ; 0xAC (\8e¬)
+ ?\u1EC5 ; 0xAD (\8e)
+ ?\u1EC7 ; 0xAE (\8e®)
+ ?\u1ED1 ; 0xAF (\8e¯)
+ ?\u1ED3 ; 0xB0 (\8e°)
+ ?\u1ED5 ; 0xB1 (\8e±)
+ ?\u1ED7 ; 0xB2 (\8e²)
+ ?\u1EE0 ; 0xB3 (\e.2\8eÞ)
+ ?\u01A0 ; 0xB4 (\8e½)
+ ?\u1ED9 ; 0xB5 (\e.1\8eµ)
+ ?\u1EDD ; 0xB6 (\8e¶)
+ ?\u1EDF ; 0xB7 (\8e·)
+ ?\u1ECB ; 0xB8 (\8e¸)
+ ?\u1EF0 ; 0xB9 (\e.2\8eñ)
+ ?\u1EE8 ; 0xBA (\8eÑ)
+ ?\u1EEA ; 0xBB (\8e×)
+ ?\u1EEC ; 0xBC (\8eØ)
+ ?\u01A1 ; 0xBD (\e.1\8e½)
+ ?\u1EDB ; 0xBE (\8e¾)
+ ?\u01AF ; 0xBF (\e.2\8eß)
+ ?\u00C0 ; 0xC0 (\8eà)
+ ?\u00C1 ; 0xC1 (\8eá)
+ ?\u00C2 ; 0xC2 (\8eâ)
+ ?\u00C3 ; 0xC3 (\8eã)
+ ?\u1EA2 ; 0xC4 (\8eä)
+ ?\u0102 ; 0xC5 (\8eå)
+ ?\u1EB3 ; 0xC6 (\e.1\8eÆ)
+ ?\u1EB5 ; 0xC7 (\8eÇ)
+ ?\u00C8 ; 0xC8 (\e.2\8eè)
+ ?\u00C9 ; 0xC9 (\8eé)
+ ?\u00CA ; 0xCA (\8eê)
+ ?\u1EBA ; 0xCB (\8eë)
+ ?\u00CC ; 0xCC (\8eì)
+ ?\u00CD ; 0xCD (\8eí)
+ ?\u0128 ; 0xCE (\8eî)
+ ?\u1EF3 ; 0xCF (\e.1\8eÏ)
+ ?\u0110 ; 0xD0 (\e.2\8eð)
+ ?\u1EE9 ; 0xD1 (\e.1\8eÑ)
+ ?\u00D2 ; 0xD2 (\e.2\8eò)
+ ?\u00D3 ; 0xD3 (\8eó)
+ ?\u00D4 ; 0xD4 (\8eô)
+ ?\u1EA1 ; 0xD5 (\e.1\8eÕ)
+ ?\u1EF7 ; 0xD6 (\8eÖ)
+ ?\u1EEB ; 0xD7 (\8e×)
+ ?\u1EED ; 0xD8 (\8eØ)
+ ?\u00D9 ; 0xD9 (\e.2\8eù)
+ ?\u00DA ; 0xDA (\8eú)
+ ?\u1EF9 ; 0xDB (\e.1\8eÛ)
+ ?\u1EF5 ; 0xDC (\8eÜ)
+ ?\u00DD ; 0xDD (\e.2\8eý)
+ ?\u1EE1 ; 0xDE (\e.1\8eÞ)
+ ?\u01B0 ; 0xDF (\8eß)
+ ?\u00E0 ; 0xE0 (\8eà)
+ ?\u00E1 ; 0xE1 (\8eá)
+ ?\u00E2 ; 0xE2 (\8eâ)
+ ?\u00E3 ; 0xE3 (\8eã)
+ ?\u1EA3 ; 0xE4 (\8eä)
+ ?\u0103 ; 0xE5 (\8eå)
+ ?\u1EEF ; 0xE6 (\8eæ)
+ ?\u1EAB ; 0xE7 (\8eç)
+ ?\u00E8 ; 0xE8 (\8eè)
+ ?\u00E9 ; 0xE9 (\8eé)
+ ?\u00EA ; 0xEA (\8eê)
+ ?\u1EBB ; 0xEB (\8eë)
+ ?\u00EC ; 0xEC (\8eì)
+ ?\u00ED ; 0xED (\8eí)
+ ?\u0129 ; 0xEE (\8eî)
+ ?\u1EC9 ; 0xEF (\8eï)
+ ?\u0111 ; 0xF0 (\8eð)
+ ?\u1EF1 ; 0xF1 (\8eñ)
+ ?\u00F2 ; 0xF2 (\8eò)
+ ?\u00F3 ; 0xF3 (\8eó)
+ ?\u00F4 ; 0xF4 (\8eô)
+ ?\u00F5 ; 0xF5 (\8eõ)
+ ?\u1ECF ; 0xF6 (\8eö)
+ ?\u1ECD ; 0xF7 (\8e÷)
+ ?\u1EE5 ; 0xF8 (\8eø)
+ ?\u00F9 ; 0xF9 (\8eù)
+ ?\u00FA ; 0xFA (\8eú)
+ ?\u0169 ; 0xFB (\8eû)
+ ?\u1EE7 ; 0xFC (\8eü)
+ ?\u00FD ; 0xFD (\8eý)
+ ?\u1EE3 ; 0xFE (\8eþ)
+ ?\u1EEE ; 0xFF (\e.2\8eæ)
+ ])
+
+(set-charset-mapping-table
+ 'latin-viscii-lower
[nil ; 0x20
?\u1eaf ; 0x21
?\u1eb1 ; 0x22
])
(set-charset-mapping-table
- 'vietnamese-viscii-upper
+ 'latin-viscii-upper
[nil ; 0x20
?\u1eae ; 0x21
?\u1eb0 ; 0x22
+1999-10-12 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * mule-charset.c (vars_of_mule_charset): Update `utf-2000-version'
+ to 0.9.
+
+1999-10-11 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * regex.c (compile_extended_range): Use `CHAR_CHARSET_ID' instead
+ of `CHAR_LEADING_BYTE' in UTF-2000.
+
+ * insdel.c (find_charsets_in_bufbyte_string): Use
+ `CHAR_CHARSET_ID' instead of `CHAR_LEADING_BYTE' in UTF-2000.
+ (find_charsets_in_emchar_string): Likewise.
+
+ * chartab.h (CHAR_TABLE_NON_ASCII_VALUE_UNSAFE): Use
+ `CHAR_CHARSET_ID' instead of `CHAR_LEADING_BYTE' in UTF-2000.
+
+ * char-ucs.h (CHAR_LEADING_BYTE): Deleted.
+ (CHAR_CHARSET_ID): New macro.
+
+1999-10-11 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * chartab.c (get_char_table): Don't use type `Charset_ID' for
+ charset-id - MIN_LEADING_BYTE.
+ (put_char_table): Likewise.
+
+1999-10-11 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * char-ucs.h (MIN_LEADING_BYTE): Changed to `-0x40'.
+ (NUM_LEADING_BYTES): Changed to (80 * 3 - MIN_LEADING_BYTE).
+ (CHARSET_LEADING_BYTE): Don't cast by `Bufbyte'.
+ (CHARSET_ID_OFFSET): New macro.
+ (LEADING_BYTE_CONTROL_1): Changed to (CHARSET_ID_OFFSET - 1).
+ (LEADING_BYTE_UCS_BMP): Changed to (CHARSET_ID_OFFSET - 2).
+ (LEADING_BYTE_LATIN_VISCII): Changed to (CHARSET_ID_OFFSET - 3).
+ (LEADING_BYTE_HIRAGANA_JISX0208): Changed to (CHARSET_ID_OFFSET -
+ 4).
+ (LEADING_BYTE_KATAKANA_JISX0208): Changed to (CHARSET_ID_OFFSET -
+ 5).
+ (MIN_LEADING_BYTE_PRIVATE): Changed to `MIN_LEADING_BYTE'.
+ (MAX_LEADING_BYTE_PRIVATE): Changed to (CHARSET_ID_OFFSET - 6).
+ (CHARSET_ID_OFFSET_94): Changed to (CHARSET_ID_OFFSET - '0').
+ (CHARSET_ID_OFFSET_96): Changed to (CHARSET_ID_OFFSET_94 + 80).
+ (CHARSET_ID_OFFSET_94x94): Changed to (CHARSET_ID_OFFSET_96 + 80).
+
+1999-10-11 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * mule-charset.c (next_allocated_leading_byte): New variable in
+ UTF-2000.
+ (next_allocated_1_byte_leading_byte): Don't define in UTF-2000.
+ (next_allocated_2_byte_leading_byte): Don't define in UTF-2000.
+ (get_unallocated_leading_byte): Simply use
+ `next_allocated_leading_byte' [ignore dimension] in UTF-2000.
+ (vars_of_mule_charset): Setup `next_allocated_leading_byte' in
+ UTF-2000.
+
+ * char-ucs.h (MIN_LEADING_BYTE_PRIVATE): New macro.
+ (MAX_LEADING_BYTE_PRIVATE): New macro.
+ (MIN_LEADING_BYTE_OFFICIAL_2): Deleted.
+ (MAX_LEADING_BYTE_OFFICIAL_2): Deleted.
+
+1999-10-11 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * mule-charset.c (Fmake_charset): Allocate final-byte based
+ charset-id for 94-set, 96-set and 94x94-set.
+
+1999-10-11 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * mule-charset.c (char_byte_table_equal): Fill braces to avoid
+ ambiguous `else'.
+ (Fmake_charset): Likewise.
+ (complex_vars_of_mule_charset): Modify the font registry of
+ `ucs-bmp' not to match `Ethiopic-Unicode'.
+
+1999-10-10 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * mule-charset.c (complex_vars_of_mule_charset): Add font
+ registry of `ucs-bmp'.
+
+1999-10-10 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * text-coding.c (char_encode_iso2022): Ignore non-ISO-2022
+ coded-charsets in `default-coded-charset-priority-list' when
+ breaking up a character.
+
+ * mule-charset.c (Vcharset_latin_viscii): New variable.
+ (Qlatin_viscii): New variable.
+ (make_charset): Don't use `decoding_table'.
+ (Fmake_charset): Regard graphic = 2 as 256^n-set; setup
+ byte_offset.
+ (Fset_charset_mapping_table): New implementation.
+ (syms_of_mule_charset): Add new symbol `latin-viscii'.
+ (complex_vars_of_mule_charset): Set `graphic' attribute of charset
+ `ucs-bmp' and `latin-viscii' to 2; change font registry of charset
+ `latin-viscii-lower' to "MULEVISCII-LOWER"; change font registry
+ of charset `latin-viscii-upper' to "MULEVISCII-UPPER"; add new
+ charset `latin-viscii'.
+
+ * char-ucs.h (LEADING_BYTE_LATIN_VISCII): New macro.
+ (CHARSET_TYPE_94X94): Change to 1 from 2.
+ (CHARSET_TYPE_96): Change to 2 from 1.
+ (CHARSET_TYPE_128): New macro.
+ (CHARSET_TYPE_128X128): Change to 5 from 4.
+ (CHARSET_TYPE_256): New macro.
+ (CHARSET_TYPE_256X256): Change to 7 from 5.
+ (MAKE_CHAR): Use `XCHARSET_BYTE_OFFSET(charset)'.
+
+1999-10-10 MORIOKA Tomohiko <tomo@urania.m17n.org>
+
+ * text-coding.c (char_encode_shift_jis): Refer
+ `XCHARSET_ENCODING_TABLE(Vcharset_latin_jisx0201)' instead of
+ `XCHARSET_TO_BYTE1_TABLE(Vcharset_latin_jisx0201)'.
+
+ * mule-charset.c (mark_char_byte_table): New function in UTF-2000.
+ (char_byte_table_equal): New function in UTF-2000.
+ (char_byte_table_hash): New function in UTF-2000.
+ (char_byte_table_description): New constant in UTF-2000.
+ (char_byte_table): New type in UTF-2000.
+ (make_char_byte_table): New function in UTF-2000.
+ (copy_char_byte_table): New function in UTF-2000.
+ (make_char_code_table): New macro in UTF-2000.
+ (get_char_code_table): New function in UTF-2000.
+ (put_char_code_table): New function in UTF-2000.
+ (mark_charset): Mark `cs->encoding_table' in UTF-2000.
+ (charset_description): Add setting in UTF-2000.
+ (make_charset): Setup `CHARSET_ENCODING_TABLE(cs)' instead of
+ `CHARSET_TO_BYTE1_TABLE(cs)'.
+ (charset_get_byte1): Refer `XCHARSET_ENCODING_TABLE(charset)'
+ instead of `XCHARSET_TO_BYTE1_TABLE(charset)'.
+ (charset_get_byte2): Refer `XCHARSET_ENCODING_TABLE(charset)'
+ instead of `XCHARSET_TO_BYTE2_TABLE(charset)'.
+ (Fset_charset_mapping_table): Setup `CHARSET_ENCODING_TABLE(cs)'
+ instead of `CHARSET_TO_BYTE1_TABLE(cs)' and
+ `CHARSET_TO_BYTE2_TABLE(cs)'.
+
+ * char-ucs.h (char_byte_table): New type.
+ (XCHAR_BYTE_TABLE): New macro.
+ (XSETCHAR_BYTE_TABLE): New macro.
+ (CHAR_BYTE_TABLE_P): New macro.
+ (GC_CHAR_BYTE_TABLE_P): New macro.
+ (struct Lisp_Char_Byte_Table): New structure.
+ (get_char_code_table): New interface.
+ (Emchar_to_byte_table): Deleted.
+ (get_byte_from_character_table): Deleted.
+ (struct Lisp_Charset): Add `encoding_table'; delete
+ `to_byte1_table' and `to_byte2_table'.
+ (CHARSET_ENCODING_TABLE): New macro.
+ (CHARSET_TO_BYTE1_TABLE): Deleted.
+ (CHARSET_TO_BYTE2_TABLE): Deleted.
+ (XCHARSET_ENCODING_TABLE): New macro.
+ (XCHARSET_TO_BYTE1_TABLE): Deleted.
+ (XCHARSET_TO_BYTE2_TABLE): Deleted.
+
+1999-10-07 MORIOKA Tomohiko <tomo@etl.go.jp>
+
+ * mule-charset.c (syms_of_mule_charset): Delete charset alias
+ `vietnamese-viscii-*'.
+
1999-10-07 MORIOKA Tomohiko <tomo@etl.go.jp>
* mule-charset.c (Qvietnamese_viscii_lower): New variable.
#define CHAR_ASCII_P(ch) ((ch) <= 0x7F)
\f
-int
-get_byte_from_character_table (Emchar ch, Lisp_Object ccs);
+/* One 256-way node of a character-code lookup table.  A slot is selected
+   by a single byte of a character code and holds either a nested
+   char-byte-table (another node) or the value stored for that code;
+   see get_char_code_table for the lookup. */
+struct Lisp_Char_Byte_Table
+{
+ struct lcrecord_header header;
+
+ Lisp_Object property[256];
+};
+typedef struct Lisp_Char_Byte_Table Lisp_Char_Byte_Table;
+
+DECLARE_LRECORD (char_byte_table, Lisp_Char_Byte_Table);
+#define XCHAR_BYTE_TABLE(x) \
+ XRECORD (x, char_byte_table, struct Lisp_Char_Byte_Table)
+#define XSETCHAR_BYTE_TABLE(x, p) XSETRECORD (x, p, char_byte_table)
+#define CHAR_BYTE_TABLE_P(x) RECORDP (x, char_byte_table)
+/* #define CHECK_CHAR_BYTE_TABLE(x) CHECK_RECORD (x, char_byte_table)
+ char table entries should never escape to Lisp */
+
+Lisp_Object get_char_code_table (Emchar ch, Lisp_Object table);
+
extern Lisp_Object Vcharset_ucs_bmp;
extern Lisp_Object Vcharset_latin_jisx0201;
typedef int Charset_ID;
-#define MIN_LEADING_BYTE 0x80
+/* Base value subtracted from a charset ID to form a table index (see the
+   `- MIN_LEADING_BYTE' uses).  Parenthesized so the negative value stays
+   a single operand wherever the macro is expanded. */
+#define MIN_LEADING_BYTE (-0x40)
+#define CHARSET_ID_OFFSET 0x00
+
+/* represent normal 80-9F */
+#define LEADING_BYTE_CONTROL_1 (CHARSET_ID_OFFSET - 1)
-#define LEADING_BYTE_UCS_BMP 0x80
-#define LEADING_BYTE_CONTROL_1 0x81 /* represent normal 80-9F */
-#define LEADING_BYTE_HIRAGANA_JISX0208 0x82
-#define LEADING_BYTE_KATAKANA_JISX0208 0x83
+/* ISO/IEC 10646 BMP */
+#define LEADING_BYTE_UCS_BMP (CHARSET_ID_OFFSET - 2)
+/* VISCII 1.1 */
+#define LEADING_BYTE_LATIN_VISCII (CHARSET_ID_OFFSET - 3)
-#define CHARSET_ID_OFFSET_94 0x55
+#define LEADING_BYTE_HIRAGANA_JISX0208 (CHARSET_ID_OFFSET - 4)
+#define LEADING_BYTE_KATAKANA_JISX0208 (CHARSET_ID_OFFSET - 5)
+
+#define MIN_LEADING_BYTE_PRIVATE MIN_LEADING_BYTE
+#define MAX_LEADING_BYTE_PRIVATE (CHARSET_ID_OFFSET - 6)
+
+
+#define CHARSET_ID_OFFSET_94 (CHARSET_ID_OFFSET - '0')
#define MIN_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '0')
#define MAX_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '?')
#define LEADING_BYTE_LATIN_JISX0201 (CHARSET_ID_OFFSET_94 + 'J')
-#define CHARSET_ID_OFFSET_96 0x70
+#define CHARSET_ID_OFFSET_96 (CHARSET_ID_OFFSET_94 + 80)
#define LEADING_BYTE_LATIN_VISCII_LOWER (CHARSET_ID_OFFSET_96 + '1')
#define LEADING_BYTE_LATIN_VISCII_UPPER (CHARSET_ID_OFFSET_96 + '2')
#define LEADING_BYTE_THAI_TIS620 (CHARSET_ID_OFFSET_96 + 'T')
-#define MIN_LEADING_BYTE_PRIVATE_1 0xD0
-#define MAX_LEADING_BYTE_PRIVATE_1 0xDF
-
-
-#define CHARSET_ID_OFFSET_94x94 0xB0
+#define CHARSET_ID_OFFSET_94x94 (CHARSET_ID_OFFSET_96 + 80)
/* Big5 Level 1 */
#define LEADING_BYTE_CHINESE_BIG5_1 ('0' + CHARSET_ID_OFFSET_94x94)
/* DPRK Hangul KPS 9566-1997 */
#define LEADING_BYTE_KOREAN_KPS9566 ('N' + CHARSET_ID_OFFSET_94x94)
-#define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978
-#define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_KOREAN_KPS9566
-#define NUM_LEADING_BYTES 256
+#define NUM_LEADING_BYTES (80 * 3 - MIN_LEADING_BYTE)
\f
/************************************************************************/
/* Byte->character mapping table */
Lisp_Object decoding_table;
+ /* Character->byte mapping table */
+ Lisp_Object encoding_table;
+
/* Range of character code */
Emchar ucs_min, ucs_max;
#define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
#define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)
-#define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */
-#define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */
-#define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */
-#define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */
-#define CHARSET_TYPE_128X128 4 /* This charset includes 128x128 characters. */
-#define CHARSET_TYPE_256X256 5 /* This charset includes 256x256 characters. */
+#define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */
+#define CHARSET_TYPE_94X94 1 /* This charset includes 94x94 characters. */
+#define CHARSET_TYPE_96 2 /* This charset includes 96 characters. */
+#define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */
+#define CHARSET_TYPE_128 4 /* This charset includes 128 characters. */
+#define CHARSET_TYPE_128X128 5 /* This charset includes 128x128 characters. */
+#define CHARSET_TYPE_256 6 /* This charset includes 256 characters. */
+#define CHARSET_TYPE_256X256 7 /* This charset includes 256x256 characters. */
#define CHARSET_LEFT_TO_RIGHT 0
#define CHARSET_RIGHT_TO_LEFT 1
#define CHARSET_CHARS(cs) ((cs)->chars)
#define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
#define CHARSET_DECODING_TABLE(cs) ((cs)->decoding_table)
+#define CHARSET_ENCODING_TABLE(cs) ((cs)->encoding_table)
#define CHARSET_UCS_MIN(cs) ((cs)->ucs_min)
#define CHARSET_UCS_MAX(cs) ((cs)->ucs_max)
#define CHARSET_CODE_OFFSET(cs) ((cs)->code_offset)
#define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
#define XCHARSET_DECODING_TABLE(cs) CHARSET_DECODING_TABLE(XCHARSET(cs))
+#define XCHARSET_ENCODING_TABLE(cs) CHARSET_ENCODING_TABLE(XCHARSET(cs))
#define XCHARSET_UCS_MIN(cs) CHARSET_UCS_MIN(XCHARSET(cs))
#define XCHARSET_UCS_MAX(cs) CHARSET_UCS_MAX(XCHARSET(cs))
#define XCHARSET_CODE_OFFSET(cs) CHARSET_CODE_OFFSET(XCHARSET(cs))
/* Table of charsets indexed by type/final-byte. */
Lisp_Object charset_by_attributes[4][128];
- Charset_ID next_allocated_1_byte_leading_byte;
- Charset_ID next_allocated_2_byte_leading_byte;
+ Charset_ID next_allocated_leading_byte;
};
extern struct charset_lookup *chlook;
MAKE_CHAR (Lisp_Object charset, int c1, int c2)
{
Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (charset);
- int ofs, idx;
+ int idx;
Lisp_Object ch;
if (!EQ (decoding_table, Qnil)
- && (0 <= (idx =
- c1 - (ofs = (XCHARSET_CHARS (charset) == 94 ? 33 : 32))))
+ && (0 <= (idx = c1 - XCHARSET_BYTE_OFFSET (charset)))
&& (idx < XVECTOR_LENGTH (decoding_table))
&& !EQ (ch = XVECTOR_DATA(decoding_table)[idx], Qnil))
{
if (VECTORP (ch))
{
- if ((0 <= (idx = c2 - ofs))
+ if ((0 <= (idx = c2 - XCHARSET_BYTE_OFFSET (charset)))
&& (idx < XVECTOR_LENGTH (ch))
&& !EQ (ch = XVECTOR_DATA(ch)[idx], Qnil))
return XCHAR (ch);
return charset;
}
-#define CHAR_LEADING_BYTE(c) (XCHARSET_LEADING_BYTE(CHAR_CHARSET(c)))
-
+#define CHAR_CHARSET_ID(c) (XCHARSET_ID(CHAR_CHARSET(c)))
#define CHAR_COLUMNS(c) (CHARSET_COLUMNS(XCHARSET(CHAR_CHARSET(c))))
\f
val = ct->ascii[byte1 + 128];
else
{
- Charset_ID lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
+ int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
val = ct->level1[lb];
if (CHAR_TABLE_ENTRYP (val))
{
}
else
{
- Charset_ID lb
- = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
+ int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
ct->level1[lb] = val;
}
break;
case CHARTAB_RANGE_ROW:
{
Lisp_Char_Table_Entry *cte;
- Charset_ID lb
- = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
+ int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE;
/* make sure that there is a separate entry for the row. */
if (!CHAR_TABLE_ENTRYP (ct->level1[lb]))
ct->level1[lb] = make_char_table_entry (ct->level1[lb]);
else
{
Lisp_Char_Table_Entry *cte;
- Charset_ID lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
+ int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE;
/* make sure that there is a separate entry for the row. */
if (!CHAR_TABLE_ENTRYP (ct->level1[lb]))
ct->level1[lb] = make_char_table_entry (ct->level1[lb]);
INLINE_HEADER Lisp_Object
CHAR_TABLE_NON_ASCII_VALUE_UNSAFE (Lisp_Char_Table *ct, Emchar ch)
{
+#ifdef UTF2000
+ Charset_ID lb = CHAR_CHARSET_ID (ch);
+#else
Charset_ID lb = CHAR_LEADING_BYTE (ch);
+#endif
if (!CHAR_TABLE_ENTRYP ((ct)->level1[lb - MIN_LEADING_BYTE]))
return (ct)->level1[lb - MIN_LEADING_BYTE];
else
while (str < strend)
{
+#ifdef UTF2000
+ charsets[CHAR_CHARSET_ID (charptr_emchar (str))
+ - MIN_LEADING_BYTE] = 1;
+#else /* I'm not sure the definition for UTF2000 works with leading-byte
+ representation. */
charsets[CHAR_LEADING_BYTE (charptr_emchar (str))
- MIN_LEADING_BYTE] = 1;
+#endif
INC_CHARPTR (str);
}
#endif
for (i = 0; i < len; i++)
{
+#ifdef UTF2000
+ charsets[CHAR_CHARSET_ID (str[i]) - MIN_LEADING_BYTE] = 1;
+#else /* I'm not sure the definition for UTF2000 works with leading-byte
+ representation. */
charsets[CHAR_LEADING_BYTE (str[i]) - MIN_LEADING_BYTE] = 1;
+#endif
}
#endif
}
lrecord_type_lstream,
lrecord_type_process,
lrecord_type_charset,
+ lrecord_type_char_byte_table,
lrecord_type_coding_system,
lrecord_type_char_table,
lrecord_type_char_table_entry,
Lisp_Object Vcharset_chinese_cns11643_2;
#ifdef UTF2000
Lisp_Object Vcharset_ucs_bmp;
+Lisp_Object Vcharset_latin_viscii;
Lisp_Object Vcharset_latin_viscii_lower;
Lisp_Object Vcharset_latin_viscii_upper;
Lisp_Object Vcharset_hiragana_jisx0208;
#endif
#ifdef UTF2000
-int
-get_byte_from_character_table (Emchar ch, Lisp_Object ccs)
+static Lisp_Object
+mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
{
- Lisp_Charset* cs = XCHARSET(ccs);
- Lisp_Object decoding_table = CHARSET_DECODING_TABLE (cs);
- int byte_offset = CHARSET_BYTE_OFFSET (cs);
+ struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
+ int i;
- if (VECTORP (decoding_table))
+ for (i = 0; i < 256; i++)
{
- int row;
+ mark_object (cte->property[i]);
+ }
+ return Qnil;
+}
- for (row = 0; row < XVECTOR_LENGTH (decoding_table); row++)
- {
- Lisp_Object elt = XVECTOR_DATA(decoding_table)[row];
+static int
+char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
+{
+ struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
+ struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
+ int i;
- if (VECTORP (elt))
- {
- int cell;
+ for (i = 0; i < 256; i++)
+ if (CHAR_BYTE_TABLE_P (cte1->property[i]))
+ {
+ if (CHAR_BYTE_TABLE_P (cte2->property[i]))
+ {
+ if (!char_byte_table_equal (cte1->property[i],
+ cte2->property[i], depth + 1))
+ return 0;
+ }
+ else
+ return 0;
+ }
+ else
+ if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
+ return 0;
+ return 1;
+}
- for (cell = 0; cell < XVECTOR_LENGTH (elt); cell++)
- {
- Lisp_Object obj = XVECTOR_DATA(elt)[cell];
-
- if (CHARP (obj))
- {
- if (XCHAR (obj) == ch)
- return
- ( (row + byte_offset) << 8 )
- | (cell + byte_offset);
- }
- }
+/* Hash method for char-byte-table records: hash the 256 `property' slots
+   of OBJ as an array, passing DEPTH through to internal_array_hash. */
+static unsigned long
+char_byte_table_hash (Lisp_Object obj, int depth)
+{
+  return internal_array_hash (XCHAR_BYTE_TABLE (obj)->property, 256, depth);
+}
+
+static const struct lrecord_description char_byte_table_description[] = {
+ { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Char_Byte_Table, property), 256 },
+ { XD_END }
+};
+
+DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
+ mark_char_byte_table,
+ internal_object_printer,
+ 0, char_byte_table_equal,
+ char_byte_table_hash,
+ char_byte_table_description,
+ Lisp_Char_Byte_Table);
+
+
+/* Allocate a fresh char-byte-table record whose 256 slots all hold
+   INITVAL, and return it wrapped as a Lisp_Object. */
+static Lisp_Object
+make_char_byte_table (Lisp_Object initval)
+{
+  struct Lisp_Char_Byte_Table *node
+    = alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
+                           &lrecord_char_byte_table);
+  Lisp_Object result;
+  int slot = 0;
+
+  while (slot < 256)
+    node->property[slot++] = initval;
+
+  XSETCHAR_BYTE_TABLE (result, node);
+  return result;
+}
+
+/* Return a deep copy of the char-byte-table ENTRY.  Slots that hold
+   nested char-byte-tables are copied recursively; every other slot
+   value is stored in the copy as-is. */
+static Lisp_Object
+copy_char_byte_table (Lisp_Object entry)
+{
+  struct Lisp_Char_Byte_Table *src = XCHAR_BYTE_TABLE (entry);
+  struct Lisp_Char_Byte_Table *dst
+    = alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
+                           &lrecord_char_byte_table);
+  Lisp_Object copy;
+  int slot;
+
+  for (slot = 0; slot < 256; slot++)
+    {
+      Lisp_Object elt = src->property[slot];
+
+      dst->property[slot]
+        = CHAR_BYTE_TABLE_P (elt) ? copy_char_byte_table (elt) : elt;
+    }
+
+  XSETCHAR_BYTE_TABLE (copy, dst);
+  return copy;
+}
+
+#define make_char_code_table(initval) make_char_byte_table(initval)
+
+/* Look up character CH in the char-byte-table TABLE.
+
+   The table is walked through up to four levels, keyed by the bytes of
+   CH from most to least significant.  At each level a slot holding a
+   nested char-byte-table is descended into; any other slot value ends
+   the lookup and is returned as the value for CH.
+
+   Every index, including the top byte, is reduced to a single byte so it
+   always falls inside the 256-slot `property' array.  This matches the
+   slot put_char_code_table stores under, (unsigned char)(ch >> 24), and
+   keeps a negative CH (assuming Emchar is a signed type -- TODO confirm)
+   from indexing outside the array. */
+Lisp_Object
+get_char_code_table (Emchar ch, Lisp_Object table)
+{
+  struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
+  Lisp_Object ret = cpt->property [(unsigned char) (ch >> 24)];
+
+  if (CHAR_BYTE_TABLE_P (ret))
+    cpt = XCHAR_BYTE_TABLE (ret);
+  else
+    return ret;
+
+  ret = cpt->property [(unsigned char) (ch >> 16)];
+  if (CHAR_BYTE_TABLE_P (ret))
+    cpt = XCHAR_BYTE_TABLE (ret);
+  else
+    return ret;
+
+  ret = cpt->property [(unsigned char) (ch >> 8)];
+  if (CHAR_BYTE_TABLE_P (ret))
+    cpt = XCHAR_BYTE_TABLE (ret);
+  else
+    return ret;
+
+  /* Lowest level: the slot is the value itself. */
+  return cpt->property [(unsigned char) ch];
+}
+
+void
+put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
+{
+ struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
+ Lisp_Object ret = cpt1->property[ch >> 24];
+
+ if (CHAR_BYTE_TABLE_P (ret))
+ {
+ struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
+
+ ret = cpt2->property[(unsigned char)(ch >> 16)];
+ if (CHAR_BYTE_TABLE_P (ret))
+ {
+ struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
+
+ ret = cpt3->property[(unsigned char)(ch >> 8)];
+ if (CHAR_BYTE_TABLE_P (ret))
+ {
+ struct Lisp_Char_Byte_Table* cpt4
+ = XCHAR_BYTE_TABLE (ret);
+
+ cpt4->property[(unsigned char)ch] = value;
}
- else if (CHARP (elt))
+ else if (!EQ (ret, value))
{
- if (XCHAR (elt) == ch)
- return (row + byte_offset) << 8;
+ Lisp_Object cpt4 = make_char_byte_table (ret);
+
+ XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
+ cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
}
}
+ else if (!EQ (ret, value))
+ {
+ Lisp_Object cpt3 = make_char_byte_table (ret);
+ Lisp_Object cpt4 = make_char_byte_table (ret);
+
+ XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
+ XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
+ = cpt4;
+ cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
+ }
+ }
+ else if (!EQ (ret, value))
+ {
+ Lisp_Object cpt2 = make_char_byte_table (ret);
+ Lisp_Object cpt3 = make_char_byte_table (ret);
+ Lisp_Object cpt4 = make_char_byte_table (ret);
+
+ XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
+ XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
+ XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
+ cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
}
- return 0;
}
+
Lisp_Object Vutf_2000_version;
#endif
Qchinese_cns11643_2,
#ifdef UTF2000
Qucs_bmp,
+ Qlatin_viscii,
Qlatin_viscii_lower,
Qlatin_viscii_upper,
Qvietnamese_viscii_lower,
mark_object (cs->ccl_program);
#ifdef UTF2000
mark_object (cs->decoding_table);
+ mark_object (cs->encoding_table);
#endif
return cs->name;
}
{ XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
{ XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
{ XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
+#ifdef UTF2000
{ XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
+ { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
+#endif
{ XD_END }
};
CHARSET_CCL_PROGRAM (cs) = Qnil;
CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
#ifdef UTF2000
- CHARSET_DECODING_TABLE(cs) = decoding_table;
+ CHARSET_DECODING_TABLE(cs) = Qnil;
+ CHARSET_ENCODING_TABLE(cs) = Qnil;
CHARSET_UCS_MIN(cs) = ucs_min;
CHARSET_UCS_MAX(cs) = ucs_max;
CHARSET_CODE_OFFSET(cs) = code_offset;
CHARSET_BYTE_OFFSET(cs) = byte_offset;
#endif
-
- switch ( CHARSET_TYPE (cs) )
+
+ switch (CHARSET_TYPE (cs))
{
case CHARSET_TYPE_94:
CHARSET_DIMENSION (cs) = 1;
CHARSET_CHARS (cs) = 96;
break;
#ifdef UTF2000
+ case CHARSET_TYPE_128:
+ CHARSET_DIMENSION (cs) = 1;
+ CHARSET_CHARS (cs) = 128;
+ break;
case CHARSET_TYPE_128X128:
CHARSET_DIMENSION (cs) = 2;
CHARSET_CHARS (cs) = 128;
break;
+ case CHARSET_TYPE_256:
+ CHARSET_DIMENSION (cs) = 1;
+ CHARSET_CHARS (cs) = 256;
+ break;
case CHARSET_TYPE_256X256:
CHARSET_DIMENSION (cs) = 2;
CHARSET_CHARS (cs) = 256;
{
Charset_ID lb;
+#ifdef UTF2000
+ if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
+ lb = 0;
+ else
+ lb = chlook->next_allocated_leading_byte++;
+#else
if (dimension == 1)
{
if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
else
lb = chlook->next_allocated_2_byte_leading_byte++;
}
+#endif
if (!lb)
signal_simple_error
unsigned char
charset_get_byte1 (Lisp_Object charset, Emchar ch)
{
+ Lisp_Object table;
int d;
- if ((d = get_byte_from_character_table (ch, charset)) > 0)
- return d >> 8;
- else if ((XCHARSET_UCS_MIN (charset) <= ch)
- && (ch <= XCHARSET_UCS_MAX (charset)))
+ if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
+ {
+ Lisp_Object value = get_char_code_table (ch, table);
+
+ if (INTP (value))
+ {
+ Emchar code = XINT (value);
+
+ if (code < (1 << 8))
+ return code;
+ else if (code < (1 << 16))
+ return code >> 8;
+ else if (code < (1 << 24))
+ return code >> 16;
+ else
+ return code >> 24;
+ }
+ }
+ if ((XCHARSET_UCS_MIN (charset) <= ch)
+ && (ch <= XCHARSET_UCS_MAX (charset)))
return (ch - XCHARSET_UCS_MIN (charset)
+ XCHARSET_CODE_OFFSET (charset))
/ (XCHARSET_DIMENSION (charset) == 1 ?
return 0;
else
{
- int d;
+ Lisp_Object table;
- if ((d = get_byte_from_character_table (ch, charset)) > 0)
- return d & 0xFF;
- else if ((XCHARSET_UCS_MIN (charset) <= ch)
- && (ch <= XCHARSET_UCS_MAX (charset)))
+ if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil))
+ {
+ Lisp_Object value = get_char_code_table (ch, table);
+
+ if (INTP (value))
+ {
+ Emchar code = XINT (value);
+
+ if (code < (1 << 16))
+ return (unsigned char)code;
+ else if (code < (1 << 24))
+ return (unsigned char)(code >> 16);
+ else
+ return (unsigned char)(code >> 24);
+ }
+ }
+ if ((XCHARSET_UCS_MIN (charset) <= ch)
+ && (ch <= XCHARSET_UCS_MAX (charset)))
return ((ch - XCHARSET_UCS_MIN (charset)
+ XCHARSET_CODE_OFFSET (charset))
/ (XCHARSET_DIMENSION (charset) == 2 ?
Lisp_Object charset;
Lisp_Object ccl_program = Qnil;
Lisp_Object short_name = Qnil, long_name = Qnil;
+#ifdef UTF2000
+ Emchar code_offset = 0;
+ unsigned char byte_offset = 0;
+#endif
CHECK_SYMBOL (name);
if (!NILP (doc_string))
{
CHECK_INT (value);
graphic = XINT (value);
+#ifdef UTF2000
+ if (graphic < 0 || graphic > 2)
+#else
if (graphic < 0 || graphic > 1)
+#endif
signal_simple_error ("Invalid value for 'graphic", value);
}
{
if (chars == 94)
{
- /* id = CHARSET_ID_OFFSET_94 + final; */
- id = get_unallocated_leading_byte (dimension);
+ if (code_offset == 0)
+ id = CHARSET_ID_OFFSET_94 + final;
+ else
+ id = get_unallocated_leading_byte (dimension);
}
else if (chars == 96)
{
- id = get_unallocated_leading_byte (dimension);
+ if (code_offset == 0)
+ id = CHARSET_ID_OFFSET_96 + final;
+ else
+ id = get_unallocated_leading_byte (dimension);
}
else
{
{
if (chars == 94)
{
- id = get_unallocated_leading_byte (dimension);
+ if (code_offset == 0)
+ id = CHARSET_ID_OFFSET_94x94 + final;
+ else
+ id = get_unallocated_leading_byte (dimension);
}
else if (chars == 96)
{
{
abort ();
}
+ if (final)
+ {
+ if (chars == 94)
+ byte_offset = 33;
+ else if (chars == 96)
+ byte_offset = 32;
+ }
#else
id = get_unallocated_leading_byte (dimension);
#endif
charset = make_charset (id, name, type, columns, graphic,
final, direction, short_name, long_name,
doc_string, registry,
- Qnil, 0, 0, 0, 0);
+ Qnil, 0, 0, 0, byte_offset);
if (!NILP (ccl_program))
XCHARSET_CCL_PROGRAM (charset) = ccl_program;
return charset;
(charset, table))
{
struct Lisp_Charset *cs;
+ Lisp_Object old_table;
+ size_t i;
charset = Fget_charset (charset);
- CHECK_VECTOR (table);
-
cs = XCHARSET (charset);
- CHARSET_DECODING_TABLE(cs) = table;
+
+ if (EQ (table, Qnil))
+ {
+ CHARSET_DECODING_TABLE(cs) = table;
+ CHARSET_ENCODING_TABLE(cs) = Qnil;
+ return table;
+ }
+ else if (VECTORP (table))
+ {
+ if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
+ args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
+ old_table = CHARSET_ENCODING_TABLE(cs);
+ CHARSET_DECODING_TABLE(cs) = table;
+ }
+ else
+ signal_error (Qwrong_type_argument,
+ list2 (build_translated_string ("vector-or-nil-p"),
+ table));
+ /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
+
+ switch (CHARSET_DIMENSION (cs))
+ {
+ case 1:
+ CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
+ for (i = 0; i < XVECTOR_LENGTH (table); i++)
+ {
+ Lisp_Object c = XVECTOR_DATA(table)[i];
+
+ if (CHARP (c))
+ put_char_code_table (XCHAR (c),
+ make_int (i + CHARSET_BYTE_OFFSET (cs)),
+ CHARSET_ENCODING_TABLE(cs));
+ }
+ break;
+ case 2:
+ CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil);
+ for (i = 0; i < XVECTOR_LENGTH (table); i++)
+ {
+ Lisp_Object v = XVECTOR_DATA(table)[i];
+
+ if (VECTORP (v))
+ {
+ size_t j;
+
+ if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
+ {
+ CHARSET_DECODING_TABLE(cs) = old_table;
+ args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
+ }
+ for (j = 0; j < XVECTOR_LENGTH (v); j++)
+ {
+ Lisp_Object c = XVECTOR_DATA(v)[j];
+
+ if (CHARP (c))
+ put_char_code_table
+ (XCHAR (c),
+ make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8)
+ | (j + CHARSET_BYTE_OFFSET (cs))),
+ CHARSET_ENCODING_TABLE(cs));
+ }
+ }
+ else if (CHARP (v))
+ put_char_code_table (XCHAR (v),
+ make_int (i + CHARSET_BYTE_OFFSET (cs)),
+ CHARSET_ENCODING_TABLE(cs));
+ }
+ break;
+ }
return table;
}
#endif
syms_of_mule_charset (void)
{
INIT_LRECORD_IMPLEMENTATION (charset);
+ INIT_LRECORD_IMPLEMENTATION (char_byte_table);
DEFSUBR (Fcharsetp);
DEFSUBR (Ffind_charset);
defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
#ifdef UTF2000
defsymbol (&Qucs_bmp, "ucs-bmp");
+ defsymbol (&Qlatin_viscii, "latin-viscii");
defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
chlook->charset_by_attributes[i][j][k] = Qnil;
#endif
- chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
#ifdef UTF2000
- chlook->next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1;
+ chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
#else
+ chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
#endif
#endif
#ifdef UTF2000
- Vutf_2000_version = build_string("0.8 (Kami)");
+ Vutf_2000_version = build_string("0.9 (Kyūhōji)");
DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
Version number of UTF-2000.
*/ );
Vdefault_coded_charset_priority_list = Qnil;
DEFVAR_LISP ("default-coded-charset-priority-list",
&Vdefault_coded_charset_priority_list /*
-Default order of preferred coded-character-set.
+Default order of preferred coded-character-sets.
*/ );
#endif
}
staticpro (&Vcharset_ucs_bmp);
Vcharset_ucs_bmp =
make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
- CHARSET_TYPE_256X256, 1, 0, 0,
+ CHARSET_TYPE_256X256, 1, 2, 0,
CHARSET_LEFT_TO_RIGHT,
build_string ("BMP"),
build_string ("BMP"),
- build_string ("BMP"),
- build_string (""),
+ build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
+ build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
Qnil, 0, 0xFFFF, 0, 0);
#else
# define MIN_CHAR_THAI 0
build_string ("VISCII lower"),
build_string ("VISCII lower (Vietnamese)"),
build_string ("VISCII lower (Vietnamese)"),
- build_string ("VISCII1\\.1"),
+ build_string ("MULEVISCII-LOWER"),
Qnil, 0, 0, 0, 32);
staticpro (&Vcharset_latin_viscii_upper);
Vcharset_latin_viscii_upper =
build_string ("VISCII upper"),
build_string ("VISCII upper (Vietnamese)"),
build_string ("VISCII upper (Vietnamese)"),
- build_string ("VISCII1\\.1"),
+ build_string ("MULEVISCII-UPPER"),
Qnil, 0, 0, 0, 32);
- /*
- Fputhash (Qvietnamese_viscii_lower, Vcharset_latin_viscii_lower,
- Vcharset_hash_table);
- Fputhash (Qvietnamese_viscii_upper, Vcharset_latin_viscii_upper,
- Vcharset_hash_table);
- */
- Fdefine_charset_alias (Qvietnamese_viscii_lower,
- Vcharset_latin_viscii_lower);
- Fdefine_charset_alias (Qvietnamese_viscii_upper,
- Vcharset_latin_viscii_upper);
+ staticpro (&Vcharset_latin_viscii);
+ Vcharset_latin_viscii =
+ make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
+ CHARSET_TYPE_256, 1, 2, 0,
+ CHARSET_LEFT_TO_RIGHT,
+ build_string ("VISCII"),
+ build_string ("VISCII 1.1 (Vietnamese)"),
+ build_string ("VISCII 1.1 (Vietnamese)"),
+ build_string ("VISCII1\\.1"),
+ Qnil, 0, 0, 0, 0);
staticpro (&Vcharset_hiragana_jisx0208);
Vcharset_hiragana_jisx0208 =
make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
ranges entirely within the first 256 chars. */
if ((range_start >= 0x100 || range_end >= 0x100)
- && CHAR_LEADING_BYTE (range_start) !=
- CHAR_LEADING_BYTE (range_end))
+#ifdef UTF2000
+ && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end)
+#else
+ && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end)
+#endif
+ )
return REG_ERANGESPAN;
/* As advertised, translations only work over the 0 - 0x7F range.
}
else
{
- Lisp_Object charset;
+ Lisp_Object charset, value;
unsigned int c1, c2, s1, s2;
#ifdef UTF2000
- if ( (c1 =
- get_byte_from_character_table (ch, Vcharset_latin_jisx0201))
- >= 0 )
+ if (INTP (value =
+ get_char_code_table
+ (ch, XCHARSET_ENCODING_TABLE (Vcharset_latin_jisx0201))))
{
charset = Vcharset_latin_jisx0201;
+ c1 = XINT (value);
c2 = 0;
}
else
}
}
if (reg == -1)
- BREAKUP_CHAR (ch, charset, byte1, byte2);
+ {
+ Lisp_Object original_default_coded_charset_priority_list
+ = Vdefault_coded_charset_priority_list;
+
+ while (!EQ (Vdefault_coded_charset_priority_list, Qnil))
+ {
+ BREAKUP_CHAR (ch, charset, byte1, byte2);
+ if (XCHARSET_FINAL (charset))
+ goto found;
+ Vdefault_coded_charset_priority_list
+ = Fcdr (Fmemq (XCHARSET_NAME (charset),
+ Vdefault_coded_charset_priority_list));
+ }
+ BREAKUP_CHAR (ch, charset, byte1, byte2);
+ found:
+ Vdefault_coded_charset_priority_list
+ = original_default_coded_charset_priority_list;
+ }
ensure_correct_direction (XCHARSET_DIRECTION (charset),
codesys, dst, flags, 0);