From dab896ef955c1afd6fd53cef13685d5fab2f6e1e Mon Sep 17 00:00:00 2001 From: tomo Date: Tue, 17 Dec 2002 18:05:47 +0000 Subject: [PATCH] Sync with r21-2-19-utf-2000-0_9-0. --- lisp/ChangeLog | 27 +++ lisp/bytecomp.el | 2 +- lisp/mule/mule-charset.el | 1 + lisp/mule/viet-chars.el | 7 +- lisp/utf-2000/ccs-viscii.el | 264 ++++++++++++++++++++++++++- src/ChangeLog | 158 ++++++++++++++++ src/char-ucs.h | 87 +++++---- src/chartab.c | 10 +- src/chartab.h | 4 + src/insdel.c | 11 ++ src/lrecord.h | 1 + src/mule-charset.c | 422 ++++++++++++++++++++++++++++++++++++------- src/regex.c | 8 +- src/text-coding.c | 28 ++- 14 files changed, 913 insertions(+), 117 deletions(-) diff --git a/lisp/ChangeLog b/lisp/ChangeLog index b01118d..0360936 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,30 @@ +1999-10-10 MORIOKA Tomohiko + + * mule/mule-charset.el (default-coded-charset-priority-list): Add + `latin-viscii'; prefer it for characters used in Vietnamese. + + * utf-2000/ccs-viscii.el: Add mapping-table for `latin-viscii'. + +1999-10-08 Daiki Ueno + + * bytecomp.el (byte-compile-insert-header): Fix regexp. + +1999-10-07 MORIOKA Tomohiko + + * utf-2000/ccs-viscii.el: Rename `vietnamese-viscii-*' to + `latin-viscii-*'. + +1999-10-07 MORIOKA Tomohiko + + * mule/viet-chars.el (latin-viscii-lower): Renamed from charset + `vietnamese-viscii-lower'. + (latin-viscii-upper): Renamed from charset + `vietnamese-viscii-upper'. + (vietnamese-viscii-lower): New alias for charset + `latin-viscii-lower'. + (vietnamese-viscii-upper): New alias for charset + `latin-viscii-upper'. + 1999-10-05 MORIOKA Tomohiko * mule/mule-charset.el (default-coded-charset-priority-list): diff --git a/lisp/bytecomp.el b/lisp/bytecomp.el index cdc6bd2..abf4dd4 100644 --- a/lisp/bytecomp.el +++ b/lisp/bytecomp.el @@ -1787,7 +1787,7 @@ With argument, insert value in current buffer after the form." 
(and (eq (point) (point-max)) (not (re-search-backward - "\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]" nil t))))) + "\\\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]" nil t))))) (setq buffer-file-coding-system 'raw-text) (cond ((featurep 'utf-2000) (insert "(require 'mule)\n;;;###coding system: utf-8\n") diff --git a/lisp/mule/mule-charset.el b/lisp/mule/mule-charset.el index a972ecb..63f7bbe 100644 --- a/lisp/mule/mule-charset.el +++ b/lisp/mule/mule-charset.el @@ -281,6 +281,7 @@ DESCRIPTION (string) is the description string of the charset." ;; katakana-jisx0208 japanese-jisx0208 hebrew-iso8859-8 + latin-viscii vietnamese-viscii-lower vietnamese-viscii-upper))) diff --git a/lisp/mule/viet-chars.el b/lisp/mule/viet-chars.el index 7b3c4a4..3f935a7 100644 --- a/lisp/mule/viet-chars.el +++ b/lisp/mule/viet-chars.el @@ -32,7 +32,7 @@ ;; Vietnamese VISCII with two tables. (unless (featurep 'utf-2000) - (make-charset 'vietnamese-viscii-lower "VISCII lower (Vietnamese)" + (make-charset 'latin-viscii-lower "VISCII lower (Vietnamese)" '(registry "VISCII1.1" dimension 1 chars 96 @@ -40,7 +40,7 @@ graphic 1 )) - (make-charset 'vietnamese-viscii-upper "VISCII upper (Vietnamese)" + (make-charset 'latin-viscii-upper "VISCII upper (Vietnamese)" '(registry "VISCII1.1" dimension 1 chars 96 @@ -49,6 +49,9 @@ )) ) +(define-charset-alias 'vietnamese-viscii-lower 'latin-viscii-lower) +(define-charset-alias 'vietnamese-viscii-upper 'latin-viscii-upper) + (modify-syntax-entry 'vietnamese-viscii-lower "w") (modify-syntax-entry 'vietnamese-viscii-upper "w") diff --git a/lisp/utf-2000/ccs-viscii.el b/lisp/utf-2000/ccs-viscii.el index 68fce6a..a7dc739 100644 --- a/lisp/utf-2000/ccs-viscii.el +++ b/lisp/utf-2000/ccs-viscii.el @@ -26,7 +26,267 @@ ;;; Code: (set-charset-mapping-table - 'vietnamese-viscii-lower + 'latin-viscii + [?\u0000 ; 0x00 + ?\u0001 ; 0x01 + ?\u1EB2 ; 0x02 (.2ŽÆ) + ?\u0003 ; 0x03 + ?\u0004 ; 0x04 + ?\u1EB4 ; 0x05 (ŽÇ) + ?\u1EAA ; 0x06 (Žç) + ?\u0007 ; 0x07 + ?\u0008 
; 0x08 + ?\u0009 ; 0x09 + ?\u000A ; 0x0A + ?\u000B ; 0x0B + ?\u000C ; 0x0C + ?\u000D ; 0x0D + ?\u000E ; 0x0E + ?\u000F ; 0x0F + ?\u0010 ; 0x10 + ?\u0011 ; 0x11 + ?\u0012 ; 0x12 + ?\u0013 ; 0x13 + ?\u1EF6 ; 0x14 (ŽÖ) + ?\u0015 ; 0x15 + ?\u0016 ; 0x16 + ?\u0017 ; 0x17 + ?\u0018 ; 0x18 + ?\u1EF8 ; 0x19 (ŽÛ) + ?\u001A ; 0x1A + ?\u001B ; 0x1B + ?\u001C ; 0x1C + ?\u001D ; 0x1D + ?\u1EF4 ; 0x1E (ŽÜ) + ?\u001F ; 0x1F + ?\u0020 ; 0x20 ( ) + ?\u0021 ; 0x21 (!) + ?\u0022 ; 0x22 (") + ?\u0023 ; 0x23 (#) + ?\u0024 ; 0x24 ($) + ?\u0025 ; 0x25 (%) + ?\u0026 ; 0x26 (&) + ?\u0027 ; 0x27 (') + ?\u0028 ; 0x28 (() + ?\u0029 ; 0x29 ()) + ?\u002A ; 0x2A (*) + ?\u002B ; 0x2B (+) + ?\u002C ; 0x2C (,) + ?\u002D ; 0x2D (-) + ?\u002E ; 0x2E (.) + ?\u002F ; 0x2F (/) + ?\u0030 ; 0x30 (0) + ?\u0031 ; 0x31 (1) + ?\u0032 ; 0x32 (2) + ?\u0033 ; 0x33 (3) + ?\u0034 ; 0x34 (4) + ?\u0035 ; 0x35 (5) + ?\u0036 ; 0x36 (6) + ?\u0037 ; 0x37 (7) + ?\u0038 ; 0x38 (8) + ?\u0039 ; 0x39 (9) + ?\u003A ; 0x3A (:) + ?\u003B ; 0x3B (;) + ?\u003C ; 0x3C (<) + ?\u003D ; 0x3D (=) + ?\u003E ; 0x3E (>) + ?\u003F ; 0x3F (?) 
+ ?\u0040 ; 0x40 (@) + ?\u0041 ; 0x41 (A) + ?\u0042 ; 0x42 (B) + ?\u0043 ; 0x43 (C) + ?\u0044 ; 0x44 (D) + ?\u0045 ; 0x45 (E) + ?\u0046 ; 0x46 (F) + ?\u0047 ; 0x47 (G) + ?\u0048 ; 0x48 (H) + ?\u0049 ; 0x49 (I) + ?\u004A ; 0x4A (J) + ?\u004B ; 0x4B (K) + ?\u004C ; 0x4C (L) + ?\u004D ; 0x4D (M) + ?\u004E ; 0x4E (N) + ?\u004F ; 0x4F (O) + ?\u0050 ; 0x50 (P) + ?\u0051 ; 0x51 (Q) + ?\u0052 ; 0x52 (R) + ?\u0053 ; 0x53 (S) + ?\u0054 ; 0x54 (T) + ?\u0055 ; 0x55 (U) + ?\u0056 ; 0x56 (V) + ?\u0057 ; 0x57 (W) + ?\u0058 ; 0x58 (X) + ?\u0059 ; 0x59 (Y) + ?\u005A ; 0x5A (Z) + ?\u005B ; 0x5B ([) + ?\u005C ; 0x5C (\) + ?\u005D ; 0x5D (]) + ?\u005E ; 0x5E (^) + ?\u005F ; 0x5F (_) + ?\u0060 ; 0x60 (`) + ?\u0061 ; 0x61 (a) + ?\u0062 ; 0x62 (b) + ?\u0063 ; 0x63 (c) + ?\u0064 ; 0x64 (d) + ?\u0065 ; 0x65 (e) + ?\u0066 ; 0x66 (f) + ?\u0067 ; 0x67 (g) + ?\u0068 ; 0x68 (h) + ?\u0069 ; 0x69 (i) + ?\u006A ; 0x6A (j) + ?\u006B ; 0x6B (k) + ?\u006C ; 0x6C (l) + ?\u006D ; 0x6D (m) + ?\u006E ; 0x6E (n) + ?\u006F ; 0x6F (o) + ?\u0070 ; 0x70 (p) + ?\u0071 ; 0x71 (q) + ?\u0072 ; 0x72 (r) + ?\u0073 ; 0x73 (s) + ?\u0074 ; 0x74 (t) + ?\u0075 ; 0x75 (u) + ?\u0076 ; 0x76 (v) + ?\u0077 ; 0x77 (w) + ?\u0078 ; 0x78 (x) + ?\u0079 ; 0x79 (y) + ?\u007A ; 0x7A (z) + ?\u007B ; 0x7B ({) + ?\u007C ; 0x7C (|) + ?\u007D ; 0x7D (}) + ?\u007E ; 0x7E (~) + ?\u007F ; 0x7F + ?\u1EA0 ; 0x80 (ŽÕ) + ?\u1EAE ; 0x81 (Ž¡) + ?\u1EB0 ; 0x82 (Ž¢) + ?\u1EB6 ; 0x83 (Ž£) + ?\u1EA4 ; 0x84 (Ž¤) + ?\u1EA6 ; 0x85 (Ž¥) + ?\u1EA8 ; 0x86 (Ž¦) + ?\u1EAC ; 0x87 (Ž§) + ?\u1EBC ; 0x88 (Ž¨) + ?\u1EB8 ; 0x89 (Ž©) + ?\u1EBE ; 0x8A (Žª) + ?\u1EC0 ; 0x8B (Ž«) + ?\u1EC2 ; 0x8C (Ž¬) + ?\u1EC4 ; 0x8D (Ž­) + ?\u1EC6 ; 0x8E (Ž®) + ?\u1ED0 ; 0x8F (Ž¯) + ?\u1ED2 ; 0x90 (Ž°) + ?\u1ED4 ; 0x91 (Ž±) + ?\u1ED6 ; 0x92 (Ž²) + ?\u1ED8 ; 0x93 (Žµ) + ?\u1EE2 ; 0x94 (Žþ) + ?\u1EDA ; 0x95 (Ž¾) + ?\u1EDC ; 0x96 (Ž¶) + ?\u1EDE ; 0x97 (Ž·) + ?\u1ECA ; 0x98 (Ž¸) + ?\u1ECE ; 0x99 (Žö) + ?\u1ECC ; 0x9A (Ž÷) + ?\u1EC8 ; 0x9B (Žï) + ?\u1EE6 ; 0x9C (Žü) + ?\u0168 ; 0x9D (Žû) 
+ ?\u1EE4 ; 0x9E (Žø) + ?\u1EF2 ; 0x9F (ŽÏ) + ?\u00D5 ; 0xA0 (Žõ) + ?\u1EAF ; 0xA1 (.1Ž¡) + ?\u1EB1 ; 0xA2 (Ž¢) + ?\u1EB7 ; 0xA3 (Ž£) + ?\u1EA5 ; 0xA4 (Ž¤) + ?\u1EA7 ; 0xA5 (Ž¥) + ?\u1EA9 ; 0xA6 (Ž¦) + ?\u1EAD ; 0xA7 (Ž§) + ?\u1EBD ; 0xA8 (Ž¨) + ?\u1EB9 ; 0xA9 (Ž©) + ?\u1EBF ; 0xAA (Žª) + ?\u1EC1 ; 0xAB (Ž«) + ?\u1EC3 ; 0xAC (Ž¬) + ?\u1EC5 ; 0xAD (Ž­) + ?\u1EC7 ; 0xAE (Ž®) + ?\u1ED1 ; 0xAF (Ž¯) + ?\u1ED3 ; 0xB0 (Ž°) + ?\u1ED5 ; 0xB1 (Ž±) + ?\u1ED7 ; 0xB2 (Ž²) + ?\u1EE0 ; 0xB3 (.2ŽÞ) + ?\u01A0 ; 0xB4 (Ž½) + ?\u1ED9 ; 0xB5 (.1Žµ) + ?\u1EDD ; 0xB6 (Ž¶) + ?\u1EDF ; 0xB7 (Ž·) + ?\u1ECB ; 0xB8 (Ž¸) + ?\u1EF0 ; 0xB9 (.2Žñ) + ?\u1EE8 ; 0xBA (ŽÑ) + ?\u1EEA ; 0xBB (Ž×) + ?\u1EEC ; 0xBC (ŽØ) + ?\u01A1 ; 0xBD (.1Ž½) + ?\u1EDB ; 0xBE (Ž¾) + ?\u01AF ; 0xBF (.2Žß) + ?\u00C0 ; 0xC0 (Žà) + ?\u00C1 ; 0xC1 (Žá) + ?\u00C2 ; 0xC2 (Žâ) + ?\u00C3 ; 0xC3 (Žã) + ?\u1EA2 ; 0xC4 (Žä) + ?\u0102 ; 0xC5 (Žå) + ?\u1EB3 ; 0xC6 (.1ŽÆ) + ?\u1EB5 ; 0xC7 (ŽÇ) + ?\u00C8 ; 0xC8 (.2Žè) + ?\u00C9 ; 0xC9 (Žé) + ?\u00CA ; 0xCA (Žê) + ?\u1EBA ; 0xCB (Žë) + ?\u00CC ; 0xCC (Žì) + ?\u00CD ; 0xCD (Ží) + ?\u0128 ; 0xCE (Žî) + ?\u1EF3 ; 0xCF (.1ŽÏ) + ?\u0110 ; 0xD0 (.2Žð) + ?\u1EE9 ; 0xD1 (.1ŽÑ) + ?\u00D2 ; 0xD2 (.2Žò) + ?\u00D3 ; 0xD3 (Žó) + ?\u00D4 ; 0xD4 (Žô) + ?\u1EA1 ; 0xD5 (.1ŽÕ) + ?\u1EF7 ; 0xD6 (ŽÖ) + ?\u1EEB ; 0xD7 (Ž×) + ?\u1EED ; 0xD8 (ŽØ) + ?\u00D9 ; 0xD9 (.2Žù) + ?\u00DA ; 0xDA (Žú) + ?\u1EF9 ; 0xDB (.1ŽÛ) + ?\u1EF5 ; 0xDC (ŽÜ) + ?\u00DD ; 0xDD (.2Žý) + ?\u1EE1 ; 0xDE (.1ŽÞ) + ?\u01B0 ; 0xDF (Žß) + ?\u00E0 ; 0xE0 (Žà) + ?\u00E1 ; 0xE1 (Žá) + ?\u00E2 ; 0xE2 (Žâ) + ?\u00E3 ; 0xE3 (Žã) + ?\u1EA3 ; 0xE4 (Žä) + ?\u0103 ; 0xE5 (Žå) + ?\u1EEF ; 0xE6 (Žæ) + ?\u1EAB ; 0xE7 (Žç) + ?\u00E8 ; 0xE8 (Žè) + ?\u00E9 ; 0xE9 (Žé) + ?\u00EA ; 0xEA (Žê) + ?\u1EBB ; 0xEB (Žë) + ?\u00EC ; 0xEC (Žì) + ?\u00ED ; 0xED (Ží) + ?\u0129 ; 0xEE (Žî) + ?\u1EC9 ; 0xEF (Žï) + ?\u0111 ; 0xF0 (Žð) + ?\u1EF1 ; 0xF1 (Žñ) + ?\u00F2 ; 0xF2 (Žò) + ?\u00F3 ; 0xF3 (Žó) + ?\u00F4 ; 0xF4 (Žô) + ?\u00F5 ; 0xF5 (Žõ) + ?\u1ECF ; 0xF6 (Žö) + 
?\u1ECD ; 0xF7 (Ž÷) + ?\u1EE5 ; 0xF8 (Žø) + ?\u00F9 ; 0xF9 (Žù) + ?\u00FA ; 0xFA (Žú) + ?\u0169 ; 0xFB (Žû) + ?\u1EE7 ; 0xFC (Žü) + ?\u00FD ; 0xFD (Žý) + ?\u1EE3 ; 0xFE (Žþ) + ?\u1EEE ; 0xFF (.2Žæ) + ]) + +(set-charset-mapping-table + 'latin-viscii-lower [nil ; 0x20 ?\u1eaf ; 0x21 ?\u1eb1 ; 0x22 @@ -126,7 +386,7 @@ ]) (set-charset-mapping-table - 'vietnamese-viscii-upper + 'latin-viscii-upper [nil ; 0x20 ?\u1eae ; 0x21 ?\u1eb0 ; 0x22 diff --git a/src/ChangeLog b/src/ChangeLog index 4247f7d..8654bc6 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,161 @@ +1999-10-12 MORIOKA Tomohiko + + * mule-charset.c (vars_of_mule_charset): Update `utf-2000-version' + to 0.9. + +1999-10-11 MORIOKA Tomohiko + + * regex.c (compile_extended_range): Use `CHAR_CHARSET_ID' instead + of `CHAR_LEADING_BYTE' in UTF-2000. + + * insdel.c (find_charsets_in_bufbyte_string): Use + `CHAR_CHARSET_ID' instead of `CHAR_LEADING_BYTE' in UTF-2000. + (find_charsets_in_emchar_string): Likewise. + + * chartab.h (CHAR_TABLE_NON_ASCII_VALUE_UNSAFE): Use + `CHAR_CHARSET_ID' instead of `CHAR_LEADING_BYTE' in UTF-2000. + + * char-ucs.h (CHAR_LEADING_BYTE): Deleted. + (CHAR_CHARSET_ID): New macro. + +1999-10-11 MORIOKA Tomohiko + + * chartab.c (get_char_table): Don't use type `Charset_ID' for + charset-id - MIN_LEADING_BYTE. + (put_char_table): Likewise. + +1999-10-11 MORIOKA Tomohiko + + * char-ucs.h (MIN_LEADING_BYTE): Changed to `-0x40'. + (NUM_LEADING_BYTES): Changed to (80 * 3 - MIN_LEADING_BYTE). + (CHARSET_LEADING_BYTE): Don't cast by `Bufbyte'. + (CHARSET_ID_OFFSET): New macro. + (LEADING_BYTE_CONTROL_1): Changed to (CHARSET_ID_OFFSET - 1). + (LEADING_BYTE_UCS_BMP): Changed to (CHARSET_ID_OFFSET - 2). + (LEADING_BYTE_LATIN_VISCII): Changed to (CHARSET_ID_OFFSET - 3). + (LEADING_BYTE_HIRAGANA_JISX0208): Changed to (CHARSET_ID_OFFSET - + 4). + (LEADING_BYTE_KATAKANA_JISX0208): Changed to (CHARSET_ID_OFFSET - + 5). + (MIN_LEADING_BYTE_PRIVATE): Changed to `MIN_LEADING_BYTE'. 
+ (MAX_LEADING_BYTE_PRIVATE): Changed to (CHARSET_ID_OFFSET - 6). + (CHARSET_ID_OFFSET_94): Changed to (CHARSET_ID_OFFSET - '0'). + (CHARSET_ID_OFFSET_96): Changed to (CHARSET_ID_OFFSET_94 + 80). + (CHARSET_ID_OFFSET_94x94): Changed to (CHARSET_ID_OFFSET_96 + 80). + +1999-10-11 MORIOKA Tomohiko + + * mule-charset.c (next_allocated_leading_byte): New variable in + UTF-2000. + (next_allocated_1_byte_leading_byte): Don't define in UTF-2000. + (next_allocated_2_byte_leading_byte): Don't define in UTF-2000. + (get_unallocated_leading_byte): Simply use + `next_allocated_leading_byte' [ignore dimension] in UTF-2000. + (vars_of_mule_charset): Setup `next_allocated_leading_byte' in + UTF-2000. + + * char-ucs.h (MIN_LEADING_BYTE_PRIVATE): New macro. + (MAX_LEADING_BYTE_PRIVATE): New macro. + (MIN_LEADING_BYTE_OFFICIAL_2): Deleted. + (MAX_LEADING_BYTE_OFFICIAL_2): Deleted. + +1999-10-11 MORIOKA Tomohiko + + * mule-charset.c (Fmake_charset): Allocate final-byte based + charset-id for 94-set, 96-set and 94x94-set. + +1999-10-11 MORIOKA Tomohiko + + * mule-charset.c (char_byte_table_equal): Fill braces to avoid + ambiguous `else'. + (Fmake_charset): Likewise. + (complex_vars_of_mule_charset): Modify the font registry of + `ucs-bmp' not to match `Ethiopic-Unicode'. + +1999-10-10 MORIOKA Tomohiko + + * mule-charset.c (complex_vars_of_mule_charset): Add font + registry of `ucs-bmp'. + +1999-10-10 MORIOKA Tomohiko + + * text-coding.c (char_encode_iso2022): Ignore non-ISO-2022 + coded-charsets in `default-coded-charset-priority-list' when + breaking up a character. + + * mule-charset.c (Vcharset_latin_viscii): New variable. + (Qlatin_viscii): New variable. + (make_charset): Don't use `decoding_table'. + (Fmake_charset): Regard graphic = 2 as 256^n-set; setup + byte_offset. + (Fset_charset_mapping_table): New implementation. + (syms_of_mule_charset): Add new symbol `latin-viscii'. 
+ (complex_vars_of_mule_charset): Set `graphic' attribute of charset + `ucs-bmp' and `latin_viscii' to 2; change font registry of charset + `latin-viscii-lower' to "MULEVISCII-LOWER"; change font registry + of charset `latin-viscii-upper' to "MULEVISCII-UPPER"; add new + charset `latin_viscii'. + + * char-ucs.h (LEADING_BYTE_LATIN_VISCII): New macro. + (CHARSET_TYPE_94X94): Change to 1 from 2. + (CHARSET_TYPE_96): Change to 2 from 1. + (CHARSET_TYPE_128): New macro. + (CHARSET_TYPE_128X128): Change to 5 from 4. + (CHARSET_TYPE_256): New macro. + (CHARSET_TYPE_256X256): Change to 7 from 5. + (MAKE_CHAR): Use `XCHARSET_BYTE_OFFSET(charset)'. + +1999-10-10 MORIOKA Tomohiko + + * text-coding.c (char_encode_shift_jis): Refer + `XCHARSET_ENCODING_TABLE(Vcharset_latin_jisx0201)' instead of + `XCHARSET_TO_BYTE1_TABLE(Vcharset_latin_jisx0201)'. + + * mule-charset.c (mark_char_byte_table): New function in UTF-2000. + (char_byte_table_equal): New function in UTF-2000. + (char_byte_table_hash): New function in UTF-2000. + (char_byte_table_description): New constant in UTF-2000. + (char_byte_table): New type in UTF-2000. + (make_char_byte_table): New function in UTF-2000. + (copy_char_byte_table): New function in UTF-2000. + (make_char_code_table): New macro in UTF-2000. + (get_char_code_table): New function in UTF-2000. + (put_char_code_table): New function in UTF-2000. + (mark_charset): Mark `cs->encoding_table' in UTF-2000. + (charset_description): Add setting in UTF-2000. + (make_charset): Setup `CHARSET_ENCODING_TABLE(cs)' instead of + `CHARSET_TO_BYTE1_TABLE(cs)'. + (charset_get_byte1): Refer `XCHARSET_ENCODING_TABLE(charset)' + instead of `XCHARSET_TO_BYTE1_TABLE(charset)'. + (charset_get_byte2): Refer `XCHARSET_ENCODING_TABLE(charset)' + instead of `XCHARSET_TO_BYTE2_TABLE(charset)'. + (Fset_charset_mapping_table): Setup `CHARSET_ENCODING_TABLE(cs)' + instead of `CHARSET_TO_BYTE1_TABLE(cs)' and + `CHARSET_TO_BYTE2_TABLE(cs)'. 
+ + * char-ucs.h (char_byte_table): New type. + (XCHAR_BYTE_TABLE): New macro. + (XSETCHAR_BYTE_TABLE): New macro. + (CHAR_BYTE_TABLE_P): New macro. + (GC_CHAR_BYTE_TABLE_P): New macro. + (struct Lisp_Char_Byte_Table): New structure. + (get_char_code_table): New interface. + (Emchar_to_byte_table): Deleted. + (get_byte_from_character_table): Deleted. + (struct Lisp_Charset): Add `encoding_table'; delete + `to_byte1_table' and `to_byte2_table'. + (CHARSET_ENCODING_TABLE): New macro. + (CHARSET_TO_BYTE1_TABLE): Deleted. + (CHARSET_TO_BYTE2_TABLE): Deleted. + (XCHARSET_ENCODING_TABLE): New macro. + (XCHARSET_TO_BYTE1_TABLE): Deleted. + (XCHARSET_TO_BYTE2_TABLE): Deleted. + +1999-10-07 MORIOKA Tomohiko + + * mule-charset.c (syms_of_mule_charset): Delete charset alias + `vietnamese-viscii-*'. + 1999-10-07 MORIOKA Tomohiko * mule-charset.c (Qvietnamese_viscii_lower): New variable. diff --git a/src/char-ucs.h b/src/char-ucs.h index b9bfa06..bf23902 100644 --- a/src/char-ucs.h +++ b/src/char-ucs.h @@ -29,8 +29,24 @@ Boston, MA 02111-1307, USA. 
*/ #define CHAR_ASCII_P(ch) ((ch) <= 0x7F) -int -get_byte_from_character_table (Emchar ch, Lisp_Object ccs); +struct Lisp_Char_Byte_Table +{ + struct lcrecord_header header; + + Lisp_Object property[256]; +}; +typedef struct Lisp_Char_Byte_Table Lisp_Char_Byte_Table; + +DECLARE_LRECORD (char_byte_table, Lisp_Char_Byte_Table); +#define XCHAR_BYTE_TABLE(x) \ + XRECORD (x, char_byte_table, struct Lisp_Char_Byte_Table) +#define XSETCHAR_BYTE_TABLE(x, p) XSETRECORD (x, p, char_byte_table) +#define CHAR_BYTE_TABLE_P(x) RECORDP (x, char_byte_table) +/* #define CHECK_CHAR_BYTE_TABLE(x) CHECK_RECORD (x, char_byte_table) + char table entries should never escape to Lisp */ + +Lisp_Object get_char_code_table (Emchar ch, Lisp_Object table); + extern Lisp_Object Vcharset_ucs_bmp; extern Lisp_Object Vcharset_latin_jisx0201; @@ -48,15 +64,26 @@ extern Lisp_Object Vcharset_latin_viscii_upper; typedef int Charset_ID; -#define MIN_LEADING_BYTE 0x80 +#define MIN_LEADING_BYTE -0x40 +#define CHARSET_ID_OFFSET 0x00 + +/* represent normal 80-9F */ +#define LEADING_BYTE_CONTROL_1 (CHARSET_ID_OFFSET - 1) -#define LEADING_BYTE_UCS_BMP 0x80 -#define LEADING_BYTE_CONTROL_1 0x81 /* represent normal 80-9F */ -#define LEADING_BYTE_HIRAGANA_JISX0208 0x82 -#define LEADING_BYTE_KATAKANA_JISX0208 0x83 +/* ISO/IEC 10646 BMP */ +#define LEADING_BYTE_UCS_BMP (CHARSET_ID_OFFSET - 2) +/* VISCII 1.1 */ +#define LEADING_BYTE_LATIN_VISCII (CHARSET_ID_OFFSET - 3) -#define CHARSET_ID_OFFSET_94 0x55 +#define LEADING_BYTE_HIRAGANA_JISX0208 (CHARSET_ID_OFFSET - 4) +#define LEADING_BYTE_KATAKANA_JISX0208 (CHARSET_ID_OFFSET - 5) + +#define MIN_LEADING_BYTE_PRIVATE MIN_LEADING_BYTE +#define MAX_LEADING_BYTE_PRIVATE (CHARSET_ID_OFFSET - 6) + + +#define CHARSET_ID_OFFSET_94 (CHARSET_ID_OFFSET - '0') #define MIN_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '0') #define MAX_CHARSET_ID_PRIVATE_94 (CHARSET_ID_OFFSET_94 + '?') @@ -71,7 +98,7 @@ typedef int Charset_ID; #define LEADING_BYTE_LATIN_JISX0201 
(CHARSET_ID_OFFSET_94 + 'J') -#define CHARSET_ID_OFFSET_96 0x70 +#define CHARSET_ID_OFFSET_96 (CHARSET_ID_OFFSET_94 + 80) #define LEADING_BYTE_LATIN_VISCII_LOWER (CHARSET_ID_OFFSET_96 + '1') #define LEADING_BYTE_LATIN_VISCII_UPPER (CHARSET_ID_OFFSET_96 + '2') @@ -107,11 +134,7 @@ typedef int Charset_ID; #define LEADING_BYTE_THAI_TIS620 (CHARSET_ID_OFFSET_96 + 'T') -#define MIN_LEADING_BYTE_PRIVATE_1 0xD0 -#define MAX_LEADING_BYTE_PRIVATE_1 0xDF - - -#define CHARSET_ID_OFFSET_94x94 0xB0 +#define CHARSET_ID_OFFSET_94x94 (CHARSET_ID_OFFSET_96 + 80) /* Big5 Level 1 */ #define LEADING_BYTE_CHINESE_BIG5_1 ('0' + CHARSET_ID_OFFSET_94x94) @@ -165,10 +188,8 @@ typedef int Charset_ID; /* DPRK Hangul KPS 9566-1997 */ #define LEADING_BYTE_KOREAN_KPS9566 ('N' + CHARSET_ID_OFFSET_94x94) -#define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978 -#define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_KOREAN_KPS9566 -#define NUM_LEADING_BYTES 256 +#define NUM_LEADING_BYTES (80 * 3 - MIN_LEADING_BYTE) /************************************************************************/ @@ -215,6 +236,9 @@ struct Lisp_Charset /* Byte->character mapping table */ Lisp_Object decoding_table; + /* Character->byte mapping table */ + Lisp_Object encoding_table; + /* Range of character code */ Emchar ucs_min, ucs_max; @@ -233,12 +257,14 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) -#define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ -#define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ -#define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */ -#define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ -#define CHARSET_TYPE_128X128 4 /* This charset includes 128x128 characters. */ -#define CHARSET_TYPE_256X256 5 /* This charset includes 256x256 characters. 
*/ +#define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ +#define CHARSET_TYPE_94X94 1 /* This charset includes 94x94 characters. */ +#define CHARSET_TYPE_96 2 /* This charset includes 96 characters. */ +#define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ +#define CHARSET_TYPE_128 4 /* This charset includes 128 characters. */ +#define CHARSET_TYPE_128X128 5 /* This charset includes 128x128 characters. */ +#define CHARSET_TYPE_256 6 /* This charset includes 256 characters. */ +#define CHARSET_TYPE_256X256 7 /* This charset includes 256x256 characters. */ #define CHARSET_LEFT_TO_RIGHT 0 #define CHARSET_RIGHT_TO_LEFT 1 @@ -261,6 +287,7 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define CHARSET_CHARS(cs) ((cs)->chars) #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) #define CHARSET_DECODING_TABLE(cs) ((cs)->decoding_table) +#define CHARSET_ENCODING_TABLE(cs) ((cs)->encoding_table) #define CHARSET_UCS_MIN(cs) ((cs)->ucs_min) #define CHARSET_UCS_MAX(cs) ((cs)->ucs_max) #define CHARSET_CODE_OFFSET(cs) ((cs)->code_offset) @@ -284,6 +311,7 @@ DECLARE_LRECORD (charset, Lisp_Charset); #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) #define XCHARSET_DECODING_TABLE(cs) CHARSET_DECODING_TABLE(XCHARSET(cs)) +#define XCHARSET_ENCODING_TABLE(cs) CHARSET_ENCODING_TABLE(XCHARSET(cs)) #define XCHARSET_UCS_MIN(cs) CHARSET_UCS_MIN(XCHARSET(cs)) #define XCHARSET_UCS_MAX(cs) CHARSET_UCS_MAX(XCHARSET(cs)) #define XCHARSET_CODE_OFFSET(cs) CHARSET_CODE_OFFSET(XCHARSET(cs)) @@ -296,8 +324,7 @@ struct charset_lookup { /* Table of charsets indexed by type/final-byte. 
*/ Lisp_Object charset_by_attributes[4][128]; - Charset_ID next_allocated_1_byte_leading_byte; - Charset_ID next_allocated_2_byte_leading_byte; + Charset_ID next_allocated_leading_byte; }; extern struct charset_lookup *chlook; @@ -380,18 +407,17 @@ INLINE_HEADER Emchar MAKE_CHAR (Lisp_Object charset, int c1, int c2) { Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (charset); - int ofs, idx; + int idx; Lisp_Object ch; if (!EQ (decoding_table, Qnil) - && (0 <= (idx = - c1 - (ofs = (XCHARSET_CHARS (charset) == 94 ? 33 : 32)))) + && (0 <= (idx = c1 - XCHARSET_BYTE_OFFSET (charset))) && (idx < XVECTOR_LENGTH (decoding_table)) && !EQ (ch = XVECTOR_DATA(decoding_table)[idx], Qnil)) { if (VECTORP (ch)) { - if ((0 <= (idx = c2 - ofs)) + if ((0 <= (idx = c2 - XCHARSET_BYTE_OFFSET (charset))) && (idx < XVECTOR_LENGTH (ch)) && !EQ (ch = XVECTOR_DATA(ch)[idx], Qnil)) return XCHAR (ch); @@ -600,8 +626,7 @@ CHAR_CHARSET (Emchar ch) return charset; } -#define CHAR_LEADING_BYTE(c) (XCHARSET_LEADING_BYTE(CHAR_CHARSET(c))) - +#define CHAR_CHARSET_ID(c) (XCHARSET_ID(CHAR_CHARSET(c))) #define CHAR_COLUMNS(c) (CHARSET_COLUMNS(XCHARSET(CHAR_CHARSET(c)))) diff --git a/src/chartab.c b/src/chartab.c index 59b7ddb..5f7931b 100644 --- a/src/chartab.c +++ b/src/chartab.c @@ -825,7 +825,7 @@ get_char_table (Emchar ch, Lisp_Char_Table *ct) val = ct->ascii[byte1 + 128]; else { - Charset_ID lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE; + int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE; val = ct->level1[lb]; if (CHAR_TABLE_ENTRYP (val)) { @@ -1077,8 +1077,7 @@ put_char_table (Lisp_Char_Table *ct, struct chartab_range *range, } else { - Charset_ID lb - = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE; + int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE; ct->level1[lb] = val; } break; @@ -1086,8 +1085,7 @@ put_char_table (Lisp_Char_Table *ct, struct chartab_range *range, case CHARTAB_RANGE_ROW: { Lisp_Char_Table_Entry *cte; - Charset_ID lb - = 
XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE; + int lb = XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE; /* make sure that there is a separate entry for the row. */ if (!CHAR_TABLE_ENTRYP (ct->level1[lb])) ct->level1[lb] = make_char_table_entry (ct->level1[lb]); @@ -1111,7 +1109,7 @@ put_char_table (Lisp_Char_Table *ct, struct chartab_range *range, else { Lisp_Char_Table_Entry *cte; - Charset_ID lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE; + int lb = XCHARSET_LEADING_BYTE (charset) - MIN_LEADING_BYTE; /* make sure that there is a separate entry for the row. */ if (!CHAR_TABLE_ENTRYP (ct->level1[lb])) ct->level1[lb] = make_char_table_entry (ct->level1[lb]); diff --git a/src/chartab.h b/src/chartab.h index 8ed1cf4..20d6c88 100644 --- a/src/chartab.h +++ b/src/chartab.h @@ -140,7 +140,11 @@ CHAR_TABLE_NON_ASCII_VALUE_UNSAFE (Lisp_Char_Table *ct, Emchar ch); INLINE_HEADER Lisp_Object CHAR_TABLE_NON_ASCII_VALUE_UNSAFE (Lisp_Char_Table *ct, Emchar ch) { +#ifdef UTF2000 + Charset_ID lb = CHAR_CHARSET_ID (ch); +#else Charset_ID lb = CHAR_LEADING_BYTE (ch); +#endif if (!CHAR_TABLE_ENTRYP ((ct)->level1[lb - MIN_LEADING_BYTE])) return (ct)->level1[lb - MIN_LEADING_BYTE]; else diff --git a/src/insdel.c b/src/insdel.c index 046697c..c9f4b40 100644 --- a/src/insdel.c +++ b/src/insdel.c @@ -3119,8 +3119,14 @@ find_charsets_in_bufbyte_string (Charset_ID *charsets, const Bufbyte *str, while (str < strend) { +#ifdef UTF2000 + charsets[CHAR_CHARSET_ID (charptr_emchar (str)) + - MIN_LEADING_BYTE] = 1; +#else /* I'm not sure the definition for UTF2000 works with leading-byte + representation. 
*/ charsets[CHAR_LEADING_BYTE (charptr_emchar (str)) - MIN_LEADING_BYTE] = 1; +#endif INC_CHARPTR (str); } #endif @@ -3147,7 +3153,12 @@ find_charsets_in_emchar_string (Charset_ID *charsets, const Emchar *str, for (i = 0; i < len; i++) { +#ifdef UTF2000 + charsets[CHAR_CHARSET_ID (str[i]) - MIN_LEADING_BYTE] = 1; +#else /* I'm not sure the definition for UTF2000 works with leading-byte + representation. */ charsets[CHAR_LEADING_BYTE (str[i]) - MIN_LEADING_BYTE] = 1; +#endif } #endif } diff --git a/src/lrecord.h b/src/lrecord.h index bdf1c9e..6fef08a 100644 --- a/src/lrecord.h +++ b/src/lrecord.h @@ -151,6 +151,7 @@ enum lrecord_type lrecord_type_lstream, lrecord_type_process, lrecord_type_charset, + lrecord_type_char_byte_table, lrecord_type_coding_system, lrecord_type_char_table, lrecord_type_char_table_entry, diff --git a/src/mule-charset.c b/src/mule-charset.c index 945549f..8fd87a9 100644 --- a/src/mule-charset.c +++ b/src/mule-charset.c @@ -59,6 +59,7 @@ Lisp_Object Vcharset_chinese_cns11643_1; Lisp_Object Vcharset_chinese_cns11643_2; #ifdef UTF2000 Lisp_Object Vcharset_ucs_bmp; +Lisp_Object Vcharset_latin_viscii; Lisp_Object Vcharset_latin_viscii_lower; Lisp_Object Vcharset_latin_viscii_upper; Lisp_Object Vcharset_hiragana_jisx0208; @@ -132,48 +133,189 @@ const Bytecount rep_bytes_by_first_byte[0xA0] = #endif #ifdef UTF2000 -int -get_byte_from_character_table (Emchar ch, Lisp_Object ccs) +static Lisp_Object +mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object)) { - Lisp_Charset* cs = XCHARSET(ccs); - Lisp_Object decoding_table = CHARSET_DECODING_TABLE (cs); - int byte_offset = CHARSET_BYTE_OFFSET (cs); + struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj); + int i; - if (VECTORP (decoding_table)) + for (i = 0; i < 256; i++) { - int row; + mark_object (cte->property[i]); + } + return Qnil; +} - for (row = 0; row < XVECTOR_LENGTH (decoding_table); row++) - { - Lisp_Object elt = XVECTOR_DATA(decoding_table)[row]; +static int 
+char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth) +{ + struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1); + struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2); + int i; - if (VECTORP (elt)) - { - int cell; + for (i = 0; i < 256; i++) + if (CHAR_BYTE_TABLE_P (cte1->property[i])) + { + if (CHAR_BYTE_TABLE_P (cte2->property[i])) + { + if (!char_byte_table_equal (cte1->property[i], + cte2->property[i], depth + 1)) + return 0; + } + else + return 0; + } + else + if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1)) + return 0; + return 1; +} - for (cell = 0; cell < XVECTOR_LENGTH (elt); cell++) - { - Lisp_Object obj = XVECTOR_DATA(elt)[cell]; - - if (CHARP (obj)) - { - if (XCHAR (obj) == ch) - return - ( (row + byte_offset) << 8 ) - | (cell + byte_offset); - } - } +static unsigned long +char_byte_table_hash (Lisp_Object obj, int depth) +{ + struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj); + + return internal_array_hash (cte->property, 256, depth); +} + +static const struct lrecord_description char_byte_table_description[] = { + { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Char_Byte_Table, property), 256 }, + { XD_END } +}; + +DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table, + mark_char_byte_table, + internal_object_printer, + 0, char_byte_table_equal, + char_byte_table_hash, + char_byte_table_description, + Lisp_Char_Byte_Table); + + +static Lisp_Object +make_char_byte_table (Lisp_Object initval) +{ + Lisp_Object obj; + int i; + struct Lisp_Char_Byte_Table *cte = + alloc_lcrecord_type (struct Lisp_Char_Byte_Table, + &lrecord_char_byte_table); + + for (i = 0; i < 256; i++) + cte->property[i] = initval; + + XSETCHAR_BYTE_TABLE (obj, cte); + return obj; +} + +static Lisp_Object +copy_char_byte_table (Lisp_Object entry) +{ + struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry); + Lisp_Object obj; + int i; + struct Lisp_Char_Byte_Table *ctenew = + alloc_lcrecord_type (struct 
Lisp_Char_Byte_Table, + &lrecord_char_byte_table); + + for (i = 0; i < 256; i++) + { + Lisp_Object new = cte->property[i]; + if (CHAR_BYTE_TABLE_P (new)) + ctenew->property[i] = copy_char_byte_table (new); + else + ctenew->property[i] = new; + } + + XSETCHAR_BYTE_TABLE (obj, ctenew); + return obj; +} + +#define make_char_code_table(initval) make_char_byte_table(initval) + +Lisp_Object +get_char_code_table (Emchar ch, Lisp_Object table) +{ + struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table); + Lisp_Object ret = cpt->property [ch >> 24]; + + if (CHAR_BYTE_TABLE_P (ret)) + cpt = XCHAR_BYTE_TABLE (ret); + else + return ret; + + ret = cpt->property [(unsigned char) (ch >> 16)]; + if (CHAR_BYTE_TABLE_P (ret)) + cpt = XCHAR_BYTE_TABLE (ret); + else + return ret; + + ret = cpt->property [(unsigned char) (ch >> 8)]; + if (CHAR_BYTE_TABLE_P (ret)) + cpt = XCHAR_BYTE_TABLE (ret); + else + return ret; + + return cpt->property [(unsigned char) ch]; +} + +void +put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table) +{ + struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table); + Lisp_Object ret = cpt1->property[ch >> 24]; + + if (CHAR_BYTE_TABLE_P (ret)) + { + struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret); + + ret = cpt2->property[(unsigned char)(ch >> 16)]; + if (CHAR_BYTE_TABLE_P (ret)) + { + struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret); + + ret = cpt3->property[(unsigned char)(ch >> 8)]; + if (CHAR_BYTE_TABLE_P (ret)) + { + struct Lisp_Char_Byte_Table* cpt4 + = XCHAR_BYTE_TABLE (ret); + + cpt4->property[(unsigned char)ch] = value; } - else if (CHARP (elt)) + else if (!EQ (ret, value)) { - if (XCHAR (elt) == ch) - return (row + byte_offset) << 8; + Lisp_Object cpt4 = make_char_byte_table (ret); + + XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value; + cpt3->property[(unsigned char)(ch >> 8)] = cpt4; } } + else if (!EQ (ret, value)) + { + Lisp_Object cpt3 = make_char_byte_table (ret); + Lisp_Object cpt4 = 
make_char_byte_table (ret); + + XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value; + XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] + = cpt4; + cpt2->property[(unsigned char)(ch >> 16)] = cpt3; + } + } + else if (!EQ (ret, value)) + { + Lisp_Object cpt2 = make_char_byte_table (ret); + Lisp_Object cpt3 = make_char_byte_table (ret); + Lisp_Object cpt4 = make_char_byte_table (ret); + + XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value; + XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4; + XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3; + cpt1->property[(unsigned char)(ch >> 24)] = cpt2; } - return 0; } + Lisp_Object Vutf_2000_version; #endif @@ -213,6 +355,7 @@ Lisp_Object Qascii, Qchinese_cns11643_2, #ifdef UTF2000 Qucs_bmp, + Qlatin_viscii, Qlatin_viscii_lower, Qlatin_viscii_upper, Qvietnamese_viscii_lower, @@ -563,6 +706,7 @@ mark_charset (Lisp_Object obj) mark_object (cs->ccl_program); #ifdef UTF2000 mark_object (cs->decoding_table); + mark_object (cs->encoding_table); #endif return cs->name; } @@ -609,7 +753,10 @@ static const struct lrecord_description charset_description[] = { { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) }, { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) }, { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) }, +#ifdef UTF2000 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) }, +#endif { XD_END } }; @@ -651,14 +798,15 @@ make_charset (Charset_ID id, Lisp_Object name, CHARSET_CCL_PROGRAM (cs) = Qnil; CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; #ifdef UTF2000 - CHARSET_DECODING_TABLE(cs) = decoding_table; + CHARSET_DECODING_TABLE(cs) = Qnil; + CHARSET_ENCODING_TABLE(cs) = Qnil; CHARSET_UCS_MIN(cs) = ucs_min; CHARSET_UCS_MAX(cs) = ucs_max; CHARSET_CODE_OFFSET(cs) = code_offset; CHARSET_BYTE_OFFSET(cs) = byte_offset; #endif - - switch ( CHARSET_TYPE (cs) ) + + switch 
(CHARSET_TYPE (cs)) { case CHARSET_TYPE_94: CHARSET_DIMENSION (cs) = 1; @@ -677,10 +825,18 @@ make_charset (Charset_ID id, Lisp_Object name, CHARSET_CHARS (cs) = 96; break; #ifdef UTF2000 + case CHARSET_TYPE_128: + CHARSET_DIMENSION (cs) = 1; + CHARSET_CHARS (cs) = 128; + break; case CHARSET_TYPE_128X128: CHARSET_DIMENSION (cs) = 2; CHARSET_CHARS (cs) = 128; break; + case CHARSET_TYPE_256: + CHARSET_DIMENSION (cs) = 1; + CHARSET_CHARS (cs) = 256; + break; case CHARSET_TYPE_256X256: CHARSET_DIMENSION (cs) = 2; CHARSET_CHARS (cs) = 256; @@ -734,6 +890,12 @@ get_unallocated_leading_byte (int dimension) { Charset_ID lb; +#ifdef UTF2000 + if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE) + lb = 0; + else + lb = chlook->next_allocated_leading_byte++; +#else if (dimension == 1) { if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1) @@ -748,6 +910,7 @@ get_unallocated_leading_byte (int dimension) else lb = chlook->next_allocated_2_byte_leading_byte++; } +#endif if (!lb) signal_simple_error @@ -761,12 +924,29 @@ get_unallocated_leading_byte (int dimension) unsigned char charset_get_byte1 (Lisp_Object charset, Emchar ch) { + Lisp_Object table; int d; - if ((d = get_byte_from_character_table (ch, charset)) > 0) - return d >> 8; - else if ((XCHARSET_UCS_MIN (charset) <= ch) - && (ch <= XCHARSET_UCS_MAX (charset))) + if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil)) + { + Lisp_Object value = get_char_code_table (ch, table); + + if (INTP (value)) + { + Emchar code = XINT (value); + + if (code < (1 << 8)) + return code; + else if (code < (1 << 16)) + return code >> 8; + else if (code < (1 << 24)) + return code >> 16; + else + return code >> 24; + } + } + if ((XCHARSET_UCS_MIN (charset) <= ch) + && (ch <= XCHARSET_UCS_MAX (charset))) return (ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset)) / (XCHARSET_DIMENSION (charset) == 1 ? 
@@ -832,12 +1012,26 @@ charset_get_byte2 (Lisp_Object charset, Emchar ch) return 0; else { - int d; + Lisp_Object table; - if ((d = get_byte_from_character_table (ch, charset)) > 0) - return d & 0xFF; - else if ((XCHARSET_UCS_MIN (charset) <= ch) - && (ch <= XCHARSET_UCS_MAX (charset))) + if (!EQ (table = XCHARSET_ENCODING_TABLE (charset), Qnil)) + { + Lisp_Object value = get_char_code_table (ch, table); + + if (INTP (value)) + { + Emchar code = XINT (value); + + if (code < (1 << 16)) + return (unsigned char)code; + else if (code < (1 << 24)) + return (unsigned char)(code >> 16); + else + return (unsigned char)(code >> 24); + } + } + if ((XCHARSET_UCS_MIN (charset) <= ch) + && (ch <= XCHARSET_UCS_MAX (charset))) return ((ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset)) / (XCHARSET_DIMENSION (charset) == 2 ? @@ -1013,6 +1207,10 @@ character set. Recognized properties are: Lisp_Object charset; Lisp_Object ccl_program = Qnil; Lisp_Object short_name = Qnil, long_name = Qnil; +#ifdef UTF2000 + Emchar code_offset = 0; + unsigned char byte_offset = 0; +#endif CHECK_SYMBOL (name); if (!NILP (doc_string)) @@ -1065,7 +1263,11 @@ character set. Recognized properties are: { CHECK_INT (value); graphic = XINT (value); +#ifdef UTF2000 + if (graphic < 0 || graphic > 2) +#else if (graphic < 0 || graphic > 1) +#endif signal_simple_error ("Invalid value for 'graphic", value); } @@ -1129,12 +1331,17 @@ character set. Recognized properties are: { if (chars == 94) { - /* id = CHARSET_ID_OFFSET_94 + final; */ - id = get_unallocated_leading_byte (dimension); + if (code_offset == 0) + id = CHARSET_ID_OFFSET_94 + final; + else + id = get_unallocated_leading_byte (dimension); } else if (chars == 96) { - id = get_unallocated_leading_byte (dimension); + if (code_offset == 0) + id = CHARSET_ID_OFFSET_96 + final; + else + id = get_unallocated_leading_byte (dimension); } else { @@ -1145,7 +1352,10 @@ character set. 
Recognized properties are: { if (chars == 94) { - id = get_unallocated_leading_byte (dimension); + if (code_offset == 0) + id = CHARSET_ID_OFFSET_94x94 + final; + else + id = get_unallocated_leading_byte (dimension); } else if (chars == 96) { @@ -1160,6 +1370,13 @@ character set. Recognized properties are: { abort (); } + if (final) + { + if (chars == 94) + byte_offset = 33; + else if (chars == 96) + byte_offset = 32; + } #else id = get_unallocated_leading_byte (dimension); #endif @@ -1181,7 +1398,7 @@ character set. Recognized properties are: charset = make_charset (id, name, type, columns, graphic, final, direction, short_name, long_name, doc_string, registry, - Qnil, 0, 0, 0, 0); + Qnil, 0, 0, 0, byte_offset); if (!NILP (ccl_program)) XCHARSET_CCL_PROGRAM (charset) = ccl_program; return charset; @@ -1463,12 +1680,79 @@ Set mapping-table of CHARSET to TABLE. (charset, table)) { struct Lisp_Charset *cs; + Lisp_Object old_table; + size_t i; charset = Fget_charset (charset); - CHECK_VECTOR (table); - cs = XCHARSET (charset); - CHARSET_DECODING_TABLE(cs) = table; + + if (EQ (table, Qnil)) + { + CHARSET_DECODING_TABLE(cs) = table; + CHARSET_ENCODING_TABLE(cs) = Qnil; + return table; + } + else if (VECTORP (table)) + { + if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs)) + args_out_of_range (table, make_int (CHARSET_CHARS (cs))); + old_table = CHARSET_ENCODING_TABLE(cs); + CHARSET_DECODING_TABLE(cs) = table; + } + else + signal_error (Qwrong_type_argument, + list2 (build_translated_string ("vector-or-nil-p"), + table)); + /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */ + + switch (CHARSET_DIMENSION (cs)) + { + case 1: + CHARSET_ENCODING_TABLE(cs) = make_char_code_table (Qnil); + for (i = 0; i < XVECTOR_LENGTH (table); i++) + { + Lisp_Object c = XVECTOR_DATA(table)[i]; + + if (CHARP (c)) + put_char_code_table (XCHAR (c), + make_int (i + CHARSET_BYTE_OFFSET (cs)), + CHARSET_ENCODING_TABLE(cs)); + } + break; + case 2: + CHARSET_ENCODING_TABLE(cs) 
= make_char_code_table (Qnil); + for (i = 0; i < XVECTOR_LENGTH (table); i++) + { + Lisp_Object v = XVECTOR_DATA(table)[i]; + + if (VECTORP (v)) + { + size_t j; + + if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs)) + { + CHARSET_DECODING_TABLE(cs) = old_table; + args_out_of_range (v, make_int (CHARSET_CHARS (cs))); + } + for (j = 0; j < XVECTOR_LENGTH (v); j++) + { + Lisp_Object c = XVECTOR_DATA(v)[j]; + + if (CHARP (c)) + put_char_code_table + (XCHAR (c), + make_int (( (i + CHARSET_BYTE_OFFSET (cs)) << 8) + | (j + CHARSET_BYTE_OFFSET (cs))), + CHARSET_ENCODING_TABLE(cs)); + } + } + else if (CHARP (v)) + put_char_code_table (XCHAR (v), + make_int (i + CHARSET_BYTE_OFFSET (cs)), + CHARSET_ENCODING_TABLE(cs)); + } + break; + } return table; } #endif @@ -1681,6 +1965,7 @@ void syms_of_mule_charset (void) { INIT_LRECORD_IMPLEMENTATION (charset); + INIT_LRECORD_IMPLEMENTATION (char_byte_table); DEFSUBR (Fcharsetp); DEFSUBR (Ffind_charset); @@ -1752,6 +2037,7 @@ syms_of_mule_charset (void) defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2"); #ifdef UTF2000 defsymbol (&Qucs_bmp, "ucs-bmp"); + defsymbol (&Qlatin_viscii, "latin-viscii"); defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower"); defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper"); defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower"); @@ -1793,10 +2079,10 @@ vars_of_mule_charset (void) chlook->charset_by_attributes[i][j][k] = Qnil; #endif - chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; #ifdef UTF2000 - chlook->next_allocated_2_byte_leading_byte = LEADING_BYTE_CHINESE_BIG5_2 + 1; + chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE; #else + chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; #endif @@ -1809,7 +2095,7 @@ Leading-code of private TYPE9N charset of column-width 1. 
#endif #ifdef UTF2000 - Vutf_2000_version = build_string("0.8 (Kami)"); + Vutf_2000_version = build_string("0.9 (KyÅ«hōji)"); DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /* Version number of UTF-2000. */ ); @@ -1817,7 +2103,7 @@ Version number of UTF-2000. Vdefault_coded_charset_priority_list = Qnil; DEFVAR_LISP ("default-coded-charset-priority-list", &Vdefault_coded_charset_priority_list /* -Default order of preferred coded-character-set. +Default order of preferred coded-character-sets. */ ); #endif } @@ -1836,12 +2122,12 @@ complex_vars_of_mule_charset (void) staticpro (&Vcharset_ucs_bmp); Vcharset_ucs_bmp = make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, - CHARSET_TYPE_256X256, 1, 0, 0, + CHARSET_TYPE_256X256, 1, 2, 0, CHARSET_LEFT_TO_RIGHT, build_string ("BMP"), build_string ("BMP"), - build_string ("BMP"), - build_string (""), + build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"), + build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"), Qnil, 0, 0xFFFF, 0, 0); #else # define MIN_CHAR_THAI 0 @@ -2081,7 +2367,7 @@ complex_vars_of_mule_charset (void) build_string ("VISCII lower"), build_string ("VISCII lower (Vietnamese)"), build_string ("VISCII lower (Vietnamese)"), - build_string ("VISCII1\\.1"), + build_string ("MULEVISCII-LOWER"), Qnil, 0, 0, 0, 32); staticpro (&Vcharset_latin_viscii_upper); Vcharset_latin_viscii_upper = @@ -2091,18 +2377,18 @@ complex_vars_of_mule_charset (void) build_string ("VISCII upper"), build_string ("VISCII upper (Vietnamese)"), build_string ("VISCII upper (Vietnamese)"), - build_string ("VISCII1\\.1"), + build_string ("MULEVISCII-UPPER"), Qnil, 0, 0, 0, 32); - /* - Fputhash (Qvietnamese_viscii_lower, Vcharset_latin_viscii_lower, - Vcharset_hash_table); - Fputhash (Qvietnamese_viscii_upper, Vcharset_latin_viscii_upper, - Vcharset_hash_table); - */ - Fdefine_charset_alias (Qvietnamese_viscii_lower, - Vcharset_latin_viscii_lower); - Fdefine_charset_alias (Qvietnamese_viscii_upper, - Vcharset_latin_viscii_upper); + staticpro 
(&Vcharset_latin_viscii); + Vcharset_latin_viscii = + make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, + CHARSET_TYPE_256, 1, 2, 0, + CHARSET_LEFT_TO_RIGHT, + build_string ("VISCII"), + build_string ("VISCII 1.1 (Vietnamese)"), + build_string ("VISCII 1.1 (Vietnamese)"), + build_string ("VISCII1\\.1"), + Qnil, 0, 0, 0, 0); staticpro (&Vcharset_hiragana_jisx0208); Vcharset_hiragana_jisx0208 = make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208, diff --git a/src/regex.c b/src/regex.c index f1c9bb2..b1356fa 100644 --- a/src/regex.c +++ b/src/regex.c @@ -3362,8 +3362,12 @@ compile_extended_range (re_char **p_ptr, re_char *pend, ranges entirely within the first 256 chars. */ if ((range_start >= 0x100 || range_end >= 0x100) - && CHAR_LEADING_BYTE (range_start) != - CHAR_LEADING_BYTE (range_end)) +#ifdef UTF2000 + && CHAR_CHARSET_ID (range_start) != CHAR_CHARSET_ID (range_end) +#else + && CHAR_LEADING_BYTE (range_start) != CHAR_LEADING_BYTE (range_end) +#endif + ) return REG_ERANGESPAN; /* As advertised, translations only work over the 0 - 0x7F range. 
diff --git a/src/text-coding.c b/src/text-coding.c index 113f726..4f40729 100644 --- a/src/text-coding.c +++ b/src/text-coding.c @@ -3240,15 +3240,16 @@ char_encode_shift_jis (struct encoding_stream *str, Emchar ch, } else { - Lisp_Object charset; + Lisp_Object charset, value; unsigned int c1, c2, s1, s2; #ifdef UTF2000 - if ( (c1 = - get_byte_from_character_table (ch, Vcharset_latin_jisx0201)) - >= 0 ) + if (INTP (value = + get_char_code_table + (ch, XCHARSET_ENCODING_TABLE (Vcharset_latin_jisx0201)))) { charset = Vcharset_latin_jisx0201; + c1 = XINT (value); c2 = 0; } else @@ -5058,7 +5059,24 @@ char_encode_iso2022 (struct encoding_stream *str, Emchar ch, } } if (reg == -1) - BREAKUP_CHAR (ch, charset, byte1, byte2); + { + Lisp_Object original_default_coded_charset_priority_list + = Vdefault_coded_charset_priority_list; + + while (!EQ (Vdefault_coded_charset_priority_list, Qnil)) + { + BREAKUP_CHAR (ch, charset, byte1, byte2); + if (XCHARSET_FINAL (charset)) + goto found; + Vdefault_coded_charset_priority_list + = Fcdr (Fmemq (XCHARSET_NAME (charset), + Vdefault_coded_charset_priority_list)); + } + BREAKUP_CHAR (ch, charset, byte1, byte2); + found: + Vdefault_coded_charset_priority_list + = original_default_coded_charset_priority_list; + } ensure_correct_direction (XCHARSET_DIRECTION (charset), codesys, dst, flags, 0); -- 1.7.10.4