1 ;;; mule-charset.el --- Charset functions for Mule.
3 ;; Copyright (C) 1992 Free Software Foundation, Inc.
4 ;; Copyright (C) 1995 Amdahl Corporation.
5 ;; Copyright (C) 1996 Sun Microsystems.
6 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
7 ;; 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
11 ;; Keywords: i18n, mule, internal
13 ;; This file is part of XEmacs.
15 ;; XEmacs is free software; you can redistribute it and/or modify it
16 ;; under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation; either version 2, or (at your option)
20 ;; XEmacs is distributed in the hope that it will be useful, but
21 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 ;; General Public License for more details.
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with XEmacs; see the file COPYING. If not, write to the
27 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
28 ;; Boston, MA 02111-1307, USA.
30 ;;; Synched up with: Not synched. API at source level synched with FSF 20.3.9.
34 ;; These functions are not compatible at the bytecode level with Emacs/Mule,
35 ;; and they never will be. -sb [1999-05-26]
39 ;;;; Classifying text according to charsets
41 (defun charsets-in-region (start end &optional buffer)
42 "Return a list of the charsets in the region between START and END.
43 BUFFER defaults to the current buffer if omitted."
49 (narrow-to-region start end)
50 (goto-char (point-min))
53 (ch (char-after (point)))
54 (charset (char-charset ch)))
55 (if (not (eq prev-charset charset))
57 (setq prev-charset charset)
58 (or (memq charset list)
59 (setq list (cons charset list))))))
63 (defun charsets-in-string (string)
64 "Return a list of the charsets in STRING."
67 prev-charset charset list)
69 (setq charset (char-charset (aref string i)))
70 (if (not (eq prev-charset charset))
72 (setq prev-charset charset)
73 (or (memq charset list)
74 (setq list (cons charset list)))))
79 ;;;; Charset accessors
81 (defun charset-iso-graphic-plane (charset)
82 "Return the `graphic' property of CHARSET.
84 (charset-property charset 'graphic))
;; Thin accessor: looks up the `final' property of CHARSET through
;; `charset-property' and returns whatever that call yields.
86 (defun charset-iso-final-char (charset)
87 "Return the final byte of the ISO 2022 escape sequence designating CHARSET."
88 (charset-property charset 'final))
;; Thin accessor: looks up the `chars' property of CHARSET through
;; `charset-property' and returns it unchanged.
90 (defun charset-chars (charset)
91 "Return the number of characters per dimension of CHARSET."
92 (charset-property charset 'chars))
;; Thin accessor over `charset-property'.  Note that the property queried
;; is named `columns', not `width'; the function name differs from the
;; property name.
94 (defun charset-width (charset)
95 "Return the number of display columns per character of CHARSET.
96 This only applies to TTY mode (under X, the actual display width can
97 be automatically determined)."
98 (charset-property charset 'columns))
100 ;; #### FSFmacs returns 0
101 (defun charset-direction (charset)
102 "Return the display direction (0 for `l2r' or 1 for `r2l') of CHARSET.
103 Only left-to-right is currently implemented."
104 (if (eq (charset-property charset 'direction) 'l2r)
;; Thin accessor: looks up the `registry' property of CHARSET through
;; `charset-property' and returns it unchanged.
109 (defun charset-registry (charset)
110 "Return the registry of CHARSET.
111 This is a regular expression matching the registry field of fonts
112 that can display the characters in CHARSET."
113 (charset-property charset 'registry))
115 (defun charset-ccl-program (charset)
116 "Return the CCL program of CHARSET.
118 (charset-property charset 'ccl-program))
120 (defun charset-bytes (charset)
121 "Useless in XEmacs, returns 1."
;; Old accessor names kept as obsolete aliases; each form maps the old
;; name (first argument) onto its replacement (second argument).
;; `charset-description' is not defined in the part of the file visible
;; here.  The trailing numbers are presumably the dates (YYYYMMDD) on
;; which the aliases were declared obsolete -- TODO confirm.
124 (define-obsolete-function-alias 'charset-columns 'charset-width) ;; 19990409
125 (define-obsolete-function-alias 'charset-final 'charset-iso-final-char) ;; 19990409
126 (define-obsolete-function-alias 'charset-graphic 'charset-iso-graphic-plane) ;; 19990409
127 (define-obsolete-function-alias 'charset-doc-string 'charset-description) ;; 19990409
129 ;;;; Define setf methods for all settable Charset properties
;; Short-form `defsetf': (setf (charset-registry CS) VAL) is handled by
;; `set-charset-registry', and (setf (charset-ccl-program CS) VAL) by
;; `set-charset-ccl-program'.  Neither setter is defined in the part of
;; the file visible here.
131 (defsetf charset-registry set-charset-registry)
132 (defsetf charset-ccl-program set-charset-ccl-program)
134 ;;; FSF compatibility functions
135 (defun charset-after (&optional pos)
136 "Return charset of a character in current buffer at position POS.
137 If POS is nil, it defaults to the current point.
138 If POS is out of range, the value is nil."
141 (check-argument-type 'integerp pos)
142 (unless (or (< pos (point-min))
144 (char-charset (char-after pos))))
147 ;; We're not going to support this.
148 ;(defun charset-info (charset)
149 ; "Return a vector of information of CHARSET.
150 ;The elements of the vector are:
151 ; CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
152 ; LEADING-CODE-BASE, LEADING-CODE-EXT,
153 ; ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
154 ; REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
157 ;CHARSET-ID (integer) is the identification number of the charset.
158 ;BYTES (integer) is the length of multi-byte form of a character in
159 ; the charset: one of 1, 2, 3, and 4.
160 ;DIMENSION (integer) is the number of bytes to represent a character of
161 ;the charset: 1 or 2.
162 ;CHARS (integer) is the number of characters in a dimension: 94 or 96.
163 ;WIDTH (integer) is the number of columns a character in the charset
164 ; occupies on the screen: one of 0, 1, and 2.
165 ;DIRECTION (integer) is the rendering direction of characters in the
166 ; charset when rendering. If 0, render from left to right, else
167 ; render from right to left.
168 ;LEADING-CODE-BASE (integer) is the base leading-code for the
170 ;LEADING-CODE-EXT (integer) is the extended leading-code for the
171 ; charset. All charsets of less than 0xA0 have the value 0.
172 ;ISO-FINAL-CHAR (character) is the final character of the
173 ; corresponding ISO 2022 charset.
174 ;ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
175 ; while encoding to variants of ISO 2022 coding system, one of the
176 ; following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
177 ;REVERSE-CHARSET (integer) is the charset which differs only in
178 ; LEFT-TO-RIGHT value from the charset. If there is no such
179 ; charset, the value is -1.
180 ;SHORT-NAME (string) is the short name to refer to the charset.
181 ;LONG-NAME (string) is the long name to refer to the charset
182 ;DESCRIPTION (string) is the description string of the charset.
183 ;PLIST (property list) may contain any type of information a user
184 ; wants to put and get by functions `put-charset-property' and
185 ; `get-charset-property' respectively."
187 ; (charset-id charset)
189 ; (charset-dimension charset)
190 ; (charset-chars charset)
191 ; (charset-width charset)
192 ; (charset-direction charset)
193 ; nil ;; (charset-leading-code-base (charset))
194 ; nil ;; (charset-leading-code-ext (charset))
195 ; (charset-iso-final-char charset)
196 ; (charset-iso-graphic-plane charset)
198 ; (charset-short-name charset)
199 ; (charset-long-name charset)
200 ; (charset-description charset)
201 ; (charset-plist charset)))
203 ;(make-compatible 'charset-info "Don't use this if you can help it.")
205 (defun define-charset (charset-id charset property-vector)
206 "Define CHARSET-ID as the identification number of CHARSET with PROPERTY-VECTOR.
207 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
208 treated as a private charset.
209 PROPERTY-VECTOR is a vector of the format:
210 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
211 SHORT-NAME LONG-NAME DESCRIPTION]
212 The meaning of each element is as follows:
213 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
214 CHARS (integer) is the number of characters in a dimension: 94 or 96.
215 WIDTH (integer) is the number of columns a character in the charset
216 occupies on the screen: one of 0, 1, and 2.
218 DIRECTION (integer) is the rendering direction of characters in the
219 charset when rendering. If 0, render from left to right, else
220 render from right to left.
222 ISO-FINAL-CHAR (character) is the final character of the
223 corresponding ISO 2022 charset.
225 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
226 while encoding to variants of ISO 2022 coding system, one of the
227 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
230 SHORT-NAME (string) is the short name to refer to the charset.
232 LONG-NAME (string) is the long name to refer to the charset.
234 DESCRIPTION (string) is the description string of the charset."
235 (make-charset charset (aref property-vector 8)
237 'short-name (aref property-vector 6)
238 'long-name (aref property-vector 7)
239 'dimension (aref property-vector 0)
240 'columns (aref property-vector 2)
241 'chars (aref property-vector 1)
242 'final (aref property-vector 4)
243 'graphic (aref property-vector 5)
244 'direction (aref property-vector 3))))
;; Registers `define-charset' with `make-compatible', passing an empty
;; string as the accompanying message.  Presumably this flags the function
;; as existing only for compatibility with other Emacs variants -- confirm
;; against the definition of `make-compatible', which is not visible here.
246 (make-compatible 'define-charset "")
;; The charset property-list functions are plain aliases for the generic
;; ones: `get', `put', `object-plist' and `setplist'.  No charset-specific
;; argument checking is added at this level.
250 (defalias 'get-charset-property 'get)
251 (defalias 'put-charset-property 'put)
252 (defalias 'charset-plist 'object-plist)
253 (defalias 'set-charset-plist 'setplist)
255 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
256 ;; SPACE and NEWLINE are already set.
257 ;; (let ((l '(katakana-jisx0201
258 ;; japanese-jisx0208 japanese-jisx0212
259 ;; chinese-gb2312 chinese-big5-1 chinese-big5-2)))
261 ;; (put-char-table (car l) t auto-fill-chars)
262 ;; (setq l (cdr l))))
265 ;;; @ Coded character set
268 (when (featurep 'utf-2000)
269 (setq default-coded-charset-priority-list
283 =ucs-radicals@unicode
285 =ucs-bmp-cjk@JP/hanazono
292 japanese-jisx0208-1978
320 =hanyo-denshi/tk/mf-01
421 =ucs-bmp-cjk-compat@unicode
424 ==ucs-radicals@unicode
425 ==ucs-bmp-cjk@JP/hanazono
430 ==ucs-sip@JP/hanazono
451 ==big5-cdp-itaiji-001
469 ==hanyo-denshi/tk/mf-01
473 ==ucs-bmp-cjk-compat@JP
474 ==ucs-bmp-cjk-compat@gb
489 ;; ===hanyo-denshi/ja
560 ===ucs-bmp-cjk-compat@unicode
573 =>big5-cdp-itaiji-001
590 =>ucs-bmp-cjk@unicode
591 =>ucs-bmp-cjk@cognate
592 =>ucs-bmp-cjk@component
597 =+>ucs-bmp-cjk@unicode
598 =+>ucs-bmp-cjk-compat@jis
623 ;;; mule-charset.el ends here