1 ;;; mule-category.el --- category functions for XEmacs/Mule.
3 ;; Copyright (C) 1992,93,94,95 Free Software Foundation, Inc.
4 ;; Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN.
5 ;; Licensed to the Free Software Foundation.
6 ;; Copyright (C) 1995 Amdahl Corporation.
7 ;; Copyright (C) 1995 Sun Microsystems.
9 ;; This file is part of XEmacs.
11 ;; XEmacs is free software; you can redistribute it and/or modify it
12 ;; under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; XEmacs is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
28 ;; Functions for working with category tables, which are a particular
29 ;; type of char table. Some function names / arguments should be
30 ;; parallel with syntax tables.
32 ;; Written by Ben Wing <ben@xemacs.org>. The initialization code
33 ;; at the end of this file comes from Mule.
34 ;; Some bugfixes by Jareth Hein <jhod@po.iijnet.or.jp>
38 (defvar defined-category-hashtable (make-hash-table :size 50))
40 (defun define-category (designator doc-string)
41 "Make a new category whose designator is DESIGNATOR.
42 DESIGNATOR should be a visible letter of ' ' thru '~'.
43 STRING is a doc string for the category.
44 Letters of 'a' thru 'z' are already used or kept for the system."
45 (check-argument-type 'category-designator-p designator)
46 (check-argument-type 'stringp doc-string)
47 (puthash designator doc-string defined-category-hashtable))
49 (defun undefine-category (designator)
50 "Undefine DESIGNATOR as a designator for a category."
51 (check-argument-type 'category-designator-p designator)
52 (remhash designator defined-category-hashtable))
54 (defun defined-category-p (designator)
55 "Return non-nil if DESIGNATOR is a designator for a defined category."
56 (and (category-designator-p designator)
57 (gethash designator defined-category-hashtable)))
59 (defun defined-category-list ()
60 "Return a list of the currently defined categories.
61 Categories are given by their designators."
63 (maphash #'(lambda (key value)
64 (setq list (cons key list)))
65 defined-category-hashtable)
68 (defun undefined-category-designator ()
69 "Return an undefined category designator, or nil if there are none."
71 (while (and (< a 127) (not found))
72 (unless (gethash a defined-category-hashtable)
73 (setq found (make-char 'ascii a)))
77 (defun category-doc-string (designator)
78 "Return the doc-string for the category denoted by DESIGNATOR."
79 (check-argument-type 'defined-category-p designator)
80 (gethash designator defined-category-hashtable))
82 (defun modify-category-entry (char-range designator &optional category-table reset)
83 "Add a category to the categories associated with CHAR-RANGE.
84 CHAR-RANGE is a single character or a range of characters,
85 as per `put-char-table'.
86 The category is given by a designator character.
87 The changes are made in CATEGORY-TABLE, which defaults to the current
88 buffer's category table.
89 If optional fourth argument RESET is non-nil, previous categories associated
90 with CHAR-RANGE are removed before adding the specified category."
91 (or category-table (setq category-table (category-table)))
92 (check-argument-type 'category-table-p category-table)
93 (check-argument-type 'defined-category-p designator)
95 ;; clear all existing stuff.
96 (put-char-table char-range nil category-table))
99 ;; make sure that this range has a bit-vector assigned to it
100 (if (not (bit-vector-p value))
101 (setq value (make-bit-vector 95 0))
102 (setq value (copy-sequence value)))
103 ;; set the appropriate bit in that vector.
104 (aset value (- designator 32) 1)
105 ;; put the vector back, thus assuring we have a unique setting for this range
106 (put-char-table key value category-table))
107 category-table char-range))
109 (defun char-category-list (character &optional category-table)
110 "Return a list of the categories that CHARACTER is in.
111 CATEGORY-TABLE defaults to the current buffer's category table.
112 The categories are given by their designators."
113 (or category-table (setq category-table (category-table)))
114 (check-argument-type 'category-table-p category-table)
115 (let ((vec (get-char-table character category-table)))
119 (if (= 1 (aref vec (- a 32)))
120 (setq list (cons (make-char 'ascii a) list)))
124 ;; implemented in C, file chartab.c (97/3/14 jhod@po.iijnet.or.jp)
125 ;(defun char-in-category-p (char category &optional table)
126 ; "Return non-nil if CHAR is in CATEGORY.
127 ;TABLE defaults to the current buffer's category table.
128 ;Categories are specified by their designators."
129 ; (or table (setq table (category-table)))
130 ; (check-argument-type 'category-table-p table)
131 ; (check-argument-type 'category-designator-p category)
132 ; (let ((vec (get-char-table char table)))
134 ; (= 1 (aref vec (- category 32))))))
136 (defun describe-category ()
137 "Describe the category specifications in the category table.
138 The descriptions are inserted in a buffer, which is then displayed."
140 (with-displaying-help-buffer
142 (describe-category-table (category-table) standard-output))))
144 (defun describe-category-table (table stream)
149 (lambda (first last value stream)
150 (if (and (bit-vector-p value)
151 (> (reduce '+ value) 0))
153 (if (equal first last)
154 (cond ((vectorp first)
155 (princ (format "%s, row %d"
161 (princ (charset-name first) stream))
162 (t (princ first stream)))
163 (cond ((vectorp first)
164 (princ (format "%s, rows %d .. %d"
171 (princ (format "%s .. %s" first last)
173 (describe-category-code value stream))))))
175 (lambda (range value)
177 (and (characterp range)
178 (characterp first-char)
179 (eq (char-charset range) (char-charset first-char))
180 (= (char-to-int last-char) (1- (char-to-int range))))
183 (eq (aref range 0) (aref first-char 0))
184 (= (aref last-char 1) (1- (aref range 1))))
185 (equal value prev-val)))
186 (setq last-char range)
189 (funcall describe-one first-char last-char prev-val stream)
190 (setq first-char nil)))
191 (funcall describe-one range range value stream))
195 (funcall describe-one first-char last-char prev-val stream))))
197 (defun describe-category-code (code stream)
198 (let ((standard-output (or stream standard-output)))
199 (princ "\tin categories: ")
200 (if (not (bit-vector-p code))
205 (if (= 1 (aref code i))
207 (if (not already-matched)
208 (setq already-matched t)
210 (princ (int-to-char (+ 32 i)))))
212 (if (not already-matched)
216 (if (= 1 (aref code i))
217 (princ (format "\n\t\tmeaning: %s"
218 (category-doc-string (int-to-char (+ 32 i))))))
222 (defconst predefined-category-list
223 '((latin-iso8859-1 ?l "Latin-1 through Latin-5 character set")
228 (cyrillic-iso8859-5 ?y "Cyrillic character set")
229 (arabic-iso8859-6 ?b "Arabic character set")
230 (greek-iso8859-7 ?g "Greek character set")
231 (hebrew-iso8859-8 ?w "Hebrew character set")
232 (katakana-jisx0201 ?k "Japanese 1-byte Katakana character set")
233 (latin-jisx0201 ?r "Japanese 1-byte Roman character set")
234 (japanese-jisx0208-1978 ?j "Japanese 2-byte character set (old)")
235 (japanese-jisx0208 ?j "Japanese 2-byte character set")
236 (japanese-jisx0212 ?j)
237 (chinese-gb2312 ?c "Chinese GB (China, PRC) 2-byte character set")
238 (chinese-cns11643-1 ?t "Chinese Taiwan (CNS or Big5) 2-byte character set")
239 (chinese-cns11643-2 ?t)
242 (korean-ksc5601 ?h "Hangul (Korean) 2-byte character set")
244 "List of predefined categories.
245 Each element is a list of a charset, a designator, and maybe a doc string.")
248 (define-category ?a "ASCII character set.")
249 (define-category ?l "Latin-1 through Latin-5 character set")
252 (modify-category-entry i ?a)
253 (modify-category-entry i ?l)
255 (setq l predefined-category-list)
257 (if (and (nth 2 (car l))
258 (not (defined-category-p (nth 2 (car l)))))
259 (define-category (nth 1 (car l)) (nth 2 (car l))))
260 (modify-category-entry (car (car l)) (nth 1 (car l)))
263 ;;; Setting word boundary.
265 (setq word-combining-categories
268 (setq word-separating-categories ; (2-byte character sets)
269 '((?A . ?K) ; Alpha numeric - Katakana
270 (?A . ?C) ; Alpha numeric - Chinese
271 (?H . ?A) ; Hiragana - Alpha numeric
272 (?H . ?K) ; Hiragana - Katakana
273 (?H . ?C) ; Hiragana - Chinese
274 (?K . ?A) ; Katakana - Alpha numeric
275 (?K . ?C) ; Katakana - Chinese
276 (?C . ?A) ; Chinese - Alpha numeric
277 (?C . ?K) ; Chinese - Katakana
280 ;;; At the present, I know Japanese and Chinese text can
281 ;;; break line at any point under a restriction of 'kinsoku'.
282 (defvar word-across-newline "\\(\\cj\\|\\cc\\|\\ct\\)"
283 "Regular expression of such characters which can be a word across newline.")
285 (defvar ascii-char "[\40-\176]")
286 (defvar ascii-space "[ \t]")
287 (defvar ascii-symbols "[\40-\57\72-\100\133-\140\173-\176]")
288 (defvar ascii-numeric "[\60-\71]")
289 (defvar ascii-English-Upper "[\101-\132]")
290 (defvar ascii-English-Lower "[\141-\172]")
291 (defvar ascii-alphanumeric "[\60-\71\101-\132\141-\172]")
293 (defvar kanji-char "\\cj")
294 (defvar kanji-space "
\e$B!!
\e(B")
295 (defvar kanji-symbols "\\cS")
296 (defvar kanji-numeric "[
\e$B#0
\e(B-
\e$B#9
\e(B]")
297 (defvar kanji-English-Upper "[
\e$B#A
\e(B-
\e$B#Z
\e(B]")
298 (defvar kanji-English-Lower "[
\e$B#a
\e(B-
\e$B#z
\e(B]")
299 (defvar kanji-hiragana "\\cH")
300 (defvar kanji-katakana "\\cK")
301 (defvar kanji-Greek-Upper "[
\e$B&!
\e(B-
\e$B&8
\e(B]")
302 (defvar kanji-Greek-Lower "[
\e$B&A
\e(B-
\e$B&X
\e(B]")
303 (defvar kanji-Russian-Upper "[
\e$B'!
\e(B-
\e$B'A
\e(B]")
304 (defvar kanji-Russian-Lower "[
\e$B'Q
\e(B-
\e$B'q
\e(B]")
305 (defvar kanji-Kanji-1st-Level "[
\e$B0!
\e(B-
\e$BOS
\e(B]")
306 (defvar kanji-Kanji-2nd-Level "[
\e$BP!
\e(B-
\e$Bt$
\e(B]")
308 (defvar kanji-kanji-char "\\(\\cH\\|\\cK\\|\\cC\\)")