1 ;;; mm-util.el --- Utility functions for MIME things
2 ;; Copyright (C) 1998,99 Free Software Foundation, Inc.
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; This file is part of GNU Emacs.
8 ;; GNU Emacs is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; GNU Emacs is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 ;; Boston, MA 02111-1307, USA.
27 (defconst mm-running-xemacs (string-match "XEmacs" emacs-version))
29 (defconst mm-binary-coding-system
31 'binary 'no-conversion)
32 "100% binary coding system.")
34 (defconst mm-text-coding-system
35 (and (fboundp 'coding-system-list)
36 (if (memq system-type '(windows-nt ms-dos ms-windows))
37 'raw-text-dos 'raw-text))
38 "Text-safe coding system (For removing ^M).")
40 (defvar mm-mime-mule-charset-alist
42 (iso-8859-1 latin-iso8859-1)
43 (iso-8859-2 latin-iso8859-2)
44 (iso-8859-3 latin-iso8859-3)
45 (iso-8859-4 latin-iso8859-4)
46 (iso-8859-5 cyrillic-iso8859-5)
47 (koi8-r cyrillic-iso8859-5)
48 (iso-8859-6 arabic-iso8859-6)
49 (iso-8859-7 greek-iso8859-7)
50 (iso-8859-8 hebrew-iso8859-8)
51 (iso-8859-9 latin-iso8859-9)
52 (viscii vietnamese-viscii-lower)
53 (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
54 (euc-kr korean-ksc5601)
55 (cn-gb-2312 chinese-gb2312)
56 (cn-big5 chinese-big5-1 chinese-big5-2)
58 (thai-tis620 thai-tis620)
59 (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
60 (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
61 latin-jisx0201 japanese-jisx0208-1978
62 chinese-gb2312 japanese-jisx0208
63 korean-ksc5601 japanese-jisx0212
65 (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
66 latin-jisx0201 japanese-jisx0208-1978
67 chinese-gb2312 japanese-jisx0208
68 korean-ksc5601 japanese-jisx0212
69 chinese-cns11643-1 chinese-cns11643-2)
70 (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
71 cyrillic-iso8859-5 greek-iso8859-7
72 latin-jisx0201 japanese-jisx0208-1978
73 chinese-gb2312 japanese-jisx0208
74 korean-ksc5601 japanese-jisx0212
75 chinese-cns11643-1 chinese-cns11643-2
76 chinese-cns11643-3 chinese-cns11643-4
77 chinese-cns11643-5 chinese-cns11643-6
79 "Alist of MIME-charset/MULE-charsets.")
85 (let ((nfunc (intern (format "mm-%s" (car elem)))))
86 (if (fboundp (car elem))
87 (fset nfunc (car elem))
88 (fset nfunc (cdr elem)))))
89 '((decode-coding-string . (lambda (s a) s))
90 (encode-coding-string . (lambda (s a) s))
91 (encode-coding-region . ignore)
92 (coding-system-list . ignore)
93 (decode-coding-region . ignore)
95 (device-type . ignore)
96 (coding-system-equal . equal)
97 (annotationp . ignore)
98 (set-buffer-file-coding-system . ignore)
100 . (lambda (charset int)
104 "Prompt the user for a coding system."
106 prompt (mapcar (lambda (s) (list (symbol-name (car s))))
107 mm-mime-mule-charset-alist)))))))
109 (defvar mm-coding-system-list nil)
110 (defun mm-get-coding-system-list ()
111 "Get the coding system list."
112 (or mm-coding-system-list
113 (setq mm-coding-system-list (mm-coding-system-list))))
115 (defvar mm-charset-synonym-alist
117 (gb2312 . cn-gb-2312)
119 "A mapping from invalid charset names to the real charset names.")
121 ;;; Internal variables:
125 (defun mm-mule-charset-to-mime-charset (charset)
126 "Return the MIME charset corresponding to MULE CHARSET."
127 (let ((alist mm-mime-mule-charset-alist)
130 (when (memq charset (cdar alist))
131 (setq out (caar alist)
136 (defun mm-charset-to-coding-system (charset &optional lbt)
137 "Return coding-system corresponding to CHARSET.
138 CHARSET is a symbol naming a MIME charset.
139 If optional argument LBT (`unix', `dos' or `mac') is specified, it is
140 used as the line break code type of the coding system."
141 (when (stringp charset)
142 (setq charset (intern (downcase charset))))
144 (or (cdr (assq charset mm-charset-synonym-alist))
147 (setq charset (intern (format "%s-%s" charset lbt))))
149 ;; Running in a non-MULE environment.
150 ((null (mm-get-coding-system-list))
153 ((eq charset 'us-ascii)
155 ;; Check to see whether we can handle this charset.
156 ((memq charset (mm-get-coding-system-list))
162 (defun mm-replace-chars-in-string (string from to)
163 "Replace characters in STRING from FROM to TO."
164 (let ((string (substring string 0)) ;Copy string.
165 (len (length string))
167 ;; Replace all occurrences of FROM with TO.
169 (when (= (aref string idx) from)
170 (aset string idx to))
174 (defsubst mm-enable-multibyte ()
175 "Enable multibyte in the current buffer."
176 (when (and (fboundp 'set-buffer-multibyte)
177 (default-value 'enable-multibyte-characters))
178 (set-buffer-multibyte t)))
180 (defsubst mm-disable-multibyte ()
181 "Disable multibyte in the current buffer."
182 (when (fboundp 'set-buffer-multibyte)
183 (set-buffer-multibyte nil)))
185 (defun mm-preferred-coding-system (charset)
186 ;; A typo in some Emacs versions.
187 (or (get-charset-property charset 'prefered-coding-system)
188 (get-charset-property charset 'preffered-coding-system)))
190 (defun mm-mime-charset (charset)
191 "Return the MIME charset corresponding to the MULE CHARSET."
192 (if (fboundp 'coding-system-get)
193 ;; This exists in Emacs 20.
195 (and (mm-preferred-coding-system charset)
197 (mm-preferred-coding-system charset) 'mime-charset))
198 (and (eq charset 'ascii)
200 (mm-preferred-coding-system charset)
201 (mm-mule-charset-to-mime-charset charset))
202 ;; This is for XEmacs.
203 (mm-mule-charset-to-mime-charset charset)))
205 (defun mm-find-mime-charset-region (b e)
206 "Return the MIME charsets needed to encode the region between B and E."
208 (mapcar 'mm-mime-charset
210 (mm-find-charset-region b e)))))
211 (when (memq 'iso-2022-jp-2 charsets)
212 (setq charsets (delq 'iso-2022-jp charsets)))
213 (delete-duplicates charsets)))
215 (defsubst mm-multibyte-p ()
216 "Say whether multibyte is enabled."
217 (and (boundp 'enable-multibyte-characters)
218 enable-multibyte-characters))
220 (defmacro mm-with-unibyte-buffer (&rest forms)
221 "Create a temporary buffer, and evaluate FORMS there like `progn'.
222 See also `with-temp-file' and `with-output-to-string'."
223 (let ((temp-buffer (make-symbol "temp-buffer"))
224 (multibyte (make-symbol "multibyte")))
225 `(if (not (boundp 'enable-multibyte-characters))
226 (with-temp-buffer ,@forms)
227 (let ((,multibyte (default-value 'enable-multibyte-characters))
231 (setq-default enable-multibyte-characters nil)
233 (get-buffer-create (generate-new-buffer-name " *temp*")))
235 (with-current-buffer ,temp-buffer
236 (let ((buffer-file-coding-system mm-binary-coding-system)
237 (coding-system-for-read mm-binary-coding-system)
238 (coding-system-for-write mm-binary-coding-system))
240 (and (buffer-name ,temp-buffer)
241 (kill-buffer ,temp-buffer))))
242 (setq-default enable-multibyte-characters ,multibyte))))))
243 (put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
244 (put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
246 (defun mm-find-charset-region (b e)
247 "Return a list of charsets in the region."
249 ((and (boundp 'enable-multibyte-characters)
250 enable-multibyte-characters
251 (fboundp 'find-charset-region))
252 (find-charset-region b e))
253 ((not (boundp 'current-language-environment))
256 (narrow-to-region b e)
257 (goto-char (point-min))
258 (skip-chars-forward "\0-\177")
261 (delq nil (list 'ascii mail-parse-charset))))))
263 ;; We are in a unibyte buffer, so we futz around a bit.
266 (narrow-to-region b e)
267 (goto-char (point-min))
268 (let ((entry (assoc (capitalize current-language-environment)
269 language-info-alist)))
270 (skip-chars-forward "\0-\177")
273 (list 'ascii (car (last (assq 'charset entry)))))))))))
275 (defun mm-read-charset (prompt)
280 (mapcar (lambda (e) (list (symbol-name (car e))))
281 mm-mime-mule-charset-alist)
284 (defun mm-quote-arg (arg)
285 "Return a version of ARG that is safe to evaluate in a shell."
286 (let ((pos 0) new-pos accum)
287 ;; *** bug: we don't handle newline characters properly
288 (while (setq new-pos (string-match "[]*[;!'`\"$\\& \t{} |()<>]" arg pos))
289 (push (substring arg pos new-pos) accum)
291 (push (list (aref arg new-pos)) accum)
292 (setq pos (1+ new-pos)))
295 (apply 'concat (nconc (nreverse accum) (list (substring arg pos)))))))
297 (defun mm-auto-mode-alist ()
298 "Return an `auto-mode-alist' with only the .gz (etc) thingies."
299 (let ((alist auto-mode-alist)
302 (when (listp (cdar alist))
303 (push (car alist) out))
307 (defun mm-insert-file-contents (filename &optional visit beg end replace)
308 "Like `insert-file-contents', q.v., but only reads in the file.
309 A buffer may be modified in several ways after reading into the buffer due
310 to advanced Emacs features, such as file-name-handlers, format decoding,
311 find-file-hooks, etc.
312 This function ensures that none of these modifications will take place."
313 (let ((format-alist nil)
314 (auto-mode-alist (mm-auto-mode-alist))
315 (default-major-mode 'fundamental-mode)
316 (enable-local-variables nil)
317 (after-insert-file-functions nil)
318 (enable-local-eval nil)
319 (find-file-hooks nil))
320 (insert-file-contents filename visit beg end replace)))
324 ;;; mm-util.el ends here