X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=check-mule.el;h=85a6537a02428bb89fa12396cc87e81a7e57948d;hb=199fc2a1c41002d00203ee4604e7e3b6ef1b7f9b;hp=d0ac0db8fc93d35a425672927f2c71cc72f1f01b;hpb=228b6d8e9f977666c0487d4354450f81ca058d16;p=chise%2Ftomoyo-tools.git diff --git a/check-mule.el b/check-mule.el index d0ac0db..85a6537 100644 --- a/check-mule.el +++ b/check-mule.el @@ -1,50 +1,62 @@ -(with-current-buffer "arabic0.txt" - (goto-char (point-min)) - (let (code ucs chr ret) - (while (re-search-forward "^0x\\([0-9A-F]+\\)\t0x\\([0-9A-F]+\\)" nil t) - (setq code (string-to-int (match-string 1) 16) - ucs (string-to-int (match-string 2) 16)) - (setq chr (decode-char 'arabic-digit code)) - (if (or (setq ret (get-char-attribute chr 'ucs)) - (setq ret (get-char-attribute chr 'ucs-mule)) - (setq ret (get-char-attribute chr '=>ucs)) - (setq ret (get-char-attribute chr '->ucs))) - (unless (= ret ucs) - (put-char-attribute chr 'ucs-mule ucs) - ) - (put-char-attribute (decode-char 'ucs ucs) - 'arabic-digit code))))) +(defconst mule-charset-mapping-table-alist + '((arabic-digit "http://www.m17n.org/mule/mappings/arabic0.txt") + (arabic-1-column "http://www.m17n.org/mule/mappings/arabic1.txt") + (arabic-2-column "http://www.m17n.org/mule/mappings/arabic2.txt") + (ipa "http://www.m17n.org/mule/mappings/ipa.txt") + )) -(with-current-buffer "arabic1.txt" - (goto-char (point-min)) - (let (code ucs chr ret) - (while (re-search-forward "^0x\\([0-9A-F]+\\)\t0x\\([0-9A-F]+\\)" nil t) - (setq code (string-to-int (match-string 1) 16) - ucs (string-to-int (match-string 2) 16)) - (setq chr (decode-char 'arabic-1-column code)) - (if (or (setq ret (get-char-attribute chr 'ucs)) - (setq ret (get-char-attribute chr 'ucs-mule)) - (setq ret (get-char-attribute chr '=>ucs)) - (setq ret (get-char-attribute chr '->ucs))) - (unless (= ret ucs) - (put-char-attribute chr 'ucs-mule ucs) - ) - (put-char-attribute (decode-char 'ucs ucs) - 'arabic-1-column code))))) +(defvar mule-charset-http-url-root "~/pub/http/") -(with-current-buffer "arabic2.txt" - (goto-char (point-min)) - (let (code ucs chr ret) - (while (re-search-forward "^0x\\([0-9A-F]+\\)\t0x\\([0-9A-F]+\\)" nil t) - (setq code (string-to-int (match-string 1) 16) - ucs (string-to-int (match-string 2) 16)) - (setq chr (decode-char 'arabic-2-column code)) - (if (or (setq ret (get-char-attribute chr 'ucs)) - (setq ret (get-char-attribute chr 'ucs-mule)) - (setq ret (get-char-attribute chr '=>ucs)) - (setq ret (get-char-attribute chr '->ucs))) - (unless (= ret ucs) - (put-char-attribute chr 'ucs-mule ucs) - ) - (put-char-attribute (decode-char 'ucs ucs) - 'arabic-2-column code))))) +(defun mule-charset-url-to-filename (url) + (when (string-match "http://" url) + (expand-file-name (substring url (match-end 0)) + mule-charset-http-url-root))) + +(defun mule-charset-maybe-retrieve-url (url) + (call-process "wget" nil nil nil + "-q" "-P" (expand-file-name + mule-charset-http-url-root) "-m" url)) + +(defun mule-charset-check-mapping (coded-charset &optional url) + (unless url + (setq url + (car (cdr (assq coded-charset mule-charset-mapping-table-alist))))) + (mule-charset-maybe-retrieve-url url) + (let ((file (mule-charset-url-to-filename url)) + buf) + (if (file-exists-p file) + (progn + (setq buf (find-file-noselect file)) + (with-current-buffer buf + (goto-char (point-min)) + (let (code ucs chr ret) + (while + (re-search-forward + "^0x\\([0-9A-F]+\\)\t\\(0x[0-9A-F]+\\( 0x[0-9A-F]+\\)\\)" + nil t) + (setq code (string-to-int (match-string 1) 16) + ucs (match-string 2)) + (if (string-match " " ucs) + (define-char + (list (cons coded-charset code) + (cons '->decomposition + (mapcar (lambda (str) + (string-to-int + (substring str 2) 16)) + (split-string ucs " "))))) + (setq ucs (string-to-int ucs 16)) + (setq chr (decode-char coded-charset code)) + (if (or (setq ret (get-char-attribute chr 'ucs)) + (setq ret (get-char-attribute chr 'ucs-mule)) + (setq ret (get-char-attribute chr '=>ucs)) + (setq ret (get-char-attribute chr '->ucs))) + (unless (= ret ucs) + (put-char-attribute chr 'ucs-mule ucs)) + (put-char-attribute (decode-char 'ucs ucs) + coded-charset code)))))) + (kill-buffer buf)) + (with-current-buffer "*scratch*" + (insert (format "%s is not found!!!\n" file)))))) + +(dolist (cell mule-charset-mapping-table-alist) + (mule-charset-check-mapping (car cell)(cadr cell)))