1998-08-16 Tanaka Akira <akr@jaist.ac.jp>
+ * FLIM-ELS (flim-modules): Add `closure', `natset', `digraph',
+ `automata', `rx', `lex', `lr-driver', `ew-util', `ew-line',
+ `ew-quote', `ew-unit', `ew-data', `ew-parse', `ew-scan-s',
+ `ew-scan-m', `ew-scan-u' and `ew-dec'.
+
+ * eword-decode.el: Require 'ew-dec.
+ (eword-decode-header): Use `ew-decode-field'.
+ (eword-decode-and-unfold-structure): Ditto.
+ (eword-decode-structured-field-body): Ditto.
+ (eword-decode-unstructured-field-body): Ditto.
+
+ * automata.el, closure.el, digraph.el, ew-data.el, ew-dec.el,
+ ew-line.el, parser, ew-quote.el, ew-scan-m.el, ew-scan-s.el,
+ ew-scan-u.el, ew-unit.el, ew-util.el, lex.el, lr-driver.el,
+ natset.el, rx.el: New files
+
+1998-08-16 Tanaka Akira <akr@jaist.ac.jp>
+
+ * DOODLE: branched from flam-1_9_1
+
+1998-08-16 Tanaka Akira <akr@jaist.ac.jp>
+
* mime-def.el (mime-library-version-string): bump up to 1.9.1.
1998-07-22 Tanaka Akira <akr@jaist.ac.jp>
eword-decode eword-encode
mime mime-parse mmbuffer mmcooked
;; mime-lib
- mailcap))
+ mailcap
+ closure
+ natset
+ digraph
+ automata rx lex
+ lr-driver
+ ew-util
+ ew-line
+ ew-quote
+ ew-unit
+ ew-data
+ ew-parse
+ ew-scan-s
+ ew-scan-m
+ ew-scan-u
+ ew-dec
+ ))
(if (fboundp 'dynamic-link)
(setq flim-modules (cons 'mel-dl flim-modules))
--- /dev/null
+
+(require 'digraph)
+(require 'natset)
+(provide 'automata)
+
+(defvar automata-never-fail (make-symbol "automata-never-fail"))
+(defvar automata-state-var (make-symbol "automata-state"))
+
+(defmacro automata (in-var start-tag &rest clauses)
+ (let* ((org-len (length clauses))
+ (org-graph (make-vector org-len nil))
+ (tag-to-org-alist nil)
+ forest org-to-forest forest-to-org
+ i j tmp trans)
+ (setq tmp clauses
+ i 0)
+ (while tmp
+ (setq tag-to-org-alist (cons (cons (caar tmp) i) tag-to-org-alist)
+ i (1+ i)
+ tmp (cdr tmp)))
+ (setq tmp clauses
+ i 0)
+ (while tmp
+ (setq trans (cddr (cdar tmp)))
+ (while trans
+ (setq j (cdr (assoc (cadr (car trans)) tag-to-org-alist)))
+ (if (not (member j (aref org-graph i)))
+ (aset org-graph i (cons j (aref org-graph i))))
+ (setq trans (cdr trans)))
+ (setq i (1+ i)
+ tmp (cdr tmp)))
+ ;;(error "%s" org-graph)
+ (setq tmp (digraph-split-as-forest org-graph)
+ forest (aref tmp 0)
+ org-to-forest (aref tmp 1)
+ forest-to-org (aref tmp 2))
+ (setq clauses
+ (mapcar
+ (lambda (c)
+ (let ((tag (car c))
+ (action (cadr c))
+ (fail (nth 2 c))
+ (trs (nthcdr 3 c)))
+ (setq trs
+ (mapcar
+ (lambda (next)
+ (list
+ (apply
+ 'natset-union
+ (mapcar
+ (lambda (tr) (if (equal (cadr tr) next) (car tr) (natset-empty)))
+ trs))
+ next))
+ (natset-enum (apply 'natset-single (mapcar 'cadr trs)))))
+ (cons tag (cons action (cons fail trs)))))
+ clauses))
+ `(let ((,automata-state-var ,(cdr (assoc start-tag tag-to-org-alist))))
+ ,@(automata-exp-seq
+ (automata-gen-state
+ in-var clauses
+ org-len
+ (list (cdr (assoc start-tag tag-to-org-alist)))
+ tag-to-org-alist
+ (aref org-to-forest (cdr (assoc start-tag tag-to-org-alist)))
+ forest org-to-forest forest-to-org)))))
+
+(defun automata-gen-state (in-var clauses
+ org-len
+ start-states
+ tag-to-org-alist
+ forest-state forest org-to-forest forest-to-org)
+ (let* ((org-states (aref forest-to-org forest-state))
+ (forest-states (digraph-descents forest forest-state))
+ (branch-length (+ (length org-states) (length forest-states)))
+ (branch-to-org
+ (vconcat
+ (mapcar 'list org-states)
+ (mapcar (lambda (forest-state)
+ (aref forest-to-org forest-state))
+ forest-states)))
+ (org-to-branch
+ (let ((org-to-branch (make-vector org-len nil))
+ (i 0) tmp)
+ (while (< i branch-length)
+ (setq tmp (aref branch-to-org i))
+ (while tmp
+ (aset org-to-branch (car tmp) i)
+ (setq tmp (cdr tmp)))
+ (setq i (1+ i)))
+ org-to-branch))
+ (branch-to-forest
+ (vconcat
+ (make-list (length org-states) nil)
+ forest-states))
+ (branch-state-range
+ (vconcat
+ (mapcar 'natset-single org-states)
+ (mapcar (lambda (forest-state)
+ (apply 'natset-single
+ (aref forest-to-org forest-state)))
+ forest-states)))
+ (branch-descents
+ (vconcat
+ (mapcar (lambda (org-state)
+ (let* ((c (nth org-state clauses))
+ (trs (nthcdr 3 c)))
+ (apply 'natset-union
+ (mapcar (lambda (tr)
+ (natset-single
+ (cdr (assoc (cadr tr) tag-to-org-alist))))
+ trs))))
+ org-states)
+ (mapcar (lambda (forest-state) ())
+ forest-states)))
+ (all-descents (apply 'natset-union (append branch-descents ())))
+ (branch-ascents
+ (let* ((branch-ascents (make-vector branch-length 0))
+ (i 0) j)
+ (while (< i branch-length)
+ (setq j 0)
+ (while (< j branch-length)
+ (if (natset-has-intersection-p (aref branch-state-range i)
+ (aref branch-descents j))
+ (aset branch-ascents i
+ (1+ (aref branch-ascents i))))
+ (setq j (1+ j)))
+ (setq i (1+ i)))
+ branch-ascents))
+ (start-inline nil)
+ (branch-inline
+ (let* ((branch-inline (make-vector branch-length nil))
+ (start-ns (apply 'natset-single start-states))
+ (i 0))
+ (while (< i branch-length)
+ (if (natset-has-intersection-p (aref branch-state-range i) start-ns)
+ (if (and (= (length start-states) 1)
+ (= (aref branch-ascents i) 0))
+ (progn
+ (setq start-inline i)
+ (aset branch-inline i t))
+ nil)
+ (if (= (aref branch-ascents i) 1)
+ (aset branch-inline i t)))
+ (setq i (1+ i)))
+ branch-inline))
+ (branch-gen nil)
+ (_
+ (setq branch-gen
+ (vconcat
+ (mapcar
+ (lambda (org-state)
+ (cons
+ (lambda (org-state)
+ `(,(natset-single org-state)
+ nil ; don't duplicate.
+ ,@(let* ((c (nth org-state clauses)))
+ (automata-exp-seq
+ (nth 1 c) ; action
+ (if (null (nthcdr 3 c))
+ (nth 2 c)
+ `(automata-branch
+ ,in-var ; input variable
+ ,(natset-full) ; input is unpredictable.
+ ,(nth 2 c) ; fail action
+ ,@(let ((trs (nthcdr 3 c)))
+ (mapcar
+ (lambda (next-branch)
+ (let ((input-range
+ (apply 'natset-union
+ (mapcar
+ (lambda (tr)
+ (if (member (cdr (assoc (cadr tr) tag-to-org-alist))
+ (aref branch-to-org next-branch))
+ (car tr)
+ (natset-empty)))
+ trs))))
+ `(,input-range ; input range
+ ,(not (aref branch-inline next-branch)) ; duplicatable unless inlining.
+ ,(let ((goto-list (apply
+ 'append
+ (mapcar
+ (lambda (tr)
+ (let ((range (natset-intersection (car tr) input-range)))
+ (if range
+ `((,range
+ t
+ (automata-goto
+ ,automata-state-var
+ ,org-state
+ ,(cdr (assoc (cadr tr) tag-to-org-alist)))))
+ ())))
+ trs))))
+ (if (= (length goto-list) 1)
+ (car (cddr (car goto-list)))
+ `(automata-branch
+ ,in-var
+ ,input-range
+ ,automata-never-fail
+ ,@goto-list)))
+ ,@(if (aref branch-inline next-branch)
+ (automata-exp-seq
+ `(progn ,@(cddr (funcall (car (aref branch-gen next-branch))
+ (cdr (aref branch-gen next-branch))))))
+ ()))))
+ (natset-enum
+ (apply 'natset-union
+ (mapcar
+ (lambda (tr)
+ (natset-single
+ (aref org-to-branch
+ (cdr (assoc (cadr tr) tag-to-org-alist)))))
+ trs))))
+ )))))))
+ org-state))
+ org-states)
+ (mapcar
+ (lambda (forest-state)
+ (cons
+ (lambda (forest-state)
+ `(,(natset-intersection
+ (apply 'natset-single (aref forest-to-org forest-state))
+ all-descents) ; state range
+ nil ; don't duplicate.
+ ,@(automata-exp-seq
+ (automata-gen-state
+ in-var clauses
+ org-len
+ (aref forest-to-org forest-state)
+ tag-to-org-alist
+ forest-state forest org-to-forest forest-to-org))))
+ forest-state))
+ forest-states))))
+ (branches
+ (let* ((branches ())
+ (i branch-length))
+ (while (< 0 i)
+ (setq i (1- i))
+ (if (not (aref branch-inline i))
+ (setq branches
+ (cons
+ (funcall (car (aref branch-gen i))
+ (cdr (aref branch-gen i)))
+ branches))))
+ branches))
+ )
+ ;;(error "err")
+ (if start-inline
+ (apply
+ 'automata-seq-exp
+ `(progn ,@(cddr (funcall (car (aref branch-gen start-inline))
+ (cdr (aref branch-gen start-inline)))))
+ (cond
+ ((null branches) ())
+ ((null (cdr branches))
+ (cddr (car branches)))
+ (t
+ `((while t
+ (automata-branch
+ ,automata-state-var ,(natset-full) ,automata-never-fail
+ ,@branches))))))
+ (if (= (length branches) 1)
+ `(while t ,@(cddr (car branches)))
+ `(while t ; ,branch-inline ,branch-state-range ,branch-descents ,branch-ascents
+ (automata-branch
+ ,automata-state-var ,(natset-full) ,automata-never-fail
+ ,@branches))))))
+
+(defun automata-seq-exp (&rest seq)
+ (cond
+ ((null seq) nil)
+ ((null (cdr seq))
+ (car seq))
+ (t
+ (setq seq
+ (apply
+ 'append
+ (mapcar
+ (lambda (exp) (if (and (consp exp) (eq (car exp) 'progn))
+ (cdr exp)
+ (list exp)))
+ seq)))
+ (let ((rseq (reverse seq)))
+ (cons 'progn
+ (apply
+ 'append
+ (apply
+ 'append
+ (mapcar
+ (lambda (exp) (if (null exp) () (list exp)))
+ (nreverse (cdr rseq))))
+ (list (list (car rseq)))))))))
+
+(defun automata-exp-seq (&rest seq)
+ (let ((exp (apply 'automata-seq-exp seq)))
+ (if (and (consp exp) (eq (car exp) 'progn))
+ (cdr exp)
+ (list exp))))
+
+(defmacro automata-goto (var curr next)
+ (if (eq curr next)
+ nil
+ `(setq ,var ,next)))
+
+(defmacro automata-branch (var range fail &rest clauses)
+ (when (eq fail automata-never-fail)
+ (setq range (natset-intersection
+ (apply 'natset-union (mapcar 'car clauses))
+ range)))
+ (let ((len (length clauses))
+ ns-list dup-list body-list tmp ns)
+ (setq tmp clauses
+ ns (natset-negate range))
+ (while tmp
+ (setq ns-list (cons (natset-sub (caar tmp) ns) ns-list)
+ dup-list (cons (cadr (car tmp)) dup-list)
+ body-list (cons (cddr (car tmp)) body-list)
+ ns (natset-union ns (caar tmp))
+ tmp (cdr tmp))
+ (if (natset-empty-p (car ns-list))
+ (setq ns-list (cdr ns-list)
+ dup-list (cdr dup-list)
+ body-list (cdr body-list))))
+ (setq ns-list (nreverse ns-list)
+ dup-list (nreverse dup-list)
+ body-list (nreverse body-list))
+ (automata-branch-i var range fail ns-list dup-list body-list)))
+
+(defun automata-branch-i (var range fail ns-list dup-list body-list)
+ (cond
+ ((null ns-list) fail)
+ ((null (cdr ns-list))
+ (if (natset-include-p (car ns-list) range)
+ (apply 'automata-seq-exp (car body-list))
+ `(if ,(natset-gen-pred-exp (car ns-list) var range)
+ ,(apply 'automata-seq-exp (car body-list))
+ ,fail)))
+ (t
+ (let (tmp tmpn tmpd cut)
+ (setq tmpn ns-list
+ cut (natset-empty))
+ (while tmpn
+ (setq cut (natset-union cut (natset-boundary-set (car tmpn)))
+ tmpn (cdr tmpn)))
+ (setq tmpn ns-list
+ tmpd dup-list)
+ (while tmpn
+ (if (not (car tmpd))
+ (setq tmp (natset-minmax (car tmpn))
+ tmp (natset-sub tmp (natset-start-set tmp))
+ cut (natset-sub cut tmp)))
+ (setq tmpn (cdr tmpn)
+ tmpd (cdr tmpd)))
+ (setq cut (natset-sub cut (natset-boundary-set (natset-minmax range))))
+ (if (null (setq cut (natset-enum cut)))
+ `(if ,(natset-gen-pred-exp (car ns-list) var range)
+ ,(apply 'automata-seq-exp (car body-list))
+ ,(automata-branch-i var
+ (natset-sub range (car ns-list))
+ fail
+ (cdr ns-list)
+ (cdr dup-list)
+ (cdr body-list)))
+ (let* ((mid (nth (/ (length cut) 2) cut))
+ (lower (natset-seg 0 (1- mid)))
+ (higher (natset-seg mid))
+ ns-list1 dup-list1 body-list1
+ ns-list2 dup-list2 body-list2
+ )
+ (while ns-list
+ (if (natset-has-intersection-p lower (car ns-list))
+ (setq ns-list1 (cons (natset-intersection (car ns-list) lower) ns-list1)
+ dup-list1 (cons (car dup-list) dup-list1)
+ body-list1 (cons (car body-list) body-list1)))
+ (if (natset-has-intersection-p higher (car ns-list))
+ (setq ns-list2 (cons (natset-intersection (car ns-list) higher) ns-list2)
+ dup-list2 (cons (car dup-list) dup-list2)
+ body-list2 (cons (car body-list) body-list2)))
+ (setq ns-list (cdr ns-list)
+ dup-list (cdr dup-list)
+ body-list (cdr body-list)))
+ ;;(if (or (null ns-list1) (null ns-list2)) (error "divide fail"))
+ `(if (< ,var ,mid)
+ ,(automata-branch-i var
+ (natset-intersection range lower)
+ fail ns-list1 dup-list1 body-list1)
+ ,(automata-branch-i var
+ (natset-intersection range higher)
+ fail ns-list2 dup-list2 body-list2))))))))
+
+
+'(
+(npp (macroexpand '
+(automata pc 0
+ (1
+ (progn
+ (lex-match 1)
+ (lex-scan-unibyte-save))
+ (lex-fail))
+ (5
+ (progn
+ (lex-match 2)
+ (lex-scan-unibyte-save))
+ (lex-fail))
+ (4
+ (progn
+ (lex-scan-unibyte-read pc))
+ (lex-fail)
+ ((9 10) 5) ((32 33) 5))
+ (2
+ (progn
+ (lex-match 3)
+ (lex-scan-unibyte-save)
+ (lex-scan-unibyte-read pc))
+ (lex-fail)
+ ((13 14) 2)
+ ((0 9) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
+ ((10 11) 4))
+ (3
+ (progn
+ (lex-match 3)
+ (lex-scan-unibyte-save)
+ (lex-scan-unibyte-read pc))
+ (lex-fail)
+ ((0 9) 3) ((10 11) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
+ ((13 14) 2)
+ )
+ (0
+ (progn
+ (lex-match 3)
+ (lex-scan-unibyte-save)
+ (lex-scan-unibyte-read pc))
+ (lex-fail)
+ ((0 9) 3) ((10 11) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
+ ((13 14) 2)
+ ((9 10) 1) ((32 33) 1)))))
+
+(npp (macroexpand '
+(automata pc 0
+ (0
+ (progn
+ (lex-match 3)
+ (lex-scan-unibyte-save)
+ (lex-scan-unibyte-read pc))
+ (lex-fail)
+ ((0 9) 3) ((10 11) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
+ ((13 14) 2)
+ ((9 10) 1) ((32 33) 1))
+ (1
+ (progn
+ (lex-match 1)
+ (lex-scan-unibyte-save))
+ (lex-fail))
+ (2
+ (progn
+ (lex-match 3)
+ (lex-scan-unibyte-save)
+ (lex-scan-unibyte-read pc))
+ (lex-fail)
+ ((13 14) 2)
+ ((0 9) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
+ ((10 11) 4))
+ (3
+ (progn
+ (lex-match 3)
+ (lex-scan-unibyte-save)
+ (lex-scan-unibyte-read pc))
+ (lex-fail)
+ ((0 9) 3) ((10 11) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
+ ((13 14) 2)
+ )
+ (4
+ (progn
+ (lex-scan-unibyte-read pc))
+ (lex-fail)
+ ((9 10) 5) ((32 33) 5))
+ (5
+ (progn
+ (lex-match 2)
+ (lex-scan-unibyte-save))
+ (lex-fail))
+)))
+
+(npp
+(automata-gen-state
+'pc
+'((0 (progn (lex-match 3) (lex-scan-unibyte-save) (lex-scan-unibyte-read pc)) (lex-fail) ((9 10 32 33) 1) ((13 14) 2) ((0 9 10 13 14 32 33) 3))
+ (1 (progn (lex-match 1) (lex-scan-unibyte-save)) (lex-fail))
+ (2 (progn (lex-match 3) (lex-scan-unibyte-save) (lex-scan-unibyte-read pc)) (lex-fail) ((13 14) 2) ((0 9 11 13 14 32 33) 3) ((10 11) 4))
+ (3 (progn (lex-match 3) (lex-scan-unibyte-save) (lex-scan-unibyte-read pc)) (lex-fail) ((13 14) 2) ((0 9 10 13 14 32 33) 3))
+ (4 (progn (lex-scan-unibyte-read pc)) (lex-fail) ((9 10 32 33) 5))
+ (5 (progn (lex-match 2) (lex-scan-unibyte-save)) (lex-fail)))
+6
+'(0)
+'((5 . 5) (4 . 4) (3 . 3) (2 . 2) (1 . 1) (0 . 0))
+0
+[(4 1) (2) (3) nil nil]
+[0 4 1 1 2 3]
+[(0) (3 2) (4) (5) (1)]))
+)
\ No newline at end of file
--- /dev/null
+(provide 'closure)
+
+(defmacro closure-make (fun &rest fvs)
+ "Make closure from function FUN and free variable list FVS.
+CAUTION: Do not assign to free variables."
+ (let* ((funv (make-symbol "funv"))
+ (args (make-symbol "args")))
+ `(list
+ ,fun
+ (lambda (,funv ,args ,@fvs)
+ (apply ,funv ,args))
+ ,@fvs)))
+
+(defun closure-call (clo &rest args)
+ "Call closure."
+ (if (functionp clo)
+ (apply clo args)
+ (apply (cadr clo) (car clo) args (cddr clo))))
+
+(defun closure-compose (c1 c2)
+ "Compose C1 and C2.
+
+If either C1 or C2 is nil, another one is returned.
+If C1 and C2 is non-nil, C1 must be closure with one argument."
+ (cond
+ ((null c1) c2)
+ ((null c2) c1)
+ (t
+ (closure-make
+ (lambda (&rest args)
+ (closure-call c1 (apply 'closure-call c2 args)))
+ c1 c2))))
+
+'(
+
+(setq c1 (let ((a 1)) (closure-make (lambda (b) (+ a b)) a)))
+(closure-call c1 2) ; => 3
+
+(let ((a 1)) (setq plus1 (closure-make (lambda (b) (+ a b)) a)))
+(let ((a 2)) (setq plus2 (closure-make (lambda (b) (+ a b)) a)))
+(setq plus3 (closure-compose plus1 plus2))
+(closure-call plus3 4) ; => 7
+
+)
\ No newline at end of file
--- /dev/null
+;;; directed graph package.
+(provide 'digraph)
+
+;; A directed graph is represented as vector of lists of integers.
+;; The number of nodes in the graph is length of the vector.
+;;
+;; i'th list of the vector contains j <=> there is the edge from i to j.
+
+(defalias 'digraph-descents 'aref)
+
+(defun digraph-split-as-dag (g)
+ "Returns 3 element vector of follows.
+
+0. Directed acyclic graph generated by mergeing each strongly connected
+components in G as new nodes.
+
+1. Map from a node in g to a node in result.
+
+2. Map from a node in result to nodes in g."
+ (let* ((old-len (length g))
+ (new-to-olds (vconcat (digraph-tsort g)))
+ (new-len (length new-to-olds))
+ (old-to-new (make-vector old-len ()))
+ (i 0)
+ js res)
+ (while (< i new-len)
+ (setq js (aref new-to-olds i))
+ (while js
+ (aset old-to-new (car js) i)
+ (setq js (cdr js)))
+ (setq i (1+ i)))
+ (setq i (1- new-len))
+ (while (<= 0 i)
+ (setq res (cons
+ (digraph-list-uniq
+ (apply
+ 'nconc
+ (mapcar
+ (lambda (old)
+ (apply
+ 'nconc
+ (mapcar
+ (lambda (old)
+ (if (= i (aref old-to-new old))
+ ()
+ (list (aref old-to-new old))))
+ (aref g old))))
+ (aref new-to-olds i))))
+ res)
+ i (1- i)))
+ (vector
+ (vconcat res)
+ old-to-new
+ new-to-olds)))
+
+(defun digraph-split-as-forest (g)
+ "Returns 3 element vector of follows.
+
+0. Tree generated by merging nodes which have common descent node.
+
+1. Map from a node in g to a node in result.
+
+2. Map from a node in result to nodes in g."
+ (let* ((tmp (digraph-split-as-dag g))
+ (d (aref tmp 0))
+ (g-to-d (aref tmp 1))
+ (d-to-g (aref tmp 2))
+ ;;(_ (error "%s" tmp))
+ (tmp (digraph-dag-to-forest d))
+ (f (aref tmp 0))
+ (d-to-f (aref tmp 1))
+ (f-to-d (aref tmp 2))
+ old-indices
+ new-indices
+ i)
+ (setq i (1- (length g)))
+ (while (<= 0 i)
+ (setq old-indices (cons i old-indices)
+ i (1- i)))
+ (setq i (1- (length f)))
+ (while (<= 0 i)
+ (setq new-indices (cons i new-indices)
+ i (1- i)))
+ (vector
+ f
+ (vconcat
+ (mapcar
+ (lambda (gi) (aref d-to-f (aref g-to-d gi)))
+ old-indices))
+ (vconcat
+ (mapcar
+ (lambda (fi)
+ (apply
+ 'nconc
+ (mapcar
+ (lambda (di) (aref d-to-g di))
+ (aref f-to-d fi))))
+ new-indices)))))
+
+(defun digraph-tsort (dep)
+ "Sort nodes in a graph toporogicaly.
+
+DEP is a vector of lists of integers and
+digraph-tsort returns list of lists of integers.
+
+The graph has (length DEP) nodes.
+
+Dependency for i'th node is represented by i'th element of DEP.
+If (aref DEP i) is a list (j1 j2 ... jn), it represents that i'th node
+depends to j1, j2, ... and jn'th nodes of the graph.
+
+A result of digraph-tsort is a sequence of sets of indexes for each
+strongly connected nodes ordered by indepenedent to dependent as list
+of lists of integers.
+
+If (nth n result) contains an integer i, it represents the fact as
+follows.
+
+1. For all j in (nth n result) and j != i, i'th node depends to j'th
+node and vice versa.
+
+2. For all m < n and j in (nth m result), i'th nodes does not depend
+to j'th node."
+ (let* ((len (length dep))
+ (ord (make-vector len nil))
+ (i 0)
+ (res ()))
+ (while (< i len)
+ (if (not (aref ord i))
+ (setq res (nth 3 (digraph-tsort-visit dep len ord i 0 () res))))
+ (setq i (1+ i)))
+ res))
+
+(defun digraph-tsort-visit (dep len ord i id stk res)
+ (aset ord i id)
+ (let ((js (aref dep i))
+ (m id)
+ (nid (1+ id))
+ (stk (cons i stk))
+ (res res))
+ (while js
+ (let* ((j (car js)) (jo (aref ord j)))
+ (if jo
+ (setq m (if (< m jo) m jo))
+ (let* ((tmp (digraph-tsort-visit dep len ord j nid stk res))
+ (m0 (nth 0 tmp)))
+ (setq m (if (< m m0) m m0)
+ nid (nth 1 tmp)
+ stk (nth 2 tmp)
+ res (nth 3 tmp)))))
+ (setq js (cdr js)))
+ (if (= m id)
+ (let* ((p (member i stk))
+ (nstk (cdr p))
+ (tmp stk))
+ (setcdr p ())
+ (while tmp
+ (aset ord (car tmp) len)
+ (setq tmp (cdr tmp)))
+ (list m nid nstk (cons stk res)))
+ (list m nid stk res))))
+
+(defun digraph-reverse (g)
+ "Return graph with reversed edge."
+ (let* ((len (length g))
+ (rev (make-vector len nil))
+ (i 0))
+ (while (< i len)
+ (let ((links (aref g i)))
+ (while links
+ (if (not (member i (aref rev (car links))))
+ (aset rev (car links) (cons i (aref rev (car links)))))
+ (setq links (cdr links))))
+ (setq i (1+ i)))
+ rev))
+
+(defun digraph-leaves (g)
+ "Return list of leaves of G."
+ (let* ((i (length g))
+ (res ()))
+ (while (< 0 i)
+ (setq i (1- i))
+ (if (null (aref g i))
+ (setq res (cons i res))))
+ res))
+
+(defun digraph-roots (g)
+ "Return list of roots of G."
+ (digraph-leaves (digraph-reverse g)))
+
+;;; forest
+
+(defun digraph-dag-to-forest (dag)
+ "Convert a DAG(directed acyclic graph) to forest(set of trees)."
+ (let* ((len (length dag))
+ (rev (digraph-reverse dag))
+ (forest (digraph-forest-make len))
+ (i 0))
+ (while (< i len)
+ (digraph-dag-forest-add-node forest i (aref rev i))
+ (setq i (1+ i)))
+ ;;(error "%s" forest)
+ (digraph-forest-to-graph forest)))
+
+(defun digraph-dag-forest-add-node (forest node links)
+ (if (null links)
+ (digraph-forest-add-tree forest node)
+ (let ((parent (car links)))
+ (setq links (cdr links))
+ (digraph-forest-add-node forest parent node)
+ (while links
+ (digraph-forest-merge-node forest parent (car links))
+ (setq links (cdr links)))))
+ forest)
+
+;; forest = [last-tree-id node-merge-map node-parent-map]
+(defun digraph-forest-make (num)
+ (vector 0 (make-vector num nil) (make-vector num nil)))
+(defsubst digraph-forest-merge-map (forest) (aref forest 1))
+(defsubst digraph-forest-parent-map (forest) (aref forest 2))
+
+(defun digraph-forest-add-tree (forest node)
+ (aset (digraph-forest-parent-map forest)
+ node
+ (aset forest 0 (1- (aref forest 0)))))
+
+(defun digraph-forest-add-node (forest parent node)
+ (aset (digraph-forest-parent-map forest) node parent))
+
+(defun digraph-forest-node-id (forest node)
+ (let ((merge-map (digraph-forest-merge-map forest)) tmp)
+ (while (setq tmp (aref merge-map node))
+ (setq node tmp))
+ node))
+
+(defun digraph-forest-tree-id (forest node)
+ (setq node (digraph-forest-node-id forest node))
+ (let ((parent-map (digraph-forest-parent-map forest))
+ tmp)
+ (while (<= 0 (setq tmp (aref parent-map node)))
+ (setq node (digraph-forest-node-id forest tmp)))
+ tmp))
+
+(defun digraph-forest-root-p (forest node)
+ (setq node (digraph-forest-node-id forest node))
+ (< (aref (digraph-forest-parent-map forest) node) 0))
+
+(defun digraph-forest-path-to-root (forest node)
+ (setq node (digraph-forest-node-id forest node))
+ (let ((parent-map (digraph-forest-parent-map forest))
+ (path (list node)))
+ (while (not (digraph-forest-root-p forest node))
+ (setq node (digraph-forest-node-id forest (aref parent-map node))
+ path (cons node path)))
+ path))
+
+(defun digraph-forest-merge-node (forest n1 n2)
+ (setq n1 (digraph-forest-node-id forest n1)
+ n2 (digraph-forest-node-id forest n2))
+ (let ((t1 (digraph-forest-tree-id forest n1))
+ (t2 (digraph-forest-tree-id forest n2)))
+ (if (= t1 t2)
+ (let ((merge-map (digraph-forest-merge-map forest))
+ (p1 (digraph-forest-path-to-root forest n1))
+ (p2 (digraph-forest-path-to-root forest n2))
+ top)
+ (while (and p1 p2
+ (= (car p1) (car p2)))
+ (setq top (car p1)
+ p1 (cdr p1)
+ p2 (cdr p2)))
+ (setq p1 (nreverse p1))
+ (setq p2 (nreverse p2))
+ (while (and p1 p2)
+ (aset merge-map (car p2) (car p1))
+ (setq p1 (cdr p1)
+ p2 (cdr p2)))
+ (if (or p1 p2)
+ (let ((ns (nconc p1 p2)) n)
+ (while ns
+ (aset merge-map (car ns) top)
+ (setq ns (cdr ns))))))
+ (let ((merge-map (digraph-forest-merge-map forest))
+ (parent-map (digraph-forest-parent-map forest)))
+ (while (and (not (digraph-forest-root-p forest n1))
+ (not (digraph-forest-root-p forest n2)))
+ (aset merge-map n2 n1)
+ (setq n1 (digraph-forest-node-id forest (aref parent-map n1))
+ n2 (digraph-forest-node-id forest (aref parent-map n2))))
+ (if (digraph-forest-root-p forest n2)
+ (aset merge-map n2 n1)
+ (aset merge-map n1 n2)))))
+ forest)
+
+(defun digraph-forest-to-graph (forest)
+ (let* ((merge-map (digraph-forest-merge-map forest))
+ (parent-map (digraph-forest-parent-map forest))
+ (old-len (length merge-map))
+ (old-to-new (make-vector old-len nil))
+ new-to-olds
+ (new-len 0)
+ (i 0)
+ j
+ graph
+ )
+ (while (< i old-len)
+ (setq j (digraph-forest-node-id forest i))
+ (if (aref old-to-new j)
+ (aset old-to-new i (aref old-to-new j))
+ (progn
+ (aset old-to-new j new-len)
+ (aset old-to-new i new-len)
+ (setq new-len (1+ new-len))))
+ (setq i (1+ i)))
+ (setq new-to-olds (make-vector new-len nil)
+ graph (make-vector new-len nil))
+ (setq i (1- old-len))
+ (while (<= 0 i)
+ (setq j (aref old-to-new i))
+ (aset new-to-olds j
+ (cons i (aref new-to-olds j)))
+ (setq i (1- i)))
+ (setq i 0)
+ (while (< i new-len)
+ (setq j (aref parent-map (digraph-forest-node-id forest (car (aref new-to-olds i)))))
+ (if (<= 0 j) (aset graph (aref old-to-new j)
+ (cons i (aref graph (aref old-to-new j)))))
+ (setq i (1+ i)))
+ (vector graph old-to-new new-to-olds)))
+
+;;; utilities
+
+(defun digraph-list-uniq (il)
+ (if (null il)
+ ()
+ (if (member (car il) (cdr il))
+ (digraph-list-uniq (cdr il))
+ (cons (car il) (digraph-list-uniq (cdr il))))))
+
+'(
+(digraph-split-as-forest [nil nil nil nil nil nil nil nil nil nil (10) nil nil (12) (13) (0 1 2 3 4 5 6 7 8 9 10 11 13 14)])
+[
+ [(14 13 12 11 10 9 8 7 6 5 4 3 1) (2) nil nil nil nil nil nil nil nil nil nil nil nil nil]
+ [14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 0]
+ [(15) (14) (13) (12) (11) (10) (9) (8) (7) (6) (5) (4) (3) (2) (1) (0)]]
+
+)
\ No newline at end of file
--- /dev/null
+(provide 'ew-data)
+
+(defun ew-make-anchor (column str)
+ (let ((anchor (make-symbol str)))
+ (put anchor 'anchor anchor)
+ (put anchor 'prev-frag anchor)
+ (put anchor 'next-frag anchor)
+ (put anchor 'prev-token anchor)
+ (put anchor 'next-token anchor)
+ (put anchor 'column column)
+ (put anchor 'line-length 0)
+ anchor))
+
+(defun ew-terminate (anchor)
+ (let ((frag (get anchor 'prev-frag))
+ (line-length (get anchor 'column)))
+ (while (null (get frag 'line-length))
+ (put frag 'line-length line-length)
+ (setq frag (get frag 'prev-frag)))))
+
+(defun ew-tokenize-frag (anchor frag)
+ (put frag 'prev-token (get anchor 'prev-token))
+ (put frag 'next-token anchor)
+ (put (get anchor 'prev-token) 'next-token frag)
+ (put anchor 'prev-token frag)
+ frag)
+
+(defun ew-add-frag (anchor start end type)
+ (let ((frag (make-symbol (substring (symbol-name anchor) start end))))
+ (put frag 'anchor anchor)
+ (put frag 'start start)
+ (put frag 'end end)
+ (put frag 'type type)
+ (put frag 'prev-frag (get anchor 'prev-frag))
+ (put frag 'next-frag anchor)
+ (put (get anchor 'prev-frag) 'next-frag frag)
+ (put anchor 'prev-frag frag)
+ (put frag 'decode (or (get type 'decode) 'ew-decode-none))
+ (if (string-match "\r\n\\(.*\r\n\\)*" (symbol-name frag))
+ (let ((prev-line-length (+ (get anchor 'column) (match-beginning 0)))
+ (next-line-column (- (length (symbol-name frag)) (match-end 0)))
+ (tmp frag))
+ (while (null (get tmp 'line-length))
+ (put tmp 'line-length prev-line-length)
+ (setq tmp (get tmp 'prev-frag)))
+ (put anchor 'column next-line-column))
+ (put anchor 'column (+ (get anchor 'column) (length (symbol-name frag)))))
+ frag))
+
+(defun ew-add-open (anchor start end type)
+ (let ((frag (ew-add-frag anchor start end type)))
+ (put frag 'prev-open (get anchor 'prev-open))
+ (put anchor 'prev-open frag)
+ frag))
+
+(defun ew-add-close (anchor start end type)
+ (let ((frag (ew-add-frag anchor start end type)))
+ (put frag 'pair (get anchor 'prev-open))
+ (put (get anchor 'prev-open) 'pair frag)
+ (put anchor 'prev-open (get (get frag 'pair) 'prev-open))
+ frag))
+
+(defun ew-add-token (anchor start end type)
+ (ew-tokenize-frag anchor (ew-add-frag anchor start end type)))
+
+(defun ew-add-close-token (anchor start end type)
+ (ew-tokenize-frag anchor (ew-add-close anchor start end type)))
+
+;;; listup
+
+(defun ew-frag-list (anchor)
+ (let ((res ())
+ (tmp (get anchor 'prev-frag)))
+ (while (not (eq anchor tmp))
+ (setq res (cons tmp res)
+ tmp (get tmp 'prev-frag)))
+ res))
+
+(defun ew-token-list (anchor)
+ (let ((res ())
+ (tmp (get anchor 'prev-token)))
+ (while (not (eq anchor tmp))
+ (setq res (cons tmp res)
+ tmp (get tmp 'prev-token)))
+ res))
+
+(defun ew-pair-list (anchor)
+ (mapcar
+ (lambda (frag)
+ (cons (symbol-value (get frag 'type))
+ frag))
+ (ew-frag-list anchor)))
+
+;;; phrase marking
+
+(defun ew-mark-phrase (frag1 frag2)
+ (while (not (eq frag1 frag2))
+ (unless (ew-comment-frag-p frag2)
+ (put frag2 'decode 'ew-decode-phrase))
+ (setq frag2 (get frag2 'prev-frag)))
+ (unless (ew-comment-frag-p frag2)
+ (put frag2 'decode 'ew-decode-phrase))
+ (setq frag2 (get frag2 'prev-frag))
+ (while (not (get frag2 'prev-token))
+ (unless (ew-comment-frag-p frag2)
+ (put frag2 'decode 'ew-decode-phrase))
+ (setq frag2 (get frag2 'prev-frag))))
+
+;;; frag predicate
+
+(defun ew-comment-frag-p (frag)
+ (member (get frag 'type)
+ '(ew:raw-cm-begin-tok
+ ew:raw-cm-end-tok
+ ew:raw-cm-nested-begin-tok
+ ew:raw-cm-nested-end-tok
+ ew:raw-cm-texts-tok
+ ew:raw-cm-wsp-tok
+ ew:raw-cm-fold-tok
+ ew:raw-cm-qfold-tok
+ ew:raw-cm-qpair-tok)))
+
+(defun ew-special-frag-p (frag)
+ (or (eq frag (get frag 'anchor))
+ (member (get frag 'type)
+ '(ew:raw-lt-tok
+ ew:raw-gt-tok
+ ew:raw-at-tok
+ ew:raw-comma-tok
+ ew:raw-semicolon-tok
+ ew:raw-colon-tok
+ ew:raw-dot-tok
+ ew:raw-qs-begin-tok
+ ew:raw-qs-end-tok
+ ew:raw-dl-begin-tok
+ ew:raw-dl-end-tok
+ ew:raw-cm-begin-tok
+ ew:raw-cm-end-tok))))
--- /dev/null
+(require 'emu)
+(require 'ew-unit)
+(require 'ew-scan-s)
+(require 'ew-scan-m)
+(require 'ew-scan-u)
+(require 'ew-parse)
+(provide 'ew-dec)
+
+;;; user customizable variable.
+
+(defvar ew-decode-quoted-encoded-word nil)
+(defvar ew-ignore-75bytes-limit nil)
+(defvar ew-ignore-76bytes-limit nil)
+(defvar ew-permit-sticked-comment nil)
+(defvar ew-permit-sticked-special nil)
+
+;; anonymous function to decode ground string.
+;; NOTE: STR is CRLF-form and it should return as CRLF-form.
+(defvar ew-decode-us-ascii (lambda (str) (decode-coding-string str 'iso-latin-1-unix)))
+
+;;;
+(defvar ew-decode-field-syntax-alist
+'(("from" ew-scan-unibyte-std11 . ew:tag-mailbox+-tok)
+ ("sender" ew-scan-unibyte-std11 . ew:tag-mailbox-tok)
+ ("to" ew-scan-unibyte-std11 . ew:tag-address+-tok)
+ ("resent-to" ew-scan-unibyte-std11 . ew:tag-address+-tok)
+ ("cc" ew-scan-unibyte-std11 . ew:tag-address+-tok)
+ ("resent-cc" ew-scan-unibyte-std11 . ew:tag-address+-tok)
+ ("bcc" ew-scan-unibyte-std11 . ew:tag-address*-tok)
+ ("resent-bcc" ew-scan-unibyte-std11 . ew:tag-address*-tok)
+ ("message-id" ew-scan-unibyte-std11)
+ ("resent-message-id" ew-scan-unibyte-std11)
+ ("in-reply-to" ew-scan-unibyte-std11 . ew:tag-phrase-msg-id*-tok)
+ ("references" ew-scan-unibyte-std11 . ew:tag-phrase-msg-id*-tok)
+ ("keywords" ew-scan-unibyte-std11 . ew:tag-phrase*-tok)
+ ("subject" ew-scan-unibyte-unstructured)
+ ("comments" ew-scan-unibyte-unstructured)
+ ("encrypted" ew-scan-unibyte-std11)
+ ("date" ew-scan-unibyte-std11)
+ ("reply-to" ew-scan-unibyte-std11 . ew:tag-address+-tok)
+ ("received" ew-scan-unibyte-std11)
+ ("resent-reply-to" ew-scan-unibyte-std11 . ew:tag-address+-tok)
+ ("resent-from" ew-scan-unibyte-std11 . ew:tag-mailbox+-tok)
+ ("resent-sender" ew-scan-unibyte-std11 . ew:tag-mailbox-tok)
+ ("resent-date" ew-scan-unibyte-std11)
+ ("return-path" ew-scan-unibyte-std11)
+ ("mime-version" ew-scan-unibyte-std11)
+ ("content-type" ew-scan-unibyte-mime)
+ ("content-transfer-encoding" ew-scan-unibyte-mime)
+ ("content-id" ew-scan-unibyte-mime)
+ ("content-description" ew-scan-unibyte-unstructured)
+))
+(defvar ew-decode-field-default-syntax '(ew-scan-unibyte-unstructured))
+
+(defun ew-decode-field (field-name field-body &optional eword-filter)
+ "Decode MIME RFC2047 encoded-words in a field.
+FIELD-NAME is a name of the field such as \"To\", \"Subject\" etc. and
+used to selecting syntax of body of the field and deciding first
+column of body of the field.
+FIELD-BODY is a body of the field.
+
+If FIELD-BODY has multiple lines, each line is separated by CRLF as
+pure network representation. Also if the result has multiple lines,
+each line is separated by CRLF.
+
+If EWORD-FILTER is non-nil, it should be closure. it is called for
+each successful decoded encoded-word with decoded string as a
+argument. The return value of EWORD-FILTER is used as decoding result
+instead of its argument."
+ (let ((tmp (assoc (downcase field-name) ew-decode-field-syntax-alist))
+ frag-anchor frag1 frag2 decode)
+ (if tmp
+ (setq tmp (cdr tmp))
+ (setq tmp ew-decode-field-default-syntax))
+ (setq frag-anchor (funcall (car tmp) (1+ (length field-name)) field-body))
+ ;;(setq zzz frag-anchor)
+ (when (cdr tmp)
+ (ew-mark (cdr tmp) frag-anchor))
+ (setq frag1 (get frag-anchor 'next-frag))
+ (while (not (eq frag1 frag-anchor))
+ (setq decode (get frag1 'decode))
+ (setq frag2 (get frag1 'next-frag))
+ (while (and (not (eq frag2 frag-anchor))
+ (eq decode (get frag2 'decode)))
+ (setq frag2 (get frag2 'next-frag)))
+ (funcall decode frag-anchor frag1 frag2 eword-filter)
+ (setq frag1 frag2))
+ (mapconcat (lambda (frag) (or (get frag 'result) (symbol-name frag)))
+ (ew-frag-list frag-anchor) "")))
+
+(defun ew-mark (tag anchor)
+ (let ((tlist (cons (list (symbol-value tag)) (ew-pair-list anchor))))
+ ;;(insert (format "%s" tlist))
+ (ew-parse
+ (lambda () (if (null tlist) '(0)
+ (prog1 (car tlist) (setq tlist (cdr tlist)))))
+ (lambda (msg tok) (setq zzz-anchor anchor) (message "parse error: %s%s : %s" msg tok anchor)))))
+
+(defun ew-decode-none (anchor frag end eword-filter)
+ (while (not (eq frag end))
+ (put frag 'result (funcall ew-decode-us-ascii (symbol-name frag)))
+ (setq frag (get frag 'next-frag))))
+
+(defun ew-decode-generic (anchor start end
+ decode-ewords
+ decode-others
+ eword gap all
+ eword-filter)
+ (let ((frag start) result buff type f)
+ (while (not (eq frag end))
+ (setq type (get frag 'type))
+ (cond
+ ((and (memq type eword)
+ (ew-proper-eword-p frag))
+ (when buff
+ (setq result (ew-rappend result
+ (funcall decode-others
+ (nreverse buff)))
+ buff ()))
+ (let ((first frag) (ewords (list frag)))
+ (while (progn
+ (setq f (get frag 'next-frag))
+ (while (and (not (eq f end))
+ (memq (get f 'type) gap))
+ (setq f (get f 'next-frag)))
+ (and (not (eq f end))
+ (ew-proper-eword-p f)))
+ (setq ewords (ew-rcons* ewords f)
+ frag f))
+ (while (not (eq first frag))
+ (put first 'result "")
+ (setq first (get first 'next-frag)))
+ (put frag 'result "")
+ (setq result (ew-rappend result
+ (funcall decode-ewords
+ (nreverse ewords)
+ eword-filter)))))
+ ((memq type all)
+ (setq buff (cons frag buff))
+ (put frag 'result ""))
+ (t
+ (error "unexpected token: %s (%s)" frag type)))
+ (setq frag (get frag 'next-frag)))
+ (when buff
+ (setq result (ew-rappend result (funcall decode-others (nreverse buff)))))
+ (put start 'result
+ (apply 'ew-quote-concat (nreverse result)))
+ ))
+
+(defun ew-decode-generic-others (frags puncts quotes targets)
+ (let (result buff frag type tmp)
+ (while frags
+ (setq frag (car frags)
+ type (get frag 'type)
+ frags (cdr frags))
+ (cond
+ ((memq type puncts)
+ (when buff
+ (setq buff (nreverse buff)
+ tmp (funcall ew-decode-us-ascii
+ (mapconcat 'car buff "")))
+ (if (ew-contain-non-ascii-p tmp)
+ (setq result (ew-rcons* result tmp))
+ (setq result (ew-rcons*
+ result
+ (funcall ew-decode-us-ascii
+ (mapconcat 'cdr buff "")))))
+ (setq buff ()))
+ (setq result (ew-rcons*
+ result
+ (symbol-name frag))))
+ ((memq type quotes)
+ (setq buff (ew-rcons*
+ buff
+ (cons (substring (symbol-name frag) 1)
+ (symbol-name frag)))))
+ ((memq type targets)
+ (setq buff (ew-rcons*
+ buff
+ (cons (symbol-name frag)
+ (symbol-name frag)))))
+ (t
+ (error "something wrong: unexpected token: %s (%s)" frag type))))
+ (when buff
+ (setq buff (nreverse buff)
+ tmp (funcall ew-decode-us-ascii
+ (mapconcat 'car buff "")))
+ (if (ew-contain-non-ascii-p tmp)
+ (setq result (ew-rcons* result tmp))
+ (setq result (ew-rcons*
+ result
+ (funcall ew-decode-us-ascii
+ (mapconcat 'cdr buff "")))))
+ (setq buff ()))
+ (nreverse result)))
+
+(defun ew-decode-unstructured-ewords (ewords eword-filter)
+ (let (result)
+ (while ewords
+ (setq result (ew-rcons*
+ result
+ (list (ew-decode-eword (symbol-name (car ewords))
+ eword-filter
+ 'ew-encode-crlf)))
+ ewords (cdr ewords)))
+ (nreverse result)))
+
+(defun ew-decode-unstructured-others (frags)
+ (let (result)
+ (while frags
+ (setq result (ew-rcons*
+ result
+ (symbol-name (car frags)))
+ frags (cdr frags)))
+ (list (funcall ew-decode-us-ascii
+ (apply 'concat (nreverse result))))))
+
+(defun ew-decode-unstructured (anchor start end eword-filter)
+ (ew-decode-generic
+ anchor start end
+ 'ew-decode-unstructured-ewords
+ 'ew-decode-unstructured-others
+ '(ew:raw-us-texts-tok)
+ '(ew:raw-us-wsp-tok
+ ew:raw-us-fold-tok)
+ '(ew:raw-us-texts-tok
+ ew:raw-us-wsp-tok
+ ew:raw-us-fold-tok)
+ eword-filter))
+
+(defun ew-decode-phrase-ewords (ewords eword-filter)
+ (let ((qs (eq (get (car ewords) 'type) 'ew:raw-qs-texts-tok))
+ require-quoting
+ result)
+ (while ewords
+ (setq result (ew-rcons*
+ result
+ (list (ew-decode-eword (symbol-name (car ewords))
+ eword-filter
+ 'ew-encode-crlf)))
+ require-quoting (or require-quoting
+ (string-match "[][()<>@,;:\\\".\000-\037]"
+ (caar result)))
+ ewords (cdr ewords)))
+ (if require-quoting
+ (list
+ (funcall (if qs 'ew-embed-in-quoted-string 'ew-embed-in-phrase)
+ (apply 'ew-quote-concat
+ (nreverse result))))
+ (nreverse result))))
+
+(defun ew-decode-phrase-others (frags)
+ (ew-decode-generic-others
+ frags
+ '(ew:raw-qs-begin-tok
+ ew:raw-qs-end-tok)
+ '(ew:raw-qs-qfold-tok
+ ew:raw-qs-qpair-tok)
+ '(ew:raw-atom-tok
+ ew:raw-wsp-tok
+ ew:raw-fold-tok
+ ew:raw-qs-texts-tok
+ ew:raw-qs-wsp-tok
+ ew:raw-qs-fold-tok)))
+
+(defun ew-decode-phrase (anchor start end eword-filter)
+ (ew-decode-generic
+ anchor start end
+ 'ew-decode-phrase-ewords
+ 'ew-decode-phrase-others
+ (if ew-decode-quoted-encoded-word
+ '(ew:raw-atom-tok ew:raw-qs-texts-tok)
+ '(ew:raw-atom-tok))
+ '(ew:raw-wsp-tok
+ ew:raw-fold-tok)
+ '(ew:raw-atom-tok
+ ew:raw-wsp-tok
+ ew:raw-fold-tok
+ ew:raw-qs-begin-tok
+ ew:raw-qs-end-tok
+ ew:raw-qs-texts-tok
+ ew:raw-qs-wsp-tok
+ ew:raw-qs-fold-tok
+ ew:raw-qs-qfold-tok
+ ew:raw-qs-qpair-tok)
+ eword-filter))
+
+(defun ew-decode-comment-ewords (ewords eword-filter)
+ (let (require-quoting
+ result)
+ (while ewords
+ (setq result (ew-rcons*
+ result
+ (list (ew-decode-eword (symbol-name (car ewords))
+ eword-filter
+ 'ew-encode-crlf)))
+ require-quoting (or require-quoting
+ (string-match "[()\\\\]" (caar result)))
+ ewords (cdr ewords)))
+ (if require-quoting
+ (list
+ (ew-embed-in-comment
+ (apply 'ew-quote-concat
+ (nreverse result))))
+ (nreverse result))))
+
+(defun ew-decode-comment-others (frags)
+ (ew-decode-generic-others
+ frags
+ '()
+ '(ew:raw-cm-qfold-tok
+ ew:raw-cm-qpair-tok)
+ '(ew:raw-cm-texts-tok
+ ew:raw-cm-wsp-tok
+ ew:raw-cm-fold-tok)))
+
+(defun ew-decode-comment (anchor start end eword-filter)
+ (ew-decode-generic
+ anchor start end
+ 'ew-decode-comment-ewords
+ 'ew-decode-comment-others
+ '(ew:raw-cm-texts-tok)
+ '(ew:raw-cm-wsp-tok
+ ew:raw-cm-fold-tok)
+ '(ew:raw-cm-texts-tok
+ ew:raw-cm-wsp-tok
+ ew:raw-cm-fold-tok
+ ew:raw-cm-qfold-tok
+ ew:raw-cm-qpair-tok)
+ eword-filter))
+
+;;;
+
+(defun ew-embed-in-phrase (str)
+ (concat "\"" (ew-embed-in-quoted-string str) "\""))
+
+(defun ew-embed-in-quoted-string (str)
+ (ew-quote-as-quoted-pair str '(?\\ ?\")))
+
+(defun ew-embed-in-comment (str)
+ (ew-quote-as-quoted-pair str '(?\\ ?\( ?\))))
+
+(defun ew-quote-as-quoted-pair (str specials)
+ (let ((i 0) (j 0) (l (length str)) result)
+ (while (< j l)
+ (when (member (aref str j) specials)
+ (setq result (ew-rcons*
+ result
+ (substring str i j)
+ "\\")
+ i j))
+ (setq j (1+ j)))
+ (when (< i l)
+ (setq result (ew-rcons*
+ result
+ (substring str i))))
+ (apply 'concat (nreverse result))))
+
+;;;
+
+(defun ew-proper-eword-p (frag)
+ (and
+ (or ew-ignore-75bytes-limit
+ (<= (length (symbol-name frag)) 75))
+ (or ew-ignore-76bytes-limit
+ (<= (get frag 'line-length) 76))
+ (cond
+ ((eq (get frag 'type) 'ew:raw-cm-texts-tok)
+ (ew-eword-p (symbol-name frag)))
+ ((eq (get frag 'type) 'ew:raw-qs-texts-tok)
+ (ew-eword-p (symbol-name frag)))
+ ((eq (get frag 'type) 'ew:raw-atom-tok)
+ (and
+ (or ew-permit-sticked-comment
+ (and
+ (not (ew-comment-frag-p (get frag 'prev-frag)))
+ (not (ew-comment-frag-p (get frag 'next-frag)))))
+ (or ew-permit-sticked-special
+ (and
+ (or (ew-comment-frag-p (get frag 'prev-frag))
+ (not (ew-special-frag-p (get frag 'prev-frag))))
+ (or (ew-comment-frag-p (get frag 'next-frag))
+ (not (ew-special-frag-p (get frag 'next-frag))))))
+ (ew-eword-p (symbol-name frag))))
+ ((eq (get frag 'type) 'ew:raw-us-texts-tok)
+ (and
+ (or ew-permit-sticked-special
+ (not (ew-special-frag-p (get frag 'prev-frag))))
+ (ew-eword-p (symbol-name frag))))
+ (t
+ nil))))
+
+(defun ew-contain-non-ascii-p (str)
+ (not (eq (charsets-to-mime-charset (find-charset-string str)) 'us-ascii)))
+
+'(
+
+(ew-decode-field "To" " =?US-ASCII?Q?phrase?= <akr@jaist.ac.jp>")
+(ew-decode-field "To" " =?US-ASCII?Q?phrase?= < =?US-ASCII?Q?akr?= @jaist.ac.jp>")
+(ew-decode-field "To" " =?US-ASCII?Q?akr?= @jaist.ac.jp")
+(ew-decode-field "Subject" " =?ISO-2022-JP?B?GyRCJCIbKEI=?=")
+(ew-decode-field "Content-Type" " text/vnd.latex-z(=?US-ASCII?Q?What=3F?=);charset=ISO-2022-JP")
+
+(ew-decode-field "To" " =?US-ASCII?Q?A=22B=5CC?= <akr@jaist.ac.jp>")
+(let ((ew-decode-quoted-encoded-word t))
+ (ew-decode-field "To" " \"=?US-ASCII?Q?A=22B=5CC?=\" <akr@jaist.ac.jp>"))
+
+(ew-decode-field "To" " akr@jaist.ac.jp (=?US-ASCII?Q?=28A=29B=5C?=)")
+
+(ew-decode-field "To" "\"A\\BC\e$B\\\"\\\\\e(B\" <foo@bar>")
+(ew-decode-field "To" "\"A\\BC\" <foo@bar>")
+(ew-decode-field "To" "\"\e\\$\\B\\$\\\"\e\\(\\B\" <foo@bar>")
+
+)
--- /dev/null
+(require 'lex)
+(require 'ew-util)
+(provide 'ew-line)
+
+(put 'ew-line-generic 'lisp-indent-function 1)
+(put 'ew-line-convert 'lisp-indent-function 1)
+
+(defun ew-lf-to-crlf (str)
+ (let ((i 0) (j 0) (l (length str)) result)
+ (while (< j l)
+ (when (equal (aref str j) ?\n)
+ (setq result (ew-rcons*
+ result
+ (substring str i j)
+ "\r")
+ i j))
+ (setq j (1+ j)))
+ (when (< i l)
+ (setq result (ew-rcons*
+ result
+ (substring str i))))
+ (apply 'concat (nreverse result))))
+
+(defun ew-crlf-to-lf (str)
+ (let* ((i 0) (j 0) (l (length str)) (l- (1- l)) result)
+ (while (< j l-)
+ (when (and (equal (aref str j) ?\r)
+ (equal (aref str (1+ j)) ?\n))
+ (setq result (ew-rcons*
+ result
+ (substring str i j))
+ j (1+ j)
+ i j))
+ (setq j (1+ j)))
+ (when (< i l)
+ (setq result (ew-rcons*
+ result
+ (substring str i))))
+ (apply 'concat (nreverse result))))
+
+(defun ew-lf-crlf-to-crlf (str)
+ (let* ((i 0) (j 0) (l (length str)) (l- (1- l)) result)
+ (while (< j l)
+ (cond
+ ((and (< j l-)
+ (equal (aref str j) ?\r)
+ (equal (aref str (1+ j)) ?\n))
+ (setq j (1+ j)))
+ ((equal (aref str j) ?\n)
+ (setq result (ew-rcons*
+ result
+ (substring str i j)
+ "\r")
+ i j)))
+ (setq j (1+ j)))
+ (when (< i l)
+ (setq result (ew-rcons*
+ result
+ (substring str i))))
+ (apply 'concat (nreverse result))))
+
+(defun ew-crlf-unfold (str)
+ (let* ((i 0) (j 0) (l (length str)) (l- (- l 2)) result)
+ (while (< j l-)
+ (when (and (equal (aref str j) ?\r)
+ (equal (aref str (1+ j)) ?\n)
+ (member (aref str (+ j 2)) '(?\t ?\ )))
+ (setq result (ew-rcons*
+ result
+ (substring str i j))
+ j (+ j 2)
+ i j))
+ (setq j (1+ j)))
+ (when (< i l)
+ (setq result (ew-rcons*
+ result
+ (substring str i))))
+ (apply 'concat (nreverse result))))
+
+(defun ew-lf-unfold (str)
+ (let* ((i 0) (j 0) (l (length str)) (l- (- l 1)) result)
+ (while (< j l-)
+ (when (and (equal (aref str j) ?\n)
+ (member (aref str (+ j 1)) '(?\t ?\ )))
+ (setq result (ew-rcons*
+ result
+ (substring str i j))
+ j (+ j 1)
+ i j))
+ (setq j (1+ j)))
+ (when (< i l)
+ (setq result (ew-rcons*
+ result
+ (substring str i))))
+ (apply 'concat (nreverse result))))
+
+(defun ew-cut-cr-lf (str)
+ (let ((i 0) (j 0) (l (length str)) result)
+ (while (< j l)
+ (when (member (aref str j) '(?\r ?\n))
+ (setq result (ew-rcons*
+ result
+ (substring str i j))
+ i (1+ j)))
+ (setq j (1+ j)))
+ (when (< i l)
+ (setq result (ew-rcons*
+ result
+ (substring str i))))
+ (apply 'concat (nreverse result))))
+
+(defmacro ew-line-generic-define ()
+ (let ((str (make-symbol "str"))
+ (others-fun (make-symbol "others-fun"))
+ (fold-fun (make-symbol "fold-fun"))
+ (crlf-fun (make-symbol "crlf-fun"))
+ (bare-cr-fun (make-symbol "bare-cr-fun"))
+ (bare-lf-fun (make-symbol "bare-lf-fun"))
+ (p (make-symbol "p"))
+ (q (make-symbol "q"))
+ (r (make-symbol "r")))
+ `(defun ew-line-generic
+ (,str ,others-fun ,fold-fun ,crlf-fun ,bare-cr-fun ,bare-lf-fun)
+ (let ((,p 0) (,q (length ,str)) ,r)
+ (while (< ,p ,q)
+ (setq ,r ,p)
+ (lex-scan-unibyte ,str ,p ,q
+ ((+ [^ "\r\n"]) (when ,others-fun (funcall ,others-fun ,r ,p)))
+ ((?\r ?\n [" \t"]) (when ,fold-fun (funcall ,fold-fun ,r ,p)))
+ ((?\r ?\n) (when ,crlf-fun (funcall ,crlf-fun ,r ,p)))
+ ((?\r) (when ,bare-cr-fun (funcall ,bare-cr-fun ,r ,p)))
+ ((?\n) (when ,bare-lf-fun (funcall ,bare-lf-fun ,r ,p)))
+ (() (error "something wrong"))))
+ ,q))))
+
+(ew-line-generic-define)
+
+(defmacro ew-line-convert-define ()
+ (let ((str (make-symbol "str"))
+ (others-fun (make-symbol "others-fun"))
+ (fold-fun (make-symbol "fold-fun"))
+ (crlf-fun (make-symbol "crlf-fun"))
+ (bare-cr-fun (make-symbol "bare-cr-fun"))
+ (bare-lf-fun (make-symbol "bare-lf-fun"))
+ (index (make-symbol "index"))
+ (result (make-symbol "result"))
+ (start (make-symbol "starx"))
+ (end (make-symbol "end")))
+ `(defun ew-line-convert
+ (,str ,others-fun ,fold-fun ,crlf-fun ,bare-cr-fun ,bare-lf-fun)
+ (let ((,index 0) ,result)
+ (when (> (ew-line-generic
+ ,str
+ ,@(mapcar
+ (lambda (fun)
+ `(when ,fun
+ (lambda (,start ,end)
+ (when (< ,index ,start)
+ (setq ,result
+ (ew-rcons* ,result
+ (substring ,str ,index ,start))))
+ (setq ,result
+ (ew-rcons* ,result
+ (funcall ,fun
+ (substring ,str ,start ,end)))
+ ,index ,end))))
+ (list others-fun fold-fun crlf-fun bare-cr-fun bare-lf-fun)))
+ ,index)
+ (setq ,result
+ (ew-rcons* ,result
+ (substring ,str ,index))))
+ (apply 'concat (nreverse ,result))))))
+
+(ew-line-convert-define)
--- /dev/null
+;;; quoted encoded word library
+
+(require 'ew-util)
+(require 'ew-line)
+(provide 'ew-quote)
+
+;; This library provides functions operating strings embedding
+;; unencodable encoded words.
+
+;;;type embedded-pattern denoted-pattern
+;; Type-0 =?(QQQ)*A?B?C?= =?(Q)*A?B?C?=
+;; Type-1 =?(QQQ)*QA?B?C?= Decode =?(Q)*A?B?C?= as encoded-word
+;; Type-2 =?(QQQ)*QQA?B?C?= =?(Q)*A?B?C?
+
+;; Q : quoting character '+'.
+;; A : token. But it does not start with quoting character.
+;; B : token.
+;; C : encoded-text.
+
+(defconst ew-quoting-char ?+)
+(defconst ew-quoting-char-singleton (list ew-quoting-char))
+(defconst ew-quoting-chars-regexp
+ (concat (regexp-quote (char-to-string ew-quoting-char)) "*"))
+
+(defconst ew-token-regexp "[-!#-'*+0-9A-Z^-~]+")
+(defconst ew-encoded-text-regexp "[!->@-~]+")
+(defconst ew-encoded-word-regexp
+ (concat (regexp-quote "=?")
+ "\\(" ew-token-regexp "\\)"
+ (regexp-quote "?")
+ "\\(" ew-token-regexp "\\)"
+ (regexp-quote "?")
+ "\\(" ew-encoded-text-regexp "\\)"
+ (regexp-quote "?=")))
+
+(defconst ew-type2-regexp
+ (concat (regexp-quote "=?")
+ "\\(" ew-token-regexp "\\)"
+ (regexp-quote "?")
+ "\\(" ew-token-regexp "\\)"
+ (regexp-quote "?")
+ "\\(" ew-encoded-text-regexp "\\)"
+ (regexp-quote "?")
+ "\\'"))
+
+;;;
+
+(defun ew-quoting-char-seq (num)
+ (make-string num ew-quoting-char))
+
+(defun ew-quote (str)
+ (ew-quote-concat str))
+
+(defun ew-concat (&rest args)
+ (apply 'ew-quote-concat (mapcar 'list args)))
+
+(defun ew-quote-concat (&rest args)
+ (let (result raws tmp)
+ (while args
+ (setq tmp (car args))
+ (cond
+ ((stringp tmp)
+ ;; raw string
+ (setq raws (cons tmp raws)))
+ ((listp tmp)
+ ;; quoted encoded word embedding strings
+ (let (str start eword-start charset-start quoting-end eword-end l q r)
+ (while tmp
+ (setq str (car tmp)
+ start 0)
+ (while (string-match ew-encoded-word-regexp str start)
+ (setq eword-start (match-beginning 0)
+ charset-start (match-beginning 1)
+ eword-end (match-end 0))
+ (string-match ew-quoting-chars-regexp str charset-start)
+ (setq quoting-end (match-end 0)
+ l (- quoting-end charset-start)
+ q (/ l 3)
+ r (% l 3))
+ (cond
+ ((= r 0) ; Type-0
+ (setq raws
+ (ew-rcons*
+ raws
+ (substring str start charset-start)
+ (ew-quoting-char-seq q)
+ (substring str quoting-end eword-end))))
+ ((= r 2) ; Type-2
+ (setq raws
+ (ew-rcons*
+ raws
+ (substring str start charset-start)
+ (ew-quoting-char-seq q)
+ (substring str quoting-end (1- eword-end)))))
+ ((= r 1) ; Type-1
+ (setq raws
+ (ew-rcons*
+ raws
+ (substring str start eword-start))
+ result
+ (ew-rcons*
+ result
+ (ew-quote-sole (apply 'concat (nreverse raws)) t)
+ (substring str eword-start eword-end))
+ raws ())))
+ (setq start eword-end))
+ (setq raws (ew-rcons* raws (substring str start))
+ tmp (cdr tmp)))))
+ (t
+ (error "ew-quote-concat: %s" tmp)))
+ (setq args (cdr args)))
+ (setq result
+ (ew-rcons*
+ result
+ (ew-quote-sole (apply 'concat (nreverse raws)) nil)))
+ (apply 'concat (nreverse result))))
+
+(defun ew-quote-sole (str gen-type2)
+ (let (result (start 0) charset-start quoting-end eword-end l)
+ (while (string-match ew-encoded-word-regexp str start)
+ (setq charset-start (match-beginning 1)
+ eword-end (match-end 0))
+ (string-match ew-quoting-chars-regexp str charset-start)
+ (setq quoting-end (match-end 0)
+ l (* (- quoting-end charset-start) 3)
+ result
+ (ew-rcons*
+ result
+ (substring str start charset-start)
+ (ew-quoting-char-seq l)
+ (substring str quoting-end eword-end))
+ start eword-end))
+ (if (and gen-type2
+ (string-match ew-type2-regexp str start))
+ (progn
+ (setq charset-start (match-beginning 1)
+ eword-end (match-end 0))
+ (string-match ew-quoting-chars-regexp str charset-start)
+ (setq quoting-end (match-end 0)
+ l (* (- quoting-end charset-start) 3)
+ result
+ (ew-rcons*
+ result
+ (substring str start charset-start)
+ (ew-quoting-char-seq (+ l 2))
+ (substring str quoting-end eword-end)
+ "=")))
+ (setq result (ew-rcons* result (substring str start))))
+ (apply 'concat (nreverse result))))
+
+(defun ew-quote-eword (charset encoding encoded-text)
+ (string-match ew-quoting-chars-regexp charset)
+ (concat
+ "=?+" ; Type-1
+ (ew-quoting-char-seq (* (- (match-end 0) (match-beginning 0)) 3))
+ (substring charset (match-end 0))
+ "?"
+ encoding
+ "?"
+ encoded-text
+ "?="))
+
+(defun ew-encode-crlf (str)
+ (let ((sstart 0)
+ (mstart 0)
+ (end (length str)) result ms me)
+ (while (string-match "\\(\r\n\\)+" str mstart)
+ (setq ms (match-beginning 0)
+ me (match-end 0))
+ (setq mstart me)
+ (when (and (< me end)
+ (member (aref str me) '(?\t ?\ )))
+ (setq me (- me 2)))
+ (when (< ms me)
+ (setq result (ew-rcons* result
+ (substring str sstart ms)
+ "=?+US-ASCII?Q?")
+ sstart me)
+ (while (< ms me)
+ (setq result (ew-rcons* result "=0D=0A")
+ ms (+ ms 2)))
+ (setq result (ew-rcons* result "?="))))
+ (when (< sstart end)
+ (setq result (ew-rcons* result
+ (substring str sstart))))
+ (apply 'concat (nreverse result))))
+
+
+'(
+(ew-quote-concat "aaa=?A?B?C?=ccc") ;"aaa=?A?B?C?=ccc"
+(ew-quote-concat "aaa=?+A?B?C?=ccc") ;"aaa=?+++A?B?C?=ccc"
+(ew-quote-concat '("aaa=?A?B?C?=ccc")) ;"aaa=?A?B?C?=ccc"
+(ew-quote-concat '("aaa=?+++A?B?C?=ccc")) ;"aaa=?+++A?B?C?=ccc"
+(ew-quote-concat "aaa=?+A?B" "?C?=ccc") ;"aaa=?+++A?B?C?=ccc"
+(ew-quote-concat "a=?+A?B?C?" '("=?+US-ASCII?Q?z?=")) ;"a=?+++++A?B?C?==?+US-ASCII?Q?z?="
+(ew-quote-concat "a=?+A?B?C?=?+D?E?F?" '("=?+US-ASCII?Q?z?=")) ;"a=?+++A?B?C?=?+D?E?F?=?+US-ASCII?Q?z?="
+(ew-quote-concat "a=?+A?B?C?=?+D?E?F?=?+G?H?I?" '("=?+US-ASCII?Q?z?=")) ;"a=?+++A?B?C?=?+D?E?F?=?+++++G?H?I?==?+US-ASCII?Q?z?="
+(ew-quote-concat '("a=?++A?B?C?==?+++A?B?C?=c")) ;"a=?A?B?C?=?+A?B?C?=c"
+)
\ No newline at end of file
--- /dev/null
+(require 'lex)
+(require 'automata)
+(require 'ew-data)
+(require 'ew-parse)
+(provide 'ew-scan-m)
+
+(defmacro ew-scan-mime (scan col str)
+ `(let ((res (ew-make-anchor col str))
+ (mode 'token)
+ (p 0)
+ (q (length str))
+ r
+ nest)
+ (while (< p q)
+ (setq r p)
+ (cond
+ ((eq mode 'token)
+ (,scan
+ str p q
+ ([" \t"] (ew-add-frag res r p 'ew:raw-wsp-tok))
+ (?< (ew-add-token res r p 'ew:raw-lt-tok))
+ (?> (ew-add-token res r p 'ew:raw-gt-tok))
+ (?@ (ew-add-token res r p 'ew:raw-at-tok))
+ (?, (ew-add-token res r p 'ew:raw-comma-tok))
+ (?\; (ew-add-token res r p 'ew:raw-semicolon-tok))
+ (?: (ew-add-token res r p 'ew:raw-colon-tok))
+ (?/ (ew-add-token res r p 'ew:raw-slash-tok))
+ (?? (ew-add-token res r p 'ew:raw-question-tok))
+ (?= (ew-add-token res r p 'ew:raw-equal-tok))
+ ((?\r ?\n [" \t"])
+ (ew-add-frag res r p 'ew:raw-fold-tok))
+ ((?\r ?\n [^ " \t"])
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ ((+ [(?a ?z) (?A ?Z) (?0 ?9) "!#$%&'*+-.^_`{|}~" non-ascii])
+ (ew-add-token res r p 'ew:raw-atom-tok))
+ (?\" (ew-add-open res r p 'ew:raw-qs-begin-tok)
+ (setq mode 'quoted-string))
+ (?\[ (ew-add-open res r p 'ew:raw-dl-begin-tok)
+ (setq mode 'domain-literal))
+ (?\( (ew-add-open res r p 'ew:raw-cm-begin-tok)
+ (setq mode 'comment
+ nest 1))
+ (() (ew-add-frag res r q 'ew:raw-err-tok) (setq p q))))
+ ((eq mode 'quoted-string)
+ (,scan
+ str p q
+ (?\" (ew-add-close-token res r p 'ew:raw-qs-end-tok)
+ (setq mode 'token))
+ ((?\\ ?\r ?\n [" \t"])
+ (ew-add-frag res r p 'ew:raw-qs-qfold-tok))
+ ((?\\ ?\r ?\n [^ " \t"])
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ (((* [^ "\"\\ \t\r"]) (* (+ ?\r) [^ "\"\\ \t\r\n"] (* [^ "\"\\ \t\r"])) (* ?\r)
+ (?\r ?\n [" \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-qs-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r p 'ew:raw-qs-fold-tok))
+ (((* [^ "\"\\ \t\r"]) (* (+ ?\r) [^ "\"\\ \t\r\n"] (* [^ "\"\\ \t\r"])) (* ?\r)
+ (?\r ?\n [^ " \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-qs-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ ((?\\ (any))
+ (ew-add-frag res r p 'ew:raw-qs-qpair-tok))
+ ([" \t"]
+ (ew-add-frag res r p 'ew:raw-qs-wsp-tok))
+ (((* [^ "\"\\ \t\r"]) (* (+ ?\r) [^ "\"\\ \t\r\n"] (* [^ "\"\\ \t\r"])) (* ?\r))
+ (if (< r p)
+ (ew-add-frag res r p 'ew:raw-qs-texts-tok)
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))
+ ((eq mode 'domain-literal)
+ (,scan
+ str p q
+ (?\] (ew-add-close-token res r p 'ew:raw-dl-end-tok)
+ (setq mode 'token))
+ ((?\\ ?\r ?\n [" \t"])
+ (ew-add-frag res r p 'ew:raw-dl-qfold-tok))
+ ((?\\ ?\r ?\n [^ " \t"])
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ (((* [^ "[]\\ \t\r"]) (* (+ ?\r) [^ "[]\\ \t\r\n"] (* [^ "[]\\ \t\r"])) (* ?\r)
+ (?\r ?\n [" \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-dl-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r p 'ew:raw-dl-fold-tok))
+ (((* [^ "[]\\ \t\r"]) (* (+ ?\r) [^ "[]\\ \t\r\n"] (* [^ "[]\\ \t\r"])) (* ?\r)
+ (?\r ?\n [^ " \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-dl-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ ((?\\ (any))
+ (ew-add-frag res r p 'ew:raw-dl-qpair-tok))
+ ([" \t"]
+ (ew-add-frag res r p 'ew:raw-dl-wsp-tok))
+ (((* [^ "[]\\ \t\r"]) (* (+ ?\r) [^ "[]\\ \t\r\n"] (* [^ "[]\\ \t\r"])) (* ?\r))
+ (if (< r p)
+ (ew-add-frag res r p 'ew:raw-dl-texts-tok)
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))
+ ((eq mode 'comment)
+ (,scan
+ str p q
+ (?\( (ew-add-open res r p 'ew:raw-cm-nested-begin-tok)
+ (setq nest (1+ nest)))
+ (?\) (setq nest (1- nest))
+ (if (zerop nest)
+ (progn
+ (ew-add-close res r p 'ew:raw-cm-end-tok)
+ (setq mode 'token))
+ (ew-add-close res r p 'ew:raw-cm-nested-end-tok)))
+ ((?\\ ?\r ?\n [" \t"])
+ (ew-add-frag res r p 'ew:raw-cm-qfold-tok))
+ ((?\\ ?\r ?\n [^ " \t"])
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ (((* [^ "()\\ \t\r"]) (* (+ ?\r) [^ "()\\ \t\r\n"] (* [^ "()\\ \t\r"])) (* ?\r)
+ (?\r ?\n [" \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-cm-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r p 'ew:raw-cm-fold-tok))
+ (((* [^ "()\\ \t\r"]) (* (+ ?\r) [^ "()\\ \t\r\n"] (* [^ "()\\ \t\r"])) (* ?\r)
+ (?\r ?\n [^ " \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-cm-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ ((?\\ (any))
+ (ew-add-frag res r p 'ew:raw-cm-qpair-tok))
+ ([" \t"]
+ (ew-add-frag res r p 'ew:raw-cm-wsp-tok))
+ (((* [^ "()\\ \t\r"]) (* (+ ?\r) [^ "()\\ \t\r\n"] (* [^ "()\\ \t\r"])) (* ?\r))
+ (if (< r p)
+ (ew-add-frag res r p 'ew:raw-cm-texts-tok)
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))))
+ (ew-terminate res)
+ res))
+
+(defun ew-scan-unibyte-mime (col str)
+ (ew-scan-mime lex-scan-unibyte col str))
+(defun ew-scan-multibyte-mime (col str)
+ (ew-scan-mime lex-scan-multibyte col str))
+
+'(
+(npp
+ (mapcar
+ (lambda (frag) (cons (get frag 'type) (symbol-name frag)))
+ (ew-frag-list
+ (ew-scan-unibyte-mime
+ 0 " text/vnd.latex-z; charset=ISO-2022-JP"))))
+)
--- /dev/null
+(require 'lex)
+(require 'automata)
+(require 'ew-data)
+(require 'ew-parse)
+(provide 'ew-scan-s)
+
+(defmacro ew-scan-std11 (scan col str)
+ `(let ((res (ew-make-anchor col str))
+ (mode 'token)
+ (p 0)
+ (q (length str))
+ r
+ nest)
+ (while (< p q)
+ (setq r p)
+ (cond
+ ((eq mode 'token)
+ (,scan
+ str p q
+ ([" \t"] (ew-add-frag res r p 'ew:raw-wsp-tok))
+ (?< (ew-add-token res r p 'ew:raw-lt-tok))
+ (?> (ew-add-token res r p 'ew:raw-gt-tok))
+ (?@ (ew-add-token res r p 'ew:raw-at-tok))
+ (?, (ew-add-token res r p 'ew:raw-comma-tok))
+ (?\; (ew-add-token res r p 'ew:raw-semicolon-tok))
+ (?: (ew-add-token res r p 'ew:raw-colon-tok))
+ (?. (ew-add-token res r p 'ew:raw-dot-tok))
+ ((?\r ?\n [" \t"])
+ (ew-add-frag res r p 'ew:raw-fold-tok))
+ ((?\r ?\n [^ " \t"])
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ ((+ [(?a ?z) (?A ?Z) (?0 ?9) "!#$%&'*+-/=?^_`{|}~" non-ascii])
+ (ew-add-token res r p 'ew:raw-atom-tok))
+ (?\" (ew-add-open res r p 'ew:raw-qs-begin-tok)
+ (setq mode 'quoted-string))
+ (?\[ (ew-add-open res r p 'ew:raw-dl-begin-tok)
+ (setq mode 'domain-literal))
+ (?\( (ew-add-open res r p 'ew:raw-cm-begin-tok)
+ (setq mode 'comment
+ nest 1))
+ (() (ew-add-frag res r q 'ew:raw-err-tok) (setq p q))))
+ ((eq mode 'quoted-string)
+ (,scan
+ str p q
+ (?\" (ew-add-close-token res r p 'ew:raw-qs-end-tok)
+ (setq mode 'token))
+ ((?\\ ?\r ?\n [" \t"])
+ (ew-add-frag res r p 'ew:raw-qs-qfold-tok))
+ ((?\\ ?\r ?\n [^ " \t"])
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ (((* [^ "\"\\ \t\r"]) (* (+ ?\r) [^ "\"\\ \t\r\n"] (* [^ "\"\\ \t\r"])) (* ?\r)
+ (?\r ?\n [" \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-qs-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r p 'ew:raw-qs-fold-tok))
+ (((* [^ "\"\\ \t\r"]) (* (+ ?\r) [^ "\"\\ \t\r\n"] (* [^ "\"\\ \t\r"])) (* ?\r)
+ (?\r ?\n [^ " \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-qs-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ ((?\\ (any))
+ (ew-add-frag res r p 'ew:raw-qs-qpair-tok))
+ ([" \t"]
+ (ew-add-frag res r p 'ew:raw-qs-wsp-tok))
+ (((* [^ "\"\\ \t\r"]) (* (+ ?\r) [^ "\"\\ \t\r\n"] (* [^ "\"\\ \t\r"])) (* ?\r))
+ (if (< r p)
+ (ew-add-frag res r p 'ew:raw-qs-texts-tok)
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))
+ ((eq mode 'domain-literal)
+ (,scan
+ str p q
+ (?\] (ew-add-close-token res r p 'ew:raw-dl-end-tok)
+ (setq mode 'token))
+ ((?\\ ?\r ?\n [" \t"])
+ (ew-add-frag res r p 'ew:raw-dl-qfold-tok))
+ ((?\\ ?\r ?\n [^ " \t"])
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ (((* [^ "[]\\ \t\r"]) (* (+ ?\r) [^ "[]\\ \t\r\n"] (* [^ "[]\\ \t\r"])) (* ?\r)
+ (?\r ?\n [" \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-dl-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r p 'ew:raw-dl-fold-tok))
+ (((* [^ "[]\\ \t\r"]) (* (+ ?\r) [^ "[]\\ \t\r\n"] (* [^ "[]\\ \t\r"])) (* ?\r)
+ (?\r ?\n [^ " \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-dl-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ ((?\\ (any))
+ (ew-add-frag res r p 'ew:raw-dl-qpair-tok))
+ ([" \t"]
+ (ew-add-frag res r p 'ew:raw-dl-wsp-tok))
+ (((* [^ "[]\\ \t\r"]) (* (+ ?\r) [^ "[]\\ \t\r\n"] (* [^ "[]\\ \t\r"])) (* ?\r))
+ (if (< r p)
+ (ew-add-frag res r p 'ew:raw-dl-texts-tok)
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))
+ ((eq mode 'comment)
+ (,scan
+ str p q
+ (?\( (ew-add-open res r p 'ew:raw-cm-nested-begin-tok)
+ (setq nest (1+ nest)))
+ (?\) (setq nest (1- nest))
+ (if (zerop nest)
+ (progn
+ (ew-add-close res r p 'ew:raw-cm-end-tok)
+ (setq mode 'token))
+ (ew-add-close res r p 'ew:raw-cm-nested-end-tok)))
+ ((?\\ ?\r ?\n [" \t"])
+ (ew-add-frag res r p 'ew:raw-cm-qfold-tok))
+ ((?\\ ?\r ?\n [^ " \t"])
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ (((* [^ "()\\ \t\r"]) (* (+ ?\r) [^ "()\\ \t\r\n"] (* [^ "()\\ \t\r"])) (* ?\r)
+ (?\r ?\n [" \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-cm-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r p 'ew:raw-cm-fold-tok))
+ (((* [^ "()\\ \t\r"]) (* (+ ?\r) [^ "()\\ \t\r\n"] (* [^ "()\\ \t\r"])) (* ?\r)
+ (?\r ?\n [^ " \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-cm-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ ((?\\ (any))
+ (ew-add-frag res r p 'ew:raw-cm-qpair-tok))
+ ([" \t"]
+ (ew-add-frag res r p 'ew:raw-cm-wsp-tok))
+ (((* [^ "()\\ \t\r"]) (* (+ ?\r) [^ "()\\ \t\r\n"] (* [^ "()\\ \t\r"])) (* ?\r))
+ (if (< r p)
+ (ew-add-frag res r p 'ew:raw-cm-texts-tok)
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))))
+ (ew-terminate res)
+ res))
+
+(defun ew-scan-unibyte-std11 (col str)
+ (ew-scan-std11 lex-scan-unibyte col str))
+(defun ew-scan-multibyte-std11 (col str)
+ (ew-scan-std11 lex-scan-multibyte col str))
+
+'(
+(npp
+ (mapcar
+ 'symbol-plist
+ (ew-frag-list
+ (ew-scan-unibyte-std11
+ 0 " Tanaka Akira <akr@jaist.ac.jp> (Tanaka Akira)"))))
+
+(npp
+ (mapcar
+ (lambda (frag) (cons (get frag 'type) (symbol-name frag)))
+ (ew-frag-list
+ (ew-scan-unibyte-std11
+ 0 " Tanaka Akira <akr@jaist.ac.jp> (Tanaka Akira)"))))
+)
--- /dev/null
+(require 'lex)
+(require 'automata)
+(require 'ew-data)
+(require 'ew-parse)
+(provide 'ew-scan-u)
+
+(defmacro ew-scan-unstructured (scan col str)
+ `(let ((res (ew-make-anchor col str))
+ (p 0)
+ (q (length str))
+ r)
+ (while (< p q)
+ (setq r p)
+ (,scan
+ str p q
+ ([" \t"] (ew-add-frag res r p 'ew:raw-us-wsp-tok))
+ (((* [^ " \t\r"]) (* (+ ?\r) [^ " \t\r\n"] (* [^ " \t\r"])) (* ?\r)
+ (?\r ?\n [" \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-us-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r p 'ew:raw-us-fold-tok))
+ (((* [^ " \t\r"]) (* (+ ?\r) [^ " \t\r\n"] (* [^ " \t\r"])) (* ?\r)
+ (?\r ?\n [^ " \t"]))
+ (when (< r (- p 3))
+ (ew-add-frag res r (- p 3) 'ew:raw-us-texts-tok)
+ (setq r (- p 3)))
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
+ (((* [^ " \t\r"]) (* (+ ?\r) [^ " \t\r\n"] (* [^ " \t\r"])) (* ?\r))
+ (if (< r p)
+ (ew-add-frag res r p 'ew:raw-us-texts-tok)
+ (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))
+ (ew-terminate res)
+ res))
+
+(defun ew-scan-unibyte-unstructured (col str)
+ (ew-scan-unstructured lex-scan-unibyte col str))
+(defun ew-scan-multibyte-unstructured (col str)
+ (ew-scan-unstructured lex-scan-multibyte col str))
+
+'(
+(npp
+ (mapcar
+ (lambda (frag) (cons (get frag 'type) (symbol-name frag)))
+ (ew-frag-list
+ (ew-scan-unibyte-unstructured
+ 0 " Hello! =?US-ASCII?Q?Hello!?="))))
+
+(npp
+ (mapcar
+ (lambda (frag) (cons (get frag 'type) (symbol-name frag)))
+ (ew-frag-list
+ (ew-scan-unibyte-unstructured
+ 0 " \r\na"))))
+
+)
--- /dev/null
+(require 'closure)
+(require 'ew-line)
+(require 'ew-quote)
+(require 'mel)
+
+(provide 'ew-unit)
+
+(defconst ew-anchored-encoded-word-regexp
+ (concat "\\`" ew-encoded-word-regexp "\\'"))
+
+(defconst ew-b-regexp
+ (concat "\\`\\("
+ "[A-Za-z0-9+/]"
+ "[A-Za-z0-9+/]"
+ "[A-Za-z0-9+/]"
+ "[A-Za-z0-9+/]"
+ "\\)*"
+ "[A-Za-z0-9+/]"
+ "[A-Za-z0-9+/]"
+ "\\(==\\|"
+ "[A-Za-z0-9+/]"
+ "[A-Za-z0-9+/=]"
+ "\\)\\'"))
+
+(defconst ew-q-regexp "\\`\\([^=?]\\|=[0-9A-Fa-f][0-9A-Fa-f]\\)*\\'")
+
+(defconst ew-byte-decoder-alist
+ '(("B" . ew-b-decode)
+ ("Q" . ew-q-decode)))
+
+(defconst ew-byte-checker-alist
+ '(("B" . ew-b-check)
+ ("Q" . ew-q-check)))
+
+(defun ew-b-check (encoding encoded-text) (string-match ew-b-regexp encoded-text))
+(defun ew-q-check (encoding encoded-text) (string-match ew-q-regexp encoded-text))
+
+(defun ew-eword-p (str)
+ (let ((len (length str)))
+ (and
+ (<= 3 len)
+ (string= (substring str 0 2) "=?")
+ (string= (substring str (- len 2) len) "?="))))
+
+(defun ew-decode-eword (str &optional eword-filter1 eword-filter2)
+ (if (string-match ew-anchored-encoded-word-regexp str)
+ (let ((charset (match-string 1 str))
+ (encoding (match-string 2 str))
+ (encoded-text (match-string 3 str))
+ bdec cdec
+ bcheck
+ tmp)
+ (if (and (setq bdec (ew-byte-decoder encoding))
+ (setq cdec (ew-char-decoder charset)))
+ (if (or (null (setq bcheck (ew-byte-checker encoding)))
+ (funcall bcheck encoding encoded-text))
+ (progn
+ (setq tmp (closure-call cdec (funcall bdec encoded-text)))
+ (when eword-filter1 (setq tmp (closure-call eword-filter1 tmp)))
+ (setq tmp (ew-quote tmp))
+ (when eword-filter2 (setq tmp (closure-call eword-filter2 tmp)))
+ tmp)
+ (ew-quote str))
+ (ew-quote-eword charset encoding encoded-text)))
+ (ew-quote str)))
+
+(defun ew-byte-decoder (encoding)
+ (cdr (assoc (upcase encoding) ew-byte-decoder-alist)))
+
+(defun ew-byte-checker (encoding)
+ (cdr (assoc (upcase encoding) ew-byte-checker-alist)))
+
+(defalias 'ew-b-decode 'base64-decode-string)
+(defalias 'ew-q-decode 'q-encoding-decode-string)
+
+(defconst ew-charset-aliases
+ '((us-ascii . iso-8859-1)
+ (iso-2022-jp-2 . iso-2022-7bit-ss2)))
+(defun ew-char-decoder (charset)
+ (catch 'return
+ (setq charset (downcase charset))
+ (let ((sym (intern charset))
+ tmp cs)
+ (when (setq tmp (assq sym ew-charset-aliases))
+ (setq sym (cdr tmp)))
+ (setq cs (intern (concat (symbol-name sym) "-unix")))
+ (when (coding-system-p cs)
+ (throw 'return
+ (closure-make (lambda (str) (decode-coding-string str cs)) cs)))
+ nil)))
--- /dev/null
+(provide 'ew-util)
+
+(defmacro ew-cons* (seed &rest rest)
+ (setq rest (nreverse (cons seed rest))
+ seed (car rest)
+ rest (cdr rest))
+ (while rest
+ (setq seed `(cons ,(car rest) ,seed)
+ rest (cdr rest)))
+ seed)
+
+(defmacro ew-rcons* (seed &rest rest)
+ (while rest
+ (setq seed `(cons ,(car rest) ,seed)
+ rest (cdr rest)))
+ seed)
+
+(defmacro ew-rappend (a b)
+ `(append (reverse ,b) ,a))
(require 'mel)
(require 'mime-def)
+(require 'ew-dec)
+
(defgroup eword-decode nil
"Encoded-word decoding"
:group 'mime)
+;;; TEST
+
+(defmacro rotate-memo (var val)
+ `(progn
+ (unless (boundp ',var) (setq ,var ()))
+ (setq ,var (cons ,val ,var))
+ (let ((tmp (last ,var (- (length ,var) 100))))
+ (when tmp (setcdr tmp nil)))
+ ,var))
;;; @ variables
;;;
default-mime-charset.
If SEPARATOR is not nil, it is used as header separator."
(interactive "*")
+ (rotate-memo args-eword-decode-header
+ (list code-conversion))
+ (unless code-conversion
+ (message "eword-decode-header is called with no code-conversion"))
(if (and code-conversion
(not (mime-charset-to-coding-system code-conversion)))
(setq code-conversion default-mime-charset))
(save-restriction
(std11-narrow-to-header separator)
(if code-conversion
- (let (beg p end field-name len)
+ (let (beg p end field-name field-body len)
(goto-char (point-min))
(while (re-search-forward std11-field-head-regexp nil t)
(setq beg (match-beginning 0)
p (match-end 0)
field-name (buffer-substring beg (1- p))
- len (string-width field-name)
- field-name (intern (capitalize field-name))
- end (std11-field-end))
- (cond ((memq field-name eword-decode-ignored-field-list)
- ;; Don't decode
- )
- ((memq field-name eword-decode-structured-field-list)
- ;; Decode as structured field
- (let ((body (buffer-substring p end)))
- (delete-region p end)
- (insert (eword-decode-and-fold-structured-field
- body (1+ len)))
- ))
- (t
- ;; Decode as unstructured field
- (save-restriction
- (narrow-to-region beg (1+ end))
- (goto-char p)
- (eword-decode-region beg (point-max) 'unfold nil
- code-conversion)
- (goto-char (point-max))
- )))))
+ end (std11-field-end)
+ field-body (buffer-substring p end))
+ (delete-region p end)
+ (insert (ew-decode-field field-name (ew-lf-crlf-to-crlf field-body)))
+ ))
(eword-decode-region (point-min) (point-max) t nil nil)
))))
If an encoded-word is broken or your emacs implementation can not
decode the charset included in it, it is not decoded."
+ (rotate-memo args-eword-decode-and-unfold-structured-field
+ (list string))
(let ((tokens (eword-lexical-analyze string 'must-unfold))
(result ""))
(while tokens
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
if there are in decoded encoded-words (generated by bad manner MUA
such as a version of Net$cape)."
+ (rotate-memo args-eword-decode-structured-field-body
+ (list string must-unfold start-column max-column))
(if start-column
- ;; fold with max-column
- (eword-decode-and-fold-structured-field
- string start-column max-column must-unfold)
+ ;; fold with max-column (folding is not implemented.)
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field (make-string (1- start-column) ?X)
+ (ew-lf-crlf-to-crlf string)
+ (if must-unfold 'ew-cut-cr-lf))))
+ (if must-unfold (ew-cut-cr-lf decoded) decoded))
;; Don't fold
- (mapconcat (function eword-decode-token)
- (eword-lexical-analyze string must-unfold)
- "")
- ))
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field ""
+ (ew-lf-crlf-to-crlf string)
+ (if must-unfold 'ew-cut-cr-lf))))
+ (if must-unfold (ew-cut-cr-lf decoded) decoded))))
(defun eword-decode-unstructured-field-body (string &optional must-unfold)
"Decode non us-ascii characters in STRING as unstructured field body.
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
if there are in decoded encoded-words (generated by bad manner MUA
such as a version of Net$cape)."
- (eword-decode-string string must-unfold default-mime-charset))
+ (rotate-memo args-eword-decode-unstructured-field-body
+ (list string must-unfold))
+ (let ((decoded (ew-decode-field ""
+ (ew-lf-crlf-to-crlf string)
+ (if must-unfold 'ew-cut-cr-lf))))
+ (if must-unfold
+ (ew-cut-cr-lf decoded)
+ decoded)))
(defun eword-extract-address-components (string)
"Extract full name and canonical address from STRING.
--- /dev/null
+(require 'emu)
+(require 'rx)
+(require 'automata)
+(provide 'lex)
+
+(put 'lex-scan-multibyte 'lisp-indent-function 3)
+(put 'lex-scan-unibyte 'lisp-indent-function 3)
+
+;;; automata generation
+
+(defun lex-automata (rx)
+ (let* ((rx (rx-simplify rx))
+ (stack (list rx)) ; list of rx
+ (table (list (rx-cons* rx 0 (lex-make-box (list 'd1 'd2)))))
+ ; list of (rx id . box-for-reverse-links)
+ (states ()) ; list of (id act trans . box-for-reverse-links)
+ ; where trans = list of (pc id . box-for-reverse-links)
+ (next-id 1)
+ tbl-ent box id pcs act pc trans rx-stepped p)
+ (while (consp stack)
+ (setq rx (car stack)
+ stack (cdr stack)
+ tbl-ent (assoc rx table)
+ id (cadr tbl-ent)
+ box (cddr tbl-ent)
+ pcs (rx-head-pcs rx)
+ act (rx-head-act rx)
+ trans ())
+ (while (consp pcs)
+ (setq pc (car pcs)
+ pcs (cdr pcs)
+ rx-stepped (rx-step rx pc)
+ p (assoc rx-stepped table))
+ (if p
+ (progn
+ (setq trans (cons (cons pc (cdr p)) trans))
+ (lex-add-box (cddr p) id))
+ (setq p (rx-cons* rx-stepped next-id (lex-make-box (list id)))
+ trans (cons (cons pc (cdr p)) trans)
+ table (cons p table)
+ next-id (1+ next-id)
+ stack (cons rx-stepped stack))))
+ (setq states
+ (cons (rx-cons* id act trans box)
+ states)))
+ states))
+
+;;; automata coding
+
+(defvar lex-pc-var (make-symbol "pc"))
+(defvar lex-act-var (make-symbol "act"))
+(defvar lex-escape-tag (make-symbol "esc"))
+
+(defun lex-gen-machine (states cs acts read-macro save-macro)
+ `(let (,lex-pc-var ,lex-act-var)
+ (catch ',lex-escape-tag
+ (automata
+ ,lex-pc-var 0
+ ,@(mapcar
+ (lambda (s) (lex-gen-state s cs read-macro save-macro))
+ states)))
+ (automata-branch
+ ,lex-act-var ,(apply 'natset-single (mapcar 'car acts)) automata-never-fail
+ ,@(mapcar
+ (lambda (act) `(,(natset-single (car act)) nil ,@(cdr act)))
+ acts))))
+
+(defun lex-gen-state (s cs read-macro save-macro)
+ (let ((id (nth 0 s))
+ (act (nth 1 s))
+ (trans (nth 2 s)))
+ `(,id
+ (progn
+ ,@(if act
+ `((lex-match ,(cdr act)) (,save-macro))
+ ())
+ ,@(if (consp trans) `((,read-macro ,lex-pc-var))))
+ (lex-fail)
+ ,@(mapcar
+ (lambda (tr) `(,(let ((l (member (car tr) cs)))
+ (if (null (cdr l))
+ (natset-seg (car l))
+ (natset-seg (car l) (1- (cadr l)))))
+ ,(cadr tr)))
+ trans))))
+
+;;; internal macros
+
+(defmacro lex-match (id)
+ `(setq ,lex-act-var ',id))
+(defmacro lex-fail ()
+ `(throw ',lex-escape-tag nil))
+
+;;; user interface macro
+
+;;; multibyte
+
+(defvar lex-scan-multibyte-str-var (make-symbol "str"))
+(defvar lex-scan-multibyte-ptr-var (make-symbol "ptr"))
+(defvar lex-scan-multibyte-end-var (make-symbol "end"))
+(defvar lex-scan-multibyte-mch-var (make-symbol "mch"))
+
+(defmacro lex-scan-multibyte-read (pc)
+ `(if (< ,lex-scan-multibyte-ptr-var ,lex-scan-multibyte-end-var)
+ (setq ,pc (sref ,lex-scan-multibyte-str-var ,lex-scan-multibyte-ptr-var)
+ ,lex-scan-multibyte-ptr-var (char-next-index ,pc ,lex-scan-multibyte-ptr-var)
+ ,pc (char-int ,pc))
+ (lex-fail)))
+
+(defmacro lex-scan-multibyte-save ()
+ `(setq ,lex-scan-multibyte-mch-var ,lex-scan-multibyte-ptr-var))
+
+(defmacro lex-scan-multibyte (str start end &rest clauses)
+ (if (not start) (setq start 0))
+ (if (not end) (setq end `(length ,lex-scan-multibyte-str-var)))
+ (let ((id 1) (rx ()) (acts ()) tmp code
+ (restore-code (if (symbolp start) `(setq ,start ,lex-scan-multibyte-mch-var))))
+ (while (consp clauses)
+ (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
+ acts (cons (cons id (cons restore-code (cdar clauses))) acts)
+ id (1+ id)
+ clauses (cdr clauses)))
+ (setq rx (rx-alt rx)
+ tmp (rx-categolize-char (rx-desugar rx)))
+ `(let* ((,lex-scan-multibyte-str-var ,str)
+ (,lex-scan-multibyte-ptr-var ,start)
+ (,lex-scan-multibyte-end-var ,end)
+ ,lex-scan-multibyte-mch-var)
+ ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-multibyte-read 'lex-scan-multibyte-save))))
+
+;;; unibyte
+
+(defvar lex-scan-unibyte-str-var (make-symbol "str"))
+(defvar lex-scan-unibyte-ptr-var (make-symbol "ptr"))
+(defvar lex-scan-unibyte-end-var (make-symbol "end"))
+(defvar lex-scan-unibyte-mch-var (make-symbol "mch"))
+
+(defmacro lex-scan-unibyte-read (pc)
+ `(if (< ,lex-scan-unibyte-ptr-var ,lex-scan-unibyte-end-var)
+ (setq ,pc (aref ,lex-scan-unibyte-str-var ,lex-scan-unibyte-ptr-var)
+ ,lex-scan-unibyte-ptr-var (1+ ,lex-scan-unibyte-ptr-var)
+ ,pc (char-int ,pc))
+ (lex-fail)))
+
+(defmacro lex-scan-unibyte-save ()
+ `(setq ,lex-scan-unibyte-mch-var ,lex-scan-unibyte-ptr-var))
+
+(defmacro lex-scan-unibyte (str start end &rest clauses)
+ (if (not start) (setq start 0))
+ (if (not end) (setq end `(length ,lex-scan-unibyte-str-var)))
+ (let ((id 1) (rx ()) (acts ()) tmp code
+ (restore-code (if (symbolp start) `(setq ,start ,lex-scan-unibyte-mch-var))))
+ (while (consp clauses)
+ (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
+ acts (cons (cons id (cons restore-code (cdar clauses))) acts)
+ id (1+ id)
+ clauses (cdr clauses)))
+ (setq rx (rx-alt rx)
+ tmp (rx-categolize-char (rx-desugar rx)))
+ `(let* ((,lex-scan-unibyte-str-var ,str)
+ (,lex-scan-unibyte-ptr-var ,start)
+ (,lex-scan-unibyte-end-var ,end)
+ ,lex-scan-unibyte-mch-var)
+ ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-unibyte-read 'lex-scan-unibyte-save))))
+
+;;; utilities
+
+(defun lex-make-box (val)
+ (list val))
+(defalias 'lex-box-ref 'car)
+
+(defun lex-add-box (box val)
+ (if (not (member val (car box)))
+ (setcar box (cons val (car box)))))
+
+;;; testing
+'(
+
+ (mapcar (lambda (v) (set v (intern (symbol-name (symbol-value v)))))
+ '(lex-pc-var
+ lex-act-var
+ lex-escape-tag
+ lex-scan-multibyte-str-var
+ lex-scan-multibyte-ptr-var
+ lex-scan-multibyte-end-var
+ lex-scan-multibyte-mch-var
+ lex-scan-unibyte-str-var
+ lex-scan-unibyte-ptr-var
+ lex-scan-unibyte-end-var
+ lex-scan-unibyte-mch-var))
+
+ (lex-scan-multibyte
+ "aaa" 0 3
+ (?a 'a))
+
+)
--- /dev/null
+;; ---------------------------------------------------------------------- ;;
+;; FICHIER : lr-dvr.scm ;;
+;; DATE DE CREATION : Fri May 31 15:47:05 1996 ;;
+;; DERNIERE MODIFICATION : Fri May 31 15:51:13 1996 ;;
+;; ---------------------------------------------------------------------- ;;
+;; Copyright (c) 1996 Dominique Boucher ;;
+;; ---------------------------------------------------------------------- ;;
+;; The LR parser driver ;;
+;; ;;
+;; lr-dvr.scm is part of the lalr.scm distribution which is free ;;
+;; software; you can redistribute it and/or modify ;;
+;; it under the terms of the GNU General Public License as published by ;;
+;; the Free Software Foundation; either version 2, or (at your option) ;;
+;; any later version. ;;
+;; ;;
+;; lalr.scm is distributed in the hope that it will be useful, ;;
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;
+;; GNU General Public License for more details. ;;
+;; ;;
+;; You should have received a copy of the GNU General Public License ;;
+;; along with lalr.scm; see the file COPYING. If not, write to ;;
+;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. ;;
+;; ---------------------------------------------------------------------- ;;
+
+;; 1998/08/16: Tanaka Akira <akr@jaist.ac.jp> transplants from Scheme to Emacs-Lisp.
+
+(provide 'lr-driver)
+
+(defconst lr-max-stack-size 500)
+
+(defun lr-push (stack sp new-cat goto-table lval)
+ (let* ((state (aref stack sp))
+ (new-state (cdr (assq new-cat (aref goto-table state))))
+ (new-sp (+ sp 2)))
+ (if (>= new-sp lr-max-stack-size)
+ (error "PARSE ERROR : stack overflow")
+ (progn
+ (aset stack new-sp new-state)
+ (aset stack (- new-sp 1) lval)
+ new-sp))))
+
+(defun lr-parse (lexerp errorp action-table goto-table reduction-table token-defs)
+ (let ((stack (make-vector lr-max-stack-size 0)) (sp 0) (input (funcall lexerp)))
+ (catch 'parser
+ (while t
+ (let* ((state (aref stack sp))
+ (i (car input))
+ (act (let* ((l (aref action-table state)) (y (assq i l))) (if y (cdr y) (cdar l)))))
+
+ (cond
+
+ ;; Input succesfully parsed
+ ((eq act 'accept)
+ (throw 'parser (aref stack 1)))
+
+ ;; Syntax error in input
+ ((eq act '*error*)
+ (throw 'parser
+ (funcall errorp "PARSE ERROR : unexpected token : "
+ (cdr (assq i token-defs)))))
+
+ ;; Shift current token on top of the stack
+ ((>= act 0)
+ (aset stack (+ sp 1) (cdr input))
+ (aset stack (+ sp 2) act)
+ (setq sp (+ sp 2))
+ (setq input (funcall lexerp)))
+
+ ;; Reduce by rule (- act)
+ (t
+ (setq sp (funcall (aref reduction-table (- act)) stack sp goto-table)))))))))
--- /dev/null
+(provide 'natset)
+
+;;; pacage for set of natural number.
+;; (natural number includes zero.)
+
+;;; predicates
+
+(defun natset-empty-p (ns)
+ "Returns t if NS is empty."
+ (equal ns ()))
+
+(defun natset-full-p (ns)
+ "Returns t if NS is full."
+ (equal ns '(0)))
+
+(defun natset-closed-p (ns)
+ "Returns t if NS is closed."
+ (= (logand (length ns) 1) 0))
+
+(defun natset-open-p (ns)
+ "Returns t if NS is open."
+ (= (logand (length ns) 1) 1))
+
+(defun natset-has-p (ns i)
+ "Returns t if I is in NS."
+ (not (natset-empty-p (natset-intersection (natset-single i) ns))))
+
+(defun natset-has-intersection-p (ns1 ns2)
+ "Returns t if the intersection of NS1 and NS2 is not empty."
+ (not (natset-empty-p (natset-intersection ns1 ns2))))
+
+(defun natset-include-p (ns1 ns2)
+ "Returns t if NS1 includes NS2."
+ (equal ns1 (natset-union ns1 ns2)))
+
+;;; accessor
+
+(defun natset-start (ns)
+ "Returns start element in NS."
+ (if (natset-empty-p ns)
+ (error "natset empty" ns))
+ (car ns))
+
+;;; primitive constructor
+
+(defun natset-empty ()
+ "Returns a empty set.
+{}"
+ ())
+
+(defun natset-full ()
+ "Returns a full set.
+{i | 0 <= i}"
+ '(0))
+
+(defun natset-single (&rest elts)
+ "Returns a set contains singleton elements.
+{i | i in ELTS}"
+ (let ((ns (natset-empty)))
+ (while elts
+ (setq ns (natset-union ns (natset-seg (car elts) (car elts)))
+ elts (cdr elts)))
+ ns))
+
+(defun natset-seg (start &optional end)
+ "Returns a set contains one segment.
+{i | START <= i and i <= END}
+
+If END is nil, Return the set {i | START <= i}"
+ (if end
+ (list start (1+ end))
+ (list start)))
+
+;;; complex constructor
+
+(defun natset-start-set (ns)
+ "Returns a set contains start boundaries for NS.
+{i | NS does not contains i-1 and NS contains i}"
+ (let ((res ()))
+ (while ns
+ (setq res (cons (1+ (car ns)) (cons (car ns) res))
+ ns (cddr ns)))
+ (nreverse res)))
+
+(defun natset-end-set (ns)
+ "Returns a set contains end boundaries for NS.
+{i | NS contains i-1 and NS does not contains i}"
+ (let ((res ()))
+ (setq ns (cdr ns))
+ (while ns
+ (setq res (cons (1+ (car ns)) (cons (car ns) res))
+ ns (cddr ns)))
+ (nreverse res)))
+
+(defun natset-boundary-set (ns)
+ "Returns a set contains start and end boundaries for NS.
+{i | NS contains i-1 xor NS does not contains i}"
+ (natset-union (natset-start-set ns) (natset-end-set ns)))
+
+(defun natset-minmax (ns)
+ "Returns a set contains a range from minimum to maximam of NS.
+{i | There exists j, k in NS, j <= i <= k}"
+ (cond
+ ((null ns) ())
+ ((natset-open-p ns) (list (car ns)))
+ (t
+ (list (car ns) (nth (1- (length ns)) ns)))))
+
+;;; set operation
+
+(defun natset-negate (ns)
+ "Returns negated set.
+{i | 0 <= i and NS does not contains i}"
+ (if (and (consp ns) (= (car ns) 0))
+ (cdr ns)
+ (cons 0 ns)))
+
+(defun natset-union (&rest nss)
+ "Returns unioned set.
+{i | There exists ns in NSS s.t ns contains i}"
+ (let ((ns (natset-empty)))
+ (while nss
+ (setq ns (natset-union2 ns (car nss))
+ nss (cdr nss)))
+ ns))
+
+(defun natset-intersection (&rest nss)
+ "Returns intersectioned set.
+{i | For all ns in NSS, ns contains i}"
+ (natset-negate (apply 'natset-union (mapcar 'natset-negate nss))))
+
+(defun natset-sub (ns &rest nss)
+ "Returns subtracted set.
+{i | NS contains i and for all ns in NSS, ns does not contains i}"
+ (setq ns (natset-intersection ns (natset-negate (apply 'natset-union nss)))))
+
+;;; enumeration
+
+(defun natset-enum (ns)
+ (if (natset-open-p ns)
+ (error "natset open" ns))
+ (let ((res ()) i j)
+ (while ns
+ (setq i (car ns)
+ j (cadr ns)
+ ns (cddr ns))
+ (while (< i j)
+ (setq res (cons i res)
+ i (1+ i))))
+ (nreverse res)))
+
+;;; code generation
+
+(defun natset-take-seg (ns)
+ (cond
+ ((null ns) (error "NS empty" ns))
+ ((null (cdr ns)) (cons ns ()))
+ (t (cons (list (car ns) (cadr ns)) (cddr ns)))))
+
+(defun natset-valid-filter (ns valid)
+ "Returns a filtered set R.
+R includes intersection between VALID and NS.
+R does not include intersecton between VALID and negated NS.
+Element does not contained in VALID is unspecified."
+ (let* ((res (natset-intersection valid ns))
+ (len (length res))
+ (u-set (natset-negate valid))
+ tmp1 tmp2 tmpl)
+ (while u-set
+ (setq tmp1 (natset-take-seg u-set))
+ (setq tmp2 (natset-union (car tmp1) res)
+ tmpl (length tmp2))
+ (if (or (< tmpl len) (and (= tmpl len) (equal 0 (car tmp2))))
+ (setq res tmp2
+ len (length tmp2)))
+ (setq u-set (cdr tmp1)))
+ res))
+
+(defun natset-gen-pred-exp (ns var &optional valid)
+ "Returns a expression to test value of variable VAR is in NS or not.
+
+If VALID is not nil, the condition value of VAR is in VALID is assumed.
+It is impossible to set VALID to empty set because empty set is represented as nil."
+ (if valid (setq ns (natset-valid-filter ns valid)))
+ (cond
+ ((null ns) nil)
+ ((= (car ns) 0) (natset-gen-pred-exp-internal (cdr ns) var nil 0))
+ (t (natset-gen-pred-exp-internal ns var t 0))))
+
+;;; internal primitive
+
+(defun natset-union2 (ns1 ns2)
+ (let (res start (end t))
+ (while (and end (or (consp ns1) (consp ns2)))
+ (if (and (consp ns1) (or (null ns2) (<= (car ns1) (car ns2))))
+ (setq start (car ns1)
+ end (cadr ns1)
+ ns1 (cddr ns1))
+ (setq start (car ns2)
+ end (cadr ns2)
+ ns2 (cddr ns2)))
+ (while (and end
+ (or (and (consp ns1) (<= (car ns1) end))
+ (and (consp ns2) (<= (car ns2) end))))
+ (if (and (consp ns1) (<= (car ns1) end))
+ (progn
+ (if (or (null (cadr ns1)) (< end (cadr ns1))) (setq end (cadr ns1)))
+ (setq ns1 (cddr ns1)))
+ (progn
+ (if (or (null (cadr ns2)) (< end (cadr ns2))) (setq end (cadr ns2)))
+ (setq ns2 (cddr ns2)))))
+ (setq res (cons start res))
+ (if end (setq res (cons end res))))
+ (nreverse res)))
+
+; n is greater or equal 2.
+; returns one of 1 .. n-1
+; (In reality, returns greatest 2^i - 1)
+(defun natset-divide (n)
+ (let ((l 2) tmp)
+ (while (< (setq tmp (lsh l 1)) n)
+ (setq l tmp))
+ (1- l)))
+
+(defun natset-gen-pred-exp-internal (ns var bool base)
+ (cond
+ ((null ns) (not bool))
+ ((null (cdr ns))
+ (if (<= (car ns) base)
+ bool
+ (if bool `(<= ,(car ns) ,var) `(< ,var ,(car ns)))))
+ (t
+ (let* ((div (natset-divide (length ns)))
+ (l (append ns ()))
+ (g (nthcdr (1- div) l))
+ (m (cadr g))
+ )
+ (setq g (prog1 (cddr g) (setcdr g ())))
+ `(if (< ,var ,m)
+ ,(natset-gen-pred-exp-internal l var bool base)
+ ,(natset-gen-pred-exp-internal
+ g var (if (= (logand div 1) 1) bool (not bool)) m))))))
--- /dev/null
+;;; regular expression
+
+;;; core
+;; rx ::= [] {}
+;; | () {""}
+;; | (* . rx) closure
+;; | (| . rxs) alternative
+;; | (rx . rx) concatination
+;; | (cc c1 c2 c3 ...) character class (natset)
+;;; sugar
+;; | (+ . rx) positive closure
+;; | "..." string
+;; | c character
+;; | (non-ascii) (cc 128)
+;; | [ range ... ]
+;; | [ ^ range ... ]
+;;; internal
+;; | pc primitive character class
+;; | (act . int) action
+
+;; range ::= c
+;; | "..."
+;; | (c1 c2) [c1 c2]
+;; | (c1 . c2) [c1 c2)
+;; | (c) [c1 inf)
+;; | non-ascii
+
+(require 'natset)
+(require 'automata)
+(provide 'rx)
+
+(defun rx-empty-p (rx) (equal rx []))
+(defun rx-null-p (rx) (equal rx ()))
+(defun rx-act-p (rx) (and (consp rx) (eq (car rx) 'act)))
+(defun rx-cc-p (rx) (and (consp rx) (eq (car rx) 'cc)))
+(defalias 'rx-pc-p 'integerp)
+
+(defun rx-clo-p (rx) (and (consp rx) (eq (car rx) '*)))
+(defun rx-alt-p (rx) (and (consp rx) (eq (car rx) '|)))
+(defun rx-con-p (rx) (and (consp rx) (or (null (car rx)) (not (symbolp (car rx))))))
+
+(defun rx-clo (rx)
+ (cond
+ ((rx-empty-p rx) ())
+ ((rx-null-p rx) rx)
+ ((rx-act-p rx) rx)
+ ((rx-clo-p rx) rx)
+ (t (cons '* rx))))
+(defun rx-alt (rxs)
+ (cond
+ ((null rxs) [])
+ ((null (cdr rxs)) (car rxs))
+ (t (cons '| rxs))))
+(defun rx-alt2 (r1 r2)
+ (cond
+ ((rx-empty-p r1) r2)
+ ((rx-empty-p r2) r1)
+ ((equal r1 r2) r1)
+ (t (list '| r1 r2))))
+(defun rx-con (r1 r2)
+ (cond
+ ((rx-empty-p r1) [])
+ ((rx-empty-p r2) [])
+ ((rx-null-p r1) r2)
+ ((rx-null-p r2) r1)
+ ((and (rx-act-p r1) (rx-act-p r2)) r2)
+ (t (cons r1 r2))))
+(defun rx-act (obj) (cons 'act obj))
+(defun rx-cc (cs) (cons 'cc cs))
+
+;;; regular expression preprocessing
+
+(defun rx-range-to-ns (range)
+ (cond
+ ((char-or-char-int-p range)
+ (natset-single (char-int range)))
+ ((stringp range)
+ (let ((ns (natset-empty)) (chars (string-to-int-list range)))
+ (while chars
+ (setq ns (natset-union ns (natset-single (car chars)))
+ chars (cdr chars)))
+ ns))
+ ((eq range 'non-ascii)
+ (natset-seg 128))
+ ((and (consp range)
+ (null (cdr range))
+ (char-or-char-int-p (car range)))
+ (natset-seg (car range)))
+ ((and (consp range)
+ (consp (cdr range))
+ (null (cddr range))
+ (char-or-char-int-p (car range))
+ (char-or-char-int-p (cadr range)))
+ (natset-seg (char-int (car range)) (char-int (cadr range))))
+ ((and (consp range)
+ (char-or-char-int-p (car range))
+ (char-or-char-int-p (cdr range)))
+ (natset-seg (char-int (car range)) (1- (char-int (cdr range)))))
+ (t (error "not range %s" range))))
+
+(defun rx-vcc-to-rx (vcc)
+ (let ((l (append vcc ())) neg ns)
+ (if (eq (car l) '^)
+ (setq l (cdr l)
+ neg t))
+ (setq l (mapcar 'rx-range-to-ns l))
+ (setq ns (natset-empty))
+ (while l
+ (setq ns (natset-union ns (car l))
+ l (cdr l)))
+ (if neg (setq ns (natset-negate ns)))
+ (if (natset-empty-p ns)
+ []
+ (rx-cc ns))))
+
+(defun rx-desugar (rx)
+ (cond
+ ((stringp rx) (rx-desugar (string-to-int-list rx)))
+ ((vectorp rx) (rx-vcc-to-rx rx))
+ ((char-or-char-int-p rx) (rx-cc (natset-single (char-int rx))))
+ ((and (consp rx) (eq (car rx) '+)) (let ((r (rx-desugar (cdr rx)))) (rx-con r (rx-clo r))))
+ ((and (consp rx) (eq (car rx) 'non-ascii)) (rx-cc (natset-seg 128)))
+ ((and (consp rx) (eq (car rx) 'any)) (rx-cc (natset-full)))
+ ((rx-empty-p rx) rx)
+ ((rx-null-p rx) rx)
+ ((rx-act-p rx) rx)
+ ((rx-cc-p rx) rx)
+ ((rx-clo-p rx) (rx-clo (rx-desugar (cdr rx))))
+ ((rx-alt-p rx) (rx-alt (mapcar 'rx-desugar (cdr rx))))
+ ((rx-con-p rx) (rx-con (rx-desugar (car rx)) (rx-desugar (cdr rx))))
+ (t (error "not rx %s" rx))))
+
+(defun rx-collect-cc (rx &optional cs)
+ (cond
+ ((rx-empty-p rx) cs)
+ ((rx-null-p rx) cs)
+ ((rx-act-p rx) cs)
+ ((rx-cc-p rx) (append (cdr rx) cs))
+ ((rx-clo-p rx) (rx-collect-cc (cdr rx) cs))
+ ((rx-alt-p rx)
+ (setq rx (cdr rx))
+ (while (consp rx)
+ (setq cs (rx-collect-cc (car rx) cs)
+ rx (cdr rx)))
+ cs)
+ ((rx-con-p rx) (rx-collect-cc (car rx) (rx-collect-cc (cdr rx) cs)))
+ (t (error "not rx %s" rx))))
+
+(defun rx-cc-to-pc (rx cs)
+ (cond
+ ((rx-empty-p rx) rx)
+ ((rx-null-p rx) rx)
+ ((rx-act-p rx) rx)
+ ((rx-cc-p rx)
+ (setq rx (cdr rx))
+ (let (res)
+ (while (and (consp rx) (consp (cdr rx)))
+ (let ((start (car rx)) (end (cadr rx)))
+ (setq res (rx-filter (lambda (c) (and (<= start c) (< c end))) cs res)
+ rx (cddr rx))))
+ (if (consp rx)
+ (let ((start (car rx)))
+ (setq res (rx-filter (lambda (c) (<= start c)) cs res))))
+ (rx-alt (rx-sort-int res))))
+ ((rx-clo-p rx) (rx-clo (rx-cc-to-pc (cdr rx) cs)))
+ ((rx-alt-p rx) (rx-alt (mapcar (lambda (r) (rx-cc-to-pc r cs)) (cdr rx))))
+ ((rx-con-p rx) (rx-con (rx-cc-to-pc (car rx) cs) (rx-cc-to-pc (cdr rx) cs)))
+ (t (error "not rx %s" rx))))
+
+(defun rx-categolize-char (rx)
+ (let ((cs (rx-sort-int (rx-collect-cc rx))))
+ (cons
+ (rx-cc-to-pc rx cs)
+ cs)))
+
+;;; simplification
+
+(defun rx-nullable-p (rx)
+ (cond
+ ((rx-empty-p rx) nil)
+ ((rx-null-p rx) t)
+ ((rx-act-p rx) t)
+ ((rx-pc-p rx) nil)
+ ((rx-clo-p rx) t)
+ ((rx-alt-p rx)
+ (setq rx (cdr rx))
+ (while (and (consp rx) (not (rx-nullable-p (car rx))))
+ (setq rx (cdr rx)))
+ (consp rx))
+ ((rx-con-p rx)
+ (and (rx-nullable-p (car rx)) (rx-nullable-p (cdr rx))))
+ (t (error "not rx %s" rx))))
+
+(defun rx-simplify (rx)
+ (cond
+ ((rx-empty-p rx) rx)
+ ((rx-null-p rx) rx)
+ ((rx-act-p rx) rx)
+ ((rx-pc-p rx) rx)
+ ((rx-clo-p rx)
+ (rx-clo (rx-simplify (cdr rx))))
+ ((rx-alt-p rx)
+ (let ((in (cdr rx)) (out ())
+ already-simplified-list already-simplified)
+ (while (consp in)
+ (setq rx (car in)
+ in (cdr in)
+ already-simplified (car already-simplified-list)
+ already-simplified-list (cdr already-simplified-list))
+ (if (rx-alt-p rx)
+ (setq in (append (cdr rx) in))
+ (progn
+ (setq rx (if already-simplified rx (rx-simplify rx)))
+ (cond
+ ((rx-empty-p rx)) ; [] is identity element for alternation.
+ ((rx-alt-p rx)
+ (setq in (append (cdr rx) in)
+ already-simplified-list (append (make-list (length (cdr rx)) nil) already-simplified-list)))
+ ((not (member rx out))
+ (setq out (cons rx out)))))))
+ (rx-alt (rx-sort-rx (reverse out)))))
+ ((rx-con-p rx)
+ (catch 'return
+ (let ((in (list (car rx) (cdr rx))) (out ())
+ already-simplified-list already-simplified)
+ (while (consp in)
+ (setq rx (car in)
+ in (cdr in)
+ already-simplified (car already-simplified-list)
+ already-simplified-list (cdr already-simplified-list))
+ (if (rx-con-p rx)
+ (setq in (rx-cons* (car rx) (cdr rx) in))
+ (progn
+ (setq rx (if already-simplified rx (rx-simplify rx)))
+ (cond
+ ((rx-empty-p rx) ; [] is zero element for concatination.
+ (throw 'return []))
+ ((rx-null-p rx)) ; () is identity element for concatination.
+ ((rx-con-p rx)
+ (setq in (rx-cons* (car rx) (cdr rx) in))
+ already-simplified-list (rx-cons* t t already-simplified-list))
+ (t
+ (setq out (cons rx out)))))))
+ (if (= (length out) 1)
+ (car out)
+ (nreverse out)))))
+ (t (error "not rx %s" rx))))
+
+;;; head property
+
+(defun rx-head-pcs (rx &optional res)
+ (cond
+ ((rx-empty-p rx) res)
+ ((rx-null-p rx) res)
+ ((rx-act-p rx) res)
+ ((rx-pc-p rx) (if (member rx res) res (cons rx res)))
+ ((rx-clo-p rx) (rx-head-pcs (cdr rx) res))
+ ((rx-alt-p rx)
+ (setq rx (cdr rx))
+ (while (consp rx)
+ (setq res (rx-head-pcs (car rx) res)
+ rx (cdr rx)))
+ res)
+ ((rx-con-p rx)
+ (setq res (rx-head-pcs (car rx) res))
+ (if (rx-nullable-p (car rx))
+ (setq res (rx-head-pcs (cdr rx) res)))
+ res)
+ (t (error "not rx %s" rx))))
+
+(defun rx-head-act (rx &optional res)
+ (cond
+ ((rx-empty-p rx) res)
+ ((rx-null-p rx) res)
+ ((rx-act-p rx) (rx-better-act rx res))
+ ((rx-pc-p rx) res)
+ ((rx-clo-p rx) (rx-head-act (cdr rx) res))
+ ((rx-alt-p rx)
+ (setq rx (cdr rx))
+ (while (consp rx)
+ (setq res (rx-head-act (car rx) res)
+ rx (cdr rx)))
+ res)
+ ((rx-con-p rx)
+ (setq res (rx-head-act (car rx) res))
+ (if (rx-nullable-p (car rx))
+ (setq res (rx-head-act (cdr rx) res)))
+ res)
+ (t (error "not rx %s" rx))))
+
+;;; stepping
+
+(defun rx-step-internal (rx pc)
+ (cond
+ ((rx-empty-p rx) [])
+ ((rx-null-p rx) [])
+ ((rx-act-p rx) [])
+ ((rx-pc-p rx) (if (= rx pc) () []))
+ ((rx-clo-p rx) (rx-con (rx-step-internal (cdr rx) pc) rx))
+ ((rx-alt-p rx) (rx-alt (mapcar (lambda (r) (rx-step-internal r pc)) (cdr rx))))
+ ((rx-con-p rx)
+ (if (rx-nullable-p (car rx))
+ (rx-alt2
+ (rx-con (rx-step-internal (car rx) pc) (cdr rx))
+ (rx-step-internal (cdr rx) pc))
+ (rx-con (rx-step-internal (car rx) pc) (cdr rx))))
+ (t (error "not rx %s" rx))))
+
+(defun rx-step (rx &rest pcs)
+ (while (consp pcs)
+ (setq rx (rx-simplify (rx-step-internal rx (car pcs)))
+ pcs (cdr pcs)))
+ rx)
+
+;;; utilities
+
+(defun rx-better-act (a1 a2)
+ (cond
+ ((null a2) a1)
+ ((< (cdr a1) (cdr a2)) a1)
+ (t a2)))
+
+(defun rx-cons* (elt &rest lst)
+ (if (null lst)
+ elt
+ (cons elt (apply 'rx-cons* (car lst) (cdr lst)))))
+
+(defun rx-filter (fun lst &optional rest)
+ (if (null lst)
+ rest
+ (if (funcall fun (car lst))
+ (cons (car lst) (rx-filter fun (cdr lst) rest))
+ (rx-filter fun (cdr lst) rest))))
+
+(defun rx-cmp-index (rx)
+ (cond
+ ((rx-null-p rx) (list 0))
+ ((rx-act-p rx) (list 1 (cdr rx)))
+ ((rx-empty-p rx) (list 2))
+ ((rx-clo-p rx) (list 3 (cdr rx)))
+ ((rx-alt-p rx) (cons 4 (cdr rx)))
+ ((rx-con-p rx) (list 5 (car rx) (cdr rx)))
+ ((rx-pc-p rx) (list 6 rx))
+ (t (error "not rx %s" rx))))
+
+(defun rx-cmp-int (i1 i2)
+ (cond
+ ((< i1 i2) -1)
+ ((> i1 i2) 1)
+ (t 0)))
+
+(defun rx-cmp-rx (r1 r2)
+ (let ((i1 (rx-cmp-index r1)) (i2 (rx-cmp-index r2)))
+ (cond
+ ((< (car i1) (car i2)) -1)
+ ((> (car i1) (car i2)) 1)
+ (t (setq i1 (cdr i1)
+ i2 (cdr i2))
+ (catch 'result
+ (while (and (consp i1) (consp i2))
+ (let ((r (if (and (integerp (car i1)) (integerp (car i2)))
+ (rx-cmp-int (car i1) (car i2))
+ (rx-cmp-rx (car i1) (car i2)))))
+ (if (not (zerop r))
+ (throw 'result r)
+ (setq i1 (cdr i1)
+ i2 (cdr i2)))))
+ (if (null i1) (if (null i2) 0 -1) 1))))))
+
+(defun rx-sort-rx (l &optional res)
+ (if (null l)
+ res
+ (let ((e (car l)) lt gt cmp)
+ (setq l (cdr l))
+ (while (consp l)
+ (setq cmp (rx-cmp-rx (car l) e))
+ (cond
+ ((< cmp 0) (setq lt (cons (car l) lt)))
+ ((< 0 cmp) (setq gt (cons (car l) gt))))
+ (setq l (cdr l)))
+ (rx-sort-rx lt (cons e (rx-sort-rx gt res))))))
+
+(defun rx-sort-int (l &optional res)
+ (if (null l)
+ res
+ (let ((e (car l)) lt gt)
+ (setq l (cdr l))
+ (while (consp l)
+ (cond
+ ((< (car l) e) (setq lt (cons (car l) lt)))
+ ((< e (car l)) (setq gt (cons (car l) gt))))
+ (setq l (cdr l)))
+ (rx-sort-int lt (cons e (rx-sort-int gt res))))))
+