* digraph.el: Refined.
[elisp/flim.git] / automata.el
1 (require 'digraph)
2 (require 'natset)
3 (provide 'automata)
4
5 (defvar automata-never-fail (make-symbol "automata-never-fail"))
6 (defvar automata-state-var (make-symbol "automata-state"))
7
8 (defmacro automata (in-var start-tag &rest clauses)
9   (let* ((org-len (length clauses))
10          (org-graph (digraph-make org-len))
11          (tag-to-org-alist nil)
12          forest org-to-forest forest-to-org
13          i j tmp trans)
14     (setq tmp clauses
15           i 0)
16     (while tmp
17       (setq tag-to-org-alist (cons (cons (caar tmp) i) tag-to-org-alist)
18             i (1+ i)
19             tmp (cdr tmp)))
20     (setq tmp clauses
21           i 0)
22     (while tmp
23       (setq trans (cddr (cdar tmp)))
24       (while trans
25         (setq j (cdr (assoc (cadr (car trans)) tag-to-org-alist)))
26         (digraph-add-edge org-graph i j)
27         (setq trans (cdr trans)))
28       (setq i (1+ i)
29             tmp (cdr tmp)))
30     ;;(error "%s" org-graph)
31     (setq tmp (digraph-split-as-forest org-graph)
32           forest (aref tmp 0)
33           org-to-forest (aref tmp 1)
34           forest-to-org (aref tmp 2))
35     (setq clauses
36           (mapcar
37            (lambda (c)
38              (let ((tag (car c))
39                    (action (cadr c))
40                    (fail (nth 2 c))
41                    (trs (nthcdr 3 c)))
42                (setq trs
43                      (mapcar
44                       (lambda (next)
45                         (list
46                          (apply
47                           'natset-union
48                           (mapcar
49                            (lambda (tr) (if (equal (cadr tr) next) (car tr) (natset-empty)))
50                            trs))
51                          next))
52                       (natset-enum (apply 'natset-single (mapcar 'cadr trs)))))
53                (cons tag (cons action (cons fail trs)))))
54            clauses))
55     `(let ((,automata-state-var ,(cdr (assoc start-tag tag-to-org-alist))))
56        ,@(automata-exp-seq
57           (automata-gen-state
58            in-var clauses
59            org-len
60            (list (cdr (assoc start-tag tag-to-org-alist)))
61            tag-to-org-alist
62            (aref org-to-forest (cdr (assoc start-tag tag-to-org-alist)))
63            forest org-to-forest forest-to-org)))))
64
65 (defun automata-gen-state (in-var clauses
66                                   org-len
67                                   start-states
68                                   tag-to-org-alist
69                                   forest-state forest org-to-forest forest-to-org)
70   (let* ((org-states (aref forest-to-org forest-state))
71          (forest-states (digraph-descents forest forest-state))
72          (branch-length (+ (length org-states) (length forest-states)))
73          (branch-to-org
74           (vconcat
75            (mapcar 'list org-states)
76            (mapcar (lambda (forest-state)
77                      (aref forest-to-org forest-state))
78                    forest-states)))
79          (org-to-branch
80           (let ((org-to-branch (make-vector org-len nil))
81                 (i 0) tmp)
82             (while (< i branch-length)
83               (setq tmp (aref branch-to-org i))
84               (while tmp
85                 (aset org-to-branch (car tmp) i)
86                 (setq tmp (cdr tmp)))
87               (setq i (1+ i)))
88             org-to-branch))
89          (branch-to-forest
90           (vconcat
91            (make-list (length org-states) nil)
92            forest-states))
93          (branch-state-range
94           (vconcat
95            (mapcar 'natset-single org-states)
96            (mapcar (lambda (forest-state)
97                      (apply 'natset-single
98                             (aref forest-to-org forest-state)))
99                    forest-states)))
100          (branch-descents
101           (vconcat
102            (mapcar (lambda (org-state)
103                      (let* ((c (nth org-state clauses))
104                             (trs (nthcdr 3 c)))
105                        (apply 'natset-union
106                               (mapcar (lambda (tr)
107                                         (natset-single
108                                          (cdr (assoc (cadr tr) tag-to-org-alist))))
109                                       trs))))
110                    org-states)
111            (mapcar (lambda (forest-state) ())
112                    forest-states)))
113          (all-descents (apply 'natset-union (append branch-descents ())))
114          (branch-ascents
115           (let* ((branch-ascents (make-vector branch-length 0))
116                  (i 0) j)
117             (while (< i branch-length)
118               (setq j 0)
119               (while (< j branch-length)
120                 (if (natset-has-intersection-p (aref branch-state-range i)
121                                                (aref branch-descents j))
122                     (aset branch-ascents i
123                           (1+ (aref branch-ascents i))))
124                 (setq j (1+ j)))
125               (setq i (1+ i)))
126             branch-ascents))
127          (start-inline nil)
128          (branch-inline
129           (let* ((branch-inline (make-vector branch-length nil))
130                  (start-ns (apply 'natset-single start-states))
131                  (i 0))
132             (while (< i branch-length)
133               (if (natset-has-intersection-p (aref branch-state-range i) start-ns)
134                   (if (and (= (length start-states) 1)
135                            (= (aref branch-ascents i) 0))
136                       (progn
137                         (setq start-inline i)
138                         (aset branch-inline i t))
139                     nil)
140                 (if (= (aref branch-ascents i) 1)
141                     (aset branch-inline i t)))
142               (setq i (1+ i)))
143             branch-inline))
144          (branch-gen nil)
145          (_
146           (setq branch-gen
147                 (vconcat
148                  (mapcar
149                   (lambda (org-state)
150                     (cons
151                      (lambda (org-state)
152                        `(,(natset-single org-state)
153                          nil ; don't duplicate.
154                          ,@(let* ((c (nth org-state clauses)))
155                              (automata-exp-seq
156                               (nth 1 c) ; action
157                               (if (null (nthcdr 3 c))
158                                   (nth 2 c)
159                                 `(automata-branch
160                                   ,in-var ; input variable
161                                   ,(natset-full) ; input is unpredictable.
162                                   ,(nth 2 c) ; fail action
163                                   ,@(let ((trs (nthcdr 3 c)))
164                                       (mapcar
165                                        (lambda (next-branch)
166                                          (let ((input-range 
167                                                 (apply 'natset-union
168                                                        (mapcar
169                                                         (lambda (tr)
170                                                           (if (member (cdr (assoc (cadr tr) tag-to-org-alist))
171                                                                       (aref branch-to-org next-branch))
172                                                               (car tr)
173                                                             (natset-empty)))
174                                                         trs))))
175                                            `(,input-range ; input range
176                                              ,(not (aref branch-inline next-branch)) ; duplicatable unless inlining.
177                                              ,(let ((goto-list (apply
178                                                                 'append
179                                                                 (mapcar
180                                                                  (lambda (tr)
181                                                                    (let ((range (natset-intersection (car tr) input-range)))
182                                                                      (if range
183                                                                          `((,range
184                                                                             t
185                                                                             (automata-goto
186                                                                              ,automata-state-var
187                                                                              ,org-state
188                                                                              ,(cdr (assoc (cadr tr) tag-to-org-alist)))))
189                                                                        ())))
190                                                                  trs))))
191                                                 (if (= (length goto-list) 1)
192                                                     (car (cddr (car goto-list)))
193                                                   `(automata-branch
194                                                     ,in-var
195                                                     ,input-range
196                                                     ,automata-never-fail
197                                                     ,@goto-list)))
198                                              ,@(if (aref branch-inline next-branch)
199                                                    (automata-exp-seq
200                                                     `(progn ,@(cddr (funcall (car (aref branch-gen next-branch))
201                                                                              (cdr (aref branch-gen next-branch))))))
202                                                  ()))))
203                                        (natset-enum
204                                         (apply 'natset-union
205                                                (mapcar
206                                                 (lambda (tr)
207                                                   (natset-single
208                                                    (aref org-to-branch
209                                                          (cdr (assoc (cadr tr) tag-to-org-alist)))))
210                                                 trs))))
211                                       )))))))
212                      org-state))
213                   org-states)
214                  (mapcar
215                   (lambda (forest-state)
216                     (cons
217                      (lambda (forest-state)
218                        `(,(natset-intersection
219                            (apply 'natset-single (aref forest-to-org forest-state))
220                            all-descents) ; state range
221                          nil ; don't duplicate.
222                          ,@(automata-exp-seq
223                             (automata-gen-state
224                              in-var clauses
225                              org-len
226                              (aref forest-to-org forest-state)
227                              tag-to-org-alist
228                              forest-state forest org-to-forest forest-to-org))))
229                      forest-state))
230                   forest-states))))
231          (branches
232           (let* ((branches ())
233                  (i branch-length))
234             (while (< 0 i)
235               (setq i (1- i))
236               (if (not (aref branch-inline i))
237                   (setq branches
238                         (cons
239                          (funcall (car (aref branch-gen i))
240                                   (cdr (aref branch-gen i)))
241                          branches))))
242             branches))
243          )
244     ;;(error "err")
245     (if start-inline
246         (apply
247          'automata-seq-exp
248          `(progn ,@(cddr (funcall (car (aref branch-gen start-inline))
249                                   (cdr (aref branch-gen start-inline)))))
250          (cond
251           ((null branches) ())
252           ((null (cdr branches))
253            (cddr (car branches)))
254           (t
255            `((while t 
256                (automata-branch
257                 ,automata-state-var ,(natset-full) ,automata-never-fail
258                 ,@branches))))))
259       (if (= (length branches) 1)
260           `(while t ,@(cddr (car branches)))
261         `(while t ; ,branch-inline ,branch-state-range ,branch-descents ,branch-ascents
262                 (automata-branch
263                  ,automata-state-var ,(natset-full) ,automata-never-fail
264                  ,@branches))))))
265
266 (defun automata-seq-exp (&rest seq)
267   (cond
268    ((null seq) nil)
269    ((null (cdr seq))
270     (car seq))
271    (t
272     (setq seq
273           (apply
274            'append
275            (mapcar
276             (lambda (exp) (if (and (consp exp) (eq (car exp) 'progn))
277                               (cdr exp)
278                             (list exp)))
279             seq)))
280     (let ((rseq (reverse seq)))
281       (cons 'progn
282             (apply
283              'append
284              (apply
285               'append
286               (mapcar
287                (lambda (exp) (if (null exp) () (list exp)))
288                (nreverse (cdr rseq))))
289              (list (list (car rseq)))))))))
290
291 (defun automata-exp-seq (&rest seq)
292   (let ((exp (apply 'automata-seq-exp seq)))
293     (if (and (consp exp) (eq (car exp) 'progn))
294         (cdr exp)
295       (list exp))))
296
297 (defmacro automata-goto (var curr next)
298   (if (eq curr next)
299       nil
300     `(setq ,var ,next)))
301
302 (defmacro automata-branch (var range fail &rest clauses)
303   (when (eq fail automata-never-fail)
304     (setq range (natset-intersection
305                  (apply 'natset-union (mapcar 'car clauses))
306                  range)))
307   (let ((len (length clauses))
308         ns-list dup-list body-list tmp ns)
309     (setq tmp clauses
310           ns (natset-negate range))
311     (while tmp
312       (setq ns-list (cons (natset-sub (caar tmp) ns) ns-list)
313             dup-list (cons (cadr (car tmp)) dup-list)
314             body-list (cons (cddr (car tmp)) body-list)
315             ns (natset-union ns (caar tmp))
316             tmp (cdr tmp))
317       (if (natset-empty-p (car ns-list))
318           (setq ns-list (cdr ns-list)
319                 dup-list (cdr dup-list)
320                 body-list (cdr body-list))))
321     (setq ns-list (nreverse ns-list)
322           dup-list (nreverse dup-list)
323           body-list (nreverse body-list))
324     (automata-branch-i var range fail ns-list dup-list body-list)))
325
326 (defun automata-branch-i (var range fail ns-list dup-list body-list)
327   (cond
328    ((null ns-list) fail)
329    ((null (cdr ns-list))
330     (if (natset-include-p (car ns-list) range)
331         (apply 'automata-seq-exp (car body-list))
332       `(if ,(natset-gen-pred-exp (car ns-list) var range)
333            ,(apply 'automata-seq-exp (car body-list))
334          ,fail)))
335    (t
336     (let (tmp tmpn tmpd cut)
337       (setq tmpn ns-list
338             cut (natset-empty))
339       (while tmpn
340         (setq cut (natset-union cut (natset-boundary-set (car tmpn)))
341               tmpn (cdr tmpn)))
342       (setq tmpn ns-list
343             tmpd dup-list)
344       (while tmpn
345         (if (not (car tmpd))
346             (setq tmp (natset-minmax (car tmpn))
347                   tmp (natset-sub tmp (natset-start-set tmp))
348                   cut (natset-sub cut tmp)))
349         (setq tmpn (cdr tmpn)
350               tmpd (cdr tmpd)))
351       (setq cut (natset-sub cut (natset-boundary-set (natset-minmax range))))
352       (if (null (setq cut (natset-enum cut)))
353           `(if ,(natset-gen-pred-exp (car ns-list) var range)
354                ,(apply 'automata-seq-exp (car body-list))
355              ,(automata-branch-i var
356                                  (natset-sub range (car ns-list))
357                                  fail
358                                  (cdr ns-list)
359                                  (cdr dup-list)
360                                  (cdr body-list)))
361         (let* ((mid (nth (/ (length cut) 2) cut))
362                (lower (natset-seg 0 (1- mid)))
363                (higher (natset-seg mid))
364                ns-list1 dup-list1 body-list1
365                ns-list2 dup-list2 body-list2
366                )
367           (while ns-list
368             (if (natset-has-intersection-p lower (car ns-list))
369                 (setq ns-list1 (cons (natset-intersection (car ns-list) lower) ns-list1)
370                       dup-list1 (cons (car dup-list) dup-list1)
371                       body-list1 (cons (car body-list) body-list1)))
372             (if (natset-has-intersection-p higher (car ns-list))
373                 (setq ns-list2 (cons (natset-intersection (car ns-list) higher) ns-list2)
374                       dup-list2 (cons (car dup-list) dup-list2)
375                       body-list2 (cons (car body-list) body-list2)))
376             (setq ns-list (cdr ns-list)
377                   dup-list (cdr dup-list)
378                   body-list (cdr body-list)))
379           ;;(if (or (null ns-list1) (null ns-list2)) (error "divide fail"))
380           `(if (< ,var ,mid)
381                ,(automata-branch-i var
382                                    (natset-intersection range lower)
383                                    fail ns-list1 dup-list1 body-list1)
384              ,(automata-branch-i var
385                                  (natset-intersection range higher)
386                                  fail ns-list2 dup-list2 body-list2))))))))
387
388
389 '(
390 (npp (macroexpand '
391 (automata pc 0
392                 (1
393                  (progn
394                    (lex-match 1)
395                    (lex-scan-unibyte-save))
396                  (lex-fail))
397                 (5
398                  (progn
399                    (lex-match 2)
400                    (lex-scan-unibyte-save))
401                  (lex-fail))
402                 (4
403                  (progn
404                    (lex-scan-unibyte-read pc))
405                  (lex-fail)
406                  ((9 10) 5) ((32 33) 5))
407                 (2
408                  (progn
409                    (lex-match 3)
410                    (lex-scan-unibyte-save)
411                    (lex-scan-unibyte-read pc))
412                  (lex-fail)
413                  ((13 14) 2)
414                  ((0 9) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
415                  ((10 11) 4))
416                 (3
417                  (progn
418                    (lex-match 3)
419                    (lex-scan-unibyte-save)
420                    (lex-scan-unibyte-read pc))
421                  (lex-fail)
422                  ((0 9) 3) ((10 11) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
423                  ((13 14) 2)
424                  )
425                 (0
426                  (progn
427                    (lex-match 3)
428                    (lex-scan-unibyte-save)
429                    (lex-scan-unibyte-read pc))
430                  (lex-fail)
431                  ((0 9) 3) ((10 11) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
432                  ((13 14) 2)
433                  ((9 10) 1) ((32 33) 1)))))
434
435 (npp (macroexpand '
436 (automata pc 0
437                 (0
438                  (progn
439                    (lex-match 3)
440                    (lex-scan-unibyte-save)
441                    (lex-scan-unibyte-read pc))
442                  (lex-fail)
443                  ((0 9) 3) ((10 11) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
444                  ((13 14) 2)
445                  ((9 10) 1) ((32 33) 1))
446                 (1
447                  (progn
448                    (lex-match 1)
449                    (lex-scan-unibyte-save))
450                  (lex-fail))
451                 (2
452                  (progn
453                    (lex-match 3)
454                    (lex-scan-unibyte-save)
455                    (lex-scan-unibyte-read pc))
456                  (lex-fail)
457                  ((13 14) 2)
458                  ((0 9) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
459                  ((10 11) 4))
460                 (3
461                  (progn
462                    (lex-match 3)
463                    (lex-scan-unibyte-save)
464                    (lex-scan-unibyte-read pc))
465                  (lex-fail)
466                  ((0 9) 3) ((10 11) 3) ((11 13) 3) ((14 32) 3) ((33) 3)
467                  ((13 14) 2)
468                  )
469                 (4
470                  (progn
471                    (lex-scan-unibyte-read pc))
472                  (lex-fail)
473                  ((9 10) 5) ((32 33) 5))
474                 (5
475                  (progn
476                    (lex-match 2)
477                    (lex-scan-unibyte-save))
478                  (lex-fail))
479 )))
480
481 (npp
482 (automata-gen-state
483 'pc
484 '((0 (progn (lex-match 3) (lex-scan-unibyte-save) (lex-scan-unibyte-read pc)) (lex-fail) ((9 10 32 33) 1) ((13 14) 2) ((0 9 10 13 14 32 33) 3))
485   (1 (progn (lex-match 1) (lex-scan-unibyte-save)) (lex-fail))
486   (2 (progn (lex-match 3) (lex-scan-unibyte-save) (lex-scan-unibyte-read pc)) (lex-fail) ((13 14) 2) ((0 9 11 13 14 32 33) 3) ((10 11) 4))
487   (3 (progn (lex-match 3) (lex-scan-unibyte-save) (lex-scan-unibyte-read pc)) (lex-fail) ((13 14) 2) ((0 9 10 13 14 32 33) 3))
488   (4 (progn (lex-scan-unibyte-read pc)) (lex-fail) ((9 10 32 33) 5))
489   (5 (progn (lex-match 2) (lex-scan-unibyte-save)) (lex-fail)))
490 6
491 '(0)
492 '((5 . 5) (4 . 4) (3 . 3) (2 . 2) (1 . 1) (0 . 0))
493 0
494 [(4 1) (2) (3) nil nil]
495 [0 4 1 1 2 3]
496 [(0) (3 2) (4) (5) (1)]))
497 )