6 (put 'lex-scan-multibyte 'lisp-indent-function 3)
7 (put 'lex-scan-unibyte 'lisp-indent-function 3)
9 ;;; automata generation
11 (defun lex-automata (rx)
12 (let* ((rx (rx-simplify rx))
13 (stack (list rx)) ; list of rx
14 (table (list (rx-cons* rx 0 (lex-make-box (list 'd1 'd2)))))
15 ; list of (rx id . box-for-reverse-links)
16 (states ()) ; list of (id act trans . box-for-reverse-links)
17 ; where trans = list of (pc id . box-for-reverse-links)
19 tbl-ent box id pcs act pc trans rx-stepped p)
23 tbl-ent (assoc rx table)
32 rx-stepped (rx-step rx pc)
33 p (assoc rx-stepped table))
36 (setq trans (cons (cons pc (cdr p)) trans))
37 (lex-add-box (cddr p) id))
38 (setq p (rx-cons* rx-stepped next-id (lex-make-box (list id)))
39 trans (cons (cons pc (cdr p)) trans)
42 stack (cons rx-stepped stack))))
44 (cons (rx-cons* id act trans box)
50 (defvar lex-pc-var (make-symbol "pc"))
51 (defvar lex-act-var (make-symbol "act"))
52 (defvar lex-escape-tag (make-symbol "esc"))
54 (defun lex-gen-machine (states cs acts read-macro save-macro)
55 `(let (,lex-pc-var ,lex-act-var)
56 (catch ',lex-escape-tag
60 (lambda (s) (lex-gen-state s cs read-macro save-macro))
63 ,lex-act-var ,(apply 'natset-single (mapcar 'car acts)) automata-never-fail
65 (lambda (act) `(,(natset-single (car act)) nil ,@(cdr act)))
68 (defun lex-gen-state (s cs read-macro save-macro)
75 `((lex-match ,(cdr act)) (,save-macro))
77 ,@(if (consp trans) `((,read-macro ,lex-pc-var))))
80 (lambda (tr) `(,(let ((l (member (car tr) cs)))
83 (natset-seg (car l) (1- (cadr l)))))
89 (defmacro lex-match (id)
90 `(setq ,lex-act-var ',id))
92 `(throw ',lex-escape-tag nil))
94 ;;; user interface macro
98 (defvar lex-scan-multibyte-str-var (make-symbol "str"))
99 (defvar lex-scan-multibyte-ptr-var (make-symbol "ptr"))
100 (defvar lex-scan-multibyte-end-var (make-symbol "end"))
101 (defvar lex-scan-multibyte-mch-var (make-symbol "mch"))
103 (defmacro lex-scan-multibyte-read (pc)
104 `(if (< ,lex-scan-multibyte-ptr-var ,lex-scan-multibyte-end-var)
105 (setq ,pc (sref ,lex-scan-multibyte-str-var ,lex-scan-multibyte-ptr-var)
106 ,lex-scan-multibyte-ptr-var (char-next-index ,pc ,lex-scan-multibyte-ptr-var)
110 (defmacro lex-scan-multibyte-save ()
111 `(setq ,lex-scan-multibyte-mch-var ,lex-scan-multibyte-ptr-var))
113 (defmacro lex-scan-multibyte (str start end &rest clauses)
114 (if (not start) (setq start 0))
115 (if (not end) (setq end `(length ,lex-scan-multibyte-str-var)))
116 (let ((id 1) (rx ()) (acts ()) tmp code
117 (restore-code (if (symbolp start) `(setq ,start ,lex-scan-multibyte-mch-var))))
118 (while (consp clauses)
119 (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
120 acts (cons (cons id (cons restore-code (cdar clauses))) acts)
122 clauses (cdr clauses)))
124 tmp (rx-categolize-char (rx-desugar rx)))
125 `(let* ((,lex-scan-multibyte-str-var ,str)
126 (,lex-scan-multibyte-ptr-var ,start)
127 (,lex-scan-multibyte-end-var ,end)
128 ,lex-scan-multibyte-mch-var)
129 ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-multibyte-read 'lex-scan-multibyte-save))))
133 (defvar lex-scan-unibyte-str-var (make-symbol "str"))
134 (defvar lex-scan-unibyte-ptr-var (make-symbol "ptr"))
135 (defvar lex-scan-unibyte-end-var (make-symbol "end"))
136 (defvar lex-scan-unibyte-mch-var (make-symbol "mch"))
138 (defmacro lex-scan-unibyte-read (pc)
139 `(if (< ,lex-scan-unibyte-ptr-var ,lex-scan-unibyte-end-var)
140 (setq ,pc (aref ,lex-scan-unibyte-str-var ,lex-scan-unibyte-ptr-var)
141 ,lex-scan-unibyte-ptr-var (1+ ,lex-scan-unibyte-ptr-var)
145 (defmacro lex-scan-unibyte-save ()
146 `(setq ,lex-scan-unibyte-mch-var ,lex-scan-unibyte-ptr-var))
148 (defmacro lex-scan-unibyte (str start end &rest clauses)
149 (if (not start) (setq start 0))
150 (if (not end) (setq end `(length ,lex-scan-unibyte-str-var)))
151 (let ((id 1) (rx ()) (acts ()) tmp code
152 (restore-code (if (symbolp start) `(setq ,start ,lex-scan-unibyte-mch-var))))
153 (while (consp clauses)
154 (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
155 acts (cons (cons id (cons restore-code (cdar clauses))) acts)
157 clauses (cdr clauses)))
159 tmp (rx-categolize-char (rx-desugar rx)))
160 `(let* ((,lex-scan-unibyte-str-var ,str)
161 (,lex-scan-unibyte-ptr-var ,start)
162 (,lex-scan-unibyte-end-var ,end)
163 ,lex-scan-unibyte-mch-var)
164 ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-unibyte-read 'lex-scan-unibyte-save))))
168 (defun lex-make-box (val)
170 (defalias 'lex-box-ref 'car)
172 (defun lex-add-box (box val)
173 (if (not (member val (car box)))
174 (setcar box (cons val (car box)))))
179 (mapcar (lambda (v) (set v (intern (symbol-name (symbol-value v)))))
183 lex-scan-multibyte-str-var
184 lex-scan-multibyte-ptr-var
185 lex-scan-multibyte-end-var
186 lex-scan-multibyte-mch-var
187 lex-scan-unibyte-str-var
188 lex-scan-unibyte-ptr-var
189 lex-scan-unibyte-end-var
190 lex-scan-unibyte-mch-var))