6 (put 'lex-scan-multibyte 'lisp-indent-function 3)
7 (put 'lex-scan-unibyte 'lisp-indent-function 3)
13 ;; As a result of profiling, CCL is slower than Emacs-Lisp, sigh...
14 (setq lex-ccl-execute nil)
16 (defvar lex-ccl-execute
18 (or (when (fboundp 'ccl-execute-on-substring) 'ccl-execute-on-substring)
19 (when (fboundp 'ccl-execute-on-string) 'ccl-execute-on-string))))
21 (defvar lex-ccl-use-name
27 (register-ccl-program 'test-ccl (ccl-compile '(0 (r0 = 1))))
28 (ccl-execute-with-args 'test-ccl)
36 ;;; user interface macro
40 (defvar lex-scan-multibyte-str-var (make-symbol "str"))
41 (defvar lex-scan-multibyte-ptr-var (make-symbol "ptr"))
42 (defvar lex-scan-multibyte-end-var (make-symbol "end"))
43 (defvar lex-scan-multibyte-mch-var (make-symbol "mch"))
45 (defmacro lex-scan-multibyte-read (pc)
46 `(if (< ,lex-scan-multibyte-ptr-var ,lex-scan-multibyte-end-var)
47 (setq ,pc (sref ,lex-scan-multibyte-str-var ,lex-scan-multibyte-ptr-var)
48 ,lex-scan-multibyte-ptr-var (char-next-index ,pc ,lex-scan-multibyte-ptr-var)
52 (defmacro lex-scan-multibyte-save ()
53 `(setq ,lex-scan-multibyte-mch-var ,lex-scan-multibyte-ptr-var))
55 (defmacro lex-scan-multibyte (str start end &rest clauses)
56 (if (not start) (setq start 0))
57 (if (not end) (setq end `(length ,lex-scan-multibyte-str-var)))
58 (let ((id 1) (rx ()) (acts ()) tmp code
59 (restore-code (if (symbolp start) `(setq ,start ,lex-scan-multibyte-mch-var))))
60 (while (consp clauses)
61 (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
62 acts (cons (cons id (cons restore-code (cdar clauses))) acts)
64 clauses (cdr clauses)))
66 tmp (rx-categolize-char (rx-desugar rx)))
67 `(let* ((,lex-scan-multibyte-str-var ,str)
68 (,lex-scan-multibyte-ptr-var ,start)
69 (,lex-scan-multibyte-end-var ,end)
70 ,lex-scan-multibyte-mch-var)
71 ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-multibyte-read 'lex-scan-multibyte-save))))
75 (defvar lex-scan-unibyte-str-var (make-symbol "str"))
76 (defvar lex-scan-unibyte-ptr-var (make-symbol "ptr"))
77 (defvar lex-scan-unibyte-end-var (make-symbol "end"))
78 (defvar lex-scan-unibyte-mch-var (make-symbol "mch"))
80 (defmacro lex-scan-unibyte-read (pc)
81 `(if (< ,lex-scan-unibyte-ptr-var ,lex-scan-unibyte-end-var)
82 (setq ,pc (aref ,lex-scan-unibyte-str-var ,lex-scan-unibyte-ptr-var)
83 ,lex-scan-unibyte-ptr-var (1+ ,lex-scan-unibyte-ptr-var)
87 (defmacro lex-scan-unibyte-save ()
88 `(setq ,lex-scan-unibyte-mch-var ,lex-scan-unibyte-ptr-var))
90 (defmacro lex-scan-unibyte (str start end &rest clauses)
91 (if (not start) (setq start 0))
92 (if (not end) (setq end `(length ,lex-scan-unibyte-str-var)))
93 (let ((id 1) (rx ()) (acts ()) tmp code
94 (restore-code (if (symbolp start) `(setq ,start ,lex-scan-unibyte-mch-var))))
95 (while (consp clauses)
96 (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
97 acts (cons (cons id (cons restore-code (cdar clauses))) acts)
99 clauses (cdr clauses)))
101 tmp (rx-categolize-char (rx-desugar rx)))
102 `(let* ((,lex-scan-unibyte-str-var ,str)
103 (,lex-scan-unibyte-ptr-var ,start)
104 (,lex-scan-unibyte-end-var ,end)
105 ,lex-scan-unibyte-mch-var)
106 ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-unibyte-read 'lex-scan-unibyte-save))))
108 ;;; automata generation
110 (defun lex-automata (rx)
111 (let* ((rx (rx-simplify rx))
112 (stack (list rx)) ; list of rx
113 (table (list (rx-cons* rx 0 (lex-make-box (list 'd1 'd2)))))
114 ; list of (rx id . box-for-reverse-links)
115 (states ()) ; list of (id act trans . box-for-reverse-links)
116 ; where trans = list of (pc id . box-for-reverse-links)
118 tbl-ent box id pcs act pc trans rx-stepped p)
122 tbl-ent (assoc rx table)
131 rx-stepped (rx-step rx pc)
132 p (assoc rx-stepped table))
135 (setq trans (cons (cons pc (cdr p)) trans))
136 (lex-add-box (cddr p) id))
137 (setq p (rx-cons* rx-stepped next-id (lex-make-box (list id)))
138 trans (cons (cons pc (cdr p)) trans)
141 stack (cons rx-stepped stack))))
143 (cons (rx-cons* id act trans box)
149 (defvar lex-pc-var (make-symbol "pc"))
150 (defvar lex-act-var (make-symbol "act"))
151 (defvar lex-escape-tag (make-symbol "esc"))
153 (defun lex-gen-machine (states cs acts read-macro save-macro)
154 `(let (,lex-pc-var ,lex-act-var)
155 ,(if (and lex-ccl-execute
156 (eq read-macro 'lex-scan-unibyte-read)
157 (eq save-macro 'lex-scan-unibyte-save))
158 (lex-gen-ccl-unibyte-automata states cs)
159 (lex-gen-automata states cs read-macro save-macro))
160 ,(lex-gen-action acts)))
162 (defun lex-gen-automata (states cs read-macro save-macro)
163 `(catch ',lex-escape-tag
167 (lambda (s) (lex-gen-state s cs read-macro save-macro))
170 (defun lex-gen-state (s cs read-macro save-macro)
177 `((lex-match ,(cdr act)) (,save-macro))
179 ,@(if (consp trans) `((,read-macro ,lex-pc-var))))
182 (lambda (tr) `(,(let ((l (member (car tr) cs)))
185 (natset-seg (car l) (1- (cadr l)))))
189 (defun lex-gen-action (acts)
191 ,lex-act-var ,(apply 'natset-single (mapcar 'car acts)) automata-never-fail
193 (lambda (act) `(,(natset-single (car act)) nil ,@(cdr act)))
196 ;;; CCL version automata generation
198 (defun lex-gen-ccl-unibyte-automata (states cs)
199 ;; read-macro is lex-scan-unibyte-read
200 ;; save-macro is lex-scan-unibyte-save
201 (let ((name (make-symbol "ccl-prog-name"))
202 (frag-vector (make-vector 1 nil))
204 `(let ((frag ,frag-vector)
205 (status [nil nil nil nil nil nil nil nil nil])
206 (prog (eval-when-compile
208 ',(lex-gen-ccl-unibyte-automata-program states cs)))))
209 (unless (aref frag 0)
210 (register-ccl-program
213 (aset status 0 nil) ; r0: pc
214 (aset status 1 0) ; r1: state
215 (aset status 2 ,lex-scan-unibyte-ptr-var) ; r2: ptr
216 (aset status 3 ,lex-scan-unibyte-ptr-var) ; r3: start
217 (aset status 4 ,lex-scan-unibyte-end-var) ; r4: end
218 (aset status 5 nil) ; r5: mch
219 (aset status 6 0) ; r6: act
220 (aset status 7 nil) ; r7
221 (aset status 8 nil) ; ic
222 ,(if (eval-when-compile (eq lex-ccl-execute 'ccl-execute-on-string))
223 `(ccl-execute-on-string
224 ,(if (eval-when-compile lex-ccl-use-name) `',name `prog)
226 ,lex-scan-unibyte-str-var)
227 `(ccl-execute-on-substring
228 ,(if (eval-when-compile lex-ccl-use-name) `',name `prog)
230 ,lex-scan-unibyte-str-var
231 ,lex-scan-unibyte-ptr-var
232 ,lex-scan-unibyte-end-var))
233 (setq ,lex-scan-unibyte-ptr-var (aref status 2))
234 (when (< 0 (aref status 6))
235 (setq ,lex-act-var (aref status 6)
236 ,lex-scan-unibyte-mch-var (aref status 5))))))
238 (defun lex-gen-ccl-unibyte-automata-program (states cs)
240 (,@(eval-when-compile
241 (when (eq lex-ccl-execute 'ccl-execute-on-string)
251 (lambda (s) (lex-gen-ccl-unibyte-automata-state
252 (nth 0 s) (cdr (nth 1 s)) (nth 2 s)
255 (lambda (a b) (< (car a) (car b))))))))))
257 (defun lex-gen-ccl-unibyte-automata-state (id act trans cs)
267 'natset-gen-ccl-branch
272 (let ((l (member (car tr) cs)))
275 (natset-seg (car l) (1- (cadr l)))))
284 (defmacro lex-match (id)
285 `(setq ,lex-act-var ',id))
286 (defmacro lex-fail ()
287 `(throw ',lex-escape-tag nil))
291 (defun lex-make-box (val)
293 (defalias 'lex-box-ref 'car)
295 (defun lex-add-box (box val)
296 (if (not (member val (car box)))
297 (setcar box (cons val (car box)))))
302 (mapcar (lambda (v) (set v (intern (symbol-name (symbol-value v)))))
306 lex-scan-multibyte-str-var
307 lex-scan-multibyte-ptr-var
308 lex-scan-multibyte-end-var
309 lex-scan-multibyte-mch-var
310 lex-scan-unibyte-str-var
311 lex-scan-unibyte-ptr-var
312 lex-scan-unibyte-end-var
313 lex-scan-unibyte-mch-var))
319 (let* ((str "abcdef\ndeefx\r\n jfdks\r")
322 (lex-scan-unibyte str p nil
326 (* (+ ?\r) [^ "\r\n"] (* [^ "\r"]))
331 (* (+ ?\r) [^ "\r\n"] (* [^ "\r"]))
336 (* (+ ?\r) [^ "\r\n"] (* [^ "\r"]))
341 (ew-crlf-line-convert "abcdef\ndeefx\r\n jfdks\r"
342 (lambda (a) (format "[L:%s]" a))
343 (lambda (a) (format "[F:%s]" a))
344 (lambda (a) (format "[N:%s]" a)))