6 (put 'lex-scan-multibyte 'lisp-indent-function 3)
7 (put 'lex-scan-unibyte 'lisp-indent-function 3)
12 ;; CCL is not so fast for this library.
13 ;; Because it requires quadratic time for skipping string prefix.
14 ;; However, it is bit faster than emacs-lisp on average for common case,
15 ;; it is default if available.
16 (defvar lex-use-ccl (fboundp 'ccl-execute-on-string))
21 ;;; user interface macro
25 (defvar lex-scan-multibyte-str-var (make-symbol "str"))
26 (defvar lex-scan-multibyte-ptr-var (make-symbol "ptr"))
27 (defvar lex-scan-multibyte-end-var (make-symbol "end"))
28 (defvar lex-scan-multibyte-mch-var (make-symbol "mch"))
30 (defmacro lex-scan-multibyte-read (pc)
31 `(if (< ,lex-scan-multibyte-ptr-var ,lex-scan-multibyte-end-var)
32 (setq ,pc (sref ,lex-scan-multibyte-str-var ,lex-scan-multibyte-ptr-var)
33 ,lex-scan-multibyte-ptr-var (char-next-index ,pc ,lex-scan-multibyte-ptr-var)
37 (defmacro lex-scan-multibyte-save ()
38 `(setq ,lex-scan-multibyte-mch-var ,lex-scan-multibyte-ptr-var))
40 (defmacro lex-scan-multibyte (str start end &rest clauses)
41 (if (not start) (setq start 0))
42 (if (not end) (setq end `(length ,lex-scan-multibyte-str-var)))
43 (let ((id 1) (rx ()) (acts ()) tmp code
44 (restore-code (if (symbolp start) `(setq ,start ,lex-scan-multibyte-mch-var))))
45 (while (consp clauses)
46 (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
47 acts (cons (cons id (cons restore-code (cdar clauses))) acts)
49 clauses (cdr clauses)))
51 tmp (rx-categolize-char (rx-desugar rx)))
52 `(let* ((,lex-scan-multibyte-str-var ,str)
53 (,lex-scan-multibyte-ptr-var ,start)
54 (,lex-scan-multibyte-end-var ,end)
55 ,lex-scan-multibyte-mch-var)
56 ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-multibyte-read 'lex-scan-multibyte-save))))
60 (defvar lex-scan-unibyte-str-var (make-symbol "str"))
61 (defvar lex-scan-unibyte-ptr-var (make-symbol "ptr"))
62 (defvar lex-scan-unibyte-end-var (make-symbol "end"))
63 (defvar lex-scan-unibyte-mch-var (make-symbol "mch"))
65 (defmacro lex-scan-unibyte-read (pc)
66 `(if (< ,lex-scan-unibyte-ptr-var ,lex-scan-unibyte-end-var)
67 (setq ,pc (aref ,lex-scan-unibyte-str-var ,lex-scan-unibyte-ptr-var)
68 ,lex-scan-unibyte-ptr-var (1+ ,lex-scan-unibyte-ptr-var)
72 (defmacro lex-scan-unibyte-save ()
73 `(setq ,lex-scan-unibyte-mch-var ,lex-scan-unibyte-ptr-var))
75 (defmacro lex-scan-unibyte (str start end &rest clauses)
76 (if (not start) (setq start 0))
77 (if (not end) (setq end `(length ,lex-scan-unibyte-str-var)))
78 (let ((id 1) (rx ()) (acts ()) tmp code
79 (restore-code (if (symbolp start) `(setq ,start ,lex-scan-unibyte-mch-var))))
80 (while (consp clauses)
81 (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
82 acts (cons (cons id (cons restore-code (cdar clauses))) acts)
84 clauses (cdr clauses)))
86 tmp (rx-categolize-char (rx-desugar rx)))
87 `(let* ((,lex-scan-unibyte-str-var ,str)
88 (,lex-scan-unibyte-ptr-var ,start)
89 (,lex-scan-unibyte-end-var ,end)
90 ,lex-scan-unibyte-mch-var)
91 ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-unibyte-read 'lex-scan-unibyte-save))))
93 ;;; automata generation
95 (defun lex-automata (rx)
96 (let* ((rx (rx-simplify rx))
97 (stack (list rx)) ; list of rx
98 (table (list (rx-cons* rx 0 (lex-make-box (list 'd1 'd2)))))
99 ; list of (rx id . box-for-reverse-links)
100 (states ()) ; list of (id act trans . box-for-reverse-links)
101 ; where trans = list of (pc id . box-for-reverse-links)
103 tbl-ent box id pcs act pc trans rx-stepped p)
107 tbl-ent (assoc rx table)
116 rx-stepped (rx-step rx pc)
117 p (assoc rx-stepped table))
120 (setq trans (cons (cons pc (cdr p)) trans))
121 (lex-add-box (cddr p) id))
122 (setq p (rx-cons* rx-stepped next-id (lex-make-box (list id)))
123 trans (cons (cons pc (cdr p)) trans)
126 stack (cons rx-stepped stack))))
128 (cons (rx-cons* id act trans box)
134 (defvar lex-pc-var (make-symbol "pc"))
135 (defvar lex-act-var (make-symbol "act"))
136 (defvar lex-escape-tag (make-symbol "esc"))
138 (defun lex-gen-machine (states cs acts read-macro save-macro)
139 `(let (,lex-pc-var ,lex-act-var)
140 ,(if (and lex-use-ccl
141 (eq read-macro 'lex-scan-unibyte-read)
142 (eq save-macro 'lex-scan-unibyte-save))
143 (lex-gen-ccl-unibyte-automata states cs)
144 (lex-gen-automata states cs read-macro save-macro))
145 ,(lex-gen-action acts)))
147 (defun lex-gen-automata (states cs read-macro save-macro)
148 `(catch ',lex-escape-tag
152 (lambda (s) (lex-gen-state s cs read-macro save-macro))
155 (defun lex-gen-state (s cs read-macro save-macro)
162 `((lex-match ,(cdr act)) (,save-macro))
164 ,@(if (consp trans) `((,read-macro ,lex-pc-var))))
167 (lambda (tr) `(,(let ((l (member (car tr) cs)))
170 (natset-seg (car l) (1- (cadr l)))))
174 (defun lex-gen-action (acts)
176 ,lex-act-var ,(apply 'natset-single (mapcar 'car acts)) automata-never-fail
178 (lambda (act) `(,(natset-single (car act)) nil ,@(cdr act)))
181 ;;; CCL version automata generation
183 (defun lex-gen-ccl-unibyte-automata (states cs)
184 ;; read-macro is lex-scan-unibyte-read
185 ;; save-macro is lex-scan-unibyte-save
186 `(let ((status [nil nil nil nil nil nil nil nil nil]))
187 (aset status 0 nil) ; r0: pc
188 (aset status 1 0) ; r1: state
189 (aset status 2 ,lex-scan-unibyte-ptr-var) ; r2: ptr
190 (aset status 3 ,lex-scan-unibyte-ptr-var) ; r3: start
191 (aset status 4 ,lex-scan-unibyte-end-var) ; r4: end
192 (aset status 5 nil) ; r5: mch
193 (aset status 6 0) ; r6: act
194 (aset status 7 nil) ; r7
195 (aset status 8 nil) ; ic
196 (ccl-execute-on-string
199 ',(lex-gen-ccl-unibyte-automata-program states cs)))
201 ,lex-scan-unibyte-str-var)
202 (setq ,lex-scan-unibyte-ptr-var (aref status 2))
203 (when (< 0 (aref status 6))
204 (setq ,lex-act-var (aref status 6)
205 ,lex-scan-unibyte-mch-var (aref status 5)))))
207 (defun lex-gen-ccl-unibyte-automata-program (states cs)
218 (lambda (s) (lex-gen-ccl-unibyte-automata-state
219 (nth 0 s) (cdr (nth 1 s)) (nth 2 s)
222 (lambda (a b) (< (car a) (car b))))))))))
224 (defun lex-gen-ccl-unibyte-automata-state (id act trans cs)
234 'natset-gen-ccl-branch
239 (let ((l (member (car tr) cs)))
242 (natset-seg (car l) (1- (cadr l)))))
251 (defmacro lex-match (id)
252 `(setq ,lex-act-var ',id))
253 (defmacro lex-fail ()
254 `(throw ',lex-escape-tag nil))
258 (defun lex-make-box (val)
260 (defalias 'lex-box-ref 'car)
262 (defun lex-add-box (box val)
263 (if (not (member val (car box)))
264 (setcar box (cons val (car box)))))
269 (mapcar (lambda (v) (set v (intern (symbol-name (symbol-value v)))))
273 lex-scan-multibyte-str-var
274 lex-scan-multibyte-ptr-var
275 lex-scan-multibyte-end-var
276 lex-scan-multibyte-mch-var
277 lex-scan-unibyte-str-var
278 lex-scan-unibyte-ptr-var
279 lex-scan-unibyte-end-var
280 lex-scan-unibyte-mch-var))
286 (let* ((str "abcdef\ndeefx\r\n jfdks\r")
289 (lex-scan-unibyte str p nil
293 (* (+ ?\r) [^ "\r\n"] (* [^ "\r"]))
298 (* (+ ?\r) [^ "\r\n"] (* [^ "\r"]))
303 (* (+ ?\r) [^ "\r\n"] (* [^ "\r"]))
308 (ew-crlf-line-convert "abcdef\ndeefx\r\n jfdks\r"
309 (lambda (a) (format "[L:%s]" a))
310 (lambda (a) (format "[F:%s]" a))
311 (lambda (a) (format "[N:%s]" a)))