(put 'lex-scan-multibyte 'lisp-indent-function 3)
(put 'lex-scan-unibyte 'lisp-indent-function 3)
+;;;
+
+(eval-and-compile
+(defvar lex-use-ccl (fboundp 'ccl-execute-on-string))
+(when lex-use-ccl
+ (require 'ccl))
+)
+
+;;; user interface macro
+
+;;; multibyte
+
+(defvar lex-scan-multibyte-str-var (make-symbol "str"))
+(defvar lex-scan-multibyte-ptr-var (make-symbol "ptr"))
+(defvar lex-scan-multibyte-end-var (make-symbol "end"))
+(defvar lex-scan-multibyte-mch-var (make-symbol "mch"))
+
+(defmacro lex-scan-multibyte-read (pc)
+ `(if (< ,lex-scan-multibyte-ptr-var ,lex-scan-multibyte-end-var)
+ (setq ,pc (sref ,lex-scan-multibyte-str-var ,lex-scan-multibyte-ptr-var)
+ ,lex-scan-multibyte-ptr-var (char-next-index ,pc ,lex-scan-multibyte-ptr-var)
+ ,pc (char-int ,pc))
+ (lex-fail)))
+
+(defmacro lex-scan-multibyte-save ()
+ `(setq ,lex-scan-multibyte-mch-var ,lex-scan-multibyte-ptr-var))
+
+(defmacro lex-scan-multibyte (str start end &rest clauses)
+ (if (not start) (setq start 0))
+ (if (not end) (setq end `(length ,lex-scan-multibyte-str-var)))
+ (let ((id 1) (rx ()) (acts ()) tmp code
+ (restore-code (if (symbolp start) `(setq ,start ,lex-scan-multibyte-mch-var))))
+ (while (consp clauses)
+ (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
+ acts (cons (cons id (cons restore-code (cdar clauses))) acts)
+ id (1+ id)
+ clauses (cdr clauses)))
+ (setq rx (rx-alt rx)
+ tmp (rx-categolize-char (rx-desugar rx)))
+ `(let* ((,lex-scan-multibyte-str-var ,str)
+ (,lex-scan-multibyte-ptr-var ,start)
+ (,lex-scan-multibyte-end-var ,end)
+ ,lex-scan-multibyte-mch-var)
+ ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-multibyte-read 'lex-scan-multibyte-save))))
+
+;;; unibyte
+
+(defvar lex-scan-unibyte-str-var (make-symbol "str"))
+(defvar lex-scan-unibyte-ptr-var (make-symbol "ptr"))
+(defvar lex-scan-unibyte-end-var (make-symbol "end"))
+(defvar lex-scan-unibyte-mch-var (make-symbol "mch"))
+
+(defmacro lex-scan-unibyte-read (pc)
+ `(if (< ,lex-scan-unibyte-ptr-var ,lex-scan-unibyte-end-var)
+ (setq ,pc (aref ,lex-scan-unibyte-str-var ,lex-scan-unibyte-ptr-var)
+ ,lex-scan-unibyte-ptr-var (1+ ,lex-scan-unibyte-ptr-var)
+ ,pc (char-int ,pc))
+ (lex-fail)))
+
+(defmacro lex-scan-unibyte-save ()
+ `(setq ,lex-scan-unibyte-mch-var ,lex-scan-unibyte-ptr-var))
+
+(defmacro lex-scan-unibyte (str start end &rest clauses)
+ (if (not start) (setq start 0))
+ (if (not end) (setq end `(length ,lex-scan-unibyte-str-var)))
+ (let ((id 1) (rx ()) (acts ()) tmp code
+ (restore-code (if (symbolp start) `(setq ,start ,lex-scan-unibyte-mch-var))))
+ (while (consp clauses)
+ (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
+ acts (cons (cons id (cons restore-code (cdar clauses))) acts)
+ id (1+ id)
+ clauses (cdr clauses)))
+ (setq rx (rx-alt rx)
+ tmp (rx-categolize-char (rx-desugar rx)))
+ `(let* ((,lex-scan-unibyte-str-var ,str)
+ (,lex-scan-unibyte-ptr-var ,start)
+ (,lex-scan-unibyte-end-var ,end)
+ ,lex-scan-unibyte-mch-var)
+ ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-unibyte-read 'lex-scan-unibyte-save))))
+
;;; automata generation
(defun lex-automata (rx)
(defun lex-gen-machine (states cs acts read-macro save-macro)
`(let (,lex-pc-var ,lex-act-var)
- (catch ',lex-escape-tag
- (automata
- ,lex-pc-var 0
- ,@(mapcar
- (lambda (s) (lex-gen-state s cs read-macro save-macro))
- states)))
- (automata-branch
- ,lex-act-var ,(apply 'natset-single (mapcar 'car acts)) automata-never-fail
+ ,(if (and lex-use-ccl
+ (eq read-macro 'lex-scan-unibyte-read)
+ (eq save-macro 'lex-scan-unibyte-save))
+ (lex-gen-ccl-unibyte-automata states cs)
+ (lex-gen-automata states cs read-macro save-macro))
+ ,(lex-gen-action acts)))
+
+(defun lex-gen-automata (states cs read-macro save-macro)
+ `(catch ',lex-escape-tag
+ (automata
+ ,lex-pc-var 0
,@(mapcar
- (lambda (act) `(,(natset-single (car act)) nil ,@(cdr act)))
- acts))))
+ (lambda (s) (lex-gen-state s cs read-macro save-macro))
+ states))))
(defun lex-gen-state (s cs read-macro save-macro)
(let ((id (nth 0 s))
,(cadr tr)))
trans))))
-;;; internal macros
+(defun lex-gen-action (acts)
+ `(automata-branch
+ ,lex-act-var ,(apply 'natset-single (mapcar 'car acts)) automata-never-fail
+ ,@(mapcar
+ (lambda (act) `(,(natset-single (car act)) nil ,@(cdr act)))
+ acts)))
-(defmacro lex-match (id)
- `(setq ,lex-act-var ',id))
-(defmacro lex-fail ()
- `(throw ',lex-escape-tag nil))
-
-;;; user interface macro
-
-;;; multibyte
-
-(defvar lex-scan-multibyte-str-var (make-symbol "str"))
-(defvar lex-scan-multibyte-ptr-var (make-symbol "ptr"))
-(defvar lex-scan-multibyte-end-var (make-symbol "end"))
-(defvar lex-scan-multibyte-mch-var (make-symbol "mch"))
-
-(defmacro lex-scan-multibyte-read (pc)
- `(if (< ,lex-scan-multibyte-ptr-var ,lex-scan-multibyte-end-var)
- (setq ,pc (sref ,lex-scan-multibyte-str-var ,lex-scan-multibyte-ptr-var)
- ,lex-scan-multibyte-ptr-var (char-next-index ,pc ,lex-scan-multibyte-ptr-var)
- ,pc (char-int ,pc))
- (lex-fail)))
-
-(defmacro lex-scan-multibyte-save ()
- `(setq ,lex-scan-multibyte-mch-var ,lex-scan-multibyte-ptr-var))
-
-(defmacro lex-scan-multibyte (str start end &rest clauses)
- (if (not start) (setq start 0))
- (if (not end) (setq end `(length ,lex-scan-multibyte-str-var)))
- (let ((id 1) (rx ()) (acts ()) tmp code
- (restore-code (if (symbolp start) `(setq ,start ,lex-scan-multibyte-mch-var))))
- (while (consp clauses)
- (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
- acts (cons (cons id (cons restore-code (cdar clauses))) acts)
- id (1+ id)
- clauses (cdr clauses)))
- (setq rx (rx-alt rx)
- tmp (rx-categolize-char (rx-desugar rx)))
- `(let* ((,lex-scan-multibyte-str-var ,str)
- (,lex-scan-multibyte-ptr-var ,start)
- (,lex-scan-multibyte-end-var ,end)
- ,lex-scan-multibyte-mch-var)
- ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-multibyte-read 'lex-scan-multibyte-save))))
+;;; CCL version automata generation
-;;; unibyte
+(defun lex-gen-ccl-unibyte-automata (states cs)
+ ;; read-macro is lex-scan-unibyte-read
+ ;; save-macro is lex-scan-unibyte-save
+ `(let ((status [nil nil nil nil nil nil nil nil nil]))
+ (aset status 0 nil) ; r0: pc
+ (aset status 1 0) ; r1: state
+ (aset status 2 ,lex-scan-unibyte-ptr-var) ; r2: ptr
+ (aset status 3 ,lex-scan-unibyte-ptr-var) ; r3: start
+ (aset status 4 ,lex-scan-unibyte-end-var) ; r4: end
+ (aset status 5 nil) ; r5: mch
+ (aset status 6 0) ; r6: act
+ (aset status 7 nil) ; r7
+ (aset status 8 nil) ; ic
+ (ccl-execute-on-string
+ (eval-when-compile
+ (ccl-compile
+ ',(lex-gen-ccl-unibyte-automata-program states cs)))
+ status
+ ,lex-scan-unibyte-str-var)
+ (setq ,lex-scan-unibyte-ptr-var (aref status 2))
+ (when (< 0 (aref status 6))
+ (setq ,lex-act-var (aref status 6)
+ ,lex-scan-unibyte-mch-var (aref status 5)))))
-(defvar lex-scan-unibyte-str-var (make-symbol "str"))
-(defvar lex-scan-unibyte-ptr-var (make-symbol "ptr"))
-(defvar lex-scan-unibyte-end-var (make-symbol "end"))
-(defvar lex-scan-unibyte-mch-var (make-symbol "mch"))
+(defun lex-gen-ccl-unibyte-automata-program (states cs)
+ `(0
+ ((loop
+ (if (r3 > 0)
+ ((r3 -= 1)
+ (read r0)
+ (repeat))
+ (break)))
+ (loop
+ (branch r1
+ ,@(mapcar
+ (lambda (s) (lex-gen-ccl-unibyte-automata-state
+ (nth 0 s) (cdr (nth 1 s)) (nth 2 s)
+ cs))
+ (sort states
+ (lambda (a b) (< (car a) (car b))))))))))
-(defmacro lex-scan-unibyte-read (pc)
- `(if (< ,lex-scan-unibyte-ptr-var ,lex-scan-unibyte-end-var)
- (setq ,pc (aref ,lex-scan-unibyte-str-var ,lex-scan-unibyte-ptr-var)
- ,lex-scan-unibyte-ptr-var (1+ ,lex-scan-unibyte-ptr-var)
- ,pc (char-int ,pc))
- (lex-fail)))
+(defun lex-gen-ccl-unibyte-automata-state (id act trans cs)
+ `(,@(when act
+ `((r5 = r2)
+ (r6 = ,act)))
+ ,@(if (consp trans)
+ `((if (r4 <= r2)
+ (end)
+ ((read r0)
+ (r2 += 1)
+ ,(apply
+ 'natset-gen-ccl-branch
+ 'r0
+ '(end)
+ (mapcar
+ (lambda (tr) (cons
+ (let ((l (member (car tr) cs)))
+ (if (null (cdr l))
+ (natset-seg (car l))
+ (natset-seg (car l) (1- (cadr l)))))
+ `(r1 = ,(cadr tr))))
+ trans))
+ (repeat))))
+ '((end)))))
-(defmacro lex-scan-unibyte-save ()
- `(setq ,lex-scan-unibyte-mch-var ,lex-scan-unibyte-ptr-var))
+;;; internal macros
-(defmacro lex-scan-unibyte (str start end &rest clauses)
- (if (not start) (setq start 0))
- (if (not end) (setq end `(length ,lex-scan-unibyte-str-var)))
- (let ((id 1) (rx ()) (acts ()) tmp code
- (restore-code (if (symbolp start) `(setq ,start ,lex-scan-unibyte-mch-var))))
- (while (consp clauses)
- (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx)
- acts (cons (cons id (cons restore-code (cdar clauses))) acts)
- id (1+ id)
- clauses (cdr clauses)))
- (setq rx (rx-alt rx)
- tmp (rx-categolize-char (rx-desugar rx)))
- `(let* ((,lex-scan-unibyte-str-var ,str)
- (,lex-scan-unibyte-ptr-var ,start)
- (,lex-scan-unibyte-end-var ,end)
- ,lex-scan-unibyte-mch-var)
- ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-unibyte-read 'lex-scan-unibyte-save))))
+(defmacro lex-match (id)
+ `(setq ,lex-act-var ',id))
+(defmacro lex-fail ()
+ `(throw ',lex-escape-tag nil))
;;; utilities
"aaa" 0 3
(?a 'a))
+(let* ((str "abcdef\ndeefx\r\n jfdks\r")
+ (p 15))
+ (cons
+ (lex-scan-unibyte str p nil
+ (()
+ 'error)
+ (((* [^ "\r\n"])
+ (* (+ ?\r) [^ "\r\n"] (* [^ "\r"]))
+ (* ?\r)
+ (?\r ?\n [" \t"]))
+ 'line-fold)
+ (((* [^ "\r\n"])
+ (* (+ ?\r) [^ "\r\n"] (* [^ "\r"]))
+ (* ?\r)
+ (?\r ?\n))
+ 'line-crlf)
+ (((* [^ "\r\n"])
+ (* (+ ?\r) [^ "\r\n"] (* [^ "\r"]))
+ (* ?\r))
+ 'line))
+ p))
+
+(ew-crlf-line-convert "abcdef\ndeefx\r\n jfdks\r"
+ (lambda (a) (format "[L:%s]" a))
+ (lambda (a) (format "[F:%s]" a))
+ (lambda (a) (format "[N:%s]" a)))
+
+
)
+