(require 'lex)
(require 'automata)
(require 'ew-data)
(require 'ew-parse)
(provide 'ew-scan-s)

(defmacro ew-scan-std11 (scan col str)
  "Expand into a loop that splits STR into std11-style raw fragments.

SCAN is the name of the scanner macro placed at the head of each
pattern table; this file uses `lex-scan-unibyte' and
`lex-scan-multibyte'.  COL and STR are forms; each is evaluated
exactly once and both are handed to `ew-make-anchor'.  STR is the
string that is scanned from index 0 up to its length.

The expansion keeps a MODE variable that is one of `token',
`quoted-string', `domain-literal' or `comment', and a NEST counter
for nested comments.  Every matched piece is recorded on the anchor
through `ew-add-frag', `ew-add-token', `ew-add-open',
`ew-add-close' or `ew-add-close-token'.  After the loop
`ew-terminate' is called on the anchor and the anchor is the value
of the expansion.

The expansion binds the variables col, str, res, mode, p, q, r and
nest around the scanner; none of them is a generated symbol."
  ;; COL and STR are bound in an outer `let' so that the argument forms
  ;; are evaluated once, in the caller's scope, before any of the
  ;; scanner's own variables exist.  Without this the template would
  ;; silently read whatever variables named `col' and `str' happen to be
  ;; visible at the call site instead of the macro arguments.
  `(let ((col ,col)
         (str ,str))
     (let ((res (ew-make-anchor col str))
           (mode 'token)
           (p 0)                        ; current scan position
           (q (length str))             ; end of input
           r                            ; start of the piece being matched
           nest)                        ; comment nesting depth
       (while (< p q)
         (setq r p)
         (cond
          ;; ---- outside of any quoted construct ----
          ((eq mode 'token)
           (,scan
            str p q
            ([" \t"] (ew-add-frag res r p 'ew:raw-wsp-tok))
            (?< (ew-add-token res r p 'ew:raw-lt-tok))
            (?> (ew-add-token res r p 'ew:raw-gt-tok))
            (?@ (ew-add-token res r p 'ew:raw-at-tok))
            (?, (ew-add-token res r p 'ew:raw-comma-tok))
            (?\; (ew-add-token res r p 'ew:raw-semicolon-tok))
            (?: (ew-add-token res r p 'ew:raw-colon-tok))
            (?. (ew-add-token res r p 'ew:raw-dot-tok))
            ;; CR LF followed by space/tab is a fold; CR LF followed by
            ;; anything else turns the whole remainder into an error
            ;; fragment (p is moved to q, which ends the loop).
            ((?\r ?\n [" \t"]) (ew-add-frag res r p 'ew:raw-fold-tok))
            ((?\r ?\n [^ " \t"]) (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
            ((+ [(?a ?z) (?A ?Z) (?0 ?9) "!#$%&'*+-/=?^_`{|}~" non-ascii])
             (ew-add-token res r p 'ew:raw-atom-tok))
            ;; Opening delimiters switch the mode for the next iteration.
            (?\" (ew-add-open res r p 'ew:raw-qs-begin-tok)
                 (setq mode 'quoted-string))
            (?\[ (ew-add-open res r p 'ew:raw-dl-begin-tok)
                 (setq mode 'domain-literal))
            (?\( (ew-add-open res r p 'ew:raw-cm-begin-tok)
                 (setq mode 'comment nest 1))
            ;; Fallback clause: the rest of the input becomes an error
            ;; fragment.  NOTE(review): `()' is presumably the lex
            ;; notation for "nothing else matched" -- confirm in lex.el.
            (() (ew-add-frag res r q 'ew:raw-err-tok) (setq p q))))

          ;; ---- inside "..." ----
          ((eq mode 'quoted-string)
           (,scan
            str p q
            (?\" (ew-add-close-token res r p 'ew:raw-qs-end-tok)
                 (setq mode 'token))
            ((?\\ ?\r ?\n [" \t"]) (ew-add-frag res r p 'ew:raw-qs-qfold-tok))
            ((?\\ ?\r ?\n [^ " \t"]) (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
            ;; Text that ends in CR LF + space/tab.  The last three
            ;; matched characters are the fold, so anything before them
            ;; is emitted first as a separate texts fragment.
            (((* [^ "\"\\ \t\r"])
              (* (+ ?\r) [^ "\"\\ \t\r\n"] (* [^ "\"\\ \t\r"]))
              (* ?\r)
              (?\r ?\n [" \t"]))
             (when (< r (- p 3))
               (ew-add-frag res r (- p 3) 'ew:raw-qs-texts-tok)
               (setq r (- p 3)))
             (ew-add-frag res r p 'ew:raw-qs-fold-tok))
            ;; Text that ends in CR LF + a non-space/tab character: the
            ;; leading text is kept, the remainder of the input is an error.
            (((* [^ "\"\\ \t\r"])
              (* (+ ?\r) [^ "\"\\ \t\r\n"] (* [^ "\"\\ \t\r"]))
              (* ?\r)
              (?\r ?\n [^ " \t"]))
             (when (< r (- p 3))
               (ew-add-frag res r (- p 3) 'ew:raw-qs-texts-tok)
               (setq r (- p 3)))
             (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
            ((?\\ (any)) (ew-add-frag res r p 'ew:raw-qs-qpair-tok))
            ([" \t"] (ew-add-frag res r p 'ew:raw-qs-wsp-tok))
            ;; Plain text run.  An empty match (r = p) would not advance
            ;; the scan, so it is turned into an error fragment instead.
            (((* [^ "\"\\ \t\r"])
              (* (+ ?\r) [^ "\"\\ \t\r\n"] (* [^ "\"\\ \t\r"]))
              (* ?\r))
             (if (< r p)
                 (ew-add-frag res r p 'ew:raw-qs-texts-tok)
               (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))

          ;; ---- inside [...] ----
          ((eq mode 'domain-literal)
           (,scan
            str p q
            (?\] (ew-add-close-token res r p 'ew:raw-dl-end-tok)
                 (setq mode 'token))
            ((?\\ ?\r ?\n [" \t"]) (ew-add-frag res r p 'ew:raw-dl-qfold-tok))
            ((?\\ ?\r ?\n [^ " \t"]) (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
            ;; Same text/fold splitting as in quoted-string mode, with
            ;; the brackets as the excluded delimiters.
            (((* [^ "[]\\ \t\r"])
              (* (+ ?\r) [^ "[]\\ \t\r\n"] (* [^ "[]\\ \t\r"]))
              (* ?\r)
              (?\r ?\n [" \t"]))
             (when (< r (- p 3))
               (ew-add-frag res r (- p 3) 'ew:raw-dl-texts-tok)
               (setq r (- p 3)))
             (ew-add-frag res r p 'ew:raw-dl-fold-tok))
            (((* [^ "[]\\ \t\r"])
              (* (+ ?\r) [^ "[]\\ \t\r\n"] (* [^ "[]\\ \t\r"]))
              (* ?\r)
              (?\r ?\n [^ " \t"]))
             (when (< r (- p 3))
               (ew-add-frag res r (- p 3) 'ew:raw-dl-texts-tok)
               (setq r (- p 3)))
             (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
            ((?\\ (any)) (ew-add-frag res r p 'ew:raw-dl-qpair-tok))
            ([" \t"] (ew-add-frag res r p 'ew:raw-dl-wsp-tok))
            (((* [^ "[]\\ \t\r"])
              (* (+ ?\r) [^ "[]\\ \t\r\n"] (* [^ "[]\\ \t\r"]))
              (* ?\r))
             (if (< r p)
                 (ew-add-frag res r p 'ew:raw-dl-texts-tok)
               (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))

          ;; ---- inside (...), possibly nested ----
          ((eq mode 'comment)
           (,scan
            str p q
            (?\( (ew-add-open res r p 'ew:raw-cm-nested-begin-tok)
                 (setq nest (1+ nest)))
            ;; Only the parenthesis that brings NEST back to zero ends
            ;; the comment and returns to token mode.
            (?\) (setq nest (1- nest))
                 (if (zerop nest)
                     (progn
                       (ew-add-close res r p 'ew:raw-cm-end-tok)
                       (setq mode 'token))
                   (ew-add-close res r p 'ew:raw-cm-nested-end-tok)))
            ((?\\ ?\r ?\n [" \t"]) (ew-add-frag res r p 'ew:raw-cm-qfold-tok))
            ((?\\ ?\r ?\n [^ " \t"]) (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
            ;; Same text/fold splitting as in quoted-string mode, with
            ;; the parentheses as the excluded delimiters.
            (((* [^ "()\\ \t\r"])
              (* (+ ?\r) [^ "()\\ \t\r\n"] (* [^ "()\\ \t\r"]))
              (* ?\r)
              (?\r ?\n [" \t"]))
             (when (< r (- p 3))
               (ew-add-frag res r (- p 3) 'ew:raw-cm-texts-tok)
               (setq r (- p 3)))
             (ew-add-frag res r p 'ew:raw-cm-fold-tok))
            (((* [^ "()\\ \t\r"])
              (* (+ ?\r) [^ "()\\ \t\r\n"] (* [^ "()\\ \t\r"]))
              (* ?\r)
              (?\r ?\n [^ " \t"]))
             (when (< r (- p 3))
               (ew-add-frag res r (- p 3) 'ew:raw-cm-texts-tok)
               (setq r (- p 3)))
             (ew-add-frag res r (setq p q) 'ew:raw-err-tok))
            ((?\\ (any)) (ew-add-frag res r p 'ew:raw-cm-qpair-tok))
            ([" \t"] (ew-add-frag res r p 'ew:raw-cm-wsp-tok))
            (((* [^ "()\\ \t\r"])
              (* (+ ?\r) [^ "()\\ \t\r\n"] (* [^ "()\\ \t\r"]))
              (* ?\r))
             (if (< r p)
                 (ew-add-frag res r p 'ew:raw-cm-texts-tok)
               (ew-add-frag res r (setq p q) 'ew:raw-err-tok)))))))
       (ew-terminate res)
       res)))

(defun ew-scan-unibyte-std11 (col str)
  "Scan STR with the `ew-scan-std11' tables driven by `lex-scan-unibyte'.
COL and STR are passed to `ew-make-anchor'; the resulting anchor is
returned after `ew-terminate' has been called on it."
  (ew-scan-std11 lex-scan-unibyte col str))

(defun ew-scan-multibyte-std11 (col str)
  "Scan STR with the `ew-scan-std11' tables driven by `lex-scan-multibyte'.
COL and STR are passed to `ew-make-anchor'; the resulting anchor is
returned after `ew-terminate' has been called on it."
  (ew-scan-std11 lex-scan-multibyte col str))

;; The list below is quoted, so loading the file does not run it.  It
;; holds sample invocations that can be evaluated by hand.
'(
  (npp
   (mapcar 'symbol-plist
           (ew-frag-list
            (ew-scan-unibyte-std11 0 " Tanaka Akira (Tanaka Akira)"))))

  (npp
   (mapcar (lambda (frag) (cons (get frag 'type) (symbol-name frag)))
           (ew-frag-list
            (ew-scan-unibyte-std11 0 " Tanaka Akira (Tanaka Akira)"))))
  )