From 321fe06f7c522588348cee5be193bfe8b578983c Mon Sep 17 00:00:00 2001 From: akr Date: Sat, 22 Aug 1998 18:49:06 +0000 Subject: [PATCH] * TESTPAT: Use `\t' instead of raw TAB character. * ew-line.el (ew-crlf-line-generic): Accept bare LF before first CR occurence. * ew-quote.el: Evaluate constants on byte-compile time. * ew-unit.el: Ditto. * ew-var.el: Ditto. * lex.el: CCL support. (lex-use-ccl): New variable. (lex-gen-machine): Use `lex-gen-automata' and `lex-gen-action'. (lex-gen-automata): New function. (lex-gen-action): New function. (lex-gen-ccl-unibyte-automata): New function. (lex-gen-ccl-unibyte-automata-program): New function. (lex-gen-ccl-unibyte-automata-state): New function. * natset.el (natset-gen-ccl-branch): New function. (natset-gen-ccl-branch-internal): New function. (natset-assoc): New function. --- ChangeLog | 26 ++++++ TESTPAT | 28 +++--- ew-line.el | 6 +- ew-quote.el | 24 +++--- ew-unit.el | 29 ++++--- ew-var.el | 21 +++-- lex.el | 278 +++++++++++++++++++++++++++++++++++++++++------------------ natset.el | 54 ++++++++++++ 8 files changed, 335 insertions(+), 131 deletions(-) diff --git a/ChangeLog b/ChangeLog index e749096..71306cc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +1998-08-22 Tanaka Akira + + * TESTPAT: Use `\t' instead of raw TAB character. + + * ew-line.el (ew-crlf-line-generic): Accept bare LF before first CR + occurence. + + * ew-quote.el: Evaluate constants on byte-compile time. + + * ew-unit.el: Ditto. + + * ew-var.el: Ditto. + + * lex.el: CCL support. + (lex-use-ccl): New variable. + (lex-gen-machine): Use `lex-gen-automata' and `lex-gen-action'. + (lex-gen-automata): New function. + (lex-gen-action): New function. + (lex-gen-ccl-unibyte-automata): New function. + (lex-gen-ccl-unibyte-automata-program): New function. + (lex-gen-ccl-unibyte-automata-state): New function. + + * natset.el (natset-gen-ccl-branch): New function. + (natset-gen-ccl-branch-internal): New function. + (natset-assoc): New function. + 1998-08-21 Shuhei KOBAYASHI * TESTPAT: Specify coding-system. diff --git a/TESTPAT b/TESTPAT index 7becd18..1290c99 100644 --- a/TESTPAT +++ b/TESTPAT @@ -12,10 +12,10 @@ ;;separate-sticked-comment ; violate the policy preserving absence of space ;;separate-sticked-special ; violate the policy preserving absence of space ;;zero-characters-encoded-word-hack ; violate common sense (very tricky) -;;embedded-encoded-word ; violate character sequence semantics +;;embedded-encoded-word ; violate character sequence semantics ;;separate-sticked-tokens-for-fold ; violate the policy preserving absence of space ;;encode-long-ascii-string-for-fold ; violate the policy preserving US-ASCII string -;;divide-atom-for-fold ; violate tne policy preserving absence of space +;;divide-atom-for-fold ; violate tne policy preserving absence of space ;;; test driver @@ -157,8 +157,8 @@ (decode "From: Nathaniel Borenstein \r - (=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)" -"From: Nathaniel Borenstein (.HNmNeNlNy NoNa NiNlNhNtNp)")*B +\t(=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)" +"From: Nathaniel Borenstein \t(.HNmNeNlNy NoNa NiNlNhNtNp)")*B (decode "From: foo@bar.baz (=?ISO-8859-1?Q?a?=)" @@ -178,7 +178,7 @@ (decode "From: foo@bar.baz (=?ISO-8859-1?Q?a?=\r - =?ISO-8859-1?Q?b?=)" +\t=?ISO-8859-1?Q?b?=)" "From: foo@bar.baz (ab)") (decode @@ -207,8 +207,8 @@ (decode "Subject: (=?ISO-8859-1?Q?a?=\r - =?ISO-8859-1?Q?b?=)" -"Subject: (=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)") +\t=?ISO-8859-1?Q?b?=)" +"Subject: (=?ISO-8859-1?Q?a?=\t=?ISO-8859-1?Q?b?=)") (decode "Subject: (=?ISO-8859-1?Q?a_b?=)" @@ -237,7 +237,7 @@ (decode "From: =?ISO-2022-JP?B?GyRCJCIbKEI=?=\r - =?ISO-2022-JP?B?GyRCJCQbKEI=?=\r +\t=?ISO-2022-JP?B?GyRCJCQbKEI=?=\r " "From: $B$"$$(B ") @@ -1226,7 +1226,7 @@ (decode "Subject: =?Unknown-Charset?B?gqA=?=\r - =?Unknown-Charset?B?gqA=?=" +\t=?Unknown-Charset?B?gqA=?=" "Subject: =?+Unknown-Charset?B?gqA=?==?+Unknown-Charset?B?gqA=?=" 'embedded-encoded-word) @@ -1288,8 +1288,8 @@ "From: akr@jaist.ac.jp (\r=?US-ASCII?Q?=0A?= )") (decode -"From: akr@jaist.ac.jp (\r=?US-ASCII?Q?=0A?= )" -"From: akr@jaist.ac.jp (\r=?US-ASCII?Q?=0A?= )") +"From: akr@jaist.ac.jp (\r=?US-ASCII?Q?=0A?=\t)" +"From: akr@jaist.ac.jp (\r=?US-ASCII?Q?=0A?=\t)") (decode "From: akr@jaist.ac.jp (\\\r\\\n)" @@ -1311,15 +1311,15 @@ (decode "From: akr@jaist.ac.jp (\\\r=?US-ASCII?Q?=0A=09?=)" -"From: akr@jaist.ac.jp (\\ )") +"From: akr@jaist.ac.jp (\\\t)") (decode "From: akr@jaist.ac.jp (\\\r=?US-ASCII?Q?=0A?= )" "From: akr@jaist.ac.jp (\\ )") (decode -"From: akr@jaist.ac.jp (\\\r=?US-ASCII?Q?=0A?= )" -"From: akr@jaist.ac.jp (\\ )") +"From: akr@jaist.ac.jp (\\\r=?US-ASCII?Q?=0A?=\t)" +"From: akr@jaist.ac.jp (\\\t)") (decode "From: akr@jaist.ac.jp (\\\r=?US-ASCII?Q?=0A?=\r diff --git a/ew-line.el b/ew-line.el index 831d508..226935a 100644 --- a/ew-line.el +++ b/ew-line.el @@ -171,19 +171,19 @@ (setq ,r ,p) (lex-scan-unibyte ,str ,p ,q (() (error "something wrong")) - (((* [^ "\r\n"]) + (((* [^ "\r"]) (* (+ ?\r) [^ "\r\n"] (* [^ "\r"])) (* ?\r) (?\r ?\n [" \t"])) (when (and ,others-fun (< ,r (- ,p 3))) (funcall ,others-fun ,r (- ,p 3))) (when ,fold-fun (funcall ,fold-fun (- ,p 3) ,p))) - (((* [^ "\r\n"]) + (((* [^ "\r"]) (* (+ ?\r) [^ "\r\n"] (* [^ "\r"])) (* ?\r) (?\r ?\n)) (when (and ,others-fun (< ,r (- ,p 2))) (funcall ,others-fun ,r (- ,p 2))) (when ,nl-fun (funcall ,nl-fun (- ,p 2) ,p))) - (((* [^ "\r\n"]) + (((* [^ "\r"]) (* (+ ?\r) [^ "\r\n"] (* [^ "\r"])) (* ?\r)) (when ,others-fun (funcall ,others-fun ,r ,p))))) diff --git a/ew-quote.el b/ew-quote.el index b37e01f..583d060 100644 --- a/ew-quote.el +++ b/ew-quote.el @@ -18,19 +18,23 @@ ;; B : token. ;; C : encoded-text. -(defconst ew-quoting-char ?+) +(eval-and-compile + (defconst ew-quoting-char ?+)) (defconst ew-quoting-chars-regexp - (concat (regexp-quote (char-to-string ew-quoting-char)) "*")) + (eval-when-compile + (concat (regexp-quote (char-to-string ew-quoting-char)) "*"))) (defconst ew-type2-regexp - (concat (regexp-quote "=?") - "\\(" ew-token-regexp "\\)" - (regexp-quote "?") - "\\(" ew-token-regexp "\\)" - (regexp-quote "?") - "\\(" ew-encoded-text-regexp "\\)" - (regexp-quote "?") - "\\'")) + (eval-when-compile + (require 'ew-var) + (concat (regexp-quote "=?") + "\\(" ew-token-regexp "\\)" + (regexp-quote "?") + "\\(" ew-token-regexp "\\)" + (regexp-quote "?") + "\\(" ew-encoded-text-regexp "\\)" + (regexp-quote "?") + "\\'"))) ;;; diff --git a/ew-unit.el b/ew-unit.el index 3b47e3d..ab4af48 100644 --- a/ew-unit.el +++ b/ew-unit.el @@ -6,21 +6,24 @@ (provide 'ew-unit) (defconst ew-anchored-encoded-word-regexp - (concat "\\`" ew-encoded-word-regexp "\\'")) + (eval-when-compile + (require 'ew-var) + (concat "\\`" ew-encoded-word-regexp "\\'"))) (defconst ew-b-regexp - (concat "\\`\\(" - "[A-Za-z0-9+/]" - "[A-Za-z0-9+/]" - "[A-Za-z0-9+/]" - "[A-Za-z0-9+/]" - "\\)*" - "[A-Za-z0-9+/]" - "[A-Za-z0-9+/]" - "\\(==\\|" - "[A-Za-z0-9+/]" - "[A-Za-z0-9+/=]" - "\\)\\'")) + (eval-when-compile + (concat "\\`\\(" + "[A-Za-z0-9+/]" + "[A-Za-z0-9+/]" + "[A-Za-z0-9+/]" + "[A-Za-z0-9+/]" + "\\)*" + "[A-Za-z0-9+/]" + "[A-Za-z0-9+/]" + "\\(==\\|" + "[A-Za-z0-9+/]" + "[A-Za-z0-9+/=]" + "\\)\\'"))) (defconst ew-q-regexp "\\`\\([^=?]\\|=[0-9A-Fa-f][0-9A-Fa-f]\\)*\\'") diff --git a/ew-var.el b/ew-var.el index 66758a6..9ac619b 100644 --- a/ew-var.el +++ b/ew-var.el @@ -53,16 +53,19 @@ ;;; constants. -(defconst ew-token-regexp "[-!#-'*+0-9A-Z^-~]+") -(defconst ew-encoded-text-regexp "[!->@-~]+") +(eval-and-compile + (defconst ew-token-regexp "[-!#-'*+0-9A-Z^-~]+") + (defconst ew-encoded-text-regexp "[!->@-~]+") +) (defconst ew-encoded-word-regexp - (concat (regexp-quote "=?") - "\\(" ew-token-regexp "\\)" - (regexp-quote "?") - "\\(" ew-token-regexp "\\)" - (regexp-quote "?") - "\\(" ew-encoded-text-regexp "\\)" - (regexp-quote "?="))) + (eval-when-compile + (concat (regexp-quote "=?") + "\\(" ew-token-regexp "\\)" + (regexp-quote "?") + "\\(" ew-token-regexp "\\)" + (regexp-quote "?") + "\\(" ew-encoded-text-regexp "\\)" + (regexp-quote "?=")))) ;;; utilities for variables. diff --git a/lex.el b/lex.el index cd0040d..ede088e 100644 --- a/lex.el +++ b/lex.el @@ -6,6 +6,86 @@ (put 'lex-scan-multibyte 'lisp-indent-function 3) (put 'lex-scan-unibyte 'lisp-indent-function 3) +;;; + +(eval-and-compile +(defvar lex-use-ccl (fboundp 'ccl-execute-on-string)) +(when lex-use-ccl + (require 'ccl)) +) + +;;; user interface macro + +;;; multibyte + +(defvar lex-scan-multibyte-str-var (make-symbol "str")) +(defvar lex-scan-multibyte-ptr-var (make-symbol "ptr")) +(defvar lex-scan-multibyte-end-var (make-symbol "end")) +(defvar lex-scan-multibyte-mch-var (make-symbol "mch")) + +(defmacro lex-scan-multibyte-read (pc) + `(if (< ,lex-scan-multibyte-ptr-var ,lex-scan-multibyte-end-var) + (setq ,pc (sref ,lex-scan-multibyte-str-var ,lex-scan-multibyte-ptr-var) + ,lex-scan-multibyte-ptr-var (char-next-index ,pc ,lex-scan-multibyte-ptr-var) + ,pc (char-int ,pc)) + (lex-fail))) + +(defmacro lex-scan-multibyte-save () + `(setq ,lex-scan-multibyte-mch-var ,lex-scan-multibyte-ptr-var)) + +(defmacro lex-scan-multibyte (str start end &rest clauses) + (if (not start) (setq start 0)) + (if (not end) (setq end `(length ,lex-scan-multibyte-str-var))) + (let ((id 1) (rx ()) (acts ()) tmp code + (restore-code (if (symbolp start) `(setq ,start ,lex-scan-multibyte-mch-var)))) + (while (consp clauses) + (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx) + acts (cons (cons id (cons restore-code (cdar clauses))) acts) + id (1+ id) + clauses (cdr clauses))) + (setq rx (rx-alt rx) + tmp (rx-categolize-char (rx-desugar rx))) + `(let* ((,lex-scan-multibyte-str-var ,str) + (,lex-scan-multibyte-ptr-var ,start) + (,lex-scan-multibyte-end-var ,end) + ,lex-scan-multibyte-mch-var) + ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-multibyte-read 'lex-scan-multibyte-save)))) + +;;; unibyte + +(defvar lex-scan-unibyte-str-var (make-symbol "str")) +(defvar lex-scan-unibyte-ptr-var (make-symbol "ptr")) +(defvar lex-scan-unibyte-end-var (make-symbol "end")) +(defvar lex-scan-unibyte-mch-var (make-symbol "mch")) + +(defmacro lex-scan-unibyte-read (pc) + `(if (< ,lex-scan-unibyte-ptr-var ,lex-scan-unibyte-end-var) + (setq ,pc (aref ,lex-scan-unibyte-str-var ,lex-scan-unibyte-ptr-var) + ,lex-scan-unibyte-ptr-var (1+ ,lex-scan-unibyte-ptr-var) + ,pc (char-int ,pc)) + (lex-fail))) + +(defmacro lex-scan-unibyte-save () + `(setq ,lex-scan-unibyte-mch-var ,lex-scan-unibyte-ptr-var)) + +(defmacro lex-scan-unibyte (str start end &rest clauses) + (if (not start) (setq start 0)) + (if (not end) (setq end `(length ,lex-scan-unibyte-str-var))) + (let ((id 1) (rx ()) (acts ()) tmp code + (restore-code (if (symbolp start) `(setq ,start ,lex-scan-unibyte-mch-var)))) + (while (consp clauses) + (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx) + acts (cons (cons id (cons restore-code (cdar clauses))) acts) + id (1+ id) + clauses (cdr clauses))) + (setq rx (rx-alt rx) + tmp (rx-categolize-char (rx-desugar rx))) + `(let* ((,lex-scan-unibyte-str-var ,str) + (,lex-scan-unibyte-ptr-var ,start) + (,lex-scan-unibyte-end-var ,end) + ,lex-scan-unibyte-mch-var) + ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-unibyte-read 'lex-scan-unibyte-save)))) + ;;; automata generation (defun lex-automata (rx) @@ -53,17 +133,20 @@ (defun lex-gen-machine (states cs acts read-macro save-macro) `(let (,lex-pc-var ,lex-act-var) - (catch ',lex-escape-tag - (automata - ,lex-pc-var 0 - ,@(mapcar - (lambda (s) (lex-gen-state s cs read-macro save-macro)) - states))) - (automata-branch - ,lex-act-var ,(apply 'natset-single (mapcar 'car acts)) automata-never-fail + ,(if (and lex-use-ccl + (eq read-macro 'lex-scan-unibyte-read) + (eq save-macro 'lex-scan-unibyte-save)) + (lex-gen-ccl-unibyte-automata states cs) + (lex-gen-automata states cs read-macro save-macro)) + ,(lex-gen-action acts))) + +(defun lex-gen-automata (states cs read-macro save-macro) + `(catch ',lex-escape-tag + (automata + ,lex-pc-var 0 ,@(mapcar - (lambda (act) `(,(natset-single (car act)) nil ,@(cdr act))) - acts)))) + (lambda (s) (lex-gen-state s cs read-macro save-macro)) + states)))) (defun lex-gen-state (s cs read-macro save-macro) (let ((id (nth 0 s)) @@ -84,84 +167,86 @@ ,(cadr tr))) trans)))) -;;; internal macros +(defun lex-gen-action (acts) + `(automata-branch + ,lex-act-var ,(apply 'natset-single (mapcar 'car acts)) automata-never-fail + ,@(mapcar + (lambda (act) `(,(natset-single (car act)) nil ,@(cdr act))) + acts))) -(defmacro lex-match (id) - `(setq ,lex-act-var ',id)) -(defmacro lex-fail () - `(throw ',lex-escape-tag nil)) - -;;; user interface macro - -;;; multibyte - -(defvar lex-scan-multibyte-str-var (make-symbol "str")) -(defvar lex-scan-multibyte-ptr-var (make-symbol "ptr")) -(defvar lex-scan-multibyte-end-var (make-symbol "end")) -(defvar lex-scan-multibyte-mch-var (make-symbol "mch")) - -(defmacro lex-scan-multibyte-read (pc) - `(if (< ,lex-scan-multibyte-ptr-var ,lex-scan-multibyte-end-var) - (setq ,pc (sref ,lex-scan-multibyte-str-var ,lex-scan-multibyte-ptr-var) - ,lex-scan-multibyte-ptr-var (char-next-index ,pc ,lex-scan-multibyte-ptr-var) - ,pc (char-int ,pc)) - (lex-fail))) - -(defmacro lex-scan-multibyte-save () - `(setq ,lex-scan-multibyte-mch-var ,lex-scan-multibyte-ptr-var)) - -(defmacro lex-scan-multibyte (str start end &rest clauses) - (if (not start) (setq start 0)) - (if (not end) (setq end `(length ,lex-scan-multibyte-str-var))) - (let ((id 1) (rx ()) (acts ()) tmp code - (restore-code (if (symbolp start) `(setq ,start ,lex-scan-multibyte-mch-var)))) - (while (consp clauses) - (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx) - acts (cons (cons id (cons restore-code (cdar clauses))) acts) - id (1+ id) - clauses (cdr clauses))) - (setq rx (rx-alt rx) - tmp (rx-categolize-char (rx-desugar rx))) - `(let* ((,lex-scan-multibyte-str-var ,str) - (,lex-scan-multibyte-ptr-var ,start) - (,lex-scan-multibyte-end-var ,end) - ,lex-scan-multibyte-mch-var) - ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-multibyte-read 'lex-scan-multibyte-save)))) +;;; CCL version automata generation -;;; unibyte +(defun lex-gen-ccl-unibyte-automata (states cs) + ;; read-macro is lex-scan-unibyte-read + ;; save-macro is lex-scan-unibyte-save + `(let ((status [nil nil nil nil nil nil nil nil nil])) + (aset status 0 nil) ; r0: pc + (aset status 1 0) ; r1: state + (aset status 2 ,lex-scan-unibyte-ptr-var) ; r2: ptr + (aset status 3 ,lex-scan-unibyte-ptr-var) ; r3: start + (aset status 4 ,lex-scan-unibyte-end-var) ; r4: end + (aset status 5 nil) ; r5: mch + (aset status 6 0) ; r6: act + (aset status 7 nil) ; r7 + (aset status 8 nil) ; ic + (ccl-execute-on-string + (eval-when-compile + (ccl-compile + ',(lex-gen-ccl-unibyte-automata-program states cs))) + status + ,lex-scan-unibyte-str-var) + (setq ,lex-scan-unibyte-ptr-var (aref status 2)) + (when (< 0 (aref status 6)) + (setq ,lex-act-var (aref status 6) + ,lex-scan-unibyte-mch-var (aref status 5))))) -(defvar lex-scan-unibyte-str-var (make-symbol "str")) -(defvar lex-scan-unibyte-ptr-var (make-symbol "ptr")) -(defvar lex-scan-unibyte-end-var (make-symbol "end")) -(defvar lex-scan-unibyte-mch-var (make-symbol "mch")) +(defun lex-gen-ccl-unibyte-automata-program (states cs) + `(0 + ((loop + (if (r3 > 0) + ((r3 -= 1) + (read r0) + (repeat)) + (break))) + (loop + (branch r1 + ,@(mapcar + (lambda (s) (lex-gen-ccl-unibyte-automata-state + (nth 0 s) (cdr (nth 1 s)) (nth 2 s) + cs)) + (sort states + (lambda (a b) (< (car a) (car b)))))))))) -(defmacro lex-scan-unibyte-read (pc) - `(if (< ,lex-scan-unibyte-ptr-var ,lex-scan-unibyte-end-var) - (setq ,pc (aref ,lex-scan-unibyte-str-var ,lex-scan-unibyte-ptr-var) - ,lex-scan-unibyte-ptr-var (1+ ,lex-scan-unibyte-ptr-var) - ,pc (char-int ,pc)) - (lex-fail))) +(defun lex-gen-ccl-unibyte-automata-state (id act trans cs) + `(,@(when act + `((r5 = r2) + (r6 = ,act))) + ,@(if (consp trans) + `((if (r4 <= r2) + (end) + ((read r0) + (r2 += 1) + ,(apply + 'natset-gen-ccl-branch + 'r0 + '(end) + (mapcar + (lambda (tr) (cons + (let ((l (member (car tr) cs))) + (if (null (cdr l)) + (natset-seg (car l)) + (natset-seg (car l) (1- (cadr l))))) + `(r1 = ,(cadr tr)))) + trans)) + (repeat)))) + '((end))))) -(defmacro lex-scan-unibyte-save () - `(setq ,lex-scan-unibyte-mch-var ,lex-scan-unibyte-ptr-var)) +;;; internal macros -(defmacro lex-scan-unibyte (str start end &rest clauses) - (if (not start) (setq start 0)) - (if (not end) (setq end `(length ,lex-scan-unibyte-str-var))) - (let ((id 1) (rx ()) (acts ()) tmp code - (restore-code (if (symbolp start) `(setq ,start ,lex-scan-unibyte-mch-var)))) - (while (consp clauses) - (setq rx (cons (rx-con (caar clauses) (rx-act id)) rx) - acts (cons (cons id (cons restore-code (cdar clauses))) acts) - id (1+ id) - clauses (cdr clauses))) - (setq rx (rx-alt rx) - tmp (rx-categolize-char (rx-desugar rx))) - `(let* ((,lex-scan-unibyte-str-var ,str) - (,lex-scan-unibyte-ptr-var ,start) - (,lex-scan-unibyte-end-var ,end) - ,lex-scan-unibyte-mch-var) - ,(lex-gen-machine (lex-automata (car tmp)) (cdr tmp) acts 'lex-scan-unibyte-read 'lex-scan-unibyte-save)))) +(defmacro lex-match (id) + `(setq ,lex-act-var ',id)) +(defmacro lex-fail () + `(throw ',lex-escape-tag nil)) ;;; utilities @@ -193,4 +278,33 @@ "aaa" 0 3 (?a 'a)) +(let* ((str "abcdef\ndeefx\r\n jfdks\r") + (p 15)) + (cons + (lex-scan-unibyte str p nil + (() + 'error) + (((* [^ "\r\n"]) + (* (+ ?\r) [^ "\r\n"] (* [^ "\r"])) + (* ?\r) + (?\r ?\n [" \t"])) + 'line-fold) + (((* [^ "\r\n"]) + (* (+ ?\r) [^ "\r\n"] (* [^ "\r"])) + (* ?\r) + (?\r ?\n)) + 'line-crlf) + (((* [^ "\r\n"]) + (* (+ ?\r) [^ "\r\n"] (* [^ "\r"])) + (* ?\r)) + 'line)) + p)) + +(ew-crlf-line-convert "abcdef\ndeefx\r\n jfdks\r" + (lambda (a) (format "[L:%s]" a)) + (lambda (a) (format "[F:%s]" a)) + (lambda (a) (format "[N:%s]" a))) + + ) + diff --git a/natset.el b/natset.el index 8887ea5..2a09041 100644 --- a/natset.el +++ b/natset.el @@ -187,6 +187,60 @@ It is impossible to set VALID to empty set because empty set is represented as n ((= (car ns) 0) (natset-gen-pred-exp-internal (cdr ns) var nil 0)) (t (natset-gen-pred-exp-internal ns var t 0)))) +(defun natset-gen-ccl-branch (reg fail &rest clauses) + (let* ((natsets (mapcar 'car clauses))) + (let ((range (apply 'natset-union natsets)) tmp) + (unless (natset-empty-p range) + (setq natsets (cons (natset-negate range) + natsets) + clauses (cons (cons (car natsets) + fail) + clauses))) + (setq range (natset-full) + tmp natsets) + (while tmp + (setcar tmp + (natset-intersection + (car tmp) + range)) + (setq range (natset-sub range (car tmp)) + tmp (cdr tmp)))) + (let ((b (natset-enum + (natset-sub + (apply + 'natset-union + (mapcar + 'natset-boundary-set + natsets)) + (natset-single 0))))) + (natset-gen-ccl-branch-internal reg 0 b clauses)))) + +(defun natset-gen-ccl-branch-internal (reg s b clauses) + (cond + ((null b) + (cdr (natset-assoc s clauses))) + ((null (cdr b)) + `(if (,reg < ,(car b)) + ,(cdr (natset-assoc s clauses)) + ,(cdr (natset-assoc (car b) clauses)))) + (t + (let* ((div (natset-divide (length b))) + (l (append b ())) + (g (nthcdr (1- div) l)) + (m (cadr g))) + (setq g (prog1 (cddr g) (setcdr g ()))) + `(if (,reg < ,m) + ,(natset-gen-ccl-branch-internal reg s l clauses) + ,(natset-gen-ccl-branch-internal reg m g clauses)))))) + +(defun natset-assoc (key alist) + (catch 'return + (while alist + (when (natset-has-p (caar alist) key) + (throw 'return (car alist))) + (setq alist (cdr alist))) + nil)) + ;;; internal primitive (defun natset-union2 (ns1 ns2) -- 1.7.10.4