From: handa Date: Thu, 24 Mar 2011 06:52:40 +0000 (+0000) Subject: *** empty log message *** X-Git-Url: http://git.chise.org/gitweb/?p=m17n%2Fm17n-lib-cs.git;a=commitdiff_plain;h=e8e1602f6504f4c43db49daff79fac714612c3ef *** empty log message *** --- diff --git a/peg.el b/peg.el new file mode 100644 index 0000000..728e6f6 --- /dev/null +++ b/peg.el @@ -0,0 +1,767 @@ +;;; peg.el --- Parsing Expression Grammars in Emacs Lisp +;; +;; Copyright 2008 Helmut Eller . +;; +;; Version: 0.4 (2008-Dec-17) +;; +;; This file is licensed under the terms of the GNU General Public +;; License as distributed with Emacs (press C-h C-c for details). +;; +;;; Commentary: +;; +;; Parsing Expression Grammars (PEG) are a formalism in the spirit of +;; Context Free Grammars (CFG) with some simplifications which makes +;; the implementation of PEGs as top-down parser particularly simple +;; and easy to understand [Ford, Baker]. +;; +;; This file implements a macro `peg-parse' which parses the current +;; buffer according to a PEG. E.g. we can match integers with a PEG +;; like this: +;; +;; (peg-parse (number sign digit (* digit)) +;; (sign (or "+" "-" "")) +;; (digit [0-9])) +;; +;; In contrast to regexps, PEGs allow us to define recursive rules. A +;; PEG is a list of rules. A rule is written as (NAME . PE). +;; E.g. (sign (or "+" "-" "")) is a rule with the name "sign". For +;; convenience, PE is implicitly wrapped in a and. The syntax for +;; Parsing Expression (PE) is a follows: +;; +;; Description Lisp Haskell, as in Ford's paper +;; Sequence (and e1 e2) e1 e2 +;; Prioritized Choice (or e1 e2) e1 / e2 +;; Not-predicate (not e) !e +;; And-predicate (if e) &e +;; Any character (any) . +;; Literal string "abc" "abc" +;; Character C (char c) 'c' +;; Zero-or-more (* e) e* +;; One-or-more (+ e) e+ +;; Optional (opt e) e? +;; Character range (range a b) [a-b] +;; Character set [a-b "+*" ?x] [a-b+*x] +;; +;; `peg-parse' also supports parsing actions, i.e. Lisp snippets which +;; are executed when a PE matches. This can be used to construct +;; syntax trees or for similar tasks. Actions are written as +;; +;; (action FORM) ; evaluate FORM +;; `(VAR... -- FORM...) ; stack action +;; +;; Actions don't consume input, but are executed at the point of +;; match. A "stack action" takes VARs from the "value stack" and +;; pushes the result of evaluating FORMs to that stack. See +;; `peg-ex-parse-int' for an example. +;; +;; References: +;; +;; [Ford] Bryan Ford. Parsing Expression Grammars: a Recognition-Based +;; Syntactic Foundation. In POPL'04: Proceedings of the 31st ACM +;; SIGPLAN-SIGACT symposium on Principles of Programming Languages, +;; pages 111-122, New York, NY, USA, 2004. ACM Press. +;; +;; [Baker] Baker, Henry G. "Pragmatic Parsing in Common Lisp". ACM Lisp +;; Pointers 4(2), April--June 1991, pp. 3--15. +;; + +(unless (>= emacs-major-version 22) + (error "peg.el requires Emacs version 22 or newer")) + +;;; Code: + +(defmacro peg-parse (&rest rules) + "Match RULES at point. +Return (T STACK) if the match succeed and nil on failure." + (peg-translate-rules rules)) + +(defmacro peg-parse-exp (exp) + "Match the parsing expression EXP at point. +Note: a PE can't \"call\" rules by name." + `(let ((peg-thunks nil)) + (when ,(peg-translate-exp (peg-normalize exp)) + (peg-postprocess peg-thunks)))) + +;; A table of the PEG rules. Used during compilation to resolve +;; references to named rules. +(defvar peg-rules) + +;; used at runtime for backtracking. It's a list ((POS . THUNK)...). +;; Each THUNK is executed at the corresponding POS. Thunks are +;; executed in a postprocessing step, not during parsing. +(defvar peg-thunks) + +;; The basic idea is to translate each rule to a lisp function. +;; The result looks like +;; (let ((rule1 (lambda () code-for-rule1)) +;; ... +;; (ruleN (lambda () code-for-ruleN))) +;; (funcall rule1)) +;; +;; code-for-ruleX returns t if the rule matches and nil otherwise. +;; +(defun peg-translate-rules (rules) + "Translate the PEG RULES, to a top-down parser." + (let ((peg-rules (make-hash-table :size 20))) + (dolist (rule rules) + (puthash (car rule) 'defer peg-rules)) + (dolist (rule rules) + (puthash (car rule) (peg-normalize `(and . ,(cdr rule))) peg-rules)) + (peg-check-cycles peg-rules) + `(let ((peg-thunks '()) . ,(mapcar #'car rules)) + ,@(mapcar (lambda (rule) + (let ((name (car rule))) + `(setq ,name + (lambda () + ,(peg-translate-exp (gethash name peg-rules)))))) + rules) + (when (funcall ,(car (car rules))) + (peg-postprocess peg-thunks))))) + +(eval-and-compile + (defun peg-method-table-name (method-name) + (intern (format "peg-%s-methods" method-name)))) + +(defmacro peg-define-method-table (name) + (let ((tab (peg-method-table-name name))) + `(progn + (defvar ,tab) + (setq ,tab (make-hash-table :size 20))))) + +(defmacro peg-add-method (method type args &rest body) + (declare (indent 3)) + `(puthash ',type (lambda ,args . ,body) ,(peg-method-table-name method))) + +(peg-define-method-table normalize) + +;; Internally we use a regularized syntax, e.g. we only have binary OR +;; nodes. Regularized nodes are lists of the form (OP ARGS...). +(defun peg-normalize (exp) + "Return a \"normalized\" form of EXP." + (cond ((and (consp exp) + (let ((fun (gethash (car exp) peg-normalize-methods))) + (and fun + (apply fun (cdr exp)))))) + ((stringp exp) + (let ((len (length exp))) + (cond ((zerop len) '(null)) + ((= len 1) `(char ,(aref exp 0))) + (t `(str ,exp))))) + ((and (symbolp exp) exp) + (when (not (gethash exp peg-rules)) + (error "Reference to undefined PEG rule: %S" exp)) + `(call ,exp)) + ((vectorp exp) + (peg-normalize `(set . ,(append exp '())))) + (t + (error "Invalid parsing expression: %S" exp)))) + +(defvar peg-leaf-types '(null fail any call action char range str eob + set)) + +(dolist (type peg-leaf-types) + (puthash type `(lambda (&rest args) (cons ',type args)) + peg-normalize-methods)) + +(peg-add-method normalize or (&rest args) + (cond ((null args) '(fail)) + ((null (cdr args)) (peg-normalize (car args))) + (t `(or ,(peg-normalize (car args)) + ,(peg-normalize `(or . ,(cdr args))))))) + +(peg-add-method normalize and (&rest args) + (cond ((null args) '(null)) + ((null (cdr args)) (peg-normalize (car args))) + (t `(and ,(peg-normalize (car args)) + ,(peg-normalize `(and . ,(cdr args))))))) + +(peg-add-method normalize * (&rest args) + `(* ,(peg-normalize `(and . ,args)))) + +;; FIXME: this duplicates code; could use some loop to avoid that +(peg-add-method normalize + (&rest args) + (let ((e (peg-normalize `(and . ,args)))) + `(and ,e (* ,e)))) + +(peg-add-method normalize opt (&rest args) + (let ((e (peg-normalize `(and . ,args)))) + `(or ,e (null)))) + +(peg-add-method normalize if (&rest args) + `(if ,(peg-normalize `(and . ,args)))) + +(peg-add-method normalize not (&rest args) + `(not ,(peg-normalize `(and . ,args)))) + +(peg-add-method normalize \` (form) + (peg-normalize `(stack-action ,form))) + +(peg-add-method normalize stack-action (form) + (unless (member '-- form) + (error "Malformed stack action: %S" form)) + (let ((args (cdr (member '-- (reverse form)))) + (values (cdr (member '-- form)))) + (let ((form `(let ,(mapcar (lambda (var) `(,var (pop peg-stack))) args) + ,@(mapcar (lambda (val) `(push ,val peg-stack)) values)))) + `(action ,form)))) + +(defvar peg-char-classes + '(ascii alnum alpha blank cntrl digit graph lower multibyte nonascii print + punct space unibyte upper word xdigit)) + +(peg-add-method normalize set (&rest specs) + (cond ((null specs) '(fail)) + ((and (null (cdr specs)) + (let ((range (peg-range-designator (car specs)))) + (and range `(range ,(car range) ,(cdr range)))))) + (t + (let ((chars '()) (ranges '()) (classes '())) + (while specs + (let* ((spec (pop specs)) + (range (peg-range-designator spec))) + (cond (range + (push range ranges)) + ((peg-characterp spec) + (push spec chars)) + ((stringp spec) + (setq chars (append (reverse (append spec ())) chars))) + ((memq spec peg-char-classes) + (push spec classes)) + (t (error "Invalid set specifier: %S" spec))))) + (setq ranges (reverse ranges)) + (setq chars (delete-dups (reverse chars))) + (setq classes (reverse classes)) + (cond ((and (null ranges) + (null classes) + (cond ((null chars) '(fail)) + ((null (cdr chars)) `(char ,(car chars)))))) + (t `(set ,ranges ,chars ,classes))))))) + +(defun peg-range-designator (x) + (and (symbolp x) + (let ((str (symbol-name x))) + (and (= (length str) 3) + (eq (aref str 1) ?-) + (< (aref str 0) (aref str 2)) + (cons (aref str 0) (aref str 2)))))) + +;; characterp is new in Emacs 23. +(defun peg-characterp (x) + (if (fboundp 'characterp) + (characterp x) + (integerp x))) + +(peg-add-method normalize list (&rest args) + (peg-normalize + (let ((marker (make-symbol "magic-marker"))) + `(and (stack-action (-- ',marker)) + ,@args + (stack-action (-- + (let ((l '())) + (while + (let ((e (pop peg-stack))) + (cond ((eq e ',marker) nil) + ((null peg-stack) + (error "Marker not longer stack")) + (t (push e l) t)))) + l))))))) + +(peg-add-method normalize *list (&rest args) + (peg-normalize `(list (* . ,args)))) + +(peg-add-method normalize substring (&rest args) + (peg-normalize + `(and `(-- (point)) + ,@args + `(start -- (buffer-substring-no-properties start (point)))))) + +(peg-add-method normalize region (&rest args) + (peg-normalize + `(and `(-- (point)) + ,@args + `(-- (point))))) + +(peg-add-method normalize replace (pe replacement) + (peg-normalize + `(and (stack-action (-- (point))) + ,pe + (stack-action (start -- (progn + (delete-region start (point)) + (insert-before-markers ,replacement)))) + (stack-action (x --))))) + +(peg-add-method normalize quote (form) + (error "quote is reverved for future use")) + +(peg-define-method-table translate) + +;; This is the main translation function. +(defun peg-translate-exp (exp) + "Return the ELisp code to match the PE EXP." + (let ((translator (or (gethash (car exp) peg-translate-methods) + (error "No translator for: %S" (car exp))))) + (apply translator (cdr exp)))) + +(peg-add-method translate and (e1 e2) + `(and ,(peg-translate-exp e1) + ,(peg-translate-exp e2))) + +(peg-add-method translate or (e1 e2) + (let ((cp (peg-make-choicepoint))) + `(,@(peg-save-choicepoint cp) + (or ,(peg-translate-exp e1) + (,@(peg-restore-choicepoint cp) + ,(peg-translate-exp e2)))))) + +;; Choicepoints are used for backtracking. At a choicepoint we save +;; enough state, so that we can continue from there if needed. +(defun peg-make-choicepoint () + (cons (make-symbol "point") (make-symbol "thunks"))) + +(defun peg-save-choicepoint (choicepoint) + `(let ((,(car choicepoint) (point)) + (,(cdr choicepoint) peg-thunks)))) + +(defun peg-restore-choicepoint (choicepoint) + `(progn + (goto-char ,(car choicepoint)) + (setq peg-thunks ,(cdr choicepoint)))) + +;; match empty strings +(peg-add-method translate null () + `t) + +;; match nothing +(peg-add-method translate fail () + `nil) + +(peg-add-method translate eob () + '(eobp)) + +(peg-add-method translate * (e) + (let ((cp (peg-make-choicepoint))) + `(progn (while (,@(peg-save-choicepoint cp) + (cond (,(peg-translate-exp e)) + (t ,(peg-restore-choicepoint cp) + nil)))) + t))) + +(peg-add-method translate if (e) + (let ((cp (peg-make-choicepoint))) + `(,@(peg-save-choicepoint cp) + (when ,(peg-translate-exp e) + ,(peg-restore-choicepoint cp) + t)))) + +(peg-add-method translate not (e) + (let ((cp (peg-make-choicepoint))) + `(,@(peg-save-choicepoint cp) + (when (not ,(peg-translate-exp e)) + ,(peg-restore-choicepoint cp) + t)))) + +(peg-add-method translate any () + '(when (not (eobp)) + (forward-char) + t)) + +(peg-add-method translate char (c) + `(when (eq (char-after) ',c) + (forward-char) + t)) + +(peg-add-method translate set (ranges chars classes) + `(when (looking-at ',(peg-make-charset-regexp ranges chars classes)) + (forward-char) + t)) + +(defun peg-make-charset-regexp (ranges chars classes) + (when (and (not ranges) (not classes) (<= (length chars) 1)) + (error "Bug")) + (let ((rbracket (member ?\] chars)) + (minus (member ?- chars)) + (hat (member ?^ chars))) + (dolist (c '(?\] ?- ?^)) + (setq chars (delete c chars))) + (format "[%s%s%s%s%s%s]" + (if rbracket "]" "") + (if minus "-" "") + (mapconcat (lambda (x) (format "%c-%c" (car x) (cdr x))) + ranges "") + (mapconcat (lambda (c) (format "[:%s:]" c)) classes "") + (mapconcat (lambda (c) (format "%c" c)) chars "") + (if hat "^" "")))) + +(peg-add-method translate range (from to) + `(when (and (char-after) + (<= ',from (char-after)) + (<= (char-after) ',to)) + (forward-char) + t)) + +(peg-add-method translate str (str) + `(when (looking-at ',(regexp-quote str)) + (goto-char (match-end 0)) + t)) + +(peg-add-method translate call (name) + (or (gethash name peg-rules) + (error "Reference to unknown rule: %S" name)) + `(funcall ,name)) + +(peg-add-method translate action (form) + `(progn + (push (cons (point) (lambda () ,form)) peg-thunks) + t)) + +(defvar peg-stack) +(defun peg-postprocess (thunks) + "Execute \"actions\"." + (let ((peg-stack '())) + (dolist (thunk (mapcar (lambda (x) + (goto-char (car x)) + (cons (point-marker) (cdr x))) + (reverse thunks))) + (goto-char (car thunk)) + (funcall (cdr thunk))) + (list t peg-stack))) + +;; Left recursion is presumably a common mistate when using PEGs. +;; Here we try to detect such mistakes. Essentailly we traverse the +;; graph as long as we can without consuming input. When we find a +;; recursive call we signal an error. + +(defun peg-check-cycles (peg-rules) + (maphash (lambda (name exp) + (peg-detect-cycles exp (list name)) + (dolist (node (peg-find-star-nodes exp)) + (peg-detect-cycles node '()))) + peg-rules)) + +(defun peg-find-star-nodes (exp) + (let ((type (car exp))) + (cond ((memq type peg-leaf-types) '()) + (t (let ((kids (apply #'append + (mapcar #'peg-find-star-nodes (cdr exp))))) + (if (eq type '*) + (cons exp kids) + kids)))))) + +(peg-define-method-table detect-cycles) + +(defun peg-detect-cycles (exp path) + "Signal an error on a cycle. +Otherwise traverse EXP recursively and return T if EXP can match +without consuming input. Return nil if EXP definetly consumes +input. PATH is the list of rules that we have visited so far." + (apply (or (gethash (car exp) peg-detect-cycles-methods) + (error "No detect-cycle method for: %S" exp)) + path (cdr exp))) + +(peg-add-method detect-cycles call (path name) + (cond ((member name path) + (error "Possible left recursion: %s" + (mapconcat (lambda (x) (format "%s" x)) + (reverse (cons name path)) " -> "))) + (t + (peg-detect-cycles (gethash name peg-rules) (cons name path))))) + +(peg-add-method detect-cycles and (path e1 e2) + (and (peg-detect-cycles e1 path) + (peg-detect-cycles e2 path))) + +(peg-add-method detect-cycles or (path e1 e2) + (or (peg-detect-cycles e1 path) + (peg-detect-cycles e2 path))) + +(peg-add-method detect-cycles * (path e) + (when (peg-detect-cycles e path) + (error "Infinite *-loop: %S matches empty string" e)) + t) + +(peg-add-method detect-cycles if (path e) (peg-unary-nullable e path)) +(peg-add-method detect-cycles not (path e) (peg-unary-nullable e path)) + +(defun peg-unary-nullable (exp path) + (peg-detect-cycles exp path) + t) + +(peg-add-method detect-cycles any (path) nil) +(peg-add-method detect-cycles char (path c) nil) +(peg-add-method detect-cycles set (path r c k) nil) +(peg-add-method detect-cycles range (path c1 c2) nil) +(peg-add-method detect-cycles str (path s) (equal s "")) +(peg-add-method detect-cycles null (path) t) +(peg-add-method detect-cycles fail (path) nil) +(peg-add-method detect-cycles eob (path) t) +(peg-add-method detect-cycles action (path form) t) + +;;; Tests: + +(defmacro peg-parse-string (rules string) + `(with-temp-buffer + (insert ,string) + (goto-char (point-min)) + (peg-parse . ,rules))) + +;; We can't expand the macro at compile time, because it needs helper +;; functions which aren't available yet. Delay the expansion to +;; load-time (or later). +(eval '(progn " +(" ;<-- this stops Emacs from indenting the next form + +(defun peg-test () + (interactive) + (assert (peg-parse-string ((s "a")) "a")) + (assert (not (peg-parse-string ((s "a")) "b"))) + (assert (peg-parse-string ((s (not "a"))) "b")) + (assert (not (peg-parse-string ((s (not "a"))) "a"))) + (assert (peg-parse-string ((s (if "a"))) "a")) + (assert (not (peg-parse-string ((s (if "a"))) "b"))) + (assert (peg-parse-string ((s "ab")) "ab")) + (assert (not (peg-parse-string ((s "ab")) "ba"))) + (assert (not (peg-parse-string ((s "ab")) "a"))) + (assert (peg-parse-string ((s (range ?0 ?9))) "0")) + (assert (not (peg-parse-string ((s (range ?0 ?9))) "a"))) + (assert (peg-parse-string ((s [0-9])) "0")) + (assert (not (peg-parse-string ((s [0-9])) "a"))) + (assert (not (peg-parse-string ((s [0-9])) ""))) + (assert (peg-parse-string ((s (any))) "0")) + (assert (not (peg-parse-string ((s (any))) ""))) + (assert (peg-parse-string ((s (eob))) "")) + (assert (peg-parse-string ((s (not (eob)))) "a")) + (assert (peg-parse-string ((s (or "a" "b"))) "a")) + (assert (peg-parse-string ((s (or "a" "b"))) "b")) + (assert (not (peg-parse-string ((s (or "a" "b"))) "c"))) + (assert (peg-parse-string ((s (and "a" "b"))) "ab")) + (assert (peg-parse-string ((s (and "a" "b"))) "abc")) + (assert (not (peg-parse-string ((s (and "a" "b"))) "ba"))) + (assert (peg-parse-string ((s (and "a" "b" "c"))) "abc")) + (assert (peg-parse-string ((s (* "a") "b" (eob))) "b")) + (assert (peg-parse-string ((s (* "a") "b" (eob))) "ab")) + (assert (peg-parse-string ((s (* "a") "b" (eob))) "aaab")) + (assert (not (peg-parse-string ((s (* "a") "b" (eob))) "abc"))) + (assert (peg-parse-string ((s "")) "abc")) + (assert (peg-parse-string ((s "" (eob))) "")) + (assert (peg-parse-string ((s (opt "a") "b")) "abc")) + (assert (peg-parse-string ((s (opt "a") "b")) "bc")) + (assert (not (peg-parse-string ((s (or))) "ab"))) + (assert (peg-parse-string ((s (and))) "ab")) + (assert (peg-parse-string ((s (and))) "")) + (assert (peg-parse-string ((s ["^"])) "^")) + (assert (peg-parse-string ((s ["^a"])) "a")) + (assert (peg-parse-string ((s ["-"])) "-")) + (assert (peg-parse-string ((s ["]-"])) "]")) + (assert (peg-parse-string ((s ["^]"])) "^")) + (assert (peg-parse-string ((s [alpha])) "z")) + (assert (not (peg-parse-string ((s [alpha])) "0"))) + (assert (not (peg-parse-string ((s [alpha])) ""))) + (assert (not (peg-parse-string ((s ["][:alpha:]"])) "z"))) + (assert (equal (peg-parse-string ((s `(-- 1 2))) "") '(t (2 1)))) + (assert (equal (peg-parse-string ((s `(-- 1 2) `(a b -- a b))) "") + '(t (2 1)))) + (assert (equal (peg-parse-string ((s (list x y)) + (x `(-- 1)) + (y `(-- 2))) + "") + '(t ((1 2))))) + (assert (equal (peg-parse-string ((s (list (* x))) + (x "x" `(-- 'x))) + "xxx") + '(t ((x x x))))) + (assert (equal (peg-parse-string ((s (region (* x))) + (x "x" `(-- 'x))) + "xxx") + '(t (4 x x x 1)))) + (assert (equal (peg-parse-string ((s (region (list (* x)))) + (x "x" `(-- 'x 'y))) + "xxx") + '(t (4 (x y x y x y) 1)))) + (assert (equal (with-temp-buffer + (save-excursion (insert "abcdef")) + (list + (peg-parse (x "a" + (replace "bc" "x") + (replace "de" "y") + "f")) + (buffer-string))) + '((t nil) "axyf"))) + ) + +(when (featurep 'cl) + (peg-test)) + +;;; Examples: + +;; peg-ex-recognize-int recognizes integers. An integer begins with a +;; optional sign, then follows one or more digits. Digits are all +;; characters from 0 to 9. +;; +;; Notes: +;; 1) "" matches the empty sequence, i.e. matches without +;; consuming input. +;; 2) [0-9] is the character range from 0 to 9. This can also be +;; written as (range ?0 ?9). [Note that 0-9 is a symbol.] +(defun peg-ex-recognize-int () + (peg-parse (number sign digit (* digit)) + (sign (or "+" "-" "")) + (digit [0-9]))) + +;; peg-ex-parse-int recognizes integers and computes the corresponding +;; value. The grammer is the same as for `peg-ex-recognize-int' added +;; with parsing actions. Unfortunaletly, the actions add quite a bit +;; of clutter. +;; +;; The action for the sign rule pushes t on the stack for a minus sign +;; and nil for plus or no sign. +;; +;; The action for the digit rule pushes the value for a single digit. +;; +;; The action `(a b -- (+ (* a 10) b)), takes two items from the stack +;; and pushes the first digit times 10 added to second digit. +;; +;; The action `(minus val -- (if minus (- val) val)), negates the +;; value if the minus flag is true. +(defun peg-ex-parse-int () + (peg-parse (number sign + digit + (* digit `(a b -- (+ (* a 10) b))) + `(minus val -- (if minus (- val) val))) + (sign (or (and "+" `(-- nil)) + (and "-" `(-- t)) + (and "" `(-- nil)))) + (digit [0-9] `(-- (- (char-before) ?0))))) + +;; Put point after the ) and press C-x C-e +;; (peg-ex-parse-int)-234234 + +;; Parse arithmetic expressions and compute the result as side effect. +(defun peg-ex-arith () + (peg-parse + (expr (or (and ws sum eol) + (and (* (not eol) (any)) eol error))) + (sum product (* (or (and plus product `(a b -- (+ a b))) + (and minus product `(a b -- (- a b)))))) + (product value (* (or (and times value `(a b -- (* a b))) + (and divide value `(a b -- (/ a b)))))) + (value (or (and (substring number) `(string -- (string-to-number string))) + (and open sum close))) + (number (+ [0-9]) ws) + (plus "+" ws) + (minus "-" ws) + (times "*" ws) + (divide "/" ws) + (open "(" ws) + (close ")" ws) + (ws (* (or " " "\t"))) + (eol (or "\n" "\r\n" "\r")) + (error (action (error "Parse error at: %s" (point)))))) + +;; (peg-ex-arith) 1 + 2 * 3 * (4 + 5) + +;; Parse URI according to RFC 2396. +(defun peg-ex-uri () + (peg-parse + (URI-reference (or absoluteURI relativeURI) + (or (and "#" (substring fragment)) + `(-- nil)) + `(scheme user host port path query fragment -- + (list :scheme scheme :user user + :host host :port port + :path path :query query + :fragment fragment))) + (absoluteURI (substring scheme) ":" (or hier-part opaque-part)) + (hier-part ;(-- user host port path query) + (or net-path + (and `(-- nil nil nil) + abs-path)) + (or (and "?" (substring query)) + `(-- nil))) + (net-path "//" authority (or abs-path `(-- nil))) + (abs-path "/" path-segments) + (path-segments segment (*list "/" segment) `(s l -- (cons s l))) + (segment (substring (* pchar) (* ";" param))) + (param (* pchar)) + (pchar (or unreserved escaped [":@&=+$,"])) + (query (* uric)) + (fragment (* uric)) + (relativeURI (or net-path abs-path rel-path) (opt "?" query)) + (rel-path rel-segment (opt abs-path)) + (rel-segment (+ unreserved escaped [";@&=+$,"])) + (authority (or server reg-name)) + (server (or (and (or (and (substring userinfo) "@") + `(-- nil)) + hostport) + `(-- nil nil nil))) + (userinfo (* (or unreserved escaped [";:&=+$,"]))) + (hostport (substring host) (or (and ":" (substring port)) + `(-- nil))) + (host (or hostname ipv4address)) + (hostname (* domainlabel ".") toplabel (opt ".")) + (domainlabel alphanum + (opt (* (or alphanum "-") (if alphanum)) + alphanum)) + (toplabel alpha + (* (or alphanum "-") (if alphanum)) + alphanum) + (ipv4address (+ digit) "." (+ digit) "." (+ digit) "." (+ digit)) + (port (* digit)) + (scheme alpha (* (or alpha digit ["+-."]))) + (reg-name (or unreserved escaped ["$,;:@&=+"])) + (opaque-part uric-no-slash (* uric)) + (uric (or reserved unreserved escaped)) + (uric-no-slash (or unreserved escaped [";?:@&=+$,"])) + (reserved (set ";/?:@&=+$,")) + (unreserved (or alphanum mark)) + (escaped "%" hex hex) + (hex (or digit [A-F] [a-f])) + (mark (set "-_.!~*'()")) + (alphanum (or alpha digit)) + (alpha (or lowalpha upalpha)) + (lowalpha [a-z]) + (upalpha [A-Z]) + (digit [0-9]))) + +;; (peg-ex-uri)file:/bar/baz.html?foo=df#x +;; (peg-ex-uri)http://lu...@www.foo.com:8080/bar/baz.html?x=1#foo + +;; Parse a lisp style Sexp. +;; [To keep the example short, ' and . are handled as ordinary symbol.] +(defun peg-ex-lisp () + (peg-parse + (sexp _ (or string list number symbol)) + (_ (* (or [" \n\t"] comment))) + (comment ";" (* (not (or "\n" (eob))) (any))) + (string "\"" (substring (* (not "\"") (any))) "\"") + (number (substring (opt (set "+-")) (+ digit)) + (if terminating) + `(string -- (string-to-number string))) + (symbol (substring (and symchar (* (not terminating) symchar))) + `(s -- (intern s))) + (symchar [a-z A-Z 0-9 "-;!#%&'*+,./:;<=>?...@[]^_`{|}~"]) + (list "(" `(-- (cons nil nil)) `(hd -- hd hd) + (* sexp `(tl e -- (setcdr tl (list e))) + ) _ ")" `(hd tl -- (cdr hd))) + (digit [0-9]) + (terminating (or (set " \n\t();\"'") (eob))))) + +;; (peg-ex-lisp) + +;; We try to detect left recursion and report it as error. +(defun peg-ex-left-recursion () + (eval '(peg-parse (exp (or term + (and exp "+" exp))) + (term (or digit + (and term "*" term))) + (digit [0-9])))) + +(defun peg-ex-infinite-loop () + (eval '(peg-parse (exp (* (or "x" + "y" + (action (foo)))))))) + +)) ; end of eval-when-load + +(provide 'peg) + +;;; peg.el ends here