;; R: consonant RA
;; n: NUKTA
;; H: HALANT
- ;; m: MATRA (pre)
- ;; u: MATRA (above)
- ;; b: MATRA (below)
- ;; p: MATRA (post)
+ ;; m: vowel sign (pre)
+ ;; u: vowel sign (above)
+ ;; b: vowel sign (below)
+ ;; p: vowel sign (post)
;; A: vowel modifier (above)
;; a: vowel modifier (post)
- ;; S: stress sign (above)
- ;; s: stress sign (below)
+ ;; S: stress sign / accent (above)
+ ;; s: stress sign / accent (below)
;; V: independent vowel
- ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
- ;; J: ZWJ (ZERO WIDTH JOINER)
- ;; E: ELSE
- ;;
- (#x0900 #x097F ?E) ; ELSE
- (#x0901 ?A) ; SIGN CANDRABINDU (above)
- (#x0902 ?A) ; SIGN ANUSVARA (above)
- (#x0903 ?a) ; SIGN VISARGA (post)
- (#x0905 #x0914 ?V) ; LETTER A .. LETTER AU
- (#x0915 #x0939 ?C) ; LETTER KA .. LETTER HA
+ ;; N: ZWNJ
+ ;; J: ZWJ
+ ;; X: generic
+ (#x0900 #x097F ?X) ; generic
+ (#x0901 ?A) ; SIGN CANDRABINDU
+ (#x0902 ?A) ; SIGN ANUSVARA
+ (#x0903 ?a) ; SIGN VISARGA
+ (#x0904 #x0914 ?V) ; LETTER SHORT A .. AU
+ (#x0915 #x0939 ?C) ; LETTER KA .. HA
(#x0930 ?R) ; LETTER RA
(#x093C ?n) ; SIGN NUKTA
- (#x093D ?E) ; SIGN AVAGRAHA
- (#x093E #x094C ?p) ; VOWEL SIGN (post)
- (#x093F ?m) ; VOWEL SIGN I (pre)
- (#x0941 #x0944 ?b) ; VOWEL SIGN (below)
- (#x0945 #x0948 ?u) ; VOWEL SIGN (above)
- (#x094D ?H) ; SIGN VIRAMA (HALANT)
- (#x0950 ?E) ; OM
- (#x0951 #x0954 ?S) ; STRESS SIGN or TONE MARK (above)
- (#x0952 ?s) ; STRESS SIGN or TONE MARK (below)
- (#x0958 #x095E ?C) ; LETTER QA .. LETTER YYA
- (#x0960 ?V) ; LETTER VOCALIC RR
- (#x0961 ?V) ; LETTER VOCALIC LL
- (#x0962 #x0963 ?b) ; VOWEL SIGN (below)
- (#x0964 #x0970 ?E) ; DANDA .. ABBREVIATION SIGN
+ (#x093E ?p) ; VOWEL SIGN AA
+ (#x093F ?m) ; VOWEL SIGN I
+ (#x0940 ?p) ; VOWEL SIGN II
+ (#x0941 #x0944 ?b) ; VOWEL SIGN U .. VOCALIC RR
+ (#x0945 #x0948 ?u) ; VOWEL SIGN CANDRA E .. AI
+ (#x0949 #x094C ?p) ; VOWEL SIGN CANDRA O .. AU
+ (#x094D ?H) ; SIGN VIRAMA
+ (#x0951 ?S) ; STRESS SIGN UDATTA
+ (#x0952 ?s) ; STRESS SIGN ANUDATTA
+ (#x0953 #x0954 ?S) ; GRAVE ACCENT .. ACUTE ACCENT
+ (#x0958 #x095F ?C) ; LETTER QA .. YYA
+ (#x0960 #x0961 ?V) ; LETTER VOCALIC RR .. VOCALIC LL
+ (#x0962 #x0963 ?b) ; VOWEL SIGN VOCALIC L .. VOCALIC LL
+ (#x097B #x097C ?C) ; LETTER GGA .. JJA
+ (#x097E #x097F ?C) ; LETTER DDDA .. BBA
(#x200C ?N) ; ZWNJ
(#x200D ?J) ; ZWJ
- (#x097D ?x) ; internally used tag to indicate Reph
- (#x097E ?y) ; internally used tag to indicate Mpost
- (#x097F ?z) ; internally used tag to indicate Cbase
)
-;; The first stage is to extract a syllable and re-order characters in
-;; it.
+;; Reordering: extract each syllable and reorder the characters within it.
(generator
(0
(cond
- ;; If [CR]H is followed by ZWNJ/ZWJ, move ZWNJ/ZWJ to the head so
- ;; that the later stages find it quickly.
- ("([CR]n?H)([NJ])"
- < | (2 =) (1 = *) | > )
-
- ;; A syllable starting with RH (Reph) and ending with a vowel
- ;; and/or a vowel modifier.
- ("(RH)(([CR]n?H)*[CR]n?)([mubp][Aa]?[Ss]?|[Aa][Ss]?|[Ss])"
- < | (1 #x097D = =) (2 move-base-Halant) (4 reorder-post-base) | >)
-
- ;; A syllable starting with RH (Reph) and ending without a vowel
- ;; nor a vowel modifier.
- ("(RH)(([CR]n?H)*[CR]n?)(H)?"
- < | (1 #x097D = =) (2 move-base-Halant) (4 =) | >)
-
- ;; A syllable starting with the other consonant and ending with a
- ;; vowel and/or a vowel modifier.
- ("(([CR]n?H)*[CR]n?)([mubp][Aa]?[Ss]?|[Aa][Ss]?|[Ss])"
- < | (1 move-base-Halant) (3 reorder-post-base) | >)
-
- ;; A syllable starting with the other consonant and ending without
- ;; a vowel nor a vowel modifier.
- ("(([CR]n?H)*[CR]n?)(H)?"
- < | (1 move-base-Halant) (3 =) | >)
+ ;; A syllable containing a vowel sign.  Output: m, consonants, b, s, u/p, RH (rphf), A, S, a.
+ ;;1 23 45 6 7 89 A BC D
+ ("(RH)?(([CR]n?H)*[CR]n?)((m)|(b)|([up]))((A)|(a))?((S)|(s))?"
+ < |
+ (5 =) (2 move-h) (6 =) (13 =) (7 =) (1 otf:deva=rphf+) (9 =) (12 =) (10 =)
+ | >)
+
+ ;; A syllable without vowel signs but with a vowel modifier.  Output: consonants, s, RH (rphf), A, S, a.
+ ;;1 23 45 6 78 9
+ ("(RH)?(([CR]n?H)*[CR]n?)((A)|(a))((S)|(s))?"
+ < | (2 move-h) (9 =) (1 otf:deva=rphf+) (5 =) (8 =) (6 =) | >)
+
+ ;; No vowel signs, no vowel modifiers, but with a stress sign or an accent.
+ ;;1 23 45 6
+ ("(RH)?(([CR]n?H)*[CR]n?)((S)|(s))"
+ < | (2 move-h) (6 =)(1 otf:deva=rphf+) (5 =) | >)
+
+ ;; A special case: RA HALANT followed by ZWJ gets the half feature instead of rphf.
+ ("(RH)J"
+ < | (1 otf:deva=half+) | >)
+
+ ;; Forced half form (consonant-halant run followed by ZWJ).  Do not move halant.
+ ;;1 23
+ ("(RH)?(([CR]n?H)+)J"
+ < | (2 = *) (1 otf:deva=rphf+) | >)
+
+ ;; If a syllable ends with a halant, or a halant and a ZWNJ, mark it with
+ ;; a leading ZWNJ (0x200C) for the special handling in the later stages.
+ ;;1 23 4 5
+ ("(RH)?(([CR]n?H)*[CR]n?)(H)?(N)?"
+ < | (4 0x200C) (2 move-h) (1 otf:deva=rphf+) (4 =) | >)
;; A syllable starting with an independent vowel.
- ("V[Aa]?[Ss]?"
- < | = * | > )
+ ;;1 23 4 56 7
+ ("(V)((A)|(a))?((S)|(s))?"
+ < | (1 =) (7 =) (3 =) (6 =) (4 =) | >)
("." =))
*)
- ;; Move Halant on a base consonant to the tail.
- (move-base-Halant
+ ;; Move the halant that follows the base consonant to the end, after the trailing (RH)*R.
+ (move-h
(cond
- ("(([CR]n?H)*[CR]n?)(H)(R)"
- (1 = *)
- (4 =)
- (3 =))
- (".*"
- = *)))
-
- ;; Re-order post modifiers.
- (reorder-post-base
- ;; 12 3 4 5 67 8 9 10
- ("((m)|(u)|(b)|(p))?((A)|(a))?((S)|(s))?$"
- (2 =) (4 =) (10 =) (3 =) (5 =)
- #x097E
- (7 =) (9 =) (8))))
-
-;; The second stage is to reorder Reph and Mpre.
+ ;;12 3 4 56
+ ("(([CR]n?H)*(Cn?))(H)((RH)*R)$"
+ (1 = *) (5 = *) (4 =))
+ ;;1 2 34
+ ("(R)(H)((RH)*R)$"
+ (1 =) (3 = *) (2 =))
+ (".+" = *))))
+
+;; Language forms: run gsub1 (nukt, akhn, blwf, half, vatu) on each syllable.
+;; If a syllable is marked by a leading ZWNJ, copy its final halant as is (no gsub1).
+;; An initial RA is also copied as is, so blwf and half are not applied to it.
(generator
(0
(cond
- (" [NJ]([^ ]*) "
- = *)
- (" (x(..))([CRnH]*) "
- | (3 = *) (2 otf:deva=rphf) |)
- (" (x(..))([CRnH]*)(m?)([^y]*)y([^ ]*) "
- | (4 =) (3 = *) (5 = *) (2 otf:deva=rphf) (6 = *) |)
- (" ([CRnH][CRnH]*) "
- = *)
- (" ([CRnH][CRnH]*)(m?)([^y]*)y([^ ]*) "
- | (2 =) (1 = *) (3 = *) (4 = *) |)
+ (" N(R)([^ ]*)(H) "
+ | (1 =) (2 gsub1) (3 =) |)
+ (" (R)([^ ]*) "
+ | (1 =) (2 gsub1) |)
+ (" N([^ ]+)(H) "
+ | (1 gsub1) (2 =) |)
+ (" ([^ ]+) "
+ | (1 gsub1) |)
("." =))
- *))
+ *)
-;; The third stage is to drive OTF tables. For the moment, we use
-;; the default LangSys, and try all GSUB/GPOS features except for the
-;; sequence followed by ZWNJ in which case try "nukt" and "haln"
-;; features only.
+ (gsub1
+ otf:deva=nukt,akhn,blwf,half,vatu+))
+
+;; Other OTF features: apply pres, abvs, blws, psts and haln to each syllable.
(generator
(0
(cond
- (" N([^ ]*) "
- (1 otf:deva=nukt,haln))
-
- (" J([^ ]*) "
- (1 otf:deva))
-
- (" ([^ ]*)(CH) "
- (1 otf:deva=~rphf,*) (2 otf:deva=haln))
-
- (" ([^ ]*) "
- (1 otf:deva=~rphf,*))
-
+ (" ([^ ]+) "
+ (1 otf:deva=pres,abvs,blws,psts,haln))
("."
\[ otf:deva=+ \] ))
*))