(font (nil nil unicode-bmp :otf=orya=rphf)))
(category
- ;; C: consonant (excluding Y and R)
- ;; R: consonant RA (reph, below)
+ ;; Maps code points to the one-letter categories that the regular
+ ;; expressions in the generators below are written in.  Narrower
+ ;; entries listed later refine the broad ranges listed before them.
+ ;; C: consonant (except for R, B and Y)
+ ;; R: consonant RA
;; B: consonant (below)
- ;; Y: consonant YA, YYA (post)
+ ;; Y: consonant (post)
;; n: NUKTA
;; H: HALANT
- ;; m: MATRA (pre)
- ;; u: MATRA (above)
- ;; b: MATRA (below)
- ;; p: MATRA (post)
- ;; t: MATRA (two-part)
+ ;; m: vowel sign (pre)
+ ;; u: vowel sign (above)
+ ;; b: vowel sign (below)
+ ;; p: vowel sign (post)
+ ;; t: vowel sign (two-part)
;; A: vowel modifier (above)
;; a: vowel modifier (post)
;; V: independent vowel
;; N: ZWNJ (ZERO WIDTH NON-JOINER)
;; J: ZWJ (ZERO WIDTH JOINER)
- ;; E: ELSE
- ;;
- (0x200C ?N) ; ZWNJ
- (0x200D ?J) ; ZWJ
- (0x0664 0x0665 ?E) ; DANDA, DOUBLE DANDA
- (0x0B00 0x0B7F ?E) ; ELSE
- (0x0B01 ?A) ; SIGN CANDRABINDU (above)
- (0x0B02 0x0B03 ?a) ; SIGN ANUSWAR, VISARGA (post)
- (0x0B05 0x0B0C ?V) ; LETTER A .. VOCALIC L
- (0x0B0F 0x0B10 ?V) ; LETTER E .. AI
- (0x0B13 0x0B14 ?V) ; LETTER O .. AU
+ ;; X: generic
+ ;; Z: internal use
+ (0x0B00 0x0B7F ?X) ; generic
+ (0x0B00 ?Z) ; internal use
+ (0x0B01 0x0B03 ?a) ; SIGN CANDRABINDU .. VISARGA
+ (0x0B05 0x0B14 ?V) ; LETTER A .. AU
(0x0B15 0x0B39 ?C) ; LETTER KA .. HA
(0x0B24 ?B) ; LETTER TA
(0x0B28 ?B) ; LETTER NA
- (0x0B2C ?B) ; LETTER BA
- (0x0B2D ?B) ; LETTER BHA
- (0x0B2E ?B) ; LETTER MA
+ (0x0B2C 0x0B2E ?B) ; LETTER BA .. MA
(0x0B2F ?Y) ; LETTER YA
(0x0B30 ?R) ; LETTER RA
- (0x0B32 ?B) ; LETTER LA
+ (0x0B32 0x0B33 ?B) ; LETTER LA .. LLA
(0x0B33 ?B) ; LETTER LLA
- (0x0B35 ?B) ; LETTER VA
(0x0B3C ?n) ; SIGN NUKTA
- (0x0B3E ?p) ; VOWEL SIGN AA (post)
- (0x0B3F ?u) ; VOWEL SIGN I (above)
- (0x0B40 ?p) ; VOWEL SIGN II (post)
- (0x0B41 0x0B43 ?b) ; VOWEL SIGN U, UU, R (below)
- (0x0B47 ?m) ; VOWEL SIGN E (pre)
- (0x0B48 0x0B4C ?t) ; VOWEL SIGN AI, O, AU (two-part)
- (0x0B4D ?H) ; SIGN VIRAMA (HALANT)
+ (0x0B3E ?p) ; VOWEL SIGN AA
+ (0x0B3F ?u) ; VOWEL SIGN I
+ (0x0B40 ?p) ; VOWEL SIGN II
+ (0x0B41 0x0B43 ?b) ; VOWEL SIGN U .. VOCALIC R
+ (0x0B47 ?m) ; VOWEL SIGN E
+ (0x0B48 0x0B4C ?t) ; VOWEL SIGN AI .. AU
+ (0x0B4D ?H) ; SIGN VIRAMA
(0x0B56 ?u) ; AI LENGTH MARK
(0x0B57 ?p) ; AU LENGTH MARK
- (0x0B5C 0x0B5D ?C) ; LETTER RRA, RHA
+ (0x0B5C 0x0B5D ?C) ; LETTER RRA .. RHA
(0x0B5F ?Y) ; LETTER YYA
- (0x0B60 0x0B61 ?V) ; LETTER VOCALIC RR, LL
+ (0x0B60 0x0B61 ?V) ; LETTER VOCALIC RR .. LL
(0x0B71 ?C) ; LETTER WA
- (0x0B7E ?x) ; mark #1 (internal use)
- (0x0B7F ?y) ; mark #2 (internal use)
+ ;; Oriya has no danda of its own: U+0B64/U+0B65 are unassigned (and
+ ;; already fall in the generic 0x0B00..0x0B7F range above), so text
+ ;; uses the generic Indic DANDA U+0964 and DOUBLE DANDA U+0965.
+ (0x0964 0x0965 ?X) ; DANDA .. DOUBLE DANDA
+ (0x200C ?N) ; ZWNJ
+ (0x200D ?J) ; ZWJ
)
-;; Step 1 : Syllable identification. Recognised syllables are quoted
-;; by the pseudo character, which is generated by the command "|" and
-;; has the category " " (space).
+;; Decompose each two-part vowel sign into VOWEL SIGN E plus a second sign.
+;; Move a ZWJ that follows a category-C consonant (with optional nukta) to before that consonant.
(generator
(0
(cond
- ;; Case F : Syllables containing an independent vowel.
- ("(RH)?(V)(a)?(A)?"
- < |
- (2 =)
- (1 = =)
- (3 =)
- (4 =)
- | >)
-
- ;; Case A-C are for those syllables that end with an explicit vowel
- ;; mark and/or a vowel modifier. They are divided into three cases
- ;; for readability of the regular expressions. The leading
- ;; consonant-Halant repetition is analysed for reordering in the
- ;; next step. A two-part vowel, if any, is split for
- ;; canonicalisation.
+ ((0x0B48) ; VOWEL SIGN AI becomes
+ 0x0B47 0x0B56) ; VOWEL SIGN E + AI LENGTH MARK
+ ((0x0B4B) ; VOWEL SIGN O becomes
+ 0x0B47 0x0B3E) ; VOWEL SIGN E + VOWEL SIGN AA
+ ((0x0B4C) ; VOWEL SIGN AU becomes
+ 0x0B47 0x0B57) ; VOWEL SIGN E + AU LENGTH MARK
+ ("(Cn?)(J)" ; only category C is listed here; R, B and Y before a ZWJ fall through to the rule below
+ (2 =) (1 = *)) ; emit the ZWJ first, then the consonant and its nukta
+ ("." =)) ; everything else is copied unchanged
+ *)) ; apply the rule set repeatedly
- ;; Case A : A syllable ending with a vowel modifier.
- ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]*)(t)?([Aa])"
+;; Syllable identification and reordering.  Each recognised syllable is emitted between "|" commands, with its consonant cluster passed through pre-below and post.
+;; Do not apply 'rphf' if a syllable begins with ZWJ: the (J) rules come first and copy the ZWJ instead of matching a leading RH.
+(generator
+ (0
+ (cond
+ ;; A syllable with ZWJ and a pre-base vowel sign; the vowel sign (group 4) is emitted before the cluster.
+ ;; Groups: 1=J  2=consonant cluster (3=repeated consonant+halant inside it)  4=m  5=u  6=p  7=a
+ ("(J)(([CRBY]n?H)*[CRBY]n?)(m)(u)?(p)?(a)?"
+ < | (1 =) (4 =) (2 pre-below) (5 =) (2 post) (6 =) (7 =) | >)
+
+ ;; A syllable with ZWJ and a non-pre-base vowel sign.
+ ;; Groups: 1=J  2=consonant cluster (3=inner repeat)  4=the alternation  5=[bu]  6=p  7=a
+ ("(J)(([CRBY]n?H)*[CRBY]n?)(([bu])|(p))(a)?"
+ < | (1 =) (2 pre-below) (5 =) (2 post) (6 =) (7 =) | >)
+
+ ;; A syllable with ZWJ and a vowel modifier, but without vowel signs.
+ ;; Groups: 1=J  2=consonant cluster (3=inner repeat)  4=a
+ ("(J)(([CRBY]n?H)*[CRBY]n?)(a)"
+ < | (1 =) (2 pre-below) (2 post) (4 =) | >)
+
+ ;; Add a ZWNJ explicitly when a syllable ends with a halant; an input ZWNJ (group 5) is matched but not copied.
+ ;; Groups: 1=J  2=consonant cluster (3=inner repeat)  4=trailing H  5=N
+ ("(J)(([CRBY]n?H)*[CRBY]n?)(H)?(N)?"
+ < | (1 =) (2 pre-below) (4 = 0x200C) (2 post) | >)
+
+ ;; With a pre-base vowel sign, without a ZWJ.
+ ;; Groups: 1=RH  2=consonant cluster (3=inner repeat)  4=m  5=u  6=p  7=a
+ ("(RH)?(([CRBY]n?H)*[CRBY]n?)(m)(u)?(p)?(a)?"
< |
- (1 = =)
- (2 set-marks)
- (5 = *)
- (6 split)
- (7 =)
+ (4 =) (2 pre-below) (5 =) (1 otf:orya=rphf) (2 post) (6 =) (7 =)
| >)
- ;; Case B : A syllable ending with a two-part vowel.
- ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
- < |
- (1 = =)
- (2 set-marks)
- (5 split)
- | >)
+ ;; With a non-pre-base vowel sign, without a ZWJ.
+ ;; Groups: 1=RH  2=consonant cluster (3=inner repeat)  4=the alternation  5=[bu]  6=p  7=a
+ ("(RH)?(([CRBY]n?H)*[CRBY]n?)(([bu])|(p))(a)?"
+ < | (2 pre-below) (5 =) (1 otf:orya=rphf) (2 post) (6 =) (7 =) | >)
- ;; Case C : A syllable ending with other vowel(s). Note that a
- ;; two-part vowel may be expressed with two vowel marks for
- ;; backward compatibility.
- ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]+)"
- < |
- (1 = =)
- (2 set-marks)
- (5 = *)
- | >)
+ ;; With a vowel modifier, but without vowel signs or a ZWJ.
+ ;; Groups: 1=RH  2=consonant cluster (3=inner repeat)  4=a
+ ("(RH)?(([CRBY]n?H)*[CRBY]n?)(a)"
+ < | (2 pre-below) (1 otf:orya=rphf) (2 post) (4 =) | >)
- ;; Case E : No explicit vowel nor modifier. If the syllable ends
- ;; with a consonant, analyse it for reordering in the next step.
- ;; Otherwise, just identify the syllable without changing anything.
- ;;1 23 4
- ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
- < |
- (1 = =)
- (2 set-marks)
- (4 = *)
- | >)
+ ;; Add a ZWNJ explicitly when a syllable ends with a halant; an input ZWNJ (group 5) is matched but not copied.
+ ;; Groups: 1=RH  2=consonant cluster (3=inner repeat)  4=trailing H  5=N
+ ("(RH)?(([CRBY]n?H)*[CRBY]n?)(H)?(N)?"
+ < | (2 pre-below) (1 otf:orya=rphf) (4 = 0x200C) (2 post) | >)
+
+ ;; A syllable starting with an independent vowel, optionally followed by a vowel modifier; copied as is.
+ ("Va?"
+ < | = * | >)
("." =))
*)
- ;; Set mark #1 (x) at the position where below consonants begin, and
- ;; mark #2 (y) at the position to which below and above signs will be
- ;; moved.
- (set-marks
+ ;; Called with a consonant cluster.  Move a halant after the base consonant to the end.
+ ;; Fill the resulting gap with a special mark: 0x0B00, which the category table maps to Z (internal use).
+ ;; Remove post-base parts, i.e. the trailing run of Y and H that the patterns match but do not emit.
+ (pre-below
(cond
- ;; Ending with Y.
- ;;1 2 3 45 6
- ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)$"
- (1 = *) ; prebase & base
- 0x0B7E ; below begin
- (4 = *) ; below consonants
- 0x0B7F ; below end
- (6 =) ; YA
- (3 =)) ; moved HALANT
- ;; Ending with R or B.
- ;;1 2 3 45
- ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])$"
- (1 = *) ; prebase & base
- 0x0B7E ; below begin
- (4 = *) ; below consonants
- (3 =) ; moved HALANT
- 0x0B7F) ; below end
- (".+"
- = *
- 0x0B7E ; below begin
- 0x0B7F))) ; below end
-
- ;; Split two-part dependent vowel signs for canonicalisation.
- (split
+ ("([CRBYnH]*[CYn])H([RBH]+)[YH]+$" ; base ends in C, Y or nukta; R/B run follows; Y/H tail present
+ (1 = *) 0x0B00 (2 = *)) ; base part, Z mark, R/B run; the halant after the base and the tail are not emitted
+ ("([CRBYnH]*[CYn])(H)([RBH]+)$" ; same, but the cluster ends with the R/B run
+ (1 = *) 0x0B00 (3 = *) (2 =)) ; the halant after the base (group 2) is emitted last
+ ("([CRBYnH]*[Cn])[YH]*$" ; no R/B run after the base
+ (1 = *) 0x0B00) ; base part and Z mark only
+ ("([RB]n?)H([RBH]*)[YH]+$" ; the three rules below repeat the above for a cluster whose first consonant is R or B
+ (1 = *) 0x0B00 (2 = *))
+ ("([RB]n?)(H)([RBH]*)$"
+ (1 = *) 0x0B00 (3 = *) (2 =))
+ ("([RBY]n?)[YH]*$" ; single leading R, B or Y with only a Y/H tail
+ (1 = *) 0x0B00)))
+
+ ;; Called with a consonant cluster.  Extract post-base parts (the trailing run of Y and H) and add a halant (0x0B4D) at the end.
+ ;; Produce nothing if there are no post-base parts.
+ (post
(cond
- ((0x0B48) 0x0B47 0x0B56)
- ((0x0B4B) 0x0B47 0x0B3E)
- ((0x0B4C) 0x0B47 0x0B57)))
+ ("[CRBYnH]*[CRBn]H([YH]+)$" ; Y/H tail after a halant that follows C, R, B or nukta
+ (1 = *) 0x0B4D) ; the tail, then SIGN VIRAMA
+ ("Yn?H(YH)+$" ; NOTE(review): every caller passes a cluster ending in [CRBY]n?, so a tail of (YH)+$ looks unmatchable; ([YH]+) may have been intended -- confirm
+ (1 = *) 0x0B4D)
+ (".+" ; any other cluster: matched, nothing emitted
+ )))
)
-;; Step 2 : Move Reph and Matra if necessary. From now on, we care
-;; only for those syllables that have been identified in Step 1.
+;; Apply the form features (nukt, akhn, half, blwf, pstf) to the relevant segments of each syllable, split at the Z mark.
(generator
(0
(cond
- ;; Special case: a single consonant and a Halant.
- (" (.)xy(H[NJ]?) "
- |
- (1 =)
- (2 = *)
- |)
-
- ;; This is the most generic pattern. It follows Case A-C and a
- ;; part of Case E in Step 1. Now Mark #1 is used to indicate the
- ;; critical part that requires pre-base substitution in the
- ;; following steps.
-
- ;; 1 2 3 4 5 6 7 8 9 10 11
- (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(u)?(p)?(A)?(a)?(HN|HJ|H)? "
- |
- (5 =) ; [Mpre]
- ;; We can safely perform Nukta composition here because it does
- ;; not affect surrounding letters in the syllable. The Akhand
- ;; ligature operation is also applied here, before applying the
- ;; half form operation because the Utkal font generates Akhand
- ;; ligatures directly from the "C H C" sequence, not via the half
- ;; form.
- 0x0B7E ; begin Cpre & Cbase
- (2 otf:orya=nukt,akhn+) ; {Cpre + H} + Cbase
- 0x0B7E ; end Cpre & Cbase
- (3 otf:orya=blwf+) ; {Cbelow + H}
- (6 =) ; [Mbelow]
- (7 =) ; [Mabove]
- (1 otf:orya=rphf+) ; [Reph]
- (4 otf:orya=pstf+) ; [Cpost + H]
- (8 =) ; [Mpost]
- (9 =) ; [VMabove]
- (10 =) ; [VMpost]
- (11 = *) ; optional HALANT
- |)
+ ;; If a syllable contains a ZWNJ, render the preceding halant explicitly: the HN pair is copied as is and features are applied separately on each side of it.
+ (" ([^Z]+)(Z)([^N]*)(HN)([^ ]*) " ; groups: 1=before the Z mark  2=Z  3=between Z and HN  4=HN  5=rest of the syllable
+ | (1 otf:orya=nukt,akhn,half+) (2 =) (3 otf:orya=blwf+) (4 = =)
+ (5 otf:orya=pstf+) |)
- ;; Syllables that begin with an independent vowel (following up
- ;; Step 1, Case F). Syllables of this type do not require further
- ;; modification.
- (" (V)(RH)(.*) "
- |
- (1 =)
- (2 otf:orya=rphf+)
- (3 = *)
- |)
+ (" (J?m?)([^Z]+)(Z)([^ ]*) " ; groups: 1=optional ZWJ and pre-base vowel sign  2=before the Z mark  3=Z  4=rest of the syllable
+ | (1 = *) (2 otf:orya=nukt,akhn,half+) (3 =) (4 otf:orya=blwf,pstf+) |)
("." =))
*))
-;; Step 3 : Now only those syllables that contain the pseudo character
-;; x require pre-base substition. Unlike the Mukti font for Bengali,
-;; the Utkal font can produce the ligature for "C1 H C2" from
-;; "C1halant" and "C2". If such a ligature is not available, we get a
-;; sequence consisting of "C1halant" and "C2", which is satisfactory.
-
+;; Apply 'pres' to the part of each syllable before the Z mark to get pre-base conjuncts; everything else is copied.
(generator
(0
(cond
- (" (.H)J "
- |
- (1 otf:orya=half+)
- |)
- (" (.H)N? "
- |
- (1 otf:orya=haln+)
- |)
- (" ([^x ]?x)([^x ]*)(x[^ ]*) "
- |
- (1 = *)
- (2 pres)
- (3 = *)
- |)
+ (" (J?m?)([^Z]+)(Z)([^ ]*) " ; groups: 1=optional ZWJ and pre-base vowel sign  2=before the Z mark  3=Z  4=rest of the syllable
+ | (1 = *) (2 otf:orya=pres+) (3 =) (4 = *) |)
("." =))
- *)
-
- (pres
- (cond
- ("([^NJ]*)(.H)J(.*)"
- (1 otf:orya=haln,pres+)
- (2 otf:orya=half+)
- (3 pres))
- ("([^N]*)(H)N(.*)"
- (1 otf:orya=haln,pres+)
- (2 =)
- (3 pres))
- (".*"
- otf:orya=haln,pres+)))
-
- )
-
-;; Step 4 : Mpre/Cpre reordering. If the pre-base substitution in
-;; the previous step results in more than one glyph, and there is an
-;; Mpre in this syllable, then move the Mpre before the Cbase.
-;; i.e. [Mpre]{Kh}Kf... -> {Kh}[Mpre]Kf...
+ *))
+;; When the number of glyphs between a pre-base vowel sign and the
+;; Z mark is more than one, move the pre-base vowel sign
+;; before the base glyph (the last glyph in front of the Z mark).  Both rules also drop the Z mark and one ZWNJ.
(generator
(0
(cond
- (" ([^x ])x([^x ]+)([^x ])x([^x ]*) "
- |
- (2 = *)
- (1 =)
- (3 =)
- (4 = *)
- |)
- (" ([^x ])?x([^x ]*)x([^ ]*) "
- |
- (1 =)
- (2 = *)
- (3 = *)
- |)
+ (" (J)?(m)([^Z]+)([^Z])Z([^N ]*)N?([^ ]*) " ; groups: 1=ZWJ  2=pre-base vowel sign  3=glyphs before the base  4=base glyph  5=up to a ZWNJ  6=rest
+ | (1 =) (3 = *) (2 =) (4 =) (5 = *) (6 = *)|)
+ (" ([^Z]+)Z([^N ]*)N?([^ ]*) " ; no reordering needed: copy everything except the Z mark and the ZWNJ
+ | (1 = *) (2 = *) (3 = *) |)
("." =))
*))
-;; Step 5 : Substitutions & positioning.
-
+;; Apply other features.
+;; Do not apply 'vatu' and 'blws' if there is a ZWJ.
+;; The 'pres' feature is applied again for pre-base vowel sign.
+;; Both syllable rules emit only group 1, so the surrounding "|" marks
+;; and a leading ZWJ are not passed on.
+;; Post-base substitutions are requested with the registered OpenType
+;; tag 'psts'; 'pstp' is not a registered feature tag.
(generator
(0
(cond
- (" ([^ ]*) "
- ;; FIXME : The pres below is for the TTA ligature in the Utkal
- ;; font. It should be removed once the font is updated.
- (1 otf:orya=vatu,abvs,blws,psts,pres))
+ (" J([^ ]+) "
+ (1 otf:orya=pres,abvs,psts,haln))
+ (" ([^ ]+) "
+ (1 otf:orya=vatu,pres,abvs,blws,psts,haln))
("."
[ otf:orya=+ ]))
*))