;; b: MATRA (below)
;; p: MATRA (post)
;; t: MATRA (two-part)
- ;; U: AU LENGTH MARK
;; A: vowel modifier (above)
;; a: vowel modifier (post)
;; V: independent vowel
)
;; Step 1 : Syllable identification. Recognised syllables are quoted
-;; by the virtual character, which is generated by the command "|" and
+;; by the pseudo character, which is generated by the command "|" and
;; has the category " " (space).
(generator
(0
;; canonicalisation.
;; Case A : A syllable ending with a vowel modifier.
- ("(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]*)(t)?([Aa])"
+ ;;1 23 4 5 6 7
+ ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]*)(t)?([Aa])"
< |
- (1 set-marks)
- (4 = *)
- (5 split)
- (6 =)
+ (1 = =)
+ (2 set-marks)
+ (5 = *)
+ (6 split)
+ (7 =)
| >)
;; Case B : A syllable ending with a two-part vowel.
- ("(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
+ ;;1 23 4 5
+ ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
< |
- (1 set-marks)
- (4 split)
+ (1 = =)
+ (2 set-marks)
+ (5 split)
| >)
;; Case C : A syllable ending with other vowel. Note that a
;; two-part vowel may be expressed with two vowel marks for
;; backward compatibility.
- ("(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]+)"
+ ;;1 23 4 5
+ ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]+)"
< |
- (1 set-marks)
- (4 = *)
+ (1 = =)
+ (2 set-marks)
+ (5 = *)
| >)
;; Case D : Ya-phalaa. Reorder H and Y for the next step.
;; <http://www.unicode.org/faq/indic.html> says "it should be
;; permissible for the Ya-phalla to be consistently formed by "ZWNJ
;; + VIRAMA + YA".
- ("([CBRY]N)(H)(Y)"
+ ("([CRBY]n?N)(H)(Y)"
< |
- (1 = =)
+ (1 = *)
(3 =)
(2 =)
| >)
;; Case E : No explicit vowel nor modifier. If the syllable ends
;; with a consonant, analyse it for reordering in the next step.
;; Otherwise, just identify the syllable without changing anything.
- ("([CRBY]n?H[NJ]?)*[CRBY]n?(HN|HJ|H)?"
- (cond
- (".+[^HNJ]$"
- < | set-marks | >)
- (".+"
- < | = * | >)))
+ ;;1 23 4
+ ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
+ < |
+ (1 = =)
+ (2 set-marks)
+ (4 = *)
+ | >)
;; Case F : Syllables that begin with an independent vowel. An
;; optional HYp sequence appears when this syllable represents the
;; moved.
(set-marks
(cond
- ;; At least one C and ends with Y.
- ("(([CRBY]n?H[NJ]?)*Cn?)H(([BR]H)*)Y"
- (1 = *)
+ ;; Ending with Y.
+ ;;1 2 3 45 6
+ ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)"
+ (1 = *) ; prebase & base
0x09FE ; mark #1
- (3 = *) ; below consonants
+ (4 = *) ; below consonants
0x09FF ; mark #2
- 0x09AF 0x09CD) ; YA + moved HASANT
- ;; At least one C and ends with B or R.
- ("(([CRBY]n?H[NJ]?)*Cn?)H(([BR]H)*[BR])"
- (1 = *)
+ (6 =) ; YA
+ (3 =)) ; moved HASANT
+ ;; Ending with R or B.
+ ;;1 2 3 45
+ ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])"
+ (1 = *) ; prebase & base
0x09FE ; mark #1
- (3 = *) ; below consonants
- 0x09CD ; moved HASANT
+ (4 = *) ; below consonants
+ (3 =) ; moved HASANT
0x09FF) ; mark #2
(".+"
= *
(generator
(0
(cond
+ ;; Special case: a single consonant and a Halant.
+ (" (.)xy(H[NJ]?) "
+ |
+ 0x09FE
+ (1 =)
+ (2 = *)
+ 0x09FE
+ |)
+
;; This is the most generic pattern. It follows Case A-C and a
;; part of Case E in Step 1. Now Mark #1 is used to indicate the
;; critical part that requires pre-base substitution in the
;; following steps.
- ;; 1 2 3 4 5 6 7 8 9
- (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)? "
+ ;; 1 2 3 4 5 6 7 8 9 10
+ (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)?(HN|HJ|H)? "
|
(5 =) ; [Mpre]
- 0x09FE
;; Actually, the nukt feature is not necessary for Bengali because
;; all the necessary Nukta forms are precomposed in the Unicode
;; standard. Even if a Nukta consonant is given in the form of
;; operation is also applied here, before applying the half form
;; operation because the Mukti font generates Akhand ligatures
;; directly from the "C H C" sequence, not via the half form.
+ 0x09FE ; begin Cpre & Cbase
(2 otf:beng=nukt,akhn) ; {Cpre + H} + Cbase
- 0x09FE
+ 0x09FE ; end Cpre & Cbase
(3 otf:beng=blwf) ; {Cbelow + H}
(6 =) ; [Mbelow]
(1 otf:beng=rphf) ; [Reph]
(4 otf:beng=pstf) ; [Cpost + H]
(7 =) ; [Mpost]
(9 =) ; [VMpost]
+ (10 = *) ; optional HASANT
|)
;; Syllables that begin with an independent vowel (following up
;; Ya-phalaa (following up Step 1, Case D). Remove N and change YH
;; to the post base form. Syllables of this type do not require
;; further modification.
- (" ([CBRY])N(YH) "
+ (" ([CBRY]n?)N(YH) "
|
(1 =)
(2 otf:beng=pstf)
|)
- ;; Syllables that end with an H and an optional N or J (following
- ;; up a part of Step 1, Case E). Syllables of this type also
- ;; require pre-base substitution in the following steps.
- (" ([^ ]+H[NJ]?) "
- |
- 0x09FE
- ;; Only Nukt and Akhn are applied here. See the comment in the
- ;; topmost sibling for explanation.
- (1 otf:beng=nukt,akhn)
- 0x09FE
- |)
-
("." =))
*))
-;; Step 3 : Now only those syllables that contain the virtual
-;; character x require pre-base substition. This is the most
-;; complicated part in this FLT.
+;; Step 3 : Now only those syllables that contain the pseudo character
+;; x require pre-base substitution. This is the most complicated part
+;; in this FLT.
;; If the sequence "C1 H C2" makes ligature L12, L12 replaces the
;; original sequence.
;; into the _Halant_ (not half) form of C1. However, there is no way
;; to reconvert C1half into C1halant nor to revert back to "C1 H".
;; Thus we duplicate the critical part in two different forms so that
-;; we can select the appropriate one in the next step. The virtual
+;; we can select the appropriate one in the next step. The pseudo
;; character x is used to indicate the boundaries.
;; ... C1 H C2 ... ==> ... x C1halant C2 x L12 x ...