From: ntakahas Date: Fri, 13 Aug 2004 06:53:12 +0000 (+0000) Subject: Fix bug in base consonant finding. X-Git-Tag: REL-1-1-0~18 X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=257c281987a7c5e354f8ddc94ddab0d67ec31708;p=m17n%2Fm17n-db.git Fix bug in base consonant finding. --- diff --git a/BEN-OTF.flt b/BEN-OTF.flt index 7eb5feb..1b78947 100644 --- a/BEN-OTF.flt +++ b/BEN-OTF.flt @@ -39,7 +39,6 @@ ;; b: MATRA (below) ;; p: MATRA (post) ;; t: MATRA (two-part) - ;; U: AU LENGTH MARK ;; A: vowel modifier (above) ;; a: vowel modifier (post) ;; V: independent vowel @@ -79,7 +78,7 @@ ) ;; Step 1 : Syllable identification. Recognised syllables are quoted -;; by the virtual character, which is generated by the command "|" and +;; by the pseudo character, which is generated by the command "|" and ;; has the category " " (space). (generator (0 @@ -92,28 +91,34 @@ ;; canonicalisation. ;; Case A : A syllable ending with a vowel modifier. - ("(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]*)(t)?([Aa])" + ;;1 23 4 5 6 7 + ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]*)(t)?([Aa])" < | - (1 set-marks) - (4 = *) - (5 split) - (6 =) + (1 = =) + (2 set-marks) + (5 = *) + (6 split) + (7 =) | >) ;; Case B : A syllable ending with a two-part vowel. - ("(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)" + ;;1 23 4 5 + ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)" < | - (1 set-marks) - (4 split) + (1 = =) + (2 set-marks) + (5 split) | >) ;; Case C : A syllable ending with other vowel. Note that a ;; two-part vowel may be expressed with two vowel marks for ;; backward compatibility. - ("(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]+)" + ;;1 23 4 5 + ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]+)" < | - (1 set-marks) - (4 = *) + (1 = =) + (2 set-marks) + (5 = *) | >) ;; Case D : Ya-phalaa. Reorder H and Y for the next step. @@ -121,9 +126,9 @@ ;; says "it should be ;; permissible for the Ya-phalla to be consistently formed by "ZWNJ ;; + VIRAMA + YA". - ("([CBRY]N)(H)(Y)" + ("([CRBY]n?N)(H)(Y)" < | - (1 = =) + (1 = *) (3 =) (2 =) | >) @@ -131,12 +136,13 @@ ;; Case E : No explicit vowel nor modifier. If the syllable ends ;; with a consonant, analyse it for reordering in the next step. ;; Otherwise, just identify the syllable without changing anything. - ("([CRBY]n?H[NJ]?)*[CRBY]n?(HN|HJ|H)?" - (cond - (".+[^HNJ]$" - < | set-marks | >) - (".+" - < | = * | >))) + ;;1 23 4 + ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?" + < | + (1 = =) + (2 set-marks) + (4 = *) + | >) ;; Case F : Syllables that begin with an independent vowel. An ;; optional HYp sequence appears when this syllable represents the @@ -153,19 +159,22 @@ ;; moved. (set-marks (cond - ;; At least one C and ends with Y. - ("(([CRBY]n?H[NJ]?)*Cn?)H(([BR]H)*)Y" - (1 = *) + ;; Ending with Y. + ;;1 2 3 45 6 + ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)" + (1 = *) ; prebase & base 0x09FE ; mark #1 - (3 = *) ; below consonants + (4 = *) ; below consonants 0x09FF ; mark #2 - 0x09AF 0x09CD) ; YA + moved HASANT - ;; At least one C and ends with B or R. - ("(([CRBY]n?H[NJ]?)*Cn?)H(([BR]H)*[BR])" - (1 = *) + (6 =) ; YA + (3 =)) ; moved HASANT + ;; Ending with R or B. + ;;1 2 3 45 + ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])" + (1 = *) ; prebase & base 0x09FE ; mark #1 - (3 = *) ; below consonants - 0x09CD ; moved HASANT + (4 = *) ; below consonants + (3 =) ; moved HASANT 0x09FF) ; mark #2 (".+" = * @@ -184,16 +193,24 @@ (generator (0 (cond + ;; Special case: a single consonant and a Halant. + (" (.)xy(H[NJ]?) " + | + 0x09FE + (1 =) + (2 = *) + 0x09FE + |) + ;; This is the most generic pattern. It follows Case A-C and a ;; part of Case E in Step 1. Now Mark #1 is used to indicate the ;; critical part that requires pre-base substitution in the ;; following steps. - ;; 1 2 3 4 5 6 7 8 9 - (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)? " + ;; 1 2 3 4 5 6 7 8 9 10 + (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)?(HN|HJ|H)? " | (5 =) ; [Mpre] - 0x09FE ;; Actually, the nukt feature is not necessary for Bengali because ;; all the necessary Nukta forms are precomposed in the Unicode ;; standard. Even if a Nukta consonant is given in the form of @@ -203,8 +220,9 @@ ;; operation is also applied here, before applying the half form ;; operation because the Mukti font generates Akhand ligatures ;; directly from the "C H C" sequence, not via the half form. + 0x09FE ; begin Cpre & Cbase (2 otf:beng=nukt,akhn) ; {Cpre + H} + Cbase - 0x09FE + 0x09FE ; end Cpre & Cbase (3 otf:beng=blwf) ; {Cbelow + H} (6 =) ; [Mbelow] (1 otf:beng=rphf) ; [Reph] @@ -212,6 +230,7 @@ (4 otf:beng=pstf) ; [Cpost + H] (7 =) ; [Mpost] (9 =) ; [VMpost] + (10 = *) ; optional HASANT |) ;; Syllables that begin with an independent vowel (following up @@ -228,30 +247,18 @@ ;; Ya-phalaa (following up Step 1, Case D). Remove N and change YH ;; to the post base form. Syllables of this type do not require ;; further modification. - (" ([CBRY])N(YH) " + (" ([CBRY]n?)N(YH) " | (1 =) (2 otf:beng=pstf) |) - ;; Syllables that end with an H and an optional N or J (following - ;; up a part of Step 1, Case E). Syllables of this type also - ;; require pre-base substitution in the following steps. - (" ([^ ]+H[NJ]?) " - | - 0x09FE - ;; Only Nukt and Akhn are applied here. See the comment in the - ;; topmost sibling for explanation. - (1 otf:beng=nukt,akhn) - 0x09FE - |) - ("." =)) *)) -;; Step 3 : Now only those syllables that contain the virtual -;; character x require pre-base substition. This is the most -;; complicated part in this FLT. +;; Step 3 : Now only those syllables that contain the pseudo character +;; x require pre-base substition. This is the most complicated part +;; in this FLT. ;; If the sequence "C1 H C2" makes ligature L12, L12 replaces the ;; original sequence. @@ -274,7 +281,7 @@ ;; into the _Halant_ (not half) form of C1. However, there is no way ;; to reconvert C1half into C1halant nor to revert back to "C1 H". ;; Thus we duplicate the critical part in two different forms so that -;; we can select the appropriate one in the next step. The virtual +;; we can select the appropriate one in the next step. The pseudo ;; character x is used to indicate the boundaries. ;; ... C1 H C2 ... ==> ... x C1halant C2 x L12 x ...