Fix bug in base consonant finding.

author ntakahas <ntakahas>

Fri, 13 Aug 2004 06:53:12 +0000 (06:53 +0000)

committer ntakahas <ntakahas>

Fri, 13 Aug 2004 06:53:12 +0000 (06:53 +0000)
author ntakahas <ntakahas>
Fri, 13 Aug 2004 06:53:12 +0000 (06:53 +0000)
committer ntakahas <ntakahas>
Fri, 13 Aug 2004 06:53:12 +0000 (06:53 +0000)
diff --git a/BEN-OTF.flt b/BEN-OTF.flt

index 7eb5feb..1b78947 100644 (file)
--- a/BEN-OTF.flt
+++ b/BEN-OTF.flt
@@ -39,7 +39,6 @@
   ;; b: MATRA (below)
   ;; p: MATRA (post)
   ;; t: MATRA (two-part)
- ;; U: AU LENGTH MARK
   ;; A: vowel modifier (above)
   ;; a: vowel modifier (post)
   ;; V: independent vowel
@@ -79,7 +78,7 @@
   )
  
  ;; Step 1 : Syllable identification.  Recognised syllables are quoted
-;; by the virtual character, which is generated by the command "|" and
+;; by the pseudo character, which is generated by the command "|" and
  ;; has the category " " (space).
  (generator
   (0
@@ -92,28 +91,34 @@
     ;; canonicalisation.
  
     ;; Case A : A syllable ending with a vowel modifier.
-   ("(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]*)(t)?([Aa])"
+   ;;1    23                4          5       6   7
+   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]*)(t)?([Aa])"
      < |
-    (1 set-marks)
-    (4 = *)
-    (5 split)
-    (6 =)
+    (1 = =)
+    (2 set-marks)
+    (5 = *)
+    (6 split)
+    (7 =)
      | >)
  
     ;; Case B : A syllable ending with a two-part vowel.
-   ("(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
+   ;;1    23                4          5
+   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
      < |
-    (1 set-marks)
-    (4 split)
+    (1 = =)
+    (2 set-marks)
+    (5 split)
      | >)
  
     ;; Case C : A syllable ending with other vowel.  Note that a
     ;; two-part vowel may be expressed with two vowel marks for
     ;; backward compatibility.
-   ("(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]+)"
+   ;;1    23                4          5
+   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]+)"
      < |
-    (1 set-marks)
-    (4 = *)
+    (1 = =)
+    (2 set-marks)
+    (5 = *)
      | >)
  
     ;; Case D : Ya-phalaa.  Reorder H and Y for the next step.
@@ -121,9 +126,9 @@
     ;; <http://www.unicode.org/faq/indic.html> says "it should be
     ;; permissible for the Ya-phalla to be consistently formed by 'ZWNJ
     ;; + VIRAMA + YA'".
-   ("([CBRY]N)(H)(Y)"
+   ("([CRBY]n?N)(H)(Y)"
      < |
-    (1 = =)
+    (1 = *)
      (3 =)
      (2 =)
      | >)
@@ -131,12 +136,13 @@
     ;; Case E : No explicit vowel nor modifier.  If the syllable ends
     ;; with a consonant, analyse it for reordering in the next step.
     ;; Otherwise, just identify the syllable without changing anything.
-   ("([CRBY]n?H[NJ]?)*[CRBY]n?(HN|HJ|H)?"
-    (cond
-     (".+[^HNJ]$"
-      < | set-marks | >)
-     (".+"
-      < | = * | >)))
+   ;;1    23                         4
+   ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
+    < |
+    (1 = =)
+    (2 set-marks)
+    (4 = *)
+    | >)
  
     ;; Case F : Syllables that begin with an independent vowel.  An
     ;; optional HYp sequence appears when this syllable represents the
@@ -153,19 +159,22 @@
   ;; moved.
   (set-marks
    (cond
-   ;; At least one C and ends with Y.
-   ("(([CRBY]n?H[NJ]?)*Cn?)H(([BR]H)*)Y"
-    (1 = *)
+   ;; Ending with Y.
+   ;;1        2            3  45        6
+   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)"
+    (1 = *)                            ; prebase & base
      0x09FE                             ; mark #1
-    (3 = *)                            ; below consonants
+    (4 = *)                            ; below consonants
      0x09FF                             ; mark #2
-    0x09AF 0x09CD)                     ; YA + moved HASANT
-   ;; At least one C and ends with B or R.
-   ("(([CRBY]n?H[NJ]?)*Cn?)H(([BR]H)*[BR])"
-    (1 = *)
+    (6 =)                              ; YA
+    (3 =))                             ; moved HASANT
+   ;; Ending with R or B.
+   ;;1        2            3  45
+   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])"
+    (1 = *)                            ; prebase & base
      0x09FE                             ; mark #1
-    (3 = *)                            ; below consonants 
-    0x09CD                             ; moved HASANT
+    (4 = *)                            ; below consonants 
+    (3 =)                              ; moved HASANT
      0x09FF)                            ; mark #2
     (".+"
      = *
@@ -184,16 +193,24 @@
  (generator
   (0
    (cond
+   ;; Special case: a single consonant and a Halant.
+   (" (.)xy(H[NJ]?) "
+    |
+    0x09FE
+    (1 =)
+    (2 = *)
+    0x09FE
+    |)
+
     ;; This is the most generic pattern.  It follows Case A-C and a
     ;; part of Case E in Step 1.  Now Mark #1 is used to indicate the
     ;; critical part that requires pre-base substitution in the
     ;; following steps.
  
-   ;; 1    2         3        4    5   6   7   8   9
-   (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)? "
+   ;; 1    2         3        4    5   6   7   8   9   10
+   (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)?(HN|HJ|H)? "
      |
      (5 =)                              ; [Mpre]
-    0x09FE
      ;; Actually, the nukt feature is not necessary for Bengali because
      ;; all the necessary Nukta forms are precomposed in the Unicode
      ;; standard.  Even if a Nukta consonant is given in the form of
@@ -203,8 +220,9 @@
      ;; operation is also applied here, before applying the half form
      ;; operation because the Mukti font generates Akhand ligatures
      ;; directly from the "C H C" sequence, not via the half form.
+    0x09FE                             ; begin Cpre & Cbase
      (2 otf:beng=nukt,akhn)             ; {Cpre + H} + Cbase
-    0x09FE
+    0x09FE                             ; end Cpre & Cbase
      (3 otf:beng=blwf)                  ; {Cbelow + H}
      (6 =)                              ; [Mbelow]
      (1 otf:beng=rphf)                  ; [Reph]
@@ -212,6 +230,7 @@
      (4 otf:beng=pstf)                  ; [Cpost + H]
      (7 =)                              ; [Mpost]
      (9 =)                              ; [VMpost]
+    (10 = *)                           ; optional HASANT
      |)
  
     ;; Syllables that begin with an independent vowel (following up
@@ -228,30 +247,18 @@
     ;; Ya-phalaa (following up Step 1, Case D).  Remove N and change YH
     ;; to the post base form.  Syllables of this type do not require
     ;; further modification.
-   (" ([CBRY])N(YH) "
+   (" ([CBRY]n?)N(YH) "
      |
      (1 =)
      (2 otf:beng=pstf)
      |)
  
-   ;; Syllables that end with an H and an optional N or J (following
-   ;; up a part of Step 1, Case E).  Syllables of this type also
-   ;; require pre-base substitution in the following steps.
-   (" ([^ ]+H[NJ]?) "
-    |
-    0x09FE
-    ;; Only Nukt and Akhn are applied here.  See the comment in the
-    ;; topmost sibling for explanation.
-    (1 otf:beng=nukt,akhn)
-    0x09FE
-    |)
-
     ("." =))
    *))
  
-;; Step 3 : Now only those syllables that contain the virtual
-;; character x require pre-base substition.  This is the most
-;; complicated part in this FLT.
+;; Step 3 : Now only those syllables that contain the pseudo character
+;; x require pre-base substitution.  This is the most complicated part
+;; in this FLT.
  
  ;; If the sequence "C1 H C2" makes ligature L12, L12 replaces the
  ;; original sequence.
@@ -274,7 +281,7 @@
  ;; into the _Halant_ (not half) form of C1.  However, there is no way
  ;; to reconvert C1half into C1halant nor to revert back to "C1 H".
  ;; Thus we duplicate the critical part in two different forms so that
-;; we can select the appropriate one in the next step.  The virtual
+;; we can select the appropriate one in the next step.  The pseudo
  ;; character x is used to indicate the boundaries.
  
  ;; ... C1 H C2 ...  ==>  ... x C1halant C2 x L12 x ...
author	ntakahas <ntakahas>
	Fri, 13 Aug 2004 06:53:12 +0000 (06:53 +0000)
committer	ntakahas <ntakahas>
	Fri, 13 Aug 2004 06:53:12 +0000 (06:53 +0000)