Rewritten with new algorithm for Unicode 5.0.

author ntakahas <ntakahas>

Thu, 28 Jun 2007 05:35:08 +0000 (05:35 +0000)

committer ntakahas <ntakahas>

Thu, 28 Jun 2007 05:35:08 +0000 (05:35 +0000)
author ntakahas <ntakahas>
Thu, 28 Jun 2007 05:35:08 +0000 (05:35 +0000)
committer ntakahas <ntakahas>
Thu, 28 Jun 2007 05:35:08 +0000 (05:35 +0000)
diff --git a/FLT/ORYA-OTF.flt b/FLT/ORYA-OTF.flt

index ebd6ef5..5840962 100644 (file)
--- a/FLT/ORYA-OTF.flt
+++ b/FLT/ORYA-OTF.flt
@@ -30,289 +30,202 @@
        (font (nil nil unicode-bmp :otf=orya=rphf)))
  
  (category
- ;; C: consonant (excluding Y and R)
- ;; R: consonant RA (reph, below)
+ ;; C: consonant (except for R, B and Y)
+ ;; R: consonant RA
   ;; B: consonant (below)
- ;; Y: consonant YA, YYA (post)
+ ;; Y: consonant (post)
   ;; n: NUKTA
   ;; H: HALANT
- ;; m: MATRA (pre)
- ;; u: MATRA (above)
- ;; b: MATRA (below)
- ;; p: MATRA (post)
- ;; t: MATRA (two-part)
+ ;; m: vowel sign (pre)
+ ;; u: vowel sign (above)
+ ;; b: vowel sign (below)
+ ;; p: vowel sign (post)
+ ;; t: vowel sign (two-part)
   ;; A: vowel modifier (above)
   ;; a: vowel modifier (post)
   ;; V: independent vowel
   ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
   ;; J: ZWJ (ZERO WIDTH JOINER)
- ;; E: ELSE
- ;;
- (0x200C       ?N)                     ; ZWNJ
- (0x200D       ?J)                     ; ZWJ
- (0x0664 0x0665        ?E)                     ; DANDA, DOUBLE DANDA
- (0x0B00 0x0B7F        ?E)                     ; ELSE
- (0x0B01       ?A)                     ; SIGN CANDRABINDU (above)
- (0x0B02 0x0B03        ?a)                     ; SIGN ANUSWAR, VISARGA (post)
- (0x0B05 0x0B0C        ?V)                     ; LETTER A .. VOCALIC L
- (0x0B0F 0x0B10        ?V)                     ; LETTER E .. AI
- (0x0B13 0x0B14        ?V)                     ; LETTER O .. AU
+ ;; X: generic
+ ;; Z: internal use
+ (0x0B00 0x0B7F ?X)                    ; generic
+ (0x0B00       ?Z)                     ; internal use
+ (0x0B01 0x0B03        ?a)                     ; SIGN CANDRABINDU .. VISARGA
+ (0x0B05 0x0B14        ?V)                     ; LETTER A .. AU
   (0x0B15 0x0B39        ?C)                     ; LETTER KA .. HA
   (0x0B24       ?B)                     ; LETTER TA
   (0x0B28       ?B)                     ; LETTER NA
- (0x0B2C       ?B)                     ; LETTER BA
- (0x0B2D       ?B)                     ; LETTER BHA
- (0x0B2E       ?B)                     ; LETTER MA
+ (0x0B2C 0x0B2E        ?B)                     ; LETTER BA .. MA
   (0x0B2F       ?Y)                     ; LETTER YA
   (0x0B30       ?R)                     ; LETTER RA
- (0x0B32       ?B)                     ; LETTER LA
+ (0x0B32 0x0B33        ?B)                     ; LETTER LA .. LLA
   (0x0B33       ?B)                     ; LETTER LLA
- (0x0B35       ?B)                     ; LETTER VA
   (0x0B3C       ?n)                     ; SIGN NUKTA
- (0x0B3E       ?p)                     ; VOWEL SIGN AA (post)
- (0x0B3F       ?u)                     ; VOWEL SIGN I (above)
- (0x0B40       ?p)                     ; VOWEL SIGN II (post)
- (0x0B41 0x0B43        ?b)                     ; VOWEL SIGN U, UU, R (below)
- (0x0B47       ?m)                     ; VOWEL SIGN E (pre)
- (0x0B48 0x0B4C ?t)                    ; VOWEL SIGN AI, O, AU (two-part)
- (0x0B4D       ?H)                     ; SIGN VIRAMA (HALANT)
+ (0x0B3E       ?p)                     ; VOWEL SIGN AA
+ (0x0B3F       ?u)                     ; VOWEL SIGN I
+ (0x0B40       ?p)                     ; VOWEL SIGN II
+ (0x0B41 0x0B43        ?b)                     ; VOWEL SIGN U .. VOCALIC R
+ (0x0B47       ?m)                     ; VOWEL SIGN E
+ (0x0B48 0x0B4C ?t)                    ; VOWEL SIGN AI .. AU
+ (0x0B4D       ?H)                     ; SIGN VIRAMA
   (0x0B56       ?u)                     ; AI LENGTH MARK
   (0x0B57       ?p)                     ; AU LENGTH MARK
- (0x0B5C 0x0B5D        ?C)                     ; LETTER RRA, RHA
+ (0x0B5C 0x0B5D        ?C)                     ; LETTER RRA .. RHA
   (0x0B5F       ?Y)                     ; LETTER YYA
- (0x0B60 0x0B61        ?V)                     ; LETTER VOCALIC RR, LL
+ (0x0B60 0x0B61        ?V)                     ; LETTER VOCALIC RR .. LL
   (0x0B71       ?C)                     ; LETTER WA
- (0x0B7E       ?x)                     ; mark #1 (internal use)
- (0x0B7F       ?y)                     ; mark #2 (internal use)
+ (0x0964 0x0965        ?X)                     ; DANDA .. DOUBLE DANDA
+ (0x200C       ?N)                     ; ZWNJ
+ (0x200D       ?J)                     ; ZWJ
   )
  
-;; Step 1 : Syllable identification.  Recognised syllables are quoted
-;; by the pseudo character, which is generated by the command "|" and
-;; has the category " " (space).
+;; Decompose two-part vowel signs.
+;; Move ZWJ before the consonant.
  (generator
   (0
    (cond
-   ;; Case F : Syllables containing an independent vowel.
-    ("(RH)?(V)(a)?(A)?"
-    < |
-    (2 =)
-    (1 = =)
-    (3 =)
-    (4 =)
-    | >)
-
-   ;; Case A-C are for those syllables that end with an explicit vowel
-   ;; mark and/or a vowel modifier.  They are divided into three cases
-   ;; for readability of the regular expressions.  The leading
-   ;; consonant-Halant repetition is analysed for reordering in the
-   ;; next step.  A two-part vowel, if any, is split for
-   ;; canonicalisation.
+   ((0x0B48)
+    0x0B47 0x0B56)
+   ((0x0B4B)
+    0x0B47 0x0B3E)
+   ((0x0B4C)
+    0x0B47 0x0B57)
+   ("(Cn?)(J)"
+    (2 =) (1 = *))
+   ("." =))
+  *))
  
-   ;; Case A : A syllable ending with a vowel modifier.
-   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]*)(t)?([Aa])"
+;; Syllable identification and reordering.
+;; Do not apply 'rphf' if a syllable begins with ZWJ.
+(generator
+ (0
+  (cond
+   ;; A syllable with ZWJ and a pre-base vowel sign.
+   ;;1  23                    4  5   6   7
+   ("(J)(([CRBY]n?H)*[CRBY]n?)(m)(u)?(p)?(a)?"
+    < | (1 =) (4 =) (2 pre-below) (5 =) (2 post) (6 =) (7 =) | >)
+
+   ;; A syllable with ZWJ and a non-pre-base vowel sign.
+   ;;1  23                    45      6   7
+   ("(J)(([CRBY]n?H)*[CRBY]n?)(([bu])|(p))(a)?"
+    < | (1 =) (2 pre-below) (5 =) (2 post) (6 =) (7 =) | >)
+
+   ;; A syllable with ZWJ and a vowel modifier, but without vowel signs.
+   ;;1  23                    4
+   ("(J)(([CRBY]n?H)*[CRBY]n?)(a)"
+    < | (1 =) (2 pre-below) (2 post) (4 =) | >)
+
+   ;; Add a ZWNJ explicitly when a syllable ends with a halant.
+   ;;1  23                    4   5
+   ("(J)(([CRBY]n?H)*[CRBY]n?)(H)?(N)?"
+    < | (1 =) (2 pre-below) (4 = 0x200C) (2 post) | >)
+
+   ;; With a pre-base vowel sign, without a ZWJ.
+   ;;1    23                    4  5   6   7
+   ("(RH)?(([CRBY]n?H)*[CRBY]n?)(m)(u)?(p)?(a)?"
      < |
-    (1 = =)
-    (2 set-marks)
-    (5 = *)
-    (6 split)
-    (7 =)
+    (4 =) (2 pre-below) (5 =) (1 otf:orya=rphf) (2 post) (6 =) (7 =)
      | >)
  
-   ;; Case B : A syllable ending with a two-part vowel.
-   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
-    < |
-    (1 = =)
-    (2 set-marks)
-    (5 split)
-    | >)
+   ;; With a non-pre-base vowel sign, without a ZWJ.
+   ;;1    23                    45      6   7
+   ("(RH)?(([CRBY]n?H)*[CRBY]n?)(([bu])|(p))(a)?"
+    < | (2 pre-below) (5 =) (1 otf:orya=rphf) (2 post) (6 =) (7 =) | >)
  
-   ;; Case C : A syllable ending with other vowel(s).  Note that a
-   ;; two-part vowel may be expressed with two vowel marks for
-   ;; backward compatibility.
-   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]+)"
-    < |
-    (1 = =)
-    (2 set-marks)
-    (5 = *)
-    | >)
+   ;; With a vowel modifier, but without vowel signs or a ZWJ.
+   ;;1    23                    4
+   ("(RH)?(([CRBY]n?H)*[CRBY]n?)(a)"
+    < | (2 pre-below) (1 otf:orya=rphf) (2 post) (4 =) | >)
  
-   ;; Case E : No explicit vowel nor modifier.  If the syllable ends
-   ;; with a consonant, analyse it for reordering in the next step.
-   ;; Otherwise, just identify the syllable without changing anything.
-   ;;1    23                         4
-   ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
-    < |
-    (1 = =)
-    (2 set-marks)
-    (4 = *)
-    | >)
+   ;; Add a ZWNJ explicitly when a syllable ends with a halant.
+   ;;1    23                    4   5
+   ("(RH)?(([CRBY]n?H)*[CRBY]n?)(H)?(N)?"
+    < | (2 pre-below) (1 otf:orya=rphf) (4 = 0x200C) (2 post) | >)
+
+   ;; A syllable starting with an independent vowel.
+   ("Va?"
+    < | = * | >)
  
     ("." =))
    *)
  
- ;; Set mark #1 (x) at the position where below consonants begin, and
- ;; mark #2 (y) at the position to which below and above signs will be
- ;; moved.
- (set-marks
+ ;; Move a halant after the base consonant to the end.
+ ;; Fill the resulting gap with a special mark.
+ ;; Remove post-base parts.
+ (pre-below
    (cond
-   ;; Ending with Y.
-   ;;1        2            3  45        6
-   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)$"
-    (1 = *)                            ; prebase & base
-    0x0B7E                             ; below begin
-    (4 = *)                            ; below consonants
-    0x0B7F                             ; below end
-    (6 =)                              ; YA
-    (3 =))                             ; moved HALANT
-   ;; Ending with R or B.
-   ;;1        2            3  45
-   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])$"
-    (1 = *)                            ; prebase & base
-    0x0B7E                             ; below begin
-    (4 = *)                            ; below consonants 
-    (3 =)                              ; moved HALANT
-    0x0B7F)                            ; below end
-   (".+"
-    = *
-    0x0B7E                             ; below begin
-    0x0B7F)))                          ; below end
-
- ;; Split two-part dependent vowel signs for canonicalisation.
- (split
+   ("([CRBYnH]*[CYn])H([RBH]+)[YH]+$"
+    (1 = *) 0x0B00 (2 = *))
+   ("([CRBYnH]*[CYn])(H)([RBH]+)$"
+    (1 = *) 0x0B00 (3 = *) (2 =))
+   ("([CRBYnH]*[Cn])[YH]*$"
+    (1 = *) 0x0B00)
+   ("([RB]n?)H([RBH]*)[YH]+$"
+    (1 = *) 0x0B00 (2 = *))
+   ("([RB]n?)(H)([RBH]*)$"
+    (1 = *) 0x0B00 (3 = *) (2 =))
+   ("([RBY]n?)[YH]*$"
+    (1 = *) 0x0B00)))
+
+ ;; Extract post-base parts and add a halant at the end.
+ ;; Produce nothing if there are no post-base parts.
+ (post
    (cond
+   ("[CRBYnH]*[CRBn]H([YH]+)$"         ; C/R/B (or nukta) + H, then a Y/H run
+    (1 = *) 0x0B4D)                    ; emit that run, then a halant
+   ("Yn?H([YH]+)$"                     ; leading Y (opt. nukta) + H, then a Y/H run
+    (1 = *) 0x0B4D)                    ; emit that run, then a halant
+   (".+"                               ; anything else has no post-base part
+    )))                                ; so produce nothing
   )
  
-;; Step 2 : Move Reph and Matra if necessary.  From now on, we care
-;; only for those syllables that have been identified in Step 1.
+;; Apply language forms to the relevant segments.
  (generator
   (0
    (cond
-   ;; Special case: a single consonant and a Halant.
-   (" (.)xy(H[NJ]?) "
-    |
-    (1 =)
-    (2 = *)
-    |)
-
-   ;; This is the most generic pattern.  It follows Case A-C and a
-   ;; part of Case E in Step 1.  Now Mark #1 is used to indicate the
-   ;; critical part that requires pre-base substitution in the
-   ;; following steps.
-
-   ;; 1    2         3        4    5   6   7   8   9   10  11
-   (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(u)?(p)?(A)?(a)?(HN|HJ|H)? "
-    |
-    (5 =)                              ; [Mpre]
-    ;; We can safely perform Nukta composition here because it does
-    ;; not affect surrounding letters in the syllable.  The Akhand
-    ;; ligature operation is also applied here, before applying the
-    ;; half form operation because the Utkal font generates Akhand
-    ;; ligatures directly from the "C H C" sequence, not via the half
-    ;; form.
-    0x0B7E                             ; begin Cpre & Cbase
-    (2 otf:orya=nukt,akhn+)            ; {Cpre + H} + Cbase
-    0x0B7E                             ; end Cpre & Cbase
-    (3 otf:orya=blwf+)                 ; {Cbelow + H}
-    (6 =)                              ; [Mbelow]
-    (7 =)                              ; [Mabove]
-    (1 otf:orya=rphf+)                 ; [Reph]
-    (4 otf:orya=pstf+)                 ; [Cpost + H]
-    (8 =)                              ; [Mpost]
-    (9 =)                              ; [VMabove]
-    (10 =)                             ; [VMpost]
-    (11 = *)                           ; optional HALANT
-    |)
+   ;; If a syllable contains a ZWNJ, render the preceding halant explicitly.
+   (" ([^Z]+)(Z)([^N]*)(HN)([^ ]*) "
+    | (1 otf:orya=nukt,akhn,half+) (2 =) (3 otf:orya=blwf+) (4 = =)
+    (5 otf:orya=pstf+) |)
  
-   ;; Syllables that begin with an independent vowel (following up
-   ;; Step 1, Case F).  Syllables of this type do not require further
-   ;; modification.
-   (" (V)(RH)(.*) "
-    |
-    (1 =)
-    (2 otf:orya=rphf+)
-    (3 = *)
-    |)
+   (" (J?m?)([^Z]+)(Z)([^ ]*) "
+    | (1 = *) (2 otf:orya=nukt,akhn,half+) (3 =) (4 otf:orya=blwf,pstf+) |)
  
     ("." =))
    *))
  
-;; Step 3 : Now only those syllables that contain the pseudo character
-;; x require pre-base substition.  Unlike the Mukti font for Bengali,
-;; the Utkal font can produce the ligature for "C1 H C2" from
-;; "C1halant" and "C2".  If such a ligature is not available, we get a
-;; sequence consisting of "C1halant" and "C2", which is satisfactory.
-
+;; Apply 'pres' to get pre-base conjuncts.
  (generator
   (0
    (cond
-   (" (.H)J "
-    |
-    (1 otf:orya=half+)
-    |)
-   (" (.H)N? "
-    |
-    (1 otf:orya=haln+)
-    |)
-   (" ([^x ]?x)([^x ]*)(x[^ ]*) "
-    |
-    (1 = *)
-    (2 pres)
-    (3 = *)
-    |)
+   (" (J?m?)([^Z]+)(Z)([^ ]*) "
+    | (1 = *) (2 otf:orya=pres+) (3 =) (4 = *) |)
     ("." =))
-  *)
-
- (pres
-  (cond
-   ("([^NJ]*)(.H)J(.*)"
-    (1 otf:orya=haln,pres+)
-    (2 otf:orya=half+)
-    (3 pres))
-   ("([^N]*)(H)N(.*)"
-    (1 otf:orya=haln,pres+)
-    (2 =)
-    (3 pres))
-   (".*"
-    otf:orya=haln,pres+)))
-    
- )
-
-;; Step 4 : Mpre/Cpre reordering.  If the pre-base substitution in
-;; the previous step results in more than one glyph, and there is an
-;; Mpre in this syllable, then move the Mpre before the Cbase.
-;; i.e. [Mpre]{Kh}Kf... -> {Kh}[Mpre]Kf...
+  *))
  
+;; When the number of glyphs between a pre-base vowel sign and the
+;; post-base mark is more than one, move the pre-base vowel sign
+;; before the base glyph.
  (generator
   (0
    (cond
-   (" ([^x ])x([^x ]+)([^x ])x([^x ]*) "
-    |
-    (2 = *)
-    (1 =)
-    (3 =)
-    (4 = *)
-    |)
-   (" ([^x ])?x([^x ]*)x([^ ]*) "
-    |
-    (1 =)
-    (2 = *)
-    (3 = *)
-    |)
+   (" (J)?(m)([^Z]+)([^Z])Z([^N ]*)N?([^ ]*) "
+    | (1 =) (3 = *) (2 =) (4 =) (5 = *) (6 = *)|)
+   (" ([^Z]+)Z([^N ]*)N?([^ ]*) "
+    | (1 = *) (2 = *) (3 = *) |)
     ("." =))
    *))
  
-;; Step 5 : Substitutions & positioning.
-
+;; Apply other features.
+;; Do not apply 'vatu' and 'blws' if there is a ZWJ.
+;; The 'pres' feature is applied again for the pre-base vowel sign.
  (generator
   (0
    (cond
-   (" ([^ ]*) "
-    ;; FIXME : The pres below is for the TTA ligature in the Utkal
-    ;; font.  It should be removed once the font is updated.
-    (1 otf:orya=vatu,abvs,blws,psts,pres))
+   (" J([^ ]+) "
+    (1 otf:orya=pres,abvs,pstp,haln))
+   (" ([^ ]+) "
+    (1 otf:orya=vatu,pres,abvs,blws,pstp,haln))
     ("."
      [ otf:orya=+ ]))
    *))
author	ntakahas <ntakahas>
	Thu, 28 Jun 2007 05:35:08 +0000 (05:35 +0000)
committer	ntakahas <ntakahas>
	Thu, 28 Jun 2007 05:35:08 +0000 (05:35 +0000)