;; ORYA-OTF.flt -- Font Layout Table for Oriya OpenType font
;; Copyright (C) 2004
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H15PRO112

;; This file is part of the m17n database; a sub-part of the m17n
;; library.

;; The m17n library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public License
;; as published by the Free Software Foundation; either version 2.1 of
;; the License, or (at your option) any later version.

;; The m17n library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.

;; You should have received a copy of the GNU Lesser General Public
;; License along with the m17n library; if not, write to the Free
;; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;; <li> ORYA-OTF.flt
;;;
;;; For Oriya OpenType fonts to draw the Oriya script.  Tested with
;;; utkalm.ttf <http://oriya.sarovar.org/download/utkalm.ttf.gz>

(category
 ;; Maps code points to the one-letter categories that the regular
 ;; expressions in the generators below are matched against.  Entries
 ;; are listed from general to specific: the blanket ?E range for the
 ;; Oriya block comes first and the following entries refine it.
 ;;
 ;; C: consonant (excluding Y and R)
 ;; R: consonant RA (reph, below)
 ;; B: consonant (below)
 ;; Y: consonant YA, YYA (post)
 ;; n: NUKTA
 ;; H: HALANT
 ;; m: MATRA (pre)
 ;; u: MATRA (above)
 ;; b: MATRA (below)
 ;; p: MATRA (post)
 ;; t: MATRA (two-part)
 ;; A: vowel modifier (above)
 ;; a: vowel modifier (post)
 ;; V: independent vowel
 ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
 ;; J: ZWJ (ZERO WIDTH JOINER)
 ;; E: ELSE
 ;; x: mark #1 (internal use, inserted by set-marks)
 ;; y: mark #2 (internal use, inserted by set-marks)
 ;;
 (0x200C	?N)			; ZWNJ
 (0x200D	?J)			; ZWJ
 ;; The dandas are shared with Devanagari and live at U+0964..U+0965
 ;; (U+0664..U+0665 are Arabic-Indic digits).
 (0x0964 0x0965	?E)			; DANDA, DOUBLE DANDA
 (0x0B00 0x0B7F	?E)			; ELSE
 (0x0B01	?A)			; SIGN CANDRABINDU (above)
 (0x0B02 0x0B03	?a)			; SIGN ANUSWAR, VISARGA (post)
 (0x0B05 0x0B0C	?V)			; LETTER A .. VOCALIC L
 (0x0B0F 0x0B10	?V)			; LETTER E .. AI
 (0x0B13 0x0B14	?V)			; LETTER O .. AU
 (0x0B15 0x0B39	?C)			; LETTER KA .. HA
 (0x0B24	?B)			; LETTER TA
 (0x0B28	?B)			; LETTER NA
 (0x0B2C	?B)			; LETTER BA
 (0x0B2D	?B)			; LETTER BHA
 (0x0B2E	?B)			; LETTER MA
 (0x0B2F	?Y)			; LETTER YA
 (0x0B30	?R)			; LETTER RA
 (0x0B32	?B)			; LETTER LA
 (0x0B33	?B)			; LETTER LLA
 (0x0B35	?B)			; LETTER VA
 (0x0B3C	?n)			; SIGN NUKTA
 (0x0B3E	?p)			; VOWEL SIGN AA (post)
 (0x0B3F	?u)			; VOWEL SIGN I (above)
 (0x0B40	?p)			; VOWEL SIGN II (post)
 (0x0B41 0x0B43	?b)			; VOWEL SIGN U, UU, R (below)
 (0x0B47	?m)			; VOWEL SIGN E (pre)
 ;; U+0B49 and U+0B4A are unassigned and have no rule in `split', so
 ;; they are left as ?E instead of being treated as two-part matras.
 (0x0B48	?t)			; VOWEL SIGN AI (two-part)
 (0x0B4B 0x0B4C	?t)			; VOWEL SIGN O, AU (two-part)
 (0x0B4D	?H)			; SIGN VIRAMA (HALANT)
 (0x0B56	?u)			; AI LENGTH MARK
 (0x0B57	?p)			; AU LENGTH MARK
 (0x0B5C 0x0B5D	?C)			; LETTER RRA, RHA
 (0x0B5F	?Y)			; LETTER YYA
 (0x0B60 0x0B61	?V)			; LETTER VOCALIC RR, LL
 (0x0B71	?C)			; LETTER WA
 (0x0B7E	?x)			; mark #1 (internal use)
 (0x0B7F	?y)			; mark #2 (internal use)
 )

;; Step 1 : Syllable identification.  Recognised syllables are quoted
;; by the pseudo character, which is generated by the command "|" and
;; has the category " " (space).
(generator
 (0
  ;; Scan the input and bracket every recognised syllable with the
  ;; pseudo character generated by "|".  The consonant cluster of a
  ;; syllable is handed to `set-marks' and a two-part vowel to `split',
  ;; both defined later in this generator.
  (cond
   ;; Case F : Syllables containing an independent vowel.  The vowel
   ;; is emitted before the optional leading RA + HALANT.  Vowel
   ;; modifiers are accepted in the order above (A) then post (a),
   ;; which is the order Step 2 uses for its [VMabove] [VMpost] slots.
   ;; Groups: 1 = RH, 2 = vowel, 3 = modifier (above), 4 = modifier
   ;; (post).
   ("(RH)?(V)(A)?(a)?"
    < |
    (2 =)
    (1 = =)
    (3 =)
    (4 =)
    | >)

   ;; Cases A-C are for those syllables that end with an explicit
   ;; vowel mark and/or a vowel modifier.  They are divided into three
   ;; cases for readability of the regular expressions.  The leading
   ;; consonant-Halant repetition is analysed for reordering in the
   ;; next step.  A two-part vowel, if any, is split for
   ;; canonicalisation.
   ;;
   ;; In all three, group 1 is the leading RH and group 2 is the whole
   ;; consonant cluster (groups 3 and 4 are nested inside group 2).

   ;; Case A : A syllable ending with a vowel modifier.
   ;; Groups: 5 = vowel signs, 6 = two-part vowel, 7 = vowel modifier.
   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]*)(t)?([Aa])"
    < |
    (1 = =)
    (2 set-marks)
    (5 = *)
    (6 split)
    (7 =)
    | >)

   ;; Case B : A syllable ending with a two-part vowel.
   ;; Groups: 5 = two-part vowel.
   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
    < |
    (1 = =)
    (2 set-marks)
    (5 split)
    | >)

   ;; Case C : A syllable ending with other vowel(s).  Note that a
   ;; two-part vowel may be expressed with two vowel marks for
   ;; backward compatibility.
   ;; Groups: 5 = vowel signs.
   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]+)"
    < |
    (1 = =)
    (2 set-marks)
    (5 = *)
    | >)

   ;; Case E : No explicit vowel nor modifier.  If the syllable ends
   ;; with a consonant, analyse it for reordering in the next step.
   ;; Otherwise, just identify the syllable without changing anything.
   ;;1    23                         4
   ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
    < |
    (1 = =)
    (2 set-marks)
    (4 = *)
    | >)

   ;; Anything else is copied through one character at a time.
   ("." =))
  *)

 ;; Set mark #1 (x) at the position where below consonants begin, and
 ;; mark #2 (y) at the position to which below and above signs will be
 ;; moved.
 (set-marks
  ;; Called from stage 0 with the consonant cluster of a syllable.
  ;; Copies the cluster and inserts the two internal marks 0x0B7E (x)
  ;; and 0x0B7F (y).  In the first two rules the HALANT that follows
  ;; the pre-base/base part (group 3) is moved behind the trailing
  ;; consonants.
  (cond
   ;; Ending with Y.
   ;; Output: base part, x, below consonants, y, YA, moved HALANT.
   ;;1        2            3  45        6
   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)$"
    (1 = *)				; prebase & base
    0x0B7E				; below begin
    (4 = *)				; below consonants
    0x0B7F				; below end
    (6 =)				; YA
    (3 =))				; moved HALANT
   ;; Ending with R or B.
   ;; Output: base part, x, below consonants, moved HALANT, y.
   ;;1        2            3  45
   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])$"
    (1 = *)				; prebase & base
    0x0B7E				; below begin
    (4 = *)				; below consonants
    (3 =)				; moved HALANT
    0x0B7F)				; below end
   ;; Anything else: there are no below consonants, so the cluster is
   ;; copied unchanged and both marks are appended with nothing
   ;; between them.
   (".+"
    = *
    0x0B7E				; below begin
    0x0B7F)))				; below end

 ;; Split two-part dependent vowel signs for canonicalisation.
 (split
  ;; Each rule replaces one two-part vowel sign with VOWEL SIGN E
  ;; (0x0B47) followed by its second component.
  (cond
   ;; VOWEL SIGN AI -> E + AI LENGTH MARK
   ((0x0B48)
    0x0B47 0x0B56)
   ;; VOWEL SIGN O -> E + VOWEL SIGN AA
   ((0x0B4B)
    0x0B47 0x0B3E)
   ;; VOWEL SIGN AU -> E + AU LENGTH MARK
   ((0x0B4C)
    0x0B47 0x0B57)))
 )

;; Step 2 : Move Reph and Matra if necessary.  From now on, we care
;; only for those syllables that have been identified in Step 1.
(generator
 (0
  ;; Works on the syllables bracketed by the pseudo character (matched
  ;; as " " in the patterns) in Step 1; each rule re-emits the
  ;; brackets with "|".
  (cond
   ;; Special case: a single consonant and a Halant.  Both marks are
   ;; dropped; only the consonant and the Halant (with its optional
   ;; ZWNJ/ZWJ) are emitted.
   (" (.)xy(H[NJ]?) "
    |
    (1 =)
    (2 = *)
    |)

   ;; This is the most generic pattern.  It follows Case A-C and a
   ;; part of Case E in Step 1.  Now Mark #1 is used to indicate the
   ;; critical part that requires pre-base substitution in the
   ;; following steps: it is emitted twice, once before and once
   ;; after the Cpre & Cbase part.  Mark #2 is not emitted any more.
   ;;
   ;; Groups: 1 = leading RH, 2 = pre-base & base consonants,
   ;; 3 = below consonants, 4 = YA + HALANT, 5 = pre matra,
   ;; 6 = below matra, 7 = above matra, 8 = post matra,
   ;; 9 = vowel modifier (above), 10 = vowel modifier (post),
   ;; 11 = trailing HALANT with optional ZWNJ/ZWJ.

   ;; 1    2         3        4    5   6   7   8   9   10  11
   (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(u)?(p)?(A)?(a)?(HN|HJ|H)? "
    |
    (5 =)				; [Mpre]
    ;; We can safely perform Nukta composition here because it does
    ;; not affect surrounding letters in the syllable.  The Akhand
    ;; ligature operation is also applied here, before applying the
    ;; half form operation because the Utkal font generates Akhand
    ;; ligatures directly from the "C H C" sequence, not via the half
    ;; form.
    0x0B7E				; begin Cpre & Cbase
    (2 otf:orya=nukt,akhn+)		; {Cpre + H} + Cbase
    0x0B7E				; end Cpre & Cbase
    (3 otf:orya=blwf+)			; {Cbelow + H}
    (6 =)				; [Mbelow]
    (7 =)				; [Mabove]
    (1 otf:orya=rphf+)			; [Reph]
    (4 otf:orya=pstf+)			; [Cpost + H]
    (8 =)				; [Mpost]
    (9 =)				; [VMabove]
    (10 =)				; [VMpost]
    (11 = *)				; optional HALANT
    |)

   ;; Syllables that begin with an independent vowel (following up
   ;; Step 1, Case F).  Syllables of this type do not require further
   ;; modification beyond applying the reph form to the RA + HALANT
   ;; that Step 1 placed after the vowel.
   (" (V)(RH)(.*) "
    |
    (1 =)
    (2 otf:orya=rphf+)
    (3 = *)
    |)

   ;; Everything else is copied through unchanged.
   ("." =))
  *))

;; Step 3 : Now only those syllables that contain the pseudo character
;; x require pre-base substitution.  Unlike the Mukti font for Bengali,
;; the Utkal font can produce the ligature for "C1 H C2" from
;; "C1halant" and "C2".  If such a ligature is not available, we get a
;; sequence consisting of "C1halant" and "C2", which is satisfactory.

(generator
 (0
  (cond
   ;; A syllable that is just one character + HALANT + ZWJ: apply the
   ;; half form.  The ZWJ itself is not emitted.
   (" (.H)J "
    |
    (1 otf:orya=half+)
    |)
   ;; A syllable that is just one character + HALANT, optionally
   ;; followed by ZWNJ: apply the halant form.  The ZWNJ itself is not
   ;; emitted.
   (" (.H)N? "
    |
    (1 otf:orya=haln+)
    |)
   ;; A syllable carrying the two x marks from Step 2.  Group 1 is the
   ;; optional leading character plus the first x, group 2 is the part
   ;; between the marks, and group 3 is the second x and everything
   ;; after it.  Only group 2 is processed (by `pres'); the rest is
   ;; copied.
   (" ([^x ]?x)([^x ]*)(x[^ ]*) "
    |
    (1 = *)
    (2 pres)
    (3 = *)
    |)
   ;; Everything else is copied through unchanged.
   ("." =))
  *)

 ;; Pre-base substitution on the part between the x marks.  The input
 ;; is cut at each explicit ZWJ/ZWNJ; the joiner is dropped and the
 ;; remainder is processed by calling `pres' again.
 (pres
  (cond
   ;; Up to a ZWJ: the character + HALANT right before the ZWJ gets
   ;; the half form, whatever precedes it gets haln and pres.
   ("([^NJ]*)(.H)J(.*)"
    (1 otf:orya=haln,pres+)
    (2 otf:orya=half+)
    (3 pres))
   ;; Up to a ZWNJ: the HALANT right before the ZWNJ is copied as is,
   ;; whatever precedes it gets haln and pres.
   ("([^N]*)(H)N(.*)"
    (1 otf:orya=haln,pres+)
    (2 =)
    (3 pres))
   ;; No joiner left: apply haln and pres to the whole input.
   (".*"
    otf:orya=haln,pres+)))

 )

;; Step 4 : Mpre/Cpre reordering.  If the pre-base substitution in
;; the previous step results in more than one glyph, and there is an
;; Mpre in this syllable, then move the Mpre before the Cbase.
;; i.e. [Mpre]{Kh}Kf... -> {Kh}[Mpre]Kf...

(generator
 (0
  (cond
   ;; An Mpre (group 1) in front of the first x, and at least two
   ;; glyphs between the x marks: group 2 is everything but the last
   ;; of them, group 3 is the last one.  Emit group 2 first, then the
   ;; Mpre, then group 3 and the rest (group 4).  Neither x mark is
   ;; emitted.
   (" ([^x ])x([^x ]+)([^x ])x([^x ]*) "
    |
    (2 = *)
    (1 =)
    (3 =)
    (4 = *)
    |)
   ;; Otherwise no reordering is needed: keep the original order and
   ;; just drop the two x marks.
   (" ([^x ])?x([^x ]*)x([^ ]*) "
    |
    (1 =)
    (2 = *)
    (3 = *)
    |)
   ;; Everything else is copied through unchanged.
   ("." =))
  *))

;; Step 5 : Substitutions & positioning.

(generator
 (0
  (cond
   ;; The contents of a bracketed syllable.  Only group 1 is emitted,
   ;; so the delimiting pseudo characters disappear here.
   ;; NOTE(review): unlike the earlier steps, this OTF spec has no
   ;; trailing "+".  Presumably that leaves GPOS positioning enabled
   ;; for this final pass -- confirm against the m17n FLT
   ;; documentation.
   (" ([^ ]*) "
    ;; FIXME : The pres below is for the TTA ligature in the Utkal
    ;; font.  It should be removed once the font is updated.
    (1 otf:orya=vatu,abvs,blws,psts,pres))
   ;; Any character outside a recognised syllable goes through the
   ;; OTF spec with empty feature lists, wrapped in "[" and "]".
   ("."
    [ otf:orya=+ ]))
  *))

;; Local Variables:
;; mode: emacs-lisp
;; End: