;; ORYA-OTF.flt -- Font Layout Table for Oriya OpenType font
;; Copyright (C) 2004
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H15PRO112
;; This file is part of the m17n database; a sub-part of the m17n
;; library.
;; The m17n library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public License
;; as published by the Free Software Foundation; either version 2.1 of
;; the License, or (at your option) any later version.
;; The m17n library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; Lesser General Public License for more details.
;; You should have received a copy of the GNU Lesser General Public
;; License along with the m17n library; if not, write to the Free
;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;; 02111-1307, USA.
;;;
;;; ORYA-OTF.flt
;;;
;;; For Oriya OpenType fonts to draw the Oriya script. Tested with
;;; utkalm.ttf
(category
;; Character category table. Each entry is (CODE CATEGORY) or
;; (FROM TO CATEGORY) with both end points included; the one-letter
;; category is what the regular expressions in the generator stages
;; match against.
;;
;; C: consonant (excluding Y and R)
;; R: consonant RA (reph, below)
;; B: consonant (below)
;; Y: consonant YA, YYA (post)
;; n: NUKTA
;; H: HALANT
;; m: MATRA (pre)
;; u: MATRA (above)
;; b: MATRA (below)
;; p: MATRA (post)
;; t: MATRA (two-part)
;; A: vowel modifier (above)
;; a: vowel modifier (post)
;; V: independent vowel
;; N: ZWNJ (ZERO WIDTH NON-JOINER)
;; J: ZWJ (ZERO WIDTH JOINER)
;; E: ELSE
;; x: mark #1 (internal use)
;; y: mark #2 (internal use)
;;
;; The block-wide ELSE range and the KA .. HA range are listed before
;; the more specific entries that refine them, so the table relies on
;; a later entry overriding an earlier one.
(0x200C ?N) ; ZWNJ
(0x200D ?J) ; ZWJ
;; DANDA and DOUBLE DANDA are shared by the Indic scripts and are
;; encoded in the Devanagari block (U+0964, U+0965), not at
;; U+0664/U+0665, which are Arabic-Indic digits.
(0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA
(0x0B00 0x0B7F ?E) ; ELSE
(0x0B01 ?A) ; SIGN CANDRABINDU (above)
(0x0B02 0x0B03 ?a) ; SIGN ANUSWAR, VISARGA (post)
(0x0B05 0x0B0C ?V) ; LETTER A .. VOCALIC L
(0x0B0F 0x0B10 ?V) ; LETTER E .. AI
(0x0B13 0x0B14 ?V) ; LETTER O .. AU
(0x0B15 0x0B39 ?C) ; LETTER KA .. HA
(0x0B24 ?B) ; LETTER TA
(0x0B28 ?B) ; LETTER NA
(0x0B2C ?B) ; LETTER BA
(0x0B2D ?B) ; LETTER BHA
(0x0B2E ?B) ; LETTER MA
(0x0B2F ?Y) ; LETTER YA
(0x0B30 ?R) ; LETTER RA
(0x0B32 ?B) ; LETTER LA
(0x0B33 ?B) ; LETTER LLA
(0x0B35 ?B) ; LETTER VA
(0x0B3C ?n) ; SIGN NUKTA
(0x0B3E ?p) ; VOWEL SIGN AA (post)
(0x0B3F ?u) ; VOWEL SIGN I (above)
(0x0B40 ?p) ; VOWEL SIGN II (post)
(0x0B41 0x0B43 ?b) ; VOWEL SIGN U, UU, R (below)
(0x0B47 ?m) ; VOWEL SIGN E (pre)
(0x0B48 0x0B4C ?t) ; VOWEL SIGN AI, O, AU (two-part)
(0x0B4D ?H) ; SIGN VIRAMA (HALANT)
(0x0B56 ?u) ; AI LENGTH MARK
(0x0B57 ?p) ; AU LENGTH MARK
(0x0B5C 0x0B5D ?C) ; LETTER RRA, RHA
(0x0B5F ?Y) ; LETTER YYA
(0x0B60 0x0B61 ?V) ; LETTER VOCALIC RR, LL
(0x0B71 ?C) ; LETTER WA
;; The two code points below are used by the generator stages as
;; in-band position markers (emitted as 0x0B7E / 0x0B7F and matched
;; as x / y).
(0x0B7E ?x) ; mark #1 (internal use)
(0x0B7F ?y) ; mark #2 (internal use)
)
;; Step 1 : Syllable identification. Recognised syllables are quoted
;; by the pseudo character, which is generated by the command "|" and
;; has the category " " (space).
(generator
(0
(cond
;; The rules of this cond are tried in the order written; the final
;; "." rule copies a single character unchanged when nothing else
;; matches. Each syllable rule emits its output between "< |" and
;; "| >": "|" generates the pseudo character (category " ") that
;; quotes the syllable for the later steps.
;; NOTE(review): "<" and ">" are presumably the FLT cluster
;; delimiters -- confirm against the m17n FLT reference.
;; Case F : Syllables containing an independent vowel.
;; Groups: 1 = RH, 2 = independent vowel, 3 = post vowel modifier,
;; 4 = above vowel modifier. The output puts group 2 (the vowel)
;; before group 1 (RH); Step 2 matches it in that order as "(V)(RH)".
("(RH)?(V)(a)?(A)?"
< |
(2 =)
(1 = =)
(3 =)
(4 =)
| >)
;; Case A-C are for those syllables that end with an explicit vowel
;; mark and/or a vowel modifier. They are divided into three cases
;; for readability of the regular expressions. The leading
;; consonant-Halant repetition is analysed for reordering in the
;; next step. A two-part vowel, if any, is split for
;; canonicalisation.
;; Groups shared by Case A-C: 1 = leading RH, 2 = the whole consonant
;; cluster (handed to set-marks), 3 = last repeated consonant-Halant
;; unit, 4 = final consonant with optional NUKTA.
;; Case A : A syllable ending with a vowel modifier.
;; Groups: 5 = one-part matras, 6 = two-part matra, 7 = vowel modifier.
("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]*)(t)?([Aa])"
< |
(1 = =)
(2 set-marks)
(5 = *)
(6 split)
(7 =)
| >)
;; Case B : A syllable ending with a two-part vowel.
;; Groups: 5 = two-part matra.
("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
< |
(1 = =)
(2 set-marks)
(5 split)
| >)
;; Case C : A syllable ending with other vowel(s). Note that a
;; two-part vowel may be expressed with two vowel marks for
;; backward compatibility.
;; Groups: 5 = one or more one-part matras, copied in input order.
("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]+)"
< |
(1 = =)
(2 set-marks)
(5 = *)
| >)
;; Case E : No explicit vowel nor modifier. If the syllable ends
;; with a consonant, analyse it for reordering in the next step.
;; Otherwise, just identify the syllable without changing anything.
;; Groups: 1 = leading RH, 2 = the whole consonant cluster,
;; 3 = last repeated consonant-Halant unit, 4 = trailing HALANT with
;; optional ZWNJ/ZWJ.
("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
< |
(1 = =)
(2 set-marks)
(4 = *)
| >)
("." =))
*)
;; Set mark #1 (x) at the position where below consonants begin, and
;; mark #2 (y) at the position to which below and above signs will be
;; moved.
;; This macro is applied to group 2 (the consonant cluster) of Case
;; A, B, C and E above. Every branch emits exactly one x (0x0B7E) and
;; one y (0x0B7F).
(set-marks
(cond
;; Ending with Y.
;; Groups: 1 = prebase & base consonants, 2 = last repeated
;; Halant-consonant unit inside group 1, 3 = the HALANT that follows
;; the base, 4 = below consonants each followed by HALANT, 5 = last
;; unit of group 4, 6 = final YA/YYA.
;; The HALANT of group 3 is emitted last, after the YA, so that the
;; result ends in "y Y H" as Step 2 expects with "y(YH)?".
("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)$"
(1 = *) ; prebase & base
0x0B7E ; below begin
(4 = *) ; below consonants
0x0B7F ; below end
(6 =) ; YA
(3 =)) ; moved HALANT
;; Ending with R or B.
;; Groups: 1 = prebase & base consonants, 2 = last repeated
;; Halant-consonant unit inside group 1, 3 = the HALANT that follows
;; the base, 4 = below consonants (HALANT-separated, ending in a
;; consonant), 5 = last consonant-HALANT unit inside group 4.
;; The HALANT of group 3 is emitted after group 4, so each below
;; consonant ends up followed by a HALANT.
("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])$"
(1 = *) ; prebase & base
0x0B7E ; below begin
(4 = *) ; below consonants
(3 =) ; moved HALANT
0x0B7F) ; below end
;; Anything else: no below consonants, so both marks are appended
;; right after the unchanged cluster.
(".+"
= *
0x0B7E ; below begin
0x0B7F))) ; below end
;; Split two-part dependent vowel signs for canonicalisation.
;; Each rule replaces one two-part sign by VOWEL SIGN E (0x0B47)
;; followed by a second sign: AI -> E + AI LENGTH MARK,
;; O -> E + AA, AU -> E + AU LENGTH MARK.
(split
(cond
((0x0B48) 0x0B47 0x0B56)
((0x0B4B) 0x0B47 0x0B3E)
((0x0B4C) 0x0B47 0x0B57)))
)
;; Step 2 : Move Reph and Matra if necessary. From now on, we care
;; only for those syllables that have been identified in Step 1.
(generator
(0
(cond
;; Every syllable pattern below begins and ends with a space, i.e.
;; with the pseudo character that Step 1 put around each recognised
;; syllable, and no group is allowed to contain a space, so a rule
;; rewrites exactly one syllable. The rules are tried in the order
;; written; the final "." rule copies one character unchanged.
;; Special case: a single consonant and a Halant.
;; Groups: 1 = the consonant, 2 = HALANT with optional ZWNJ/ZWJ.
;; Both marks (x and y) are dropped from the output.
(" (.)xy(H[NJ]?) "
|
(1 =)
(2 = *)
|)
;; This is the most generic pattern. It follows Case A-C and a
;; part of Case E in Step 1. Now Mark #1 is used to indicate the
;; critical part that requires pre-base substitution in the
;; following steps.
;; Groups: 1 = RH (reph), 2 = pre-base & base consonants,
;; 3 = below consonants, 4 = YH (post consonant), 5 = pre matra,
;; 6 = below matra, 7 = above matra, 8 = post matra,
;; 9 = above vowel modifier, 10 = post vowel modifier,
;; 11 = trailing HALANT with optional ZWNJ/ZWJ.
;; The matras must arrive in the order m, b, u, p for this pattern
;; to match; mark #2 (y) is consumed and not re-emitted.
(" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(u)?(p)?(A)?(a)?(HN|HJ|H)? "
|
(5 =) ; [Mpre]
;; We can safely perform Nukta composition here because it does
;; not affect surrounding letters in the syllable. The Akhand
;; ligature operation is also applied here, before applying the
;; half form operation because the Utkal font generates Akhand
;; ligatures directly from the "C H C" sequence, not via the half
;; form.
0x0B7E ; begin Cpre & Cbase
(2 otf:orya=nukt,akhn+) ; {Cpre + H} + Cbase
0x0B7E ; end Cpre & Cbase
(3 otf:orya=blwf+) ; {Cbelow + H}
(6 =) ; [Mbelow]
(7 =) ; [Mabove]
(1 otf:orya=rphf+) ; [Reph]
(4 otf:orya=pstf+) ; [Cpost + H]
(8 =) ; [Mpost]
(9 =) ; [VMabove]
(10 =) ; [VMpost]
(11 = *) ; optional HALANT
|)
;; Syllables that begin with an independent vowel (following up
;; Step 1, Case F). Syllables of this type do not require further
;; modification.
;; Groups: 1 = independent vowel, 2 = RH, 3 = rest of the syllable.
;; Group 3 is "[^ ]*" rather than ".*": "." also matches the space
;; that separates syllables, and the longest match would then run up
;; to the last separator of the input and swallow every following
;; syllable into this rule.
(" (V)(RH)([^ ]*) "
|
(1 =)
(2 otf:orya=rphf+)
(3 = *)
|)
("." =))
*))
;; Step 3 : Now only those syllables that contain the pseudo character
;; x require pre-base substitution. Unlike the Mukti font for Bengali,
;; the Utkal font can produce the ligature for "C1 H C2" from
;; "C1halant" and "C2". If such a ligature is not available, we get a
;; sequence consisting of "C1halant" and "C2", which is satisfactory.
(generator
(0
(cond
;; The rules are tried in the order written; the final "." rule
;; copies one character unchanged.
;; A syllable made of one character + HALANT followed by ZWJ: apply
;; the "half" feature. The ZWJ is matched but not copied to the
;; output.
(" (.H)J "
|
(1 otf:orya=half+)
|)
;; A syllable made of one character + HALANT, optionally followed by
;; ZWNJ: apply the "haln" feature. The ZWNJ is matched but not copied
;; to the output.
(" (.H)N? "
|
(1 otf:orya=haln+)
|)
;; A syllable carrying the two x marks set in Step 2.
;; Groups: 1 = optional single glyph (the pre matra) and the opening
;; x, 2 = the glyphs between the marks (handed to pres), 3 = the
;; closing x and the rest of the syllable.
;; Group 2 excludes the space as well as x: a syllable that still
;; carries only one x (one that Step 2 left unmatched) must not let
;; the match run across the separator into a following syllable.
(" ([^x ]?x)([^x ]*)(x[^ ]*) "
|
(1 = *)
(2 pres)
(3 = *)
|)
("." =))
*)
;; Pre-base substitution of the glyphs between the two x marks. The
;; range is cut at each ZWJ / ZWNJ; the joiner itself is dropped and
;; the remainder is processed recursively.
(pres
(cond
;; Up to the first ZWJ. Groups: 1 = everything before the last
;; character + HALANT, 2 = that character + HALANT (gets the half
;; form), 3 = remainder after the ZWJ.
("([^NJ]*)(.H)J(.*)"
(1 otf:orya=haln,pres+)
(2 otf:orya=half+)
(3 pres))
;; Up to the first ZWNJ. Groups: 1 = everything before the HALANT,
;; 2 = the HALANT (copied as is), 3 = remainder after the ZWNJ.
("([^N]*)(H)N(.*)"
(1 otf:orya=haln,pres+)
(2 =)
(3 pres))
;; No joiner left: substitute the whole remaining range.
(".*"
otf:orya=haln,pres+)))
)
;; Step 4 : Mpre/Cpre reordering. If the pre-base substitution in
;; the previous step results in more than one glyph, and there is an
;; Mpre in this syllable, then move the Mpre before the Cbase.
;; i.e. [Mpre]{Kh}Kf... -> {Kh}[Mpre]Kf...
(generator
(0
(cond
;; Both syllable rules remove the two x marks. No group may contain
;; the space that separates syllables: "." would also match it, so a
;; rule could start at the closing separator of the previous syllable
;; (taking the next opening separator as group 1) or, through the
;; longest match, extend up to the last separator of the input.
;; Pre-base substitution produced more than one glyph and an Mpre is
;; present: move the Mpre in front of the base glyph.
;; Groups: 1 = Mpre, 2 = pre-base glyphs, 3 = base glyph (the last
;; glyph before the closing x), 4 = rest of the syllable.
(" ([^x ])x([^x ]+)([^x ])x([^ ]*) "
|
(2 = *)
(1 =)
(3 =)
(4 = *)
|)
;; Otherwise keep the order and just drop the marks.
;; Groups: 1 = optional Mpre, 2 = glyphs between the marks, 3 = rest
;; of the syllable.
(" ([^x ])?x([^x ]*)x([^ ]*) "
|
(1 =)
(2 = *)
(3 = *)
|)
("." =))
*))
;; Step 5 : Substitutions & positioning.
(generator
(0
(cond
;; A whole syllable, i.e. everything between two pseudo characters.
;; Only group 1 is emitted, so the surrounding separators are matched
;; here but not copied to the output.
;; NOTE(review): the feature list after "=" is presumably the GSUB
;; part of the OTF spec and a "+" would introduce the GPOS part;
;; confirm how the omitted "+" part is treated, since this step is
;; also meant to do the positioning.
(" ([^ ]*) "
(1 otf:orya=vatu,abvs,blws,psts))
;; Any character outside a recognised syllable is handled one at a
;; time with empty feature lists on both sides of "+".
;; NOTE(review): "[" and "]" are FLT commands whose meaning is not
;; visible in this file -- confirm against the m17n FLT reference.
("."
[ otf:orya=+ ]))
*))
;; Local Variables:
;; mode: emacs-lisp
;; End: