;; ORYA-OTF.flt -- Font Layout Table for Oriya OpenType font
;; Copyright (C) 2004
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H15PRO112

;; This file is part of the m17n database; a sub-part of the m17n
;; library.

;; The m17n library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public License
;; as published by the Free Software Foundation; either version 2.1 of
;; the License, or (at your option) any later version.

;; The m17n library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.

;; You should have received a copy of the GNU Lesser General Public
;; License along with the m17n library; if not, write to the Free
;; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;; <li> ORYA-OTF.flt
;;;
;;; For Oriya OpenType fonts to draw the Oriya script.  Tested with
;;; utkalm.ttf.

(category
 ;; C: consonant (excluding Y and R)
 ;; R: consonant RA (reph, below)
 ;; B: consonant (below)
 ;; Y: consonant YA, YYA (post)
 ;; n: NUKTA
 ;; H: HALANT
 ;; m: MATRA (pre)
 ;; u: MATRA (above)
 ;; b: MATRA (below)
 ;; p: MATRA (post)
 ;; t: MATRA (two-part)
 ;; A: vowel modifier (above)
 ;; a: vowel modifier (post)
 ;; V: independent vowel
 ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
 ;; J: ZWJ (ZERO WIDTH JOINER)
 ;; E: ELSE
 ;;
 (0x200C        ?N)     ; ZWNJ
 (0x200D        ?J)     ; ZWJ
 ;; DANDA and DOUBLE DANDA are U+0964 and U+0965.  (U+0664 and U+0665
 ;; are ARABIC-INDIC DIGIT FOUR and FIVE.)
 (0x0964 0x0965 ?E)     ; DANDA, DOUBLE DANDA
 (0x0B00 0x0B7F ?E)     ; ELSE
 (0x0B01        ?A)     ; SIGN CANDRABINDU (above)
 (0x0B02 0x0B03 ?a)     ; SIGN ANUSWAR, VISARGA (post)
 (0x0B05 0x0B0C ?V)     ; LETTER A .. VOCALIC L
 (0x0B0F 0x0B10 ?V)     ; LETTER E .. AI
 (0x0B13 0x0B14 ?V)     ; LETTER O .. AU
 (0x0B15 0x0B39 ?C)     ; LETTER KA .. HA
 (0x0B24        ?B)     ; LETTER TA
 (0x0B28        ?B)     ; LETTER NA
 (0x0B2C        ?B)     ; LETTER BA
 (0x0B2D        ?B)     ; LETTER BHA
 (0x0B2E        ?B)     ; LETTER MA
 (0x0B2F        ?Y)     ; LETTER YA
 (0x0B30        ?R)     ; LETTER RA
 (0x0B32        ?B)     ; LETTER LA
 (0x0B33        ?B)     ; LETTER LLA
 (0x0B35        ?B)     ; LETTER VA
 (0x0B3C        ?n)     ; SIGN NUKTA
 (0x0B3E        ?p)     ; VOWEL SIGN AA (post)
 (0x0B3F        ?u)     ; VOWEL SIGN I (above)
 (0x0B40        ?p)     ; VOWEL SIGN II (post)
 (0x0B41 0x0B43 ?b)     ; VOWEL SIGN U, UU, R (below)
 (0x0B47        ?m)     ; VOWEL SIGN E (pre)
 (0x0B48 0x0B4C ?t)     ; VOWEL SIGN AI, O, AU (two-part)
 (0x0B4D        ?H)     ; SIGN VIRAMA (HALANT)
 (0x0B56        ?u)     ; AI LENGTH MARK
 (0x0B57        ?p)     ; AU LENGTH MARK
 (0x0B5C 0x0B5D ?C)     ; LETTER RRA, RHA
 (0x0B5F        ?Y)     ; LETTER YYA
 (0x0B60 0x0B61 ?V)     ; LETTER VOCALIC RR, LL
 (0x0B71        ?C)     ; LETTER WA
 (0x0B7E        ?x)     ; mark #1 (internal use)
 (0x0B7F        ?y)     ; mark #2 (internal use)
 )

;; Step 1 : Syllable identification.  Recognised syllables are quoted
;; by the pseudo character, which is generated by the command "|" and
;; has the category " " (space).
(generator
 (0
  (cond

   ;; Case F : Syllables containing an independent vowel.
   ("(RH)?(V)(a)?(A)?"
    < | (2 =) (1 = =) (3 =) (4 =) | >)

   ;; Case A-C are for those syllables that end with an explicit vowel
   ;; mark and/or a vowel modifier.  They are divided into three cases
   ;; for readability of the regular expressions.  The leading
   ;; consonant-Halant repetition is analysed for reordering in the
   ;; next step.  A two-part vowel, if any, is split for
   ;; canonicalisation.

   ;; Case A : A syllable ending with a vowel modifier.
   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]*)(t)?([Aa])"
    < | (1 = =) (2 set-marks) (5 = *) (6 split) (7 =) | >)

   ;; Case B : A syllable ending with a two-part vowel.
   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
    < | (1 = =) (2 set-marks) (5 split) | >)

   ;; Case C : A syllable ending with other vowel(s).  Note that a
   ;; two-part vowel may be expressed with two vowel marks for
   ;; backward compatibility.
   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]+)"
    < | (1 = =) (2 set-marks) (5 = *) | >)

   ;; Case E : No explicit vowel nor modifier.  If the syllable ends
   ;; with a consonant, analyse it for reordering in the next step.
   ;; Otherwise, just identify the syllable without changing anything.
   ;; (The digits below mark where each regex group opens.)
   ;;1    23                         4
   ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
    < | (1 = =) (2 set-marks) (4 = *) | >)

   ("." =))
  *)

 ;; Set mark #1 (x) at the position where below consonants begin, and
 ;; mark #2 (y) at the position to which below and above signs will be
 ;; moved.
 (set-marks
  (cond

   ;; Ending with Y.
   ;;1        2            3  45        6
   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)$"
    (1 = *)                     ; prebase & base
    0x0B7E                      ; below begin
    (4 = *)                     ; below consonants
    0x0B7F                      ; below end
    (6 =)                       ; YA
    (3 =))                      ; moved HALANT

   ;; Ending with R or B.
   ;;1        2            3  45
   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])$"
    (1 = *)                     ; prebase & base
    0x0B7E                      ; below begin
    (4 = *)                     ; below consonants
    (3 =)                       ; moved HALANT
    0x0B7F)                     ; below end

   (".+"
    = *
    0x0B7E                      ; below begin
    0x0B7F)))                   ; below end

 ;; Split two-part dependent vowel signs for canonicalisation.
 (split
  (cond
   ((0x0B48) 0x0B47 0x0B56)
   ((0x0B4B) 0x0B47 0x0B3E)
   ((0x0B4C) 0x0B47 0x0B57)))
 )

;; Step 2 : Move Reph and Matra if necessary.  From now on, we care
;; only for those syllables that have been identified in Step 1.
(generator
 (0
  (cond

   ;; Special case: a single consonant and a Halant.
   (" (.)xy(H[NJ]?) "
    | (1 =) (2 = *) |)

   ;; This is the most generic pattern.  It follows Case A-C and a
   ;; part of Case E in Step 1.  Now Mark #1 is used to indicate the
   ;; critical part that requires pre-base substitution in the
   ;; following steps.
   ;; (The digits below mark where each regex group opens.)
   ;; 1    2         3        4    5   6   7   8   9   10  11
   (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(u)?(p)?(A)?(a)?(HN|HJ|H)? "
    |
    (5 =)                       ; [Mpre]
    ;; We can safely perform Nukta composition here because it does
    ;; not affect surrounding letters in the syllable.  The Akhand
    ;; ligature operation is also applied here, before applying the
    ;; half form operation because the Utkal font generates Akhand
    ;; ligatures directly from the "C H C" sequence, not via the half
    ;; form.
    0x0B7E                      ; begin Cpre & Cbase
    (2 otf:orya=nukt,akhn+)     ; {Cpre + H} + Cbase
    0x0B7E                      ; end Cpre & Cbase
    (3 otf:orya=blwf+)          ; {Cbelow + H}
    (6 =)                       ; [Mbelow]
    (7 =)                       ; [Mabove]
    (1 otf:orya=rphf+)          ; [Reph]
    (4 otf:orya=pstf+)          ; [Cpost + H]
    (8 =)                       ; [Mpost]
    (9 =)                       ; [VMabove]
    (10 =)                      ; [VMpost]
    (11 = *)                    ; optional HALANT
    |)

   ;; Syllables that begin with an independent vowel (following up
   ;; Step 1, Case F).  Syllables of this type do not require further
   ;; modification.
   (" (V)(RH)(.*) "
    | (1 =) (2 otf:orya=rphf+) (3 = *) |)

   ("." =))
  *))

;; Step 3 : Now only those syllables that contain the pseudo character
;; x require pre-base substitution.  Unlike the Mukti font for Bengali,
;; the Utkal font can produce the ligature for "C1 H C2" from
;; "C1halant" and "C2".  If such a ligature is not available, we get a
;; sequence consisting of "C1halant" and "C2", which is satisfactory.
(generator
 (0
  (cond
   (" (.H)J "
    | (1 otf:orya=half+) |)

   (" (.H)N? "
    | (1 otf:orya=haln+) |)

   (" ([^x ]?x)([^x ]*)(x[^ ]*) "
    | (1 = *) (2 pres) (3 = *) |)

   ("." =))
  *)

 ;; Apply the substitutions to one stretch of glyphs, handling an
 ;; explicit "H J" or "H N" and then recursing on the remainder.
 (pres
  (cond
   ("([^NJ]*)(.H)J(.*)"
    (1 otf:orya=haln,pres+)
    (2 otf:orya=half+)
    (3 pres))

   ("([^N]*)(H)N(.*)"
    (1 otf:orya=haln,pres+)
    (2 =)
    (3 pres))

   (".*"
    otf:orya=haln,pres+)))
 )

;; Step 4 : Mpre/Cpre reordering.  If the pre-base substitution in
;; the previous step results in more than one glyph, and there is an
;; Mpre in this syllable, then move the Mpre before the Cbase.
;; i.e. [Mpre]{Kh}Kf... -> {Kh}[Mpre]Kf...
(generator
 (0
  (cond
   (" ([^x ])x([^x ]+)([^x ])x([^x ]*) "
    | (2 = *) (1 =) (3 =) (4 = *) |)

   (" ([^x ])?x([^x ]*)x([^ ]*) "
    | (1 =) (2 = *) (3 = *) |)

   ("." =))
  *))

;; Step 5 : Substitutions & positioning.
(generator
 (0
  (cond
   (" ([^ ]*) "
    ;; FIXME : The pres below is for the TTA ligature in the Utkal
    ;; font.  It should be removed once the font is updated.
    (1 otf:orya=vatu,abvs,blws,psts,pres))

   ("." [ otf:orya=+ ]))
  *))

;; Local Variables:
;; mode: emacs-lisp
;; End: