1 ;; ORYA-OTF.flt -- Font Layout Table for Oriya OpenType font
2 ;; Copyright (C) 2004, 2007
3 ;; National Institute of Advanced Industrial Science and Technology (AIST)
4 ;; Registration Number H15PRO112
6 ;; This file is part of the m17n database; a sub-part of the m17n
9 ;; The m17n library is free software; you can redistribute it and/or
10 ;; modify it under the terms of the GNU Lesser General Public License
11 ;; as published by the Free Software Foundation; either version 2.1 of
12 ;; the License, or (at your option) any later version.
14 ;; The m17n library is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;; Lesser General Public License for more details.
19 ;; You should have received a copy of the GNU Lesser General Public
20 ;; License along with the m17n library; if not, write to the Free
21 ;; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 ;; Boston, MA 02110-1301, USA.
26 ;;; For Oriya OpenType fonts to draw the Oriya script. Tested with
27 ;;; utkalm.ttf <http://oriya.sarovar.org/download/utkalm.ttf.gz>
29 (font layouter orya-otf nil ; declares the font layouter named orya-otf
30 (font (nil nil unicode-bmp :otf=orya=rphf))) ; font spec: registry unicode-bmp; presumably limits this FLT to fonts with OpenType script `orya' and the `rphf' feature -- confirm against the m17n FLT format documentation
33 ;; C: consonant (excluding R, B and Y, which have their own categories)
34 ;; R: consonant RA (reph, below)
35 ;; B: consonant (below)
36 ;; Y: consonant YA, YYA (post)
43 ;; t: MATRA (two-part)
44 ;; A: vowel modifier (above)
45 ;; a: vowel modifier (post)
46 ;; V: independent vowel
47 ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
48 ;; J: ZWJ (ZERO WIDTH JOINER)
53 (0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA (U+0964, U+0965 in the Devanagari block)
54 (0x0B00 0x0B7F ?E) ; ELSE
55 (0x0B01 ?A) ; SIGN CANDRABINDU (above)
56 (0x0B02 0x0B03 ?a) ; SIGN ANUSWAR, VISARGA (post)
57 (0x0B05 0x0B0C ?V) ; LETTER A .. VOCALIC L
58 (0x0B0F 0x0B10 ?V) ; LETTER E .. AI
59 (0x0B13 0x0B14 ?V) ; LETTER O .. AU
60 (0x0B15 0x0B39 ?C) ; LETTER KA .. HA
61 (0x0B24 ?B) ; LETTER TA
62 (0x0B28 ?B) ; LETTER NA
63 (0x0B2C ?B) ; LETTER BA
64 (0x0B2D ?B) ; LETTER BHA
65 (0x0B2E ?B) ; LETTER MA
66 (0x0B2F ?Y) ; LETTER YA
67 (0x0B30 ?R) ; LETTER RA
68 (0x0B32 ?B) ; LETTER LA
69 (0x0B33 ?B) ; LETTER LLA
70 (0x0B35 ?B) ; LETTER VA
71 (0x0B3C ?n) ; SIGN NUKTA
72 (0x0B3E ?p) ; VOWEL SIGN AA (post)
73 (0x0B3F ?u) ; VOWEL SIGN I (above)
74 (0x0B40 ?p) ; VOWEL SIGN II (post)
75 (0x0B41 0x0B43 ?b) ; VOWEL SIGN U, UU, R (below)
76 (0x0B47 ?m) ; VOWEL SIGN E (pre)
77 (0x0B48 0x0B4C ?t) ; VOWEL SIGN AI, O, AU (two-part)
78 (0x0B4D ?H) ; SIGN VIRAMA (HALANT)
79 (0x0B56 ?u) ; AI LENGTH MARK
80 (0x0B57 ?p) ; AU LENGTH MARK
81 (0x0B5C 0x0B5D ?C) ; LETTER RRA, RHA
82 (0x0B5F ?Y) ; LETTER YYA
83 (0x0B60 0x0B61 ?V) ; LETTER VOCALIC RR, LL
84 (0x0B71 ?C) ; LETTER WA
85 (0x0B7E ?x) ; mark #1 (internal use)
86 (0x0B7F ?y) ; mark #2 (internal use)
89 ;; Step 1 : Syllable identification. Recognised syllables are quoted
90 ;; by the pseudo character, which is generated by the command "|" and
91 ;; has the category " " (space).
95 ;; Case F : Syllables containing an independent vowel.
104 ;; Case A-C are for those syllables that end with an explicit vowel
105 ;; mark and/or a vowel modifier. They are divided into three cases
106 ;; for readability of the regular expressions. The leading
107 ;; consonant-Halant repetition is analysed for reordering in the
108 ;; next step. A two-part vowel, if any, is split for
111 ;; Case A : A syllable ending with a vowel modifier.
112 ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]*)(t)?([Aa])"
121 ;; Case B : A syllable ending with a two-part vowel.
122 ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
129 ;; Case C : A syllable ending with other vowel(s). Note that a
130 ;; two-part vowel may be expressed with two vowel marks for
131 ;; backward compatibility.
132 ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]+)"
139 ;; Case E : No explicit vowel nor modifier. If the syllable ends
140 ;; with a consonant, analyse it for reordering in the next step.
141 ;; Otherwise, just identify the syllable without changing anything.
143 ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
153 ;; Set mark #1 (x) at the position where below consonants begin, and
154 ;; mark #2 (y) at the position to which below and above signs will be
160 ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)$"
161 (1 = *) ; prebase & base
163 (4 = *) ; below consonants
166 (3 =)) ; moved HALANT
167 ;; Ending with R or B.
169 ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])$"
170 (1 = *) ; prebase & base
172 (4 = *) ; below consonants
178 0x0B7F))) ; below end
180 ;; Split two-part dependent vowel signs for canonicalisation.
183 ((0x0B48) 0x0B47 0x0B56)
184 ((0x0B4B) 0x0B47 0x0B3E)
185 ((0x0B4C) 0x0B47 0x0B57)))
188 ;; Step 2 : Move Reph and Matra if necessary. From now on, we care
189 ;; only for those syllables that have been identified in Step 1.
193 ;; Special case: a single consonant and a Halant.
200 ;; This is the most generic pattern. It follows Case A-C and a
201 ;; part of Case E in Step 1. Now Mark #1 is used to indicate the
202 ;; critical part that requires pre-base substitution in the
205 ;; 1 2 3 4 5 6 7 8 9 10 11
206 (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(u)?(p)?(A)?(a)?(HN|HJ|H)? "
209 ;; We can safely perform Nukta composition here because it does
210 ;; not affect surrounding letters in the syllable. The Akhand
211 ;; ligature operation is also applied here, before applying the
212 ;; half form operation because the Utkal font generates Akhand
213 ;; ligatures directly from the "C H C" sequence, not via the half
215 0x0B7E ; begin Cpre & Cbase
216 (2 otf:orya=nukt,akhn+) ; {Cpre + H} + Cbase
217 0x0B7E ; end Cpre & Cbase
218 (3 otf:orya=blwf+) ; {Cbelow + H}
221 (1 otf:orya=rphf+) ; [Reph]
222 (4 otf:orya=pstf+) ; [Cpost + H]
226 (11 = *) ; optional HALANT
229 ;; Syllables that begin with an independent vowel (following up
230 ;; Step 1, Case F). Syllables of this type do not require further
242 ;; Step 3 : Now only those syllables that contain the pseudo character
243 ;; x require pre-base substitution. Unlike the Mukti font for Bengali,
244 ;; the Utkal font can produce the ligature for "C1 H C2" from
245 ;; "C1halant" and "C2". If such a ligature is not available, we get a
246 ;; sequence consisting of "C1halant" and "C2", which is satisfactory.
259 (" ([^x ]?x)([^x ]*)(x[^ ]*) "
271 (1 otf:orya=haln,pres+)
275 (1 otf:orya=haln,pres+)
279 otf:orya=haln,pres+)))
283 ;; Step 4 : Mpre/Cpre reordering. If the pre-base substitution in
284 ;; the previous step results in more than one glyph, and there is an
285 ;; Mpre in this syllable, then move the Mpre before the Cbase.
286 ;; i.e. [Mpre]{Kh}Kf... -> {Kh}[Mpre]Kf...
291 (" ([^x ])x([^x ]+)([^x ])x([^x ]*) "
298 (" ([^x ])?x([^x ]*)x([^ ]*) "
307 ;; Step 5 : Substitutions & positioning.
313 ;; FIXME : The pres below is for the TTA ligature in the Utkal
314 ;; font. It should be removed once the font is updated.
315 (1 otf:orya=vatu,abvs,blws,psts,pres))