1 ;; BENG-OTF.flt -- Font Layout Table for Bengali OpenType font
2 ;; Copyright (C) 2004, 2007
3 ;; National Institute of Advanced Industrial Science and Technology (AIST)
4 ;; Registration Number H15PRO112
6 ;; This file is part of the m17n database; a sub-part of the m17n
9 ;; The m17n library is free software; you can redistribute it and/or
10 ;; modify it under the terms of the GNU Lesser General Public License
11 ;; as published by the Free Software Foundation; either version 2.1 of
12 ;; the License, or (at your option) any later version.
14 ;; The m17n library is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;; Lesser General Public License for more details.
19 ;; You should have received a copy of the GNU Lesser General Public
20 ;; License along with the m17n library; if not, write to the Free
21 ;; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 ;; Boston, MA 02110-1301, USA.
26 ;;; For Bengali OpenType fonts to draw the Bengali script. Tested with
27 ;;; MuktiNarrow.ttf <http://www.nongnu.org/freebangfont/index.html>
29 ;;; LikhanNormal.otf <http://www.stat.wisc.edu/~deepayan/Bengali/WebPage/Font/fonts.html>
31 (font layouter beng-otf nil  ; header form: names this layouter beng-otf
32 (font (nil nil unicode-bmp :otf=beng=rphf)))  ; font spec naming unicode-bmp and the OTF beng script with the rphf (Reph) feature; presumably restricts which fonts this table applies to -- confirm against the m17n FLT format docs
35 ;; C: consonant (excluding B, Y and R)
36 ;; B: consonant BA (below)
37 ;; Y: consonant YA (post)
38 ;; R: consonant RA (reph, below)
44 ;; t: MATRA (two-part)
45 ;; A: vowel modifier (above)
46 ;; a: vowel modifier (post)
47 ;; V: independent vowel
48 ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
49 ;; J: ZWJ (ZERO WIDTH JOINER)
54 (0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA
55 (0x0980 0x09FF ?E) ; ELSE
56 (0x0981 ?A) ; SIGN CANDRABINDU (above)
57 (0x0982 0x0983 ?a) ; SIGN ANUSWAR, VISARGA (post)
58 (0x0985 0x098C ?V) ; LETTER A .. VOCALIC L
59 (0x098F 0x0990 ?V) ; LETTER E .. AI
60 (0x0993 0x0994 ?V) ; LETTER O .. AU
61 (0x0995 0x09B9 ?C) ; LETTER KA .. HA
62 (0x09AC ?B) ; LETTER BA
63 (0x09AF ?Y) ; LETTER YA
64 (0x09B0 ?R) ; LETTER RA
65 (0x09BC ?n) ; SIGN NUKTA
66 (0x09BE ?p) ; VOWEL SIGN AA (post)
67 (0x09BF ?m) ; VOWEL SIGN I (pre)
68 (0x09C0 ?p) ; VOWEL SIGN II (post)
69 (0x09C1 0x09C4 ?b) ; VOWEL SIGN U, UU, R, RR (below)
70 (0x09C7 0x09C8 ?m) ; VOWEL SIGN E, AI (pre)
71 (0x09CB 0x09CC ?t) ; VOWEL SIGN O, AU (two-part)
72 (0x09CD ?H) ; SIGN VIRAMA (HASANT)
73 (0x09CE ?C) ; LETTER KHANDA TA
74 (0x09D7 ?p) ; AU LENGTH MARK
75 (0x09DC 0x09DD ?C) ; LETTER RRA, RHA
76 (0x09DF ?C) ; LETTER YYA
77 (0x09E0 0x09E1 ?V) ; LETTER VOCALIC RR, LL
78 (0x09E2 0x09E3 ?b) ; VOWEL SIGN L .. LL (below)
79 (0x09F0 0x09F1 ?C) ; LETTER RR', RR'' (assamese)
80 (0x09FE ?x) ; mark #1 (internal use)
81 (0x09FF ?y) ; mark #2 (internal use)
84 ;; Step 1 : Syllable identification. Recognised syllables are quoted
85 ;; by the pseudo character, which is generated by the command "|" and
86 ;; has the category " " (space).
90 ;; Special case. The sequence "C1 H N C2 m" is reordered as
91 ;; "C1 H m C2", not "m C1 H C2". Besides, "C1 H" is drawn in the
98 ;; Case A-C are for those syllables that end with an explicit vowel
99 ;; mark and/or a vowel modifier. They are divided into three cases
100 ;; for the readability of the regular expressions. The leading
101 ;; consonant-Hasant repetition is analysed for reordering in the
102 ;; next step. Two-part vowel, if any, is split for
105 ;; Case A : A syllable ending with a vowel modifier.
107 ("(RH)?(([CRBY]n?HJ?)*([CRBY]n?))([mbp]*)(t)?([Aa])"
116 ;; Case B : A syllable ending with a two-part vowel.
118 ("(RH)?(([CRBY]n?HJ?)*([CRBY]n?))(t)"
125 ;; Case C : A syllable ending with other vowel. Note that a
126 ;; two-part vowel may be expressed with two vowel marks for
127 ;; backward compatibility.
129 ("(RH)?(([CRBY]n?HJ?)*([CRBY]n?))([mbp]+)"
136 ;; Case D : Ya-phalaa. Reorder H and Y for the next step.
137 ;; The web page "Unicode FAQ for Indic Scripts and Languages"
138 ;; <http://www.unicode.org/faq/indic.html> says "it should be
139 ;; permissible for the Ya-phalla to be consistently formed by "ZWNJ
148 ;; Case E : No explicit vowel nor modifier. If the syllable ends
149 ;; with a consonant, analyse it for reordering in the next step.
150 ;; Otherwise, just identify the syllable without changing anything.
152 ("(RH)?(([CRBY]n?HJ?)*[CRBY]n?)(HN|HJ|H)?"
159 ;; Case F : Syllables that begin with an independent vowel. An
160 ;; optional HYp sequence appears when this syllable represents the
161 ;; sound "a" in English "bat" (see the FAQ above). If it appears,
162 ;; we reorder the H and Y for the next step.
164 < | (1 =) (2 ("HY(p)" 0x09AF 0x09CD (1 =))) (3 =) | >)
169 ;; Set mark #1 (x) at the position where below consonants begin, and
170 ;; mark #2 (y) at the position to which below and above signs will be
176 ("([CRBY]n?(HJ?Cn?)*)(H)(([RB]H)*)(Y)"
177 (1 = *) ; prebase & base
179 (4 = *) ; below consonants
182 (3 =)) ; moved HASANT
183 ;; Ending with R or B.
185 ("([CRBY]n?(HJ?Cn?)*)(H)(([RB]H)*[RB])"
186 (1 = *) ; prebase & base
188 (4 = *) ; below consonants
196 ;; Split two-part dependent vowel signs for canonicalisation.
199 ((0x09CB) 0x09C7 0x09BE)
200 ((0x09CC) 0x09C7 0x09D7)))
203 ;; Step 2 : Move Reph and Matra if necessary. From now on, we care
204 ;; only for those syllables that have been identified in Step 1.
208 ;; Special case: a single consonant and a Halant.
217 ;; This is the most generic pattern. It follows Cases A, B, C and
218 ;; E in Step 1. Now Mark #1 is used to indicate the critical part
219 ;; that requires pre-base substitution in the following steps.
221 ;; 1 2 3 4 5 6 7 8 9 10
222 (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)?(HJ|H)? "
225 ;; Actually, the nukt feature is not necessary for Bengali because
226 ;; all the necessary Nukta forms are precomposed in the Unicode
227 ;; standard. Even if a Nukta consonant is given in the form of
228 ;; the combination of the base consonant and a Nukta sign, we can
229 ;; safely perform the composition here because it does not affect
230 ;; surrounding letters in the syllable. The Akhand ligature
231 ;; operation is also applied here, before applying the half form
232 ;; operation because the Mukti font generates Akhand ligatures
233 ;; directly from the "C H C" sequence, not via the half form.
234 0x09FE ; begin Cpre & Cbase
235 (2 otf:beng=nukt,akhn) ; {Cpre + H} + Cbase
236 0x09FE ; end Cpre & Cbase
237 (3 otf:beng=blwf) ; {Cbelow + H}
239 (1 otf:beng=rphf) ; [Reph]
241 (4 otf:beng=pstf) ; [Cpost + H]
244 (10 = *) ; optional HASANT
247 ;; Syllables that begin with an independent vowel (following up
248 ;; Step 1, Case F). If a YH sequence exists, it is changed to the
249 ;; post-base form. Syllables of this type do not require further
258 ;; Ya-phalaa (following up Step 1, Case D). Remove N and change YH
259 ;; to the post base form. Syllables of this type do not require
260 ;; further modification.
270 ;; Step 3 : Now only those syllables that contain the pseudo character
271 ;; x require pre-base substitution. This is the most complicated part
274 ;; If the sequence "C1 H C2" makes ligature L12, L12 replaces the
275 ;; original sequence.
277 ;; To test the availability of such a ligature, we try to generate it
278 ;; using the pre-base substitute feature, then see whether it succeeded
279 ;; or not. In the case of failure, the pre-base feature does not
280 ;; change the original sequence.
282 ;; To create a ligature, the "C1 H" part must be first converted into
283 ;; the half form of C1. Creating the half form of a consonant always
286 ;; ligature(half(C1,H),C2)
287 ;; ==> ligature(C1half,C2)
291 ;; If the ligature is not available, the "C1 H" part must be converted
292 ;; into the _Halant_ (not half) form of C1. However, there is no way
293 ;; to reconvert C1half into C1halant nor to revert back to "C1 H".
294 ;; Thus we duplicate the critical part in two different forms so that
295 ;; we can select the appropriate one in the next step. The pseudo
296 ;; character x is used to indicate the boundaries.
298 ;; ... C1 H C2 ... ==> ... x C1halant C2 x L12 x ...
300 ;; If the length of the L12 part is one, ligature generation was
301 ;; successful. In this case we wipe out the duplicated C1halant and
302 ;; C2. Otherwise we remove L12.
304 ;; In very few cases (I found only one in the Mukti font), the "C1 H"
305 ;; part needs to be converted into C1halant (instead of C1half) to make
306 ;; a ligature with C2. So when we try to generate a ligature form, we
307 ;; apply the GSUB features "half", "haln" and "pres" in this order.
310 ;; C: consonant (excluding B, Y and R)
312 ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
313 ;; J: ZWJ (ZERO WIDTH JOINER)
318 (0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA
319 (0x0980 0x09FF ?E) ; ELSE
320 (0x09CD ?H) ; SIGN VIRAMA (HASANT)
321 (0x0995 ?K) ; LETTER KA
322 (0x09B7 ?S) ; LETTER SSA
323 (0x09A3 ?M) ; LETTER NNA
324 (0x09AE ?M) ; LETTER MA
325 (0x09FE ?x) ; mark #1 (internal use)
332 ;; One pre-base and base.
334 (" ([^x ]*)x((.H)([^J]))(H)?x([^ ]*) "
338 (3 otf:beng=haln) ; C1halant
341 (2 otf:beng=half,haln,pres) ; ligature result
347 ;; One pre-base with ZWJ. According to the Unicode FAQ, the half
348 ;; form is forced in this case. So we pretend that ligature
349 ;; generation failed.
350 (" ([^x ]*)x(.H)J(.)?x([^ ]*) "
354 (2 otf:beng=half) ; C1half
357 0x09FD ; pseudo result
358 0x09FD ; pseudo result
363 ;; One pre-base possibly with ZWNJ. Similar to above.
364 (" ([^x ]*)x(.H)N?(.)?x([^ ]*) "
368 (2 otf:beng=haln) ; C1halant
371 0x09FD ; pseudo result
372 0x09FD ; pseudo result
377 ;; Standalone base. There is nothing more to do.
378 (" ([^x ]*)x(.)x([^ ]*) "
385 ;; KA-SSA-NNA and KA-SSA-MA are the only pre-base ligatures that
386 ;; consist of three consonants.
388 (" ([^x ]*)x((KH)(SH)(M))(H)?x([^ ]*) "
392 (3 otf:beng=haln) ; KAhalant
393 (4 otf:beng=haln) ; SSAhalant
396 (2 otf:beng=half,haln,pres) ; ligature result
402 ;; Two or more pre-bases plus base. Give up. Convert all
403 ;; pre-bases into halant form.
405 (" ([^x ]*)x(([^x]H[JN]?)+)([^x])?x([^ ]*) "
409 (2 force-haln) ; halant forms
412 0x09FD ; pseudo result
413 0x09FD ; pseudo result
421 ;; This is to remove ZWNJ and ZWJ. The half-form-force-effect of ZWJ
422 ;; is ignored. Sorry.
432 ;; Step 4 : Select the appropriate representation. Only those
433 ;; syllables that contain the virtual character x require
438 ;; Only one glyph in the ligature section (between the second and
439 ;; the third x). It means a ligature was successfully generated.
440 ;; C1halant and C2 (between the first and second x) are removed.
441 (" ([^x ]*)x[^x]+x(.)x([^ ]*) "
448 ;; Otherwise halant and base forms are used. The failed ligature
450 (" ([^x ]*)x([^x]+)x[^x]+x([^ ]*) "
457 ;; No need to care about the other cases.
461 ;; Step 5 : Select appropriate glyph variants for fine adjustments.
462 ;; Now the syllable boundary marks are removed so that the final step
463 ;; can find word boundaries.
468 (1 otf:beng=blws,abvs,psts,vatu))
474 ;; Step 6 : Word initial substitute. As the syllable boundaries have
475 ;; been eliminated in the previous step, this rule is applied to a run
476 ;; of Bengali glyphs, i.e. word by word. We finally apply the init
477 ;; feature to the word initial glyphs to get the final result.