;; BENG-OTF.flt -- Font Layout Table for Bengali OpenType font
;; Copyright (C) 2004
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H15PRO112
;; This file is part of the m17n database; a sub-part of the m17n
;; library.
;; The m17n library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public License
;; as published by the Free Software Foundation; either version 2.1 of
;; the License, or (at your option) any later version.
;; The m17n library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; Lesser General Public License for more details.
;; You should have received a copy of the GNU Lesser General Public
;; License along with the m17n library; if not, write to the Free
;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;; 02111-1307, USA.
;;;
;;; BENG-OTF.flt
;;;
;;; For Bengali OpenType fonts to draw the Bengali script. Tested with
;;; MuktiNarrow.ttf
;;; and
;;; LikhanNormal.otf
(category
;; Category table: maps code points (single values or inclusive
;; ranges) to the one-letter categories that the regular expressions
;; of the following generators match against.
;;
;; Entry order matters: the catch-all range 0x0980-0x09FF is listed
;; before the narrower entries that re-assign parts of it (likewise
;; KA..HA before BA, YA and RA).
;; NOTE(review): this relies on later entries taking precedence over
;; earlier overlapping ones -- confirm against the m17n FLT
;; documentation before reordering anything.
;;
;; C: consonant (excluding B, Y and R)
;; B: consonant BA (below)
;; Y: consonant YA (post)
;; R: consonant RA (reph, below)
;; n: NUKTA
;; H: HALANT
;; m: MATRA (pre)
;; b: MATRA (below)
;; p: MATRA (post)
;; t: MATRA (two-part)
;; A: vowel modifier (above)
;; a: vowel modifier (post)
;; V: independent vowel
;; N: ZWNJ (ZERO WIDTH NON-JOINER)
;; J: ZWJ (ZERO WIDTH JOINER)
;; E: ELSE
;; x: mark #1 (internal use; inserted by the set-marks macro)
;; y: mark #2 (internal use; inserted by the set-marks macro)
;;
(0x200C ?N) ; ZWNJ
(0x200D ?J) ; ZWJ
(0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA
(0x0980 0x09FF ?E) ; ELSE
(0x0981 ?A) ; SIGN CANDRABINDU (above)
(0x0982 0x0983 ?a) ; SIGN ANUSWAR, VISARGA (post)
(0x0985 0x098C ?V) ; LETTER A .. VOCALIC L
(0x098F 0x0990 ?V) ; LETTER E .. AI
(0x0993 0x0994 ?V) ; LETTER O .. AU
(0x0995 0x09B9 ?C) ; LETTER KA .. HA
(0x09AC ?B) ; LETTER BA
(0x09AF ?Y) ; LETTER YA
(0x09B0 ?R) ; LETTER RA
(0x09BC ?n) ; SIGN NUKTA
(0x09BE ?p) ; VOWEL SIGN AA (post)
(0x09BF ?m) ; VOWEL SIGN I (pre)
(0x09C0 ?p) ; VOWEL SIGN II (post)
(0x09C1 0x09C4 ?b) ; VOWEL SIGN U, UU, R, RR (below)
(0x09C7 0x09C8 ?m) ; VOWEL SIGN E, AI (pre)
(0x09CB 0x09CC ?t) ; VOWEL SIGN O, AU (two-part)
(0x09CD ?H) ; SIGN VIRAMA (HASANT)
(0x09CE ?C) ; LETTER KHANDA TA
(0x09D7 ?p) ; AU LENGTH MARK
(0x09DC 0x09DD ?C) ; LETTER RRA, RHA
(0x09DF ?C) ; LETTER YYA
(0x09E0 0x09E1 ?V) ; LETTER VOCALIC RR, LL
(0x09E2 0x09E3 ?b) ; VOWEL SIGN L .. LL (below)
(0x09F0 0x09F1 ?C) ; LETTER RR', RR'' (assamese)
(0x09FE ?x) ; mark #1 (internal use)
(0x09FF ?y) ; mark #2 (internal use)
)
;; Step 1 : Syllable identification. Recognised syllables are quoted
;; by the pseudo character, which is generated by the command "|" and
;; has the category " " (space).
(generator
;; Stage 0 tries the rules of the cond below in the order written, so
;; the more specific syllable shapes (Cases A-D) must stay ahead of
;; the permissive Case E. The trailing "*" repeats the cond over the
;; rest of the input.
;; NOTE(review): "<" and ">" around each rule body are assumed to
;; delimit a grapheme cluster -- confirm against the m17n FLT
;; documentation.
(0
(cond
;; Case A-C are for those syllables that end with an explicit vowel
;; mark and/or a vowel modifier. They are divided into three cases
;; for the readability of regular expression. The leading
;; consonant-Hasant repetition is analysed for reordering in the
;; next step. Two-part vowel, if any, is split for
;; canonicalisation.
;; Case A : A syllable ending with a vowel modifier.
;; Groups: 1 = leading RH, 2 = whole consonant cluster,
;;         3 = repeated consonant+H inside 2, 4 = last consonant
;;         inside 2, 5 = [mbp]* matras, 6 = two-part matra,
;;         7 = vowel modifier.
("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]*)(t)?([Aa])"
< |
(1 = =)
(2 set-marks)
(5 = *)
(6 split)
(7 =)
| >)
;; Case B : A syllable ending with a two-part vowel.
;; Groups: 1 = leading RH, 2 = whole consonant cluster,
;;         3 = repeated consonant+H inside 2, 4 = last consonant
;;         inside 2, 5 = two-part matra.
("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
< |
(1 = =)
(2 set-marks)
(5 split)
| >)
;; Case C : A syllable ending with other vowel. Note that a
;; two-part vowel may be expressed with two vowel marks for
;; backward compatibility.
;; Groups: 1 = leading RH, 2 = whole consonant cluster,
;;         3 = repeated consonant+H inside 2, 4 = last consonant
;;         inside 2, 5 = [mbp]+ matras.
("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]+)"
< |
(1 = =)
(2 set-marks)
(5 = *)
| >)
;; Case D : Ya-phalaa. Reorder H and Y for the next step.
;; The web page "Unicode FAQ for Indic Scripts and Languages" says
;; that it should be permissible for the Ya-phalaa to be consistently
;; formed by "ZWNJ + VIRAMA + YA".
;; Groups: 1 = consonant (+ nukta) + ZWNJ, 2 = H, 3 = Y. Group 3 is
;; emitted before group 2, which yields the "YH" order that Step 2
;; looks for.
("([CRBY]n?N)(H)(Y)"
< |
(1 = *)
(3 =)
(2 =)
| >)
;; Case E : No explicit vowel nor modifier. If the syllable ends
;; with a consonant, analyse it for reordering in the next step.
;; Otherwise, just identify the syllable without changing anything.
;; Groups: 1 = leading RH, 2 = whole consonant cluster,
;;         3 = repeated consonant+H inside 2, 4 = trailing H with
;;         optional ZWNJ/ZWJ.
("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
< |
(1 = =)
(2 set-marks)
(4 = *)
| >)
;; Case F : Syllables that begin with an independent vowel. An
;; optional HYp sequence appears when this syllable represents the
;; sound "a" in English "bat" (see the FAQ above). If it appears,
;; we reorder the H and Y for the next step.
;; Groups: 1 = independent vowel, 2 = H Y post-matra, 3 = vowel
;; modifier. The nested rule for group 2 emits 0x09AF (YA) then
;; 0x09CD (HASANT) followed by the captured post-matra.
("(V)(HYp)?([aA])?"
< | (1 =) (2 ("HY(p)" 0x09AF 0x09CD (1 =))) (3 =) | >)
;; Anything else is passed through unchanged, one character at a time.
("." =))
*)
;; Set mark #1 (x) at the position where below consonants begin, and
;; mark #2 (y) at the position to which below and above signs will be
;; moved.
;; Every branch emits exactly one x followed later by exactly one y;
;; the Step 2 patterns depend on finding both marks.
(set-marks
(cond
;; Ending with Y.
;; Groups: 1 = prebase consonants and base, 2 = repeated H+consonant
;;         inside 1, 3 = the H that precedes the below consonants,
;;         4 = run of ([RB]H), 5 = single [RB]H inside 4, 6 = final Y.
;; Output order: 1, x, 4, y, Y, H -- i.e. the H of group 3 is moved
;; behind the Y.
("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)"
(1 = *) ; prebase & base
0x09FE ; mark #1
(4 = *) ; below consonants
0x09FF ; mark #2
(6 =) ; YA
(3 =)) ; moved HASANT
;; Ending with R or B.
;; Groups: 1 = prebase consonants and base, 2 = repeated H+consonant
;;         inside 1, 3 = the H that precedes the below consonants,
;;         4 = run of ([RB]H) ending in a bare [RB],
;;         5 = single [RB]H inside 4.
;; Output order: 1, x, 4, H, y -- the H of group 3 is moved behind
;; the below consonants.
("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])"
(1 = *) ; prebase & base
0x09FE ; mark #1
(4 = *) ; below consonants
(3 =) ; moved HASANT
0x09FF) ; mark #2
;; No below or post consonants: copy everything and append both
;; marks with nothing between them.
(".+"
= *
0x09FE ; mark #1
0x09FF))) ; mark #2
;; Split two-part dependent vowel signs for canonicalisation.
;; 0x09CB -> 0x09C7 0x09BE and 0x09CC -> 0x09C7 0x09D7.
(split
(cond
((0x09CB) 0x09C7 0x09BE)
((0x09CC) 0x09C7 0x09D7)))
)
;; Step 2 : Move Reph and Matra if necessary. From now on, we care
;; only for those syllables that have been identified in Step 1.
(generator
;; Every pattern except the final "." starts and ends with a space,
;; i.e. with the pseudo character that Step 1 put around each
;; recognised syllable. Rules are tried in the order written.
(0
(cond
;; Special case: a single consonant and a Halant.
;; Groups: 1 = the consonant, 2 = H with optional ZWNJ/ZWJ.
;; The input marks "xy" are dropped and the consonant plus Halant is
;; re-emitted enclosed in two mark #1 characters for Step 3.
(" (.)xy(H[NJ]?) "
|
0x09FE
(1 =)
(2 = *)
0x09FE
|)
;; This is the most generic pattern. It follows Cases A, B, C and
;; E in Step 1. Now Mark #1 is used to indicate the critical part
;; that requires pre-base substitution in the following steps.
;; Groups: 1 = leading RH (reph), 2 = prebase consonants and base,
;;         3 = below consonants (between x and y), 4 = YH,
;;         5 = pre matra, 6 = below matra, 7 = post matra,
;;         8 = above vowel modifier, 9 = post vowel modifier,
;;         10 = trailing H with optional ZWNJ/ZWJ.
;; The groups are emitted in the order 5, 2, 3, 6, 1, 8, 4, 7, 9, 10,
;; which is where the reordering of reph and matras happens.
(" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)?(HN|HJ|H)? "
|
(5 =) ; [Mpre]
;; Actually, the nukt feature is not necessary for Bengali because
;; all the necessary Nukta forms are precomposed in the Unicode
;; standard. Even if a Nukta consonant is given in the form of
;; the combination of the base consonant and a Nukta sign, we can
;; safely perform the composition here because it does not affect
;; surrounding letters in the syllable. The Akhand ligature
;; operation is also applied here, before applying the half form
;; operation because the Mukti font generates Akhand ligatures
;; directly from the "C H C" sequence, not via the half form.
0x09FE ; begin Cpre & Cbase
(2 otf:beng=nukt,akhn) ; {Cpre + H} + Cbase
0x09FE ; end Cpre & Cbase
(3 otf:beng=blwf) ; {Cbelow + H}
(6 =) ; [Mbelow]
(1 otf:beng=rphf) ; [Reph]
(8 =) ; [VMabove]
(4 otf:beng=pstf) ; [Cpost + H]
(7 =) ; [Mpost]
(9 =) ; [VMpost]
(10 = *) ; optional HASANT
|)
;; Syllables that begin with an independent vowel (following up
;; Step 1, Case F). If a YH sequence exists, it is changed to the
;; post-base form. Syllables of this type do not require further
;; modification.
;; Groups: 1 = independent vowel, 2 = YH, 3 = rest of the syllable.
(" (V)(YH)(.*) "
|
(1 =)
(2 otf:beng=pstf)
(3 = *)
|)
;; Ya-phalaa (following up Step 1, Case D). Remove N and change YH
;; to the post base form. Syllables of this type do not require
;; further modification.
;; Groups: 1 = consonant with optional nukta, 2 = YH. The N sits
;; outside every group and is therefore not emitted.
(" ([CBRY]n?)N(YH) "
|
(1 =)
(2 otf:beng=pstf)
|)
;; Anything else is passed through unchanged, one character at a time.
("." =))
*))
;; Step 3 : Now only those syllables that contain the pseudo character
;; x require pre-base substitution. This is the most complicated part
;; in this FLT.
;; If the sequence "C1 H C2" makes ligature L12, L12 replaces the
;; original sequence.
;; To test the availability of such a ligature, we try to generate it
;; using the pre-base substitute feature, then see whether succeeded
;; or not. In the case of failure, the pre-base feature does not
;; change the original sequence.
;; To create a ligature, the "C1 H" part must be first converted into
;; the half form of C1. Creating the half form of a consonant always
;; succeeds.
;; ligature(half(C1,H),C2)
;; ==> ligature(C1half,C2)
;; ==> L12 ; success
;; C1half C2 ; fail
;; If the ligature is not available, the "C1 H" part must be converted
;; into the _Halant_ (not half) form of C1. However, there is no way
;; to reconvert C1half into C1halant nor to revert back to "C1 H".
;; Thus we duplicate the critical part in two different forms so that
;; we can select the appropriate one in the next step. The pseudo
;; character x is used to indicate the boundaries.
;; ... C1 H C2 ... ==> ... x C1halant C2 x L12 x ...
;; If the length of the L12 part is one, ligature generation was
;; successful. In this case we wipe out the duplicated C1halant and
;; C2. Otherwise we remove L12.
;; In very few cases (I found only one in the Mukti font), the "C1 H"
;; part needs to be converted into C1halant (instead of C1half) to make
;; a ligature with C2. So when we try to generate a ligature form, we
;; apply the GSUB features "half", "haln" and "pres" in this order.
(category
;; Second category table, placed ahead of the Step 3 generator. It is
;; much coarser than the first one: apart from the letters listed
;; below, every code point in 0x0980-0x09FF (including the other
;; consonants, mark #2 at 0x09FF and the 0x09FD placeholder emitted
;; in Step 3) is just E.
;; NOTE(review): the narrower entries are assumed to override the
;; earlier 0x0980-0x09FF range, and this table is assumed to replace
;; the first one for the generators that follow -- confirm against
;; the m17n FLT documentation.
;;
;; H: HALANT
;; K: consonant KA
;; S: consonant SSA
;; M: consonant NNA or MA
;; N: ZWNJ (ZERO WIDTH NON-JOINER)
;; J: ZWJ (ZERO WIDTH JOINER)
;; E: ELSE
;; x: mark #1 (internal use)
;;
(0x200C ?N) ; ZWNJ
(0x200D ?J) ; ZWJ
(0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA
(0x0980 0x09FF ?E) ; ELSE
(0x09CD ?H) ; SIGN VIRAMA (HASANT)
(0x0995 ?K) ; LETTER KA
(0x09B7 ?S) ; LETTER SSA
(0x09A3 ?M) ; LETTER NNA
(0x09AE ?M) ; LETTER MA
(0x09FE ?x) ; mark #1 (internal use)
)
(generator
;; Each rule rewrites the part between the two x marks of a syllable
;; into "x <halant/half forms + base> x <ligature attempt> x", which
;; Step 4 then resolves. Rules are tried in the order written; the
;; three-consonant rule is only reached when the earlier, shorter
;; patterns fail to match.
(0
(cond
;; One pre-base and base.
;; Groups: 1 = text before the first x, 2 = "C1 H C2" as a whole,
;;         3 = "C1 H", 4 = C2 (anything but ZWNJ/ZWJ),
;;         5 = optional trailing H, 6 = text after the second x.
(" ([^x ]*)x((.H)([^NJ]))(H)?x([^ ]*) "
|
(1 = *)
0x09FE ; x
(3 otf:beng=haln) ; C1halant
(4 =) ; C2
0x09FE ; x
(2 otf:beng=half,haln,pres) ; ligature result
0x09FE ; x
(5 =)
(6 = *)
|)
;; One pre-base with ZWJ. According to the Unicode FAQ, the half
;; form is forced in this case. So we pretend that ligature
;; generation failed: the two 0x09FD placeholders make the ligature
;; section longer than one glyph, so the single-glyph rule of Step 4
;; does not match and the section is discarded there.
;; Groups: 1 = text before the first x, 2 = "C1 H", 3 = optional C2,
;;         4 = text after the second x. The J itself is not emitted.
(" ([^x ]*)x(.H)J(.)?x([^ ]*) "
|
(1 = *)
0x09FE ; x
(2 otf:beng=half) ; C1half
(3 =) ; C2
0x09FE ; x
0x09FD ; pseudo result
0x09FD ; pseudo result
0x09FE ; x
(4 = *)
|)
;; One pre-base possibly with ZWNJ. Similar to above, but the halant
;; form is used instead of the half form.
;; Groups: 1 = text before the first x, 2 = "C1 H", 3 = optional C2,
;;         4 = text after the second x. The N itself is not emitted.
(" ([^x ]*)x(.H)N?(.)?x([^ ]*) "
|
(1 = *)
0x09FE ; x
(2 otf:beng=haln) ; C1halant
(3 =) ; C2
0x09FE ; x
0x09FD ; pseudo result
0x09FD ; pseudo result
0x09FE ; x
(4 = *)
|)
;; Standalone base. There is nothing more to do.
;; Both x marks are dropped here, so Step 4 leaves this syllable
;; alone.
(" ([^x ]*)x(.)x([^ ]*) "
|
(1 = *)
(2 =)
(3 = *)
|)
;; KA-SSA-NNA and KA-SSA-MA are the only pre-base ligatures that
;; consist of three consonants.
;; Groups: 1 = text before the first x, 2 = "K H S H M" as a whole,
;;         3 = "K H", 4 = "S H", 5 = NNA or MA,
;;         6 = optional trailing H, 7 = text after the second x.
(" ([^x ]*)x((KH)(SH)(M))(H)?x([^ ]*) "
|
(1 = *)
0x09FE ; x
(3 otf:beng=haln) ; KAhalant
(4 otf:beng=haln) ; SSAhalant
(5 =) ; NNA or MA
0x09FE ; x
(2 otf:beng=half,haln,pres) ; ligature result
0x09FE ; x
(6 =)
(7 = *)
|)
;; Two or more pre-bases plus base. Give up. Convert all
;; pre-bases into halant form.
;; Groups: 1 = text before the first x, 2 = run of pre-bases,
;;         3 = single "C H [JN]?" inside 2, 4 = optional base,
;;         5 = text after the second x.
(" ([^x ]*)x(([^x]H[JN]?)+)([^x])?x([^ ]*) "
|
(1 = *)
0x09FE ; x
(2 force-haln) ; halant forms
(4 =) ; full form
0x09FE ; x
0x09FD ; pseudo result
0x09FD ; pseudo result
0x09FE ; x
(5 = *)
|)
;; Anything else is passed through unchanged, one character at a time.
("." =))
*)
;; This is to remove ZWNJ and ZWJ. The half-form-force-effect of ZWJ
;; is ignored. Sorry.
;; The first rule applies haln to everything before the first J or N
;; (group 1), drops that J or N, and recurses on the remainder
;; (group 2). The second rule handles a run with no J or N left.
(force-haln
(cond
("([^JN]*)[JN](.*)"
(1 otf:beng=haln)
(2 force-haln))
(".+"
otf:beng=haln)))
)
;; Step 4 : Select the appropriate representation. Only those
;; syllables that contain the virtual character x require
;; modification.
(generator
;; Input syllables that still carry marks have the shape
;; " prefix x <halant forms + base> x <ligature section> x suffix "
;; as produced by Step 3. The first rule must stay ahead of the
;; second, because the second also matches a one-glyph ligature
;; section.
(0
(cond
;; Only one glyph in the ligature section (between the second and
;; the third x). It means a ligature was successfully generated.
;; C1halant and C2 (between the first and second x) are removed.
;; Groups: 1 = prefix, 2 = the single ligature glyph, 3 = suffix.
(" ([^x ]*)x[^x]+x(.)x([^ ]*) "
|
(1 = *)
(2 =)
(3 = *)
|)
;; Otherwise halant and base forms are used. The failed ligature
;; is removed.
;; Groups: 1 = prefix, 2 = halant forms and base, 3 = suffix.
(" ([^x ]*)x([^x]+)x[^x]+x([^ ]*) "
|
(1 = *)
(2 = *)
(3 = *)
|)
;; No need to care about the other cases.
("." =))
*))
;; Step 5 : Select appropriate glyph variants for fine adjustments.
;; Now the syllable boundary marks are removed so that the final step
;; can find word boundaries.
(generator
(0
(cond
;; A syllable enclosed in the pseudo characters: apply the blws, abvs,
;; psts and vatu features to its contents (group 1). The enclosing
;; pseudo characters lie outside the group and no "|" is emitted, so
;; the syllable boundaries disappear here.
(" ([^ ]+) "
(1 otf:beng=blws,abvs,psts,vatu))
;; Any other single character.
;; NOTE(review): the exact effect of the "[" / "]" commands and of
;; the empty feature lists in "otf:beng=+" cannot be determined from
;; this file -- check the m17n FLT documentation before changing it.
("."
[ otf:beng=+ ] ))
*)
)
;; Step 6 : Word initial substitute. As the syllable boundaries have
;; been eliminated in the previous step, this rule is applied to a run
;; of Bengali glyphs, i.e. word by word. We finally apply the init
;; feature to the word initial glyphs to get the final result.
(generator
(0
;; Single rule, no cond: group 1 is the first glyph of the run and
;; receives the init feature; group 2 is the rest of the run and is
;; copied unchanged.
("(.)(.*)"
(1 otf:beng=init)
(2 = *))))
;; Local Variables:
;; mode: emacs-lisp
;; End: