;; BENG-OTF.flt -- Font Layout Table for Bengali OpenType font
;; Copyright (C) 2004
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H15PRO112

;; This file is part of the m17n database; a sub-part of the m17n
;; library.

;; The m17n library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public License
;; as published by the Free Software Foundation; either version 2.1 of
;; the License, or (at your option) any later version.

;; The m17n library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.

;; You should have received a copy of the GNU Lesser General Public
;; License along with the m17n library; if not, write to the Free
;; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;; BENG-OTF.flt
;;;
;;; For Bengali OpenType fonts to draw the Bengali script.  Tested with
;;; MuktiNarrow.ttf and LikhanNormal.otf.

;; Character categories used by Steps 1 and 2.
(category
 ;; C: consonant (excluding B, Y and R)
 ;; B: consonant BA (below)
 ;; Y: consonant YA (post)
 ;; R: consonant RA (reph, below)
 ;; n: NUKTA
 ;; H: HALANT
 ;; m: MATRA (pre)
 ;; b: MATRA (below)
 ;; p: MATRA (post)
 ;; t: MATRA (two-part)
 ;; A: vowel modifier (above)
 ;; a: vowel modifier (post)
 ;; V: independent vowel
 ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
 ;; J: ZWJ (ZERO WIDTH JOINER)
 ;; E: ELSE
 ;; x: mark #1 (internal use)
 ;; y: mark #2 (internal use)
 ;;
 (0x200C ?N)                            ; ZWNJ
 (0x200D ?J)                            ; ZWJ
 (0x0964 0x0965 ?E)                     ; DANDA, DOUBLE DANDA
 (0x0980 0x09FF ?E)                     ; ELSE
 (0x0981 ?A)                            ; SIGN CANDRABINDU (above)
 (0x0982 0x0983 ?a)                     ; SIGN ANUSWAR, VISARGA (post)
 (0x0985 0x098C ?V)                     ; LETTER A .. VOCALIC L
 (0x098F 0x0990 ?V)                     ; LETTER E .. AI
 (0x0993 0x0994 ?V)                     ; LETTER O .. AU
 (0x0995 0x09B9 ?C)                     ; LETTER KA .. HA
 (0x09AC ?B)                            ; LETTER BA
 (0x09AF ?Y)                            ; LETTER YA
 (0x09B0 ?R)                            ; LETTER RA
 (0x09BC ?n)                            ; SIGN NUKTA
 (0x09BE ?p)                            ; VOWEL SIGN AA (post)
 (0x09BF ?m)                            ; VOWEL SIGN I (pre)
 (0x09C0 ?p)                            ; VOWEL SIGN II (post)
 (0x09C1 0x09C4 ?b)                     ; VOWEL SIGN U, UU, R, RR (below)
 (0x09C7 0x09C8 ?m)                     ; VOWEL SIGN E, AI (pre)
 (0x09CB 0x09CC ?t)                     ; VOWEL SIGN O, AU (two-part)
 (0x09CD ?H)                            ; SIGN VIRAMA (HASANT)
 (0x09CE ?C)                            ; LETTER KHANDA TA
 (0x09D7 ?p)                            ; AU LENGTH MARK
 (0x09DC 0x09DD ?C)                     ; LETTER RRA, RHA
 (0x09DF ?C)                            ; LETTER YYA
 (0x09E0 0x09E1 ?V)                     ; LETTER VOCALIC RR, LL
 (0x09E2 0x09E3 ?b)                     ; VOWEL SIGN L .. LL (below)
 (0x09F0 0x09F1 ?C)                     ; LETTER RR', RR'' (assamese)
 (0x09FE ?x)                            ; mark #1 (internal use)
 (0x09FF ?y)                            ; mark #2 (internal use)
 )

;; Step 1 : Syllable identification.  Recognised syllables are quoted
;; by the pseudo character, which is generated by the command "|" and
;; has the category " " (space).

(generator
 (0
  (cond

   ;; Cases A-C are for those syllables that end with an explicit
   ;; vowel mark and/or a vowel modifier.  They are divided into three
   ;; cases for the readability of the regular expressions.  The
   ;; leading consonant-Hasant repetition is analysed for reordering
   ;; in the next step.  A two-part vowel, if any, is split for
   ;; canonicalisation.

   ;; Case A : A syllable ending with a vowel modifier.
   ;; Groups: 1 = RA + HALANT, 2 = consonant cluster (3 = repeated
   ;; consonant + HALANT, 4 = last consonant), 5 = matras,
   ;; 6 = two-part matra, 7 = vowel modifier.
   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]*)(t)?([Aa])"
    < | (1 = =) (2 set-marks) (5 = *) (6 split) (7 =) | >)

   ;; Case B : A syllable ending with a two-part vowel.
   ;; Groups: 1 and 2-4 as in Case A, 5 = two-part matra.
   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)"
    < | (1 = =) (2 set-marks) (5 split) | >)

   ;; Case C : A syllable ending with another vowel.  Note that a
   ;; two-part vowel may be expressed with two vowel marks for
   ;; backward compatibility.
   ;; Groups: 1 and 2-4 as in Case A, 5 = one or more matras.
   ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]+)"
    < | (1 = =) (2 set-marks) (5 = *) | >)

   ;; Case D : Ya-phalaa.  Reorder H and Y for the next step.
   ;; The web page "Unicode FAQ for Indic Scripts and Languages" says
   ;; it should be permissible for the Ya-phalaa to be consistently
   ;; formed by "ZWNJ + VIRAMA + YA".
   ("([CRBY]n?N)(H)(Y)"
    < | (1 = *) (3 =) (2 =) | >)

   ;; Case E : No explicit vowel nor modifier.  If the syllable ends
   ;; with a consonant, analyse it for reordering in the next step.
   ;; Otherwise, just identify the syllable without changing anything.
   ;; Groups: 1 = RA + HALANT, 2 = consonant cluster (3 = repeated
   ;; consonant + HALANT), 4 = trailing HALANT with optional ZWNJ/ZWJ.
   ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?"
    < | (1 = =) (2 set-marks) (4 = *) | >)

   ;; Case F : Syllables that begin with an independent vowel.  An
   ;; optional HYp sequence appears when this syllable represents the
   ;; sound "a" in English "bat" (see the FAQ above).  If it appears,
   ;; we reorder the H and Y for the next step.
   ("(V)(HYp)?([aA])?"
    < | (1 =) (2 ("HY(p)" 0x09AF 0x09CD (1 =))) (3 =) | >)

   ;; Anything else is passed through unchanged.
   ("." =))
  *)

 ;; Set mark #1 (x) at the position where below consonants begin, and
 ;; mark #2 (y) at the position to which below and above signs will be
 ;; moved.
 (set-marks
  (cond

   ;; Ending with Y.
   ;; Groups: 1 = prebase & base, 3 = HALANT, 4 = below consonants
   ;; (each followed by HALANT), 6 = YA.
   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)"
    (1 = *)                             ; prebase & base
    0x09FE                              ; mark #1
    (4 = *)                             ; below consonants
    0x09FF                              ; mark #2
    (6 =)                               ; YA
    (3 =))                              ; moved HASANT

   ;; Ending with R or B.
   ;; Groups: 1 = prebase & base, 3 = HALANT, 4 = below consonants.
   ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])"
    (1 = *)                             ; prebase & base
    0x09FE                              ; mark #1
    (4 = *)                             ; below consonants
    (3 =)                               ; moved HASANT
    0x09FF)                             ; mark #2

   ;; No below or post consonant: both marks follow the whole cluster.
   (".+"
    = *
    0x09FE                              ; mark #1
    0x09FF)))                           ; mark #2

 ;; Split two-part dependent vowel signs for canonicalisation.
 (split
  (cond
   ((0x09CB) 0x09C7 0x09BE)
   ((0x09CC) 0x09C7 0x09D7)))
 )

;; Step 2 : Move Reph and Matra if necessary.  From now on, we care
;; only for those syllables that have been identified in Step 1.

(generator
 (0
  (cond

   ;; Special case: a single consonant and a Halant.
   (" (.)xy(H[NJ]?) "
    | 0x09FE (1 =) (2 = *) 0x09FE |)

   ;; This is the most generic pattern.  It follows Cases A, B, C and
   ;; E in Step 1.  Now mark #1 is used to indicate the critical part
   ;; that requires pre-base substitution in the following steps.
   ;; Groups: 1 = RA + HALANT, 2 = prebase & base, 3 = below
   ;; consonants, 4 = YA + HALANT, 5 = pre matra, 6 = below matra,
   ;; 7 = post matra, 8 = above modifier, 9 = post modifier,
   ;; 10 = trailing HALANT with optional ZWNJ/ZWJ.
   (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)?(HN|HJ|H)? "
    |
    (5 =)                               ; [Mpre]

    ;; Actually, the nukt feature is not necessary for Bengali because
    ;; all the necessary Nukta forms are precomposed in the Unicode
    ;; standard.  Even if a Nukta consonant is given in the form of
    ;; the combination of the base consonant and a Nukta sign, we can
    ;; safely perform the composition here because it does not affect
    ;; surrounding letters in the syllable.  The Akhand ligature
    ;; operation is also applied here, before applying the half form
    ;; operation, because the Mukti font generates Akhand ligatures
    ;; directly from the "C H C" sequence, not via the half form.

    0x09FE                              ; begin Cpre & Cbase
    (2 otf:beng=nukt,akhn)              ; {Cpre + H} + Cbase
    0x09FE                              ; end Cpre & Cbase
    (3 otf:beng=blwf)                   ; {Cbelow + H}
    (6 =)                               ; [Mbelow]
    (1 otf:beng=rphf)                   ; [Reph]
    (8 =)                               ; [VMabove]
    (4 otf:beng=pstf)                   ; [Cpost + H]
    (7 =)                               ; [Mpost]
    (9 =)                               ; [VMpost]
    (10 = *)                            ; optional HASANT
    |)

   ;; Syllables that begin with an independent vowel (following up
   ;; Step 1, Case F).  If a YH sequence exists, it is changed to the
   ;; post-base form.  Syllables of this type do not require further
   ;; modification.
   (" (V)(YH)(.*) "
    | (1 =) (2 otf:beng=pstf) (3 = *) |)

   ;; Ya-phalaa (following up Step 1, Case D).  Remove N and change YH
   ;; to the post-base form.  Syllables of this type do not require
   ;; further modification.
   (" ([CBRY]n?)N(YH) "
    | (1 =) (2 otf:beng=pstf) |)

   ;; Anything else is passed through unchanged.
   ("." =))
  *))

;; Step 3 : Now only those syllables that contain the pseudo character
;; x require pre-base substitution.  This is the most complicated part
;; in this FLT.

;; If the sequence "C1 H C2" makes ligature L12, L12 replaces the
;; original sequence.

;; To test the availability of such a ligature, we try to generate it
;; using the pre-base substitute feature, then see whether that
;; succeeded or not.  In the case of failure, the pre-base feature
;; does not change the original sequence.

;; To create a ligature, the "C1 H" part must first be converted into
;; the half form of C1.  Creating the half form of a consonant always
;; succeeds.

;;   ligature(half(C1,H),C2)
;;     ==> ligature(C1half,C2)
;;       ==> L12                 ; success
;;           C1half C2           ; fail

;; If the ligature is not available, the "C1 H" part must be converted
;; into the _Halant_ (not half) form of C1.  However, there is no way
;; to reconvert C1half into C1halant nor to revert back to "C1 H".
;; Thus we duplicate the critical part in two different forms so that
;; we can select the appropriate one in the next step.  The pseudo
;; character x is used to indicate the boundaries.

;;   ... C1 H C2 ...  ==>  ... x C1halant C2 x L12 x ...

;; If the length of the L12 part is one, ligature generation was
;; successful.  In this case we wipe out the duplicated C1halant and
;; C2.  Otherwise we remove L12.

;; In very few cases (I found only one in the Mukti font), the "C1 H"
;; part needs to be converted into C1halant (instead of C1half) to
;; make a ligature with C2.  So when we try to generate a ligature
;; form, we apply the GSUB features "half", "haln" and "pres" in this
;; order.

;; Character categories used by Steps 3 onward.
(category
 ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
 ;; J: ZWJ (ZERO WIDTH JOINER)
 ;; E: ELSE
 ;; H: HALANT
 ;; K: consonant KA
 ;; S: consonant SSA
 ;; M: consonant NNA or MA
 ;; x: mark #1 (internal use)
 ;;
 (0x200C ?N)                            ; ZWNJ
 (0x200D ?J)                            ; ZWJ
 (0x0964 0x0965 ?E)                     ; DANDA, DOUBLE DANDA
 (0x0980 0x09FF ?E)                     ; ELSE
 (0x09CD ?H)                            ; SIGN VIRAMA (HASANT)
 (0x0995 ?K)                            ; LETTER KA
 (0x09B7 ?S)                            ; LETTER SSA
 (0x09A3 ?M)                            ; LETTER NNA
 (0x09AE ?M)                            ; LETTER MA
 (0x09FE ?x)                            ; mark #1 (internal use)
 )

(generator
 (0
  (cond

   ;; One pre-base and base.
   ;; Groups: 1 = text before the critical part, 2 = "C1 H C2",
   ;; 3 = "C1 H", 4 = C2, 5 = optional HALANT, 6 = rest of syllable.
   (" ([^x ]*)x((.H)([^NJ]))(H)?x([^ ]*) "
    |
    (1 = *)
    0x09FE                              ; x
    (3 otf:beng=haln)                   ; C1halant
    (4 =)                               ; C2
    0x09FE                              ; x
    (2 otf:beng=half,haln,pres)         ; ligature result
    0x09FE                              ; x
    (5 =)
    (6 = *)
    |)

   ;; One pre-base with ZWJ.  According to the Unicode FAQ, the half
   ;; form is forced in this case.  So we pretend that ligature
   ;; generation failed.
   (" ([^x ]*)x(.H)J(.)?x([^ ]*) "
    |
    (1 = *)
    0x09FE                              ; x
    (2 otf:beng=half)                   ; C1half
    (3 =)                               ; C2
    0x09FE                              ; x
    0x09FD                              ; pseudo result
    0x09FD                              ; pseudo result
    0x09FE                              ; x
    (4 = *)
    |)

   ;; One pre-base possibly with ZWNJ.  Similar to above.
   (" ([^x ]*)x(.H)N?(.)?x([^ ]*) "
    |
    (1 = *)
    0x09FE                              ; x
    (2 otf:beng=haln)                   ; C1halant
    (3 =)                               ; C2
    0x09FE                              ; x
    0x09FD                              ; pseudo result
    0x09FD                              ; pseudo result
    0x09FE                              ; x
    (4 = *)
    |)

   ;; Standalone base.  There is nothing more to do.
   (" ([^x ]*)x(.)x([^ ]*) "
    | (1 = *) (2 =) (3 = *) |)

   ;; KA-SSA-NNA and KA-SSA-MA are the only pre-base ligatures that
   ;; consist of three consonants.
   ;; Groups: 1 = text before the critical part, 2 = whole cluster,
   ;; 3 = KA + HALANT, 4 = SSA + HALANT, 5 = NNA or MA,
   ;; 6 = optional HALANT, 7 = rest of syllable.
   (" ([^x ]*)x((KH)(SH)(M))(H)?x([^ ]*) "
    |
    (1 = *)
    0x09FE                              ; x
    (3 otf:beng=haln)                   ; KAhalant
    (4 otf:beng=haln)                   ; SSAhalant
    (5 =)                               ; NNA or MA
    0x09FE                              ; x
    (2 otf:beng=half,haln,pres)         ; ligature result
    0x09FE                              ; x
    (6 =)
    (7 = *)
    |)

   ;; Two or more pre-bases plus base.  Give up.  Convert all
   ;; pre-bases into halant form.
   ;; Groups: 1 = text before the critical part, 2 = all pre-bases
   ;; (3 = one pre-base + HALANT, optionally followed by ZWJ/ZWNJ),
   ;; 4 = optional base, 5 = rest of syllable.
   (" ([^x ]*)x(([^x]H[JN]?)+)([^x])?x([^ ]*) "
    |
    (1 = *)
    0x09FE                              ; x
    (2 force-haln)                      ; halant forms
    (4 =)                               ; full form
    0x09FE                              ; x
    0x09FD                              ; pseudo result
    0x09FD                              ; pseudo result
    0x09FE                              ; x
    (5 = *)
    |)

   ;; Anything else is passed through unchanged.
   ("." =))
  *)

 ;; This is to remove ZWNJ and ZWJ.  The half-form-force-effect of ZWJ
 ;; is ignored.  Sorry.
 (force-haln
  (cond
   ("([^JN]*)[JN](.*)"
    (1 otf:beng=haln)
    (2 force-haln))
   (".+"
    otf:beng=haln)))
 )

;; Step 4 : Select the appropriate representation.  Only those
;; syllables that contain the virtual character x require
;; modification.

(generator
 (0
  (cond

   ;; Only one glyph in the ligature section (between the second and
   ;; the third x).  It means a ligature was successfully generated.
   ;; C1halant and C2 (between the first and second x) are removed.
   (" ([^x ]*)x[^x]+x(.)x([^ ]*) "
    | (1 = *) (2 =) (3 = *) |)

   ;; Otherwise the halant and base forms are used.  The failed
   ;; ligature is removed.
   (" ([^x ]*)x([^x]+)x[^x]+x([^ ]*) "
    | (1 = *) (2 = *) (3 = *) |)

   ;; No need to care about the other cases.
   ("." =))
  *))

;; Step 5 : Select appropriate glyph variants for fine adjustments.
;; Now the syllable boundary marks are removed so that the final step
;; can find word boundaries.

(generator
 (0
  (cond
   (" ([^ ]+) "
    (1 otf:beng=blws,abvs,psts,vatu))

   ("." [ otf:beng=+ ] ))
  *)
 )

;; Step 6 : Word initial substitute.  As the syllable boundaries have
;; been eliminated in the previous step, this rule is applied to a run
;; of Bengali glyphs, i.e. word by word.  We finally apply the init
;; feature to the word initial glyphs to get the final result.

(generator
 (0
  ("(.)(.*)"
   (1 otf:beng=init)
   (2 = *))))

;; Local Variables:
;; mode: emacs-lisp
;; End: