From: ntakahas Date: Fri, 21 May 2004 06:18:26 +0000 (+0000) Subject: New file X-Git-Tag: REL-1-1-0~127 X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=39ee2a731d18e5c29c93e5d20620407c953de041;p=m17n%2Fm17n-db.git New file --- diff --git a/BEN-OTF.flt b/BEN-OTF.flt new file mode 100644 index 0000000..c466581 --- /dev/null +++ b/BEN-OTF.flt @@ -0,0 +1,469 @@ +;; BEN-OTF.flt -- Font Layout Table for Bengali OpenType font +;; Copyright (C) 2004 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H15PRO112 + +;; This file is part of the m17n database; a sub-part of the m17n +;; library. + +;; The m17n library is free software; you can redistribute it and/or +;; modify it under the terms of the GNU Lesser General Public License +;; as published by the Free Software Foundation; either version 2.1 of +;; the License, or (at your option) any later version. + +;; The m17n library is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; Lesser General Public License for more details. + +;; You should have received a copy of the GNU Lesser General Public +;; License along with the m17n library; if not, write to the Free +;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +;; 02111-1307, USA. + +;;;
  • BEN-OTF.flt +;;; +;;; For Bengali OpenType fonts to draw Bengali script. Tested with +;;; MuktiNarrow.ttf and LikhanNormal.ttf. Both fonts are distributed by +;;; the Free Bangla Fonts Project. +;;; + +(category + ;; C: consonant (excluding B, Y and R) + ;; B: consonant BA (below) + ;; Y: consonant YA (post) + ;; R: consonant RA (reph, below) + ;; n: NUKTA + ;; H: HALANT + ;; m: MATRA (pre) + ;; b: MATRA (below) + ;; p: MATRA (post), AU LENGTH MARK + ;; t: MATRA (two-part O, AU) + ;; x: mark #1 (internal use) + ;; y: mark #2 (internal use) + ;; A: vowel modifier (above) + ;; a: vowel modifier (post) + ;; V: independent vowel + ;; N: ZWNJ (ZERO WIDTH NON-JOINER) + ;; J: ZWJ (ZERO WIDTH JOINER) + ;; E: ELSE + ;; Narrower entries below are meant to override the broader ranges listed before them (e.g. B, Y and R within KA .. HA). + (0x200C ?N) ; ZWNJ + (0x200D ?J) ; ZWJ + (0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA + (0x0980 0x09FF ?E) ; ELSE + (0x0981 ?A) ; SIGN CANDRABINDU (above) + (0x0982 0x0983 ?a) ; SIGN ANUSWAR, VISARGA (post) + (0x0985 0x098C ?V) ; LETTER A .. VOCALIC L + (0x098F 0x0990 ?V) ; LETTER E .. AI + (0x0993 0x0994 ?V) ; LETTER O .. AU + (0x0995 0x09B9 ?C) ; LETTER KA .. HA + (0x09AC ?B) ; LETTER BA + (0x09AF ?Y) ; LETTER YA + (0x09B0 ?R) ; LETTER RA + (0x09BC ?n) ; SIGN NUKTA + (0x09BE ?p) ; VOWEL SIGN AA (post) + (0x09BF ?m) ; VOWEL SIGN I (pre) + (0x09C0 ?p) ; VOWEL SIGN II (post) + (0x09C1 0x09C4 ?b) ; VOWEL SIGN U, UU, R, RR (below) + (0x09C7 0x09C8 ?m) ; VOWEL SIGN E, AI (pre) + (0x09CB 0x09CC ?t) ; VOWEL SIGN O, AU (two-part) + (0x09CD ?H) ; SIGN VIRAMA (HASANT) + (0x09D7 ?p) ; AU LENGTH MARK + (0x09DC 0x09DD ?C) ; LETTER RRA, RHA + (0x09DF ?C) ; LETTER YYA + (0x09E0 0x09E1 ?V) ; LETTER VOCALIC RR, LL + (0x09E2 0x09E3 ?b) ; VOWEL SIGN L .. LL (below) + (0x09F1 0x09F2 ?C) ; LETTER RR', RR'' (assamese) + (0x09FE ?x) ; mark #1 (internal use) + (0x09FF ?y) ; mark #2 (internal use) + ) + +;; Step 1 : Syllable identification. Recognised syllables are quoted +;; by the virtual character, which is generated by the command "|" and +;; has the category " " (space). 
+(generator + (0 + (cond + ;; Case A-C are for those syllables that end with an explicit vowel + ;; mark and/or a vowel modifier. They are divided into three cases + ;; for the readability of the regular expressions. The leading + ;; consonant-Hasant repetition is analysed for reordering in the + ;; next step. Two-part vowel, if any, is split for + ;; canonicalisation. + + ;; Case A : A syllable ending with a vowel modifier. + ("(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]*)(t)?([Aa])" + < | + (1 set-marks) + (4 = *) + (5 split) + (6 =) + | >) + + ;; Case B : A syllable ending with a two-part vowel. + ("(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)" + < | + (1 set-marks) + (4 split) + | >) + + ;; Case C : A syllable ending with another vowel. Note that a + ;; two-part vowel may be expressed with two vowel marks for + ;; backward compatibility. + ("(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbp]+)" + < | + (1 set-marks) + (4 = *) + | >) + + ;; Case D : Ya-phalaa. Reorder H and Y for the next step. + ;; The web page "Unicode FAQ for Indic Scripts and Languages" + ;; says "it should be + ;; permissible for the Ya-phalla to be consistently formed by ZWNJ + ;; + VIRAMA + YA". + ("([CBRY]N)(H)(Y)" + < | + (1 = =) + (3 =) + (2 =) + | >) + + ;; Case E : No explicit vowel nor modifier. If the syllable ends + ;; with a consonant, analyse it for reordering in the next step. + ;; Otherwise, just identify the syllable without changing anything. + ("([CRBY]n?H[NJ]?)*[CRBY]n?(HN|HJ|H)?" + (cond + (".+[^HNJ]$" + < | set-marks | >) + (".+" + < | = * | >))) + + ;; Case F : Syllables that begin with an independent vowel. An + ;; optional HYp sequence appears when this syllable represents the + ;; sound "a" in English "bat" (see the FAQ above). If it appears, + ;; we reorder the H and Y for the next step. + ("(V)(HYp)?([aA])?" + < | (1 =) (2 ("HY(p)" 0x09AF 0x09CD (1 =))) (3 =) | >) + + ("." 
=)) + *) + + ;; Set mark #1 (x) at the position where below consonants begin, and + ;; mark #2 (y) at the position to which below and above signs will be + ;; moved. + (set-marks + (cond + ;; At least one C and ends with Y. + ("(([CRBY]n?H[NJ]?)*Cn?)H(([BR]H)*)Y" + (1 = *) + 0x09FE ; mark #1 + (3 = *) ; below consonants + 0x09FF ; mark #2 + 0x09AF 0x09CD) ; YA + moved HASANT + ;; At least one C and ends with B or R. + ("(([CRBY]n?H[NJ]?)*Cn?)H(([BR]H)*[BR])" + (1 = *) + 0x09FE ; mark #1 + (3 = *) ; below consonants + 0x09CD ; moved HASANT + 0x09FF) ; mark #2 + (".+" + = * + 0x09FE ; mark #1 + 0x09FF))) ; mark #2 + + ;; Split two-part dependent vowel signs for canonicalisation. + (split + (cond + ((0x09CB) 0x09C7 0x09BE) ; O = E, AA + ((0x09CC) 0x09C7 0x09D7))) ; AU = E, AU LENGTH MARK + ) + +;; Step 2 : Move Reph and Matra if necessary. From now on, we care +;; for only those syllables that were identified in Step 1. +(generator + (0 + (cond + ;; This is the most generic pattern. It follows Case A-C and a + ;; part of Case E in Step 1. Now Mark #1 is used to indicate the + ;; critical part that requires pre-base substitution in the + ;; following steps. + + ;; 1 2 3 4 5 6 7 8 9 + (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)? " + | + (5 =) ; [Mpre] + 0x09FE + ;; Actually, the nukt feature is not necessary for Bengali because + ;; all the necessary Nukta forms are precomposed in the Unicode + ;; standard. Even if a Nukta consonant is given in the form of + ;; the combination of the base consonant and a Nukta sign, we can + ;; safely perform the composition here because it does not affect + ;; surrounding letters in the syllable. The Akhand ligature + ;; operation is also applied here, before applying the half form + ;; operation because the Mukti font generates Akhand ligatures + ;; directly from the "C H C" sequence, not via the half form. 
+ (2 otf:beng=nukt,akhn) ; {Cpre + H} + Cbase + 0x09FE + (3 otf:beng=blwf) ; {Cbelow + H} + (6 =) ; [Mbelow] + (1 otf:beng=rphf) ; [Reph] + (8 =) ; [VMabove] + (4 otf:beng=pstf) ; [Cpost + H] + (7 =) ; [Mpost] + (9 =) ; [VMpost] + |) + + ;; Syllables that begin with an independent vowel (following up + ;; Step 1, Case F). If a YH sequence exists, it is changed to the + ;; post base form. This type of syllable does not require further + ;; modification. The tail is matched with [^ ]* (not .*) so that the + ;; match cannot run past this syllable's closing boundary mark. + (" (V)(YH)([^ ]*) " + | + (1 =) + (2 otf:beng=pstf) + (3 = *) + |) + + ;; Ya-phalaa (following up Step 1, Case D). Remove N and change YH + ;; to the post base form. This type of syllable does not require + ;; further modification. + (" ([CBRY])N(YH) " + | + (1 =) + (2 otf:beng=pstf) + |) + + ;; Syllables that end with H and an optional N or J (following up a + ;; part of Step 1, Case E). This type of syllable also requires + ;; pre-base substitution in the following steps. + (" ([^ ]+H[NJ]?) " + | + 0x09FE + ;; Only Nukt and Akhn are applied here. See the comment in the + ;; topmost sibling for explanation. + (1 otf:beng=nukt,akhn) + 0x09FE + |) + + ("." =)) + *)) + +;; Step 3 : Now only those syllables that contain the virtual +;; character x require pre-base substitution. This is the most +;; complicated part in this FLT. + +;; If the sequence "C1 H C2" makes ligature L12, L12 replaces the +;; original sequence. + +;; To test the availability of such a ligature, we try to generate it +;; using the pre-base substitute feature, then see whether it succeeded +;; or not. In the case of failure, the pre-base feature does not +;; change the original sequence. + +;; To create a ligature, the "C1 H" part must be first converted into +;; the half form of C1. Creating the half form of a consonant always +;; succeeds. + +;; ligature(half(C1,H),C2) +;; ==> ligature(C1half,C2) +;; ==> L12 ; success +;; C1half C2 ; fail + +;; If the ligature is not available, the "C1 H" part must be converted +;; into the _Halant_ (not half) form of C1. 
However, there is no way +;; to reconvert C1half into C1halant nor to revert back to "C1 H". +;; Thus we duplicate the critical part in two different forms so that +;; we can select the appropriate one in the next step. The virtual +;; character x is used to indicate the boundaries. + +;; ... C1 H C2 ... ==> ... x C1halant C2 x L12 x ... + +;; If the length of the L12 part is one, ligature generation was +;; successful. In this case we wipe out the duplicated C1halant and +;; C2. Otherwise we remove L12. + +;; In very few cases (I found only one in the Mukti font), the "C1 H" +;; part needs to be converted into C1halant to make a ligature with C2. +;; So when trying to generate a ligature form, we apply the GSUB features +;; "half", "haln" and "pres" in this order. + +(category + ;; K, S, M: LETTER KA, SSA and NA/MA (see the KA-SSA-NA/MA rule below); x: mark #1 (internal use) + ;; H: HALANT + ;; N: ZWNJ (ZERO WIDTH NON-JOINER) + ;; J: ZWJ (ZERO WIDTH JOINER) + ;; E: ELSE + ;; + (0x200C ?N) ; ZWNJ + (0x200D ?J) ; ZWJ + (0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA + (0x0980 0x09FF ?E) ; ELSE + (0x09CD ?H) ; SIGN VIRAMA (HASANT) + (0x0995 ?K) ; LETTER KA + (0x09B7 ?S) ; LETTER SSA + (0x09A8 ?M) ; LETTER NA + (0x09AE ?M) ; LETTER MA + (0x09FE ?x) ; mark #1 (internal use) + ) + +(generator + (0 + (cond + + ;; One pre-base and base. + ;; 1 23 4 5 6 + (" ([^x ]*)x((.H)([^NJ]))(H)?x([^ ]*) " + | + (1 = *) + 0x09FE ; x + (3 otf:beng=haln) ; C1halant + (4 =) ; C2 + 0x09FE ; x + (2 otf:beng=half,haln,pres) ; ligature result + 0x09FE ; x + (5 =) ; optional trailing H + (6 = *) + |) + + ;; One pre-base with ZWJ. According to the Unicode FAQ, the half + ;; form is forced in this case. So we pretend that ligature + ;; generation failed by emitting a two-glyph pseudo result. + (" ([^x ]*)x(.H)J(.)?x([^ ]*) " + | + (1 = *) + 0x09FE ; x + (2 otf:beng=half) ; C1half + (3 =) ; C2 + 0x09FE ; x + 0x09FD ; pseudo result + 0x09FD ; pseudo result + 0x09FE ; x + (4 = *) + |) + + ;; One pre-base possibly with ZWNJ. Similar to above. 
+ (" ([^x ]*)x(.H)N?(.)?x([^ ]*) " + | + (1 = *) + 0x09FE ; x + (2 otf:beng=haln) ; C1halant + (3 =) ; C2 + 0x09FE ; x + 0x09FD ; pseudo result + 0x09FD ; pseudo result + 0x09FE ; x + (4 = *) + |) + + ;; Standalone base. There is nothing more to do. + (" ([^x ]*)x(.)x([^ ]*) " + | + (1 = *) + (2 =) + (3 = *) + |) + + ;; KA-SSA-NA and KA-SSA-MA are the only pre-base ligatures that + ;; consist of three consonants. + ;; 1 23 4 5 6 7 + (" ([^x ]*)x((KH)(SH)(M))(H)?x([^ ]*) " + | + (1 = *) + 0x09FE ; x + (3 otf:beng=haln) ; KAhalant + (4 otf:beng=haln) ; SSAhalant + (5 =) ; NA or MA + 0x09FE ; x + (2 otf:beng=half,haln,pres) ; ligature result + 0x09FE ; x + (6 =) ; optional trailing H + (7 = *) + |) + + ;; Two or more pre-bases plus base. Give up. Convert all + ;; pre-bases into halant form. + ;; 1 23 4 5 + (" ([^x ]*)x(([^x]H[JN]?)+)([^x])?x([^ ]*) " + | + (1 = *) + 0x09FE ; x + (2 force-haln) ; halant forms + (4 =) ; full form + 0x09FE ; x + 0x09FD ; pseudo result + 0x09FD ; pseudo result + 0x09FE ; x + (5 = *) + |) + + ("." =)) + *) + + ;; This is to remove ZWNJ and ZWJ. The half-form-force-effect of ZWJ + ;; is ignored. Sorry. + (force-haln + (cond + ("([^JN]*)[JN](.*)" + (1 otf:beng=haln) + (2 force-haln)) + (".+" + otf:beng=haln))) + ) + +;; Step 4 : Select the appropriate representation. Only those +;; syllables that contain the virtual character x require +;; modification. +(generator + (0 + (cond + ;; Only one glyph in the ligature section (between the second and + ;; the third x). It means a ligature was successfully generated. + ;; C1halant and C2 (between the first and second x) are removed. + (" ([^x ]*)x[^x]+x(.)x([^ ]*) " + | + (1 = *) + (2 =) + (3 = *) + |) + + ;; Otherwise halant and base forms are used. The failed ligature + ;; is removed. + (" ([^x ]*)x([^x]+)x[^x]+x([^ ]*) " + | + (1 = *) + (2 = *) + (3 = *) + |) + + ;; No need to care about the other cases. + ("." =)) + *)) + +;; Step 5 : Fine adjustments. Select appropriate glyph variants and +;; apply GPOS features. 
Now the syllable boundary marks are removed +;; so that the final step can find word boundaries. +(generator + (0 + (cond + (" ([^ ]+) " + (1 otf:beng=blws,abvs,psts,vatu)) + ("." + [ otf:beng=+ ] )) + *) + ) + +;; Step 6 : Word-initial substitution. As the syllable boundaries have +;; been eliminated in the previous step, this rule is applied to a run +;; of Bengali glyphs, i.e. word by word. We finally apply the init +;; feature to the word-initial glyphs and everything is over. +(generator + (0 + ("(.)(.*)" + (1 otf:beng=init) + (2 = *)))) + +;; Local Variables: +;; mode: emacs-lisp +;; End: