FLT/BENG-OTF.flt

   1 ;; BENG-OTF.flt -- Font Layout Table for Bengali OpenType font
   2 ;; Copyright (C) 2004, 2007
   3 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   4 ;;   Registration Number H15PRO112
   5
   6 ;; This file is part of the m17n database; a sub-part of the m17n
   7 ;; library.
   8
   9 ;; The m17n library is free software; you can redistribute it and/or
  10 ;; modify it under the terms of the GNU Lesser General Public License
  11 ;; as published by the Free Software Foundation; either version 2.1 of
  12 ;; the License, or (at your option) any later version.
  13
  14 ;; The m17n library is distributed in the hope that it will be useful,
  15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 ;; Lesser General Public License for more details.
  18
  19 ;; You should have received a copy of the GNU Lesser General Public
  20 ;; License along with the m17n library; if not, write to the Free
  21 ;; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  22 ;; Boston, MA 02110-1301, USA.
  23
  24 ;;; <li> BENG-OTF.flt
  25 ;;;
  26 ;;; For Bengali OpenType fonts to draw the Bengali script.  Tested with
  27 ;;; MuktiNarrow.ttf <http://www.nongnu.org/freebangfont/index.html>
  28 ;;; and
  29 ;;; LikhanNormal.otf <http://www.stat.wisc.edu/~deepayan/Bengali/WebPage/Font/fonts.html>
  30
  31 (font layouter beng-otf nil                        ; declares the layouter named beng-otf
  32       (font (nil nil unicode-bmp :otf=beng=rphf))) ; NOTE(review): presumably limits this FLT to fonts whose OTF script "beng" provides GSUB "rphf" -- confirm against the m17n font-spec docs
  33
  34 (category
  35  ;; C: consonant (excluding B, Y and R)
  36  ;; B: consonant BA (below)
  37  ;; Y: consonant YA (post)
  38  ;; R: consonant RA (reph, below)
  39  ;; n: NUKTA
  40  ;; H: HALANT
  41  ;; m: MATRA (pre)
  42  ;; b: MATRA (below)
  43  ;; p: MATRA (post)
  44  ;; t: MATRA (two-part)
  45  ;; A: vowel modifier (above)
  46  ;; a: vowel modifier (post)
  47  ;; V: independent vowel
  48  ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
  49  ;; J: ZWJ (ZERO WIDTH JOINER)
  50  ;; E: ELSE
  51  ;; x, y: marks #1 and #2 (internal use; inserted by set-marks)
  52  (0x200C        ?N)                     ; ZWNJ
  53  (0x200D        ?J)                     ; ZWJ
  54  (0x0964 0x0965 ?E)                     ; DANDA, DOUBLE DANDA
  55  (0x0980 0x09FF ?E)                     ; ELSE (whole range; the entries below refine it)
  56  (0x0981        ?A)                     ; SIGN CANDRABINDU (above)
  57  (0x0982 0x0983 ?a)                     ; SIGN ANUSWAR, VISARGA (post)
  58  (0x0985 0x098C ?V)                     ; LETTER A .. VOCALIC L
  59  (0x098F 0x0990 ?V)                     ; LETTER E .. AI
  60  (0x0993 0x0994 ?V)                     ; LETTER O .. AU
  61  (0x0995 0x09B9 ?C)                     ; LETTER KA .. HA
  62  (0x09AC        ?B)                     ; LETTER BA
  63  (0x09AF        ?Y)                     ; LETTER YA
  64  (0x09B0        ?R)                     ; LETTER RA
  65  (0x09BC        ?n)                     ; SIGN NUKTA
  66  (0x09BE        ?p)                     ; VOWEL SIGN AA (post)
  67  (0x09BF        ?m)                     ; VOWEL SIGN I (pre)
  68  (0x09C0        ?p)                     ; VOWEL SIGN II (post)
  69  (0x09C1 0x09C4 ?b)                     ; VOWEL SIGN U, UU, R, RR (below)
  70  (0x09C7 0x09C8 ?m)                     ; VOWEL SIGN E, AI (pre)
  71  (0x09CB 0x09CC ?t)                     ; VOWEL SIGN O, AU (two-part)
  72  (0x09CD        ?H)                     ; SIGN VIRAMA (HASANT)
  73  (0x09CE        ?C)                     ; LETTER KHANDA TA
  74  (0x09D7        ?p)                     ; AU LENGTH MARK
  75  (0x09DC 0x09DD ?C)                     ; LETTER RRA, RHA
  76  (0x09DF        ?C)                     ; LETTER YYA
  77  (0x09E0 0x09E1 ?V)                     ; LETTER VOCALIC RR, LL
  78  (0x09E2 0x09E3 ?b)                     ; VOWEL SIGN L .. LL (below)
  79  (0x09F0 0x09F1 ?C)                     ; LETTER RR', RR'' (assamese)
  80  (0x09FE        ?x)                     ; mark #1 (internal use)
  81  (0x09FF        ?y)                     ; mark #2 (internal use)
  82  )
  83
  84 ;; Step 1 : Syllable identification.  Recognised syllables are quoted
  85 ;; by the pseudo character, which is generated by the command "|" and
  86 ;; has the category " " (space).
  87 (generator
  88  (0
  89   (cond
  90    ;; Special case.  The sequence "C1 H N C2 m" is reordered as
  91    ;; "C1 H m C2", not "m C1 H C2".  Besides, "C1 H" is drawn in the
  92    ;; halant-form: ZWNJ (N) is dropped and GSUB "haln" is applied.
  93    ("([CRBY]n?H)N"
  94     < |
  95     (1 otf:beng=haln)                   ; consonant (+ NUKTA) + HASANT -> halant form
  96     | >)
  97
  98    ;; Case A-C are for those syllables that end with an explicit vowel
  99    ;; mark and/or a vowel modifier.  They are divided into three cases
 100    ;; for the readability of regular expression.  The leading
 101    ;; consonant-Hasant repetition is analysed for reordering in the
 102    ;; next step.  Two-part vowel, if any, is split for
 103    ;; canonicalisation.
 104
 105    ;; Case A : A syllable ending with a vowel modifier.
 106    ;;1    23                4          5       6   7
 107    ("(RH)?(([CRBY]n?HJ?)*([CRBY]n?))([mbp]*)(t)?([Aa])"
 108     < |
 109     (1 = =)
 110     (2 set-marks)
 111     (5 = *)
 112     (6 split)
 113     (7 =)
 114     | >)
 115
 116    ;; Case B : A syllable ending with a two-part vowel.
 117    ;;1    23                4          5
 118    ("(RH)?(([CRBY]n?HJ?)*([CRBY]n?))(t)"
 119     < |
 120     (1 = =)
 121     (2 set-marks)
 122     (5 split)
 123     | >)
 124
 125    ;; Case C : A syllable ending with other vowel.  Note that a
 126    ;; two-part vowel may be expressed with two vowel marks for
 127    ;; backward compatibility.
 128    ;;1    23                4          5
 129    ("(RH)?(([CRBY]n?HJ?)*([CRBY]n?))([mbp]+)"
 130     < |
 131     (1 = =)
 132     (2 set-marks)
 133     (5 = *)
 134     | >)
 135
 136    ;; Case D : Ya-phalaa.  Reorder H and Y for the next step.
 137    ;; The web page "Unicode FAQ for Indic Scripts and Languages"
 138    ;; <http://www.unicode.org/faq/indic.html> says "it should be
 139    ;; permissible for the Ya-phalaa to be consistently formed by 'ZWNJ
 140    ;; + VIRAMA + YA'".
 141    ("([CRBY]n?N)(H)(Y)"
 142     < |
 143     (1 = *)
 144     (3 =)
 145     (2 =)
 146     | >)
 147
 148    ;; Case E : No explicit vowel nor modifier.  If the syllable ends
 149    ;; with a consonant, analyse it for reordering in the next step.
 150    ;; Otherwise, just identify the syllable without changing anything.
 151    ;;1    23                         4
 152    ("(RH)?(([CRBY]n?HJ?)*[CRBY]n?)(HN|HJ|H)?"
 153     < |
 154     (1 = =)
 155     (2 set-marks)
 156     (4 = *)
 157     | >)
 158
 159    ;; Case F : Syllables that begin with an independent vowel.  An
 160    ;; optional HYp sequence appears when this syllable represents the
 161    ;; sound "a" in English "bat" (see the FAQ above).  If it appears,
 162    ;; we reorder the H and Y for the next step.
 163    ("(V)(HYp)?([aA])?"
 164     < | (1 =) (2 ("HY(p)" 0x09AF 0x09CD (1 =))) (3 =) | >)
 165
 166    ("." =))
 167   *)
 168
 169  ;; Set mark #1 (x) at the position where below consonants begin, and
 170  ;; mark #2 (y) at the position to which below and above signs will be
 171  ;; moved.
 172  (set-marks
 173   (cond
 174    ;; Ending with Y.  Output: prebase+base, x, below, y, YA, HASANT.
 175    ;;1        2            3  45        6
 176    ("([CRBY]n?(HJ?Cn?)*)(H)(([RB]H)*)(Y)"
 177     (1 = *)                             ; prebase & base
 178     0x09FE                              ; mark #1
 179     (4 = *)                             ; below consonants
 180     0x09FF                              ; mark #2
 181     (6 =)                               ; YA
 182     (3 =))                              ; moved HASANT
 183    ;; Ending with R or B.  Output: prebase+base, x, below, HASANT, y.
 184    ;;1        2            3  45
 185    ("([CRBY]n?(HJ?Cn?)*)(H)(([RB]H)*[RB])"
 186     (1 = *)                             ; prebase & base
 187     0x09FE                              ; mark #1
 188     (4 = *)                             ; below consonants
 189     (3 =)                               ; moved HASANT
 190     0x09FF)                             ; mark #2
 191    (".+"                                ; anything else: nothing goes between the marks
 192     = *                                 ; copy the whole match unchanged
 193     0x09FE                              ; mark #1
 194     0x09FF)))                           ; mark #2
 195
 196  ;; Split two-part dependent vowel signs for canonicalisation.
 197  (split
 198   (cond
 199    ((0x09CB)    0x09C7 0x09BE)          ; VOWEL SIGN O  -> VOWEL SIGN E + VOWEL SIGN AA
 200    ((0x09CC)    0x09C7 0x09D7)))        ; VOWEL SIGN AU -> VOWEL SIGN E + AU LENGTH MARK
 201  )
 202
 203 ;; Step 2 : Move Reph and Matra if necessary.  From now on, we care
 204 ;; only for those syllables that have been identified in Step 1.
 205 (generator
 206  (0
 207   (cond
 208    ;; Special case: a single glyph followed by adjacent marks x and y and a Halant (optionally with ZWJ); mark #2 is replaced by a second mark #1.
 209    (" (.)xy(HJ?) "
 210     |
 211     0x09FE                              ; x
 212     (1 =)                               ; the glyph before the marks
 213     (2 = *)                             ; HASANT and optional ZWJ
 214     0x09FE                              ; x
 215     |)
 216
 217    ;; This is the most generic pattern.  It follows Cases A, B, C and
 218    ;; E in Step 1.  Now Mark #1 is used to indicate the critical part
 219    ;; that requires pre-base substitution in the following steps; mark #2 (y) is not emitted again.
 220    ;; NOTE(review): group 10 accepts "HJ" or "H" only, while Step 1 Case E can also leave a trailing "HN" -- confirm such syllables are meant to fall through.
 221    ;; 1    2         3        4    5   6   7   8   9   10
 222    (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)?(HJ|H)? "
 223     |
 224     (5 =)                               ; [Mpre]
 225     ;; Actually, the nukt feature is not necessary for Bengali because
 226     ;; all the necessary Nukta forms are precomposed in the Unicode
 227     ;; standard.  Even if a Nukta consonant is given in the form of
 228     ;; the combination of the base consonant and a Nukta sign, we can
 229     ;; safely perform the composition here because it does not affect
 230     ;; surrounding letters in the syllable.  The Akhand ligature
 231     ;; operation is also applied here, before applying the half form
 232     ;; operation because the Mukti font generates Akhand ligatures
 233     ;; directly from the "C H C" sequence, not via the half form.
 234     0x09FE                              ; begin Cpre & Cbase
 235     (2 otf:beng=nukt,akhn)              ; {Cpre + H} + Cbase
 236     0x09FE                              ; end Cpre & Cbase
 237     (3 otf:beng=blwf)                   ; {Cbelow + H}
 238     (6 =)                               ; [Mbelow]
 239     (1 otf:beng=rphf)                   ; [Reph]
 240     (8 =)                               ; [VMabove]
 241     (4 otf:beng=pstf)                   ; [Cpost + H]
 242     (7 =)                               ; [Mpost]
 243     (9 =)                               ; [VMpost]
 244     (10 = *)                            ; optional HASANT
 245     |)
 246
 247    ;; Syllables that begin with an independent vowel (following up
 248    ;; Step 1, Case F).  If a YH sequence exist, it is changed to the
 249    ;; post-base form.  Syllables of this type do not require further
 250    ;; modification.
 251    (" (V)(YH)(.*) "
 252     |
 253     (1 =)
 254     (2 otf:beng=pstf)
 255     (3 = *)
 256     |)
 257
 258    ;; Ya-phalaa (following up Step 1, Case D).  Remove N and change YH
 259    ;; to the post base form.  Syllables of this type do not require
 260    ;; further modification.
 261    (" ([CBRY]n?)N(YH) "
 262     |
 263     (1 =)
 264     (2 otf:beng=pstf)
 265     |)
 266
 267    ("." =))
 268   *))
 269
 270 ;; Step 3 : Now only those syllables that contain the pseudo character
 271 ;; x require pre-base substitution.  This is the most complicated part
 272 ;; in this FLT.
 273
 274 ;; If the sequence "C1 H C2" makes ligature L12, L12 replaces the
 275 ;; original sequence.
 276
 277 ;; To test the availability of such a ligature, we try to generate it
 278 ;; using the pre-base substitute feature, then see whether it succeeded
 279 ;; or not.  In the case of failure, the pre-base feature does not
 280 ;; change the original sequence.
 281
 282 ;; To create a ligature, the "C1 H" part must be first converted into
 283 ;; the half form of C1.  Creating the half form of a consonant always
 284 ;; succeeds.
 285
 286 ;; ligature(half(C1,H),C2)
 287 ;; ==> ligature(C1half,C2)
 288 ;; ==> L12         ; success
 289 ;;     C1half C2   ; fail
 290
 291 ;; If the ligature is not available, the "C1 H" part must be converted
 292 ;; into the _Halant_ (not half) form of C1.  However, there is no way
 293 ;; to reconvert C1half into C1halant nor to revert back to "C1 H".
 294 ;; Thus we duplicate the critical part in two different forms so that
 295 ;; we can select the appropriate one in the next step.  The pseudo
 296 ;; character x is used to indicate the boundaries.
 297
 298 ;; ... C1 H C2 ...  ==>  ... x C1halant C2 x L12 x ...
 299
 300 ;; If the length of the L12 part is one, ligature generation was
 301 ;; successful.  In this case we wipe out the duplicated C1halant and
 302 ;; C2.  Otherwise we remove L12.
 303
 304 ;; In very few cases (I found only one in the Mukti font), the "C1 H"
 305 ;; part needs to be converted into C1halant (instead of C1half) to make
 306 ;; a ligature with C2.  So when we try to generate a ligature form, we
 307 ;; apply the GSUB features "half", "haln" and "pres" in this order.
 308
 309 (category
 310  ;; K: KA, S: SSA, M: NNA or MA (used by the KA-SSA-NNA/MA rule)
 311  ;; H: HALANT
 312  ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
 313  ;; J: ZWJ (ZERO WIDTH JOINER)
 314  ;; E: ELSE (everything else in 0x0980..0x09FF, plus the dandas)
 315  ;; x: mark #1 (internal use)
 316  (0x200C        ?N)                     ; ZWNJ
 317  (0x200D        ?J)                     ; ZWJ
 318  (0x0964 0x0965 ?E)                     ; DANDA, DOUBLE DANDA
 319  (0x0980 0x09FF ?E)                     ; ELSE
 320  (0x09CD        ?H)                     ; SIGN VIRAMA (HASANT)
 321  (0x0995        ?K)                     ; LETTER KA
 322  (0x09B7        ?S)                     ; LETTER SSA
 323  (0x09A3        ?M)                     ; LETTER NNA
 324  (0x09AE        ?M)                     ; LETTER MA
 325  (0x09FE        ?x)                     ; mark #1 (internal use)
 326  )
 327
 328 (generator
 329  (0
 330   (cond
 331
 332    ;; One pre-base and base.
 333    ;; 1        23   4       5    6
 334    (" ([^x ]*)x((.H)([^J]))(H)?x([^ ]*) "
 335     |
 336     (1 = *)
 337     0x09FE                              ; x
 338     (3 otf:beng=haln)                   ; C1halant
 339     (4 =)                               ; C2
 340     0x09FE                              ; x
 341     (2 otf:beng=half,haln,pres)         ; ligature result
 342     0x09FE                              ; x
 343     (5 =)
 344     (6 = *)
 345     |)
 346
 347    ;; One pre-base with ZWJ.  According to the Unicode FAQ, the half
 348    ;; form is forced in this case.  So we pretend that ligature
 349    ;; generation failed: two pseudo glyphs never match the single "(.)" tested in Step 4.
 350    (" ([^x ]*)x(.H)J(.)?x([^ ]*) "
 351     |
 352     (1 = *)                             ; whatever precedes the first x
 353     0x09FE                              ; x
 354     (2 otf:beng=half)                   ; C1half
 355     (3 =)                               ; C2
 356     0x09FE                              ; x
 357     0x09FD                              ; pseudo result
 358     0x09FD                              ; pseudo result
 359     0x09FE                              ; x
 360     (4 = *)                             ; whatever follows the last x
 361     |)
 362
 363    ;; One pre-base possibly with ZWNJ.  Similar to above.
 364    (" ([^x ]*)x(.H)N?(.)?x([^ ]*) "
 365     |
 366     (1 = *)
 367     0x09FE                              ; x
 368     (2 otf:beng=haln)                   ; C1halant
 369     (3 =)                               ; C2
 370     0x09FE                              ; x
 371     0x09FD                              ; pseudo result
 372     0x09FD                              ; pseudo result
 373     0x09FE                              ; x
 374     (4 = *)
 375     |)
 376
 377    ;; Standalone base.  There is nothing more to do.
 378    (" ([^x ]*)x(.)x([^ ]*) "
 379     |
 380     (1 = *)
 381     (2 =)
 382     (3 = *)
 383     |)
 384
 385    ;; KA-SSA-NNA and KA-SSA-MA are the only pre-base ligatures that
 386    ;; consist of three consonants.
 387    ;; 1        23   4   5   6    7
 388    (" ([^x ]*)x((KH)(SH)(M))(H)?x([^ ]*) "
 389     |
 390     (1 = *)
 391     0x09FE                              ; x
 392     (3 otf:beng=haln)                   ; KAhalant
 393     (4 otf:beng=haln)                   ; SSAhalant
 394     (5 =)                               ; NNA or MA
 395     0x09FE                              ; x
 396     (2 otf:beng=half,haln,pres)         ; ligature result
 397     0x09FE                              ; x
 398     (6 =)
 399     (7 = *)
 400     |)
 401
 402    ;; Two or more pre-bases plus base.  Give up.  Convert all
 403    ;; pre-bases into halant form.
 404    ;; 1        23             4       5
 405    (" ([^x ]*)x(([^x]H[JN]?)+)([^x])?x([^ ]*) "
 406     |
 407     (1 = *)
 408     0x09FE                              ; x
 409     (2 force-haln)                      ; halant forms
 410     (4 =)                               ; full form
 411     0x09FE                              ; x
 412     0x09FD                              ; pseudo result
 413     0x09FD                              ; pseudo result
 414     0x09FE                              ; x
 415     (5 = *)
 416     |)
 417
 418    ("." =))
 419   *)
 420
 421  ;; This is to remove ZWNJ and ZWJ.  The half-form-force-effect of ZWJ
 422  ;; is ignored.  Sorry.
 423  (force-haln
 424   (cond
 425    ("([^JN]*)[JN](.*)"
 426     (1 otf:beng=haln)                   ; part before the first ZWJ/ZWNJ -> halant forms
 427     (2 force-haln))                     ; recurse on the rest; the joiner itself is not emitted
 428    (".+"                                ; no joiner left
 429     otf:beng=haln)))                    ; whole remainder -> halant forms
 430  )
 431
 432 ;; Step 4 : Select the appropriate representation.  Only those
 433 ;; syllables that contain the virtual character x require
 434 ;; modification.
 435 (generator
 436  (0
 437   (cond
 438    ;; Only one glyph in the ligature section (between the second and
 439    ;; the third x).  It means a ligature was successfully generated.
 440    ;; C1halant and C2 (between the first and second x) are removed.
 441    (" ([^x ]*)x[^x]+x(.)x([^ ]*) "
 442     |
 443     (1 = *)                             ; part before the first x
 444     (2 =)                               ; the single ligature glyph
 445     (3 = *)                             ; part after the third x
 446     |)
 447
 448    ;; Otherwise halant and base forms are used.  The failed ligature
 449    ;; is removed.
 450    (" ([^x ]*)x([^x]+)x[^x]+x([^ ]*) "
 451     |
 452     (1 = *)                             ; part before the first x
 453     (2 = *)                             ; halant/half forms and base (first section)
 454     (3 = *)                             ; part after the third x
 455     |)
 456
 457    ;; No need to care the other cases.
 458    ("." =))
 459   *))
 460
 461 ;; Step 5 : Select appropriate glyph variants for fine adjustments.
 462 ;; Now the syllable boundary marks are removed so that the final step
 463 ;; can find word boundaries.
 464 (generator
 465  (0
 466   (cond
 467    (" ([^ ]+) "                         ; one syllable between boundary marks; the marks are not emitted
 468     (1 otf:beng=blws,abvs,psts,vatu))   ; apply GSUB blws, abvs, psts, vatu to the syllable
 469    ("."                                 ; any other single glyph
 470     [ otf:beng=+ ] ))                   ; NOTE(review): presumably a one-glyph cluster with positioning features only -- confirm against the FLT otf: syntax
 471   *)
 472  )
 473
 474 ;; Step 6 : Word initial substitute.  As the syllable boundaries have
 475 ;; been eliminated in the previous step, this rule is applied to a run
 476 ;; of Bengali glyphs, i.e. word by word.  We finally apply the init
 477 ;; feature to the word initial glyphs to get the final result.
 478 (generator
 479  (0
 480   ("(.)(.*)"
 481    (1 otf:beng=init)                    ; first glyph of the run -> GSUB init
 482    (2 = *))))                           ; remaining glyphs copied unchanged
 483
 484 ;; Local Variables:
 485 ;; mode: emacs-lisp
 486 ;; End: