;;; <li> BENG-OTF.flt
;;;
-;;; For Bengali OpenType fonts to draw the Bengali script. Tested with
-;;; MuktiNarrow.ttf <http://www.nongnu.org/freebangfont/index.html>
-;;; and
-;;; LikhanNormal.otf <http:http://www.stat.wisc.edu/~deepayan/Bengali/WebPage/Font/fonts.html>
+;;; For Bengali OpenType fonts to draw the Bengali script.
(font layouter beng-otf nil
(font (nil nil unicode-bmp :otf=beng=rphf)))
(category
- ;; C: consonant (excluding B, Y and R)
- ;; B: consonant BA (below)
- ;; Y: consonant YA (post)
- ;; R: consonant RA (reph, below)
+ ;; X: generic
+ ;; V: independent vowel
+ ;; C: consonant (except for R, B and Y)
+ ;; R: consonant RA
+ ;; B: consonant BA
+ ;; Y: consonant YA
+ ;; T: KHANDA TA
;; n: NUKTA
;; H: HALANT
- ;; m: MATRA (pre)
- ;; b: MATRA (below)
- ;; p: MATRA (post)
- ;; t: MATRA (two-part)
+ ;; m: vowel sign (pre)
+ ;; b: vowel sign (below)
+ ;; p: vowel sign (post)
;; A: vowel modifier (above)
;; a: vowel modifier (post)
- ;; V: independent vowel
+ ;; Z: internal use
;; N: ZWNJ (ZERO WIDTH NON-JOINER)
;; J: ZWJ (ZERO WIDTH JOINER)
- ;; E: ELSE
- ;;
- (0x200C ?N) ; ZWNJ
- (0x200D ?J) ; ZWJ
- (0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA
- (0x0980 0x09FF ?E) ; ELSE
- (0x0981 ?A) ; SIGN CANDRABINDU (above)
- (0x0982 0x0983 ?a) ; SIGN ANUSWAR, VISARGA (post)
- (0x0985 0x098C ?V) ; LETTER A .. VOCALIC L
- (0x098F 0x0990 ?V) ; LETTER E .. AI
- (0x0993 0x0994 ?V) ; LETTER O .. AU
+ (0x0980 0x09FF ?X) ; generic
+ (0x0980 ?Z) ; internal use
+ (0x0981 ?A) ; SIGN CANDRABINDU
+ (0x0982 0x0983 ?a) ; SIGN ANUSVARA .. VISARGA
+ (0x0985 0x0994 ?V) ; LETTER A .. AU
(0x0995 0x09B9 ?C) ; LETTER KA .. HA
(0x09AC ?B) ; LETTER BA
(0x09AF ?Y) ; LETTER YA
(0x09B0 ?R) ; LETTER RA
(0x09BC ?n) ; SIGN NUKTA
- (0x09BE ?p) ; VOWEL SIGN AA (post)
- (0x09BF ?m) ; VOWEL SIGN I (pre)
- (0x09C0 ?p) ; VOWEL SIGN II (post)
- (0x09C1 0x09C4 ?b) ; VOWEL SIGN U, UU, R, RR (below)
- (0x09C7 0x09C8 ?m) ; VOWEL SIGN E, AI (pre)
- (0x09CB 0x09CC ?t) ; VOWEL SIGN O, AU (two-part)
- (0x09CD ?H) ; SIGN VIRAMA (HASANT)
- (0x09CE ?C) ; LETTER KHANDA TA
+ (0x09BE ?p) ; VOWEL SIGN AA
+ (0x09BF ?m) ; VOWEL SIGN I
+ (0x09C0 ?p) ; VOWEL SIGN II
+ (0x09C1 0x09C4 ?b) ; VOWEL SIGN U .. RR
+ (0x09C7 0x09C8 ?m) ; VOWEL SIGN E .. AI
+ (0x09CD ?H) ; SIGN VIRAMA
+ (0x09CE ?T) ; LETTER KHANDA TA
(0x09D7 ?p) ; AU LENGTH MARK
- (0x09DC 0x09DD ?C) ; LETTER RRA, RHA
- (0x09DF ?C) ; LETTER YYA
+ (0x09DC 0x09DF ?C) ; LETTER RRA .. YYA
(0x09E0 0x09E1 ?V) ; LETTER VOCALIC RR, LL
- (0x09E2 0x09E3 ?b) ; VOWEL SIGN L .. LL (below)
- (0x09F0 0x09F1 ?C) ; LETTER RR', RR'' (assamese)
- (0x09FE ?x) ; mark #1 (internal use)
- (0x09FF ?y) ; mark #2 (internal use)
+ (0x09E2 0x09E3 ?b) ; VOWEL SIGN L .. LL
+ (0x09F0 0x09F1 ?C) ; LETTER RA WITH MIDDLE/LOWER DIAGONAL
+ (0x0964 0x0965 ?X) ; DANDA .. DOUBLE DANDA
+ (0x200C ?N) ; ZWNJ
+ (0x200D ?J) ; ZWJ
)
-;; Step 1 : Syllable identification. Recognised syllables are quoted
-;; by the pseudo character, which is generated by the command "|" and
-;; has the category " " (space).
+;; Normalize input: decompose two-part vowel signs; compose KHANDA TA and nukta consonants.
(generator
(0
(cond
- ;; Special case. The sequence "C1 H N C2 m" is reordered as
- ;; "C1 H m C2", not "m C1 H C2". Besides, "C1 H" is drawn in the
- ;; halant-form.
- ("([CRBY]n?H)N"
- < |
- (1 otf:beng=hln)
- | >)
-
- ;; Case A-C are for those syllables that end with an explicit vowel
- ;; mark and/or a vowel modifier. They are divided into three cases
- ;; for the readability of regular expression. The leading
- ;; consonant-Hasant repetition is analysed for reordering in the
- ;; next step. Two-part vowel, if any, is split for
- ;; canonicalisation.
-
- ;; Case A : A syllable ending with a vowel modifier.
- ;;1 23 4 5 6 7
- ("(RH)?(([CRBY]n?HJ?)*([CRBY]n?))([mbp]*)(t)?([Aa])"
- < |
- (1 = =)
- (2 set-marks)
- (5 = *)
- (6 split)
- (7 =)
- | >)
-
- ;; Case B : A syllable ending with a two-part vowel.
- ;;1 23 4 5
- ("(RH)?(([CRBY]n?HJ?)*([CRBY]n?))(t)"
- < |
- (1 = =)
- (2 set-marks)
- (5 split)
- | >)
-
- ;; Case C : A syllable ending with other vowel. Note that a
- ;; two-part vowel may be expressed with two vowel marks for
- ;; backward compatibility.
- ;;1 23 4 5
- ("(RH)?(([CRBY]n?HJ?)*([CRBY]n?))([mbp]+)"
- < |
- (1 = =)
- (2 set-marks)
- (5 = *)
- | >)
-
- ;; Case D : Ya-phalaa. Reorder H and Y for the next step.
- ;; The web page "Unicode FAQ for Indic Scripts and Languages"
- ;; <http://www.unicode.org/faq/indic.html> says "it should be
- ;; permissible for the Ya-phalla to be consistently formed by "ZWNJ
- ;; + VIRAMA + YA".
- ("([CRBY]n?N)(H)(Y)"
- < |
- (1 = *)
- (3 =)
- (2 =)
- | >)
-
- ;; Case E : No explicit vowel nor modifier. If the syllable ends
- ;; with a consonant, analyse it for reordering in the next step.
- ;; Otherwise, just identify the syllable without changing anything.
- ;;1 23 4
- ("(RH)?(([CRBY]n?HJ?)*[CRBY]n?)(HN|HJ|H)?"
- < |
- (1 = =)
- (2 set-marks)
- (4 = *)
- | >)
+ ;; Decompose two-part vowel signs.
+ ((0x09CB)
+ 0x09C7 0x09BE)
+ ((0x09CC)
+ 0x09C7 0x09D7)
+ ;; TA + HALANT + ZWJ -> KHANDA-TA
+ ((0x09A4 0x09CD 0x200D)
+ 0x09CE)
+ ;; consonant + NUKTA
+ ((0x09A1 0x09BC)
+ 0x09DC)
+ ((0x09A2 0x09BC)
+ 0x09DD)
+ ((0x09AF 0x09BC)
+ 0x09DF)
+ ("." =))
+ *))
- ;; Case F : Syllables that begin with an independent vowel. An
- ;; optional HYp sequence appears when this syllable represents the
- ;; sound "a" in English "bat" (see the FAQ above). If it appears,
- ;; we reorder the H and Y for the next step.
- ("(V)(HYp)?([aA])?"
- < | (1 =) (2 ("HY(p)" 0x09AF 0x09CD (1 =))) (3 =) | >)
+;; Syllable identification and reordering.
+(generator
+ (0
+ (cond
+ ;; Khanda-Ta
+ ("(RH)?(T)"
+ < | (2 =) (1 otf:beng=rphf+) | >)
+
+ ;; Standalone Ya-phalaa
+ ("JHY"
+ < | post | >)
+
+ ;; A syllable with a pre-base vowel sign.
+ ;;1 2 3 4 5 6 7 8 9
+ ("(RH)?([CRBY]n?(HCn?)*(H[RB])?(J?HY)?)(m)(p)?(A)?(a)?"
+ < | (6 =) (2 pre-below) (1 otf:beng=rphf+) (8 =) (2 post) (7 =) (9 =) | >)
+
+ ;; A syllable with a non-pre-base vowel sign.
+ ;;1 2 3 4 5 6 78 9 10 11
+ ("(RH)?([CRBY]n?(HCn?)*(H[RB])?(J?HY)?)(N)?J?((b)|(p))(A)?(a)?"
+ < | (6 =) (2 pre-below) (8 =) (1 otf:beng=rphf+) (10 =)
+ (2 post) (9 =) (11 =) | >)
+
+ ;; A syllable with a vowel modifier and no vowel signs.
+ ;;1 2 3 4 5 67 8
+ ("(RH)?([CRBY]n?(HCn?)*(H[RB])?(J?HY)?)((A)|(a))"
+ < | (2 pre-below) (1 otf:beng=rphf+) (7 =) (2 post) (8 =) | >)
+
+ ;; A syllable ending with a halant.
+ ;;1 2 3 4 5 6
+ ("(RH)?([CRBY]n?(HCn?)*(H[RB])?(J?HY)?)(H)?N?"
+ < | (2 pre-below) (6 =) (1 otf:beng=rphf+) (2 post) | >)
+
+ ;; A syllable starting with an independent vowel.
+ ;;1 2 3
+ ("(V)(J?HY)?(A?a?)"
+ < | (1 =) 0x0980 (2 post) (3 = *) | >)
("." =))
*)
- ;; Set mark #1 (x) at the position where below consonants begin, and
- ;; mark #2 (y) at the position to which below and above signs will be
- ;; moved.
- (set-marks
+ ;; Move a halant after the base consonant to the end.
+ ;; Put a special mark after the final below-base consonant.
+ ;; Remove post-base parts.
+ (pre-below
(cond
- ;; Ending with Y.
- ;;1 2 3 45 6
- ("([CRBY]n?(HJ?Cn?)*)(H)(([RB]H)*)(Y)"
- (1 = *) ; prebase & base
- 0x09FE ; mark #1
- (4 = *) ; below consonants
- 0x09FF ; mark #2
- (6 =) ; YA
- (3 =)) ; moved HASANT
- ;; Ending with R or B.
- ;;1 2 3 45
- ("([CRBY]n?(HJ?Cn?)*)(H)(([RB]H)*[RB])"
- (1 = *) ; prebase & base
- 0x09FE ; mark #1
- (4 = *) ; below consonants
- (3 =) ; moved HASANT
- 0x09FF) ; mark #2
+ ("(.+)(H)([RB])(J?HY)?$"
+ (1 = *) (3 =) (2 =) 0x0980)
+ ("([^J]+)J?HY$"
+ (1 = *) 0x0980)
(".+"
- = *
- 0x09FE ; mark #1
- 0x09FF))) ; mark #2
+ = * 0x0980)))
- ;; Split two-part dependent vowel signs for canonicalisation.
- (split
+ ;; Extract post-base parts and add a halant at the end.
+ ;; Produce nothing if there are no post-base parts.
+ (post
(cond
- ((0x09CB) 0x09C7 0x09BE)
- ((0x09CC) 0x09C7 0x09D7)))
- )
+ (".*(H)(Y)$"
+ (2 =) (1 =))
+ (".+"
+ ))))
-;; Step 2 : Move Reph and Matra if necessary. From now on, we care
-;; only for those syllables that have been identified in Step 1.
+;; Apply 'nukt' and 'akhn'.
(generator
(0
(cond
- ;; Special case: a single consonant and a Halant.
- (" (.)xy(HJ?) "
- |
- 0x09FE
- (1 =)
- (2 = *)
- 0x09FE
- |)
-
- ;; This is the most generic pattern. It follows Cases A, B, C and
- ;; E in Step 1. Now Mark #1 is used to indicate the critical part
- ;; that requires pre-base substitution in the following steps.
-
- ;; 1 2 3 4 5 6 7 8 9 10
- (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(p)?(A)?(a)?(HJ|H)? "
- |
- (5 =) ; [Mpre]
- ;; Actually, the nukt feature is not necessary for Bengali because
- ;; all the necessary Nukta forms are precomposed in the Unicode
- ;; standard. Even if a Nukta consonant is given in the form of
- ;; the combination of the base consonant and a Nukta sign, we can
- ;; safely perform the composition here because it does not affect
- ;; surrounding letters in the syllable. The Akhand ligature
- ;; operation is also applied here, before applying the half form
- ;; operation because the Mukti font generates Akhand ligatures
- ;; directly from the "C H C" sequence, not via the half form.
- 0x09FE ; begin Cpre & Cbase
- (2 otf:beng=nukt,akhn) ; {Cpre + H} + Cbase
- 0x09FE ; end Cpre & Cbase
- (3 otf:beng=blwf) ; {Cbelow + H}
- (6 =) ; [Mbelow]
- (1 otf:beng=rphf) ; [Reph]
- (8 =) ; [VMabove]
- (4 otf:beng=pstf) ; [Cpost + H]
- (7 =) ; [Mpost]
- (9 =) ; [VMpost]
- (10 = *) ; optional HASANT
- |)
-
- ;; Syllables that begin with an independent vowel (following up
- ;; Step 1, Case F). If a YH sequence exist, it is changed to the
- ;; post-base form. Syllables of this type do not require further
- ;; modification.
- (" (V)(YH)(.*) "
- |
- (1 =)
- (2 otf:beng=pstf)
- (3 = *)
- |)
-
- ;; Ya-phalaa (following up Step 1, Case D). Remove N and change YH
- ;; to the post base form. Syllables of this type do not require
- ;; further modification.
- (" ([CBRY]n?)N(YH) "
- |
- (1 =)
- (2 otf:beng=pstf)
- |)
-
+ (" ([^Z]+)(Z[^ ]*) "
+ | (1 otf:beng=nukt,akhn+) (2 = *) |)
("." =))
*))
-;; Step 3 : Now only those syllables that contain the pseudo character
-;; x require pre-base substition. This is the most complicated part
-;; in this FLT.
-
-;; If the sequence "C1 H C2" makes ligature L12, L12 replaces the
-;; original sequence.
-
-;; To test the availability of such a ligature, we try to generate it
-;; using the pre-base substitute feature, then see whether succeeded
-;; or not. In the case of failure, the pre-base feature does not
-;; change the original sequence.
-
-;; To create a ligature, the "C1 H" part must be first converted into
-;; the half form of C1. Creating the half form of a consonant always
-;; succeeds.
-
-;; ligature(half(C1,H),C2)
-;; ==> ligature(C1half,C2)
-;; ==> L12 ; success
-;; C1half C2 ; fail
-
-;; If the ligature is not available, the "C1 H" part must be converted
-;; into the _Halant_ (not half) form of C1. However, there is no way
-;; to reconvert C1half into C1halant nor to revert back to "C1 H".
-;; Thus we duplicate the critical part in two different forms so that
-;; we can select the appropriate one in the next step. The pseudo
-;; character x is used to indicate the boundaries.
-
-;; ... C1 H C2 ... ==> ... x C1halant C2 x L12 x ...
-
-;; If the length of the L12 part is one, ligature generation was
-;; successful. In this case we wipe out the duplicated C1halant and
-;; C2. Otherwise we remove L12.
-
-;; In very few cases (I found only one in the Mukti font), the "C1 H"
-;; part need to be converted into C1halant (instead of C1half) to make
-;; a ligature with C2. So when we try to generate a ligature form, we
-;; apply the GSUB features "half", "haln" and "pres" in this order.
-
-(category
- ;; C: consonant (excluding B, Y and R)
- ;; H: HALANT
- ;; N: ZWNJ (ZERO WIDTH NON-JOINER)
- ;; J: ZWJ (ZERO WIDTH JOINER)
- ;; E: ELSE
- ;;
- (0x200C ?N) ; ZWNJ
- (0x200D ?J) ; ZWJ
- (0x0964 0x0965 ?E) ; DANDA, DOUBLE DANDA
- (0x0980 0x09FF ?E) ; ELSE
- (0x09CD ?H) ; SIGN VIRAMA (HASANT)
- (0x0995 ?K) ; LETTER KA
- (0x09B7 ?S) ; LETTER SSA
- (0x09A3 ?M) ; LETTER NNA
- (0x09AE ?M) ; LETTER MA
- (0x09FE ?x) ; mark #1 (internal use)
- )
-
+;; Apply 'blwf' and 'pstf' to the relevant parts.
(generator
(0
(cond
-
- ;; One pre-base and base.
- ;; 1 23 4 5 6
- (" ([^x ]*)x((.H)([^J]))(H)?x([^ ]*) "
- |
- (1 = *)
- 0x09FE ; x
- (3 otf:beng=haln) ; C1halant
- (4 =) ; C2
- 0x09FE ; x
- (2 otf:beng=half,haln,pres) ; ligature result
- 0x09FE ; x
- (5 =)
- (6 = *)
- |)
-
- ;; One pre-base with ZWJ. According to the Unicode FAQ, the half
- ;; form is forced in this case. So we fake as if ligature
- ;; generation was failed.
- (" ([^x ]*)x(.H)J(.)?x([^ ]*) "
- |
- (1 = *)
- 0x09FE ; x
- (2 otf:beng=half) ; C1half
- (3 =) ; C2
- 0x09FE ; x
- 0x09FD ; pseudo result
- 0x09FD ; pseudo result
- 0x09FE ; x
- (4 = *)
- |)
-
- ;; One pre-base possibly with ZWNJ. Similar to above.
- (" ([^x ]*)x(.H)N?(.)?x([^ ]*) "
- |
- (1 = *)
- 0x09FE ; x
- (2 otf:beng=haln) ; C1halant
- (3 =) ; C2
- 0x09FE ; x
- 0x09FD ; pseudo result
- 0x09FD ; pseudo result
- 0x09FE ; x
- (4 = *)
- |)
-
- ;; Standalone base. There is nothing more to do.
- (" ([^x ]*)x(.)x([^ ]*) "
- |
- (1 = *)
- (2 =)
- (3 = *)
- |)
-
- ;; KA-SSA-NNA and KA-SSA-MA are the only pre-base ligatures that
- ;; consist of three consonants.
- ;; 1 23 4 5 6 7
- (" ([^x ]*)x((KH)(SH)(M))(H)?x([^ ]*) "
- |
- (1 = *)
- 0x09FE ; x
- (3 otf:beng=haln) ; KAhalant
- (4 otf:beng=haln) ; SSAhalant
- (5 =) ; NNA or MA
- 0x09FE ; x
- (2 otf:beng=half,haln,pres) ; ligature result
- 0x09FE ; x
- (6 =)
- (7 = *)
- |)
-
- ;; Two or more pre-bases plus base. Give up. Convert all
- ;; pre-bases into halant form.
- ;; 1 23 4 5
- (" ([^x ]*)x(([^x]H[JN]?)+)([^x])?x([^ ]*) "
- |
- (1 = *)
- 0x09FE ; x
- (2 force-haln) ; halant forms
- (4 =) ; full form
- 0x09FE ; x
- 0x09FD ; pseudo result
- 0x09FD ; pseudo result
- 0x09FE ; x
- (5 = *)
- |)
-
+ (" (N?m?.)([^Z]*)(Z)([^ ]*) "
+ | (1 = *) (2 otf:beng=blwf+) (3 =) (4 otf:beng=pstf+) |)
+ (" (YH) "
+ | (1 otf:beng=pstf+) |)
("." =))
- *)
+ *))
- ;; This is to remove ZWNJ and ZWJ. The half-form-force-effect of ZWJ
- ;; is ignored. Sorry.
- (force-haln
+;; Get pre-base and below-base conjuncts.
+(generator
+ (0
(cond
- ("([^JN]*)[JN](.*)"
- (1 otf:beng=haln)
- (2 force-haln))
- (".+"
- otf:beng=haln)))
- )
+ (" (N?m?)([^Z]+)(Z)([^ ]*) "
+ | (1 = *) (2 otf:beng=half,vatu,pres,blws+) (3 =) (4 = *) |)
+ ("." =))
+ *))
-;; Step 4 : Select the appropriate representation. Only those
-;; syllables that contain the virtual character x require
-;; modification.
+;; When the number of glyphs between a pre-base vowel sign and the
+;; post-below mark is more than one, move the pre-base vowel sign
+;; before the final glyph.
(generator
(0
(cond
- ;; Only one glyph in the ligature section (between the second and
- ;; the third x). It means a ligature was successfully generated.
- ;; C1halant and C2 (between the first and second x) are removed.
- (" ([^x ]*)x[^x]+x(.)x([^ ]*) "
- |
- (1 = *)
- (2 =)
- (3 = *)
- |)
-
- ;; Otherwise halant and base forms are used. The failed ligature
- ;; is removed.
- (" ([^x ]*)x([^x]+)x[^x]+x([^ ]*) "
- |
- (1 = *)
- (2 = *)
- (3 = *)
- |)
-
- ;; No need to care the other cases.
+ (" (N)?(m)([^Z]+)([^Z])Z([^ ]*) "
+ | (1 =) (3 = *) (2 =) (4 =) (5 = *) |)
+ (" ([^Z]+)Z([^ ]*) "
+ | (1 = *) (2 = *) |)
("." =))
*))
-;; Step 5 : Select appropriate glyph variants for fine adjustments.
-;; Now the syllable boundary marks are removed so that the final step
-;; can find word boundaries.
+;; Get matra conjuncts.
+;; Do not apply 'blws' to syllables that begin with ZWNJ.
(generator
(0
(cond
+ (" N([^ ]+) "
+ (1 otf:beng=init,pres,abvs,psts,haln))
(" ([^ ]+) "
- (1 otf:beng=blws,abvs,psts,vatu))
+ (1 otf:beng=init,pres,abvs,blws,psts,haln))
("."
- [ otf:beng=+ ] ))
- *)
- )
-
-;; Step 6 : Word initial substitute. As the syllable boundaries have
-;; been eliminated in the previous step, this rule is applied to a run
-;; of Bengali glyphs, i.e. word by word. We finally apply the init
-;; feature to the word initial gylphs to get the final result.
-(generator
- (0
- ("(.)(.*)"
- (1 otf:beng=init)
- (2 = *))))
+ [ otf:beng=+ ]))
+ *))
;; Local Variables:
;; mode: emacs-lisp