From: ntakahas Date: Thu, 28 Jun 2007 05:35:08 +0000 (+0000) Subject: Rewritten with new algorithm for Unicode 5.0. X-Git-Tag: REL-1-4-0~20 X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=64b644f3f4e02ef86d2fe4147f3b6852db0d0ab9;p=m17n%2Fm17n-db.git Rewritten with new algorithm for Unicode 5.0. --- diff --git a/FLT/ORYA-OTF.flt b/FLT/ORYA-OTF.flt index ebd6ef5..5840962 100644 --- a/FLT/ORYA-OTF.flt +++ b/FLT/ORYA-OTF.flt @@ -30,289 +30,202 @@ (font (nil nil unicode-bmp :otf=orya=rphf))) (category - ;; C: consonant (excluding Y and R) - ;; R: consonant RA (reph, below) + ;; C: consonant (except for R, B and Y) + ;; R: consonant RA ;; B: consonant (below) - ;; Y: consonant YA, YYA (post) + ;; Y: consonant (post) ;; n: NUKTA ;; H: HALANT - ;; m: MATRA (pre) - ;; u: MATRA (above) - ;; b: MATRA (below) - ;; p: MATRA (post) - ;; t: MATRA (two-part) + ;; m: vowel sign (pre) + ;; u: vowel sign (above) + ;; b: vowel sign (below) + ;; p: vowel sign (post) + ;; t: vowel sign (two-part) ;; A: vowel modifier (above) ;; a: vowel modifier (post) ;; V: independent vowel ;; N: ZWNJ (ZERO WIDTH NON-JOINER) ;; J: ZWJ (ZERO WIDTH JOINER) - ;; E: ELSE - ;; - (0x200C ?N) ; ZWNJ - (0x200D ?J) ; ZWJ - (0x0664 0x0665 ?E) ; DANDA, DOUBLE DANDA - (0x0B00 0x0B7F ?E) ; ELSE - (0x0B01 ?A) ; SIGN CANDRABINDU (above) - (0x0B02 0x0B03 ?a) ; SIGN ANUSWAR, VISARGA (post) - (0x0B05 0x0B0C ?V) ; LETTER A .. VOCALIC L - (0x0B0F 0x0B10 ?V) ; LETTER E .. AI - (0x0B13 0x0B14 ?V) ; LETTER O .. AU + ;; X: generic + ;; Z: internal use + (0x0B00 0x0B7F ?X) ; generic + (0x0B00 ?Z) ; internal use + (0x0B01 0x0B03 ?a) ; SIGN CANDRABINDU .. VISARGA + (0x0B05 0x0B14 ?V) ; LETTER A .. AU (0x0B15 0x0B39 ?C) ; LETTER KA .. HA (0x0B24 ?B) ; LETTER TA (0x0B28 ?B) ; LETTER NA - (0x0B2C ?B) ; LETTER BA - (0x0B2D ?B) ; LETTER BHA - (0x0B2E ?B) ; LETTER MA + (0x0B2C 0x0B2E ?B) ; LETTER BA .. 
MA (0x0B2F ?Y) ; LETTER YA (0x0B30 ?R) ; LETTER RA - (0x0B32 ?B) ; LETTER LA + (0x0B32 0x0B33 ?B) ; LETTER LA .. LLA (0x0B33 ?B) ; LETTER LLA - (0x0B35 ?B) ; LETTER VA (0x0B3C ?n) ; SIGN NUKTA - (0x0B3E ?p) ; VOWEL SIGN AA (post) - (0x0B3F ?u) ; VOWEL SIGN I (above) - (0x0B40 ?p) ; VOWEL SIGN II (post) - (0x0B41 0x0B43 ?b) ; VOWEL SIGN U, UU, R (below) - (0x0B47 ?m) ; VOWEL SIGN E (pre) - (0x0B48 0x0B4C ?t) ; VOWEL SIGN AI, O, AU (two-part) - (0x0B4D ?H) ; SIGN VIRAMA (HALANT) + (0x0B3E ?p) ; VOWEL SIGN AA + (0x0B3F ?u) ; VOWEL SIGN I + (0x0B40 ?p) ; VOWEL SIGN II + (0x0B41 0x0B43 ?b) ; VOWEL SIGN U .. VOCALIC R + (0x0B47 ?m) ; VOWEL SIGN E + (0x0B48 0x0B4C ?t) ; VOWEL SIGN AI .. AU + (0x0B4D ?H) ; SIGN VIRAMA (0x0B56 ?u) ; AI LENGTH MARK (0x0B57 ?p) ; AU LENGTH MARK - (0x0B5C 0x0B5D ?C) ; LETTER RRA, RHA + (0x0B5C 0x0B5D ?C) ; LETTER RRA .. RHA (0x0B5F ?Y) ; LETTER YYA - (0x0B60 0x0B61 ?V) ; LETTER VOCALIC RR, LL + (0x0B60 0x0B61 ?V) ; LETTER VOCALIC RR .. LL (0x0B71 ?C) ; LETTER WA - (0x0B7E ?x) ; mark #1 (internal use) - (0x0B7F ?y) ; mark #2 (internal use) + (0x0964 0x0965 ?X) ; DANDA .. DOUBLE DANDA (U+0964, U+0965) + (0x200C ?N) ; ZWNJ + (0x200D ?J) ; ZWJ ) -;; Step 1 : Syllable identification. Recognised syllables are quoted -;; by the pseudo character, which is generated by the command "|" and -;; has the category " " (space). +;; Decompose two-part vowel signs. +;; Move ZWJ before the consonant. (generator (0 (cond - ;; Case F : Syllables containing an independent vowel. - ("(RH)?(V)(a)?(A)?" - < | - (2 =) - (1 = =) - (3 =) - (4 =) - | >) - - ;; Case A-C are for those syllables that end with an explicit vowel - ;; mark and/or a vowel modifier. They are divided into three cases - ;; for readability of the regular expressions. The leading - ;; consonant-Halant repetition is analysed for reordering in the - ;; next step. A two-part vowel, if any, is split for - ;; canonicalisation. 
+ ((0x0B48) + 0x0B47 0x0B56) + ((0x0B4B) + 0x0B47 0x0B3E) + ((0x0B4C) + 0x0B47 0x0B57) + ("(Cn?)(J)" + (2 =) (1 = *)) + ("." =)) + *)) - ;; Case A : A syllable ending with a vowel modifier. - ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]*)(t)?([Aa])" +;; Syllable identification and reordering. +;; Do not apply 'rphf' if a syllable begins with ZWJ. +(generator + (0 + (cond + ;; A syllable with ZWJ and a pre-base vowel sign. + ;;1 23 4 5 6 7 + ("(J)(([CRBY]n?H)*[CRBY]n?)(m)(u)?(p)?(a)?" + < | (1 =) (4 =) (2 pre-below) (5 =) (2 post) (6 =) (7 =) | >) + + ;; A syllable with ZWJ and a non-pre-base vowel sign. + ;;1 23 45 6 7 + ("(J)(([CRBY]n?H)*[CRBY]n?)(([bu])|(p))(a)?" + < | (1 =) (2 pre-below) (5 =) (2 post) (6 =) (7 =) | >) + + ;; A syllable with ZWJ and a vowel modifier, but without vowel signs. + ;;1 23 4 + ("(J)(([CRBY]n?H)*[CRBY]n?)(a)" + < | (1 =) (2 pre-below) (2 post) (4 =) | >) + + ;; Add a ZWNJ explicitly when a syllable ends with a halant. + ;;1 23 4 5 + ("(J)(([CRBY]n?H)*[CRBY]n?)(H)?(N)?" + < | (1 =) (2 pre-below) (4 = 0x200C) (2 post) | >) + + ;; With a pre-base vowel sign, without a ZWJ. + ;;1 23 4 5 6 7 + ("(RH)?(([CRBY]n?H)*[CRBY]n?)(m)(u)?(p)?(a)?" < | - (1 = =) - (2 set-marks) - (5 = *) - (6 split) - (7 =) + (4 =) (2 pre-below) (5 =) (1 otf:orya=rphf) (2 post) (6 =) (7 =) | >) - ;; Case B : A syllable ending with a two-part vowel. - ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))(t)" - < | - (1 = =) - (2 set-marks) - (5 split) - | >) + ;; With a non-pre-base vowel sign, without a ZWJ. + ;; 1 23 45 6 7 + ("(RH)?(([CRBY]n?H)*[CRBY]n?)(([bu])|(p))(a)?" + < | (2 pre-below) (5 =) (1 otf:orya=rphf) (2 post) (6 =) (7 =) | >) - ;; Case C : A syllable ending with other vowel(s). Note that a - ;; two-part vowel may be expressed with two vowel marks for - ;; backward compatibility. - ("(RH)?(([CRBY]n?H[NJ]?)*([CRBY]n?))([mbup]+)" - < | - (1 = =) - (2 set-marks) - (5 = *) - | >) + ;; With a vowel modifier, without vowel signs or a ZWJ. 
+ ;;1 23 4 + ("(RH)?(([CRBY]n?H)*[CRBY]n?)(a)" + < | (2 pre-below) (1 otf:orya=rphf) (2 post) (4 =) | >) - ;; Case E : No explicit vowel nor modifier. If the syllable ends - ;; with a consonant, analyse it for reordering in the next step. - ;; Otherwise, just identify the syllable without changing anything. - ;;1 23 4 - ("(RH)?(([CRBY]n?H[NJ]?)*[CRBY]n?)(HN|HJ|H)?" - < | - (1 = =) - (2 set-marks) - (4 = *) - | >) + ;; Add a ZWNJ explicitly when a syllable ends with a halant. + ;;1 23 4 5 + ("(RH)?(([CRBY]n?H)*[CRBY]n?)(H)?(N)?" + < | (2 pre-below) (1 otf:orya=rphf) (4 = 0x200C) (2 post) | >) + + ;; A syllable starting with an independent vowel. + ("Va?" + < | = * | >) ("." =)) *) - ;; Set mark #1 (x) at the position where below consonants begin, and - ;; mark #2 (y) at the position to which below and above signs will be - ;; moved. - (set-marks + ;; Move a halant after the base consonant to the end. + ;; Fill the resulting gap with a special mark. + ;; Remove post-base parts. + (pre-below (cond - ;; Ending with Y. - ;;1 2 3 45 6 - ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*)(Y)$" - (1 = *) ; prebase & base - 0x0B7E ; below begin - (4 = *) ; below consonants - 0x0B7F ; below end - (6 =) ; YA - (3 =)) ; moved HALANT - ;; Ending with R or B. - ;;1 2 3 45 - ("([CRBY]n?(H[NJ]?Cn?)*)(H)(([RB]H)*[RB])$" - (1 = *) ; prebase & base - 0x0B7E ; below begin - (4 = *) ; below consonants - (3 =) ; moved HALANT - 0x0B7F) ; below end - (".+" - = * - 0x0B7E ; below begin - 0x0B7F))) ; below end - - ;; Split two-part dependent vowel signs for canonicalisation. - (split + ("([CRBYnH]*[CYn])H([RBH]+)[YH]+$" + (1 = *) 0x0B00 (2 = *)) + ("([CRBYnH]*[CYn])(H)([RBH]+)$" + (1 = *) 0x0B00 (3 = *) (2 =)) + ("([CRBYnH]*[Cn])[YH]*$" + (1 = *) 0x0B00) + ("([RB]n?)H([RBH]*)[YH]+$" + (1 = *) 0x0B00 (2 = *)) + ("([RB]n?)(H)([RBH]*)$" + (1 = *) 0x0B00 (3 = *) (2 =)) + ("([RBY]n?)[YH]*$" + (1 = *) 0x0B00))) + + ;; Extract post-base parts and add a halant at the end. 
+ ;; Produce nothing if there are no post-base parts. + (post (cond - ((0x0B48) 0x0B47 0x0B56) - ((0x0B4B) 0x0B47 0x0B3E) - ((0x0B4C) 0x0B47 0x0B57))) + ("[CRBYnH]*[CRBn]H([YH]+)$" + (1 = *) 0x0B4D) + ("Yn?H(YH)+$" + (1 = *) 0x0B4D) + (".+" + ))) ) -;; Step 2 : Move Reph and Matra if necessary. From now on, we care -;; only for those syllables that have been identified in Step 1. +;; Apply language forms to the relevant segments. (generator (0 (cond - ;; Special case: a single consonant and a Halant. - (" (.)xy(H[NJ]?) " - | - (1 =) - (2 = *) - |) - - ;; This is the most generic pattern. It follows Case A-C and a - ;; part of Case E in Step 1. Now Mark #1 is used to indicate the - ;; critical part that requires pre-base substitution in the - ;; following steps. - - ;; 1 2 3 4 5 6 7 8 9 10 11 - (" (RH)?([^ xy]+)x([^ y]*)y(YH)?(m)?(b)?(u)?(p)?(A)?(a)?(HN|HJ|H)? " - | - (5 =) ; [Mpre] - ;; We can safely perform Nukta composition here because it does - ;; not affect surrounding letters in the syllable. The Akhand - ;; ligature operation is also applied here, before applying the - ;; half form operation because the Utkal font generates Akhand - ;; ligatures directly from the "C H C" sequence, not via the half - ;; form. - 0x0B7E ; begin Cpre & Cbase - (2 otf:orya=nukt,akhn+) ; {Cpre + H} + Cbase - 0x0B7E ; end Cpre & Cbase - (3 otf:orya=blwf+) ; {Cbelow + H} - (6 =) ; [Mbelow] - (7 =) ; [Mabove] - (1 otf:orya=rphf+) ; [Reph] - (4 otf:orya=pstf+) ; [Cpost + H] - (8 =) ; [Mpost] - (9 =) ; [VMabove] - (10 =) ; [VMpost] - (11 = *) ; optional HALANT - |) + ;; If a syllable contains a ZWNJ, render the preceding halant explicitly. + (" ([^Z]+)(Z)([^N]*)(HN)([^ ]*) " + | (1 otf:orya=nukt,akhn,half+) (2 =) (3 otf:orya=blwf+) (4 = =) + (5 otf:orya=pstf+) |) - ;; Syllables that begin with an independent vowel (following up - ;; Step 1, Case F). Syllables of this type do not require further - ;; modification. 
- (" (V)(RH)(.*) " - | - (1 =) - (2 otf:orya=rphf+) - (3 = *) - |) + (" (J?m?)([^Z]+)(Z)([^ ]*) " + | (1 = *) (2 otf:orya=nukt,akhn,half+) (3 =) (4 otf:orya=blwf,pstf+) |) ("." =)) *)) -;; Step 3 : Now only those syllables that contain the pseudo character -;; x require pre-base substition. Unlike the Mukti font for Bengali, -;; the Utkal font can produce the ligature for "C1 H C2" from -;; "C1halant" and "C2". If such a ligature is not available, we get a -;; sequence consisting of "C1halant" and "C2", which is satisfactory. - +;; Apply 'pres' to get pre-base conjuncts. (generator (0 (cond - (" (.H)J " - | - (1 otf:orya=half+) - |) - (" (.H)N? " - | - (1 otf:orya=haln+) - |) - (" ([^x ]?x)([^x ]*)(x[^ ]*) " - | - (1 = *) - (2 pres) - (3 = *) - |) + (" (J?m?)([^Z]+)(Z)([^ ]*) " + | (1 = *) (2 otf:orya=pres+) (3 =) (4 = *) |) ("." =)) - *) - - (pres - (cond - ("([^NJ]*)(.H)J(.*)" - (1 otf:orya=haln,pres+) - (2 otf:orya=half+) - (3 pres)) - ("([^N]*)(H)N(.*)" - (1 otf:orya=haln,pres+) - (2 =) - (3 pres)) - (".*" - otf:orya=haln,pres+))) - - ) - -;; Step 4 : Mpre/Cpre reordering. If the pre-base substitution in -;; the previous step results in more than one glyph, and there is an -;; Mpre in this syllable, then move the Mpre before the Cbase. -;; i.e. [Mpre]{Kh}Kf... -> {Kh}[Mpre]Kf... + *)) +;; When the number of glyphs between a pre-base vowel sign and the +;; post-base mark is more than one, move the pre-base vowel sign +;; before the base glyph. (generator (0 (cond - (" ([^x ])x([^x ]+)([^x ])x([^x ]*) " - | - (2 = *) - (1 =) - (3 =) - (4 = *) - |) - (" ([^x ])?x([^x ]*)x([^ ]*) " - | - (1 =) - (2 = *) - (3 = *) - |) + (" (J)?(m)([^Z]+)([^Z])Z([^N ]*)N?([^ ]*) " + | (1 =) (3 = *) (2 =) (4 =) (5 = *) (6 = *) |) + (" ([^Z]+)Z([^N ]*)N?([^ ]*) " + | (1 = *) (2 = *) (3 = *) |) ("." =)) *)) -;; Step 5 : Substitutions & positioning. - +;; Apply other features. +;; Do not apply 'vatu' and 'blws' if there is a ZWJ. 
+;; The 'pres' feature is applied again for the pre-base vowel sign. (generator (0 (cond - (" ([^ ]*) " - ;; FIXME : The pres below is for the TTA ligature in the Utkal - ;; font. It should be removed once the font is updated. - (1 otf:orya=vatu,abvs,blws,psts,pres)) + (" J([^ ]+) " + (1 otf:orya=pres,abvs,pstp,haln)) + (" ([^ ]+) " + (1 otf:orya=vatu,pres,abvs,blws,pstp,haln)) ("." [ otf:orya=+ ])) *))