im-indic/te-rts.mim

   1 ;; te-rts.mim -- Telugu input method with RTS method
   2
   3 ;; Copyright 2005, 2006 Suraj N. Kurapati
   4 ;; Copyright (C) 2003, 2004, 2005, 2006
   5 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   6 ;;   Registration Number H15PRO112
   7
   8
   9 ;; This file is part of the m17n contrib; a sub-part of the m17n
  10 ;; library.
  11
  12 ;; The m17n library is free software; you can redistribute it and/or
  13 ;; modify it under the terms of the GNU Lesser General Public License
  14 ;; as published by the Free Software Foundation; either version 2.1 of
  15 ;; the License, or (at your option) any later version.
  16
  17 ;; The m17n library is distributed in the hope that it will be useful,
  18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20 ;; Lesser General Public License for more details.
  21
  22 ;; You should have received a copy of the GNU Lesser General Public
  23 ;; License along with the m17n library; if not, write to the Free
  24 ;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  25 ;; 02111-1307, USA.
  26
  27
  28
  29 (input-method te rts)   ; language tag "te", input method name "rts"
  30
  31 (description "Input method for Telugu script with RTS method.
  32 For details of RTS, see the page:
  33   <http://groups.google.com/groups?selm=Bv0A9M.27B%40rice.edu&output=gplain>.
  34
  35 This input method is based upon the Telugu Rice Transliteration
  36 Standard (RTS) specification and its Rice Inverse
  37 Transliterator (RIT) supplement.
  38
  39 The original RTS specification was written by Ananda Kishore and
  40 Rama Rao Kanneganti in 1992 and can presently be accessed in the
  41 \"soc.culture.indian.telugu\" newsgroup archives (see
  42 <http://groups.google.com/groups?selm=Bv0A9M.27B\%40rice.edu&output=gplain>).
  43
  44 The RIT supplement adds alternative combinations for
  45 transliteration but, in general, does not detract from the
  46 original specification (see
  47 <http://www.teluguworld.org/RIT/rit3.0/manual.html>). Whenever a
  48 supplemental combination conflicts with the original RTS, the RTS
  49 version has precedence and the supplemental combination is
  50 disregarded (such as 'ea' from RIT 3.0).
  51
  52 Finally, this input method deviates slightly from the RTS in the
  53 following ways:
  54
  55         (1) The combinations \"\@n\", \"\@2\", \"~c\", and \"~j\"
  56             yield \"�\" because their corresponding glyphs do not
  57             yet exist in Telugu's Unicode chart.
  58
  59         (2) The operators \"_\" and \"#\" are not implemented
  60             because the user's input is transliterated in real
  61             time, as opposed to being post-processed in one
  62             swoop.
  63
  64         (3) According to the RTS, the combination \"jn\" should
  65             yield \"జ్ఞ్\" but seems as if it may yield
  66             \"జ్న్\". To avoid confusion, the user is required to
  67             type \"j~n\" to generate \"జ్ఞ్\", and \"jn\" to
  68             generate \"జ్న్\".
  69
  70         (4) If it appears at the end of a word, the combination
  71             \"m\" yields \"ం\". The user can type \"m&\" to
  72             bypass this behavior and force \"m\" to yield \"మ్\".
  73 ")
  74
  75 (title "క")   ; title text for this input method: the Telugu letter KA
  76
  77 (map
  78  (starter
  79   ((S-\ )) ((C-@))      ; m17n stuff
  80   ; keys that may begin a transliteration: the init state reacts only to keys in this map
  81   ("a") ("b") ("c") ("d") ("e") ("f") ("g") ("h") ("i") ("j")
  82   ("k") ("l") ("m") ("n") ("o") ("p") ("r") ("s") ("t") ("u")
  83   ("v") ("w") ("x") ("y") ("z")
  84   ; "V" and "W" are listed because the consonant map binds them (RIT 3.0)
  85   ("A") ("B") ("C") ("D") ("E") ("G") ("H") ("I") ("J") ("K")
  86   ("L") ("M") ("N") ("O") ("P") ("R") ("S") ("T") ("U") ("V") ("W")
  87
  88   ("0") ("1") ("2") ("3") ("4") ("5") ("6") ("7") ("8") ("9")
  89
  90   ("@") ("|") ("~")
  91  )
  92
  93
  94
  95  ; these consonants undergo automatic sunna generation
  96  (consonant
  97   ; outputs carry a trailing virama (్) that a following dependent vowel deletes; "~c" and "~j" are placeholders
  98   ; row 1
  99   ("k" "క్")
 100
 101   ("kh" "ఖ్")
 102   ("kH" "ఖ్")
 103   ("K" "ఖ్")
 104   ("Kh" "ఖ్")
 105   ("KH" "ఖ్")
 106
 107   ("g" "గ్")
 108
 109   ("gh" "ఘ్")
 110   ("gH" "ఘ్")
 111   ("G" "ఘ్")
 112   ("Gh" "ఘ్")
 113   ("GH" "ఘ్")
 114
 115
 116   ; row 2
 117   ("c" "చ్")
 118   ("ch" "చ్")
 119   ("cH" "చ్")
 120
 121   ("~c" "�")  ; త్స (tsa) allophone of చ (cha)
 122
 123   ("C" "ఛ్")
 124   ("Ch" "ఛ్")
 125   ("CH" "ఛ్")
 126   ("c'" "ఛ్")       ; from RIT 2.0, 3.0
 127
 128   ("j" "జ్")
 129   ("z" "జ్")        ; from RIT 3.0
 130
 131   ("~j" "�")  ; డ్జ (dza) allophone of జ (ja)
 132
 133   ("jh" "ఝ్")
 134   ("jH" "ఝ్")
 135   ("J" "ఝ్")
 136   ("Jh" "ఝ్")
 137   ("JH" "ఝ్")
 138
 139
 140   ; row 3
 141   ("T" "ట్")
 142   ("t'" "ట్")
 143
 144   ("Th" "ఠ్")
 145   ("TH" "ఠ్")
 146   ("th'" "ఠ్")
 147   ("tH'" "ఠ్")
 148
 149   ("D" "డ్")
 150   ("d'" "డ్")
 151
 152   ("Dh" "ఢ్")
 153   ("DH" "ఢ్")
 154   ("dh'" "ఢ్")
 155   ("dH'" "ఢ్")
 156
 157
 158   ; row 4
 159   ("t" "త్")
 160
 161   ("th" "థ్")
 162   ("tH" "థ్")
 163
 164   ("d" "ద్")
 165
 166   ("dh" "ధ్")
 167   ("dH" "ధ్")
 168
 169
 170   ; row 5
 171   ("p" "ప్")
 172
 173   ("f" "ఫ్")
 174   ("P" "ఫ్")
 175   ("ph" "ఫ్")
 176   ("pH" "ఫ్")
 177   ("Ph" "ఫ్")
 178   ("PH" "ఫ్")
 179
 180   ("b" "బ్")
 181
 182   ("bh" "భ్")
 183   ("bH" "భ్")
 184   ("B" "భ్")
 185   ("Bh" "భ్")
 186   ("BH" "భ్")
 187
 188
 189   ; row 6
 190   ("l" "ల్")
 191
 192   ("v" "వ్")
 193   ("V" "వ్")        ; from RIT 3.0
 194   ("w" "వ్")
 195   ("W" "వ్")        ; from RIT 3.0
 196
 197   ("S" "శ్")
 198   ("s'" "శ్")       ; from RIT 2.0, 3.0
 199
 200   ("s" "స్")
 201
 202   ; conjunct shortcuts
 203   ("x" "క్ష్")
 204   ("ksh" "క్ష్")
 205   ("ksH" "క్ష్")
 206   ("ks" "క్స్") ; workaround for inputting "ks"
 207  )
 208
 209
 210
 211  ; these consonants do NOT undergo automatic sunna generation
 212  (consonant2
 213   ("~m" "ఙ్")
 214
 215   ("~n" "ఞ్")
 216
 217   ("N" "ణ్")
 218   ("nh" "ణ్")
 219   ("nH" "ణ్")
 220   ("n'" "ణ్")       ; from RIT 2.0, 3.0
 221   ; "n&" and "m&" give the plain consonant; the description documents "m&" as the way to bypass the sunna
 222   ("n&" "న్")
 223
 224   ("m&" "మ్")
 225
 226   ("y" "య్")
 227
 228   ("r" "ర్")
 229
 230   ("sh" "ష్")
 231   ("sH" "ష్")
 232   ("Sh" "ష్")       ; from RIT 3.0
 233   ("SH" "ష్")       ; from RIT 3.0
 234
 235   ("h" "హ్")
 236   ("H" "హ్")
 237
 238   ("L" "ళ్")
 239   ("lh" "ళ్")
 240   ("lH" "ళ్")
 241   ("Lh" "ళ్")
 242   ("LH" "ళ్")
 243   ("l'" "ళ్")       ; from RIT 2.0, 3.0
 244
 245   ("~r" "ఱ్")
 246   ("r''" "ఱ్")      ; from RIT 2.0, 3.0
 247  )
 248
 249
 250
 251  ; these consonants are converted into sunna by the automatic sunna generation logic, if they appear inside a word
 252  (sunna-inside-word
 253   ("n" "న్")
 254   ; the replacement itself is done in the second-sunna-inside-word state when a key from the consonant map follows
 255   ("m" "మ్")
 256  )
 257
 258
 259
 260  ; these sequences are converted into sunna by the automatic sunna generation logic, if they appear at the end of a word
 261  (sunna-endof-word
 262   ((m Tab) "ం ")
 263   ((m Return) "ం")
 264   ; NOTE(review): Tab is emitted as a space and Return emits nothing after the sunna -- confirm this is intended
 265
 266   ; the sequences below, using punctuation marks to denote the end of a word, are generated by the following shell command. keys in [1] the (starter) block, [2] the (independent) block, and [3] those which begin with the 'm' key are intentionally excluded from this command to ensure that they are transliterated normally.
 267   ; for ch in ' ' '!' '\"' '#' '$' '%' "'" '(' ')' '*' '+' ',' '-' '.' '/' '\\' ':' ';' '<' '=' '>' '?' '[' ']' '_' '`' '{' '}'; do echo "  (\"m${ch}\" \"ం${ch}\")"; done # exclude '^' '&' '|' '@' '~'
 268   ("m " "ం ")
 269   ("m!" "ం!")
 270   ("m\"" "ం\"")
 271   ("m#" "ం#")
 272   ("m$" "ం$")
 273   ("m%" "ం%")
 274   ("m'" "ం'")
 275   ("m(" "ం(")
 276   ("m)" "ం)")
 277   ("m*" "ం*")
 278   ("m+" "ం+")
 279   ("m," "ం,")
 280   ("m-" "ం-")
 281   ("m." "ం.")
 282   ("m/" "ం/")
 283   ("m\\" "ం\\")
 284   ("m:" "ం:")
 285   ("m;" "ం;")
 286   ("m<" "ం<")
 287   ("m=" "ం=")
 288   ("m>" "ం>")
 289   ("m?" "ం?")
 290   ("m[" "ం[")
 291   ("m]" "ం]")
 292   ("m_" "ం_")
 293   ("m`" "ం`")
 294   ("m{" "ం{")
 295   ("m}" "ం}")
 296  )
 297
 298
 299
 300  (independent
 301   ; standalone characters; the intermediate state uses this map, and every match returns to init
 302   ; అచ్చులు (vowels)
 303   ("a" "అ")
 304
 305   ("aa" "ఆ")
 306   ("a'" "ఆ")
 307   ("A" "ఆ")   ; from RIT 2.0, 3.0
 308
 309   ("i" "ఇ")
 310
 311   ("ee" "ఈ")
 312   ("ii" "ఈ")
 313   ("ia" "ఈ")
 314   ("i'" "ఈ")
 315   ("I" "ఈ")   ; from RIT 2.0, 3.0
 316
 317   ("u" "ఉ")
 318
 319   ("oo" "ఊ")
 320   ("uu" "ఊ")
 321   ("U" "ఊ")
 322   ("ua" "ఊ")
 323   ("u'" "ఊ")
 324
 325   ("R" "ఋ")
 326   ("r'" "ఋ")  ; from RIT 2.0
 327
 328   ("Ru" "ౠ")
 329   ("r'u" "ౠ") ; from RIT 2.0
 330
 331   ("~l" "ఌ")
 332
 333   ("~L" "ౡ")
 334
 335   ("e" "ఎ")
 336
 337   ("ea" "ఏ")
 338   ("ae" "ఏ")
 339   ("E" "ఏ")
 340   ("e'" "ఏ")
 341
 342   ("ai" "ఐ")
 343   ("ei" "ఐ")  ; from RIT 3.0
 344
 345   ("o" "ఒ")
 346
 347   ("oe" "ఓ")
 348   ("O" "ఓ")
 349   ("oa" "ఓ")
 350   ("o'" "ఓ")
 351
 352   ("au" "ఔ")
 353   ("ou" "ఔ")
 354   ("ow" "ఔ")  ; from RIT 3.0
 355
 356
 357   ; అంకెలు (numbers)
 358   ("0" "౦")
 359   ("1" "౧")
 360   ("2" "౨")
 361   ("3" "౩")
 362   ("4" "౪")
 363   ("5" "౫")
 364   ("6" "౬")
 365   ("7" "౭")
 366   ("8" "౮")
 367   ("9" "౯")
 368
 369
 370   ; punctuation
 371   ("|" "।")   ; from RIT 3.0
 372   ("||" "॥")  ; from Yudit
 373
 374
 375   ; additional modifiers
 376   ("M" "ం")   ; from "internal representation" section of RTS. This combination has been included because it is very widely used in RTS implementations which do not support automatic sunna generation and thus has become the defacto way of manually producing sunna.
 377
 378   ("@M" "ఁ")  ; అర్ధసున్న (ardhasunna), చంద్ర బిందు (chandra bindu)
 379   ("@m" "ఁ")  ; from RIT 3.0
 380
 381   ("@h" "ః")  ; విసర్గ  (visarga)
 382   ("@H" "ః")
 383
 384   ("@n" "�")  ; నకర పొల్లు  (nakara-pollu), నకర విరమ (nakara-virama)
 385   ("@N" "�")  ; from RIT 3.0
 386
 387   ("@2" "�")  ; అవగ్రహ  (avagraha)
 388
 389   ("^" "్‌")        ; పొల్లు (pollu), విరమ (virama), halant
 390
 391
 392   ; m17n stuff
 393   ((S-\ ) "‌")
 394   ((C-@) "‍")
 395  )
 396
 397
 398
 399  (dependent                  ; vowel signs typed after a consonant; (delete @-) first removes that consonant's trailing virama
 400   ("a" (delete @-) "")        ; inherent vowel: only the virama is removed
 401
 402   ("aa" (delete @-) "ా")
 403   ("a'" (delete @-) "ా")
 404   ("A" (delete @-) "ా")       ; from RIT 3.0
 405
 406   ("i" (delete @-) "ి")
 407
 408   ("ee" (delete @-) "ీ")
 409   ("ii" (delete @-) "ీ")
 410   ("ia" (delete @-) "ీ")
 411   ("i'" (delete @-) "ీ")
 412   ("I" (delete @-) "ీ")       ; from RIT 3.0
 413
 414   ("u" (delete @-) "ు")
 415
 416   ("oo" (delete @-) "ూ")
 417   ("uu" (delete @-) "ూ")
 418   ("U" (delete @-) "ూ")
 419   ("ua" (delete @-) "ూ")
 420   ("u'" (delete @-) "ూ")
 421
 422   ("R" (delete @-) "ృ")
 423   ("r'" (delete @-) "ృ")      ; from RIT 2.0
 424
 425   ("Ru" (delete @-) "ౄ")
 426   ("r'u" (delete @-) "ౄ")     ; from RIT 2.0
 427   ; the vocalic L / LL signs are emitted explicitly; an empty output would leave a bare consonant
 428   ("~l" (delete @-) "ౢ")      ; U+0C62 TELUGU VOWEL SIGN VOCALIC L
 429
 430   ("~L" (delete @-) "ౣ")      ; U+0C63 TELUGU VOWEL SIGN VOCALIC LL
 431
 432   ("e" (delete @-) "ె")
 433
 434   ("ea" (delete @-) "ే")
 435   ("ae" (delete @-) "ే")
 436   ("E" (delete @-) "ే")
 437   ("e'" (delete @-) "ే")
 438
 439   ("ai" (delete @-) "ై")
 440   ("ei" (delete @-) "ై")      ; from RIT 3.0
 441
 442   ("o" (delete @-) "ొ")
 443
 444   ("oe" (delete @-) "ో")
 445   ("O" (delete @-) "ో")
 446   ("oa" (delete @-) "ో")
 447   ("o'" (delete @-) "ో")
 448
 449   ("au" (delete @-) "ౌ")
 450   ("ou" (delete @-) "ౌ")
 451   ("ow" (delete @-) "ౌ")      ; from RIT 3.0
 452
 453
 454   ; additional modifiers
 455   ("^" (delete @-) "్‌")    ; పొల్లు (pollu), విరమ (virama), halant
 456  )
 457
 458
 459  ; m17n stuff: control-key maps referenced by the state machine
 460  (return
 461   ((Return)))               ; matches the Return key; no map action of its own
 462
 463  (backspace
 464   ((BackSpace) (undo)))     ; BackSpace runs the undo action
 465 )
 466
 467
 468
 469 ; state machine for transliteration
 470 (state
 471  (init
 472   (starter (pushback 1) (shift intermediate))   ; push the starter key back so that intermediate processes it
 473  )
 474  ; intermediate: entered from init; vowels typed here use the independent map
 475  (intermediate
 476   (consonant (shift second))
 477   (consonant2 (shift second))
 478   (sunna-inside-word (shift second-sunna-inside-word))
 479   (sunna-endof-word (shift init))
 480   (independent (shift init))
 481   (backspace)
 482   (return (shift init))
 483  )
 484  ; second: entered after a consonant; vowels typed here use the dependent map
 485  (second
 486   (consonant)
 487   (consonant2)
 488   (sunna-inside-word (shift second-sunna-inside-word))
 489   (sunna-endof-word (shift init))
 490   (dependent (shift init))
 491   (backspace)
 492   (return (shift init))
 493  )
 494  ; second-sunna-inside-word: entered after a key sequence from the sunna-inside-word map ("n" or "m")
 495  (second-sunna-inside-word
 496   (t (mark p))   ; on entering this state, set marker p at the current cursor position
 497   (consonant (move p) (delete @-) (delete @-) "ం" (move @>) (shift second))   ; go back to p, delete the two characters before it, insert sunna there, then return to the end
 498   (consonant2 (shift second))
 499   (sunna-inside-word)
 500   (sunna-endof-word (shift init))
 501   (dependent (shift init))
 502   (backspace)
 503  )
 504 )
 505
 506 ;; Local Variables:
 507 ;; coding: utf-8
 508 ;; mode: emacs-lisp
 509 ;; End: