im-indic/te-rts.mim

   1 ;; te-rts.mim -- Telugu input method with RTS method
   2
   3 ;; Copyright 2005, 2006 Suraj N. Kurapati
   4 ;; Copyright (C) 2003, 2004, 2005, 2006
   5 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   6 ;;   Registration Number H15PRO112
   7
   8
   9 ;; This file is part of the m17n contrib; a sub-part of the m17n
  10 ;; library.
  11
  12 ;; The m17n library is free software; you can redistribute it and/or
  13 ;; modify it under the terms of the GNU Lesser General Public License
  14 ;; as published by the Free Software Foundation; either version 2.1 of
  15 ;; the License, or (at your option) any later version.
  16
  17 ;; The m17n library is distributed in the hope that it will be useful,
  18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20 ;; Lesser General Public License for more details.
  21
  22 ;; You should have received a copy of the GNU Lesser General Public
  23 ;; License along with the m17n library; if not, write to the Free
  24 ;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  25 ;; 02111-1307, USA.
  26
  27
  28
   29 (input-method te rts)   ; identifies this method: language tag "te" (Telugu), method name "rts"
  30
  31 (description "Input method for Telugu script with RTS method.
   32 For details of RTS, see the page:
  33   <http://groups.google.com/groups?selm=Bv0A9M.27B%40rice.edu&output=gplain>.
  34
  35 This input method is based upon the Telugu Rice Transliteration
  36 Standard (RTS) specification and its Rice Inverse
  37 Transliterator (RIT) supplement.
  38
  39 The original RTS specification was written by Ananda Kishore and
  40 Rama Rao Kanneganti in 1992 and can presently be accessed in the
  41 \"soc.culture.indian.telugu\" newsgroup archives (see
  42 <http://groups.google.com/groups?selm=Bv0A9M.27B\%40rice.edu&output=gplain>).
  43
  44 The RIT supplement adds alternative combinations for
   45 transliteration but, in general, does not detract from the
  46 original specification (see
  47 <http://www.teluguworld.org/RIT/rit3.0/manual.html>). Whenever a
  48 supplemental combination conflicts with the original RTS, the RTS
  49 version has precedence and the supplemental combination is
  50 disregarded (such as 'ea' from RIT 3.0).
  51
  52 Finally, this input method deviates slightly from the RTS in the
  53 following ways:
  54
  55         (1) The combinations \"\@n\", \"\@2\", \"~c\", and \"~j\"
  56             yield \"�\" because their corresponding glyphs do not
  57             yet exist in Telugu's Unicode chart.
  58
  59         (2) The operators \"_\" and \"#\" are not implemented
  60             because the user's input is transliterated in real
  61             time, as opposed to being post-processed in one
  62             swoop.
  63
  64         (3) According to the RTS, the combination \"jn\" should
  65             yield \"జ్ఞ్\" but seems as if it may yield
  66             \"జ్న్\". To avoid confusion, the user is required to
  67             type \"j~n\" to generate \"జ్ఞ్\", and \"jn\" to
  68             generate \"జ్న్\".
  69
  70         (4) If it appears at the end of a word, the combination
   71             \"m\" yields \"ం\". The user can type \"m&\" to
  72             bypass this behavior and force \"m\" to yield \"మ్\".
  73 ")
  74
   75 (title "క")   ; title text for this input method: the Telugu letter ka
  76
  77 (map
  78  (starter
  79   ((S-\ )) ((C-@))      ; m17n stuff
  80
  81   ("a") ("b") ("c") ("d") ("e") ("f") ("g") ("h") ("i") ("j")
  82   ("k") ("l") ("m") ("n") ("o") ("p") ("r") ("s") ("t") ("u")
  83   ("v") ("w") ("x") ("y") ("z")
  84
  85   ("A") ("B") ("C") ("D") ("E") ("G") ("H") ("I") ("J") ("K")
  86   ("L") ("M") ("N") ("O") ("P") ("R") ("S") ("T") ("U")
  87
  88   ("0") ("1") ("2") ("3") ("4") ("5") ("6") ("7") ("8") ("9")
  89
  90   ("@") ("|") ("~")
  91  )
  92
  93
  94
  95  ; these consonants undergo automatic sunna generation
   96  (consonant                ; rules emit consonant + virama (్), except the two U+FFFD placeholders below
   97
   98   ; row 1
   99   ("k" "క్")
  100
  101   ("kh" "ఖ్")
  102   ("kH" "ఖ్")
  103   ("K" "ఖ్")
  104   ("Kh" "ఖ్")
  105   ("KH" "ఖ్")
  106
  107   ("g" "గ్")
  108
  109   ("gh" "ఘ్")
  110   ("gH" "ఘ్")
  111   ("G" "ఘ్")
  112   ("Gh" "ఘ్")
  113   ("GH" "ఘ్")
  114
  115
  116   ; row 2
  117   ("c" "చ్")
  118   ("ch" "చ్")
  119   ("cH" "చ్")
  120
  121   ("~c" "�")  ; త్స (tsa) allophone of చ (cha); placeholder U+FFFD. NOTE(review): Unicode 5.1 added U+0C58 TELUGU LETTER TSA -- consider using it (and updating the description)
  122
  123   ("C" "ఛ్")
  124   ("Ch" "ఛ్")
  125   ("CH" "ఛ్")
  126   ("c'" "ఛ్")       ; from RIT 2.0, 3.0
  127
  128   ("j" "జ్")
  129   ("z" "జ్")        ; from RIT 3.0
  130
  131   ("~j" "�")  ; డ్జ (dza) allophone of జ (ja); placeholder U+FFFD. NOTE(review): Unicode 5.1 added U+0C59 TELUGU LETTER DZA -- consider using it (and updating the description)
  132
  133   ("jh" "ఝ్")
  134   ("jH" "ఝ్")
  135   ("J" "ఝ్")
  136   ("Jh" "ఝ్")
  137   ("JH" "ఝ్")
  138
  139
  140   ; row 3
  141   ("T" "ట్")
  142   ("t'" "ట్")
  143
  144   ("Th" "ఠ్")
  145   ("TH" "ఠ్")
  146   ("th'" "ఠ్")
  147   ("tH'" "ఠ్")
  148
  149   ("D" "డ్")
  150   ("d'" "డ్")
  151
  152   ("Dh" "ఢ్")
  153   ("DH" "ఢ్")
  154   ("dh'" "ఢ్")
  155   ("dH'" "ఢ్")
  156
  157
  158   ; row 4
  159   ("t" "త్")
  160
  161   ("th" "థ్")
  162   ("tH" "థ్")
  163
  164   ("d" "ద్")
  165
  166   ("dh" "ధ్")
  167   ("dH" "ధ్")
  168
  169
  170   ; row 5
  171   ("p" "ప్")
  172
  173   ("f" "ఫ్")
  174   ("P" "ఫ్")
  175   ("ph" "ఫ్")
  176   ("pH" "ఫ్")
  177   ("Ph" "ఫ్")
  178   ("PH" "ఫ్")
  179
  180   ("b" "బ్")
  181
  182   ("bh" "భ్")
  183   ("bH" "భ్")
  184   ("B" "భ్")
  185   ("Bh" "భ్")
  186   ("BH" "భ్")
  187
  188
  189   ; row 6
  190   ("l" "ల్")
  191
  192   ("v" "వ్")
  193   ("V" "వ్")        ; from RIT 3.0
  194   ("w" "వ్")
  195   ("W" "వ్")        ; from RIT 3.0
  196
  197   ("S" "శ్")
  198   ("s'" "శ్")       ; from RIT 2.0, 3.0
  199
  200   ("s" "స్")
  201  )
 202
 203
 204
 205  ; these consonants do NOT undergo automatic sunna generation
  206  (consonant2               ; consonant + virama rules; the state machine leaves a preceding bare n/m unchanged before these
  207   ("~m" "ఙ్")
  208
  209   ("~n" "ఞ్")
  210
  211   ("N" "ణ్")
  212   ("nh" "ణ్")
  213   ("nH" "ణ్")
  214   ("n'" "ణ్")       ; from RIT 2.0, 3.0
  215
  216   ("n&" "న్")       ; explicit "n": listed here, so it does not enter the sunna-inside-word state
  217
  218   ("m&" "మ్")       ; explicit "m": bypasses the sunna conversion (see deviation (4) in the description)
  219
  220   ("y" "య్")
  221
  222   ("r" "ర్")
  223
  224   ("sh" "ష్")
  225   ("sH" "ష్")
  226   ("Sh" "ష్")       ; from RIT 3.0
  227   ("SH" "ష్")       ; from RIT 3.0
  228
  229   ("h" "హ్")
  230   ("H" "హ్")
  231
  232   ("L" "ళ్")
  233   ("lh" "ళ్")
  234   ("lH" "ళ్")
  235   ("Lh" "ళ్")
  236   ("LH" "ళ్")
  237   ("l'" "ళ్")       ; from RIT 2.0, 3.0
  238
  239   ("x" "క్ష్")
  240   ("ksh" "క్ష్")
  241   ("ksH" "క్ష్")
  242   ("ks" "క్స్") ; workaround for inputting "ks"
  243
  244   ("~r" "ఱ్")
  245   ("r''" "ఱ్")      ; from RIT 2.0, 3.0
  246  )
 247
 248
 249
 250  ; these consonants are converted into sunna by the automatic sunna generation logic, if they appear inside a word
  251  (sunna-inside-word        ; bare n/m: emitted as letter + virama first; the state machine may later replace it with sunna
  252   ("n" "న్")
  253
  254   ("m" "మ్")
  255  )
 256
 257
 258
 259  ; these sequences are converted into sunna by the automatic sunna generation logic, if they appear at the end of a word
  260  (sunna-endof-word         ; "m" followed by a word-ending key: emits sunna (ం) plus the text shown in each rule
  261   ((m Tab) "ం ")            ; NOTE(review): the output appears to end in a space, not a tab -- confirm
  262   ((m Return) "ం")          ; emits sunna only; this rule inserts no newline
  263
  264
  265   ; the sequences below, using punctuation marks to denote the end of a word, are generated by the following shell command. keys in [1] the (starter) block, [2] the (independent) block, and [3] those which begin with the 'm' key are intentionally excluded from this command to ensure that they are transliterated normally.
  266   ; for ch in ' ' '!' '\"' '#' '$' '%' "'" '(' ')' '*' '+' ',' '-' '.' '/' '\\' ':' ';' '<' '=' '>' '?' '[' ']' '_' '`' '{' '}'; do echo "  (\"m${ch}\" \"ం${ch}\")"; done # exclude '^' '&' '|' '@' '~'
  267   ("m " "ం ")
  268   ("m!" "ం!")
  269   ("m\"" "ం\"")
  270   ("m#" "ం#")
  271   ("m$" "ం$")
  272   ("m%" "ం%")
  273   ("m'" "ం'")
  274   ("m(" "ం(")
  275   ("m)" "ం)")
  276   ("m*" "ం*")
  277   ("m+" "ం+")
  278   ("m," "ం,")
  279   ("m-" "ం-")
  280   ("m." "ం.")
  281   ("m/" "ం/")
  282   ("m\\" "ం\\")
  283   ("m:" "ం:")
  284   ("m;" "ం;")
  285   ("m<" "ం<")
  286   ("m=" "ం=")
  287   ("m>" "ం>")
  288   ("m?" "ం?")
  289   ("m[" "ం[")
  290   ("m]" "ం]")
  291   ("m_" "ం_")
  292   ("m`" "ం`")
  293   ("m{" "ం{")
  294   ("m}" "ం}")
  295  )
 296
 297
 298
  299  (independent              ; standalone output: full vowel letters, digits, punctuation and modifiers
  300
  301   ; అచ్చులు (vowels)
  302   ("a" "అ")
  303
  304   ("aa" "ఆ")
  305   ("a'" "ఆ")
  306   ("A" "ఆ")   ; from RIT 2.0, 3.0
  307
  308   ("i" "ఇ")
  309
  310   ("ee" "ఈ")
  311   ("ii" "ఈ")
  312   ("ia" "ఈ")
  313   ("i'" "ఈ")
  314   ("I" "ఈ")   ; from RIT 2.0, 3.0
  315
  316   ("u" "ఉ")
  317
  318   ("oo" "ఊ")
  319   ("uu" "ఊ")
  320   ("U" "ఊ")
  321   ("ua" "ఊ")
  322   ("u'" "ఊ")
  323
  324   ("R" "ఋ")
  325   ("r'" "ఋ")  ; from RIT 2.0
  326
  327   ("Ru" "ౠ")
  328   ("r'u" "ౠ") ; from RIT 2.0
  329
  330   ("~l" "ఌ")
  331
  332   ("~L" "ౡ")
  333
  334   ("e" "ఎ")
  335
  336   ("ea" "ఏ")
  337   ("ae" "ఏ")
  338   ("E" "ఏ")
  339   ("e'" "ఏ")
  340
  341   ("ai" "ఐ")
  342   ("ei" "ఐ")  ; from RIT 3.0
  343
  344   ("o" "ఒ")
  345
  346   ("oe" "ఓ")
  347   ("O" "ఓ")
  348   ("oa" "ఓ")
  349   ("o'" "ఓ")
  350
  351   ("au" "ఔ")
  352   ("ou" "ఔ")
  353   ("ow" "ఔ")  ; from RIT 3.0
  354
  355
  356   ; అంకెలు (numbers)
  357   ("0" "౦")
  358   ("1" "౧")
  359   ("2" "౨")
  360   ("3" "౩")
  361   ("4" "౪")
  362   ("5" "౫")
  363   ("6" "౬")
  364   ("7" "౭")
  365   ("8" "౮")
  366   ("9" "౯")
  367
  368
  369   ; punctuation
  370   ("|" "।")   ; from RIT 3.0
  371   ("||" "॥")  ; from Yudit
  372
  373
  374   ; additional modifiers
  375   ("M" "ం")   ; from "internal representation" section of RTS. This combination has been included because it is very widely used in RTS implementations which do not support automatic sunna generation and thus has become the de facto way of manually producing sunna.
  376
  377   ("@M" "ఁ")  ; అర్ధసున్న (ardhasunna), చంద్ర బిందు (chandra bindu)
  378   ("@m" "ఁ")  ; from RIT 3.0
  379
  380   ("@h" "ః")  ; విసర్గ  (visarga)
  381   ("@H" "ః")
  382
  383   ("@n" "�")  ; నకర పొల్లు  (nakara-pollu), నకర విరమ (nakara-virama); placeholder U+FFFD. NOTE(review): Unicode 14.0 added U+0C5D TELUGU LETTER NAKAARA POLLU -- consider using it (and updating the description)
  384   ("@N" "�")  ; from RIT 3.0; same placeholder as "@n"
  385
  386   ("@2" "�")  ; అవగ్రహ  (avagraha); placeholder U+FFFD. NOTE(review): Unicode 5.1 added U+0C3D TELUGU SIGN AVAGRAHA -- consider using it (and updating the description)
  387
  388   ("^" "్‌")        ; పొల్లు (pollu), విరమ (virama), halant. NOTE(review): "^" is not in (starter), so this rule looks unreachable from init -- confirm
  389
  390
  391   ; m17n stuff
  392   ((S-\ ) "‌")       ; NOTE(review): the string holds an invisible character, presumably ZWNJ (U+200C) -- verify
  393   ((C-@) "‍")        ; NOTE(review): the string holds an invisible character, presumably ZWJ (U+200D) -- verify
  394  )
 395
 396
 397
 398  (dependent
 399   ("a" (delete @-) "")
 400
 401   ("aa" (delete @-) "ా")
 402   ("a'" (delete @-) "ా")
 403   ("A" (delete @-) "ా")       ; from RIT 3.0
 404
 405   ("i" (delete @-) "ి")
 406
 407   ("ee" (delete @-) "ీ")
 408   ("ii" (delete @-) "ీ")
 409   ("ia" (delete @-) "ీ")
 410   ("i'" (delete @-) "ీ")
 411   ("I" (delete @-) "ీ")       ; from RIT 3.0
 412
 413   ("u" (delete @-) "ు")
 414
 415   ("oo" (delete @-) "ూ")
 416   ("uu" (delete @-) "ూ")
 417   ("U" (delete @-) "ూ")
 418   ("ua" (delete @-) "ూ")
 419   ("u'" (delete @-) "ూ")
 420
 421   ("R" (delete @-) "ృ")
 422   ("r'" (delete @-) "ృ")      ; from RIT 2.0
 423
 424   ("Ru" (delete @-) "ౄ")
 425   ("r'u" (delete @-) "ౄ")     ; from RIT 2.0
 426
 427   ("~l" (delete @-) "")
 428
 429   ("~L" (delete @-) "")
 430
 431   ("e" (delete @-) "ె")
 432
 433   ("ea" (delete @-) "ే")
 434   ("ae" (delete @-) "ే")
 435   ("E" (delete @-) "ే")
 436   ("e'" (delete @-) "ే")
 437
 438   ("ai" (delete @-) "ై")
 439   ("ei" (delete @-) "ై")      ; from RIT 3.0
 440
 441   ("o" (delete @-) "ొ")
 442
 443   ("oe" (delete @-) "ో")
 444   ("O" (delete @-) "ో")
 445   ("oa" (delete @-) "ో")
 446   ("o'" (delete @-) "ో")
 447
 448   ("au" (delete @-) "ౌ")
 449   ("ou" (delete @-) "ౌ")
 450   ("ow" (delete @-) "ౌ")      ; from RIT 3.0
 451
 452
 453   ; additional modifiers
 454   ("^" (delete @-) "్‌")    ; పొల్లు (pollu), విరమ (virama), halant
 455  )
 456
 457
 458  ; m17n stuff
  459  (return                   ; matches the Return key; the rule itself has no action
  460   ((Return)))
  461
  462  (backspace                ; matches the BackSpace key and runs (undo)
  463   ((BackSpace) (undo)))
 464 )
 465
 466
 467
 468 ; state machine for transliteration
  469 (state
  470  (init                     ; idle state: only keys listed in (starter) are recognised
  471   (starter (pushback 1) (shift intermediate))   ; push the key back and shift, so intermediate translates it
  472  )
  473
  474  (intermediate             ; handles the first key of a unit after init's pushback
  475   (consonant (shift second))
  476   (consonant2 (shift second))
  477   (sunna-inside-word (shift second-sunna-inside-word))   ; bare n/m: a later consonant may turn it into sunna
  478   (sunna-endof-word (shift init))
  479   (independent (shift init))   ; standalone vowel, digit, punctuation or modifier
  480   (backspace)               ; the map's (undo) action runs; no state shift
  481   (return (shift init))
  482  )
  483
  484  (second                   ; a consonant ending in virama has just been produced
  485   (consonant)               ; no shift: further consonants are appended in this state
  486   (consonant2)
  487   (sunna-inside-word (shift second-sunna-inside-word))
  488   (sunna-endof-word (shift init))
  489   (dependent (shift init))  ; the vowel sign replaces the virama, ending the unit
  490   (backspace)
  491   (return (shift init))
  492  )
  493
  494  (second-sunna-inside-word  ; entered right after a bare n/m produced letter + virama
  495   (t (mark p))               ; on entering this state, record the current position as marker p
  496   (consonant (move p) (delete @-) (delete @-) "ం" (move @>) (shift second))   ; go back to p, delete the two characters before it (letter + virama), insert sunna, return to the end
  497   (consonant2 (shift second))   ; no replacement here: the n/m output is left as it is
  498   (sunna-inside-word)
  499   (sunna-endof-word (shift init))
  500   (dependent (shift init))
  501   (backspace)
  502  )                          ; NOTE(review): unlike intermediate and second, this state has no (return ...) branch -- confirm intent
  503 )
 504
 505 ;; Local Variables:
 506 ;; coding: utf-8
 507 ;; mode: emacs-lisp
 508 ;; End: