im/te-rts.mim

   1 ;; te-rts.mim -- Telugu input method with RTS method
   2
   3 ;; Copyright (C) 2003, 2004, 2005, 2006
   4 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   5 ;;   Registration Number H15PRO112
   6 ;; Copyright 2005, 2006, 2010 Suraj N. Kurapati <sunaku@gmail.com>
   7 ;; Copyright 2006 Chaitanya Kamisetty <chaitanya@atc.tcs.co.in>
   8
   9
  10 ;; This file is part of the m17n contrib; a sub-part of the m17n
  11 ;; library.
  12
  13 ;; The m17n library is free software; you can redistribute it and/or
  14 ;; modify it under the terms of the GNU Lesser General Public License
  15 ;; as published by the Free Software Foundation; either version 2.1 of
  16 ;; the License, or (at your option) any later version.
  17
  18 ;; The m17n library is distributed in the hope that it will be useful,
  19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 ;; Lesser General Public License for more details.
  22
  23 ;; You should have received a copy of the GNU Lesser General Public
  24 ;; License along with the m17n library; if not, write to the Free
  25 ;; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  26 ;; Boston, MA 02110-1301, USA.
  27
  28
  29 (input-method te rts) ; language tag "te", input method name "rts"
  30
  31 (description "Input method for Telugu script with RTS method.
  32 For the detail of RTS, see the page:
  33   <http://groups.google.com/groups?selm=Bv0A9M.27B@rice.edu>.
  34
  35 This input method is based on the Telugu Rice Transliteration Standard (RTS)
  36 specification[1] and its Rice Inverse Transliterator (RIT) supplement[2].
  37
  38 The original RTS specification was written by Ananda Kishore and Rama Rao
  39 Kanneganti in 1992 and can presently be accessed in the archives[1] of the
  40 'soc.culture.indian.telugu' USENET newsgroup.
  41
  42 The RIT supplement[2] enriches RTS with alternative combinations.  However,
  43 in cases where RIT and RTS define conflicting mappings for the same
  44 combination, such as 'ea', only the RTS mapping is honored.
  45
  46 Finally, this input method deviates from the RTS in the following ways:
  47
  48 * The combination '\@n' yields '�' because its corresponding glyph does not
  49   yet exist in the Telugu unicode chart.
  50
  51 * The combination 'm' yields 'ం' if it appears at the end of a word.  The
  52   user can type 'm&' to bypass this behavior and force 'm' to yield 'మ్'.
  53
  54 * The sunna prevention operator '&' can be used to force a more literal
  55   transliteration of consonant compounds such as 'jn' by writing 'j&n'.
  56
  57 [1]: http://groups.google.com/groups?selm=Bv0A9M.27B@rice.edu
  58 [2]: http://www.teluguworld.org/RIT/rit3.0/manual.html
  59 ")
  60
  61 (title "క") ; title text for this input method (the Telugu letter క)
  62
  63 (map
  64  (starter
  65   ; Every key sequence listed here may begin a transliteration unit.  The
  66   ; `init' state pushes the key back so `intermediate' can interpret it.
  67   ((S-\ )) ((C-@))
  68   ("@") ("|") ("~") ("#")
  69
  70   ("0") ("1") ("2") ("3") ("4") ("5") ("6") ("7") ("8") ("9")
  71
  72   ("a") ("b") ("c") ("d") ("e") ("f") ("g") ("h") ("i") ("j") ("k") ("l") ("m")
  73   ("n") ("o") ("p") ("r") ("s") ("t") ("u") ("v") ("w") ("x") ("y") ("z")
  74
  75   ("A") ("B") ("C") ("D") ("E") ("G") ("H") ("I") ("J") ("K") ("L")
  76   ("M") ("N") ("O") ("P") ("R") ("S") ("T") ("U") ("V") ("W")
  77   ; There is no entry for "q", "F", "Q", "X", "Y" or "Z".
  78  )
  79
  80  (consonant ; consonants that convert a preceding bare 'n'/'m' into a sunna
  81   ; Each rule ends in a virama (్); the `dependent' rules strip it via (delete @-).
  82   ;-------------------------------------------------------------------------
  83   ; row 1 - క ఖ గ ఘ ఙ
  84   ;-------------------------------------------------------------------------
  85
  86   ("k" "క్")
  87
  88   ("kh" "ఖ్")
  89   ("kH" "ఖ్")
  90   ("K" "ఖ్")
  91   ("Kh" "ఖ్")
  92   ("KH" "ఖ్")
  93
  94   ("g" "గ్")
  95
  96   ("gh" "ఘ్")
  97   ("gH" "ఘ్")
  98   ("G" "ఘ్")
  99   ("Gh" "ఘ్")
 100   ("GH" "ఘ్")
 101
 102   ("~m" "ఙ్")
 103
 104   ;-------------------------------------------------------------------------
 105   ; row 2 - చ ఛ జ ఝ ఞ
 106   ;-------------------------------------------------------------------------
 107
 108   ("c" "చ్")
 109   ("ch" "చ్")
 110   ("cH" "చ్")
 111
 112   ("~c" "ౘ్") ; virama required: a following vowel rule deletes one trailing character
 113
 114   ("C" "ఛ్")
 115   ("Ch" "ఛ్")
 116   ("CH" "ఛ్")
 117   ("c'" "ఛ్") ; from RIT 2.0, 3.0
 118
 119   ("j" "జ్")
 120   ("z" "జ్") ; from RIT 3.0
 121
 122   ("~j" "ౙ్") ; virama required, same reason as "~c"
 123
 124   ("jh" "ఝ్")
 125   ("jH" "ఝ్")
 126   ("J" "ఝ్")
 127   ("Jh" "ఝ్")
 128   ("JH" "ఝ్")
 129
 130   ("~n" "ఞ్")
 131
 132   ;-------------------------------------------------------------------------
 133   ; row 3 - ట ఠ డ ఢ ణ
 134   ;-------------------------------------------------------------------------
 135
 136   ("T" "ట్")
 137   ("t'" "ట్")
 138
 139   ("Th" "ఠ్")
 140   ("TH" "ఠ్")
 141   ("th'" "ఠ్")
 142   ("tH'" "ఠ్")
 143
 144   ("D" "డ్")
 145   ("d'" "డ్")
 146
 147   ("Dh" "ఢ్")
 148   ("DH" "ఢ్")
 149   ("dh'" "ఢ్")
 150   ("dH'" "ఢ్")
 151
 152   ("N" "ణ్")
 153   ("nh" "ణ్")
 154   ("nH" "ణ్")
 155   ("n'" "ణ్") ; from RIT 2.0, 3.0
 156
 157   ;-------------------------------------------------------------------------
 158   ; row 4 - త థ ద ధ న
 159   ;-------------------------------------------------------------------------
 160
 161   ("t" "త్")
 162
 163   ("th" "థ్")
 164   ("tH" "థ్")
 165
 166   ("d" "ద్")
 167
 168   ("dh" "ధ్")
 169   ("dH" "ధ్")
 170   ; ("n" "న్") is defined below in sunna-inside-word
 171   ;-------------------------------------------------------------------------
 172   ; row 5 - ప ఫ బ భ మ
 173   ;-------------------------------------------------------------------------
 174
 175   ("p" "ప్")
 176
 177   ("f" "ఫ్")
 178   ("P" "ఫ్")
 179   ("ph" "ఫ్")
 180   ("pH" "ఫ్")
 181   ("Ph" "ఫ్")
 182   ("PH" "ఫ్")
 183
 184   ("b" "బ్")
 185
 186   ("bh" "భ్")
 187   ("bH" "భ్")
 188   ("B" "భ్")
 189   ("Bh" "భ్")
 190   ("BH" "భ్")
 191   ; ("m" "మ్") is defined below in sunna-inside-word
 192   ;-------------------------------------------------------------------------
 193   ; row 6 - య ర ల వ శ ష స హ ళ క్ష ఱ
 194   ;-------------------------------------------------------------------------
 195
 196   ; ("y" "య్") is defined below in consonant-without-sunna
 197
 198   ; ("r" "ర్") is defined below in consonant-without-sunna
 199
 200   ("l" "ల్")
 201
 202   ("v" "వ్")
 203   ("V" "వ్") ; from RIT 3.0
 204   ("w" "వ్")
 205   ("W" "వ్") ; from RIT 3.0
 206
 207   ("S" "శ్")
 208   ("s'" "శ్") ; from RIT 2.0, 3.0
 209
 210   ("sh" "ష్")
 211   ("sH" "ష్")
 212   ("Sh" "ష్") ; from RIT 3.0
 213   ("SH" "ష్") ; from RIT 3.0
 214
 215   ("s" "స్")
 216
 217   ("h" "హ్")
 218   ("H" "హ్")
 219
 220   ("L" "ళ్")
 221   ("lh" "ళ్")
 222   ("lH" "ళ్")
 223   ("Lh" "ళ్")
 224   ("LH" "ళ్")
 225   ("l'" "ళ్") ; from RIT 2.0, 3.0
 226
 227   ("x" "క్ష్")
 228   ("ksh" "క్ష్")
 229   ("ksH" "క్ష్")
 230   ("ks" "క్స్") ; disambiguation for this input method's 1-character lookahead
 231
 232   ("~r" "ఱ్")
 233   ("r''" "ఱ్") ; from RIT 2.0, 3.0
 234
 235   ;---------------------------------------------------------------------------
 236   ; compounds
 237   ;---------------------------------------------------------------------------
 238
 239   ("jn" "జ్ఞ్")
 240   ("j&n" "జ్న్") ; apply sunna prevention operator to produce literal compound
 241
 242   ("dd'" "డ్డ్") ; from RIT 3.0
 243   ("dd" "ద్ద్") ; disambiguation for this input method's 1-character lookahead
 244
 245   ("tt'" "ట్ట్") ; from RIT 3.0
 246   ("tt" "త్త్") ; disambiguation for this input method's 1-character lookahead
 247  )
 248
 249  (consonant-without-sunna
 250   ; Rules exempt from automatic sunna generation.  Both groups follow the
 251   ; "sunna generation" section of the RIT 3.0 specification.
 252
 253   ; Explicit opt-out, quoting the specification:
 254   ;   You can prevent a sunna generation by writing 'n&' or 'm&'.
 255   ("m&" "మ్") ("n&" "న్")
 256
 257   ; Exempt consonants, quoting the specification:
 258   ;   when 'n' or 'm' is followed by a consonant except 'r' or 'y' RIT
 259   ;   assumes it to be a sunna
 260   ("y" "య్") ("r" "ర్")
 261
 262   ; In `second-sunna-inside-word' these rules only shift to `second'; the
 263   ; న్/మ్ that was already inserted is not rewritten.
 264
 265  )
 266
 267  (sunna-inside-word
 268   ; Bare 'n'/'m' is first emitted as న్/మ్; `second-sunna-inside-word' swaps it for ం if a `consonant' rule follows.
 269   ("m" "మ్") ("n" "న్")
 270  )
 271
 272  (sunna-endof-word
 273   ; 'm' followed by whitespace or punctuation: emit a sunna (ం) plus that character.
 274   ;-------------------------------------------------------------------------
 275   ; whitespace
 276   ;-------------------------------------------------------------------------
 277
 278   ("m " "ం ")
 279   ((m Tab) "ం\t")
 280   ((m Return) "ం\n")
 281
 282   ;-------------------------------------------------------------------------
 283   ; punctuation
 284   ;-------------------------------------------------------------------------
 285   ;
 286   ; The sequences below are generated by this Bourne shell script:
 287   ;
 288   ;     for ch in '!' '\"' '#' '$' '%' "'" '(' ')' '*' '+' ',' '-' '.' \
 289   ;               '/' '\\' ':' ';' '<' '=' '>' '?' '[' ']' '_' '`' '{' '}'
 290   ;     do echo "  (\"m${ch}\" \"ం${ch}\")"; done
 291   ;
 292   ; Sequences ending with '^' '&' '|' '@' '~' are omitted from the above
 293   ; loop because those punctuation marks already serve a purpose in this
 294   ; input method.
 295   ;
 296   ("m!" "ం!")
 297   ("m\"" "ం\"")
 298   ("m#" "ం#")
 299   ("m$" "ం$")
 300   ("m%" "ం%")
 301   ("m'" "ం'")
 302   ("m(" "ం(")
 303   ("m)" "ం)")
 304   ("m*" "ం*")
 305   ("m+" "ం+")
 306   ("m," "ం,")
 307   ("m-" "ం-")
 308   ("m." "ం.")
 309   ("m/" "ం/")
 310   ("m\\" "ం\\")
 311   ("m:" "ం:")
 312   ("m;" "ం;")
 313   ("m<" "ం<")
 314   ("m=" "ం=")
 315   ("m>" "ం>")
 316   ("m?" "ం?")
 317   ("m[" "ం[")
 318   ("m]" "ం]")
 319   ("m_" "ం_")
 320   ("m`" "ం`")
 321   ("m{" "ం{")
 322   ("m}" "ం}")
 323  )
 324
 325  (independent ; standalone output: vowels, signs, digits and dandas
 326   ((S-\ ) "‌") ; Shift+Space; NOTE(review): string is invisible, presumably ZWNJ (U+200C) -- confirm
 327   ((C-@) "‍") ; Control+@; NOTE(review): string is invisible, presumably ZWJ (U+200D) -- confirm
 328
 329   ;-------------------------------------------------------------------------
 330   ; vowels
 331   ;-------------------------------------------------------------------------
 332
 333   ("a" "అ")
 334
 335   ("aa" "ఆ")
 336   ("a'" "ఆ")
 337   ("A" "ఆ") ; from RIT 2.0, 3.0
 338
 339   ("i" "ఇ")
 340
 341   ("ee" "ఈ")
 342   ("ii" "ఈ")
 343   ("ia" "ఈ")
 344   ("i'" "ఈ")
 345   ("I" "ఈ") ; from RIT 2.0, 3.0
 346
 347   ("u" "ఉ")
 348
 349   ("oo" "ఊ")
 350   ("uu" "ఊ")
 351   ("U" "ఊ")
 352   ("ua" "ఊ")
 353   ("u'" "ఊ")
 354
 355   ("R" "ఋ")
 356   ("r'" "ఋ") ; from RIT 2.0
 357
 358   ("Ru" "ౠ")
 359   ("r'u" "ౠ") ; from RIT 2.0
 360
 361   ("~l" "ఌ") ; the letter, not the combining sign ౢ that `dependent' emits
 362
 363   ("~L" "ౡ") ; the letter, not the combining sign ౣ that `dependent' emits
 364
 365   ("e" "ఎ")
 366
 367   ("ea" "ఏ")
 368   ("ae" "ఏ")
 369   ("E" "ఏ")
 370   ("e'" "ఏ")
 371
 372   ("ai" "ఐ")
 373   ("ei" "ఐ") ; from RIT 3.0
 374
 375   ("o" "ఒ")
 376
 377   ("oe" "ఓ")
 378   ("O" "ఓ")
 379   ("oa" "ఓ")
 380   ("o'" "ఓ")
 381
 382   ("au" "ఔ")
 383   ("ou" "ఔ")
 384   ("ow" "ఔ") ; from RIT 3.0
 385
 386   ; This combination is defined in the "internal representation" section of
 387   ; RTS.  It was widely used in early RTS implementations which lacked the
 388   ; automatic sunna generation capability and has thus become the de facto
 389   ; way of producing a sunna manually.
 390   ("M" "ం")
 391
 392   ("@M" "ఁ")
 393   ("@m" "ఁ") ; from RIT 3.0
 394
 395   ("@h" "ః")
 396
 397   ("@n" "�") ; placeholder output; see the description's note on '@n'
 398   ("@N" "�") ; from RIT 3.0
 399
 400   ("@2" "ఽ")
 401
 402   ;-------------------------------------------------------------------------
 403   ; digits
 404   ;-------------------------------------------------------------------------
 405
 406   ("0" "౦")
 407   ("1" "౧")
 408   ("2" "౨")
 409   ("3" "౩")
 410   ("4" "౪")
 411   ("5" "౫")
 412   ("6" "౬")
 413   ("7" "౭")
 414   ("8" "౮")
 415   ("9" "౯")
 416
 417   ;-------------------------------------------------------------------------
 418   ; punctuation
 419   ;-------------------------------------------------------------------------
 420
 421   ; The characters at the right-hand-side of these mappings are borrowed
 422   ; from the Devanagari Unicode chart because they do not yet exist in the
 423   ; Telugu Unicode chart.
 424   ("|" "।") ; from RIT 3.0
 425   ("||" "॥") ; from Yudit
 426  )
 427
 428  (dependent ; vowel signs: each rule first deletes the preceding character, then inserts the sign
 429   ("^" (delete @-) "్‌") ; re-inserts a virama followed by one more (invisible) character
 430
 431   ;-------------------------------------------------------------------------
 432   ; vowels
 433   ;-------------------------------------------------------------------------
 434
 435   ("a" (delete @-) "") ; inserts nothing: only the preceding character is removed
 436
 437   ("aa" (delete @-) "ా")
 438   ("a'" (delete @-) "ా")
 439   ("A" (delete @-) "ా") ; from RIT 3.0
 440
 441   ("i" (delete @-) "ి")
 442
 443   ("ee" (delete @-) "ీ")
 444   ("ii" (delete @-) "ీ")
 445   ("ia" (delete @-) "ీ")
 446   ("i'" (delete @-) "ీ")
 447   ("I" (delete @-) "ీ") ; from RIT 3.0
 448
 449   ("u" (delete @-) "ు")
 450
 451   ("oo" (delete @-) "ూ")
 452   ("uu" (delete @-) "ూ")
 453   ("U" (delete @-) "ూ")
 454   ("ua" (delete @-) "ూ")
 455   ("u'" (delete @-) "ూ")
 456
 457   ("R" (delete @-) "ృ")
 458   ("r'" (delete @-) "ృ") ; from RIT 2.0
 459
 460   ("Ru" (delete @-) "ౄ")
 461   ("r'u" (delete @-) "ౄ") ; from RIT 2.0
 462
 463   ("~l" (delete @-) "ౢ")
 464
 465   ("~L" (delete @-) "ౣ")
 466
 467   ("e" (delete @-) "ె")
 468
 469   ("ea" (delete @-) "ే")
 470   ("ae" (delete @-) "ే")
 471   ("E" (delete @-) "ే")
 472   ("e'" (delete @-) "ే")
 473
 474   ("ai" (delete @-) "ై")
 475   ("ei" (delete @-) "ై") ; from RIT 3.0
 476
 477   ("o" (delete @-) "ొ")
 478
 479   ("oe" (delete @-) "ో")
 480   ("O" (delete @-) "ో")
 481   ("oa" (delete @-) "ో")
 482   ("o'" (delete @-) "ో")
 483
 484   ("au" (delete @-) "ౌ")
 485   ("ou" (delete @-) "ౌ")
 486   ("ow" (delete @-) "ౌ") ; from RIT 3.0
 487  )
 488
 489  (single_hash ; a lone '#': used by the states to enter and leave `no_transliteration'
 490   ("#" "") ; inserts nothing
 491  )
 492
 493  (triple_hash ; '###' inserts one literal '#'
 494   ("###" "#")
 495  )
 496
 497  (invariant
 498   ; Identity rules used by the `no_transliteration' state.  There is no rule
 499   ; for "#"; in that state the key is matched by `single_hash' instead.
 500   (" " " ") ((Tab) ("\t")) ((Return) ("\n")) ((BackSpace) (undo))
 501
 502   ("0" "0") ("1" "1") ("2" "2") ("3" "3") ("4" "4")
 503   ("5" "5") ("6" "6") ("7" "7") ("8" "8") ("9" "9")
 504
 505   ("A" "A") ("B" "B") ("C" "C") ("D" "D") ("E" "E") ("F" "F") ("G" "G") ("H" "H") ("I" "I")
 506   ("J" "J") ("K" "K") ("L" "L") ("M" "M") ("N" "N") ("O" "O") ("P" "P") ("Q" "Q") ("R" "R")
 507   ("S" "S") ("T" "T") ("U" "U") ("V" "V") ("W" "W") ("X" "X") ("Y" "Y") ("Z" "Z")
 508
 509   ("a" "a") ("b" "b") ("c" "c") ("d" "d") ("e" "e") ("f" "f") ("g" "g") ("h" "h") ("i" "i")
 510   ("j" "j") ("k" "k") ("l" "l") ("m" "m") ("n" "n") ("o" "o") ("p" "p") ("q" "q") ("r" "r")
 511   ("s" "s") ("t" "t") ("u" "u") ("v" "v") ("w" "w") ("x" "x") ("y" "y") ("z" "z")
 512
 513   ("~" "~") ("`" "`") ("!" "!") ("@" "@") ("$" "$") ("%" "%") ("^" "^")
 514   ("&" "&") ("*" "*") ("(" "(") (")" ")") ("_" "_") ("-" "-")
 515   ("+" "+") ("=" "=") ("{" "{") ("[" "[") ("}" "}") ("]" "]")
 516   ("|" "|") ("\\" "\\") (":" ":") (";" ";") ("\"" "\"") ("\'" "\'")
 517   ("<" "<") ("," ",") (">" ">") ("." ".") ("?" "?") ("/" "/")
 518  )
 519
 520  (return ; matches the Return key
 521   ((Return))) ; no map action here; the states decide what happens
 522
 523  (backspace ; matches the BackSpace key
 524   ((BackSpace) (undo))) ; runs the (undo) action
 525 )
 526
 527 (state
 528  (init ; initial state
 529   (starter (pushback 1) (shift intermediate)) ; push the starter key back, then let `intermediate' decode it
 530  )
 531
 532  (intermediate ; decodes the key sequence that `init' pushed back
 533   (consonant (shift second)) ; consonant inserted: `second' handles what follows
 534   (consonant-without-sunna (shift second)) ; same for r, y, n& and m&
 535   (sunna-inside-word (shift second-sunna-inside-word)) ; bare n/m: may still be turned into a sunna
 536   (sunna-endof-word (shift init)) ; m + whitespace/punctuation: sunna already inserted
 537   (independent (shift init)) ; standalone vowel, sign, digit or danda
 538   (single_hash (shift no_transliteration)) ; '#' switches transliteration off
 539   (triple_hash (shift init)) ; '###' inserts a literal '#'
 540   (backspace) ; BackSpace runs (undo); no state change
 541   (return (shift init)) ; Return: the map defines no action; shift back to `init'
 542  )
 543
 544  (second ; at least one consonant (ending in a virama) has been inserted
 545   (consonant) ; another consonant is appended; stay in this state
 546   (consonant-without-sunna) ; likewise for r, y, n& and m&
 547   (sunna-inside-word (shift second-sunna-inside-word)) ; bare n/m: may still be turned into a sunna
 548   (sunna-endof-word (shift init)) ; m + whitespace/punctuation ends the unit
 549   (dependent (shift init)) ; vowel sign replaces the trailing character, unit complete
 550   (backspace) ; BackSpace runs (undo); no state change
 551   (return (shift init)) ; Return: the map defines no action; shift back to `init'
 552  )
 553
 554  (second-sunna-inside-word ; a bare n/m (న్ or మ్, two characters) has just been inserted
 555   (t (mark p)) ; on entering this state, set marker p at the current position
 556   (consonant (move p) (delete @-) (delete @-) "ం" (move @>) (shift second)) ; go back to p, drop the two characters before it, insert ం, return to the end
 557   (consonant-without-sunna (shift second)) ; r, y, n&, m&: keep the న్/మ్ as typed
 558   (sunna-inside-word) ; another bare n/m: stay here (the `t' branch is not a map match, so p is presumably not re-marked -- confirm)
 559   (sunna-endof-word (shift init)) ; m + whitespace/punctuation ends the unit
 560   (dependent (shift init)) ; vowel sign replaces the trailing character, unit complete
 561   (backspace) ; BackSpace runs (undo); no state change
 562  ) ; NOTE(review): unlike `intermediate' and `second', there is no `return' branch here -- confirm this is intended
 563
 564  (no_transliteration ; entered from `intermediate' on a single '#'
 565   (single_hash (shift init)) ; a further '#' returns to `init'
 566   (invariant) ; every key listed in `invariant' inserts itself
 567  )
 568 )
 569
 570 ;; Local Variables:
 571 ;; coding: utf-8
 572 ;; mode: emacs-lisp
 573 ;; End: