1 ;; te-rts.mim -- Telugu input method with RTS method
3 ;; Copyright (C) 2003, 2004, 2005, 2006
4 ;; National Institute of Advanced Industrial Science and Technology (AIST)
5 ;; Registration Number H15PRO112
6 ;; Copyright 2005, 2006, 2010 Suraj N. Kurapati <sunaku@gmail.com>
7 ;; Copyright 2006 Chaitanya Kamisetty <chaitanya@atc.tcs.co.in>
10 ;; This file is part of the m17n contrib; a sub-part of the m17n
13 ;; The m17n library is free software; you can redistribute it and/or
14 ;; modify it under the terms of the GNU Lesser General Public License
15 ;; as published by the Free Software Foundation; either version 2.1 of
16 ;; the License, or (at your option) any later version.
18 ;; The m17n library is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 ;; Lesser General Public License for more details.
23 ;; You should have received a copy of the GNU Lesser General Public
24 ;; License along with the m17n library; if not, write to the Free
25 ;; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 ;; Boston, MA 02110-1301, USA.
31 (description "Input method for Telugu script with RTS method.
32 For the detail of RTS, see the page:
33 <http://groups.google.com/groups?selm=Bv0A9M.27B@rice.edu>.
35 This input method is based on the Telugu Rice Transliteration Standard (RTS)
36 specification[1] and its Rice Inverse Transliterator (RIT) supplement[2].
38 The original RTS specification was written by Ananda Kishore and Rama Rao
39 Kanneganti in 1992 and can presently be accessed in the archives[1] of the
40 'soc.culture.indian.telugu' USENET newsgroup.
42 The RIT supplement[2] enriches RTS with alternative combinations. However,
43 in cases where RIT and RTS define conflicting mappings for the same
44 combination, such as 'ea', only the RTS mapping is honored.
46 Finally, this input method deviates from the RTS in the following ways:
48 * The combination '\@n' yields '�' because its corresponding glyph does not
49 yet exist in the Telugu unicode chart.
51 * The combination 'm' yields 'ం' if it appears at the end of a word. The
52 user can type 'm&' to bypass this behavior and force 'm' to yield 'మ్'.
54 * The sunna prevention operator '&' can be used to force a more literal
55 transliteration of consonant compounds such as 'jn' by writing 'j&n'.
57 [1]: http://groups.google.com/groups?selm=Bv0A9M.27B@rice.edu
58 [2]: http://www.teluguworld.org/RIT/rit3.0/manual.html
67 ("a") ("b") ("c") ("d") ("e") ("f") ("g") ("h") ("i") ("j")
68 ("k") ("l") ("m") ("n") ("o") ("p") ("r") ("s") ("t") ("u")
69 ("v") ("w") ("x") ("y") ("z")
71 ("A") ("B") ("C") ("D") ("E") ("G") ("H") ("I") ("J") ("K")
72 ("L") ("M") ("N") ("O") ("P") ("R") ("S") ("T") ("U") ("V")
75 ("0") ("1") ("2") ("3") ("4") ("5") ("6") ("7") ("8") ("9")
77 ("@") ("|") ("~") ("#")
82 ;-------------------------------------------------------------------------
84 ;-------------------------------------------------------------------------
104 ;-------------------------------------------------------------------------
106 ;-------------------------------------------------------------------------
117 ("c'" "ఛ్") ; from RIT 2.0, 3.0
120 ("z" "జ్") ; from RIT 3.0
132 ;-------------------------------------------------------------------------
134 ;-------------------------------------------------------------------------
155 ("n'" "ణ్") ; from RIT 2.0, 3.0
157 ;-------------------------------------------------------------------------
159 ;-------------------------------------------------------------------------
171 ;-------------------------------------------------------------------------
173 ;-------------------------------------------------------------------------
192 ;-------------------------------------------------------------------------
193 ; row 6 - య ర ల వ శ ష స హ ళ క్ష ఱ
194 ;-------------------------------------------------------------------------
196 ; ("y" "య్") is defined below in consonant-without-sunna
198 ; ("r" "ర్") is defined below in consonant-without-sunna
203 ("V" "వ్") ; from RIT 3.0
205 ("W" "వ్") ; from RIT 3.0
208 ("s'" "శ్") ; from RIT 2.0, 3.0
212 ("Sh" "ష్") ; from RIT 3.0
213 ("SH" "ష్") ; from RIT 3.0
225 ("l'" "ళ్") ; from RIT 2.0, 3.0
230 ("ks" "క్స్") ; disambiguation for this input method's 1-character lookahead
233 ("r''" "ఱ్") ; from RIT 2.0, 3.0
235 ;---------------------------------------------------------------------------
237 ;---------------------------------------------------------------------------
240 ("j&n" "జ్న్") ; apply sunna prevention operator to produce literal compound
242 ("dd'" "డ్డ్") ; from RIT 3.0
243 ("dd" "ద్ద్") ; disambiguation for this input method's 1-character lookahead
245 ("tt'" "ట్ట్") ; from RIT 3.0
246 ("tt" "త్త్") ; disambiguation for this input method's 1-character lookahead
249 (consonant-without-sunna
251 ; Quotation from "sunna generation" section of RIT 3.0 specification:
253 ; when 'n' or 'm' is followed by a consonant except 'r' or 'y' RIT
254 ; assumes it to be a sunna
259 ; Quotation from "sunna generation" section of RIT 3.0 specification:
261 ; You can prevent a sunna generation by writing 'n&' or 'm&'.
274 ;-------------------------------------------------------------------------
276 ;-------------------------------------------------------------------------
282 ;-------------------------------------------------------------------------
284 ;-------------------------------------------------------------------------
286 ; The sequences below are generated by this Bourne shell script:
288 ; for ch in '!' '\"' '#' '$' '%' "'" '(' ')' '*' '+' ',' '-' '.' \
289 ; '/' '\\' ':' ';' '<' '=' '>' '?' '[' ']' '_' '`' '{' '}'
290 ; do echo " (\"m${ch}\" \"ం${ch}\")"; done
292 ; Sequences ending with '^' '&' '|' '@' '~' are omitted from the above
293 ; loop because those punctuation marks already serve a purpose in this
329 ;-------------------------------------------------------------------------
331 ;-------------------------------------------------------------------------
337 ("A" "ఆ") ; from RIT 2.0, 3.0
345 ("I" "ఈ") ; from RIT 2.0, 3.0
356 ("r'" "ఋ") ; from RIT 2.0
359 ("r'u" "ౠ") ; from RIT 2.0
373 ("ei" "ఐ") ; from RIT 3.0
384 ("ow" "ఔ") ; from RIT 3.0
386 ; This combination is defined in the "internal representation" section of
387 ; RTS. It was widely used in early RTS implementations which lacked the
388 ; automatic sunna generation capability and has thus become the de facto
389 ; way of producing a sunna manually.
393 ("@m" "ఁ") ; from RIT 3.0
398 ("@N" "�") ; from RIT 3.0
402 ;-------------------------------------------------------------------------
404 ;-------------------------------------------------------------------------
417 ;-------------------------------------------------------------------------
419 ;-------------------------------------------------------------------------
421 ; The characters on the right-hand side of these mappings are borrowed
422 ; from the Devanagari Unicode chart because they do not yet exist in the
423 ; Telugu Unicode chart.
424 ("|" "।") ; from RIT 3.0
425 ("||" "॥") ; from Yudit
429 ("^" (delete @-) "్")
431 ;-------------------------------------------------------------------------
433 ;-------------------------------------------------------------------------
437 ("aa" (delete @-) "ా")
438 ("a'" (delete @-) "ా")
439 ("A" (delete @-) "ా") ; from RIT 3.0
441 ("i" (delete @-) "ి")
443 ("ee" (delete @-) "ీ")
444 ("ii" (delete @-) "ీ")
445 ("ia" (delete @-) "ీ")
446 ("i'" (delete @-) "ీ")
447 ("I" (delete @-) "ీ") ; from RIT 3.0
449 ("u" (delete @-) "ు")
451 ("oo" (delete @-) "ూ")
452 ("uu" (delete @-) "ూ")
453 ("U" (delete @-) "ూ")
454 ("ua" (delete @-) "ూ")
455 ("u'" (delete @-) "ూ")
457 ("R" (delete @-) "ృ")
458 ("r'" (delete @-) "ృ") ; from RIT 2.0
460 ("Ru" (delete @-) "ౄ")
461 ("r'u" (delete @-) "ౄ") ; from RIT 2.0
463 ("~l" (delete @-) "ౢ")
465 ("~L" (delete @-) "ౣ")
467 ("e" (delete @-) "ె")
469 ("ea" (delete @-) "ే")
470 ("ae" (delete @-) "ే")
471 ("E" (delete @-) "ే")
472 ("e'" (delete @-) "ే")
474 ("ai" (delete @-) "ై")
475 ("ei" (delete @-) "ై") ; from RIT 3.0
477 ("o" (delete @-) "ొ")
479 ("oe" (delete @-) "ో")
480 ("O" (delete @-) "ో")
481 ("oa" (delete @-) "ో")
482 ("o'" (delete @-) "ో")
484 ("au" (delete @-) "ౌ")
485 ("ou" (delete @-) "ౌ")
486 ("ow" (delete @-) "ౌ") ; from RIT 3.0
498 ("a" "a") ("b" "b") ("c" "c") ("d" "d") ("e" "e") ("f" "f") ("g" "g")
499 ("h" "h") ("i" "i") ("j" "j") ("k" "k") ("l" "l") ("m" "m") ("n" "n")
500 ("o" "o") ("p" "p") ("q" "q") ("r" "r") ("s" "s") ("t" "t") ("u" "u")
501 ("v" "v") ("w" "w") ("x" "x") ("y" "y") ("z" "z")
503 ("A" "A") ("B" "B") ("C" "C") ("D" "D") ("E" "E") ("F" "F") ("G" "G")
504 ("H" "H") ("I" "I") ("J" "J") ("K" "K") ("L" "L") ("M" "M") ("N" "N")
505 ("O" "O") ("P" "P") ("Q" "Q") ("R" "R") ("S" "S") ("T" "T") ("U" "U")
506 ("V" "V") ("W" "W") ("X" "X") ("Y" "Y") ("Z" "Z")
508 ("0" "0") ("1" "1") ("2" "2") ("3" "3") ("4" "4") ("5" "5") ("6" "6")
509 ("7" "7") ("8" "8") ("9" "9")
511 ("~" "~") ("`" "`") ("!" "!") ("@" "@") ("$" "$") ("%" "%") ("^" "^")
512 ("&" "&") ("*" "*") ("(" "(") (")" ")") ("_" "_") ("-" "-") ("+" "+")
513 ("=" "=") ("{" "{") ("[" "[") ("}" "}") ("]" "]") ("|" "|" ) ("\\" "\\")
514 (":" ":") (";" ";") ("\"" "\"") ("\'" "\'") ("<" "<") ("," ",") (">" ">")
515 ("." ".") ("?" "?") ("/" "/")
517 (" " " ") ((Tab) ("\t")) ((BackSpace) (undo)) ((Return) ("\n"))
524 ((BackSpace) (undo)))
529 (starter (pushback 1) (shift intermediate))
533 (consonant (shift second))
534 (consonant-without-sunna (shift second))
535 (sunna-inside-word (shift second-sunna-inside-word))
536 (sunna-endof-word (shift init))
537 (independent (shift init))
538 (single_hash (shift no_transliteration))
539 (triple_hash (shift init))
541 (return (shift init))
546 (consonant-without-sunna)
547 (sunna-inside-word (shift second-sunna-inside-word))
548 (sunna-endof-word (shift init))
549 (dependent (shift init))
551 (return (shift init))
554 (second-sunna-inside-word
556 (consonant (move p) (delete @-) (delete @-) "ం" (move @>) (shift second))
557 (consonant-without-sunna (shift second))
559 (sunna-endof-word (shift init))
560 (dependent (shift init))
565 (single_hash (shift init))