From 1e03dbb28a6919b306d0a92ed2ba4cf2fb094390 Mon Sep 17 00:00:00 2001 From: nisikimi Date: Tue, 31 Mar 2009 04:29:56 +0000 Subject: [PATCH] *** empty log message *** --- FORMATS/IM.txt | 1939 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 1482 insertions(+), 457 deletions(-) diff --git a/FORMATS/IM.txt b/FORMATS/IM.txt index d8c6697..5998f9e 100644 --- a/FORMATS/IM.txt +++ b/FORMATS/IM.txt @@ -1,4 +1,4 @@ -/* Copyright (C) 2003, 2004, 2005 +/* Copyright (C) 2003, 2004, 2005, 2009 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H15PRO112 See the end for copying conditions. */ @@ -18,564 +18,1486 @@ methods. @section im-format SYNTAX and SEMANTICS -The following data format defines an input method. The driver loads a -definition from a file, a stream, etc. The definition is converted -into the form of plist in the driver. +The following defines a schema for an input method, written in RelaxNG. +(This schema file can be found at m17n-db-xml/MIM/mim.rng.) +The driver loads a definition from a file, a stream, etc. The definition +is converted into the form of plist in the driver. @verbatim -INPUT-METHOD ::= - IM-DECLARATION ? IM-DESCRIPTION ? TITLE ? - VARIABLE-LIST ? COMMAND-LIST ? MODULE-LIST ? - MACRO-LIST ? MAP-LIST ? STATE-LIST ? -IM-DECLARATION ::= '(' 'input-method' LANGUAGE NAME EXTRA-ID ? VERSION ? ')' -LANGUAGE ::= SYMBOL -NAME ::= SYMBOL -EXTRA-ID ::= SYMBOL -VERSION ::= '(' 'version' VERSION-NUMBER ')' + + + + + + + + + + + + + + + + + + + + + + + + + + -IM-DESCRIPTION ::= '(' 'description' DESCRIPTION ')' -DESCRIPTION ::= MTEXT-OR-GETTEXT | 'nil' -MTEXT-OR-GETTEXT ::= [ MTEXT | '(' '_' MTEXT ')'] +@endverbatim -TITLE ::= '(' 'title' TITLE-TEXT ')' -TITLE-TEXT ::= MTEXT +The top-level node of an input method has a <input-method> tag. + +The element <description> can appear in <input-method>, <variable> or +<command>, and specifies the description text of its parent. 
The +content of the element <get-text> is translated according to the +current locale by "gettext" (if the translation is provided). + +The element <title> contains a string that is displayed on the screen +when this input method is active. + +#if EXAMPLE_CODE + + + bo + ewts + + Tibetan input method based on EWTS. +This implementation is based on THDL Extended Wylie Transliteration Scheme +Version 2.0 <http://www.thdl.org/collections/langling/ewts/ewts.php>. + ཀ + : : +#endif + +<variable-list> declares variables used in this input method. +<command-list> declares commands used in this input method. +<module-list> declares external modules used in this input method. +<macro-list> declares macros used in this input method. <map-list> +declares maps used in this input method. When an input method is +never standalone and always included in another method, the element +<map-list> can be omitted. <state-list> declares states used in this +input method. When an input system is never standalone and always included in +another system, the element <state-list> can be omitted. + +@subsection im-declarations Input Method Declaration -VARIABLE-LIST ::= '(' 'variable' VARIABLE-DECLARATION * ')' -VARIABLE-DECLARATION ::= '(' VAR-NAME [ DESCRIPTION VALUE VALUE-CANDIDATE * ]')' -VAR-NAME ::= SYMBOL -VALUE ::= MTEXT | SYMBOL | INTEGER -VALUE-CANDIDATE ::= VALUE | '(' RANGE-FROM RANGE-TO ')' -RANGE-FROM ::= INTEGER -RANGE-TO ::= INTEGER +@verbatim -COMMAND-LIST ::= '(' 'command' COMMAND-DECLARATION * ')' -COMMAND-DECLARATION ::= '(' CMD-NAME [ DESCRIPTION KEYSEQ * ] ')' -CMD-NAME ::= SYMBOL + + + + + + t + [a-z]{2,3} + + + + + + nil + + + + + + [^n][^i][^l] + .{1,2} + ....+ + + + + + + + + + + + + [0-9]+\.[0-9]+\.[0-9]+ + + + @endverbatim -@c IM-DECLARATION specifies the language and name of this input -method. - -When @c LANGUAGE is @c t, the use of the input method is not limited -to one language. 
- -When @c NAME is @c nil, the input method is not standalone, but -is expected to be used in other input methods. In such cases, -@c EXTRA-ID is required to identify the input method. +The element <tags> specifies for which language the input method is, +and the name of the input method. There is one special input method +file "global.mimx" that declares common variables and commands. The +input method driver always loads this file and other input methods can +inherit its variables and commands. + +When the element <language> has "t" as its content, the use of the +input method is not limited to one language. When the content is +other than "t", it must be a valid code in ISO639-1, two-character +code or ISO639-2, three-character code for the names of languages. + +When the element <name> has "nil" as its content, the input method is +not standalone, but is expected to be used in other input methods. In +such cases, the element <extra-id> is required to identify the input +method. When the element <name> has content other than "nil", the +element <extra-id> is optional. + +#if EXAMPLE_CODE + + bo + ewts + +#endif + +#if EXAMPLE_CODE + + t + nil + zh-util + +#endif + +The optional element <m17n-version> specifies the required minimum +version number of the m17n library. The format is "X.Y.Z" where X is +a major version number, Y is a minor version number, and Z is a patch +level. + +@subsection im-setups Input Method Setups -@c VERSION specifies the required minimum version number of the m17n -library. The format is "XX.YY.ZZ" where XX is a major version -number, YY is a minor version number, and ZZ is a patch level. - -@c DESCRIPTION, if not nil, specifies the description text of an input -method, a variable or a command. If @c MTEXT-OR-GETTEXT takes the -second form, the text is translated according to the current locale by -"gettext" (if the translation is provided). +@verbatim -@c TITLE-TEXT is a text displayed on the screen when this input method -is active.
+ + + + + + + + + + + + + + + + + + string + + + + symbol + + + + integer + + + + + + + + + + + + + + string + + + + symbol + + + + integer + + + + + + + + + + + + + + -There is one special input method file "global.mim" that declares -common variables and commands. The input method driver always loads -this file and other input methods can inherit the variables and the -commands. +@endverbatim -@c VARIABLE-DECLARATION declares a variable used in this input method. -If a variable must be initialized to the default value, or is to be +<variable-list> declares variables used in this input method. If a +variable must be initialized to the default value, or is to be customized by a user, it must be declared here. The declaration can be used in two ways. One is to introduce a new variable. In that -case, @c VALUE must not be omitted. Another is to inherit the variable -from what declared in "global.mim", and to give the different default -value and/or to make the variable customizable specially for the -current input method. In the latter case, @c VALUE can be omitted. - -@c COMMAND-DECLARATION declares a command used in this input method. -If a command must be bound to the default key sequence, or is to be -customized by a user, it must be declared here. Like @c -VARIABLE-DECLARATION, the declaration can be used in two ways. One is -to introduce a new command. In that case, @c KEYSEQ must not be omitted. -Another is to inherit the command from what declared in "global.mim", -and to give the different key binding and/or to make the command +case, the <value> element in <variable> must not be omitted. Another +is to inherit the variable from what declared in "global.mim", and to +give the different default value and/or to make the variable customizable specially for the current input method. In the latter -case, @c KEYSEQ can be omitted. - +case, <value> can be omitted. + +Each <variable> declares one variable, and a variable is referred with +the attribute "id". 
<value> of a <variable> can be an integer, a +symbol, or an M-text value. All variables are implicitly initialized +to the integer value zero. + +The M-text (string) <value> can be referred by the <insert> action. +The symbol <value> can not be referred directly, but is used by the +library implicitly (e.g. candidates-charset). The integer <value> can +be set, modified and referred by the <set>, <add>, <sub>, <mul>, and +<div> actions. It can be referred by the <insert>, <select>, +<undo>, <if>, and <cond> actions. + +<variable-value-candidate> lists the possible values of the variable. +<c-value> specifies one of the possible values of the variable. It can +be an M-text (string), a symbol or an integer. + +<c-range> specifies a range of integers that the variable can have as +its value. It can be used mixed with <c-value>. The attribute "from" +is the minimum integer value that a variable can take, and the +attribute "to" is the maximum. + +#if EXAMPLE_CODE + + + + Flag to tell whether or not to generate precomposed characters. + If 1, generate precomposed characters if available (e.g. "ྲྀ" (U+0F76)). + If 0, generate only decomposed characters (e.g. "ྲྀ" (U+0FB2 U+0F80)). + + 0 + + 0 + 1 + + + +#endif + +This code declares one variable "precomposed" whose value can be 0 or +1 and is initially set to 0. @verbatim -MODULE-LIST ::= '(' 'module' MODULE * ')' -MODULE ::= '(' MODULE-NAME FUNCTION * ')' + + predefined + + + handled-keys + predefined-surround-text-flag + @.+ + + + -MODULE-NAME ::= SYMBOL - -FUNCTION ::= SYMBOL @endverbatim -Each @c MODULE declares the name of an external module (i.e. dynamic -library) and function names exported by the module. If a @c FUNCTION has -name "init", it is called with only the default arguments (see the -section about @c CALL) when an input context is created for the input -method. If a @c FUNCTION has name "fini", it is called with only the -default arguments when an input context is destroyed.
+Predefined-variables are variables whose "type" attribute has the +value "predefined". When the "id" attribute has the value "handled-keys", +the value of the variable is the number of handled keys at that +moment. If the "id" attribute has the value +"predefined-surround-text-flag", the value of the variable is -1 +if surrounding text is supported, and -2 if not. + +#if EXAMPLE_CODE + +#endif + +This code refers to a predefined variable "handled-keys". @verbatim -MACRO-LIST ::= MACRO-INCLUSION ? '(' 'macro' MACRO * ')' MACRO-INCLUSION ? -MACRO ::= '(' MACRO-NAME MACRO-ACTION * ')' + + + + + + command-.* + + + + + + + + + + -MACRO-NAME ::= SYMBOL +@endverbatim -MACRO-ACTION ::= ACTION +<command-list> declares a command used in the input method. If a +command must be bound to the default key sequence, or is to be +customized by a user, it must be declared here. Like <variable-list>, +the declaration can be used in two ways. One is to introduce a new +command. In that case, the <keyseq> element must appear in <command>. +Another is to inherit the command from what is declared in "global.mim", +and to give the different key binding and/or to make the command +customizable specially for the current input method. In the latter +case, <keyseq> can be omitted. + +Each <command> declares one command and a command is +referred with the attribute "id". + +#if EXAMPLE_CODE + + + + Commit + Commit the preedit text + + Return + Linefeed + + +#endif -TAGS ::= `(` LANGUAGE NAME EXTRA-ID ? `)` +@verbatim -MACRO-INCLUSION ::= '(' 'include' TAGS 'macro' MACRO-NAME ? ')' + + + + + + module-.* + + + + + + function-.* + + + + + + + @endverbatim -@c MACRO-INCLUSION includes macros from another input method specified -by @c TAGS. When @c MACRO-NAME is not given, all macros from the -input method are included. +Each <module> element declares an external module (i.e. dynamic +library).
The value of the "id" attribute gives the name of the module. + +<function> elements specify function names exported by the module. If +the "id" attribute has the value "function-init", it is called with +only the default arguments (see <call>) when an input context is +created for the input method. If the "id" attribute has the value +"function-fini", it is called with only the default arguments when the +input context is destroyed. + +#if EXAMPLE_CODE + + + + + + + + + + +#endif + +This code declares a module "module-libmimx-anthy" which exports six functions. + +@verbatim + + + + + + + macro-.* + + + + + + -@verbatim MAP-LIST ::= MAP-INCLUSION ? '(' 'map' MAP * ')' -MAP-INCLUSION ? +@endverbatim -MAP ::= '(' MAP-NAME RULE * ')' +The element <macro> bundles and names a set of <action>s. The +attribute "id" gives the name of a <macro>, and a macro is referred +with this attribute. -MAP-NAME ::= SYMBOL +#if EXAMPLE_CODE + + + + + + + : : ;; more s + + +#endif -RULE ::= '(' KEYSEQ MAP-ACTION * ')' +This code declares one macro "macro-forward". -KEYSEQ ::= MTEXT | '(' [ SYMBOL | INTEGER ] * ')' +@verbatim -MAP-INCLUSION ::= '(' 'include' TAGS 'map' MAP-NAME ? ')' + + + + + + @endverbatim -When an input method is never standalone and always included in -another method, @c MAP-LIST can be omitted. +A marker is a symbol indicating a character position in the preediting +text. The element <mark-current-position> assigns a position to +a marker. The position of a marker is referred by the elements +<move-to-marker> and <delete-to-marker>. -@c SYMBOL in the definitions of @c MAP-NAME must not be @c t nor @c -nil. +@verbatim -@c MTEXT in the definition of @c KEYSEQ consists of characters that -can be generated by a keyboard. Therefore @c MTEXT usually contains -only ASCII characters. However, if the input method is intended to be -used, for instance, with a West European keyboard, @c MTEXT may -contain Latin-1 characters.
+ + + + @[0-9] + + @first + @current + @last + @previous + @next + @previous_candidate_list + @next_candidate_list + + + -@c SYMBOL in the definition of @c KEYSEQ must be the return value of -the minput_event_to_key () function. Under the X window system, you -can quickly check the value using the @c xev command. For example, -the return key, the backspace key, and the 0 key on the keypad are -represented as @c (Return) , @c (BackSpace) , and @c (KP_0) -respectively. If the shift, control, meta, alt, super, and hyper -modifiers are used, they are represented by the S- , C- , M- , A- , s- -, and H- prefixes respectively in this order. Thus, "return with -shift with meta with hyper" is @c (S-M-H-Return) . Note that "a with -shift" .. "z with shift" are represented simply as A .. Z . Thus "a -with shift with meta with hyper" is @c (M-H-A) . +@endverbatim -@c INTEGER in the definition of @c KEYSEQ must be a valid character -code. +Predefined markers start with @@. @@0, @@1, ... , @@9 mark the 0th, 1st, +2nd,... 9th position respectively. @@previous_candidate_list marks the +previous position where a candidate list changes. +@@next_candidate_list marks the next position where a candidate list +changes. -@c MAP-INCLUSION includes maps from another input method specified by -@c TAGS. When @c MAP-NAME is not given, all maps from the input method -are included. +#if EXAMPLE_CODE + +#endif +This code deletes the characters between the first position and the current +position in the buffer. @verbatim -MAP-ACTION ::= ACTION -ACTION ::= INSERT | DELETE | SELECT | MOVE | MARK - | SHOW | HIDE | PUSHBACK | POP | UNDO - | COMMIT | UNHANDLE | SHIFT | CALL - | SET | IF | COND | '(' MACRO-NAME ')' + + + [^@].* + + -PREDEFINED-SYMBOL ::= - '@0' | '@1' | '@2' | '@3' | '@4' - | '@5' | '@6' | '@7' | '@8' | '@9' - | '@<' | '@=' | '@>' | '@-' | '@+' | '@[' | '@]' - | '@@' - | '@-0' | '@-N' | '@+N' @endverbatim +User-defined markers may not start with @@.
+ +#if EXAMPLE_CODE + +#endif + +This code moves the marker to the user-defined position T. + @verbatim -STATE-LIST ::= STATE-INCUSION ? '(' 'state' STATE * ')' STATE-INCUSION ? -STATE ::= '(' STATE-NAME [ STATE-TITLE-TEXT ] BRANCH * ')' + + + + + + + + + + + +@endverbatim + +The element <predefined-nth-previous-or-following-character> specifies +a character inside or outside of the preedit buffer. -STATE-NAME ::= SYMBOL +When the value of the attribute "position" is a negative integer -N, +the element <predefined-nth-previous-or-following-character> means the +Nth previous character in the preedit buffer. If there are only M +(M<N) previous characters in it, the value is the (N-M)th previous +character from the inputting spot. -STATE-TITLE-TEXT ::= MTEXT +When the value of the attribute "position" is a positive integer N, +the element <predefined-nth-previous-or-following-character> means the +Nth following character in the preedit buffer. If there are only M +(M<N) following characters in it, the value is the (N-M)th following +character from the inputting spot. -BRANCH ::= '(' MAP-NAME BRANCH-ACTION * ')' - | '(' 'nil' BRANCH-ACTION * ')' - | '(' 't' BRANCH-ACTION * ')' +#if EXAMPLE_CODE + +#endif -STATE-INCLUSION ::= '(' 'include' TAGS 'state' STATE-NAME ? ')' +This code refers to the previous character. + +@verbatim + + + @[0-9] + @first + @current + @last + @previous + @next + @previous_candidate_list + @next_candidate_list + + @endverbatim -When an input system is never standalone and always included in -another system, @c STATE-LIST can be omitted. +Predefined-selectors specify positions in a candidate list. They are +used in the element <select>. + +@@0, @@1, ... , @@9 specify the 0th, 1st, ... 9th position +respectively. @@previous means the previous position, and if the +current candidate is the first one in the current candidate group, +this value means the last candidate in the previous candidate +group.
@@next means the next position, and if the current candidate is +the last one in the current candidate group, this value means the +first candidate in the next candidate group. +@@previous_candidate_list specifies the candidate in the previous +candidate group having the same candidate index as the current one, +and @@next_candidate_list specifies the candidate in the next +candidate group having the same candidate index as the current one. -@c STATE-INCLUSION includes states from another input method specified -by @c TAGS. When @c STATE-NAME is not given, all states from the input -method are included. +#if EXAMPLE_CODE + + as below. Note that this is just an example, and it ignores such important key as Backspace. @verbatim -(title "拼") - -(map - ;; The initial character of Pinyin. - (starter - ("a") ("b") ... ("h") ("j") ... ("t") ("w") ("x") ("y") ("z")) - - ;; Big table of Pinyin vs the corresponding Chinese characters. - (pinyin - ... - ("bei" ("被北备背悲辈杯倍贝碑" ...)) - ("hao" ("好号毫豪浩耗皓嚎昊郝" ...)) - ("jing" ("经京精境警竟静惊景敬" ...)) - ("ni" ("你呢尼泥逆倪匿拟腻妮" ...)) - ...) - ;; Typing 1, 2, ..., 0 selects the 0th, 1st, ..., 9th candidate. - (choose - ("1" (select 0)) ("2" (select 1)) ... ("9" (select 8)) ("0" (select 9)))) - -(state - (init - ;; When an initial character of Pinyin is typed, re-handle it in - ;; "main" state. Anything else is just produced as is. - (starter (show) (pushback 1) (shift main))) - - (main - ;; When a complete Pinyin sequence is typed, shift to "select" state - ;; to allow users to select one from the candidates. - (pinyin (shift select)) - - ;; When anything else is typed, produce the current candidate (if - ;; any), and re-handle the last input in "init" state. - (nil (hide) (shift init))) - - (select - ;; When a number is typed, select the corresponding canidate, - ;; produce it, and shift to "init" state. 
- (choose (hide) (shift init)) - - ;; When anything else is typed, produce the current candidate, - ;; and re-handle the last input in "init" state. - (nil (hide) (shift init)))) + .. +"拼" + + + + + + + : : + + + + + + 被北备背悲辈杯倍贝碑... ... + + 好号毫豪浩耗皓嚎昊郝... ... + + 经京精境警竟静惊景敬... ... + + 你呢尼泥逆倪匿拟腻妮... ... + + + + + : : + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @endverbatim @elseif FOR-LATEX -- 1.7.10.4