From: handa Date: Fri, 25 Jul 2003 05:50:25 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: XML-base~115 X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=2fd8a2ae6d6ab4fedde65d2950ae089a4cdb7a2b;p=m17n%2Fm17n-docs.git *** empty log message *** --- diff --git a/Makefile.am b/Makefile.am index f9d16ba..9cdee0b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -71,24 +71,23 @@ EXAMPLE = \ @M17NLIB@/example/medit.c DBFORMAT = \ - @M17NDB@/FORMATS/ + @M17NDB@/FORMATS -MAN1 = \ - data/m17n-config.txt ${EXAMPLE} +MAN1 = data/m17n-config.txt ${EXAMPLE} -MAN3 = \ - @M17NLIB@/src/m17n-core.h \ +MAN3 = @M17NLIB@/src/m17n-core.h \ @M17NLIB@/src/m17n.h \ @M17NLIB@/src/m17n-gui.h \ @M17NLIB@/src/m17n-X.h \ @M17NLIB@/src/m17n-err.h \ @M17NLIB@/src -MAN5 = \ - data/dbformat.txt +MAN5 = ${DBFORMAT} SRCALL = \ - data/mainpage.txt ${MAN3} data/m17n-config.txt data/example.txt ${MAN5} + data/mainpage.txt \ + ${MAN3} \ + data/m17n-config.txt data/example.txt data/dbformat.txt usr-html html-usr: usr/html/index.html usr/html/parrot.png ja-html html-ja: ja/html/index.html ja/html/parrot.png @@ -193,11 +192,12 @@ doxyfile-man%.usr: doxyfile-man% data/mainpage.txt: data-usr/mainpage.txt cp $< $@ -data/dbformat.txt: - data/example.txt: ${MAN1} utils/example.sh cat ${EXAMPLE} | utils/example.sh > $@ +data/dbformat.txt: ${DBFORMAT} utils/dbformat.sh + cat ${DBFORMAT}/*.txt | utils/dbformat.sh > $@ + data/m17n-config.txt: @M17NLIB@/m17n-config.in sed -n -e '/\/\*\*\*/,/\*\// p' < $< > $@ diff --git a/configure.ac b/configure.ac index 9c48b8b..e1cac72 100644 --- a/configure.ac +++ b/configure.ac @@ -24,9 +24,25 @@ if test "x$M17NLIB" = "x"; then fi else : fi - AC_SUBST(M17NLIB) +# Checks for the m17n database files. 
+AC_ARG_WITH(m17ndb, + [ --with-m17n-db=DIR directory of m17n-db source files], + M17NDB="$withval") + +if test "x$M17NDB" = "x"; then + if test -d ../m17n-db; then + M17NDB=../m17n-db + elif test -d /usr/local/src/m17n-db; then + M17NDB=/usr/local/src/m17n-db + else + M17NDB=. + fi +else : +fi +AC_SUBST(M17NDB) + AC_CHECK_PROG(HAVEDOXYGEN, doxygen, yes, no) AC_CHECK_PROG(RUBY, ruby, yes, no) AC_CHECK_PROG(LATEX, latex, yes, no) @@ -61,6 +77,7 @@ AC_OUTPUT # Print configuration message. if test $USE_MAINTAINER_MODE = yes; then AC_MSG_NOTICE([Find documentation source in "$M17NLIB"]) + AC_MSG_NOTICE([Find m17n database source in "$M17NDB"]) fi eval dir=${datadir} diff --git a/data/dbformat.txt b/data/dbformat.txt index 04cb4d0..c9769b6 100644 --- a/data/dbformat.txt +++ b/data/dbformat.txt @@ -1,6 +1,12 @@ -/***@page m17nDatabaseFormat Data format of the m17n database +/***en @page m17nDBFormat Data format of the m17n database -@section dbformat General format +This section describes the data formats of the m17n database. */ + +/***en + +@section m17nDBgeneral General format + +@subsection description DESCRIPTION The mdatabase_load () function returns the data specified by tags in the form of plist if the first tag is not @c Mchartable nor @c @@ -83,7 +89,7 @@ between the parentheses. -EXAMPLE +@subsection example EXAMPLE Here is an example of plist that is written in the expression explained above. @@ -105,8 +111,105 @@ It represents the following plist: Msymbol:xyz), Minteger:-456)) @endverbatim +*/ +/***en + +@section m17nDBflt Font Layout Table + +@subsection description DESCRIPTION + +Usually, the rendering engine converts character codes of into glyph +codes one by one by consulting information about encoding of each +selected font. But, for rendering a text that requires complicated +layouting (e.g. Thai and Indic), such an one to one conversion is not +sufficient. In addition, some glyphs must be shifted 2-dimensionally +on the screen. 
For such a case, a font layout table (FLT in short) +must be used. + +A FLT can contain the information equivalent to OpenType Layout Table +(CMAP, GSUB, and GPOS) in addition to the information about how to +extract a grapheme cluster and how to re-order characters. + +The m17n library loads a FLT from the m17n database by the tags +\. The plist format of the data is as +follows: + +@verbatim +FONT-LAYOUT-TABLE ::= + '(' CATEGORY-TABLE GENERATOR + ( CATEGORY-TABLE ? GENERATOR ) * ')' + +CATEGORY-TABLE ::= + '(' 'category' CATEGORY-SPEC + ')' + +CATEGORY-SPEC ::= + '(' ( CODE | CODE-FROM CODE-TO ) CATEGORY-CODE ')' + +CATEGORY-CODE ::= + [ '?A' | ... | '?Z' | '?a' | ... | '?z' ] + +GENERATOR ::= + '(' 'generator' RULE RULE-DEF * ')' + +RULE-DEF ::= + '(' RULE-NAME RULE ')' + +RULE ::= + REGEXP-RULE | MATCH-RULE | MAP-RULE | PREDEFINED-RULE + | OTF-RULE + +REGEXP-RULE ::= + '(' REGEXP RULE * ')' | RULE-NAME -@section fontenc Font Encoding +MATCH-RULE ::= + '(' MATCH-IDX RULE * ')' | RULE-NAME + +PREDEFINED-RULE ::= + '=' | '*' | '<' | '>' | '|' + +OTF-RULE ::= + 'otf:SCRIPT-TAG[:LANGSYS-TAG]' + where + SCRIPT-TAG is OTF's ScriptTag name (four letters) listed at: + + LANGSYS-TAG is OTF's Language System name (four letters) listed at: + + Example: 'otf:deva' + +MAP-RULE ::= + '(' ( SEQ-TO-SEQ | RANGE-TO-CODE ) + ')' | RULE-NAME + +SEQ-TO-SEQ ::= + '(' CODE + '-1' ( COMBINING ? CODE ) * ')' + +RANGE-TO-CODE ::= + '(' '(' CODE-FROM CODE-TO ')' COMBINING ? CODE ')' + +COMBINING ::= + 'VHOVH' + where V ::= ( 't' | 'c' | 'b' | 'B' ) + H ::= ( 'l' | 'c' | 'r' ) + O ::= ( '.' | XOFF | YOFF XOFF ?
) + XOFF ::= ('<' | '>') DIGITS + YOFF ::= ('+' | '-') DIGITS + Example: 'tc+bc', 'Bl-10<20Br' + +Semantics of PREDEFINED-RULE: + + '*': repeat the previous command (if successful) + '<': start grapheme cluster + '>': end grapheme cluster + '|': produce a special glyph that has category-code ' ' + '=': use the first glyph of the current run as is +@endverbatim + +@subsection seealso SEE ALSO + +m17nDBgeneral(5) +*/ +/***en + +@section m17nDBfontenc Font Encoding The m17n library loads information about the encoding of each font form the m17n database by the tags \. The plist @@ -139,43 +242,10 @@ registry is "iso8859-1". @c ENCODING is a symbol representing a charset. A font matching @c FONT-SPEC supports all characters of the charset, and a character code is mapped to the corresponding glyph code of the font by this charset. +*/ +/***en - -@section fontsize Font Resizing - -In some case, a font contains incorrect information about its size -(typically in the case of a hacked TrueType font), which results in a -bad text layout when such a font is used in combination with the other -fonts. To overcome this problem, the m17n library loads information -about font-size correction from the m17n database by the tags \. The plist format of the data is as follows: - -@verbatim -FONT-RESIZE ::= - PER-FONT-INFO * - -PER-FONT-INFO ::= - '(' FONT-SPEC RESIZE-RATIO ')' - -FONT-SPEC ::= - '(' - [ FOUNDRY FAMILY [ WEIGHT [ STYLE [ STRETCH [ ADSTYLE ] ] ] ] ] REGISTRY - ')' -@endverbatim - -The meanings of @c FOUNDRY to @c REGISTRY are the same as @e Font @e -Encoding. @c RESIZE-RATIO is an integer number specifying by -percentage how much the font-size must be adjusted. For instance, -this @c PER-FONT-INFO: - -@verbatim - ((devanagari-cdac) 150) -@endverbatim - -means that, to use a font of registry "devanagari-cdac" with a -specific size, we have to open an 1.5 times bigger one. 
- -@section fontset Fontset +@section m17nDBfontset Fontset The m17n library loads a fontset definition from the m17n database by the tags \. The plist format of the data is @@ -230,31 +300,385 @@ propert "ja" if the character is in the repertories of such fonts. Otherwise, try a font of registry "gb2312.1980-0" or "big5-0". If a "han" character does not have @c Mlangauge text property, try all three fonts. +*/ +/***en -@section flt Font Layout Table +@section m17nDBfontsize Font Resizing -Usually, the rendering engine converts character codes of into glyph -codes one by one by consulting information about encoding of each -selected font. But, for rendering a text that requires complicated -layouting (e.g. Thai and Indic), such an one to one conversion is not -sufficient. In addition, some glyphs must be shifted 2-dimensionally -on the screen. For such a case, a font layout table (FLT in short) -must be used. +In some case, a font contains incorrect information about its size +(typically in the case of a hacked TrueType font), which results in a +bad text layout when such a font is used in combination with the other +fonts. To overcome this problem, the m17n library loads information +about font-size correction from the m17n database by the tags \. The plist format of the data is as follows: -A FLT can contain the information equivarent to OpenType Layout Table -(CMAP, GSUB, and GPOS) in addition to the information about how to -extract a grapheme cluster and how to re-order characters. +@verbatim +FONT-RESIZE ::= + PER-FONT-INFO * -The m17n library loads a FLT from the m17n database by the tags -\. The plist format of the data is as -follows: +PER-FONT-INFO ::= + '(' FONT-SPEC RESIZE-RATIO ')' -@verbinclude flt.txt +FONT-SPEC ::= + '(' + [ FOUNDRY FAMILY [ WEIGHT [ STYLE [ STRETCH [ ADSTYLE ] ] ] ] ] REGISTRY + ')' +@endverbatim -@section im Input Method +The meanings of @c FOUNDRY to @c REGISTRY are the same as @e Font @e +Encoding. 
@c RESIZE-RATIO is an integer number specifying as a +percentage how much the font-size must be adjusted. For instance, +this @c PER-FONT-INFO: -@verbinclude im.txt +@verbatim + ((devanagari-cdac) 150) +@endverbatim +means that, to use a font of registry "devanagari-cdac" with a +specific size, we have to open a 1.5 times bigger one. */ +/***en + +@section m17nDBinput Input Method +@verbatim +INPUT-METHOD ::= + '(' TITLE MAP-LIST MODULE-LIST ? STATE-LIST ')' + +TITLE ::= + '(' title MTEXT ')' + +MAP-LIST ::= + '(' 'map' MAP * ')' + +MAP ::= + '(' MAP-NAME RULE * ')' + +MAP-NAME ::= + symbol + +RULE ::= + '(' KEYSEQ MAP-ACTION * ')' + +KEYSEQ ::= + mtext | '(' [ symbol | integer ] * ')' + +MAP-ACTION ::= + ACTION + +ACTION ::= + mtext | CANDIDATES | '(' ACTION-NAME ACTION-ARG * ')' + +CANDIDATES ::= + '(' CANDIDATE-GROUP * ')' + + (in ACTION, + MTEXT is a short form of "(insert MTEXT)" + CANDIDATES is a short form of "(candidates CANDIDATE-GROUP *)") + +ACTION-NAME ::= + 'insert' | 'candidates' | 'delete' | 'select' | 'select-group' + | 'move' | 'mark' | 'pushback' | 'undo' | 'shift' | 'call' + +ACTION-ARG ::= + integer | symbol | mtext | CANDIDATE-GROUP + +CANDIDATE-GROUP ::= + mtext | '(' mtext * ')' + +PREDEFINED-SYMBOL ::= + '@0' | '@1' | '@2' | '@3' | '@4' | '@5' | '@6' | '@7' | '@8' | '@9' + | '@<' | '@=' | '@>' | '@-' | '@+' + +MARKER ::= + PREDEFINED-SYMBOL | symbol + +CANDIDATE-INDEX ::= + PREDEFINED-SYMBOL + +CANDIDATE-GROUP-INDEX ::= + PREDEFINED-SYMBOL + +;; The first five actions ('insert' .. 'select-group') are for +;; editing a text in the preediting buffer. The buffer can keep +;; multiple markers. Each marker is represented by a symbol, and +;; keeps a position between characters in the preediting buffer. + +;; PREDEFINED-SYMBOL has special meanings when used as MARKER: +;; '@0', '@1', ... '@9' +;; The 0th, 1st, ... 9th position. +;; '@<', '@=', '@>' +;; The first, current, and end position. +;; '@-', '@+' +;; The previous and next position.
+ +;; PREDEFINED-SYMBOL has special meanings when used as CANDIDATE-INDEX: +;; '@0', '@1', ... '@9' +;; The 0th, 1st, ... 9th candidate of the current candidate group. +;; '@<', '@=', '@>' +;; The first, current, and end candidate of the current candidate +;; group. +;; '@-' +;; The previous candidate. If the current candidate is the +;; first of the current candidate group, the last candidate of +;; the previous candidate group. +;; '@+' +;; The next candidate. If the current candidate is the last of +;; the current candidate group, the first candidate of the +;; next candidate group. + +;; PREDEFINED-SYMBOL has special meanings when used as CANDIDATE-INDEX: +;; '@0', '@1', ... '@9' +;; The 0th, 1st, ... 9th candidate of the current candidate group. +;; '@<', '@=', '@>' +;; The first, current, and end candidate of the current candidate +;; group. +;; '@-' +;; The previous candidate. If the current candidate is the +;; first of the current candidate group, the last candidate of +;; the previous candidate group. +;; '@+' +;; The next candidate. If the current candidate is the last of +;; the current candidate group, the first candidate of the +;; next candidate group. + +;; PREDEFINED-SYMBOL has special meanings when used as +;; CANDIDATE-GROUP-INDEX: +;; '@0', '@1', ... '@9' +;; The 0th, 1st, ... 9th candidate group. +;; '@<', '@=', '@>' +;; The first, current, and end candidate group. +;; '@-', '@+' +;; The previous and the next candidate group. + +;; (insert MTEXT) inserts MTEXT before @=. + +;; (candidates CANDIDATE-GROUP *) sets the current candidates list +;; to the list of arguments, sets the current candidate group to the +;; first argument, inserts the first candidate of the current group +;; before @=, and marks the inserted text as the current candidate. +;; Each element of CANDIDATE-GROUP represents a candidate, i.e.
if +;; CANDIDATE-GROUP is MTEXT, each character in MTEXT is a +;; candidate, if CANDIDATE-GROUP is a list of MTEXT, each MTEXT is +;; a candidate. + +;; (select CANDIDATE-INDEX) replaces the current candidate with +;; what is specified by CANDIDATE-INDEX. If a candidate of the +;; different candidate group is specified, set the current +;; candidate group to that group. + +;; (select-group CANDIDATE-GROUP-INDEX) sets the current candidate +;; group to a group indicated by CANDIDATE-GROUP-INDEX, and replaces +;; the current candidate with the candidate of the same index in the +;; new group. + +;; (delete MARKER) deletes characters between @= and the position +;; specified by MARKER. + +;; (move MARKER) sets @= to the position specified by MARKER. + +;; (mark MARKER) sets MARKER to the position of @=. MARKER must +;; not be PREDEFINED-SYMBOL. + +;; (pushback) pushes back the latest key events to the event queue. + +;; (undo) cancels the last key events. + +;; (shift STATE-NAME) shifts the current state to the state +;; specified by STATE-NAME. + +;; (call FUNCTION ARG *) calls the function FUNCTION of an external +;; module. FUNCTION must be defined in MODULE-LIST. + +;; The function is called with a property list (MPlist *) that has +;; these properties in this order. +;; KEY VALUE +;; --- ----- +;; mtext The current preedit text. +;; symbol The current state name (MSymbol). +;; The remaining properties (if any) are ARGs. +;; +;; The function must return a property list (MPlist *) that +;; represents a list of ACTIONs to take. + +MODULE-LIST ::= + '(' 'module' MODULE * ')' + +MODULE ::= + '(' MODULE-NAME FUNCTION * ')' + +MODULE-NAME ::= + mtext + +FUNCTION ::= + symbol + +STATE-LIST ::= + '(' 'state' STATE * ')' + +STATE ::= + '(' STATE-NAME BRANCH * ')' + +STATE-NAME ::= + symbol + +BRANCH ::= + '(' [ MAP-NAME | 'nil' ] BRANCH-ACTION * ')' + +;; If MAP-NAME is specified, it must be a name of a map defined in +;; MAP-LIST.
Otherwise, BRANCH is the default branch of STATE. + +BRANCH-ACTION ::= + ACTION + + +;; Example: + +(title "sample") +(maps + (single + ("a" "A") + ("b" "B")) + (double + ("bb" (("BB" "Bb")))) + (select + ((Left) (select @-)) + ((Right) (select @+)))) + +(states + (init + (single) + (double (shift selection))) + (selection + (select))) + +;; When this input method is loaded, the following state transition +;; machine is created. + +;; STATE-TRANSITION-MACHINE ::= +;; '(' STATE-NAME ROOT-MAP ')' * +;; ROOT-MAP ::= TRANSITION-MAP +;; TRANSITION-MAP ::= +;; '(' INDEX [ KEY | 'nil' ] +;; MAP-ACTIONS TRANSITION-MAPS BRANCH-ACTIONS ')' +;; TRANSITION-MAPS ::= +;; '(' TRANSITION-MAP * ')' +;; MAP-ACTIONS ::= +;; '(' MAP-ACTION * ')' +;; BRANCH-ACTIONS ::= +;; '(' BRANCH-ACTION * ')' + +(init + (#0 nil nil + ((#1 'a' ((insert "A")) nil nil) + (#2 'b' ((insert "B")) + ((#3 'b' ((candidates (("BB" "Bb")))) nil (shift selection))) + nil)) + nil)) +(selection + (#4 nil nil + ((#5 'Left' ((delete @<) (select @-)) nil nil) + (#6 'Right' ((delete @<) (select @+)) nil nil)) + nil)) + +;; The state transition machine keeps these things: +;; STATE: the current state, initially 'init'. +;; MAP: current transition map, initially #0. +;; PREEDIT: the preediting buffer, initially empty. +;; MARKERS: the positions assigned to each marker, @= is initially 0. +;; PRODUCED: the produced text, initially empty. +;; CANDIDATE-LIST: the current candidate group list, initially NULL. +;; CANDIDATE-GROUP: the current candidate group, initially NULL. +;; CANDIDATE: the current candidate, initially NULL. +;; +;; When MAP is changed to the root map of the initial state, PREEDIT +;; is concatenated to PRODUCED and reset to empty. This way, the +;; machine produces a text. +;; +;; The machine accepts one key KEY, handles it while updating internal +;; information, and returns 'nil' (if KEY is correctly handled) or KEY +;; (if KEY can't be handled in the machine).
+ +;; Here we describe how the key sequence: +;; 'a' 'b' 'b' 'Right' 'b' 'a' +;; is handled by the machine and "aBbba" is produced. + +;; 'a' arrives. +;; +;; Lookup the transition maps of #0 for 'a' and find #1. Change MAP +;; to #1. Perform its map actions. Now PREEDIT contains "a". As it +;; has no sub transition maps, no branch actions, no state to shift, +;; change MAP to #0, the root map of the current state. +;; +;; As we have changed MAP to the root map of the initial state, +;; concatenate PREEDIT to PRODUCED, and reset PREEDIT to empty. +;; +;; Now we have consumed the key. As MAP has sub transition maps, wait +;; for the next key. + +;; 'b' arrives. +;; +;; Lookup #0 for 'b' and find #2. Change MAP to #2. Perform the map +;; actions. Now, PREEDIT contains "B". As it has sub transition +;; maps, and we have consumed the key, wait for the next key. + +;; 'b' arrives. +;; + +;; Lookup #2 for 'b' and find #3. Cancel the change in PREEDIT done +;; by the map actions of #2, and change MAP to #3. Perform the map +;; actions. Now, PREEDIT is "BB", CANDIDATE-LIST is (("BB" "Bb")), +;; CANDIDATE is "BB". As #3 has no sub transition maps and no branch +;; actions, change STATE to 'selection' and change MAP to #4, the root +;; map of 'selection'. As it has sub transition maps, and we have +;; consumed the key, wait for the next key. + +;; 'Right' arrives. +;; +;; Lookup #4 for 'Right' and find #6. Change MAP to #6. Perform the +;; map actions. Now PREEDIT is "Bb". As #6 has no sub transition +;; maps, no branch actions, no state to shift, change MAP to #4 (the +;; initial map of the current state 'selection'). +;; +;; As #4 has sub transition maps, and we have consumed the key, wait +;; for the next key. + +;; 'b' arrives. +;; +;; Lookup #4 for 'b' and fail. As #4 has no branch maps, no shift to +;; transit, change STATE to 'init', MAP to #0.
+;; +;; As we have changed MAP to the root map of the initial map, +;; concatenate PREEDIT to PRODUCED, and reset PREEDIT to empty. Now +;; PRODUCED has "aBb". +;; +;; As we have not yet consumed the key 'b', handle it in MAP (#0). +;; +;; Lookup #0 for 'b' and find #2. Change MAP to #2. Perform the map +;; actions. Now, PREEDIT contains "B". As it has sub transition +;; maps, and we have consumed the key, wait for the next key. + +;; 'a' arrives. Lookup #2 for 'a' and fail. As #2 has no branch +;; actions, no state to shift, Change MAP to #0, the root map of the +;; current state. +;; +;; As we have changed MAP to the root map of the initial map, +;; concatenate PREEDIT to PRODUCED, and reset PREEDIT to empty. Now +;; PRODUCED has "aBbb". +;; +;; As we have not yet consumed the key 'a', handle it in MAP (#0). +;; +;; Lookup #0 for 'a' and find #1. Change MAP to #1. Perform its map +;; actions. Now PREEDIT contains "a". As it has no sub transition +;; maps, no branch actions, no state to shift, change MAP to #0, the +;; root map of the current state. +;; +;; As we have changed MAP to the root map of the initial map, +;; concatenate PREEDIT to PRODUCED, and reset PREEDIT to empty. Now +;; PRODUCED has "aBbba". + +;; Now we have consumed the key. As MAP has sub transition maps, wait +;; for the next key. +@endverbatim +*/ //// diff --git a/doxyfile b/doxyfile index 044dcf8..122cbbd 100644 --- a/doxyfile +++ b/doxyfile @@ -315,7 +315,7 @@ INPUT = %INPUT% # and *.h) to filter out the source-files in the directories. If left # blank all files are included. -FILE_PATTERNS = *.c +FILE_PATTERNS = *.c *.txt # The RECURSIVE tag can be used to turn specify whether or not subdirectories diff --git a/utils/database.sh b/utils/database.sh deleted file mode 100644 index 1a4b108..0000000 --- a/utils/database.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh - -cat <