support more omlgc.
author imiyazaki <imiyazaki>
Fri, 23 Jan 2004 12:09:47 +0000 (12:09 +0000)
committer imiyazaki <imiyazaki>
Fri, 23 Jan 2004 12:09:47 +0000 (12:09 +0000)
inCHISE

diff --git a/inCHISE b/inCHISE
index 09cb0f8..9f7a731 100755 (executable)
--- a/inCHISE
+++ b/inCHISE
@@ -187,12 +187,25 @@ while(<>){
        }elsif($char=~m/($tex_meta_re)/o){
            print $tex_meta{$1};
            next CHAR;
-       }elsif($char_id>0x20 and $char_id<=0x02af){
-           # Basic Latin
-           # Latin-1 Supplement
-           # Latin Extended-A
-           # Latin Extended-B
-           # IPA Extensions
+       }elsif(($char_id>0x20 and $char_id<=0x021f)
+              # Basic Latin
+              # Latin-1 Supplement
+              # Latin Extended-A
+              # Latin Extended-B (not all)
+              or($char_id>=0x0250 and $char_id<=0x02af)
+              # IPA Extensions
+              or($char_id>=0x0300 and $char_id<=0x033f)
+              or($char_id>=0x0360 and $char_id<=0x036f)
+              # Combining Diacritical Marks
+              or($char_id>=0x1e00 and $char_id<=0x1eff)
+              # Latin Extended Additional
+              or($char_id>=0x0370 and $char_id<=0x03ff)
+               # Greek and Coptic
+              or($char_id>=0x0400 and $char_id<=0x04ff)
+               # Cyrillic
+              or($char_id>=0x0530 and $char_id<=0x058f)
+               # Armenian
+              ){
            print &latin_parse();
            next CHAR;
        }elsif($char_id>=0x2ff0 and $char_id<=0x2fff){
@@ -330,8 +343,19 @@ sub latin_parse{
     $i++;
     while($i<=$#chars){
        $char_id=unpack("U",$chars[$i]);
-       if($char_id>0x20 and $char_id<=0x02af){
+       if(($char_id>0x20 and $char_id<=0x021f)
+          or($char_id>=0x0250 and $char_id<=0x02af)# IPA Extensions
+          or($char_id>=0x0300 and $char_id<=0x033f)# Combining Diacritical Marks
+          or($char_id>=0x0360 and $char_id<=0x036f)
+          or($char_id>=0x0370 and $char_id<=0x03ff)# Greek and Coptic
+          or($char_id>=0x0400 and $char_id<=0x04ff)# Cyrillic
+          or($char_id>=0x0530 and $char_id<=0x058f)# Armenian
+          ){
            $out_str.=$chars[$i];
+       }elsif($char_id>=0x1e00 and $char_id<=0x1eff){
+           # Latin Extended Additional
+            # 0x1e00 -> 0x0600, etc.
+           $out_str.=pack("U",$char_id-0x1800);
        }else{
            $i--;
            last;