From f9120418868c224a29b458672f041e6d209d8762 Mon Sep 17 00:00:00 2001 From: imiyazaki Date: Fri, 23 Jan 2004 12:09:47 +0000 Subject: [PATCH] support more omlgc. --- inCHISE | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/inCHISE b/inCHISE index 09cb0f8..9f7a731 100755 --- a/inCHISE +++ b/inCHISE @@ -187,12 +187,25 @@ while(<>){ }elsif($char=~m/($tex_meta_re)/o){ print $tex_meta{$1}; next CHAR; - }elsif($char_id>0x20 and $char_id<=0x02af){ - # Basic Latin - # Latin-1 Supplement - # Latin Extended-A - # Latin Extended-B - # IPA Extensions + }elsif(($char_id>0x20 and $char_id<=0x021f) + # Basic Latin + # Latin-1 Supplement + # Latin Extended-A + # Latin Extended-B (not all) + or($char_id>=0x0250 and $char_id<=0x02af) + # IPA Extensions + or($char_id>=0x0300 and $char_id<=0x033f) + or($char_id>=0x0360 and $char_id<=0x036f) + # Combining Diacritical Marks + or($char_id>=0x1e00 and $char_id<=0x1eff) + # Latin Extended Additional + or($char_id>=0x0370 and $char_id<=0x03ff) + # Greek and Coptic + or($char_id>=0x0400 and $char_id<=0x04ff) + # Cyrillic + or($char_id>=0x0530 and $char_id<=0x058f) + # Armenian + ){ print &latin_parse(); next CHAR; }elsif($char_id>=0x2ff0 and $char_id<=0x2fff){ @@ -330,8 +343,19 @@ sub latin_parse{ $i++; while($i<=$#chars){ $char_id=unpack("U",$chars[$i]); - if($char_id>0x20 and $char_id<=0x02af){ + if(($char_id>0x20 and $char_id<=0x021f) + or($char_id>=0x0250 and $char_id<=0x02af)# IPA Extensions + or($char_id>=0x0300 and $char_id<=0x033f)# Combining Diacritical Marks + or($char_id>=0x0360 and $char_id<=0x036f) + or($char_id>=0x0370 and $char_id<=0x03ff)# Greek and Coptic + or($char_id>=0x0400 and $char_id<=0x04ff)# Cyrillic + or($char_id>=0x0530 and $char_id<=0x058f)# Armenian + ){ $out_str.=$chars[$i]; + }elsif($char_id>=0x1e00 and $char_id<=0x1eff){ + # Latin Extended Additional + # 0x1e00 -> 0x0600, etc. + $out_str.=pack("U",$char_id-0x1800); }else{ $i--; last; -- 1.7.10.4