update.
[chise/ruby.git] / chise / iconv.rb
1 # Copyright (C) 2002-2004 Kouichirou Eto, All rights reserved.
2
3 require "iconv"
4
# Lenient conversion helpers added to Iconv. Instead of aborting on the first
# piece of input the converter rejects, they substitute a replacement and
# carry on with the rest of the string.
class Iconv
  # Converts +str+ using the converter Iconv.new(from, "UTF-8") and returns
  # the converted string.
  #
  # NOTE(review): iconv_to_from calls Iconv.new(to, from), so in this method
  # +from+ sits in the target position and "UTF-8" in the source position:
  # +str+ is treated as UTF-8 and converted *into* the encoding named by
  # +from+, despite what the method name suggests. The fallback below decodes
  # the rejected character with unpack("U"), which only makes sense for UTF-8
  # input, so the direction is deliberately left as it is. Confirm with
  # callers before renaming or swapping anything.
  #
  # A character rejected with Iconv::IllegalSequence is replaced by
  # unknown_unicode_handler(code_point) when that method is available, and by
  # "?" otherwise. Input rejected with Iconv::InvalidCharacter is replaced by
  # "?", the same way iconv_to_from treats it.
  def self.iconv_to_utf8(from, str)
    replace_unconvertible(Iconv.new(from, "UTF-8"), str) do |ch|
      if respond_to?(:unknown_unicode_handler)
        unknown_unicode_handler(ch.unpack("U").first)
      else
        "?"
      end
    end
  end

  # Default replacement for an unconvertible character: the code point +u+
  # (an Integer) rendered as a hexadecimal numeric character reference with
  # at least four digits, e.g. 0x3042 => "&#x3042;".
  def self.unknown_unicode_handler(u)
    sprintf("&#x%04x;", u)
  end

  # Converts +str+ from the encoding +from+ to the encoding +to+ and returns
  # the result. Every piece of input rejected with Iconv::IllegalSequence or
  # Iconv::InvalidCharacter is replaced by "?".
  def self.iconv_to_from(to, from, str)
    replace_unconvertible(Iconv.new(to, from), str) { "?" }
  end

  # Runs +str+ through the converter +iconv+. On each conversion failure it
  # keeps what was converted so far, drops the first character of the
  # unconverted remainder and appends a replacement instead, then resumes
  # with what is left. The block receives the dropped character and returns
  # the replacement for an IllegalSequence failure; an InvalidCharacter
  # failure always yields "?" without consulting the block.
  #
  # The converter is closed before returning, also when an unexpected error
  # escapes. The value returned by Iconv#close is discarded, so the output is
  # exactly what the successive Iconv#iconv calls produced.
  #
  # NOTE(review): the remainder is always split with a UTF-8 regexp (//u),
  # even when the source encoding is not UTF-8 -- verify for other sources.
  def self.replace_unconvertible(iconv, str)
    out = ""
    begin
      out << iconv.iconv(str)
    rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => e
      out << e.success
      # +str+ becomes nil when nothing follows the dropped character; the
      # retry then calls iconv.iconv(nil).
      ch, str = e.failed.split(//u, 2)
      out << (e.is_a?(Iconv::IllegalSequence) ? yield(ch) : "?")
      retry
    end
    out
  ensure
    iconv.close
  end
  private_class_method :replace_unconvertible
end
48
# Encoding conversion shortcuts on String. The method names read
# "<source>to<target>", with u8/u16/u32 standing for UTF-8/16/32. Apart from
# u8tojis, every method delegates to Iconv.iconv_to_from(to, from, self).
class String
  # EUC-JP -> UTF-8.
  def euctou8
    Iconv.iconv_to_from("UTF-8", "EUC-JP", self)
  end

  # UTF-8 -> EUC-JP.
  def u8toeuc
    Iconv.iconv_to_from("EUC-JP", "UTF-8", self)
  end

  # Shift_JIS -> UTF-8.
  def sjistou8
    Iconv.iconv_to_from("UTF-8", "Shift_JIS", self)
  end

  # UTF-8 -> Shift_JIS.
  def u8tosjis
    Iconv.iconv_to_from("Shift_JIS", "UTF-8", self)
  end

  # ISO-2022-JP -> UTF-8.
  def jistou8
    Iconv.iconv_to_from("UTF-8", "ISO-2022-JP", self)
  end

  # UTF-8 -> ISO-2022-JP.
  #
  # Drives a converter directly so that the bytes returned by Iconv#close can
  # be appended to the converted text (per the Iconv documentation that is
  # the sequence restoring the initial shift state). Unlike the other
  # methods, conversion errors are not rescued here and propagate to the
  # caller; the converter is closed in that case as well.
  def u8tojis
    converter = Iconv.new("ISO-2022-JP", "UTF-8")
    begin
      converted = converter.iconv(self)
      converted + converter.close
    ensure
      # Harmless after the close above; releases the converter when
      # Iconv#iconv raised before it was reached.
      converter.close
    end
  end

  # UTF-8 -> UTF-16, with a leading FE FF byte order mark removed.
  #
  # The /n flag makes the pattern byte-oriented; without it Ruby 1.9 and
  # later reject the literal ("invalid multibyte escape").
  # NOTE(review): only the big-endian mark FE FF is stripped; a converter
  # that emits FF FE would keep its mark -- confirm the target platforms.
  def u8tou16
    Iconv.iconv_to_from("UTF-16", "UTF-8", self).sub(/\A\376\377/n, "")
  end

  # UTF-8 -> UTF-32, with a leading 00 00 FE FF byte order mark removed
  # (byte-oriented pattern, see u8tou16).
  def u8tou32
    Iconv.iconv_to_from("UTF-32", "UTF-8", self).sub(/\A\0\0\376\377/n, "")
  end

  # UTF-32 -> UTF-8.
  def u32tou8
    Iconv.iconv_to_from("UTF-8", "UTF-32", self)
  end

  # UTF-32 -> UTF-16, with a leading FE FF byte order mark removed
  # (byte-oriented pattern, see u8tou16).
  def u32tou16
    Iconv.iconv_to_from("UTF-16", "UTF-32", self).sub(/\A\376\377/n, "")
  end

  # UTF-16 -> EUC-JP.
  def u16toeuc
    Iconv.iconv_to_from("EUC-JP", "UTF-16", self)
  end

  # UTF-16 -> Shift_JIS.
  def u16tosjis
    Iconv.iconv_to_from("Shift_JIS", "UTF-16", self)
  end
end