1 /* mconv.c -- Code converter.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 @page mconv convert file code
26 @section mconv-synopsis SYNOPSIS
28 mconv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
30 @section mconv-description DESCRIPTION
32 Convert encoding of given files from one to another.
34 If INFILE is omitted, the input is taken from standard input. If
35 OUTFILE is omitted, the output is written to standard output.
37 The following OPTIONs are available.
43 FROMCODE is the encoding of INFILE (defaults to UTF-8).
47 TOCODE is the encoding of OUTFILE (defaults to UTF-8).
51 Do not stop conversion on error.
59 Print progress information.
63 List available encodings.
83 #include <m17n-misc.h>
87 /* Print all coding system names. */
97 n = mconv_list_codings (&codings);
99 for (i = 0; i < n; i++)
101 name = msymbol_name (codings[i]);
102 len = strlen (name) + 1;
108 printf (" %s", name);
116 /* Print the usage of this program (the name is PROG), and exit with
120 help_exit (char *prog, int exit_code)
128 printf ("Usage: %s [ OPTION ... ] [ INFILE [ OUTFILE ] ]\n", prog);
129 printf ("Convert encoding of given files from one to another.\n");
130 printf (" If INFILE is omitted, the input is taken from standard input.\n");
131 printf (" If OUTFILE is omitted, the output is written to standard output.\n");
132 printf ("The following OPTIONs are available.\n");
133 printf (" %-13s %s", "-f FROMCODE",
134 "FROMCODE is the encoding of INFILE (defaults to UTF-8).\n");
135 printf (" %-13s %s", "-t TOCODE",
136 "TOCODE is the encoding of OUTFILE (defaults to UTF-8).\n");
137 printf (" %-13s %s", "-k", "Do not stop conversion on error.\n");
138 printf (" %-13s %s", "-s", "Suppress warnings.\n");
139 printf (" %-13s %s", "-v", "Print progress information.\n");
140 printf (" %-13s %s", "-l", "List available encodings.\n");
141 printf (" %-13s %s", "--version", "Print version number.\n");
142 printf (" %-13s %s", "-h, --help", "Print this message.\n");
147 /* Check for invalid bytes found in the last decoding. The text property
148 Mcharset of such a byte is Mcharset_binary. */
151 check_invalid_bytes (MText *mt)
153 int from = 0, to = 0;
154 int len = mtext_len (mt);
159 int n = mtext_prop_range (mt, Mcharset, from, NULL, &to, 1);
161 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcharset) : Mnil;
163 if (charset == Mcharset_binary)
168 "Invalid bytes (at each character position);\n");
171 for (; from < to; from++)
172 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
178 fprintf (stderr, "\n");
182 /* Check for unencoded characters in the last encoding. The text property
183 Mcoding of such a character is Mnil. */
186 check_unencoded_chars (MText *mt, int len)
188 int from = 0, to = 0;
193 int n = mtext_prop_range (mt, Mcoding, from, NULL, &to, 1);
195 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcoding) : Mnil;
202 "Unencoded characters (at each character position):\n");
205 for (; from < to; from++)
206 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
212 fprintf (stderr, "\n");
216 /* Format ARG by FMT, print the result to stderr, and exit. */
218 #define FATAL_ERROR(fmt, arg) \
220 fprintf (stderr, fmt, arg); \
226 main (int argc, char **argv)
228 int suppress_warning, verbose, continue_on_error;
229 MSymbol incode, outcode;
232 MConverter *converter;
235 /* Initialize the m17n library. */
237 if (merror_code != MERROR_NONE)
238 FATAL_ERROR ("%s\n", "Fail to initialize the m17n library.");
240 /* Default encodings are both UTF-8. */
241 incode = outcode = Mcoding_utf_8;
242 /* By default, read from standard input and write to standard output. */
243 in = stdin, out = stdout;
244 /* By default, all these flags are 0. */
245 suppress_warning = verbose = continue_on_error = 0;
246 /* Parse the command line arguments. */
247 for (i = 1; i < argc; i++)
249 if (! strcmp (argv[i], "--help")
250 || ! strcmp (argv[i], "-h")
251 || ! strcmp (argv[i], "-?"))
252 help_exit (argv[0], 0);
253 else if (! strcmp (argv[i], "--version"))
255 printf ("mconv (m17n library) %s\n", VERSION);
256 printf ("Copyright (C) 2003 AIST, JAPAN\n");
259 else if (! strcmp (argv[i], "-l"))
265 else if (! strcmp (argv[i], "-f"))
267 incode = mconv_resolve_coding (msymbol (argv[++i]));
269 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
271 else if (! strcmp (argv[i], "-t"))
273 outcode = mconv_resolve_coding (msymbol (argv[++i]));
275 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
277 else if (! strcmp (argv[i], "-k"))
278 continue_on_error = 1;
279 else if (! strcmp (argv[i], "-s"))
280 suppress_warning = 1;
281 else if (! strcmp (argv[i], "-v"))
283 else if (argv[i][0] != '-')
287 in = fopen (argv[i], "r");
289 FATAL_ERROR ("Can't read the file %s\n", argv[i]);
291 else if (out == stdout)
293 out = fopen (argv[i], "w");
295 FATAL_ERROR ("Can't write the file %s\n", argv[i]);
298 help_exit (argv[0], 1);
301 help_exit (argv[0], 1);
304 /* Create an M-text to store the decoded characters. */
307 /* Create a converter for decoding. */
308 converter = mconv_stream_converter (incode, in);
309 /* Instead of doing strict decoding, we decode all input bytes at
310 once, and check invalid bytes later by the function
311 check_invalid_bytes. */
312 converter->lenient = 1;
314 mconv_decode (converter, mt);
316 if (! suppress_warning)
317 check_invalid_bytes (mt);
319 fprintf (stderr, "%d bytes (%s) decoded into %d characters,\n",
320 converter->nbytes, msymbol_name (incode), mtext_len (mt));
322 mconv_free_converter (converter);
324 /* Create a converter for encoding. */
325 converter = mconv_stream_converter (outcode, out);
326 /* Instead of doing strict encoding, we encode all characters at
327 once, and check unencoded characters later by the function
328 check_unencoded_chars. */
329 converter->lenient = 1;
330 converter->last_block = 1;
331 if (mconv_encode (converter, mt) < 0
332 && ! suppress_warning)
333 fprintf (stderr, "I/O error on writing\n");
334 if (! suppress_warning)
335 check_unencoded_chars (mt, converter->nchars);
337 fprintf (stderr, "%d characters encoded into %d bytes (%s).\n",
338 converter->nchars, converter->nbytes, msymbol_name (outcode));
341 mconv_free_converter (converter);
342 m17n_object_unref (mt);
346 #endif /* not FOR_DOXYGEN */