1 /* mconv.c -- Code converter.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 @enpage mconv convert file code
26 @section mconv-synopsis SYNOPSIS
28 mconv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
30 @section mconv-description DESCRIPTION
32 Convert encoding of given files from one to another.
34 If INFILE is omitted, the input is taken from standard input. If
35 OUTFILE is omitted, the output is written to standard output.
37 The following OPTIONs are available.
43 FROMCODE is the encoding of INFILE (defaults to UTF-8).
47 TOCODE is the encoding of OUTFILE (defaults to UTF-8).
51 Do not stop conversion on error.
59 Print progress information.
63 List available encodings.
76 @japage mconv ファイルのコードを変換する
78 @section mconv-synopsis SYNOPSIS
80 mconv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
82 @section mconv-description 説明
84 与えられたファイルのコードを別のものに変換する。
86 INFILE が省略された場合は、標準入力からとる。OUTFILE が省略された
87 場合は、標準出力へ書き出す。
89 以下のオプションが利用できる。
95 FROMCODE は INFILE のコード系である。(デフォルトは UTF-8)
99 TOCODE は OUTFILE のコード系である。(デフォルトは UTF-8)
103 エラーで変換を停止しない。
115 利用可能なコード系を列挙する。
119 バージョン番号を表示する。
123 このメッセージを表示する。
135 #include <m17n-misc.h>
137 #define VERSION "1.0"
139 /* Print all coding system names. */
149 n = mconv_list_codings (&codings);
151 for (i = 0; i < n; i++)
153 name = msymbol_name (codings[i]);
154 len = strlen (name) + 1;
160 printf (" %s", name);
168 /* Print the usage of this program (the name is PROG), and exit with
172 help_exit (char *prog, int exit_code)
180 printf ("Usage: %s [ OPTION ... ] [ INFILE [ OUTFILE ] ]\n", prog);
181 printf ("Convert encoding of given files from one to another.\n");
182 printf (" If INFILE is omitted, the input is taken from standard input.\n");
183 printf (" If OUTFILE is omitted, the output is written to standard output.\n");
184 printf ("The following OPTIONs are available.\n");
185 printf (" %-13s %s", "-f FROMCODE",
186 "FROMCODE is the encoding of INFILE (defaults to UTF-8).\n");
187 printf (" %-13s %s", "-t TOCODE",
188 "TOCODE is the encoding of OUTFILE (defaults to UTF-8).\n");
189 printf (" %-13s %s", "-k", "Do not stop conversion on error.\n");
190 printf (" %-13s %s", "-s", "Suppress warnings.\n");
191 printf (" %-13s %s", "-v", "Print progress information.\n");
192 printf (" %-13s %s", "-l", "List available encodings.\n");
193 printf (" %-13s %s", "--version", "Print version number.\n");
194 printf (" %-13s %s", "-h, --help", "Print this message.\n");
199 /* Check invalid bytes found in the last decoding. Text property
200 Mcharset of such a byte is Mcharset_binary. */
203 check_invalid_bytes (MText *mt)
205 int from = 0, to = 0;
206 int len = mtext_len (mt);
211 int n = mtext_prop_range (mt, Mcharset, from, NULL, &to, 1);
213 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcharset) : Mnil;
215 if (charset == Mcharset_binary)
220 "Invalid bytes (at each character position);\n");
223 for (; from < to; from++)
224 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
230 fprintf (stderr, "\n");
234 /* Check unencoded characters in the last encoding. Text property
235 Mcoding of such a character is Mnil. */
238 check_unencoded_chars (MText *mt, int len)
240 int from = 0, to = 0;
245 int n = mtext_prop_range (mt, Mcoding, from, NULL, &to, 1);
247 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcoding) : Mnil;
254 "Unencoded characters (at each character position):\n");
257 for (; from < to; from++)
258 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
264 fprintf (stderr, "\n");
268 /* Format MSG by FMT and print the result to the stderr, and exit. */
270 #define FATAL_ERROR(fmt, arg) \
272 fprintf (stderr, fmt, arg); \
278 main (int argc, char **argv)
280 int suppress_warning, verbose, continue_on_error;
281 MSymbol incode, outcode;
284 MConverter *converter;
287 /* Initialize the m17n library. */
289 if (merror_code != MERROR_NONE)
290 FATAL_ERROR ("%s\n", "Fail to initialize the m17n library.");
292 /* Default encodings are both UTF-8. */
293 incode = outcode = Mcoding_utf_8;
294 /* By default, read from standard input and write to standard output. */
295 in = stdin, out = stdout;
296 /* By default, all these flags are 0. */
297 suppress_warning = verbose = continue_on_error = 0;
298 /* Parse the command line arguments. */
299 for (i = 1; i < argc; i++)
301 if (! strcmp (argv[i], "--help")
302 || ! strcmp (argv[i], "-h")
303 || ! strcmp (argv[i], "-?"))
304 help_exit (argv[0], 0);
305 else if (! strcmp (argv[i], "--version"))
307 printf ("mconv (m17n library) %s\n", VERSION);
308 printf ("Copyright (C) 2003 AIST, JAPAN\n");
311 else if (! strcmp (argv[i], "-l"))
317 else if (! strcmp (argv[i], "-f"))
319 incode = mconv_resolve_coding (msymbol (argv[++i]));
321 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
323 else if (! strcmp (argv[i], "-t"))
325 outcode = mconv_resolve_coding (msymbol (argv[++i]));
327 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
329 else if (! strcmp (argv[i], "-k"))
330 continue_on_error = 1;
331 else if (! strcmp (argv[i], "-s"))
332 suppress_warning = 1;
333 else if (! strcmp (argv[i], "-v"))
335 else if (argv[i][0] != '-')
339 in = fopen (argv[i], "r");
341 FATAL_ERROR ("Can't read the file %s\n", argv[i]);
343 else if (out == stdout)
345 out = fopen (argv[i], "w");
347 FATAL_ERROR ("Can't write the file %s\n", argv[i]);
350 help_exit (argv[0], 1);
353 help_exit (argv[0], 1);
356 /* Create an M-text to store the decoded characters. */
359 /* Create a converter for decoding. */
360 converter = mconv_stream_converter (incode, in);
361 /* Instead of doing strict decoding, we decode all input bytes at
362 once, and check invalid bytes later by the function
363 check_invalid_bytes. */
364 converter->lenient = 1;
366 mconv_decode (converter, mt);
368 if (! suppress_warning)
369 check_invalid_bytes (mt);
371 fprintf (stderr, "%d bytes (%s) decoded into %d characters,\n",
372 converter->nbytes, msymbol_name (incode), mtext_len (mt));
374 mconv_free_converter (converter);
376 /* Create a converter for encoding. */
377 converter = mconv_stream_converter (outcode, out);
378 /* Instead of doing strict encoding, we encode all characters at
379 once, and check unencoded characters later by the function
380 check_unencoded_chars. */
381 converter->lenient = 1;
382 converter->last_block = 1;
383 if (mconv_encode (converter, mt) < 0
384 && ! suppress_warning)
385 fprintf (stderr, "I/O error on writing\n");
386 if (! suppress_warning)
387 check_unencoded_chars (mt, converter->nchars);
389 fprintf (stderr, "%d characters encoded into %d bytes (%s).\n",
390 converter->nchars, converter->nbytes, msymbol_name (outcode));
393 mconv_free_converter (converter);
394 m17n_object_unref (mt);
398 #endif /* not FOR_DOXYGEN */