1 /* mconv.c -- Code converter. -*- coding: euc-jp; -*-
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
24 @enpage m17n-conv convert file code
26 @section m17n-conv-synopsis SYNOPSIS
28 m17n-conv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
30 @section m17n-conv-description DESCRIPTION
32 Convert encoding of given files from one to another.
34 If INFILE is omitted, the input is taken from standard input. If
35 OUTFILE is omitted, the output is written to standard output.
37 The following OPTIONs are available.
43 FROMCODE is the encoding of INFILE (defaults to UTF-8).
47 TOCODE is the encoding of OUTFILE (defaults to UTF-8).
51 Do not stop conversion on error.
59 Print progress information.
63 List available encodings.
76 @japage m17n-conv ファイルのコードを変換する
78 @section m17n-conv-synopsis SYNOPSIS
80 m17n-conv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
82 @section m17n-conv-description 説明
84 与えられたファイルのコードを別のものに変換する。
86 INFILE が省略された場合は、標準入力からとる。OUTFILE が省略された
87 場合は、標準出力へ書き出す。
89 以下のオプションが利用できる。
95 FROMCODE は INFILE のコード系である。(デフォルトは UTF-8)
99 TOCODE は OUTFILE のコード系である。(デフォルトは UTF-8)
103 エラーで変換を停止しない。
115 利用可能なコード系を列挙する。
119 バージョン番号を表示する。
123 このメッセージを表示する。
135 #include <m17n-misc.h>
137 #define VERSION "1.3.4"
139 /* Print all coding system names. */
/* qsort comparison callback.  ELT1 and ELT2 each point to an MSymbol;
   the result is strcmp applied to the names of the two symbols.  */
142 compare_coding_name (const void *elt1, const void *elt2)
/* Give the untyped qsort arguments their element type.  */
144 const MSymbol *n1 = elt1;
145 const MSymbol *n2 = elt2;
147 return strcmp (msymbol_name (*n1), msymbol_name (*n2));
/* NOTE(review): the statements below use variables (n, codings, i,
   name, len) that are not declared in compare_coding_name; they appear
   to belong to a separate listing routine whose header and
   declarations are not visible here -- confirm against the complete
   file.  */
/* Fetch the coding list into CODINGS (N receives the return value of
   mconv_list_codings) and sort it with compare_coding_name.  */
158 n = mconv_list_codings (&codings);
159 qsort (codings, n, sizeof (MSymbol), compare_coding_name);
/* Visit each of the N sorted entries.  */
161 for (i = 0; i < n; i++)
163 name = msymbol_name (codings[i]);
/* LEN is the length of the name plus one; its use is not visible in
   these lines.  */
164 len = strlen (name) + 1;
/* Print the name preceded by a single space.  */
170 printf (" %s", name);
178 /* Print the usage of this program (the name is PROG), and exit with EXIT_CODE. */
/* All output of the lines below goes to standard output via printf;
   PROG is substituted into the "Usage:" line.
   NOTE(review): no use of EXIT_CODE is visible in these lines;
   presumably it becomes the process exit status -- confirm against
   the complete file.  */
182 help_exit (char *prog, int exit_code)
/* Synopsis followed by the general description.  */
190 printf ("Usage: %s [ OPTION ... ] [ INFILE [ OUTFILE ] ]\n", prog);
191 printf ("Convert encoding of given files from one to another.\n");
192 printf (" If INFILE is omitted, the input is taken from standard input.\n");
193 printf (" If OUTFILE is omitted, the output is written to standard output.\n");
194 printf ("The following OPTIONs are available.\n");
/* One entry per option.  "%-13s" left-justifies the option name in a
   13-column field so that the descriptions line up; each description
   string carries its own trailing newline.  */
195 printf (" %-13s %s", "-f FROMCODE",
196 "FROMCODE is the encoding of INFILE (defaults to UTF-8).\n");
197 printf (" %-13s %s", "-t TOCODE",
198 "TOCODE is the encoding of OUTFILE (defaults to UTF-8).\n");
199 printf (" %-13s %s", "-k", "Do not stop conversion on error.\n");
200 printf (" %-13s %s", "-s", "Suppress warnings.\n");
201 printf (" %-13s %s", "-v", "Print progress information.\n");
202 printf (" %-13s %s", "-l", "List available encodings.\n");
203 printf (" %-13s %s", "--version", "Print version number.\n");
204 printf (" %-13s %s", "-h, --help", "Print this message.\n");
209 /* Check invalid bytes found in the last decoding. Text property
210 Mcharset of such a byte is Mcharset_binary. */
/* MT is the M-text to examine.  The visible lines report on stderr
   each character of a run whose Mcharset text property is
   Mcharset_binary, formatted as " 0xXX(POS)".
   NOTE(review): several lines of this function are not visible here
   (the enclosing loop, the declaration completed by the
   "= n > 0 ? ..." line, and the start of the fprintf call that takes
   the message string).  */
213 check_invalid_bytes (MText *mt)
/* FROM and TO delimit the stretch of text currently being examined.  */
215 int from = 0, to = 0;
216 int len = mtext_len (mt);
/* Query the Mcharset property at FROM; &to is passed so the call can
   store where that stretch ends (the loop below runs FROM up to TO).  */
221 int n = mtext_prop_range (mt, Mcharset, from, NULL, &to, 1);
/* When the call reported no property (n <= 0) the value is Mnil.  */
223 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcharset) : Mnil;
225 if (charset == Mcharset_binary)
/* NOTE(review): this message ends with ';' whereas the parallel
   message in check_unencoded_chars ends with ':' -- probably a typo.  */
230 "Invalid bytes (at each character position);\n");
/* Print each character of the stretch in hex followed by its
   position; FROM ends up equal to TO.  */
233 for (; from < to; from++)
234 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
/* Terminate the report line.  */
240 fprintf (stderr, "\n");
244 /* Check unencoded characters in the last encoding. Text property
245 Mcoding of such a character is Mnil. */
/* MT is the M-text to examine; LEN is supplied by the caller (main
   passes converter->nchars).  The visible lines report on stderr the
   characters of a stretch of MT, formatted as " 0xXX(POS)".
   NOTE(review): several lines of this function are not visible here
   (the enclosing loop, the declaration completed by the
   "= n > 0 ? ..." line, the condition that selects a stretch for
   reporting, and the start of the fprintf call that takes the message
   string).  */
248 check_unencoded_chars (MText *mt, int len)
/* FROM and TO delimit the stretch of text currently being examined.  */
250 int from = 0, to = 0;
/* Query the Mcoding property at FROM; &to is passed so the call can
   store where that stretch ends (the loop below runs FROM up to TO).  */
255 int n = mtext_prop_range (mt, Mcoding, from, NULL, &to, 1);
/* When the call reported no property (n <= 0) the value is Mnil.  */
257 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcoding) : Mnil;
264 "Unencoded characters (at each character position):\n");
/* Print each character of the stretch in hex followed by its
   position; FROM ends up equal to TO.  */
267 for (; from < to; from++)
268 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
/* Terminate the report line.  */
274 fprintf (stderr, "\n");
278 /* Print ARG to stderr formatted by FMT (via fprintf), and exit.
   FMT is used as the fprintf format string, so it must consume exactly
   the single argument ARG.
   NOTE(review): the exit step and the rest of the macro body are not
   among the lines visible here.  */
280 #define FATAL_ERROR(fmt, arg) \
282 fprintf (stderr, fmt, arg); \
/* Entry point.  Parses the command line in ARGV, decodes the input
   stream with INCODE into an M-text, then encodes that M-text to the
   output stream with OUTCODE.
   NOTE(review): a number of lines of this function (remaining
   declarations, several conditions and branch bodies, the return) are
   not visible here; the comments below describe only what is shown.  */
288 main (int argc, char **argv)
290 int suppress_warning, verbose, continue_on_error;
291 MSymbol incode, outcode;
294 MConverter *converter;
297 /* Initialize the m17n library. */
299 if (merror_code != MERROR_NONE)
300 FATAL_ERROR ("%s\n", "Fail to initialize the m17n library.");
302 /* Default encodings are both UTF-8. */
303 incode = outcode = Mcoding_utf_8;
304 /* By default, read from standard input and write to standard output. */
305 in = stdin, out = stdout;
306 /* By default, all these flags are 0. */
307 suppress_warning = verbose = continue_on_error = 0;
308 /* Parse the command line arguments. */
309 for (i = 1; i < argc; i++)
/* Any of the three help spellings prints the usage with code 0.  */
311 if (! strcmp (argv[i], "--help")
312 || ! strcmp (argv[i], "-h")
313 || ! strcmp (argv[i], "-?"))
314 help_exit (argv[0], 0);
315 else if (! strcmp (argv[i], "--version"))
317 printf ("m17n-conv (m17n library) %s\n", VERSION);
318 printf ("Copyright (C) 2003 AIST, JAPAN\n");
321 else if (! strcmp (argv[i], "-l"))
327 else if (! strcmp (argv[i], "-f"))
/* The option value is the next argument; I is advanced past it.
   NOTE(review): no check that another argument follows "-f" is
   visible; if "-f" is last, argv[argc] (NULL) is passed to msymbol
   -- confirm and guard.  */
329 incode = mconv_resolve_coding (msymbol (argv[++i]));
331 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
333 else if (! strcmp (argv[i], "-t"))
/* Same handling as "-f", for the output encoding.
   NOTE(review): the same missing-argument concern applies.  */
335 outcode = mconv_resolve_coding (msymbol (argv[++i]));
337 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
339 else if (! strcmp (argv[i], "-k"))
/* NOTE(review): no later use of continue_on_error is visible in this
   function.  */
340 continue_on_error = 1;
341 else if (! strcmp (argv[i], "-s"))
342 suppress_warning = 1;
343 else if (! strcmp (argv[i], "-v"))
/* Arguments not starting with '-' are file names.  */
345 else if (argv[i][0] != '-')
/* Input file.  The condition selecting this branch is not visible;
   presumably it is taken while IN is still stdin.  */
349 in = fopen (argv[i], "r");
351 FATAL_ERROR ("Can't read the file %s\n", argv[i]);
/* Output file: taken only while OUT is still the default stdout.  */
353 else if (out == stdout)
355 out = fopen (argv[i], "w");
357 FATAL_ERROR ("Can't write the file %s\n", argv[i]);
/* Remaining cases print the usage with code 1.  */
360 help_exit (argv[0], 1);
363 help_exit (argv[0], 1);
366 /* Create an M-text to store the decoded characters. */
369 /* Create a converter for decoding. */
/* NOTE(review): the returned pointer is dereferenced below with no
   NULL check in between.  */
370 converter = mconv_stream_converter (incode, in);
371 /* Instead of doing strict decoding, we decode all input bytes at
372 once, and check invalid bytes later by the function
373 check_invalid_bytes. */
374 converter->lenient = 1;
/* The return value of mconv_decode is not examined.  */
376 mconv_decode (converter, mt);
378 if (! suppress_warning)
379 check_invalid_bytes (mt);
/* Progress report for the decoding step (the condition guarding it is
   not visible here; presumably VERBOSE).  */
381 fprintf (stderr, "%d bytes (%s) decoded into %d characters,\n",
382 converter->nbytes, msymbol_name (incode), mtext_len (mt));
384 mconv_free_converter (converter);
386 /* Create a converter for encoding. */
/* NOTE(review): as above, no NULL check on the new converter is
   visible before it is dereferenced.  */
387 converter = mconv_stream_converter (outcode, out);
388 /* Instead of doing strict encoding, we encode all characters at
389 once, and check unencoded characters later by the function
390 check_unencoded_chars. */
391 converter->lenient = 1;
392 converter->last_block = 1;
/* A negative result from mconv_encode is reported as a write error
   unless warnings are suppressed.  */
393 if (mconv_encode (converter, mt) < 0
394 && ! suppress_warning)
395 fprintf (stderr, "I/O error on writing\n");
396 if (! suppress_warning)
397 check_unencoded_chars (mt, converter->nchars);
/* Progress report for the encoding step (guarding condition not
   visible here; presumably VERBOSE).  */
399 fprintf (stderr, "%d characters encoded into %d bytes (%s).\n",
400 converter->nchars, converter->nbytes, msymbol_name (outcode));
403 mconv_free_converter (converter);
/* Release this function's reference to the M-text.  */
406 m17n_object_unref (mt);
410 #endif /* not FOR_DOXYGEN */