1 /* mconv.c -- Code converter. -*- coding: euc-jp; -*-
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 @enpage m17n-conv convert file code
26 @section m17n-conv-synopsis SYNOPSIS
28 m17n-conv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
30 @section m17n-conv-description DESCRIPTION
32 Convert encoding of given files from one to another.
34 If INFILE is omitted, the input is taken from standard input. If
35 OUTFILE is omitted, the output is written to standard output.
37 The following OPTIONs are available.
43 FROMCODE is the encoding of INFILE (defaults to UTF-8).
47 TOCODE is the encoding of OUTFILE (defaults to UTF-8).
51 Do not stop conversion on error.
59 Print progress information.
63 List available encodings.
76 @japage m17n-conv ファイルのコードを変換する
78 @section m17n-conv-synopsis SYNOPSIS
80 m17n-conv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
82 @section m17n-conv-description 説明
84 与えられたファイルのコードを別のものに変換する。
86 INFILE が省略された場合は、標準入力からとる。OUTFILE が省略された
87 場合は、標準出力へ書き出す。
89 以下のオプションが利用できる。
95 FROMCODE は INFILE のコード系である。(デフォルトは UTF-8)
99 TOCODE は OUTFILE のコード系である。(デフォルトは UTF-8)
103 エラーで変換を停止しない。
115 利用可能なコード系を列挙する。
119 バージョン番号を表示する。
123 このメッセージを表示する。
135 #include <m17n-misc.h>
137 /* Print all coding system names. */
140 compare_coding_name (const void *elt1, const void *elt2)
142 const MSymbol *n1 = elt1;
143 const MSymbol *n2 = elt2;
145 return strcmp (msymbol_name (*n1), msymbol_name (*n2));
156 n = mconv_list_codings (&codings);
157 qsort (codings, n, sizeof (MSymbol), compare_coding_name);
159 for (i = 0; i < n; i++)
161 name = msymbol_name (codings[i]);
162 len = strlen (name) + 1;
168 printf (" %s", name);
176 /* Print the usage of this program (the name is PROG), and exit with
180 help_exit (char *prog, int exit_code)
188 printf ("Usage: %s [ OPTION ... ] [ INFILE [ OUTFILE ] ]\n", prog);
189 printf ("Convert encoding of given files from one to another.\n");
190 printf (" If INFILE is omitted, the input is taken from standard input.\n");
191 printf (" If OUTFILE is omitted, the output is written to standard output.\n");
192 printf ("The following OPTIONs are available.\n");
193 printf (" %-13s %s", "-f FROMCODE",
194 "FROMCODE is the encoding of INFILE (defaults to UTF-8).\n");
195 printf (" %-13s %s", "-t TOCODE",
196 "TOCODE is the encoding of OUTFILE (defaults to UTF-8).\n");
197 printf (" %-13s %s", "-k", "Do not stop conversion on error.\n");
198 printf (" %-13s %s", "-s", "Suppress warnings.\n");
199 printf (" %-13s %s", "-v", "Print progress information.\n");
200 printf (" %-13s %s", "-l", "List available encodings.\n");
201 printf (" %-13s %s", "--version", "Print version number.\n");
202 printf (" %-13s %s", "-h, --help", "Print this message.\n");
207 /* Check invalid bytes found in the last decoding. Text property
208 Mcharset of such a byte is Mcharset_binary. */
211 check_invalid_bytes (MText *mt)
213 int from = 0, to = 0;
214 int len = mtext_len (mt);
219 int n = mtext_prop_range (mt, Mcharset, from, NULL, &to, 1);
221 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcharset) : Mnil;
223 if (charset == Mcharset_binary)
228 "Invalid bytes (at each character position);\n");
231 for (; from < to; from++)
232 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
238 fprintf (stderr, "\n");
242 /* Check unencoded characters in the last encoding. Text property
243 Mcoding of such a character is Mnil. */
246 check_unencoded_chars (MText *mt, int len)
248 int from = 0, to = 0;
253 int n = mtext_prop_range (mt, Mcoding, from, NULL, &to, 1);
255 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcoding) : Mnil;
262 "Unencoded characters (at each character position):\n");
265 for (; from < to; from++)
266 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
272 fprintf (stderr, "\n");
276 /* Format ARG by FMT, print the result to stderr, and exit. */
278 #define FATAL_ERROR(fmt, arg) \
280 fprintf (stderr, fmt, arg); \
286 main (int argc, char **argv)
288 int suppress_warning, verbose, continue_on_error;
289 MSymbol incode, outcode;
292 MConverter *converter;
295 /* Initialize the m17n library. */
297 if (merror_code != MERROR_NONE)
298 FATAL_ERROR ("%s\n", "Fail to initialize the m17n library.");
300 /* Default encodings are both UTF-8. */
301 incode = outcode = Mcoding_utf_8;
302 /* By default, read from standard input and write to standard output. */
303 in = stdin, out = stdout;
304 /* By default, all these flags are 0. */
305 suppress_warning = verbose = continue_on_error = 0;
306 /* Parse the command line arguments. */
307 for (i = 1; i < argc; i++)
309 if (! strcmp (argv[i], "--help")
310 || ! strcmp (argv[i], "-h")
311 || ! strcmp (argv[i], "-?"))
312 help_exit (argv[0], 0);
313 else if (! strcmp (argv[i], "--version"))
315 printf ("m17n-conv (m17n library) %s\n", M17NLIB_VERSION_NAME);
316 printf ("Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 AIST, JAPAN\n");
319 else if (! strcmp (argv[i], "-l"))
325 else if (! strcmp (argv[i], "-f"))
327 incode = mconv_resolve_coding (msymbol (argv[++i]));
329 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
331 else if (! strcmp (argv[i], "-t"))
333 outcode = mconv_resolve_coding (msymbol (argv[++i]));
335 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
337 else if (! strcmp (argv[i], "-k"))
338 continue_on_error = 1;
339 else if (! strcmp (argv[i], "-s"))
340 suppress_warning = 1;
341 else if (! strcmp (argv[i], "-v"))
343 else if (argv[i][0] != '-')
347 in = fopen (argv[i], "r");
349 FATAL_ERROR ("Can't read the file %s\n", argv[i]);
351 else if (out == stdout)
353 out = fopen (argv[i], "w");
355 FATAL_ERROR ("Can't write the file %s\n", argv[i]);
358 help_exit (argv[0], 1);
361 help_exit (argv[0], 1);
364 /* Create an M-text to store the decoded characters. */
367 /* Create a converter for decoding. */
368 converter = mconv_stream_converter (incode, in);
369 /* Instead of doing strict decoding, we decode all input bytes at
370 once, and check invalid bytes later by the function
371 check_invalid_bytes. */
372 converter->lenient = 1;
374 mconv_decode (converter, mt);
376 if (! suppress_warning)
377 check_invalid_bytes (mt);
379 fprintf (stderr, "%d bytes (%s) decoded into %d characters,\n",
380 converter->nbytes, msymbol_name (incode), mtext_len (mt));
382 mconv_free_converter (converter);
384 /* Create a converter for encoding. */
385 converter = mconv_stream_converter (outcode, out);
386 /* Instead of doing strict encoding, we encode all characters at
387 once, and check unencoded characters later by the function
388 check_unencoded_chars. */
389 converter->lenient = 1;
390 converter->last_block = 1;
391 if (mconv_encode (converter, mt) < 0
392 && ! suppress_warning)
393 fprintf (stderr, "I/O error on writing\n");
394 if (! suppress_warning)
395 check_unencoded_chars (mt, converter->nchars);
397 fprintf (stderr, "%d characters encoded into %d bytes (%s).\n",
398 converter->nchars, converter->nbytes, msymbol_name (outcode));
401 mconv_free_converter (converter);
404 m17n_object_unref (mt);
408 #endif /* not FOR_DOXYGEN */