1 /* mconv.c -- Code converter. -*- coding: euc-jp; -*-
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 @enpage m17n-conv convert file code
26 @section m17n-conv-synopsis SYNOPSIS
28 m17n-conv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
30 @section m17n-conv-description DESCRIPTION
32 Convert encoding of given files from one to another.
34 If INFILE is omitted, the input is taken from standard input. If
35 OUTFILE is omitted, the output is written to standard output.
37 The following OPTIONs are available.
43 FROMCODE is the encoding of INFILE (defaults to UTF-8).
47 TOCODE is the encoding of OUTFILE (defaults to UTF-8).
51 Do not stop conversion on error.
59 Print progress information.
63 List available encodings.
76 @japage m17n-conv ファイルのコードを変換する
78 @section m17n-conv-synopsis SYNOPSIS
80 m17n-conv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
82 @section m17n-conv-description 説明
84 与えられたファイルのコードを別のものに変換する。
86 INFILE が省略された場合は、標準入力からとる。OUTFILE が省略された
87 場合は、標準出力へ書き出す。
89 以下のオプションが利用できる。
95 FROMCODE は INFILE のコード系である。(デフォルトは UTF-8)
99 TOCODE は OUTFILE のコード系である。(デフォルトは UTF-8)
103 エラーで変換を停止しない。
115 利用可能なコード系を列挙する。
119 バージョン番号を表示する。
123 このメッセージを表示する。
135 #include <m17n-misc.h>
137 /* Print all coding system names. */
/* Comparison callback with the qsort signature.  ELT1 and ELT2 are
   treated as pointers to MSymbol; the result is the strcmp ordering
   of the names (msymbol_name) of the two symbols, so an array of
   MSymbol is sorted by name in byte order. */
140 compare_coding_name (const void *elt1, const void *elt2)
142 const MSymbol *n1 = elt1;
143 const MSymbol *n2 = elt2;
145 return strcmp (msymbol_name (*n1), msymbol_name (*n2));
156 n = mconv_list_codings (&codings);
157 qsort (codings, n, sizeof (MSymbol), compare_coding_name);
159 for (i = 0; i < n; i++)
161 name = msymbol_name (codings[i]);
162 len = strlen (name) + 1;
168 printf (" %s", name);
176 /* Print the usage of this program (the name is PROG), and exit with EXIT_CODE. */
/* Print the usage message to standard output: a synopsis line that
   embeds PROG, a short description, and one line per option.
   EXIT_CODE is the second parameter; the statement that consumes it
   is not among the lines visible in this listing. */
180 help_exit (char *prog, int exit_code)
188 printf ("Usage: %s [ OPTION ... ] [ INFILE [ OUTFILE ] ]\n", prog);
189 printf ("Convert encoding of given files from one to another.\n");
190 printf (" If INFILE is omitted, the input is taken from standard input.\n");
191 printf (" If OUTFILE is omitted, the output is written to standard output.\n");
192 printf ("The following OPTIONs are available.\n");
/* Every option line pads the option name to a left-aligned field of
   13 columns (%-13s) so that the descriptions line up. */
193 printf (" %-13s %s", "-f FROMCODE",
194 "FROMCODE is the encoding of INFILE (defaults to UTF-8).\n");
195 printf (" %-13s %s", "-t TOCODE",
196 "TOCODE is the encoding of OUTFILE (defaults to UTF-8).\n");
197 printf (" %-13s %s", "-k", "Do not stop conversion on error.\n");
198 printf (" %-13s %s", "-s", "Suppress warnings.\n");
199 printf (" %-13s %s", "-v", "Print progress information.\n");
200 printf (" %-13s %s", "-l", "List available encodings.\n");
201 printf (" %-13s %s", "--version", "Print version number.\n");
202 printf (" %-13s %s", "-h, --help", "Print this message.\n");
206 /* Global flags to control the behaviour. */
/* Nonzero means diagnostic output on stderr is skipped: the check
   routines, unknown_encoding and FATAL_ERROR all test it before
   printing.  The -s option sets it to 1. */
207 int suppress_warning;
/* Tested by the check routines after a problem has been found.  The
   -k option (described in the help text as not stopping conversion
   on error) sets it to 1. */
208 int continue_on_error;
210 /* Check invalid bytes found in the last decoding. Text property
211 Mcharset of such a byte is Mcharset_binary. */
/* MT is the M-text to inspect.  Ranges of MT whose Mcharset property
   equals Mcharset_binary are reported on stderr, one entry per
   character, unless suppress_warning is set. */
214 check_invalid_bytes (MText *mt)
216 int from = 0, to = 0;
217 int len = mtext_len (mt);
/* Ask for the extent of the Mcharset property at FROM; TO receives
   the end of that range.  NOTE(review): the exact meaning of the
   return value and of the last argument should be confirmed against
   the m17n documentation of mtext_prop_range. */
222 int n = mtext_prop_range (mt, Mcharset, from, NULL, &to, 1);
/* When n is not positive the property value is taken to be Mnil. */
224 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcharset) : Mnil;
226 if (charset == Mcharset_binary)
228 if (! suppress_warning)
/* NOTE(review): this heading ends with a semicolon while the
   corresponding heading in check_unencoded_chars ends with a colon;
   the semicolon looks like a typo. */
233 "Invalid bytes (at each character position);\n");
/* Print each character of the range as a hex code followed by its
   position in parentheses. */
236 for (; from < to; from++)
237 fprintf (stderr, " 0x%02X(%d)",
238 mtext_ref_char (mt, from), from);
/* Without -k a newline is written here; any statement that follows
   it in this branch is not visible in this listing. */
240 if (! continue_on_error)
243 fprintf (stderr, "\n");
251 fprintf (stderr, "\n");
255 /* Check unencoded characters in the last encoding. Text property
256 Mcoding of such a character is Mnil. */
/* MT is the M-text that was handed to the encoder.  LEN is supplied
   by the caller (main passes converter->nchars); the line that uses
   it is not visible in this listing.  Reports go to stderr, one
   entry per character, unless suppress_warning is set. */
259 check_unencoded_chars (MText *mt, int len)
261 int from = 0, to = 0;
/* Ask for the extent of the Mcoding property at FROM; TO receives the
   end of that range.  NOTE(review): confirm the return value
   semantics against the m17n documentation of mtext_prop_range. */
266 int n = mtext_prop_range (mt, Mcoding, from, NULL, &to, 1);
/* When n is not positive the property value is taken to be Mnil. */
268 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcoding) : Mnil;
272 if (! suppress_warning)
277 "Unencoded chars (at each character position):\n");
/* Print each character of the range as a hex code followed by its
   position in parentheses. */
280 for (; from < to; from++)
281 fprintf (stderr, " 0x%02X(%d)",
282 mtext_ref_char (mt, from), from);
/* Without -k a newline is written here; any statement that follows
   it in this branch is not visible in this listing. */
284 if (! continue_on_error)
287 fprintf (stderr, "\n");
295 fprintf (stderr, "\n");
/* Report that NAME could not be resolved to an encoding.  Unless
   suppress_warning is set, the name is printed on stderr. */
300 unknown_encoding (char *name)
302 if (! suppress_warning)
304 fprintf (stderr, "Unknown encoding: \"%s\"\n", name);
/* Probe an encoding (iso-2022-jp) by name; if that cannot be resolved
   either, hint that the m17n-db package is missing.
   NOTE(review): the brace lines are not visible, so whether this
   probe is nested under the suppress_warning test above should be
   confirmed. */
305 if (mconv_resolve_coding (msymbol ("iso-2022-jp")) == Mnil)
306 fprintf (stderr, "Perhaps the library \"m17n-db\" is missing.\n");
311 /* Print ARG formatted by FMT to stderr unless suppress_warning is
   set, and exit.  FMT is used directly as the fprintf format, so
   callers must pass a format that consumes exactly one argument.
   NOTE(review): the exit statement and the enclosing lines of the
   macro body are not visible in this listing. */
313 #define FATAL_ERROR(fmt, arg) \
315 if (! suppress_warning) \
316 fprintf (stderr, fmt, arg); \
/* Program entry point.  Parses the command line, decodes the input
   stream IN from INCODE into an M-text, then encodes that M-text to
   the output stream OUT in OUTCODE.  Invalid input bytes and
   characters that could not be encoded are reported by
   check_invalid_bytes and check_unencoded_chars. */
322 main (int argc, char **argv)
325 MSymbol incode, outcode;
328 MConverter *converter;
331 /* Initialize the m17n library. */
333 if (merror_code != MERROR_NONE)
334 FATAL_ERROR ("%s\n", "Fail to initialize the m17n library.");
336 /* Default encodings are both UTF-8. */
337 incode = outcode = Mcoding_utf_8;
338 /* By default, read from standard input and write to standard output. */
339 in = stdin, out = stdout;
340 /* By default, all these flags are 0. */
341 suppress_warning = verbose = continue_on_error = 0;
342 /* Parse the command line arguments. */
343 for (i = 1; i < argc; i++)
345 if (! strcmp (argv[i], "--help")
346 || ! strcmp (argv[i], "-h")
347 || ! strcmp (argv[i], "-?"))
348 help_exit (argv[0], 0);
349 else if (! strcmp (argv[i], "--version"))
351 printf ("m17n-conv (m17n library) %s\n", M17NLIB_VERSION_NAME);
352 printf ("Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 AIST, JAPAN\n");
355 else if (! strcmp (argv[i], "-l"))
361 else if (! strcmp (argv[i], "-f"))
/* NOTE(review): the index is advanced without checking that another
   argument follows -f; the loop only guarantees i < argc.  When -f is
   the last argument, argv[i] here is argv[argc], a null pointer, and
   it is handed to msymbol.  A bounds check before the increment
   would avoid that. */
363 incode = mconv_resolve_coding (msymbol (argv[++i]));
365 unknown_encoding (argv[i]);
367 else if (! strcmp (argv[i], "-t"))
/* NOTE(review): same missing bounds check as for -f above. */
369 outcode = mconv_resolve_coding (msymbol (argv[++i]));
371 unknown_encoding (argv[i]);
373 else if (! strcmp (argv[i], "-k"))
374 continue_on_error = 1;
375 else if (! strcmp (argv[i], "-s"))
376 suppress_warning = 1;
377 else if (! strcmp (argv[i], "-v"))
/* Any argument that does not start with a dash is taken as a file
   name.  One is opened for reading as IN, and one is opened for
   writing as OUT while OUT is still stdout; the condition that
   selects the IN case is not visible in this listing.
   NOTE(review): both streams are opened in text mode (r and w).  On
   platforms that distinguish text from binary streams this may alter
   encoded bytes; consider rb and wb. */
379 else if (argv[i][0] != '-')
383 in = fopen (argv[i], "r");
385 FATAL_ERROR ("Can't read the file %s\n", argv[i]);
387 else if (out == stdout)
389 out = fopen (argv[i], "w");
391 FATAL_ERROR ("Can't write the file %s\n", argv[i]);
394 help_exit (argv[0], 1);
397 help_exit (argv[0], 1);
/* NOTE(review): the condition guarding this reset is not visible in
   this listing; confirm when warnings are forced back on. */
400 suppress_warning = 0;
402 /* Create an M-text to store the decoded characters. */
405 /* Create a converter for decoding. */
406 converter = mconv_stream_converter (incode, in);
408 FATAL_ERROR ("Encoding \"%s\" requires the missing library \"m17n-db\".\n",
409 msymbol_name (incode));
410 /* Instead of doing strict decoding, we decode all input bytes at
411 once, and check invalid bytes later by the function
412 check_invalid_bytes. */
413 converter->lenient = 1;
415 mconv_decode (converter, mt);
417 check_invalid_bytes (mt);
/* Progress report: byte count consumed, source encoding name, and the
   resulting character count. */
419 fprintf (stderr, "%d bytes (%s) decoded into %d characters,\n",
420 converter->nbytes, msymbol_name (incode), mtext_len (mt));
422 mconv_free_converter (converter);
424 /* Create a converter for encoding. */
425 converter = mconv_stream_converter (outcode, out);
427 FATAL_ERROR ("Encoding \"%s\" requires the missing library \"m17n-db\".\n",
428 msymbol_name (outcode));
429 /* Instead of doing strict encoding, we encode all characters at
430 once, and check unencoded characters later by the function
431 check_unencoded_chars. */
432 converter->lenient = 1;
/* The whole M-text is encoded in a single call, so this call is also
   flagged as the last block. */
433 converter->last_block = 1;
/* A negative result from mconv_encode is reported as a write error
   unless warnings are suppressed. */
434 if (mconv_encode (converter, mt) < 0
435 && ! suppress_warning)
436 fprintf (stderr, "I/O error on writing\n");
437 check_unencoded_chars (mt, converter->nchars);
/* Progress report: character count encoded, byte count produced, and
   the target encoding name. */
439 fprintf (stderr, "%d characters encoded into %d bytes (%s).\n",
440 converter->nchars, converter->nbytes, msymbol_name (outcode));
443 mconv_free_converter (converter);
/* Release the reference to the M-text. */
446 m17n_object_unref (mt);
450 #endif /* not FOR_DOXYGEN */