1 /* mconv.c -- Code converter. -*- coding: euc-jp; -*-
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 @enpage m17n-conv convert file code
26 @section m17n-conv-synopsis SYNOPSIS
28 m17n-conv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
30 @section m17n-conv-description DESCRIPTION
32 Convert encoding of given files from one to another.
34 If INFILE is omitted, the input is taken from standard input. If
35 OUTFILE is omitted, the output is written to standard output.
37 The following OPTIONs are available.
43 FROMCODE is the encoding of INFILE (defaults to UTF-8).
47 TOCODE is the encoding of OUTFILE (defaults to UTF-8).
51 Do not stop conversion on error.
59 Print progress information.
63 List available encodings.
76 @japage m17n-conv ファイルのコードを変換する
78 @section m17n-conv-synopsis SYNOPSIS
80 m17n-conv [ OPTION ... ] [ INFILE [ OUTFILE ] ]
82 @section m17n-conv-description 説明
84 与えられたファイルのコードを別のものに変換する。
86 INFILE が省略された場合は、標準入力からとる。OUTFILE が省略された
87 場合は、標準出力へ書き出す。
89 以下のオプションが利用できる。
95 FROMCODE は INFILE のコード系である。(デフォルトは UTF-8)
99 TOCODE は OUTFILE のコード系である。(デフォルトは UTF-8)
103 エラーで変換を停止しない。
115 利用可能なコード系を列挙する。
119 バージョン番号を表示する。
123 このメッセージを表示する。
135 #include <m17n-misc.h>
137 /* Print all coding system names. compare_coding_name is the
comparison callback handed to qsort for that purpose: ELT1 and ELT2
point to two MSymbol elements of the array being sorted, and the
result is that of strcmp applied to the two symbol names. */
140 compare_coding_name (const void *elt1, const void *elt2)
/* The callback receives pointers to the elements, so view them as
pointers to MSymbol before dereferencing. */
142 const MSymbol *n1 = elt1;
143 const MSymbol *n2 = elt2;
145 return strcmp (msymbol_name (*n1), msymbol_name (*n2));
/* Body of the routine that prints the coding system names (its
header and declarations are not visible in this excerpt). Fetch
the codings; N is used below both as the element count for qsort
and as the loop bound. */
156 n = mconv_list_codings (&codings);
/* Order the symbols by name with compare_coding_name. */
157 qsort (codings, n, sizeof (MSymbol), compare_coding_name);
/* Print every name, each preceded by a single space. */
159 for (i = 0; i < n; i++)
161 name = msymbol_name (codings[i]);
/* LEN is the name length plus one. NOTE(review): its use is not
visible here; presumably it accounts for the leading space when
tracking the output column -- confirm. */
162 len = strlen (name) + 1;
168 printf (" %s", name);
176 /* Print the usage of this program (the name is PROG), and exit with
180 help_exit (char *prog, int exit_code)
/* Write the usage text to standard output, with PROG substituted as
the program name in the first line. NOTE(review): the use of
EXIT_CODE is not visible in this excerpt; presumably the function
finishes by exiting with it -- confirm. */
188 printf ("Usage: %s [ OPTION ... ] [ INFILE [ OUTFILE ] ]\n", prog);
189 printf ("Convert encoding of given files from one to another.\n");
190 printf (" If INFILE is omitted, the input is taken from standard input.\n");
191 printf (" If OUTFILE is omitted, the output is written to standard output.\n");
192 printf ("The following OPTIONs are available.\n");
/* One line per option: the option name is left-justified in a
13-column field, followed by its description. The description
string itself carries the trailing newline. */
193 printf (" %-13s %s", "-f FROMCODE",
194 "FROMCODE is the encoding of INFILE (defaults to UTF-8).\n");
195 printf (" %-13s %s", "-t TOCODE",
196 "TOCODE is the encoding of OUTFILE (defaults to UTF-8).\n");
197 printf (" %-13s %s", "-k", "Do not stop conversion on error.\n");
198 printf (" %-13s %s", "-s", "Suppress warnings.\n");
199 printf (" %-13s %s", "-v", "Print progress information.\n");
200 printf (" %-13s %s", "-l", "List available encodings.\n");
201 printf (" %-13s %s", "--version", "Print version number.\n");
202 printf (" %-13s %s", "-h, --help", "Print this message.\n");
207 /* Check invalid bytes found in the last decoding. Text property
208 Mcharset of such a byte is Mcharset_binary. */
211 check_invalid_bytes (MText *mt)
/* MT is the M-text to inspect. Findings are written to stderr as
the character code in hexadecimal followed by the character
position in parentheses. */
213 int from = 0, to = 0;
214 int len = mtext_len (mt);
/* Ask for the range of the Mcharset property at FROM; TO is passed
by address so that mtext_prop_range can store the end of that range
(confirm the exact contract against the m17n API). The property
value is fetched only when N is positive; otherwise the charset is
taken to be Mnil. */
219 int n = mtext_prop_range (mt, Mcharset, from, NULL, &to, 1);
221 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcharset) : Mnil;
/* A range whose charset is Mcharset_binary consists of invalid
bytes. NOTE(review): the heading below ends with ';' whereas the
corresponding heading in check_unencoded_chars ends with ':' --
probably a typo in the message. */
223 if (charset == Mcharset_binary)
228 "Invalid bytes (at each character position);\n");
/* Dump every character in [FROM, TO); this also advances FROM to
TO. */
231 for (; from < to; from++)
232 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
/* Terminate the diagnostic output with a newline. */
238 fprintf (stderr, "\n");
242 /* Check unencoded characters in the last encoding. Text property
243 Mcoding of such a character is Mnil. */
246 check_unencoded_chars (MText *mt, int len)
/* MT is the M-text that was encoded. The caller in main passes the
converter's nchars field as LEN. NOTE(review): the use of LEN is not
visible in this excerpt; presumably it bounds the scan -- confirm.
Findings are written to stderr as the character code in hexadecimal
followed by the character position in parentheses. */
248 int from = 0, to = 0;
/* Ask for the range of the Mcoding property at FROM; TO is passed by
address so that mtext_prop_range can store the end of that range
(confirm the exact contract against the m17n API). The property
value is fetched only when N is positive; otherwise the value is
taken to be Mnil. */
253 int n = mtext_prop_range (mt, Mcoding, from, NULL, &to, 1);
255 = n > 0 ? (MSymbol) mtext_get_prop (mt, from, Mcoding) : Mnil;
/* Heading for the report. NOTE(review): the condition guarding it is
not visible here; per the header comment it should be a Mcoding
value of Mnil. */
262 "Unencoded characters (at each character position):\n");
/* Dump every character in [FROM, TO); this also advances FROM to
TO. */
265 for (; from < to; from++)
266 fprintf (stderr, " 0x%02X(%d)", mtext_ref_char (mt, from), from);
/* Terminate the diagnostic output with a newline. */
272 fprintf (stderr, "\n");
276 /* Format ARG by FMT, print the result to stderr, and exit. FMT is a printf-style format that consumes exactly one argument, ARG. */
278 #define FATAL_ERROR(fmt, arg) \
280 fprintf (stderr, fmt, arg); \
286 main (int argc, char **argv)
/* Entry point of m17n-conv. Parse the command line, decode the
input stream with INCODE into an M-text, then encode that M-text
with OUTCODE onto the output stream. */
288 int suppress_warning, verbose, continue_on_error;
289 MSymbol incode, outcode;
292 MConverter *converter;
295 /* Initialize the m17n library. */
297 if (merror_code != MERROR_NONE)
298 FATAL_ERROR ("%s\n", "Fail to initialize the m17n library.");
300 /* Default encodings are both UTF-8. */
301 incode = outcode = Mcoding_utf_8;
302 /* By default, read from standard input and write to standard output. */
303 in = stdin, out = stdout;
304 /* By default, all these flags are 0. */
305 suppress_warning = verbose = continue_on_error = 0;
306 /* Parse the command line arguments. */
307 for (i = 1; i < argc; i++)
/* "--help", "-h" and "-?" all print the usage, passing exit code 0
to help_exit. */
309 if (! strcmp (argv[i], "--help")
310 || ! strcmp (argv[i], "-h")
311 || ! strcmp (argv[i], "-?"))
312 help_exit (argv[0], 0);
313 else if (! strcmp (argv[i], "--version"))
315 printf ("m17n-conv (m17n library) %s\n", M17NLIB_VERSION_NAME);
316 printf ("Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 AIST, JAPAN\n");
319 else if (! strcmp (argv[i], "-l"))
/* "-f" and "-t" consume the following argument as the encoding
name. NOTE(review): argv[++i] is used without checking that another
argument follows; when "-f" or "-t" is the last argument this hands
argv[argc], a null pointer, to msymbol. */
325 else if (! strcmp (argv[i], "-f"))
327 incode = mconv_resolve_coding (msymbol (argv[++i]));
329 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
331 else if (! strcmp (argv[i], "-t"))
333 outcode = mconv_resolve_coding (msymbol (argv[++i]));
335 FATAL_ERROR ("Unknown encoding: %s\n", argv[i]);
337 else if (! strcmp (argv[i], "-k"))
338 continue_on_error = 1;
339 else if (! strcmp (argv[i], "-s"))
340 suppress_warning = 1;
341 else if (! strcmp (argv[i], "-v"))
/* An argument that does not start with '-' is a file name. It is
opened for reading as IN or, while OUT is still stdout, for writing
as OUT. NOTE(review): the condition selecting the input case is not
visible here; presumably IN still being stdin -- confirm. */
343 else if (argv[i][0] != '-')
347 in = fopen (argv[i], "r");
349 FATAL_ERROR ("Can't read the file %s\n", argv[i]);
351 else if (out == stdout)
353 out = fopen (argv[i], "w");
355 FATAL_ERROR ("Can't write the file %s\n", argv[i]);
/* The remaining cases print the usage, passing exit code 1 to
help_exit. */
358 help_exit (argv[0], 1);
361 help_exit (argv[0], 1);
364 /* Create an M-text to store the decoded characters. */
367 /* Create a converter for decoding. */
368 converter = mconv_stream_converter (incode, in);
370 FATAL_ERROR ("Encoding \"%s\" requires the missing library \"m17n-db\".\n",
371 msymbol_name (incode));
372 /* Instead of doing strict decoding, we decode all input bytes at
373 once, and check invalid bytes later by the function
374 check_invalid_bytes. */
375 converter->lenient = 1;
377 mconv_decode (converter, mt);
379 if (! suppress_warning)
380 check_invalid_bytes (mt);
/* Decoding statistics on stderr. NOTE(review): the guarding
condition is not visible here; presumably VERBOSE -- confirm. */
382 fprintf (stderr, "%d bytes (%s) decoded into %d characters,\n",
383 converter->nbytes, msymbol_name (incode), mtext_len (mt));
385 mconv_free_converter (converter);
387 /* Create a converter for encoding. */
388 converter = mconv_stream_converter (outcode, out);
390 FATAL_ERROR ("Encoding \"%s\" requires the missing library \"m17n-db\".\n",
391 msymbol_name (outcode));
392 /* Instead of doing strict encoding, we encode all characters at
393 once, and check unencoded characters later by the function
394 check_unencoded_chars. */
395 converter->lenient = 1;
/* Going by the field name, this marks the M-text as the final block
to encode -- confirm against the MConverter documentation. */
396 converter->last_block = 1;
/* A negative result from mconv_encode is reported as a write error,
unless warnings are suppressed. */
397 if (mconv_encode (converter, mt) < 0
398 && ! suppress_warning)
399 fprintf (stderr, "I/O error on writing\n");
400 if (! suppress_warning)
401 check_unencoded_chars (mt, converter->nchars);
/* Encoding statistics on stderr. NOTE(review): the guarding
condition is not visible here; presumably VERBOSE -- confirm. */
403 fprintf (stderr, "%d characters encoded into %d bytes (%s).\n",
404 converter->nchars, converter->nbytes, msymbol_name (outcode));
/* Release the converter and the M-text. */
407 mconv_free_converter (converter);
410 m17n_object_unref (mt);
414 #endif /* not FOR_DOXYGEN */