1 /* coding.c -- code conversion module.
2 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 @brief Coding system objects and API for them.
27 The m17n library represents a character encoding scheme (CES) of
28 coded character sets (CCS) as an object called @e coding @e
29 system. Application programs can add original coding systems.
31 To @e encode means converting code-points to character codes and
32 to @e decode means converting character codes back to code-points.
34 Application programs can decode a byte sequence with a specified
35 coding system into an M-text, and inversely, can encode an M-text
36 into a byte sequence. */
40 @brief コード系オブジェクトとそれに関する API.
42 m17n ライブラリは、符号化文字集合 (coded character set; CCS)
43 の文字符号化方式 (character encoding scheme; CES) を @e コード系
44 と呼ぶオブジェクトで表現する。
45 アプリケーションプログラムは独自にコード系を追加することもできる。
47 コードポイントから文字コードへの変換を @e エンコード
48 と呼び、文字コードからコードポイントへの変換を @e デコード と呼ぶ。
50 アプリケーションプログラムは、指定されたコード系でバイト列をデコードすることによって
51 M-text を得ることができる。また逆に、指定されたコード系で M-text
52 をエンコードすることによってバイト列を得ることができる。 */
56 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
57 /*** @addtogroup m17nInternal
65 #include <sys/types.h>
70 #include "m17n-misc.h"
73 #include "character.h"
80 #define NUM_SUPPORTED_CHARSETS 32
82 /** Structure for coding system object. */
86 /** Name of the coding system. */
89 /** Type of the coding system. */
92 /* Number of supported charsets. */
95 /** Array of supported charsets. */
96 MCharset *charsets[NUM_SUPPORTED_CHARSETS];
98 /** If non-NULL, function to call at the time of creating and
99 resetting a converter. */
100 int (*resetter) (MConverter *converter);
102 int (*decoder) (const unsigned char *str, int str_bytes, MText *mt,
103 MConverter *converter);
105 int (*encoder) (MText *mt, int from, int to,
106 unsigned char *str, int str_bytes,
107 MConverter *converter);
109 /** If non-zero, the coding system decode/encode ASCII characters as
111 int ascii_compatible;
113 /** Pointer to extra information given when the coding system is
114 defined. The meaning depends on <type>. */
117 /** Pointer to information referred on conversion. The meaning
118 depends on <type>. The value NULL means that the coding system
128 MCodingSystem **codings;
131 static struct MCodingList coding_list;
133 static MPlist *coding_definition_list;
137 Pointer to a structure of a coding system. */
139 コード系を表わすデータ構造へのポインタ */
140 MCodingSystem *coding;
143 Buffer for carryover bytes generated while decoding. */
145 デコード中のキャリィオーバーバイト用バッファ */
146 unsigned char carryover[256];
149 Number of carryover bytes. */
151 キャリィオーバーバイト数 */
155 Beginning of the byte sequence bound to this converter. */
157 このコンバータに結び付けられたバイト列の先頭位置 */
159 const unsigned char *in;
170 Number of bytes already consumed in buf. */
172 buf 内ですでに消費されたバイト数 */
176 Stream bound to this converter. */
178 このコンバータに結び付けられたストリーム */
182 Which of above two is in use. */
184 上記2者のいずれが使われているか */
204 /* Local macros and functions. */
206 /** At first, set SRC_BASE to SRC. Then check if we have already
207 produced AT_MOST chars. If so, set SRC_END to SRC, and jump to
208 source_end. Otherwise, get one more byte C from SRC. In that
209 case, if SRC == SRC_END, jump to the label source_end. */
211 #define ONE_MORE_BASE_BYTE(c) \
214 if (nchars == at_most) \
219 if (src == src_stop) \
221 if (src == src_end) \
223 src_base = src = source; \
224 if (src == src_end) \
226 src_stop = src_end; \
232 /** Get one more byte C from SRC. If SRC == SRC_END, jump to the
235 #define ONE_MORE_BYTE(c) \
237 if (src == src_stop) \
239 if (src == src_end) \
242 if (src == src_end) \
244 src_stop = src_end; \
250 #define REWIND_SRC_TO_BASE() \
252 if (src_base < source || src_base >= src_end) \
253 src_stop = internal->carryover + internal->carryover_bytes; \
258 /** Push back byte C to SRC. */
260 #define UNGET_ONE_BYTE(c) \
266 internal->carryover[0] = c; \
267 internal->carryover_bytes = 1; \
268 src = internal->carryover; \
269 src_stop = src + 1; \
274 /** Store multibyte representation of character C at DST and increment
275 DST to the next of the produced bytes. DST must be a pointer to
276 data area of M-text MT. If the produced bytes are going to exceed
277 DST_END, enlarge the data area of MT. */
279 #define EMIT_CHAR(c) \
281 int bytes = CHAR_BYTES (c); \
284 if (dst + bytes + 1 > dst_end) \
286 len = dst - mt->data; \
287 bytes = mt->allocated + bytes + (src_stop - src); \
288 mtext__enlarge (mt, bytes); \
289 dst = mt->data + len; \
290 dst_end = mt->data + mt->allocated; \
292 dst += CHAR_STRING (c, dst); \
297 /* Check if there is enough room to produce LEN bytes at DST. If not,
298 go to the label insufficient_destination. */
300 #define CHECK_DST(len) \
302 if (dst + (len) > dst_end) \
303 goto insufficient_destination; \
307 /** Take NUM_CHARS characters (NUM_BYTES bytes) already stored at
308 (MT->data + MT->nbytes) into MT, and put charset property on
309 them with CHARSET->name. */
311 #define TAKEIN_CHARS(mt, num_chars, num_bytes, charset) \
313 int chars = (num_chars); \
317 mtext__takein ((mt), chars, (num_bytes)); \
319 mtext_put_prop ((mt), (mt)->nchars - chars, (mt)->nchars, \
320 Mcharset, (void *) ((charset)->name)); \
325 #define SET_SRC(mt, format, from, to) \
327 if (format <= MTEXT_FORMAT_UTF_8) \
329 src = mt->data + POS_CHAR_TO_BYTE (mt, from); \
330 src_end = mt->data + POS_CHAR_TO_BYTE (mt, to); \
332 else if (format <= MTEXT_FORMAT_UTF_16BE) \
335 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, from); \
337 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, to); \
341 src = mt->data + (sizeof (int)) * from; \
342 src_end = mt->data + (sizeof (int)) * to; \
347 #define ONE_MORE_CHAR(c, bytes, format) \
349 if (src == src_end) \
351 if (format <= MTEXT_FORMAT_UTF_8) \
352 c = STRING_CHAR_AND_BYTES (src, bytes); \
353 else if (format <= MTEXT_FORMAT_UTF_16BE) \
355 c = mtext_ref_char (mt, from++); \
356 bytes = (sizeof (short)) * CHAR_UNITS_UTF16 (c); \
360 c = ((unsigned *) (mt->data))[from++]; \
361 bytes = sizeof (int); \
367 encode_unsupporeted_char (int c, unsigned char *dst, unsigned char *dst_end,
373 len = c < 0x10000 ? 8 : 10;
374 if (dst + len > dst_end)
377 mtext_put_prop (mt, pos, pos + 1, Mcoding, Mnil);
378 format = (c < 0xD800 ? "<U+%04X>"
379 : c < 0xE000 ? "<M+%04X>"
380 : c < 0x10000 ? "<U+%04X>"
381 : c < 0x110000 ? "<U+%06X>"
383 sprintf ((char *) dst, format, c);
389 /** Finish decoding of bytes at SOURCE (ending at SRC_END) into NCHARS
390 characters by CONVERTER into M-text MT. SRC is a pointer to the
391 not-yet processed bytes. ERROR is 1 iff an invalid byte was
395 finish_decoding (MText *mt, MConverter *converter, int nchars,
396 const unsigned char *source, const unsigned char *src_end,
397 const unsigned char *src,
400 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
403 internal->carryover_bytes = 0;
405 || (converter->last_block
406 && ! converter->lenient))
407 converter->result = MCONVERSION_RESULT_INVALID_BYTE;
408 else if (! converter->last_block)
410 unsigned char *dst = internal->carryover;
412 if (src < source || src > src_end)
414 dst += internal->carryover_bytes;
417 while (src < src_end)
419 internal->carryover_bytes = dst - internal->carryover;
420 converter->result = MCONVERSION_RESULT_INSUFFICIENT_SRC;
424 unsigned char *dst = mt->data + mt->nbytes;
425 unsigned char *dst_end = mt->data + mt->allocated;
426 const unsigned char *src_stop = src_end;
428 int last_nchars = nchars;
430 if (src < source || src > src_end)
431 src_stop = internal->carryover + internal->carryover_bytes;
434 if (converter->at_most && nchars == converter->at_most)
448 TAKEIN_CHARS (mt, nchars - last_nchars, dst - (mt->data + mt->nbytes),
450 internal->carryover_bytes = 0;
453 converter->nchars += nchars;
454 converter->nbytes += ((src < source || src > src_end) ? 0 : src - source);
455 return (converter->result == MCONVERSION_RESULT_INVALID_BYTE ? -1 : 0);
460 /* Stuff for coding-systems of type MCODING_TYPE_CHARSET. */
463 setup_coding_charset (MCodingSystem *coding)
465 int ncharsets = coding->ncharsets;
466 unsigned *code_charset_table;
470 /* At first, reorder charset list by dimensions (a charset of
471 smaller dimension comes first). As the number of charsets is
472 usually very small (at most 32), we do a simple sort. */
477 MTABLE_ALLOCA (charsets, NUM_SUPPORTED_CHARSETS, MERROR_CODING);
478 memcpy (charsets, coding->charsets,
479 sizeof (MCharset *) * NUM_SUPPORTED_CHARSETS);
480 for (i = 0; i < 4; i++)
481 for (j = 0; j < ncharsets; j++)
482 if (charsets[j]->dimension == i)
483 coding->charsets[idx++] = charsets[j];
486 MTABLE_CALLOC (code_charset_table, 256, MERROR_CODING);
489 int dim = coding->charsets[ncharsets]->dimension;
490 int from = coding->charsets[ncharsets]->code_range[(dim - 1) * 4];
491 int to = coding->charsets[ncharsets]->code_range[(dim - 1) * 4 + 1];
493 if (coding->charsets[ncharsets]->ascii_compatible)
494 coding->ascii_compatible = 1;
496 code_charset_table[from++] |= 1 << ncharsets;
499 coding->extra_spec = (void *) code_charset_table;
504 reset_coding_charset (MConverter *converter)
506 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
507 MCodingSystem *coding = internal->coding;
510 && setup_coding_charset (coding) < 0)
517 decode_coding_charset (const unsigned char *source, int src_bytes, MText *mt,
518 MConverter *converter)
520 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
521 MCodingSystem *coding = internal->coding;
522 const unsigned char *src = internal->carryover;
523 const unsigned char *src_stop = src + internal->carryover_bytes;
524 const unsigned char *src_end = source + src_bytes;
525 const unsigned char *src_base;
526 unsigned char *dst = mt->data + mt->nbytes;
527 unsigned char *dst_end = mt->data + mt->allocated;
530 int at_most = converter->at_most > 0 ? converter->at_most : -1;
532 unsigned *code_charset_table = (unsigned *) coding->extra_spec;
533 MCharset **charsets = coding->charsets;
534 MCharset *charset = mcharset__ascii;
539 MCharset *this_charset = NULL;
543 ONE_MORE_BASE_BYTE (c);
544 mask = code_charset_table[c];
554 while (! (mask & 1)) mask >>= 1, idx++;
555 this_charset = charsets[idx];
556 dim = this_charset->dimension;
560 code = (code << 8) | c;
563 c = DECODE_CHAR (this_charset, code);
570 if (! converter->lenient)
572 REWIND_SRC_TO_BASE ();
574 this_charset = mcharset__binary;
577 if (this_charset != mcharset__ascii
578 && this_charset != charset)
580 TAKEIN_CHARS (mt, nchars - last_nchars,
581 dst - (mt->data + mt->nbytes), charset);
582 charset = this_charset;
583 last_nchars = nchars;
587 /* We reach here because of an invalid byte. */
591 TAKEIN_CHARS (mt, nchars - last_nchars,
592 dst - (mt->data + mt->nbytes), charset);
593 return finish_decoding (mt, converter, nchars,
594 source, src_end, src_base, error);
598 encode_coding_charset (MText *mt, int from, int to,
599 unsigned char *destination, int dst_bytes,
600 MConverter *converter)
602 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
603 MCodingSystem *coding = internal->coding;
604 unsigned char *src, *src_end;
605 unsigned char *dst = destination;
606 unsigned char *dst_end = dst + dst_bytes;
608 int ncharsets = coding->ncharsets;
609 MCharset **charsets = coding->charsets;
610 int ascii_compatible = coding->ascii_compatible;
611 enum MTextFormat format = mt->format;
613 SET_SRC (mt, format, from, to);
618 ONE_MORE_CHAR (c, bytes, format);
620 if (c < 0x80 && ascii_compatible)
628 MCharset *charset = NULL;
633 charset = charsets[i];
634 code = ENCODE_CHAR (charset, c);
635 if (code != MCHAR_INVALID_CODE)
637 if (++i == ncharsets)
638 goto unsupported_char;
641 CHECK_DST (charset->dimension);
642 if (charset->dimension == 1)
646 else if (charset->dimension == 2)
649 *dst++ = code & 0xFF;
651 else if (charset->dimension == 3)
654 *dst++ = (code >> 8) & 0xFF;
655 *dst++ = code & 0xFF;
660 *dst++ = (code >> 16) & 0xFF;
661 *dst++ = (code >> 8) & 0xFF;
662 *dst++ = code & 0xFF;
673 if (! converter->lenient)
675 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
677 goto insufficient_destination;
683 /* We reach here because of an unsupported char. */
684 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
687 insufficient_destination:
688 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
691 converter->nchars += nchars;
692 converter->nbytes += dst - destination;
693 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
697 /* Stuff for coding-systems of type MCODING_TYPE_UTF (8). */
/** Classify the UTF-8 style byte sequence starting at P by the charset
    of the character it encodes.

    The lead byte P[0] selects the sequence length:
      - 1 to 3 bytes: mcharset__unicode.
      - 4 bytes: mcharset__unicode when the plane number (the code
        shifted right by 16 bits, assembled from the low 3 bits of P[0]
        and bits 5..4 of P[1]) is at most 0x10, i.e. the code is below
        0x110000; otherwise mcharset__m17n.
      - 5 or 6 bytes: mcharset__m17n.
      - a lead byte with all of the top seven bits set: mcharset__binary.

    Before each additional length is accepted, CHAR_HEAD_P is applied to
    the position where the next trailing byte is expected; if it reports
    a character head there, the result is mcharset__binary.

    The two parts of the plane number occupy disjoint bits, so they must
    be combined with `|'.  Combining them with `&' always yields 0 and
    makes every 4-byte sequence look like a Unicode character.

    P is evaluated several times and must have no side effects.  */
#define UTF8_CHARSET(p) \
  (! ((p)[0] & 0x80) ? (mcharset__unicode) \
   : CHAR_HEAD_P ((p) + 1) ? (mcharset__binary) \
   : ! ((p)[0] & 0x20) ? (mcharset__unicode) \
   : CHAR_HEAD_P ((p) + 2) ? (mcharset__binary) \
   : ! ((p)[0] & 0x10) ? (mcharset__unicode) \
   : CHAR_HEAD_P ((p) + 3) ? (mcharset__binary) \
   : ! ((p)[0] & 0x08) ? ((((((p)[0] & 0x07) << 2) \
                            | (((p)[1] & 0x30) >> 4)) <= 0x10) \
                          ? (mcharset__unicode) \
                          : (mcharset__m17n)) \
   : CHAR_HEAD_P ((p) + 4) ? (mcharset__binary) \
   : ! ((p)[0] & 0x04) ? (mcharset__m17n) \
   : CHAR_HEAD_P ((p) + 5) ? (mcharset__binary) \
   : ! ((p)[0] & 0x02) ? (mcharset__m17n) \
   : (mcharset__binary))
718 decode_coding_utf_8 (const unsigned char *source, int src_bytes, MText *mt,
719 MConverter *converter)
721 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
722 MCodingSystem *coding = internal->coding;
723 const unsigned char *src = internal->carryover;
724 const unsigned char *src_stop = src + internal->carryover_bytes;
725 const unsigned char *src_end = source + src_bytes;
726 const unsigned char *src_base;
727 unsigned char *dst = mt->data + mt->nbytes;
728 unsigned char *dst_end = mt->data + mt->allocated;
731 int at_most = converter->at_most > 0 ? converter->at_most : -1;
733 int full = converter->lenient || (coding->charsets[0] == mcharset__m17n);
734 MCharset *charset = NULL;
739 MCharset *this_charset = NULL;
741 ONE_MORE_BASE_BYTE (c);
745 else if (!(c & 0x40))
747 else if (!(c & 0x20))
748 bytes = 2, c &= 0x1F;
749 else if (!(c & 0x10))
750 bytes = 3, c &= 0x0F;
751 else if (!(c & 0x08))
752 bytes = 4, c &= 0x07;
753 else if (!(c & 0x04))
754 bytes = 5, c &= 0x03;
755 else if (!(c & 0x02))
756 bytes = 6, c &= 0x01;
763 if ((c1 & 0xC0) != 0x80)
765 c = (c << 6) | (c1 & 0x3F);
769 || c < 0xD800 || (c >= 0xE000 && c < 0x110000))
773 if (! converter->lenient)
775 REWIND_SRC_TO_BASE ();
777 this_charset = mcharset__binary;
780 if (this_charset != charset)
782 TAKEIN_CHARS (mt, nchars - last_nchars,
783 dst - (mt->data + mt->nbytes), charset);
784 charset = this_charset;
785 last_nchars = nchars;
789 /* We reach here because of an invalid byte. */
793 TAKEIN_CHARS (mt, nchars - last_nchars,
794 dst - (mt->data + mt->nbytes), charset);
795 return finish_decoding (mt, converter, nchars,
796 source, src_end, src_base, error);
800 encode_coding_utf_8 (MText *mt, int from, int to,
801 unsigned char *destination, int dst_bytes,
802 MConverter *converter)
804 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
805 MCodingSystem *coding = internal->coding;
806 unsigned char *src, *src_end;
807 unsigned char *dst = destination;
808 unsigned char *dst_end = dst + dst_bytes;
810 enum MTextFormat format = mt->format;
812 SET_SRC (mt, format, from, to);
814 if (format <= MTEXT_FORMAT_UTF_8
815 && (converter->lenient
816 || coding->charsets[0] == mcharset__m17n))
818 if (dst_bytes < src_end - src)
820 int byte_pos = (src + dst_bytes) - mt->data;
822 to = POS_BYTE_TO_CHAR (mt, byte_pos);
823 byte_pos = POS_CHAR_TO_BYTE (mt, to);
824 src_end = mt->data + byte_pos;
825 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
827 memcpy (destination, src, src_end - src);
829 dst += src_end - src;
837 ONE_MORE_CHAR (c, bytes, format);
839 if ((c >= 0xD800 && c < 0xE000) || c >= 0x110000)
842 dst += CHAR_STRING (c, dst);
846 /* We reach here because of an unsupported char. */
847 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
850 insufficient_destination:
851 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
854 converter->nchars += nchars;
855 converter->nbytes += dst - destination;
856 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
860 /* Stuff for coding-systems of type MCODING_TYPE_UTF (16 & 32). */
881 enum utf_endian endian;
885 setup_coding_utf (MCodingSystem *coding)
887 MCodingInfoUTF *info = (MCodingInfoUTF *) (coding->extra_info);
888 MCodingInfoUTF *spec;
890 if (info->code_unit_bits == 8)
891 coding->ascii_compatible = 1;
892 else if (info->code_unit_bits == 16
893 || info->code_unit_bits == 32)
895 if (info->bom < 0 || info->bom > 2
896 || info->endian < 0 || info->endian > 1)
897 MERROR (MERROR_CODING, -1);
902 MSTRUCT_CALLOC (spec, MERROR_CODING);
904 coding->extra_spec = (void *) (spec);
909 reset_coding_utf (MConverter *converter)
911 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
912 MCodingSystem *coding = internal->coding;
913 struct utf_status *status = (struct utf_status *) &(converter->status);
916 && setup_coding_utf (coding) < 0)
920 status->surrogate = 0;
921 status->bom = ((MCodingInfoUTF *) (coding->extra_spec))->bom;
922 status->endian = ((MCodingInfoUTF *) (coding->extra_spec))->endian;
927 decode_coding_utf_16 (const unsigned char *source, int src_bytes, MText *mt,
928 MConverter *converter)
930 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
931 const unsigned char *src = internal->carryover;
932 const unsigned char *src_stop = src + internal->carryover_bytes;
933 const unsigned char *src_end = source + src_bytes;
934 const unsigned char *src_base;
935 unsigned char *dst = mt->data + mt->nbytes;
936 unsigned char *dst_end = mt->data + mt->allocated;
939 int at_most = converter->at_most > 0 ? converter->at_most : -1;
940 struct utf_status *status = (struct utf_status *) &(converter->status);
941 unsigned char b1, b2;
942 MCharset *charset = NULL;
945 if (status->bom != UTF_BOM_NO)
949 ONE_MORE_BASE_BYTE (b1);
953 status->endian = UTF_BIG_ENDIAN;
954 else if (c == 0xFFFE)
955 status->endian = UTF_LITTLE_ENDIAN;
956 else if (status->bom == UTF_BOM_MAYBE
957 || converter->lenient)
959 status->endian = UTF_BIG_ENDIAN;
960 REWIND_SRC_TO_BASE ();
967 status->bom = UTF_BOM_NO;
973 MCharset *this_charset = NULL;
975 ONE_MORE_BASE_BYTE (b1);
977 if (status->endian == UTF_BIG_ENDIAN)
978 c = ((b1 << 8) | b2);
980 c = ((b2 << 8) | b1);
981 if (c < 0xD800 || c >= 0xE000)
987 if (status->endian == UTF_BIG_ENDIAN)
988 c1 = ((b1 << 8) | b2);
990 c1 = ((b2 << 8) | b1);
991 if (c1 < 0xDC00 || c1 >= 0xE000)
993 c = 0x10000 + ((c - 0xD800) << 10) + (c1 - 0xDC00);
998 if (! converter->lenient)
1000 REWIND_SRC_TO_BASE ();
1003 if (status->endian == UTF_BIG_ENDIAN)
1004 c = ((b1 << 8) | b2);
1006 c = ((b2 << 8) | b1);
1007 this_charset = mcharset__binary;
1010 if (this_charset != charset)
1012 TAKEIN_CHARS (mt, nchars - last_nchars,
1013 dst - (mt->data + mt->nbytes), charset);
1014 charset = this_charset;
1015 last_nchars = nchars;
1019 /* We reach here because of an invalid byte. */
1023 TAKEIN_CHARS (mt, nchars - last_nchars,
1024 dst - (mt->data + mt->nbytes), charset);
1025 return finish_decoding (mt, converter, nchars,
1026 source, src_end, src_base, error);
1031 decode_coding_utf_32 (const unsigned char *source, int src_bytes, MText *mt,
1032 MConverter *converter)
1034 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1035 const unsigned char *src = internal->carryover;
1036 const unsigned char *src_stop = src + internal->carryover_bytes;
1037 const unsigned char *src_end = source + src_bytes;
1038 const unsigned char *src_base;
1039 unsigned char *dst = mt->data + mt->nbytes;
1040 unsigned char *dst_end = mt->data + mt->allocated;
1042 int last_nchars = 0;
1043 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1044 struct utf_status *status = (struct utf_status *) &(converter->status);
1045 unsigned char b1, b2, b3, b4;
1046 MCharset *charset = NULL;
1049 if (status->bom != UTF_BOM_NO)
1053 ONE_MORE_BASE_BYTE (b1);
1057 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1058 if (c == 0x0000FEFF)
1059 status->endian = UTF_BIG_ENDIAN;
1060 else if (c == 0xFFFE0000)
1061 status->endian = UTF_LITTLE_ENDIAN;
1062 else if (status->bom == UTF_BOM_MAYBE
1063 || converter->lenient)
1065 status->endian = UTF_BIG_ENDIAN;
1066 REWIND_SRC_TO_BASE ();
1073 status->bom = UTF_BOM_NO;
1079 MCharset *this_charset = NULL;
1081 ONE_MORE_BASE_BYTE (b1);
1085 if (status->endian == UTF_BIG_ENDIAN)
1086 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1088 c = (b4 << 24) | (b3 << 16) | (b2 << 8) | b1;
1089 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1092 if (! converter->lenient)
1094 REWIND_SRC_TO_BASE ();
1096 this_charset = mcharset__binary;
1099 if (this_charset != charset)
1101 TAKEIN_CHARS (mt, nchars - last_nchars,
1102 dst - (mt->data + mt->nbytes), charset);
1103 charset = this_charset;
1104 last_nchars = nchars;
1108 /* We reach here because of an invalid byte. */
1112 TAKEIN_CHARS (mt, nchars - last_nchars,
1113 dst - (mt->data + mt->nbytes), charset);
1114 return finish_decoding (mt, converter, nchars,
1115 source, src_end, src_base, error);
1120 encode_coding_utf_16 (MText *mt, int from, int to,
1121 unsigned char *destination, int dst_bytes,
1122 MConverter *converter)
1124 unsigned char *src, *src_end;
1125 unsigned char *dst = destination;
1126 unsigned char *dst_end = dst + dst_bytes;
1128 struct utf_status *status = (struct utf_status *) &(converter->status);
1129 int big_endian = status->endian == UTF_BIG_ENDIAN;
1130 enum MTextFormat format = mt->format;
1132 SET_SRC (mt, format, from, to);
1134 if (status->bom != UTF_BOM_NO)
1138 *dst++ = 0xFE, *dst++ = 0xFF;
1140 *dst++ = 0xFF, *dst++ = 0xFE;
1141 status->bom = UTF_BOM_NO;
1148 ONE_MORE_CHAR (c, bytes, format);
1150 if (c < 0xD800 || (c >= 0xE000 && c < 0x10000))
1154 *dst++ = c >> 8, *dst++ = c & 0xFF;
1156 *dst++ = c & 0xFF, *dst++ = c >> 8;
1158 else if (c >= 0x10000 && c < 0x110000)
1164 c1 = (c >> 10) + 0xD800;
1165 c2 = (c & 0x3FF) + 0xDC00;
1167 *dst++ = c1 >> 8, *dst++ = c1 & 0xFF,
1168 *dst++ = c2 >> 8, *dst++ = c2 & 0xFF;
1170 *dst++ = c1 & 0xFF, *dst++ = c1 >> 8,
1171 *dst++ = c2 & 0xFF, *dst++ = c2 >> 8;
1175 unsigned char buf[11];
1178 if (! converter->lenient)
1180 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1183 goto insufficient_destination;
1185 for (i = 0; i < len; i++)
1186 *dst++ = 0, *dst++ = buf[i];
1188 for (i = 0; i < len; i++)
1189 *dst++ = buf[i], *dst++ = 0;
1194 /* We reach here because of an unsupported char. */
1195 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1198 insufficient_destination:
1199 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1202 converter->nchars += nchars;
1203 converter->nbytes += dst - destination;
1204 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1208 encode_coding_utf_32 (MText *mt, int from, int to,
1209 unsigned char *destination, int dst_bytes,
1210 MConverter *converter)
1212 unsigned char *src, *src_end;
1213 unsigned char *dst = destination;
1214 unsigned char *dst_end = dst + dst_bytes;
1216 struct utf_status *status = (struct utf_status *) &(converter->status);
1217 int big_endian = status->endian == UTF_BIG_ENDIAN;
1218 enum MTextFormat format = mt->format;
1220 SET_SRC (mt, format, from, to);
1222 if (status->bom != UTF_BOM_NO)
1226 *dst++ = 0x00, *dst++ = 0x00, *dst++ = 0xFE, *dst++ = 0xFF;
1228 *dst++ = 0xFF, *dst++ = 0xFE, *dst++ = 0x00, *dst++ = 0x00;
1229 status->bom = UTF_BOM_NO;
1236 ONE_MORE_CHAR (c, bytes, format);
1238 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1242 *dst++ = 0x00, *dst++ = c >> 16,
1243 *dst++ = (c >> 8) & 0xFF, *dst++ = c & 0xFF;
1245 *dst++ = c & 0xFF, *dst++ = (c >> 8) & 0xFF,
1246 *dst++ = c >> 16, *dst++ = 0x00;
1250 unsigned char buf[11];
1253 if (! converter->lenient)
1255 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1258 goto insufficient_destination;
1260 for (i = 0; i < len; i++)
1261 *dst++ = 0, *dst++ = buf[i];
1263 for (i = 0; i < len; i++)
1264 *dst++ = buf[i], *dst++ = 0;
1269 /* We reach here because of an unsupported char. */
1270 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1273 insufficient_destination:
1274 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1277 converter->nchars += nchars;
1278 converter->nbytes += dst - destination;
1279 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1283 /* Stuff for coding-systems of type MCODING_TYPE_ISO_2022. */
1285 #define ISO_CODE_STX 0x02 /* start text */
1286 #define ISO_CODE_SO 0x0E /* shift-out */
1287 #define ISO_CODE_SI 0x0F /* shift-in */
1288 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
1289 #define ISO_CODE_ESC 0x1B /* escape */
1290 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
1291 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
1293 /** Structure pointed by MCodingSystem.extra_spec. */
1295 struct iso_2022_spec
1299 /** Initial graphic registers (0..3) invoked to each graphic
1300 plane left and right. */
1301 int initial_invocation[2];
1303 /** Initially designated charsets for each graphic register. */
1304 MCharset *initial_designation[4];
1312 struct iso_2022_status
1315 MCharset *designation[4];
1316 unsigned single_shifting : 1;
1319 unsigned utf8_shifting : 1;
1320 MCharset *non_standard_charset;
1321 int non_standard_charset_bytes;
1322 int non_standard_encoding;
1325 enum iso_2022_code_class {
1326 ISO_control_0, /* Control codes in the range
1327 0x00..0x1F and 0x7F, except for the
1328 following 4 codes. */
1329 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
1330 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
1331 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
1332 ISO_escape, /* ISO_CODE_ESC (0x1B) */
1333 ISO_control_1, /* Control codes in the range
1334 0x80..0x9F, except for the
1335 following 3 codes. */
1336 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
1337 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
1338 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
1339 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
1340 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
1341 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
1342 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
1343 } iso_2022_code_class[256];
1346 #define MCODING_ISO_DESIGNATION_MASK \
1347 (MCODING_ISO_DESIGNATION_G0 \
1348 | MCODING_ISO_DESIGNATION_G1 \
1349 | MCODING_ISO_DESIGNATION_CTEXT \
1350 | MCODING_ISO_DESIGNATION_CTEXT_EXT)
1353 setup_coding_iso_2022 (MCodingSystem *coding)
1355 MCodingInfoISO2022 *info = (MCodingInfoISO2022 *) (coding->extra_info);
1356 int ncharsets = coding->ncharsets;
1357 struct iso_2022_spec *spec;
1358 int designation_policy = info->flags & MCODING_ISO_DESIGNATION_MASK;
1361 coding->ascii_compatible = 0;
1363 MSTRUCT_CALLOC (spec, MERROR_CODING);
1365 spec->flags = info->flags;
1366 spec->initial_invocation[0] = info->initial_invocation[0];
1367 spec->initial_invocation[1] = info->initial_invocation[1];
1368 for (i = 0; i < 4; i++)
1369 spec->initial_designation[i] = NULL;
1370 if (designation_policy)
1372 spec->n_designations = ncharsets;
1373 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1374 spec->n_designations += mcharset__iso_2022_table.used;
1375 MTABLE_CALLOC (spec->designations, spec->n_designations, MERROR_CODING);
1376 for (i = 0; i < spec->n_designations; i++)
1377 spec->designations[i] = -1;
1381 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1382 MERROR (MERROR_CODING, -1);
1383 spec->designations = NULL;
1386 for (i = 0; i < ncharsets; i++)
1388 int reg = info->designations[i];
1391 && coding->charsets[i]->final_byte > 0
1392 && (reg < -4 || reg > 3))
1393 MERROR (MERROR_CODING, -1);
1396 if (spec->initial_designation[reg])
1397 MERROR (MERROR_CODING, -1);
1398 spec->initial_designation[reg] = coding->charsets[i];
1402 if (! designation_policy
1403 && ! (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1404 MERROR (MERROR_CODING, -1);
1408 if (designation_policy)
1409 spec->designations[i] = reg;
1410 if (coding->charsets[i] == mcharset__ascii)
1411 coding->ascii_compatible = 1;
1414 if (coding->ascii_compatible
1415 && (spec->flags & (MCODING_ISO_DESIGNATION_G0
1416 | MCODING_ISO_DESIGNATION_CTEXT
1417 | MCODING_ISO_DESIGNATION_CTEXT_EXT
1418 | MCODING_ISO_LOCKING_SHIFT)))
1419 coding->ascii_compatible = 0;
1421 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1422 for (i = 0; i < mcharset__iso_2022_table.used; i++)
1424 MCharset *charset = mcharset__iso_2022_table.charsets[i];
1426 spec->designations[ncharsets + i]
1427 = ((designation_policy == MCODING_ISO_DESIGNATION_CTEXT
1428 || designation_policy == MCODING_ISO_DESIGNATION_CTEXT_EXT)
1429 ? (charset->code_range[0] == 32
1430 || charset->code_range[1] == 255)
1431 : designation_policy == MCODING_ISO_DESIGNATION_G1);
1434 spec->use_esc = ((spec->flags & MCODING_ISO_DESIGNATION_MASK)
1435 || ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1436 && (spec->initial_designation[2]
1437 || spec->initial_designation[3]))
1438 || (! (spec->flags & MCODING_ISO_EIGHT_BIT)
1439 && (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1440 || (spec->flags & MCODING_ISO_ISO6429));
1442 coding->extra_spec = (void *) spec;
1448 reset_coding_iso_2022 (MConverter *converter)
1450 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1451 MCodingSystem *coding = internal->coding;
1452 struct iso_2022_status *status
1453 = (struct iso_2022_status *) &(converter->status);
1454 struct iso_2022_spec *spec;
1458 && setup_coding_iso_2022 (coding) < 0)
1462 spec = (struct iso_2022_spec *) coding->extra_spec;
1463 status->invocation[0] = spec->initial_invocation[0];
1464 status->invocation[1] = spec->initial_invocation[1];
1465 for (i = 0; i < 4; i++)
1466 status->designation[i] = spec->initial_designation[i];
1467 status->single_shifting = 0;
1474 #define ISO2022_DECODE_DESIGNATION(reg, dim, chars, final, rev) \
1476 MCharset *charset; \
1478 if ((final) < '0' || (final) >= 128) \
1479 goto invalid_byte; \
1482 charset = MCHARSET_ISO_2022 ((dim), (chars), (final)); \
1483 if (! (spec->flags & MCODING_ISO_FULL_SUPPORT)) \
1487 for (i = 0; i < coding->ncharsets; i++) \
1488 if (charset == coding->charsets[i]) \
1490 if (i == coding->ncharsets) \
1491 goto invalid_byte; \
1498 for (i = 0; i < mcharset__iso_2022_table.used; i++) \
1500 charset = mcharset__iso_2022_table.charsets[i]; \
1501 if (charset->revision == (rev) \
1502 && charset->dimension == (dim) \
1503 && charset->final_byte == (final) \
1504 && (charset->code_range[1] == (chars) \
1505 || ((chars) == 96 && charset->code_range[1] == 255))) \
1508 if (i == mcharset__iso_2022_table.used) \
1509 goto invalid_byte; \
1511 status->designation[reg] = charset; \
1516 find_ctext_non_standard_charset (char *charset_name)
1520 if (! strcmp (charset_name, "koi8-r"))
1521 charset = MCHARSET (msymbol ("koi8-r"));
1522 else if (! strcmp (charset_name, "big5-0"))
1523 charset = MCHARSET (msymbol ("big5"));
1530 decode_coding_iso_2022 (const unsigned char *source, int src_bytes, MText *mt,
1531 MConverter *converter)
1533 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1534 MCodingSystem *coding = internal->coding;
1535 const unsigned char *src = internal->carryover;
1536 const unsigned char *src_stop = src + internal->carryover_bytes;
1537 const unsigned char *src_end = source + src_bytes;
1538 const unsigned char *src_base;
1539 unsigned char *dst = mt->data + mt->nbytes;
1540 unsigned char *dst_end = mt->data + mt->allocated;
1542 int last_nchars = 0;
1543 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1544 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
1545 struct iso_2022_status *status
1546 = (struct iso_2022_status *) &(converter->status);
1547 MCharset *charset0, *charset1, *charset;
1549 MCharset *cns_charsets[15];
1551 charset0 = (status->invocation[0] >= 0
1552 ? status->designation[status->invocation[0]] : NULL);
1553 charset1 = (status->invocation[1] >= 0
1554 ? status->designation[status->invocation[1]] : NULL);
1555 charset = mcharset__ascii;
1557 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1561 memset (cns_charsets, 0, sizeof (cns_charsets));
1562 for (i = 0; i < coding->ncharsets; i++)
1563 if (coding->charsets[i]->dimension == 2
1564 && coding->charsets[i]->code_range[1] == 126)
1566 int final = coding->charsets[i]->final_byte;
1568 if (final >= 'G' && final <= 'M')
1569 cns_charsets[final - 'G'] = coding->charsets[i];
1571 cns_charsets[14] = coding->charsets[i];
1577 MCharset *this_charset = NULL;
1580 ONE_MORE_BASE_BYTE (c1);
1582 if (status->utf8_shifting)
1585 int bytes = CHAR_BYTES_BY_HEAD (c1);
1589 for (i = 1; i < bytes; i++)
1594 this_charset = UTF8_CHARSET (buf);
1595 c1 = STRING_CHAR_UTF8 (buf);
1599 if (status->non_standard_encoding > 0)
1603 this_charset = status->non_standard_charset;
1604 for (i = 1; i < status->non_standard_charset_bytes; i++)
1607 c1 = (c1 << 8) | c2;
1609 c1 = DECODE_CHAR (this_charset, c1);
1613 switch (iso_2022_code_class[c1])
1615 case ISO_graphic_plane_0:
1616 this_charset = charset0;
1619 case ISO_0x20_or_0x7F:
1621 || (charset0->code_range[0] != 32
1622 && charset0->code_range[1] != 255))
1623 /* This is SPACE or DEL. */
1624 this_charset = mcharset__ascii;
1626 /* This is a graphic character of plane 0. */
1627 this_charset = charset0;
1630 case ISO_graphic_plane_1:
1633 this_charset = charset1;
1636 case ISO_0xA0_or_0xFF:
1638 || charset1->code_range[0] == 33
1639 || ! (spec->flags & MCODING_ISO_EIGHT_BIT))
1641 /* This is a graphic character of plane 1. */
1644 this_charset = charset1;
1648 this_charset = mcharset__ascii;
1655 if ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1656 && status->designation[1])
1658 status->invocation[0] = 1;
1659 charset0 = status->designation[1];
1662 this_charset = mcharset__ascii;
1666 if (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1668 status->invocation[0] = 0;
1669 charset0 = status->designation[0];
1672 this_charset = mcharset__ascii;
1675 case ISO_single_shift_2_7:
1676 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT_7))
1678 this_charset = mcharset__ascii;
1682 goto label_escape_sequence;
1684 case ISO_single_shift_2:
1685 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1688 if (c1 < 0xA1 || (c1 > 0xA7 && c1 < 0xAF) || c1 > 0xAF
1689 || ! cns_charsets[c1 - 0xA1])
1691 status->designation[2] = cns_charsets[c1 - 0xA1];
1693 else if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1695 /* SS2 is handled as an escape sequence of ESC 'N' */
1697 goto label_escape_sequence;
1699 case ISO_single_shift_3:
1700 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1702 /* SS3 is handled as an escape sequence of ESC 'O' */
1704 goto label_escape_sequence;
1706 case ISO_control_sequence_introducer:
1707 /* CSI is handled as an escape sequence of ESC '[' ... */
1709 goto label_escape_sequence;
1712 if (! spec->use_esc)
1714 this_charset = mcharset__ascii;
1718 label_escape_sequence:
1719 /* Escape sequences handled here are invocation,
1720 designation, and direction specification. */
1723 case '&': /* revision of following character set */
1724 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1725 goto unused_escape_sequence;
1727 if (c1 < '@' || c1 > '~')
1730 if (c1 != ISO_CODE_ESC)
1733 goto label_escape_sequence;
1735 case '$': /* designation of 2-byte character set */
1736 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1737 goto unused_escape_sequence;
1739 if (c1 >= '@' && c1 <= 'B')
1740 { /* designation of JISX0208.1978, GB2312.1980, or
1742 ISO2022_DECODE_DESIGNATION (0, 2, 94, c1, -1);
1744 else if (c1 >= 0x28 && c1 <= 0x2B)
1745 { /* designation of (dimension 2, chars 94) character set */
1747 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2, -1);
1749 else if (c1 >= 0x2C && c1 <= 0x2F)
1750 { /* designation of (dimension 2, chars 96) character set */
1752 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2, -1);
1756 /* We must update these variables now. */
1757 if (status->invocation[0] >= 0)
1758 charset0 = status->designation[status->invocation[0]];
1759 if (status->invocation[1] >= 0)
1760 charset1 = status->designation[status->invocation[1]];
1763 case 'n': /* invocation of locking-shift-2 */
1764 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1765 || ! status->designation[2])
1767 status->invocation[0] = 2;
1768 charset0 = status->designation[2];
1771 case 'o': /* invocation of locking-shift-3 */
1772 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1773 || ! status->designation[3])
1775 status->invocation[0] = 3;
1776 charset0 = status->designation[3];
1779 case 'N': /* invocation of single-shift-2 */
1780 if (! ((spec->flags & MCODING_ISO_SINGLE_SHIFT)
1781 || (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1782 || ! status->designation[2])
1784 this_charset = status->designation[2];
1786 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1790 case 'O': /* invocation of single-shift-3 */
1791 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT)
1792 || ! status->designation[3])
1794 this_charset = status->designation[3];
1796 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1800 case '[': /* specification of direction */
1801 if (! (spec->flags & MCODING_ISO_ISO6429))
1803 /* For the moment, nested direction is not supported.
1804 So, (coding->mode & CODING_MODE_DIRECTION) zero means
1805 left-to-right, and nonzero means right-to-left. */
1809 case ']': /* end of the current direction */
1810 case '0': /* end of the current direction */
1814 case '1': /* start of left-to-right direction */
1821 case '2': /* start of right-to-left direction */
1835 char charset_name[16];
1839 if (! (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT))
1841 /* Compound-text uses these escape sequences:
1843 ESC % G -- utf-8 bytes -- ESC % @
1844 ESC % / 1 M L -- charset name -- STX -- bytes --
1845 ESC % / 2 M L -- charset name -- STX -- bytes --
1846 ESC % / 3 M L -- charset name -- STX -- bytes --
1847 ESC % / 4 M L -- charset name -- STX -- bytes --
1849 It also uses this sequence but that is not yet
1852 ESC % / 0 M L -- charset name -- STX -- bytes -- */
1857 status->utf8_shifting = 1;
1862 if (! status->utf8_shifting)
1864 status->utf8_shifting = 0;
1870 if (c1 < '1' || c1 > '4')
1872 status->non_standard_charset_bytes = c1 - '0';
1875 if (c1 < 128 || c2 < 128)
1877 bytes = (c1 - 128) * 128 + (c2 - 128);
1878 for (i = 0; i < 16; i++)
1881 if (c1 == ISO_CODE_STX)
1883 charset_name[i] = TOLOWER (c1);
1887 charset_name[i++] = '\0';
1888 this_charset = find_ctext_non_standard_charset (charset_name);
1891 status->non_standard_charset = this_charset;
1892 status->non_standard_encoding = bytes - i;
1897 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1898 goto unused_escape_sequence;
1899 if (c1 >= 0x28 && c1 <= 0x2B)
1900 { /* designation of (dimension 1, chars 94) charset */
1902 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2, -1);
1904 else if (c1 >= 0x2C && c1 <= 0x2F)
1905 { /* designation of (dimension 1, chars 96) charset */
1907 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2, -1);
1911 /* We must update these variables now. */
1912 if (status->invocation[0] >= 0)
1913 charset0 = status->designation[status->invocation[0]];
1914 if (status->invocation[1] >= 0)
1915 charset1 = status->designation[status->invocation[1]];
1918 unused_escape_sequence:
1919 UNGET_ONE_BYTE (c1);
1921 this_charset = mcharset__ascii;
1925 if (this_charset->dimension == 1)
1927 if (this_charset->code_range[1] <= 128)
1930 else if (this_charset->dimension == 2)
1933 c1 = ((c1 & 0x7F) << 8) | (c2 & 0x7F);
1935 else /* i.e. (dimension == 3) */
1939 c1 = ((c1 & 0x7F) << 16) | ((c2 & 0x7F) << 8) | (c3 & 0x7F);
1941 c1 = DECODE_CHAR (this_charset, c1);
1945 if (! converter->lenient)
1947 REWIND_SRC_TO_BASE ();
1949 this_charset = mcharset__binary;
1952 if (this_charset != mcharset__ascii
1953 && this_charset != charset)
1955 TAKEIN_CHARS (mt, nchars - last_nchars,
1956 dst - (mt->data + mt->nbytes), charset);
1957 charset = this_charset;
1958 last_nchars = nchars;
1961 if (status->non_standard_encoding > 0)
1962 status->non_standard_encoding -= status->non_standard_charset_bytes;
1964 /* We reach here because of an invalid byte. */
1970 TAKEIN_CHARS (mt, nchars - last_nchars,
1971 dst - (mt->data + mt->nbytes), charset);
1972 return finish_decoding (mt, converter, nchars,
1973 source, src_end, src_base, error);
1977 /* Produce codes (escape sequence) for designating CHARSET to graphic
1978 register REG at DST, and increment DST. If CHARSET->final_byte is
1979 '@', 'A', or 'B' and SPEC->flags lacks MCODING_ISO_LONG_FORM, produce
1980 the short-form designation sequence. Update STATUS->designation. */
1982 #define ISO2022_ENCODE_DESIGNATION(reg, charset, spec, status) \
1984 char *intermediate_char_94 = "()*+"; \
1985 char *intermediate_char_96 = ",-./"; \
1987 if (dst + 4 > dst_end) \
1988 goto memory_shortage; \
1989 *dst++ = ISO_CODE_ESC; \
1990 if (charset->dimension == 1) \
1992 if (charset->code_range[0] != 32 \
1993 && charset->code_range[1] != 255) \
1994 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1996 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
2001 if (charset->code_range[0] != 32 \
2002 && charset->code_range[1] != 255) \
2004 if (spec->flags & MCODING_ISO_LONG_FORM \
2006 || charset->final_byte < '@' || charset->final_byte > 'B') \
2007 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
2010 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
2012 *dst++ = charset->final_byte; \
2014 status->designation[reg] = charset; \
2018 /* The following two macros produce codes (control character or escape
2019 sequence) for ISO-2022 single-shift functions (single-shift-2 and
2022 #define ISO2022_ENCODE_SINGLE_SHIFT_2(spec, status) \
2024 if (dst + 2 > dst_end) \
2025 goto memory_shortage; \
2026 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2027 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
2029 *dst++ = ISO_CODE_SS2; \
2030 status->single_shifting = 1; \
2034 #define ISO2022_ENCODE_SINGLE_SHIFT_3(spec, status) \
2036 if (dst + 2 > dst_end) \
2037 goto memory_shortage; \
2038 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2039 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
2041 *dst++ = ISO_CODE_SS3; \
2042 status->single_shifting = 1; \
2046 /* The following four macros produce codes (control character or
2047 escape sequence) for ISO-2022 locking-shift functions (shift-in,
2048 shift-out, locking-shift-2, and locking-shift-3). */
2050 #define ISO2022_ENCODE_SHIFT_IN(status) \
2052 if (dst + 1 > dst_end) \
2053 goto memory_shortage; \
2054 *dst++ = ISO_CODE_SI; \
2055 status->invocation[0] = 0; \
2059 #define ISO2022_ENCODE_SHIFT_OUT(status) \
2061 if (dst + 1 > dst_end) \
2062 goto memory_shortage; \
2063 *dst++ = ISO_CODE_SO; \
2064 status->invocation[0] = 1; \
2068 #define ISO2022_ENCODE_LOCKING_SHIFT_2(status) \
2070 if (dst + 2 > dst_end) \
2071 goto memory_shortage; \
2072 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
2073 status->invocation[0] = 2; \
2077 #define ISO2022_ENCODE_LOCKING_SHIFT_3(status) \
2079 if (dst + 2 > dst_end) \
2080 goto memory_shortage; \
2081 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
2082 status->invocation[0] = 3; \
2085 #define ISO2022_ENCODE_UTF8_SHIFT_START(len) \
2087 CHECK_DST (3 + len); \
2088 *dst++ = ISO_CODE_ESC; \
2091 status->utf8_shifting = 1; \
2095 #define ISO2022_ENCODE_UTF8_SHIFT_END() \
2098 *dst++ = ISO_CODE_ESC; \
2101 status->utf8_shifting = 0; \
2105 #define ISO2022_ENCODE_NON_STANDARD(name, len) \
2107 CHECK_DST (6 + len + 1 + non_standard_charset_bytes); \
2108 non_standard_begin = dst; \
2109 *dst++ = ISO_CODE_ESC; \
2112 *dst++ = '0' + non_standard_charset_bytes; \
2113 *dst++ = 0, *dst++ = 0; /* filled later */ \
2114 memcpy (dst, name, len); \
2116 *dst++ = ISO_CODE_STX; \
2117 non_standard_bytes = len + 1; \
2122 find_ctext_non_standard_name (MCharset *charset, int *bytes)
2124 char *name = msymbol_name (charset->name);
2126 if (! strcmp (name, "koi8-r"))
2128 else if (! strcmp (name, "big5"))
2129 name = "big5-0", *bytes = 2;
2135 /* Designate CHARSET to a graphic register specified in
2136 SPEC->designations. If the register is not yet invoked to either
2137 graphic left or right, invoke it to graphic left. DSTP points to a
2138 variable containing a memory address where the output must go.
2139 DST_END is the limit of that memory.
2141 Return 0 if it succeeds. Return -1 otherwise, which means that the
2142 memory area is too short. By side effect, update the variable that
2146 iso_2022_designate_invoke_charset (MCodingSystem *coding,
2148 struct iso_2022_spec *spec,
2149 struct iso_2022_status *status,
2150 unsigned char **dstp,
2151 unsigned char *dst_end)
2154 unsigned char *dst = *dstp;
2156 for (i = 0; i < 4; i++)
2157 if (charset == status->designation[i])
2162 /* CHARSET is not yet designated to any graphic registers. */
2163 for (i = 0; i < coding->ncharsets; i++)
2164 if (charset == coding->charsets[i])
2166 if (i == coding->ncharsets)
2168 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2169 if (charset == mcharset__iso_2022_table.charsets[i])
2171 i += coding->ncharsets;
2173 i = spec->designations[i];
2174 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2177 if (status->invocation[0] != i
2178 && status->invocation[1] != i)
2180 /* Graphic register I is not yet invoked. */
2183 case 0: /* graphic register 0 */
2184 ISO2022_ENCODE_SHIFT_IN (status);
2187 case 1: /* graphic register 1 */
2188 ISO2022_ENCODE_SHIFT_OUT (status);
2191 case 2: /* graphic register 2 */
2192 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2193 ISO2022_ENCODE_SINGLE_SHIFT_2 (spec, status);
2195 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2198 case 3: /* graphic register 3 */
2199 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2200 ISO2022_ENCODE_SINGLE_SHIFT_3 (spec, status);
2202 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2215 /* Reset the invocation/designation status to the initial one. SPEC
2216 and STATUS contain information about the initial and current
2217 invocation/designation status respectively. DSTP points to a
2218 variable containing a memory address where the output must go.
2219 DST_END is the limit of that memory.
2221 Return 0 if it succeeds. Return -1 otherwise, which means that the
2222 memory area is too short. By side effect, update the variable that
2226 iso_2022_reset_invocation_designation (struct iso_2022_spec *spec,
2227 struct iso_2022_status *status,
2228 unsigned char **dstp,
2229 unsigned char *dst_end)
2231 unsigned char *dst = *dstp;
2234 /* Reset the invocation status of GL. We have not yet supported GR
2236 if (status->invocation[0] != spec->initial_invocation[0]
2237 && spec->initial_invocation[0] >= 0)
2239 if (spec->initial_invocation[0] == 0)
2240 ISO2022_ENCODE_SHIFT_IN (status);
2241 else if (spec->initial_invocation[0] == 1)
2242 ISO2022_ENCODE_SHIFT_OUT (status);
2243 else if (spec->initial_invocation[0] == 2)
2244 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2245 else /* i.e. spec->initial_invocation[0] == 3 */
2246 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2249 /* Reset the designation status of G0..G3. */
2250 for (i = 0; i < 4; i++)
2251 if (status->designation[i] != spec->initial_designation[i]
2252 && spec->initial_designation[i])
2254 MCharset *charset = spec->initial_designation[i];
2256 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2269 encode_coding_iso_2022 (MText *mt, int from, int to,
2270 unsigned char *destination, int dst_bytes,
2271 MConverter *converter)
2273 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2274 MCodingSystem *coding = internal->coding;
2275 unsigned char *src, *src_end;
2276 unsigned char *dst = destination;
2277 unsigned char *dst_end = dst + dst_bytes;
2279 unsigned char *dst_base;
2280 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
2281 int full_support = spec->flags & MCODING_ISO_FULL_SUPPORT;
2282 struct iso_2022_status *status
2283 = (struct iso_2022_status *) &(converter->status);
2284 MCharset *primary, *charset0, *charset1;
2285 int next_primary_change;
2286 int ncharsets = coding->ncharsets;
2287 MCharset **charsets = coding->charsets;
2288 MCharset *cns_charsets[15];
2289 int ascii_compatible = coding->ascii_compatible;
2290 MCharset *non_standard_charset = NULL;
2291 int non_standard_charset_bytes = 0;
2292 int non_standard_bytes = 0;
2293 unsigned char *non_standard_begin = NULL;
2294 enum MTextFormat format = mt->format;
2296 SET_SRC (mt, format, from, to);
2298 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2302 memset (cns_charsets, 0, sizeof (cns_charsets));
2303 for (i = 0; i < ncharsets; i++)
2304 if (charsets[i]->dimension == 2)
2306 int final = charsets[i]->final_byte;
2308 if (final >= 'G' && final <= 'M')
2309 cns_charsets[final - 'G'] = charsets[i];
2311 cns_charsets[14] = charsets[i];
2315 next_primary_change = from;
2317 charset0 = status->designation[status->invocation[0]];
2318 charset1 = (status->invocation[1] < 0 ? NULL
2319 : status->designation[status->invocation[1]]);
2326 ONE_MORE_CHAR (c, bytes, format);
2328 if (c < 128 && ascii_compatible)
2330 if (status->utf8_shifting)
2331 ISO2022_ENCODE_UTF8_SHIFT_END ();
2335 else if (c <= 32 || c == 127)
2337 if (status->utf8_shifting)
2338 ISO2022_ENCODE_UTF8_SHIFT_END ();
2339 if (spec->flags & MCODING_ISO_RESET_AT_CNTL
2340 || (c == '\n' && spec->flags & MCODING_ISO_RESET_AT_EOL))
2342 if (iso_2022_reset_invocation_designation (spec, status,
2344 goto insufficient_destination;
2345 charset0 = status->designation[status->invocation[0]];
2346 charset1 = (status->invocation[1] < 0 ? NULL
2347 : status->designation[status->invocation[1]]);
2354 unsigned code = MCHAR_INVALID_CODE;
2355 MCharset *charset = NULL;
2357 int pos = from + nchars;
2359 if (pos >= next_primary_change)
2361 MSymbol primary_charset
2362 = (MSymbol) mtext_get_prop (mt, pos, Mcharset);
2363 primary = MCHARSET (primary_charset);
2364 if (primary && primary != mcharset__binary)
2366 if (primary->final_byte <= 0)
2368 else if (! full_support)
2372 for (i = 0; i < ncharsets; i++)
2373 if (primary == charsets[i])
2380 mtext_prop_range (mt, Mcharset, pos,
2381 NULL, &next_primary_change, 0);
2384 if (primary && primary != mcharset__binary)
2386 code = ENCODE_CHAR (primary, c);
2387 if (code != MCHAR_INVALID_CODE)
2392 if (c <= 32 || c == 127)
2395 charset = mcharset__ascii;
2401 for (i = 0; i < ncharsets; i++)
2403 charset = charsets[i];
2404 code = ENCODE_CHAR (charset, c);
2405 if (code != MCHAR_INVALID_CODE)
2410 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
2412 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2414 charset = mcharset__iso_2022_table.charsets[i];
2415 code = ENCODE_CHAR (charset, c);
2416 if (code != MCHAR_INVALID_CODE)
2419 if (i == mcharset__iso_2022_table.used)
2421 if (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2422 goto unsupported_char;
2423 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2428 goto unsupported_char;
2434 && (charset->final_byte >= 0
2435 || spec->flags & MCODING_ISO_EUC_TW_SHIFT))
2437 if (code >= 0x80 && code < 0xA0)
2438 goto unsupported_char;
2440 if (status->utf8_shifting)
2441 ISO2022_ENCODE_UTF8_SHIFT_END ();
2442 if (charset == charset0)
2444 else if (charset == charset1)
2448 unsigned char *p = NULL;
2450 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2454 if (cns_charsets[0] == charset)
2460 for (i = 1; i < 15; i++)
2461 if (cns_charsets[i] == charset)
2464 *dst++ = ISO_CODE_SS2;
2467 status->single_shifting = 1;
2472 if (iso_2022_designate_invoke_charset
2473 (coding, charset, spec, status, &dst, dst_end) < 0)
2474 goto insufficient_destination;
2475 charset0 = status->designation[status->invocation[0]];
2476 charset1 = (status->invocation[1] < 0 ? NULL
2477 : status->designation[status->invocation[1]]);
2479 if (status->single_shifting)
2481 = (spec->flags & MCODING_ISO_EIGHT_BIT) ? 0x80 : 0;
2482 else if (charset == charset0)
2487 if (charset->dimension == 1)
2490 *dst++ = code | gr_mask;
2492 else if (charset->dimension == 2)
2495 *dst++ = (code >> 8) | gr_mask;
2496 *dst++ = (code & 0xFF) | gr_mask;
2501 *dst++ = (code >> 16) | gr_mask;
2502 *dst++ = ((code >> 8) & 0xFF) | gr_mask;
2503 *dst++ = (code & 0xFF) | gr_mask;
2505 status->single_shifting = 0;
2507 else if (charset && spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2509 if (charset != non_standard_charset)
2511 char *name = (find_ctext_non_standard_name
2512 (charset, &non_standard_charset_bytes));
2516 int len = strlen (name);
2518 ISO2022_ENCODE_NON_STANDARD (name, len);
2519 non_standard_charset = charset;
2522 non_standard_charset = NULL;
2525 if (non_standard_charset)
2527 if (dst + non_standard_charset_bytes > dst_end)
2528 goto insufficient_destination;
2529 non_standard_bytes += non_standard_charset_bytes;
2530 non_standard_begin[4] = (non_standard_bytes / 128) | 0x80;
2531 non_standard_begin[5] = (non_standard_bytes % 128) | 0x80;
2532 if (non_standard_charset_bytes == 1)
2534 else if (non_standard_charset_bytes == 2)
2535 *dst++ = code >> 8, *dst++ = code & 0xFF;
2536 else if (non_standard_charset_bytes == 3)
2537 *dst++ = code >> 16, *dst++ = (code >> 8) & 0xFF,
2538 *dst++ = code & 0xFF;
2539 else /* i.e. non_standard_charset_bytes == 4 */
2540 *dst++ = code >> 24, *dst++ = (code >> 16) & 0xFF,
2541 *dst++ = (code >> 8) & 0xFF, *dst++ = code & 0xFF;
2545 int len = CHAR_BYTES (c);
2548 goto unsupported_char;
2549 if (! status->utf8_shifting)
2550 ISO2022_ENCODE_UTF8_SHIFT_START (len);
2553 CHAR_STRING (c, dst);
2557 goto unsupported_char;
2567 if (iso_2022_designate_invoke_charset (coding, mcharset__ascii,
2570 goto insufficient_destination;
2571 if (! converter->lenient)
2573 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
2575 goto insufficient_destination;
2581 /* We reach here because of an unsupported char. */
2582 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2585 insufficient_destination:
2587 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2590 if (converter->result == MCONVERSION_RESULT_SUCCESS
2591 && converter->last_block)
2593 if (status->utf8_shifting)
2595 ISO2022_ENCODE_UTF8_SHIFT_END ();
2598 if (spec->flags & MCODING_ISO_RESET_AT_EOL
2599 && charset0 != spec->initial_designation[0])
2601 if (iso_2022_reset_invocation_designation (spec, status,
2603 goto insufficient_destination;
2606 converter->nchars += nchars;
2607 converter->nbytes += dst - destination;
2608 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2612 /* Stuff for coding-systems of type MCODING_TYPE_MISC. */
2614 /* For SJIS handling... */
2616 #define SJIS_TO_JIS(s1, s2) \
2618 ? (((s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0)) << 8) \
2620 : (((s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1)) << 8) \
2621 | (s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F))))
2623 #define JIS_TO_SJIS(c1, c2) \
2625 ? (((c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1)) << 8) \
2626 | (c2 + ((c2 >= 0x60) ? 0x20 : 0x1F))) \
2627 : (((c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0)) << 8) \
2632 reset_coding_sjis (MConverter *converter)
2634 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2635 MCodingSystem *coding = internal->coding;
2637 if (! coding->ready)
2639 MSymbol kanji_sym = msymbol ("jisx0208.1983");
2640 MCharset *kanji = MCHARSET (kanji_sym);
2641 MSymbol kana_sym = msymbol ("jisx0201-kana");
2642 MCharset *kana = MCHARSET (kana_sym);
2644 if (! kanji || ! kana)
2646 coding->ncharsets = 3;
2647 coding->charsets[1] = kanji;
2648 coding->charsets[2] = kana;
2655 decode_coding_sjis (const unsigned char *source, int src_bytes, MText *mt,
2656 MConverter *converter)
2658 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2659 MCodingSystem *coding = internal->coding;
2660 const unsigned char *src = internal->carryover;
2661 const unsigned char *src_stop = src + internal->carryover_bytes;
2662 const unsigned char *src_end = source + src_bytes;
2663 const unsigned char *src_base;
2664 unsigned char *dst = mt->data + mt->nbytes;
2665 unsigned char *dst_end = mt->data + mt->allocated - MAX_UTF8_CHAR_BYTES;
2667 int last_nchars = 0;
2668 int at_most = converter->at_most > 0 ? converter->at_most : -1;
2670 MCharset *charset_roman = coding->charsets[0];
2671 MCharset *charset_kanji = coding->charsets[1];
2672 MCharset *charset_kana = coding->charsets[2];
2673 MCharset *charset = mcharset__ascii;
2678 MCharset *this_charset;
2681 ONE_MORE_BASE_BYTE (c1);
2686 this_charset = ((c1 <= 0x20 || c1 == 0x7F)
2690 else if ((c1 >= 0x81 && c1 <= 0x9F) || (c1 >= 0xE0 && c1 <= 0xEF))
2693 if ((c2 >= 0x40 && c2 <= 0x7F) || (c2 >= 80 && c2 <= 0xFC))
2695 this_charset = charset_kanji;
2696 c1 = SJIS_TO_JIS (c1, c2);
2701 else if (c1 >= 0xA1 && c1 <= 0xDF)
2703 this_charset = charset_kana;
2709 c = DECODE_CHAR (this_charset, c1);
2714 if (! converter->lenient)
2716 REWIND_SRC_TO_BASE ();
2718 this_charset = mcharset__binary;
2721 if (this_charset != mcharset__ascii
2722 && this_charset != charset)
2724 TAKEIN_CHARS (mt, nchars - last_nchars,
2725 dst - (mt->data + mt->nbytes), charset);
2726 charset = this_charset;
2727 last_nchars = nchars;
2731 /* We reach here because of an invalid byte. */
2735 TAKEIN_CHARS (mt, nchars - last_nchars,
2736 dst - (mt->data + mt->nbytes), charset);
2737 return finish_decoding (mt, converter, nchars,
2738 source, src_end, src_base, error);
2742 encode_coding_sjis (MText *mt, int from, int to,
2743 unsigned char *destination, int dst_bytes,
2744 MConverter *converter)
2746 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2747 MCodingSystem *coding = internal->coding;
2748 unsigned char *src, *src_end;
2749 unsigned char *dst = destination;
2750 unsigned char *dst_end = dst + dst_bytes;
2752 MCharset *charset_roman = coding->charsets[0];
2753 MCharset *charset_kanji = coding->charsets[1];
2754 MCharset *charset_kana = coding->charsets[2];
2755 enum MTextFormat format = mt->format;
2757 SET_SRC (mt, format, from, to);
2764 ONE_MORE_CHAR (c, bytes, format);
2766 if (c <= 0x20 || c == 0x7F)
2773 if ((code = ENCODE_CHAR (charset_roman, c)) != MCHAR_INVALID_CODE)
2778 else if ((code = ENCODE_CHAR (charset_kanji, c))
2779 != MCHAR_INVALID_CODE)
2781 int c1 = code >> 8, c2 = code & 0xFF;
2782 code = JIS_TO_SJIS (c1, c2);
2785 *dst++ = code & 0xFF;
2787 else if ((code = ENCODE_CHAR (charset_kana, c))
2788 != MCHAR_INVALID_CODE)
2791 *dst++ = code | 0x80;
2795 if (! converter->lenient)
2797 len = encode_unsupporeted_char (c, dst, dst_end,
2800 goto insufficient_destination;
2807 /* We reach here because of an unsupported char. */
2808 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2811 insufficient_destination:
2812 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2815 converter->nchars += nchars;
2816 converter->nbytes += dst - destination;
2817 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2821 static MCodingSystem *
2822 find_coding (MSymbol name)
2824 MCodingSystem *coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2829 MSymbol sym = msymbol__canonicalize (name);
2831 plist = mplist_find_by_key (coding_definition_list, sym);
2834 pl = MPLIST_PLIST (plist);
2835 name = MPLIST_VAL (pl);
2836 mconv_define_coding (MSYMBOL_NAME (name), MPLIST_NEXT (pl),
2837 NULL, NULL, NULL, NULL);
2838 coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2839 plist = mplist_pop (plist);
2840 M17N_OBJECT_UNREF (plist);
2845 #define BINDING_NONE 0
2846 #define BINDING_BUFFER 1
2847 #define BINDING_STREAM 2
2849 #define CONVERT_WORKSIZE 0x10000
2855 mcoding__init (void)
2858 MPlist *param, *charsets, *pl;
2860 MLIST_INIT1 (&coding_list, codings, 128);
2861 coding_definition_list = mplist ();
2863 /* ISO-2022 specific initialize routine. */
2864 for (i = 0; i < 0x20; i++)
2865 iso_2022_code_class[i] = ISO_control_0;
2866 for (i = 0x21; i < 0x7F; i++)
2867 iso_2022_code_class[i] = ISO_graphic_plane_0;
2868 for (i = 0x80; i < 0xA0; i++)
2869 iso_2022_code_class[i] = ISO_control_1;
2870 for (i = 0xA1; i < 0xFF; i++)
2871 iso_2022_code_class[i] = ISO_graphic_plane_1;
2872 iso_2022_code_class[0x20] = iso_2022_code_class[0x7F] = ISO_0x20_or_0x7F;
2873 iso_2022_code_class[0xA0] = iso_2022_code_class[0xFF] = ISO_0xA0_or_0xFF;
2874 iso_2022_code_class[0x0E] = ISO_shift_out;
2875 iso_2022_code_class[0x0F] = ISO_shift_in;
2876 iso_2022_code_class[0x19] = ISO_single_shift_2_7;
2877 iso_2022_code_class[0x1B] = ISO_escape;
2878 iso_2022_code_class[0x8E] = ISO_single_shift_2;
2879 iso_2022_code_class[0x8F] = ISO_single_shift_3;
2880 iso_2022_code_class[0x9B] = ISO_control_sequence_introducer;
2882 Mcoding = msymbol ("coding");
2884 Mutf = msymbol ("utf");
2885 Miso_2022 = msymbol ("iso-2022");
2887 Mreset_at_eol = msymbol ("reset-at-eol");
2888 Mreset_at_cntl = msymbol ("reset-at-cntl");
2889 Meight_bit = msymbol ("eight-bit");
2890 Mlong_form = msymbol ("long-form");
2891 Mdesignation_g0 = msymbol ("designation-g0");
2892 Mdesignation_g1 = msymbol ("designation-g1");
2893 Mdesignation_ctext = msymbol ("designation-ctext");
2894 Mdesignation_ctext_ext = msymbol ("designation-ctext-ext");
2895 Mlocking_shift = msymbol ("locking-shift");
2896 Msingle_shift = msymbol ("single-shift");
2897 Msingle_shift_7 = msymbol ("single-shift-7");
2898 Meuc_tw_shift = msymbol ("euc-tw-shift");
2899 Miso_6429 = msymbol ("iso-6429");
2900 Mrevision_number = msymbol ("revision-number");
2901 Mfull_support = msymbol ("full-support");
2902 Mmaybe = msymbol ("maybe");
2904 Mtype = msymbol ("type");
2905 Mcharsets = msymbol_as_managing_key ("charsets");
2906 Mflags = msymbol_as_managing_key ("flags");
2907 Mdesignation = msymbol_as_managing_key ("designation");
2908 Minvocation = msymbol_as_managing_key ("invocation");
2909 Mcode_unit = msymbol ("code-unit");
2910 Mbom = msymbol ("bom");
2911 Mlittle_endian = msymbol ("little-endian");
2914 charsets = mplist ();
2916 /* Setup predefined codings. */
2917 mplist_set (charsets, Msymbol, Mcharset_ascii);
2918 pl = mplist_add (pl, Mtype, Mcharset);
2919 pl = mplist_add (pl, Mcharsets, charsets);
2920 Mcoding_us_ascii = mconv_define_coding ("us-ascii", param,
2921 NULL, NULL, NULL, NULL);
2924 MSymbol alias = msymbol ("ANSI_X3.4-1968");
2925 MCodingSystem *coding
2926 = (MCodingSystem *) msymbol_get (Mcoding_us_ascii, Mcoding);
2928 msymbol_put (alias, Mcoding, coding);
2929 alias = msymbol__canonicalize (alias);
2930 msymbol_put (alias, Mcoding, coding);
2933 mplist_set (charsets, Msymbol, Mcharset_iso_8859_1);
2934 Mcoding_iso_8859_1 = mconv_define_coding ("iso-8859-1", param,
2935 NULL, NULL, NULL, NULL);
2937 mplist_set (charsets, Msymbol, Mcharset_m17n);
2938 mplist_put (param, Mtype, Mutf);
2939 mplist_put (param, Mcode_unit, (void *) 8);
2940 Mcoding_utf_8_full = mconv_define_coding ("utf-8-full", param,
2941 NULL, NULL, NULL, NULL);
2943 mplist_set (charsets, Msymbol, Mcharset_unicode);
2944 Mcoding_utf_8 = mconv_define_coding ("utf-8", param,
2945 NULL, NULL, NULL, NULL);
2947 mplist_put (param, Mcode_unit, (void *) 16);
2948 mplist_put (param, Mbom, Mmaybe);
2949 #ifndef WORDS_BIGENDIAN
2950 mplist_put (param, Mlittle_endian, Mt);
2952 Mcoding_utf_16 = mconv_define_coding ("utf-16", param,
2953 NULL, NULL, NULL, NULL);
2955 mplist_put (param, Mcode_unit, (void *) 32);
2956 Mcoding_utf_32 = mconv_define_coding ("utf-32", param,
2957 NULL, NULL, NULL, NULL);
2959 mplist_put (param, Mcode_unit, (void *) 16);
2960 mplist_put (param, Mbom, Mnil);
2961 mplist_put (param, Mlittle_endian, Mnil);
2962 Mcoding_utf_16be = mconv_define_coding ("utf-16be", param,
2963 NULL, NULL, NULL, NULL);
2965 mplist_put (param, Mcode_unit, (void *) 32);
2966 Mcoding_utf_32be = mconv_define_coding ("utf-32be", param,
2967 NULL, NULL, NULL, NULL);
2969 mplist_put (param, Mcode_unit, (void *) 16);
2970 mplist_put (param, Mlittle_endian, Mt);
2971 Mcoding_utf_16le = mconv_define_coding ("utf-16le", param,
2972 NULL, NULL, NULL, NULL);
2974 mplist_put (param, Mcode_unit, (void *) 32);
2975 Mcoding_utf_32le = mconv_define_coding ("utf-32le", param,
2976 NULL, NULL, NULL, NULL);
2978 mplist_put (param, Mtype, Mnil);
2980 mplist_add (pl, Msymbol, msymbol ("Shift_JIS"));
2981 mplist_put (param, Maliases, pl);
2982 mplist_set (charsets, Msymbol, Mcharset_ascii);
2983 Mcoding_sjis = mconv_define_coding ("sjis", param,
2986 encode_coding_sjis, NULL);
2988 M17N_OBJECT_UNREF (charsets);
2989 M17N_OBJECT_UNREF (param);
2990 M17N_OBJECT_UNREF (pl);
/* Release what the coding-system registry holds: for every entry of
   coding_list free its extra_info and extra_spec, then free the list
   storage, and finally drop the references reachable through
   coding_definition_list (each value, then the list itself).  */
2996 mcoding__fini (void)
3001 for (i = 0; i < coding_list.used; i++)
3003 MCodingSystem *coding = coding_list.codings[i];
/* NOTE(review): mconv_define_coding () may store the caller-supplied
   EXTRA_INFO pointer in this field; confirm that ownership of such a
   pointer is really meant to pass to the library before it is freed.  */
3005 if (coding->extra_info)
3006 free (coding->extra_info);
3007 if (coding->extra_spec)
/* For an ISO-2022 coding the spec is a struct iso_2022_spec whose
   designations member is freed on its own before the spec itself.  */
3009 if (coding->type == Miso_2022)
3010 free (((struct iso_2022_spec *) coding->extra_spec)->designations);
3011 free (coding->extra_spec);
3015 MLIST_FREE1 (&coding_list, codings);
/* Unreference every stored definition before the list that holds them.  */
3016 MPLIST_DO (plist, coding_definition_list)
3017 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
3018 M17N_OBJECT_UNREF (coding_definition_list);
/* Record a charset-type coding definition for SYM in
   coding_definition_list, keyed by the canonicalized form of SYM.
   Nothing is done when that key is already present.  */
3022 mconv__register_charset_coding (MSymbol sym)
3024 MSymbol name = msymbol__canonicalize (sym);
3026 if (! mplist_find_by_key (coding_definition_list, name))
3028 MPlist *param = mplist (), *charsets = mplist ();
/* The definition is the plist
     (Msymbol SYM) (Mtype Mcharset) (Mcharsets (Msymbol SYM))
   i.e. a coding whose only charset is SYM itself.  */
3030 mplist_set (charsets, Msymbol, sym);
3031 mplist_add (param, Msymbol, sym);
3032 mplist_add (param, Mtype, Mcharset);
3033 mplist_add (param, Mcharsets, charsets);
3034 mplist_put (coding_definition_list, name, param);
/* Drop the local reference to CHARSETS now that it has been added to
   PARAM.  PARAM itself is not unreferenced in the lines visible here.  */
3035 M17N_OBJECT_UNREF (charsets);
/* Load the database entry tagged "coding-list" and append one element
   to coding_definition_list per coding name and per alias found in it.
   Time spent loading and parsing is reported through the MDEBUG_*
   macros under the MDEBUG_CODING flag.  */
3041 mcoding__load_from_database ()
3043 MDatabase *mdb = mdatabase_find (msymbol ("coding-list"), Mnil, Mnil, Mnil);
3044 MPlist *def_list, *plist;
3045 MPlist *definitions = coding_definition_list;
3046 int mdebug_flag = MDEBUG_CODING;
3050 MDEBUG_PUSH_TIME ();
3051 def_list = (MPlist *) mdatabase_load (mdb);
3052 MDEBUG_PRINT_TIME ("CODING", (mdebug__output, " to load the data."));
3057 MDEBUG_PUSH_TIME ();
3058 MPLIST_DO (plist, def_list)
3060 MPlist *pl, *aliases;
3061 MSymbol name, canonicalized;
/* Every element must be a plist that starts with a symbol (the coding
   name); anything else is reported with MERROR (MERROR_CHARSET, -1).
   NOTE(review): DEF_LIST is only unreferenced after the loop -- check
   whether these MERROR exits leave it referenced.  */
3063 if (! MPLIST_PLIST_P (plist))
3064 MERROR (MERROR_CHARSET, -1);
3065 pl = MPLIST_PLIST (plist);
3066 if (! MPLIST_SYMBOL_P (pl))
3067 MERROR (MERROR_CHARSET, -1);
3068 name = MPLIST_SYMBOL (pl);
3069 canonicalized = msymbol__canonicalize (name);
/* Build the stored definition from the elements after the name (via
   mplist__from_plist) and push the original name in front of it under
   the key Msymbol.  */
3070 pl = mplist__from_plist (MPLIST_NEXT (pl));
3071 mplist_push (pl, Msymbol, name);
/* mplist_add's return value is kept in DEFINITIONS and used as the
   list argument of the next addition.  */
3072 definitions = mplist_add (definitions, canonicalized, pl);
3073 aliases = mplist_get (pl, Maliases);
/* The same definition object is also added under the canonicalized
   name of each alias symbol, taking one extra reference per alias.  */
3075 MPLIST_DO (aliases, aliases)
3076 if (MPLIST_SYMBOL_P (aliases))
3078 name = MPLIST_SYMBOL (aliases);
3079 canonicalized = msymbol__canonicalize (name);
3080 definitions = mplist_add (definitions, canonicalized, pl);
3081 M17N_OBJECT_REF (pl);
3085 M17N_OBJECT_UNREF (def_list);
3086 MDEBUG_PRINT_TIME ("CODING", (mdebug__output, " to parse the loaded data."));
3092 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
3097 /*** @addtogroup m17nConv */
3101 /***en @name Variables: Symbols representing coding systems */
3102 /***ja @name ÊÑ¿ô: ÄêµÁºÑ¤ß¥³¡¼¥É·Ï¤ò»ØÄꤹ¤ë¤¿¤á¤Î¥·¥ó¥Ü¥ë */
3107 @brief Symbol for the coding system US-ASCII.
3109 The symbol #Mcoding_us_ascii has name <tt>"us-ascii"</tt> and
3110 represents a coding system for the CES US-ASCII. */
3113 @brief US-ASCII ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3115 ¥·¥ó¥Ü¥ë #Mcoding_us_ascii ¤Ï <tt>"us-ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3116 CES US-ASCII ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3118 MSymbol Mcoding_us_ascii;
3122 @brief Symbol for the coding system ISO-8859-1.
3124 The symbol #Mcoding_iso_8859_1 has name <tt>"iso-8859-1"</tt> and
3125 represents a coding system for the CES ISO-8859-1. */
3128 @brief ISO-8859-1 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3130 ¥·¥ó¥Ü¥ë #Mcoding_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt>
3131 ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢CES ISO-8859-1 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3133 MSymbol Mcoding_iso_8859_1;
3137 @brief Symbol for the coding system UTF-8.
3139 The symbol #Mcoding_utf_8 has name <tt>"utf-8"</tt> and represents
3140 a coding system for the CES UTF-8. */
3143 @brief UTF-8 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3145 ¥·¥ó¥Ü¥ë #Mcoding_utf_8 ¤Ï <tt>"utf-8"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢CES
3146 UTF-8 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3149 MSymbol Mcoding_utf_8;
3153 @brief Symbol for the coding system UTF-8-FULL.
3155 The symbol #Mcoding_utf_8_full has name <tt>"utf-8-full"</tt> and
3156 represents a coding system that is an extension of UTF-8. This
3157 coding system uses the same encoding algorithm as UTF-8 but is not
3158 limited to the Unicode characters. It can encode all characters
3159 supported by the m17n library. */
3162 @brief UTF-8-FULL ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3164 ¥·¥ó¥Ü¥ë #Mcoding_utf_8_full ¤Ï <tt>"utf-8-full"</tt>
3165 ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢<tt>"UTF-8"</tt> ¤Î³ÈÄ¥¤Ç¤¢¤ë¥³¡¼¥É·Ï¤ò¼¨¤¹¡£
3166 ¤³¤Î¥³¡¼¥É·Ï¤Ï UTF-8 ¤ÈƱ¤¸¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¥¢¥ë¥´¥ê¥º¥à¤òÍѤ¤¤ë¤¬¡¢ÂоݤÏ
3167 Unicode ʸ»ú¤Ë¤Ï¸ÂÄꤵ¤ì¤Ê¤¤¡£
3168 ¤Þ¤¿m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò¥¨¥ó¥³¡¼¥É¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
3171 MSymbol Mcoding_utf_8_full;
3175 @brief Symbol for the coding system UTF-16.
3177 The symbol #Mcoding_utf_16 has name <tt>"utf-16"</tt> and
3178 represents a coding system for the CES UTF-16 (RFC 2781). */
3180 @brief UTF-16 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3182 ¥·¥ó¥Ü¥ë #Mcoding_utf_16 ¤Ï <tt>"utf-16"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3183 CES UTF-16 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3186 MSymbol Mcoding_utf_16;
3190 @brief Symbol for the coding system UTF-16BE.
3192 The symbol #Mcoding_utf_16be has name <tt>"utf-16be"</tt> and
3193 represents a coding system for the CES UTF-16BE (RFC 2781). */
3196 @brief UTF-16BE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3198 ¥·¥ó¥Ü¥ë #Mcoding_utf_16be ¤Ï <tt>"utf-16be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3199 CES UTF-16BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3201 MSymbol Mcoding_utf_16be;
3205 @brief Symbol for the coding system UTF-16LE.
3207 The symbol #Mcoding_utf_16le has name <tt>"utf-16le"</tt> and
3208 represents a coding system for the CES UTF-16LE (RFC 2781). */
3211 @brief UTF-16LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3213 ¥·¥ó¥Ü¥ë #Mcoding_utf_16le ¤Ï <tt>"utf-16le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3214 CES UTF-16LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3216 MSymbol Mcoding_utf_16le;
3220 @brief Symbol for the coding system UTF-32.
3222 The symbol #Mcoding_utf_32 has name <tt>"utf-32"</tt> and
3223 represents a coding system for the CES UTF-32 (RFC 2279). */
3226 @brief UTF-32 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3228 ¥·¥ó¥Ü¥ë #Mcoding_utf_32 ¤Ï <tt>"utf-32"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3229 CES UTF-32 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3231 MSymbol Mcoding_utf_32;
3235 @brief Symbol for the coding system UTF-32BE.
3237 The symbol #Mcoding_utf_32be has name <tt>"utf-32be"</tt> and
3238 represents a coding system for the CES UTF-32BE (RFC 2279). */
3240 @brief UTF-32BE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3242 ¥·¥ó¥Ü¥ë #Mcoding_utf_32be ¤Ï <tt>"utf-32be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3243 CES UTF-32BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3245 MSymbol Mcoding_utf_32be;
3249 @brief Symbol for the coding system UTF-32LE.
3251 The symbol #Mcoding_utf_32le has name <tt>"utf-32le"</tt> and
3252 represents a coding system for the CES UTF-32LE (RFC 2279). */
3254 @brief UTF-32LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3256 ¥·¥ó¥Ü¥ë #Mcoding_utf_32le ¤Ï <tt>"utf-32le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3257 CES UTF-32LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3259 MSymbol Mcoding_utf_32le;
3263 @brief Symbol for the coding system SJIS.
3265 The symbol #Mcoding_sjis has name <tt>"sjis"</tt> and represents a coding
3266 system for the CES Shift-JIS. */
3268 @brief SJIS ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3270 ¥·¥ó¥Ü¥ë #Mcoding_sjis has ¤Ï <tt>"sjis"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3271 CES Shift-JISÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3273 MSymbol Mcoding_sjis;
3278 @name Variables: Parameter keys for mconv_define_coding (). */
3280 @name ÊÑ¿ô: mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ */
3285 Parameter key for mconv_define_coding () (which see). */
3287 mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ (¾ÜºÙ¤Ï mconv_define_coding ()»²¾È). */
3291 MSymbol Mdesignation;
3292 MSymbol Minvocation;
3295 MSymbol Mlittle_endian;
3300 @name Variables: Symbols representing coding system types. */
3302 @name ÊÑ¿ô¡§ ¥³¡¼¥É·Ï¤Î¥¿¥¤¥×¤ò¼¨¤¹¥·¥ó¥Ü¥ë. */
3307 Symbol that can be a value of the #Mtype parameter of a coding
3308 system used in an argument to the mconv_define_coding () function
3311 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿
3312 #Mtype ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3313 mconv_define_coding ()»²¾È)¡£ */
3323 @name Variables: Symbols appearing in the value of Mflags parameter. */
3325 @name ÊÑ¿ô¡§ ¥Ñ¥é¥á¡¼¥¿ Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë. */
3328 Symbols that can be a value of the @b Mflags parameter of a coding
3329 system used in an argument to the mconv_define_coding () function
3332 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿
3333 @b Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3334 mconv_define_coding ()»²¾È)¡£ */
3335 MSymbol Mreset_at_eol;
3336 MSymbol Mreset_at_cntl;
3339 MSymbol Mdesignation_g0;
3340 MSymbol Mdesignation_g1;
3341 MSymbol Mdesignation_ctext;
3342 MSymbol Mdesignation_ctext_ext;
3343 MSymbol Mlocking_shift;
3344 MSymbol Msingle_shift;
3345 MSymbol Msingle_shift_7;
3346 MSymbol Meuc_tw_shift;
3348 MSymbol Mrevision_number;
3349 MSymbol Mfull_support;
3354 @name Variables: Others
3356 Remaining variables. */
3357 /***ja @name ÊÑ¿ô: ¤½¤Î¾
3363 @brief Symbol whose name is "maybe".
3365 The variable #Mmaybe is a symbol of name <tt>"maybe"</tt>. It is
3366 used as a value of the @b Mbom parameter of the function
3367 mconv_define_coding () (which see). */
3369 @brief "maybe"¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë.
3371 ÊÑ¿ô #Mmaybe ¤Ï <tt>"maybe"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£¤³¤ì¤Ï´Ø¿ô
3372 mconv_define_coding () ¥Ñ¥é¥á¡¼¥¿ @b Mbom ¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤ë¡£
3373 (¾ÜºÙ¤Ï mconv_define_coding () »²¾È)¡£ */
3379 @brief The symbol @c Mcoding.
3381 Any decoded M-text has a text property whose key is the predefined
3382 symbol @c Mcoding. The name of @c Mcoding is
3383 <tt>"coding"</tt>. */
3386 @brief ¥·¥ó¥Ü¥ë @c Mcoding.
3388 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¤¹¤Ù¤Æ¡¢¥¡¼¤¬ÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë @c Mcoding
3389 ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£¥·¥ó¥Ü¥ë @c Mcoding ¤Ï
3390 <tt>"coding"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£ */
3397 @brief Define a coding system.
3399 The mconv_define_coding () function defines a new coding system
3400 and makes it accessible via a symbol whose name is $NAME. $PLIST
3401 specifies parameters of the coding system as below:
3405 <li> Key is @c Mtype, value is a symbol
3407 The value specifies the type of the coding system. It must be
3408 @b Mcharset, @b Mutf, @b Miso_2022, or @b Mnil.
3410 If the type is @b Mcharset, $EXTRA_INFO is ignored.
3412 If the type is @b Mutf, $EXTRA_INFO must be a pointer to
3415 If the type is @b Miso_2022, $EXTRA_INFO must be a pointer to
3416 #MCodingInfoISO2022.
3418 If the type is #Mnil, the argument $RESETTER, $DECODER, and
3419 $ENCODER must be supplied. $EXTRA_INFO is ignored. Otherwise,
3420 they can be @c NULL and the m17n library provides proper defaults.
3422 <li> Key is @b Mcharsets, value is a plist
3424 The value specifies a list of charsets supported by the coding
3425 system. The keys of the plist must be #Msymbol, and the values
3426 must be symbols representing charsets.
3428 <li> Key is @b Mflags, value is a plist
3430 If the type is @b Miso_2022, the value specifies flags to control
3431 the ISO 2022 interpreter. The keys of the plist must be #Msymbol,
3432 and values must be one of the following.
3436 <li> @b Mreset_at_eol
3438 If this flag exists, designation and invocation status is reset to
3439 the initial state at the end of line.
3441 <li> @b Mreset_at_cntl
3443 If this flag exists, designation and invocation status is reset to
3444 the initial state at a control character.
3448 If this flag exists, the graphic plane right is used.
3452 If this flag exists, the over-long escape sequences (ESC '$' '('
3453 \<final_byte\>) are used for designating the CCS JISX0208.1978,
3454 GB2312, and JISX0208.
3456 <li> @b Mdesignation_g0
3458 If this flag and @b Mfull_support exists, designates charsets not
3459 listed in the charset list to the graphic register G0.
3461 <li> @b Mdesignation_g1
3463 If this flag and @b Mfull_support exists, designates charsets not
3464 listed in the charset list to the graphic register G1.
3466 <li> @b Mdesignation_ctext
3468 If this flag and @b Mfull_support exists, designates charsets not
3469 listed in the charset list to a graphic register G0 or G1 based on
3470 the criteria of the Compound Text.
3472 <li> @b Mdesignation_ctext_ext
3474 If this flag and @b Mfull_support exists, designates charsets not
3475 listed in the charset list to a graphic register G0 or G1, or use
3476 extended segment for such charsets based on the criteria of the
3479 <li> @b Mlocking_shift
3481 If this flag exists, use locking shift.
3483 <li> @b Msingle_shift
3485 If this flag exists, use single shift.
3487 <li> @b Msingle_shift_7
3489 If this flag exists, use 7-bit single shift code (0x19).
3491 <li> @b Meuc_tw_shift
3493 If this flag exists, use a special shifting according to EUC-TW.
3497 This flag is currently ignored.
3499 <li> @b Mrevision_number
3501 If this flag exists, use a revision number escape sequence to
3502 designate a charset that has a revision number.
3504 <li> @b Mfull_support
3506 If this flag exists, support all charsets registered in the
3507 International Registry.
3511 <li> Key is @b Mdesignation, value is a plist
3513 If the type is @b Miso_2022, the value specifies how to designate
3514 each supported charset. The keys of the plist must be
3515 #Minteger, and the values must be numbers indicating graphic
3516 registers. The Nth element value is for the Nth charset of the
3517 charset list. The value 0..3 means that it is assumed that a
3518 charset is already designated to the graphic register 0..3. The
3519 negative value G (-4..-1) means that a charset is not designated
3520 to any register at first, and if necessary, is designated to the
3521 (G+4) graphic register.
3523 <li> Key is @b Minvocation, value is a plist
3525 If the type is @b Miso_2022, the value specifies how to invoke
3526 each graphic register. The plist length must be one or two. The
3527 keys of the plist must be #Minteger, and the values must be
3528 numbers indicating a graphic register. The value of the first
3529 element specifies which graphic register is invoked to the
3530 graphic plane left. If the length is one, no graphic register is
3531 invoked to the graphic plane right. Otherwise, the value of the
3532 second element specifies which graphic register is invoked to
3533 the graphic plane right.
3535 <li> Key is @b Mcode_unit, value is an integer
3537 If the type is @b Mutf, the value specifies the bit length of a
3538 code-unit. It must be 8, 16, or 32.
3540 <li> Key is @b Mbom, value is a symbol
3542 If the type is @b Mutf and the code-unit bit length is 16 or 32,
3543 it specifies whether or not to use BOM (Byte Order Mark). If the
3544 value is #Mnil (default), BOM is not used, else if the value is
3545 #Mmaybe, the existence of BOM is detected at decoding time, else
3548 <li> Key is @b Mlittle_endian, value is a symbol
3550 If the type is @b Mutf and the code-unit bit length is 16 or 32,
3551 it specifies whether or not the encoding is little endian. If the
3552 value is #Mnil (default), it is big endian, else it is little
3557 $RESETTER is a pointer to a function that resets a converter for
3558 the coding system to the initial status. The pointed function is
3559 called with one argument, a pointer to a converter object.
3561 $DECODER is a pointer to a function that decodes a byte sequence
3562 according to the coding system. The pointed function is called
3563 with four arguments:
3565 @li A pointer to the byte sequence to decode.
3566 @li The number of bytes to decode.
3567 @li A pointer to an M-text to which the decoded characters are appended.
3568 @li A pointer to a converter object.
3570 $DECODER must return 0 if it succeeds. Otherwise it must return -1.
3572 $ENCODER is a pointer to a function that encodes an M-text
3573 according to the coding system. The pointed function is called
3576 @li A pointer to the M-text to encode.
3577 @li The starting position of the encoding.
3578 @li The ending position of the encoding.
3579 @li A pointer to a memory area where the produced bytes are stored.
3580 @li The size of the memory area.
3581 @li A pointer to a converter object.
3583 $ENCODER must return 0 if it succeeds. Otherwise it must return -1.
3585 $EXTRA_INFO is a pointer to a data structure that contains extra
3586 information about the coding system. The type of the data
3587 structure depends on $TYPE.
3591 If the operation was successful, mconv_define_coding () returns a
3592 symbol whose name is $NAME. If an error is detected, it returns
3593 #Mnil and assigns an error code to the external variable #merror_code. */
3596 @brief ¥³¡¼¥É·Ï¤òÄêµÁ¤¹¤ë.
3598 ´Ø¿ô mconv_define_coding () ¤Ï¡¢¿·¤·¤¤¥³¡¼¥É·Ï¤òÄêµÁ¤·¡¢¤½¤ì¤ò
3599 $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£ $PLIST
3600 ¤Ç¤ÏÄêµÁ¤¹¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿¤ò°Ê²¼¤Î¤è¤¦¤Ë»ØÄꤹ¤ë¡£
3604 <li> ¥¡¼¤¬ @c Mtype ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3606 Ãͤϥ³¡¼¥É·Ï¤Î¥¿¥¤¥×¤òɽ¤·¡¢@b Mcharset, @b Mutf, @b Miso_2022, #Mnil
3607 ¤Î¤¤¤º¤ì¤«¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3609 ¥¿¥¤¥×¤¬ @b Mcharset ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£
3611 ¥¿¥¤¥×¤¬ @b Mutf ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï #MCodingInfoUTF
3612 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3614 ¥¿¥¤¥×¤¬ @b Miso_2022¤Ê¤é¤Ð $EXTRA_INFO ¤Ï #MCodingInfoISO2022
3615 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3617 ¥¿¥¤¥×¤¬ #Mnil ¤Ê¤é¤Ð¡¢°ú¿ô $RESETTER, $DECODER, $ENCODER
3618 ¤òÍ¿¤¨¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£$EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£
3619 ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¤³¤ì¤é¤Ï @c NULL ¤Ç¤è¤¯¡¢
3620 m17n ¥é¥¤¥Ö¥é¥ê¤¬Å¬Àڤʥǥե©¥ë¥ÈÃͤòÍ¿¤¨¤ë¡£
3622 <li> ¥¡¼¤¬ @b Mcharsets ¤ÇÃͤ¬ plist ¤Î»þ
3624 ÃͤϤ³¤Î¥³¡¼¥É·Ï¤Ç¥µ¥Ý¡¼¥È¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î¥ê¥¹¥È¤Ç¤¢¤ë¡£plist¤Î¥¡¼¤Ï
3625 #Msymbol¡¢ÃͤÏʸ»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3627 <li> ¥¡¼¤¬ @b Mflags Ãͤ¬ plist ¤Î»þ
3629 ¥¿¥¤¥×¤¬ @b Miso_2022 ¤Ê¤é¤Ð¡¢¤³¤ÎÃͤÏ, ISO 2022
3630 ¥¤¥ó¥¿¥×¥ê¥¿ÍѤÎÀ©¸æ¥Õ¥é¥Ã¥°¤ò¼¨¤¹¡£plist ¤Î¥¡¼¤Ï #Msymbol
3631 ¤Ç¤¢¤ê¡¢Ãͤϰʲ¼¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ë¡£
3635 <li> @b Mreset_at_eol
3637 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤Ï¹ÔËö¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤ÆÅö½é¤Î¾õÂÖ¤ËÌá¤ë¡£
3639 <li> @b Mreset_at_cntl
3641 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤ÏÀ©¸æʸ»ú¤Ë½Ð²ñ¤Ã¤¿»þÅÀ¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤ÆÅö½é¤Î¾õÂÖ¤ËÌá¤ë¡£
3645 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î±¦È¾Ì̤¬ÍѤ¤¤é¤ì¤ë¡£
3649 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢Ê¸»ú½¸¹ç JISX0208.1978, GB2312, JISX0208
3650 ¤ò»Ø¼¨¤¹¤ëºÝ¤Ë over-long ¥¨¥¹¥±¡¼¥×¥·¡¼¥±¥ó¥¹ (ESC '$' '('
3651 \<final_byte\>) ¤¬ÍѤ¤¤é¤ì¤ë¡£
3653 <li> @b Mdesignation_g0
3655 ¤³¤Î¥Õ¥é¥°¤È @b Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3658 <li> @b Mdesignation_g1
3660 ¤³¤Î¥Õ¥é¥°¤È @b Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3663 <li> @b Mdesignation_ctext
3665 ¤³¤Î¥Õ¥é¥°¤È @b Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3666 G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¥³¥ó¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë¤½¤Ã¤Æ»Ø¼¨¤¹¤ë¡£
3668 <li> @b Mdesignation_ctext_ext
3670 ¤³¤Î¥Õ¥é¥°¤È @b Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3671 G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¤¢¤ë¤¤¤Ï³ÈÄ¥¥»¥°¥á¥ó¥È¤Ë¥³¥ó¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë¤½¤Ã¤Æ»Ø¼¨¤¹¤ë¡£
3673 <li> @b Mlocking_shift
3675 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥í¥Ã¥¥ó¥°¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3677 <li> @b Msingle_shift
3679 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥·¥ó¥°¥ë¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3681 <li> @b Msingle_shift_7
3683 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢7-bit ¥·¥ó¥°¥ë¥·¥Õ¥È¥³¡¼¥É (0x19) ¤òÍѤ¤¤ë¡£
3685 <li> @b Meuc_tw_shift
3687 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢EUC-TW ¤Ë±è¤Ã¤¿ÆÃÊ̤ʥ·¥Õ¥È¤òÍѤ¤¤ë¡£
3691 ¸½»þÅÀ¤Ç¤ÏÍѤ¤¤é¤ì¤Æ¤¤¤Ê¤¤¡£
3693 <li> @b Mrevision_number
3695 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢revision number ¤ò»ý¤Äʸ»ú¥»¥Ã¥È¤ò»Ø¼¨¤¹¤ëºÝ¤Ë
3696 revision number ¥¨¥¹¥±¡¼¥×¥·¡¼¥¯¥¨¥ó¥¹¤òÍѤ¤¤ë¡£
3698 <li> @b Mfull_support
3700 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢the International Registry
3701 ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ëÁ´Ê¸»ú¥»¥Ã¥È¤ò¥µ¥Ý¡¼¥È¤¹¤ë¡£
3705 <li> ¥¡¼¤¬ @b Mdesignation ¤ÇÃͤ¬ plist ¤Î»þ
3707 ¥¿¥¤¥×¤¬ @b Miso_2022 ¤Ê¤é¤Ð¡¢ÃͤϳÆʸ»ú¤ò¤É¤Î¤è¤¦¤Ë»Ø¼¨¤¹¤ë¤«¤ò¼¨¤¹¡£
3708 plist ¤Î¥¡¼¤Ï #Minteger¡¢ÃͤϽ¸¹ç¡Êgraphic register¡Ë
3709 ¤ò¼¨¤¹¿ô»ú¤Ç¤¢¤ë¡£NÈÖÌܤÎÍ×ÁǤÎÃͤϡ¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Î N
3710 ÈÖÌܤÎʸ»ú¥»¥Ã¥È¤ËÂбþ¤¹¤ë¡£Ãͤ¬ 0..3 ¤Ç¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¤¬¤¹¤Ç¤Ë
3711 G0..G3 ¤Ë»Ø¼¨ ¤µ¤ì¤Æ¤¤¤ë¡£
3713 Ãͤ¬Éé(-4..-1) ¤Ç¤¢¤ì¤Ð¡¢½é´ü¾õÂ֤ǤÏʸ»ú¥»¥Ã¥È¤¬¤É¤³¤Ë¤â»Ø¼¨¤µ¤ì¤Æ¤¤¤Ê¤¤¤³¤È¡¢É¬ÍפʺݤˤÏ
3714 G0..G3 ¤Î¤½¤ì¤¾¤ì¤Ë»Ø¼¨¤¹¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
3716 <li> ¥¡¼¤¬ @b Minvocation ¤ÇÃͤ¬ plist ¤Î»þ
3718 ¥¿¥¤¥×¤¬ @b Miso_2022 ¤Ê¤é¤Ð¡¢Ãͤϳƽ¸¹ç¤ò¤É¤Î¤è¤¦¤Ë¸Æ¤Ó½Ð¤¹¤«¤ò¼¨¤¹¡£
3719 plist ¤ÎŤµ¤Ï 1 ¤Ê¤¤¤· 2 ¤Ç¤¢¤ë¡£plist ¤Î¥¡¼¤Ï
3720 #Minteger¡¢ÃͤϽ¸¹ç¡Êgraphic register)¤ò¼¨¤¹¿ô»ú¤Ç¤¢¤ë¡£
3721 ºÇ½é¤ÎÍ×ÁǤÎÃͤ¬¿Þ·Áʸ»ú½¸¹çº¸È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤ò¼¨¤¹¡£
3722 plist ¤ÎŤµ¤¬ 1 ¤Ê¤é¤Ð¡¢±¦È¾Ì̤ˤϲ¿¤â¸Æ¤Ó½Ð¤µ¤ì¤Ê¤¤¡£
3723 ¤½¤¦¤Ç¤±¤ì¤Ð¡¢£²¤Ä¤á¤ÎÍ×ÁǤÎÃͤ¬¿Þ·Áʸ»ú½¸¹ç±¦È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤ò¼¨¤¹¡£
3725 <li> ¥¡¼¤¬ @b Mcode_unit ¤ÇÃͤ¬À°¿ôÃͤλþ
3727 ¥¿¥¤¥×¤¬ @b Mutf ¤Ê¤é¤Ð¡¢Ãͤϥ³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤǤ¢¤ê¡¢8, 16,
3728 32 ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ë¡£
3730 <li> ¥¡¼¤¬ @b Mbom ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3732 ¥¿¥¤¥×¤¬ @b Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32¤Ê¤é¤Ð¡¢ÃͤÏ
3733 BOM (Byte Order Mark) ¤ò»ÈÍѤ¹¤ë¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
3734 #Mnil ¤Ê¤é¤Ð¡¢»ÈÍѤ·¤Ê¤¤¡£Ãͤ¬ #Mmaybe ¤Ê¤é¤Ð¥Ç¥³¡¼¥É»þ¤Ë BOM
3735 ¤¬¤¢¤ë¤«¤É¤¦¤«¤òÄ´¤Ù¤ë¡£¤½¤ì°Ê³°¤Ê¤é¤Ð»ÈÍѤ¹¤ë¡£
3737 <li> ¥¡¼¤¬ @b Mlittle_endian ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3739 ¥¿¥¤¥×¤¬ @b Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32
3740 ¤Ê¤é¤Ð¡¢Ãͤϥ¨¥ó¥³¡¼¥É¤¬ little endian ¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
3741 #Mnil ¤Ê¤é¤Ð big endian ¤Ç¤¢¤ê¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð little endian ¤Ç¤¢¤ë¡£
3746 ¤Ï¤³¤Î¥³¡¼¥É·ÏÍѤΥ³¥ó¥Ð¡¼¥¿¤ò½é´ü¾õÂ֤˥ꥻ¥Ã¥È¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3747 ¤³¤Î´Ø¿ô¤Ï¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿¤È¤¤¤¦£±°ú¿ô¤ò¤È¤ë¡£
3749 $DECODER ¤Ï¥Ð¥¤¥ÈÎó¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥Ç¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3750 ¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î£´°ú¿ô¤ò¤È¤ë¡£
3752 @li ¥Ç¥³¡¼¥É¤¹¤ë¥Ð¥¤¥ÈÎó¤Ø¤Î¥Ý¥¤¥ó¥¿
3753 @li ¥Ç¥³¡¼¥É¤¹¤Ù¤¥Ð¥¤¥È¿ô
3754 @li ¥Ç¥³¡¼¥É·ë²Ì¤Îʸ»ú¤òÉղ乤ë M-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3755 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3757 $DECODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï 0 ¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï -1
3758 ¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3760 $ENCODER ¤Ï M-text ¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥¨¥ó¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3761 ¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î£¶°ú¿ô¤ò¤È¤ë¡£
3763 @li ¥¨¥ó¥³¡¼¥É¤¹¤ëM-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3764 @li M-text ¤Î¥¨¥ó¥³¡¼¥É³«»Ï°ÌÃÖ
3765 @li M-text ¤Î¥¨¥ó¥³¡¼¥É½ªÎ»°ÌÃÖ
3766 @li À¸À®¤·¤¿¥Ð¥¤¥È¤òÊÝ»ý¤¹¤ë¥á¥â¥êÎΰè¤Ø¤Î¥Ý¥¤¥ó¥¿
3767 @li ¥á¥â¥êÎΰè¤Î¥µ¥¤¥º
3768 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3770 $ENCODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï 0 ¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï -1
3771 ¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3773 $EXTRA_INFO ¤Ï¥³¡¼¥Ç¥£¥°¥·¥¹¥Æ¥à¤Ë´Ø¤¹¤ëÄɲþðÊó¤ò´Þ¤à¥Ç¡¼¥¿¹½Â¤¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3774 ¤³¤Î¥Ç¡¼¥¿¹½Â¤¤Î·¿ $TYPE ¤Ë°Í¸¤¹¤ë¡£
3778 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mconv_define_coding () ¤Ï $NAME
3779 ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë¤òÊÖ¤¹¡£ ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï #Mnil
3780 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
3788 mconv_define_coding (const char *name, MPlist *plist,
3789 int (*resetter) (MConverter *),
3790 int (*decoder) (const unsigned char *, int, MText *,
3792 int (*encoder) (MText *, int, int,
3793 unsigned char *, int,
3797 MSymbol sym = msymbol (name);
3799 MCodingSystem *coding;
3802 MSTRUCT_MALLOC (coding, MERROR_CODING);
3804 if ((coding->type = (MSymbol) mplist_get (plist, Mtype)) == Mnil)
3805 coding->type = Mcharset;
3806 pl = (MPlist *) mplist_get (plist, Mcharsets);
3808 MERROR (MERROR_CODING, Mnil);
3809 coding->ncharsets = mplist_length (pl);
3810 if (coding->ncharsets > NUM_SUPPORTED_CHARSETS)
3811 coding->ncharsets = NUM_SUPPORTED_CHARSETS;
3812 for (i = 0; i < coding->ncharsets; i++, pl = MPLIST_NEXT (pl))
3814 MSymbol charset_name;
3816 if (MPLIST_KEY (pl) != Msymbol)
3817 MERROR (MERROR_CODING, Mnil);
3818 charset_name = MPLIST_SYMBOL (pl);
3819 if (! (coding->charsets[i] = MCHARSET (charset_name)))
3820 MERROR (MERROR_CODING, Mnil);
3823 coding->resetter = resetter;
3824 coding->decoder = decoder;
3825 coding->encoder = encoder;
3826 coding->ascii_compatible = 0;
3827 coding->extra_info = extra_info;
3828 coding->extra_spec = NULL;
3831 if (coding->type == Mcharset)
3833 if (! coding->resetter)
3834 coding->resetter = reset_coding_charset;
3835 if (! coding->decoder)
3836 coding->decoder = decode_coding_charset;
3837 if (! coding->encoder)
3838 coding->encoder = encode_coding_charset;
3840 else if (coding->type == Mutf)
3842 MCodingInfoUTF *info = malloc (sizeof (MCodingInfoUTF));
3845 if (! coding->resetter)
3846 coding->resetter = reset_coding_utf;
3848 info->code_unit_bits = (int) mplist_get (plist, Mcode_unit);
3849 if (info->code_unit_bits == 8)
3851 if (! coding->decoder)
3852 coding->decoder = decode_coding_utf_8;
3853 if (! coding->encoder)
3854 coding->encoder = encode_coding_utf_8;
3856 else if (info->code_unit_bits == 16)
3858 if (! coding->decoder)
3859 coding->decoder = decode_coding_utf_16;
3860 if (! coding->encoder)
3861 coding->encoder = encode_coding_utf_16;
3863 else if (info->code_unit_bits == 32)
3865 if (! coding->decoder)
3866 coding->decoder = decode_coding_utf_32;
3867 if (! coding->encoder)
3868 coding->encoder = encode_coding_utf_32;
3871 MERROR (MERROR_CODING, Mnil);
3872 val = (MSymbol) mplist_get (plist, Mbom);
3875 else if (val == Mmaybe)
3880 info->endian = (mplist_get (plist, Mlittle_endian) ? 1 : 0);
3881 coding->extra_info = info;
3883 else if (coding->type == Miso_2022)
3885 MCodingInfoISO2022 *info = malloc (sizeof (MCodingInfoISO2022));
3887 if (! coding->resetter)
3888 coding->resetter = reset_coding_iso_2022;
3889 if (! coding->decoder)
3890 coding->decoder = decode_coding_iso_2022;
3891 if (! coding->encoder)
3892 coding->encoder = encode_coding_iso_2022;
3894 info->initial_invocation[0] = 0;
3895 info->initial_invocation[1] = -1;
3896 pl = (MPlist *) mplist_get (plist, Minvocation);
3899 if (MPLIST_KEY (pl) != Minteger)
3900 MERROR (MERROR_CODING, Mnil);
3901 info->initial_invocation[0] = MPLIST_INTEGER (pl);
3902 if (! MPLIST_TAIL_P (pl))
3904 pl = MPLIST_NEXT (pl);
3905 if (MPLIST_KEY (pl) != Minteger)
3906 MERROR (MERROR_CODING, Mnil);
3907 info->initial_invocation[1] = MPLIST_INTEGER (pl);
3910 memset (info->designations, 0, sizeof (info->designations));
3911 for (i = 0, pl = (MPlist *) mplist_get (plist, Mdesignation);
3912 i < 32 && pl && MPLIST_KEY (pl) == Minteger;
3913 i++, pl = MPLIST_NEXT (pl))
3914 info->designations[i] = MPLIST_INTEGER (pl);
3917 MPLIST_DO (pl, (MPlist *) mplist_get (plist, Mflags))
3921 if (MPLIST_KEY (pl) != Msymbol)
3922 MERROR (MERROR_CODING, Mnil);
3923 val = MPLIST_SYMBOL (pl);
3924 if (val == Mreset_at_eol)
3925 info->flags |= MCODING_ISO_RESET_AT_EOL;
3926 else if (val == Mreset_at_cntl)
3927 info->flags |= MCODING_ISO_RESET_AT_CNTL;
3928 else if (val == Meight_bit)
3929 info->flags |= MCODING_ISO_EIGHT_BIT;
3930 else if (val == Mlong_form)
3931 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3932 else if (val == Mdesignation_g0)
3933 info->flags |= MCODING_ISO_DESIGNATION_G0;
3934 else if (val == Mdesignation_g1)
3935 info->flags |= MCODING_ISO_DESIGNATION_G1;
3936 else if (val == Mdesignation_ctext)
3937 info->flags |= MCODING_ISO_DESIGNATION_CTEXT;
3938 else if (val == Mdesignation_ctext_ext)
3939 info->flags |= MCODING_ISO_DESIGNATION_CTEXT_EXT;
3940 else if (val == Mlocking_shift)
3941 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3942 else if (val == Msingle_shift)
3943 info->flags |= MCODING_ISO_SINGLE_SHIFT;
3944 else if (val == Msingle_shift_7)
3945 info->flags |= MCODING_ISO_SINGLE_SHIFT_7;
3946 else if (val == Meuc_tw_shift)
3947 info->flags |= MCODING_ISO_EUC_TW_SHIFT;
3948 else if (val == Miso_6429)
3949 info->flags |= MCODING_ISO_ISO6429;
3950 else if (val == Mrevision_number)
3951 info->flags |= MCODING_ISO_REVISION_NUMBER;
3952 else if (val == Mfull_support)
3953 info->flags |= MCODING_ISO_FULL_SUPPORT;
3956 coding->extra_info = info;
3960 if (! coding->decoder || ! coding->encoder)
3961 MERROR (MERROR_CODING, Mnil);
3962 if (! coding->resetter)
3966 msymbol_put (sym, Mcoding, coding);
3967 msymbol_put (msymbol__canonicalize (sym), Mcoding, coding);
3968 plist = (MPlist *) mplist_get (plist, Maliases);
3971 MPLIST_DO (pl, plist)
3975 if (MPLIST_KEY (pl) != Msymbol)
3977 alias = MPLIST_SYMBOL (pl);
3978 msymbol_put (alias, Mcoding, coding);
3979 msymbol_put (msymbol__canonicalize (alias), Mcoding, coding);
3983 MLIST_APPEND1 (&coding_list, codings, coding, MERROR_CODING);
3991 @brief Resolve coding system name.
3993 The mconv_resolve_coding () function returns $SYMBOL if it
3994 represents a coding system. Otherwise, it canonicalizes $SYMBOL as
3995 a coding system name, and if the canonicalized name represents a
3996 coding system, returns it. Otherwise, it returns #Mnil. */
3998 @brief ¥³¡¼¥É·Ï¤Î̾Á°¤ò²ò·è¤¹¤ë.
4000 ´Ø¿ô mconv_resolve_coding () ¤Ï $SYMBOL ¤¬¥³¡¼¥É·Ï¤ò¼¨¤·¤Æ¤¤¤ì¤Ð¤½¤ì¤òÊÖ¤¹¡£
4001 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¥³¡¼¥É·Ï¤Î̾Á°¤È¤·¤Æ $SYMBOL
4002 ¤òÀµµ¬²½¤·¡¢¤½¤ì¤¬¥³¡¼¥É·Ï¤òɽ¤·¤Æ¤¤¤ì¤ÐÀµµ¬²½¤·¤¿ $SYMBOL ¤òÊÖ¤¹¡£
4003 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð#Mnil ¤òÊÖ¤¹¡£ */
/* Look SYMBOL up as given, then in its canonicalized form.  The value
   returned is the name stored in the coding system that was found, or
   Mnil when the lookup yields nothing.  */
4008 mconv_resolve_coding (MSymbol symbol)
4010 MCodingSystem *coding = find_coding (symbol);
/* Second attempt: retry the lookup with the canonicalized symbol.  */
4014 symbol = msymbol__canonicalize (symbol);
4015 coding = find_coding (symbol);
4017 return (coding ? coding->name : Mnil);
4024 @brief List symbols representing coding systems.
4026 The mconv_list_codings () function makes an array of symbols
4027 representing a coding system, stores the pointer to the array in a
4028 place pointed to by $SYMBOLS, and returns the length of the array. */
4030 @brief ¥³¡¼¥É·Ï¤òɽ¤ï¤¹¥·¥ó¥Ü¥ë¤òÎóµó¤¹¤ë.
4032 ´Ø¿ô mchar_list_codings () ¤Ï¡¢¥³¡¼¥É·Ï¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤òʤ٤¿ÇÛÎó¤òºî¤ê¡¢
4033 $SYMBOLS ¤Ç¥Ý¥¤¥ó¥È¤µ¤ì¤¿¾ì½ê¤Ë¤³¤ÎÇÛÎó¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÃÖ¤¡¢ÇÛÎó¤ÎŤµ¤òÊÖ¤¹¡£ */
/* Allocate *SYMBOLS and fill it with coding-system names drawn from
   coding_definition_list and coding_list.  */
4036 mconv_list_codings (MSymbol **symbols)
/* I first serves as the allocation size: the number of entries in
   coding_list plus the length of coding_definition_list, which is the
   most that the two loops below can store.  */
4038 int i = coding_list.used + mplist_length (coding_definition_list);
4042 MTABLE_MALLOC ((*symbols), i, MERROR_CODING);
/* From each definition take the symbol held by its first element.  */
4044 MPLIST_DO (plist, coding_definition_list)
4046 MPlist *pl = MPLIST_VAL (plist);
4047 (*symbols)[i++] = MPLIST_SYMBOL (pl);
/* Then add the codings of coding_list whose name is not a key of
   coding_definition_list, so those names are not stored a second
   time.  */
4049 for (j = 0; j < coding_list.used; j++)
4050 if (! mplist_find_by_key (coding_definition_list,
4051 coding_list.codings[j]->name))
4052 (*symbols)[i++] = coding_list.codings[j]->name;
4059 @brief Create a code converter bound to a buffer.
4061 The mconv_buffer_converter () function creates a pointer to a code
4062 converter for coding system $NAME. The code converter is bound
4063 to buffer area of $N bytes pointed to by $BUF. Subsequent
4064 decodings and encodings are done to/from this buffer area.
4066 $NAME can be #Mnil. In this case, a coding system associated
4067 with the current locale (LC_CTYPE) is used.
4070 If the operation was successful, mconv_buffer_converter () returns
4071 the created code converter. Otherwise it returns @c NULL and
4072 assigns an error code to the external variable #merror_code. */
4075 @brief ¥Ð¥Ã¥Õ¥¡¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë.
4077 ´Ø¿ô mconv_buffer_converter () ¤Ï¡¢¥³¡¼¥É·Ï $NAME
4078 ÍѤΥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢$BUF ¤Ç¼¨¤µ¤ì¤ëÂ礤µ $N
4079 ¥Ð¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤ë¡£
4080 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¡£
4082 $NAME ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4083 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4086 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð mconv_buffer_converter () ¤Ï ºîÀ®¤·¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£
4087 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4088 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4090 @latexonly \IPAlabel{mconverter} @endlatexonly */
4094 @c MERROR_SYMBOL, @c MERROR_CODING
4097 mconv_stream_converter () */
/* Create a converter for coding system NAME and bind it to the N-byte
   area at BUF.  Both error exits visible here use MERROR with
   MERROR_CODING and NULL.  */
4100 mconv_buffer_converter (MSymbol name, const unsigned char *buf, int n)
4102 MCodingSystem *coding;
4103 MConverter *converter;
4104 MConverterStatus *internal;
/* Take the coding system from the LC_CTYPE locale.  The guarding
   condition is not visible in this excerpt; the function's
   documentation says this applies when NAME is Mnil.  */
4107 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4108 coding = find_coding (name);
/* Error exit; presumably taken when find_coding () found nothing
   (the test itself is not visible here).  */
4110 MERROR (MERROR_CODING, NULL);
/* Allocate the public converter object and its private status
   record, then link the two together.  */
4111 MSTRUCT_CALLOC (converter, MERROR_CODING);
4112 MSTRUCT_CALLOC (internal, MERROR_CODING);
4113 converter->internal_info = internal;
4114 internal->coding = coding;
/* If the coding system supplies a resetter, run it on the new
   converter; a negative result is treated as failure.  */
4115 if (coding->resetter
4116 && (*coding->resetter) (converter) < 0)
4120 MERROR (MERROR_CODING, NULL);
/* Create the M-text holding pushed-back characters and the scratch
   M-text, the latter enlarged by MAX_UTF8_CHAR_BYTES.  */
4123 internal->unread = mtext ();
4124 internal->work_mt = mtext ();
4125 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
/* Record the caller's buffer and its size, and mark the converter as
   buffer-bound.  */
4126 internal->buf.in = buf;
4128 internal->bufsize = n;
4129 internal->binding = BINDING_BUFFER;
4137 @brief Create a code converter bound to a stream.
4139 The mconv_stream_converter () function creates a pointer to a code
4140 converter for coding system $NAME. The code converter is bound
4141 to stream $FP. Subsequent decodings and encodings are done
4142 to/from this stream.
4144 $NAME can be #Mnil. In this case, a coding system associated
4145 with the current locale (LC_CTYPE) is used.
4148 If the operation was successful, mconv_stream_converter ()
4149 returns the created code converter. Otherwise it returns @c NULL
4150 and assigns an error code to the external variable
4154 @brief ¥¹¥È¥ê¡¼¥à¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë.
4156 ´Ø¿ô mconv_stream_converter () ¤Ï¡¢¥³¡¼¥É·Ï $NAME
4157 ÍѤΥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP
4159 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¡£
4161 $NAME ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4162 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4165 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_stream_converter ()
4166 ¤ÏºîÀ®¤·¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL
4167 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4169 @latexonly \IPAlabel{mconverter} @endlatexonly */
4173 @c MERROR_SYMBOL, @c MERROR_CODING
4176 mconv_buffer_converter () */
/* Create a converter for coding system NAME and bind it to stream FP.
   Both error exits visible here use MERROR with MERROR_CODING and
   NULL.  */
4179 mconv_stream_converter (MSymbol name, FILE *fp)
4181 MCodingSystem *coding;
4182 MConverter *converter;
4183 MConverterStatus *internal;
/* Take the coding system from the LC_CTYPE locale.  The guarding
   condition is not visible in this excerpt; the function's
   documentation says this applies when NAME is Mnil.  */
4186 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4187 coding = find_coding (name);
/* Error exit; presumably taken when find_coding () found nothing
   (the test itself is not visible here).  */
4189 MERROR (MERROR_CODING, NULL);
/* Allocate the public converter object and its private status
   record, then link the two together.  */
4190 MSTRUCT_CALLOC (converter, MERROR_CODING);
4191 MSTRUCT_CALLOC (internal, MERROR_CODING);
4192 converter->internal_info = internal;
4193 internal->coding = coding;
/* If the coding system supplies a resetter, run it on the new
   converter; a negative result is treated as failure.  */
4194 if (coding->resetter
4195 && (*coding->resetter) (converter) < 0)
4199 MERROR (MERROR_CODING, NULL);
/* A zero-offset fseek relative to the current position is used as a
   test; internal->seekable is set to 0 on its failure branch and to
   1 otherwise.  Lines between the test and the first assignment are
   not visible in this excerpt.  */
4202 if (fseek (fp, 0, SEEK_CUR) < 0)
4210 internal->seekable = 0;
4213 internal->seekable = 1;
/* Create the M-text holding pushed-back characters and the scratch
   M-text, the latter enlarged by MAX_UTF8_CHAR_BYTES.  */
4214 internal->unread = mtext ();
4215 internal->work_mt = mtext ();
4216 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
/* Mark the converter as stream-bound.  */
4218 internal->binding = BINDING_STREAM;
4226 @brief Reset a code converter.
4228 The mconv_reset_converter () function resets code converter
4229 $CONVERTER to the initial state.
4232 If $CONVERTER->coding has its own resetter function,
4233 mconv_reset_converter () returns the result of that function
4234 applied to $CONVERTER. Otherwise it returns 0. */
4237 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò¥ê¥»¥Ã¥È¤¹¤ë.
4239 ´Ø¿ô mconv_reset_converter () ¤Ï¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
4243 ¤â¤· $CONVERTER->coding ¤Ë¥ê¥»¥Ã¥ÈÍѤδؿô¤¬ÄêµÁ¤µ¤ì¤Æ¤¤¤ë¤Ê¤é¤Ð¡¢
4244 mconv_reset_converter () ¤Ï¤½¤Î´Ø¿ô¤Ë $CONVERTER
4245 ¤òŬÍѤ·¤¿·ë²Ì¤òÊÖ¤·¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð0¤òÊÖ¤¹¡£ */
/* Return CONVERTER to its initial state: clear the counters, the
   result code, the carry-over byte count and the pushed-back
   characters, then hand over to the coding system's own resetter
   when one exists.  */
4248 mconv_reset_converter (MConverter *converter)
4250 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4252 converter->nchars = converter->nbytes = 0;
4253 converter->result = MCONVERSION_RESULT_SUCCESS;
4254 internal->carryover_bytes = 0;
/* Discard any characters previously pushed back.  */
4256 mtext_reset (internal->unread);
/* The resetter's return value becomes this function's return
   value.  */
4257 if (internal->coding->resetter)
4258 return (*internal->coding->resetter) (converter);
4265 @brief Free a code converter.
4267 The mconv_free_converter () function frees the code converter
4271 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò²òÊü¤¹¤ë.
4273 ´Ø¿ô mconv_free_converter () ¤Ï¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
/* Release CONVERTER.  The lines visible here drop the references to
   the two internal M-texts; the release of the structures themselves
   is not visible in this excerpt.  */
4277 mconv_free_converter (MConverter *converter)
4279 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4281 M17N_OBJECT_UNREF (internal->work_mt);
4282 M17N_OBJECT_UNREF (internal->unread);
4290 @brief Bind a buffer to a code converter.
4292 The mconv_rebind_buffer () function binds buffer area of $N bytes
4293 pointed to by $BUF to code converter $CONVERTER. Subsequent
4294 decodings and encodings are done to/from this newly bound buffer
4298 This function always returns $CONVERTER. */
4301 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë¥Ð¥Ã¥Õ¥¡Îΰè¤ò·ë¤ÓÉÕ¤±¤ë.
4303 ´Ø¿ô mconv_rebind_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿Â礤µ $N
4304 ¥Ð¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£
4305 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥Ð¥Ã¥Õ¥¡Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4308 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4310 @latexonly \IPAlabel{mconv_rebind_buffer} @endlatexonly */
4314 mconv_rebind_stream () */
/* Bind the N-byte area at BUF to CONVERTER, replacing whatever was
   bound before.  */
4317 mconv_rebind_buffer (MConverter *converter, const unsigned char *buf, int n)
4319 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Record the new buffer and its size, and mark the converter as
   buffer-bound.  */
4321 internal->buf.in = buf;
4323 internal->bufsize = n;
4324 internal->binding = BINDING_BUFFER;
4331 @brief Bind a stream to a code converter.
4333 The mconv_rebind_stream () function binds stream $FP to code
4334 converter $CONVERTER. Subsequent decodings and encodings are done
4335 to/from this newly bound stream.
4338 This function always returns $CONVERTER. */
4341 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë¥¹¥È¥ê¡¼¥à¤ò·ë¤ÓÉÕ¤±¤ë.
4343 ´Ø¿ô mconv_rebind_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP ¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4344 $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£
4345 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4348 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4350 @latexonly \IPAlabel{mconv_rebind_stream} @endlatexonly */
4354 mconv_rebind_buffer () */
/* Bind stream FP to CONVERTER, replacing whatever was bound
   before.  */
4357 mconv_rebind_stream (MConverter *converter, FILE *fp)
4359 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* A zero-offset fseek relative to the current position is used as a
   test; internal->seekable is set to 0 on its failure branch and to
   1 otherwise.  Lines between the test and the first assignment are
   not visible in this excerpt.  */
4361 if (fseek (fp, 0, SEEK_CUR) < 0)
4365 internal->seekable = 0;
4368 internal->seekable = 1;
/* Mark the converter as stream-bound.  */
4370 internal->binding = BINDING_STREAM;
4377 @brief Decode a byte sequence into an M-text.
4379 The mconv_decode () function decodes a byte sequence and appends
4380 the result at the end of M-text $MT. The source byte sequence is
4381 taken from either the buffer area or the stream that is currently
4382 bound to $CONVERTER.
4385 If the operation was successful, mconv_decode () returns updated
4386 $MT. Otherwise it returns @c NULL and assigns an error code to
4387 the external variable #merror_code. */
4390 @brief ¥Ð¥¤¥ÈÎó¤ò M-text ¤Ë¥Ç¥³¡¼¥É¤¹¤ë.
4392 ´Ø¿ô mconv_decode () ¤Ï¡¢¥Ð¥¤¥ÈÎó¤ò¥Ç¥³¡¼¥É¤·¤Æ¤½¤Î·ë²Ì¤ò M-text
4393 $MT ¤ÎËöÈø¤ËÄɲ乤롣¥Ç¥³¡¼¥É¸µ¤Î¥Ð¥¤¥ÈÎó¤Ï¡¢$CONVERTER
4394 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é¼è¤é¤ì¤ë¡£
4397 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode () ¤Ï¹¹¿·¤µ¤ì¤¿ $MT ¤òÊÖ¤¹¡£
4398 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4399 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4403 @c MERROR_IO, @c MERROR_CODING
4406 mconv_rebind_buffer (), mconv_rebind_stream (),
4407 mconv_encode (), mconv_encode_range (),
4408 mconv_decode_buffer (), mconv_decode_stream () */
/* Decode bytes from the source bound to CONVERTER and append the
   resulting characters to MT.  Characters pushed back earlier are
   delivered first; after that the input comes from the bound buffer
   or stream.  A converter with no binding is an error
   (MERROR_CODING).  */
4411 mconv_decode (MConverter *converter, MText *mt)
4413 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* A non-positive converter->at_most is represented locally as -1.  */
4414 int at_most = converter->at_most > 0 ? converter->at_most : -1;
/* Read-only check on MT; NULL is the error value handed to the
   macro.  */
4417 M_CHECK_READONLY (mt, NULL);
/* Bring MT into UTF-8 format if it is in any other format.  */
4419 if (mt->format != MTEXT_FORMAT_UTF_8)
4420 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
4423 mtext__enlarge (mt, MAX_UTF8_CHAR_BYTES);
/* Reset the per-call counters and result code.  */
4425 converter->nchars = converter->nbytes = 0;
4426 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Consume pushed-back characters first.  The loop below walks
   internal->unread from its last character backwards, so the
   character pushed back last is appended to MT first.  The
   declaration and initialisation of `limit' are not visible in this
   excerpt.  */
4428 n = mtext_nchars (internal->unread);
4434 if (at_most > 0 && at_most < limit)
4437 for (i = 0, n -= 1; i < limit; i++, converter->nchars++, n--)
4438 mtext_cat_char (mt, mtext_ref_char (internal->unread, n));
/* Remove the characters just consumed from the tail of the unread
   text.  */
4439 mtext_del (internal->unread, n + 1, internal->unread->nchars);
/* NOTE(review): the statement controlled by this test is not visible
   here; presumably the call finishes early once the limit has been
   met by unread characters alone -- confirm.  Otherwise the
   converter's remaining limit is reduced by what was delivered.  */
4442 if (at_most == limit)
4444 converter->at_most -= converter->nchars;
/* Buffer binding: run the decoder over the part of the bound buffer
   not consumed yet, then advance the consumed count by the number of
   bytes the decoder reports.  */
4448 if (internal->binding == BINDING_BUFFER)
4450 (*internal->coding->decoder) (internal->buf.in + internal->used,
4451 internal->bufsize - internal->used,
4453 internal->used += converter->nbytes;
/* Stream binding: input is staged through a local work area.  */
4455 else if (internal->binding == BINDING_STREAM)
4457 unsigned char work[CONVERT_WORKSIZE];
/* The caller's last_block flag is saved here and cleared on the
   converter; it is assigned back further down.  */
4458 int last_block = converter->last_block;
/* Block reads are considered only when no character limit applies
   and the stream was marked seekable; the other read path below
   fetches a single byte with getc.  The branch choosing between the
   two is not visible in this excerpt.  */
4459 int use_fread = at_most < 0 && internal->seekable;
4461 converter->last_block = 0;
4464 int nbytes, prev_nbytes;
4466 if (feof (internal->fp))
4469 nbytes = fread (work, sizeof (unsigned char), CONVERT_WORKSIZE,
4473 int c = getc (internal->fp);
4476 work[0] = c, nbytes = 1;
/* A stream error is reported through converter->result.  */
4481 if (ferror (internal->fp))
4483 converter->result = MCONVERSION_RESULT_IO_ERROR;
4488 converter->last_block = last_block;
/* converter->nbytes keeps growing across decoder calls, so the bytes
   consumed by this call are measured against prev_nbytes.  */
4489 prev_nbytes = converter->nbytes;
4490 (*internal->coding->decoder) (work, nbytes, mt, converter);
/* The decoder consumed fewer bytes than were read: the surplus is
   given back to the stream, either by fseek with a negative offset
   or by ungetc of the single byte read.  */
4491 if (converter->nbytes - prev_nbytes < nbytes)
4494 fseek (internal->fp, converter->nbytes - prev_nbytes - nbytes,
4497 ungetc (work[0], internal->fp);
4501 || (converter->at_most > 0
4502 && converter->nchars == converter->at_most))
4505 converter->last_block = last_block;
4507 else /* internal->binding == BINDING_NONE */
4508 MERROR (MERROR_CODING, NULL);
/* Store the locally normalised limit back; converter->at_most may
   have been reduced above while unread characters were delivered.  */
4510 converter->at_most = at_most;
/* A result of "success" and one of "insufficient source" are treated
   alike by the return expression; its remaining operands are not
   visible in this excerpt.  */
4511 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4512 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_SRC)
4519 @brief Decode a buffer area based on a coding system.
4521 The mconv_decode_buffer () function decodes $N bytes of the buffer
4522 area pointed to by $BUF based on the coding system $NAME. A
4523 temporary code converter for decoding is automatically created
4527 If the operation was successful, mconv_decode_buffer ()
4528 returns the resulting M-text. Otherwise it returns @c NULL and
4529 assigns an error code to the external variable #merror_code. */
4532 @brief ¥³¡¼¥É·Ï¤Ë´ð¤Å¤¤¤Æ¥Ð¥Ã¥Õ¥¡Îΰè¤ò¥Ç¥³¡¼¥É¤¹¤ë.
4534 ´Ø¿ô mconv_decode_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿ $N
4535 ¥Ð¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ò¡¢¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£
4536 ¥Ç¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4539 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_buffer () ¤ÏÆÀ¤é¤ì¤¿ M-text ¤òÊÖ¤¹¡£
4540 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4541 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4545 @c MERROR_IO, @c MERROR_CODING
4548 mconv_decode (), mconv_decode_stream () */
/* Decode the N bytes at BUF with coding system NAME, using a
   converter created for this call only.  */
4551 mconv_decode_buffer (MSymbol name, const unsigned char *buf, int n)
4553 MConverter *converter = mconv_buffer_converter (name, buf, n);
/* If decoding fails, the reference to the result M-text is dropped.
   The creation of `mt' is not visible in this excerpt.  */
4559 if (! mconv_decode (converter, mt))
4561 M17N_OBJECT_UNREF (mt);
/* The converter is released whether or not decoding succeeded.  */
4564 mconv_free_converter (converter);
4571 @brief Decode a stream input based on a coding system.
4573 The mconv_decode_stream () function decodes the entire byte
4574 sequence read in from stream $FP based on the coding system $NAME.
4575 A code converter for decoding is automatically created and freed.
4578 If the operation was successful, mconv_decode_stream () returns
4579 the resulting M-text. Otherwise it returns @c NULL and assigns an
4580 error code to the external variable #merror_code. */
4583 @brief ¥³¡¼¥É·Ï¤Ë´ð¤Å¤¤¤Æ¥¹¥È¥ê¡¼¥àÆþÎϤò¥Ç¥³¡¼¥É¤¹¤ë.
4585 ´Ø¿ô mconv_decode_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP
4586 ¤«¤éÆɤ߹þ¤Þ¤ì¤ë¥Ð¥¤¥ÈÎóÁ´ÂΤò¡¢¥³¡¼¥É·Ï $NAME
4587 ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£¥Ç¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4590 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_stream () ¤ÏÆÀ¤é¤ì¤¿ M-text
4591 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4592 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4596 @c MERROR_IO, @c MERROR_CODING
4599 mconv_decode (), mconv_decode_buffer () */
/* Decode the input read from stream FP with coding system NAME,
   using a converter created for this call only.  */
4602 mconv_decode_stream (MSymbol name, FILE *fp)
4604 MConverter *converter = mconv_stream_converter (name, fp);
/* If decoding fails, the reference to the result M-text is dropped.
   The creation of `mt' is not visible in this excerpt.  */
4610 if (! mconv_decode (converter, mt))
4612 M17N_OBJECT_UNREF (mt);
/* The converter is released whether or not decoding succeeded.  */
4615 mconv_free_converter (converter);
4621 /***en @brief Encode an M-text into a byte sequence.
4623 The mconv_encode () function encodes M-text $MT and writes the
4624 resulting byte sequence into the buffer area or the stream that is
4625 currently bound to code converter $CONVERTER.
4628 If the operation was successful, mconv_encode () returns the
4629 number of written bytes. Otherwise it returns -1 and assigns an
4630 error code to the external variable #merror_code. */
4633 @brief M-text ¤ò¥Ð¥¤¥ÈÎó¤Ë¥¨¥ó¥³¡¼¥É¤¹¤ë.
4635 ´Ø¿ô mconv_encode () ¤Ï¡¢M-text $MT ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4636 $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤ËÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò½ñ¤¹þ¤à¡£
4639 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
4640 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4641 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4645 @c MERROR_IO, @c MERROR_CODING
4648 mconv_rebind_buffer (), mconv_rebind_stream(),
4649 mconv_decode (), mconv_encode_range () */
/* Encode the whole of MT through CONVERTER.  */
4652 mconv_encode (MConverter *converter, MText *mt)
/* Delegates to mconv_encode_range () over the range from 0 to the
   character count of MT.  */
4654 return mconv_encode_range (converter, mt, 0, mtext_nchars (mt));
4660 @brief Encode a part of an M-text.
4662 The mconv_encode_range () function encodes the text between $FROM
4663 (inclusive) and $TO (exclusive) in M-text $MT and writes the
4664 resulting byte sequence into the buffer area or the stream that is
4665 currently bound to code converter $CONVERTER.
4668 If the operation was successful, mconv_encode_range () returns the
4669 number of written bytes. Otherwise it returns -1 and assigns an
4670 error code to the external variable #merror_code. */
4673 @brief M-text ¤Î°ìÉô¤ò¥Ð¥¤¥ÈÎó¤Ë¥¨¥ó¥³¡¼¥É¤¹¤ë.
4675 ´Ø¿ô mconv_encode_range () ¤Ï¡¢M-text $MT ¤Î $FROM
4676 ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO¼«ÂΤϴޤޤʤ¤¡Ë
4677 ¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4678 $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤ËÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò½ñ¤¹þ¤à¡£
4681 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_range ()
4682 ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
4683 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4687 @c MERROR_RANGE, @c MERROR_IO, @c MERROR_CODING
4690 mconv_rebind_buffer (), mconv_rebind_stream(),
4691 mconv_decode (), mconv_encode () */
/* Encode the characters of MT between FROM and TO and write the
   bytes to the buffer or stream bound to CONVERTER.  Returns
   converter->nbytes when the result is "success" or "insufficient
   destination", and -1 otherwise.  */
4694 mconv_encode_range (MConverter *converter, MText *mt, int from, int to)
4696 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Position checks on both ends of the range; -1 is the error value
   handed to the macro.  */
4698 M_CHECK_POS_X (mt, from, -1);
4699 M_CHECK_POS_X (mt, to, -1);
/* A positive converter->at_most caps the number of characters
   encoded by pulling TO forward.  */
4703 if (converter->at_most > 0 && from + converter->at_most < to)
4704 to = from + converter->at_most;
/* Reset the per-call counters and result code.  */
4706 converter->nchars = converter->nbytes = 0;
4707 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Attach the coding system's name to the range as an Mcoding text
   property of MT.  */
4709 mtext_put_prop (mt, from, to, Mcoding, internal->coding->name);
/* Buffer binding: encode into the unused remainder of the bound
   buffer, then advance the used count by the bytes produced.  */
4710 if (internal->binding == BINDING_BUFFER)
4712 (*internal->coding->encoder) (mt, from, to,
4713 internal->buf.out + internal->used,
4714 internal->bufsize - internal->used,
4716 internal->used += converter->nbytes;
/* Stream binding: encode into a local work area and write it out.
   The enclosing loop header is not visible in this excerpt.  */
4718 else if (internal->binding == BINDING_STREAM)
4720 unsigned char work[CONVERT_WORKSIZE];
/* converter->nbytes keeps growing across encoder calls, so the bytes
   produced by this call are measured against prev_nbytes.  */
4725 int prev_nbytes = converter->nbytes;
4728 (*internal->coding->encoder) (mt, from, to, work,
4729 CONVERT_WORKSIZE, converter);
4730 this_nbytes = converter->nbytes - prev_nbytes;
/* fwrite may write fewer items than requested, so keep writing until
   everything produced by this call is out or the stream reports an
   error.  */
4731 while (written < this_nbytes)
4733 int wrtn = fwrite (work + written, sizeof (unsigned char),
4734 this_nbytes - written, internal->fp);
4736 if (ferror (internal->fp))
/* A short write at this point is reported as an I/O error.  */
4740 if (written < this_nbytes)
4742 converter->result = MCONVERSION_RESULT_IO_ERROR;
/* NOTE(review): FROM is advanced by converter->nchars, whereas the
   byte count above is handled as a delta against prev_nbytes.  If
   the encoders accumulate nchars across calls in the same way, FROM
   would overshoot from the second pass on -- confirm against the
   encoders.  */
4745 from += converter->nchars;
4748 else /* fail safe */
4749 MERROR (MERROR_CODING, -1);
4751 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4752 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_DST)
4753 ? converter->nbytes : -1);
4759 @brief Encode an M-text into a buffer area.
4761 The mconv_encode_buffer () function encodes M-text $MT based on
4762 coding system $NAME and writes the resulting byte sequence into the
4763 buffer area pointed to by $BUF. At most $N bytes are written. A
4764 temporary code converter for encoding is automatically created
4768 If the operation was successful, mconv_encode_buffer () returns
4769 the number of written bytes. Otherwise it returns -1 and assigns
4770 an error code to the external variable #merror_code. */
4773 @brief M-text ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¥Ð¥Ã¥Õ¥¡Îΰè¤Ë½ñ¤¹þ¤à.
4775 ´Ø¿ô mconv_encode_buffer () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME
4776 ¤Ë´ð¤Å¤¤¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò $BUF ¤Î»Ø¤¹¥Ð¥Ã¥Õ¥¡Îΰè¤Ë½ñ¤¹þ¤à¡£
4777 $N ¤Ï½ñ¤¹þ¤àºÇÂç¥Ð¥¤¥È¿ô¤Ç¤¢¤ë¡£
4778 ¥¨¥ó¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4781 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_buffer () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
4782 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð-1¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4786 @c MERROR_IO, @c MERROR_CODING
4789 mconv_encode (), mconv_encode_stream () */
/* Encode MT with coding system NAME into the N-byte area at BUF,
   using a converter created for this call only.  */
4792 mconv_encode_buffer (MSymbol name, MText *mt, unsigned char *buf, int n)
4794 MConverter *converter = mconv_buffer_converter (name, buf, n);
/* The result of mconv_encode () is kept in `ret' so that the
   converter can be released before returning.  */
4799 ret = mconv_encode (converter, mt);
4800 mconv_free_converter (converter);
4807 @brief Encode an M-text to write to a stream.
4809 The mconv_encode_stream () function encodes M-text $MT based on
4810 coding system $NAME and writes the resulting byte sequence to
4811 stream $FP. A temporary code converter for encoding is
4812 automatically created and freed.
4815 If the operation was successful, mconv_encode_stream () returns
4816 the number of written bytes. Otherwise it returns -1 and assigns
4817 an error code to the external variable #merror_code. */
4820 @brief M-text ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¥¹¥È¥ê¡¼¥à¤Ë½ñ¤¹þ¤à.
4822 ´Ø¿ô mconv_encode_stream () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME
4823 ¤Ë´ð¤Å¤¤¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò¥¹¥È¥ê¡¼¥à $FP
4824 ¤Ë½ñ¤½Ð¤¹¡£¥¨¥ó¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4827 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_stream ()
4828 ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
4829 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4833 @c MERROR_IO, @c MERROR_CODING
4836 mconv_encode (), mconv_encode_buffer (), mconv_encode_file () */
/* Encode MT with coding system NAME and write the bytes to stream
   FP, using a converter created for this call only.  */
4839 mconv_encode_stream (MSymbol name, MText *mt, FILE *fp)
4841 MConverter *converter = mconv_stream_converter (name, fp);
/* The result of mconv_encode () is kept in `ret' so that the
   converter can be released before returning.  */
4846 ret = mconv_encode (converter, mt);
4847 mconv_free_converter (converter);
4854 @brief Read a character via a code converter.
4856 The mconv_getc () function reads one character from the buffer
4857 area or the stream that is currently bound to code converter
4858 $CONVERTER. The decoder of $CONVERTER is used to decode the byte
4859 sequence. The internal status of $CONVERTER is updated
4863 If the operation was successful, mconv_getc () returns the
4864 character read in. If the input source reaches EOF, it returns @c
4865 EOF without changing the external variable #merror_code. If an
4866 error is detected, it returns @c EOF and assigns an error code to
4870 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿·Ðͳ¤Ç°ìʸ»ú¤òÆɤߤ³¤à.
4872 ´Ø¿ô mconv_getc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
4873 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤éʸ»ú¤ò°ì¤ÄÆɤ߹þ¤à¡£
4874 ¥Ð¥¤¥ÈÎó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£
4875 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4878 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_getc () ¤ÏÆɤ߹þ¤Þ¤ì¤¿Ê¸»ú¤òÊÖ¤¹¡£ÆþÎϸ»¤¬
4879 EOF ¤Ë㤷¤¿¾ì¹ç¤Ï¡¢³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c EOF
4880 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c EOF ¤òÊÖ¤·¡¢#merror_code
4881 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4888 mconv_ungetc (), mconv_putc (), mconv_gets () */
/* Read one character through CONVERTER by decoding into the
   converter's scratch M-text with the character limit temporarily
   set to 1.  */
4891 mconv_getc (MConverter *converter)
4893 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Remember the caller's limit so it can be put back after the
   single-character decode.  */
4894 int at_most = converter->at_most;
4896 mtext_reset (internal->work_mt);
4897 converter->at_most = 1;
4898 mconv_decode (converter, internal->work_mt);
4899 converter->at_most = at_most;
/* When exactly one character was decoded, it is extracted from the
   scratch text's data with STRING_CHAR.  The alternative operand is
   not visible in this excerpt.  */
4900 return (converter->nchars == 1
4901 ? STRING_CHAR (internal->work_mt->data)
4908 @brief Push a character back to a code converter.
4910 The mconv_ungetc () function pushes character $C back to code
4911 converter $CONVERTER. Any number of characters can be pushed
4912 back. The most recently pushed-back character is read first by the
4913 subsequent mconv_getc () call. The characters pushed back are
4914 registered only in $CONVERTER; they are not written to the input
4915 source. The internal status of $CONVERTER is updated
4919 If the operation was successful, mconv_ungetc () returns $C.
4920 Otherwise it returns @c EOF and assigns an error code to the
4921 external variable #merror_code. */
4924 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë°ìʸ»úÌ᤹.
4926 ´Ø¿ô mconv_ungetc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ëʸ»ú $C
4927 ¤ò²¡¤·Ì᤹¡£Ìᤵ¤ì¤ëʸ»ú¿ô¤ËÀ©¸Â¤Ï¤Ê¤¤¡£¤³¤Î¸å¤Ç mconv_getc ()
4928 ¤ò¸Æ¤Ó½Ð¤·¤¿ºÝ¤Ë¤Ï¡¢ºÇ¸å¤ËÌᤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤ËÆɤޤì¤ë¡£Ìᤵ¤ì¤¿Ê¸»ú¤Ï
4929 $CONVERTER ¤ÎÆâÉô¤ËÃߤ¨¤é¤ì¤ë¤À¤±¤Ç¤¢¤ê¡¢¼ÂºÝ¤ËÆþÎϸ»¤Ë½ñ¤¹þ¤Þ¤ì¤ë¤ï¤±¤Ç¤Ï¤Ê¤¤¡£
4930 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4933 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_ungetc () ¤Ï $C ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c
4934 EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4938 @c MERROR_CODING, @c MERROR_CHAR
4941 mconv_getc (), mconv_putc (), mconv_gets () */
/* Push character C back to CONVERTER.  The character is only
   appended to the converter's unread M-text; nothing here touches
   the bound buffer or stream.  */
4944 mconv_ungetc (MConverter *converter, int c)
4946 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Character check on C; EOF is the error value handed to the
   macro.  */
4948 M_CHECK_CHAR (c, EOF);
4950 converter->result = MCONVERSION_RESULT_SUCCESS;
4951 mtext_cat_char (internal->unread, c);
4958 @brief Write a character via a code converter.
4960 The mconv_putc () function writes character $C to the buffer area
4961 or the stream that is currently bound to code converter
4962 $CONVERTER. The encoder of $CONVERTER is used to encode the
4963 character. The number of bytes actually written is set to the @c
4964 nbytes member of $CONVERTER. The internal status of $CONVERTER
4965 is updated appropriately.
4968 If the operation was successful, mconv_putc () returns $C.
4969 If an error is detected, it returns @c EOF and assigns
4970 an error code to the external variable #merror_code. */
4973 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò·Ðͳ¤·¤Æ°ìʸ»ú½ñ¤½Ð¤¹.
4975 ´Ø¿ô mconv_putc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
4976 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤Ëʸ»ú $C
4977 ¤ò½ñ¤½Ð¤¹¡£Ê¸»ú¤Î¥¨¥ó¥³¡¼¥É¤Ë¤Ï $CONVERTER
4978 ¤Î¥¨¥ó¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¼ÂºÝ¤Ë½ñ¤½Ð¤µ¤ì¤¿¥Ð¥¤¥È¿ô¤Ï¡¢$CONVERTER ¤Î¥á¥ó¥Ð¡¼
4979 @c nbytes ¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£$CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4982 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_putc () ¤Ï $C ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï
4983 @c EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4987 @c MERROR_CODING, @c MERROR_IO, @c MERROR_CHAR
4990 mconv_getc (), mconv_ungetc (), mconv_gets () */
/* Write character C through CONVERTER by placing it alone in the
   converter's scratch M-text and encoding that one-character
   range.  */
4993 mconv_putc (MConverter *converter, int c)
4995 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Character check on C; EOF is the error value handed to the
   macro.  */
4997 M_CHECK_CHAR (c, EOF);
4998 mtext_reset (internal->work_mt);
4999 mtext_cat_char (internal->work_mt, c);
/* A negative result from mconv_encode_range () signals failure; the
   statement taken in that case is not visible in this excerpt.  */
5000 if (mconv_encode_range (converter, internal->work_mt, 0, 1) < 0)
5008 @brief Read a line using a code converter.
5010 The mconv_gets () function reads one line from the buffer area or
5011 the stream that is currently bound to code converter $CONVERTER.
5012 The decoder of $CONVERTER is used for decoding. The decoded
5013 character sequence is appended at the end of M-text $MT. The
5014 final newline character in the original byte sequence is not
5015 appended. The internal status of $CONVERTER is updated
5019 If the operation was successful, mconv_gets () returns the
5020 modified $MT. If it encounters EOF without reading a single
5021 character, it returns $MT without changing it. If an error is
5022 detected, it returns @c NULL and assigns an error code to
5026 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò»È¤Ã¤Æ°ì¹ÔÆɤ߹þ¤à.
5028 ´Ø¿ô mconv_gets () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
5029 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é 1 ¹Ô¤òÆɤ߹þ¤à¡£
5030 ¥Ð¥¤¥ÈÎó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER
5031 ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¥Ç¥³¡¼¥É¤µ¤ì¤¿Ê¸»úÎó¤Ï M-text $MT
5032 ¤ÎËöÈø¤ËÄɲ䵤ì¤ë¡£¸µ¤Î¥Ð¥¤¥ÈÎó¤Î½ªÃ¼²þ¹Ôʸ»ú¤ÏÄɲ䵤ì¤Ê¤¤¡£
5033 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
5036 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_gets () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT
5037 ¤òÊÖ¤¹¡£¤â¤·1ʸ»ú¤âÆɤޤº¤Ë EOF ¤ËÁø¶ø¤·¤¿¾ì¹ç¤Ï¡¢$MT
5038 ¤òÊѹ¹¤»¤º¤Ë¤½¤Î¤Þ¤ÞÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢
5039 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
5046 mconv_getc (), mconv_ungetc (), mconv_putc () */
/* Read characters through CONVERTER with mconv_getc () and append
   them to MT until EOF or a newline is seen.  The loop construct and
   the return statements are not visible in this excerpt.  */
5049 mconv_gets (MConverter *converter, MText *mt)
/* Read-only check on MT; NULL is the error value handed to the
   macro.  */
5053 M_CHECK_READONLY (mt, NULL);
/* Bring MT into UTF-8 format if it is in any other format.  */
5054 if (mt->format != MTEXT_FORMAT_UTF_8)
5055 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
5059 c = mconv_getc (converter);
/* EOF and newline are tested before the append below, so the
   terminating newline itself is presumably not added to MT --
   the statement controlled by this test is not visible here.  */
5060 if (c == EOF || c == '\n')
5062 mtext_cat_char (mt, c);
/* EOF combined with a non-success result is distinguished from a
   plain end of input.  */
5064 if (c == EOF && converter->result != MCONVERSION_RESULT_SUCCESS)
5065 /* mconv_getc () sets #merror_code */