1 /* coding.c -- code conversion module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 @brief Coding system objects and API for them.
27 The m17n library represents a character encoding scheme (CES) of
28 coded character sets (CCS) as an object called @e coding @e
29 system. Application programs can add original coding systems.
31 To @e encode means converting code-points to character codes and
32 to @e decode means converting character codes back to code-points.
34 Application programs can decode a byte sequence with a specified
35 coding system into an M-text, and inversely, can encode an M-text
36 into a byte sequence. */
40 @brief ¥³¡¼¥É·Ï¥ª¥Ö¥¸¥§¥¯¥È¤È¤½¤ì¤Ë´Ø¤¹¤ë API.
42 m17n ¥é¥¤¥Ö¥é¥ê¤Ï¡¢Éä¹æ²½Ê¸»ú½¸¹ç (coded character set; CCS)
43 ¤Îʸ»úÉä¹ç²½Êý¼° (character encoding scheme; CES) ¤ò @e ¥³¡¼¥É·Ï
44 ¤È¸Æ¤Ö¥ª¥Ö¥¸¥§¥¯¥È¤Çɽ¸½¤¹¤ë¡£
45 ¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥à¤ÏÆȼ«¤Ë¥³¡¼¥É·Ï¤òÄɲ乤뤳¤È¤â¤Ç¤¤ë¡£
47 ¥³¡¼¥É¥Ý¥¤¥ó¥È¤«¤éʸ»ú¥³¡¼¥É¤Ø¤ÎÊÑ´¹¤ò @e ¥¨¥ó¥³¡¼¥É
48 ¤È¸Æ¤Ó¡¢Ê¸»ú¥³¡¼¥É¤«¤é¥³¡¼¥É¥Ý¥¤¥ó¥È¤Ø¤ÎÊÑ´¹¤ò @e ¥Ç¥³¡¼¥É ¤È¸Æ¤Ö¡£
50 ¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¥×¥í¥°¥é¥à¤Ï¡¢»ØÄꤵ¤ì¤¿¥³¡¼¥É·Ï¤Ç¥Ð¥¤¥ÈÎó¤ò¥Ç¥³¡¼¥É¤¹¤ë¤³¤È¤Ë¤è¤Ã¤Æ
51 M-text ¤òÆÀ¤ë¤³¤È¤¬¤Ç¤¤ë¡£¤Þ¤¿µÕ¤Ë¡¢»ØÄꤵ¤ì¤¿¥³¡¼¥É·Ï¤Ç M-text
52 ¤ò¥¨¥ó¥³¡¼¥É¤·¤¹¤ë¤³¤È¤Ë¤è¤Ã¤Æ¥Ð¥¤¥ÈÎó¤òÆÀ¤ë¤³¤È¤¬¤Ç¤¤ë¡£ */
56 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
57 /*** @addtogroup m17nInternal
65 #include <sys/types.h>
70 #include "m17n-misc.h"
73 #include "character.h"
80 #define NUM_SUPPORTED_CHARSETS 32
82 /** Structure for coding system object. */
86 /** Name of the coding system. */
89 /** Type of the coding system. */
92 /* Number of supported charsets. */
95 /** Array of supported charsets. */
96 MCharset *charsets[NUM_SUPPORTED_CHARSETS];
98 /** If non-NULL, function to call at the time of creating and
99 resetting a converter. */
100 int (*resetter) (MConverter *converter);
102 int (*decoder) (const unsigned char *str, int str_bytes, MText *mt,
103 MConverter *converter);
105 int (*encoder) (MText *mt, int from, int to,
106 unsigned char *str, int str_bytes,
107 MConverter *converter);
109 /** If non-zero, the coding system decode/encode ASCII characters as
111 int ascii_compatible;
113 /** Pointer to extra information given when the coding system is
114 defined. The meaning depends on <type>. */
117 /** Pointer to information referred on conversion. The meaning
118 depends on <type>. The value NULL means that the coding system
128 MCodingSystem **codings;
131 static struct MCodingList coding_list;
133 static MPlist *coding_definition_list;
137 Pointer to a structure of a coding system. */
139 ¥³¡¼¥É·Ï¤òɽ¤ï¤¹¥Ç¡¼¥¿¹½Â¤¤Ø¤Î¥Ý¥¤¥ó¥¿ */
140 MCodingSystem *coding;
143 Buffer for carryover bytes generated while decoding. */
145 ¥Ç¥³¡¼¥ÉÃæ¤Î¥¥ã¥ê¥£¥ª¡¼¥Ð¡¼¥Ð¥¤¥ÈÍѥХåե¡ */
146 unsigned char carryover[256];
149 Number of carryover bytes. */
151 ¥¥ã¥ê¥£¥ª¡¼¥Ð¡¼¥Ð¥¤¥È¿ô */
155 Beginning of the byte sequence bound to this converter. */
157 ¤³¤Î¥³¥ó¥Ð¡¼¥¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ÎÀèƬ°ÌÃÖ */
159 const unsigned char *in;
170 Number of bytes already consumed in buf. */
172 buf Æâ¤Ç¤¹¤Ç¤Ë¾ÃÈñ¤µ¤ì¤¿¥Ð¥¤¥È¿ô */
176 Stream bound to this converter. */
178 ¤³¤Î¥³¥ó¥Ð¡¼¥¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥¹¥È¥ê¡¼¥à */
182 Which of above two is in use. */
184 ¾åµ2¼Ô¤Î¤¤¤º¤ì¤¬»È¤ï¤ì¤Æ¤¤¤ë¤« */
204 /* Local macros and functions. */
206 /** At first, set SRC_BASE to SRC. Then check if we have already
207 produced AT_MOST chars. If so, set SRC_END to SRC, and jump to
208 source_end. Otherwise, get one more byte C from SRC. In that
209 case, if SRC == SRC_END, jump to the label source_end. */
211 #define ONE_MORE_BASE_BYTE(c) \
214 if (nchars == at_most) \
219 if (src == src_stop) \
221 if (src == src_end) \
223 src_base = src = source; \
224 if (src == src_end) \
226 src_stop = src_end; \
232 /** Get one more byte C from SRC. If SRC == SRC_END, jump to the
235 #define ONE_MORE_BYTE(c) \
237 if (src == src_stop) \
239 if (src == src_end) \
242 if (src == src_end) \
244 src_stop = src_end; \
250 #define REWIND_SRC_TO_BASE() \
252 if (src_base < source || src_base >= src_end) \
253 src_stop = internal->carryover + internal->carryover_bytes; \
258 /** Push back byte C to SRC. */
260 #define UNGET_ONE_BYTE(c) \
266 internal->carryover[0] = c; \
267 internal->carryover_bytes = 1; \
268 src = internal->carryover; \
269 src_stop = src + 1; \
274 /** Store multibyte representation of character C at DST and increment
275 DST to the next of the produced bytes. DST must be a pointer to
276 data area of M-text MT. If the produced bytes are going to exceed
277 DST_END, enlarge the data area of MT. */
279 #define EMIT_CHAR(c) \
281 int bytes = CHAR_BYTES (c); \
284 if (dst + bytes + 1 > dst_end) \
286 len = dst - mt->data; \
287 bytes = mt->allocated + bytes + (src_stop - src); \
288 mtext__enlarge (mt, bytes); \
289 dst = mt->data + len; \
290 dst_end = mt->data + mt->allocated; \
292 dst += CHAR_STRING (c, dst); \
297 /* Check if there is enough room to produce LEN bytes at DST. If not,
298 go to the label insufficient_destination. */
300 #define CHECK_DST(len) \
302 if (dst + (len) > dst_end) \
303 goto insufficient_destination; \
307 /** Take NUM_CHARS characters (NUM_BYTES bytes) already stored at
308 (MT->data + MT->nbytes) into MT, and put charset property on
309 them with CHARSET->name. */
311 #define TAKEIN_CHARS(mt, num_chars, num_bytes, charset) \
313 int chars = (num_chars); \
317 mtext__takein ((mt), chars, (num_bytes)); \
319 mtext_put_prop ((mt), (mt)->nchars - chars, (mt)->nchars, \
320 Mcharset, (void *) ((charset)->name)); \
325 #define SET_SRC(mt, format, from, to) \
327 if (format <= MTEXT_FORMAT_UTF_8) \
329 src = mt->data + POS_CHAR_TO_BYTE (mt, from); \
330 src_end = mt->data + POS_CHAR_TO_BYTE (mt, to); \
332 else if (format <= MTEXT_FORMAT_UTF_16BE) \
335 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, from); \
337 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, to); \
341 src = mt->data + (sizeof (int)) * from; \
342 src_end = mt->data + (sizeof (int)) * to; \
347 #define ONE_MORE_CHAR(c, bytes, format) \
349 if (src == src_end) \
351 if (format <= MTEXT_FORMAT_UTF_8) \
352 c = STRING_CHAR_AND_BYTES (src, bytes); \
353 else if (format <= MTEXT_FORMAT_UTF_16BE) \
355 c = mtext_ref_char (mt, from++); \
356 bytes = (sizeof (short)) * CHAR_UNITS_UTF16 (c); \
360 c = ((unsigned *) (mt->data))[from++]; \
361 bytes = sizeof (int); \
367 encode_unsupporeted_char (int c, unsigned char *dst, unsigned char *dst_end,
373 len = c < 0x10000 ? 8 : 10;
374 if (dst + len > dst_end)
377 mtext_put_prop (mt, pos, pos + 1, Mcoding, Mnil);
378 format = (c < 0xD800 ? "<U+%04X>"
379 : c < 0xE000 ? "<M+%04X>"
380 : c < 0x10000 ? "<U+%04X>"
381 : c < 0x110000 ? "<U+%06X>"
383 sprintf ((char *) dst, format, c);
389 /** Finish decoding of bytes at SOURCE (ending at SRC_END) into NCHARS
390 characters by CONVERTER into M-text MT. SRC is a pointer to the
391 not-yet processed bytes. ERROR is 1 iff an invalid byte was
395 finish_decoding (MText *mt, MConverter *converter, int nchars,
396 const unsigned char *source, const unsigned char *src_end,
397 const unsigned char *src,
400 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
403 internal->carryover_bytes = 0;
405 || (converter->last_block
406 && ! converter->lenient))
407 converter->result = MCONVERSION_RESULT_INVALID_BYTE;
408 else if (! converter->last_block)
410 unsigned char *dst = internal->carryover;
412 if (src < source || src > src_end)
414 dst += internal->carryover_bytes;
417 while (src < src_end)
419 internal->carryover_bytes = dst - internal->carryover;
420 converter->result = MCONVERSION_RESULT_INSUFFICIENT_SRC;
424 unsigned char *dst = mt->data + mt->nbytes;
425 unsigned char *dst_end = mt->data + mt->allocated;
426 const unsigned char *src_stop = src_end;
428 int last_nchars = nchars;
430 if (src < source || src > src_end)
431 src_stop = internal->carryover + internal->carryover_bytes;
434 if (converter->at_most && nchars == converter->at_most)
448 TAKEIN_CHARS (mt, nchars - last_nchars, dst - (mt->data + mt->nbytes),
450 internal->carryover_bytes = 0;
453 converter->nchars += nchars;
454 converter->nbytes += ((src < source || src > src_end) ? 0 : src - source);
455 return (converter->result == MCONVERSION_RESULT_INVALID_BYTE ? -1 : 0);
460 /* Stuff for coding-systems of type MCODING_TYPE_CHARSET. */
463 setup_coding_charset (MCodingSystem *coding)
465 int ncharsets = coding->ncharsets;
466 unsigned *code_charset_table;
470 /* At first, reorder charset list by dimensions (a charset of
471 smaller dimension comes first). As the number of charsets is
472 usually very small (at most 32), we do a simple sort. */
477 MTABLE_ALLOCA (charsets, NUM_SUPPORTED_CHARSETS, MERROR_CODING);
478 memcpy (charsets, coding->charsets,
479 sizeof (MCharset *) * NUM_SUPPORTED_CHARSETS);
480 for (i = 0; i < 4; i++)
481 for (j = 0; j < ncharsets; j++)
482 if (charsets[j]->dimension == i)
483 coding->charsets[idx++] = charsets[j];
486 MTABLE_CALLOC (code_charset_table, 256, MERROR_CODING);
489 int dim = coding->charsets[ncharsets]->dimension;
490 int from = coding->charsets[ncharsets]->code_range[(dim - 1) * 4];
491 int to = coding->charsets[ncharsets]->code_range[(dim - 1) * 4 + 1];
493 if (coding->charsets[ncharsets]->ascii_compatible)
494 coding->ascii_compatible = 1;
496 code_charset_table[from++] |= 1 << ncharsets;
499 coding->extra_spec = (void *) code_charset_table;
504 reset_coding_charset (MConverter *converter)
506 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
507 MCodingSystem *coding = internal->coding;
510 && setup_coding_charset (coding) < 0)
517 decode_coding_charset (const unsigned char *source, int src_bytes, MText *mt,
518 MConverter *converter)
520 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
521 MCodingSystem *coding = internal->coding;
522 const unsigned char *src = internal->carryover;
523 const unsigned char *src_stop = src + internal->carryover_bytes;
524 const unsigned char *src_end = source + src_bytes;
525 const unsigned char *src_base;
526 unsigned char *dst = mt->data + mt->nbytes;
527 unsigned char *dst_end = mt->data + mt->allocated;
530 int at_most = converter->at_most > 0 ? converter->at_most : -1;
532 unsigned *code_charset_table = (unsigned *) coding->extra_spec;
533 MCharset **charsets = coding->charsets;
534 MCharset *charset = mcharset__ascii;
539 MCharset *this_charset = NULL;
543 ONE_MORE_BASE_BYTE (c);
544 mask = code_charset_table[c];
554 while (! (mask & 1)) mask >>= 1, idx++;
555 this_charset = charsets[idx];
556 dim = this_charset->dimension;
560 code = (code << 8) | c;
563 c = DECODE_CHAR (this_charset, code);
570 if (! converter->lenient)
572 REWIND_SRC_TO_BASE ();
574 this_charset = mcharset__binary;
577 if (this_charset != mcharset__ascii
578 && this_charset != charset)
580 TAKEIN_CHARS (mt, nchars - last_nchars,
581 dst - (mt->data + mt->nbytes), charset);
582 charset = this_charset;
583 last_nchars = nchars;
587 /* We reach here because of an invalid byte. */
591 TAKEIN_CHARS (mt, nchars - last_nchars,
592 dst - (mt->data + mt->nbytes), charset);
593 return finish_decoding (mt, converter, nchars,
594 source, src_end, src_base, error);
598 encode_coding_charset (MText *mt, int from, int to,
599 unsigned char *destination, int dst_bytes,
600 MConverter *converter)
602 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
603 MCodingSystem *coding = internal->coding;
604 unsigned char *src, *src_end;
605 unsigned char *dst = destination;
606 unsigned char *dst_end = dst + dst_bytes;
608 int ncharsets = coding->ncharsets;
609 MCharset **charsets = coding->charsets;
610 int ascii_compatible = coding->ascii_compatible;
611 enum MTextFormat format = mt->format;
613 SET_SRC (mt, format, from, to);
618 ONE_MORE_CHAR (c, bytes, format);
620 if (c < 0x80 && ascii_compatible)
628 MCharset *charset = NULL;
633 charset = charsets[i];
634 code = ENCODE_CHAR (charset, c);
635 if (code != MCHAR_INVALID_CODE)
637 if (++i == ncharsets)
638 goto unsupported_char;
641 CHECK_DST (charset->dimension);
642 if (charset->dimension == 1)
646 else if (charset->dimension == 2)
649 *dst++ = code & 0xFF;
651 else if (charset->dimension == 3)
654 *dst++ = (code >> 8) & 0xFF;
655 *dst++ = code & 0xFF;
660 *dst++ = (code >> 16) & 0xFF;
661 *dst++ = (code >> 8) & 0xFF;
662 *dst++ = code & 0xFF;
673 if (! converter->lenient)
675 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
677 goto insufficient_destination;
683 /* We reach here because of an unsupported char. */
684 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
687 insufficient_destination:
688 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
691 converter->nchars += nchars;
692 converter->nbytes += dst - destination;
693 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
697 /* Stuff for coding-systems of type MCODING_TYPE_UTF (8). */
/** Classify the UTF-8 style byte sequence starting at P and yield the
    charset it belongs to.

    P must be an expression of pointer-to-unsigned-char type; it is
    evaluated several times, so it must not have side effects.

    - A single byte with the high bit clear yields mcharset__unicode.
    - For longer sequences each expected trailing byte is tested with
      CHAR_HEAD_P (defined elsewhere; presumably true when the byte is
      NOT a continuation byte -- confirm); if the test succeeds the
      sequence is broken and mcharset__binary is yielded.
    - 2- and 3-byte sequences yield mcharset__unicode.
    - A 4-byte sequence yields mcharset__unicode when the top five bits
      of the encoded value (three from the lead byte, two from the
      first trailing byte) are <= 0x10, i.e. the value is at most
      0x10FFFF; otherwise it yields mcharset__m17n.
    - 5- and 6-byte sequences yield mcharset__m17n.
    - Any other lead byte yields mcharset__binary.

    The two bit-fields of the 4-byte case occupy disjoint bit positions
    and therefore have to be combined with `|'.  Combining them with
    `&' always gives 0, which made every 4-byte sequence look like a
    Unicode character.  */

#define UTF8_CHARSET(p)						\
  (! ((p)[0] & 0x80) ? (mcharset__unicode)			\
   : CHAR_HEAD_P ((p) + 1) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x20) ? (mcharset__unicode)			\
   : CHAR_HEAD_P ((p) + 2) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x10) ? (mcharset__unicode)			\
   : CHAR_HEAD_P ((p) + 3) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x08) ? ((((((p)[0] & 0x07) << 2)		\
			    | (((p)[1] & 0x30) >> 4)) <= 0x10)	\
			  ? (mcharset__unicode)			\
			  : (mcharset__m17n))			\
   : CHAR_HEAD_P ((p) + 4) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x04) ? (mcharset__m17n)			\
   : CHAR_HEAD_P ((p) + 5) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x02) ? (mcharset__m17n)			\
   : (mcharset__binary))
718 decode_coding_utf_8 (const unsigned char *source, int src_bytes, MText *mt,
719 MConverter *converter)
721 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
722 MCodingSystem *coding = internal->coding;
723 const unsigned char *src = internal->carryover;
724 const unsigned char *src_stop = src + internal->carryover_bytes;
725 const unsigned char *src_end = source + src_bytes;
726 const unsigned char *src_base;
727 unsigned char *dst = mt->data + mt->nbytes;
728 unsigned char *dst_end = mt->data + mt->allocated;
731 int at_most = converter->at_most > 0 ? converter->at_most : -1;
733 int full = converter->lenient || (coding->charsets[0] == mcharset__m17n);
734 MCharset *charset = NULL;
739 MCharset *this_charset = NULL;
741 ONE_MORE_BASE_BYTE (c);
745 else if (!(c & 0x40))
747 else if (!(c & 0x20))
748 bytes = 2, c &= 0x1F;
749 else if (!(c & 0x10))
750 bytes = 3, c &= 0x0F;
751 else if (!(c & 0x08))
752 bytes = 4, c &= 0x07;
753 else if (!(c & 0x04))
754 bytes = 5, c &= 0x03;
755 else if (!(c & 0x02))
756 bytes = 6, c &= 0x01;
763 if ((c1 & 0xC0) != 0x80)
765 c = (c << 6) | (c1 & 0x3F);
769 || c < 0xD800 || (c >= 0xE000 && c < 0x110000))
773 if (! converter->lenient)
775 REWIND_SRC_TO_BASE ();
777 this_charset = mcharset__binary;
780 if (this_charset != charset)
782 TAKEIN_CHARS (mt, nchars - last_nchars,
783 dst - (mt->data + mt->nbytes), charset);
784 charset = this_charset;
785 last_nchars = nchars;
789 /* We reach here because of an invalid byte. */
793 TAKEIN_CHARS (mt, nchars - last_nchars,
794 dst - (mt->data + mt->nbytes), charset);
795 return finish_decoding (mt, converter, nchars,
796 source, src_end, src_base, error);
800 encode_coding_utf_8 (MText *mt, int from, int to,
801 unsigned char *destination, int dst_bytes,
802 MConverter *converter)
804 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
805 MCodingSystem *coding = internal->coding;
806 unsigned char *src, *src_end;
807 unsigned char *dst = destination;
808 unsigned char *dst_end = dst + dst_bytes;
810 enum MTextFormat format = mt->format;
812 SET_SRC (mt, format, from, to);
814 if (format <= MTEXT_FORMAT_UTF_8
815 && (converter->lenient
816 || coding->charsets[0] == mcharset__m17n))
818 if (dst_bytes < src_end - src)
820 int byte_pos = (src + dst_bytes) - mt->data;
822 to = POS_BYTE_TO_CHAR (mt, byte_pos);
823 byte_pos = POS_CHAR_TO_BYTE (mt, to);
824 src_end = mt->data + byte_pos;
825 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
827 memcpy (destination, src, src_end - src);
829 dst += src_end - src;
837 ONE_MORE_CHAR (c, bytes, format);
839 if ((c >= 0xD800 && c < 0xE000) || c >= 0x110000)
842 dst += CHAR_STRING (c, dst);
846 /* We reach here because of an unsupported char. */
847 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
850 insufficient_destination:
851 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
854 converter->nchars += nchars;
855 converter->nbytes += dst - destination;
856 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
860 /* Stuff for coding-systems of type MCODING_TYPE_UTF (16 & 32). */
881 enum utf_endian endian;
885 setup_coding_utf (MCodingSystem *coding)
887 MCodingInfoUTF *info = (MCodingInfoUTF *) (coding->extra_info);
888 MCodingInfoUTF *spec;
890 if (info->code_unit_bits == 8)
891 coding->ascii_compatible = 1;
892 else if (info->code_unit_bits == 16
893 || info->code_unit_bits == 32)
895 if (info->bom < 0 || info->bom > 2
896 || info->endian < 0 || info->endian > 1)
897 MERROR (MERROR_CODING, -1);
902 MSTRUCT_CALLOC (spec, MERROR_CODING);
904 coding->extra_spec = (void *) (spec);
909 reset_coding_utf (MConverter *converter)
911 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
912 MCodingSystem *coding = internal->coding;
913 struct utf_status *status = (struct utf_status *) &(converter->status);
916 && setup_coding_utf (coding) < 0)
920 status->surrogate = 0;
921 status->bom = ((MCodingInfoUTF *) (coding->extra_spec))->bom;
922 status->endian = ((MCodingInfoUTF *) (coding->extra_spec))->endian;
927 decode_coding_utf_16 (const unsigned char *source, int src_bytes, MText *mt,
928 MConverter *converter)
930 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
931 const unsigned char *src = internal->carryover;
932 const unsigned char *src_stop = src + internal->carryover_bytes;
933 const unsigned char *src_end = source + src_bytes;
934 const unsigned char *src_base;
935 unsigned char *dst = mt->data + mt->nbytes;
936 unsigned char *dst_end = mt->data + mt->allocated;
939 int at_most = converter->at_most > 0 ? converter->at_most : -1;
940 struct utf_status *status = (struct utf_status *) &(converter->status);
941 unsigned char b1, b2;
942 MCharset *charset = NULL;
945 if (status->bom != UTF_BOM_NO)
949 ONE_MORE_BASE_BYTE (b1);
953 status->endian = UTF_BIG_ENDIAN;
954 else if (c == 0xFFFE)
955 status->endian = UTF_LITTLE_ENDIAN;
956 else if (status->bom == UTF_BOM_MAYBE
957 || converter->lenient)
959 status->endian = UTF_BIG_ENDIAN;
960 REWIND_SRC_TO_BASE ();
967 status->bom = UTF_BOM_NO;
973 MCharset *this_charset = NULL;
975 ONE_MORE_BASE_BYTE (b1);
977 if (status->endian == UTF_BIG_ENDIAN)
978 c = ((b1 << 8) | b2);
980 c = ((b2 << 8) | b1);
981 if (c < 0xD800 || c >= 0xE000)
987 if (status->endian == UTF_BIG_ENDIAN)
988 c1 = ((b1 << 8) | b2);
990 c1 = ((b2 << 8) | b1);
991 if (c1 < 0xDC00 || c1 >= 0xE000)
993 c = 0x10000 + ((c - 0xD800) << 10) + (c1 - 0xDC00);
998 if (! converter->lenient)
1000 REWIND_SRC_TO_BASE ();
1003 if (status->endian == UTF_BIG_ENDIAN)
1004 c = ((b1 << 8) | b2);
1006 c = ((b2 << 8) | b1);
1007 this_charset = mcharset__binary;
1010 if (this_charset != charset)
1012 TAKEIN_CHARS (mt, nchars - last_nchars,
1013 dst - (mt->data + mt->nbytes), charset);
1014 charset = this_charset;
1015 last_nchars = nchars;
1019 /* We reach here because of an invalid byte. */
1023 TAKEIN_CHARS (mt, nchars - last_nchars,
1024 dst - (mt->data + mt->nbytes), charset);
1025 return finish_decoding (mt, converter, nchars,
1026 source, src_end, src_base, error);
1031 decode_coding_utf_32 (const unsigned char *source, int src_bytes, MText *mt,
1032 MConverter *converter)
1034 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1035 const unsigned char *src = internal->carryover;
1036 const unsigned char *src_stop = src + internal->carryover_bytes;
1037 const unsigned char *src_end = source + src_bytes;
1038 const unsigned char *src_base;
1039 unsigned char *dst = mt->data + mt->nbytes;
1040 unsigned char *dst_end = mt->data + mt->allocated;
1042 int last_nchars = 0;
1043 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1044 struct utf_status *status = (struct utf_status *) &(converter->status);
1045 unsigned char b1, b2, b3, b4;
1046 MCharset *charset = NULL;
1049 if (status->bom != UTF_BOM_NO)
1053 ONE_MORE_BASE_BYTE (b1);
1057 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1058 if (c == 0x0000FEFF)
1059 status->endian = UTF_BIG_ENDIAN;
1060 else if (c == 0xFFFE0000)
1061 status->endian = UTF_LITTLE_ENDIAN;
1062 else if (status->bom == UTF_BOM_MAYBE
1063 || converter->lenient)
1065 status->endian = UTF_BIG_ENDIAN;
1066 REWIND_SRC_TO_BASE ();
1073 status->bom = UTF_BOM_NO;
1079 MCharset *this_charset = NULL;
1081 ONE_MORE_BASE_BYTE (b1);
1085 if (status->endian == UTF_BIG_ENDIAN)
1086 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1088 c = (b4 << 24) | (b3 << 16) | (b2 << 8) | b1;
1089 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1092 if (! converter->lenient)
1094 REWIND_SRC_TO_BASE ();
1096 this_charset = mcharset__binary;
1099 if (this_charset != charset)
1101 TAKEIN_CHARS (mt, nchars - last_nchars,
1102 dst - (mt->data + mt->nbytes), charset);
1103 charset = this_charset;
1104 last_nchars = nchars;
1108 /* We reach here because of an invalid byte. */
1112 TAKEIN_CHARS (mt, nchars - last_nchars,
1113 dst - (mt->data + mt->nbytes), charset);
1114 return finish_decoding (mt, converter, nchars,
1115 source, src_end, src_base, error);
1120 encode_coding_utf_16 (MText *mt, int from, int to,
1121 unsigned char *destination, int dst_bytes,
1122 MConverter *converter)
1124 unsigned char *src, *src_end;
1125 unsigned char *dst = destination;
1126 unsigned char *dst_end = dst + dst_bytes;
1128 struct utf_status *status = (struct utf_status *) &(converter->status);
1129 int big_endian = status->endian == UTF_BIG_ENDIAN;
1130 enum MTextFormat format = mt->format;
1132 SET_SRC (mt, format, from, to);
1134 if (status->bom != UTF_BOM_NO)
1138 *dst++ = 0xFE, *dst++ = 0xFF;
1140 *dst++ = 0xFF, *dst++ = 0xFE;
1141 status->bom = UTF_BOM_NO;
1148 ONE_MORE_CHAR (c, bytes, format);
1150 if (c < 0xD800 || (c >= 0xE000 && c < 0x10000))
1154 *dst++ = c >> 8, *dst++ = c & 0xFF;
1156 *dst++ = c & 0xFF, *dst++ = c >> 8;
1158 else if (c >= 0x10000 && c < 0x110000)
1164 c1 = (c >> 10) + 0xD800;
1165 c2 = (c & 0x3FF) + 0xDC00;
1167 *dst++ = c1 >> 8, *dst++ = c1 & 0xFF,
1168 *dst++ = c2 >> 8, *dst++ = c2 & 0xFF;
1170 *dst++ = c1 & 0xFF, *dst++ = c1 >> 8,
1171 *dst++ = c2 & 0xFF, *dst++ = c2 >> 8;
1175 unsigned char buf[11];
1178 if (! converter->lenient)
1180 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1183 goto insufficient_destination;
1185 for (i = 0; i < len; i++)
1186 *dst++ = 0, *dst++ = buf[i];
1188 for (i = 0; i < len; i++)
1189 *dst++ = buf[i], *dst++ = 0;
1194 /* We reach here because of an unsupported char. */
1195 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1198 insufficient_destination:
1199 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1202 converter->nchars += nchars;
1203 converter->nbytes += dst - destination;
1204 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1208 encode_coding_utf_32 (MText *mt, int from, int to,
1209 unsigned char *destination, int dst_bytes,
1210 MConverter *converter)
1212 unsigned char *src, *src_end;
1213 unsigned char *dst = destination;
1214 unsigned char *dst_end = dst + dst_bytes;
1216 struct utf_status *status = (struct utf_status *) &(converter->status);
1217 int big_endian = status->endian == UTF_BIG_ENDIAN;
1218 enum MTextFormat format = mt->format;
1220 SET_SRC (mt, format, from, to);
1222 if (status->bom != UTF_BOM_NO)
1226 *dst++ = 0x00, *dst++ = 0x00, *dst++ = 0xFE, *dst++ = 0xFF;
1228 *dst++ = 0xFF, *dst++ = 0xFE, *dst++ = 0x00, *dst++ = 0x00;
1229 status->bom = UTF_BOM_NO;
1236 ONE_MORE_CHAR (c, bytes, format);
1238 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1242 *dst++ = 0x00, *dst++ = c >> 16,
1243 *dst++ = (c >> 8) & 0xFF, *dst++ = c & 0xFF;
1245 *dst++ = c & 0xFF, *dst++ = (c >> 8) & 0xFF,
1246 *dst++ = c >> 16, *dst++ = 0x00;
1250 unsigned char buf[11];
1253 if (! converter->lenient)
1255 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1258 goto insufficient_destination;
1260 for (i = 0; i < len; i++)
1261 *dst++ = 0, *dst++ = buf[i];
1263 for (i = 0; i < len; i++)
1264 *dst++ = buf[i], *dst++ = 0;
1269 /* We reach here because of an unsupported char. */
1270 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1273 insufficient_destination:
1274 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1277 converter->nchars += nchars;
1278 converter->nbytes += dst - destination;
1279 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1283 /* Stuff for coding-systems of type MCODING_TYPE_ISO_2022. */
1285 #define ISO_CODE_STX 0x02 /* STX: start of text */
1286 #define ISO_CODE_SO 0x0E /* SO: shift-out */
1287 #define ISO_CODE_SI 0x0F /* SI: shift-in */
1288 #define ISO_CODE_SS2_7 0x19 /* SS2 as used in 7-bit codes (single-shift-2) */
1289 #define ISO_CODE_ESC 0x1B /* ESC: escape */
1290 #define ISO_CODE_SS2 0x8E /* SS2: single-shift-2, a code in the 0x80..0x9F range */
1291 #define ISO_CODE_SS3 0x8F /* SS3: single-shift-3, a code in the 0x80..0x9F range */
1293 /** Structure pointed by MCodingSystem.extra_spec. */
1295 struct iso_2022_spec
1299 /** Initial graphic registers (0..3) invoked to each graphic
1300 plane left and right. */
1301 int initial_invocation[2];
1303 /** Initially designated charsets for each graphic register. */
1304 MCharset *initial_designation[4];
1312 struct iso_2022_status
1315 MCharset *designation[4];
1316 unsigned single_shifting : 1;
1319 unsigned utf8_shifting : 1;
1320 MCharset *non_standard_charset;
1321 int non_standard_charset_bytes;
1322 int non_standard_encoding;
1325 enum iso_2022_code_class {
1326 ISO_control_0, /* Control codes in the range
1327 0x00..0x1F and 0x7F, except for the
1328 following 4 codes. */
1329 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
1330 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
1331 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
1332 ISO_escape, /* ISO_CODE_ESC (0x1B) */
1333 ISO_control_1, /* Control codes in the range
1334 0x80..0x9F, except for the
1335 following 3 codes. */
1336 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
1337 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
1338 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
1339 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
1340 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
1341 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
1342 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
1343 } iso_2022_code_class[256]; /* 256 entries; presumably indexed by byte value -- the initialization is not visible here, confirm. */
1346 #define MCODING_ISO_DESIGNATION_MASK \
1347 (MCODING_ISO_DESIGNATION_G0 \
1348 | MCODING_ISO_DESIGNATION_G1 \
1349 | MCODING_ISO_DESIGNATION_CTEXT \
1350 | MCODING_ISO_DESIGNATION_CTEXT_EXT) /* Union of the four designation flags; ANDed with the coding flags to extract the designation policy. */
1353 setup_coding_iso_2022 (MCodingSystem *coding)
1355 MCodingInfoISO2022 *info = (MCodingInfoISO2022 *) (coding->extra_info);
1356 int ncharsets = coding->ncharsets;
1357 struct iso_2022_spec *spec;
1358 int designation_policy = info->flags & MCODING_ISO_DESIGNATION_MASK;
1361 coding->ascii_compatible = 0;
1363 MSTRUCT_CALLOC (spec, MERROR_CODING);
1365 spec->flags = info->flags;
1366 spec->initial_invocation[0] = info->initial_invocation[0];
1367 spec->initial_invocation[1] = info->initial_invocation[1];
1368 for (i = 0; i < 4; i++)
1369 spec->initial_designation[i] = NULL;
1370 if (designation_policy)
1372 spec->n_designations = ncharsets;
1373 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1374 spec->n_designations += mcharset__iso_2022_table.used;
1375 MTABLE_CALLOC (spec->designations, spec->n_designations, MERROR_CODING);
1376 for (i = 0; i < spec->n_designations; i++)
1377 spec->designations[i] = -1;
1381 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1382 MERROR (MERROR_CODING, -1);
1383 spec->designations = NULL;
1386 for (i = 0; i < ncharsets; i++)
1388 int reg = info->designations[i];
1391 && coding->charsets[i]->final_byte > 0
1392 && (reg < -4 || reg > 3))
1393 MERROR (MERROR_CODING, -1);
1396 if (spec->initial_designation[reg])
1397 MERROR (MERROR_CODING, -1);
1398 spec->initial_designation[reg] = coding->charsets[i];
1402 if (! designation_policy
1403 && ! (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1404 MERROR (MERROR_CODING, -1);
1408 if (designation_policy)
1409 spec->designations[i] = reg;
1410 if (coding->charsets[i] == mcharset__ascii)
1411 coding->ascii_compatible = 1;
1414 if (coding->ascii_compatible
1415 && (spec->flags & (MCODING_ISO_DESIGNATION_G0
1416 | MCODING_ISO_DESIGNATION_CTEXT
1417 | MCODING_ISO_DESIGNATION_CTEXT_EXT
1418 | MCODING_ISO_LOCKING_SHIFT)))
1419 coding->ascii_compatible = 0;
1421 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1422 for (i = 0; i < mcharset__iso_2022_table.used; i++)
1424 MCharset *charset = mcharset__iso_2022_table.charsets[i];
1426 spec->designations[ncharsets + i]
1427 = ((designation_policy == MCODING_ISO_DESIGNATION_CTEXT
1428 || designation_policy == MCODING_ISO_DESIGNATION_CTEXT_EXT)
1429 ? (charset->code_range[0] == 32
1430 || charset->code_range[1] == 255)
1431 : designation_policy == MCODING_ISO_DESIGNATION_G1);
1434 spec->use_esc = ((spec->flags & MCODING_ISO_DESIGNATION_MASK)
1435 || ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1436 && (spec->initial_designation[2]
1437 || spec->initial_designation[3]))
1438 || (! (spec->flags & MCODING_ISO_EIGHT_BIT)
1439 && (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1440 || (spec->flags & MCODING_ISO_ISO6429));
1442 coding->extra_spec = (void *) spec;
1448 reset_coding_iso_2022 (MConverter *converter)
1450 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1451 MCodingSystem *coding = internal->coding;
1452 struct iso_2022_status *status
1453 = (struct iso_2022_status *) &(converter->status);
1454 struct iso_2022_spec *spec;
1458 && setup_coding_iso_2022 (coding) < 0)
1462 spec = (struct iso_2022_spec *) coding->extra_spec;
1463 status->invocation[0] = spec->initial_invocation[0];
1464 status->invocation[1] = spec->initial_invocation[1];
1465 for (i = 0; i < 4; i++)
1466 status->designation[i] = spec->initial_designation[i];
1467 status->single_shifting = 0;
/* Decode a designation escape sequence: find the charset that has
   dimension DIM, CHARS (94 or 96) and final byte FINAL, and designate
   it to graphic register REG (STATUS->designation[REG]).
   Two lookups are visible.  One asks MCHARSET_ISO_2022 () and, unless
   MCODING_ISO_FULL_SUPPORT is set in SPEC->flags, additionally
   requires the charset to be listed in CODING->charsets.  The other
   scans mcharset__iso_2022_table for an entry whose revision equals
   REV; there a CHARS of 96 also matches an entry whose code_range[1]
   is 255.
   Control jumps to invalid_byte when FINAL is below '0' or not below
   128, or when no acceptable charset is found.  The expansion site
   must provide `spec', `coding', `status', `i' and that label.  */
1474 #define ISO2022_DECODE_DESIGNATION(reg, dim, chars, final, rev) \
1476 MCharset *charset; \
1478 if ((final) < '0' || (final) >= 128) \
1479 goto invalid_byte; \
1482 charset = MCHARSET_ISO_2022 ((dim), (chars), (final)); \
1483 if (! (spec->flags & MCODING_ISO_FULL_SUPPORT)) \
1487 for (i = 0; i < coding->ncharsets; i++) \
1488 if (charset == coding->charsets[i]) \
1490 if (i == coding->ncharsets) \
1491 goto invalid_byte; \
1498 for (i = 0; i < mcharset__iso_2022_table.used; i++) \
1500 charset = mcharset__iso_2022_table.charsets[i]; \
1501 if (charset->revision == (rev) \
1502 && charset->dimension == (dim) \
1503 && charset->final_byte == (final) \
1504 && (charset->code_range[1] == (chars) \
1505 || ((chars) == 96 && charset->code_range[1] == 255))) \
1508 if (i == mcharset__iso_2022_table.used) \
1509 goto invalid_byte; \
1511 status->designation[reg] = charset; \
/* Map CHARSET_NAME, the charset name carried by a compound-text
   extended segment, to a charset.  "koi8-r" maps to the charset of
   the same name and "big5-0" maps to the charset named "big5".  The
   caller lower-cases the name before calling.  */
1516 find_ctext_non_standard_charset (char *charset_name)
1520 if (! strcmp (charset_name, "koi8-r"))
1521 charset = MCHARSET (msymbol ("koi8-r"));
1522 else if (! strcmp (charset_name, "big5-0"))
1523 charset = MCHARSET (msymbol ("big5"));
1530 decode_coding_iso_2022 (const unsigned char *source, int src_bytes, MText *mt,
1531 MConverter *converter)
1533 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1534 MCodingSystem *coding = internal->coding;
1535 const unsigned char *src = internal->carryover;
1536 const unsigned char *src_stop = src + internal->carryover_bytes;
1537 const unsigned char *src_end = source + src_bytes;
1538 const unsigned char *src_base;
1539 unsigned char *dst = mt->data + mt->nbytes;
1540 unsigned char *dst_end = mt->data + mt->allocated;
1542 int last_nchars = 0;
1543 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1544 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
1545 struct iso_2022_status *status
1546 = (struct iso_2022_status *) &(converter->status);
1547 MCharset *charset0, *charset1, *charset;
1549 MCharset *cns_charsets[15];
1551 charset0 = (status->invocation[0] >= 0
1552 ? status->designation[status->invocation[0]] : NULL);
1553 charset1 = (status->invocation[1] >= 0
1554 ? status->designation[status->invocation[1]] : NULL);
1555 charset = mcharset__ascii;
1557 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1561 memset (cns_charsets, 0, sizeof (cns_charsets));
1562 for (i = 0; i < coding->ncharsets; i++)
1563 if (coding->charsets[i]->dimension == 2
1564 && coding->charsets[i]->code_range[1] == 126)
1566 int final = coding->charsets[i]->final_byte;
1568 if (final >= 'G' && final <= 'M')
1569 cns_charsets[final - 'G'] = coding->charsets[i];
1571 cns_charsets[14] = coding->charsets[i];
1577 MCharset *this_charset = NULL;
1580 ONE_MORE_BASE_BYTE (c1);
1582 if (status->utf8_shifting)
1585 int bytes = CHAR_BYTES_BY_HEAD (c1);
1589 for (i = 1; i < bytes; i++)
1594 this_charset = UTF8_CHARSET (buf);
1595 c1 = STRING_CHAR_UTF8 (buf);
1599 if (status->non_standard_encoding > 0)
1603 this_charset = status->non_standard_charset;
1604 for (i = 1; i < status->non_standard_charset_bytes; i++)
1607 c1 = (c1 << 8) | c2;
1609 c1 = DECODE_CHAR (this_charset, c1);
1613 switch (iso_2022_code_class[c1])
1615 case ISO_graphic_plane_0:
1616 this_charset = charset0;
1619 case ISO_0x20_or_0x7F:
1621 || (charset0->code_range[0] != 32
1622 && charset0->code_range[1] != 255))
1623 /* This is SPACE or DEL. */
1624 this_charset = mcharset__ascii;
1626 /* This is a graphic character of plane 0. */
1627 this_charset = charset0;
1630 case ISO_graphic_plane_1:
1633 this_charset = charset1;
1636 case ISO_0xA0_or_0xFF:
1638 || charset1->code_range[0] == 33
1639 || ! (spec->flags & MCODING_ISO_EIGHT_BIT))
1641 /* This is a graphic character of plane 1. */
1644 this_charset = charset1;
1648 this_charset = mcharset__ascii;
1655 if ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1656 && status->designation[1])
1658 status->invocation[0] = 1;
1659 charset0 = status->designation[1];
1662 this_charset = mcharset__ascii;
1666 if (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1668 status->invocation[0] = 0;
1669 charset0 = status->designation[0];
1672 this_charset = mcharset__ascii;
1675 case ISO_single_shift_2_7:
1676 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT_7))
1678 this_charset = mcharset__ascii;
1682 goto label_escape_sequence;
1684 case ISO_single_shift_2:
1685 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1688 if (c1 < 0xA1 || (c1 > 0xA7 && c1 < 0xAF) || c1 > 0xAF
1689 || ! cns_charsets[c1 - 0xA1])
1691 status->designation[2] = cns_charsets[c1 - 0xA1];
1693 else if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1695 /* SS2 is handled as an escape sequence of ESC 'N' */
1697 goto label_escape_sequence;
1699 case ISO_single_shift_3:
1700 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1702 /* SS2 is handled as an escape sequence of ESC 'O' */
1704 goto label_escape_sequence;
1706 case ISO_control_sequence_introducer:
1707 /* CSI is handled as an escape sequence of ESC '[' ... */
1709 goto label_escape_sequence;
1712 if (! spec->use_esc)
1714 this_charset = mcharset__ascii;
1718 label_escape_sequence:
1719 /* Escape sequences handled here are invocation,
1720 designation, and direction specification. */
1723 case '&': /* revision of following character set */
1724 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1725 goto unused_escape_sequence;
1727 if (c1 < '@' || c1 > '~')
1730 if (c1 != ISO_CODE_ESC)
1733 goto label_escape_sequence;
1735 case '$': /* designation of 2-byte character set */
1736 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1737 goto unused_escape_sequence;
1739 if (c1 >= '@' && c1 <= 'B')
1740 { /* designation of JISX0208.1978, GB2312.1980, or
1742 ISO2022_DECODE_DESIGNATION (0, 2, 94, c1, -1);
1744 else if (c1 >= 0x28 && c1 <= 0x2B)
1745 { /* designation of (dimension 2, chars 94) character set */
1747 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2, -1);
1749 else if (c1 >= 0x2C && c1 <= 0x2F)
1750 { /* designation of (dimension 2, chars 96) character set */
1752 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2, -1);
1756 /* We must update these variables now. */
1757 charset0 = status->designation[status->invocation[0]];
1758 charset1 = status->designation[status->invocation[1]];
1761 case 'n': /* invocation of locking-shift-2 */
1762 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1763 || ! status->designation[2])
1765 status->invocation[0] = 2;
1766 charset0 = status->designation[2];
1769 case 'o': /* invocation of locking-shift-3 */
1770 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1771 || ! status->designation[3])
1773 status->invocation[0] = 3;
1774 charset0 = status->designation[3];
1777 case 'N': /* invocation of single-shift-2 */
1778 if (! ((spec->flags & MCODING_ISO_SINGLE_SHIFT)
1779 || (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1780 || ! status->designation[2])
1782 this_charset = status->designation[2];
1784 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1788 case 'O': /* invocation of single-shift-3 */
1789 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT)
1790 || ! status->designation[3])
1792 this_charset = status->designation[3];
1794 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1798 case '[': /* specification of direction */
1799 if (! (spec->flags & MCODING_ISO_ISO6429))
1801 /* For the moment, nested direction is not supported.
1802 So, (coding->mode & CODING_MODE_DIRECTION) zero means
1803 left-to-right, and nonzero means right-to-left. */
1807 case ']': /* end of the current direction */
1808 case '0': /* end of the current direction */
1812 case '1': /* start of left-to-right direction */
1819 case '2': /* start of right-to-left direction */
1833 char charset_name[16];
1837 if (! spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
1839 /* Compound-text uses these escape sequences:
1841 ESC % G -- utf-8 bytes -- ESC % @
1842 ESC % / 1 M L -- charset name -- STX -- bytes --
1843 ESC % / 2 M L -- charset name -- STX -- bytes --
1844 ESC % / 3 M L -- charset name -- STX -- bytes --
1845 ESC % / 4 M L -- charset name -- STX -- bytes --
1847 It also uses this sequence but that is not yet
1850 ESC % / 0 M L -- charset name -- STX -- bytes -- */
1855 status->utf8_shifting = 1;
1860 if (! status->utf8_shifting)
1862 status->utf8_shifting = 0;
1868 if (c1 < '1' || c1 > '4')
1870 status->non_standard_charset_bytes = c1 - '0';
1873 if (c1 < 128 || c2 < 128)
1875 bytes = (c1 - 128) * 128 + (c2 - 128);
1876 for (i = 0; i < 16; i++)
1879 if (c1 == ISO_CODE_STX)
1881 charset_name[i] = TOLOWER (c1);
1885 charset_name[i++] = '\0';
1886 this_charset = find_ctext_non_standard_charset (charset_name);
1889 status->non_standard_charset = this_charset;
1890 status->non_standard_encoding = bytes - i;
1895 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1896 goto unused_escape_sequence;
1897 if (c1 >= 0x28 && c1 <= 0x2B)
1898 { /* designation of (dimension 1, chars 94) charset */
1900 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2, -1);
1902 else if (c1 >= 0x2C && c1 <= 0x2F)
1903 { /* designation of (dimension 1, chars 96) charset */
1905 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2, -1);
1909 /* We must update these variables now. */
1910 charset0 = status->designation[status->invocation[0]];
1911 charset1 = status->designation[status->invocation[1]];
1914 unused_escape_sequence:
1915 UNGET_ONE_BYTE (c1);
1917 this_charset = mcharset__ascii;
1921 if (this_charset->dimension == 1)
1923 if (this_charset->code_range[1] <= 128)
1926 else if (this_charset->dimension == 2)
1929 c1 = ((c1 & 0x7F) << 8) | (c2 & 0x7F);
1931 else /* i.e. (dimension == 3) */
1935 c1 = ((c1 & 0x7F) << 16) | ((c2 & 0x7F) << 8) | (c3 & 0x7F);
1937 c1 = DECODE_CHAR (this_charset, c1);
1941 if (! converter->lenient)
1943 REWIND_SRC_TO_BASE ();
1945 this_charset = mcharset__binary;
1948 if (this_charset != mcharset__ascii
1949 && this_charset != charset)
1951 TAKEIN_CHARS (mt, nchars - last_nchars,
1952 dst - (mt->data + mt->nbytes), charset);
1953 charset = this_charset;
1954 last_nchars = nchars;
1957 if (status->non_standard_encoding > 0)
1958 status->non_standard_encoding -= status->non_standard_charset_bytes;
1960 /* We reach here because of an invalid byte. */
1966 TAKEIN_CHARS (mt, nchars - last_nchars,
1967 dst - (mt->data + mt->nbytes), charset);
1968 return finish_decoding (mt, converter, nchars,
1969 source, src_end, src_base, error);
1973 /* Produce codes (escape sequence) for designating CHARSET to graphic
1974 register REG at DST, and increment DST.  The sequence starts with
1975 ISO_CODE_ESC and ends with CHARSET->final_byte.  For a CHARSET whose
1976 dimension is not 1, the intermediate character can be left out
   (short form) only when MCODING_ISO_LONG_FORM is not set in
   SPEC->flags and the final byte is '@', 'A', or 'B'.  A charset
   whose code_range[0] is 32 or whose code_range[1] is 255 uses the
   intermediate characters ",-./" instead of "()*+".  Jump to
   memory_shortage when fewer than 4 bytes are left before dst_end.
   Update STATUS->designation.  */
1978 #define ISO2022_ENCODE_DESIGNATION(reg, charset, spec, status) \
1980 char *intermediate_char_94 = "()*+"; \
1981 char *intermediate_char_96 = ",-./"; \
1983 if (dst + 4 > dst_end) \
1984 goto memory_shortage; \
1985 *dst++ = ISO_CODE_ESC; \
1986 if (charset->dimension == 1) \
1988 if (charset->code_range[0] != 32 \
1989 && charset->code_range[1] != 255) \
1990 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1992 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1997 if (charset->code_range[0] != 32 \
1998 && charset->code_range[1] != 255) \
2000 if (spec->flags & MCODING_ISO_LONG_FORM \
2002 || charset->final_byte < '@' || charset->final_byte > 'B') \
2003 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
2006 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
2008 *dst++ = charset->final_byte; \
2010 status->designation[reg] = charset; \
2014 /* The following two macros produce codes (control character or escape
2015 sequence) for ISO-2022 single-shift functions (single-shift-2 and
2018 #define ISO2022_ENCODE_SINGLE_SHIFT_2(spec, status) \
2020 if (dst + 2 > dst_end) \
2021 goto memory_shortage; \
2022 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2023 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
2025 *dst++ = ISO_CODE_SS2; \
2026 status->single_shifting = 1; \
2030 #define ISO2022_ENCODE_SINGLE_SHIFT_3(spec, status) \
2032 if (dst + 2 > dst_end) \
2033 goto memory_shortage; \
2034 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2035 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
2037 *dst++ = ISO_CODE_SS3; \
2038 status->single_shifting = 1; \
2042 /* The following four macros produce codes (control character or
2043 escape sequence) for ISO-2022 locking-shift functions (shift-in,
2044 shift-out, locking-shift-2, and locking-shift-3).  Shift-in and
   shift-out emit the single byte ISO_CODE_SI or ISO_CODE_SO;
   locking-shift-2 and locking-shift-3 emit ISO_CODE_ESC followed by
   'n' or 'o'.  Each macro jumps to memory_shortage when its output
   does not fit before dst_end, and records the invoked register
   (0, 1, 2 or 3 respectively) in STATUS->invocation[0].  */
2046 #define ISO2022_ENCODE_SHIFT_IN(status) \
2048 if (dst + 1 > dst_end) \
2049 goto memory_shortage; \
2050 *dst++ = ISO_CODE_SI; \
2051 status->invocation[0] = 0; \
2055 #define ISO2022_ENCODE_SHIFT_OUT(status) \
2057 if (dst + 1 > dst_end) \
2058 goto memory_shortage; \
2059 *dst++ = ISO_CODE_SO; \
2060 status->invocation[0] = 1; \
2064 #define ISO2022_ENCODE_LOCKING_SHIFT_2(status) \
2066 if (dst + 2 > dst_end) \
2067 goto memory_shortage; \
2068 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
2069 status->invocation[0] = 2; \
2073 #define ISO2022_ENCODE_LOCKING_SHIFT_3(status) \
2075 if (dst + 2 > dst_end) \
2076 goto memory_shortage; \
2077 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
2078 status->invocation[0] = 3; \
/* Enter and leave the UTF-8 part of the output.  The START macro
   first makes sure through CHECK_DST that 3 + LEN bytes are
   available, emits an escape sequence that begins with ISO_CODE_ESC,
   and sets status->utf8_shifting.  The END macro emits an escape
   sequence that begins with ISO_CODE_ESC and clears the flag.
   NOTE(review): the sequences are presumably ESC % G and ESC % @, as
   listed in the decoder's compound-text comment -- confirm.  */
2081 #define ISO2022_ENCODE_UTF8_SHIFT_START(len) \
2083 CHECK_DST (3 + len); \
2084 *dst++ = ISO_CODE_ESC; \
2087 status->utf8_shifting = 1; \
2091 #define ISO2022_ENCODE_UTF8_SHIFT_END() \
2094 *dst++ = ISO_CODE_ESC; \
2097 status->utf8_shifting = 0; \
/* Emit the header of a segment for a non-standard charset: an escape
   sequence starting with ISO_CODE_ESC, the digit
   '0' + non_standard_charset_bytes, two length bytes written as 0 for
   now, the LEN bytes of NAME, and ISO_CODE_STX.  The start of the
   header is remembered in non_standard_begin so that the length
   bytes can be filled in later, and non_standard_bytes starts at
   LEN + 1 (the name plus STX).  CHECK_DST is asked for room for the
   header and one character.  */
2101 #define ISO2022_ENCODE_NON_STANDARD(name, len) \
2103 CHECK_DST (6 + len + 1 + non_standard_charset_bytes); \
2104 non_standard_begin = dst; \
2105 *dst++ = ISO_CODE_ESC; \
2108 *dst++ = '0' + non_standard_charset_bytes; \
2109 *dst++ = 0, *dst++ = 0; /* filled later */ \
2110 memcpy (dst, name, len); \
2112 *dst++ = ISO_CODE_STX; \
2113 non_standard_bytes = len + 1; \
/* Find the name under which CHARSET is written in a compound-text
   segment for non-standard charsets, and store the number of bytes
   per character in *BYTES.  The lookup goes by the charset's symbol
   name: "koi8-r" is recognized, and "big5" is written as "big5-0"
   with 2 bytes per character.  */
2118 find_ctext_non_standard_name (MCharset *charset, int *bytes)
2120 char *name = msymbol_name (charset->name);
2122 if (! strcmp (name, "koi8-r"))
2124 else if (! strcmp (name, "big5"))
2125 name = "big5-0", *bytes = 2;
2131 /* Designate CHARSET to a graphic register specified in
2132 SPEC->designations. If the register is invoked to neither graphic
2133 left nor graphic right, invoke it to graphic left. DSTP points to a
2134 variable containing a memory address where the output must go.
2135 DST_END is the limit of that memory.
2137 Return 0 if it succeeds. Return -1 otherwise, which means that the
2138 memory area is too short. By side effect, update the variable that
2142 iso_2022_designate_invoke_charset (MCodingSystem *coding,
2144 struct iso_2022_spec *spec,
2145 struct iso_2022_status *status,
2146 unsigned char **dstp,
2147 unsigned char *dst_end)
2150 unsigned char *dst = *dstp;
2152 for (i = 0; i < 4; i++)
2153 if (charset == status->designation[i])
2158 /* CHARSET is not yet designated to any graphic registers. */
2159 for (i = 0; i < coding->ncharsets; i++)
2160 if (charset == coding->charsets[i])
2162 if (i == coding->ncharsets)
2164 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2165 if (charset == mcharset__iso_2022_table.charsets[i])
2167 i += coding->ncharsets;
/* I is now an index into SPEC->designations: the first
   CODING->ncharsets slots stand for the coding system's own
   charsets, and the slots after them for the entries of
   mcharset__iso_2022_table.  The slot gives the graphic register.  */
2169 i = spec->designations[i];
2170 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2173 if (status->invocation[0] != i
2174 && status->invocation[1] != i)
2176 /* Graphic register I is not yet invoked. */
2179 case 0: /* graphic register 0 */
2180 ISO2022_ENCODE_SHIFT_IN (status);
2183 case 1: /* graphic register 1 */
2184 ISO2022_ENCODE_SHIFT_OUT (status);
/* G2 and G3 are reached by single shift when MCODING_ISO_SINGLE_SHIFT
   is set in SPEC->flags; the locking-shift macros are the
   alternative.  */
2187 case 2: /* graphic register 2 */
2188 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2189 ISO2022_ENCODE_SINGLE_SHIFT_2 (spec, status);
2191 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2194 case 3: /* graphic register 3 */
2195 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2196 ISO2022_ENCODE_SINGLE_SHIFT_3 (spec, status);
2198 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2211 /* Reset the invocation/designation status to the initial one. SPEC
2212 and STATUS contain information about the initial and current
2213 invocation/designation status respectively. DSTP points to a
2214 variable containing a memory address where the output must go.
2215 DST_END is the limit of that memory.
2217 Return 0 if it succeeds. Return -1 otherwise, which means that the
2218 memory area is too short. By side effect, update the variable that
2222 iso_2022_reset_invocation_designation (struct iso_2022_spec *spec,
2223 struct iso_2022_status *status,
2224 unsigned char **dstp,
2225 unsigned char *dst_end)
2227 unsigned char *dst = *dstp;
2230 /* Reset the invocation status of GL. We have not yet supported GR
2232 if (status->invocation[0] != spec->initial_invocation[0]
2233 && spec->initial_invocation[0] >= 0)
2235 if (spec->initial_invocation[0] == 0)
2236 ISO2022_ENCODE_SHIFT_IN (status);
2237 else if (spec->initial_invocation[0] == 1)
2238 ISO2022_ENCODE_SHIFT_OUT (status);
2239 else if (spec->initial_invocation[0] == 2)
2240 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2241 else /* i.e. spec->initial_invocation[0] == 3 */
2242 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2245 /* Reset the designation status of G0..G3. */
/* A register is designated again only when its current charset
   differs from the initial one and the initial one is non-NULL;
   registers without an initial designation keep their charset.  */
2246 for (i = 0; i < 4; i++)
2247 if (status->designation[i] != spec->initial_designation[i]
2248 && spec->initial_designation[i])
2250 MCharset *charset = spec->initial_designation[i];
2252 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
/* Encode the characters of MT between FROM and TO in ISO-2022 and
   write the bytes to DESTINATION, which has room for DST_BYTES bytes.
   The invocation/designation state is kept in CONVERTER->status.
   The outcome is recorded in CONVERTER->result; CONVERTER->nchars and
   CONVERTER->nbytes are advanced by the characters consumed and the
   bytes produced.  Return -1 when the result is
   MCONVERSION_RESULT_INVALID_CHAR, and 0 otherwise.  */
2265 encode_coding_iso_2022 (MText *mt, int from, int to,
2266 unsigned char *destination, int dst_bytes,
2267 MConverter *converter)
2269 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2270 MCodingSystem *coding = internal->coding;
2271 unsigned char *src, *src_end;
2272 unsigned char *dst = destination;
2273 unsigned char *dst_end = dst + dst_bytes;
2275 unsigned char *dst_base;
2276 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
2277 int full_support = spec->flags & MCODING_ISO_FULL_SUPPORT;
2278 struct iso_2022_status *status
2279 = (struct iso_2022_status *) &(converter->status);
2280 MCharset *primary, *charset0, *charset1;
2281 int next_primary_change;
2282 int ncharsets = coding->ncharsets;
2283 MCharset **charsets = coding->charsets;
2284 MCharset *cns_charsets[15];
2285 int ascii_compatible = coding->ascii_compatible;
2286 MCharset *non_standard_charset = NULL;
2287 int non_standard_charset_bytes = 0;
2288 int non_standard_bytes = 0;
2289 unsigned char *non_standard_begin = NULL;
2290 enum MTextFormat format = mt->format;
2292 SET_SRC (mt, format, from, to);
2294 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2298 memset (cns_charsets, 0, sizeof (cns_charsets));
2299 for (i = 0; i < ncharsets; i++)
2300 if (charsets[i]->dimension == 2)
2302 int final = charsets[i]->final_byte;
2304 if (final >= 'G' && final <= 'M')
2305 cns_charsets[final - 'G'] = charsets[i];
2307 cns_charsets[14] = charsets[i];
2311 next_primary_change = from;
/* CHARSET0 / CHARSET1 cache the charsets currently invoked to GL / GR;
   they are refreshed after every change of STATUS below.  */
2313 charset0 = status->designation[status->invocation[0]];
2314 charset1 = (status->invocation[1] < 0 ? NULL
2315 : status->designation[status->invocation[1]]);
2322 ONE_MORE_CHAR (c, bytes, format);
2324 if (c < 128 && ascii_compatible)
2326 if (status->utf8_shifting)
2327 ISO2022_ENCODE_UTF8_SHIFT_END ();
2331 else if (c <= 32 || c == 127)
2333 if (status->utf8_shifting)
2334 ISO2022_ENCODE_UTF8_SHIFT_END ();
2335 if (spec->flags & MCODING_ISO_RESET_AT_CNTL
2336 || (c == '\n' && spec->flags & MCODING_ISO_RESET_AT_EOL))
2338 if (iso_2022_reset_invocation_designation (spec, status,
2340 goto insufficient_destination;
2341 charset0 = status->designation[status->invocation[0]];
2342 charset1 = (status->invocation[1] < 0 ? NULL
2343 : status->designation[status->invocation[1]]);
2350 unsigned code = MCHAR_INVALID_CODE;
2351 MCharset *charset = NULL;
2353 int pos = from + nchars;
/* The Mcharset text property names the charset to try first; it is
   looked up again only when POS passes the end of the current
   property range.  */
2355 if (pos >= next_primary_change)
2357 MSymbol primary_charset
2358 = (MSymbol) mtext_get_prop (mt, pos, Mcharset);
2359 primary = MCHARSET (primary_charset);
2360 if (primary && primary != mcharset__binary)
2362 if (primary->final_byte <= 0)
2364 else if (! full_support)
2368 for (i = 0; i < ncharsets; i++)
2369 if (primary == charsets[i])
2376 mtext_prop_range (mt, Mcharset, pos,
2377 NULL, &next_primary_change, 0);
2380 if (primary && primary != mcharset__binary)
2382 code = ENCODE_CHAR (primary, c);
2383 if (code != MCHAR_INVALID_CODE)
2388 if (c <= 32 || c == 127)
2391 charset = mcharset__ascii;
2397 for (i = 0; i < ncharsets; i++)
2399 charset = charsets[i];
2400 code = ENCODE_CHAR (charset, c);
2401 if (code != MCHAR_INVALID_CODE)
2406 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
2408 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2410 charset = mcharset__iso_2022_table.charsets[i];
2411 code = ENCODE_CHAR (charset, c);
2412 if (code != MCHAR_INVALID_CODE)
2415 if (i == mcharset__iso_2022_table.used)
2417 if (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2418 goto unsupported_char;
2419 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2424 goto unsupported_char;
2430 && (charset->final_byte >= 0
2431 || spec->flags & MCODING_ISO_EUC_TW_SHIFT))
2433 if (code >= 0x80 && code < 0xA0)
2434 goto unsupported_char;
2436 if (status->utf8_shifting)
2437 ISO2022_ENCODE_UTF8_SHIFT_END ();
2438 if (charset == charset0)
2440 else if (charset == charset1)
2444 unsigned char *p = NULL;
2446 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2450 if (cns_charsets[0] == charset)
2456 for (i = 1; i < 15; i++)
2457 if (cns_charsets[i] == charset)
2460 *dst++ = ISO_CODE_SS2;
2463 status->single_shifting = 1;
2468 if (iso_2022_designate_invoke_charset
2469 (coding, charset, spec, status, &dst, dst_end) < 0)
2470 goto insufficient_destination;
2471 charset0 = status->designation[status->invocation[0]];
2472 charset1 = (status->invocation[1] < 0 ? NULL
2473 : status->designation[status->invocation[1]]);
2475 if (status->single_shifting)
2477 = (spec->flags & MCODING_ISO_EIGHT_BIT) ? 0x80 : 0;
2478 else if (charset == charset0)
/* Each byte of CODE is written with GR_MASK or'ed in.  */
2483 if (charset->dimension == 1)
2486 *dst++ = code | gr_mask;
2488 else if (charset->dimension == 2)
2491 *dst++ = (code >> 8) | gr_mask;
2492 *dst++ = (code & 0xFF) | gr_mask;
2497 *dst++ = (code >> 16) | gr_mask;
2498 *dst++ = ((code >> 8) & 0xFF) | gr_mask;
2499 *dst++ = (code & 0xFF) | gr_mask;
2501 status->single_shifting = 0;
2503 else if (charset && spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2505 if (charset != non_standard_charset)
2507 char *name = (find_ctext_non_standard_name
2508 (charset, &non_standard_charset_bytes));
2512 int len = strlen (name);
2514 ISO2022_ENCODE_NON_STANDARD (name, len);
2515 non_standard_charset = charset;
2518 non_standard_charset = NULL;
2521 if (non_standard_charset)
2523 if (dst + non_standard_charset_bytes > dst_end)
2524 goto insufficient_destination;
/* Grow the segment length and rewrite the two length bytes of the
   segment header: 7 bits each, with the top bit set.
   NOTE(review): no visible check keeps non_standard_bytes below
   128 * 128, the most these two bytes can hold -- confirm.  */
2525 non_standard_bytes += non_standard_charset_bytes;
2526 non_standard_begin[4] = (non_standard_bytes / 128) | 0x80;
2527 non_standard_begin[5] = (non_standard_bytes % 128) | 0x80;
2528 if (non_standard_charset_bytes == 1)
2530 else if (non_standard_charset_bytes == 2)
2531 *dst++ = code >> 8, *dst++ = code & 0xFF;
2532 else if (non_standard_charset_bytes == 3)
2533 *dst++ = code >> 16, *dst++ = (code >> 8) & 0xFF,
2534 *dst++ = code & 0xFF;
2535 else /* i.e. non_standard_charset_bytes == 4 */
2536 *dst++ = code >> 24, *dst++ = (code >> 16) & 0xFF,
2537 *dst++ = (code >> 8) & 0xFF, *dst++ = code & 0xFF;
2541 int len = CHAR_BYTES (c);
2544 goto unsupported_char;
2545 if (! status->utf8_shifting)
2546 ISO2022_ENCODE_UTF8_SHIFT_START (len);
2549 CHAR_STRING (c, dst);
2553 goto unsupported_char;
2563 if (iso_2022_designate_invoke_charset (coding, mcharset__ascii,
2566 goto insufficient_destination;
2567 if (! converter->lenient)
2569 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
2571 goto insufficient_destination;
2577 /* We reach here because of an unsupported char. */
2578 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2581 insufficient_destination:
2583 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
/* At the end of the last block, close an open UTF-8 part and, when
   MCODING_ISO_RESET_AT_EOL is set, bring the state back to the
   initial one.  */
2586 if (converter->result == MCONVERSION_RESULT_SUCCESS
2587 && converter->last_block)
2589 if (status->utf8_shifting)
2591 ISO2022_ENCODE_UTF8_SHIFT_END ();
2594 if (spec->flags & MCODING_ISO_RESET_AT_EOL
2595 && charset0 != spec->initial_designation[0])
2597 if (iso_2022_reset_invocation_designation (spec, status,
2599 goto insufficient_destination;
2602 converter->nchars += nchars;
2603 converter->nbytes += dst - destination;
2604 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2608 /* Stuff for coding systems of type MCODING_TYPE_MISC. */
2610 /* For SJIS handling... */
/* Convert a Shift_JIS byte pair (S1 first byte, S2 second byte) to a
   two-byte code whose first byte is in bits 8 and up and whose second
   byte is in the low bits.  First bytes from 0xE0 up use different
   offsets from those below 0xE0.
   NOTE(review): the arguments are not parenthesized and are
   evaluated more than once, so pass only simple variables.  */
2612 #define SJIS_TO_JIS(s1, s2) \
2614 ? (((s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0)) << 8) \
2616 : (((s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1)) << 8) \
2617 | (s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F))))
/* The opposite conversion: build a Shift_JIS code (first byte in bits
   8 and up) from the two bytes C1 and C2 of a two-byte code.  The
   same caveat about argument evaluation applies.  */
2619 #define JIS_TO_SJIS(c1, c2) \
2621 ? (((c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1)) << 8) \
2622 | (c2 + ((c2 >= 0x60) ? 0x20 : 0x1F))) \
2623 : (((c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0)) << 8) \
/* Prepare the sjis coding system of CONVERTER.  When the coding
   system is not yet marked ready, look up the charsets
   "jisx0208.1983" and "jisx0201-kana" and install them as
   charsets[1] and charsets[2], making three charsets in total;
   charsets[0] is left as it is.  A missing charset is detected by
   the NULL test on KANJI and KANA.  */
2628 reset_coding_sjis (MConverter *converter)
2630 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2631 MCodingSystem *coding = internal->coding;
2633 if (! coding->ready)
2635 MSymbol kanji_sym = msymbol ("jisx0208.1983");
2636 MCharset *kanji = MCHARSET (kanji_sym);
2637 MSymbol kana_sym = msymbol ("jisx0201-kana");
2638 MCharset *kana = MCHARSET (kana_sym);
2640 if (! kanji || ! kana)
2642 coding->ncharsets = 3;
2643 coding->charsets[1] = kanji;
2644 coding->charsets[2] = kana;
/* Decode a Shift_JIS byte sequence and take the decoded characters
   into MT.  Input is read first from the bytes carried over from a
   previous call (internal->carryover) and then from the SRC_BYTES
   bytes at SOURCE.  The three charsets of the coding system are used
   as roman (charsets[0]), kanji (charsets[1]) and kana (charsets[2]).
   Runs of characters that share a charset are handed to TAKEIN_CHARS
   together with that charset.  The return value is whatever
   finish_decoding () returns.  */
2651 decode_coding_sjis (const unsigned char *source, int src_bytes, MText *mt,
2652 MConverter *converter)
2654 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2655 MCodingSystem *coding = internal->coding;
2656 const unsigned char *src = internal->carryover;
2657 const unsigned char *src_stop = src + internal->carryover_bytes;
2658 const unsigned char *src_end = source + src_bytes;
2659 const unsigned char *src_base;
2660 unsigned char *dst = mt->data + mt->nbytes;
/* Keep room for one more character of maximum length.  */
2661 unsigned char *dst_end = mt->data + mt->allocated - MAX_UTF8_CHAR_BYTES;
2663 int last_nchars = 0;
2664 int at_most = converter->at_most > 0 ? converter->at_most : -1;
2666 MCharset *charset_roman = coding->charsets[0];
2667 MCharset *charset_kanji = coding->charsets[1];
2668 MCharset *charset_kana = coding->charsets[2];
2669 MCharset *charset = mcharset__ascii;
2674 MCharset *this_charset;
2677 ONE_MORE_BASE_BYTE (c1);
2682 this_charset = ((c1 <= 0x20 || c1 == 0x7F)
2686 else if ((c1 >= 0x81 && c1 <= 0x9F) || (c1 >= 0xE0 && c1 <= 0xEF))
/* Second byte of a two-byte character.  The lower bound of the
   second range is 0x80 (it used to be written as decimal 80).
   NOTE(review): 0x7F is not a valid Shift_JIS second byte, yet the
   first range accepts it -- confirm whether that is intended before
   tightening it to 0x7E.  */
2689 if ((c2 >= 0x40 && c2 <= 0x7F) || (c2 >= 0x80 && c2 <= 0xFC))
2691 this_charset = charset_kanji;
2692 c1 = SJIS_TO_JIS (c1, c2);
2697 else if (c1 >= 0xA1 && c1 <= 0xDF)
2699 this_charset = charset_kana;
2705 c = DECODE_CHAR (this_charset, c1);
2710 if (! converter->lenient)
2712 REWIND_SRC_TO_BASE ();
2714 this_charset = mcharset__binary;
/* A change of charset (other than to ASCII) closes the current run.  */
2717 if (this_charset != mcharset__ascii
2718 && this_charset != charset)
2720 TAKEIN_CHARS (mt, nchars - last_nchars,
2721 dst - (mt->data + mt->nbytes), charset);
2722 charset = this_charset;
2723 last_nchars = nchars;
2727 /* We reach here because of an invalid byte. */
2731 TAKEIN_CHARS (mt, nchars - last_nchars,
2732 dst - (mt->data + mt->nbytes), charset);
2733 return finish_decoding (mt, converter, nchars,
2734 source, src_end, src_base, error);
/* Encode the characters of MT between FROM and TO in Shift_JIS and
   write the bytes to DESTINATION, which has room for DST_BYTES bytes.
   Each character is tried against the roman charset (charsets[0]),
   then the kanji charset (charsets[1]), then the kana charset
   (charsets[2]).  The outcome is recorded in CONVERTER->result;
   CONVERTER->nchars and CONVERTER->nbytes are advanced by the
   characters consumed and the bytes produced.  Return -1 when the
   result is MCONVERSION_RESULT_INVALID_CHAR, and 0 otherwise.  */
2738 encode_coding_sjis (MText *mt, int from, int to,
2739 unsigned char *destination, int dst_bytes,
2740 MConverter *converter)
2742 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2743 MCodingSystem *coding = internal->coding;
2744 unsigned char *src, *src_end;
2745 unsigned char *dst = destination;
2746 unsigned char *dst_end = dst + dst_bytes;
2748 MCharset *charset_roman = coding->charsets[0];
2749 MCharset *charset_kanji = coding->charsets[1];
2750 MCharset *charset_kana = coding->charsets[2];
2751 enum MTextFormat format = mt->format;
2753 SET_SRC (mt, format, from, to);
2760 ONE_MORE_CHAR (c, bytes, format);
2762 if (c <= 0x20 || c == 0x7F)
2769 if ((code = ENCODE_CHAR (charset_roman, c)) != MCHAR_INVALID_CODE)
2774 else if ((code = ENCODE_CHAR (charset_kanji, c))
2775 != MCHAR_INVALID_CODE)
/* Split the kanji code into its two bytes and convert the pair.  */
2777 int c1 = code >> 8, c2 = code & 0xFF;
2778 code = JIS_TO_SJIS (c1, c2);
2781 *dst++ = code & 0xFF;
2783 else if ((code = ENCODE_CHAR (charset_kana, c))
2784 != MCHAR_INVALID_CODE)
/* Kana codes are written as one byte with the top bit set.  */
2787 *dst++ = code | 0x80;
2791 if (! converter->lenient)
2793 len = encode_unsupporeted_char (c, dst, dst_end,
2796 goto insufficient_destination;
2803 /* We reach here because of an unsupported char. */
2804 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2807 insufficient_destination:
2808 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2811 converter->nchars += nchars;
2812 converter->nbytes += dst - destination;
2813 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
/* Return the coding system stored as the Mcoding property of the
   symbol NAME.  A coding system may also be known only as a pending
   definition in coding_definition_list, keyed by the canonicalized
   form of NAME.  Such a definition is turned into a real coding
   system with mconv_define_coding (), looked up again under the name
   stored in the definition, and then popped from the list and
   unreferenced.  */
2817 static MCodingSystem *
2818 find_coding (MSymbol name)
2820 MCodingSystem *coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2825 MSymbol sym = msymbol__canonicalize (name);
2827 plist = mplist_find_by_key (coding_definition_list, sym);
2830 pl = MPLIST_PLIST (plist);
/* The first element of the definition is the real name; the rest is
   the parameter list handed to mconv_define_coding ().  */
2831 name = MPLIST_VAL (pl);
2832 mconv_define_coding (MSYMBOL_NAME (name), MPLIST_NEXT (pl),
2833 NULL, NULL, NULL, NULL);
2834 coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2835 plist = mplist_pop (plist);
2836 M17N_OBJECT_UNREF (plist);
/* NOTE(review): presumably these say what a converter is bound to
   (nothing, a memory buffer, or a stream) -- confirm against the
   converter code that uses them.  */
2841 #define BINDING_NONE 0
2842 #define BINDING_BUFFER 1
2843 #define BINDING_STREAM 2
/* 0x10000 is 65536.  NOTE(review): presumably the size in bytes of a
   conversion work area -- confirm at the point of use.  */
2845 #define CONVERT_WORKSIZE 0x10000
/* Initialize the coding module: set up coding_list and
   coding_definition_list, fill the byte classification table
   iso_2022_code_class, intern the symbols used as coding-system
   parameter keys and values, and define the predefined coding
   systems us-ascii, iso-8859-1, utf-8-full, utf-8, utf-16, utf-32,
   utf-16be, utf-32be, utf-16le, utf-32le and sjis.  One parameter
   list (PARAM) and one charset list (CHARSETS) are reused and
   modified between the definitions, then unreferenced at the end.  */
2851 mcoding__init (void)
2854 MPlist *param, *charsets, *pl;
2856 MLIST_INIT1 (&coding_list, codings, 128);
2857 coding_definition_list = mplist ();
2859 /* ISO-2022 specific initialize routine. */
/* Classify whole ranges first; the individual bytes assigned after
   the loops (0x20, 0x7F, 0xA0, 0xFF and the shift, escape and CSI
   codes) override or complete those ranges.  */
2860 for (i = 0; i < 0x20; i++)
2861 iso_2022_code_class[i] = ISO_control_0;
2862 for (i = 0x21; i < 0x7F; i++)
2863 iso_2022_code_class[i] = ISO_graphic_plane_0;
2864 for (i = 0x80; i < 0xA0; i++)
2865 iso_2022_code_class[i] = ISO_control_1;
2866 for (i = 0xA1; i < 0xFF; i++)
2867 iso_2022_code_class[i] = ISO_graphic_plane_1;
2868 iso_2022_code_class[0x20] = iso_2022_code_class[0x7F] = ISO_0x20_or_0x7F;
2869 iso_2022_code_class[0xA0] = iso_2022_code_class[0xFF] = ISO_0xA0_or_0xFF;
2870 iso_2022_code_class[0x0E] = ISO_shift_out;
2871 iso_2022_code_class[0x0F] = ISO_shift_in;
2872 iso_2022_code_class[0x19] = ISO_single_shift_2_7;
2873 iso_2022_code_class[0x1B] = ISO_escape;
2874 iso_2022_code_class[0x8E] = ISO_single_shift_2;
2875 iso_2022_code_class[0x8F] = ISO_single_shift_3;
2876 iso_2022_code_class[0x9B] = ISO_control_sequence_introducer;
2878 Mcoding = msymbol ("coding");
2880 Mutf = msymbol ("utf");
2881 Miso_2022 = msymbol ("iso-2022");
2883 Mreset_at_eol = msymbol ("reset-at-eol");
2884 Mreset_at_cntl = msymbol ("reset-at-cntl");
2885 Meight_bit = msymbol ("eight-bit");
2886 Mlong_form = msymbol ("long-form");
2887 Mdesignation_g0 = msymbol ("designation-g0");
2888 Mdesignation_g1 = msymbol ("designation-g1");
2889 Mdesignation_ctext = msymbol ("designation-ctext");
2890 Mdesignation_ctext_ext = msymbol ("designation-ctext-ext");
2891 Mlocking_shift = msymbol ("locking-shift");
2892 Msingle_shift = msymbol ("single-shift");
2893 Msingle_shift_7 = msymbol ("single-shift-7");
2894 Meuc_tw_shift = msymbol ("euc-tw-shift");
2895 Miso_6429 = msymbol ("iso-6429");
2896 Mrevision_number = msymbol ("revision-number");
2897 Mfull_support = msymbol ("full-support");
2898 Mmaybe = msymbol ("maybe");
2900 Mtype = msymbol ("type");
2901 Mcharsets = msymbol_as_managing_key ("charsets");
2902 Mflags = msymbol_as_managing_key ("flags");
2903 Mdesignation = msymbol_as_managing_key ("designation");
2904 Minvocation = msymbol_as_managing_key ("invocation");
2905 Mcode_unit = msymbol ("code-unit");
2906 Mbom = msymbol ("bom");
2907 Mlittle_endian = msymbol ("little-endian");
2910 charsets = mplist ();
2912 /* Setup predefined codings. */
2913 mplist_set (charsets, Msymbol, Mcharset_ascii);
2914 pl = mplist_add (pl, Mtype, Mcharset);
2915 pl = mplist_add (pl, Mcharsets, charsets);
2916 Mcoding_us_ascii = mconv_define_coding ("us-ascii", param,
2917 NULL, NULL, NULL, NULL);
/* Make "ANSI_X3.4-1968", and its canonicalized form, refer to the
   us-ascii coding system as well.  */
2920 MSymbol alias = msymbol ("ANSI_X3.4-1968");
2921 MCodingSystem *coding
2922 = (MCodingSystem *) msymbol_get (Mcoding_us_ascii, Mcoding);
2924 msymbol_put (alias, Mcoding, coding);
2925 alias = msymbol__canonicalize (alias);
2926 msymbol_put (alias, Mcoding, coding);
2929 mplist_set (charsets, Msymbol, Mcharset_iso_8859_1);
2930 Mcoding_iso_8859_1 = mconv_define_coding ("iso-8859-1", param,
2931 NULL, NULL, NULL, NULL);
/* The UTF family: type `utf', with the code unit, the BOM setting and
   the byte order changed from one definition to the next.  */
2933 mplist_set (charsets, Msymbol, Mcharset_m17n);
2934 mplist_put (param, Mtype, Mutf);
2935 mplist_put (param, Mcode_unit, (void *) 8);
2936 Mcoding_utf_8_full = mconv_define_coding ("utf-8-full", param,
2937 NULL, NULL, NULL, NULL);
2939 mplist_set (charsets, Msymbol, Mcharset_unicode);
2940 Mcoding_utf_8 = mconv_define_coding ("utf-8", param,
2941 NULL, NULL, NULL, NULL);
2943 mplist_put (param, Mcode_unit, (void *) 16);
2944 mplist_put (param, Mbom, Mmaybe);
2945 #ifndef WORDS_BIGENDIAN
2946 mplist_put (param, Mlittle_endian, Mt);
2948 Mcoding_utf_16 = mconv_define_coding ("utf-16", param,
2949 NULL, NULL, NULL, NULL);
2951 mplist_put (param, Mcode_unit, (void *) 32);
2952 Mcoding_utf_32 = mconv_define_coding ("utf-32", param,
2953 NULL, NULL, NULL, NULL);
2955 mplist_put (param, Mcode_unit, (void *) 16);
2956 mplist_put (param, Mbom, Mnil);
2957 mplist_put (param, Mlittle_endian, Mnil);
2958 Mcoding_utf_16be = mconv_define_coding ("utf-16be", param,
2959 NULL, NULL, NULL, NULL);
2961 mplist_put (param, Mcode_unit, (void *) 32);
2962 Mcoding_utf_32be = mconv_define_coding ("utf-32be", param,
2963 NULL, NULL, NULL, NULL);
2965 mplist_put (param, Mcode_unit, (void *) 16);
2966 mplist_put (param, Mlittle_endian, Mt);
2967 Mcoding_utf_16le = mconv_define_coding ("utf-16le", param,
2968 NULL, NULL, NULL, NULL);
2970 mplist_put (param, Mcode_unit, (void *) 32);
2971 Mcoding_utf_32le = mconv_define_coding ("utf-32le", param,
2972 NULL, NULL, NULL, NULL);
/* sjis has type Mnil and is defined with its own conversion
   functions.  */
2974 mplist_put (param, Mtype, Mnil);
2975 mplist_set (charsets, Msymbol, Mcharset_ascii);
2976 Mcoding_sjis = mconv_define_coding ("sjis", param,
2979 encode_coding_sjis, NULL);
2981 M17N_OBJECT_UNREF (charsets);
2982 M17N_OBJECT_UNREF (param);
/* Tear down the coding module: release the per-coding data of every
   entry in coding_list, then the list storage itself, and finally the
   definition plists kept in coding_definition_list.  */
2988 mcoding__fini (void)
2993 for (i = 0; i < coding_list.used; i++)
2995 MCodingSystem *coding = coding_list.codings[i];
/* extra_info and extra_spec are optional, so each one is released only
   when it is set.  */
2997 if (coding->extra_info)
2998 free (coding->extra_info);
2999 if (coding->extra_spec)
/* For an ISO-2022 coding the spec is a struct iso_2022_spec whose
   designations member is freed on its own before the spec itself.  */
3001 if (coding->type == Miso_2022)
3002 free (((struct iso_2022_spec *) coding->extra_spec)->designations);
3003 free (coding->extra_spec);
3007 MLIST_FREE1 (&coding_list, codings);
/* Drop the reference held on each definition plist, then on the
   containing plist.  */
3008 MPLIST_DO (plist, coding_definition_list)
3009 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
3010 M17N_OBJECT_UNREF (coding_definition_list);
/* Make sure coding_definition_list has a charset-type coding
   definition for the charset SYM.  Nothing is added when an entry
   already exists under the canonicalized form of SYM.  */
3014 mconv__register_charset_coding (MSymbol sym)
3016 MSymbol name = msymbol__canonicalize (sym);
3018 if (! mplist_find_by_key (coding_definition_list, name))
3020 MPlist *param = mplist (), *charsets = mplist ();
/* Build the definition: the coding is named SYM, has type Mcharset,
   and lists SYM as its only charset.  */
3022 mplist_set (charsets, Msymbol, sym);
3023 mplist_add (param, Msymbol, sym);
3024 mplist_add (param, Mtype, Mcharset);
3025 mplist_add (param, Mcharsets, charsets);
/* The definition is stored under the canonicalized name, not SYM.  */
3026 mplist_put (coding_definition_list, name, param);
/* Release the local reference to CHARSETS.  Presumably PARAM keeps its
   own reference because Mcharsets is a managing key -- TODO confirm.  */
3027 M17N_OBJECT_UNREF (charsets);
/* Load coding system definitions from the "coding-list" database and
   append them to coding_definition_list.  Each definition is entered
   under the canonicalized form of its name and again under the
   canonicalized form of every alias it declares.  */
3033 mcoding__load_from_database ()
3035 MDatabase *mdb = mdatabase_find (msymbol ("coding-list"), Mnil, Mnil, Mnil);
3036 MPlist *def_list, *plist;
/* DEFINITIONS tracks the plist cell returned by the latest mplist_add
   and is passed back in as the list argument of the next one.  */
3037 MPlist *definitions = coding_definition_list;
3038 int mdebug_flag = MDEBUG_CODING;
3042 MDEBUG_PUSH_TIME ();
3043 def_list = (MPlist *) mdatabase_load (mdb);
3044 MDEBUG_PRINT_TIME ("CODING", (stderr, " to load the data."));
3049 MDEBUG_PUSH_TIME ();
3050 MPLIST_DO (plist, def_list)
3052 MPlist *pl, *aliases;
3053 MSymbol name, canonicalized;
/* Each database entry must itself be a plist whose first element is
   the symbol naming the coding system.
   NOTE(review): the error code used here is MERROR_CHARSET although
   this is the coding module; MERROR_CODING may have been intended --
   confirm.  MERROR presumably returns -1 at once, in which case
   DEF_LIST is not released on these paths -- confirm.  */
3055 if (! MPLIST_PLIST_P (plist))
3056 MERROR (MERROR_CHARSET, -1);
3057 pl = MPLIST_PLIST (plist);
3058 if (! MPLIST_SYMBOL_P (pl))
3059 MERROR (MERROR_CHARSET, -1);
3060 name = MPLIST_SYMBOL (pl);
3061 canonicalized = msymbol__canonicalize (name);
/* Build the definition plist from the elements after the name, then
   put the name back at its head under the key Msymbol.  */
3062 pl = mplist__from_plist (MPLIST_NEXT (pl));
3063 mplist_push (pl, Msymbol, name);
3064 definitions = mplist_add (definitions, canonicalized, pl);
3065 aliases = mplist_get (pl, Maliases);
/* Register the same definition plist under each alias.  One extra
   reference is taken on PL per alias entry added.  */
3067 MPLIST_DO (aliases, aliases)
3068 if (MPLIST_SYMBOL_P (aliases))
3070 name = MPLIST_SYMBOL (aliases);
3071 canonicalized = msymbol__canonicalize (name);
3072 definitions = mplist_add (definitions, canonicalized, pl);
3073 M17N_OBJECT_REF (pl);
/* The raw data loaded from the database is no longer needed.  */
3077 M17N_OBJECT_UNREF (def_list);
3078 MDEBUG_PRINT_TIME ("CODING", (stderr, " to parse the loaded data."));
3084 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
3088 /*** @addtogroup m17nConv */
3092 /***en @name Variables: Symbols representing coding systems */
3093 /***ja @name ÊÑ¿ô: ÄêµÁºÑ¤ß¥³¡¼¥É·Ï¤ò»ØÄꤹ¤ë¤¿¤á¤Î¥·¥ó¥Ü¥ë */
3098 @brief Symbol for the coding system US-ASCII.
3100 The symbol #Mcoding_us_ascii has name <tt>"us-ascii"</tt> and
3101 represents a coding system for the CES US-ASCII. */
3104 @brief US-ASCII ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3106 ¥·¥ó¥Ü¥ë #Mcoding_us_ascii ¤Ï <tt>"us-ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3107 CES US-ASCII ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3109 MSymbol Mcoding_us_ascii;
3113 @brief Symbol for the coding system ISO-8859-1.
3115 The symbol #Mcoding_iso_8859_1 has name <tt>"iso-8859-1"</tt> and
3116 represents a coding system for the CES ISO-8859-1. */
3119 @brief ISO-8859-1 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3121 ¥·¥ó¥Ü¥ë #Mcoding_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt>
3122 ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢CES ISO-8859-1 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3124 MSymbol Mcoding_iso_8859_1;
3128 @brief Symbol for the coding system UTF-8.
3130 The symbol #Mcoding_utf_8 has name <tt>"utf-8"</tt> and represents
3131 a coding system for the CES UTF-8. */
3134 @brief UTF-8 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3136 ¥·¥ó¥Ü¥ë #Mcoding_utf_8 ¤Ï <tt>"utf-8"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢CES
3137 UTF-8 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3140 MSymbol Mcoding_utf_8;
3144 @brief Symbol for the coding system UTF-8-FULL.
3146 The symbol #Mcoding_utf_8_full has name <tt>"utf-8-full"</tt> and
3147 represents a coding system that is an extension of UTF-8. This
3148 coding system uses the same encoding algorithm as UTF-8 but is not
3149 limited to the Unicode characters. It can encode all characters
3150 supported by the m17n library. */
3153 @brief UTF-8-FULL ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3155 ¥·¥ó¥Ü¥ë #Mcoding_utf_8_full ¤Ï <tt>"utf-8-full"</tt>
3156 ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢<tt>"UTF-8"</tt> ¤Î³ÈÄ¥¤Ç¤¢¤ë¥³¡¼¥É·Ï¤ò¼¨¤¹¡£
3157 ¤³¤Î¥³¡¼¥É·Ï¤Ï UTF-8 ¤ÈƱ¤¸¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¥¢¥ë¥´¥ê¥º¥à¤òÍѤ¤¤ë¤¬¡¢ÂоݤÏ
3158 Unicode ʸ»ú¤Ë¤Ï¸ÂÄꤵ¤ì¤Ê¤¤¡£
3159 ¤Þ¤¿m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò¥¨¥ó¥³¡¼¥É¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
3162 MSymbol Mcoding_utf_8_full;
3166 @brief Symbol for the coding system UTF-16.
3168 The symbol #Mcoding_utf_16 has name <tt>"utf-16"</tt> and
3169 represents a coding system for the CES UTF-16 (RFC 2781). */
3171 @brief UTF-16 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3173 ¥·¥ó¥Ü¥ë #Mcoding_utf_16 ¤Ï <tt>"utf-16"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3174 CES UTF-16 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3177 MSymbol Mcoding_utf_16;
3181 @brief Symbol for the coding system UTF-16BE.
3183 The symbol #Mcoding_utf_16be has name <tt>"utf-16be"</tt> and
3184 represents a coding system for the CES UTF-16BE (RFC 2781). */
3187 @brief UTF-16BE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3189 ¥·¥ó¥Ü¥ë #Mcoding_utf_16be ¤Ï <tt>"utf-16be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3190 CES UTF-16BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3192 MSymbol Mcoding_utf_16be;
3196 @brief Symbol for the coding system UTF-16LE.
3198 The symbol #Mcoding_utf_16le has name <tt>"utf-16le"</tt> and
3199 represents a coding system for the CES UTF-16LE (RFC 2781). */
3202 @brief UTF-16LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3204 ¥·¥ó¥Ü¥ë #Mcoding_utf_16le ¤Ï <tt>"utf-16le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3205 CES UTF-16LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3207 MSymbol Mcoding_utf_16le;
3211 @brief Symbol for the coding system UTF-32.
3213 The symbol #Mcoding_utf_32 has name <tt>"utf-32"</tt> and
3214 represents a coding system for the CES UTF-32 (RFC 2279). */
3217 @brief UTF-32 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3219 ¥·¥ó¥Ü¥ë #Mcoding_utf_32 ¤Ï <tt>"utf-32"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3220 CES UTF-32 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3222 MSymbol Mcoding_utf_32;
3226 @brief Symbol for the coding system UTF-32BE.
3228 The symbol #Mcoding_utf_32be has name <tt>"utf-32be"</tt> and
3229 represents a coding system for the CES UTF-32BE (RFC 2279). */
3231 @brief UTF-32BE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3233 ¥·¥ó¥Ü¥ë #Mcoding_utf_32be ¤Ï <tt>"utf-32be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3234 CES UTF-32BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3236 MSymbol Mcoding_utf_32be;
3240 @brief Symbol for the coding system UTF-32LE.
3242 The symbol #Mcoding_utf_32le has name <tt>"utf-32le"</tt> and
3243 represents a coding system for the CES UTF-32LE (RFC 2279). */
3245 @brief UTF-32LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3247 ¥·¥ó¥Ü¥ë #Mcoding_utf_32le ¤Ï <tt>"utf-32le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3248 CES UTF-32LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3250 MSymbol Mcoding_utf_32le;
3254 @brief Symbol for the coding system SJIS.
3256 The symbol #Mcoding_sjis has name <tt>"sjis"</tt> and represents a coding
3257 system for the CES Shift-JIS. */
3259 @brief SJIS ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3261 ¥·¥ó¥Ü¥ë #Mcoding_sjis has ¤Ï <tt>"sjis"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3262 CES Shift-JISÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3264 MSymbol Mcoding_sjis;
3269 @name Variables: Parameter keys for mconv_define_coding (). */
3271 @name ÊÑ¿ô: mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ */
3276 Parameter key for mconv_define_coding () (which see). */
3278 mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ (¾ÜºÙ¤Ï mconv_define_coding ()»²¾È). */
3284 MSymbol Mdesignation;
3285 MSymbol Minvocation;
3288 MSymbol Mlittle_endian;
3293 @name Variables: Symbols representing coding system types. */
3295 @name ÊÑ¿ô¡§ ¥³¡¼¥É·Ï¤Î¥¿¥¤¥×¤ò¼¨¤¹¥·¥ó¥Ü¥ë. */
3300 Symbol that can be a value of the #Mtype parameter of a coding
3301 system used in an argument to the mconv_define_coding () function
3304 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿
3305 #Mtype ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3306 mconv_define_coding ()»²¾È)¡£ */
3316 @name Variables: Symbols appearing in the value of #Mflags parameter. */
3318 @name ÊÑ¿ô¡§ ¥Ñ¥é¥á¡¼¥¿ #Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë. */
3323 Symbols that can be a value of the #Mflags parameter of a coding
3324 system used in an argument to the mconv_define_coding () function
3327 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿
3328 #Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3329 mconv_define_coding ()»²¾È)¡£ */
3330 MSymbol Mreset_at_eol;
3332 MSymbol Mreset_at_cntl;
3335 MSymbol Mdesignation_g0;
3336 MSymbol Mdesignation_g1;
3337 MSymbol Mdesignation_ctext;
3338 MSymbol Mdesignation_ctext_ext;
3339 MSymbol Mlocking_shift;
3340 MSymbol Msingle_shift;
3341 MSymbol Msingle_shift_7;
3342 MSymbol Meuc_tw_shift;
3344 MSymbol Mrevision_number;
3345 MSymbol Mfull_support;
3350 @name Variables: etc
3352 Remaining variables. */
3353 /***ja @name ÊÑ¿ô: ¤½¤Î¾
3359 @brief Symbol whose name is "maybe".
3361 The variable #Mmaybe is a symbol of name <tt>"maybe"</tt>. It is
3362 used as a value of the #Mbom parameter of the function
3363 mconv_define_coding () (which see). */
3365 @brief "maybe"¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë.
3367 ÊÑ¿ô #Mmaybe ¤Ï <tt>"maybe"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£¤³¤ì¤Ï´Ø¿ô
3368 mconv_define_coding () ¥Ñ¥é¥á¡¼¥¿ #Mbom ¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤ë¡£
3369 (¾ÜºÙ¤Ï mconv_define_coding () »²¾È)¡£ */
3375 @brief The symbol @c Mcoding.
3377 Any decoded M-text has a text property whose key is the predefined
3378 symbol @c Mcoding. The name of @c Mcoding is
3379 <tt>"coding"</tt>. */
3382 @brief ¥·¥ó¥Ü¥ë @c Mcoding.
3384 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¤¹¤Ù¤Æ¡¢¥¡¼¤¬ÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë @c Mcoding
3385 ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£¥·¥ó¥Ü¥ë @c Mcoding ¤Ï
3386 <tt>"coding"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£ */
3394 @brief Define a coding system.
3396 The mconv_define_coding () function defines a new coding system
3397 and makes it accessible via a symbol whose name is $NAME. $PLIST
3398 specifies parameters of the coding system as below:
3402 <li> Key is @c Mtype, value is a symbol
3404 The value specifies the type of the coding system. It must be
3405 #Mcharset, #Mutf, #Miso_2022, or #Mnil.
3407 If the type is #Mcharset, $EXTRA_INFO is ignored.
3409 If the type is #Mutf, $EXTRA_INFO must be a pointer to
3412 If the type is #Miso_2022, $EXTRA_INFO must be a pointer to
3413 #MCodingInfoISO2022.
3415 If the type is #Mnil, the arguments $RESETTER, $DECODER, and
3416 $ENCODER must be supplied. $EXTRA_INFO is ignored. Otherwise,
3417 they can be @c NULL and the m17n library provides proper defaults.
3419 <li> Key is #Mcharsets, value is a plist
3421 The value specifies a list of charsets supported by the coding
3422 system. The keys of the plist must be #Msymbol, and the values
3423 must be symbols representing charsets.
3425 <li> Key is #Mflags, value is a plist
3427 If the type is #Miso_2022, the value specifies flags to control
3428 the ISO 2022 interpreter. The keys of the plist must be #Msymbol,
3429 and values must be one of the following.
3435 If this flag exists, designation and invocation status is reset to
3436 the initial state at the end of line.
3438 <li> #Mreset_at_cntl
3440 If this flag exists, designation and invocation status is reset to
3441 the initial state at a control character.
3445 If this flag exists, the graphic plane right is used.
3449 If this flag exists, the over-long escape sequences (ESC '$' '('
3450 <final_byte>) are used for designating the CCS JISX0208.1978,
3451 GB2312, and JISX0208.
3453 <li> #Mdesignation_g0
3455 If this flag and #Mfull_support exist, designates charsets not
3456 listed in the charset list to the graphic register G0.
3458 <li> #Mdesignation_g1
3460 If this flag and #Mfull_support exist, designates charsets not
3461 listed in the charset list to the graphic register G1.
3463 <li> #Mdesignation_ctext
3465 If this flag and #Mfull_support exist, designates charsets not
3466 listed in the charset list to a graphic register G0 or G1 based on
3467 the criteria of the Compound Text.
3469 <li> #Mdesignation_ctext_ext
3471 If this flag and #Mfull_support exist, designates charsets not
3472 listed in the charset list to a graphic register G0 or G1, or use
3473 extended segment for such charsets based on the criteria of the
3476 <li> #Mlocking_shift
3478 If this flag exists, use locking shift.
3482 If this flag exists, use single shift.
3484 <li> #Msingle_shift_7
3486 If this flag exists, use 7-bit single shift code (0x19).
3490 If this flag exists, use a special shifting according to EUC-TW.
3494 This flag is currently ignored.
3496 <li> #Mrevision_number
3498 If this flag exists, use a revision number escape sequence to
3499 designate a charset that has a revision number.
3503 If this flag exists, support all charsets registered in the
3504 International Registry.
3508 <li> Key is #Mdesignation, value is a plist
3510 If the type is #Miso_2022, the value specifies how to designate
3511 each supported charset. The keys of the plist must be
3512 #Minteger, and the values must be numbers indicating a graphic
3513 register. The Nth element value is for the Nth charset of the
3514 charset list. The value 0..3 means that it is assumed that a
3515 charset is already designated to the graphic register 0..3. The
3516 negative value G (-4..-1) means that a charset is not designated
3517 to any register at first, and if necessary, is designated to the
3518 (G+4) graphic register.
3520 <li> Key is #Minvocation, value is a plist
3522 If the type is #Miso_2022, the value specifies how to invoke
3523 each graphic register. The plist length must be one or two. The
3524 keys of the plist must be #Minteger, and the values must be
3525 numbers indicating a graphic register. The value of the first
3526 element specifies which graphic register is invoked to the
3527 graphic plane left. If the length is one, no graphic register is
3528 invoked to the graphic plane right. Otherwise, the value of the
3529 second element specifies which graphic register is invoked to
3530 the graphic plane right.
3532 <li> Key is #Mcode_unit, value is an integer
3534 If the type is #Mutf, the value specifies the bit length of a
3535 code-unit. It must be 8, 16, or 32.
3537 <li> Key is #Mbom, value is a symbol
3539 If the type is #Mutf and the code-unit bit length is 16 or 32,
3540 it specifies whether or not to use BOM (Byte Order Mark). If the
3541 value is #Mnil (default), BOM is not used, else if the value is
3542 #Mmaybe, the existence of BOM is detected at decoding time, else
3545 <li> Key is #Mlittle_endian, value is a symbol
3547 If the type is #Mutf and the code-unit bit length is 16 or 32,
3548 it specifies whether or not the encoding is little endian. If the
3549 value is #Mnil (default), it is big endian, else it is little
3554 $RESETTER is a pointer to a function that resets a converter for
3555 the coding system to the initial status. The pointed function is
3556 called with one argument, a pointer to a converter object.
3558 $DECODER is a pointer to a function that decodes a byte sequence
3559 according to the coding system. The pointed function is called
3560 with four arguments:
3562 @li A pointer to the byte sequence to decode.
3563 @li The number of bytes to decode.
3564 @li A pointer to an M-text to which the decoded characters are appended.
3565 @li A pointer to a converter object.
3567 $DECODER must return 0 if it succeeds. Otherwise it must return -1.
3569 $ENCODER is a pointer to a function that encodes an M-text
3570 according to the coding system. The pointed function is called
3573 @li A pointer to the M-text to encode.
3574 @li The starting position of the encoding.
3575 @li The ending position of the encoding.
3576 @li A pointer to a memory area where the produced bytes are stored.
3577 @li The size of the memory area.
3578 @li A pointer to a converter object.
3580 $ENCODER must return 0 if it succeeds. Otherwise it must return -1.
3582 $EXTRA_INFO is a pointer to a data structure that contains extra
3583 information about the coding system. The type of the data
3584 structure depends on the value of the #Mtype parameter in $PLIST.
3588 If the operation was successful, mconv_define_coding () returns a
3589 symbol whose name is $NAME. If an error is detected, it returns
3590 #Mnil and assigns an error code to the external variable #merror_code. */
3593 @brief ¥³¡¼¥É·Ï¤òÄêµÁ¤¹¤ë.
3595 ´Ø¿ô mconv_define_coding () ¤Ï¡¢¿·¤·¤¤¥³¡¼¥É·Ï¤òÄêµÁ¤·¡¢¤½¤ì¤ò
3596 $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£ $PLIST
3597 ¤Ç¤ÏÄêµÁ¤¹¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿¤ò°Ê²¼¤Î¤è¤¦¤Ë»ØÄꤹ¤ë¡£
3601 <li> ¥¡¼¤¬ @c Mtype ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3603 Ãͤϥ³¡¼¥É·Ï¤Î¥¿¥¤¥×¤òɽ¤·¡¢#Mcharset, #Mutf, #Miso_2022, #Mnil
3604 ¤Î¤¤¤º¤ì¤«¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3606 ¥¿¥¤¥×¤¬ #Mcharset ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£
3608 ¥¿¥¤¥×¤¬ #Mutf ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï #MCodingInfoUTF
3609 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3611 ¥¿¥¤¥×¤¬ #Miso_2022¤Ê¤é¤Ð $EXTRA_INFO ¤Ï #MCodingInfoISO2022
3612 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3614 ¥¿¥¤¥×¤¬ #Mnil ¤Ê¤é¤Ð¡¢°ú¿ô $RESETTER, $DECODER, $ENCODER
3615 ¤òÍ¿¤¨¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£$EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£
3616 ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¤³¤ì¤é¤Ï @c NULL ¤Ç¤è¤¯¡¢
3617 m17n ¥é¥¤¥Ö¥é¥ê¤¬Å¬Àڤʥǥե©¥ë¥ÈÃͤòÍ¿¤¨¤ë¡£
3619 <li> ¥¡¼¤¬ #Mcharsets ¤ÇÃͤ¬ plist ¤Î»þ
3621 ÃͤϤ³¤Î¥³¡¼¥É·Ï¤Ç¥µ¥Ý¡¼¥È¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î¥ê¥¹¥È¤Ç¤¢¤ë¡£plist¤Î¥¡¼¤Ï
3622 #Msymbol¡¢ÃͤÏʸ»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3624 <li> ¥¡¼¤¬ #Mflags Ãͤ¬ plist ¤Î»þ
3626 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢¤³¤ÎÃͤÏ, ISO 2022
3627 ¥¤¥ó¥¿¥×¥ê¥¿ÍѤÎÀ©¸æ¥Õ¥é¥Ã¥°¤ò¼¨¤¹¡£plist ¤Î¥¡¼¤Ï #Msymbol
3628 ¤Ç¤¢¤ê¡¢Ãͤϰʲ¼¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ë¡£
3634 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤Ï¹ÔËö¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤ÆÅö½é¤Î¾õÂÖ¤ËÌá¤ë¡£
3636 <li> #Mreset_at_cntl
3638 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤ÏÀ©¸æʸ»ú¤Ë½Ð²ñ¤Ã¤¿»þÅÀ¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤ÆÅö½é¤Î¾õÂÖ¤ËÌá¤ë¡£
3642 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î±¦È¾Ì̤¬ÍѤ¤¤é¤ì¤ë¡£
3646 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢Ê¸»ú½¸¹ç JISX0208.1978, GB2312, JISX0208
3647 ¤ò»Ø¼¨¤¹¤ëºÝ¤Ë over-long ¥¨¥¹¥±¡¼¥×¥·¡¼¥±¥ó¥¹ (ESC '$' '('
3648 <final_byte>) ¤¬ÍѤ¤¤é¤ì¤ë¡£
3650 <li> #Mdesignation_g0
3652 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3655 <li> #Mdesignation_g1
3657 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3660 <li> #Mdesignation_ctext
3662 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3663 G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¥³¥ó¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë¤½¤Ã¤Æ»Ø¼¨¤¹¤ë¡£
3665 <li> #Mdesignation_ctext_ext
3667 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3668 G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¤¢¤ë¤¤¤Ï³ÈÄ¥¥»¥°¥á¥ó¥È¤Ë¥³¥ó¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë¤½¤Ã¤Æ»Ø¼¨¤¹¤ë¡£
3670 <li> #Mlocking_shift
3672 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥í¥Ã¥¥ó¥°¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3676 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥·¥ó¥°¥ë¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3678 <li> #Msingle_shift_7
3680 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢7-bit ¥·¥ó¥°¥ë¥·¥Õ¥È¥³¡¼¥É (0x19) ¤òÍѤ¤¤ë¡£
3684 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢EUC-TW ¤Ë±è¤Ã¤¿ÆÃÊ̤ʥ·¥Õ¥È¤òÍѤ¤¤ë¡£
3688 ¸½»þÅÀ¤Ç¤ÏÍѤ¤¤é¤ì¤Æ¤¤¤Ê¤¤¡£
3690 <li> #Mrevision_number
3692 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢revision number ¤ò»ý¤Äʸ»ú¥»¥Ã¥È¤ò»Ø¼¨¤¹¤ëºÝ¤Ë
3693 revision number ¥¨¥¹¥±¡¼¥×¥·¡¼¥¯¥¨¥ó¥¹¤òÍѤ¤¤ë¡£
3697 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢the International Registry
3698 ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ëÁ´Ê¸»ú¥»¥Ã¥È¤ò¥µ¥Ý¡¼¥È¤¹¤ë¡£
3702 <li> ¥¡¼¤¬ #Mdesignation¤ÇÃͤ¬ plist ¤Î»þ
3704 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢ÃͤϳÆʸ»ú¤ò¤É¤Î¤è¤¦¤Ë»Ø¼¨¤¹¤ë¤«¤ò¼¨¤¹¡£
3705 plist ¤Î¥¡¼¤Ï #Minteger¡¢ÃͤϽ¸¹ç¡Êgraphic register¡Ë
3706 ¤ò¼¨¤¹¿ô»ú¤Ç¤¢¤ë¡£NÈÖÌܤÎÍ×ÁǤÎÃͤϡ¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Î N
3707 ÈÖÌܤÎʸ»ú¥»¥Ã¥È¤ËÂбþ¤¹¤ë¡£Ãͤ¬ 0..3 ¤Ç¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¤¬¤¹¤Ç¤Ë
3708 G0..G3 ¤Ë»Ø¼¨ ¤µ¤ì¤Æ¤¤¤ë¡£
3710 Ãͤ¬Éé(-4..-1) ¤Ç¤¢¤ì¤Ð¡¢½é´ü¾õÂ֤ǤÏʸ»ú¥»¥Ã¥È¤¬¤É¤³¤Ë¤â»Ø¼¨¤µ¤ì¤Æ¤¤¤Ê¤¤¤³¤È¡¢É¬ÍפʺݤˤÏ
3711 G0..G3 ¤Î¤½¤ì¤¾¤ì¤Ë»Ø¼¨¤¹¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
3713 <li> ¥¡¼¤¬ #Minvocation¤ÇÃͤ¬ plist ¤Î»þ
3715 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢Ãͤϳƽ¸¹ç¤ò¤É¤Î¤è¤¦¤Ë¸Æ¤Ó½Ð¤¹¤«¤ò¼¨¤¹¡£
3716 plist ¤ÎŤµ¤Ï 1 ¤Ê¤¤¤· 2 ¤Ç¤¢¤ë¡£plist ¤Î¥¡¼¤Ï
3717 #Minteger¡¢ÃͤϽ¸¹ç¡Êgraphic register)¤ò¼¨¤¹¿ô»ú¤Ç¤¢¤ë¡£
3718 ºÇ½é¤ÎÍ×ÁǤÎÃͤ¬¿Þ·Áʸ»ú½¸¹çº¸È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤ò¼¨¤¹¡£
3719 plist ¤ÎŤµ¤¬ 1 ¤Ê¤é¤Ð¡¢±¦È¾Ì̤ˤϲ¿¤â¸Æ¤Ó½Ð¤µ¤ì¤Ê¤¤¡£
3720 ¤½¤¦¤Ç¤±¤ì¤Ð¡¢£²¤Ä¤á¤ÎÍ×ÁǤÎÃͤ¬¿Þ·Áʸ»ú½¸¹ç±¦È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤ò¼¨¤¹¡£
3722 <li> ¥¡¼¤¬ #Mcode_unit ¤ÇÃͤ¬À°¿ôÃͤλþ
3724 ¥¿¥¤¥×¤¬ #Mutf ¤Ê¤é¤Ð¡¢Ãͤϥ³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤǤ¢¤ê¡¢8, 16,
3725 32 ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ë¡£
3727 <li> ¥¡¼¤¬ #Mbom ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3729 ¥¿¥¤¥×¤¬ #Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32¤Ê¤é¤Ð¡¢ÃͤÏ
3730 BOM (Byte Order Mark) ¤ò»ÈÍѤ¹¤ë¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
3731 #Mnil ¤Ê¤é¤Ð¡¢»ÈÍѤ·¤Ê¤¤¡£Ãͤ¬ #Mmaybe ¤Ê¤é¤Ð¥Ç¥³¡¼¥É»þ¤Ë BOM
3732 ¤¬¤¢¤ë¤«¤É¤¦¤«¤òÄ´¤Ù¤ë¡£¤½¤ì°Ê³°¤Ê¤é¤Ð»ÈÍѤ¹¤ë¡£
3734 <li> ¥¡¼¤¬ #Mlittle_endian ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3736 ¥¿¥¤¥×¤¬ #Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32
3737 ¤Ê¤é¤Ð¡¢Ãͤϥ¨¥ó¥³¡¼¥É¤¬ little endian ¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
3738 #Mnil ¤Ê¤é¤Ð big endian ¤Ç¤¢¤ê¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð little endian ¤Ç¤¢¤ë¡£
3743 ¤Ï¤³¤Î¥³¡¼¥É·ÏÍѤΥ³¥ó¥Ð¡¼¥¿¤ò½é´ü¾õÂ֤˥ꥻ¥Ã¥È¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3744 ¤³¤Î´Ø¿ô¤Ï¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿¤È¤¤¤¦£±°ú¿ô¤ò¤È¤ë¡£
3746 $DECODER ¤Ï¥Ð¥¤¥ÈÎó¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥Ç¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3747 ¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î£´°ú¿ô¤ò¤È¤ë¡£
3749 @li ¥Ç¥³¡¼¥É¤¹¤ë¥Ð¥¤¥ÈÎó¤Ø¤Î¥Ý¥¤¥ó¥¿
3750 @li ¥Ç¥³¡¼¥É¤¹¤Ù¤¥Ð¥¤¥È¿ô
3751 @li ¥Ç¥³¡¼¥É·ë²Ì¤Îʸ»ú¤òÉղ乤ë M-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3752 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3754 $DECODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï 0 ¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï -1
3755 ¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3757 $ENCODER ¤Ï M-text ¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥¨¥ó¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3758 ¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î£¶°ú¿ô¤ò¤È¤ë¡£
3760 @li ¥¨¥ó¥³¡¼¥É¤¹¤ëM-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3761 @li M-text ¤Î¥¨¥ó¥³¡¼¥É³«»Ï°ÌÃÖ
3762 @li M-text ¤Î¥¨¥ó¥³¡¼¥É½ªÎ»°ÌÃÖ
3763 @li À¸À®¤·¤¿¥Ð¥¤¥È¤òÊÝ»ý¤¹¤ë¥á¥â¥êÎΰè¤Ø¤Î¥Ý¥¤¥ó¥¿
3764 @li ¥á¥â¥êÎΰè¤Î¥µ¥¤¥º
3765 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3767 $ENCODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï 0 ¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï -1
3768 ¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3770 $EXTRA_INFO ¤Ï¥³¡¼¥Ç¥£¥°¥·¥¹¥Æ¥à¤Ë´Ø¤¹¤ëÄɲþðÊó¤ò´Þ¤à¥Ç¡¼¥¿¹½Â¤¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3771 ¤³¤Î¥Ç¡¼¥¿¹½Â¤¤Î·¿ $TYPE ¤Ë°Í¸¤¹¤ë¡£
3775 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mconv_define_coding () ¤Ï $NAME
3776 ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë¤òÊÖ¤¹¡£ ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï #Mnil
3777 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
3785 mconv_define_coding (const char *name, MPlist *plist,
3786 int (*resetter) (MConverter *),
3787 int (*decoder) (const unsigned char *, int, MText *,
3789 int (*encoder) (MText *, int, int,
3790 unsigned char *, int,
3794 MSymbol sym = msymbol (name);
3796 MCodingSystem *coding;
/* NOTE(review): CODING is allocated here, and no release of it is
   visible on the MERROR paths below -- looks like a leak on failure,
   confirm.  */
3799 MSTRUCT_MALLOC (coding, MERROR_CODING);
/* A missing (Mnil) Mtype is replaced by Mcharset, so the Mcharset
   defaults further down also apply to a coding defined with type
   Mnil.  */
3801 if ((coding->type = (MSymbol) mplist_get (plist, Mtype)) == Mnil)
3802 coding->type = Mcharset;
3803 pl = (MPlist *) mplist_get (plist, Mcharsets);
3805 MERROR (MERROR_CODING, Mnil);
/* At most NUM_SUPPORTED_CHARSETS charsets are recorded; any further
   entries of the list are ignored without an error.  */
3806 coding->ncharsets = mplist_length (pl);
3807 if (coding->ncharsets > NUM_SUPPORTED_CHARSETS)
3808 coding->ncharsets = NUM_SUPPORTED_CHARSETS;
3809 for (i = 0; i < coding->ncharsets; i++, pl = MPLIST_NEXT (pl))
3811 MSymbol charset_name;
/* Every entry must be keyed by Msymbol and must name a charset that
   MCHARSET can resolve.  */
3813 if (MPLIST_KEY (pl) != Msymbol)
3814 MERROR (MERROR_CODING, Mnil);
3815 charset_name = MPLIST_SYMBOL (pl);
3816 if (! (coding->charsets[i] = MCHARSET (charset_name)))
3817 MERROR (MERROR_CODING, Mnil);
/* Start from the caller-supplied callbacks and data; the type-specific
   branches below fill in whatever was left NULL.  */
3820 coding->resetter = resetter;
3821 coding->decoder = decoder;
3822 coding->encoder = encoder;
3823 coding->ascii_compatible = 0;
3824 coding->extra_info = extra_info;
3825 coding->extra_spec = NULL;
/* Type Mcharset: default to the charset-based resetter, decoder and
   encoder.  */
3828 if (coding->type == Mcharset)
3830 if (! coding->resetter)
3831 coding->resetter = reset_coding_charset;
3832 if (! coding->decoder)
3833 coding->decoder = decode_coding_charset;
3834 if (! coding->encoder)
3835 coding->encoder = encode_coding_charset;
/* Type Mutf: a MCodingInfoUTF is allocated here, filled from PLIST and
   stored in coding->extra_info at the end of the branch, replacing
   the caller's EXTRA_INFO.  */
3837 else if (coding->type == Mutf)
3839 MCodingInfoUTF *info = malloc (sizeof (MCodingInfoUTF));
3842 if (! coding->resetter)
3843 coding->resetter = reset_coding_utf;
/* The Mcode_unit value is an integer carried in the plist's value
   slot, hence the cast.  It selects the UTF-8, UTF-16 or UTF-32
   decoder/encoder pair.  */
3845 info->code_unit_bits = (int) mplist_get (plist, Mcode_unit);
3846 if (info->code_unit_bits == 8)
3848 if (! coding->decoder)
3849 coding->decoder = decode_coding_utf_8;
3850 if (! coding->encoder)
3851 coding->encoder = encode_coding_utf_8;
3853 else if (info->code_unit_bits == 16)
3855 if (! coding->decoder)
3856 coding->decoder = decode_coding_utf_16;
3857 if (! coding->encoder)
3858 coding->encoder = encode_coding_utf_16;
3860 else if (info->code_unit_bits == 32)
3862 if (! coding->decoder)
3863 coding->decoder = decode_coding_utf_32;
3864 if (! coding->encoder)
3865 coding->encoder = encode_coding_utf_32;
/* Presumably reached only for a code-unit size other than 8, 16 or 32
   -- confirm against the guarding else.  */
3868 MERROR (MERROR_CODING, Mnil);
3869 val = (MSymbol) mplist_get (plist, Mbom);
3872 else if (val == Mmaybe)
/* Any non-Mnil Mlittle_endian value selects endian 1, otherwise 0.  */
3877 info->endian = (mplist_get (plist, Mlittle_endian) ? 1 : 0);
3878 coding->extra_info = info;
/* Type Miso_2022: a MCodingInfoISO2022 is allocated here, filled from
   the Minvocation, Mdesignation and Mflags parameters and stored in
   coding->extra_info, replacing the caller's EXTRA_INFO.  */
3880 else if (coding->type == Miso_2022)
3882 MCodingInfoISO2022 *info = malloc (sizeof (MCodingInfoISO2022));
3884 if (! coding->resetter)
3885 coding->resetter = reset_coding_iso_2022;
3886 if (! coding->decoder)
3887 coding->decoder = decode_coding_iso_2022;
3888 if (! coding->encoder)
3889 coding->encoder = encode_coding_iso_2022;
/* Defaults used when Minvocation does not override them: slot 0 is 0
   and slot 1 is -1.  */
3891 info->initial_invocation[0] = 0;
3892 info->initial_invocation[1] = -1;
3893 pl = (MPlist *) mplist_get (plist, Minvocation);
3896 if (MPLIST_KEY (pl) != Minteger)
3897 MERROR (MERROR_CODING, Mnil);
3898 info->initial_invocation[0] = MPLIST_INTEGER (pl);
/* A second element, when present, sets slot 1.  */
3899 if (! MPLIST_TAIL_P (pl))
3901 pl = MPLIST_NEXT (pl);
3902 if (MPLIST_KEY (pl) != Minteger)
3903 MERROR (MERROR_CODING, Mnil);
3904 info->initial_invocation[1] = MPLIST_INTEGER (pl);
/* Copy up to 32 designation values; copying stops at the first
   element whose key is not Minteger, and the remaining slots stay
   zero.  */
3907 memset (info->designations, 0, sizeof (info->designations));
3908 for (i = 0, pl = (MPlist *) mplist_get (plist, Mdesignation);
3909 i < 32 && pl && MPLIST_KEY (pl) == Minteger;
3910 i++, pl = MPLIST_NEXT (pl))
3911 info->designations[i] = MPLIST_INTEGER (pl);
/* Translate each flag symbol of the Mflags list into its
   MCODING_ISO_* bit.  A symbol matching none of the cases shown here
   sets no bit.  */
3914 MPLIST_DO (pl, (MPlist *) mplist_get (plist, Mflags))
3918 if (MPLIST_KEY (pl) != Msymbol)
3919 MERROR (MERROR_CODING, Mnil);
3920 val = MPLIST_SYMBOL (pl);
3921 if (val == Mreset_at_eol)
3922 info->flags |= MCODING_ISO_RESET_AT_EOL;
3923 else if (val == Mreset_at_cntl)
3924 info->flags |= MCODING_ISO_RESET_AT_CNTL;
3925 else if (val == Meight_bit)
3926 info->flags |= MCODING_ISO_EIGHT_BIT;
/* NOTE(review): Mlong_form sets MCODING_ISO_LOCKING_SHIFT, the same
   bit as Mlocking_shift below.  This looks like a copy-paste slip and
   a dedicated long-form flag was probably intended -- confirm.  */
3927 else if (val == Mlong_form)
3928 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3929 else if (val == Mdesignation_g0)
3930 info->flags |= MCODING_ISO_DESIGNATION_G0;
3931 else if (val == Mdesignation_g1)
3932 info->flags |= MCODING_ISO_DESIGNATION_G1;
3933 else if (val == Mdesignation_ctext)
3934 info->flags |= MCODING_ISO_DESIGNATION_CTEXT;
3935 else if (val == Mdesignation_ctext_ext)
3936 info->flags |= MCODING_ISO_DESIGNATION_CTEXT_EXT;
3937 else if (val == Mlocking_shift)
3938 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3939 else if (val == Msingle_shift)
3940 info->flags |= MCODING_ISO_SINGLE_SHIFT;
3941 else if (val == Msingle_shift_7)
3942 info->flags |= MCODING_ISO_SINGLE_SHIFT_7;
3943 else if (val == Meuc_tw_shift)
3944 info->flags |= MCODING_ISO_EUC_TW_SHIFT;
3945 else if (val == Miso_6429)
3946 info->flags |= MCODING_ISO_ISO6429;
3947 else if (val == Mrevision_number)
3948 info->flags |= MCODING_ISO_REVISION_NUMBER;
3949 else if (val == Mfull_support)
3950 info->flags |= MCODING_ISO_FULL_SUPPORT;
3953 coding->extra_info = info;
/* Presumably the branch for any other type: no defaults exist, so both
   a decoder and an encoder must have been supplied -- confirm against
   the guarding else.  */
3957 if (! coding->decoder || ! coding->encoder)
3958 MERROR (MERROR_CODING, Mnil);
3959 if (! coding->resetter)
/* Make the coding reachable through the Mcoding property of SYM and of
   its canonicalized form, and likewise for every alias listed under
   Maliases.  */
3963 msymbol_put (sym, Mcoding, coding);
3964 msymbol_put (msymbol__canonicalize (sym), Mcoding, coding);
3965 plist = (MPlist *) mplist_get (plist, Maliases);
3968 MPLIST_DO (pl, plist)
3972 if (MPLIST_KEY (pl) != Msymbol)
3974 alias = MPLIST_SYMBOL (pl);
3975 msymbol_put (alias, Mcoding, coding);
3976 msymbol_put (msymbol__canonicalize (alias), Mcoding, coding);
/* Record the new coding in the global list of defined codings.  */
3980 MLIST_APPEND1 (&coding_list, codings, coding, MERROR_CODING);
3988 @brief Resolve coding system name.
3990 The mconv_resolve_coding () function returns $SYMBOL if it
3991 represents a coding system. Otherwise, it canonicalizes $SYMBOL as
3992 a coding system name, and if the canonicalized name represents a
3993 coding system, returns it. Otherwise, it returns #Mnil. */
3995 @brief ¥³¡¼¥É·Ï¤Î̾Á°¤ò²ò·è¤¹¤ë.
3997 ´Ø¿ô mconv_resolve_coding () ¤Ï $SYMBOL ¤¬¥³¡¼¥É·Ï¤ò¼¨¤·¤Æ¤¤¤ì¤Ð¤½¤ì¤òÊÖ¤¹¡£
3998 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¥³¡¼¥É·Ï¤Î̾Á°¤È¤·¤Æ $SYMBOL
3999 ¤òÀµµ¬²½¤·¡¢¤½¤ì¤¬¥³¡¼¥É·Ï¤òɽ¤·¤Æ¤¤¤ì¤ÐÀµµ¬²½¤·¤¿ $SYMBOL ¤òÊÖ¤¹¡£
4000 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð#Mnil ¤òÊÖ¤¹¡£ */
4005 mconv_resolve_coding (MSymbol symbol)
/* First look SYMBOL up exactly as given.  */
4007 MCodingSystem *coding = find_coding (symbol);
/* Retry under the canonicalized name.  Presumably this happens only
   when the first lookup failed -- confirm against the guarding
   condition.  */
4011 symbol = msymbol__canonicalize (symbol);
4012 coding = find_coding (symbol);
/* The result is the name stored in the coding that was found, or Mnil
   when no coding was found.  */
4014 return (coding ? coding->name : Mnil);
4021 @brief List symbols representing coding systems.
4023 The mconv_list_codings () function makes an array of symbols
4024 representing coding systems, stores the pointer to the array in a
4025 place pointed to by $SYMBOLS, and returns the length of the array. */
4027 @brief ¥³¡¼¥É·Ï¤òɽ¤ï¤¹¥·¥ó¥Ü¥ë¤òÎóµó¤¹¤ë.
4029 ´Ø¿ô mchar_list_codings () ¤Ï¡¢¥³¡¼¥É·Ï¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤òʤ٤¿ÇÛÎó¤òºî¤ê¡¢
4030 $SYMBOLS ¤Ç¥Ý¥¤¥ó¥È¤µ¤ì¤¿¾ì½ê¤Ë¤³¤ÎÇÛÎó¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÃÖ¤¡¢ÇÛÎó¤ÎŤµ¤òÊÖ¤¹¡£ */
4033 mconv_list_codings (MSymbol **symbols)
/* Upper bound on the result size: every already-defined coding plus
   every entry of the definition list.  */
4035 int i = coding_list.used + mplist_length (coding_definition_list);
4039 MTABLE_MALLOC ((*symbols), i, MERROR_CODING);
/* First emit the symbol at the head of each definition plist.
   NOTE(review): other code in this file enters the same definition
   plist under each alias key, so one name may be emitted several
   times here -- confirm whether duplicates are intended.  */
4041 MPLIST_DO (plist, coding_definition_list)
4043 MPlist *pl = MPLIST_VAL (plist);
4044 (*symbols)[i++] = MPLIST_SYMBOL (pl);
/* Then append the name of each defined coding that is not a key of
   the definition list.
   NOTE(review): the keys of the definition list are canonicalized
   names, while the lookup uses coding->name as stored -- confirm the
   two forms match.  */
4046 for (j = 0; j < coding_list.used; j++)
4047 if (! mplist_find_by_key (coding_definition_list,
4048 coding_list.codings[j]->name))
4049 (*symbols)[i++] = coding_list.codings[j]->name;
4056 @brief Create a code converter bound to a buffer.
4058 The mconv_buffer_converter () function creates a pointer to a code
4059 converter for coding system $NAME. The code converter is bound
4060 to buffer area of $N bytes pointed to by $BUF. Subsequent
4061 decodings and encodings are done to/from this buffer area.
4063 $NAME can be #Mnil. In this case, a coding system associated
4064 with the current locale (LC_CTYPE) is used.
4067 If the operation was successful, mconv_buffer_converter () returns
4068 the created code converter. Otherwise it returns @c NULL and
4069 assigns an error code to the external variable #merror_code. */
4072 @brief ¥Ð¥Ã¥Õ¥¡¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë.
4074 ´Ø¿ô mconv_buffer_converter () ¤Ï¡¢¥³¡¼¥É·Ï $NAME
4075 ÍѤΥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢$BUF ¤Ç¼¨¤µ¤ì¤ëÂ礤µ $N
4076 ¥Ð¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤ë¡£
4077 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¡£
4079 $NAME ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4080 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4083 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð mconv_buffer_converter () ¤Ï ºîÀ®¤·¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£
4084 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4085 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4087 @latexonly \IPAlabel{mconverter} @endlatexonly */
4091 @c MERROR_SYMBOL, @c MERROR_CODING
4094 mconv_stream_converter () */
4097 mconv_buffer_converter (MSymbol name, const unsigned char *buf, int n)
4099 MCodingSystem *coding;
4100 MConverter *converter;
4101 MConverterStatus *internal;
/* Fall back to the coding system recorded for the LC_CTYPE locale.
   Presumably this is done only when NAME is Mnil -- confirm against
   the guarding condition.  */
4104 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4105 coding = find_coding (name);
4107 MERROR (MERROR_CODING, NULL);
/* The converter and its internal status record are both
   zero-initialized, then linked to each other and to the coding.  */
4108 MSTRUCT_CALLOC (converter, MERROR_CODING);
4109 MSTRUCT_CALLOC (internal, MERROR_CODING);
4110 converter->internal_info = internal;
4111 internal->coding = coding;
/* The resetter is optional; when present, a negative return from it
   is treated as failure.  */
4112 if (coding->resetter
4113 && (*coding->resetter) (converter) < 0)
4117 MERROR (MERROR_CODING, NULL);
/* Two scratch M-texts are created; work_mt is enlarged by
   MAX_UTF8_CHAR_BYTES up front.  */
4120 internal->unread = mtext ();
4121 internal->work_mt = mtext ();
4122 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
/* Bind the converter to the caller's buffer BUF of N bytes.  */
4123 internal->buf.in = buf;
4125 internal->bufsize = n;
4126 internal->binding = BINDING_BUFFER;
4134 @brief Create a code converter bound to a stream.
4136 The mconv_stream_converter () function creates a pointer to a code
4137 converter for coding system $NAME. The code converter is bound
4138 to stream $FP. Subsequent decodings and encodings are done
4139 to/from this stream.
4141 $NAME can be #Mnil. In this case, a coding system associated
4142 with the current locale (LC_CTYPE) is used.
4144 @return If the operation was successful, mconv_stream_converter ()
4145 returns the created code converter. Otherwise it returns @c NULL
4146 and assigns an error code to the external variable
4150 @brief ストリームに結び付けられたコードコンバータを作る.
4152 ´Ø¿ô mconv_stream_converter () ¤Ï¡¢¥³¡¼¥É·Ï $NAME
4153 ÍѤΥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP
4155 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¡£
4157 $NAME ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4158 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4161 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_stream_converter ()
4162 ¤ÏºîÀ®¤·¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL
4163 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4165 @latexonly \IPAlabel{mconverter} @endlatexonly */
4169 @c MERROR_SYMBOL, @c MERROR_CODING
4172 mconv_buffer_converter () */
/* Create a converter for coding system NAME bound to stream FP.
   NOTE(review): this listing omits several lines of the definition
   (return type, braces, some conditions, the final return); the
   comments below describe only the visible statements.  */
4175 mconv_stream_converter (MSymbol name, FILE *fp)
4177 MCodingSystem *coding;
4178 MConverter *converter;
4179 MConverterStatus *internal;
/* Use the coding system recorded for the LC_CTYPE locale.  The
   documentation above says this applies when NAME is Mnil; the
   guarding test itself is not visible here.  */
4182 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4183 coding = find_coding (name);
/* Error exit: MERROR_CODING with a NULL result.  Presumably taken
   when find_coding () found nothing -- confirm against the full file.  */
4185 MERROR (MERROR_CODING, NULL);
/* Allocate the public converter and its private status record.  */
4186 MSTRUCT_CALLOC (converter, MERROR_CODING);
4187 MSTRUCT_CALLOC (internal, MERROR_CODING);
4188 converter->internal_info = internal;
4189 internal->coding = coding;
/* Let the coding system initialise the converter if it supplies a
   resetter; a negative result is treated as failure.  */
4190 if (coding->resetter
4191 && (*coding->resetter) (converter) < 0)
4195 MERROR (MERROR_CODING, NULL);
/* Probe the stream with a zero-offset seek from the current position.
   The seekable flag is set from the outcome (0 after a failed seek,
   1 otherwise -- the else line is not visible here).  mconv_decode ()
   consults this flag before reading in bulk.  */
4198 if (fseek (fp, 0, SEEK_CUR) < 0)
4206 internal->seekable = 0;
4209 internal->seekable = 1;
/* unread receives characters pushed back by mconv_ungetc ();
   work_mt is the scratch M-text used by mconv_getc () / mconv_putc ().  */
4210 internal->unread = mtext ();
4211 internal->work_mt = mtext ();
4212 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
/* NOTE(review): the statement that stores FP is not visible here.  */
4214 internal->binding = BINDING_STREAM;
4222 @brief Reset a code converter.
4224 The mconv_reset_converter () function resets code converter
4225 $CONVERTER to the initial state.
4228 If $CONVERTER->coding has its own resetter function,
4229 mconv_reset_converter () returns the result of that function
4230 applied to $CONVERTER. Otherwise it returns 0. */
4233 @brief コードコンバータをリセットする.
4235 関数 mconv_reset_converter () はコードコンバータ $CONVERTER
4239 もし $CONVERTER->coding にリセット用の関数が定義されているならば、
4240 mconv_reset_converter () はその関数に $CONVERTER
4241 を適用した結果を返し、そうでなければ0を返す。 */
/* Return CONVERTER to its initial state: clear the counters, the
   result code, the carry-over byte count and the push-back text, then
   defer to the coding system's own resetter when there is one.  */
4244 mconv_reset_converter (MConverter *converter)
4246 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4248 converter->nchars = converter->nbytes = 0;
4249 converter->result = MCONVERSION_RESULT_SUCCESS;
4250 internal->carryover_bytes = 0;
/* Forget any characters pushed back with mconv_ungetc ().  */
4252 mtext_reset (internal->unread);
/* The resetter's result becomes this function's result.  Per the
   documentation above, 0 is returned when no resetter exists (that
   return is not visible in this listing).  */
4253 if (internal->coding->resetter)
4254 return (*internal->coding->resetter) (converter);
4261 @brief Free a code converter.
4263 The mconv_free_converter () function frees the code converter
4267 @brief コードコンバータを解放する.
4269 関数 mconv_free_converter () はコードコンバータ $CONVERTER
/* Release CONVERTER.  Only the unref calls are visible in this
   listing; NOTE(review): freeing of the status record and of
   CONVERTER itself is presumably in the omitted lines -- confirm.  */
4273 mconv_free_converter (MConverter *converter)
4275 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Drop the references to the scratch and push-back M-texts that the
   converter constructors obtained from mtext ().  */
4277 M17N_OBJECT_UNREF (internal->work_mt);
4278 M17N_OBJECT_UNREF (internal->unread);
4286 @brief Bind a buffer to a code converter.
4288 The mconv_rebind_buffer () function binds buffer area of $N bytes
4289 pointed to by $BUF to code converter $CONVERTER. Subsequent
4290 decodings and encodings are done to/from this newly bound buffer
4294 This function always returns $CONVERTER. */
4297 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë¥Ð¥Ã¥Õ¥¡Îΰè¤ò·ë¤ÓÉÕ¤±¤ë.
4299 ´Ø¿ô mconv_rebind_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿Â礤µ $N
4300 ¥Ð¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£
4301 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥Ð¥Ã¥Õ¥¡Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4304 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4306 @latexonly \IPAlabel{mconv_rebind_buffer} @endlatexonly */
4310 mconv_rebind_stream () */
/* Re-target CONVERTER at the N-byte area BUF.  The documentation
   above states that CONVERTER itself is returned (the return is not
   visible in this listing).  */
4313 mconv_rebind_buffer (MConverter *converter, const unsigned char *buf, int n)
4315 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Replace the bound area.  NOTE(review): one line between these
   assignments is missing from the listing; check whether the
   consumed-bytes offset is reset there.  */
4317 internal->buf.in = buf;
4319 internal->bufsize = n;
4320 internal->binding = BINDING_BUFFER;
4327 @brief Bind a stream to a code converter.
4329 The mconv_rebind_stream () function binds stream $FP to code
4330 converter $CONVERTER. Subsequent decodings and encodings are done
4331 to/from this newly bound stream.
4334 This function always returns $CONVERTER. */
4337 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë¥¹¥È¥ê¡¼¥à¤ò·ë¤ÓÉÕ¤±¤ë.
4339 ´Ø¿ô mconv_rebind_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP ¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4340 $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£
4341 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4344 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4346 @latexonly \IPAlabel{mconv_rebind_stream} @endlatexonly */
4350 mconv_rebind_buffer () */
/* Re-target CONVERTER at stream FP.  The documentation above states
   that CONVERTER itself is returned (the return is not visible in
   this listing).  */
4353 mconv_rebind_stream (MConverter *converter, FILE *fp)
4355 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Probe the new stream with a zero-offset seek and record whether it
   supports seeking (0 after a failed seek, 1 otherwise -- the else
   line is not visible here).  */
4357 if (fseek (fp, 0, SEEK_CUR) < 0)
4361 internal->seekable = 0;
4364 internal->seekable = 1;
/* NOTE(review): the statement that stores FP is not visible here.  */
4366 internal->binding = BINDING_STREAM;
4373 @brief Decode a byte sequence into an M-text.
4375 The mconv_decode () function decodes a byte sequence and appends
4376 the result at the end of M-text $MT. The source byte sequence is
4377 taken from either the buffer area or the stream that is currently
4378 bound to $CONVERTER.
4381 If the operation was successful, mconv_decode () returns updated
4382 $MT. Otherwise it returns @c NULL and assigns an error code to
4383 the external variable #merror_code. */
4386 @brief ¥Ð¥¤¥ÈÎó¤ò M-text ¤Ë¥Ç¥³¡¼¥É¤¹¤ë.
4388 ´Ø¿ô mconv_decode () ¤Ï¡¢¥Ð¥¤¥ÈÎó¤ò¥Ç¥³¡¼¥É¤·¤Æ¤½¤Î·ë²Ì¤ò M-text
4389 $MT ¤ÎËöÈø¤ËÄɲ乤롣¥Ç¥³¡¼¥É¸µ¤Î¥Ð¥¤¥ÈÎó¤Ï¡¢$CONVERTER
4390 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é¼è¤é¤ì¤ë¡£
4393 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode () ¤Ï¹¹¿·¤µ¤ì¤¿ $MT ¤òÊÖ¤¹¡£
4394 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4395 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4399 @c MERROR_IO, @c MERROR_CODING
4402 mconv_rebind_buffer (), mconv_rebind_stream (),
4403 mconv_encode (), mconv_encode_range (),
4404 mconv_decode_buffer (), mconv_decode_stream () */
/* Decode bytes from whatever CONVERTER is bound to and append the
   characters to MT.  Pushed-back characters are delivered first, then
   the bound buffer or stream is decoded.  NOTE(review): many lines of
   this definition (braces, loop headers, several conditions and
   declarations such as i, n and limit) are missing from the listing;
   the comments cover the visible statements only.  */
4407 mconv_decode (MConverter *converter, MText *mt)
4409 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Normalised character limit: the caller's at_most when positive,
   otherwise -1.  */
4410 int at_most = converter->at_most > 0 ? converter->at_most : -1;
/* Presumably rejects a read-only MT with a NULL result -- macro body
   not shown here.  */
4413 M_CHECK_READONLY (mt, NULL);
/* Bring MT to UTF-8 format before appending to it.  */
4415 if (mt->format != MTEXT_FORMAT_UTF_8)
4416 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
4419 mtext__enlarge (mt, MAX_UTF8_CHAR_BYTES);
/* Counters and result describe this call only.  */
4421 converter->nchars = converter->nbytes = 0;
4422 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Number of characters waiting in the push-back text.  */
4424 n = mtext_nchars (internal->unread);
4430 if (at_most > 0 && at_most < limit)
/* Hand back pushed-back characters from the end of unread towards its
   start, so the character pushed back last is delivered first; each
   one counts towards nchars.  */
4433 for (i = 0, n -= 1; i < limit; i++, converter->nchars++, n--)
4434 mtext_cat_char (mt, mtext_ref_char (internal->unread, n));
/* Remove the characters just delivered from the tail of unread.  */
4435 mtext_del (internal->unread, n + 1, internal->unread->nchars);
4438 if (at_most == limit)
/* Shrink the remaining character budget by what was delivered from
   the push-back text.  */
4440 converter->at_most -= converter->nchars;
4444 if (internal->binding == BINDING_BUFFER)
/* Decode the not-yet-consumed part of the bound area, then advance
   the consumed offset by the byte count the decoder reported.  */
4446 (*internal->coding->decoder) (internal->buf.in + internal->used,
4447 internal->bufsize - internal->used,
4449 internal->used += converter->nbytes;
4451 else if (internal->binding == BINDING_STREAM)
4453 unsigned char work[CONVERT_WORKSIZE];
/* The caller's last_block flag is saved and cleared here; it is put
   back further down (the governing conditions are not visible).  */
4454 int last_block = converter->last_block;
/* Read in bulk only when there is no character limit and the stream
   was found seekable; otherwise a single byte is fetched with getc ().  */
4455 int use_fread = at_most < 0 && internal->seekable;
4457 converter->last_block = 0;
4460 int nbytes, prev_nbytes;
4462 if (feof (internal->fp))
4465 nbytes = fread (work, sizeof (unsigned char), CONVERT_WORKSIZE,
4469 int c = getc (internal->fp);
4472 work[0] = c, nbytes = 1;
/* A stream error is reported through the converter's result code.  */
4477 if (ferror (internal->fp))
4479 converter->result = MCONVERSION_RESULT_IO_ERROR;
4484 converter->last_block = last_block;
/* nbytes accumulates across chunks, so the bytes consumed from this
   chunk are the difference from prev_nbytes.  */
4485 prev_nbytes = converter->nbytes;
4486 (*internal->coding->decoder) (work, nbytes, mt, converter);
/* The decoder consumed fewer bytes than were read: return the rest
   to the stream, by seeking back over them or by ungetc () of the
   single byte (the test choosing between the two is not visible).  */
4487 if (converter->nbytes - prev_nbytes < nbytes)
4490 fseek (internal->fp, converter->nbytes - prev_nbytes - nbytes,
4493 ungetc (work[0], internal->fp);
4497 || (converter->at_most > 0
4498 && converter->nchars == converter->at_most))
4501 converter->last_block = last_block;
4503 else /* internal->binding == BINDING_NONE */
4504 MERROR (MERROR_CODING, NULL);
/* Store the normalised limit back.  Note that a caller value of 0 or
   less therefore becomes -1.  */
4506 converter->at_most = at_most;
/* SUCCESS and INSUFFICIENT_SRC both count as success.  The
   documentation above says MT is then returned, NULL otherwise (the
   tail of this expression is not visible).  */
4507 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4508 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_SRC)
4515 @brief Decode a buffer area based on a coding system.
4517 The mconv_decode_buffer () function decodes $N bytes of the buffer
4518 area pointed to by $BUF based on the coding system $NAME. A
4519 temporary code converter for decoding is automatically created
4523 If the operation was successful, mconv_decode_buffer ()
4524 returns the resulting M-text. Otherwise it returns @c NULL and
4525 assigns an error code to the external variable #merror_code. */
4528 @brief ¥³¡¼¥É·Ï¤Ë´ð¤Å¤¤¤Æ¥Ð¥Ã¥Õ¥¡Îΰè¤ò¥Ç¥³¡¼¥É¤¹¤ë.
4530 ´Ø¿ô mconv_decode_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿ $N
4531 ¥Ð¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ò¡¢¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£
4532 ¥Ç¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4535 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_buffer () ¤ÏÆÀ¤é¤ì¤¿ M-text ¤òÊÖ¤¹¡£
4536 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4537 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4541 @c MERROR_IO, @c MERROR_CODING
4544 mconv_decode (), mconv_decode_stream () */
/* One-shot decode of the N bytes at BUF using coding system NAME, via
   a temporary buffer-bound converter.  NOTE(review): the declaration
   and creation of mt and the returns are in lines missing from this
   listing.  */
4547 mconv_decode_buffer (MSymbol name, const unsigned char *buf, int n)
4549 MConverter *converter = mconv_buffer_converter (name, buf, n);
/* On decode failure the partially filled result text is released.  */
4555 if (! mconv_decode (converter, mt))
4557 M17N_OBJECT_UNREF (mt);
/* The temporary converter is freed after decoding.  */
4560 mconv_free_converter (converter);
4567 @brief Decode a stream input based on a coding system.
4569 The mconv_decode_stream () function decodes the entire byte
4570 sequence read in from stream $FP based on the coding system $NAME.
4571 A code converter for decoding is automatically created and freed.
4574 If the operation was successful, mconv_decode_stream () returns
4575 the resulting M-text. Otherwise it returns @c NULL and assigns an
4576 error code to the external variable #merror_code. */
4579 @brief ¥³¡¼¥É·Ï¤Ë´ð¤Å¤¤¤Æ¥¹¥È¥ê¡¼¥àÆþÎϤò¥Ç¥³¡¼¥É¤¹¤ë.
4581 ´Ø¿ô mconv_decode_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP
4582 ¤«¤éÆɤ߹þ¤Þ¤ì¤ë¥Ð¥¤¥ÈÎóÁ´ÂΤò¡¢¥³¡¼¥É·Ï $NAME
4583 ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£¥Ç¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4586 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_stream () ¤ÏÆÀ¤é¤ì¤¿ M-text
4587 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4588 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4592 @c MERROR_IO, @c MERROR_CODING
4595 mconv_decode (), mconv_decode_buffer () */
/* One-shot decode of stream FP using coding system NAME, via a
   temporary stream-bound converter.  NOTE(review): the declaration
   and creation of mt and the returns are in lines missing from this
   listing.  */
4598 mconv_decode_stream (MSymbol name, FILE *fp)
4600 MConverter *converter = mconv_stream_converter (name, fp);
/* On decode failure the partially filled result text is released.  */
4606 if (! mconv_decode (converter, mt))
4608 M17N_OBJECT_UNREF (mt);
/* The temporary converter is freed after decoding.  */
4611 mconv_free_converter (converter);
4617 /***en @brief Encode an M-text into a byte sequence.
4619 The mconv_encode () function encodes M-text $MT and writes the
4620 resulting byte sequence into the buffer area or the stream that is
4621 currently bound to code converter $CONVERTER.
4624 If the operation was successful, mconv_encode () returns the
4625 number of written bytes. Otherwise it returns -1 and assigns an
4626 error code to the external variable #merror_code. */
4629 @brief M-text ¤ò¥Ð¥¤¥ÈÎó¤Ë¥¨¥ó¥³¡¼¥É¤¹¤ë.
4631 ´Ø¿ô mconv_encode () ¤Ï¡¢M-text $MT ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4632 $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤ËÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò½ñ¤¹þ¤à¡£
4635 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
4636 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4637 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4641 @c MERROR_IO, @c MERROR_CODING
4644 mconv_rebind_buffer (), mconv_rebind_stream(),
4645 mconv_decode (), mconv_encode_range () */
/* Encode the whole of MT: delegates to mconv_encode_range () with the
   range 0 .. mtext_nchars (MT) and returns its result unchanged.  */
4648 mconv_encode (MConverter *converter, MText *mt)
4650 return mconv_encode_range (converter, mt, 0, mtext_nchars (mt));
4656 @brief Encode a part of an M-text.
4658 The mconv_encode_range () function encodes the text between $FROM
4659 (inclusive) and $TO (exclusive) in M-text $MT and writes the
4660 resulting byte sequence into the buffer area or the stream that is
4661 currently bound to code converter $CONVERTER.
4664 If the operation was successful, mconv_encode_range () returns the
4665 number of written bytes. Otherwise it returns -1 and assigns an
4666 error code to the external variable #merror_code. */
4669 @brief M-text ¤Î°ìÉô¤ò¥Ð¥¤¥ÈÎó¤Ë¥¨¥ó¥³¡¼¥É¤¹¤ë.
4671 ´Ø¿ô mconv_encode_range () ¤Ï¡¢M-text $MT ¤Î $FROM
4672 ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO¼«ÂΤϴޤޤʤ¤¡Ë
4673 ¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4674 $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤ËÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò½ñ¤¹þ¤à¡£
4677 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_range ()
4678 ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
4679 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4683 @c MERROR_RANGE, @c MERROR_IO, @c MERROR_CODING
4686 mconv_rebind_buffer (), mconv_rebind_stream(),
4687 mconv_decode (), mconv_encode () */
/* Encode the characters FROM (inclusive) .. TO (exclusive) of MT and
   write the bytes to the buffer or stream bound to CONVERTER.
   Returns the number of bytes produced, or -1 on error.
   NOTE(review): braces, the loop header of the stream path and some
   declarations (written, this_nbytes) are missing from this listing;
   the comments cover the visible statements only.  */
4690 mconv_encode_range (MConverter *converter, MText *mt, int from, int to)
4692 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Position checks with -1 as the error result -- macro bodies not
   shown here; presumably they reject positions outside MT.  */
4694 M_CHECK_POS_X (mt, from, -1);
4695 M_CHECK_POS_X (mt, to, -1);
/* Honour a positive at_most by shortening the range to that many
   characters.  */
4699 if (converter->at_most > 0 && from + converter->at_most < to)
4700 to = from + converter->at_most;
/* Counters and result describe this call only.  */
4702 converter->nchars = converter->nbytes = 0;
4703 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Side effect on MT: the range is tagged with an Mcoding text
   property whose value is the coding system's name.  */
4705 mtext_put_prop (mt, from, to, Mcoding, internal->coding->name);
4706 if (internal->binding == BINDING_BUFFER)
/* Encode into the unused part of the bound area, then advance the
   used offset by the bytes produced.  */
4708 (*internal->coding->encoder) (mt, from, to,
4709 internal->buf.out + internal->used,
4710 internal->bufsize - internal->used,
4712 internal->used += converter->nbytes;
4714 else if (internal->binding == BINDING_STREAM)
/* Stream path: encode into a fixed-size work area, write it out, and
   advance FROM (the loop header is not visible here).  */
4716 unsigned char work[CONVERT_WORKSIZE];
4721 int prev_nbytes = converter->nbytes;
4724 (*internal->coding->encoder) (mt, from, to, work,
4725 CONVERT_WORKSIZE, converter);
/* Bytes produced by this encoder call alone.  */
4726 this_nbytes = converter->nbytes - prev_nbytes;
/* fwrite () may write fewer items than asked; keep writing the
   remainder until done or the stream reports an error.  */
4727 while (written < this_nbytes)
4729 int wrtn = fwrite (work + written, sizeof (unsigned char),
4730 this_nbytes - written, internal->fp);
4732 if (ferror (internal->fp))
/* An incomplete write is reported as an I/O error.  */
4736 if (written < this_nbytes)
4738 converter->result = MCONVERSION_RESULT_IO_ERROR;
4741 from += converter->nchars;
4744 else /* fail safe */
4745 MERROR (MERROR_CODING, -1);
/* SUCCESS and INSUFFICIENT_DST both yield the byte count; any other
   result yields -1.  */
4747 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4748 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_DST)
4749 ? converter->nbytes : -1);
4755 @brief Encode an M-text into a buffer area.
4757 The mconv_encode_buffer () function encodes M-text $MT based on
4758 coding system $NAME and writes the resulting byte sequence into the
4759 buffer area pointed to by $BUF. At most $N bytes are written. A
4760 temporary code converter for encoding is automatically created
4764 If the operation was successful, mconv_encode_buffer () returns
4765 the number of written bytes. Otherwise it returns -1 and assigns
4766 an error code to the external variable #merror_code. */
4769 @brief M-text ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¥Ð¥Ã¥Õ¥¡Îΰè¤Ë½ñ¤¹þ¤à.
4771 ´Ø¿ô mconv_encode_buffer () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME
4772 ¤Ë´ð¤Å¤¤¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò $BUF ¤Î»Ø¤¹¥Ð¥Ã¥Õ¥¡Îΰè¤Ë½ñ¤¹þ¤à¡£
4773 $N ¤Ï½ñ¤¹þ¤àºÇÂç¥Ð¥¤¥È¿ô¤Ç¤¢¤ë¡£
4774 ¥¨¥ó¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4777 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_buffer () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
4778 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð-1¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4782 @c MERROR_IO, @c MERROR_CODING
4785 mconv_encode (), mconv_encode_stream () */
/* One-shot encode of MT with coding system NAME into the N-byte area
   at BUF, via a temporary buffer-bound converter.  NOTE(review): the
   declaration of ret, any check of CONVERTER and the return are in
   lines missing from this listing.  */
4788 mconv_encode_buffer (MSymbol name, MText *mt, unsigned char *buf, int n)
4790 MConverter *converter = mconv_buffer_converter (name, buf, n);
/* Keep the result of mconv_encode () in ret so the temporary
   converter can be freed before returning.  */
4795 ret = mconv_encode (converter, mt);
4796 mconv_free_converter (converter);
4803 @brief Encode an M-text to write to a stream.
4805 The mconv_encode_stream () function encodes M-text $MT based on
4806 coding system $NAME and writes the resulting byte sequence to
4807 stream $FP. A temporary code converter for encoding is
4808 automatically created and freed.
4811 If the operation was successful, mconv_encode_stream () returns
4812 the number of written bytes. Otherwise it returns -1 and assigns
4813 an error code to the external variable #merror_code. */
4816 @brief M-text ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¥¹¥È¥ê¡¼¥à¤Ë½ñ¤¹þ¤à.
4818 ´Ø¿ô mconv_encode_stream () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME
4819 ¤Ë´ð¤Å¤¤¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò¥¹¥È¥ê¡¼¥à $FP
4820 ¤Ë½ñ¤½Ð¤¹¡£¥¨¥ó¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4823 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_stream ()
4824 ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
4825 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4829 @c MERROR_IO, @c MERROR_CODING
4832 mconv_encode (), mconv_encode_buffer (), mconv_encode_file () */
/* One-shot encode of MT with coding system NAME to stream FP, via a
   temporary stream-bound converter.  NOTE(review): the declaration of
   ret, any check of CONVERTER and the return are in lines missing
   from this listing.  */
4835 mconv_encode_stream (MSymbol name, MText *mt, FILE *fp)
4837 MConverter *converter = mconv_stream_converter (name, fp);
/* Keep the result of mconv_encode () in ret so the temporary
   converter can be freed before returning.  */
4842 ret = mconv_encode (converter, mt);
4843 mconv_free_converter (converter);
4850 @brief Read a character via a code converter.
4852 The mconv_getc () function reads one character from the buffer
4853 area or the stream that is currently bound to code converter
4854 $CONVERTER. The decoder of $CONVERTER is used to decode the byte
4855 sequence. The internal status of $CONVERTER is updated
4859 If the operation was successful, mconv_getc () returns the
4860 character read in. If the input source reaches EOF, it returns @c
4861 EOF without changing the external variable #merror_code. If an
4862 error is detected, it returns @c EOF and assigns an error code to
4866 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿·Ðͳ¤Ç°ìʸ»ú¤òÆɤߤ³¤à.
4868 ´Ø¿ô mconv_getc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
4869 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤éʸ»ú¤ò°ì¤ÄÆɤ߹þ¤à¡£
4870 ¥Ð¥¤¥ÈÎó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£
4871 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4874 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_getc () ¤ÏÆɤ߹þ¤Þ¤ì¤¿Ê¸»ú¤òÊÖ¤¹¡£ÆþÎϸ»¤¬
4875 EOF ¤Ë㤷¤¿¾ì¹ç¤Ï¡¢³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c EOF
4876 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c EOF ¤òÊÖ¤·¡¢#merror_code
4877 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4884 mconv_ungetc (), mconv_putc (), mconv_gets () */
/* Read one character through CONVERTER by running mconv_decode ()
   with a temporary limit of one character.  */
4887 mconv_getc (MConverter *converter)
4889 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Remember the caller's limit so it can be put back afterwards.  */
4890 int at_most = converter->at_most;
/* Decode into the emptied scratch text with the limit forced to 1.  */
4892 mtext_reset (internal->work_mt);
4893 converter->at_most = 1;
4894 mconv_decode (converter, internal->work_mt);
4895 converter->at_most = at_most;
/* If exactly one character was decoded, read it out of the scratch
   text's data.  The other arm is not visible here; the documentation
   above says EOF is returned in that case.  */
4896 return (converter->nchars == 1
4897 ? STRING_CHAR (internal->work_mt->data)
4904 @brief Push a character back to a code converter.
4906 The mconv_ungetc () function pushes character $C back to code
4907 converter $CONVERTER. Any number of characters can be pushed
4908 back. The most recently pushed back character is read first by the
4909 subsequent mconv_getc () call. The characters pushed back are
4910 registered only in $CONVERTER; they are not written to the input
4911 source. The internal status of $CONVERTER is updated
4915 If the operation was successful, mconv_ungetc () returns $C.
4916 Otherwise it returns @c EOF and assigns an error code to the
4917 external variable #merror_code. */
4920 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë°ìʸ»úÌ᤹.
4922 ´Ø¿ô mconv_ungetc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ëʸ»ú $C
4923 ¤ò²¡¤·Ì᤹¡£Ìᤵ¤ì¤ëʸ»ú¿ô¤ËÀ©¸Â¤Ï¤Ê¤¤¡£¤³¤Î¸å¤Ç mconv_getc ()
4924 ¤ò¸Æ¤Ó½Ð¤·¤¿ºÝ¤Ë¤Ï¡¢ºÇ¸å¤ËÌᤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤ËÆɤޤì¤ë¡£Ìᤵ¤ì¤¿Ê¸»ú¤Ï
4925 $CONVERTER ¤ÎÆâÉô¤ËÃߤ¨¤é¤ì¤ë¤À¤±¤Ç¤¢¤ê¡¢¼ÂºÝ¤ËÆþÎϸ»¤Ë½ñ¤¹þ¤Þ¤ì¤ë¤ï¤±¤Ç¤Ï¤Ê¤¤¡£
4926 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4929 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_ungetc () ¤Ï $C ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c
4930 EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4934 @c MERROR_CODING, @c MERROR_CHAR
4937 mconv_getc (), mconv_putc (), mconv_gets () */
/* Push character C back onto CONVERTER.  The character is only
   stored in the converter's push-back text; the bound input is not
   touched.  The documentation above says C is returned (the return
   is not visible in this listing).  */
4940 mconv_ungetc (MConverter *converter, int c)
4942 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Character check with EOF as the error result -- macro body not
   shown here.  */
4944 M_CHECK_CHAR (c, EOF);
4946 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Append at the end; mconv_decode () consumes this text from the end,
   so the most recent push-back is read first.  */
4947 mtext_cat_char (internal->unread, c);
4954 @brief Write a character via a code converter.
4956 The mconv_putc () function writes character $C to the buffer area
4957 or the stream that is currently bound to code converter
4958 $CONVERTER. The encoder of $CONVERTER is used to encode the
4959 character. The number of bytes actually written is set to the @c
4960 nbytes member of $CONVERTER. The internal status of $CONVERTER
4961 is updated appropriately.
4964 If the operation was successful, mconv_putc () returns $C.
4965 If an error is detected, it returns @c EOF and assigns
4966 an error code to the external variable #merror_code. */
4969 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò·Ðͳ¤·¤Æ°ìʸ»ú½ñ¤½Ð¤¹.
4971 ´Ø¿ô mconv_putc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
4972 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤Ëʸ»ú $C
4973 ¤ò½ñ¤½Ð¤¹¡£Ê¸»ú¤Î¥¨¥ó¥³¡¼¥É¤Ë¤Ï $CONVERTER
4974 ¤Î¥¨¥ó¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¼ÂºÝ¤Ë½ñ¤½Ð¤µ¤ì¤¿¥Ð¥¤¥È¿ô¤Ï¡¢$CONVERTER ¤Î¥á¥ó¥Ð¡¼
4975 @c nbytes ¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£$CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4978 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_putc () ¤Ï $C ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï
4979 @c EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4983 @c MERROR_CODING, @c MERROR_IO, @c MERROR_CHAR
4986 mconv_getc (), mconv_ungetc (), mconv_gets () */
/* Write character C through CONVERTER by placing it alone in the
   scratch text and encoding that one-character range.  */
4989 mconv_putc (MConverter *converter, int c)
4991 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Character check with EOF as the error result -- macro body not
   shown here.  */
4993 M_CHECK_CHAR (c, EOF);
4994 mtext_reset (internal->work_mt);
4995 mtext_cat_char (internal->work_mt, c);
/* A negative result from the encoder path signals failure.  The
   statements that follow are not visible here; the documentation
   above says EOF is returned on error and C on success.  */
4996 if (mconv_encode_range (converter, internal->work_mt, 0, 1) < 0)
5004 @brief Read a line using a code converter.
5006 The mconv_gets () function reads one line from the buffer area or
5007 the stream that is currently bound to code converter $CONVERTER.
5008 The decoder of $CONVERTER is used for decoding. The decoded
5009 character sequence is appended at the end of M-text $MT. The
5010 final newline character in the original byte sequence is not
5011 appended. The internal status of $CONVERTER is updated
5015 If the operation was successful, mconv_gets () returns the
5016 modified $MT. If it encounters EOF without reading a single
5017 character, it returns $MT without changing it. If an error is
5018 detected, it returns @c NULL and assigns an error code to
5022 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò»È¤Ã¤Æ°ì¹ÔÆɤ߹þ¤à.
5024 ´Ø¿ô mconv_gets () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
5025 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é 1 ¹Ô¤òÆɤ߹þ¤à¡£
5026 ¥Ð¥¤¥ÈÎó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER
5027 ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¥Ç¥³¡¼¥É¤µ¤ì¤¿Ê¸»úÎó¤Ï M-text $MT
5028 ¤ÎËöÈø¤ËÄɲ䵤ì¤ë¡£¸µ¤Î¥Ð¥¤¥ÈÎó¤Î½ªÃ¼²þ¹Ôʸ»ú¤ÏÄɲ䵤ì¤Ê¤¤¡£
5029 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
5032 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_gets () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT
5033 ¤òÊÖ¤¹¡£¤â¤·1ʸ»ú¤âÆɤޤº¤Ë EOF ¤ËÁø¶ø¤·¤¿¾ì¹ç¤Ï¡¢$MT
5034 ¤òÊѹ¹¤»¤º¤Ë¤½¤Î¤Þ¤ÞÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢
5035 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
5042 mconv_getc (), mconv_ungetc (), mconv_putc () */
5045 mconv_gets (MConverter *converter, MText *mt)
5049 M_CHECK_READONLY (mt, NULL);
5050 if (mt->format != MTEXT_FORMAT_UTF_8)
5051 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
5055 c = mconv_getc (converter);
5056 if (c == EOF || c == '\n')
5058 mtext_cat_char (mt, c);
5060 if (c == EOF && converter->result != MCONVERSION_RESULT_SUCCESS)
5061 /* mconv_getc () sets #merror_code */