1 /* coding.c -- code conversion module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief Coding system objects and API for them.
27 The m17n library represents a character encoding scheme (CES) of
28 coded character sets (CCS) as an object called @e coding @e
29 system. Application programs can add original coding systems.
31 To @e encode means converting code-points to character codes and
32 to @e decode means converting character codes back to code-points.
34 Application programs can decode a byte sequence with a specified
35 coding system into an M-text, and inversely, can encode an M-text
36 into a byte sequence. */
40 @brief コード系オブジェクトとそれに関する API.
42 m17n ライブラリは、符号化文字集合 (coded character set; CCS)
43 の文字符号化方式 (character encoding scheme; CES) を @e コード系
44 と呼ぶオブジェクトで表現する。
45 アプリケーションプログラムは独自にコード系を追加することもできる。
47 コードポイントから文字コードへの変換を @e エンコード
48 と呼び、文字コードからコードポイントへの変換を @e デコード と呼ぶ。
50 アプリケーションプログラムは、指定されたコード系でバイト列をデコードすることによって
51 M-text を得ることができる。また逆に、指定されたコード系で M-text
52 をエンコードすることによってバイト列を得ることができる。 */
56 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
57 /*** @addtogroup m17nInternal
65 #include <sys/types.h>
70 #include "m17n-misc.h"
73 #include "character.h"
80 #define NUM_SUPPORTED_CHARSETS 32
82 /** Structure for coding system object. */
86 /** Name of the coding system. */
89 /** Type of the coding system. */
92 /* Number of supported charsets. */
95 /** Array of supported charsets. */
96 MCharset *charsets[NUM_SUPPORTED_CHARSETS];
98 /** If non-NULL, function to call at the time of creating and
99 resetting a converter. */
100 int (*resetter) (MConverter *converter);
102 int (*decoder) (const unsigned char *str, int str_bytes, MText *mt,
103 MConverter *converter);
105 int (*encoder) (MText *mt, int from, int to,
106 unsigned char *str, int str_bytes,
107 MConverter *converter);
109 /** If non-zero, the coding system decodes/encodes ASCII characters as
111 int ascii_compatible;
113 /** Pointer to extra information given when the coding system is
114 defined. The meaning depends on <type>. */
117 /** Pointer to information referred on conversion. The meaning
118 depends on <type>. The value NULL means that the coding system
128 MCodingSystem **codings;
131 static struct MCodingList coding_list;
133 static MPlist *coding_definition_list;
137 Pointer to a structure of a coding system. */
139 コード系を表わすデータ構造へのポインタ */
140 MCodingSystem *coding;
143 Buffer for carryover bytes generated while decoding. */
145 デコード中のキャリィオーバーバイト用バッファ */
146 unsigned char carryover[256];
149 Number of carryover bytes. */
151 キャリィオーバーバイト数 */
155 Beginning of the byte sequence bound to this converter. */
157 このコンバータに結び付けられたバイト列の先頭位置 */
159 const unsigned char *in;
170 Number of bytes already consumed in buf. */
172 buf 内ですでに消費されたバイト数 */
176 Stream bound to this converter. */
178 このコンバータに結び付けられたストリーム */
182 Which of above two is in use. */
184 上記2者のいずれが使われているか */
204 /* Local macros and functions. */
206 /** At first, set SRC_BASE to SRC. Then check if we have already
207 produced AT_MOST chars. If so, set SRC_END to SRC, and jump to
208 source_end. Otherwise, get one more byte C from SRC. In that
209 case, if SRC == SRC_END, jump to the label source_end. */
211 #define ONE_MORE_BASE_BYTE(c) \
214 if (nchars == at_most) \
219 if (src == src_stop) \
221 if (src == src_end) \
223 src_base = src = source; \
224 if (src == src_end) \
226 src_stop = src_end; \
232 /** Get one more byte C from SRC. If SRC == SRC_END, jump to the
235 #define ONE_MORE_BYTE(c) \
237 if (src == src_stop) \
239 if (src == src_end) \
242 if (src == src_end) \
244 src_stop = src_end; \
250 #define REWIND_SRC_TO_BASE() \
252 if (src_base < source || src_base >= src_end) \
253 src_stop = internal->carryover + internal->carryover_bytes; \
258 /** Push back byte C to SRC. */
260 #define UNGET_ONE_BYTE(c) \
266 internal->carryover[0] = c; \
267 internal->carryover_bytes = 1; \
268 src = internal->carryover; \
269 src_stop = src + 1; \
274 /** Store multibyte representation of character C at DST and increment
275 DST to the next of the produced bytes. DST must be a pointer to
276 data area of M-text MT. If the produced bytes are going to exceed
277 DST_END, enlarge the data area of MT. */
279 #define EMIT_CHAR(c) \
281 int bytes = CHAR_BYTES (c); \
284 if (dst + bytes + 1 > dst_end) \
286 len = dst - mt->data; \
287 bytes = mt->allocated + bytes + (src_stop - src); \
288 mtext__enlarge (mt, bytes); \
289 dst = mt->data + len; \
290 dst_end = mt->data + mt->allocated; \
292 dst += CHAR_STRING (c, dst); \
297 /* Check if there is enough room to produce LEN bytes at DST. If not,
298 go to the label insufficient_destination. */
300 #define CHECK_DST(len) \
302 if (dst + (len) > dst_end) \
303 goto insufficient_destination; \
307 /** Take NUM_CHARS characters (NUM_BYTES bytes) already stored at
308 (MT->data + MT->nbytes) into MT, and put charset property on
309 them with CHARSET->name. */
311 #define TAKEIN_CHARS(mt, num_chars, num_bytes, charset) \
313 int chars = (num_chars); \
317 mtext__takein ((mt), chars, (num_bytes)); \
319 mtext_put_prop ((mt), (mt)->nchars - chars, (mt)->nchars, \
320 Mcharset, (void *) ((charset)->name)); \
325 #define SET_SRC(mt, format, from, to) \
327 if (format <= MTEXT_FORMAT_UTF_8) \
329 src = mt->data + POS_CHAR_TO_BYTE (mt, from); \
330 src_end = mt->data + POS_CHAR_TO_BYTE (mt, to); \
332 else if (format <= MTEXT_FORMAT_UTF_16BE) \
335 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, from); \
337 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, to); \
341 src = mt->data + (sizeof (int)) * from; \
342 src_end = mt->data + (sizeof (int)) * to; \
347 #define ONE_MORE_CHAR(c, bytes, format) \
349 if (src == src_end) \
351 if (format <= MTEXT_FORMAT_UTF_8) \
352 c = STRING_CHAR_AND_BYTES (src, bytes); \
353 else if (format <= MTEXT_FORMAT_UTF_16BE) \
355 c = mtext_ref_char (mt, from++); \
356 bytes = (sizeof (short)) * CHAR_UNITS_UTF16 (c); \
360 c = ((unsigned *) (mt->data))[from++]; \
361 bytes = sizeof (int); \
367 encode_unsupporeted_char (int c, unsigned char *dst, unsigned char *dst_end,
373 len = c < 0x10000 ? 8 : 10;
374 if (dst + len > dst_end)
377 mtext_put_prop (mt, pos, pos + 1, Mcoding, Mnil);
378 format = (c < 0xD800 ? "<U+%04X>"
379 : c < 0xE000 ? "<M+%04X>"
380 : c < 0x10000 ? "<U+%04X>"
381 : c < 0x110000 ? "<U+%06X>"
383 sprintf ((char *) dst, format, c);
389 /** Finish decoding of bytes at SOURCE (ending at SRC_END) into NCHARS
390 characters by CONVERTER into M-text MT. SRC is a pointer to the
391 not-yet processed bytes. ERROR is 1 iff an invalid byte was
395 finish_decoding (MText *mt, MConverter *converter, int nchars,
396 const unsigned char *source, const unsigned char *src_end,
397 const unsigned char *src,
400 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
403 internal->carryover_bytes = 0;
405 || (converter->last_block
406 && ! converter->lenient))
407 converter->result = MCONVERSION_RESULT_INVALID_BYTE;
408 else if (! converter->last_block)
410 unsigned char *dst = internal->carryover;
412 if (src < source || src > src_end)
414 dst += internal->carryover_bytes;
417 while (src < src_end)
419 internal->carryover_bytes = dst - internal->carryover;
420 converter->result = MCONVERSION_RESULT_INSUFFICIENT_SRC;
424 unsigned char *dst = mt->data + mt->nbytes;
425 unsigned char *dst_end = mt->data + mt->allocated;
426 const unsigned char *src_stop = src_end;
428 int last_nchars = nchars;
430 if (src < source || src > src_end)
431 src_stop = internal->carryover + internal->carryover_bytes;
434 if (converter->at_most && nchars == converter->at_most)
448 TAKEIN_CHARS (mt, nchars - last_nchars, dst - (mt->data + mt->nbytes),
450 internal->carryover_bytes = 0;
453 converter->nchars += nchars;
454 converter->nbytes += ((src < source || src > src_end) ? 0 : src - source);
455 return (converter->result == MCONVERSION_RESULT_INVALID_BYTE ? -1 : 0);
460 /* Stuff for coding systems of type MCODING_TYPE_CHARSET. */
463 setup_coding_charset (MCodingSystem *coding)
465 int ncharsets = coding->ncharsets;
466 unsigned *code_charset_table;
470 /* At first, reorder charset list by dimensions (a charset of
471 smaller dimension comes first). As the number of charsets is
472 usually very small (at most 32), we do a simple sort. */
477 MTABLE_ALLOCA (charsets, NUM_SUPPORTED_CHARSETS, MERROR_CODING);
478 memcpy (charsets, coding->charsets,
479 sizeof (MCharset *) * NUM_SUPPORTED_CHARSETS);
480 for (i = 0; i < 4; i++)
481 for (j = 0; j < ncharsets; j++)
482 if (charsets[j]->dimension == i)
483 coding->charsets[idx++] = charsets[j];
486 MTABLE_CALLOC (code_charset_table, 256, MERROR_CODING);
489 int dim = coding->charsets[ncharsets]->dimension;
490 int from = coding->charsets[ncharsets]->code_range[(dim - 1) * 4];
491 int to = coding->charsets[ncharsets]->code_range[(dim - 1) * 4 + 1];
493 if (coding->charsets[ncharsets]->ascii_compatible)
494 coding->ascii_compatible = 1;
496 code_charset_table[from++] |= 1 << ncharsets;
499 coding->extra_spec = (void *) code_charset_table;
504 reset_coding_charset (MConverter *converter)
506 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
507 MCodingSystem *coding = internal->coding;
510 && setup_coding_charset (coding) < 0)
517 decode_coding_charset (const unsigned char *source, int src_bytes, MText *mt,
518 MConverter *converter)
520 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
521 MCodingSystem *coding = internal->coding;
522 const unsigned char *src = internal->carryover;
523 const unsigned char *src_stop = src + internal->carryover_bytes;
524 const unsigned char *src_end = source + src_bytes;
525 const unsigned char *src_base;
526 unsigned char *dst = mt->data + mt->nbytes;
527 unsigned char *dst_end = mt->data + mt->allocated;
530 int at_most = converter->at_most > 0 ? converter->at_most : -1;
532 unsigned *code_charset_table = (unsigned *) coding->extra_spec;
533 MCharset **charsets = coding->charsets;
534 MCharset *charset = mcharset__ascii;
539 MCharset *this_charset = NULL;
543 ONE_MORE_BASE_BYTE (c);
544 mask = code_charset_table[c];
554 while (! (mask & 1)) mask >>= 1, idx++;
555 this_charset = charsets[idx];
556 dim = this_charset->dimension;
560 code = (code << 8) | c;
563 c = DECODE_CHAR (this_charset, code);
570 if (! converter->lenient)
572 REWIND_SRC_TO_BASE ();
574 this_charset = mcharset__binary;
577 if (this_charset != mcharset__ascii
578 && this_charset != charset)
580 TAKEIN_CHARS (mt, nchars - last_nchars,
581 dst - (mt->data + mt->nbytes), charset);
582 charset = this_charset;
583 last_nchars = nchars;
587 /* We reach here because of an invalid byte. */
591 TAKEIN_CHARS (mt, nchars - last_nchars,
592 dst - (mt->data + mt->nbytes), charset);
593 return finish_decoding (mt, converter, nchars,
594 source, src_end, src_base, error);
598 encode_coding_charset (MText *mt, int from, int to,
599 unsigned char *destination, int dst_bytes,
600 MConverter *converter)
602 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
603 MCodingSystem *coding = internal->coding;
604 unsigned char *src, *src_end;
605 unsigned char *dst = destination;
606 unsigned char *dst_end = dst + dst_bytes;
608 int ncharsets = coding->ncharsets;
609 MCharset **charsets = coding->charsets;
610 int ascii_compatible = coding->ascii_compatible;
611 enum MTextFormat format = mt->format;
613 SET_SRC (mt, format, from, to);
618 ONE_MORE_CHAR (c, bytes, format);
620 if (c < 0x80 && ascii_compatible)
628 MCharset *charset = NULL;
633 charset = charsets[i];
634 code = ENCODE_CHAR (charset, c);
635 if (code != MCHAR_INVALID_CODE)
637 if (++i == ncharsets)
638 goto unsupported_char;
641 CHECK_DST (charset->dimension);
642 if (charset->dimension == 1)
646 else if (charset->dimension == 2)
649 *dst++ = code & 0xFF;
651 else if (charset->dimension == 3)
654 *dst++ = (code >> 8) & 0xFF;
655 *dst++ = code & 0xFF;
660 *dst++ = (code >> 16) & 0xFF;
661 *dst++ = (code >> 8) & 0xFF;
662 *dst++ = code & 0xFF;
673 if (! converter->lenient)
675 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
677 goto insufficient_destination;
683 /* We reach here because of an unsupported char. */
684 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
687 insufficient_destination:
688 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
691 converter->nchars += nchars;
692 converter->nbytes += dst - destination;
693 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
697 /* Stuff for coding systems of type MCODING_TYPE_UTF (8). */
/** Classify the UTF-8 style byte sequence starting at P and evaluate
    to the charset it belongs to:

      - mcharset__unicode: a 1-, 2- or 3-byte sequence, or a 4-byte
	sequence whose code point is below 0x110000;
      - mcharset__m17n: a 4-byte sequence at or above 0x110000, or a
	5- or 6-byte sequence;
      - mcharset__binary: a trailing byte that the lead byte calls for
	is itself a character head (CHAR_HEAD_P), or the lead byte is
	0xFE or 0xFF.

    P is evaluated several times, so it must have no side effects, and
    as many bytes as the lead byte announces must be readable.

    In the 4-byte case the top five bits of the code point are the low
    three bits of the lead byte followed by bits 5..4 of the second
    byte.  They must be combined with `|': the two operands share no
    bits, so `&' always yields 0 and would classify every 4-byte
    sequence as Unicode.  A combined value above 0x10 means the code
    point is 0x110000 or higher.  */

#define UTF8_CHARSET(p)						\
  (! ((p)[0] & 0x80) ? (mcharset__unicode)			\
   : CHAR_HEAD_P ((p) + 1) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x20) ? (mcharset__unicode)			\
   : CHAR_HEAD_P ((p) + 2) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x10) ? (mcharset__unicode)			\
   : CHAR_HEAD_P ((p) + 3) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x08) ? ((((((p)[0] & 0x07) << 2)		\
			    | (((p)[1] & 0x30) >> 4)) <= 0x10)	\
			  ? (mcharset__unicode)			\
			  : (mcharset__m17n))			\
   : CHAR_HEAD_P ((p) + 4) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x04) ? (mcharset__m17n)			\
   : CHAR_HEAD_P ((p) + 5) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x02) ? (mcharset__m17n)			\
   : (mcharset__binary))
718 decode_coding_utf_8 (const unsigned char *source, int src_bytes, MText *mt,
719 MConverter *converter)
721 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
722 MCodingSystem *coding = internal->coding;
723 const unsigned char *src = internal->carryover;
724 const unsigned char *src_stop = src + internal->carryover_bytes;
725 const unsigned char *src_end = source + src_bytes;
726 const unsigned char *src_base;
727 unsigned char *dst = mt->data + mt->nbytes;
728 unsigned char *dst_end = mt->data + mt->allocated;
731 int at_most = converter->at_most > 0 ? converter->at_most : -1;
733 int full = converter->lenient || (coding->charsets[0] == mcharset__m17n);
734 MCharset *charset = NULL;
739 MCharset *this_charset = NULL;
741 ONE_MORE_BASE_BYTE (c);
745 else if (!(c & 0x40))
747 else if (!(c & 0x20))
748 bytes = 2, c &= 0x1F;
749 else if (!(c & 0x10))
750 bytes = 3, c &= 0x0F;
751 else if (!(c & 0x08))
752 bytes = 4, c &= 0x07;
753 else if (!(c & 0x04))
754 bytes = 5, c &= 0x03;
755 else if (!(c & 0x02))
756 bytes = 6, c &= 0x01;
763 if ((c1 & 0xC0) != 0x80)
765 c = (c << 6) | (c1 & 0x3F);
769 || c < 0xD800 || (c >= 0xE000 && c < 0x110000))
773 if (! converter->lenient)
775 REWIND_SRC_TO_BASE ();
777 this_charset = mcharset__binary;
780 if (this_charset != charset)
782 TAKEIN_CHARS (mt, nchars - last_nchars,
783 dst - (mt->data + mt->nbytes), charset);
784 charset = this_charset;
785 last_nchars = nchars;
789 /* We reach here because of an invalid byte. */
793 TAKEIN_CHARS (mt, nchars - last_nchars,
794 dst - (mt->data + mt->nbytes), charset);
795 return finish_decoding (mt, converter, nchars,
796 source, src_end, src_base, error);
800 encode_coding_utf_8 (MText *mt, int from, int to,
801 unsigned char *destination, int dst_bytes,
802 MConverter *converter)
804 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
805 MCodingSystem *coding = internal->coding;
806 unsigned char *src, *src_end;
807 unsigned char *dst = destination;
808 unsigned char *dst_end = dst + dst_bytes;
810 enum MTextFormat format = mt->format;
812 SET_SRC (mt, format, from, to);
814 if (format <= MTEXT_FORMAT_UTF_8
815 && (converter->lenient
816 || coding->charsets[0] == mcharset__m17n))
818 if (dst_bytes < src_end - src)
820 int byte_pos = (src + dst_bytes) - mt->data;
822 to = POS_BYTE_TO_CHAR (mt, byte_pos);
823 byte_pos = POS_CHAR_TO_BYTE (mt, to);
824 src_end = mt->data + byte_pos;
825 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
827 memcpy (destination, src, src_end - src);
829 dst += src_end - src;
837 ONE_MORE_CHAR (c, bytes, format);
839 if ((c >= 0xD800 && c < 0xE000) || c >= 0x110000)
842 dst += CHAR_STRING (c, dst);
846 /* We reach here because of an unsupported char. */
847 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
850 insufficient_destination:
851 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
854 converter->nchars += nchars;
855 converter->nbytes += dst - destination;
856 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
860 /* Stuff for coding systems of type MCODING_TYPE_UTF (16 & 32). */
881 enum utf_endian endian;
885 setup_coding_utf (MCodingSystem *coding)
887 MCodingInfoUTF *info = (MCodingInfoUTF *) (coding->extra_info);
888 MCodingInfoUTF *spec;
890 if (info->code_unit_bits == 8)
891 coding->ascii_compatible = 1;
892 else if (info->code_unit_bits == 16
893 || info->code_unit_bits == 32)
895 if (info->bom < 0 || info->bom > 2
896 || info->endian < 0 || info->endian > 1)
897 MERROR (MERROR_CODING, -1);
902 MSTRUCT_CALLOC (spec, MERROR_CODING);
904 coding->extra_spec = (void *) (spec);
909 reset_coding_utf (MConverter *converter)
911 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
912 MCodingSystem *coding = internal->coding;
913 struct utf_status *status = (struct utf_status *) &(converter->status);
916 && setup_coding_utf (coding) < 0)
920 status->surrogate = 0;
921 status->bom = ((MCodingInfoUTF *) (coding->extra_spec))->bom;
922 status->endian = ((MCodingInfoUTF *) (coding->extra_spec))->endian;
927 decode_coding_utf_16 (const unsigned char *source, int src_bytes, MText *mt,
928 MConverter *converter)
930 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
931 const unsigned char *src = internal->carryover;
932 const unsigned char *src_stop = src + internal->carryover_bytes;
933 const unsigned char *src_end = source + src_bytes;
934 const unsigned char *src_base;
935 unsigned char *dst = mt->data + mt->nbytes;
936 unsigned char *dst_end = mt->data + mt->allocated;
939 int at_most = converter->at_most > 0 ? converter->at_most : -1;
940 struct utf_status *status = (struct utf_status *) &(converter->status);
941 unsigned char b1, b2;
942 MCharset *charset = NULL;
945 if (status->bom != UTF_BOM_NO)
949 ONE_MORE_BASE_BYTE (b1);
953 status->endian = UTF_BIG_ENDIAN;
954 else if (c == 0xFFFE)
955 status->endian = UTF_LITTLE_ENDIAN;
956 else if (status->bom == UTF_BOM_MAYBE
957 || converter->lenient)
959 status->endian = UTF_BIG_ENDIAN;
960 REWIND_SRC_TO_BASE ();
967 status->bom = UTF_BOM_NO;
973 MCharset *this_charset = NULL;
975 ONE_MORE_BASE_BYTE (b1);
977 if (status->endian == UTF_BIG_ENDIAN)
978 c = ((b1 << 8) | b2);
980 c = ((b2 << 8) | b1);
981 if (c < 0xD800 || c >= 0xE000)
987 if (status->endian == UTF_BIG_ENDIAN)
988 c1 = ((b1 << 8) | b2);
990 c1 = ((b2 << 8) | b1);
991 if (c1 < 0xDC00 || c1 >= 0xE000)
993 c = 0x10000 + ((c - 0xD800) << 10) + (c1 - 0xDC00);
998 if (! converter->lenient)
1000 REWIND_SRC_TO_BASE ();
1003 if (status->endian == UTF_BIG_ENDIAN)
1004 c = ((b1 << 8) | b2);
1006 c = ((b2 << 8) | b1);
1007 this_charset = mcharset__binary;
1010 if (this_charset != charset)
1012 TAKEIN_CHARS (mt, nchars - last_nchars,
1013 dst - (mt->data + mt->nbytes), charset);
1014 charset = this_charset;
1015 last_nchars = nchars;
1019 /* We reach here because of an invalid byte. */
1023 TAKEIN_CHARS (mt, nchars - last_nchars,
1024 dst - (mt->data + mt->nbytes), charset);
1025 return finish_decoding (mt, converter, nchars,
1026 source, src_end, src_base, error);
1031 decode_coding_utf_32 (const unsigned char *source, int src_bytes, MText *mt,
1032 MConverter *converter)
1034 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1035 const unsigned char *src = internal->carryover;
1036 const unsigned char *src_stop = src + internal->carryover_bytes;
1037 const unsigned char *src_end = source + src_bytes;
1038 const unsigned char *src_base;
1039 unsigned char *dst = mt->data + mt->nbytes;
1040 unsigned char *dst_end = mt->data + mt->allocated;
1042 int last_nchars = 0;
1043 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1044 struct utf_status *status = (struct utf_status *) &(converter->status);
1045 unsigned char b1, b2, b3, b4;
1046 MCharset *charset = NULL;
1049 if (status->bom != UTF_BOM_NO)
1053 ONE_MORE_BASE_BYTE (b1);
1057 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1058 if (c == 0x0000FEFF)
1059 status->endian = UTF_BIG_ENDIAN;
1060 else if (c == 0xFFFE0000)
1061 status->endian = UTF_LITTLE_ENDIAN;
1062 else if (status->bom == UTF_BOM_MAYBE
1063 || converter->lenient)
1065 status->endian = UTF_BIG_ENDIAN;
1066 REWIND_SRC_TO_BASE ();
1073 status->bom = UTF_BOM_NO;
1079 MCharset *this_charset = NULL;
1081 ONE_MORE_BASE_BYTE (b1);
1085 if (status->endian == UTF_BIG_ENDIAN)
1086 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1088 c = (b4 << 24) | (b3 << 16) | (b2 << 8) | b1;
1089 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1092 if (! converter->lenient)
1094 REWIND_SRC_TO_BASE ();
1096 this_charset = mcharset__binary;
1099 if (this_charset != charset)
1101 TAKEIN_CHARS (mt, nchars - last_nchars,
1102 dst - (mt->data + mt->nbytes), charset);
1103 charset = this_charset;
1104 last_nchars = nchars;
1108 /* We reach here because of an invalid byte. */
1112 TAKEIN_CHARS (mt, nchars - last_nchars,
1113 dst - (mt->data + mt->nbytes), charset);
1114 return finish_decoding (mt, converter, nchars,
1115 source, src_end, src_base, error);
1120 encode_coding_utf_16 (MText *mt, int from, int to,
1121 unsigned char *destination, int dst_bytes,
1122 MConverter *converter)
1124 unsigned char *src, *src_end;
1125 unsigned char *dst = destination;
1126 unsigned char *dst_end = dst + dst_bytes;
1128 struct utf_status *status = (struct utf_status *) &(converter->status);
1129 int big_endian = status->endian == UTF_BIG_ENDIAN;
1130 enum MTextFormat format = mt->format;
1132 SET_SRC (mt, format, from, to);
1134 if (status->bom != UTF_BOM_NO)
1138 *dst++ = 0xFE, *dst++ = 0xFF;
1140 *dst++ = 0xFF, *dst++ = 0xFE;
1141 status->bom = UTF_BOM_NO;
1148 ONE_MORE_CHAR (c, bytes, format);
1150 if (c < 0xD800 || (c >= 0xE000 && c < 0x10000))
1154 *dst++ = c >> 8, *dst++ = c & 0xFF;
1156 *dst++ = c & 0xFF, *dst++ = c >> 8;
1158 else if (c >= 0x10000 && c < 0x110000)
1164 c1 = (c >> 10) + 0xD800;
1165 c2 = (c & 0x3FF) + 0xDC00;
1167 *dst++ = c1 >> 8, *dst++ = c1 & 0xFF,
1168 *dst++ = c2 >> 8, *dst++ = c2 & 0xFF;
1170 *dst++ = c1 & 0xFF, *dst++ = c1 >> 8,
1171 *dst++ = c2 & 0xFF, *dst++ = c2 >> 8;
1175 unsigned char buf[11];
1178 if (! converter->lenient)
1180 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1183 goto insufficient_destination;
1185 for (i = 0; i < len; i++)
1186 *dst++ = 0, *dst++ = buf[i];
1188 for (i = 0; i < len; i++)
1189 *dst++ = buf[i], *dst++ = 0;
1194 /* We reach here because of an unsupported char. */
1195 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1198 insufficient_destination:
1199 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1202 converter->nchars += nchars;
1203 converter->nbytes += dst - destination;
1204 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1208 encode_coding_utf_32 (MText *mt, int from, int to,
1209 unsigned char *destination, int dst_bytes,
1210 MConverter *converter)
1212 unsigned char *src, *src_end;
1213 unsigned char *dst = destination;
1214 unsigned char *dst_end = dst + dst_bytes;
1216 struct utf_status *status = (struct utf_status *) &(converter->status);
1217 int big_endian = status->endian == UTF_BIG_ENDIAN;
1218 enum MTextFormat format = mt->format;
1220 SET_SRC (mt, format, from, to);
1222 if (status->bom != UTF_BOM_NO)
1226 *dst++ = 0x00, *dst++ = 0x00, *dst++ = 0xFE, *dst++ = 0xFF;
1228 *dst++ = 0xFF, *dst++ = 0xFE, *dst++ = 0x00, *dst++ = 0x00;
1229 status->bom = UTF_BOM_NO;
1236 ONE_MORE_CHAR (c, bytes, format);
1238 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1242 *dst++ = 0x00, *dst++ = c >> 16,
1243 *dst++ = (c >> 8) & 0xFF, *dst++ = c & 0xFF;
1245 *dst++ = c & 0xFF, *dst++ = (c >> 8) & 0xFF,
1246 *dst++ = c >> 16, *dst++ = 0x00;
1250 unsigned char buf[11];
1253 if (! converter->lenient)
1255 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1258 goto insufficient_destination;
1260 for (i = 0; i < len; i++)
1261 *dst++ = 0, *dst++ = buf[i];
1263 for (i = 0; i < len; i++)
1264 *dst++ = buf[i], *dst++ = 0;
1269 /* We reach here because of an unsupported char. */
1270 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1273 insufficient_destination:
1274 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1277 converter->nchars += nchars;
1278 converter->nbytes += dst - destination;
1279 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1283 /* Stuff for coding systems of type MCODING_TYPE_ISO_2022. */
/* Named byte values of the control codes handled specially by the
   ISO-2022 coding systems. */
1285 #define ISO_CODE_STX 0x02 /* start text */
1286 #define ISO_CODE_SO 0x0E /* shift-out */
1287 #define ISO_CODE_SI 0x0F /* shift-in */
1288 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
1289 #define ISO_CODE_ESC 0x1B /* escape */
1290 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
1291 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
1293 /** Structure pointed by MCodingSystem.extra_spec. */
1295 struct iso_2022_spec
1299 /** Initial graphic registers (0..3) invoked to each graphic
1300 plane left and right. */
1301 int initial_invocation[2];
1303 /** Initially designated charsets for each graphic register. */
1304 MCharset *initial_designation[4];
1312 struct iso_2022_status
1315 MCharset *designation[4];
1316 unsigned single_shifting : 1;
1319 unsigned utf8_shifting : 1;
1320 MCharset *non_standard_charset;
1321 int non_standard_charset_bytes;
1322 int non_standard_encoding;
/* Classes of byte values distinguished by the ISO-2022 code.  The
   256-entry table declared together with the enum presumably holds the
   class of each possible byte value; its initialization is not part of
   this definition. */
1325 enum iso_2022_code_class {
1326 ISO_control_0, /* Control codes in the range
1327 0x00..0x1F and 0x7F, except for the
1328 following 4 codes. */
1329 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
1330 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
1331 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
1332 ISO_escape, /* ISO_CODE_ESC (0x1B) */
1333 ISO_control_1, /* Control codes in the range
1334 0x80..0x9F, except for the
1335 following 3 codes. */
1336 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
1337 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
1338 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
1339 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
1340 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
1341 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
1342 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
1343 } iso_2022_code_class[256];
/* Union of the four designation-policy flag bits.
   setup_coding_iso_2022 masks the coding system's flags with this value
   to obtain its designation policy; a zero result is treated there as
   "no designation policy". */
1346 #define MCODING_ISO_DESIGNATION_MASK \
1347 (MCODING_ISO_DESIGNATION_G0 \
1348 | MCODING_ISO_DESIGNATION_G1 \
1349 | MCODING_ISO_DESIGNATION_CTEXT \
1350 | MCODING_ISO_DESIGNATION_CTEXT_EXT)
1353 setup_coding_iso_2022 (MCodingSystem *coding)
1355 MCodingInfoISO2022 *info = (MCodingInfoISO2022 *) (coding->extra_info);
1356 int ncharsets = coding->ncharsets;
1357 struct iso_2022_spec *spec;
1358 int designation_policy = info->flags & MCODING_ISO_DESIGNATION_MASK;
1361 coding->ascii_compatible = 0;
1363 MSTRUCT_CALLOC (spec, MERROR_CODING);
1365 spec->flags = info->flags;
1366 spec->initial_invocation[0] = info->initial_invocation[0];
1367 spec->initial_invocation[1] = info->initial_invocation[1];
1368 for (i = 0; i < 4; i++)
1369 spec->initial_designation[i] = NULL;
1370 if (designation_policy)
1372 spec->n_designations = ncharsets;
1373 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1374 spec->n_designations += mcharset__iso_2022_table.used;
1375 MTABLE_CALLOC (spec->designations, spec->n_designations, MERROR_CODING);
1376 for (i = 0; i < spec->n_designations; i++)
1377 spec->designations[i] = -1;
1381 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1382 MERROR (MERROR_CODING, -1);
1383 spec->designations = NULL;
1386 for (i = 0; i < ncharsets; i++)
1388 int reg = info->designations[i];
1391 && coding->charsets[i]->final_byte > 0
1392 && (reg < -4 || reg > 3))
1393 MERROR (MERROR_CODING, -1);
1396 if (spec->initial_designation[reg])
1397 MERROR (MERROR_CODING, -1);
1398 spec->initial_designation[reg] = coding->charsets[i];
1402 if (! designation_policy
1403 && ! (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1404 MERROR (MERROR_CODING, -1);
1408 if (designation_policy)
1409 spec->designations[i] = reg;
1410 if (coding->charsets[i] == mcharset__ascii)
1411 coding->ascii_compatible = 1;
1414 if (coding->ascii_compatible
1415 && (spec->flags & (MCODING_ISO_DESIGNATION_G0
1416 | MCODING_ISO_DESIGNATION_CTEXT
1417 | MCODING_ISO_DESIGNATION_CTEXT_EXT
1418 | MCODING_ISO_LOCKING_SHIFT)))
1419 coding->ascii_compatible = 0;
1421 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1422 for (i = 0; i < mcharset__iso_2022_table.used; i++)
1424 MCharset *charset = mcharset__iso_2022_table.charsets[i];
1426 spec->designations[ncharsets + i]
1427 = ((designation_policy == MCODING_ISO_DESIGNATION_CTEXT
1428 || designation_policy == MCODING_ISO_DESIGNATION_CTEXT_EXT)
1429 ? (charset->code_range[0] == 32
1430 || charset->code_range[1] == 255)
1431 : designation_policy == MCODING_ISO_DESIGNATION_G1);
1434 spec->use_esc = ((spec->flags & MCODING_ISO_DESIGNATION_MASK)
1435 || ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1436 && (spec->initial_designation[2]
1437 || spec->initial_designation[3]))
1438 || (! (spec->flags & MCODING_ISO_EIGHT_BIT)
1439 && (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1440 || (spec->flags & MCODING_ISO_ISO6429));
1442 coding->extra_spec = (void *) spec;
1448 reset_coding_iso_2022 (MConverter *converter)
1450 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1451 MCodingSystem *coding = internal->coding;
1452 struct iso_2022_status *status
1453 = (struct iso_2022_status *) &(converter->status);
1454 struct iso_2022_spec *spec;
1458 && setup_coding_iso_2022 (coding) < 0)
1462 spec = (struct iso_2022_spec *) coding->extra_spec;
1463 status->invocation[0] = spec->initial_invocation[0];
1464 status->invocation[1] = spec->initial_invocation[1];
1465 for (i = 0; i < 4; i++)
1466 status->designation[i] = spec->initial_designation[i];
1467 status->single_shifting = 0;
/* Handle a decoded designation sequence: find the charset identified
   by dimension DIM, CHARS, final byte FINAL (and revision REV) and
   store it in status->designation[REG].

   The expansion uses the caller's `spec', `coding', `status' and `i'
   and jumps to the caller's `invalid_byte' label when FINAL is below
   '0' or at least 128, or when no charset is found.

   Two lookups are visible.  One calls MCHARSET_ISO_2022 () and, unless
   spec->flags has MCODING_ISO_FULL_SUPPORT, requires the result to be
   one of coding->charsets[].  The other scans mcharset__iso_2022_table
   for an entry matching REV, DIM, FINAL and whose code_range[1] equals
   CHARS (or is 255 when CHARS is 96).  The condition that selects
   between the two lookups is not visible in this excerpt.  */
1474 #define ISO2022_DECODE_DESIGNATION(reg, dim, chars, final, rev) \
1476 MCharset *charset; \
1478 if ((final) < '0' || (final) >= 128) \
1479 goto invalid_byte; \
1482 charset = MCHARSET_ISO_2022 ((dim), (chars), (final)); \
1483 if (! (spec->flags & MCODING_ISO_FULL_SUPPORT)) \
1487 for (i = 0; i < coding->ncharsets; i++) \
1488 if (charset == coding->charsets[i]) \
1490 if (i == coding->ncharsets) \
1491 goto invalid_byte; \
1498 for (i = 0; i < mcharset__iso_2022_table.used; i++) \
1500 charset = mcharset__iso_2022_table.charsets[i]; \
1501 if (charset->revision == (rev) \
1502 && charset->dimension == (dim) \
1503 && charset->final_byte == (final) \
1504 && (charset->code_range[1] == (chars) \
1505 || ((chars) == 96 && charset->code_range[1] == 255))) \
1508 if (i == mcharset__iso_2022_table.used) \
1509 goto invalid_byte; \
1511 status->designation[reg] = charset; \
/* Map CHARSET_NAME, the charset name carried by a compound-text
   non-standard segment, to a charset object.  The comparison is an
   exact strcmp (): "koi8-r" selects the charset named "koi8-r" and
   "big5-0" selects the charset named "big5".  What happens for any
   other name is not visible in this excerpt.  */
1516 find_ctext_non_standard_charset (char *charset_name)
1520 if (! strcmp (charset_name, "koi8-r"))
1521 charset = MCHARSET (msymbol ("koi8-r"));
1522 else if (! strcmp (charset_name, "big5-0"))
1523 charset = MCHARSET (msymbol ("big5"));
1530 decode_coding_iso_2022 (const unsigned char *source, int src_bytes, MText *mt,
1531 MConverter *converter)
1533 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1534 MCodingSystem *coding = internal->coding;
1535 const unsigned char *src = internal->carryover;
1536 const unsigned char *src_stop = src + internal->carryover_bytes;
1537 const unsigned char *src_end = source + src_bytes;
1538 const unsigned char *src_base;
1539 unsigned char *dst = mt->data + mt->nbytes;
1540 unsigned char *dst_end = mt->data + mt->allocated;
1542 int last_nchars = 0;
1543 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1544 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
1545 struct iso_2022_status *status
1546 = (struct iso_2022_status *) &(converter->status);
1547 MCharset *charset0, *charset1, *charset;
1549 MCharset *cns_charsets[15];
1551 charset0 = (status->invocation[0] >= 0
1552 ? status->designation[status->invocation[0]] : NULL);
1553 charset1 = (status->invocation[1] >= 0
1554 ? status->designation[status->invocation[1]] : NULL);
1555 charset = mcharset__ascii;
1557 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1561 memset (cns_charsets, 0, sizeof (cns_charsets));
1562 for (i = 0; i < coding->ncharsets; i++)
1563 if (coding->charsets[i]->dimension == 2
1564 && coding->charsets[i]->code_range[1] == 126)
1566 int final = coding->charsets[i]->final_byte;
1568 if (final >= 'G' && final <= 'M')
1569 cns_charsets[final - 'G'] = coding->charsets[i];
1571 cns_charsets[14] = coding->charsets[i];
1577 MCharset *this_charset = NULL;
1580 ONE_MORE_BASE_BYTE (c1);
1582 if (status->utf8_shifting)
1585 int bytes = CHAR_BYTES_BY_HEAD (c1);
1589 for (i = 1; i < bytes; i++)
1594 this_charset = UTF8_CHARSET (buf);
1595 c1 = STRING_CHAR_UTF8 (buf);
1599 if (status->non_standard_encoding > 0)
1603 this_charset = status->non_standard_charset;
1604 for (i = 1; i < status->non_standard_charset_bytes; i++)
1607 c1 = (c1 << 8) | c2;
1609 c1 = DECODE_CHAR (this_charset, c1);
1613 switch (iso_2022_code_class[c1])
1615 case ISO_graphic_plane_0:
1616 this_charset = charset0;
1619 case ISO_0x20_or_0x7F:
1621 || (charset0->code_range[0] != 32
1622 && charset0->code_range[1] != 255))
1623 /* This is SPACE or DEL. */
1624 this_charset = mcharset__ascii;
1626 /* This is a graphic character of plane 0. */
1627 this_charset = charset0;
1630 case ISO_graphic_plane_1:
1633 this_charset = charset1;
1636 case ISO_0xA0_or_0xFF:
1638 || charset1->code_range[0] == 33
1639 || ! (spec->flags & MCODING_ISO_EIGHT_BIT))
1641 /* This is a graphic character of plane 1. */
1644 this_charset = charset1;
1648 this_charset = mcharset__ascii;
1655 if ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1656 && status->designation[1])
1658 status->invocation[0] = 1;
1659 charset0 = status->designation[1];
1662 this_charset = mcharset__ascii;
1666 if (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1668 status->invocation[0] = 0;
1669 charset0 = status->designation[0];
1672 this_charset = mcharset__ascii;
1675 case ISO_single_shift_2_7:
1676 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT_7))
1678 this_charset = mcharset__ascii;
1682 goto label_escape_sequence;
1684 case ISO_single_shift_2:
1685 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1688 if (c1 < 0xA1 || (c1 > 0xA7 && c1 < 0xAF) || c1 > 0xAF
1689 || ! cns_charsets[c1 - 0xA1])
1691 status->designation[2] = cns_charsets[c1 - 0xA1];
1693 else if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1695 /* SS2 is handled as an escape sequence of ESC 'N' */
1697 goto label_escape_sequence;
1699 case ISO_single_shift_3:
1700 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1702 /* SS3 is handled as an escape sequence of ESC 'O' */
1704 goto label_escape_sequence;
1706 case ISO_control_sequence_introducer:
1707 /* CSI is handled as an escape sequence of ESC '[' ... */
1709 goto label_escape_sequence;
1712 if (! spec->use_esc)
1714 this_charset = mcharset__ascii;
1718 label_escape_sequence:
1719 /* Escape sequences handled here are invocation,
1720 designation, and direction specification. */
1723 case '&': /* revision of following character set */
1724 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1725 goto unused_escape_sequence;
1727 if (c1 < '@' || c1 > '~')
1730 if (c1 != ISO_CODE_ESC)
1733 goto label_escape_sequence;
1735 case '$': /* designation of 2-byte character set */
1736 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1737 goto unused_escape_sequence;
1739 if (c1 >= '@' && c1 <= 'B')
1740 { /* designation of JISX0208.1978, GB2312.1980, or
1742 ISO2022_DECODE_DESIGNATION (0, 2, 94, c1, -1);
1744 else if (c1 >= 0x28 && c1 <= 0x2B)
1745 { /* designation of (dimension 2, chars 94) character set */
1747 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2, -1);
1749 else if (c1 >= 0x2C && c1 <= 0x2F)
1750 { /* designation of (dimension 2, chars 96) character set */
1752 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2, -1);
1756 /* We must update these variables now. */
1757 charset0 = status->designation[status->invocation[0]];
1758 charset1 = status->designation[status->invocation[1]];
1761 case 'n': /* invocation of locking-shift-2 */
1762 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1763 || ! status->designation[2])
1765 status->invocation[0] = 2;
1766 charset0 = status->designation[2];
1769 case 'o': /* invocation of locking-shift-3 */
1770 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1771 || ! status->designation[3])
1773 status->invocation[0] = 3;
1774 charset0 = status->designation[3];
1777 case 'N': /* invocation of single-shift-2 */
1778 if (! ((spec->flags & MCODING_ISO_SINGLE_SHIFT)
1779 || (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1780 || ! status->designation[2])
1782 this_charset = status->designation[2];
1784 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1788 case 'O': /* invocation of single-shift-3 */
1789 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT)
1790 || ! status->designation[3])
1792 this_charset = status->designation[3];
1794 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1798 case '[': /* specification of direction */
1799 if (! (spec->flags & MCODING_ISO_ISO6429))
1801 /* For the moment, nested direction is not supported.
1802 So, (coding->mode & CODING_MODE_DIRECTION) zero means
1803 left-to-right, and nonzero means right-to-left. */
1807 case ']': /* end of the current direction */
1808 case '0': /* end of the current direction */
1812 case '1': /* start of left-to-right direction */
1819 case '2': /* start of right-to-left direction */
1833 char charset_name[16];
/* Test the CTEXT_EXT flag itself.  `!' binds tighter than `&', so the
   unparenthesized form computed (0 or 1) & flag and never looked at
   the flag bit in spec->flags.  */
1837 if (! (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT))
1839 /* Compound-text uses these escape sequences:
1841 ESC % G -- utf-8 bytes -- ESC % @
1842 ESC % / 1 M L -- charset name -- STX -- bytes --
1843 ESC % / 2 M L -- charset name -- STX -- bytes --
1844 ESC % / 3 M L -- charset name -- STX -- bytes --
1845 ESC % / 4 M L -- charset name -- STX -- bytes --
1847 It also uses this sequence but that is not yet
1850 ESC % / 0 M L -- charset name -- STX -- bytes -- */
1855 status->utf8_shifting = 1;
1860 if (! status->utf8_shifting)
1862 status->utf8_shifting = 0;
1868 if (c1 < '1' || c1 > '4')
1870 status->non_standard_charset_bytes = c1 - '0';
1873 if (c1 < 128 || c2 < 128)
1875 bytes = (c1 - 128) * 128 + (c2 - 128);
1876 for (i = 0; i < 16; i++)
1879 if (c1 == ISO_CODE_STX)
1881 charset_name[i] = TOLOWER (c1);
1885 charset_name[i++] = '\0';
1886 this_charset = find_ctext_non_standard_charset (charset_name);
1889 status->non_standard_charset = this_charset;
1890 status->non_standard_encoding = bytes - i;
1895 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1896 goto unused_escape_sequence;
1897 if (c1 >= 0x28 && c1 <= 0x2B)
1898 { /* designation of (dimension 1, chars 94) charset */
1900 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2, -1);
1902 else if (c1 >= 0x2C && c1 <= 0x2F)
1903 { /* designation of (dimension 1, chars 96) charset */
1905 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2, -1);
1909 /* We must update these variables now. */
1910 charset0 = status->designation[status->invocation[0]];
1911 charset1 = status->designation[status->invocation[1]];
1914 unused_escape_sequence:
1915 UNGET_ONE_BYTE (c1);
1917 this_charset = mcharset__ascii;
1921 if (this_charset->dimension == 1)
1923 if (this_charset->code_range[1] <= 128)
1926 else if (this_charset->dimension == 2)
1929 c1 = ((c1 & 0x7F) << 8) | (c2 & 0x7F);
1931 else /* i.e. (dimension == 3) */
1935 c1 = ((c1 & 0x7F) << 16) | ((c2 & 0x7F) << 8) | (c3 & 0x7F);
1937 c1 = DECODE_CHAR (this_charset, c1);
1941 if (! converter->lenient)
1943 REWIND_SRC_TO_BASE ();
1945 this_charset = mcharset__binary;
1948 if (this_charset != mcharset__ascii
1949 && this_charset != charset)
1951 TAKEIN_CHARS (mt, nchars - last_nchars,
1952 dst - (mt->data + mt->nbytes), charset);
1953 charset = this_charset;
1954 last_nchars = nchars;
1957 if (status->non_standard_encoding > 0)
1958 status->non_standard_encoding -= status->non_standard_charset_bytes;
1960 /* We reach here because of an invalid byte. */
1966 TAKEIN_CHARS (mt, nchars - last_nchars,
1967 dst - (mt->data + mt->nbytes), charset);
1968 return finish_decoding (mt, converter, nchars,
1969 source, src_end, src_base, error);
1973 /* Produce codes (escape sequence) for designating CHARSET to graphic
1974 register REG at DST, and increment DST. Jump to memory_shortage when
1975 fewer than 4 bytes remain before dst_end. The intermediate byte is
1976 "()*+"[REG] when CHARSET->code_range[0] != 32 and code_range[1] != 255,
and otherwise (presumably the else arm, which is not visible here)
",-./"[REG].  In the second of the two code_range tests the "()*+"
byte is written when SPEC->flags has MCODING_ISO_LONG_FORM or
CHARSET->final_byte is outside '@'..'B'; the macro has no SHORT_FORM
argument, and one further operand of that test is not visible in this
excerpt.  The sequence ends with CHARSET->final_byte.
Update STATUS->designation[REG]. */
1978 #define ISO2022_ENCODE_DESIGNATION(reg, charset, spec, status) \
1980 char *intermediate_char_94 = "()*+"; \
1981 char *intermediate_char_96 = ",-./"; \
1983 if (dst + 4 > dst_end) \
1984 goto memory_shortage; \
1985 *dst++ = ISO_CODE_ESC; \
1986 if (charset->dimension == 1) \
1988 if (charset->code_range[0] != 32 \
1989 && charset->code_range[1] != 255) \
1990 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1992 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1997 if (charset->code_range[0] != 32 \
1998 && charset->code_range[1] != 255) \
2000 if (spec->flags & MCODING_ISO_LONG_FORM \
2002 || charset->final_byte < '@' || charset->final_byte > 'B') \
2003 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
2006 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
2008 *dst++ = charset->final_byte; \
2010 status->designation[reg] = charset; \
2014 /* The following two macros produce codes (control character or escape
2015 sequence) for ISO-2022 single-shift functions (single-shift-2 and
2018 #define ISO2022_ENCODE_SINGLE_SHIFT_2(spec, status) \
2020 if (dst + 2 > dst_end) \
2021 goto memory_shortage; \
2022 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2023 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
2025 *dst++ = ISO_CODE_SS2; \
2026 status->single_shifting = 1; \
2030 #define ISO2022_ENCODE_SINGLE_SHIFT_3(spec, status) \
2032 if (dst + 2 > dst_end) \
2033 goto memory_shortage; \
2034 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2035 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
2037 *dst++ = ISO_CODE_SS3; \
2038 status->single_shifting = 1; \
2042 /* The following four macros produce codes (control character or
2043 escape sequence) for ISO-2022 locking-shift functions (shift-in,
2044 shift-out, locking-shift-2, and locking-shift-3). */
2046 #define ISO2022_ENCODE_SHIFT_IN(status) \
2048 if (dst + 1 > dst_end) \
2049 goto memory_shortage; \
2050 *dst++ = ISO_CODE_SI; \
2051 status->invocation[0] = 0; \
2055 #define ISO2022_ENCODE_SHIFT_OUT(status) \
2057 if (dst + 1 > dst_end) \
2058 goto memory_shortage; \
2059 *dst++ = ISO_CODE_SO; \
2060 status->invocation[0] = 1; \
2064 #define ISO2022_ENCODE_LOCKING_SHIFT_2(status) \
2066 if (dst + 2 > dst_end) \
2067 goto memory_shortage; \
2068 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
2069 status->invocation[0] = 2; \
2073 #define ISO2022_ENCODE_LOCKING_SHIFT_3(status) \
2075 if (dst + 2 > dst_end) \
2076 goto memory_shortage; \
2077 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
2078 status->invocation[0] = 3; \
/* Start a UTF-8 section in the output.  Reserves 3 + LEN bytes through
   CHECK_DST, writes a sequence beginning with ESC (the decoder in this
   file documents the section as "ESC % G -- utf-8 bytes -- ESC % @";
   the bytes after ESC are not visible here) and sets
   status->utf8_shifting.  Uses the caller's `dst' and `status'.  */
2081 #define ISO2022_ENCODE_UTF8_SHIFT_START(len) \
2083 CHECK_DST (3 + len); \
2084 *dst++ = ISO_CODE_ESC; \
2087 status->utf8_shifting = 1; \
/* End a UTF-8 section in the output: writes a sequence beginning with
   ESC (remaining bytes not visible here) and clears
   status->utf8_shifting.  Uses the caller's `dst' and `status'.  */
2091 #define ISO2022_ENCODE_UTF8_SHIFT_END() \
2094 *dst++ = ISO_CODE_ESC; \
2097 status->utf8_shifting = 0; \
/* Write the header of a compound-text non-standard segment for the
   charset called NAME (LEN bytes long).  Reserves room for the header,
   the name, the STX terminator and one character
   (6 + LEN + 1 + non_standard_charset_bytes) through CHECK_DST and
   remembers the header start in non_standard_begin.  The header
   contains the digit '0' + non_standard_charset_bytes followed by two
   placeholder length bytes that the caller fills in later, then NAME
   and ISO_CODE_STX.  non_standard_bytes is initialised to LEN + 1,
   i.e. the name plus STX.  Uses the caller's `dst',
   `non_standard_begin', `non_standard_bytes' and
   `non_standard_charset_bytes'.  */
2101 #define ISO2022_ENCODE_NON_STANDARD(name, len) \
2103 CHECK_DST (6 + len + 1 + non_standard_charset_bytes); \
2104 non_standard_begin = dst; \
2105 *dst++ = ISO_CODE_ESC; \
2108 *dst++ = '0' + non_standard_charset_bytes; \
2109 *dst++ = 0, *dst++ = 0; /* filled later */ \
2110 memcpy (dst, name, len); \
2112 *dst++ = ISO_CODE_STX; \
2113 non_standard_bytes = len + 1; \
/* Return the compound-text non-standard segment name for CHARSET,
   chosen by the charset's symbol name, and report through *BYTES how
   many bytes each character of that charset occupies in the segment.
   Charset "big5" yields the name "big5-0" with *BYTES = 2.  The action
   taken for "koi8-r" and for any other charset is not visible in this
   excerpt.  */
2118 find_ctext_non_standard_name (MCharset *charset, int *bytes)
2120 char *name = msymbol_name (charset->name);
2122 if (! strcmp (name, "koi8-r"))
2124 else if (! strcmp (name, "big5"))
2125 name = "big5-0", *bytes = 2;
2131 /* Designate CHARSET to a graphic register specified in
2132 SPEC->designations. If the register is invoked to neither graphic
2133 left nor right, invoke it to graphic left. DSTP points to a
2134 variable containing a memory address where the output must go.
2135 DST_END is the limit of that memory.
2137 Return 0 if it succeeds. Return -1 otherwise, which means that the
2138 memory area is too short. By side effect, update the variable that
2142 iso_2022_designate_invoke_charset (MCodingSystem *coding,
2144 struct iso_2022_spec *spec,
2145 struct iso_2022_status *status,
2146 unsigned char **dstp,
2147 unsigned char *dst_end)
2150 unsigned char *dst = *dstp;
2152 for (i = 0; i < 4; i++)
2153 if (charset == status->designation[i])
2158 /* CHARSET is not yet designated to any graphic registers. */
2159 for (i = 0; i < coding->ncharsets; i++)
2160 if (charset == coding->charsets[i])
2162 if (i == coding->ncharsets)
2164 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2165 if (charset == mcharset__iso_2022_table.charsets[i])
2167 i += coding->ncharsets;
2169 i = spec->designations[i];
2170 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2173 if (status->invocation[0] != i
2174 && status->invocation[1] != i)
2176 /* Graphic register I is not yet invoked. */
2179 case 0: /* graphic register 0 */
2180 ISO2022_ENCODE_SHIFT_IN (status);
2183 case 1: /* graphic register 1 */
2184 ISO2022_ENCODE_SHIFT_OUT (status);
2187 case 2: /* graphic register 2 */
2188 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2189 ISO2022_ENCODE_SINGLE_SHIFT_2 (spec, status);
2191 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2194 case 3: /* graphic register 3 */
2195 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2196 ISO2022_ENCODE_SINGLE_SHIFT_3 (spec, status);
2198 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2211 /* Reset the invocation/designation status to the initial one. SPEC
2212 and STATUS contain information about the initial and current
2213 invocation/designation status respectively. DSTP points to a
2214 variable containing a memory address where the output must go.
2215 DST_END is the limit of that memory.
2217 Return 0 if it succeeds. Return -1 otherwise, which means that the
2218 memory area is too short. By side effect, update the variable that
2222 iso_2022_reset_invocation_designation (struct iso_2022_spec *spec,
2223 struct iso_2022_status *status,
2224 unsigned char **dstp,
2225 unsigned char *dst_end)
2227 unsigned char *dst = *dstp;
2230 /* Reset the invocation status of GL. We have not yet supported GR
2232 if (status->invocation[0] != spec->initial_invocation[0]
2233 && spec->initial_invocation[0] >= 0)
2235 if (spec->initial_invocation[0] == 0)
2236 ISO2022_ENCODE_SHIFT_IN (status);
2237 else if (spec->initial_invocation[0] == 1)
2238 ISO2022_ENCODE_SHIFT_OUT (status);
2239 else if (spec->initial_invocation[0] == 2)
2240 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2241 else /* i.e. spec->initial_invocation[0] == 3 */
2242 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2245 /* Reset the designation status of G0..G3. */
2246 for (i = 0; i < 4; i++)
2247 if (status->designation[i] != spec->initial_designation[i]
2248 && spec->initial_designation[i])
2250 MCharset *charset = spec->initial_designation[i];
2252 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2265 encode_coding_iso_2022 (MText *mt, int from, int to,
2266 unsigned char *destination, int dst_bytes,
2267 MConverter *converter)
2269 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2270 MCodingSystem *coding = internal->coding;
2271 unsigned char *src, *src_end;
2272 unsigned char *dst = destination;
2273 unsigned char *dst_end = dst + dst_bytes;
2275 unsigned char *dst_base;
2276 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
2277 int full_support = spec->flags & MCODING_ISO_FULL_SUPPORT;
2278 struct iso_2022_status *status
2279 = (struct iso_2022_status *) &(converter->status);
2280 MCharset *primary, *charset0, *charset1;
2281 int next_primary_change;
2282 int ncharsets = coding->ncharsets;
2283 MCharset **charsets = coding->charsets;
2284 MCharset *cns_charsets[15];
2285 int ascii_compatible = coding->ascii_compatible;
2286 MCharset *non_standard_charset = NULL;
2287 int non_standard_charset_bytes = 0;
2288 int non_standard_bytes = 0;
2289 unsigned char *non_standard_begin = NULL;
2290 enum MTextFormat format = mt->format;
2292 SET_SRC (mt, format, from, to);
2294 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2298 memset (cns_charsets, 0, sizeof (cns_charsets));
2299 for (i = 0; i < ncharsets; i++)
2300 if (charsets[i]->dimension == 2)
2302 int final = charsets[i]->final_byte;
2304 if (final >= 'G' && final <= 'M')
2305 cns_charsets[final - 'G'] = charsets[i];
2307 cns_charsets[14] = charsets[i];
2311 next_primary_change = from;
2313 charset0 = status->designation[status->invocation[0]];
2314 charset1 = (status->invocation[1] < 0 ? NULL
2315 : status->designation[status->invocation[1]]);
2322 ONE_MORE_CHAR (c, bytes, format);
2324 if (c < 128 && ascii_compatible)
2326 if (status->utf8_shifting)
2327 ISO2022_ENCODE_UTF8_SHIFT_END ();
2331 else if (c <= 32 || c == 127)
2333 if (status->utf8_shifting)
2334 ISO2022_ENCODE_UTF8_SHIFT_END ();
2335 if (spec->flags & MCODING_ISO_RESET_AT_CNTL
2336 || (c == '\n' && spec->flags & MCODING_ISO_RESET_AT_EOL))
2338 if (iso_2022_reset_invocation_designation (spec, status,
2340 goto insufficient_destination;
2341 charset0 = status->designation[status->invocation[0]];
2342 charset1 = (status->invocation[1] < 0 ? NULL
2343 : status->designation[status->invocation[1]]);
2350 unsigned code = MCHAR_INVALID_CODE;
2351 MCharset *charset = NULL;
2353 int pos = from + nchars;
2355 if (pos >= next_primary_change)
2357 MSymbol primary_charset
2358 = (MSymbol) mtext_get_prop (mt, pos, Mcharset);
2359 primary = MCHARSET (primary_charset);
2360 if (primary && primary != mcharset__binary)
2362 if (primary->final_byte <= 0)
2364 else if (! full_support)
2368 for (i = 0; i < ncharsets; i++)
2369 if (primary == charsets[i])
2376 mtext_prop_range (mt, Mcharset, pos,
2377 NULL, &next_primary_change, 0);
2380 if (primary && primary != mcharset__binary)
2382 code = ENCODE_CHAR (primary, c);
2383 if (code != MCHAR_INVALID_CODE)
2388 if (c <= 32 || c == 127)
2391 charset = mcharset__ascii;
2397 for (i = 0; i < ncharsets; i++)
2399 charset = charsets[i];
2400 code = ENCODE_CHAR (charset, c);
2401 if (code != MCHAR_INVALID_CODE)
2406 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
2408 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2410 charset = mcharset__iso_2022_table.charsets[i];
2411 code = ENCODE_CHAR (charset, c);
2412 if (code != MCHAR_INVALID_CODE)
2415 if (i == mcharset__iso_2022_table.used)
2417 if (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2418 goto unsupported_char;
2419 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2424 goto unsupported_char;
2430 && (charset->final_byte >= 0
2431 || spec->flags & MCODING_ISO_EUC_TW_SHIFT))
2433 if (code >= 0x80 && code < 0xA0)
2434 goto unsupported_char;
2436 if (status->utf8_shifting)
2437 ISO2022_ENCODE_UTF8_SHIFT_END ();
2438 if (charset == charset0)
2440 else if (charset == charset1)
2444 unsigned char *p = NULL;
2446 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2450 if (cns_charsets[0] == charset)
2456 for (i = 1; i < 15; i++)
2457 if (cns_charsets[i] == charset)
2460 *dst++ = ISO_CODE_SS2;
2463 status->single_shifting = 1;
2468 if (iso_2022_designate_invoke_charset
2469 (coding, charset, spec, status, &dst, dst_end) < 0)
2470 goto insufficient_destination;
2471 charset0 = status->designation[status->invocation[0]];
2472 charset1 = (status->invocation[1] < 0 ? NULL
2473 : status->designation[status->invocation[1]]);
2475 if (status->single_shifting)
2477 = (spec->flags & MCODING_ISO_EIGHT_BIT) ? 0x80 : 0;
2478 else if (charset == charset0)
2483 if (charset->dimension == 1)
2486 *dst++ = code | gr_mask;
2488 else if (charset->dimension == 2)
2491 *dst++ = (code >> 8) | gr_mask;
2492 *dst++ = (code & 0xFF) | gr_mask;
2497 *dst++ = (code >> 16) | gr_mask;
2498 *dst++ = ((code >> 8) & 0xFF) | gr_mask;
2499 *dst++ = (code & 0xFF) | gr_mask;
2501 status->single_shifting = 0;
2503 else if (charset && spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2505 if (charset != non_standard_charset)
2507 char *name = (find_ctext_non_standard_name
2508 (charset, &non_standard_charset_bytes));
2512 int len = strlen (name);
2514 ISO2022_ENCODE_NON_STANDARD (name, len);
2515 non_standard_charset = charset;
2518 non_standard_charset = NULL;
2521 if (non_standard_charset)
2523 if (dst + non_standard_charset_bytes > dst_end)
2524 goto insufficient_destination;
2525 non_standard_bytes += non_standard_charset_bytes;
2526 non_standard_begin[4] = (non_standard_bytes / 128) | 0x80;
2527 non_standard_begin[5] = (non_standard_bytes % 128) | 0x80;
2528 if (non_standard_charset_bytes == 1)
2530 else if (non_standard_charset_bytes == 2)
2531 *dst++ = code >> 8, *dst++ = code & 0xFF;
2532 else if (non_standard_charset_bytes == 3)
2533 *dst++ = code >> 16, *dst++ = (code >> 8) & 0xFF,
2534 *dst++ = code & 0xFF;
2535 else /* i.e. non_standard_charset_bytes == 4 */
2536 *dst++ = code >> 24, *dst++ = (code >> 16) & 0xFF,
2537 *dst++ = (code >> 8) & 0xFF, *dst++ = code & 0xFF;
2541 int len = CHAR_BYTES (c);
2544 goto unsupported_char;
2545 if (! status->utf8_shifting)
2546 ISO2022_ENCODE_UTF8_SHIFT_START (len);
2549 CHAR_STRING (c, dst);
2553 goto unsupported_char;
2563 if (iso_2022_designate_invoke_charset (coding, mcharset__ascii,
2566 goto insufficient_destination;
2567 if (! converter->lenient)
2569 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
2571 goto insufficient_destination;
2577 /* We reach here because of an unsupported char. */
2578 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2581 insufficient_destination:
2583 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2586 if (converter->result == MCONVERSION_RESULT_SUCCESS
2587 && converter->last_block)
2589 if (status->utf8_shifting)
2591 ISO2022_ENCODE_UTF8_SHIFT_END ();
2594 if (spec->flags & MCODING_ISO_RESET_AT_EOL
2595 && charset0 != spec->initial_designation[0])
2597 if (iso_2022_reset_invocation_designation (spec, status,
2599 goto insufficient_destination;
2602 converter->nchars += nchars;
2603 converter->nbytes += dst - destination;
2604 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2608 /* Stuff for coding-systems of type MCODING_TYPE_MISC. */
2610 /* For SJIS handling... */
2612 #define SJIS_TO_JIS(s1, s2) \
2614 ? (((s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0)) << 8) \
2616 : (((s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1)) << 8) \
2617 | (s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F))))
2619 #define JIS_TO_SJIS(c1, c2) \
2621 ? (((c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1)) << 8) \
2622 | (c2 + ((c2 >= 0x60) ? 0x20 : 0x1F))) \
2623 : (((c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0)) << 8) \
/* Reset hook for the "sjis" coding system.  The first time it runs for
   a coding system (coding->ready is zero) it looks up the charsets
   "jisx0208.1983" and "jisx0201-kana" and installs them as
   coding->charsets[1] and coding->charsets[2], setting ncharsets to 3.
   charsets[0] is not assigned here.  What is done when either lookup
   fails, and the return value, are not visible in this excerpt.  */
2628 reset_coding_sjis (MConverter *converter)
2630 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2631 MCodingSystem *coding = internal->coding;
2633 if (! coding->ready)
2635 MSymbol kanji_sym = msymbol ("jisx0208.1983");
2636 MCharset *kanji = MCHARSET (kanji_sym);
2637 MSymbol kana_sym = msymbol ("jisx0201-kana");
2638 MCharset *kana = MCHARSET (kana_sym);
2640 if (! kanji || ! kana)
2642 coding->ncharsets = 3;
2643 coding->charsets[1] = kanji;
2644 coding->charsets[2] = kana;
/* Decode SRC_BYTES bytes of Shift_JIS text at SOURCE and append the
   result to MT, using CONVERTER for carry-over bytes, the at_most
   limit and the lenient flag.

   coding->charsets[0], [1] and [2] are used as the roman, kanji and
   kana charsets.  A lead byte 0x81..0x9F or 0xE0..0xEF followed by a
   valid trail byte is converted with SJIS_TO_JIS and decoded with the
   kanji charset; a byte 0xA1..0xDF is decoded with the kana charset.
   Bytes <= 0x20 and 0x7F are attributed to mcharset__ascii in a branch
   whose guard is not visible in this excerpt.

   On an invalid byte the lenient flag is tested; in the path that
   continues, the source is rewound to the start of the sequence and
   the byte is attributed to mcharset__binary.  Whenever the charset of
   the output changes (other than to ASCII) the characters produced so
   far are handed to TAKEIN_CHARS with the previous charset.

   The return value is whatever finish_decoding () returns.  */
2651 decode_coding_sjis (const unsigned char *source, int src_bytes, MText *mt,
2652 MConverter *converter)
2654 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2655 MCodingSystem *coding = internal->coding;
2656 const unsigned char *src = internal->carryover;
2657 const unsigned char *src_stop = src + internal->carryover_bytes;
2658 const unsigned char *src_end = source + src_bytes;
2659 const unsigned char *src_base;
2660 unsigned char *dst = mt->data + mt->nbytes;
/* Keep room for one more UTF-8 character before the end of MT's buffer.  */
2661 unsigned char *dst_end = mt->data + mt->allocated - MAX_UTF8_CHAR_BYTES;
2663 int last_nchars = 0;
2664 int at_most = converter->at_most > 0 ? converter->at_most : -1;
2666 MCharset *charset_roman = coding->charsets[0];
2667 MCharset *charset_kanji = coding->charsets[1];
2668 MCharset *charset_kana = coding->charsets[2];
2669 MCharset *charset = mcharset__ascii;
2674 MCharset *this_charset;
2677 ONE_MORE_BASE_BYTE (c1);
2682 this_charset = ((c1 <= 0x20 || c1 == 0x7F)
2686 else if ((c1 >= 0x81 && c1 <= 0x9F) || (c1 >= 0xE0 && c1 <= 0xEF))
/* Valid Shift_JIS trail bytes are 0x40..0x7E and 0x80..0xFC.  The
   second range used to start at decimal 80, and 0x7F was accepted even
   though SJIS_TO_JIS maps it to the same code as 0x7E.  */
2689 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0x80 && c2 <= 0xFC))
2691 this_charset = charset_kanji;
2692 c1 = SJIS_TO_JIS (c1, c2);
2697 else if (c1 >= 0xA1 && c1 <= 0xDF)
2699 this_charset = charset_kana;
2705 c = DECODE_CHAR (this_charset, c1);
2710 if (! converter->lenient)
2712 REWIND_SRC_TO_BASE ();
2714 this_charset = mcharset__binary;
2717 if (this_charset != mcharset__ascii
2718 && this_charset != charset)
2720 TAKEIN_CHARS (mt, nchars - last_nchars,
2721 dst - (mt->data + mt->nbytes), charset);
2722 charset = this_charset;
2723 last_nchars = nchars;
2727 /* We reach here because of an invalid byte. */
2731 TAKEIN_CHARS (mt, nchars - last_nchars,
2732 dst - (mt->data + mt->nbytes), charset);
2733 return finish_decoding (mt, converter, nchars,
2734 source, src_end, src_base, error);
/* Encode the characters of MT between FROM and TO as Shift_JIS into
   DESTINATION, which is DST_BYTES long.

   coding->charsets[0], [1] and [2] are used as the roman, kanji and
   kana charsets and are tried in that order with ENCODE_CHAR.  A kanji
   code is split into its two bytes and converted with JIS_TO_SJIS; a
   kana code is written with the high bit set.  Characters <= 0x20 and
   0x7F take a separate branch whose body is not visible in this
   excerpt.  A character that none of the charsets can encode is
   handled according to converter->lenient, using
   encode_unsupporeted_char () in the path visible here.

   converter->nchars and converter->nbytes are advanced by the amount
   processed.  Returns -1 when converter->result is
   MCONVERSION_RESULT_INVALID_CHAR and 0 otherwise, which includes
   MCONVERSION_RESULT_INSUFFICIENT_DST.  */
2738 encode_coding_sjis (MText *mt, int from, int to,
2739 unsigned char *destination, int dst_bytes,
2740 MConverter *converter)
2742 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2743 MCodingSystem *coding = internal->coding;
2744 unsigned char *src, *src_end;
2745 unsigned char *dst = destination;
2746 unsigned char *dst_end = dst + dst_bytes;
2748 MCharset *charset_roman = coding->charsets[0];
2749 MCharset *charset_kanji = coding->charsets[1];
2750 MCharset *charset_kana = coding->charsets[2];
2751 enum MTextFormat format = mt->format;
2753 SET_SRC (mt, format, from, to);
2760 ONE_MORE_CHAR (c, bytes, format);
2762 if (c <= 0x20 || c == 0x7F)
2769 if ((code = ENCODE_CHAR (charset_roman, c)) != MCHAR_INVALID_CODE)
2774 else if ((code = ENCODE_CHAR (charset_kanji, c))
2775 != MCHAR_INVALID_CODE)
/* Split the two-byte JIS code and remap it to a Shift_JIS byte pair.  */
2777 int c1 = code >> 8, c2 = code & 0xFF;
2778 code = JIS_TO_SJIS (c1, c2);
2781 *dst++ = code & 0xFF;
2783 else if ((code = ENCODE_CHAR (charset_kana, c))
2784 != MCHAR_INVALID_CODE)
2787 *dst++ = code | 0x80;
2791 if (! converter->lenient)
2793 len = encode_unsupporeted_char (c, dst, dst_end,
2796 goto insufficient_destination;
2803 /* We reach here because of an unsupported char. */
2804 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2807 insufficient_destination:
2808 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2811 converter->nchars += nchars;
2812 converter->nbytes += dst - destination;
2813 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
/* Return the coding system object attached to the symbol NAME under
   the key Mcoding.

   The fallback path visible here canonicalizes NAME, looks the result
   up in coding_definition_list, defines the coding system from the
   stored parameter list with mconv_define_coding (), fetches the newly
   attached object, and then pops the definition entry from the list
   and drops the reference to it.  The conditions that guard this
   fallback and the final return statement are not visible in this
   excerpt.  */
2817 static MCodingSystem *
2818 find_coding (MSymbol name)
2820 MCodingSystem *coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2825 MSymbol sym = msymbol__canonicalize (name);
2827 plist = mplist_find_by_key (coding_definition_list, sym);
2830 pl = MPLIST_PLIST (plist);
/* The first element of the stored plist is the real coding name; the
   rest are the definition parameters.  */
2831 name = MPLIST_VAL (pl);
2832 mconv_define_coding (MSYMBOL_NAME (name), MPLIST_NEXT (pl),
2833 NULL, NULL, NULL, NULL);
2834 coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2835 plist = mplist_pop (plist);
2836 M17N_OBJECT_UNREF (plist);
2841 #define BINDING_NONE 0
2842 #define BINDING_BUFFER 1
2843 #define BINDING_STREAM 2
2845 #define CONVERT_WORKSIZE 0x10000
2851 mcoding__init (void)
2854 MPlist *param, *charsets, *pl;
2856 MLIST_INIT1 (&coding_list, codings, 128);
2857 coding_definition_list = mplist ();
2859 /* ISO-2022 specific initialize routine. */
2860 for (i = 0; i < 0x20; i++)
2861 iso_2022_code_class[i] = ISO_control_0;
2862 for (i = 0x21; i < 0x7F; i++)
2863 iso_2022_code_class[i] = ISO_graphic_plane_0;
2864 for (i = 0x80; i < 0xA0; i++)
2865 iso_2022_code_class[i] = ISO_control_1;
2866 for (i = 0xA1; i < 0xFF; i++)
2867 iso_2022_code_class[i] = ISO_graphic_plane_1;
2868 iso_2022_code_class[0x20] = iso_2022_code_class[0x7F] = ISO_0x20_or_0x7F;
2869 iso_2022_code_class[0xA0] = iso_2022_code_class[0xFF] = ISO_0xA0_or_0xFF;
2870 iso_2022_code_class[0x0E] = ISO_shift_out;
2871 iso_2022_code_class[0x0F] = ISO_shift_in;
2872 iso_2022_code_class[0x19] = ISO_single_shift_2_7;
2873 iso_2022_code_class[0x1B] = ISO_escape;
2874 iso_2022_code_class[0x8E] = ISO_single_shift_2;
2875 iso_2022_code_class[0x8F] = ISO_single_shift_3;
2876 iso_2022_code_class[0x9B] = ISO_control_sequence_introducer;
2878 Mcoding = msymbol ("coding");
2880 Mutf = msymbol ("utf");
2881 Miso_2022 = msymbol ("iso-2022");
2883 Mreset_at_eol = msymbol ("reset-at-eol");
2884 Mreset_at_cntl = msymbol ("reset-at-cntl");
2885 Meight_bit = msymbol ("eight-bit");
2886 Mlong_form = msymbol ("long-form");
2887 Mdesignation_g0 = msymbol ("designation-g0");
2888 Mdesignation_g1 = msymbol ("designation-g1");
2889 Mdesignation_ctext = msymbol ("designation-ctext");
2890 Mdesignation_ctext_ext = msymbol ("designation-ctext-ext");
2891 Mlocking_shift = msymbol ("locking-shift");
2892 Msingle_shift = msymbol ("single-shift");
2893 Msingle_shift_7 = msymbol ("single-shift-7");
2894 Meuc_tw_shift = msymbol ("euc-tw-shift");
2895 Miso_6429 = msymbol ("iso-6429");
2896 Mrevision_number = msymbol ("revision-number");
2897 Mfull_support = msymbol ("full-support");
2898 Mmaybe = msymbol ("maybe");
2900 Mtype = msymbol ("type");
2901 Mcharsets = msymbol_as_managing_key ("charsets");
2902 Mflags = msymbol_as_managing_key ("flags");
2903 Mdesignation = msymbol_as_managing_key ("designation");
2904 Minvocation = msymbol_as_managing_key ("invocation");
2905 Mcode_unit = msymbol ("code-unit");
2906 Mbom = msymbol ("bom");
2907 Mlittle_endian = msymbol ("little-endian");
2910 charsets = mplist ();
2912 /* Setup predefined codings. */
2913 mplist_set (charsets, Msymbol, Mcharset_ascii);
2914 pl = mplist_add (pl, Mtype, Mcharset);
2915 pl = mplist_add (pl, Mcharsets, charsets);
2916 Mcoding_us_ascii = mconv_define_coding ("us-ascii", param,
2917 NULL, NULL, NULL, NULL);
2920 MSymbol alias = msymbol ("ANSI_X3.4-1968");
2921 MCodingSystem *coding
2922 = (MCodingSystem *) msymbol_get (Mcoding_us_ascii, Mcoding);
2924 msymbol_put (alias, Mcoding, coding);
2925 alias = msymbol__canonicalize (alias);
2926 msymbol_put (alias, Mcoding, coding);
2929 mplist_set (charsets, Msymbol, Mcharset_iso_8859_1);
2930 Mcoding_iso_8859_1 = mconv_define_coding ("iso-8859-1", param,
2931 NULL, NULL, NULL, NULL);
2933 mplist_set (charsets, Msymbol, Mcharset_m17n);
2934 mplist_put (param, Mtype, Mutf);
2935 mplist_put (param, Mcode_unit, (void *) 8);
2936 Mcoding_utf_8_full = mconv_define_coding ("utf-8-full", param,
2937 NULL, NULL, NULL, NULL);
2939 mplist_set (charsets, Msymbol, Mcharset_unicode);
2940 Mcoding_utf_8 = mconv_define_coding ("utf-8", param,
2941 NULL, NULL, NULL, NULL);
2943 mplist_put (param, Mcode_unit, (void *) 16);
2944 mplist_put (param, Mbom, Mmaybe);
2945 #ifndef WORDS_BIGENDIAN
2946 mplist_put (param, Mlittle_endian, Mt);
2948 Mcoding_utf_16 = mconv_define_coding ("utf-16", param,
2949 NULL, NULL, NULL, NULL);
2951 mplist_put (param, Mcode_unit, (void *) 32);
2952 Mcoding_utf_32 = mconv_define_coding ("utf-32", param,
2953 NULL, NULL, NULL, NULL);
2955 mplist_put (param, Mcode_unit, (void *) 16);
2956 mplist_put (param, Mbom, Mnil);
2957 mplist_put (param, Mlittle_endian, Mnil);
2958 Mcoding_utf_16be = mconv_define_coding ("utf-16be", param,
2959 NULL, NULL, NULL, NULL);
2961 mplist_put (param, Mcode_unit, (void *) 32);
2962 Mcoding_utf_32be = mconv_define_coding ("utf-32be", param,
2963 NULL, NULL, NULL, NULL);
2965 mplist_put (param, Mcode_unit, (void *) 16);
2966 mplist_put (param, Mlittle_endian, Mt);
2967 Mcoding_utf_16le = mconv_define_coding ("utf-16le", param,
2968 NULL, NULL, NULL, NULL);
2970 mplist_put (param, Mcode_unit, (void *) 32);
2971 Mcoding_utf_32le = mconv_define_coding ("utf-32le", param,
2972 NULL, NULL, NULL, NULL);
2974 mplist_put (param, Mtype, Mnil);
2975 mplist_set (charsets, Msymbol, Mcharset_ascii);
2976 Mcoding_sjis = mconv_define_coding ("sjis", param,
2979 encode_coding_sjis, NULL);
2981 M17N_OBJECT_UNREF (charsets);
2982 M17N_OBJECT_UNREF (param);
/* Finalizer for the coding module: releases the per-coding-system
   extra data of every entry in coding_list, frees the list storage,
   and drops the references held through coding_definition_list.
   NOTE(review): this listing omits some original lines (return type,
   braces, local declarations); the comments cover only what is shown.  */
2988 mcoding__fini (void)
/* Visit every coding system recorded in coding_list.  */
2993 for (i = 0; i < coding_list.used; i++)
2995 MCodingSystem *coding = coding_list.codings[i];
2997 if (coding->extra_info)
2998 free (coding->extra_info);
2999 if (coding->extra_spec)
/* For an ISO-2022 coding system the spec's `designations' member is
   freed before the spec itself.  */
3001 if (coding->type == Miso_2022)
3002 free (((struct iso_2022_spec *) coding->extra_spec)->designations);
3003 free (coding->extra_spec);
/* Free the list storage, then unreference each stored definition value
   and finally the definition plist itself.  */
3007 MLIST_FREE1 (&coding_list, codings);
3008 MPLIST_DO (plist, coding_definition_list)
3009 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
3010 M17N_OBJECT_UNREF (coding_definition_list);
/* Record a definition for a charset-type coding system named after
   the charset SYM.  Nothing is added when coding_definition_list
   already has an entry under the canonicalized form of SYM.
   NOTE(review): this listing omits some original lines (return type,
   braces); the comments cover only what is shown.  */
3014 mconv__register_charset_coding (MSymbol sym)
/* Definitions are keyed by the canonicalized name.  */
3016 MSymbol name = msymbol__canonicalize (sym);
3018 if (! mplist_find_by_key (coding_definition_list, name))
3020 MPlist *param = mplist (), *charsets = mplist ();
/* The definition holds: (Msymbol SYM) as the original name,
   (Mtype Mcharset), and (Mcharsets <one-element charset list>).  */
3022 mplist_set (charsets, Msymbol, sym);
3023 mplist_add (param, Msymbol, sym);
3024 mplist_add (param, Mtype, Mcharset);
3025 mplist_add (param, Mcharsets, charsets);
3026 mplist_put (coding_definition_list, name, param);
/* Drop the local reference to CHARSETS; presumably PARAM keeps its own
   reference because Mcharsets is a managing key -- TODO confirm.  */
3027 M17N_OBJECT_UNREF (charsets);
/* Load coding system definitions from the database entry tagged
   "coding-list" and append them to coding_definition_list, keyed by
   canonicalized name.  Malformed entries are reported through
   MERROR (MERROR_CHARSET, -1).
   NOTE(review): this listing omits some original lines (return type,
   braces, some declarations and checks); the comments cover only what
   is shown.  */
3033 mcoding__load_from_database ()
3035 MDatabase *mdb = mdatabase_find (msymbol ("coding-list"), Mnil, Mnil, Mnil);
3036 MPlist *def_list, *plist;
3037 MPlist *definitions = coding_definition_list;
3038 int mdebug_mask = MDEBUG_CODING;
3042 MDEBUG_PUSH_TIME ();
3043 def_list = (MPlist *) mdatabase_load (mdb);
3044 MDEBUG_PRINT_TIME ("CODING", (stderr, " to load the data."));
3049 MDEBUG_PUSH_TIME ();
/* Each element of the loaded list must itself be a plist whose first
   element is a symbol (the coding system name).
   NOTE(review): the MERROR exits below appear to skip the
   M17N_OBJECT_UNREF (def_list) at the end -- confirm MERROR semantics.  */
3050 MPLIST_DO (plist, def_list)
3053 MSymbol name, canonicalized;
3055 if (! MPLIST_PLIST_P (plist))
3056 MERROR (MERROR_CHARSET, -1);
3057 pl = MPLIST_PLIST (plist);
3058 if (! MPLIST_SYMBOL_P (pl))
3059 MERROR (MERROR_CHARSET, -1);
3060 name = MPLIST_SYMBOL (pl);
3061 canonicalized = msymbol__canonicalize (name);
/* Build the stored definition from the elements after the name, then
   push (Msymbol NAME) at its head so the original name is kept.  */
3062 pl = mplist__from_plist (MPLIST_NEXT (pl));
3063 mplist_push (pl, Msymbol, name);
3064 definitions = mplist_add (definitions, canonicalized, pl);
3067 M17N_OBJECT_UNREF (def_list);
3068 MDEBUG_PRINT_TIME ("CODING", (stderr, " to parse the loaded data."));
3074 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
3078 /*** @addtogroup m17nConv */
3082 /***en @name Variables: Symbols representing coding systems */
3083 /***ja @name ÊÑ¿ô: ÄêµÁºÑ¤ß¥³¡¼¥É·Ï¤ò»ØÄꤹ¤ë¤¿¤á¤Î¥·¥ó¥Ü¥ë */
3088 @brief Symbol for the coding system US-ASCII.
3090 The symbol #Mcoding_us_ascii has name <tt>"us-ascii"</tt> and
3091 represents a coding system for the CES US-ASCII. */
3094 @brief US-ASCII ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3096 ¥·¥ó¥Ü¥ë #Mcoding_us_ascii ¤Ï <tt>"us-ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3097 CES US-ASCII ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3099 MSymbol Mcoding_us_ascii;
3103 @brief Symbol for the coding system ISO-8859-1.
3105 The symbol #Mcoding_iso_8859_1 has name <tt>"iso-8859-1"</tt> and
3106 represents a coding system for the CES ISO-8859-1. */
3109 @brief ISO-8859-1 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3111 ¥·¥ó¥Ü¥ë #Mcoding_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt>
3112 ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢CES ISO-8859-1 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3114 MSymbol Mcoding_iso_8859_1;
3118 @brief Symbol for the coding system UTF-8.
3120 The symbol #Mcoding_utf_8 has name <tt>"utf-8"</tt> and represents
3121 a coding system for the CES UTF-8. */
3124 @brief UTF-8 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3126 ¥·¥ó¥Ü¥ë #Mcoding_utf_8 ¤Ï <tt>"utf-8"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢CES
3127 UTF-8 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3130 MSymbol Mcoding_utf_8;
3134 @brief Symbol for the coding system UTF-8-FULL.
3136 The symbol #Mcoding_utf_8_full has name <tt>"utf-8-full"</tt> and
3137 represents a coding system that is an extension of UTF-8. This
3138 coding system uses the same encoding algorithm as UTF-8 but is not
3139 limited to the Unicode characters. It can encode all characters
3140 supported by the m17n library. */
3143 @brief UTF-8-FULL ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3145 ¥·¥ó¥Ü¥ë #Mcoding_utf_8_full ¤Ï <tt>"utf-8-full"</tt>
3146 ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢<tt>"UTF-8"</tt> ¤Î³ÈÄ¥¤Ç¤¢¤ë¥³¡¼¥É·Ï¤ò¼¨¤¹¡£
3147 ¤³¤Î¥³¡¼¥É·Ï¤Ï UTF-8 ¤ÈƱ¤¸¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¥¢¥ë¥´¥ê¥º¥à¤òÍѤ¤¤ë¤¬¡¢ÂоݤÏ
3148 Unicode ʸ»ú¤Ë¤Ï¸ÂÄꤵ¤ì¤Ê¤¤¡£
3149 ¤Þ¤¿m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò¥¨¥ó¥³¡¼¥É¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¡£
3152 MSymbol Mcoding_utf_8_full;
3156 @brief Symbol for the coding system UTF-16.
3158 The symbol #Mcoding_utf_16 has name <tt>"utf-16"</tt> and
3159 represents a coding system for the CES UTF-16 (RFC 2781). */
3161 @brief UTF-16 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3163 ¥·¥ó¥Ü¥ë #Mcoding_utf_16 ¤Ï <tt>"utf-16"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3164 CES UTF-16 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3167 MSymbol Mcoding_utf_16;
3171 @brief Symbol for the coding system UTF-16BE.
3173 The symbol #Mcoding_utf_16be has name <tt>"utf-16be"</tt> and
3174 represents a coding system for the CES UTF-16BE (RFC 2781). */
3177 @brief UTF-16BE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3179 ¥·¥ó¥Ü¥ë #Mcoding_utf_16be ¤Ï <tt>"utf-16be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3180 CES UTF-16BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3182 MSymbol Mcoding_utf_16be;
3186 @brief Symbol for the coding system UTF-16LE.
3188 The symbol #Mcoding_utf_16le has name <tt>"utf-16le"</tt> and
3189 represents a coding system for the CES UTF-16LE (RFC 2781). */
3192 @brief UTF-16LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3194 ¥·¥ó¥Ü¥ë #Mcoding_utf_16le ¤Ï <tt>"utf-16le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3195 CES UTF-16LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3197 MSymbol Mcoding_utf_16le;
3201 @brief Symbol for the coding system UTF-32.
3203 The symbol #Mcoding_utf_32 has name <tt>"utf-32"</tt> and
3204 represents a coding system for the CES UTF-32 (Unicode Standard Annex #19). */
3207 @brief UTF-32 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3209 ¥·¥ó¥Ü¥ë #Mcoding_utf_32 ¤Ï <tt>"utf-32"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3210 CES UTF-32 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3212 MSymbol Mcoding_utf_32;
3216 @brief Symbol for the coding system UTF-32BE.
3218 The symbol #Mcoding_utf_32be has name <tt>"utf-32be"</tt> and
3219 represents a coding system for the CES UTF-32BE (Unicode Standard Annex #19). */
3221 @brief UTF-32BE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3223 ¥·¥ó¥Ü¥ë #Mcoding_utf_32be ¤Ï <tt>"utf-32be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3224 CES UTF-32BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3226 MSymbol Mcoding_utf_32be;
3230 @brief Symbol for the coding system UTF-32LE.
3232 The symbol #Mcoding_utf_32le has name <tt>"utf-32le"</tt> and
3233 represents a coding system for the CES UTF-32LE (Unicode Standard Annex #19). */
3235 @brief UTF-32LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3237 ¥·¥ó¥Ü¥ë #Mcoding_utf_32le ¤Ï <tt>"utf-32le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3238 CES UTF-32LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3240 MSymbol Mcoding_utf_32le;
3244 @brief Symbol for the coding system SJIS.
3246 The symbol #Mcoding_sjis has name <tt>"sjis"</tt> and represents a coding
3247 system for the CES Shift-JIS. */
3249 @brief SJIS ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3251 ¥·¥ó¥Ü¥ë #Mcoding_sjis has ¤Ï <tt>"sjis"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3252 CES Shift-JISÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3254 MSymbol Mcoding_sjis;
3259 @name Variables: Parameter keys for mconv_define_coding (). */
3261 @name ÊÑ¿ô: mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ */
3266 Parameter key for mconv_define_coding () (which see). */
3268 mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ (¾ÜºÙ¤Ï mconv_define_coding ()»²¾È). */
3274 MSymbol Mdesignation;
3275 MSymbol Minvocation;
3278 MSymbol Mlittle_endian;
3283 @name Variables: Symbols representing coding system types. */
3285 @name ÊÑ¿ô¡§ ¥³¡¼¥É·Ï¤Î¥¿¥¤¥×¤ò¼¨¤¹¥·¥ó¥Ü¥ë. */
3290 Symbol that can be a value of the #Mtype parameter of a coding
3291 system used in an argument to the mconv_define_coding () function
3294 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿
3295 #Mtype ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3296 mconv_define_coding ()»²¾È)¡£ */
3306 @name Variables: Symbols appearing in the value of #Mflags parameter. */
3308 @name ÊÑ¿ô¡§ ¥Ñ¥é¥á¡¼¥¿ #Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë. */
3313 Symbols that can be a value of the #Mflags parameter of a coding
3314 system used in an argument to the mconv_define_coding () function
3317 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿
3318 #Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3319 mconv_define_coding ()»²¾È)¡£ */
3320 MSymbol Mreset_at_eol;
3322 MSymbol Mreset_at_cntl;
3325 MSymbol Mdesignation_g0;
3326 MSymbol Mdesignation_g1;
3327 MSymbol Mdesignation_ctext;
3328 MSymbol Mdesignation_ctext_ext;
3329 MSymbol Mlocking_shift;
3330 MSymbol Msingle_shift;
3331 MSymbol Msingle_shift_7;
3332 MSymbol Meuc_tw_shift;
3334 MSymbol Mrevision_number;
3335 MSymbol Mfull_support;
3340 @name Variables: etc
3342 Remaining variables. */
3343 /***ja @name ÊÑ¿ô: ¤½¤Î¾
3349 @brief Symbol whose name is "maybe".
3351 The variable #Mmaybe is a symbol of name <tt>"maybe"</tt>. It is
3352 used as a value of the #Mbom parameter of the function
3353 mconv_define_coding () (which see). */
3355 @brief "maybe"¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë.
3357 ÊÑ¿ô #Mmaybe ¤Ï <tt>"maybe"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£¤³¤ì¤Ï´Ø¿ô
3358 mconv_define_coding () ¥Ñ¥é¥á¡¼¥¿ #Mbom ¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤ë¡£
3359 (¾ÜºÙ¤Ï mconv_define_coding () »²¾È)¡£ */
3365 @brief The symbol @c Mcoding.
3367 Any decoded M-text has a text property whose key is the predefined
3368 symbol @c Mcoding. The name of @c Mcoding is
3369 <tt>"coding"</tt>. */
3372 @brief ¥·¥ó¥Ü¥ë @c Mcoding.
3374 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¤¹¤Ù¤Æ¡¢¥¡¼¤¬ÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë @c Mcoding
3375 ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£¥·¥ó¥Ü¥ë @c Mcoding ¤Ï
3376 <tt>"coding"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£ */
3384 @brief Define a coding system.
3386 The mconv_define_coding () function defines a new coding system
3387 and makes it accessible via a symbol whose name is $NAME. $PLIST
3388 specifies parameters of the coding system as below:
3392 <li> Key is @c Mtype, value is a symbol
3394 The value specifies the type of the coding system. It must be
3395 #Mcharset, #Mutf, #Miso_2022, or #Mnil.
3397 If the type is #Mcharset, $EXTRA_INFO is ignored.
3399 If the type is #Mutf, $EXTRA_INFO must be a pointer to
3402 If the type is #Miso_2022, $EXTRA_INFO must be a pointer to
3403 #MCodingInfoISO2022.
3405 If the type is #Mnil, the argument $RESETTER, $DECODER, and
3406 $ENCODER must be supplied. $EXTRA_INFO is ignored. Otherwise,
3407 they can be @c NULL and the m17n library provides proper defaults.
3409 <li> Key is #Mcharsets, value is a plist
3411 The value specifies a list of charsets supported by the coding
3412 system. The keys of the plist must be #Msymbol, and the values
3413 must be symbols representing charsets.
3415 <li> Key is #Mflags, value is a plist
3417 If the type is #Miso_2022, the value specifies flags to control
3418 the ISO 2022 interpreter. The keys of the plist must be #Msymbol,
3419 and values must be one of the following.
3425 If this flag exists, designation and invocation status is reset to
3426 the initial state at the end of line.
3428 <li> #Mreset_at_cntl
3430 If this flag exists, designation and invocation status is reset to
3431 the initial state at a control character.
3435 If this flag exists, the graphic plane right is used.
3439 If this flag exists, the over-long escape sequences (ESC '$' '('
3440 <final_byte>) are used for designating the CCS JISX0208.1978,
3441 GB2312, and JISX0208.
3443 <li> #Mdesignation_g0
3445 If this flag and #Mfull_support exists, designates charsets not
3446 listed in the charset list to the graphic register G0.
3448 <li> #Mdesignation_g1
3450 If this flag and #Mfull_support exists, designates charsets not
3451 listed in the charset list to the graphic register G1.
3453 <li> #Mdesignation_ctext
3455 If this flag and #Mfull_support exists, designates charsets not
3456 listed in the charset list to a graphic register G0 or G1 based on
3457 the criteria of the Compound Text.
3459 <li> #Mdesignation_ctext_ext
3461 If this flag and #Mfull_support exists, designates charsets not
3462 listed in the charset list to a graphic register G0 or G1, or use
3463 extended segment for such charsets based on the criteria of the
3466 <li> #Mlocking_shift
3468 If this flag exists, use locking shift.
3472 If this flag exists, use single shift.
3474 <li> #Msingle_shift_7
3476 If this flag exists, use 7-bit single shift code (0x19).
3480 If this flag exists, use a special shifting according to EUC-TW.
3484 This flag is currently ignored.
3486 <li> #Mrevision_number
3488 If this flag exists, use a revision number escape sequence to
3489 designate a charset that has a revision number.
3493 If this flag exists, support all charsets registered in the
3494 International Registry.
3498 <li> Key is #Mdesignation, value is a plist
3500 If the type is #Miso_2022, the value specifies how to designate
3501 each supported charset. The keys of the plist must be
3502 #Minteger, and the values must be numbers indicating a graphic
3503 register. The Nth element value is for the Nth charset of the
3504 charset list. The value 0..3 means that it is assumed that a
3505 charset is already designated to the graphic register 0..3. The
3506 negative value G (-4..-1) means that a charset is not designated
3507 to any register at first, and if necessary, is designated to the
3508 (G+4) graphic register.
3510 <li> Key is #Minvocation, value is a plist
3512 If the type is #Miso_2022, the value specifies how to invoke
3513 each graphic register. The plist length must be one or two. The
3514 keys of the plist must be #Minteger, and the values must be
3515 numbers indicating a graphic register. The value of the first
3516 element specifies which graphic register is invoked to the
3517 graphic plane left. If the length is one, no graphic register is
3518 invoked to the graphic plane right. Otherwise, the value of the
3519 second element specifies which graphic register is invoked to
3520 the graphic plane right.
3522 <li> Key is #Mcode_unit, value is an integer
3524 If the type is #Mutf, the value specifies the bit length of a
3525 code-unit. It must be 8, 16, or 32.
3527 <li> Key is #Mbom, value is a symbol
3529 If the type is #Mutf and the code-unit bit length is 16 or 32,
3530 it specifies whether or not to use BOM (Byte Order Mark). If the
3531 value is #Mnil (default), BOM is not used, else if the value is
3532 #Mmaybe, the existence of BOM is detected at decoding time, else
3535 <li> Key is #Mlittle_endian, value is a symbol
3537 If the type is #Mutf and the code-unit bit length is 16 or 32,
3538 it specifies whether or not the encoding is little endian. If the
3539 value is #Mnil (default), it is big endian, else it is little
3544 $RESETTER is a pointer to a function that resets a converter for
3545 the coding system to the initial status. The pointed function is
3546 called with one argument, a pointer to a converter object.
3548 $DECODER is a pointer to a function that decodes a byte sequence
3549 according to the coding system. The pointed function is called
3550 with four arguments:
3552 @li A pointer to the byte sequence to decode.
3553 @li The number of bytes to decode.
3554 @li A pointer to an M-text to which the decoded characters are appended.
3555 @li A pointer to a converter object.
3557 $DECODER must return 0 if it succeeds. Otherwise it must return -1.
3559 $ENCODER is a pointer to a function that encodes an M-text
3560 according to the coding system. The pointed function is called
3563 @li A pointer to the M-text to encode.
3564 @li The starting position of the encoding.
3565 @li The ending position of the encoding.
3566 @li A pointer to a memory area where the produced bytes are stored.
3567 @li The size of the memory area.
3568 @li A pointer to a converter object.
3570 $ENCODER must return 0 if it succeeds. Otherwise it must return -1.
3572 $EXTRA_INFO is a pointer to a data structure that contains extra
3573 information about the coding system. The type of the data
3574 structure depends on the value of the #Mtype parameter in $PLIST.
3578 If the operation was successful, mconv_define_coding () returns a
3579 symbol whose name is $NAME. If an error is detected, it returns
3580 #Mnil and assigns an error code to the external variable #merror_code. */
3583 @brief ¥³¡¼¥É·Ï¤òÄêµÁ¤¹¤ë.
3585 ´Ø¿ô mconv_define_coding () ¤Ï¡¢¿·¤·¤¤¥³¡¼¥É·Ï¤òÄêµÁ¤·¡¢¤½¤ì¤ò
3586 $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£ $PLIST
3587 ¤Ç¤ÏÄêµÁ¤¹¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿¤ò°Ê²¼¤Î¤è¤¦¤Ë»ØÄꤹ¤ë¡£
3591 <li> ¥¡¼¤¬ @c Mtype ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3593 Ãͤϥ³¡¼¥É·Ï¤Î¥¿¥¤¥×¤òɽ¤·¡¢#Mcharset, #Mutf, #Miso_2022, #Mnil
3594 ¤Î¤¤¤º¤ì¤«¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3596 ¥¿¥¤¥×¤¬ #Mcharset ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£
3598 ¥¿¥¤¥×¤¬ #Mutf ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï #MCodingInfoUTF
3599 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3601 ¥¿¥¤¥×¤¬ #Miso_2022¤Ê¤é¤Ð $EXTRA_INFO ¤Ï #MCodingInfoISO2022
3602 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3604 ¥¿¥¤¥×¤¬ #Mnil ¤Ê¤é¤Ð¡¢°ú¿ô $RESETTER, $DECODER, $ENCODER
3605 ¤òÍ¿¤¨¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£$EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£
3606 ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¤³¤ì¤é¤Ï @c NULL ¤Ç¤è¤¯¡¢
3607 m17n ¥é¥¤¥Ö¥é¥ê¤¬Å¬Àڤʥǥե©¥ë¥ÈÃͤòÍ¿¤¨¤ë¡£
3609 <li> ¥¡¼¤¬ #Mcharsets ¤ÇÃͤ¬ plist ¤Î»þ
3611 ÃͤϤ³¤Î¥³¡¼¥É·Ï¤Ç¥µ¥Ý¡¼¥È¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î¥ê¥¹¥È¤Ç¤¢¤ë¡£plist¤Î¥¡¼¤Ï
3612 #Msymbol¡¢ÃͤÏʸ»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3614 <li> ¥¡¼¤¬ #Mflags Ãͤ¬ plist ¤Î»þ
3616 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢¤³¤ÎÃͤÏ, ISO 2022
3617 ¥¤¥ó¥¿¥×¥ê¥¿ÍѤÎÀ©¸æ¥Õ¥é¥Ã¥°¤ò¼¨¤¹¡£plist ¤Î¥¡¼¤Ï #Msymbol
3618 ¤Ç¤¢¤ê¡¢Ãͤϰʲ¼¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ë¡£
3624 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤Ï¹ÔËö¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤ÆÅö½é¤Î¾õÂÖ¤ËÌá¤ë¡£
3626 <li> #Mreset_at_cntl
3628 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤ÏÀ©¸æʸ»ú¤Ë½Ð²ñ¤Ã¤¿»þÅÀ¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤ÆÅö½é¤Î¾õÂÖ¤ËÌá¤ë¡£
3632 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î±¦È¾Ì̤¬ÍѤ¤¤é¤ì¤ë¡£
3636 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢Ê¸»ú½¸¹ç JISX0208.1978, GB2312, JISX0208
3637 ¤ò»Ø¼¨¤¹¤ëºÝ¤Ë over-long ¥¨¥¹¥±¡¼¥×¥·¡¼¥±¥ó¥¹ (ESC '$' '('
3638 <final_byte>) ¤¬ÍѤ¤¤é¤ì¤ë¡£
3640 <li> #Mdesignation_g0
3642 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3645 <li> #Mdesignation_g1
3647 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3650 <li> #Mdesignation_ctext
3652 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3653 G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¥³¥ó¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë¤½¤Ã¤Æ»Ø¼¨¤¹¤ë¡£
3655 <li> #Mdesignation_ctext_ext
3657 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤Ê¸»ú¥»¥Ã¥È¤ò
3658 G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¤¢¤ë¤¤¤Ï³ÈÄ¥¥»¥°¥á¥ó¥È¤Ë¥³¥ó¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë¤½¤Ã¤Æ»Ø¼¨¤¹¤ë¡£
3660 <li> #Mlocking_shift
3662 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥í¥Ã¥¥ó¥°¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3666 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥·¥ó¥°¥ë¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3668 <li> #Msingle_shift_7
3670 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢7-bit ¥·¥ó¥°¥ë¥·¥Õ¥È¥³¡¼¥É (0x19) ¤òÍѤ¤¤ë¡£
3674 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢EUC-TW ¤Ë±è¤Ã¤¿ÆÃÊ̤ʥ·¥Õ¥È¤òÍѤ¤¤ë¡£
3678 ¸½»þÅÀ¤Ç¤ÏÍѤ¤¤é¤ì¤Æ¤¤¤Ê¤¤¡£
3680 <li> #Mrevision_number
3682 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢revision number ¤ò»ý¤Äʸ»ú¥»¥Ã¥È¤ò»Ø¼¨¤¹¤ëºÝ¤Ë
3683 revision number ¥¨¥¹¥±¡¼¥×¥·¡¼¥¯¥¨¥ó¥¹¤òÍѤ¤¤ë¡£
3687 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢the International Registry
3688 ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ëÁ´Ê¸»ú¥»¥Ã¥È¤ò¥µ¥Ý¡¼¥È¤¹¤ë¡£
3692 <li> ¥¡¼¤¬ #Mdesignation¤ÇÃͤ¬ plist ¤Î»þ
3694 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢ÃͤϳÆʸ»ú¤ò¤É¤Î¤è¤¦¤Ë»Ø¼¨¤¹¤ë¤«¤ò¼¨¤¹¡£
3695 plist ¤Î¥¡¼¤Ï #Minteger¡¢ÃͤϽ¸¹ç¡Êgraphic register¡Ë
3696 ¤ò¼¨¤¹¿ô»ú¤Ç¤¢¤ë¡£NÈÖÌܤÎÍ×ÁǤÎÃͤϡ¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Î N
3697 ÈÖÌܤÎʸ»ú¥»¥Ã¥È¤ËÂбþ¤¹¤ë¡£Ãͤ¬ 0..3 ¤Ç¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¤¬¤¹¤Ç¤Ë
3698 G0..G3 ¤Ë»Ø¼¨ ¤µ¤ì¤Æ¤¤¤ë¡£
3700 Ãͤ¬Éé(-4..-1) ¤Ç¤¢¤ì¤Ð¡¢½é´ü¾õÂ֤ǤÏʸ»ú¥»¥Ã¥È¤¬¤É¤³¤Ë¤â»Ø¼¨¤µ¤ì¤Æ¤¤¤Ê¤¤¤³¤È¡¢É¬ÍפʺݤˤÏ
3701 G0..G3 ¤Î¤½¤ì¤¾¤ì¤Ë»Ø¼¨¤¹¤ë¤³¤È¤ò°ÕÌ£¤¹¤ë¡£
3703 <li> ¥¡¼¤¬ #Minvocation¤ÇÃͤ¬ plist ¤Î»þ
3705 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢Ãͤϳƽ¸¹ç¤ò¤É¤Î¤è¤¦¤Ë¸Æ¤Ó½Ð¤¹¤«¤ò¼¨¤¹¡£
3706 plist ¤ÎŤµ¤Ï 1 ¤Ê¤¤¤· 2 ¤Ç¤¢¤ë¡£plist ¤Î¥¡¼¤Ï
3707 #Minteger¡¢ÃͤϽ¸¹ç¡Êgraphic register)¤ò¼¨¤¹¿ô»ú¤Ç¤¢¤ë¡£
3708 ºÇ½é¤ÎÍ×ÁǤÎÃͤ¬¿Þ·Áʸ»ú½¸¹çº¸È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤ò¼¨¤¹¡£
3709 plist ¤ÎŤµ¤¬ 1 ¤Ê¤é¤Ð¡¢±¦È¾Ì̤ˤϲ¿¤â¸Æ¤Ó½Ð¤µ¤ì¤Ê¤¤¡£
3710 ¤½¤¦¤Ç¤±¤ì¤Ð¡¢£²¤Ä¤á¤ÎÍ×ÁǤÎÃͤ¬¿Þ·Áʸ»ú½¸¹ç±¦È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤ò¼¨¤¹¡£
3712 <li> ¥¡¼¤¬ #Mcode_unit ¤ÇÃͤ¬À°¿ôÃͤλþ
3714 ¥¿¥¤¥×¤¬ #Mutf ¤Ê¤é¤Ð¡¢Ãͤϥ³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤǤ¢¤ê¡¢8, 16,
3715 32 ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ë¡£
3717 <li> ¥¡¼¤¬ #Mbom ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3719 ¥¿¥¤¥×¤¬ #Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32¤Ê¤é¤Ð¡¢ÃͤÏ
3720 BOM (Byte Order Mark) ¤ò»ÈÍѤ¹¤ë¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
3721 #Mnil ¤Ê¤é¤Ð¡¢»ÈÍѤ·¤Ê¤¤¡£Ãͤ¬ #Mmaybe ¤Ê¤é¤Ð¥Ç¥³¡¼¥É»þ¤Ë BOM
3722 ¤¬¤¢¤ë¤«¤É¤¦¤«¤òÄ´¤Ù¤ë¡£¤½¤ì°Ê³°¤Ê¤é¤Ð»ÈÍѤ¹¤ë¡£
3724 <li> ¥¡¼¤¬ #Mlittle_endian ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3726 ¥¿¥¤¥×¤¬ #Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32
3727 ¤Ê¤é¤Ð¡¢Ãͤϥ¨¥ó¥³¡¼¥É¤¬ little endian ¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
3728 #Mnil ¤Ê¤é¤Ð big endian ¤Ç¤¢¤ê¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð little endian ¤Ç¤¢¤ë¡£
3733 ¤Ï¤³¤Î¥³¡¼¥É·ÏÍѤΥ³¥ó¥Ð¡¼¥¿¤ò½é´ü¾õÂ֤˥ꥻ¥Ã¥È¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3734 ¤³¤Î´Ø¿ô¤Ï¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿¤È¤¤¤¦£±°ú¿ô¤ò¤È¤ë¡£
3736 $DECODER ¤Ï¥Ð¥¤¥ÈÎó¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥Ç¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3737 ¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î£´°ú¿ô¤ò¤È¤ë¡£
3739 @li ¥Ç¥³¡¼¥É¤¹¤ë¥Ð¥¤¥ÈÎó¤Ø¤Î¥Ý¥¤¥ó¥¿
3740 @li ¥Ç¥³¡¼¥É¤¹¤Ù¤¥Ð¥¤¥È¿ô
3741 @li ¥Ç¥³¡¼¥É·ë²Ì¤Îʸ»ú¤òÉղ乤ë M-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3742 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3744 $DECODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï 0 ¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï -1
3745 ¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3747 $ENCODER ¤Ï M-text ¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥¨¥ó¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3748 ¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î£¶°ú¿ô¤ò¤È¤ë¡£
3750 @li ¥¨¥ó¥³¡¼¥É¤¹¤ëM-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3751 @li M-text ¤Î¥¨¥ó¥³¡¼¥É³«»Ï°ÌÃÖ
3752 @li M-text ¤Î¥¨¥ó¥³¡¼¥É½ªÎ»°ÌÃÖ
3753 @li À¸À®¤·¤¿¥Ð¥¤¥È¤òÊÝ»ý¤¹¤ë¥á¥â¥êÎΰè¤Ø¤Î¥Ý¥¤¥ó¥¿
3754 @li ¥á¥â¥êÎΰè¤Î¥µ¥¤¥º
3755 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3757 $ENCODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï 0 ¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï -1
3758 ¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3760 $EXTRA_INFO ¤Ï¥³¡¼¥Ç¥£¥°¥·¥¹¥Æ¥à¤Ë´Ø¤¹¤ëÄɲþðÊó¤ò´Þ¤à¥Ç¡¼¥¿¹½Â¤¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£
3761 ¤³¤Î¥Ç¡¼¥¿¹½Â¤¤Î·¿ $TYPE ¤Ë°Í¸¤¹¤ë¡£
3765 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mconv_define_coding () ¤Ï $NAME
3766 ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë¤òÊÖ¤¹¡£ ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï #Mnil
3767 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
3775 mconv_define_coding (const char *name, MPlist *plist,
3776 int (*resetter) (MConverter *),
3777 int (*decoder) (const unsigned char *, int, MText *,
3779 int (*encoder) (MText *, int, int,
3780 unsigned char *, int,
3784 MSymbol sym = msymbol (name);
3786 MCodingSystem *coding;
3789 MSTRUCT_MALLOC (coding, MERROR_CODING);
3791 if ((coding->type = (MSymbol) mplist_get (plist, Mtype)) == Mnil)
3792 coding->type = Mcharset;
3793 pl = (MPlist *) mplist_get (plist, Mcharsets);
3795 MERROR (MERROR_CODING, Mnil);
3796 coding->ncharsets = mplist_length (pl);
3797 if (coding->ncharsets > NUM_SUPPORTED_CHARSETS)
3798 coding->ncharsets = NUM_SUPPORTED_CHARSETS;
3799 for (i = 0; i < coding->ncharsets; i++, pl = MPLIST_NEXT (pl))
3801 MSymbol charset_name;
3803 if (MPLIST_KEY (pl) != Msymbol)
3804 MERROR (MERROR_CODING, Mnil);
3805 charset_name = MPLIST_SYMBOL (pl);
3806 if (! (coding->charsets[i] = MCHARSET (charset_name)))
3807 MERROR (MERROR_CODING, Mnil);
3810 coding->resetter = resetter;
3811 coding->decoder = decoder;
3812 coding->encoder = encoder;
3813 coding->ascii_compatible = 0;
3814 coding->extra_info = extra_info;
3815 coding->extra_spec = NULL;
3818 if (coding->type == Mcharset)
3820 if (! coding->resetter)
3821 coding->resetter = reset_coding_charset;
3822 if (! coding->decoder)
3823 coding->decoder = decode_coding_charset;
3824 if (! coding->encoder)
3825 coding->encoder = encode_coding_charset;
3827 else if (coding->type == Mutf)
3829 MCodingInfoUTF *info = malloc (sizeof (MCodingInfoUTF));
3832 if (! coding->resetter)
3833 coding->resetter = reset_coding_utf;
3835 info->code_unit_bits = (int) mplist_get (plist, Mcode_unit);
3836 if (info->code_unit_bits == 8)
3838 if (! coding->decoder)
3839 coding->decoder = decode_coding_utf_8;
3840 if (! coding->encoder)
3841 coding->encoder = encode_coding_utf_8;
3843 else if (info->code_unit_bits == 16)
3845 if (! coding->decoder)
3846 coding->decoder = decode_coding_utf_16;
3847 if (! coding->encoder)
3848 coding->encoder = encode_coding_utf_16;
3850 else if (info->code_unit_bits == 32)
3852 if (! coding->decoder)
3853 coding->decoder = decode_coding_utf_32;
3854 if (! coding->encoder)
3855 coding->encoder = encode_coding_utf_32;
3858 MERROR (MERROR_CODING, Mnil);
3859 val = (MSymbol) mplist_get (plist, Mbom);
3862 else if (val == Mmaybe)
3867 info->endian = (mplist_get (plist, Mlittle_endian) ? 1 : 0);
3868 coding->extra_info = info;
3870 else if (coding->type == Miso_2022)
3872 MCodingInfoISO2022 *info = malloc (sizeof (MCodingInfoISO2022));
3874 if (! coding->resetter)
3875 coding->resetter = reset_coding_iso_2022;
3876 if (! coding->decoder)
3877 coding->decoder = decode_coding_iso_2022;
3878 if (! coding->encoder)
3879 coding->encoder = encode_coding_iso_2022;
3881 info->initial_invocation[0] = 0;
3882 info->initial_invocation[1] = -1;
3883 pl = (MPlist *) mplist_get (plist, Minvocation);
3886 if (MPLIST_KEY (pl) != Minteger)
3887 MERROR (MERROR_CODING, Mnil);
3888 info->initial_invocation[0] = MPLIST_INTEGER (pl);
3889 if (! MPLIST_TAIL_P (pl))
3891 pl = MPLIST_NEXT (pl);
3892 if (MPLIST_KEY (pl) != Minteger)
3893 MERROR (MERROR_CODING, Mnil);
3894 info->initial_invocation[1] = MPLIST_INTEGER (pl);
3897 memset (info->designations, 0, sizeof (info->designations));
3898 for (i = 0, pl = (MPlist *) mplist_get (plist, Mdesignation);
3899 i < 32 && pl && MPLIST_KEY (pl) == Minteger;
3900 i++, pl = MPLIST_NEXT (pl))
3901 info->designations[i] = MPLIST_INTEGER (pl);
3904 MPLIST_DO (pl, (MPlist *) mplist_get (plist, Mflags))
3908 if (MPLIST_KEY (pl) != Msymbol)
3909 MERROR (MERROR_CODING, Mnil);
3910 val = MPLIST_SYMBOL (pl);
3911 if (val == Mreset_at_eol)
3912 info->flags |= MCODING_ISO_RESET_AT_EOL;
3913 else if (val == Mreset_at_cntl)
3914 info->flags |= MCODING_ISO_RESET_AT_CNTL;
3915 else if (val == Meight_bit)
3916 info->flags |= MCODING_ISO_EIGHT_BIT;
3917 else if (val == Mlong_form)
3918 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3919 else if (val == Mdesignation_g0)
3920 info->flags |= MCODING_ISO_DESIGNATION_G0;
3921 else if (val == Mdesignation_g1)
3922 info->flags |= MCODING_ISO_DESIGNATION_G1;
3923 else if (val == Mdesignation_ctext)
3924 info->flags |= MCODING_ISO_DESIGNATION_CTEXT;
3925 else if (val == Mdesignation_ctext_ext)
3926 info->flags |= MCODING_ISO_DESIGNATION_CTEXT_EXT;
3927 else if (val == Mlocking_shift)
3928 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3929 else if (val == Msingle_shift)
3930 info->flags |= MCODING_ISO_SINGLE_SHIFT;
3931 else if (val == Msingle_shift_7)
3932 info->flags |= MCODING_ISO_SINGLE_SHIFT_7;
3933 else if (val == Meuc_tw_shift)
3934 info->flags |= MCODING_ISO_EUC_TW_SHIFT;
3935 else if (val == Miso_6429)
3936 info->flags |= MCODING_ISO_ISO6429;
3937 else if (val == Mrevision_number)
3938 info->flags |= MCODING_ISO_REVISION_NUMBER;
3939 else if (val == Mfull_support)
3940 info->flags |= MCODING_ISO_FULL_SUPPORT;
3943 coding->extra_info = info;
3947 if (! coding->decoder || ! coding->encoder)
3948 MERROR (MERROR_CODING, Mnil);
3949 if (! coding->resetter)
3953 msymbol_put (sym, Mcoding, coding);
3954 msymbol_put (msymbol__canonicalize (sym), Mcoding, coding);
3955 plist = (MPlist *) mplist_get (plist, Maliases);
3958 MPLIST_DO (pl, plist)
3962 if (MPLIST_KEY (pl) != Msymbol)
3964 alias = MPLIST_SYMBOL (pl);
3965 msymbol_put (alias, Mcoding, coding);
3966 msymbol_put (msymbol__canonicalize (alias), Mcoding, coding);
3970 MLIST_APPEND1 (&coding_list, codings, coding, MERROR_CODING);
3978 @brief Resolve coding system name.
3980 The mconv_resolve_coding () function returns $SYMBOL if it
3981 represents a coding system. Otherwise, it canonicalizes $SYMBOL as
3982 a coding system name and, if the canonicalized name represents a
3983 coding system, returns it. Otherwise, it returns #Mnil. */
3985 @brief ¥³¡¼¥É·Ï¤Î̾Á°¤ò²ò·è¤¹¤ë.
3987 ´Ø¿ô mconv_resolve_coding () ¤Ï $SYMBOL ¤¬¥³¡¼¥É·Ï¤ò¼¨¤·¤Æ¤¤¤ì¤Ð¤½¤ì¤òÊÖ¤¹¡£
3988 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¥³¡¼¥É·Ï¤Î̾Á°¤È¤·¤Æ $SYMBOL
3989 ¤òÀµµ¬²½¤·¡¢¤½¤ì¤¬¥³¡¼¥É·Ï¤òɽ¤·¤Æ¤¤¤ì¤ÐÀµµ¬²½¤·¤¿ $SYMBOL ¤òÊÖ¤¹¡£
3990 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð#Mnil ¤òÊÖ¤¹¡£ */
/* Resolve SYMBOL to a coding system and return that coding system's
   `name' member, or Mnil when no coding system is found.
   NOTE(review): this listing omits some original lines (return type,
   braces, the test guarding the second lookup); the comments cover
   only what is shown.  */
3995 mconv_resolve_coding (MSymbol symbol)
/* First try SYMBOL exactly as given.  */
3997 MCodingSystem *coding = find_coding (symbol);
/* Retry with the canonicalized name; presumably this happens only when
   the first lookup failed -- TODO confirm against the complete source.  */
4001 symbol = msymbol__canonicalize (symbol);
4002 coding = find_coding (symbol);
4004 return (coding ? coding->name : Mnil);
4011 @brief List symbols representing coding systems.
4013 The mconv_list_codings () function makes an array of symbols
4014 representing coding systems, stores the pointer to the array in a
4015 place pointed to by $SYMBOLS, and returns the length of the array. */
4017 @brief ¥³¡¼¥É·Ï¤òɽ¤ï¤¹¥·¥ó¥Ü¥ë¤òÎóµó¤¹¤ë.
4019 ´Ø¿ô mchar_list_codings () ¤Ï¡¢¥³¡¼¥É·Ï¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤òʤ٤¿ÇÛÎó¤òºî¤ê¡¢
4020 $SYMBOLS ¤Ç¥Ý¥¤¥ó¥È¤µ¤ì¤¿¾ì½ê¤Ë¤³¤ÎÇÛÎó¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÃÖ¤¡¢ÇÛÎó¤ÎŤµ¤òÊÖ¤¹¡£ */
/* Store in *SYMBOLS a newly allocated array of symbols naming the
   coding systems found in coding_definition_list and in coding_list.
   NOTE(review): this listing omits some original lines (return type,
   braces, local declarations, the return statement and probably a
   reset of `i' before the loops); the comments cover only what is
   shown.  */
4023 mconv_list_codings (MSymbol **symbols)
/* `i' is first used as the allocation size: the sum of both list
   lengths, i.e. an upper bound on the number of symbols stored.  */
4025 int i = coding_list.used + mplist_length (coding_definition_list);
4029 MTABLE_MALLOC ((*symbols), i, MERROR_CODING);
/* Each definition value is a plist whose first element is the coding
   system's name symbol.  */
4031 MPLIST_DO (plist, coding_definition_list)
4033 MPlist *pl = MPLIST_VAL (plist);
4034 (*symbols)[i++] = MPLIST_SYMBOL (pl);
/* Then add the coding systems in coding_list whose name is not a key
   of the definition list.
   NOTE(review): the definition list is keyed by canonicalized names
   while `name' may be the uncanonicalized one, so the same coding
   system could be listed twice -- confirm.  */
4036 for (j = 0; j < coding_list.used; j++)
4037 if (! mplist_find_by_key (coding_definition_list,
4038 coding_list.codings[j]->name))
4039 (*symbols)[i++] = coding_list.codings[j]->name;
4046 @brief Create a code converter bound to a buffer.
4048 The mconv_buffer_converter () function creates a pointer to a code
4049 converter for coding system $NAME. The code converter is bound
4050 to buffer area of $N bytes pointed to by $BUF. Subsequent
4051 decodings and encodings are done to/from this buffer area.
4053 $NAME can be #Mnil. In this case, a coding system associated
4054 with the current locale (LC_CTYPE) is used.
4057 If the operation was successful, mconv_buffer_converter () returns
4058 the created code converter. Otherwise it returns @c NULL and
4059 assigns an error code to the external variable #merror_code. */
4062 @brief ¥Ð¥Ã¥Õ¥¡¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë.
4064 ´Ø¿ô mconv_buffer_converter () ¤Ï¡¢¥³¡¼¥É·Ï $NAME
4065 ÍѤΥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢$BUF ¤Ç¼¨¤µ¤ì¤ëÂ礤µ $N
4066 ¥Ð¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤ë¡£
4067 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¡£
4069 $NAME ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4070 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4073 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð mconv_buffer_converter () ¤Ï ºîÀ®¤·¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£
4074 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4075 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4077 @latexonly \IPAlabel{mconverter} @endlatexonly */
4081 @c MERROR_SYMBOL, @c MERROR_CODING
4084 mconv_stream_converter () */
/* Create a converter for the coding system NAME bound to the N-byte
   buffer BUF.  Errors are reported through MERROR (MERROR_CODING, NULL).
   NOTE(review): this listing omits some original lines (return type,
   braces, some tests, cleanup on resetter failure, the return
   statement); the comments cover only what is shown.  */
4087 mconv_buffer_converter (MSymbol name, const unsigned char *buf, int n)
4089 MCodingSystem *coding;
4090 MConverter *converter;
4091 MConverterStatus *internal;
/* Fall back to the coding system recorded as the Mcoding property of
   the LC_CTYPE locale object; presumably done only when NAME is Mnil --
   TODO confirm against the complete source.  */
4094 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4095 coding = find_coding (name);
4097 MERROR (MERROR_CODING, NULL);
/* Both the public converter and its internal status are allocated with
   the CALLOC variant of the allocation macro.  */
4098 MSTRUCT_CALLOC (converter, MERROR_CODING);
4099 MSTRUCT_CALLOC (internal, MERROR_CODING);
4100 converter->internal_info = internal;
4101 internal->coding = coding;
/* Run the coding system's resetter, if it has one; a negative result
   is treated as failure.  */
4102 if (coding->resetter
4103 && (*coding->resetter) (converter) < 0)
4107 MERROR (MERROR_CODING, NULL);
/* Working M-texts: `unread' and `work_mt'; the latter is enlarged by
   MAX_UTF8_CHAR_BYTES up front.  */
4110 internal->unread = mtext ();
4111 internal->work_mt = mtext ();
4112 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
/* Bind the converter to the caller's buffer.  */
4113 internal->buf.in = buf;
4115 internal->bufsize = n;
4116 internal->binding = BINDING_BUFFER;
4124 @brief Create a code converter bound to a stream.
4126 The mconv_stream_converter () function creates a pointer to a code
4127 converter for coding system $NAME. The code converter is bound
4128 to stream $FP. Subsequent decodings and encodings are done
4129 to/from this stream.
4131 $NAME can be #Mnil. In this case, a coding system associated
4132 with the current locale (LC_CTYPE) is used.
4134 @return If the operation was successful, mconv_stream_converter ()
4135 returns the created code converter. Otherwise it returns @c NULL
4136 and assigns an error code to the external variable
4140 @brief ストリームに結び付けられたコードコンバータを作る.
4142 ´Ø¿ô mconv_stream_converter () ¤Ï¡¢¥³¡¼¥É·Ï $NAME
4143 ÍѤΥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP
4145 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¡£
4147 $NAME ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4148 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4151 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_stream_converter ()
4152 ¤ÏºîÀ®¤·¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL
4153 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4155 @latexonly \IPAlabel{mconverter} @endlatexonly */
4159 @c MERROR_SYMBOL, @c MERROR_CODING
4162 mconv_buffer_converter () */
4165 mconv_stream_converter (MSymbol name, FILE *fp)
/* Build a converter for coding system NAME bound to the stdio stream
   FP.  */
4167 MCodingSystem *coding;
4168 MConverter *converter;
4169 MConverterStatus *internal;
/* Locale fallback (documented above for a Mnil NAME): use the Mcoding
   property of the LC_CTYPE locale as the coding system name.  */
4172 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4173 coding = find_coding (name);
/* Failure exit: MERROR_CODING with a NULL result.  */
4175 MERROR (MERROR_CODING, NULL);
/* Allocate the public converter and its private status record, link
   them, and remember which coding system is in use.  */
4176 MSTRUCT_CALLOC (converter, MERROR_CODING);
4177 MSTRUCT_CALLOC (internal, MERROR_CODING);
4178 converter->internal_info = internal;
4179 internal->coding = coding;
/* The resetter is optional; when present it initializes the new
   converter, and a negative result is treated as failure.  */
4180 if (coding->resetter
4181 && (*coding->resetter) (converter) < 0)
4185 MERROR (MERROR_CODING, NULL);
/* A zero-offset fseek probes whether FP can seek.  The outcome is
   cached in `seekable'; mconv_decode () consults it when choosing
   between block reads and byte-at-a-time reads.  */
4188 if (fseek (fp, 0, SEEK_CUR) < 0)
4196 internal->seekable = 0;
4199 internal->seekable = 1;
/* `unread' collects characters pushed back by mconv_ungetc ();
   `work_mt' is the scratch text used by mconv_getc () and
   mconv_putc ().  */
4200 internal->unread = mtext ();
4201 internal->work_mt = mtext ();
4202 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
4204 internal->binding = BINDING_STREAM;
4212 @brief Reset a code converter.
4214 The mconv_reset_converter () function resets code converter
4215 $CONVERTER to the initial state.
4218 If $CONVERTER->coding has its own resetter function,
4219 mconv_reset_converter () returns the result of that function
4220 applied to $CONVERTER. Otherwise it returns 0. */
4223 @brief コードコンバータをリセットする.
4225 ´Ø¿ô mconv_reset_converter () ¤Ï¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
4229 ¤â¤· $CONVERTER->coding ¤Ë¥ê¥»¥Ã¥ÈÍѤδؿô¤¬ÄêµÁ¤µ¤ì¤Æ¤¤¤ë¤Ê¤é¤Ð¡¢
4230 mconv_reset_converter () ¤Ï¤½¤Î´Ø¿ô¤Ë $CONVERTER
4231 ¤òŬÍѤ·¤¿·ë²Ì¤òÊÖ¤·¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð0¤òÊÖ¤¹¡£ */
4234 mconv_reset_converter (MConverter *converter)
/* Return CONVERTER to its initial state.  */
4236 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Zero the character/byte counters, clear the result code and the
   carry-over count, and discard any pushed-back characters.  */
4238 converter->nchars = converter->nbytes = 0;
4239 converter->result = MCONVERSION_RESULT_SUCCESS;
4240 internal->carryover_bytes = 0;
4241 mtext_reset (internal->unread);
/* When the coding system supplies a resetter, its return value is
   this function's return value.  */
4242 if (internal->coding->resetter)
4243 return (*internal->coding->resetter) (converter);
4250 @brief Free a code converter.
4252 The mconv_free_converter () function frees the code converter
4256 @brief コードコンバータを解放する.
4258 ´Ø¿ô mconv_free_converter () ¤Ï¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
4262 mconv_free_converter (MConverter *converter)
4264 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Drop the references held on the two internal M-texts: the scratch
   text and the push-back text.  */
4266 M17N_OBJECT_UNREF (internal->work_mt);
4267 M17N_OBJECT_UNREF (internal->unread);
4275 @brief Bind a buffer to a code converter.
4277 The mconv_rebind_buffer () function binds buffer area of $N bytes
4278 pointed to by $BUF to code converter $CONVERTER. Subsequent
4279 decodings and encodings are done to/from this newly bound buffer
4283 This function always returns $CONVERTER. */
4286 @brief コードコンバータにバッファ領域を結び付ける.
4288 ´Ø¿ô mconv_rebind_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿Â礤µ $N
4289 ¥Ð¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£
4290 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥Ð¥Ã¥Õ¥¡Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4293 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4295 @latexonly \IPAlabel{mconv_rebind_buffer} @endlatexonly */
4299 mconv_rebind_stream () */
4302 mconv_rebind_buffer (MConverter *converter, const unsigned char *buf, int n)
4304 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Point the converter at the new area (referenced, not copied), record
   its size, and mark the converter as buffer-bound.  */
4306 internal->buf.in = buf;
4308 internal->bufsize = n;
4309 internal->binding = BINDING_BUFFER;
4316 @brief Bind a stream to a code converter.
4318 The mconv_rebind_stream () function binds stream $FP to code
4319 converter $CONVERTER. Subsequent decodings and encodings are done
4320 to/from this newly bound stream.
4323 This function always returns $CONVERTER. */
4326 @brief コードコンバータにストリームを結び付ける.
4328 ´Ø¿ô mconv_rebind_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP ¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4329 $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£
4330 ¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4333 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4335 @latexonly \IPAlabel{mconv_rebind_stream} @endlatexonly */
4339 mconv_rebind_buffer () */
4342 mconv_rebind_stream (MConverter *converter, FILE *fp)
4344 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* A zero-offset fseek probes whether FP can seek; the outcome is
   stored in `seekable'.  */
4346 if (fseek (fp, 0, SEEK_CUR) < 0)
4350 internal->seekable = 0;
4353 internal->seekable = 1;
/* Mark the converter as stream-bound.  */
4355 internal->binding = BINDING_STREAM;
4362 @brief Decode a byte sequence into an M-text.
4364 The mconv_decode () function decodes a byte sequence and appends
4365 the result at the end of M-text $MT. The source byte sequence is
4366 taken from either the buffer area or the stream that is currently
4367 bound to $CONVERTER.
4370 If the operation was successful, mconv_decode () returns updated
4371 $MT. Otherwise it returns @c NULL and assigns an error code to
4372 the external variable #merror_code. */
4375 @brief バイト列を M-text にデコードする.
4377 ´Ø¿ô mconv_decode () ¤Ï¡¢¥Ð¥¤¥ÈÎó¤ò¥Ç¥³¡¼¥É¤·¤Æ¤½¤Î·ë²Ì¤ò M-text
4378 $MT ¤ÎËöÈø¤ËÄɲ乤롣¥Ç¥³¡¼¥É¸µ¤Î¥Ð¥¤¥ÈÎó¤Ï¡¢$CONVERTER
4379 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é¼è¤é¤ì¤ë¡£
4382 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode () ¤Ï¹¹¿·¤µ¤ì¤¿ $MT ¤òÊÖ¤¹¡£
4383 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4384 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4388 @c MERROR_IO, @c MERROR_CODING
4391 mconv_rebind_buffer (), mconv_rebind_stream (),
4392 mconv_encode (), mconv_encode_range (),
4393 mconv_decode_buffer (), mconv_decode_stream () */
4396 mconv_decode (MConverter *converter, MText *mt)
/* Decode bytes from the converter's bound buffer or stream and append
   the resulting characters to MT.  */
4398 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Normalized character limit: the caller's positive at_most, or -1
   meaning "no limit".  */
4399 int at_most = converter->at_most > 0 ? converter->at_most : -1;
4402 M_CHECK_READONLY (mt, NULL);
/* The decoders append to MT in UTF-8, so convert MT first if it is in
   another format.  */
4404 if (mt->format != MTEXT_FORMAT_UTF_8)
4405 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
4408 mtext__enlarge (mt, MAX_UTF8_CHAR_BYTES);
/* Start this call with fresh counters and a clean result code.  */
4410 converter->nchars = converter->nbytes = 0;
4411 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Characters pushed back with mconv_ungetc () are served before any
   new input is read.  */
4413 n = mtext_nchars (internal->unread);
4419 if (at_most > 0 && at_most < limit)
/* Walk the push-back text from its last character towards the front
   (last pushed, first read), appending each character to MT and
   counting it; then delete the consumed tail.  */
4422 for (i = 0, n -= 1; i < limit; i++, converter->nchars++, n--)
4423 mtext_cat_char (mt, mtext_ref_char (internal->unread, n));
4424 mtext_del (internal->unread, n + 1, internal->unread->nchars);
4427 if (at_most == limit)
/* Characters taken from the push-back text count against the limit
   handed to the decoder.  */
4429 converter->at_most -= converter->nchars;
/* Buffer binding: decode the not-yet-consumed part of the buffer and
   advance `used' by the number of bytes the decoder consumed.  */
4433 if (internal->binding == BINDING_BUFFER)
4435 (*internal->coding->decoder) (internal->buf.in + internal->used,
4436 internal->bufsize - internal->used,
4438 internal->used += converter->nbytes;
4440 else if (internal->binding == BINDING_STREAM)
/* Stream binding: input is staged through a local work buffer.
   `last_block' is saved here, cleared, and restored further down.
   Block reads are used only when there is no character limit and the
   stream is seekable; otherwise input is read one byte at a time.  */
4442 unsigned char work[CONVERT_WORKSIZE];
4443 int last_block = converter->last_block;
4444 int use_fread = at_most < 0 && internal->seekable;
4446 converter->last_block = 0;
4449 int nbytes, prev_nbytes;
4451 if (feof (internal->fp))
4454 nbytes = fread (work, sizeof (unsigned char), CONVERT_WORKSIZE,
4458 int c = getc (internal->fp);
4461 work[0] = c, nbytes = 1;
/* A stream error is reported through the converter's result code.  */
4466 if (ferror (internal->fp))
4468 converter->result = MCONVERSION_RESULT_IO_ERROR;
4473 converter->last_block = last_block;
/* nbytes accumulates across decoder calls, so the bytes consumed by
   this call are measured as a delta against prev_nbytes.  */
4474 prev_nbytes = converter->nbytes;
4475 (*internal->coding->decoder) (work, nbytes, mt, converter);
/* The decoder consumed less than was read: hand the surplus back to
   the stream, by seeking backwards or by ungetc of the single byte.  */
4476 if (converter->nbytes - prev_nbytes < nbytes)
4479 fseek (internal->fp, converter->nbytes - prev_nbytes - nbytes,
4482 ungetc (work[0], internal->fp);
4486 || (converter->at_most > 0
4487 && converter->nchars == converter->at_most))
4490 converter->last_block = last_block;
4492 else /* internal->binding == BINDING_NONE */
4493 MERROR (MERROR_CODING, NULL);
/* NOTE(review): this writes back the normalized local, which is -1
   when the caller's at_most was zero or negative on entry -- confirm
   that callers do not rely on the original value being preserved.  */
4495 converter->at_most = at_most;
/* Running out of source bytes is not treated as a failure.  */
4496 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4497 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_SRC)
4504 @brief Decode a buffer area based on a coding system.
4506 The mconv_decode_buffer () function decodes $N bytes of the buffer
4507 area pointed to by $BUF based on the coding system $NAME. A
4508 temporary code converter for decoding is automatically created
4512 If the operation was successful, mconv_decode_buffer ()
4513 returns the resulting M-text. Otherwise it returns @c NULL and
4514 assigns an error code to the external variable #merror_code. */
4517 @brief コード系に基づいてバッファ領域をデコードする.
4519 ´Ø¿ô mconv_decode_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿ $N
4520 ¥Ð¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ò¡¢¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£
4521 ¥Ç¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4524 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_buffer () ¤ÏÆÀ¤é¤ì¤¿ M-text ¤òÊÖ¤¹¡£
4525 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4526 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4530 @c MERROR_IO, @c MERROR_CODING
4533 mconv_decode (), mconv_decode_stream () */
4536 mconv_decode_buffer (MSymbol name, const unsigned char *buf, int n)
/* One-shot decode of BUF: a temporary buffer-bound converter is
   created here and freed again below.  */
4538 MConverter *converter = mconv_buffer_converter (name, buf, n);
/* When decoding fails, the result M-text is unreferenced.  */
4544 if (! mconv_decode (converter, mt))
4546 M17N_OBJECT_UNREF (mt);
4549 mconv_free_converter (converter);
4556 @brief Decode a stream input based on a coding system.
4558 The mconv_decode_stream () function decodes the entire byte
4559 sequence read in from stream $FP based on the coding system $NAME.
4560 A code converter for decoding is automatically created and freed.
4563 If the operation was successful, mconv_decode_stream () returns
4564 the resulting M-text. Otherwise it returns @c NULL and assigns an
4565 error code to the external variable #merror_code. */
4568 @brief コード系に基づいてストリーム入力をデコードする.
4570 ´Ø¿ô mconv_decode_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP
4571 ¤«¤éÆɤ߹þ¤Þ¤ì¤ë¥Ð¥¤¥ÈÎóÁ´ÂΤò¡¢¥³¡¼¥É·Ï $NAME
4572 ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£¥Ç¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4575 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_stream () ¤ÏÆÀ¤é¤ì¤¿ M-text
4576 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4577 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4581 @c MERROR_IO, @c MERROR_CODING
4584 mconv_decode (), mconv_decode_buffer () */
4587 mconv_decode_stream (MSymbol name, FILE *fp)
/* One-shot decode of FP: a temporary stream-bound converter is
   created here and freed again below.  */
4589 MConverter *converter = mconv_stream_converter (name, fp);
/* When decoding fails, the result M-text is unreferenced.  */
4595 if (! mconv_decode (converter, mt))
4597 M17N_OBJECT_UNREF (mt);
4600 mconv_free_converter (converter);
4606 /***en @brief Encode an M-text into a byte sequence.
4608 The mconv_encode () function encodes M-text $MT and writes the
4609 resulting byte sequence into the buffer area or the stream that is
4610 currently bound to code converter $CONVERTER.
4613 If the operation was successful, mconv_encode () returns the
4614 number of written bytes. Otherwise it returns -1 and assigns an
4615 error code to the external variable #merror_code. */
4618 @brief M-text をバイト列にエンコードする.
4620 ´Ø¿ô mconv_encode () ¤Ï¡¢M-text $MT ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4621 $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤ËÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò½ñ¤¹þ¤à¡£
4624 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
4625 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4626 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4630 @c MERROR_IO, @c MERROR_CODING
4633 mconv_rebind_buffer (), mconv_rebind_stream(),
4634 mconv_decode (), mconv_encode_range () */
4637 mconv_encode (MConverter *converter, MText *mt)
/* Encode all of MT by delegating to mconv_encode_range () with the
   range from 0 to mtext_nchars (MT).  */
4639 return mconv_encode_range (converter, mt, 0, mtext_nchars (mt));
4645 @brief Encode a part of an M-text.
4647 The mconv_encode_range () function encodes the text between $FROM
4648 (inclusive) and $TO (exclusive) in M-text $MT and writes the
4649 resulting byte sequence into the buffer area or the stream that is
4650 currently bound to code converter $CONVERTER.
4653 If the operation was successful, mconv_encode_range () returns the
4654 number of written bytes. Otherwise it returns -1 and assigns an
4655 error code to the external variable #merror_code. */
4658 @brief M-text の一部をバイト列にエンコードする.
4660 ´Ø¿ô mconv_encode_range () ¤Ï¡¢M-text $MT ¤Î $FROM
4661 ¡Ê$FROM ¼«ÂΤâ´Þ¤à¡Ë¤«¤é $TO ¡Ê$TO¼«ÂΤϴޤޤʤ¤¡Ë
4662 ¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4663 $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤ËÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò½ñ¤¹þ¤à¡£
4666 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_range ()
4667 ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
4668 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4672 @c MERROR_RANGE, @c MERROR_IO, @c MERROR_CODING
4675 mconv_rebind_buffer (), mconv_rebind_stream(),
4676 mconv_decode (), mconv_encode () */
4679 mconv_encode_range (MConverter *converter, MText *mt, int from, int to)
/* Encode the characters of MT between FROM and TO and write the bytes
   to the converter's bound buffer or stream.  */
4681 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Both positions are validated against MT; -1 is the failure value
   handed to the check macro.  */
4683 M_CHECK_POS_X (mt, from, -1);
4684 M_CHECK_POS_X (mt, to, -1);
/* A positive at_most caps the number of characters encoded.  */
4688 if (converter->at_most > 0 && from + converter->at_most < to)
4689 to = from + converter->at_most;
/* Start this call with fresh counters and a clean result code.  */
4691 converter->nchars = converter->nbytes = 0;
4692 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Attach the coding system's name to the range as an Mcoding text
   property.  */
4694 mtext_put_prop (mt, from, to, Mcoding, internal->coding->name);
/* Buffer binding: encode into the unused part of the buffer and
   advance `used' by the number of bytes produced.  */
4695 if (internal->binding == BINDING_BUFFER)
4697 (*internal->coding->encoder) (mt, from, to,
4698 internal->buf.out + internal->used,
4699 internal->bufsize - internal->used,
4701 internal->used += converter->nbytes;
4703 else if (internal->binding == BINDING_STREAM)
/* Stream binding: encode into a local work buffer, then write that
   buffer out to the stream.  */
4705 unsigned char work[CONVERT_WORKSIZE];
/* nbytes accumulates across encoder calls, so the bytes produced by
   this call are measured as a delta against prev_nbytes.  */
4710 int prev_nbytes = converter->nbytes;
4713 (*internal->coding->encoder) (mt, from, to, work,
4714 CONVERT_WORKSIZE, converter);
4715 this_nbytes = converter->nbytes - prev_nbytes;
/* fwrite may write fewer items than requested, so keep writing until
   everything is out or the stream reports an error.  */
4716 while (written < this_nbytes)
4718 int wrtn = fwrite (work + written, sizeof (unsigned char),
4719 this_nbytes - written, internal->fp);
4721 if (ferror (internal->fp))
/* A short write at this point becomes an I/O error result.  */
4725 if (written < this_nbytes)
4727 converter->result = MCONVERSION_RESULT_IO_ERROR;
/* NOTE(review): the byte count above is taken as a delta, but FROM is
   advanced by converter->nchars itself.  If encoders accumulate nchars
   across calls the same way they accumulate nbytes, FROM over-advances
   from the second pass onwards -- confirm against the encoders.  */
4730 from += converter->nchars;
4733 else /* fail safe */
4734 MERROR (MERROR_CODING, -1);
/* Running out of destination space is not treated as a failure; the
   number of bytes produced is returned in that case too.  */
4736 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4737 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_DST)
4738 ? converter->nbytes : -1);
4744 @brief Encode an M-text into a buffer area.
4746 The mconv_encode_buffer () function encodes M-text $MT based on
4747 coding system $NAME and writes the resulting byte sequence into the
4748 buffer area pointed to by $BUF. At most $N bytes are written. A
4749 temporary code converter for encoding is automatically created
4753 If the operation was successful, mconv_encode_buffer () returns
4754 the number of written bytes. Otherwise it returns -1 and assigns
4755 an error code to the external variable #merror_code. */
4758 @brief M-text をエンコードしてバッファ領域に書き込む.
4760 ´Ø¿ô mconv_encode_buffer () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME
4761 ¤Ë´ð¤Å¤¤¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò $BUF ¤Î»Ø¤¹¥Ð¥Ã¥Õ¥¡Îΰè¤Ë½ñ¤¹þ¤à¡£
4762 $N ¤Ï½ñ¤¹þ¤àºÇÂç¥Ð¥¤¥È¿ô¤Ç¤¢¤ë¡£
4763 ¥¨¥ó¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4766 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_buffer () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
4767 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð-1¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4771 @c MERROR_IO, @c MERROR_CODING
4774 mconv_encode (), mconv_encode_stream () */
4777 mconv_encode_buffer (MSymbol name, MText *mt, unsigned char *buf, int n)
/* One-shot encode into BUF: a temporary buffer-bound converter is
   created here and freed again below.  */
4779 MConverter *converter = mconv_buffer_converter (name, buf, n);
/* The whole of MT is encoded; `ret' holds mconv_encode ()'s result.  */
4784 ret = mconv_encode (converter, mt);
4785 mconv_free_converter (converter);
4792 @brief Encode an M-text to write to a stream.
4794 The mconv_encode_stream () function encodes M-text $MT based on
4795 coding system $NAME and writes the resulting byte sequence to
4796 stream $FP. A temporary code converter for encoding is
4797 automatically created and freed.
4800 If the operation was successful, mconv_encode_stream () returns
4801 the number of written bytes. Otherwise it returns -1 and assigns
4802 an error code to the external variable #merror_code. */
4805 @brief M-text をエンコードしてストリームに書き込む.
4807 ´Ø¿ô mconv_encode_stream () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME
4808 ¤Ë´ð¤Å¤¤¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò¥¹¥È¥ê¡¼¥à $FP
4809 ¤Ë½ñ¤½Ð¤¹¡£¥¨¥ó¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4812 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_stream ()
4813 ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1
4814 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4818 @c MERROR_IO, @c MERROR_CODING
4821 mconv_encode (), mconv_encode_buffer (), mconv_encode_file () */
4824 mconv_encode_stream (MSymbol name, MText *mt, FILE *fp)
/* One-shot encode to FP: a temporary stream-bound converter is
   created here and freed again below.  */
4826 MConverter *converter = mconv_stream_converter (name, fp);
/* The whole of MT is encoded; `ret' holds mconv_encode ()'s result.  */
4831 ret = mconv_encode (converter, mt);
4832 mconv_free_converter (converter);
4839 @brief Read a character via a code converter.
4841 The mconv_getc () function reads one character from the buffer
4842 area or the stream that is currently bound to code converter
4843 $CONVERTER. The decoder of $CONVERTER is used to decode the byte
4844 sequence. The internal status of $CONVERTER is updated
4848 If the operation was successful, mconv_getc () returns the
4849 character read in. If the input source reaches EOF, it returns @c
4850 EOF without changing the external variable #merror_code. If an
4851 error is detected, it returns @c EOF and assigns an error code to
4855 @brief コードコンバータ経由で一文字を読みこむ.
4857 ´Ø¿ô mconv_getc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
4858 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤éʸ»ú¤ò°ì¤ÄÆɤ߹þ¤à¡£
4859 ¥Ð¥¤¥ÈÎó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£
4860 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4863 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_getc () ¤ÏÆɤ߹þ¤Þ¤ì¤¿Ê¸»ú¤òÊÖ¤¹¡£ÆþÎϸ»¤¬
4864 EOF ¤Ë㤷¤¿¾ì¹ç¤Ï¡¢³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c EOF
4865 ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c EOF ¤òÊÖ¤·¡¢#merror_code
4866 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4873 mconv_ungetc (), mconv_putc (), mconv_gets () */
4876 mconv_getc (MConverter *converter)
4878 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Remember the caller's character limit so it can be restored.  */
4879 int at_most = converter->at_most;
/* Decode into the emptied scratch M-text with the limit temporarily
   forced to one character, then put the caller's limit back.  */
4881 mtext_reset (internal->work_mt);
4882 converter->at_most = 1;
4883 mconv_decode (converter, internal->work_mt);
4884 converter->at_most = at_most;
/* Exactly one decoded character means success; it is read back from
   the start of the scratch text's data.  */
4885 return (converter->nchars == 1
4886 ? STRING_CHAR (internal->work_mt->data)
4893 @brief Push a character back to a code converter.
4895 The mconv_ungetc () function pushes character $C back to code
4896 converter $CONVERTER. Any number of characters can be pushed
4897 back. The character pushed back last is read first by the
4898 subsequent mconv_getc () call. The characters pushed back are
4899 registered only in $CONVERTER; they are not written to the input
4900 source. The internal status of $CONVERTER is updated
4904 If the operation was successful, mconv_ungetc () returns $C.
4905 Otherwise it returns @c EOF and assigns an error code to the
4906 external variable #merror_code. */
4909 @brief コードコンバータに一文字戻す.
4911 ´Ø¿ô mconv_ungetc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ëʸ»ú $C
4912 ¤ò²¡¤·Ì᤹¡£Ìᤵ¤ì¤ëʸ»ú¿ô¤ËÀ©¸Â¤Ï¤Ê¤¤¡£¤³¤Î¸å¤Ç mconv_getc ()
4913 ¤ò¸Æ¤Ó½Ð¤·¤¿ºÝ¤Ë¤Ï¡¢ºÇ¸å¤ËÌᤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤ËÆɤޤì¤ë¡£Ìᤵ¤ì¤¿Ê¸»ú¤Ï
4914 $CONVERTER ¤ÎÆâÉô¤ËÃߤ¨¤é¤ì¤ë¤À¤±¤Ç¤¢¤ê¡¢¼ÂºÝ¤ËÆþÎϸ»¤Ë½ñ¤¹þ¤Þ¤ì¤ë¤ï¤±¤Ç¤Ï¤Ê¤¤¡£
4915 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4918 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_ungetc () ¤Ï $C ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c
4919 EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4923 @c MERROR_CODING, @c MERROR_CHAR
4926 mconv_getc (), mconv_putc (), mconv_gets () */
4929 mconv_ungetc (MConverter *converter, int c)
4931 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Validate C; EOF is the failure value handed to the check macro.  */
4933 M_CHECK_CHAR (c, EOF);
/* Append C to the push-back text.  mconv_decode () consumes that text
   from its end, so the character pushed last is read first.  */
4935 converter->result = MCONVERSION_RESULT_SUCCESS;
4936 mtext_cat_char (internal->unread, c);
4943 @brief Write a character via a code converter.
4945 The mconv_putc () function writes character $C to the buffer area
4946 or the stream that is currently bound to code converter
4947 $CONVERTER. The encoder of $CONVERTER is used to encode the
4948 character. The number of bytes actually written is set to the @c
4949 nbytes member of $CONVERTER. The internal status of $CONVERTER
4950 is updated appropriately.
4953 If the operation was successful, mconv_putc () returns $C.
4954 If an error is detected, it returns @c EOF and assigns
4955 an error code to the external variable #merror_code. */
4958 @brief コードコンバータを経由して一文字書き出す.
4960 ´Ø¿ô mconv_putc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
4961 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤Ëʸ»ú $C
4962 ¤ò½ñ¤½Ð¤¹¡£Ê¸»ú¤Î¥¨¥ó¥³¡¼¥É¤Ë¤Ï $CONVERTER
4963 ¤Î¥¨¥ó¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¼ÂºÝ¤Ë½ñ¤½Ð¤µ¤ì¤¿¥Ð¥¤¥È¿ô¤Ï¡¢$CONVERTER ¤Î¥á¥ó¥Ð¡¼
4964 @c nbytes ¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£$CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4967 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_putc () ¤Ï $C ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï
4968 @c EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4972 @c MERROR_CODING, @c MERROR_IO, @c MERROR_CHAR
4975 mconv_getc (), mconv_ungetc (), mconv_gets () */
4978 mconv_putc (MConverter *converter, int c)
4980 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Validate C; EOF is the failure value handed to the check macro.  */
4982 M_CHECK_CHAR (c, EOF);
/* Place C alone in the scratch M-text and encode that one-character
   range through mconv_encode_range (); a negative result there is
   this function's error case.  */
4983 mtext_reset (internal->work_mt);
4984 mtext_cat_char (internal->work_mt, c);
4985 if (mconv_encode_range (converter, internal->work_mt, 0, 1) < 0)
4993 @brief Read a line using a code converter.
4995 The mconv_gets () function reads one line from the buffer area or
4996 the stream that is currently bound to code converter $CONVERTER.
4997 The decoder of $CONVERTER is used for decoding. The decoded
4998 character sequence is appended at the end of M-text $MT. The
4999 final newline character in the original byte sequence is not
5000 appended. The internal status of $CONVERTER is updated
5004 If the operation was successful, mconv_gets () returns the
5005 modified $MT. If it encounters EOF without reading a single
5006 character, it returns $MT without changing it. If an error is
5007 detected, it returns @c NULL and assigns an error code to
5011 @brief コードコンバータを使って一行読み込む.
5013 ´Ø¿ô mconv_gets () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER
5014 ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é 1 ¹Ô¤òÆɤ߹þ¤à¡£
5015 ¥Ð¥¤¥ÈÎó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER
5016 ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¥Ç¥³¡¼¥É¤µ¤ì¤¿Ê¸»úÎó¤Ï M-text $MT
5017 ¤ÎËöÈø¤ËÄɲ䵤ì¤ë¡£¸µ¤Î¥Ð¥¤¥ÈÎó¤Î½ªÃ¼²þ¹Ôʸ»ú¤ÏÄɲ䵤ì¤Ê¤¤¡£
5018 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
5021 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_gets () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT
5022 ¤òÊÖ¤¹¡£¤â¤·1ʸ»ú¤âÆɤޤº¤Ë EOF ¤ËÁø¶ø¤·¤¿¾ì¹ç¤Ï¡¢$MT
5023 ¤òÊѹ¹¤»¤º¤Ë¤½¤Î¤Þ¤ÞÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢
5024 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
5031 mconv_getc (), mconv_ungetc (), mconv_putc () */
5034 mconv_gets (MConverter *converter, MText *mt)
5038 M_CHECK_READONLY (mt, NULL);
5039 if (mt->format != MTEXT_FORMAT_UTF_8)
5040 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
5044 c = mconv_getc (converter);
5045 if (c == EOF || c == '\n')
5047 mtext_cat_char (mt, c);
5049 if (c == EOF && converter->result != MCONVERSION_RESULT_SUCCESS)
5050 /* mconv_getc () sets #merror_code */