1 /* coding.c -- code conversion module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief Coding system objects and API for them.
27 The m17n library represents a character encoding scheme (CES) of
28 coded character sets (CCS) as an object called @e coding @e
29 system. Application programs can add original coding systems.
31 To @e encode means converting code-points to character codes and
32 to @e decode means converting character codes back to code-points.
34 Application programs can decode a byte sequence with a specified
35 coding system into an M-text, and inversely, can encode an M-text
36 into a byte sequence. */
40 @brief コード系オブジェクトとそれに関する API.
42 m17n ライブラリは、符号化文字集合 (coded character set; CCS) の文
43 字符号化方式 (character encoding scheme; CES) を @e コード系 と呼
44 ぶオブジェクトで表現する。アプリケーションプログラムは独自にコード
45 系を追加することもできる。
47 コードポイントから文字コードへの変換を @e エンコード と呼び、文字
48 コードからコードポイントへの変換を @e デコード と呼ぶ。
50 アプリケーションプログラムは、指定されたコード系でバイト列をデコー
51 ドすることで M-text を得ることができる。また逆に、指定されたコード
52 系で M-text をエンコードすることでバイト列を得ることができる。 */
56 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
57 /*** @addtogroup m17nInternal
65 #include <sys/types.h>
70 #include "m17n-misc.h"
73 #include "character.h"
80 #define NUM_SUPPORTED_CHARSETS 32
82 /** Structure for coding system object. */
86 /** Name of the coding system. */
89 /** Type of the coding system. */
92 /* Number of supported charsets. */
95 /** Array of supported charsets. */
96 MCharset *charsets[NUM_SUPPORTED_CHARSETS];
98 /** If non-NULL, function to call at the time of creating and
99 resetting a converter. */
100 int (*resetter) (MConverter *converter);
102 int (*decoder) (const unsigned char *str, int str_bytes, MText *mt,
103 MConverter *converter);
105 int (*encoder) (MText *mt, int from, int to,
106 unsigned char *str, int str_bytes,
107 MConverter *converter);
109 /** If non-zero, the coding system decode/encode ASCII characters as
111 int ascii_compatible;
113 /** Pointer to extra information given when the coding system is
114 defined. The meaning depends on <type>. */
117 /** Pointer to information referred on conversion. The meaning
118 depends on <type>. The value NULL means that the coding system
128 MCodingSystem **codings;
131 static struct MCodingList coding_list;
133 static MPlist *coding_definition_list;
137 Pointer to a structure of a coding system. */
139 コード系を表わすデータ構造へのポインタ */
140 MCodingSystem *coding;
143 Buffer for carryover bytes generated while decoding. */
145 デコード中のキャリィオーバーバイト用バッファ */
146 unsigned char carryover[256];
149 Number of carryover bytes. */
151 キャリィオーバーバイト数 */
155 Beginning of the byte sequence bound to this converter. */
157 このコンバータに結び付けられたバイト列の先頭位置 */
167 Number of bytes already consumed in buf. */
169 buf 内ですでに消費されたバイト数 */
173 Stream bound to this converter. */
175 このコンバータに結び付けられたストリーム */
179 Which of above two is in use. */
181 上記2者のいずれが使われているか */
201 /* Local macros and functions. */
203 /** At first, set SRC_BASE to SRC. Then check if we have already
204 produced AT_MOST chars. If so, set SRC_END to SRC, and jump to
205 source_end. Otherwise, get one more byte C from SRC. In that
206 case, if SRC == SRC_END, jump to the label source_end. */
208 #define ONE_MORE_BASE_BYTE(c) \
211 if (nchars == at_most) \
216 if (src == src_stop) \
218 if (src == src_end) \
220 src_base = src = source; \
221 if (src == src_end) \
223 src_stop = src_end; \
229 /** Get one more byte C from SRC. If SRC == SRC_END, jump to the
232 #define ONE_MORE_BYTE(c) \
234 if (src == src_stop) \
236 if (src == src_end) \
239 if (src == src_end) \
241 src_stop = src_end; \
247 #define REWIND_SRC_TO_BASE() \
249 if (src_base < source || src_base >= src_end) \
250 src_stop = internal->carryover + internal->carryover_bytes; \
255 /** Push back byte C to SRC. */
257 #define UNGET_ONE_BYTE(c) \
263 internal->carryover[0] = c; \
264 internal->carryover_bytes = 1; \
265 src = internal->carryover; \
266 src_stop = src + 1; \
271 /** Store multibyte representation of character C at DST and increment
272 DST to the next of the produced bytes. DST must be a pointer to
273 data area of M-text MT. If the produced bytes are going to exceed
274 DST_END, enlarge the data area of MT. */
276 #define EMIT_CHAR(c) \
278 int bytes = CHAR_BYTES (c); \
281 if (dst + bytes + 1 > dst_end) \
283 len = dst - mt->data; \
284 bytes = mt->allocated + bytes + (src_stop - src); \
285 mtext__enlarge (mt, bytes); \
286 dst = mt->data + len; \
287 dst_end = mt->data + mt->allocated; \
289 dst += CHAR_STRING (c, dst); \
294 /* Check if there is enough room to produce LEN bytes at DST. If not,
295 go to the label insufficient_destination. */
297 #define CHECK_DST(len) \
299 if (dst + (len) > dst_end) \
300 goto insufficient_destination; \
304 /** Take NUM_CHARS characters (NUM_BYTES bytes) already stored at
305 (MT->data + MT->nbytes) into MT, and put charset property on
306 them with CHARSET->name. */
308 #define TAKEIN_CHARS(mt, num_chars, num_bytes, charset) \
310 int chars = (num_chars); \
314 mtext__takein ((mt), chars, (num_bytes)); \
316 mtext_put_prop ((mt), (mt)->nchars - chars, (mt)->nchars, \
317 Mcharset, (void *) ((charset)->name)); \
322 #define SET_SRC(mt, format, from, to) \
324 if (format <= MTEXT_FORMAT_UTF_8) \
326 src = mt->data + POS_CHAR_TO_BYTE (mt, from); \
327 src_end = mt->data + POS_CHAR_TO_BYTE (mt, to); \
329 else if (format <= MTEXT_FORMAT_UTF_16BE) \
332 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, from); \
334 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, to); \
338 src = mt->data + (sizeof (int)) * from; \
339 src_end = mt->data + (sizeof (int)) * to; \
344 #define ONE_MORE_CHAR(c, bytes, format) \
346 if (src == src_end) \
348 if (format <= MTEXT_FORMAT_UTF_8) \
349 c = STRING_CHAR_AND_BYTES (src, bytes); \
350 else if (format <= MTEXT_FORMAT_UTF_16BE) \
352 c = mtext_ref_char (mt, from++); \
353 bytes = (sizeof (short)) * CHAR_UNITS_UTF16 (c); \
357 c = ((unsigned *) (mt->data))[from++]; \
358 bytes = sizeof (int); \
364 encode_unsupporeted_char (int c, unsigned char *dst, unsigned char *dst_end,
370 len = c < 0x10000 ? 8 : 10;
371 if (dst + len > dst_end)
374 mtext_put_prop (mt, pos, pos + 1, Mcoding, Mnil);
375 format = (c < 0xD800 ? "<U+%04X>"
376 : c < 0xE000 ? "<M+%04X>"
377 : c < 0x10000 ? "<U+%04X>"
378 : c < 0x110000 ? "<U+%06X>"
380 sprintf ((char *) dst, format, c);
386 /** Finish decoding of bytes at SOURCE (ending at SRC_END) into NCHARS
387 characters by CONVERTER into M-text MT. SRC is a pointer to the
388 not-yet processed bytes. ERROR is 1 iff an invalid byte was
392 finish_decoding (MText *mt, MConverter *converter, int nchars,
393 const unsigned char *source, const unsigned char *src_end,
394 const unsigned char *src,
397 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
400 internal->carryover_bytes = 0;
402 || (converter->last_block
403 && ! converter->lenient))
404 converter->result = MCONVERSION_RESULT_INVALID_BYTE;
405 else if (! converter->last_block)
407 unsigned char *dst = internal->carryover;
409 if (src < source || src > src_end)
411 dst += internal->carryover_bytes;
414 while (src < src_end)
416 internal->carryover_bytes = dst - internal->carryover;
417 converter->result = MCONVERSION_RESULT_INSUFFICIENT_SRC;
421 unsigned char *dst = mt->data + mt->nbytes;
422 unsigned char *dst_end = mt->data + mt->allocated;
423 const unsigned char *src_stop = src_end;
425 int last_nchars = nchars;
427 if (src < source || src > src_end)
428 src_stop = internal->carryover + internal->carryover_bytes;
431 if (converter->at_most && nchars == converter->at_most)
445 TAKEIN_CHARS (mt, nchars - last_nchars, dst - (mt->data + mt->nbytes),
447 internal->carryover_bytes = 0;
450 converter->nchars += nchars;
451 converter->nbytes += ((src < source || src > src_end) ? 0 : src - source);
452 return (converter->result == MCONVERSION_RESULT_INVALID_BYTE ? -1 : 0);
457 /* Stuff for coding systems of type MCODING_TYPE_CHARSET. */
460 setup_coding_charset (MCodingSystem *coding)
462 int ncharsets = coding->ncharsets;
463 unsigned *code_charset_table;
467 /* At first, reorder charset list by dimensions (a charset of
468 smaller dimension comes first). As the number of charsets is
469 usually very small (at most 32), we do a simple sort. */
474 MTABLE_ALLOCA (charsets, NUM_SUPPORTED_CHARSETS, MERROR_CODING);
475 memcpy (charsets, coding->charsets,
476 sizeof (MCharset *) * NUM_SUPPORTED_CHARSETS);
477 for (i = 0; i < 4; i++)
478 for (j = 0; j < ncharsets; j++)
479 if (charsets[j]->dimension == i)
480 coding->charsets[idx++] = charsets[j];
483 MTABLE_CALLOC (code_charset_table, 256, MERROR_CODING);
486 int dim = coding->charsets[ncharsets]->dimension;
487 int from = coding->charsets[ncharsets]->code_range[(dim - 1) * 4];
488 int to = coding->charsets[ncharsets]->code_range[(dim - 1) * 4 + 1];
490 if (coding->charsets[ncharsets]->ascii_compatible)
491 coding->ascii_compatible = 1;
493 code_charset_table[from++] |= 1 << ncharsets;
496 coding->extra_spec = (void *) code_charset_table;
501 reset_coding_charset (MConverter *converter)
503 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
504 MCodingSystem *coding = internal->coding;
507 && setup_coding_charset (coding) < 0)
514 decode_coding_charset (const unsigned char *source, int src_bytes, MText *mt,
515 MConverter *converter)
517 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
518 MCodingSystem *coding = internal->coding;
519 const unsigned char *src = internal->carryover;
520 const unsigned char *src_stop = src + internal->carryover_bytes;
521 const unsigned char *src_end = source + src_bytes;
522 const unsigned char *src_base;
523 unsigned char *dst = mt->data + mt->nbytes;
524 unsigned char *dst_end = mt->data + mt->allocated;
527 int at_most = converter->at_most > 0 ? converter->at_most : -1;
529 unsigned *code_charset_table = (unsigned *) coding->extra_spec;
530 MCharset **charsets = coding->charsets;
531 MCharset *charset = mcharset__ascii;
536 MCharset *this_charset = NULL;
540 ONE_MORE_BASE_BYTE (c);
541 mask = code_charset_table[c];
551 while (! (mask & 1)) mask >>= 1, idx++;
552 this_charset = charsets[idx];
553 dim = this_charset->dimension;
557 code = (code << 8) | c;
560 c = DECODE_CHAR (this_charset, code);
567 if (! converter->lenient)
569 REWIND_SRC_TO_BASE ();
571 this_charset = mcharset__binary;
574 if (this_charset != mcharset__ascii
575 && this_charset != charset)
577 TAKEIN_CHARS (mt, nchars - last_nchars,
578 dst - (mt->data + mt->nbytes), charset);
579 charset = this_charset;
580 last_nchars = nchars;
584 /* We reach here because of an invalid byte. */
588 TAKEIN_CHARS (mt, nchars - last_nchars,
589 dst - (mt->data + mt->nbytes), charset);
590 return finish_decoding (mt, converter, nchars,
591 source, src_end, src_base, error);
595 encode_coding_charset (MText *mt, int from, int to,
596 unsigned char *destination, int dst_bytes,
597 MConverter *converter)
599 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
600 MCodingSystem *coding = internal->coding;
601 unsigned char *src, *src_end;
602 unsigned char *dst = destination;
603 unsigned char *dst_end = dst + dst_bytes;
605 int ncharsets = coding->ncharsets;
606 MCharset **charsets = coding->charsets;
607 int ascii_compatible = coding->ascii_compatible;
608 enum MTextFormat format = mt->format;
610 SET_SRC (mt, format, from, to);
615 ONE_MORE_CHAR (c, bytes, format);
617 if (c < 0x80 && ascii_compatible)
625 MCharset *charset = NULL;
630 charset = charsets[i];
631 code = ENCODE_CHAR (charset, c);
632 if (code != MCHAR_INVALID_CODE)
634 if (++i == ncharsets)
635 goto unsupported_char;
638 CHECK_DST (charset->dimension);
639 if (charset->dimension == 1)
643 else if (charset->dimension == 2)
646 *dst++ = code & 0xFF;
648 else if (charset->dimension == 3)
651 *dst++ = (code >> 8) & 0xFF;
652 *dst++ = code & 0xFF;
657 *dst++ = (code >> 16) & 0xFF;
658 *dst++ = (code >> 8) & 0xFF;
659 *dst++ = code & 0xFF;
670 if (! converter->lenient)
672 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
674 goto insufficient_destination;
680 /* We reach here because of an unsupported char. */
681 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
684 insufficient_destination:
685 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
688 converter->nchars += nchars;
689 converter->nbytes += dst - destination;
690 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
694 /* Stuff for coding systems of type MCODING_TYPE_UTF (8). */
/** Return the charset to which the UTF-8 style byte sequence starting
    at P belongs, judged from the lead byte (P)[0] and from whether the
    following bytes are trail bytes (i.e. not CHAR_HEAD_P):

      - 1, 2 and 3 byte sequences          -> mcharset__unicode
      - 4 byte sequence, code <  0x110000  -> mcharset__unicode
      - 4 byte sequence, code >= 0x110000  -> mcharset__m17n
      - 5 and 6 byte sequences             -> mcharset__m17n
      - a character head where a trail byte is required, or a lead
        byte with all of the bits 0xFE set -> mcharset__binary

    For a 4 byte sequence, the five most significant bits of the
    21-bit code (i.e. code >> 16) are the low three bits of the lead
    byte followed by bits 5..4 of the first trail byte.  The two parts
    occupy disjoint bit positions, so they must be merged with `|';
    merging them with `&' always yields 0 and would classify every
    4 byte sequence as Unicode.

    P is evaluated several times; it must not have side effects.  */

#define UTF8_CHARSET(p)						\
  (! ((p)[0] & 0x80) ? (mcharset__unicode)			\
   : CHAR_HEAD_P ((p) + 1) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x20) ? (mcharset__unicode)			\
   : CHAR_HEAD_P ((p) + 2) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x10) ? (mcharset__unicode)			\
   : CHAR_HEAD_P ((p) + 3) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x08) ? ((((((p)[0] & 0x07) << 2)		\
			    | (((p)[1] & 0x30) >> 4)) <= 0x10)	\
			  ? (mcharset__unicode)			\
			  : (mcharset__m17n))			\
   : CHAR_HEAD_P ((p) + 4) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x04) ? (mcharset__m17n)			\
   : CHAR_HEAD_P ((p) + 5) ? (mcharset__binary)			\
   : ! ((p)[0] & 0x02) ? (mcharset__m17n)			\
   : (mcharset__binary))
715 decode_coding_utf_8 (const unsigned char *source, int src_bytes, MText *mt,
716 MConverter *converter)
718 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
719 MCodingSystem *coding = internal->coding;
720 const unsigned char *src = internal->carryover;
721 const unsigned char *src_stop = src + internal->carryover_bytes;
722 const unsigned char *src_end = source + src_bytes;
723 const unsigned char *src_base;
724 unsigned char *dst = mt->data + mt->nbytes;
725 unsigned char *dst_end = mt->data + mt->allocated;
728 int at_most = converter->at_most > 0 ? converter->at_most : -1;
730 int full = converter->lenient || (coding->charsets[0] == mcharset__m17n);
731 MCharset *charset = NULL;
736 MCharset *this_charset = NULL;
738 ONE_MORE_BASE_BYTE (c);
742 else if (!(c & 0x40))
744 else if (!(c & 0x20))
745 bytes = 2, c &= 0x1F;
746 else if (!(c & 0x10))
747 bytes = 3, c &= 0x0F;
748 else if (!(c & 0x08))
749 bytes = 4, c &= 0x07;
750 else if (!(c & 0x04))
751 bytes = 5, c &= 0x03;
752 else if (!(c & 0x02))
753 bytes = 6, c &= 0x01;
760 if ((c1 & 0xC0) != 0x80)
762 c = (c << 6) | (c1 & 0x3F);
766 || c < 0xD800 || (c >= 0xE000 && c < 0x110000))
770 if (! converter->lenient)
772 REWIND_SRC_TO_BASE ();
774 this_charset = mcharset__binary;
777 if (this_charset != charset)
779 TAKEIN_CHARS (mt, nchars - last_nchars,
780 dst - (mt->data + mt->nbytes), charset);
781 charset = this_charset;
782 last_nchars = nchars;
786 /* We reach here because of an invalid byte. */
790 TAKEIN_CHARS (mt, nchars - last_nchars,
791 dst - (mt->data + mt->nbytes), charset);
792 return finish_decoding (mt, converter, nchars,
793 source, src_end, src_base, error);
797 encode_coding_utf_8 (MText *mt, int from, int to,
798 unsigned char *destination, int dst_bytes,
799 MConverter *converter)
801 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
802 MCodingSystem *coding = internal->coding;
803 unsigned char *src, *src_end;
804 unsigned char *dst = destination;
805 unsigned char *dst_end = dst + dst_bytes;
807 enum MTextFormat format = mt->format;
809 SET_SRC (mt, format, from, to);
811 if (format <= MTEXT_FORMAT_UTF_8
812 && (converter->lenient
813 || coding->charsets[0] == mcharset__m17n))
815 if (dst_bytes < src_end - src)
817 int byte_pos = (src + dst_bytes) - mt->data;
819 to = POS_BYTE_TO_CHAR (mt, byte_pos);
820 byte_pos = POS_CHAR_TO_BYTE (mt, to);
821 src_end = mt->data + byte_pos;
822 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
824 memcpy (destination, src, src_end - src);
826 dst += src_end - src;
834 ONE_MORE_CHAR (c, bytes, format);
836 if ((c >= 0xD800 && c < 0xE000) || c >= 0x110000)
839 dst += CHAR_STRING (c, dst);
843 /* We reach here because of an unsupported char. */
844 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
847 insufficient_destination:
848 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
851 converter->nchars += nchars;
852 converter->nbytes += dst - destination;
853 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
857 /* Stuff for coding systems of type MCODING_TYPE_UTF (16 & 32). */
878 enum utf_endian endian;
882 setup_coding_utf (MCodingSystem *coding)
884 MCodingInfoUTF *info = (MCodingInfoUTF *) (coding->extra_info);
885 MCodingInfoUTF *spec;
887 if (info->code_unit_bits == 8)
888 coding->ascii_compatible = 1;
889 else if (info->code_unit_bits == 16
890 || info->code_unit_bits == 32)
892 if (info->bom < 0 || info->bom > 2
893 || info->endian < 0 || info->endian > 1)
894 MERROR (MERROR_CODING, -1);
899 MSTRUCT_CALLOC (spec, MERROR_CODING);
901 coding->extra_spec = (void *) (spec);
906 reset_coding_utf (MConverter *converter)
908 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
909 MCodingSystem *coding = internal->coding;
910 struct utf_status *status = (struct utf_status *) &(converter->status);
913 && setup_coding_utf (coding) < 0)
917 status->surrogate = 0;
918 status->bom = ((MCodingInfoUTF *) (coding->extra_spec))->bom;
919 status->endian = ((MCodingInfoUTF *) (coding->extra_spec))->endian;
924 decode_coding_utf_16 (const unsigned char *source, int src_bytes, MText *mt,
925 MConverter *converter)
927 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
928 const unsigned char *src = internal->carryover;
929 const unsigned char *src_stop = src + internal->carryover_bytes;
930 const unsigned char *src_end = source + src_bytes;
931 const unsigned char *src_base;
932 unsigned char *dst = mt->data + mt->nbytes;
933 unsigned char *dst_end = mt->data + mt->allocated;
936 int at_most = converter->at_most > 0 ? converter->at_most : -1;
937 struct utf_status *status = (struct utf_status *) &(converter->status);
938 unsigned char b1, b2;
939 MCharset *charset = NULL;
942 if (status->bom != UTF_BOM_NO)
946 ONE_MORE_BASE_BYTE (b1);
950 status->endian = UTF_BIG_ENDIAN;
951 else if (c == 0xFFFE)
952 status->endian = UTF_LITTLE_ENDIAN;
953 else if (status->bom == UTF_BOM_MAYBE
954 || converter->lenient)
956 status->endian = UTF_BIG_ENDIAN;
957 REWIND_SRC_TO_BASE ();
964 status->bom = UTF_BOM_NO;
970 MCharset *this_charset = NULL;
972 ONE_MORE_BASE_BYTE (b1);
974 if (status->endian == UTF_BIG_ENDIAN)
975 c = ((b1 << 8) | b2);
977 c = ((b2 << 8) | b1);
978 if (c < 0xD800 || c >= 0xE000)
984 if (status->endian == UTF_BIG_ENDIAN)
985 c1 = ((b1 << 8) | b2);
987 c1 = ((b2 << 8) | b1);
988 if (c1 < 0xDC00 || c1 >= 0xE000)
990 c = 0x10000 + ((c - 0xD800) << 10) + (c1 - 0xDC00);
995 if (! converter->lenient)
997 REWIND_SRC_TO_BASE ();
1000 if (status->endian == UTF_BIG_ENDIAN)
1001 c = ((b1 << 8) | b2);
1003 c = ((b2 << 8) | b1);
1004 this_charset = mcharset__binary;
1007 if (this_charset != charset)
1009 TAKEIN_CHARS (mt, nchars - last_nchars,
1010 dst - (mt->data + mt->nbytes), charset);
1011 charset = this_charset;
1012 last_nchars = nchars;
1016 /* We reach here because of an invalid byte. */
1020 TAKEIN_CHARS (mt, nchars - last_nchars,
1021 dst - (mt->data + mt->nbytes), charset);
1022 return finish_decoding (mt, converter, nchars,
1023 source, src_end, src_base, error);
1028 decode_coding_utf_32 (const unsigned char *source, int src_bytes, MText *mt,
1029 MConverter *converter)
1031 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1032 const unsigned char *src = internal->carryover;
1033 const unsigned char *src_stop = src + internal->carryover_bytes;
1034 const unsigned char *src_end = source + src_bytes;
1035 const unsigned char *src_base;
1036 unsigned char *dst = mt->data + mt->nbytes;
1037 unsigned char *dst_end = mt->data + mt->allocated;
1039 int last_nchars = 0;
1040 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1041 struct utf_status *status = (struct utf_status *) &(converter->status);
1042 unsigned char b1, b2, b3, b4;
1043 MCharset *charset = NULL;
1046 if (status->bom != UTF_BOM_NO)
1050 ONE_MORE_BASE_BYTE (b1);
1054 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1055 if (c == 0x0000FEFF)
1056 status->endian = UTF_BIG_ENDIAN;
1057 else if (c == 0xFFFE0000)
1058 status->endian = UTF_LITTLE_ENDIAN;
1059 else if (status->bom == UTF_BOM_MAYBE
1060 || converter->lenient)
1062 status->endian = UTF_BIG_ENDIAN;
1063 REWIND_SRC_TO_BASE ();
1070 status->bom = UTF_BOM_NO;
1076 MCharset *this_charset = NULL;
1078 ONE_MORE_BASE_BYTE (b1);
1082 if (status->endian == UTF_BIG_ENDIAN)
1083 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1085 c = (b4 << 24) | (b3 << 16) | (b2 << 8) | b1;
1086 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1089 if (! converter->lenient)
1091 REWIND_SRC_TO_BASE ();
1093 this_charset = mcharset__binary;
1096 if (this_charset != charset)
1098 TAKEIN_CHARS (mt, nchars - last_nchars,
1099 dst - (mt->data + mt->nbytes), charset);
1100 charset = this_charset;
1101 last_nchars = nchars;
1105 /* We reach here because of an invalid byte. */
1109 TAKEIN_CHARS (mt, nchars - last_nchars,
1110 dst - (mt->data + mt->nbytes), charset);
1111 return finish_decoding (mt, converter, nchars,
1112 source, src_end, src_base, error);
1117 encode_coding_utf_16 (MText *mt, int from, int to,
1118 unsigned char *destination, int dst_bytes,
1119 MConverter *converter)
1121 unsigned char *src, *src_end;
1122 unsigned char *dst = destination;
1123 unsigned char *dst_end = dst + dst_bytes;
1125 struct utf_status *status = (struct utf_status *) &(converter->status);
1126 int big_endian = status->endian == UTF_BIG_ENDIAN;
1127 enum MTextFormat format = mt->format;
1129 SET_SRC (mt, format, from, to);
1131 if (status->bom != UTF_BOM_NO)
1135 *dst++ = 0xFE, *dst++ = 0xFF;
1137 *dst++ = 0xFF, *dst++ = 0xFE;
1138 status->bom = UTF_BOM_NO;
1145 ONE_MORE_CHAR (c, bytes, format);
1147 if (c < 0xD800 || (c >= 0xE000 && c < 0x10000))
1151 *dst++ = c >> 8, *dst++ = c & 0xFF;
1153 *dst++ = c & 0xFF, *dst++ = c >> 8;
1155 else if (c >= 0x10000 && c < 0x110000)
1161 c1 = (c >> 10) + 0xD800;
1162 c2 = (c & 0x3FF) + 0xDC00;
1164 *dst++ = c1 >> 8, *dst++ = c1 & 0xFF,
1165 *dst++ = c2 >> 8, *dst++ = c2 & 0xFF;
1167 *dst++ = c1 & 0xFF, *dst++ = c1 >> 8,
1168 *dst++ = c2 & 0xFF, *dst++ = c2 >> 8;
1172 unsigned char buf[11];
1175 if (! converter->lenient)
1177 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1180 goto insufficient_destination;
1182 for (i = 0; i < len; i++)
1183 *dst++ = 0, *dst++ = buf[i];
1185 for (i = 0; i < len; i++)
1186 *dst++ = buf[i], *dst++ = 0;
1191 /* We reach here because of an unsupported char. */
1192 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1195 insufficient_destination:
1196 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1199 converter->nchars += nchars;
1200 converter->nbytes += dst - destination;
1201 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1205 encode_coding_utf_32 (MText *mt, int from, int to,
1206 unsigned char *destination, int dst_bytes,
1207 MConverter *converter)
1209 unsigned char *src, *src_end;
1210 unsigned char *dst = destination;
1211 unsigned char *dst_end = dst + dst_bytes;
1213 struct utf_status *status = (struct utf_status *) &(converter->status);
1214 int big_endian = status->endian == UTF_BIG_ENDIAN;
1215 enum MTextFormat format = mt->format;
1217 SET_SRC (mt, format, from, to);
1219 if (status->bom != UTF_BOM_NO)
1223 *dst++ = 0x00, *dst++ = 0x00, *dst++ = 0xFE, *dst++ = 0xFF;
1225 *dst++ = 0xFF, *dst++ = 0xFE, *dst++ = 0x00, *dst++ = 0x00;
1226 status->bom = UTF_BOM_NO;
1233 ONE_MORE_CHAR (c, bytes, format);
1235 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1239 *dst++ = 0x00, *dst++ = c >> 16,
1240 *dst++ = (c >> 8) & 0xFF, *dst++ = c & 0xFF;
1242 *dst++ = c & 0xFF, *dst++ = (c >> 8) & 0xFF,
1243 *dst++ = c >> 16, *dst++ = 0x00;
1247 unsigned char buf[11];
1250 if (! converter->lenient)
1252 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1255 goto insufficient_destination;
1257 for (i = 0; i < len; i++)
1258 *dst++ = 0, *dst++ = buf[i];
1260 for (i = 0; i < len; i++)
1261 *dst++ = buf[i], *dst++ = 0;
1266 /* We reach here because of an unsupported char. */
1267 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1270 insufficient_destination:
1271 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1274 converter->nchars += nchars;
1275 converter->nbytes += dst - destination;
1276 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1280 /* Stuff for coding systems of type MCODING_TYPE_ISO_2022. */
1282 #define ISO_CODE_STX 0x02 /* start text */
1283 #define ISO_CODE_SO 0x0E /* shift-out */
1284 #define ISO_CODE_SI 0x0F /* shift-in */
1285 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
1286 #define ISO_CODE_ESC 0x1B /* escape */
1287 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
1288 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
1290 /** Structure pointed by MCodingSystem.extra_spec. */
1292 struct iso_2022_spec
1296 /** Initial graphic registers (0..3) invoked to each graphic
1297 plane left and right. */
1298 int initial_invocation[2];
1300 /** Initially designated charsets for each graphic register. */
1301 MCharset *initial_designation[4];
1309 struct iso_2022_status
1312 MCharset *designation[4];
1313 unsigned single_shifting : 1;
1316 unsigned utf8_shifting : 1;
1317 MCharset *non_standard_charset;
1318 int non_standard_charset_bytes;
1319 int non_standard_encoding;
/* Classes of byte values distinguished when handling ISO-2022 text.
   The declaration also defines the 256-element table
   iso_2022_code_class[] of this enum type (presumably indexed by byte
   value -- confirm against the code that fills it). */
1322 enum iso_2022_code_class {
1323 ISO_control_0, /* Control codes in the range
1324 0x00..0x1F and 0x7F, except for the
1325 following 4 codes. */
1326 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
1327 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
1328 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
1329 ISO_escape, /* ISO_CODE_ESC (0x1B) */
1330 ISO_control_1, /* Control codes in the range
1331 0x80..0x9F, except for the
1332 following 3 codes. */
1333 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
1334 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
1335 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
1336 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
1337 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
1338 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
1339 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
1340 } iso_2022_code_class[256];
/* Bitwise OR of the four MCODING_ISO_DESIGNATION_* flags (G0, G1,
   CTEXT, CTEXT_EXT).  setup_coding_iso_2022 () masks the flags of a
   coding definition with this value to obtain its designation
   policy; a zero result means that no designation flag is set. */
1343 #define MCODING_ISO_DESIGNATION_MASK \
1344 (MCODING_ISO_DESIGNATION_G0 \
1345 | MCODING_ISO_DESIGNATION_G1 \
1346 | MCODING_ISO_DESIGNATION_CTEXT \
1347 | MCODING_ISO_DESIGNATION_CTEXT_EXT)
1350 setup_coding_iso_2022 (MCodingSystem *coding)
1352 MCodingInfoISO2022 *info = (MCodingInfoISO2022 *) (coding->extra_info);
1353 int ncharsets = coding->ncharsets;
1354 struct iso_2022_spec *spec;
1355 int designation_policy = info->flags & MCODING_ISO_DESIGNATION_MASK;
1358 coding->ascii_compatible = 0;
1360 MSTRUCT_CALLOC (spec, MERROR_CODING);
1362 spec->flags = info->flags;
1363 spec->initial_invocation[0] = info->initial_invocation[0];
1364 spec->initial_invocation[1] = info->initial_invocation[1];
1365 for (i = 0; i < 4; i++)
1366 spec->initial_designation[i] = NULL;
1367 if (designation_policy)
1369 spec->n_designations = ncharsets;
1370 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1371 spec->n_designations += mcharset__iso_2022_table.used;
1372 MTABLE_CALLOC (spec->designations, spec->n_designations, MERROR_CODING);
1373 for (i = 0; i < spec->n_designations; i++)
1374 spec->designations[i] = -1;
1378 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1379 MERROR (MERROR_CODING, -1);
1380 spec->designations = NULL;
1383 for (i = 0; i < ncharsets; i++)
1385 int reg = info->designations[i];
1388 && coding->charsets[i]->final_byte > 0
1389 && (reg < -4 || reg > 3))
1390 MERROR (MERROR_CODING, -1);
1393 if (spec->initial_designation[reg])
1394 MERROR (MERROR_CODING, -1);
1395 spec->initial_designation[reg] = coding->charsets[i];
1399 if (! designation_policy
1400 && ! (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1401 MERROR (MERROR_CODING, -1);
1405 if (designation_policy)
1406 spec->designations[i] = reg;
1407 if (coding->charsets[i] == mcharset__ascii)
1408 coding->ascii_compatible = 1;
1411 if (coding->ascii_compatible
1412 && (spec->flags & (MCODING_ISO_DESIGNATION_G0
1413 | MCODING_ISO_DESIGNATION_CTEXT
1414 | MCODING_ISO_DESIGNATION_CTEXT_EXT
1415 | MCODING_ISO_LOCKING_SHIFT)))
1416 coding->ascii_compatible = 0;
1418 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1419 for (i = 0; i < mcharset__iso_2022_table.used; i++)
1421 MCharset *charset = mcharset__iso_2022_table.charsets[i];
1423 spec->designations[ncharsets + i]
1424 = ((designation_policy == MCODING_ISO_DESIGNATION_CTEXT
1425 || designation_policy == MCODING_ISO_DESIGNATION_CTEXT_EXT)
1426 ? (charset->code_range[0] == 32
1427 || charset->code_range[1] == 255)
1428 : designation_policy == MCODING_ISO_DESIGNATION_G1);
1431 spec->use_esc = ((spec->flags & MCODING_ISO_DESIGNATION_MASK)
1432 || ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1433 && (spec->initial_designation[2]
1434 || spec->initial_designation[3]))
1435 || (! (spec->flags & MCODING_ISO_EIGHT_BIT)
1436 && (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1437 || (spec->flags & MCODING_ISO_ISO6429));
1439 coding->extra_spec = (void *) spec;
1445 reset_coding_iso_2022 (MConverter *converter)
1447 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1448 MCodingSystem *coding = internal->coding;
1449 struct iso_2022_status *status
1450 = (struct iso_2022_status *) &(converter->status);
1451 struct iso_2022_spec *spec;
1455 && setup_coding_iso_2022 (coding) < 0)
1459 spec = (struct iso_2022_spec *) coding->extra_spec;
1460 status->invocation[0] = spec->initial_invocation[0];
1461 status->invocation[1] = spec->initial_invocation[1];
1462 for (i = 0; i < 4; i++)
1463 status->designation[i] = spec->initial_designation[i];
1464 status->single_shifting = 0;
/* Decode a designation of the charset identified by (DIM, CHARS,
   FINAL, REV) to graphic register REG.  A FINAL below '0' or not
   below 128 jumps to invalid_byte.  The charset is looked up with
   MCHARSET_ISO_2022 (); when MCODING_ISO_FULL_SUPPORT is not set it
   must be one of CODING->charsets.  A second lookup scans
   mcharset__iso_2022_table for an entry matching revision, dimension,
   final byte and code range (a CHARS of 96 also matches an upper
   bound of 255).  On success STATUS->designation[REG] is updated.
   The expansion refers to `spec', `coding', `status' and the label
   `invalid_byte' of the enclosing function.
   NOTE(review): the condition that selects the table scan is not
   visible in this excerpt.  */
1471 #define ISO2022_DECODE_DESIGNATION(reg, dim, chars, final, rev) \
1473 MCharset *charset; \
1475 if ((final) < '0' || (final) >= 128) \
1476 goto invalid_byte; \
1479 charset = MCHARSET_ISO_2022 ((dim), (chars), (final)); \
1480 if (! (spec->flags & MCODING_ISO_FULL_SUPPORT)) \
1484 for (i = 0; i < coding->ncharsets; i++) \
1485 if (charset == coding->charsets[i]) \
1487 if (i == coding->ncharsets) \
1488 goto invalid_byte; \
1495 for (i = 0; i < mcharset__iso_2022_table.used; i++) \
1497 charset = mcharset__iso_2022_table.charsets[i]; \
1498 if (charset->revision == (rev) \
1499 && charset->dimension == (dim) \
1500 && charset->final_byte == (final) \
1501 && (charset->code_range[1] == (chars) \
1502 || ((chars) == 96 && charset->code_range[1] == 255))) \
1505 if (i == mcharset__iso_2022_table.used) \
1506 goto invalid_byte; \
1508 status->designation[reg] = charset; \
/* Map CHARSET_NAME, the charset name of a compound-text extended
   segment, to an MCharset.  The names handled in the visible code are
   "koi8-r" (charset koi8-r) and "big5-0" (charset big5).
   NOTE(review): the result for any other name is not visible in this
   excerpt.  */
1513 find_ctext_non_standard_charset (char *charset_name)
1517 if (! strcmp (charset_name, "koi8-r"))
1518 charset = MCHARSET (msymbol ("koi8-r"));
1519 else if (! strcmp (charset_name, "big5-0"))
/* The compound-text name "big5-0" corresponds to the charset big5.  */
1520 charset = MCHARSET (msymbol ("big5"));
1527 decode_coding_iso_2022 (const unsigned char *source, int src_bytes, MText *mt,
1528 MConverter *converter)
1530 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1531 MCodingSystem *coding = internal->coding;
1532 const unsigned char *src = internal->carryover;
1533 const unsigned char *src_stop = src + internal->carryover_bytes;
1534 const unsigned char *src_end = source + src_bytes;
1535 const unsigned char *src_base;
1536 unsigned char *dst = mt->data + mt->nbytes;
1537 unsigned char *dst_end = mt->data + mt->allocated;
1539 int last_nchars = 0;
1540 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1541 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
1542 struct iso_2022_status *status
1543 = (struct iso_2022_status *) &(converter->status);
1544 MCharset *charset0, *charset1, *charset;
1546 MCharset *cns_charsets[15];
1548 charset0 = (status->invocation[0] >= 0
1549 ? status->designation[status->invocation[0]] : NULL);
1550 charset1 = (status->invocation[1] >= 0
1551 ? status->designation[status->invocation[1]] : NULL);
1552 charset = mcharset__ascii;
1554 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1558 memset (cns_charsets, 0, sizeof (cns_charsets));
1559 for (i = 0; i < coding->ncharsets; i++)
1560 if (coding->charsets[i]->dimension == 2
1561 && coding->charsets[i]->code_range[1] == 126)
1563 int final = coding->charsets[i]->final_byte;
1565 if (final >= 'G' && final <= 'M')
1566 cns_charsets[final - 'G'] = coding->charsets[i];
1568 cns_charsets[14] = coding->charsets[i];
1574 MCharset *this_charset = NULL;
1577 ONE_MORE_BASE_BYTE (c1);
1579 if (status->utf8_shifting)
1582 int bytes = CHAR_BYTES_BY_HEAD (c1);
1586 for (i = 1; i < bytes; i++)
1591 this_charset = UTF8_CHARSET (buf);
1592 c1 = STRING_CHAR_UTF8 (buf);
1596 if (status->non_standard_encoding > 0)
1600 this_charset = status->non_standard_charset;
1601 for (i = 1; i < status->non_standard_charset_bytes; i++)
1604 c1 = (c1 << 8) | c2;
1606 c1 = DECODE_CHAR (this_charset, c1);
1610 switch (iso_2022_code_class[c1])
1612 case ISO_graphic_plane_0:
1613 this_charset = charset0;
1616 case ISO_0x20_or_0x7F:
1618 || (charset0->code_range[0] != 32
1619 && charset0->code_range[1] != 255))
1620 /* This is SPACE or DEL. */
1621 this_charset = mcharset__ascii;
1623 /* This is a graphic character of plane 0. */
1624 this_charset = charset0;
1627 case ISO_graphic_plane_1:
1630 this_charset = charset1;
1633 case ISO_0xA0_or_0xFF:
1635 || charset1->code_range[0] == 33
1636 || ! (spec->flags & MCODING_ISO_EIGHT_BIT))
1638 /* This is a graphic character of plane 1. */
1641 this_charset = charset1;
1645 this_charset = mcharset__ascii;
1652 if ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1653 && status->designation[1])
1655 status->invocation[0] = 1;
1656 charset0 = status->designation[1];
1659 this_charset = mcharset__ascii;
1663 if (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1665 status->invocation[0] = 0;
1666 charset0 = status->designation[0];
1669 this_charset = mcharset__ascii;
1672 case ISO_single_shift_2_7:
1673 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT_7))
1675 this_charset = mcharset__ascii;
1679 goto label_escape_sequence;
1681 case ISO_single_shift_2:
1682 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1685 if (c1 < 0xA1 || (c1 > 0xA7 && c1 < 0xAF) || c1 > 0xAF
1686 || ! cns_charsets[c1 - 0xA1])
1688 status->designation[2] = cns_charsets[c1 - 0xA1];
1690 else if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1692 /* SS2 is handled as an escape sequence of ESC 'N' */
1694 goto label_escape_sequence;
1696 case ISO_single_shift_3:
1697 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1699 /* SS2 is handled as an escape sequence of ESC 'O' */
1701 goto label_escape_sequence;
1703 case ISO_control_sequence_introducer:
1704 /* CSI is handled as an escape sequence of ESC '[' ... */
1706 goto label_escape_sequence;
1709 if (! spec->use_esc)
1711 this_charset = mcharset__ascii;
1715 label_escape_sequence:
1716 /* Escape sequences handled here are invocation,
1717 designation, and direction specification. */
1720 case '&': /* revision of following character set */
1721 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1722 goto unused_escape_sequence;
1724 if (c1 < '@' || c1 > '~')
1727 if (c1 != ISO_CODE_ESC)
1730 goto label_escape_sequence;
1732 case '$': /* designation of 2-byte character set */
1733 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1734 goto unused_escape_sequence;
1736 if (c1 >= '@' && c1 <= 'B')
1737 { /* designation of JISX0208.1978, GB2312.1980, or
1739 ISO2022_DECODE_DESIGNATION (0, 2, 94, c1, -1);
1741 else if (c1 >= 0x28 && c1 <= 0x2B)
1742 { /* designation of (dimension 2, chars 94) character set */
1744 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2, -1);
1746 else if (c1 >= 0x2C && c1 <= 0x2F)
1747 { /* designation of (dimension 2, chars 96) character set */
1749 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2, -1);
1753 /* We must update these variables now. */
1754 charset0 = status->designation[status->invocation[0]];
1755 charset1 = status->designation[status->invocation[1]];
1758 case 'n': /* invocation of locking-shift-2 */
1759 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1760 || ! status->designation[2])
1762 status->invocation[0] = 2;
1763 charset0 = status->designation[2];
1766 case 'o': /* invocation of locking-shift-3 */
1767 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1768 || ! status->designation[3])
1770 status->invocation[0] = 3;
1771 charset0 = status->designation[3];
1774 case 'N': /* invocation of single-shift-2 */
1775 if (! ((spec->flags & MCODING_ISO_SINGLE_SHIFT)
1776 || (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1777 || ! status->designation[2])
1779 this_charset = status->designation[2];
1781 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1785 case 'O': /* invocation of single-shift-3 */
1786 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT)
1787 || ! status->designation[3])
1789 this_charset = status->designation[3];
1791 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1795 case '[': /* specification of direction */
1796 if (! (spec->flags & MCODING_ISO_ISO6429))
1798 /* For the moment, nested direction is not supported.
1799 So, (coding->mode & CODING_MODE_DIRECTION) zero means
1800 left-to-right, and nonzero means right-to-left. */
1804 case ']': /* end of the current direction */
1805 case '0': /* end of the current direction */
1809 case '1': /* start of left-to-right direction */
1816 case '2': /* start of right-to-left direction */
1830 char charset_name[16];
1834 if (! spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
1836 /* Compound-text uses these escape sequences:
1838 ESC % G -- utf-8 bytes -- ESC % @
1839 ESC % / 1 M L -- charset name -- STX -- bytes --
1840 ESC % / 2 M L -- charset name -- STX -- bytes --
1841 ESC % / 3 M L -- charset name -- STX -- bytes --
1842 ESC % / 4 M L -- charset name -- STX -- bytes --
1844 It also uses this sequence but that is not yet
1847 ESC % / 0 M L -- charset name -- STX -- bytes -- */
1852 status->utf8_shifting = 1;
1857 if (! status->utf8_shifting)
1859 status->utf8_shifting = 0;
1865 if (c1 < '1' || c1 > '4')
1867 status->non_standard_charset_bytes = c1 - '0';
1870 if (c1 < 128 || c2 < 128)
1872 bytes = (c1 - 128) * 128 + (c2 - 128);
1873 for (i = 0; i < 16; i++)
1876 if (c1 == ISO_CODE_STX)
1878 charset_name[i] = TOLOWER (c1);
1882 charset_name[i++] = '\0';
1883 this_charset = find_ctext_non_standard_charset (charset_name);
1886 status->non_standard_charset = this_charset;
1887 status->non_standard_encoding = bytes - i;
1892 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1893 goto unused_escape_sequence;
1894 if (c1 >= 0x28 && c1 <= 0x2B)
1895 { /* designation of (dimension 1, chars 94) charset */
1897 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2, -1);
1899 else if (c1 >= 0x2C && c1 <= 0x2F)
1900 { /* designation of (dimension 1, chars 96) charset */
1902 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2, -1);
1906 /* We must update these variables now. */
1907 charset0 = status->designation[status->invocation[0]];
1908 charset1 = status->designation[status->invocation[1]];
1911 unused_escape_sequence:
1912 UNGET_ONE_BYTE (c1);
1914 this_charset = mcharset__ascii;
1918 if (this_charset->dimension == 1)
1920 if (this_charset->code_range[1] <= 128)
1923 else if (this_charset->dimension == 2)
1926 c1 = ((c1 & 0x7F) << 8) | (c2 & 0x7F);
1928 else /* i.e. (dimension == 3) */
1932 c1 = ((c1 & 0x7F) << 16) | ((c2 & 0x7F) << 8) | (c3 & 0x7F);
1934 c1 = DECODE_CHAR (this_charset, c1);
1938 if (! converter->lenient)
1940 REWIND_SRC_TO_BASE ();
1942 this_charset = mcharset__binary;
1945 if (this_charset != mcharset__ascii
1946 && this_charset != charset)
1948 TAKEIN_CHARS (mt, nchars - last_nchars,
1949 dst - (mt->data + mt->nbytes), charset);
1950 charset = this_charset;
1951 last_nchars = nchars;
1954 if (status->non_standard_encoding > 0)
1955 status->non_standard_encoding -= status->non_standard_charset_bytes;
1957 /* We reach here because of an invalid byte. */
1963 TAKEIN_CHARS (mt, nchars - last_nchars,
1964 dst - (mt->data + mt->nbytes), charset);
1965 return finish_decoding (mt, converter, nchars,
1966 source, src_end, src_base, error);
1970 /* Produce codes (escape sequence) for designating CHARSET to graphic
1971 register REG at DST, and increment DST. Jump to memory_shortage
if fewer than 4 bytes are free at DST. For a charset of dimension 1
the intermediate character is taken from "()*+" (code range neither
starting at 32 nor ending at 255) or from ",-./". For other charsets
of the first kind the intermediate character is written when
SPEC->flags has MCODING_ISO_LONG_FORM or when CHARSET->final_byte is
1972 outside '@'..'B' (part of this condition is not visible in this
1973 excerpt). Update STATUS->designation. */
1975 #define ISO2022_ENCODE_DESIGNATION(reg, charset, spec, status) \
1977 char *intermediate_char_94 = "()*+"; \
1978 char *intermediate_char_96 = ",-./"; \
1980 if (dst + 4 > dst_end) \
1981 goto memory_shortage; \
1982 *dst++ = ISO_CODE_ESC; \
1983 if (charset->dimension == 1) \
1985 if (charset->code_range[0] != 32 \
1986 && charset->code_range[1] != 255) \
1987 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1989 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1994 if (charset->code_range[0] != 32 \
1995 && charset->code_range[1] != 255) \
1997 if (spec->flags & MCODING_ISO_LONG_FORM \
1999 || charset->final_byte < '@' || charset->final_byte > 'B') \
2000 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
2003 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
2005 *dst++ = charset->final_byte; \
2007 status->designation[reg] = charset; \
2011 /* The following two macros produce codes (control character or escape
2012 sequence) for ISO-2022 single-shift functions (single-shift-2 and
2015 #define ISO2022_ENCODE_SINGLE_SHIFT_2(spec, status) \
2017 if (dst + 2 > dst_end) \
2018 goto memory_shortage; \
2019 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2020 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
2022 *dst++ = ISO_CODE_SS2; \
2023 status->single_shifting = 1; \
2027 #define ISO2022_ENCODE_SINGLE_SHIFT_3(spec, status) \
2029 if (dst + 2 > dst_end) \
2030 goto memory_shortage; \
2031 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2032 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
2034 *dst++ = ISO_CODE_SS3; \
2035 status->single_shifting = 1; \
2039 /* The following four macros produce codes (control character or
2040 escape sequence) for ISO-2022 locking-shift functions (shift-in,
2041 shift-out, locking-shift-2, and locking-shift-3). */
/* Summary of the shift macros in this group.  All of them write at
   `dst' and jump to `memory_shortage' when the bytes they need are
   not available before `dst_end'.

   ISO2022_ENCODE_SINGLE_SHIFT_2 / _3 need two bytes.  Without
   MCODING_ISO_EIGHT_BIT they emit ESC 'N' / ESC 'O'; the other branch
   emits ISO_CODE_SS2 / ISO_CODE_SS3.  Both set
   STATUS->single_shifting to 1.

   ISO2022_ENCODE_SHIFT_IN / _OUT need one byte, emit ISO_CODE_SI /
   ISO_CODE_SO and set STATUS->invocation[0] to 0 / 1.

   ISO2022_ENCODE_LOCKING_SHIFT_2 / _3 need two bytes, emit ESC 'n' /
   ESC 'o' and set STATUS->invocation[0] to 2 / 3.  */
2043 #define ISO2022_ENCODE_SHIFT_IN(status) \
2045 if (dst + 1 > dst_end) \
2046 goto memory_shortage; \
2047 *dst++ = ISO_CODE_SI; \
2048 status->invocation[0] = 0; \
2052 #define ISO2022_ENCODE_SHIFT_OUT(status) \
2054 if (dst + 1 > dst_end) \
2055 goto memory_shortage; \
2056 *dst++ = ISO_CODE_SO; \
2057 status->invocation[0] = 1; \
2061 #define ISO2022_ENCODE_LOCKING_SHIFT_2(status) \
2063 if (dst + 2 > dst_end) \
2064 goto memory_shortage; \
2065 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
2066 status->invocation[0] = 2; \
2070 #define ISO2022_ENCODE_LOCKING_SHIFT_3(status) \
2072 if (dst + 2 > dst_end) \
2073 goto memory_shortage; \
2074 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
2075 status->invocation[0] = 3; \
/* Start and end a UTF-8 section in the output.
   ISO2022_ENCODE_UTF8_SHIFT_START (LEN) checks with CHECK_DST that
   3 + LEN bytes are available, begins an escape sequence with
   ISO_CODE_ESC and sets `status->utf8_shifting' to 1.
   ISO2022_ENCODE_UTF8_SHIFT_END () begins an escape sequence with
   ISO_CODE_ESC and clears `status->utf8_shifting'.  Both refer to
   `dst' and `status' of the enclosing function.
   NOTE(review): the bytes following ESC are not visible in this
   excerpt; presumably '%' 'G' and '%' '@' as described for
   compound text.  */
2078 #define ISO2022_ENCODE_UTF8_SHIFT_START(len) \
2080 CHECK_DST (3 + len); \
2081 *dst++ = ISO_CODE_ESC; \
2084 status->utf8_shifting = 1; \
2088 #define ISO2022_ENCODE_UTF8_SHIFT_END() \
2091 *dst++ = ISO_CODE_ESC; \
2094 status->utf8_shifting = 0; \
/* Emit the header of a compound-text extended segment for the charset
   name NAME of length LEN.  CHECK_DST first reserves room for the
   six-byte prefix, the name, the STX terminator and one character of
   `non_standard_charset_bytes' bytes.  The start of the header is
   remembered in `non_standard_begin' so that the two length bytes,
   written as 0 here, can be filled in later.  `non_standard_bytes'
   starts at LEN + 1, i.e. the name plus STX.
   NOTE(review): the two bytes after ESC are not visible in this
   excerpt.  */
2098 #define ISO2022_ENCODE_NON_STANDARD(name, len) \
2100 CHECK_DST (6 + len + 1 + non_standard_charset_bytes); \
2101 non_standard_begin = dst; \
2102 *dst++ = ISO_CODE_ESC; \
2105 *dst++ = '0' + non_standard_charset_bytes; \
2106 *dst++ = 0, *dst++ = 0; /* filled later */ \
2107 memcpy (dst, name, len); \
2109 *dst++ = ISO_CODE_STX; \
2110 non_standard_bytes = len + 1; \
/* Look up the compound-text extended-segment name for CHARSET, using
   the name of the charset's symbol.  For "big5" the name becomes
   "big5-0" and *BYTES is set to 2.
   NOTE(review): the handling of "koi8-r" and of unknown charsets is
   not visible in this excerpt.  */
2115 find_ctext_non_standard_name (MCharset *charset, int *bytes)
2117 char *name = msymbol_name (charset->name);
2119 if (! strcmp (name, "koi8-r"))
2121 else if (! strcmp (name, "big5"))
/* big5 is written under the name "big5-0" with two bytes per code.  */
2122 name = "big5-0", *bytes = 2;
2128 /* Designate CHARSET to a graphic register specified in
2129 SPEC->designations. If the register is not yet invoked to graphic
2130 left nor right, invoke it to graphic left. DSTP points to a
2131 variable containing a memory address where the output must go.
2132 DST_END is the limit of that memory.
2134 Return 0 if it succeeds. Return -1 otherwise, which means that the
2135 memory area is too short. By side effect, update the variable that
2139 iso_2022_designate_invoke_charset (MCodingSystem *coding,
2141 struct iso_2022_spec *spec,
2142 struct iso_2022_status *status,
2143 unsigned char **dstp,
2144 unsigned char *dst_end)
2147 unsigned char *dst = *dstp;
2149 for (i = 0; i < 4; i++)
2150 if (charset == status->designation[i])
2155 /* CHARSET is not yet designated to any graphic registers. */
/* Find the index of CHARSET: its position in CODING->charsets, or its
   position in mcharset__iso_2022_table offset by CODING->ncharsets.
   SPEC->designations maps that index to the register to use.  */
2156 for (i = 0; i < coding->ncharsets; i++)
2157 if (charset == coding->charsets[i])
2159 if (i == coding->ncharsets)
2161 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2162 if (charset == mcharset__iso_2022_table.charsets[i])
2164 i += coding->ncharsets;
2166 i = spec->designations[i];
2167 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
/* Invoke register I unless it is already invoked on either side.  */
2170 if (status->invocation[0] != i
2171 && status->invocation[1] != i)
2173 /* Graphic register I is not yet invoked. */
2176 case 0: /* graphic register 0 */
2177 ISO2022_ENCODE_SHIFT_IN (status);
2180 case 1: /* graphic register 1 */
2181 ISO2022_ENCODE_SHIFT_OUT (status);
/* Registers 2 and 3 use a single shift when the coding system allows
   it, and a locking shift otherwise.  */
2184 case 2: /* graphic register 2 */
2185 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2186 ISO2022_ENCODE_SINGLE_SHIFT_2 (spec, status);
2188 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2191 case 3: /* graphic register 3 */
2192 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2193 ISO2022_ENCODE_SINGLE_SHIFT_3 (spec, status);
2195 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2208 /* Reset the invocation/designation status to the initial one. STATUS
2209 and SPEC contain information about the current and initial
2210 invocation/designation status respectively. DSTP points to a
2211 variable containing a memory address where the output must go.
2212 DST_END is the limit of that memory.
2214 Return 0 if it succeeds. Return -1 otherwise, which means that the
2215 memory area is too short. By side effect, update the variable that
2219 iso_2022_reset_invocation_designation (struct iso_2022_spec *spec,
2220 struct iso_2022_status *status,
2221 unsigned char **dstp,
2222 unsigned char *dst_end)
2224 unsigned char *dst = *dstp;
2227 /* Reset the invocation status of GL. We have not yet supported GR
2229 if (status->invocation[0] != spec->initial_invocation[0]
2230 && spec->initial_invocation[0] >= 0)
2232 if (spec->initial_invocation[0] == 0)
2233 ISO2022_ENCODE_SHIFT_IN (status);
2234 else if (spec->initial_invocation[0] == 1)
2235 ISO2022_ENCODE_SHIFT_OUT (status);
2236 else if (spec->initial_invocation[0] == 2)
2237 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2238 else /* i.e. spec->initial_invocation[0] == 3 */
2239 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2242 /* Reset the designation status of G0..G3. */
/* Only a register whose initial designation is non-NULL and differs
   from the current one is designated again.  */
2243 for (i = 0; i < 4; i++)
2244 if (status->designation[i] != spec->initial_designation[i]
2245 && spec->initial_designation[i])
2247 MCharset *charset = spec->initial_designation[i];
2249 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
/* Encode the characters of MT between FROM and TO in an ISO-2022
   based coding system, writing at most DST_BYTES bytes at
   DESTINATION.  The invocation, designation and shift state is kept
   in CONVERTER->status.  CONVERTER->nchars and CONVERTER->nbytes are
   advanced by the amount processed.  Return -1 when
   CONVERTER->result is MCONVERSION_RESULT_INVALID_CHAR, otherwise 0.  */
2262 encode_coding_iso_2022 (MText *mt, int from, int to,
2263 unsigned char *destination, int dst_bytes,
2264 MConverter *converter)
2266 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2267 MCodingSystem *coding = internal->coding;
2268 unsigned char *src, *src_end;
2269 unsigned char *dst = destination;
2270 unsigned char *dst_end = dst + dst_bytes;
2272 unsigned char *dst_base;
2273 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
2274 int full_support = spec->flags & MCODING_ISO_FULL_SUPPORT;
2275 struct iso_2022_status *status
2276 = (struct iso_2022_status *) &(converter->status);
2277 MCharset *primary, *charset0, *charset1;
2278 int next_primary_change;
2279 int ncharsets = coding->ncharsets;
2280 MCharset **charsets = coding->charsets;
2281 MCharset *cns_charsets[15];
2282 int ascii_compatible = coding->ascii_compatible;
2283 MCharset *non_standard_charset = NULL;
2284 int non_standard_charset_bytes = 0;
2285 int non_standard_bytes = 0;
2286 unsigned char *non_standard_begin = NULL;
2287 enum MTextFormat format = mt->format;
2289 SET_SRC (mt, format, from, to);
/* For the EUC-TW style shift, index the two-dimensional charsets by
   their final byte ('G'..'M').  */
2291 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2295 memset (cns_charsets, 0, sizeof (cns_charsets));
2296 for (i = 0; i < ncharsets; i++)
2297 if (charsets[i]->dimension == 2)
2299 int final = charsets[i]->final_byte;
2301 if (final >= 'G' && final <= 'M')
2302 cns_charsets[final - 'G'] = charsets[i];
2304 cns_charsets[14] = charsets[i];
2308 next_primary_change = from;
/* Charsets currently invoked; CHARSET1 is NULL when the second
   invocation index is negative.  */
2310 charset0 = status->designation[status->invocation[0]];
2311 charset1 = (status->invocation[1] < 0 ? NULL
2312 : status->designation[status->invocation[1]]);
2319 ONE_MORE_CHAR (c, bytes, format);
/* A character below 128 in an ASCII compatible coding system: leave
   any UTF-8 section first.  */
2321 if (c < 128 && ascii_compatible)
2323 if (status->utf8_shifting)
2324 ISO2022_ENCODE_UTF8_SHIFT_END ();
/* Control characters, SPACE and DEL: leave any UTF-8 section, and
   reset the shift state when the flags request it.  */
2328 else if (c <= 32 || c == 127)
2330 if (status->utf8_shifting)
2331 ISO2022_ENCODE_UTF8_SHIFT_END ();
2332 if (spec->flags & MCODING_ISO_RESET_AT_CNTL
2333 || (c == '\n' && spec->flags & MCODING_ISO_RESET_AT_EOL))
2335 if (iso_2022_reset_invocation_designation (spec, status,
2337 goto insufficient_destination;
2338 charset0 = status->designation[status->invocation[0]];
2339 charset1 = (status->invocation[1] < 0 ? NULL
2340 : status->designation[status->invocation[1]]);
2347 unsigned code = MCHAR_INVALID_CODE;
2348 MCharset *charset = NULL;
2350 int pos = from + nchars;
/* Refresh the preferred charset from the Mcharset text property
   whenever the position reaches the end of the current property
   range.  */
2352 if (pos >= next_primary_change)
2354 MSymbol primary_charset
2355 = (MSymbol) mtext_get_prop (mt, pos, Mcharset);
2356 primary = MCHARSET (primary_charset);
2357 if (primary && primary != mcharset__binary)
2359 if (primary->final_byte <= 0)
2361 else if (! full_support)
2365 for (i = 0; i < ncharsets; i++)
2366 if (primary == charsets[i])
2373 mtext_prop_range (mt, Mcharset, pos,
2374 NULL, &next_primary_change, 0);
/* Try the preferred charset first.  */
2377 if (primary && primary != mcharset__binary)
2379 code = ENCODE_CHAR (primary, c);
2380 if (code != MCHAR_INVALID_CODE)
2385 if (c <= 32 || c == 127)
2388 charset = mcharset__ascii;
/* Otherwise search the charsets of the coding system in order.  */
2394 for (i = 0; i < ncharsets; i++)
2396 charset = charsets[i];
2397 code = ENCODE_CHAR (charset, c);
2398 if (code != MCHAR_INVALID_CODE)
/* With full support, also search every charset registered in
   mcharset__iso_2022_table.  */
2403 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
2405 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2407 charset = mcharset__iso_2022_table.charsets[i];
2408 code = ENCODE_CHAR (charset, c);
2409 if (code != MCHAR_INVALID_CODE)
2412 if (i == mcharset__iso_2022_table.used)
2414 if (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2415 goto unsupported_char;
2416 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2421 goto unsupported_char;
2427 && (charset->final_byte >= 0
2428 || spec->flags & MCODING_ISO_EUC_TW_SHIFT))
2430 if (code >= 0x80 && code < 0xA0)
2431 goto unsupported_char;
2433 if (status->utf8_shifting)
2434 ISO2022_ENCODE_UTF8_SHIFT_END ();
2435 if (charset == charset0)
2437 else if (charset == charset1)
2441 unsigned char *p = NULL;
2443 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2447 if (cns_charsets[0] == charset)
2453 for (i = 1; i < 15; i++)
2454 if (cns_charsets[i] == charset)
2457 *dst++ = ISO_CODE_SS2;
2460 status->single_shifting = 1;
/* Designate and invoke CHARSET, then refresh the cached invoked
   charsets.  */
2465 if (iso_2022_designate_invoke_charset
2466 (coding, charset, spec, status, &dst, dst_end) < 0)
2467 goto insufficient_destination;
2468 charset0 = status->designation[status->invocation[0]];
2469 charset1 = (status->invocation[1] < 0 ? NULL
2470 : status->designation[status->invocation[1]]);
2472 if (status->single_shifting)
2474 = (spec->flags & MCODING_ISO_EIGHT_BIT) ? 0x80 : 0;
2475 else if (charset == charset0)
/* Write the code one byte per dimension, most significant byte
   first, with gr_mask OR-ed into every byte.  */
2480 if (charset->dimension == 1)
2483 *dst++ = code | gr_mask;
2485 else if (charset->dimension == 2)
2488 *dst++ = (code >> 8) | gr_mask;
2489 *dst++ = (code & 0xFF) | gr_mask;
2494 *dst++ = (code >> 16) | gr_mask;
2495 *dst++ = ((code >> 8) & 0xFF) | gr_mask;
2496 *dst++ = (code & 0xFF) | gr_mask;
2498 status->single_shifting = 0;
/* Compound-text extended segment for a charset without a usable
   ISO-2022 designation.  */
2500 else if (charset && spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2502 if (charset != non_standard_charset)
2504 char *name = (find_ctext_non_standard_name
2505 (charset, &non_standard_charset_bytes));
2509 int len = strlen (name);
2511 ISO2022_ENCODE_NON_STANDARD (name, len);
2512 non_standard_charset = charset;
2515 non_standard_charset = NULL;
2518 if (non_standard_charset)
2520 if (dst + non_standard_charset_bytes > dst_end)
2521 goto insufficient_destination;
/* Update the segment length stored in the header: two bytes of 7 bits
   each, both with the high bit set.  */
2522 non_standard_bytes += non_standard_charset_bytes;
2523 non_standard_begin[4] = (non_standard_bytes / 128) | 0x80;
2524 non_standard_begin[5] = (non_standard_bytes % 128) | 0x80;
2525 if (non_standard_charset_bytes == 1)
2527 else if (non_standard_charset_bytes == 2)
2528 *dst++ = code >> 8, *dst++ = code & 0xFF;
2529 else if (non_standard_charset_bytes == 3)
2530 *dst++ = code >> 16, *dst++ = (code >> 8) & 0xFF,
2531 *dst++ = code & 0xFF;
2532 else /* i.e. non_standard_charset_bytes == 4 */
2533 *dst++ = code >> 24, *dst++ = (code >> 16) & 0xFF,
2534 *dst++ = (code >> 8) & 0xFF, *dst++ = code & 0xFF;
/* Fall back to writing the character as UTF-8 inside a UTF-8
   section.  */
2538 int len = CHAR_BYTES (c);
2541 goto unsupported_char;
2542 if (! status->utf8_shifting)
2543 ISO2022_ENCODE_UTF8_SHIFT_START (len);
2546 CHAR_STRING (c, dst);
2550 goto unsupported_char;
/* Unsupported character: switch to ASCII before writing a
   substitute.  */
2560 if (iso_2022_designate_invoke_charset (coding, mcharset__ascii,
2563 goto insufficient_destination;
2564 if (! converter->lenient)
2566 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
2568 goto insufficient_destination;
2574 /* We reach here because of an unsupported char. */
2575 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2578 insufficient_destination:
2580 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
/* After a successful last block, close any UTF-8 section and restore
   the initial state if requested.  */
2583 if (converter->result == MCONVERSION_RESULT_SUCCESS
2584 && converter->last_block)
2586 if (status->utf8_shifting)
2588 ISO2022_ENCODE_UTF8_SHIFT_END ();
2591 if (spec->flags & MCODING_ISO_RESET_AT_EOL
2592 && charset0 != spec->initial_designation[0])
2594 if (iso_2022_reset_invocation_designation (spec, status,
2596 goto insufficient_destination;
2599 converter->nchars += nchars;
2600 converter->nbytes += dst - destination;
2601 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2605 /* Stuff for coding systems of type MCODING_TYPE_MISC. */
2607 /* For SJIS handling... */
/* SJIS_TO_JIS (S1, S2): convert the Shift_JIS lead byte S1 and trail
   byte S2 to a 16-bit code, high byte << 8 | low byte.  Lead bytes
   from 0xE0 upwards use a different row offset from the lower range,
   and trail bytes from 0x7F upwards are shifted by one extra position.
   NOTE(review): the condition selecting between the two alternatives
   is not visible in this excerpt.  */
2609 #define SJIS_TO_JIS(s1, s2) \
2611 ? (((s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0)) << 8) \
2613 : (((s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1)) << 8) \
2614 | (s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F))))
/* JIS_TO_SJIS (C1, C2): the reverse conversion, from the two bytes
   C1 and C2 of a JIS code to a 16-bit Shift_JIS code.
   NOTE(review): the selecting condition and the last line of the
   expression are not visible in this excerpt.  */
2616 #define JIS_TO_SJIS(c1, c2) \
2618 ? (((c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1)) << 8) \
2619 | (c2 + ((c2 >= 0x60) ? 0x20 : 0x1F))) \
2620 : (((c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0)) << 8) \
/* Prepare the SJIS coding system of CONVERTER.  When the coding
   system is not yet ready, look up the charsets jisx0208.1983 and
   jisx0201-kana and install them as CODING->charsets[1] and [2],
   setting CODING->ncharsets to 3.
   NOTE(review): the action taken when either charset is missing, and
   the return value, are not visible in this excerpt.  */
2625 reset_coding_sjis (MConverter *converter)
2627 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2628 MCodingSystem *coding = internal->coding;
2630 if (! coding->ready)
2632 MSymbol kanji_sym = msymbol ("jisx0208.1983");
2633 MCharset *kanji = MCHARSET (kanji_sym);
2634 MSymbol kana_sym = msymbol ("jisx0201-kana");
2635 MCharset *kana = MCHARSET (kana_sym);
2637 if (! kanji || ! kana)
/* Slot 0 is left as it is; slots 1 and 2 receive kanji and kana.  */
2639 coding->ncharsets = 3;
2640 coding->charsets[1] = kanji;
2641 coding->charsets[2] = kana;
/* Decode SRC_BYTES bytes at SOURCE encoded in Shift_JIS and append the
   decoded characters to MT.  CODING->charsets[0], [1] and [2] are
   used as the roman, kanji and kana charsets.  The return value is
   that of finish_decoding ().  */
2648 decode_coding_sjis (const unsigned char *source, int src_bytes, MText *mt,
2649 MConverter *converter)
2651 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2652 MCodingSystem *coding = internal->coding;
2653 const unsigned char *src = internal->carryover;
2654 const unsigned char *src_stop = src + internal->carryover_bytes;
2655 const unsigned char *src_end = source + src_bytes;
2656 const unsigned char *src_base;
2657 unsigned char *dst = mt->data + mt->nbytes;
/* Stop MAX_UTF8_CHAR_BYTES before the end of the allocated area.  */
2658 unsigned char *dst_end = mt->data + mt->allocated - MAX_UTF8_CHAR_BYTES;
2660 int last_nchars = 0;
2661 int at_most = converter->at_most > 0 ? converter->at_most : -1;
2663 MCharset *charset_roman = coding->charsets[0];
2664 MCharset *charset_kanji = coding->charsets[1];
2665 MCharset *charset_kana = coding->charsets[2];
2666 MCharset *charset = mcharset__ascii;
2671 MCharset *this_charset;
2674 ONE_MORE_BASE_BYTE (c1);
2679 this_charset = ((c1 <= 0x20 || c1 == 0x7F)
/* Lead byte of a two-byte character.  */
2683 else if ((c1 >= 0x81 && c1 <= 0x9F) || (c1 >= 0xE0 && c1 <= 0xEF))
/* A valid Shift_JIS trail byte is 0x40..0x7E or 0x80..0xFC; 0x7F is
   not a trail byte.  The bounds are written in hexadecimal (the old
   test used decimal 80 and also let 0x7F through).  */
2686 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0x80 && c2 <= 0xFC))
2688 this_charset = charset_kanji;
2689 c1 = SJIS_TO_JIS (c1, c2);
/* Single-byte kana range.  */
2694 else if (c1 >= 0xA1 && c1 <= 0xDF)
2696 this_charset = charset_kana;
2702 c = DECODE_CHAR (this_charset, c1);
2707 if (! converter->lenient)
2709 REWIND_SRC_TO_BASE ();
2711 this_charset = mcharset__binary;
/* When the charset changes, hand the characters decoded so far to
   TAKEIN_CHARS under the previous charset.  */
2714 if (this_charset != mcharset__ascii
2715 && this_charset != charset)
2717 TAKEIN_CHARS (mt, nchars - last_nchars,
2718 dst - (mt->data + mt->nbytes), charset);
2719 charset = this_charset;
2720 last_nchars = nchars;
2724 /* We reach here because of an invalid byte. */
2728 TAKEIN_CHARS (mt, nchars - last_nchars,
2729 dst - (mt->data + mt->nbytes), charset);
2730 return finish_decoding (mt, converter, nchars,
2731 source, src_end, src_base, error);
/* Encode the characters of MT between FROM and TO in Shift_JIS,
   writing at most DST_BYTES bytes at DESTINATION.
   CODING->charsets[0], [1] and [2] are tried in that order as the
   roman, kanji and kana charsets.  CONVERTER->nchars and
   CONVERTER->nbytes are advanced by the amount processed.  Return -1
   when CONVERTER->result is MCONVERSION_RESULT_INVALID_CHAR,
   otherwise 0.  */
2735 encode_coding_sjis (MText *mt, int from, int to,
2736 unsigned char *destination, int dst_bytes,
2737 MConverter *converter)
2739 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2740 MCodingSystem *coding = internal->coding;
2741 unsigned char *src, *src_end;
2742 unsigned char *dst = destination;
2743 unsigned char *dst_end = dst + dst_bytes;
2745 MCharset *charset_roman = coding->charsets[0];
2746 MCharset *charset_kanji = coding->charsets[1];
2747 MCharset *charset_kana = coding->charsets[2];
2748 enum MTextFormat format = mt->format;
2750 SET_SRC (mt, format, from, to);
2757 ONE_MORE_CHAR (c, bytes, format);
/* Control characters, SPACE and DEL are handled separately from the
   charset lookups below.  */
2759 if (c <= 0x20 || c == 0x7F)
2766 if ((code = ENCODE_CHAR (charset_roman, c)) != MCHAR_INVALID_CODE)
2771 else if ((code = ENCODE_CHAR (charset_kanji, c))
2772 != MCHAR_INVALID_CODE)
/* Split the JIS code into its two bytes and convert to Shift_JIS.  */
2774 int c1 = code >> 8, c2 = code & 0xFF;
2775 code = JIS_TO_SJIS (c1, c2);
2778 *dst++ = code & 0xFF;
2780 else if ((code = ENCODE_CHAR (charset_kana, c))
2781 != MCHAR_INVALID_CODE)
/* Kana codes are written as one byte with the high bit set.  */
2784 *dst++ = code | 0x80;
2788 if (! converter->lenient)
2790 len = encode_unsupporeted_char (c, dst, dst_end,
2793 goto insufficient_destination;
2800 /* We reach here because of an unsupported char. */
2801 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2804 insufficient_destination:
2805 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2808 converter->nchars += nchars;
2809 converter->nbytes += dst - destination;
2810 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
/* Return the coding system registered for the symbol NAME under the
   key Mcoding.  The visible code also looks up the canonicalized name
   in coding_definition_list, defines the coding system from the
   stored definition with mconv_define_coding (), and then removes and
   unreferences that definition.
   NOTE(review): the conditions guarding these steps and the final
   return are not visible in this excerpt.  */
2814 static MCodingSystem *
2815 find_coding (MSymbol name)
2817 MCodingSystem *coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2822 MSymbol sym = msymbol__canonicalize (name);
2824 plist = mplist_find_by_key (coding_definition_list, sym);
/* The stored definition is a plist whose first value is the real name
   and whose remaining elements are the definition parameters.  */
2827 pl = MPLIST_PLIST (plist);
2828 name = MPLIST_VAL (pl);
2829 mconv_define_coding (MSYMBOL_NAME (name), MPLIST_NEXT (pl),
2830 NULL, NULL, NULL, NULL);
2831 coding = (MCodingSystem *) msymbol_get (name, Mcoding);
/* The definition is no longer needed once the coding system exists.  */
2832 plist = mplist_pop (plist);
2833 M17N_OBJECT_UNREF (plist);
/* Binding kinds: none, buffer, or stream.
   NOTE(review): their use is not visible in this excerpt; presumably
   they record what a converter is bound to.  */
2838 #define BINDING_NONE 0
2839 #define BINDING_BUFFER 1
2840 #define BINDING_STREAM 2
/* Work size constant, 0x10000 (65536).
   NOTE(review): its unit and use are not visible in this excerpt.  */
2842 #define CONVERT_WORKSIZE 0x10000
/* Initialize the coding module: set up the coding list and the
   definition list, fill the ISO-2022 byte classification table,
   intern the symbols used as keys and flag names, and define the
   predefined coding systems (us-ascii, iso-8859-1, the utf-8, utf-16
   and utf-32 families, and sjis).  */
2848 mcoding__init (void)
2851 MPlist *param, *charsets, *pl;
2853 MLIST_INIT1 (&coding_list, codings, 128);
2854 coding_definition_list = mplist ();
2856 /* ISO-2022 specific initialize routine. */
/* Fill the broad ranges first; individual bytes are overridden
   below.  */
2857 for (i = 0; i < 0x20; i++)
2858 iso_2022_code_class[i] = ISO_control_0;
2859 for (i = 0x21; i < 0x7F; i++)
2860 iso_2022_code_class[i] = ISO_graphic_plane_0;
2861 for (i = 0x80; i < 0xA0; i++)
2862 iso_2022_code_class[i] = ISO_control_1;
2863 for (i = 0xA1; i < 0xFF; i++)
2864 iso_2022_code_class[i] = ISO_graphic_plane_1;
/* The boundary bytes of each plane have their own classes.  */
2865 iso_2022_code_class[0x20] = iso_2022_code_class[0x7F] = ISO_0x20_or_0x7F;
2866 iso_2022_code_class[0xA0] = iso_2022_code_class[0xFF] = ISO_0xA0_or_0xFF;
/* Control bytes with a special meaning for shifts and escapes.  */
2867 iso_2022_code_class[0x0E] = ISO_shift_out;
2868 iso_2022_code_class[0x0F] = ISO_shift_in;
2869 iso_2022_code_class[0x19] = ISO_single_shift_2_7;
2870 iso_2022_code_class[0x1B] = ISO_escape;
2871 iso_2022_code_class[0x8E] = ISO_single_shift_2;
2872 iso_2022_code_class[0x8F] = ISO_single_shift_3;
2873 iso_2022_code_class[0x9B] = ISO_control_sequence_introducer;
/* Intern the symbols used by this module.  */
2875 Mcoding = msymbol ("coding");
2877 Mutf = msymbol ("utf");
2878 Miso_2022 = msymbol ("iso-2022");
2880 Mreset_at_eol = msymbol ("reset-at-eol");
2881 Mreset_at_cntl = msymbol ("reset-at-cntl");
2882 Meight_bit = msymbol ("eight-bit");
2883 Mlong_form = msymbol ("long-form");
2884 Mdesignation_g0 = msymbol ("designation-g0");
2885 Mdesignation_g1 = msymbol ("designation-g1");
2886 Mdesignation_ctext = msymbol ("designation-ctext");
2887 Mdesignation_ctext_ext = msymbol ("designation-ctext-ext");
2888 Mlocking_shift = msymbol ("locking-shift");
2889 Msingle_shift = msymbol ("single-shift");
2890 Msingle_shift_7 = msymbol ("single-shift-7");
2891 Meuc_tw_shift = msymbol ("euc-tw-shift");
2892 Miso_6429 = msymbol ("iso-6429");
2893 Mrevision_number = msymbol ("revision-number");
2894 Mfull_support = msymbol ("full-support");
2895 Mmaybe = msymbol ("maybe");
2897 Mtype = msymbol ("type");
/* These four keys are created as managing keys.  */
2898 Mcharsets = msymbol_as_managing_key ("charsets");
2899 Mflags = msymbol_as_managing_key ("flags");
2900 Mdesignation = msymbol_as_managing_key ("designation");
2901 Minvocation = msymbol_as_managing_key ("invocation");
2902 Mcode_unit = msymbol ("code-unit");
2903 Mbom = msymbol ("bom");
2904 Mlittle_endian = msymbol ("little-endian");
2907 charsets = mplist ();
2909 /* Setup predefined codings. */
/* The same PARAM and CHARSETS plists are reused for every definition
   below; each step changes only the entries that differ.  */
2910 mplist_set (charsets, Msymbol, Mcharset_ascii);
2911 pl = mplist_add (pl, Mtype, Mcharset);
2912 pl = mplist_add (pl, Mcharsets, charsets);
2913 Mcoding_us_ascii = mconv_define_coding ("us-ascii", param,
2914 NULL, NULL, NULL, NULL);
/* Register "ANSI_X3.4-1968", and its canonicalized form, as aliases
   of us-ascii.  */
2917 MSymbol alias = msymbol ("ANSI_X3.4-1968");
2918 MCodingSystem *coding
2919 = (MCodingSystem *) msymbol_get (Mcoding_us_ascii, Mcoding);
2921 msymbol_put (alias, Mcoding, coding);
2922 alias = msymbol__canonicalize (alias);
2923 msymbol_put (alias, Mcoding, coding);
2926 mplist_set (charsets, Msymbol, Mcharset_iso_8859_1);
2927 Mcoding_iso_8859_1 = mconv_define_coding ("iso-8859-1", param,
2928 NULL, NULL, NULL, NULL);
/* UTF codings: type Mutf with a code unit of 8, 16 or 32.  */
2930 mplist_set (charsets, Msymbol, Mcharset_m17n);
2931 mplist_put (param, Mtype, Mutf);
2932 mplist_put (param, Mcode_unit, (void *) 8);
2933 Mcoding_utf_8_full = mconv_define_coding ("utf-8-full", param,
2934 NULL, NULL, NULL, NULL);
2936 mplist_set (charsets, Msymbol, Mcharset_unicode);
2937 Mcoding_utf_8 = mconv_define_coding ("utf-8", param,
2938 NULL, NULL, NULL, NULL);
/* utf-16 and utf-32 are defined with the BOM setting Mmaybe, and as
   little endian when WORDS_BIGENDIAN is not defined.  */
2940 mplist_put (param, Mcode_unit, (void *) 16);
2941 mplist_put (param, Mbom, Mmaybe);
2942 #ifndef WORDS_BIGENDIAN
2943 mplist_put (param, Mlittle_endian, Mt);
2945 Mcoding_utf_16 = mconv_define_coding ("utf-16", param,
2946 NULL, NULL, NULL, NULL);
2948 mplist_put (param, Mcode_unit, (void *) 32);
2949 Mcoding_utf_32 = mconv_define_coding ("utf-32", param,
2950 NULL, NULL, NULL, NULL);
/* The "be" variants: BOM and little-endian settings both Mnil.  */
2952 mplist_put (param, Mcode_unit, (void *) 16);
2953 mplist_put (param, Mbom, Mnil);
2954 mplist_put (param, Mlittle_endian, Mnil);
2955 Mcoding_utf_16be = mconv_define_coding ("utf-16be", param,
2956 NULL, NULL, NULL, NULL);
2958 mplist_put (param, Mcode_unit, (void *) 32);
2959 Mcoding_utf_32be = mconv_define_coding ("utf-32be", param,
2960 NULL, NULL, NULL, NULL);
/* The "le" variants: little-endian setting Mt.  */
2962 mplist_put (param, Mcode_unit, (void *) 16);
2963 mplist_put (param, Mlittle_endian, Mt);
2964 Mcoding_utf_16le = mconv_define_coding ("utf-16le", param,
2965 NULL, NULL, NULL, NULL);
2967 mplist_put (param, Mcode_unit, (void *) 32);
2968 Mcoding_utf_32le = mconv_define_coding ("utf-32le", param,
2969 NULL, NULL, NULL, NULL);
/* sjis is defined with type Mnil and its own conversion functions.  */
2971 mplist_put (param, Mtype, Mnil);
2972 mplist_set (charsets, Msymbol, Mcharset_ascii);
2973 Mcoding_sjis = mconv_define_coding ("sjis", param,
2976 encode_coding_sjis, NULL);
/* Drop the local references to the temporary plists.  */
2978 M17N_OBJECT_UNREF (charsets);
2979 M17N_OBJECT_UNREF (param);
/* Finalizer for the coding module.  For every entry of coding_list it
   frees the per-coding extra_info and extra_spec, then releases the list
   storage and drops the references held on the coding definition plists.
   NOTE(review): this listing omits some lines of the function (return
   type, braces, local declarations), so only the visible steps are
   described here.  */
2985 mcoding__fini (void)
2990 for (i = 0; i < coding_list.used; i++)
2992 MCodingSystem *coding = coding_list.codings[i];
2994 if (coding->extra_info)
2995 free (coding->extra_info);
2996 if (coding->extra_spec)
/* An ISO-2022 spec owns a separately allocated `designations' member
   that must go before the spec itself.  */
2998 if (coding->type == Miso_2022)
2999 free (((struct iso_2022_spec *) coding->extra_spec)->designations);
3000 free (coding->extra_spec);
3004 MLIST_FREE1 (&coding_list, codings);
/* Unreference each stored definition plist, then the container.  */
3005 MPLIST_DO (plist, coding_definition_list)
3006 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
3007 M17N_OBJECT_UNREF (coding_definition_list);
/* Record a charset-type coding definition for SYM, unless
   coding_definition_list already has an entry under SYM's canonicalized
   name.  The stored definition is the plist
     (Msymbol SYM) (Mtype Mcharset) (Mcharsets (Msymbol SYM)).
   Only the definition is recorded on the lines visible here; no
   MCodingSystem is created.  */
3011 mconv__register_charset_coding (MSymbol sym)
3013 MSymbol name = msymbol__canonicalize (sym);
3015 if (! mplist_find_by_key (coding_definition_list, name))
3017 MPlist *param = mplist (), *charsets = mplist ();
3019 mplist_set (charsets, Msymbol, sym);
3020 mplist_add (param, Msymbol, sym);
3021 mplist_add (param, Mtype, Mcharset);
3022 mplist_add (param, Mcharsets, charsets);
3023 mplist_put (coding_definition_list, name, param);
/* Drop the local reference on CHARSETS; presumably PARAM keeps its own
   because Mcharsets is a managing key -- confirm.  */
3024 M17N_OBJECT_UNREF (charsets);
/* Load coding system definitions from the database entry tagged
   "coding-list" and append them to coding_definition_list.

   Every element of the loaded list must be a plist whose first element
   is a symbol (the coding name).  The rest of that plist is copied with
   mplist__from_plist (), the name is pushed in front under the key
   Msymbol, and the result is stored under the canonicalized name.

   NOTE(review): this listing omits some lines of the function
   (declarations, braces, return statements), so the return values on
   the non-error paths cannot be stated from here.  */
3030 mcoding__load_from_database ()
3032 MDatabase *mdb = mdatabase_find (msymbol ("coding-list"), Mnil, Mnil, Mnil);
3033 MPlist *def_list, *plist;
/* The result of each mplist_add () below becomes the target of the
   next one.  */
3034 MPlist *definitions = coding_definition_list;
3035 int mdebug_mask = MDEBUG_CODING;
3039 MDEBUG_PUSH_TIME ();
3040 def_list = (MPlist *) mdatabase_load (mdb);
3041 MDEBUG_PRINT_TIME ("CODING", (stderr, " to load the data."));
3046 MDEBUG_PUSH_TIME ();
3047 MPLIST_DO (plist, def_list)
3050 MSymbol name, canonicalized;
/* NOTE(review): malformed entries are reported with MERROR_CHARSET
   rather than MERROR_CODING, and DEF_LIST is not released on the
   visible lines before bailing out -- confirm both are intended.  */
3052 if (! MPLIST_PLIST_P (plist))
3053 MERROR (MERROR_CHARSET, -1);
3054 pl = MPLIST_PLIST (plist);
3055 if (! MPLIST_SYMBOL_P (pl))
3056 MERROR (MERROR_CHARSET, -1);
3057 name = MPLIST_SYMBOL (pl);
3058 canonicalized = msymbol__canonicalize (name);
/* Copy everything after the name, then put the name back in front
   keyed by Msymbol.  */
3059 pl = mplist__from_plist (MPLIST_NEXT (pl));
3060 mplist_push (pl, Msymbol, name);
3061 definitions = mplist_add (definitions, canonicalized, pl);
3064 M17N_OBJECT_UNREF (def_list);
3065 MDEBUG_PRINT_TIME ("CODING", (stderr, " to parse the loaded data."));
3071 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
3075 /*** @addtogroup m17nConv */
3079 /***en @name Variables: Symbols representing coding systems */
3080 /***ja @name ÊÑ¿ô: ÄêµÁºÑ¤ß¥³¡¼¥É·Ï¤ò»ØÄꤹ¤ë¤¿¤á¤Î¥·¥ó¥Ü¥ë */
3085 @brief Symbol for the coding system US-ASCII.
3087 The symbol #Mcoding_us_ascii has name <tt>"us-ascii"</tt> and
3088 represents a coding system for the CES US-ASCII. */
3091 @brief US-ASCII コード系のシンボル.
3093 ¥·¥ó¥Ü¥ë #Mcoding_us_ascii ¤Ï <tt>"us-ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3094 CES US-ASCII ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3096 MSymbol Mcoding_us_ascii;
3100 @brief Symbol for the coding system ISO-8859-1.
3102 The symbol #Mcoding_iso_8859_1 has name <tt>"iso-8859-1"</tt> and
3103 represents a coding system for the CES ISO-8859-1. */
3106 @brief ISO-8859-1 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3108 ¥·¥ó¥Ü¥ë #Mcoding_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt> ¤È¤¤¤¦Ì¾Á°
3109 ¤ò»ý¤Á¡¢CES ISO-8859-1 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3111 MSymbol Mcoding_iso_8859_1;
3115 @brief Symbol for the coding system UTF-8.
3117 The symbol #Mcoding_utf_8 has name <tt>"utf-8"</tt> and represents
3118 a coding system for the CES UTF-8. */
3121 @brief UTF-8 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3123 ¥·¥ó¥Ü¥ë #Mcoding_utf_8 ¤Ï <tt>"utf-8"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢CES
3124 UTF-8 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3127 MSymbol Mcoding_utf_8;
3131 @brief Symbol for the coding system UTF-8-FULL.
3133 The symbol #Mcoding_utf_8_full has name <tt>"utf-8-full"</tt> and
3134 represents a coding system that is an extension of UTF-8. This
3135 coding system uses the same encoding algorithm as UTF-8 but is not
3136 limited to the Unicode characters. It can encode all characters
3137 supported by the m17n library. */
3140 @brief UTF-8-FULL ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3142 ¥·¥ó¥Ü¥ë #Mcoding_utf_8_full ¤Ï <tt>"utf-8-full"</tt> ¤È¤¤¤¦Ì¾Á°¤ò
3143 »ý¤Á¡¢<tt>"UTF-8"</tt> ¤Î³ÈÄ¥¤Ç¤¢¤ë¥³¡¼¥É·Ï¤ò¼¨¤¹¡£¤³¤Î¥³¡¼¥É·Ï¤Ï
3144 UTF-8 ¤ÈƱ¤¸¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¥¢¥ë¥´¥ê¥º¥à¤òÍѤ¤¤ë¤¬¡¢ÂÐ¾Ý¤Ï Unicode
3145 ¤Îʸ»ú¤Ë¸Â¤é¤Ê¤¤¡£¤Þ¤¿m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò¥¨¥ó¥³¡¼¥É
3149 MSymbol Mcoding_utf_8_full;
3153 @brief Symbol for the coding system UTF-16.
3155 The symbol #Mcoding_utf_16 has name <tt>"utf-16"</tt> and
3156 represents a coding system for the CES UTF-16 (RFC 2781). */
3158 @brief UTF-16 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3160 ¥·¥ó¥Ü¥ë #Mcoding_utf_16 ¤Ï <tt>"utf-16"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3161 CES UTF-16 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3164 MSymbol Mcoding_utf_16;
3168 @brief Symbol for the coding system UTF-16BE.
3170 The symbol #Mcoding_utf_16be has name <tt>"utf-16be"</tt> and
3171 represents a coding system for the CES UTF-16BE (RFC 2781). */
3174 @brief UTF-16BE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3176 ¥·¥ó¥Ü¥ë #Mcoding_utf_16be ¤Ï <tt>"utf-16be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3177 CES UTF-16BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3179 MSymbol Mcoding_utf_16be;
3183 @brief Symbol for the coding system UTF-16LE.
3185 The symbol #Mcoding_utf_16le has name <tt>"utf-16le"</tt> and
3186 represents a coding system for the CES UTF-16LE (RFC 2781). */
3189 @brief UTF-16LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3191 ¥·¥ó¥Ü¥ë #Mcoding_utf_16le ¤Ï <tt>"utf-16le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3192 CES UTF-16LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3194 MSymbol Mcoding_utf_16le;
3198 @brief Symbol for the coding system UTF-32.
3200 The symbol #Mcoding_utf_32 has name <tt>"utf-32"</tt> and
3201 represents a coding system for the CES UTF-32 (Unicode Standard Annex #19). */
3204 @brief UTF-32 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3206 ¥·¥ó¥Ü¥ë #Mcoding_utf_32 ¤Ï <tt>"utf-32"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3207 CES UTF-32 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3209 MSymbol Mcoding_utf_32;
3213 @brief Symbol for the coding system UTF-32BE.
3215 The symbol #Mcoding_utf_32be has name <tt>"utf-32be"</tt> and
3216 represents a coding system for the CES UTF-32BE (Unicode Standard Annex #19). */
3218 @brief UTF-32BE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3220 ¥·¥ó¥Ü¥ë #Mcoding_utf_32be ¤Ï <tt>"utf-32be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3221 CES UTF-32BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3223 MSymbol Mcoding_utf_32be;
3227 @brief Symbol for the coding system UTF-32LE.
3229 The symbol #Mcoding_utf_32le has name <tt>"utf-32le"</tt> and
3230 represents a coding system for the CES UTF-32LE (Unicode Standard Annex #19). */
3232 @brief UTF-32LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3234 ¥·¥ó¥Ü¥ë #Mcoding_utf_32le ¤Ï <tt>"utf-32le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3235 CES UTF-32LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3237 MSymbol Mcoding_utf_32le;
3241 @brief Symbol for the coding system SJIS.
3243 The symbol #Mcoding_sjis has name <tt>"sjis"</tt> and represents a coding
3244 system for the CES Shift-JIS. */
3246 @brief SJIS ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë.
3248 シンボル #Mcoding_sjis は <tt>"sjis"</tt> という名前を持ち、
3249 CES Shift-JISÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3251 MSymbol Mcoding_sjis;
3256 @name Variables: Parameter keys for mconv_define_coding (). */
3258 @name ÊÑ¿ô: mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ */
3263 Parameter key for mconv_define_coding () (which see). */
3265 mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ (¾ÜºÙ¤Ï mconv_define_coding ()»²¾È). */
/* Plist keys accepted by mconv_define_coding (); each one is looked up
   there with mplist_get ().  */
3271 MSymbol Mdesignation;
3272 MSymbol Minvocation;
3275 MSymbol Mlittle_endian;
3280 @name Variables: Symbols representing coding system types. */
3282 @name ÊÑ¿ô¡§ ¥³¡¼¥É·Ï¤Î¥¿¥¤¥×¤ò¼¨¤¹¥·¥ó¥Ü¥ë. */
3287 Symbol that can be a value of the #Mtype parameter of a coding
3288 system used in an argument to the mconv_define_coding () function
3291 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¡¢¥³¡¼¥É·Ï¤Î
3292 ¥Ñ¥é¥á¡¼¥¿ #Mtype ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3293 mconv_define_coding ()»²¾È)¡£ */
3303 @name Variables: Symbols appearing in the value of #Mflags parameter. */
3305 @name ÊÑ¿ô¡§ ¥Ñ¥é¥á¡¼¥¿ #Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë. */
3310 Symbols that can be a value of the #Mflags parameter of a coding
3311 system used in an argument to the mconv_define_coding () function
3314 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¡¢¥³¡¼¥É·Ï¤Î
3315 ¥Ñ¥é¥á¡¼¥¿ #Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3316 mconv_define_coding ()»²¾È)¡£ */
/* Flag symbols recognized in the #Mflags plist of an ISO-2022 coding
   definition; mconv_define_coding () maps each of them onto an
   MCODING_ISO_* bit of MCodingInfoISO2022.flags.  */
3317 MSymbol Mreset_at_eol;
3319 MSymbol Mreset_at_cntl;
3322 MSymbol Mdesignation_g0;
3323 MSymbol Mdesignation_g1;
3324 MSymbol Mdesignation_ctext;
3325 MSymbol Mdesignation_ctext_ext;
3326 MSymbol Mlocking_shift;
3327 MSymbol Msingle_shift;
3328 MSymbol Msingle_shift_7;
3329 MSymbol Meuc_tw_shift;
3331 MSymbol Mrevision_number;
3332 MSymbol Mfull_support;
3337 @name Variables: etc
3339 Remaining variables. */
3340 /***ja @name ÊÑ¿ô: ¤½¤Î¾
3346 @brief Symbol whose name is "maybe".
3348 The variable #Mmaybe is a symbol of name <tt>"maybe"</tt>. It is
3349 used as a value of the #Mbom parameter of the function
3350 mconv_define_coding () (which see). */
3352 @brief "maybe"¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë.
3354 ÊÑ¿ô #Mmaybe ¤Ï <tt>"maybe"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£¤³¤ì¤Ï´Ø¿ô
3355 mconv_define_coding () ¥Ñ¥é¥á¡¼¥¿ #Mbom ¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤ë¡£(¾Ü
3356 ºÙ¤Ï mconv_define_coding () »²¾È)¡£ */
3362 @brief The symbol @c Mcoding.
3364 Any decoded M-text has a text property whose key is the predefined
3365 symbol @c Mcoding. The name of @c Mcoding is
3366 <tt>"coding"</tt>. */
3369 @brief ¥·¥ó¥Ü¥ë @c Mcoding.
3371 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¤¹¤Ù¤Æ¡¢¥¡¼¤¬ÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë @c Mcoding
3372 ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£¥·¥ó¥Ü¥ë @c Mcoding ¤Ï
3373 <tt>"coding"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£ */
3381 @brief Define a coding system.
3383 The mconv_define_coding () function defines a new coding system
3384 and makes it accessible via a symbol whose name is $NAME. $PLIST
3385 specifies parameters of the coding system as below:
3389 <li> Key is @c Mtype, value is a symbol
3391 The value specifies the type of the coding system. It must be
3392 #Mcharset, #Mutf, #Miso_2022, or #Mnil.
3394 If the type is #Mcharset, $EXTRA_INFO is ignored.
3396 If the type is #Mutf, $EXTRA_INFO must be a pointer to
3399 If the type is #Miso_2022, $EXTRA_INFO must be a pointer to
3400 #MCodingInfoISO2022.
3402 If the type is #Mnil, the arguments $RESETTER, $DECODER, and
3403 $ENCODER must be supplied. $EXTRA_INFO is ignored. Otherwise,
3404 they can be @c NULL and the m17n library provides proper defaults.
3406 <li> Key is #Mcharsets, value is a plist
3408 The value specifies a list of charsets supported by the coding
3409 system. The keys of the plist must be #Msymbol, and the values
3410 must be symbols representing charsets.
3412 <li> Key is #Mflags, value is a plist
3414 If the type is #Miso_2022, the value specifies flags to control
3415 the ISO 2022 interpreter. The keys of the plist must be #Msymbol,
3416 and values must be one of the following.
3422 If this flag exists, designation and invocation status is reset to
3423 the initial state at the end of line.
3425 <li> #Mreset_at_cntl
3427 If this flag exists, designation and invocation status is reset to
3428 the initial state at a control character.
3432 If this flag exists, the graphic plane right is used.
3436 If this flag exists, the over-long escape sequences (ESC '$' '('
3437 <final_byte>) are used for designating the CCS JISX0208.1978,
3438 GB2312, and JISX0208.
3440 <li> #Mdesignation_g0
3442 If this flag and #Mfull_support exist, designates charsets not
3443 listed in the charset list to the graphic register G0.
3445 <li> #Mdesignation_g1
3447 If this flag and #Mfull_support exist, designates charsets not
3448 listed in the charset list to the graphic register G1.
3450 <li> #Mdesignation_ctext
3452 If this flag and #Mfull_support exist, designates charsets not
3453 listed in the charset list to a graphic register G0 or G1 based on
3454 the criteria of the Compound Text.
3456 <li> #Mdesignation_ctext_ext
3458 If this flag and #Mfull_support exist, designates charsets not
3459 listed in the charset list to a graphic register G0 or G1, or use
3460 extended segment for such charsets based on the criteria of the
3463 <li> #Mlocking_shift
3465 If this flag exists, use locking shift.
3469 If this flag exists, use single shift.
3471 <li> #Msingle_shift_7
3473 If this flag exists, use 7-bit single shift code (0x19).
3475 <li> #Meuc_tw_shift
3477 If this flag exists, use a special shifting according to EUC-TW.
3481 This flag is currently ignored.
3483 <li> #Mrevision_number
3485 If this flag exists, use a revision number escape sequence to
3486 designate a charset that has a revision number.
3490 If this flag exists, support all charsets registered in the
3491 International Registry.
3495 <li> Key is #Mdesignation, value is a plist
3497 If the type is #Miso_2022, the value specifies how to designate
3498 each supported charset. The keys of the plist must be
3499 #Minteger, and the values must be numbers indicating a graphic
3500 register. The Nth element value is for the Nth charset of the
3501 charset list. The value 0..3 means that it is assumed that a
3502 charset is already designated to the graphic register 0..3. The
3503 negative value G (-4..-1) means that a charset is not designated
3504 to any register at first, and if necessary, is designated to the
3505 (G+4) graphic register.
3507 <li> Key is #Minvocation, value is a plist
3509 If the type is #Miso_2022, the value specifies how to invoke
3510 each graphic register. The plist length must be one or two. The
3511 keys of the plist must be #Minteger, and the values must be
3512 numbers indicating a graphic register. The value of the first
3513 element specifies which graphic register is invoked to the
3514 graphic plane left. If the length is one, no graphic register is
3515 invoked to the graphic plane right. Otherwise, the value of the
3516 second element specifies which graphic register is invoked to
3517 the graphic plane right.
3519 <li> Key is #Mcode_unit, value is an integer
3521 If the type is #Mutf, the value specifies the bit length of a
3522 code-unit. It must be 8, 16, or 32.
3524 <li> Key is #Mbom, value is a symbol
3526 If the type is #Mutf and the code-unit bit length is 16 or 32,
3527 it specifies whether or not to use BOM (Byte Order Mark). If the
3528 value is #Mnil (default), BOM is not used, else if the value is
3529 #Mmaybe, the existence of BOM is detected at decoding time, else
3532 <li> Key is #Mlittle_endian, value is a symbol
3534 If the type is #Mutf and the code-unit bit length is 16 or 32,
3535 it specifies whether or not the encoding is little endian. If the
3536 value is #Mnil (default), it is big endian, else it is little
3541 $RESETTER is a pointer to a function that resets a converter for
3542 the coding system to the initial status. The pointed function is
3543 called with one argument, a pointer to a converter object.
3545 $DECODER is a pointer to a function that decodes a byte sequence
3546 according to the coding system. The pointed function is called
3547 with four arguments:
3549 @li A pointer to the byte sequence to decode.
3550 @li The number of bytes to decode.
3551 @li A pointer to an M-text to which the decoded characters are appended.
3552 @li A pointer to a converter object.
3554 $DECODER must return 0 if it succeeds. Otherwise it must return -1.
3556 $ENCODER is a pointer to a function that encodes an M-text
3557 according to the coding system. The pointed function is called
3560 @li A pointer to the M-text to encode.
3561 @li The starting position of the encoding.
3562 @li The ending position of the encoding.
3563 @li A pointer to a memory area where the produced bytes are stored.
3564 @li The size of the memory area.
3565 @li A pointer to a converter object.
3567 $ENCODER must return 0 if it succeeds. Otherwise it must return -1.
3569 $EXTRA_INFO is a pointer to a data structure that contains extra
3570 information about the coding system. The type of the data
3571 structure depends on $TYPE.
3575 If the operation was successful, mconv_define_coding () returns a
3576 symbol whose name is $NAME. If an error is detected, it returns
3577 #Mnil and assigns an error code to the external variable #merror_code. */
3580 @brief ¥³¡¼¥É·Ï¤òÄêµÁ¤¹¤ë.
3582 ´Ø¿ô mconv_define_coding () ¤Ï¡¢¿·¤·¤¤¥³¡¼¥É·Ï¤òÄêµÁ¤·¡¢¤½¤ì¤ò
3583 $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£ $PLIST
3584 ¤Ç¤ÏÄêµÁ¤¹¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿¤ò°Ê²¼¤Î¤è¤¦¤Ë»ØÄꤹ¤ë¡£
3588 <li> ¥¡¼¤¬ @c Mtype ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3590 Ãͤϥ³¡¼¥É·Ï¤Î¥¿¥¤¥×¤òɽ¤·¡¢#Mcharset, #Mutf, #Miso_2022, #Mnil ¤Î
3591 ¤¤¤º¤ì¤«¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3593 ¥¿¥¤¥×¤¬ #Mcharset ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£
3595 ¥¿¥¤¥×¤¬ #Mutf ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï #MCodingInfoUTF ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç
3598 ¥¿¥¤¥×¤¬ #Miso_2022¤Ê¤é¤Ð $EXTRA_INFO ¤Ï #MCodingInfoISO2022 ¤Ø¤Î¥Ý
3599 ¥¤¥ó¥¿¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3601 ¥¿¥¤¥×¤¬ #Mnil ¤Ê¤é¤Ð¡¢°ú¿ô $RESETTER, $DECODER, $ENCODER ¤òÍ¿¤¨¤Ê
3602 ¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£$EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¤³¤ì¤é
3603 ¤Ï @c NULL ¤Ç¹½¤ï¤Ê¤¤¡£¤½¤ÎºÝ¤Ë¤Ï m17n ¥é¥¤¥Ö¥é¥ê¤¬Å¬Àڤʥǥե©¥ë
3606 <li> ¥¡¼¤¬ #Mcharsets ¤ÇÃͤ¬ plist ¤Î»þ
3608 ÃͤϤ³¤Î¥³¡¼¥É·Ï¤Ç¥µ¥Ý¡¼¥È¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î¥ê¥¹¥È¤Ç¤¢¤ë¡£plist¤Î¥¡¼¤Ï
3609 #Msymbol¡¢ÃͤÏʸ»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3611 <li> ¥¡¼¤¬ #Mflags Ãͤ¬ plist ¤Î»þ
3613 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢¤³¤ÎÃͤÏ, ISO 2022 ¥¤¥ó¥¿¥×¥ê¥¿ÍѤÎÀ©
3614 ¸æ¥Õ¥é¥Ã¥°¤ò¼¨¤¹¡£plist¤Î¥¡¼¤Ï#Msymbol¡¢Ãͤϰʲ¼¤Î¤¤¤º¤ì¤«¤Ç¤Ê¤¯
3621 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤Ï¹ÔËö¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤Æ
3624 <li> #Mreset_at_cntl
3626 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤ÏÀ©¸æʸ»ú¤Ë½Ð²ñ¤Ã¤¿»þ
3627 ÅÀ¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤ÆÅö½é¤Î¾õÂÖ¤ËÌá¤ë¡£
3631 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î±¦È¾Ì̤¬ÍѤ¤¤é¤ì¤ë¡£
3635 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢Ê¸»ú½¸¹ç JISX0208.1978, GB2312, JISX0208 ¤ò»Ø
3636 ¼¨¤¹¤ëºÝ¤Ë over-long ¥¨¥¹¥±¡¼¥×¥·¡¼¥±¥ó¥¹ (ESC '$' '('
3637 <final_byte>) ¤¬ÍѤ¤¤é¤ì¤ë¡£
3639 <li> #Mdesignation_g0
3641 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤
3642 ʸ»ú¥»¥Ã¥È¤ò G0 ½¸¹ç¤Ë»Ø¼¨¤¹¤ë¡£
3644 <li> #Mdesignation_g1
3646 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤
3647 ʸ»ú¥»¥Ã¥È¤ò G1 ½¸¹ç¤Ë»Ø¼¨¤¹¤ë¡£
3649 <li> #Mdesignation_ctext
3651 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤
3652 ʸ»ú¥»¥Ã¥È¤ò G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¥³¥ó¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë
3655 <li> #Mdesignation_ctext_ext
3657 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤
3658 ʸ»ú¥»¥Ã¥È¤ò G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¤¢¤ë¤¤¤Ï³ÈÄ¥¥»¥°¥á¥ó¥È¤Ë¥³¥ó
3659 ¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë¤½¤Ã¤Æ»Ø¼¨¤¹¤ë¡£
3661 <li> #Mlocking_shift
3663 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥í¥Ã¥¥ó¥°¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3667 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥·¥ó¥°¥ë¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3669 <li> #Msingle_shift_7
3671 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢7-bit ¥·¥ó¥°¥ë¥·¥Õ¥È¥³¡¼¥É (0x19) ¤òÍѤ¤¤ë¡£
3673 <li> #Meuc_tw_shift;
3675 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢EUC-TW ¤Ë±è¤Ã¤¿ÆÃÊ̤ʥ·¥Õ¥È¤òÍѤ¤¤ë¡£
3679 ¸½»þÅÀ¤Ç¤ÏÍѤ¤¤é¤ì¤Æ¤¤¤Ê¤¤¡£
3681 <li> #Mrevision_number
3683 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢revision number ¤ò»ý¤Äʸ»ú¥»¥Ã¥È¤ò»Ø¼¨¤¹¤ëºÝ¤Ë
3684 revision number ¥¨¥¹¥±¡¼¥×¥·¡¼¥¯¥¨¥ó¥¹¤òÍѤ¤¤ë¡£
3688 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢the International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ëÁ´
3689 ʸ»ú¥»¥Ã¥È¤ò¥µ¥Ý¡¼¥È¤¹¤ë¡£
3693 <li> ¥¡¼¤¬ #Mdesignation¤ÇÃͤ¬ plist ¤Î»þ
3695 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢ÃͤϳÆʸ»ú¤ò¤É¤Î¤è¤¦¤Ë»Ø¼¨¤¹¤ë¤«¤ò¼¨¤¹¡£
3696 plist ¤Î¥¡¼¤Ï #Minteger¡¢ÃͤϽ¸¹ç¡Êgraphic register¡Ë¤ò¼¨¤¹¿ô»ú¤Ç
3697 ¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£NÈÖÌܤÎÍ×ÁǤÎÃͤϡ¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Î N ÈÖÌÜ ¤Î
3698 ʸ»ú¥»¥Ã¥È¤ËÂбþ¤¹¤ë¡£Ãͤ¬ 0..3 ¤Ç¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¤¬¤¹¤Ç¤Ë
3699 G0..G3 ¤Ë»Ø¼¨ ¤µ¤ì¤Æ¤¤¤ë¡£
3701 Ãͤ¬Éé(-4..-1) ¤Ç¤¢¤ì¤Ð¡¢½é´ü¾õÂ֤ǤÏʸ»ú¥»¥Ã¥È¤¬¤É¤³¤Ë¤â»Ø¼¨¤µ¤ì
3702 ¤Æ¤¤¤Ê¤¤¤³¤È¡¢É¬ÍפʺݤˤÏG0..G3 ¤Î¤½¤ì¤¾¤ì¤Ë»Ø¼¨¤¹¤ë¤³¤È¤ò°ÕÌ£¤¹
3705 <li> ¥¡¼¤¬ #Minvocation¤ÇÃͤ¬ plist ¤Î»þ
3707 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢Ãͤϳƽ¸¹ç¤ò¤É¤Î¤è¤¦¤Ë¸Æ¤Ó½Ð¤¹¤«¤ò¼¨¤¹¡£
3708 plist ¤ÎŤµ¤Ï 1 ¤Ê¤¤¤· 2 ¤Ç¤¢¤ë¡£plist ¤Î¥¡¼¤Ï #Minteger¡¢ÃͤϽ¸
3709 ¹ç¡Êgraphic register)¤ò¼¨¤¹¿ô»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ºÇ½é¤ÎÍ×ÁǤÎÃÍ
3710 ¤¬¿Þ·Áʸ»ú½¸¹çº¸È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤Ç¤¢¤ë¡£ plist ¤ÎŤµ¤¬ 1 ¤Ê
3711 ¤é¤Ð¡¢±¦È¾Ì̤ˤϲ¿¤â¸Æ¤Ó½Ð¤µ¤ì¤Ê¤¤¡£¤½¤¦¤Ç¤±¤ì¤Ð¡¢£²¤Ä¤á¤ÎÍ×ÁǤÎÃÍ
3712 ¤¬¿Þ·Áʸ»ú½¸¹ç±¦È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤È¤Ê¤ë¡£
3714 <li> ¥¡¼¤¬ #Mcode_unit ¤ÇÃͤ¬À°¿ôÃͤλþ
3716 ¥¿¥¤¥×¤¬ #Mutf ¤Ê¤é¤Ð¡¢Ãͤϥ³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤǤ¢¤ê¡¢8, 16,
3717 32 ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ë¡£
3719 <li> ¥¡¼¤¬ #Mbom ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3721 ¥¿¥¤¥×¤¬ #Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32¤Ê¤é¤Ð¡¢ÃͤÏ
3722 BOM (Byte Order Mark) ¤ò»ÈÍѤ¹¤ë¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
3723 #Mnil ¤Ê¤é¤Ð¡¢»ÈÍѤ·¤Ê¤¤¡£Ãͤ¬#Mmaybe ¤Ê¤é¤Ð¥Ç¥³¡¼¥É»þ¤Ë BOM ¤¬¤¢
3724 ¤ë¤«¤É¤¦¤«¤òÄ´¤Ù¤ë¡£¤½¤ì°Ê³°¤Ê¤é¤Ð»ÈÍѤ¹¤ë¡£
3726 <li> ¥¡¼¤¬ #Mlittle_endian ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3728 ¥¿¥¤¥×¤¬ #Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32¤Ê¤é¤Ð¡¢Ãͤϥ¨
3729 ¥ó¥³¡¼¥É¤¬ little endian ¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤΠ#Mnil
3730 ¤Ê¤é¤Ð big endian ¤Ç¤¢¤ê¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð little endian ¤Ç¤¢¤ë¡£
3734 $RESETTER ¤Ï¤³¤Î¥³¡¼¥É·ÏÍѤΥ³¥ó¥Ð¡¼¥¿¤ò½é´ü¾õÂ֤˥ꥻ¥Ã¥È¤¹¤ë´Ø¿ô
3735 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£¤³¤Î´Ø¿ô¤Ï¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿¤È
3738 $DECODER ¤Ï¥Ð¥¤¥ÈÎó¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥Ç¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤
3739 ¥ó¥¿¤Ç¤¢¤ë¡£¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î4°ú¿ô¤ò¤È¤ë¡£
3741 @li ¥Ç¥³¡¼¥É¤¹¤ë¥Ð¥¤¥ÈÎó¤Ø¤Î¥Ý¥¤¥ó¥¿
3742 @li ¥Ç¥³¡¼¥É¤¹¤Ù¤¥Ð¥¤¥È¿ô
3743 @li ¥Ç¥³¡¼¥É·ë²Ì¤Îʸ»ú¤òÉղ乤ë M-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3744 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3746 $DECODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï0¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï-1¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê
3749 $ENCODER ¤Ï M-text ¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥¨¥ó¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý
3750 ¥¤¥ó¥¿¤Ç¤¢¤ë¡£¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î6°ú¿ô¤ò¤È¤ë¡£
3752 @li ¥¨¥ó¥³¡¼¥É¤¹¤ëM-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3753 @li M-text ¤Î¥¨¥ó¥³¡¼¥É³«»Ï°ÌÃÖ
3754 @li M-text ¤Î¥¨¥ó¥³¡¼¥É½ªÎ»°ÌÃÖ
3755 @li À¸À®¤·¤¿¥Ð¥¤¥È¤òÊÝ»ý¤¹¤ë¥á¥â¥êÎΰè¤Ø¤Î¥Ý¥¤¥ó¥¿
3756 @li ¥á¥â¥êÎΰè¤Î¥µ¥¤¥º
3757 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3759 $ENCODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï0¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï-1¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê
3762 $EXTRA_INFO ¤Ï¥³¡¼¥Ç¥£¥°¥·¥¹¥Æ¥à¤Ë´Ø¤¹¤ëÄɲþðÊó¤ò´Þ¤à¥Ç¡¼¥¿¹½Â¤¤Ø
3763 ¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£¤³¤Î¥Ç¡¼¥¿¹½Â¤¤Î¥¿¥¤¥×¤Ï $TYPE ¤Ë°Í¸¤¹¤ë¡£
3767 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mconv_define_coding () ¤Ï $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·
3768 ¥ó¥Ü¥ë¤òÊÖ¤¹¡£ ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì
3769 ¤¿¾ì¹ç¤Ï #Mnil ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
3777 mconv_define_coding (const char *name, MPlist *plist,
3778 int (*resetter) (MConverter *),
3779 int (*decoder) (const unsigned char *, int, MText *,
3781 int (*encoder) (MText *, int, int,
3782 unsigned char *, int,
3786 MSymbol sym = msymbol (name);
3788 MCodingSystem *coding;
3791 MSTRUCT_MALLOC (coding, MERROR_CODING);
3793 if ((coding->type = (MSymbol) mplist_get (plist, Mtype)) == Mnil)
3794 coding->type = Mcharset;
3795 pl = (MPlist *) mplist_get (plist, Mcharsets);
3797 MERROR (MERROR_CODING, Mnil);
3798 coding->ncharsets = mplist_length (pl);
3799 if (coding->ncharsets > NUM_SUPPORTED_CHARSETS)
3800 coding->ncharsets = NUM_SUPPORTED_CHARSETS;
3801 for (i = 0; i < coding->ncharsets; i++, pl = MPLIST_NEXT (pl))
3803 MSymbol charset_name;
3805 if (MPLIST_KEY (pl) != Msymbol)
3806 MERROR (MERROR_CODING, Mnil);
3807 charset_name = MPLIST_SYMBOL (pl);
3808 if (! (coding->charsets[i] = MCHARSET (charset_name)))
3809 MERROR (MERROR_CODING, Mnil);
3812 coding->resetter = resetter;
3813 coding->decoder = decoder;
3814 coding->encoder = encoder;
3815 coding->ascii_compatible = 0;
3816 coding->extra_info = extra_info;
3817 coding->extra_spec = NULL;
3820 if (coding->type == Mcharset)
3822 if (! coding->resetter)
3823 coding->resetter = reset_coding_charset;
3824 if (! coding->decoder)
3825 coding->decoder = decode_coding_charset;
3826 if (! coding->encoder)
3827 coding->encoder = encode_coding_charset;
3829 else if (coding->type == Mutf)
3831 MCodingInfoUTF *info = malloc (sizeof (MCodingInfoUTF));
3834 if (! coding->resetter)
3835 coding->resetter = reset_coding_utf;
3837 info->code_unit_bits = (int) mplist_get (plist, Mcode_unit);
3838 if (info->code_unit_bits == 8)
3840 if (! coding->decoder)
3841 coding->decoder = decode_coding_utf_8;
3842 if (! coding->encoder)
3843 coding->encoder = encode_coding_utf_8;
3845 else if (info->code_unit_bits == 16)
3847 if (! coding->decoder)
3848 coding->decoder = decode_coding_utf_16;
3849 if (! coding->encoder)
3850 coding->encoder = encode_coding_utf_16;
3852 else if (info->code_unit_bits == 32)
3854 if (! coding->decoder)
3855 coding->decoder = decode_coding_utf_32;
3856 if (! coding->encoder)
3857 coding->encoder = encode_coding_utf_32;
3860 MERROR (MERROR_CODING, Mnil);
3861 val = (MSymbol) mplist_get (plist, Mbom);
3864 else if (val == Mmaybe)
3869 info->endian = (mplist_get (plist, Mlittle_endian) ? 1 : 0);
3870 coding->extra_info = info;
3872 else if (coding->type == Miso_2022)
3874 MCodingInfoISO2022 *info = malloc (sizeof (MCodingInfoISO2022));
3876 if (! coding->resetter)
3877 coding->resetter = reset_coding_iso_2022;
3878 if (! coding->decoder)
3879 coding->decoder = decode_coding_iso_2022;
3880 if (! coding->encoder)
3881 coding->encoder = encode_coding_iso_2022;
3883 info->initial_invocation[0] = 0;
3884 info->initial_invocation[1] = -1;
3885 pl = (MPlist *) mplist_get (plist, Minvocation);
3888 if (MPLIST_KEY (pl) != Minteger)
3889 MERROR (MERROR_CODING, Mnil);
3890 info->initial_invocation[0] = MPLIST_INTEGER (pl);
3891 if (! MPLIST_TAIL_P (pl))
3893 pl = MPLIST_NEXT (pl);
3894 if (MPLIST_KEY (pl) != Minteger)
3895 MERROR (MERROR_CODING, Mnil);
3896 info->initial_invocation[1] = MPLIST_INTEGER (pl);
3899 memset (info->designations, 0, sizeof (info->designations));
3900 for (i = 0, pl = (MPlist *) mplist_get (plist, Mdesignation);
3901 i < 32 && pl && MPLIST_KEY (pl) == Minteger;
3902 i++, pl = MPLIST_NEXT (pl))
3903 info->designations[i] = MPLIST_INTEGER (pl);
3906 MPLIST_DO (pl, (MPlist *) mplist_get (plist, Mflags))
3910 if (MPLIST_KEY (pl) != Msymbol)
3911 MERROR (MERROR_CODING, Mnil);
3912 val = MPLIST_SYMBOL (pl);
3913 if (val == Mreset_at_eol)
3914 info->flags |= MCODING_ISO_RESET_AT_EOL;
3915 else if (val == Mreset_at_cntl)
3916 info->flags |= MCODING_ISO_RESET_AT_CNTL;
3917 else if (val == Meight_bit)
3918 info->flags |= MCODING_ISO_EIGHT_BIT;
3919 else if (val == Mlong_form)
3920 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3921 else if (val == Mdesignation_g0)
3922 info->flags |= MCODING_ISO_DESIGNATION_G0;
3923 else if (val == Mdesignation_g1)
3924 info->flags |= MCODING_ISO_DESIGNATION_G1;
3925 else if (val == Mdesignation_ctext)
3926 info->flags |= MCODING_ISO_DESIGNATION_CTEXT;
3927 else if (val == Mdesignation_ctext_ext)
3928 info->flags |= MCODING_ISO_DESIGNATION_CTEXT_EXT;
3929 else if (val == Mlocking_shift)
3930 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3931 else if (val == Msingle_shift)
3932 info->flags |= MCODING_ISO_SINGLE_SHIFT;
3933 else if (val == Msingle_shift_7)
3934 info->flags |= MCODING_ISO_SINGLE_SHIFT_7;
3935 else if (val == Meuc_tw_shift)
3936 info->flags |= MCODING_ISO_EUC_TW_SHIFT;
3937 else if (val == Miso_6429)
3938 info->flags |= MCODING_ISO_ISO6429;
3939 else if (val == Mrevision_number)
3940 info->flags |= MCODING_ISO_REVISION_NUMBER;
3941 else if (val == Mfull_support)
3942 info->flags |= MCODING_ISO_FULL_SUPPORT;
3945 coding->extra_info = info;
3949 if (! coding->decoder || ! coding->encoder)
3950 MERROR (MERROR_CODING, Mnil);
3951 if (! coding->resetter)
3955 msymbol_put (sym, Mcoding, coding);
3956 msymbol_put (msymbol__canonicalize (sym), Mcoding, coding);
3957 plist = (MPlist *) mplist_get (plist, Maliases);
3960 MPLIST_DO (pl, plist)
3964 if (MPLIST_KEY (pl) != Msymbol)
3966 alias = MPLIST_SYMBOL (pl);
3967 msymbol_put (alias, Mcoding, coding);
3968 msymbol_put (msymbol__canonicalize (alias), Mcoding, coding);
3972 MLIST_APPEND1 (&coding_list, codings, coding, MERROR_CODING);
3980 @brief Resolve coding system name.
3982 The mconv_resolve_coding () function returns $SYMBOL if it
3983 represents a coding system. Otherwise, it canonicalizes $SYMBOL as
3984 a coding system name, and if the canonicalized name represents a
3985 coding system, returns it. Otherwise, it returns #Mnil. */
3987 @brief ¥³¡¼¥É·Ï¤Î̾Á°¤ò²ò·è¤¹¤ë.
3989 ´Ø¿ô mconv_resolve_coding () ¤Ï $SYMBOL ¤¬¥³¡¼¥É·Ï¤ò¼¨¤·¤Æ¤¤¤ì¤Ð¤½
3990 ¤ì¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¥³¡¼¥É·Ï¤Î̾Á°¤È¤·¤Æ $SYMBOL ¤òÀµµ¬²½¤·¡¢
3991 ¤½¤ì¤¬¥³¡¼¥É·Ï¤ò¼¨¤·¤Æ¤¤¤ì¤ÐÀµµ¬²½¤·¤¿Ì¾Á°¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð
/* Look SYMBOL up with find_coding (); the canonicalized form of SYMBOL
   is looked up as well (the condition guarding that second lookup is
   not visible in this listing).  The result is the `name' member of the
   coding system found, or Mnil when there is none.  */
3997 mconv_resolve_coding (MSymbol symbol)
3999 MCodingSystem *coding = find_coding (symbol);
4003 symbol = msymbol__canonicalize (symbol);
4004 coding = find_coding (symbol);
4006 return (coding ? coding->name : Mnil);
4013 @brief List symbols representing coding systems.
4015 The mconv_list_codings () function makes an array of symbols
4016 representing a coding system, stores the pointer to the array in a
4017 place pointed to by $SYMBOLS, and returns the length of the array. */
4019 @brief ¥³¡¼¥É·Ï¤òɽ¤ï¤¹¥·¥ó¥Ü¥ë¤òÎóµó¤¹¤ë.
4021 関数 mconv_list_codings () は、コード系を示すシンボルを並べた配列
4022 ¤òºî¤ê¡¢$SYMBOLS ¤Ç¥Ý¥¤¥ó¥È¤µ¤ì¤¿¾ì½ê¤Ë¤³¤ÎÇÛÎó¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÃÖ¤¡¢
4023 ÇÛÎó¤ÎŤµ¤òÊÖ¤¹¡£ */
/* Collect the names of all known coding systems into a newly allocated
   array stored in *SYMBOLS.  Names come from two places: the first
   element of every plist in coding_definition_list, and every entry of
   coding_list whose name is not a key of coding_definition_list.  */
4026 mconv_list_codings (MSymbol **symbols)
/* Upper bound on the entry count: every definition plus every coding
   in coding_list.  */
4028 int i = coding_list.used + mplist_length (coding_definition_list);
4032 MTABLE_MALLOC ((*symbols), i, MERROR_CODING);
4034 MPLIST_DO (plist, coding_definition_list)
4036 MPlist *pl = MPLIST_VAL (plist);
4037 (*symbols)[i++] = MPLIST_SYMBOL (pl);
/* NOTE(review): elsewhere in this file coding_definition_list is keyed
   by canonicalized names, while the lookup below uses coding->name;
   a coding whose name differs from its canonical form may be listed
   twice -- confirm.  */
4039 for (j = 0; j < coding_list.used; j++)
4040 if (! mplist_find_by_key (coding_definition_list,
4041 coding_list.codings[j]->name))
4042 (*symbols)[i++] = coding_list.codings[j]->name;
4049 @brief Create a code converter bound to a buffer.
4051 The mconv_buffer_converter () function creates a pointer to a code
4052 converter for coding system $CODING. The code converter is bound
4053 to buffer area of $N bytes pointed to by $BUF. Subsequent
4054 decodings and encodings are done to/from this buffer area.
4056 $CODING can be #Mnil. In this case, a coding system associated
4057 with the current locale (LC_CTYPE) is used.
4060 If the operation was successful, mconv_buffer_converter () returns
4061 the created code converter. Otherwise it returns @c NULL and
4062 assigns an error code to the external variable #merror_code. */
4065 @brief ¥Ð¥Ã¥Õ¥¡¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë.
4067 ´Ø¿ô mconv_buffer_converter () ¤Ï¡¢¥³¡¼¥É·Ï $CODING ÍѤΥ³¡¼¥É¥³¥ó
4068 ¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢$BUF ¤Ç¼¨¤µ¤ì¤ëÂ礤µ $N ¥Ð
4069 ¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤ë¡£¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó
4070 ¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¡£
4072 $CODING ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4073 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4076 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð mconv_buffer_converter () ¤Ï ºîÀ®¤·¤¿¥³¡¼¥É¥³
4077 ¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4078 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4080 @latexonly \IPAlabel{mconverter} @endlatexonly */
4084 @c MERROR_SYMBOL, @c MERROR_CODING
4087 mconv_stream_converter () */
/* Create a converter for the coding system NAME and bind it to the
   N-byte buffer BUF.  The coding system is resolved with find_coding ();
   on the path that calls mlocale_get_prop (), NAME is taken from the
   Mcoding property of the LC_CTYPE locale (the guarding condition is
   not visible in this listing).  */
4090 mconv_buffer_converter (MSymbol name, unsigned char *buf, int n)
4092 MCodingSystem *coding;
4093 MConverter *converter;
4094 MConverterStatus *internal;
4097 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4098 coding = find_coding (name);
4100 MERROR (MERROR_CODING, NULL);
4101 MSTRUCT_CALLOC (converter, MERROR_CODING);
4102 MSTRUCT_CALLOC (internal, MERROR_CODING);
4103 converter->internal_info = internal;
4104 internal->coding = coding;
/* Run the coding system's resetter, if it has one, on the new
   converter; a negative result aborts creation.  */
4105 if (coding->resetter
4106 && (*coding->resetter) (converter) < 0)
4110 MERROR (MERROR_CODING, NULL);
/* Two scratch M-texts; work_mt is enlarged by MAX_UTF8_CHAR_BYTES up
   front (presumably so one encoded character always fits -- confirm).  */
4113 internal->unread = mtext ();
4114 internal->work_mt = mtext ();
4115 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
/* Record the caller's buffer and mark the converter as buffer-bound.  */
4116 internal->buf = buf;
4118 internal->bufsize = n;
4119 internal->binding = BINDING_BUFFER;
4127 @brief Create a code converter bound to a stream.
4129 The mconv_stream_converter () function creates a pointer to a code
4130 converter for coding system $CODING. The code converter is bound
4131 to stream $FP. Subsequent decodings and encodings are done
4132 to/from this stream.
4134 $CODING can be #Mnil. In this case, a coding system associated
4135 with the current locale (LC_CTYPE) is used.
4137 @return If the operation was successful, mconv_stream_converter ()
4138 returns the created code converter. Otherwise it returns @c NULL
4139 and assigns an error code to the external variable
4143 @brief ¥¹¥È¥ê¡¼¥à¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë.
4145 ´Ø¿ô mconv_stream_converter () ¤Ï¡¢¥³¡¼¥É·Ï $CODING ÍѤΥ³¡¼¥É¥³¥ó
4146 ¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP ¤Ë·ë¤ÓÉÕ¤±¤é
4147 ¤ì¤ë¡£¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ
4150 $CODING ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4151 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4154 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_stream_converter () ¤ÏºîÀ®¤·¤¿
4155 ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
4156 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4158 @latexonly \IPAlabel{mconverter} @endlatexonly */
4162 @c MERROR_SYMBOL, @c MERROR_CODING
4165 mconv_buffer_converter () */
4168 mconv_stream_converter (MSymbol name, FILE *fp)
4170 MCodingSystem *coding;
4171 MConverter *converter;
4172 MConverterStatus *internal;
4175 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4176 coding = find_coding (name);
4178 MERROR (MERROR_CODING, NULL);
4179 MSTRUCT_CALLOC (converter, MERROR_CODING);
4180 MSTRUCT_CALLOC (internal, MERROR_CODING);
4181 converter->internal_info = internal;
4182 internal->coding = coding;
4183 if (coding->resetter
4184 && (*coding->resetter) (converter) < 0)
4188 MERROR (MERROR_CODING, NULL);
4191 if (fseek (fp, 0, SEEK_CUR) < 0)
4199 internal->seekable = 0;
4202 internal->seekable = 1;
4203 internal->unread = mtext ();
4204 internal->work_mt = mtext ();
4205 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
4207 internal->binding = BINDING_STREAM;
4215 @brief Reset a code converter.
4217 The mconv_reset_converter () function resets code converter
4218 $CONVERTER to the initial state.
4221 If $CONVERTER->coding has its own resetter function,
4222 mconv_reset_converter () returns the result of that function
4223 applied to $CONVERTER. Otherwise it returns 0. */
4226 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò¥ê¥»¥Ã¥È¤¹¤ë.
4228 ´Ø¿ô mconv_reset_converter () ¤Ï¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤ò½é´ü
4232 ¤â¤· $CONVERTER->coding ¤Ë¥ê¥»¥Ã¥ÈÍѤδؿô¤¬ÄêµÁ¤µ¤ì¤Æ¤¤¤ë¤Ê¤é¤Ð¡¢
4233 mconv_reset_converter () ¤Ï¤½¤Î´Ø¿ô¤Ë $CONVERTER ¤òŬÍѤ·¤¿·ë²Ì¤ò
4234 ÊÖ¤·¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð0¤òÊÖ¤¹¡£ */
4237 mconv_reset_converter (MConverter *converter)
4239 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4241 converter->nchars = converter->nbytes = 0;
4242 converter->result = MCONVERSION_RESULT_SUCCESS;
4243 internal->carryover_bytes = 0;
4244 mtext_reset (internal->unread);
4245 if (internal->coding->resetter)
4246 return (*internal->coding->resetter) (converter);
4253 @brief Free a code converter.
4255 The mconv_free_converter () function frees the code converter
4259 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò²òÊü¤¹¤ë.
4261 ´Ø¿ô mconv_free_converter () ¤Ï¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤ò²òÊü
4265 mconv_free_converter (MConverter *converter)
4267 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4269 M17N_OBJECT_UNREF (internal->work_mt);
4270 M17N_OBJECT_UNREF (internal->unread);
4278 @brief Bind a buffer to a code converter.
4280 The mconv_rebind_buffer () function binds the buffer area of $N bytes
4281 pointed to by $BUF to code converter $CONVERTER. Subsequent
4282 decodings and encodings are done to/from this newly bound buffer
4286 This function always returns $CONVERTER. */
4289 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë¥Ð¥Ã¥Õ¥¡Îΰè¤ò·ë¤ÓÉÕ¤±¤ë.
4291 ´Ø¿ô mconv_rebind_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿Â礤µ $N ¥Ð
4292 ¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£¤³¤ì
4293 °Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥Ð¥Ã¥Õ¥¡
4294 Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4297 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4299 @latexonly \IPAlabel{mconv_rebind_buffer} @endlatexonly */
4303 mconv_rebind_stream () */
4306 mconv_rebind_buffer (MConverter *converter, unsigned char *buf, int n)
4308 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4310 internal->buf = buf;
4312 internal->bufsize = n;
4313 internal->binding = BINDING_BUFFER;
4320 @brief Bind a stream to a code converter.
4322 The mconv_rebind_stream () function binds stream $FP to code
4323 converter $CONVERTER. Subsequent decodings and encodings are done
4324 to/from this newly bound stream.
4327 This function always returns $CONVERTER. */
4330 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë¥¹¥È¥ê¡¼¥à¤ò·ë¤ÓÉÕ¤±¤ë.
4332 ´Ø¿ô mconv_rebind_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP ¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4333 $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢
4334 ¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4337 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4339 @latexonly \IPAlabel{mconv_rebind_stream} @endlatexonly */
4343 mconv_rebind_buffer () */
4346 mconv_rebind_stream (MConverter *converter, FILE *fp)
4348 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4350 if (fseek (fp, 0, SEEK_CUR) < 0)
4354 internal->seekable = 0;
4357 internal->seekable = 1;
4359 internal->binding = BINDING_STREAM;
4366 @brief Decode a byte sequence into an M-text.
4368 The mconv_decode () function decodes a byte sequence and appends
4369 the result at the end of M-text $MT. The source byte sequence is
4370 taken from the currently bound buffer area or stream.
4373 If the operation was successful, mconv_decode () returns updated
4374 $MT. Otherwise it returns @c NULL and assigns an error code to
4375 the external variable #merror_code. */
4378 @brief ¥Ð¥¤¥ÈÎó¤ò M-text ¤Ë¥Ç¥³¡¼¥É¤¹¤ë.
4380 ´Ø¿ô mconv_decode () ¤Ï¡¢¥Ð¥¤¥ÈÎó¤ò¥Ç¥³¡¼¥É¤·¤Æ¤½¤Î·ë²Ì¤ò M-text
4381 $MT ¤ÎËöÈø¤ËÄɲ乤롣¥Ç¥³¡¼¥É¸µ¤Î¥Ð¥¤¥ÈÎó¤Ï¡¢¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë
4382 ¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é¼è¤é¤ì¤ë¡£
4385 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode () ¤Ï¹¹¿·¤µ¤ì¤¿ $MT ¤òÊÖ¤¹¡£¤½
4386 ¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò
4391 @c MERROR_IO, @c MERROR_CODING
4394 mconv_rebind_buffer (), mconv_rebind_stream (),
4395 mconv_encode (), mconv_encode_range (),
4396 mconv_decode_buffer (), mconv_decode_stream () */
4399 mconv_decode (MConverter *converter, MText *mt)
4401 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4402 int at_most = converter->at_most > 0 ? converter->at_most : -1;
4405 M_CHECK_READONLY (mt, NULL);
4407 if (mt->format != MTEXT_FORMAT_UTF_8)
4408 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
4411 mtext__enlarge (mt, MAX_UTF8_CHAR_BYTES);
4413 converter->nchars = converter->nbytes = 0;
4414 converter->result = MCONVERSION_RESULT_SUCCESS;
4416 n = mtext_nchars (internal->unread);
4422 if (at_most > 0 && at_most < limit)
4425 for (i = 0, n -= 1; i < limit; i++, converter->nchars++, n--)
4426 mtext_cat_char (mt, mtext_ref_char (internal->unread, n));
4427 mtext_del (internal->unread, n + 1, internal->unread->nchars);
4430 if (at_most == limit)
4432 converter->at_most -= converter->nchars;
4436 if (internal->binding == BINDING_BUFFER)
4438 (*internal->coding->decoder) (internal->buf + internal->used,
4439 internal->bufsize - internal->used,
4441 internal->used += converter->nbytes;
4443 else if (internal->binding == BINDING_STREAM)
4445 unsigned char work[CONVERT_WORKSIZE];
4446 int last_block = converter->last_block;
4447 int use_fread = at_most < 0 && internal->seekable;
4449 converter->last_block = 0;
4452 int nbytes, prev_nbytes;
4454 if (feof (internal->fp))
4457 nbytes = fread (work, sizeof (unsigned char), CONVERT_WORKSIZE,
4461 int c = getc (internal->fp);
4464 work[0] = c, nbytes = 1;
4469 if (ferror (internal->fp))
4471 converter->result = MCONVERSION_RESULT_IO_ERROR;
4476 converter->last_block = last_block;
4477 prev_nbytes = converter->nbytes;
4478 (*internal->coding->decoder) (work, nbytes, mt, converter);
4479 if (converter->nbytes - prev_nbytes < nbytes)
4482 fseek (internal->fp, converter->nbytes - prev_nbytes - nbytes,
4485 ungetc (work[0], internal->fp);
4489 || (converter->at_most > 0
4490 && converter->nchars == converter->at_most))
4493 converter->last_block = last_block;
4495 else /* internal->binding == BINDING_NONE */
4496 MERROR (MERROR_CODING, NULL);
4498 converter->at_most = at_most;
4499 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4500 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_SRC)
4507 @brief Decode a buffer area based on a coding system.
4509 The mconv_decode_buffer () function decodes $N bytes of buffer
4510 area pointed to by $BUF based on the coding system $NAME. A
4511 temporary code converter for decoding is automatically created
4515 If the operation was successful, mconv_decode_buffer ()
4516 returns the resulting M-text. Otherwise it returns @c NULL and
4517 assigns an error code to the external variable #merror_code. */
4520 @brief ¥³¡¼¥É·Ï¤Ë´ð¤Å¤¤¤Æ¥Ð¥Ã¥Õ¥¡Îΰè¤ò¥Ç¥³¡¼¥É¤¹¤ë.
4522 ´Ø¿ô mconv_decode_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿ $N ¥Ð¥¤¥È¤Î
4523 ¥Ð¥Ã¥Õ¥¡Îΰè¤ò¡¢¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£¥Ç¥³¡¼¥É¤Ë
4524 ɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4527 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_buffer () ¤ÏÆÀ¤é¤ì¤¿ M-text ¤ò
4528 ÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
4529 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4533 @c MERROR_IO, @c MERROR_CODING
4536 mconv_decode (), mconv_decode_stream () */
4539 mconv_decode_buffer (MSymbol name, unsigned char *buf, int n)
4541 MConverter *converter = mconv_buffer_converter (name, buf, n);
4547 if (! mconv_decode (converter, mt))
4549 M17N_OBJECT_UNREF (mt);
4552 mconv_free_converter (converter);
4559 @brief Decode a stream input based on a coding system.
4561 The mconv_decode_stream () function decodes the entire byte
4562 sequence read in from stream $FP based on the coding system $NAME.
4563 A code converter for decoding is automatically created and freed.
4566 If the operation was successful, mconv_decode_stream () returns
4567 the resulting M-text. Otherwise it returns @c NULL and assigns an
4568 error code to the external variable #merror_code. */
4571 @brief ¥³¡¼¥É·Ï¤Ë´ð¤Å¤¤¤Æ¥¹¥È¥ê¡¼¥àÆþÎϤò¥Ç¥³¡¼¥É¤¹¤ë.
4573 ´Ø¿ô mconv_decode_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP ¤«¤éÆɤ߹þ¤Þ¤ì¤ë¥Ð
4574 ¥¤¥ÈÎóÁ´ÂΤò¡¢¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£¥Ç¥³¡¼¥É¤Ëɬ
4575 Íפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4578 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_stream () ¤ÏÆÀ¤é¤ì¤¿ M-text ¤òÊÖ
4579 ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼
4584 @c MERROR_IO, @c MERROR_CODING
4587 mconv_decode (), mconv_decode_buffer () */
4590 mconv_decode_stream (MSymbol name, FILE *fp)
4592 MConverter *converter = mconv_stream_converter (name, fp);
4598 if (! mconv_decode (converter, mt))
4600 M17N_OBJECT_UNREF (mt);
4603 mconv_free_converter (converter);
4609 /***en @brief Encode an M-text into a byte sequence.
4611 The mconv_encode () function encodes M-text $MT and writes the
4612 resulting byte sequence into the buffer area or the stream that is
4613 currently bound to code converter $CONVERTER.
4616 If the operation was successful, mconv_encode () returns the
4617 number of written bytes. Otherwise it returns -1 and assigns an
4618 error code to the external variable #merror_code. */
4621 @brief M-text ¤ò¥Ð¥¤¥ÈÎó¤Ë¥¨¥ó¥³¡¼¥É¤¹¤ë.
4623 ´Ø¿ô mconv_encode () ¤Ï¡¢M-text $MT ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼
4624 ¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼
4625 ¥à¤ËÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò½ñ¤¹þ¤à¡£
4628 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
4629 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê
4634 @c MERROR_IO, @c MERROR_CODING
4637 mconv_rebind_buffer (), mconv_rebind_stream(),
4638 mconv_decode (), mconv_encode_range () */
4641 mconv_encode (MConverter *converter, MText *mt)
4643 return mconv_encode_range (converter, mt, 0, mtext_nchars (mt));
4649 @brief Encode a part of an M-text.
4651 The mconv_encode_range () function encodes the text between $FROM
4652 (inclusive) and $TO (exclusive) in M-text $MT and writes the
4653 resulting byte sequence into the buffer area or the stream that is
4654 currently bound to code converter $CONVERTER.
4657 If the operation was successful, mconv_encode_range () returns the
4658 number of written bytes. Otherwise it returns -1 and assigns an
4659 error code to the external variable #merror_code. */
4662 @brief M-text ¤Î°ìÉô¤ò¤ò¥Ð¥¤¥ÈÎó¤Ë¥¨¥ó¥³¡¼¥É¤¹¤ë.
4664 ´Ø¿ô mconv_encode_range () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é
4665 $TO ¡Ê´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼
4666 ¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼
4667 ¥à¤ËÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò½ñ¤¹þ¤à¡£
4670 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_range () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô
4671 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼
4676 @c MERROR_RANGE, @c MERROR_IO, @c MERROR_CODING
4679 mconv_rebind_buffer (), mconv_rebind_stream(),
4680 mconv_decode (), mconv_encode () */
4683 mconv_encode_range (MConverter *converter, MText *mt, int from, int to)
4685 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4687 M_CHECK_POS_X (mt, from, -1);
4688 M_CHECK_POS_X (mt, to, -1);
4692 if (converter->at_most > 0 && from + converter->at_most < to)
4693 to = from + converter->at_most;
4695 converter->nchars = converter->nbytes = 0;
4696 converter->result = MCONVERSION_RESULT_SUCCESS;
4698 mtext_put_prop (mt, from, to, Mcoding, internal->coding->name);
4699 if (internal->binding == BINDING_BUFFER)
4701 (*internal->coding->encoder) (mt, from, to,
4702 internal->buf + internal->used,
4703 internal->bufsize - internal->used,
4705 internal->used += converter->nbytes;
4707 else if (internal->binding == BINDING_STREAM)
4709 unsigned char work[CONVERT_WORKSIZE];
4714 int prev_nbytes = converter->nbytes;
4717 (*internal->coding->encoder) (mt, from, to, work,
4718 CONVERT_WORKSIZE, converter);
4719 this_nbytes = converter->nbytes - prev_nbytes;
4720 while (written < this_nbytes)
4722 int wrtn = fwrite (work + written, sizeof (unsigned char),
4723 this_nbytes - written, internal->fp);
4725 if (ferror (internal->fp))
4729 if (written < this_nbytes)
4731 converter->result = MCONVERSION_RESULT_IO_ERROR;
4734 from += converter->nchars;
4737 else /* fail safe */
4738 MERROR (MERROR_CODING, -1);
4740 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4741 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_DST)
4742 ? converter->nbytes : -1);
4748 @brief Encode an M-text into a buffer area.
4750 The mconv_encode_buffer () function encodes M-text $MT based on
4751 coding system $NAME and writes the resulting byte sequence into the
4752 buffer area pointed to by $BUF. At most $N bytes are written. A
4753 temporary code converter for encoding is automatically created
4757 If the operation was successful, mconv_encode_buffer () returns
4758 the number of written bytes. Otherwise it returns -1 and assigns
4759 an error code to the external variable #merror_code. */
4762 @brief M-text ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¥Ð¥Ã¥Õ¥¡Îΰè¤Ë½ñ¤¹þ¤à.
4764 ´Ø¿ô mconv_encode_buffer () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤
4765 ¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò $BUF ¤Î»Ø¤¹¥Ð¥Ã¥Õ¥¡Îΰè¤Ë½ñ¤¹þ
4766 ¤à¡£$N ¤Ï½ñ¤¹þ¤àºÇÂç¥Ð¥¤¥È¿ô¤Ç¤¢¤ë¡£¥¨¥ó¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó
4767 ¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4770 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_buffer () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È
4771 ¿ô¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð-1¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼
4776 @c MERROR_IO, @c MERROR_CODING
4779 mconv_encode (), mconv_encode_stream () */
4782 mconv_encode_buffer (MSymbol name, MText *mt, unsigned char *buf, int n)
4784 MConverter *converter = mconv_buffer_converter (name, buf, n);
4789 ret = mconv_encode (converter, mt);
4790 mconv_free_converter (converter);
4797 @brief Encode an M-text to write to a stream.
4799 The mconv_encode_stream () function encodes M-text $MT based on
4800 coding system $NAME and writes the resulting byte sequence to
4801 stream $FP. A temporary code converter for encoding is
4802 automatically created and freed.
4805 If the operation was successful, mconv_encode_stream () returns
4806 the number of written bytes. Otherwise it returns -1 and assigns
4807 an error code to the external variable #merror_code. */
4810 @brief M-text ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¥¹¥È¥ê¡¼¥à¤Ë½ñ¤¹þ¤à.
4812 ´Ø¿ô mconv_encode_stream () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤
4813 ¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò¥¹¥È¥ê¡¼¥à $FP ¤Ë½ñ¤½Ð¤¹¡£¥¨¥ó
4814 ¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4817 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_stream () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô
4818 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð-1¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É
4823 @c MERROR_IO, @c MERROR_CODING
4826 mconv_encode (), mconv_encode_buffer (), mconv_encode_file () */
4829 mconv_encode_stream (MSymbol name, MText *mt, FILE *fp)
4831 MConverter *converter = mconv_stream_converter (name, fp);
4836 ret = mconv_encode (converter, mt);
4837 mconv_free_converter (converter);
4844 @brief Read a character via a code converter.
4846 The mconv_getc () function reads one character from the buffer
4847 area or the stream that is currently bound to code converter
4848 $CONVERTER. The decoder of $CONVERTER is used to decode the byte
4849 sequence. The internal status of $CONVERTER is updated
4853 If the operation was successful, mconv_getc () returns the
4854 character read in. If the input source reaches EOF, it returns @c
4855 EOF without changing the external variable #merror_code. If an
4856 error is detected, it returns @c EOF and assigns an error code to
4860 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿·Ðͳ¤Ç1ʸ»úÆɤà.
4862 ´Ø¿ô mconv_getc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±
4863 ¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é 1 ʸ»ú¤òÆɤ߹þ¤à¡£¥Ð
4864 ¥¤¥ÈÎó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£$CONVERTER
4865 ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4868 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_getc () ¤ÏÆɤ߹þ¤Þ¤ì¤¿Ê¸»ú¤òÊÖ¤¹¡£ÆþÎϸ»¤¬
4869 EOF ¤Ë㤷¤¿¾ì¹ç¤Ï¡¢³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c EOF ¤òÊÖ¤¹¡£
4870 ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c EOF ¤òÊÖ¤·¡¢#merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É
4878 mconv_ungetc (), mconv_putc (), mconv_gets () */
4881 mconv_getc (MConverter *converter)
4883 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4884 int at_most = converter->at_most;
4886 mtext_reset (internal->work_mt);
4887 converter->at_most = 1;
4888 mconv_decode (converter, internal->work_mt);
4889 converter->at_most = at_most;
4890 return (converter->nchars == 1
4891 ? STRING_CHAR (internal->work_mt->data)
4898 @brief Push a character back to a code converter.
4900 The mconv_ungetc () function pushes character $C back to code
4901 converter $CONVERTER. Any number of characters can be pushed
4902 back. The most recently pushed back character is read first by the
4903 subsequent mconv_getc () call. The characters pushed back are
4904 registered only in $CONVERTER; they are not written to the input
4905 source. The internal status of $CONVERTER is updated
4909 If the operation was successful, mconv_ungetc () returns $C.
4910 Otherwise it returns @c EOF and assigns an error code to the
4911 external variable #merror_code. */
4914 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë1ʸ»úÌ᤹.
4916 ´Ø¿ô mconv_ungetc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ëʸ»ú $C ¤ò
4917 ²¡¤·Ì᤹¡£Ì᤹ʸ»ú¿ô¤ËÀ©¸Â¤Ï¤Ê¤¤¡£¤³¤Î¸å¤Ë mconv_getc () ¤ò¸Æ¤Ó½Ð
4918 ¤¹¤È¡¢ºÇ¸å¤ËÌᤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤ËÆɤޤì¤ë¡£Ìᤵ¤ì¤¿Ê¸»ú¤Ï
4919 $CONVERTER ¤ÎÆâÉô¤ËÃߤ¨¤é¤ì¤ë¤À¤±¤Ç¤¢¤ê¡¢¼ÂºÝ¤ËÆþÎϸ»¤Ë½ñ¤¹þ¤Þ¤ì
4920 ¤ë¤ï¤±¤Ç¤Ï¤Ê¤¤¡£$CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4923 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_ungetc () ¤Ï $C ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c
4924 EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4928 @c MERROR_CODING, @c MERROR_CHAR
4931 mconv_getc (), mconv_putc (), mconv_gets () */
4934 mconv_ungetc (MConverter *converter, int c)
4936 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4938 M_CHECK_CHAR (c, EOF);
4940 converter->result = MCONVERSION_RESULT_SUCCESS;
4941 mtext_cat_char (internal->unread, c);
4948 @brief Write a character via a code converter.
4950 The mconv_putc () function writes character $C to the buffer area
4951 or the stream that is currently bound to code converter
4952 $CONVERTER. The encoder of $CONVERTER is used to encode the
4953 character. The number of bytes actually written is set to the @c
4954 nbytes member of $CONVERTER. The internal status of $CONVERTER
4955 is updated appropriately.
4958 If the operation was successful, mconv_putc () returns $C.
4959 If an error is detected, it returns @c EOF and assigns
4960 an error code to the external variable #merror_code. */
4963 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò·Ðͳ¤Ç1ʸ»ú½ñ¤¯.
4965 ´Ø¿ô mconv_putc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±
4966 ¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤Ëʸ»ú $C ¤ò½ñ¤½Ð¤¹¡£Ê¸»ú
4967 ¤Î¥¨¥ó¥³¡¼¥É¤Ë¤Ï $CONVERTER ¤Î¥¨¥ó¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¼ÂºÝ¤Ë½ñ¤½Ð
4968 ¤µ¤ì¤¿¥Ð¥¤¥È¿ô¤Ï¡¢$CONVERTER ¤Î ¥á¥ó¥Ð¡¼ @c nbytes ¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£
4969 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4972 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_putc () ¤Ï $C ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç
4973 ¤Ï @c EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4977 @c MERROR_CODING, @c MERROR_IO, @c MERROR_CHAR
4980 mconv_getc (), mconv_ungetc (), mconv_gets () */
4983 mconv_putc (MConverter *converter, int c)
4985 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
4987 M_CHECK_CHAR (c, EOF);
4988 mtext_reset (internal->work_mt);
4989 mtext_cat_char (internal->work_mt, c);
4990 if (mconv_encode_range (converter, internal->work_mt, 0, 1) < 0)
4998 @brief Read a line using a code converter.
5000 The mconv_gets () function reads one line from the buffer area or
5001 the stream that is currently bound to code converter $CONVERTER.
5002 The decoder of $CONVERTER is used for decoding. The decoded
5003 character sequence is appended at the end of M-text $MT. The
5004 final newline character in the original byte sequence is not
5005 appended. The internal status of $CONVERTER is updated
5009 If the operation was successful, mconv_gets () returns the
5010 modified $MT. If it encounters EOF without reading a single
5011 character, it returns $MT without changing it. If an error is
5012 detected, it returns @c NULL and assigns an error code to
5016 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò»È¤Ã¤Æ1¹ÔÆɤà.
5018 ´Ø¿ô mconv_gets () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±
5019 ¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é 1 ¹Ô¤òÆɤ߹þ¤à¡£¥Ð¥¤
5020 ¥ÈÎó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¥Ç¥³¡¼¥É¤µ¤ì
5021 ¤¿Ê¸»úÎó¤Ï M-text $MT ¤ÎËöÈø¤ËÄɲ䵤ì¤ë¡£¸µ¤Î¥Ð¥¤¥ÈÎó¤Î½ªÃ¼²þ¹Ôʸ
5022 »ú¤ÏÄɲ䵤ì¤Ê¤¤¡£$CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
5025 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_gets () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT ¤òÊÖ¤¹¡£¤â¤·1ʸ»ú
5026 ¤âÆɤޤº¤Ë EOF ¤ËÅö¤¿¤Ã¤¿¾ì¹ç¤Ï¡¢$MT ¤òÊѹ¹¤»¤º¤Ë¤½¤Î¤Þ¤ÞÊÖ¤¹¡£¥¨
5027 ¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢#merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò
5035 mconv_getc (), mconv_ungetc (), mconv_putc () */
5038 mconv_gets (MConverter *converter, MText *mt)
5042 M_CHECK_READONLY (mt, NULL);
5043 if (mt->format != MTEXT_FORMAT_UTF_8)
5044 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
5048 c = mconv_getc (converter);
5049 if (c == EOF || c == '\n')
5051 mtext_cat_char (mt, c);
5053 if (c == EOF && converter->result != MCONVERSION_RESULT_SUCCESS)
5054 /* mconv_getc () sets #merror_code */