1 /* coding.c -- code conversion module.
2 Copyright (C) 2003, 2004
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 @brief Coding system objects and API for them.
27 The m17n library represents a character encoding scheme (CES) of
28 coded character sets (CCS) as an object called @e coding @e
29 system. Application programs can add original coding systems.
31 To @e encode means converting code-points to character codes and
32 to @e decode means converting character codes back to code-points.
34 Application programs can decode a byte sequence with a specified
35 coding system into an M-text, and inversely, can encode an M-text
36 into a byte sequence. */
40 @brief コード系オブジェクトとそれに関する API
42 m17n ライブラリは、符号化文字集合 (coded character set; CCS) の文
43 字符号化方式 (character encoding scheme; CES) を @e コード系 と呼
44 ぶオブジェクトで表現する。アプリケーションプログラムは独自にコード
45 系を追加することもできる。
47 コードポイントから文字コードへの変換を @e エンコード と呼び、文字
48 コードからコードポイントへの変換を @e デコード と呼ぶ。
50 アプリケーションプログラムは、指定されたコード系でバイト列をデコー
51 ドすることで M-text を得ることができる。また逆に、指定されたコード
52 系で M-text をエンコードすることでバイト列を得ることができる。 */
56 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
57 /*** @addtogroup m17nInternal
65 #include <sys/types.h>
70 #include "m17n-misc.h"
73 #include "character.h"
/* Capacity of the fixed-size charsets[] array kept in each coding
   system object, i.e. the most charsets one coding system can list. */
80 #define NUM_SUPPORTED_CHARSETS 32
82 /** Structure for coding system object. */
86 /** Name of the coding system. */
89 /** Type of the coding system. */
92 /* Number of supported charsets. */
95 /** Array of supported charsets. */
96 MCharset *charsets[NUM_SUPPORTED_CHARSETS];
98 /** If non-NULL, function to call at the time of creating and
99 resetting a converter. */
100 int (*resetter) (MConverter *converter);
102 int (*decoder) (unsigned char *str, int str_bytes, MText *mt,
103 MConverter *converter);
105 int (*encoder) (MText *mt, int from, int to,
106 unsigned char *str, int str_bytes,
107 MConverter *converter);
109 /** If non-zero, the coding system decodes/encodes ASCII characters as
111 int ascii_compatible;
113 /** Pointer to extra information given when the coding system is
114 defined. The meaning depends on <type>. */
117 /** Pointer to information referred on conversion. The meaning
118 depends on <type>. The value NULL means that the coding system
128 MCodingSystem **codings;
131 static struct MCodingList coding_list;
133 static MPlist *coding_definition_list;
137 Pointer to a structure of a coding system. */
139 コード系を表わすデータ構造へのポインタ */
140 MCodingSystem *coding;
143 Buffer for carryover bytes generated while decoding. */
145 デコード中のキャリィオーバーバイト用バッファ */
146 unsigned char carryover[256];
149 Number of carryover bytes. */
151 キャリィオーバーバイト数 */
155 Beginning of the byte sequence bound to this converter. */
157 このコンバータに結び付けられたバイト列の先頭位置 */
167 Number of bytes already consumed in buf. */
169 buf 内ですでに消費されたバイト数 */
173 Stream bound to this converter. */
175 このコンバータに結び付けられたストリーム */
179 Which of above two is in use. */
181 上記2者のいずれが使われているか */
201 /* Local macros and functions. */
203 /** At first, set SRC_BASE to SRC. Then check if we have already
204 produced AT_MOST chars. If so, set SRC_END to SRC, and jump to
205 source_end. Otherwise, get one more byte C from SRC. In that
206 case, if SRC == SRC_END, jump to the label source_end. */
208 #define ONE_MORE_BASE_BYTE(c) \
211 if (nchars == at_most) \
216 if (src == src_stop) \
218 if (src == src_end) \
220 src_base = src = source; \
221 if (src == src_end) \
223 src_stop = src_end; \
229 /** Get one more byte C from SRC. If SRC == SRC_END, jump to the
232 #define ONE_MORE_BYTE(c) \
234 if (src == src_stop) \
236 if (src == src_end) \
239 if (src == src_end) \
241 src_stop = src_end; \
247 #define REWIND_SRC_TO_BASE() \
249 if (src_base < source || src_base >= src_end) \
250 src_stop = internal->carryover + internal->carryover_bytes; \
255 /** Push back byte C to SRC. */
257 #define UNGET_ONE_BYTE(c) \
263 internal->carryover[0] = c; \
264 internal->carryover_bytes = 1; \
265 src = internal->carryover; \
266 src_stop = src + 1; \
271 /** Store multibyte representation of character C at DST and increment
272 DST to the next of the produced bytes. DST must be a pointer to
273 data area of M-text MT. If the produced bytes are going to exceed
274 DST_END, enlarge the data area of MT. */
276 #define EMIT_CHAR(c) \
278 int bytes = CHAR_BYTES (c); \
281 if (dst + bytes + 1 > dst_end) \
283 len = dst - mt->data; \
284 bytes = mt->allocated + bytes + (src_stop - src); \
285 mtext__enlarge (mt, bytes); \
286 dst = mt->data + len; \
287 dst_end = mt->data + mt->allocated; \
289 dst += CHAR_STRING (c, dst); \
294 /* Check if there is enough room to produce LEN bytes at DST. If not,
295 go to the label insufficient_destination. */
297 #define CHECK_DST(len) \
299 if (dst + (len) > dst_end) \
300 goto insufficient_destination; \
304 /** Take NUM_CHARS characters (NUM_BYTES bytes) already stored at
305 (MT->data + MT->nbytes) into MT, and put charset property on
306 them with CHARSET->name. */
308 #define TAKEIN_CHARS(mt, num_chars, num_bytes, charset) \
310 int chars = (num_chars); \
314 mtext__takein ((mt), chars, (num_bytes)); \
316 mtext_put_prop ((mt), (mt)->nchars - chars, (mt)->nchars, \
317 Mcharset, (void *) ((charset)->name)); \
322 #define SET_SRC(mt, format, from, to) \
324 if (format <= MTEXT_FORMAT_UTF_8) \
326 src = mt->data + POS_CHAR_TO_BYTE (mt, from); \
327 src_end = mt->data + POS_CHAR_TO_BYTE (mt, to); \
329 else if (format <= MTEXT_FORMAT_UTF_16BE) \
332 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, from); \
334 = mt->data + (sizeof (short)) * POS_CHAR_TO_BYTE (mt, to); \
338 src = mt->data + (sizeof (int)) * from; \
339 src_end = mt->data + (sizeof (int)) * to; \
344 #define ONE_MORE_CHAR(c, bytes, format) \
346 if (src == src_end) \
348 if (format <= MTEXT_FORMAT_UTF_8) \
349 c = STRING_CHAR_AND_BYTES (src, bytes); \
350 else if (format <= MTEXT_FORMAT_UTF_16BE) \
352 c = mtext_ref_char (mt, from++); \
353 bytes = (sizeof (short)) * CHAR_UNITS_UTF16 (c); \
357 c = ((unsigned *) (mt->data))[from++]; \
358 bytes = sizeof (int); \
/* Write a printable placeholder for character C at DST, using one of
   the "<U+...>" / "<M+...>" formats selected below from the value of C.
   LEN is 8 when C is below 0x10000 and 10 otherwise, and DST + LEN is
   compared against DST_END.  mtext_put_prop is called to put Mnil as
   the Mcoding property on [pos, pos + 1) of mt.
   Only part of this function is visible in this listing: the remaining
   parameters, the return statements and the last format alternative
   are not shown.  Callers assign the result to a length variable.
   NOTE(review): sprintf also stores a terminating NUL, i.e. LEN + 1
   bytes, while the visible bounds check only guarantees room for LEN
   bytes.  Confirm that every caller leaves one spare byte (the
   UTF-16/UTF-32 encoders pass an 11-byte scratch buffer), or that the
   NUL is otherwise accounted for. */
364 encode_unsupporeted_char (int c, unsigned char *dst, unsigned char *dst_end,
370 len = c < 0x10000 ? 8 : 10;
371 if (dst + len > dst_end)
374 mtext_put_prop (mt, pos, pos + 1, Mcoding, Mnil);
375 format = (c < 0xD800 ? "<U+%04X>"
376 : c < 0xE000 ? "<M+%04X>"
377 : c < 0x10000 ? "<U+%04X>"
378 : c < 0x110000 ? "<U+%06X>"
380 sprintf ((char *) dst, format, c);
386 /** Finish decoding of bytes at SOURCE (ending at SRC_END) into NCHARS
387 characters by CONVERTER into M-text MT. SRC is a pointer to the
388 not-yet processed bytes. ERROR is 1 iff an invalid byte was
392 finish_decoding (MText *mt, MConverter *converter, int nchars,
393 unsigned char *source, unsigned char *src_end,
397 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
400 internal->carryover_bytes = 0;
402 || (converter->last_block
403 && ! converter->lenient))
404 converter->result = MCONVERSION_RESULT_INVALID_BYTE;
405 else if (! converter->last_block)
407 unsigned char *dst = internal->carryover;
409 if (src < source || src > src_end)
411 dst += internal->carryover_bytes;
414 while (src < src_end)
416 internal->carryover_bytes = dst - internal->carryover;
417 converter->result = MCONVERSION_RESULT_INSUFFICIENT_SRC;
421 unsigned char *dst = mt->data + mt->nbytes;
422 unsigned char *dst_end = mt->data + mt->allocated;
423 unsigned char *src_stop = src_end;
425 int last_nchars = nchars;
427 if (src < source || src > src_end)
428 src_stop = internal->carryover + internal->carryover_bytes;
431 if (converter->at_most && nchars == converter->at_most)
445 TAKEIN_CHARS (mt, nchars - last_nchars, dst - (mt->data + mt->nbytes),
447 internal->carryover_bytes = 0;
450 converter->nchars += nchars;
451 converter->nbytes += ((src < source || src > src_end) ? 0 : src - source);
452 return (converter->result == MCONVERSION_RESULT_INVALID_BYTE ? -1 : 0);
457 /* Stuff for coding-systems of type MCODING_TYPE_CHARSET. */
/* Prepare CODING for charset-based conversion: reorder
   coding->charsets by dimension, set coding->ascii_compatible when a
   listed charset is ASCII compatible, and build a 256-entry table of
   unsigned values that is stored in coding->extra_spec.
   Only part of this function is visible in this listing (loop headers,
   braces and return statements are not shown). */
460 setup_coding_charset (MCodingSystem *coding)
462 int ncharsets = coding->ncharsets;
463 unsigned *code_charset_table;
467 /* At first, reorder charset list by dimensions (a charset of
468 smaller dimension comes first). As the number of charsets is
469 usually very small (at most 32), we do a simple sort. */
474 MTABLE_ALLOCA (charsets, NUM_SUPPORTED_CHARSETS, MERROR_CODING);
475 memcpy (charsets, coding->charsets,
476 sizeof (MCharset *) * NUM_SUPPORTED_CHARSETS);
/* NOTE(review): i runs over 0..3.  The code below indexes code_range
   with (dim - 1) * 4, which suggests dimensions start at 1; if they
   range over 1..4, charsets of dimension 4 are never copied back by
   this loop.  Confirm the intended range. */
477 for (i = 0; i < 4; i++)
478 for (j = 0; j < ncharsets; j++)
479 if (charsets[j]->dimension == i)
480 coding->charsets[idx++] = charsets[j];
/* One table entry per possible byte value, zero-initialized. */
483 MTABLE_CALLOC (code_charset_table, 256, MERROR_CODING);
/* FROM and TO are taken from the code_range row selected by the
   charset's dimension -- presumably the minimum and maximum value of
   one byte position; verify against the MCharset definition. */
486 int dim = coding->charsets[ncharsets]->dimension;
487 int from = coding->charsets[ncharsets]->code_range[(dim - 1) * 4];
488 int to = coding->charsets[ncharsets]->code_range[(dim - 1) * 4 + 1];
490 if (coding->charsets[ncharsets]->ascii_compatible)
491 coding->ascii_compatible = 1;
/* Bit number NCHARSETS of a table entry marks the charset at that
   index in coding->charsets.
   NOTE(review): `1 << ncharsets' is a signed int shift; with up to 32
   charsets an index of 31 would shift into the sign bit.  An unsigned
   constant (1u) looks safer -- confirm. */
493 code_charset_table[from++] |= 1 << ncharsets;
496 coding->extra_spec = (void *) code_charset_table;
501 reset_coding_charset (MConverter *converter)
503 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
504 MCodingSystem *coding = internal->coding;
507 && setup_coding_charset (coding) < 0)
514 decode_coding_charset (unsigned char *source, int src_bytes, MText *mt,
515 MConverter *converter)
517 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
518 MCodingSystem *coding = internal->coding;
519 unsigned char *src = internal->carryover;
520 unsigned char *src_stop = src + internal->carryover_bytes;
521 unsigned char *src_end = source + src_bytes;
522 unsigned char *src_base;
523 unsigned char *dst = mt->data + mt->nbytes;
524 unsigned char *dst_end = mt->data + mt->allocated;
527 int at_most = converter->at_most > 0 ? converter->at_most : -1;
529 unsigned *code_charset_table = (unsigned *) coding->extra_spec;
530 MCharset **charsets = coding->charsets;
531 MCharset *charset = mcharset__ascii;
536 MCharset *this_charset = NULL;
540 ONE_MORE_BASE_BYTE (c);
541 mask = code_charset_table[c];
551 while (! (mask & 1)) mask >>= 1, idx++;
552 this_charset = charsets[idx];
553 dim = this_charset->dimension;
557 code = (code << 8) | c;
560 c = DECODE_CHAR (this_charset, code);
567 if (! converter->lenient)
569 REWIND_SRC_TO_BASE ();
571 this_charset = mcharset__binary;
574 if (this_charset != mcharset__ascii
575 && this_charset != charset)
577 TAKEIN_CHARS (mt, nchars - last_nchars,
578 dst - (mt->data + mt->nbytes), charset);
579 charset = this_charset;
580 last_nchars = nchars;
584 /* We reach here because of an invalid byte. */
588 TAKEIN_CHARS (mt, nchars - last_nchars,
589 dst - (mt->data + mt->nbytes), charset);
590 return finish_decoding (mt, converter, nchars,
591 source, src_end, src_base, error);
595 encode_coding_charset (MText *mt, int from, int to,
596 unsigned char *destination, int dst_bytes,
597 MConverter *converter)
599 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
600 MCodingSystem *coding = internal->coding;
601 unsigned char *src, *src_end;
602 unsigned char *dst = destination;
603 unsigned char *dst_end = dst + dst_bytes;
605 int ncharsets = coding->ncharsets;
606 MCharset **charsets = coding->charsets;
607 int ascii_compatible = coding->ascii_compatible;
608 enum MTextFormat format = mt->format;
610 SET_SRC (mt, format, from, to);
615 ONE_MORE_CHAR (c, bytes, format);
617 if (c < 0x80 && ascii_compatible)
625 MCharset *charset = NULL;
630 charset = charsets[i];
631 code = ENCODE_CHAR (charset, c);
632 if (code != MCHAR_INVALID_CODE)
634 if (++i == ncharsets)
635 goto unsupported_char;
638 CHECK_DST (charset->dimension);
639 if (charset->dimension == 1)
643 else if (charset->dimension == 2)
646 *dst++ = code & 0xFF;
648 else if (charset->dimension == 3)
651 *dst++ = (code >> 8) & 0xFF;
652 *dst++ = code & 0xFF;
657 *dst++ = (code >> 16) & 0xFF;
658 *dst++ = (code >> 8) & 0xFF;
659 *dst++ = code & 0xFF;
670 if (! converter->lenient)
672 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
674 goto insufficient_destination;
680 /* We reach here because of an unsupported char. */
681 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
684 insufficient_destination:
685 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
688 converter->nchars += nchars;
689 converter->nbytes += dst - destination;
690 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
694 /* Stuff for coding-systems of type MCODING_TYPE_UTF (8). */
/** Classify the UTF-8 style byte sequence starting at P and yield the
    charset it belongs to: mcharset__unicode, mcharset__m17n, or
    mcharset__binary.

    The lead byte (P)[0] decides how many trailing bytes are expected.
    Trailing bytes are examined one at a time, and as soon as
    CHAR_HEAD_P is true for an expected trailing position the result
    is mcharset__binary.  One-, two- and three-byte sequences are
    always mcharset__unicode; five- and six-byte sequences are always
    mcharset__m17n.

    For a four-byte sequence the three payload bits of the lead byte
    and the top two payload bits of the first trailing byte together
    form bits 16..20 of the code point.  A value of at most 0x10 means
    the code point does not exceed 0x10FFFF (mcharset__unicode);
    anything larger is mcharset__m17n.  The two parts occupy disjoint
    bit positions, so they must be merged with `|'; merging them with
    `&' always gives 0 and would classify every four-byte sequence as
    Unicode.

    P is evaluated several times and must have no side effects.  */

#define UTF8_CHARSET(p)                                           \
  (! ((p)[0] & 0x80) ? (mcharset__unicode)                        \
   : CHAR_HEAD_P ((p) + 1) ? (mcharset__binary)                   \
   : ! ((p)[0] & 0x20) ? (mcharset__unicode)                      \
   : CHAR_HEAD_P ((p) + 2) ? (mcharset__binary)                   \
   : ! ((p)[0] & 0x10) ? (mcharset__unicode)                      \
   : CHAR_HEAD_P ((p) + 3) ? (mcharset__binary)                   \
   : ! ((p)[0] & 0x08) ? ((((((p)[0] & 0x07) << 2)                \
                            | (((p)[1] & 0x30) >> 4)) <= 0x10)    \
                          ? (mcharset__unicode)                   \
                          : (mcharset__m17n))                     \
   : CHAR_HEAD_P ((p) + 4) ? (mcharset__binary)                   \
   : ! ((p)[0] & 0x04) ? (mcharset__m17n)                         \
   : CHAR_HEAD_P ((p) + 5) ? (mcharset__binary)                   \
   : ! ((p)[0] & 0x02) ? (mcharset__m17n)                         \
   : (mcharset__binary))
715 decode_coding_utf_8 (unsigned char *source, int src_bytes, MText *mt,
716 MConverter *converter)
718 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
719 MCodingSystem *coding = internal->coding;
720 unsigned char *src = internal->carryover;
721 unsigned char *src_stop = src + internal->carryover_bytes;
722 unsigned char *src_end = source + src_bytes;
723 unsigned char *src_base;
724 unsigned char *dst = mt->data + mt->nbytes;
725 unsigned char *dst_end = mt->data + mt->allocated;
728 int at_most = converter->at_most > 0 ? converter->at_most : -1;
730 int full = converter->lenient || (coding->charsets[0] == mcharset__m17n);
731 MCharset *charset = NULL;
736 MCharset *this_charset = NULL;
738 ONE_MORE_BASE_BYTE (c);
742 else if (!(c & 0x40))
744 else if (!(c & 0x20))
745 bytes = 2, c &= 0x1F;
746 else if (!(c & 0x10))
747 bytes = 3, c &= 0x0F;
748 else if (!(c & 0x08))
749 bytes = 4, c &= 0x07;
750 else if (!(c & 0x04))
751 bytes = 5, c &= 0x03;
752 else if (!(c & 0x02))
753 bytes = 6, c &= 0x01;
760 if ((c1 & 0xC0) != 0x80)
762 c = (c << 6) | (c1 & 0x3F);
766 || c < 0xD800 || (c >= 0xE000 && c < 0x110000))
770 if (! converter->lenient)
772 REWIND_SRC_TO_BASE ();
774 this_charset = mcharset__binary;
777 if (this_charset != charset)
779 TAKEIN_CHARS (mt, nchars - last_nchars,
780 dst - (mt->data + mt->nbytes), charset);
781 charset = this_charset;
782 last_nchars = nchars;
786 /* We reach here because of an invalid byte. */
790 TAKEIN_CHARS (mt, nchars - last_nchars,
791 dst - (mt->data + mt->nbytes), charset);
792 return finish_decoding (mt, converter, nchars,
793 source, src_end, src_base, error);
797 encode_coding_utf_8 (MText *mt, int from, int to,
798 unsigned char *destination, int dst_bytes,
799 MConverter *converter)
801 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
802 MCodingSystem *coding = internal->coding;
803 unsigned char *src, *src_end;
804 unsigned char *dst = destination;
805 unsigned char *dst_end = dst + dst_bytes;
807 enum MTextFormat format = mt->format;
809 SET_SRC (mt, format, from, to);
811 if (format <= MTEXT_FORMAT_UTF_8
812 && (converter->lenient
813 || coding->charsets[0] == mcharset__m17n))
815 if (dst_bytes < src_end - src)
817 int byte_pos = (src + dst_bytes) - mt->data;
819 to = POS_BYTE_TO_CHAR (mt, byte_pos);
820 byte_pos = POS_CHAR_TO_BYTE (mt, to);
821 src_end = mt->data + byte_pos;
822 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
824 memcpy (destination, src, src_end - src);
826 dst += src_end - src;
834 ONE_MORE_CHAR (c, bytes, format);
836 if ((c >= 0xD800 && c < 0xE000) || c >= 0x110000)
839 dst += CHAR_STRING (c, dst);
843 /* We reach here because of an unsupported char. */
844 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
847 insufficient_destination:
848 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
851 converter->nchars += nchars;
852 converter->nbytes += dst - destination;
853 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
857 /* Stuff for coding-systems of type MCODING_TYPE_UTF (16 & 32). */
878 enum utf_endian endian;
882 setup_coding_utf (MCodingSystem *coding)
884 MCodingInfoUTF *info = (MCodingInfoUTF *) (coding->extra_info);
885 MCodingInfoUTF *spec;
887 if (info->code_unit_bits == 8)
888 coding->ascii_compatible = 1;
889 else if (info->code_unit_bits == 16
890 || info->code_unit_bits == 32)
892 if (info->bom < 0 || info->bom > 2
893 || info->endian < 0 || info->endian > 1)
894 MERROR (MERROR_CODING, -1);
899 MSTRUCT_CALLOC (spec, MERROR_CODING);
901 coding->extra_spec = (void *) (spec);
906 reset_coding_utf (MConverter *converter)
908 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
909 MCodingSystem *coding = internal->coding;
910 struct utf_status *status = (struct utf_status *) &(converter->status);
913 && setup_coding_utf (coding) < 0)
917 status->surrogate = 0;
918 status->bom = ((MCodingInfoUTF *) (coding->extra_spec))->bom;
919 status->endian = ((MCodingInfoUTF *) (coding->extra_spec))->endian;
924 decode_coding_utf_16 (unsigned char *source, int src_bytes, MText *mt,
925 MConverter *converter)
927 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
928 unsigned char *src = internal->carryover;
929 unsigned char *src_stop = src + internal->carryover_bytes;
930 unsigned char *src_end = source + src_bytes;
931 unsigned char *src_base;
932 unsigned char *dst = mt->data + mt->nbytes;
933 unsigned char *dst_end = mt->data + mt->allocated;
936 int at_most = converter->at_most > 0 ? converter->at_most : -1;
937 struct utf_status *status = (struct utf_status *) &(converter->status);
938 unsigned char b1, b2;
939 MCharset *charset = NULL;
942 if (status->bom != UTF_BOM_NO)
946 ONE_MORE_BASE_BYTE (b1);
950 status->endian = UTF_BIG_ENDIAN;
951 else if (c == 0xFFFE)
952 status->endian = UTF_LITTLE_ENDIAN;
953 else if (status->bom == UTF_BOM_MAYBE
954 || converter->lenient)
956 status->endian = UTF_BIG_ENDIAN;
957 REWIND_SRC_TO_BASE ();
964 status->bom = UTF_BOM_NO;
970 MCharset *this_charset = NULL;
972 ONE_MORE_BASE_BYTE (b1);
974 if (status->endian == UTF_BIG_ENDIAN)
975 c = ((b1 << 8) | b2);
977 c = ((b2 << 8) | b1);
978 if (c < 0xD800 || c >= 0xE000)
984 if (status->endian == UTF_BIG_ENDIAN)
985 c1 = ((b1 << 8) | b2);
987 c1 = ((b2 << 8) | b1);
988 if (c1 < 0xDC00 || c1 >= 0xE000)
990 c = 0x10000 + ((c - 0xD800) << 10) + (c1 - 0xDC00);
995 if (! converter->lenient)
997 REWIND_SRC_TO_BASE ();
1000 if (status->endian == UTF_BIG_ENDIAN)
1001 c = ((b1 << 8) | b2);
1003 c = ((b2 << 8) | b1);
1004 this_charset = mcharset__binary;
1007 if (this_charset != charset)
1009 TAKEIN_CHARS (mt, nchars - last_nchars,
1010 dst - (mt->data + mt->nbytes), charset);
1011 charset = this_charset;
1012 last_nchars = nchars;
1016 /* We reach here because of an invalid byte. */
1020 TAKEIN_CHARS (mt, nchars - last_nchars,
1021 dst - (mt->data + mt->nbytes), charset);
1022 return finish_decoding (mt, converter, nchars,
1023 source, src_end, src_base, error);
1028 decode_coding_utf_32 (unsigned char *source, int src_bytes, MText *mt,
1029 MConverter *converter)
1031 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1032 unsigned char *src = internal->carryover;
1033 unsigned char *src_stop = src + internal->carryover_bytes;
1034 unsigned char *src_end = source + src_bytes;
1035 unsigned char *src_base;
1036 unsigned char *dst = mt->data + mt->nbytes;
1037 unsigned char *dst_end = mt->data + mt->allocated;
1039 int last_nchars = 0;
1040 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1041 struct utf_status *status = (struct utf_status *) &(converter->status);
1042 unsigned char b1, b2, b3, b4;
1043 MCharset *charset = NULL;
1046 if (status->bom != UTF_BOM_NO)
1050 ONE_MORE_BASE_BYTE (b1);
1054 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1055 if (c == 0x0000FEFF)
1056 status->endian = UTF_BIG_ENDIAN;
1057 else if (c == 0xFFFE0000)
1058 status->endian = UTF_LITTLE_ENDIAN;
1059 else if (status->bom == UTF_BOM_MAYBE
1060 || converter->lenient)
1062 status->endian = UTF_BIG_ENDIAN;
1063 REWIND_SRC_TO_BASE ();
1070 status->bom = UTF_BOM_NO;
1076 MCharset *this_charset = NULL;
1078 ONE_MORE_BASE_BYTE (b1);
1082 if (status->endian == UTF_BIG_ENDIAN)
1083 c = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
1085 c = (b4 << 24) | (b3 << 16) | (b2 << 8) | b1;
1086 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1089 if (! converter->lenient)
1091 REWIND_SRC_TO_BASE ();
1093 this_charset = mcharset__binary;
1096 if (this_charset != charset)
1098 TAKEIN_CHARS (mt, nchars - last_nchars,
1099 dst - (mt->data + mt->nbytes), charset);
1100 charset = this_charset;
1101 last_nchars = nchars;
1105 /* We reach here because of an invalid byte. */
1109 TAKEIN_CHARS (mt, nchars - last_nchars,
1110 dst - (mt->data + mt->nbytes), charset);
1111 return finish_decoding (mt, converter, nchars,
1112 source, src_end, src_base, error);
1117 encode_coding_utf_16 (MText *mt, int from, int to,
1118 unsigned char *destination, int dst_bytes,
1119 MConverter *converter)
1121 unsigned char *src, *src_end;
1122 unsigned char *dst = destination;
1123 unsigned char *dst_end = dst + dst_bytes;
1125 struct utf_status *status = (struct utf_status *) &(converter->status);
1126 int big_endian = status->endian == UTF_BIG_ENDIAN;
1127 enum MTextFormat format = mt->format;
1129 SET_SRC (mt, format, from, to);
1131 if (status->bom != UTF_BOM_NO)
1135 *dst++ = 0xFE, *dst++ = 0xFF;
1137 *dst++ = 0xFF, *dst++ = 0xFE;
1138 status->bom = UTF_BOM_NO;
1145 ONE_MORE_CHAR (c, bytes, format);
1147 if (c < 0xD800 || (c >= 0xE000 && c < 0x10000))
1151 *dst++ = c >> 8, *dst++ = c & 0xFF;
1153 *dst++ = c & 0xFF, *dst++ = c >> 8;
1155 else if (c >= 0x10000 && c < 0x110000)
1161 c1 = (c >> 10) + 0xD800;
1162 c2 = (c & 0x3FF) + 0xDC00;
1164 *dst++ = c1 >> 8, *dst++ = c1 & 0xFF,
1165 *dst++ = c2 >> 8, *dst++ = c2 & 0xFF;
1167 *dst++ = c1 & 0xFF, *dst++ = c1 >> 8,
1168 *dst++ = c2 & 0xFF, *dst++ = c2 >> 8;
1172 unsigned char buf[11];
1175 if (! converter->lenient)
1177 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1180 goto insufficient_destination;
1182 for (i = 0; i < len; i++)
1183 *dst++ = 0, *dst++ = buf[i];
1185 for (i = 0; i < len; i++)
1186 *dst++ = buf[i], *dst++ = 0;
1191 /* We reach here because of an unsupported char. */
1192 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1195 insufficient_destination:
1196 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1199 converter->nchars += nchars;
1200 converter->nbytes += dst - destination;
1201 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
/* Encode characters of MT (range FROM..TO, source pointers set up by
   SET_SRC according to the text format) as UTF-32 into DESTINATION,
   which has room for DST_BYTES bytes.  The byte order comes from the
   converter status.  On exit converter->nchars and converter->nbytes
   are advanced, and the function returns -1 when the conversion result
   is MCONVERSION_RESULT_INVALID_CHAR, 0 otherwise.
   Only part of this function is visible in this listing (several
   declarations, room checks, braces and branch headers are not
   shown). */
1205 encode_coding_utf_32 (MText *mt, int from, int to,
1206 unsigned char *destination, int dst_bytes,
1207 MConverter *converter)
1209 unsigned char *src, *src_end;
1210 unsigned char *dst = destination;
1211 unsigned char *dst_end = dst + dst_bytes;
1213 struct utf_status *status = (struct utf_status *) &(converter->status);
1214 int big_endian = status->endian == UTF_BIG_ENDIAN;
1215 enum MTextFormat format = mt->format;
1217 SET_SRC (mt, format, from, to);
/* While a BOM is still pending, emit it (00 00 FE FF or FF FE 00 00)
   and clear the pending state so that it is written only once. */
1219 if (status->bom != UTF_BOM_NO)
1223 *dst++ = 0x00, *dst++ = 0x00, *dst++ = 0xFE, *dst++ = 0xFF;
1225 *dst++ = 0xFF, *dst++ = 0xFE, *dst++ = 0x00, *dst++ = 0x00;
1226 status->bom = UTF_BOM_NO;
1233 ONE_MORE_CHAR (c, bytes, format);
/* Non-surrogate characters below 0x110000 are written as four bytes. */
1235 if (c < 0xD800 || (c >= 0xE000 && c < 0x110000))
1239 *dst++ = 0x00, *dst++ = c >> 16,
1240 *dst++ = (c >> 8) & 0xFF, *dst++ = c & 0xFF;
1242 *dst++ = c & 0xFF, *dst++ = (c >> 8) & 0xFF,
1243 *dst++ = c >> 16, *dst++ = 0x00;
/* Scratch area for the textual placeholder of an unsupported
   character. */
1247 unsigned char buf[11];
1250 if (! converter->lenient)
1252 len = encode_unsupporeted_char (c, buf, buf + (dst_end - dst),
1255 goto insufficient_destination;
/* NOTE(review): each placeholder byte is emitted as TWO output bytes
   here, exactly like the UTF-16 encoder, whereas regular characters
   above are written as four bytes.  This looks like a copy/paste slip
   for UTF-32 -- confirm.  Also, the room handed to the helper is
   dst_end - dst bytes although every placeholder byte expands to more
   than one output byte; a further room check may exist on lines not
   shown. */
1257 for (i = 0; i < len; i++)
1258 *dst++ = 0, *dst++ = buf[i];
1260 for (i = 0; i < len; i++)
1261 *dst++ = buf[i], *dst++ = 0;
1266 /* We reach here because of an unsupported char. */
1267 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
1270 insufficient_destination:
1271 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
1274 converter->nchars += nchars;
1275 converter->nbytes += dst - destination;
1276 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
1280 /* Stuff for coding-systems of type MCODING_TYPE_ISO_2022. */
/* Byte values of the control codes that the ISO 2022 code in this
   file refers to by name. */
1282 #define ISO_CODE_STX 0x02 /* start text */
1283 #define ISO_CODE_SO 0x0E /* shift-out */
1284 #define ISO_CODE_SI 0x0F /* shift-in */
1285 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
1286 #define ISO_CODE_ESC 0x1B /* escape */
1287 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
1288 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
1290 /** Structure pointed to by MCodingSystem.extra_spec. */
1292 struct iso_2022_spec
1296 /** Initial graphic registers (0..3) invoked to each graphic
1297 plane left and right. */
1298 int initial_invocation[2];
1300 /** Initially designated charsets for each graphic register. */
1301 MCharset *initial_designation[4];
1309 struct iso_2022_status
1312 MCharset *designation[4];
1313 unsigned single_shifting : 1;
1316 unsigned utf8_shifting : 1;
1317 MCharset *non_standard_charset;
1318 int non_standard_charset_bytes;
1319 int non_standard_encoding;
/* Classes into which a single byte is sorted for ISO 2022 processing.
   The array declared together with the type has one slot per byte
   value (256 entries); it is not initialized here -- presumably it is
   filled in during module initialization (TODO confirm). */
1322 enum iso_2022_code_class {
1323 ISO_control_0, /* Control codes in the range
1324 0x00..0x1F and 0x7F, except for the
1325 following 4 codes. */
1326 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
1327 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
1328 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
1329 ISO_escape, /* ISO_CODE_ESC (0x1B) */
1330 ISO_control_1, /* Control codes in the range
1331 0x80..0x9F, except for the
1332 following 3 codes. */
1333 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
1334 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
1335 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
1336 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
1337 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
1338 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
1339 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
1340 } iso_2022_code_class[256];
/* All designation-policy bits of the ISO 2022 flags.  Masking a flags
   value with this macro yields the designation policy; the result is
   zero when none of the four bits is set. */
1343 #define MCODING_ISO_DESIGNATION_MASK \
1344 (MCODING_ISO_DESIGNATION_G0 \
1345 | MCODING_ISO_DESIGNATION_G1 \
1346 | MCODING_ISO_DESIGNATION_CTEXT \
1347 | MCODING_ISO_DESIGNATION_CTEXT_EXT)
1350 setup_coding_iso_2022 (MCodingSystem *coding)
1352 MCodingInfoISO2022 *info = (MCodingInfoISO2022 *) (coding->extra_info);
1353 int ncharsets = coding->ncharsets;
1354 struct iso_2022_spec *spec;
1355 int designation_policy = info->flags & MCODING_ISO_DESIGNATION_MASK;
1358 coding->ascii_compatible = 0;
1360 MSTRUCT_CALLOC (spec, MERROR_CODING);
1362 spec->flags = info->flags;
1363 spec->initial_invocation[0] = info->initial_invocation[0];
1364 spec->initial_invocation[1] = info->initial_invocation[1];
1365 for (i = 0; i < 4; i++)
1366 spec->initial_designation[i] = NULL;
1367 if (designation_policy)
1369 spec->n_designations = ncharsets;
1370 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1371 spec->n_designations += mcharset__iso_2022_table.used;
1372 MTABLE_CALLOC (spec->designations, spec->n_designations, MERROR_CODING);
1373 for (i = 0; i < spec->n_designations; i++)
1374 spec->designations[i] = -1;
1378 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1379 MERROR (MERROR_CODING, -1);
1380 spec->designations = NULL;
1383 for (i = 0; i < ncharsets; i++)
1385 int reg = info->designations[i];
1388 && coding->charsets[i]->final_byte > 0
1389 && (reg < -4 || reg > 3))
1390 MERROR (MERROR_CODING, -1);
1393 if (spec->initial_designation[reg])
1394 MERROR (MERROR_CODING, -1);
1395 spec->initial_designation[reg] = coding->charsets[i];
1399 if (! designation_policy
1400 && ! (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1401 MERROR (MERROR_CODING, -1);
1405 if (designation_policy)
1406 spec->designations[i] = reg;
1407 if (coding->charsets[i] == mcharset__ascii)
1408 coding->ascii_compatible = 1;
1411 if (coding->ascii_compatible
1412 && (spec->flags & (MCODING_ISO_DESIGNATION_G0
1413 | MCODING_ISO_DESIGNATION_CTEXT
1414 | MCODING_ISO_DESIGNATION_CTEXT_EXT
1415 | MCODING_ISO_LOCKING_SHIFT)))
1416 coding->ascii_compatible = 0;
1418 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
1419 for (i = 0; i < mcharset__iso_2022_table.used; i++)
1421 MCharset *charset = mcharset__iso_2022_table.charsets[i];
1423 spec->designations[ncharsets + i]
1424 = ((designation_policy == MCODING_ISO_DESIGNATION_CTEXT
1425 || designation_policy == MCODING_ISO_DESIGNATION_CTEXT_EXT)
1426 ? (charset->code_range[0] == 32
1427 || charset->code_range[1] == 255)
1428 : designation_policy == MCODING_ISO_DESIGNATION_G1);
1431 spec->use_esc = ((spec->flags & MCODING_ISO_DESIGNATION_MASK)
1432 || ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1433 && (spec->initial_designation[2]
1434 || spec->initial_designation[3]))
1435 || (! (spec->flags & MCODING_ISO_EIGHT_BIT)
1436 && (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1437 || (spec->flags & MCODING_ISO_ISO6429));
1439 coding->extra_spec = (void *) spec;
1445 reset_coding_iso_2022 (MConverter *converter)
1447 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1448 MCodingSystem *coding = internal->coding;
1449 struct iso_2022_status *status
1450 = (struct iso_2022_status *) &(converter->status);
1451 struct iso_2022_spec *spec;
1455 && setup_coding_iso_2022 (coding) < 0)
1459 spec = (struct iso_2022_spec *) coding->extra_spec;
1460 status->invocation[0] = spec->initial_invocation[0];
1461 status->invocation[1] = spec->initial_invocation[1];
1462 for (i = 0; i < 4; i++)
1463 status->designation[i] = spec->initial_designation[i];
1464 status->single_shifting = 0;
1471 #define ISO2022_DECODE_DESIGNATION(reg, dim, chars, final, rev) \
1473 MCharset *charset; \
1475 if ((final) < '0' || (final) >= 128) \
1476 goto invalid_byte; \
1479 charset = MCHARSET_ISO_2022 ((dim), (chars), (final)); \
1480 if (! (spec->flags & MCODING_ISO_FULL_SUPPORT)) \
1484 for (i = 0; i < coding->ncharsets; i++) \
1485 if (charset == coding->charsets[i]) \
1487 if (i == coding->ncharsets) \
1488 goto invalid_byte; \
1495 for (i = 0; i < mcharset__iso_2022_table.used; i++) \
1497 charset = mcharset__iso_2022_table.charsets[i]; \
1498 if (charset->revision == (rev) \
1499 && charset->dimension == (dim) \
1500 && charset->final_byte == (final) \
1501 && (charset->code_range[1] == (chars) \
1502 || ((chars) == 96 && charset->code_range[1] == 255))) \
1505 if (i == mcharset__iso_2022_table.used) \
1506 goto invalid_byte; \
1508 status->designation[reg] = charset; \
/* Map CHARSET_NAME, the charset name carried by a compound-text
   extended segment, to an m17n charset: "koi8-r" selects the charset
   named koi8-r and "big5-0" selects the charset named big5.  The
   handling of any other name is in lines elided from this view.  */
1513 find_ctext_non_standard_charset (char *charset_name)
1517 if (! strcmp (charset_name, "koi8-r"))
1518 charset = MCHARSET (msymbol ("koi8-r"));
1519 else if (! strcmp (charset_name, "big5-0"))
1520 charset = MCHARSET (msymbol ("big5"));
1527 decode_coding_iso_2022 (unsigned char *source, int src_bytes, MText *mt,
1528 MConverter *converter)
1530 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
1531 MCodingSystem *coding = internal->coding;
1532 unsigned char *src = internal->carryover;
1533 unsigned char *src_stop = src + internal->carryover_bytes;
1534 unsigned char *src_end = source + src_bytes;
1535 unsigned char *src_base;
1536 unsigned char *dst = mt->data + mt->nbytes;
1537 unsigned char *dst_end = mt->data + mt->allocated;
1539 int last_nchars = 0;
1540 int at_most = converter->at_most > 0 ? converter->at_most : -1;
1541 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
1542 struct iso_2022_status *status
1543 = (struct iso_2022_status *) &(converter->status);
1544 MCharset *charset0, *charset1, *charset;
1546 MCharset *cns_charsets[15];
1548 charset0 = (status->invocation[0] >= 0
1549 ? status->designation[status->invocation[0]] : NULL);
1550 charset1 = (status->invocation[1] >= 0
1551 ? status->designation[status->invocation[1]] : NULL);
1552 charset = mcharset__ascii;
1554 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1558 memset (cns_charsets, 0, sizeof (cns_charsets));
1559 for (i = 0; i < coding->ncharsets; i++)
1560 if (coding->charsets[i]->dimension == 2
1561 && coding->charsets[i]->code_range[1] == 126)
1563 int final = coding->charsets[i]->final_byte;
1565 if (final >= 'G' && final <= 'M')
1566 cns_charsets[final - 'G'] = coding->charsets[i];
1568 cns_charsets[14] = coding->charsets[i];
1574 MCharset *this_charset = NULL;
1577 ONE_MORE_BASE_BYTE (c1);
1579 if (status->utf8_shifting)
1582 int bytes = CHAR_BYTES_BY_HEAD (c1);
1586 for (i = 1; i < bytes; i++)
1591 this_charset = UTF8_CHARSET (buf);
1592 c1 = STRING_CHAR_UTF8 (buf);
1596 if (status->non_standard_encoding > 0)
1600 this_charset = status->non_standard_charset;
1601 for (i = 1; i < status->non_standard_charset_bytes; i++)
1604 c1 = (c1 << 8) | c2;
1606 c1 = DECODE_CHAR (this_charset, c1);
1610 switch (iso_2022_code_class[c1])
1612 case ISO_graphic_plane_0:
1613 this_charset = charset0;
1616 case ISO_0x20_or_0x7F:
1618 || (charset0->code_range[0] != 32
1619 && charset0->code_range[1] != 255))
1620 /* This is SPACE or DEL. */
1621 this_charset = mcharset__ascii;
1623 /* This is a graphic character of plane 0. */
1624 this_charset = charset0;
1627 case ISO_graphic_plane_1:
1630 this_charset = charset1;
1633 case ISO_0xA0_or_0xFF:
1635 || charset1->code_range[0] == 33
1636 || ! (spec->flags & MCODING_ISO_EIGHT_BIT))
1638 /* This is a graphic character of plane 1. */
1641 this_charset = charset1;
1645 this_charset = mcharset__ascii;
1652 if ((spec->flags & MCODING_ISO_LOCKING_SHIFT)
1653 && status->designation[1])
1655 status->invocation[0] = 1;
1656 charset0 = status->designation[1];
1659 this_charset = mcharset__ascii;
1663 if (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1665 status->invocation[0] = 0;
1666 charset0 = status->designation[0];
1669 this_charset = mcharset__ascii;
1672 case ISO_single_shift_2_7:
1673 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT_7))
1675 this_charset = mcharset__ascii;
1679 goto label_escape_sequence;
1681 case ISO_single_shift_2:
1682 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
1685 if (c1 < 0xA1 || (c1 > 0xA7 && c1 < 0xAF) || c1 > 0xAF
1686 || ! cns_charsets[c1 - 0xA1])
1688 status->designation[2] = cns_charsets[c1 - 0xA1];
1690 else if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1692 /* SS2 is handled as an escape sequence of ESC 'N' */
1694 goto label_escape_sequence;
1696 case ISO_single_shift_3:
1697 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT))
1699 /* SS3 is handled as an escape sequence of ESC 'O' */
1701 goto label_escape_sequence;
1703 case ISO_control_sequence_introducer:
1704 /* CSI is handled as an escape sequence of ESC '[' ... */
1706 goto label_escape_sequence;
1709 if (! spec->use_esc)
1711 this_charset = mcharset__ascii;
1715 label_escape_sequence:
1716 /* Escape sequences handled here are invocation,
1717 designation, and direction specification. */
1720 case '&': /* revision of following character set */
1721 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1722 goto unused_escape_sequence;
1724 if (c1 < '@' || c1 > '~')
1727 if (c1 != ISO_CODE_ESC)
1730 goto label_escape_sequence;
1732 case '$': /* designation of 2-byte character set */
1733 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1734 goto unused_escape_sequence;
1736 if (c1 >= '@' && c1 <= 'B')
1737 { /* designation of JISX0208.1978, GB2312.1980, or
1739 ISO2022_DECODE_DESIGNATION (0, 2, 94, c1, -1);
1741 else if (c1 >= 0x28 && c1 <= 0x2B)
1742 { /* designation of (dimension 2, chars 94) character set */
1744 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2, -1);
1746 else if (c1 >= 0x2C && c1 <= 0x2F)
1747 { /* designation of (dimension 2, chars 96) character set */
1749 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2, -1);
1753 /* We must update these variables now. */
1754 charset0 = status->designation[status->invocation[0]];
1755 charset1 = status->designation[status->invocation[1]];
1758 case 'n': /* invocation of locking-shift-2 */
1759 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1760 || ! status->designation[2])
1762 status->invocation[0] = 2;
1763 charset0 = status->designation[2];
1766 case 'o': /* invocation of locking-shift-3 */
1767 if (! (spec->flags & MCODING_ISO_LOCKING_SHIFT)
1768 || ! status->designation[3])
1770 status->invocation[0] = 3;
1771 charset0 = status->designation[3];
1774 case 'N': /* invocation of single-shift-2 */
1775 if (! ((spec->flags & MCODING_ISO_SINGLE_SHIFT)
1776 || (spec->flags & MCODING_ISO_EUC_TW_SHIFT))
1777 || ! status->designation[2])
1779 this_charset = status->designation[2];
1781 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1785 case 'O': /* invocation of single-shift-3 */
1786 if (! (spec->flags & MCODING_ISO_SINGLE_SHIFT)
1787 || ! status->designation[3])
1789 this_charset = status->designation[3];
1791 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1795 case '[': /* specification of direction */
1796 if (! (spec->flags & MCODING_ISO_ISO6429))
1798 /* For the moment, nested direction is not supported.
1799 So, (coding->mode & CODING_MODE_DIRECTION) zero means
1800 left-to-right, and nonzero means right-to-left. */
1804 case ']': /* end of the current direction */
1805 case '0': /* end of the current direction */
1809 case '1': /* start of left-to-right direction */
1816 case '2': /* start of right-to-left direction */
1830 char charset_name[16];
/* The compound-text extension sequences are honored only when
   SPEC->flags has MCODING_ISO_DESIGNATION_CTEXT_EXT.  The flag test
   must be parenthesized: unary `!' binds tighter than `&', so
   `! spec->flags & FLAG' evaluates `(! spec->flags) & FLAG' and
   never actually tests the flag.  */
1834 if (! (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT))
1836 /* Compound-text uses these escape sequences:
1838 ESC % G -- utf-8 bytes -- ESC % @
1839 ESC % / 1 M L -- charset name -- STX -- bytes --
1840 ESC % / 2 M L -- charset name -- STX -- bytes --
1841 ESC % / 3 M L -- charset name -- STX -- bytes --
1842 ESC % / 4 M L -- charset name -- STX -- bytes --
1844 It also uses this sequence but that is not yet
1847 ESC % / 0 M L -- charset name -- STX -- bytes -- */
1852 status->utf8_shifting = 1;
1857 if (! status->utf8_shifting)
1859 status->utf8_shifting = 0;
/* The digit after "ESC % /" must be '1'..'4'; it is the number of
   bytes used per character in the extended segment.  */
1865 if (c1 < '1' || c1 > '4')
1867 status->non_standard_charset_bytes = c1 - '0';
/* M and L each have the high bit set and carry 7 bits of the segment
   length: length = (M - 128) * 128 + (L - 128).  */
1870 if (c1 < 128 || c2 < 128)
1872 bytes = (c1 - 128) * 128 + (c2 - 128);
/* Collect the lower-cased charset name up to the STX terminator;
   CHARSET_NAME holds at most 16 bytes.  */
1873 for (i = 0; i < 16; i++)
1876 if (c1 == ISO_CODE_STX)
1878 charset_name[i] = TOLOWER (c1);
1882 charset_name[i++] = '\0';
1883 this_charset = find_ctext_non_standard_charset (charset_name);
1886 status->non_standard_charset = this_charset;
/* I now counts the name plus its terminator, so this is the number
   of character bytes remaining in the segment.  */
1887 status->non_standard_encoding = bytes - i;
1892 if (! (spec->flags & MCODING_ISO_DESIGNATION_MASK))
1893 goto unused_escape_sequence;
1894 if (c1 >= 0x28 && c1 <= 0x2B)
1895 { /* designation of (dimension 1, chars 94) charset */
1897 ISO2022_DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2, -1);
1899 else if (c1 >= 0x2C && c1 <= 0x2F)
1900 { /* designation of (dimension 1, chars 96) charset */
1902 ISO2022_DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2, -1);
1906 /* We must update these variables now. */
1907 charset0 = status->designation[status->invocation[0]];
1908 charset1 = status->designation[status->invocation[1]];
1911 unused_escape_sequence:
1912 UNGET_ONE_BYTE (c1);
1914 this_charset = mcharset__ascii;
1918 if (this_charset->dimension == 1)
1920 if (this_charset->code_range[1] <= 128)
1923 else if (this_charset->dimension == 2)
1926 c1 = ((c1 & 0x7F) << 8) | (c2 & 0x7F);
1928 else /* i.e. (dimension == 3) */
1932 c1 = ((c1 & 0x7F) << 16) | ((c2 & 0x7F) << 8) | (c3 & 0x7F);
1934 c1 = DECODE_CHAR (this_charset, c1);
1938 if (! converter->lenient)
1940 REWIND_SRC_TO_BASE ();
1942 this_charset = mcharset__binary;
1945 if (this_charset != mcharset__ascii
1946 && this_charset != charset)
1948 TAKEIN_CHARS (mt, nchars - last_nchars,
1949 dst - (mt->data + mt->nbytes), charset);
1950 charset = this_charset;
1951 last_nchars = nchars;
1954 if (status->non_standard_encoding > 0)
1955 status->non_standard_encoding -= status->non_standard_charset_bytes;
1957 /* We reach here because of an invalid byte. */
1963 TAKEIN_CHARS (mt, nchars - last_nchars,
1964 dst - (mt->data + mt->nbytes), charset);
1965 return finish_decoding (mt, converter, nchars,
1966 source, src_end, src_base, error);
1970 /* Produce codes (escape sequence) for designating CHARSET to graphic
1971 register REG at DST, and increment DST. If CHARSET->final_byte is
1972 '@', 'A', or 'B' and SPEC->flags lacks MCODING_ISO_LONG_FORM, the
1973 short-form designation may be produced. Update STATUS->designation[REG]. */
1975 #define ISO2022_ENCODE_DESIGNATION(reg, charset, spec, status) \
1977 char *intermediate_char_94 = "()*+"; \
1978 char *intermediate_char_96 = ",-./"; \
1980 if (dst + 4 > dst_end) \
1981 goto memory_shortage; \
1982 *dst++ = ISO_CODE_ESC; \
1983 if (charset->dimension == 1) \
1985 if (charset->code_range[0] != 32 \
1986 && charset->code_range[1] != 255) \
1987 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1989 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1994 if (charset->code_range[0] != 32 \
1995 && charset->code_range[1] != 255) \
1997 if (spec->flags & MCODING_ISO_LONG_FORM \
1999 || charset->final_byte < '@' || charset->final_byte > 'B') \
2000 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
2003 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
2005 *dst++ = charset->final_byte; \
2007 status->designation[reg] = charset; \
2011 /* The following two macros produce codes (control character or escape
2012 sequence) for ISO-2022 single-shift functions (single-shift-2 and
2015 #define ISO2022_ENCODE_SINGLE_SHIFT_2(spec, status) \
2017 if (dst + 2 > dst_end) \
2018 goto memory_shortage; \
2019 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2020 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
2022 *dst++ = ISO_CODE_SS2; \
2023 status->single_shifting = 1; \
2027 #define ISO2022_ENCODE_SINGLE_SHIFT_3(spec, status) \
2029 if (dst + 2 > dst_end) \
2030 goto memory_shortage; \
2031 if (! (spec->flags & MCODING_ISO_EIGHT_BIT)) \
2032 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
2034 *dst++ = ISO_CODE_SS3; \
2035 status->single_shifting = 1; \
2039 /* The following four macros produce codes (control character or
2040 escape sequence) for ISO-2022 locking-shift functions (shift-in,
2041 shift-out, locking-shift-2, and locking-shift-3). */
2043 #define ISO2022_ENCODE_SHIFT_IN(status) \
2045 if (dst + 1 > dst_end) \
2046 goto memory_shortage; \
2047 *dst++ = ISO_CODE_SI; \
2048 status->invocation[0] = 0; \
2052 #define ISO2022_ENCODE_SHIFT_OUT(status) \
2054 if (dst + 1 > dst_end) \
2055 goto memory_shortage; \
2056 *dst++ = ISO_CODE_SO; \
2057 status->invocation[0] = 1; \
2061 #define ISO2022_ENCODE_LOCKING_SHIFT_2(status) \
2063 if (dst + 2 > dst_end) \
2064 goto memory_shortage; \
2065 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
2066 status->invocation[0] = 2; \
2070 #define ISO2022_ENCODE_LOCKING_SHIFT_3(status) \
2072 if (dst + 2 > dst_end) \
2073 goto memory_shortage; \
2074 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
2075 status->invocation[0] = 3; \
2078 #define ISO2022_ENCODE_UTF8_SHIFT_START(len) \
2080 CHECK_DST (3 + len); \
2081 *dst++ = ISO_CODE_ESC; \
2084 status->utf8_shifting = 1; \
2088 #define ISO2022_ENCODE_UTF8_SHIFT_END() \
2091 *dst++ = ISO_CODE_ESC; \
2094 status->utf8_shifting = 0; \
2098 #define ISO2022_ENCODE_NON_STANDARD(name, len) \
2100 CHECK_DST (6 + len + 1 + non_standard_charset_bytes); \
2101 non_standard_begin = dst; \
2102 *dst++ = ISO_CODE_ESC; \
2105 *dst++ = '0' + non_standard_charset_bytes; \
2106 *dst++ = 0, *dst++ = 0; /* filled later */ \
2107 memcpy (dst, name, len); \
2109 *dst++ = ISO_CODE_STX; \
2110 non_standard_bytes = len + 1; \
/* Return the name under which CHARSET is announced in a compound-text
   extended segment, and store the number of bytes per character in
   *BYTES.  The charset named "big5" is announced as "big5-0" with two
   bytes per character.  The "koi8-r" branch body and the fallback for
   other charsets are in lines elided from this view.  */
2115 find_ctext_non_standard_name (MCharset *charset, int *bytes)
2117 char *name = msymbol_name (charset->name);
2119 if (! strcmp (name, "koi8-r"))
2121 else if (! strcmp (name, "big5"))
2122 name = "big5-0", *bytes = 2;
2128 /* Designate CHARSET to a graphic register specified in
2129 SPEC->designations. If the register is not yet invoked to graphic
2130 left nor right, invoke it to graphic left. DSTP points to a
2131 variable containing a memory address where the output must go.
2132 DST_END is the limit of that memory.
2134 Return 0 if it succeeds. Return -1 otherwise, which means that the
2135 memory area is too short. By side effect, update the variable that
/* Make CHARSET usable for output: designate it to a graphic register
   if it is not designated to any of the four yet, then invoke that
   register if neither invocation slot of STATUS refers to it.  The
   bytes are written through the local DST, which starts at *DSTP;
   the ISO2022_ENCODE_* macros check it against DST_END.  */
2139 iso_2022_designate_invoke_charset (MCodingSystem *coding,
2141 struct iso_2022_spec *spec,
2142 struct iso_2022_status *status,
2143 unsigned char **dstp,
2144 unsigned char *dst_end)
2147 unsigned char *dst = *dstp;
/* Look for a register that already holds CHARSET.  */
2149 for (i = 0; i < 4; i++)
2150 if (charset == status->designation[i])
2155 /* CHARSET is not yet designated to any graphic registers. */
/* Find the index of CHARSET in SPEC->designations: the charsets of
   CODING come first, followed by the entries of
   mcharset__iso_2022_table.  */
2156 for (i = 0; i < coding->ncharsets; i++)
2157 if (charset == coding->charsets[i])
2159 if (i == coding->ncharsets)
2161 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2162 if (charset == mcharset__iso_2022_table.charsets[i])
2164 i += coding->ncharsets;
/* From here on I is the graphic register number, not a table index.  */
2166 i = spec->designations[i];
2167 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2170 if (status->invocation[0] != i
2171 && status->invocation[1] != i)
2173 /* Graphic register I is not yet invoked. */
2176 case 0: /* graphic register 0 */
2177 ISO2022_ENCODE_SHIFT_IN (status);
2180 case 1: /* graphic register 1 */
2181 ISO2022_ENCODE_SHIFT_OUT (status);
2184 case 2: /* graphic register 2 */
/* G2 and G3 use a single shift when SPEC allows it, otherwise a
   locking shift.  */
2185 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2186 ISO2022_ENCODE_SINGLE_SHIFT_2 (spec, status);
2188 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2191 case 3: /* graphic register 3 */
2192 if (spec->flags & MCODING_ISO_SINGLE_SHIFT)
2193 ISO2022_ENCODE_SINGLE_SHIFT_3 (spec, status);
2195 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2208 /* Reset the invocation/designation status to the initial one. SPEC
2209 and STATUS contain information about the initial and current
2210 invocation/designation status respectively. DSTP points to a
2211 variable containing a memory address where the output must go.
2212 DST_END is the limit of that memory.
2214 Return 0 if it succeeds. Return -1 otherwise, which means that the
2215 memory area is too short. By side effect, update the variable that
/* Emit the shift and designation sequences needed to bring STATUS
   (the current state) back to the initial state recorded in SPEC.
   Only the first invocation slot is reset, and only when SPEC gives
   a non-negative initial invocation; a designation register is reset
   only when SPEC gives a non-NULL initial charset for it.  Output is
   written through the local DST, which starts at *DSTP.  */
2219 iso_2022_reset_invocation_designation (struct iso_2022_spec *spec,
2220 struct iso_2022_status *status,
2221 unsigned char **dstp,
2222 unsigned char *dst_end)
2224 unsigned char *dst = *dstp;
2227 /* Reset the invocation status of GL. We have not yet supported GR
2229 if (status->invocation[0] != spec->initial_invocation[0]
2230 && spec->initial_invocation[0] >= 0)
2232 if (spec->initial_invocation[0] == 0)
2233 ISO2022_ENCODE_SHIFT_IN (status);
2234 else if (spec->initial_invocation[0] == 1)
2235 ISO2022_ENCODE_SHIFT_OUT (status);
2236 else if (spec->initial_invocation[0] == 2)
2237 ISO2022_ENCODE_LOCKING_SHIFT_2 (status);
2238 else /* i.e. spec->initial_invocation[0] == 3 */
2239 ISO2022_ENCODE_LOCKING_SHIFT_3 (status);
2242 /* Reset the designation status of G0..G3. */
2243 for (i = 0; i < 4; i++)
2244 if (status->designation[i] != spec->initial_designation[i]
2245 && spec->initial_designation[i])
/* Re-designate the initial charset; the macro also records it in
   STATUS->designation[i].  */
2247 MCharset *charset = spec->initial_designation[i];
2249 ISO2022_ENCODE_DESIGNATION (i, charset, spec, status);
2262 encode_coding_iso_2022 (MText *mt, int from, int to,
2263 unsigned char *destination, int dst_bytes,
2264 MConverter *converter)
2266 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2267 MCodingSystem *coding = internal->coding;
2268 unsigned char *src, *src_end;
2269 unsigned char *dst = destination;
2270 unsigned char *dst_end = dst + dst_bytes;
2272 unsigned char *dst_base;
2273 struct iso_2022_spec *spec = (struct iso_2022_spec *) coding->extra_spec;
2274 int full_support = spec->flags & MCODING_ISO_FULL_SUPPORT;
2275 struct iso_2022_status *status
2276 = (struct iso_2022_status *) &(converter->status);
2277 MCharset *primary, *charset0, *charset1;
2278 int next_primary_change;
2279 int ncharsets = coding->ncharsets;
2280 MCharset **charsets = coding->charsets;
2281 MCharset *cns_charsets[15];
2282 int ascii_compatible = coding->ascii_compatible;
2283 MCharset *non_standard_charset = NULL;
2284 int non_standard_charset_bytes = 0;
2285 int non_standard_bytes = 0;
2286 unsigned char *non_standard_begin = NULL;
2287 enum MTextFormat format = mt->format;
2289 SET_SRC (mt, format, from, to);
2291 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2295 memset (cns_charsets, 0, sizeof (cns_charsets));
2296 for (i = 0; i < ncharsets; i++)
2297 if (charsets[i]->dimension == 2)
2299 int final = charsets[i]->final_byte;
2301 if (final >= 'G' && final <= 'M')
2302 cns_charsets[final - 'G'] = charsets[i];
2304 cns_charsets[14] = charsets[i];
2308 next_primary_change = from;
2310 charset0 = status->designation[status->invocation[0]];
2311 charset1 = (status->invocation[1] < 0 ? NULL
2312 : status->designation[status->invocation[1]]);
2319 ONE_MORE_CHAR (c, bytes, format);
2321 if (c < 128 && ascii_compatible)
2323 if (status->utf8_shifting)
2324 ISO2022_ENCODE_UTF8_SHIFT_END ();
2328 else if (c <= 32 || c == 127)
2330 if (status->utf8_shifting)
2331 ISO2022_ENCODE_UTF8_SHIFT_END ();
2332 if (spec->flags & MCODING_ISO_RESET_AT_CNTL
2333 || (c == '\n' && spec->flags & MCODING_ISO_RESET_AT_EOL))
2335 if (iso_2022_reset_invocation_designation (spec, status,
2337 goto insufficient_destination;
2338 charset0 = status->designation[status->invocation[0]];
2339 charset1 = (status->invocation[1] < 0 ? NULL
2340 : status->designation[status->invocation[1]]);
2347 unsigned code = MCHAR_INVALID_CODE;
2348 MCharset *charset = NULL;
2350 int pos = from + nchars;
2352 if (pos >= next_primary_change)
2354 MSymbol primary_charset
2355 = (MSymbol) mtext_get_prop (mt, pos, Mcharset);
2356 primary = MCHARSET (primary_charset);
2357 if (primary && primary != mcharset__binary)
2359 if (primary->final_byte <= 0)
2361 else if (! full_support)
2365 for (i = 0; i < ncharsets; i++)
2366 if (primary == charsets[i])
2373 mtext_prop_range (mt, Mcharset, pos,
2374 NULL, &next_primary_change, 0);
2377 if (primary && primary != mcharset__binary)
2379 code = ENCODE_CHAR (primary, c);
2380 if (code != MCHAR_INVALID_CODE)
2385 if (c <= 32 || c == 127)
2388 charset = mcharset__ascii;
2394 for (i = 0; i < ncharsets; i++)
2396 charset = charsets[i];
2397 code = ENCODE_CHAR (charset, c);
2398 if (code != MCHAR_INVALID_CODE)
2403 if (spec->flags & MCODING_ISO_FULL_SUPPORT)
2405 for (i = 0; i < mcharset__iso_2022_table.used; i++)
2407 charset = mcharset__iso_2022_table.charsets[i];
2408 code = ENCODE_CHAR (charset, c);
2409 if (code != MCHAR_INVALID_CODE)
2412 if (i == mcharset__iso_2022_table.used)
2414 if (spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2415 goto unsupported_char;
2416 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2421 goto unsupported_char;
2427 && (charset->final_byte >= 0
2428 || spec->flags & MCODING_ISO_EUC_TW_SHIFT))
2430 if (code >= 0x80 && code < 0xA0)
2431 goto unsupported_char;
2433 if (status->utf8_shifting)
2434 ISO2022_ENCODE_UTF8_SHIFT_END ();
2435 if (charset == charset0)
2437 else if (charset == charset1)
2441 unsigned char *p = NULL;
2443 if (spec->flags & MCODING_ISO_EUC_TW_SHIFT)
2447 if (cns_charsets[0] == charset)
2453 for (i = 1; i < 15; i++)
2454 if (cns_charsets[i] == charset)
2457 *dst++ = ISO_CODE_SS2;
2460 status->single_shifting = 1;
2465 if (iso_2022_designate_invoke_charset
2466 (coding, charset, spec, status, &dst, dst_end) < 0)
2467 goto insufficient_destination;
2468 charset0 = status->designation[status->invocation[0]];
2469 charset1 = (status->invocation[1] < 0 ? NULL
2470 : status->designation[status->invocation[1]]);
2472 if (status->single_shifting)
2474 = (spec->flags & MCODING_ISO_EIGHT_BIT) ? 0x80 : 0;
2475 else if (charset == charset0)
2480 if (charset->dimension == 1)
2483 *dst++ = code | gr_mask;
2485 else if (charset->dimension == 2)
2488 *dst++ = (code >> 8) | gr_mask;
2489 *dst++ = (code & 0xFF) | gr_mask;
2494 *dst++ = (code >> 16) | gr_mask;
2495 *dst++ = ((code >> 8) & 0xFF) | gr_mask;
2496 *dst++ = (code & 0xFF) | gr_mask;
2498 status->single_shifting = 0;
2500 else if (charset && spec->flags & MCODING_ISO_DESIGNATION_CTEXT_EXT)
2502 if (charset != non_standard_charset)
2504 char *name = (find_ctext_non_standard_name
2505 (charset, &non_standard_charset_bytes));
2509 int len = strlen (name);
2511 ISO2022_ENCODE_NON_STANDARD (name, len);
2512 non_standard_charset = charset;
2515 non_standard_charset = NULL;
/* Inside a compound-text extended segment: append CODE using
   non_standard_charset_bytes bytes, most significant byte first, and
   keep the segment's length field up to date.  */
2518 if (non_standard_charset)
2520 if (dst + non_standard_charset_bytes > dst_end)
2521 goto insufficient_destination;
2522 non_standard_bytes += non_standard_charset_bytes;
/* Back-patch the two length bytes left as 0 by
   ISO2022_ENCODE_NON_STANDARD: 7 bits each, high bit always set.  */
2523 non_standard_begin[4] = (non_standard_bytes / 128) | 0x80;
2524 non_standard_begin[5] = (non_standard_bytes % 128) | 0x80;
2525 if (non_standard_charset_bytes == 1)
2527 else if (non_standard_charset_bytes == 2)
2528 *dst++ = code >> 8, *dst++ = code & 0xFF;
2529 else if (non_standard_charset_bytes == 3)
2530 *dst++ = code >> 16, *dst++ = (code >> 8) & 0xFF,
2531 *dst++ = code & 0xFF;
2532 else /* i.e. non_standard_charset_bytes == 4 */
2533 *dst++ = code >> 24, *dst++ = (code >> 16) & 0xFF,
2534 *dst++ = (code >> 8) & 0xFF, *dst++ = code & 0xFF;
2538 int len = CHAR_BYTES (c);
2541 goto unsupported_char;
2542 if (! status->utf8_shifting)
2543 ISO2022_ENCODE_UTF8_SHIFT_START (len);
2546 CHAR_STRING (c, dst);
2550 goto unsupported_char;
2560 if (iso_2022_designate_invoke_charset (coding, mcharset__ascii,
2563 goto insufficient_destination;
2564 if (! converter->lenient)
2566 len = encode_unsupporeted_char (c, dst, dst_end, mt, from + nchars);
2568 goto insufficient_destination;
2574 /* We reach here because of an unsupported char. */
2575 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2578 insufficient_destination:
2580 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2583 if (converter->result == MCONVERSION_RESULT_SUCCESS
2584 && converter->last_block)
2586 if (status->utf8_shifting)
2588 ISO2022_ENCODE_UTF8_SHIFT_END ();
2591 if (spec->flags & MCODING_ISO_RESET_AT_EOL
2592 && charset0 != spec->initial_designation[0])
2594 if (iso_2022_reset_invocation_designation (spec, status,
2596 goto insufficient_destination;
2599 converter->nchars += nchars;
2600 converter->nbytes += dst - destination;
2601 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
2605 /* Stuff for coding-systems of type MCODING_TYPE_MISC. */
2607 /* For SJIS handling... */
2609 #define SJIS_TO_JIS(s1, s2) \
2611 ? (((s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0)) << 8) \
2613 : (((s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1)) << 8) \
2614 | (s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F))))
2616 #define JIS_TO_SJIS(c1, c2) \
2618 ? (((c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1)) << 8) \
2619 | (c2 + ((c2 >= 0x60) ? 0x20 : 0x1F))) \
2620 : (((c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0)) << 8) \
/* Prepare the SJIS coding system of CONVERTER on first use, i.e.
   while coding->ready is zero: look up the charsets jisx0208.1983
   and jisx0201-kana and install them as coding->charsets[1] and
   coding->charsets[2], with coding->ncharsets set to 3.  */
2625 reset_coding_sjis (MConverter *converter)
2627 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2628 MCodingSystem *coding = internal->coding;
2630 if (! coding->ready)
2632 MSymbol kanji_sym = msymbol ("jisx0208.1983");
2633 MCharset *kanji = MCHARSET (kanji_sym);
2634 MSymbol kana_sym = msymbol ("jisx0201-kana");
2635 MCharset *kana = MCHARSET (kana_sym);
/* Test the charset objects, not the symbols: msymbol () always
   yields a symbol for these names, whereas MCHARSET () gives NULL
   when the charset is not defined.  A NULL charset must not be
   stored in coding->charsets, since the SJIS decoder and encoder
   use those entries without checking them.  */
2637 if (! kanji || ! kana)
2639 coding->ncharsets = 3;
2640 coding->charsets[1] = kanji;
2641 coding->charsets[2] = kana;
/* Decode SRC_BYTES bytes at SOURCE as Shift_JIS and append the
   result to MT.  Bytes carried over in CONVERTER's internal info
   (internal->carryover) are consumed first.

   A lead byte in 0x81..0x9F or 0xE0..0xEF starts a two-byte kanji
   code, which is converted with SJIS_TO_JIS and decoded in
   coding->charsets[1].  A byte in 0xA1..0xDF is decoded in
   coding->charsets[2] (kana).  When the converter is not lenient,
   an invalid byte rewinds the source to the start of the current
   character; otherwise it is taken as mcharset__binary.

   The return value is that of finish_decoding ().  */
2648 decode_coding_sjis (unsigned char *source, int src_bytes, MText *mt,
2649 MConverter *converter)
2651 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2652 MCodingSystem *coding = internal->coding;
2653 unsigned char *src = internal->carryover;
2654 unsigned char *src_stop = src + internal->carryover_bytes;
2655 unsigned char *src_end = source + src_bytes;
2656 unsigned char *src_base;
2657 unsigned char *dst = mt->data + mt->nbytes;
2658 unsigned char *dst_end = mt->data + mt->allocated - MAX_UTF8_CHAR_BYTES;
2660 int last_nchars = 0;
2661 int at_most = converter->at_most > 0 ? converter->at_most : -1;
2663 MCharset *charset_roman = coding->charsets[0];
2664 MCharset *charset_kanji = coding->charsets[1];
2665 MCharset *charset_kana = coding->charsets[2];
2666 MCharset *charset = mcharset__ascii;
2671 MCharset *this_charset;
2674 ONE_MORE_BASE_BYTE (c1);
2679 this_charset = ((c1 <= 0x20 || c1 == 0x7F)
2683 else if ((c1 >= 0x81 && c1 <= 0x9F) || (c1 >= 0xE0 && c1 <= 0xEF))
/* Valid Shift_JIS trail bytes are 0x40..0x7E and 0x80..0xFC.  The
   second bound used to be written as decimal 80 and the first range
   ran up to 0x7F, so the invalid trail byte 0x7F was accepted and
   decoded to the same character as 0x7E.  */
2686 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0x80 && c2 <= 0xFC))
2688 this_charset = charset_kanji;
2689 c1 = SJIS_TO_JIS (c1, c2);
2694 else if (c1 >= 0xA1 && c1 <= 0xDF)
2696 this_charset = charset_kana;
2702 c = DECODE_CHAR (this_charset, c1);
2707 if (! converter->lenient)
2709 REWIND_SRC_TO_BASE ();
2711 this_charset = mcharset__binary;
/* When a non-ASCII charset different from the current one starts,
   hand the characters decoded so far to TAKEIN_CHARS under the old
   charset and begin a new run.  */
2714 if (this_charset != mcharset__ascii
2715 && this_charset != charset)
2717 TAKEIN_CHARS (mt, nchars - last_nchars,
2718 dst - (mt->data + mt->nbytes), charset);
2719 charset = this_charset;
2720 last_nchars = nchars;
2724 /* We reach here because of an invalid byte. */
2728 TAKEIN_CHARS (mt, nchars - last_nchars,
2729 dst - (mt->data + mt->nbytes), charset);
2730 return finish_decoding (mt, converter, nchars,
2731 source, src_end, src_base, error);
/* Encode the characters of MT between FROM and TO as Shift_JIS into
   DESTINATION, which has room for DST_BYTES bytes.  Each character
   is tried in coding->charsets[0] (roman), then charsets[1] (kanji,
   converted with JIS_TO_SJIS), then charsets[2] (kana, emitted with
   the high bit set).  On exit CONVERTER->nchars and
   CONVERTER->nbytes are advanced by the amounts processed.

   Return -1 if CONVERTER->result is MCONVERSION_RESULT_INVALID_CHAR,
   otherwise 0.  */
2735 encode_coding_sjis (MText *mt, int from, int to,
2736 unsigned char *destination, int dst_bytes,
2737 MConverter *converter)
2739 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
2740 MCodingSystem *coding = internal->coding;
2741 unsigned char *src, *src_end;
2742 unsigned char *dst = destination;
2743 unsigned char *dst_end = dst + dst_bytes;
2745 MCharset *charset_roman = coding->charsets[0];
2746 MCharset *charset_kanji = coding->charsets[1];
2747 MCharset *charset_kana = coding->charsets[2];
2748 enum MTextFormat format = mt->format;
2750 SET_SRC (mt, format, from, to);
2757 ONE_MORE_CHAR (c, bytes, format);
/* Control characters, SPACE and DEL are tested for first, before
   any charset lookup.  */
2759 if (c <= 0x20 || c == 0x7F)
2766 if ((code = ENCODE_CHAR (charset_roman, c)) != MCHAR_INVALID_CODE)
2771 else if ((code = ENCODE_CHAR (charset_kanji, c))
2772 != MCHAR_INVALID_CODE)
/* Split the two-byte JIS code and convert it to its SJIS form.  */
2774 int c1 = code >> 8, c2 = code & 0xFF;
2775 code = JIS_TO_SJIS (c1, c2);
2778 *dst++ = code & 0xFF;
2780 else if ((code = ENCODE_CHAR (charset_kana, c))
2781 != MCHAR_INVALID_CODE)
2784 *dst++ = code | 0x80;
/* No charset of this coding system can encode C.  */
2788 if (! converter->lenient)
2790 len = encode_unsupporeted_char (c, dst, dst_end,
2793 goto insufficient_destination;
2800 /* We reach here because of an unsupported char. */
2801 converter->result = MCONVERSION_RESULT_INVALID_CHAR;
2804 insufficient_destination:
2805 converter->result = MCONVERSION_RESULT_INSUFFICIENT_DST;
2808 converter->nchars += nchars;
2809 converter->nbytes += dst - destination;
2810 return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0);
/* Return the coding system object stored as the Mcoding property of
   the symbol NAME.  The code below also looks NAME up in
   coding_definition_list, defines the coding system from that
   definition with mconv_define_coding (), and reads the property
   again.  NOTE(review): the conditions guarding those steps are
   elided here; presumably they run only when the first lookup finds
   nothing and a definition exists -- confirm against the full
   source.  */
2814 static MCodingSystem *
2815 find_coding (MSymbol name)
2817 MCodingSystem *coding = (MCodingSystem *) msymbol_get (name, Mcoding);
2821 MPlist *param = mplist_get (coding_definition_list, name);
/* Convert the stored definition with mplist__from_plist () before
   handing it to mconv_define_coding ().  */
2825 param = mplist__from_plist (param);
2826 mconv_define_coding (MSYMBOL_NAME (name), param, NULL, NULL, NULL, NULL);
2827 coding = (MCodingSystem *) msymbol_get (name, Mcoding);
/* Drop the reference to the converted list now that the coding
   system has been defined.  */
2828 M17N_OBJECT_UNREF (param);
2833 #define BINDING_NONE 0
2834 #define BINDING_BUFFER 1
2835 #define BINDING_STREAM 2
2837 #define CONVERT_WORKSIZE 0x10000
2843 mcoding__init (void)
2846 MPlist *param, *charsets, *pl;
2848 MLIST_INIT1 (&coding_list, codings, 128);
2849 coding_definition_list = mplist ();
2851 /* ISO-2022 specific initialize routine. */
2852 for (i = 0; i < 0x20; i++)
2853 iso_2022_code_class[i] = ISO_control_0;
2854 for (i = 0x21; i < 0x7F; i++)
2855 iso_2022_code_class[i] = ISO_graphic_plane_0;
2856 for (i = 0x80; i < 0xA0; i++)
2857 iso_2022_code_class[i] = ISO_control_1;
2858 for (i = 0xA1; i < 0xFF; i++)
2859 iso_2022_code_class[i] = ISO_graphic_plane_1;
2860 iso_2022_code_class[0x20] = iso_2022_code_class[0x7F] = ISO_0x20_or_0x7F;
2861 iso_2022_code_class[0xA0] = iso_2022_code_class[0xFF] = ISO_0xA0_or_0xFF;
2862 iso_2022_code_class[0x0E] = ISO_shift_out;
2863 iso_2022_code_class[0x0F] = ISO_shift_in;
2864 iso_2022_code_class[0x19] = ISO_single_shift_2_7;
2865 iso_2022_code_class[0x1B] = ISO_escape;
2866 iso_2022_code_class[0x8E] = ISO_single_shift_2;
2867 iso_2022_code_class[0x8F] = ISO_single_shift_3;
2868 iso_2022_code_class[0x9B] = ISO_control_sequence_introducer;
2870 Mcoding = msymbol ("coding");
2872 Mutf = msymbol ("utf");
2873 Miso_2022 = msymbol ("iso-2022");
2875 Mreset_at_eol = msymbol ("reset-at-eol");
2876 Mreset_at_cntl = msymbol ("reset-at-cntl");
2877 Meight_bit = msymbol ("eight-bit");
2878 Mlong_form = msymbol ("long-form");
2879 Mdesignation_g0 = msymbol ("designation-g0");
2880 Mdesignation_g1 = msymbol ("designation-g1");
2881 Mdesignation_ctext = msymbol ("designation-ctext");
2882 Mdesignation_ctext_ext = msymbol ("designation-ctext-ext");
2883 Mlocking_shift = msymbol ("locking-shift");
2884 Msingle_shift = msymbol ("single-shift");
2885 Msingle_shift_7 = msymbol ("single-shift-7");
2886 Meuc_tw_shift = msymbol ("euc-tw-shift");
2887 Miso_6429 = msymbol ("iso-6429");
2888 Mrevision_number = msymbol ("revision-number");
2889 Mfull_support = msymbol ("full-support");
2890 Mmaybe = msymbol ("maybe");
2892 Mtype = msymbol ("type");
2893 Mcharsets = msymbol_as_managing_key ("charsets");
2894 Mflags = msymbol_as_managing_key ("flags");
2895 Mdesignation = msymbol_as_managing_key ("designation");
2896 Minvocation = msymbol_as_managing_key ("invocation");
2897 Mcode_unit = msymbol ("code-unit");
2898 Mbom = msymbol ("bom");
2899 Mlittle_endian = msymbol ("little-endian");
2902 charsets = mplist ();
2904 /* Setup predefined codings. */
2905 mplist_set (charsets, Msymbol, Mcharset_ascii);
2906 pl = mplist_add (pl, Mtype, Mcharset);
2907 pl = mplist_add (pl, Mcharsets, charsets);
2908 Mcoding_us_ascii = mconv_define_coding ("us-ascii", param,
2909 NULL, NULL, NULL, NULL);
2912 MSymbol alias = msymbol ("ANSI_X3.4-1968");
2913 MCodingSystem *coding
2914 = (MCodingSystem *) msymbol_get (Mcoding_us_ascii, Mcoding);
2916 msymbol_put (alias, Mcoding, coding);
2917 alias = msymbol__canonicalize (alias);
2918 msymbol_put (alias, Mcoding, coding);
2921 mplist_set (charsets, Msymbol, Mcharset_iso_8859_1);
2922 Mcoding_iso_8859_1 = mconv_define_coding ("iso-8859-1", param,
2923 NULL, NULL, NULL, NULL);
2925 mplist_set (charsets, Msymbol, Mcharset_m17n);
2926 mplist_put (param, Mtype, Mutf);
2927 mplist_put (param, Mcode_unit, (void *) 8);
2928 Mcoding_utf_8_full = mconv_define_coding ("utf-8-full", param,
2929 NULL, NULL, NULL, NULL);
2931 mplist_set (charsets, Msymbol, Mcharset_unicode);
2932 Mcoding_utf_8 = mconv_define_coding ("utf-8", param,
2933 NULL, NULL, NULL, NULL);
2935 mplist_put (param, Mcode_unit, (void *) 16);
2936 mplist_put (param, Mbom, Mmaybe);
2937 #ifndef WORDS_BIGENDIAN
2938 mplist_put (param, Mlittle_endian, Mt);
2940 Mcoding_utf_16 = mconv_define_coding ("utf-16", param,
2941 NULL, NULL, NULL, NULL);
2943 mplist_put (param, Mcode_unit, (void *) 32);
2944 Mcoding_utf_32 = mconv_define_coding ("utf-32", param,
2945 NULL, NULL, NULL, NULL);
2947 mplist_put (param, Mcode_unit, (void *) 16);
2948 mplist_put (param, Mbom, Mnil);
2949 mplist_put (param, Mlittle_endian, Mnil);
2950 Mcoding_utf_16be = mconv_define_coding ("utf-16be", param,
2951 NULL, NULL, NULL, NULL);
2953 mplist_put (param, Mcode_unit, (void *) 32);
2954 Mcoding_utf_32be = mconv_define_coding ("utf-32be", param,
2955 NULL, NULL, NULL, NULL);
2957 mplist_put (param, Mcode_unit, (void *) 16);
2958 mplist_put (param, Mlittle_endian, Mt);
2959 Mcoding_utf_16le = mconv_define_coding ("utf-16le", param,
2960 NULL, NULL, NULL, NULL);
2962 mplist_put (param, Mcode_unit, (void *) 32);
2963 Mcoding_utf_32le = mconv_define_coding ("utf-32le", param,
2964 NULL, NULL, NULL, NULL);
2966 mplist_put (param, Mtype, Mnil);
2967 mplist_set (charsets, Msymbol, Mcharset_ascii);
2968 Mcoding_sjis = mconv_define_coding ("sjis", param,
2971 encode_coding_sjis, NULL);
2973 M17N_OBJECT_UNREF (charsets);
2974 M17N_OBJECT_UNREF (param);
/* Finalizer of the coding module.  For every coding system recorded
   in coding_list it frees the extra_info and extra_spec members (for
   an ISO-2022 coding also the designations array hanging off
   extra_spec), then frees the coding_list storage and drops the
   references held by coding_definition_list and by each value in it.
   NOTE(review): for coding types other than Mutf / Miso_2022,
   extra_info is whatever pointer the caller passed to
   mconv_define_coding (), and it is passed to free () here as well --
   confirm that callers expect to hand over ownership.  */
2980 mcoding__fini (void)
2985 for (i = 0; i < coding_list.used; i++)
2987 MCodingSystem *coding = coding_list.codings[i];
2989 if (coding->extra_info)
2990 free (coding->extra_info);
2991 if (coding->extra_spec)
/* Only the ISO-2022 spec is cast to a struct with a separately
   freed member here.  */
2993 if (coding->type == Miso_2022)
2994 free (((struct iso_2022_spec *) coding->extra_spec)->designations);
2995 free (coding->extra_spec);
2999 MLIST_FREE1 (&coding_list, codings);
/* Unreference each stored definition first, then the list itself.  */
3000 MPLIST_DO (plist, coding_definition_list)
3001 M17N_OBJECT_UNREF (MPLIST_VAL (plist));
3002 M17N_OBJECT_UNREF (coding_definition_list);
/* Define a coding system of type Mcharset whose name is the name of
   SYM and whose charset list contains only SYM.  The default
   resetter, decoder and encoder are requested by passing NULL.  The
   result of mconv_define_coding () is not examined in the lines
   visible here.  */
3006 mconv__define_coding_from_charset (MSymbol sym)
3008 MPlist *param = mplist (), *charsets = mplist ();
3010 mplist_set (charsets, Msymbol, sym);
3011 mplist_add (param, Mtype, Mcharset);
3012 mplist_add (param, Mcharsets, charsets);
3013 mconv_define_coding (msymbol_name (sym), param, NULL, NULL, NULL, NULL);
/* The temporary plists are released once the definition is made.  */
3014 M17N_OBJECT_UNREF (charsets);
3015 M17N_OBJECT_UNREF (param);
/* Register, without defining it yet, a charset-type coding system
   for SYM: unless coding_definition_list already has an entry keyed
   by SYM, store under that key a plist made of the symbols Mtype,
   Mcharset and Mcharsets followed by a plist containing SYM.
   NOTE(review): this presumably mirrors the element layout of the
   definitions added by mcoding__load_from_database () -- confirm.  */
3019 mconv__register_charset_coding (MSymbol sym)
3021 if (! mplist_find_by_key (coding_definition_list, sym))
3023 MPlist *param = mplist (), *charsets = mplist ();
3025 mplist_set (charsets, Msymbol, sym);
3026 mplist_add (param, Msymbol, Mtype);
3027 mplist_add (param, Msymbol, Mcharset);
3028 mplist_add (param, Msymbol, Mcharsets);
3029 mplist_add (param, Mplist, charsets);
/* NOTE(review): PARAM is not unreferenced in the visible lines;
   presumably its reference now belongs to coding_definition_list,
   whose values are unreferenced in mcoding__fini () -- confirm.  */
3030 mplist_put (coding_definition_list, sym, param);
3031 M17N_OBJECT_UNREF (charsets);
/* Load the "coding-list" data from the m17n database and append one
   entry per definition to coding_definition_list.  Each element of
   the loaded list must be a plist whose first element is a symbol
   (the coding system name); the remainder of that plist is stored as
   the definition, keyed by the name.  A malformed element is
   reported through MERROR with the value -1.
   NOTE(review): the error code used is MERROR_CHARSET although this
   is the coding module -- possibly a copy-and-paste leftover.
   NOTE(review): if MERROR returns immediately, DEF_LIST is not
   unreferenced on that path -- confirm against the macro.  */
3037 mcoding__load_from_database ()
3039 MDatabase *mdb = mdatabase_find (msymbol ("coding-list"), Mnil, Mnil, Mnil);
3040 MPlist *def_list, *plist;
3041 MPlist *definitions = coding_definition_list;
3042 int mdebug_mask = MDEBUG_CODING;
3046 MDEBUG_PUSH_TIME ();
3047 def_list = (MPlist *) mdatabase_load (mdb);
3048 MDEBUG_PRINT_TIME ("CODING", (stderr, " to load the data."));
3053 MDEBUG_PUSH_TIME ();
3054 MPLIST_DO (plist, def_list)
3059 if (! MPLIST_PLIST_P (plist))
3060 MERROR (MERROR_CHARSET, -1);
3061 pl = MPLIST_PLIST (plist);
3062 if (! MPLIST_SYMBOL_P (pl))
3063 MERROR (MERROR_CHARSET, -1);
3064 name = MPLIST_SYMBOL (pl);
/* Skip the name; what follows is the definition proper.  */
3065 pl = MPLIST_NEXT (pl);
3066 definitions = mplist_add (definitions, name, pl);
/* Take a reference on the stored tail before DEF_LIST is
   unreferenced below.  */
3067 M17N_OBJECT_REF (pl);
3070 M17N_OBJECT_UNREF (def_list);
3071 MDEBUG_PRINT_TIME ("CODING", (stderr, " to parse the loaded data."));
3077 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
3081 /*** @addtogroup m17nConv */
3085 /***en @name Variables: Symbols representing a coding system */
3086 /***ja @name ÊÑ¿ô: ÄêµÁºÑ¤ß¥³¡¼¥É·Ï¤ò»ØÄꤹ¤ë¤¿¤á¤Î¥·¥ó¥Ü¥ë */
3091 @brief Symbol for the coding system US-ASCII
3093 The symbol #Mcoding_us_ascii has name <tt>"us-ascii"</tt> and
3094 represents a coding system for the CES US-ASCII. */
3097 @brief US-ASCII ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3099 ¥·¥ó¥Ü¥ë #Mcoding_us_ascii ¤Ï <tt>"us-ascii"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3100 CES US-ASCII ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3102 MSymbol Mcoding_us_ascii;
3106 @brief Symbol for the coding system ISO-8859-1
3108 The symbol #Mcoding_iso_8859_1 has name <tt>"iso-8859-1"</tt> and
3109 represents a coding system for the CES ISO-8859-1. */
3112 @brief ISO-8859-1 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3114 ¥·¥ó¥Ü¥ë #Mcoding_iso_8859_1 ¤Ï <tt>"iso-8859-1"</tt> ¤È¤¤¤¦Ì¾Á°
3115 ¤ò»ý¤Á¡¢CES ISO-8859-1 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3117 MSymbol Mcoding_iso_8859_1;
3121 @brief Symbol for the coding system UTF-8
3123 The symbol #Mcoding_utf_8 has name <tt>"utf-8"</tt> and represents
3124 a coding system for the CES UTF-8. */
3127 @brief UTF-8 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3129 ¥·¥ó¥Ü¥ë #Mcoding_utf_8 ¤Ï <tt>"utf-8"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢CES
3130 UTF-8 ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3133 MSymbol Mcoding_utf_8;
3137 @brief Symbol for the coding system UTF-8-FULL
3139 The symbol #Mcoding_utf_8_full has name <tt>"utf-8-full"</tt> and
3140 represents a coding system that is an extension of UTF-8. This
3141 coding system uses the same encoding algorithm as UTF-8 but is not
3142 limited to the Unicode characters. It can encode all characters
3143 supported by the m17n library. */
3146 @brief UTF-8-FULL ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3148 ¥·¥ó¥Ü¥ë #Mcoding_utf_8_full ¤Ï <tt>"utf-8-full"</tt> ¤È¤¤¤¦Ì¾Á°¤ò
3149 »ý¤Á¡¢<tt>"UTF-8"</tt> ¤Î³ÈÄ¥¤Ç¤¢¤ë¥³¡¼¥É·Ï¤ò¼¨¤¹¡£¤³¤Î¥³¡¼¥É·Ï¤Ï
3150 UTF-8 ¤ÈƱ¤¸¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¥¢¥ë¥´¥ê¥º¥à¤òÍѤ¤¤ë¤¬¡¢ÂÐ¾Ý¤Ï Unicode
3151 ¤Îʸ»ú¤Ë¸Â¤é¤Ê¤¤¡£¤Þ¤¿m17n ¥é¥¤¥Ö¥é¥ê¤¬°·¤¦Á´¤Æ¤Îʸ»ú¤ò¥¨¥ó¥³¡¼¥É
3155 MSymbol Mcoding_utf_8_full;
3159 @brief Symbol for the coding system UTF-16
3161 The symbol #Mcoding_utf_16 has name <tt>"utf-16"</tt> and
3162 represents a coding system for the CES UTF-16 (RFC 2781). */
3164 @brief UTF-16 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3166 ¥·¥ó¥Ü¥ë #Mcoding_utf_16 ¤Ï <tt>"utf-16"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3167 CES UTF-16 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£
3170 MSymbol Mcoding_utf_16;
3174 @brief Symbol for the coding system UTF-16BE
3176 The symbol #Mcoding_utf_16be has name <tt>"utf-16be"</tt> and
3177 represents a coding system for the CES UTF-16BE (RFC 2781). */
3180 @brief UTF-16BE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3182 ¥·¥ó¥Ü¥ë #Mcoding_utf_16be ¤Ï <tt>"utf-16be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3183 CES UTF-16BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3185 MSymbol Mcoding_utf_16be;
3189 @brief Symbol for the coding system UTF-16LE
3191 The symbol #Mcoding_utf_16le has name <tt>"utf-16le"</tt> and
3192 represents a coding system for the CES UTF-16LE (RFC 2781). */
3195 @brief UTF-16LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3197 ¥·¥ó¥Ü¥ë #Mcoding_utf_16le ¤Ï <tt>"utf-16le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3198 CES UTF-16LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3200 MSymbol Mcoding_utf_16le;
3204 @brief Symbol for the coding system UTF-32
3206 The symbol #Mcoding_utf_32 has name <tt>"utf-32"</tt> and
3207 represents a coding system for the CES UTF-32 (RFC 2279). */
3210 @brief UTF-32 ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3212 ¥·¥ó¥Ü¥ë #Mcoding_utf_32 ¤Ï <tt>"utf-32"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3213 CES UTF-32 (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3215 MSymbol Mcoding_utf_32;
3219 @brief Symbol for the coding system UTF-32be
3221 The symbol #Mcoding_utf_32be has name <tt>"utf-32be"</tt> and
3222 represents a coding system for the CES UTF-32BE (RFC 2279). */
3224 @brief UTF-32be ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3226 ¥·¥ó¥Ü¥ë #Mcoding_utf_32be ¤Ï <tt>"utf-32be"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3227 CES UTF-32BE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3229 MSymbol Mcoding_utf_32be;
3233 @brief Symbol for the coding system UTF-32LE
3235 The symbol #Mcoding_utf_32le has name <tt>"utf-32le"</tt> and
3236 represents a coding system for the CES UTF-32LE (RFC 2279). */
3238 @brief UTF-32LE ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3240 ¥·¥ó¥Ü¥ë #Mcoding_utf_32le ¤Ï <tt>"utf-32le"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3241 CES UTF-32LE (RFC 2279) ÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3243 MSymbol Mcoding_utf_32le;
3247 @brief Symbol for the coding system SJIS
3249 The symbol #Mcoding_sjis has name <tt>"sjis"</tt> and represents a coding
3250 system for the CES Shift-JIS. */
3252 @brief SJIS ¥³¡¼¥É·Ï¤Î¥·¥ó¥Ü¥ë
3254 ¥·¥ó¥Ü¥ë #Mcoding_sjis has ¤Ï <tt>"sjis"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Á¡¢
3255 CES Shift-JISÍѤΥ³¡¼¥É·Ï¤ò¼¨¤¹¡£ */
3257 MSymbol Mcoding_sjis;
3262 @name Variables: Parameter keys for mconv_define_coding (). */
3264 @name ÊÑ¿ô: mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ */
3269 Parameter key for mconv_define_coding () (which see). */
3271 mconv_define_coding () Íѥѥé¥á¡¼¥¿¥¡¼ (¾ÜºÙ¤Ï mconv_define_coding ()»²¾È). */
3277 MSymbol Mdesignation;
3278 MSymbol Minvocation;
3281 MSymbol Mlittle_endian;
3286 @name Variables: Symbols representing coding system type. */
3288 @name ÊÑ¿ô¡§ ¥³¡¼¥É·Ï¤Î¥¿¥¤¥×¤ò¼¨¤¹¥·¥ó¥Ü¥ë. */
3293 Symbol that can be a value of the #Mtype parameter of a coding
3294 system used in an argument to the mconv_define_coding () function
3297 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¡¢¥³¡¼¥É·Ï¤Î
3298 ¥Ñ¥é¥á¡¼¥¿ #Mtype ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3299 mconv_define_coding ()»²¾È)¡£ */
3309 @name Variables: Symbols appearing in the value of #Mflags parameter. */
3311 @name ÊÑ¿ô¡§ ¥Ñ¥é¥á¡¼¥¿ #Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë. */
3316 Symbol that can be a value of the #Mflags parameter of a coding
3317 system used in an argument to the mconv_define_coding () function
3320 ´Ø¿ô mconv_define_coding () ¤Î°ú¿ô¤È¤·¤ÆÍѤ¤¤é¤ì¤ë¡¢¥³¡¼¥É·Ï¤Î
3321 ¥Ñ¥é¥á¡¼¥¿ #Mflags ¤ÎÃͤȤʤêÆÀ¤ë¥·¥ó¥Ü¥ë¡£(¾ÜºÙ¤Ï
3322 mconv_define_coding ()»²¾È)¡£ */
3323 MSymbol Mreset_at_eol;
3325 MSymbol Mreset_at_cntl;
3328 MSymbol Mdesignation_g0;
3329 MSymbol Mdesignation_g1;
3330 MSymbol Mdesignation_ctext;
3331 MSymbol Mdesignation_ctext_ext;
3332 MSymbol Mlocking_shift;
3333 MSymbol Msingle_shift;
3334 MSymbol Msingle_shift_7;
3335 MSymbol Meuc_tw_shift;
3337 MSymbol Mrevision_number;
3338 MSymbol Mfull_support;
3343 @name Variables: etc
3345 Remaining variables. */
3346 /***ja @name ÊÑ¿ô: ¤½¤Î¾
3352 @brief Symbol whose name is "maybe".
3354 The variable #Mmaybe is a symbol of name <tt>"maybe"</tt>. It is
3355 used as a value of the #Mbom parameter of the function
3356 mconv_define_coding () (which see). */
3358 @brief "maybe"¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¥·¥ó¥Ü¥ë
3360 ÊÑ¿ô #Mmaybe ¤Ï <tt>"maybe"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£¤³¤ì¤Ï´Ø¿ô
3361 mconv_define_coding () ¥Ñ¥é¥á¡¼¥¿ #Mbom ¤ÎÃͤȤ·¤ÆÍѤ¤¤é¤ì¤ë¡£(¾Ü
3362 ºÙ¤Ï mconv_define_coding () »²¾È)¡£ */
3368 @brief The symbol @c Mcoding
3370 Any decoded M-text has a text property whose key is the predefined
3371 symbol @c Mcoding. The name of @c Mcoding is
3372 <tt>"coding"</tt>. */
3375 @brief ¥·¥ó¥Ü¥ë @c Mcoding
3377 ¥Ç¥³¡¼¥É¤µ¤ì¤¿ M-text ¤Ï¤¹¤Ù¤Æ¡¢¥¡¼¤¬ÄêµÁºÑ¤ß¥·¥ó¥Ü¥ë @c Mcoding
3378 ¤Ç¤¢¤ë¤è¤¦¤Ê¥Æ¥¥¹¥È¥×¥í¥Ñ¥Æ¥£¤ò»ý¤Ä¡£¥·¥ó¥Ü¥ë @c Mcoding ¤Ï
3379 <tt>"coding"</tt> ¤È¤¤¤¦Ì¾Á°¤ò»ý¤Ä¡£ */
3387 @brief Define a coding system
3389 The mconv_define_coding () function defines a new coding system
3390 and makes it accessible via a symbol whose name is $NAME. $PLIST
3391 specifies parameters of the coding system as below:
3395 <li> Key is @c Mtype, value is a symbol
3397 The value specifies the type of the coding system. It must be
3398 #Mcharset, #Mutf, #Miso_2022, or #Mnil.
3400 If the type is #Mcharset, $EXTRA_INFO is ignored.
3402 If the type is #Mutf, $EXTRA_INFO must be a pointer to
3405 If the type is #Miso_2022, $EXTRA_INFO must be a pointer to
3406 #MCodingInfoISO2022.
3408 If the type is #Mnil, the arguments $RESETTER, $DECODER, and
3409 $ENCODER must be supplied. $EXTRA_INFO is ignored. Otherwise,
3410 they can be @c NULL and the m17n library provides proper defaults.
3412 <li> Key is #Mcharsets, value is a plist
3414 The value specifies a list of charsets supported by the coding
3415 system. The keys of the plist must be #Msymbol, and the values
3416 must be symbols representing charsets.
3418 <li> Key is #Mflags, value is a plist
3420 If the type is #Miso_2022, the value specifies flags to control
3421 the ISO 2022 interpreter. The keys of the plist must be #Msymbol,
3422 and values must be one of the following.
3428 If this flag exists, designation and invocation status is reset to
3429 the initial state at the end of line.
3431 <li> #Mreset_at_cntl
3433 If this flag exists, designation and invocation status is reset to
3434 the initial state at a control character.
3438 If this flag exists, the graphic plane right is used.
3442 If this flag exists, the over-long escape sequences (ESC '$' '('
3443 <final_byte>) are used for designating the CCS JISX0208.1978,
3444 GB2312, and JISX0208.
3446 <li> #Mdesignation_g0
3448 If this flag and #Mfull_support exist, designates charsets not
3449 listed in the charset list to the graphic register G0.
3451 <li> #Mdesignation_g1
3453 If this flag and #Mfull_support exist, designates charsets not
3454 listed in the charset list to the graphic register G1.
3456 <li> #Mdesignation_ctext
3458 If this flag and #Mfull_support exist, designates charsets not
3459 listed in the charset list to a graphic register G0 or G1 based on
3460 the criteria of the Compound Text.
3462 <li> #Mdesignation_ctext_ext
3464 If this flag and #Mfull_support exist, designates charsets not
3465 listed in the charset list to a graphic register G0 or G1, or use
3466 extended segment for such charsets based on the criteria of the
3469 <li> #Mlocking_shift
3471 If this flag exists, use locking shift.
3475 If this flag exists, use single shift.
3477 <li> #Msingle_shift_7
3479 If this flag exists, use 7-bit single shift code (0x19).
3481 <li> #Meuc_tw_shift
3483 If this flag exists, use a special shifting according to EUC-TW.
3487 This flag is currently ignored.
3489 <li> #Mrevision_number
3491 If this flag exists, use a revision number escape sequence to
3492 designate a charset that has a revision number.
3496 If this flag exists, support all charsets registered in the
3497 International Registry.
3501 <li> Key is #Mdesignation, value is a plist
3503 If the type is #Miso_2022, the value specifies how to designate
3504 each supported charset. The keys of the plist must be
3505 #Minteger, and the values must be numbers indicating a graphic
3506 register. The Nth element value is for the Nth charset of the
3507 charset list. The value 0..3 means that it is assumed that a
3508 charset is already designated to the graphic register 0..3. The
3509 negative value G (-4..-1) means that a charset is not designated
3510 to any register at first, and if necessary, is designated to the
3511 (G+4) graphic register.
3513 <li> Key is #Minvocation, value is a plist
3515 If the type is #Miso_2022, the value specifies how to invoke
3516 each graphic register. The plist length must be one or two. The
3517 keys of the plist must be #Minteger, and the values must be
3518 numbers indicating a graphic register. The value of the first
3519 element specifies which graphic register is invoked to the
3520 graphic plane left. If the length is one, no graphic register is
3521 invoked to the graphic plane right. Otherwise, the value of the
3522 second element specifies which graphic register is invoked to
3523 the graphic plane right.
3525 <li> Key is #Mcode_unit, value is an integer
3527 If the type is #Mutf, the value specifies the bit length of a
3528 code-unit. It must be 8, 16, or 32.
3530 <li> Key is #Mbom, value is a symbol
3532 If the type is #Mutf and the code-unit bit length is 16 or 32,
3533 it specifies whether or not to use BOM (Byte Order Mark). If the
3534 value is #Mnil (default), BOM is not used, else if the value is
3535 #Mmaybe, the existence of BOM is detected at decoding time, else
3538 <li> Key is #Mlittle_endian, value is a symbol
3540 If the type is #Mutf and the code-unit bit length is 16 or 32,
3541 it specifies whether or not the encoding is little endian. If the
3542 value is #Mnil (default), it is big endian, else it is little
3547 $RESETTER is a pointer to a function that resets a converter for
3548 the coding system to the initial status. The pointed function is
3549 called with one argument, a pointer to a converter object.
3551 $DECODER is a pointer to a function that decodes a byte sequence
3552 according to the coding system. The pointed function is called
3553 with four arguments:
3555 @li A pointer to the byte sequence to decode.
3556 @li The number of bytes to decode.
3557 @li A pointer to an M-text to which the decoded characters are appended.
3558 @li A pointer to a converter object.
3560 $DECODER must return 0 if it succeeds. Otherwise it must return -1.
3562 $ENCODER is a pointer to a function that encodes an M-text
3563 according to the coding system. The pointed function is called
3566 @li A pointer to the M-text to encode.
3567 @li The starting position of the encoding.
3568 @li The ending position of the encoding.
3569 @li A pointer to a memory area where the produced bytes are stored.
3570 @li The size of the memory area.
3571 @li A pointer to a converter object.
3573 $ENCODER must return 0 if it succeeds. Otherwise it must return -1.
3575 $EXTRA_INFO is a pointer to a data structure that contains extra
3576 information about the coding system. The type of the data
3577 structure depends on the #Mtype parameter given in $PLIST.
3581 If the operation was successful, mconv_define_coding () returns a
3582 symbol whose name is $NAME. If an error is detected, it returns
3583 #Mnil and assigns an error code to the external variable #merror_code. */
3586 @brief ¥³¡¼¥É·Ï¤òÄêµÁ¤¹¤ë.
3588 ´Ø¿ô mconv_define_coding () ¤Ï¡¢¿·¤·¤¤¥³¡¼¥É·Ï¤òÄêµÁ¤·¡¢¤½¤ì¤ò
3589 $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·¥ó¥Ü¥ë·Ðͳ¤Ç¥¢¥¯¥»¥¹¤Ç¤¤ë¤è¤¦¤Ë¤¹¤ë¡£ $PLIST
3590 ¤Ç¤ÏÄêµÁ¤¹¤ë¥³¡¼¥É·Ï¤Î¥Ñ¥é¥á¡¼¥¿¤ò°Ê²¼¤Î¤è¤¦¤Ë»ØÄꤹ¤ë¡£
3594 <li> ¥¡¼¤¬ @c Mtype ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3596 Ãͤϥ³¡¼¥É·Ï¤Î¥¿¥¤¥×¤òɽ¤·¡¢#Mcharset, #Mutf, #Miso_2022, #Mnil ¤Î
3597 ¤¤¤º¤ì¤«¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3599 ¥¿¥¤¥×¤¬ #Mcharset ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£
3601 ¥¿¥¤¥×¤¬ #Mutf ¤Ê¤é¤Ð $EXTRA_INFO ¤Ï#MCodingInfoUTF ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç
3604 ¥¿¥¤¥×¤¬ #Miso_2022¤Ê¤é¤Ð $EXTRA_INFO ¤Ï#MCodingInfoISO2022 ¤Ø¤Î¥Ý
3605 ¥¤¥ó¥¿¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3607 ¥¿¥¤¥×¤¬ #Mnil ¤Ê¤é¤Ð¡¢°ú¿ô $RESETTER, $DECODER, $ENCODER ¤òÍ¿¤¨¤Ê
3608 ¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£$EXTRA_INFO ¤Ï̵»ë¤µ¤ì¤ë¡£¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¤³¤ì¤é
3609 ¤Ï @c NULL ¤Ç¹½¤ï¤Ê¤¤¡£¤½¤ÎºÝ¤Ë¤Ï m17n ¥é¥¤¥Ö¥é¥ê¤¬Å¬Àڤʥǥե©¥ë
3612 <li> ¥¡¼¤¬ #Mcharsets ¤ÇÃͤ¬ plist ¤Î»þ
3614 ÃͤϤ³¤Î¥³¡¼¥É·Ï¤Ç¥µ¥Ý¡¼¥È¤µ¤ì¤ëʸ»ú¥»¥Ã¥È¤Î¥ê¥¹¥È¤Ç¤¢¤ë¡£plist¤Î¥¡¼¤Ï
3615 #Msymbol¡¢ÃͤÏʸ»ú¥»¥Ã¥È¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤Ç¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
3617 <li> ¥¡¼¤¬ #Mflags Ãͤ¬ plist ¤Î»þ
3619 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢¤³¤ÎÃͤÏ, ISO 2022 ¥¤¥ó¥¿¥×¥ê¥¿ÍѤÎÀ©
3620 ¸æ¥Õ¥é¥Ã¥°¤ò¼¨¤¹¡£plist¤Î¥¡¼¤Ï#Msymbol¡¢Ãͤϰʲ¼¤Î¤¤¤º¤ì¤«¤Ç¤Ê¤¯
3627 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤Ï¹ÔËö¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤Æ
3630 <li> #Mreset_at_cntl
3632 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î»Ø¼¨¤ä¸Æ½Ð¤ÏÀ©¸æʸ»ú¤Ë½Ð²ñ¤Ã¤¿»þ
3633 ÅÀ¤Ç¥ê¥»¥Ã¥È¤µ¤ì¤ÆÅö½é¤Î¾õÂÖ¤ËÌá¤ë¡£
3637 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¿Þ·Áʸ»ú½¸¹ç¤Î±¦È¾Ì̤¬ÍѤ¤¤é¤ì¤ë¡£
3641 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢Ê¸»ú½¸¹ç JISX0208.1978, GB2312, JISX0208 ¤ò»Ø
3642 ¼¨¤¹¤ëºÝ¤Ë over-long ¥¨¥¹¥±¡¼¥×¥·¡¼¥±¥ó¥¹ (ESC '$' '('
3643 <final_byte>) ¤¬ÍѤ¤¤é¤ì¤ë¡£
3645 <li> #Mdesignation_g0
3647 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤
3648 ʸ»ú¥»¥Ã¥È¤ò G0 ½¸¹ç¤Ë»Ø¼¨¤¹¤ë¡£
3650 <li> #Mdesignation_g1
3652 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤
3653 ʸ»ú¥»¥Ã¥È¤ò G1 ½¸¹ç¤Ë»Ø¼¨¤¹¤ë¡£
3655 <li> #Mdesignation_ctext
3657 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤
3658 ʸ»ú¥»¥Ã¥È¤ò G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¥³¥ó¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë
3661 <li> #Mdesignation_ctext_ext
3663 ¤³¤Î¥Õ¥é¥°¤È #Mfull_support ¤¬¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Ë¸½¤ï¤ì¤Ê¤¤
3664 ʸ»ú¥»¥Ã¥È¤ò G0 ½¸¹ç¤Þ¤¿¤Ï G1 ½¸¹ç¤Ë¡¢¤¢¤ë¤¤¤Ï³ÈÄ¥¥»¥°¥á¥ó¥È¤Ë¥³¥ó
3665 ¥Ñ¥¦¥ó¥É¥Æ¥¥¹¥È¤Î´ð½à¤Ë¤½¤Ã¤Æ»Ø¼¨¤¹¤ë¡£
3667 <li> #Mlocking_shift
3669 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥í¥Ã¥¥ó¥°¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3673 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢¥·¥ó¥°¥ë¥·¥Õ¥È¤òÍѤ¤¤ë¡£
3675 <li> #Msingle_shift_7
3677 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢7-bit ¥·¥ó¥°¥ë¥·¥Õ¥È¥³¡¼¥É (0x19) ¤òÍѤ¤¤ë¡£
3679 <li> #Meuc_tw_shift;
3681 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢EUC-TW ¤Ë±è¤Ã¤¿ÆÃÊ̤ʥ·¥Õ¥È¤òÍѤ¤¤ë¡£
3685 ¸½¾õ¤Ç¤ÏÍѤ¤¤é¤ì¤Æ¤¤¤Ê¤¤¡£
3687 <li> #Mrevision_number
3689 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢revision number ¤ò»ý¤Äʸ»ú¥»¥Ã¥È¤ò»Ø¼¨¤¹¤ëºÝ¤Ë
3690 revision number ¥¨¥¹¥±¡¼¥×¥·¡¼¥¯¥¨¥ó¥¹¤òÍѤ¤¤ë¡£
3694 ¤³¤Î¥Õ¥é¥°¤¬¤¢¤ì¤Ð¡¢the International Registry ¤ËÅÐÏ¿¤µ¤ì¤Æ¤¤¤ëÁ´
3695 ʸ»ú¥»¥Ã¥È¤ò¥µ¥Ý¡¼¥È¤¹¤ë¡£
3699 <li> ¥¡¼¤¬ #Mdesignation¤ÇÃͤ¬ plist ¤Î»þ
3701 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢ÃͤϳÆʸ»ú¤ò¤É¤Î¤è¤¦¤Ë»Ø¼¨¤¹¤ë¤«¤ò¼¨¤¹¡£
3702 plist ¤Î¥¡¼¤Ï #Minteger¡¢ÃͤϽ¸¹ç¡Êgraphic register¡Ë¤ò¼¨¤¹¿ô»ú¤Ç
3703 ¤Ê¤¯¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£NÈÖÌܤÎÍ×ÁǤÎÃͤϡ¢Ê¸»ú¥»¥Ã¥È¥ê¥¹¥È¤Î N ÈÖÌÜ ¤Î
3704 ʸ»ú¥»¥Ã¥È¤ËÂбþ¤¹¤ë¡£Ãͤ¬ 0..3 ¤Ç¤¢¤ì¤Ð¡¢Ê¸»ú¥»¥Ã¥È¤¬¤¹¤Ç¤Ë
3705 G0..G3 ¤Ë»Ø¼¨ ¤µ¤ì¤Æ¤¤¤ë¡£
3707 Ãͤ¬Éé(-4..-1) ¤Ç¤¢¤ì¤Ð¡¢½é´ü¾õÂ֤ǤÏʸ»ú¥»¥Ã¥È¤¬¤É¤³¤Ë¤â»Ø¼¨¤µ¤ì
3708 ¤Æ¤¤¤Ê¤¤¤³¤È¡¢É¬ÍפʺݤˤÏG0..G3 ¤Î¤½¤ì¤¾¤ì¤Ë»Ø¼¨¤¹¤ë¤³¤È¤ò°ÕÌ£¤¹
3711 <li> ¥¡¼¤¬ #Minvocation¤ÇÃͤ¬ plist ¤Î»þ
3713 ¥¿¥¤¥×¤¬ #Miso_2022 ¤Ê¤é¤Ð¡¢Ãͤϳƽ¸¹ç¤ò¤É¤Î¤è¤¦¤Ë¸Æ¤Ó½Ð¤¹¤«¤ò¼¨¤¹¡£
3714 plist ¤ÎŤµ¤Ï 1 ¤Ê¤¤¤· 2 ¤Ç¤¢¤ë¡£plist ¤Î¥¡¼¤Ï#Minteger¡¢ÃͤϽ¸
3715 ¹ç¡Êgraphic register)¤ò¼¨¤¹¿ô»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ºÇ½é¤ÎÍ×ÁǤÎÃÍ
3716 ¤¬¿Þ·Áʸ»ú½¸¹çº¸È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤Ç¤¢¤ë¡£ plist ¤ÎŤµ¤¬ 1 ¤Ê
3717 ¤é¤Ð¡¢±¦È¾Ì̤ˤϲ¿¤â¸Æ¤Ó½Ð¤µ¤ì¤Ê¤¤¡£¤½¤¦¤Ç¤±¤ì¤Ð¡¢£²¤Ä¤á¤ÎÍ×ÁǤÎÃÍ
3718 ¤¬¿Þ·Áʸ»ú½¸¹ç±¦È¾Ì̤˸ƤӽФµ¤ì¤ë½¸¹ç¤È¤Ê¤ë¡£
3720 <li> ¥¡¼¤¬ #Mcode_unit ¤ÇÃͤ¬À°¿ôÃͤλþ
3722 ¥¿¥¤¥×¤¬ #Mutf ¤Ê¤é¤Ð¡¢Ãͤϥ³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤǤ¢¤ê¡¢8, 16,
3723 32 ¤Î¤¤¤º¤ì¤«¤Ç¤¢¤ë¡£
3725 <li> ¥¡¼¤¬ #Mbom ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3727 ¥¿¥¤¥×¤¬ #Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32¤Ê¤é¤Ð¡¢ÃͤÏ
3728 BOM (Byte Order Mark) ¤ò»ÈÍѤ¹¤ë¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤÎ
3729 #Mnil ¤Ê¤é¤Ð¡¢»ÈÍѤ·¤Ê¤¤¡£Ãͤ¬#Mmaybe ¤Ê¤é¤Ð¥Ç¥³¡¼¥É»þ¤Ë BOM ¤¬¤¢
3730 ¤ë¤«¤É¤¦¤«¤òÄ´¤Ù¤ë¡£¤½¤ì°Ê³°¤Ê¤é¤Ð»ÈÍѤ¹¤ë¡£
3732 <li> ¥¡¼¤¬ #Mlittle_endian ¤ÇÃͤ¬¥·¥ó¥Ü¥ë¤Î»þ
3734 ¥¿¥¤¥×¤¬ #Mutf ¤Ç¥³¡¼¥É¥æ¥Ë¥Ã¥È¤Î¥Ó¥Ã¥ÈŤ¬ 16 ¤« 32¤Ê¤é¤Ð¡¢Ãͤϥ¨
3735 ¥ó¥³¡¼¥É¤¬ little endian ¤«¤É¤¦¤«¤ò¼¨¤¹¡£Ãͤ¬¥Ç¥Õ¥©¥ë¥ÈÃͤÎ#Mnil
3736 ¤Ê¤é¤Ð big endian ¤Ç¤¢¤ê¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð little endian ¤Ç¤¢¤ë¡£
3740 $RESETTER ¤Ï¤³¤Î¥³¡¼¥É·ÏÍѤΥ³¥ó¥Ð¡¼¥¿¤ò½é´ü¾õÂ֤˥ꥻ¥Ã¥È¤¹¤ë´Ø¿ô
3741 ¤Ø¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£¤³¤Î´Ø¿ô¤Ï¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿¤È
3744 $DECODER ¤Ï¥Ð¥¤¥ÈÎó¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥Ç¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý¥¤
3745 ¥ó¥¿¤Ç¤¢¤ë¡£¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î4°ú¿ô¤ò¤È¤ë¡£
3747 @li ¥Ð¥¤¥ÈÎó¤Ø¤Î¥Ý¥¤¥ó¥¿
3748 @li ¥Ç¥³¡¼¥É¤¹¤Ù¤¥Ð¥¤¥È¿ô
3749 @li ¥Ç¥³¡¼¥É·ë²Ì¤Îʸ»ú¤òÉղ乤ë M-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3750 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3752 $DECODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï0¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï-1¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê
3755 $ENCODER ¤Ï M-text ¤ò¤³¤Î¥³¡¼¥É·Ï¤Ë½¾¤Ã¤Æ¥¨¥ó¥³¡¼¥É¤¹¤ë´Ø¿ô¤Ø¤Î¥Ý
3756 ¥¤¥ó¥¿¤Ç¤¢¤ë¡£¤³¤Î´Ø¿ô¤Ï°Ê²¼¤Î6°ú¿ô¤ò¤È¤ë¡£
3758 @li M-text ¤Ø¤Î¥Ý¥¤¥ó¥¿
3759 @li M-text ¤Î¥¨¥ó¥³¡¼¥É³«»Ï°ÌÃÖ
3760 @li M-text ¤Î¥¨¥ó¥³¡¼¥É½ªÎ»°ÌÃÖ
3761 @li À¸À®¤·¤¿¥Ð¥¤¥È¤òÊÝ»ý¤¹¤ë¥á¥â¥êÎΰè¤Ø¤Î¥Ý¥¤¥ó¥¿
3762 @li ¥á¥â¥êÎΰè¤Î¥µ¥¤¥º
3763 @li ¥³¥ó¥Ð¡¼¥¿¥ª¥Ö¥¸¥§¥¯¥È¤Ø¤Î¥Ý¥¤¥ó¥¿
3765 $ENCODER ¤ÏÀ®¸ù¤·¤¿¤È¤¤Ë¤Ï0¤ò¡¢¼ºÇÔ¤·¤¿¤È¤¤Ë¤Ï-1¤òÊÖ¤µ¤Ê¤¯¤Æ¤Ï¤Ê
3768 $EXTRA_INFO ¤Ï¥³¡¼¥Ç¥£¥°¥·¥¹¥Æ¥à¤Ë´Ø¤¹¤ëÄɲþðÊó¤ò´Þ¤à¥Ç¡¼¥¿¹½Â¤¤Ø
3769 ¤Î¥Ý¥¤¥ó¥¿¤Ç¤¢¤ë¡£¤³¤Î¥Ç¡¼¥¿¹½Â¤¤Î¥¿¥¤¥×¤Ï $TYPE ¤Ë°Í¸¤¹¤ë¡£
3773 ½èÍý¤ËÀ®¸ù¤¹¤ì¤Ð mconv_define_coding () ¤Ï $NAME ¤È¤¤¤¦Ì¾Á°¤Î¥·
3774 ¥ó¥Ü¥ë¤òÊÖ¤¹¡£ ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì
3775 ¤¿¾ì¹ç¤Ï #Mnil ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
3783 mconv_define_coding (char *name, MPlist *plist,
3784 int (*resetter) (MConverter *),
3785 int (*decoder) (unsigned char *, int, MText *,
3787 int (*encoder) (MText *, int, int,
3788 unsigned char *, int,
3792 MSymbol sym = msymbol (name);
3794 MCodingSystem *coding;
3797 MSTRUCT_MALLOC (coding, MERROR_CODING);
3799 if ((coding->type = (MSymbol) mplist_get (plist, Mtype)) == Mnil)
3800 coding->type = Mcharset;
3801 pl = (MPlist *) mplist_get (plist, Mcharsets);
3803 MERROR (MERROR_CODING, Mnil);
3804 coding->ncharsets = mplist_length (pl);
3805 if (coding->ncharsets > NUM_SUPPORTED_CHARSETS)
3806 coding->ncharsets = NUM_SUPPORTED_CHARSETS;
3807 for (i = 0; i < coding->ncharsets; i++, pl = MPLIST_NEXT (pl))
3809 MSymbol charset_name;
3811 if (MPLIST_KEY (pl) != Msymbol)
3812 MERROR (MERROR_CODING, Mnil);
3813 charset_name = MPLIST_SYMBOL (pl);
3814 if (! (coding->charsets[i] = MCHARSET (charset_name)))
3815 MERROR (MERROR_CODING, Mnil);
3818 coding->resetter = resetter;
3819 coding->decoder = decoder;
3820 coding->encoder = encoder;
3821 coding->ascii_compatible = 0;
3822 coding->extra_info = extra_info;
3823 coding->extra_spec = NULL;
3826 if (coding->type == Mcharset)
3828 if (! coding->resetter)
3829 coding->resetter = reset_coding_charset;
3830 if (! coding->decoder)
3831 coding->decoder = decode_coding_charset;
3832 if (! coding->encoder)
3833 coding->encoder = encode_coding_charset;
3835 else if (coding->type == Mutf)
3837 MCodingInfoUTF *info = malloc (sizeof (MCodingInfoUTF));
3840 if (! coding->resetter)
3841 coding->resetter = reset_coding_utf;
3843 info->code_unit_bits = (int) mplist_get (plist, Mcode_unit);
3844 if (info->code_unit_bits == 8)
3846 if (! coding->decoder)
3847 coding->decoder = decode_coding_utf_8;
3848 if (! coding->encoder)
3849 coding->encoder = encode_coding_utf_8;
3851 else if (info->code_unit_bits == 16)
3853 if (! coding->decoder)
3854 coding->decoder = decode_coding_utf_16;
3855 if (! coding->encoder)
3856 coding->encoder = encode_coding_utf_16;
3858 else if (info->code_unit_bits == 32)
3860 if (! coding->decoder)
3861 coding->decoder = decode_coding_utf_32;
3862 if (! coding->encoder)
3863 coding->encoder = encode_coding_utf_32;
3866 MERROR (MERROR_CODING, Mnil);
3867 val = (MSymbol) mplist_get (plist, Mbom);
3870 else if (val == Mmaybe)
3875 info->endian = (mplist_get (plist, Mlittle_endian) ? 1 : 0);
3876 coding->extra_info = info;
3878 else if (coding->type == Miso_2022)
3880 MCodingInfoISO2022 *info = malloc (sizeof (MCodingInfoISO2022));
3882 if (! coding->resetter)
3883 coding->resetter = reset_coding_iso_2022;
3884 if (! coding->decoder)
3885 coding->decoder = decode_coding_iso_2022;
3886 if (! coding->encoder)
3887 coding->encoder = encode_coding_iso_2022;
3889 info->initial_invocation[0] = 0;
3890 info->initial_invocation[1] = -1;
3891 pl = (MPlist *) mplist_get (plist, Minvocation);
3894 if (MPLIST_KEY (pl) != Minteger)
3895 MERROR (MERROR_CODING, Mnil);
3896 info->initial_invocation[0] = MPLIST_INTEGER (pl);
3897 if (! MPLIST_TAIL_P (pl))
3899 pl = MPLIST_NEXT (pl);
3900 if (MPLIST_KEY (pl) != Minteger)
3901 MERROR (MERROR_CODING, Mnil);
3902 info->initial_invocation[1] = MPLIST_INTEGER (pl);
3905 memset (info->designations, 0, sizeof (info->designations));
3906 for (i = 0, pl = (MPlist *) mplist_get (plist, Mdesignation);
3907 i < 32 && pl && MPLIST_KEY (pl) == Minteger;
3908 i++, pl = MPLIST_NEXT (pl))
3909 info->designations[i] = MPLIST_INTEGER (pl);
3912 MPLIST_DO (pl, (MPlist *) mplist_get (plist, Mflags))
3916 if (MPLIST_KEY (pl) != Msymbol)
3917 MERROR (MERROR_CODING, Mnil);
3918 val = MPLIST_SYMBOL (pl);
3919 if (val == Mreset_at_eol)
3920 info->flags |= MCODING_ISO_RESET_AT_EOL;
3921 else if (val == Mreset_at_cntl)
3922 info->flags |= MCODING_ISO_RESET_AT_CNTL;
3923 else if (val == Meight_bit)
3924 info->flags |= MCODING_ISO_EIGHT_BIT;
3925 else if (val == Mlong_form)
3926 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3927 else if (val == Mdesignation_g0)
3928 info->flags |= MCODING_ISO_DESIGNATION_G0;
3929 else if (val == Mdesignation_g1)
3930 info->flags |= MCODING_ISO_DESIGNATION_G1;
3931 else if (val == Mdesignation_ctext)
3932 info->flags |= MCODING_ISO_DESIGNATION_CTEXT;
3933 else if (val == Mdesignation_ctext_ext)
3934 info->flags |= MCODING_ISO_DESIGNATION_CTEXT_EXT;
3935 else if (val == Mlocking_shift)
3936 info->flags |= MCODING_ISO_LOCKING_SHIFT;
3937 else if (val == Msingle_shift)
3938 info->flags |= MCODING_ISO_SINGLE_SHIFT;
3939 else if (val == Msingle_shift_7)
3940 info->flags |= MCODING_ISO_SINGLE_SHIFT_7;
3941 else if (val == Meuc_tw_shift)
3942 info->flags |= MCODING_ISO_EUC_TW_SHIFT;
3943 else if (val == Miso_6429)
3944 info->flags |= MCODING_ISO_ISO6429;
3945 else if (val == Mrevision_number)
3946 info->flags |= MCODING_ISO_REVISION_NUMBER;
3947 else if (val == Mfull_support)
3948 info->flags |= MCODING_ISO_FULL_SUPPORT;
3951 coding->extra_info = info;
3955 if (! coding->decoder || ! coding->encoder)
3956 MERROR (MERROR_CODING, Mnil);
3957 if (! coding->resetter)
3961 msymbol_put (sym, Mcoding, coding);
3962 msymbol_put (msymbol__canonicalize (sym), Mcoding, coding);
3963 plist = (MPlist *) mplist_get (plist, Maliases);
3966 MPLIST_DO (pl, plist)
3970 if (MPLIST_KEY (pl) != Msymbol)
3972 alias = MPLIST_SYMBOL (pl);
3973 msymbol_put (alias, Mcoding, coding);
3974 msymbol_put (msymbol__canonicalize (alias), Mcoding, coding);
3978 MLIST_APPEND1 (&coding_list, codings, coding, MERROR_CODING);
3986 @brief Resolve coding system name.
3988 The mconv_resolve_coding () function returns $SYMBOL if it
3989 represents a coding system. Otherwise, it canonicalizes $SYMBOL as
3990 a coding system name, and if the canonicalized name represents a
3991 coding system, returns it. Otherwise, it returns #Mnil. */
3993 @brief ¥³¡¼¥É·Ï¤Î̾Á°¤ò²ò·è¤¹¤ë.
3995 ´Ø¿ô mconv_resolve_coding () ¤Ï $SYMBOL ¤¬¥³¡¼¥É·Ï¤ò¼¨¤·¤Æ¤¤¤ì¤Ð¤½
3996 ¤ì¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð¥³¡¼¥É·Ï¤Î̾Á°¤È¤·¤Æ $SYMBOL ¤òÀµµ¬²½¤·¡¢
3997 ¤½¤ì¤¬¥³¡¼¥É·Ï¤ò¼¨¤·¤Æ¤¤¤ì¤ÐÀµµ¬²½¤·¤¿Ì¾Á°¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð
/* Look SYMBOL up as a coding system, retrying with its canonicalized
   form, and return the name member of the coding system found, or
   Mnil when neither lookup succeeds.  */
4003 mconv_resolve_coding (MSymbol symbol)
4005 MCodingSystem *coding = find_coding (symbol);
/* Second attempt with the canonicalized spelling of the name.  */
4009 symbol = msymbol__canonicalize (symbol);
4010 coding = find_coding (symbol);
/* The value returned is the name stored in the coding system, not
   SYMBOL itself.  */
4012 return (coding ? coding->name : Mnil);
4019 @brief List symbols representing a coding system.
4021 The mconv_list_codings () function makes an array of symbols
4022 representing a coding system, stores the pointer to the array in a
4023 place pointed to by $SYMBOLS, and returns the length of the array. */
4025 @brief ¥³¡¼¥É·Ï¤òɽ¤ï¤¹¥·¥ó¥Ü¥ë¤òÎóµó¤¹¤ë
4027 ´Ø¿ô mchar_list_codings () ¤Ï¡¢¥³¡¼¥É·Ï¤ò¼¨¤¹¥·¥ó¥Ü¥ë¤òʤ٤¿ÇÛÎó
4028 ¤òºî¤ê¡¢$SYMBOLS ¤Ç¥Ý¥¤¥ó¥È¤µ¤ì¤¿¾ì½ê¤Ë¤³¤ÎÇÛÎó¤Ø¤Î¥Ý¥¤¥ó¥¿¤òÃÖ¤¡¢
4029 ÇÛÎó¤ÎŤµ¤òÊÖ¤¹¡£ */
/* Allocate an array in *SYMBOLS and fill it with the keys of
   coding_definition_list followed by the names of the coding systems
   in coding_list that are not keys of that list.  The array is sized
   for the sum of both lists, so it can be larger than the number of
   entries actually stored.  */
4032 mconv_list_codings (MSymbol **symbols)
4034 int i = coding_list.used + mplist_length (coding_definition_list);
4038 MTABLE_MALLOC ((*symbols), i, MERROR_CODING);
/* NOTE(review): I is reused as the output index below; presumably it
   is reset to 0 on a line not shown in this view -- confirm.  */
4040 MPLIST_DO (plist, coding_definition_list)
4041 (*symbols)[i++] = MPLIST_KEY (plist);
/* Skip names already emitted from coding_definition_list.  */
4042 for (j = 0; j < coding_list.used; j++)
4043 if (! mplist_find_by_key (coding_definition_list,
4044 coding_list.codings[j]->name))
4045 (*symbols)[i++] = coding_list.codings[j]->name;
4052 @brief Create a code converter bound to a buffer.
4054 The mconv_buffer_converter () function creates a pointer to a code
4055 converter for coding system $CODING. The code converter is bound
4056 to buffer area of $N bytes pointed to by $BUF. Subsequent
4057 decodings and encodings are done to/from this buffer area.
4059 $CODING can be #Mnil. In this case, a coding system associated
4060 with the current locale (LC_CTYPE) is used.
4063 If the operation was successful, mconv_buffer_converter () returns
4064 the created code converter. Otherwise it returns @c NULL and
4065 assigns an error code to the external variable #merror_code. */
4068 @brief ¥Ð¥Ã¥Õ¥¡¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë
4070 ´Ø¿ô mconv_buffer_converter () ¤Ï¡¢¥³¡¼¥É·Ï $CODING ÍѤΥ³¡¼¥É¥³¥ó
4071 ¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢$BUF ¤Ç¼¨¤µ¤ì¤ëÂ礤µ $N ¥Ð
4072 ¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤ë¡£¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó
4073 ¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¡£
4075 $CODING ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4076 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4079 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð mconv_buffer_converter () ¤Ï ºîÀ®¤·¤¿¥³¡¼¥É¥³
4080 ¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code
4081 ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4083 @latexonly \IPAlabel{mconverter} @endlatexonly */
4087 @c MERROR_SYMBOL, @c MERROR_CODING
4090 mconv_stream_converter () */
/* Create a converter for the coding system NAME bound to the N-byte
   buffer BUF.  The coding system is looked up with find_coding ();
   failures are reported through MERROR with MERROR_CODING and the
   value NULL.  */
4093 mconv_buffer_converter (MSymbol name, unsigned char *buf, int n)
4095 MCodingSystem *coding;
4096 MConverter *converter;
4097 MConverterStatus *internal;
/* Coding system taken from the LC_CTYPE locale property; per the
   function's documentation this applies when NAME is Mnil.  */
4100 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4101 coding = find_coding (name);
4103 MERROR (MERROR_CODING, NULL);
4104 MSTRUCT_CALLOC (converter, MERROR_CODING);
4105 MSTRUCT_CALLOC (internal, MERROR_CODING);
4106 converter->internal_info = internal;
4107 internal->coding = coding;
/* Let the coding system put the new converter into its initial
   state; a negative result is treated as failure.  */
4108 if (coding->resetter
4109 && (*coding->resetter) (converter) < 0)
4113 MERROR (MERROR_CODING, NULL);
/* Scratch M-texts; work_mt is pre-sized to MAX_UTF8_CHAR_BYTES.  */
4116 internal->unread = mtext ();
4117 internal->work_mt = mtext ();
4118 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
/* Bind the converter to the caller's buffer.  */
4119 internal->buf = buf;
4121 internal->bufsize = n;
4122 internal->binding = BINDING_BUFFER;
4130 @brief Create a code converter bound to a stream.
4132 The mconv_stream_converter () function creates a pointer to a code
4133 converter for coding system $CODING. The code converter is bound
4134 to stream $FP. Subsequent decodings and encodings are done
4135 to/from this stream.
4137 $CODING can be #Mnil. In this case, a coding system associated
4138 with the current locale (LC_CTYPE) is used.
4140 @return If the operation was successful, mconv_stream_converter ()
4141 returns the created code converter. Otherwise it returns @c NULL
4142 and assigns an error code to the external variable @c
4146 @brief ¥¹¥È¥ê¡¼¥à¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òºî¤ë
4148 ´Ø¿ô mconv_stream_converter () ¤Ï¡¢¥³¡¼¥É·Ï $CODING ÍѤΥ³¡¼¥É¥³¥ó
4149 ¥Ð¡¼¥¿¤òºî¤ë¡£¤³¤Î¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP ¤Ë·ë¤ÓÉÕ¤±¤é
4150 ¤ì¤ë¡£¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ
4153 $CODING ¤Ï #Mnil ¤Ç¤¢¤Ã¤Æ¤â¤è¤¤¡£¤³¤Î¾ì¹ç¤Ï¸½ºß¤Î¥í¥±¡¼¥ë
4154 (LC_CTYPE) ¤Ë´ØÏ¢ÉÕ¤±¤é¤ì¤¿¥³¡¼¥É·Ï¤¬»È¤ï¤ì¤ë¡£
4157 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_stream_converter () ¤ÏºîÀ®¤·¤¿
4158 ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô
4159 #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£
4161 @latexonly \IPAlabel{mconverter} @endlatexonly */
4165 @c MERROR_SYMBOL, @c MERROR_CODING
4168 mconv_buffer_converter () */
/* Create a code converter for the coding system NAME and bind it to
   the stream FP.
   NOTE(review): several lines of this function (the conditions
   guarding the statements numbered 4178, 4181 and 4191, the lines
   numbered 4195-4201 and 4209, and the final return) are not visible
   in this listing.  According to the documentation above, the
   function returns the new converter or NULL on error.  */
4171 mconv_stream_converter (MSymbol name, FILE *fp)
4173 MCodingSystem *coding;
4174 MConverter *converter;
4175 MConverterStatus *internal;
/* Coding system taken from the LC_CTYPE locale.  */
4178 name = mlocale_get_prop (mlocale__ctype, Mcoding);
4179 coding = find_coding (name);
4181 MERROR (MERROR_CODING, NULL);
/* Allocate the public converter object and its private status, then
   link the two together.  */
4182 MSTRUCT_CALLOC (converter, MERROR_CODING);
4183 MSTRUCT_CALLOC (internal, MERROR_CODING);
4184 converter->internal_info = internal;
4185 internal->coding = coding;
/* Run the resetter of the coding system, if it has one; a negative
   result is reported as MERROR_CODING.  */
4186 if (coding->resetter
4187 && (*coding->resetter) (converter) < 0)
4191 MERROR (MERROR_CODING, NULL);
/* Probe the stream with a zero-offset fseek and record in
   internal->seekable whether seeking works; mconv_decode () consults
   this flag when choosing how to read.  */
4194 if (fseek (fp, 0, SEEK_CUR) < 0)
4202 internal->seekable = 0;
4205 internal->seekable = 1;
/* unread receives characters pushed back by mconv_ungetc ();
   work_mt is the scratch M-text used by mconv_getc () and
   mconv_putc ().  */
4206 internal->unread = mtext ();
4207 internal->work_mt = mtext ();
4208 mtext__enlarge (internal->work_mt, MAX_UTF8_CHAR_BYTES);
4210 internal->binding = BINDING_STREAM;
4218 @brief Reset a code converter.
4220 The mconv_reset_converter () function resets code converter
4221 $CONVERTER to the initial state.
4224 If $CONVERTER->coding has its own resetter function,
4225 mconv_reset_converter () returns the result of that function
4226 applied to $CONVERTER. Otherwise it returns 0. */
4229 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò¥ê¥»¥Ã¥È¤¹¤ë
4231 ´Ø¿ô mconv_reset_converter () ¤Ï¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤ò½é´ü
4235 ¤â¤· $CONVERTER->coding ¤Ë¥ê¥»¥Ã¥ÈÍѤδؿô¤¬ÄêµÁ¤µ¤ì¤Æ¤¤¤ë¤Ê¤é¤Ð¡¢
4236 mconv_reset_converter () ¤Ï¤½¤Î´Ø¿ô¤Ë $CONVERTER ¤òŬÍѤ·¤¿·ë²Ì¤ò
4237 ÊÖ¤·¡¢¤½¤¦¤Ç¤Ê¤±¤ì¤Ð0¤òÊÖ¤¹¡£ */
/* Put CONVERTER back into its initial state and return the result of
   the resetter of its coding system when one is defined.
   NOTE(review): the fall-through return is not visible in this
   listing; the documentation above says it is 0.  */
4240 mconv_reset_converter (MConverter *converter)
4242 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Clear the per-call counters and the result code.  */
4244 converter->nchars = converter->nbytes = 0;
4245 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Forget carried-over bytes and any pushed-back characters.  */
4246 internal->carryover_bytes = 0;
4247 mtext_reset (internal->unread);
/* Let the coding system reset its own state.  */
4248 if (internal->coding->resetter)
4249 return (*internal->coding->resetter) (converter);
4256 @brief Free a code converter.
4258 The mconv_free_converter () function frees the code converter
4262 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò²òÊü¤¹¤ë
4264 ´Ø¿ô mconv_free_converter () ¤Ï¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤ò²òÊü
/* Free the code converter CONVERTER.
   NOTE(review): only the release of the two internal M-texts is
   visible in this listing; the remaining lines of the function are
   not shown.  */
4268 mconv_free_converter (MConverter *converter)
4270 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Drop the references to the scratch and push-back M-texts.  */
4272 M17N_OBJECT_UNREF (internal->work_mt);
4273 M17N_OBJECT_UNREF (internal->unread);
4281 @brief Bind a buffer to a code converter.
4283 The mconv_rebind_buffer () function binds buffer area of $N bytes
4284 pointed to by $BUF to code converter $CONVERTER. Subsequent
4285 decodings and encodings are done to/from this newly bound buffer
4289 This function always returns $CONVERTER. */
4292 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë¥Ð¥Ã¥Õ¥¡Îΰè¤ò·ë¤ÓÉÕ¤±¤ë
4294 ´Ø¿ô mconv_rebind_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿Â礤µ $N ¥Ð
4295 ¥¤¥È¤Î¥Ð¥Ã¥Õ¥¡Îΰè¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£¤³¤ì
4296 °Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥Ð¥Ã¥Õ¥¡
4297 Îΰè¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4300 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4302 @latexonly \IPAlabel{mconv_rebind_buffer} @endlatexonly */
4306 mconv_rebind_stream () */
/* Bind the N-byte buffer area BUF to CONVERTER, replacing whatever
   was bound before.
   NOTE(review): the line numbered 4314 and the return statement are
   not visible in this listing; the documentation above says the
   function always returns CONVERTER.  */
4309 mconv_rebind_buffer (MConverter *converter, unsigned char *buf, int n)
4311 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Record the new buffer, its size, and the binding kind.  */
4313 internal->buf = buf;
4315 internal->bufsize = n;
4316 internal->binding = BINDING_BUFFER;
4323 @brief Bind a stream to a code converter.
4325 The mconv_rebind_stream () function binds stream $FP to code
4326 converter $CONVERTER. Subsequent decodings and encodings are done
4327 to/from this newly bound stream.
4330 This function always returns $CONVERTER. */
4333 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë¥¹¥È¥ê¡¼¥à¤ò·ë¤ÓÉÕ¤±¤ë
4335 ´Ø¿ô mconv_rebind_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP ¤ò¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿
4336 $CONVERTER ¤Ë·ë¤ÓÉÕ¤±¤ë¡£¤³¤ì°Ê¹ß¤Î¥Ç¥³¡¼¥É¤ª¤è¤Ó¥¨¥ó¥³¡¼¥É¤Ï¡¢
4337 ¤³¤Î¿·¤¿¤Ë·ë¤ÓÉÕ¤±¤é¤ì¤¿¥¹¥È¥ê¡¼¥à¤ËÂФ·¤Æ¹Ô¤Ê¤ï¤ì¤ë¤è¤¦¤Ë¤Ê¤ë¡£
4340 ¤³¤Î´Ø¿ô¤Ï¾ï¤Ë $CONVERTER ¤òÊÖ¤¹¡£
4342 @latexonly \IPAlabel{mconv_rebind_stream} @endlatexonly */
4346 mconv_rebind_buffer () */
/* Bind the stream FP to CONVERTER, replacing whatever was bound
   before.
   NOTE(review): the line numbered 4361 and the return statement are
   not visible in this listing; the documentation above says the
   function always returns CONVERTER.  */
4349 mconv_rebind_stream (MConverter *converter, FILE *fp)
4351 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Probe the stream with a zero-offset fseek and record whether
   seeking works.  */
4353 if (fseek (fp, 0, SEEK_CUR) < 0)
4357 internal->seekable = 0;
4360 internal->seekable = 1;
4362 internal->binding = BINDING_STREAM;
4369 @brief Decode a byte sequence into an M-text.
4371 The mconv_decode () function decodes a byte sequence and appends
4372 the result at the end of M-text $MT. The source byte sequence is
4373 taken from the currently bound buffer area or stream.
4376 If the operation was successful, mconv_decode () returns updated
4377 $MT. Otherwise it returns @c NULL and assigns an error code to
4378 the external variable #merror_code. */
4381 @brief ¥Ð¥¤¥ÈÎó¤ò M-text ¤Ë¥Ç¥³¡¼¥É¤¹¤ë
4383 ´Ø¿ô mconv_decode () ¤Ï¡¢¥Ð¥¤¥ÈÎó¤ò¥Ç¥³¡¼¥É¤·¤Æ¤½¤Î·ë²Ì¤ò M-text
4384 $MT ¤ÎËöÈø¤ËÄɲ乤롣¥Ç¥³¡¼¥É¸µ¤Î¥Ð¥¤¥ÈÎó¤Ï¡¢¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë
4385 ¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é¼è¤é¤ì¤ë¡£
4388 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode () ¤Ï¹¹¿·¤µ¤ì¤¿ $MT ¤òÊÖ¤¹¡£¤½
4389 ¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò
4394 @c MERROR_IO, @c MERROR_CODING
4397 mconv_rebind_buffer (), mconv_rebind_stream (),
4398 mconv_encode (), mconv_encode_range (),
4399 mconv_decode_buffer (), mconv_decode_stream () */
/* Decode bytes from the buffer or stream bound to CONVERTER and
   append the resulting characters to MT.
   NOTE(review): many lines of this function (declarations of n, i and
   limit, loop headers, several conditions, and the value selected by
   the final return) are not visible in this listing.  The
   documentation above says the function returns MT on success and
   NULL otherwise.  */
4402 mconv_decode (MConverter *converter, MText *mt)
4404 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Any non-positive converter->at_most is normalised to -1 here.  */
4405 int at_most = converter->at_most > 0 ? converter->at_most : -1;
4408 M_CHECK_READONLY (mt, NULL);
4411 mtext__enlarge (mt, MAX_UTF8_CHAR_BYTES);
/* Start each call with fresh counters and a clean result code.  */
4413 converter->nchars = converter->nbytes = 0;
4414 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Characters previously pushed back into internal->unread are handed
   out first.  The loop indexes from the last character toward the
   front, so the most recently pushed character is delivered first;
   the consumed tail is then deleted from unread.  */
4416 n = mtext_nchars (internal->unread);
4422 if (at_most > 0 && at_most < limit)
4425 for (i = 0, n -= 1; i < limit; i++, converter->nchars++, n--)
4426 mtext_cat_char (mt, mtext_ref_char (internal->unread, n));
4427 mtext_del (internal->unread, n + 1, internal->unread->nchars);
4430 if (at_most == limit)
4432 converter->at_most -= converter->nchars;
/* Buffer binding: decode the not-yet-consumed part of the buffer and
   advance the consumed offset by the number of bytes decoded.  */
4436 if (internal->binding == BINDING_BUFFER)
4438 (*internal->coding->decoder) (internal->buf + internal->used,
4439 internal->bufsize - internal->used,
4441 internal->used += converter->nbytes;
/* Stream binding: read into a local work area and decode from it.  */
4443 else if (internal->binding == BINDING_STREAM)
4445 unsigned char work[CONVERT_WORKSIZE];
4446 int last_block = converter->last_block;
/* Block reads are considered only when no character limit applies
   and the stream is seekable.  NOTE(review): presumably this flag
   selects between the fread and getc paths below and between the
   fseek and ungetc paths further down -- confirm against the full
   source.  */
4447 int use_fread = at_most < 0 && internal->seekable;
4449 converter->last_block = 0;
4452 int nbytes, prev_nbytes;
4454 if (feof (internal->fp))
4457 nbytes = fread (work, sizeof (unsigned char), CONVERT_WORKSIZE,
4461 int c = getc (internal->fp);
4464 work[0] = c, nbytes = 1;
4469 if (ferror (internal->fp))
4471 converter->result = MCONVERSION_RESULT_IO_ERROR;
4476 converter->last_block = last_block;
4477 prev_nbytes = converter->nbytes;
4478 (*internal->coding->decoder) (work, nbytes, mt, converter);
/* The decoder consumed fewer bytes than were read: give the surplus
   back to the stream, either by seeking backwards by the difference
   or by ungetc of the single byte that was read.  */
4479 if (converter->nbytes - prev_nbytes < nbytes)
4482 fseek (internal->fp, converter->nbytes - prev_nbytes - nbytes,
4485 ungetc (work[0], internal->fp);
4489 || (converter->at_most > 0
4490 && converter->nchars == converter->at_most))
4493 converter->last_block = last_block;
4495 else /* internal->binding == BINDING_NONE */
4496 MERROR (MERROR_CODING, NULL);
/* Store the local at_most back; note that this is the normalised
   value computed at the top of the function.  */
4498 converter->at_most = at_most;
/* Both SUCCESS and INSUFFICIENT_SRC select the first alternative of
   this conditional.  */
4499 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4500 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_SRC)
4507 @brief Decode a buffer area based on a coding system.
4509 The mconv_decode_buffer () function decodes $N bytes of buffer
4510 area pointed to by $BUF based on the coding system $NAME. A
4511 temporary code converter for decoding is automatically created
4515 If the operation was successful, mconv_decode_buffer ()
4516 returns the resulting M-text. Otherwise it returns @c NULL and
4517 assigns an error code to the external variable #merror_code. */
4520 @brief ¥³¡¼¥É·Ï¤Ë´ð¤Å¤¤¤Æ¥Ð¥Ã¥Õ¥¡Îΰè¤ò¥Ç¥³¡¼¥É¤¹¤ë
4522 ´Ø¿ô mconv_decode_buffer () ¤Ï¡¢$BUF ¤Ë¤è¤Ã¤Æ»Ø¤µ¤ì¤¿ $N ¥Ð¥¤¥È¤Î
4523 ¥Ð¥Ã¥Õ¥¡Îΰè¤ò¡¢¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£¥Ç¥³¡¼¥É¤Ë
4524 ɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4527 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_buffer () ¤ÏÆÀ¤é¤ì¤¿ M-text ¤ò
4528 ÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼
4529 ¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4533 @c MERROR_IO, @c MERROR_CODING
4536 mconv_decode (), mconv_decode_stream () */
/* Decode the N bytes at BUF according to the coding system NAME using
   a temporary buffer-bound converter.
   NOTE(review): the declaration and creation of mt, any check of the
   converter, and the return statement are not visible in this
   listing.  The documentation above says the function returns the
   resulting M-text, or NULL on error.  */
4539 mconv_decode_buffer (MSymbol name, unsigned char *buf, int n)
4541 MConverter *converter = mconv_buffer_converter (name, buf, n);
/* On decoding failure the M-text is released.  */
4547 if (! mconv_decode (converter, mt))
4549 M17N_OBJECT_UNREF (mt);
/* The temporary converter is freed.  */
4552 mconv_free_converter (converter);
4559 @brief Decode a stream input based on a coding system.
4561 The mconv_decode_stream () function decodes the entire byte
4562 sequence read in from stream $FP based on the coding system $NAME.
4563 A code converter for decoding is automatically created and freed.
4566 If the operation was successful, mconv_decode_stream () returns
4567 the resulting M-text. Otherwise it returns @c NULL and assigns an
4568 error code to the external variable #merror_code. */
4571 @brief ¥³¡¼¥É·Ï¤Ë´ð¤Å¤¤¤Æ¥¹¥È¥ê¡¼¥àÆþÎϤò¥Ç¥³¡¼¥É¤¹¤ë
4573 ´Ø¿ô mconv_decode_stream () ¤Ï¡¢¥¹¥È¥ê¡¼¥à $FP ¤«¤éÆɤ߹þ¤Þ¤ì¤ë¥Ð
4574 ¥¤¥ÈÎóÁ´ÂΤò¡¢¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤¤Æ¥Ç¥³¡¼¥É¤¹¤ë¡£¥Ç¥³¡¼¥É¤Ëɬ
4575 Íפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4578 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_decode_stream () ¤ÏÆÀ¤é¤ì¤¿ M-text ¤òÊÖ
4579 ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c NULL ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼
4584 @c MERROR_IO, @c MERROR_CODING
4587 mconv_decode (), mconv_decode_buffer () */
/* Decode the byte sequence read from FP according to the coding
   system NAME using a temporary stream-bound converter.
   NOTE(review): the declaration and creation of mt, any check of the
   converter, and the return statement are not visible in this
   listing.  The documentation above says the function returns the
   resulting M-text, or NULL on error.  */
4590 mconv_decode_stream (MSymbol name, FILE *fp)
4592 MConverter *converter = mconv_stream_converter (name, fp);
/* On decoding failure the M-text is released.  */
4598 if (! mconv_decode (converter, mt))
4600 M17N_OBJECT_UNREF (mt);
/* The temporary converter is freed.  */
4603 mconv_free_converter (converter);
4609 /***en @brief Encode an M-text into a byte sequence.
4611 The mconv_encode () function encodes M-text $MT and writes the
4612 resulting byte sequence into the buffer area or the stream that is
4613 currently bound to code converter $CONVERTER.
4616 If the operation was successful, mconv_encode () returns the
4617 number of written bytes. Otherwise it returns -1 and assigns an
4618 error code to the external variable #merror_code. */
4621 @brief M-text ¤ò¥Ð¥¤¥ÈÎó¤Ë¥¨¥ó¥³¡¼¥É¤¹¤ë
4623 ´Ø¿ô mconv_encode () ¤Ï¡¢M-text $MT ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼
4624 ¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼
4628 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
4629 ¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄê
4634 @c MERROR_IO, @c MERROR_CODING
4637 mconv_rebind_buffer (), mconv_rebind_stream(),
4638 mconv_decode (), mconv_encode_range () */
/* Encode the whole of MT through CONVERTER.  This is
   mconv_encode_range () applied to the range from 0 to the number of
   characters in MT, and it returns whatever that call returns.  */
4641 mconv_encode (MConverter *converter, MText *mt)
4643 return mconv_encode_range (converter, mt, 0, mtext_nchars (mt));
4649 @brief Encode a part of an M-text
4651 The mconv_encode_range () function encodes the text between $FROM
4652 (inclusive) and $TO (exclusive) in M-text $MT and writes the
4653 resulting byte sequence into the buffer area or the stream that is
4654 currently bound to code converter $CONVERTER.
4657 If the operation was successful, mconv_encode_range () returns the
4658 number of written bytes. Otherwise it returns -1 and assigns an
4659 error code to the external variable #merror_code. */
4662 @brief M-text ¤Î°ìÉô¤ò¤ò¥Ð¥¤¥ÈÎó¤Ë¥¨¥ó¥³¡¼¥É¤¹¤ë
4664 ´Ø¿ô mconv_encode_range () ¤Ï¡¢M-text $MT ¤Î $FROM ¡Ê´Þ¤à¡Ë¤«¤é
4665 $TO ¡Ê´Þ¤Þ¤Ê¤¤¡Ë¤Þ¤Ç¤ÎÈϰϤΥƥ¥¹¥È¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼
4666 ¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼
4670 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_range () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô
4671 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð -1 ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼
4676 @c MERROR_RANGE, @c MERROR_IO, @c MERROR_CODING
4679 mconv_rebind_buffer (), mconv_rebind_stream(),
4680 mconv_decode (), mconv_encode () */
/* Encode the characters of MT between FROM (inclusive) and TO
   (exclusive) and write the bytes to the buffer or stream bound to
   CONVERTER.  Returns converter->nbytes when the final result code is
   SUCCESS or INSUFFICIENT_DST, and -1 otherwise.
   NOTE(review): several lines of the stream branch (loop header,
   declarations of written and this_nbytes, the bodies of some
   conditions) are not visible in this listing.  */
4683 mconv_encode_range (MConverter *converter, MText *mt, int from, int to)
4685 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Position checks; -1 is the value handed to the macro for the error
   case.  */
4687 M_CHECK_POS_X (mt, from, -1);
4688 M_CHECK_POS_X (mt, to, -1);
/* When at_most is positive, shorten the range so that no more than
   at_most characters are encoded.  */
4692 if (converter->at_most > 0 && from + converter->at_most < to)
4693 to = from + converter->at_most;
/* Start each call with fresh counters and a clean result code.  */
4695 converter->nchars = converter->nbytes = 0;
4696 converter->result = MCONVERSION_RESULT_SUCCESS;
/* Attach the name of the coding system to the range as the Mcoding
   text property.  Note that this modifies MT.  */
4698 mtext_put_prop (mt, from, to, Mcoding, internal->coding->name);
/* Buffer binding: encode into the unused part of the buffer and
   advance the used offset by the number of bytes produced.  */
4699 if (internal->binding == BINDING_BUFFER)
4701 (*internal->coding->encoder) (mt, from, to,
4702 internal->buf + internal->used,
4703 internal->bufsize - internal->used,
4705 internal->used += converter->nbytes;
/* Stream binding: encode into a local work area, then write the
   bytes produced by this step to the stream.  */
4707 else if (internal->binding == BINDING_STREAM)
4709 unsigned char work[CONVERT_WORKSIZE];
4714 int prev_nbytes = converter->nbytes;
4717 (*internal->coding->encoder) (mt, from, to, work,
4718 CONVERT_WORKSIZE, converter);
4719 this_nbytes = converter->nbytes - prev_nbytes;
/* fwrite may write fewer items than requested, so keep writing the
   remainder until everything is out or the stream reports an
   error.  */
4720 while (written < this_nbytes)
4722 int wrtn = fwrite (work + written, sizeof (unsigned char),
4723 this_nbytes - written, internal->fp);
4725 if (ferror (internal->fp))
/* A short write is recorded as an I/O error.  */
4729 if (written < this_nbytes)
4731 converter->result = MCONVERSION_RESULT_IO_ERROR;
/* Advance the start position by the character count reported by the
   converter.  */
4734 from += converter->nchars;
4737 else /* fail safe */
4738 MERROR (MERROR_CODING, -1);
4740 return ((converter->result == MCONVERSION_RESULT_SUCCESS
4741 || converter->result == MCONVERSION_RESULT_INSUFFICIENT_DST)
4742 ? converter->nbytes : -1);
4748 @brief Encode an M-text into a buffer area.
4750 The mconv_encode_buffer () function encodes M-text $MT based on
4751 coding system $NAME and writes the resulting byte sequence into the
4752 buffer area pointed to by $BUF. At most $N bytes are written. A
4753 temporary code converter for encoding is automatically created
4757 If the operation was successful, mconv_encode_buffer () returns
4758 the number of written bytes. Otherwise it returns -1 and assigns
4759 an error code to the external variable #merror_code. */
4762 @brief M-text ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¥Ð¥Ã¥Õ¥¡Îΰè¤Ë½ñ¤¹þ¤à
4764 ´Ø¿ô mconv_encode_buffer () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤
4765 ¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò $BUF ¤Î»Ø¤¹¥Ð¥Ã¥Õ¥¡Îΰè¤Ë½ñ¤¹þ
4766 ¤à¡£$N ¤Ï½ñ¤¹þ¤àºÇÂç¥Ð¥¤¥È¿ô¤Ç¤¢¤ë¡£¥¨¥ó¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó
4767 ¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4770 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_buffer () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È
4771 ¿ô¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð-1¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼
4776 @c MERROR_IO, @c MERROR_CODING
4779 mconv_encode (), mconv_encode_stream () */
/* Encode MT according to the coding system NAME into the N-byte
   buffer BUF using a temporary buffer-bound converter.
   NOTE(review): the declaration of ret, any check of the converter,
   and the return statement are not visible in this listing.  The
   documentation above says the function returns the number of bytes
   written, or -1 on error.  */
4782 mconv_encode_buffer (MSymbol name, MText *mt, unsigned char *buf, int n)
4784 MConverter *converter = mconv_buffer_converter (name, buf, n);
/* Encode the whole M-text, then free the temporary converter.  */
4789 ret = mconv_encode (converter, mt);
4790 mconv_free_converter (converter);
4797 @brief Encode an M-text to write to a stream.
4799 The mconv_encode_stream () function encodes M-text $MT based on
4800 coding system $NAME and writes the resulting byte sequence to
4801 stream $FP. A temporary code converter for encoding is
4802 automatically created and freed.
4805 If the operation was successful, mconv_encode_stream () returns
4806 the number of written bytes. Otherwise it returns -1 and assigns
4807 an error code to the external variable #merror_code. */
4810 @brief M-text ¤ò¥¨¥ó¥³¡¼¥É¤·¤Æ¥¹¥È¥ê¡¼¥à¤Ë½ñ¤¹þ¤à
4812 ´Ø¿ô mconv_encode_stream () ¤ÏM-text $MT ¤ò¥³¡¼¥É·Ï $NAME ¤Ë´ð¤Å¤¤
4813 ¤Æ¥¨¥ó¥³¡¼¥É¤·¡¢ÆÀ¤é¤ì¤¿¥Ð¥¤¥ÈÎó¤ò¥¹¥È¥ê¡¼¥à $FP ¤Ë½ñ¤½Ð¤¹¡£¥¨¥ó
4814 ¥³¡¼¥É¤ËɬÍפʥ³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ÎºîÀ®¤È²òÊü¤Ï¼«Æ°Åª¤Ë¹Ô¤Ê¤ï¤ì¤ë¡£
4817 ¤â¤·½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_encode_stream () ¤Ï½ñ¤¹þ¤Þ¤ì¤¿¥Ð¥¤¥È¿ô
4818 ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð-1¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É
4823 @c MERROR_IO, @c MERROR_CODING
4826 mconv_encode (), mconv_encode_buffer (), mconv_encode_file () */
/* Encode MT according to the coding system NAME and write the bytes
   to FP using a temporary stream-bound converter.
   NOTE(review): the declaration of ret, any check of the converter,
   and the return statement are not visible in this listing.  The
   documentation above says the function returns the number of bytes
   written, or -1 on error.  */
4829 mconv_encode_stream (MSymbol name, MText *mt, FILE *fp)
4831 MConverter *converter = mconv_stream_converter (name, fp);
/* Encode the whole M-text, then free the temporary converter.  */
4836 ret = mconv_encode (converter, mt);
4837 mconv_free_converter (converter);
4844 @brief Read a character via a code converter.
4846 The mconv_getc () function reads one character from the buffer
4847 area or the stream that is currently bound to code converter
4848 $CONVERTER. The decoder of $CONVERTER is used to decode the byte
4849 sequence. The internal status of $CONVERTER is updated
4853 If the operation was successful, mconv_getc () returns the
4854 character read in. If the input source reaches EOF, it returns @c
4855 EOF without changing the external variable #merror_code. If an
4856 error is detected, it returns @c EOF and assigns an error code to
4860 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿·Ðͳ¤Ç1ʸ»úÆɤà
4862 ´Ø¿ô mconv_getc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±
4863 ¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é1ʸ»ú¤òÆɤ߹þ¤à¡£¥Ð¥¤
4864 ¥ÈÎó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£$CONVERTER
4865 ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4868 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_getc () ¤ÏÆɤ߹þ¤Þ¤ì¤¿Ê¸»ú¤òÊÖ¤¹¡£ÆþÎϸ»¤¬
4869 EOF ¤Ë㤷¤¿¾ì¹ç¤Ï¡¢³°ÉôÊÑ¿ô #merror_code ¤òÊѤ¨¤º¤Ë @c EOF ¤òÊÖ¤¹¡£
4870 ¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c EOF ¤òÊÖ¤·¡¢#merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É
4878 mconv_ungetc (), mconv_putc (), mconv_gets () */
/* Read one character through CONVERTER.
   NOTE(review): the second alternative of the returned conditional is
   not visible in this listing; the documentation above says EOF is
   returned at end of input or on error.  */
4881 mconv_getc (MConverter *converter)
4883 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Remember the at_most setting so it can be restored afterwards.  */
4884 int at_most = converter->at_most;
/* Decode into the emptied scratch M-text with the character limit
   temporarily set to 1.  */
4886 mtext_reset (internal->work_mt);
4887 converter->at_most = 1;
4888 mconv_decode (converter, internal->work_mt);
4889 converter->at_most = at_most;
/* Exactly one decoded character means success; it is extracted from
   the data of the scratch M-text.  */
4890 return (converter->nchars == 1
4891 ? STRING_CHAR (internal->work_mt->data)
4898 @brief Push a character back to a code converter.
4900 The mconv_ungetc () function pushes character $C back to code
4901 converter $CONVERTER. Any number of characters can be pushed
4902 back. The character pushed back last is read first by the
4903 subsequent mconv_getc () call. The characters pushed back are
4904 registered only in $CONVERTER; they are not written to the input
4905 source. The internal status of $CONVERTER is updated
4909 If the operation was successful, mconv_ungetc () returns $C.
4910 Otherwise it returns @c EOF and assigns an error code to the
4911 external variable #merror_code. */
4914 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤Ë1ʸ»úÌ᤹
4916 ´Ø¿ô mconv_ungetc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ëʸ»ú $C ¤ò
4917 ²¡¤·Ì᤹¡£Ì᤹ʸ»ú¿ô¤ËÀ©¸Â¤Ï¤Ê¤¤¡£¤³¤Î¸å¤Ë mconv_getc () ¤ò¸Æ¤Ó½Ð
4918 ¤¹¤È¡¢ºÇ¸å¤ËÌᤵ¤ì¤¿Ê¸»ú¤¬ºÇ½é¤ËÆɤޤì¤ë¡£Ìᤵ¤ì¤¿Ê¸»ú¤Ï
4919 $CONVERTER ¤ÎÆâÉô¤ËÃߤ¨¤é¤ì¤ë¤À¤±¤Ç¤¢¤ê¡¢¼ÂºÝ¤ËÆþÎϸ»¤Ë½ñ¤¹þ¤Þ¤ì
4920 ¤ë¤ï¤±¤Ç¤Ï¤Ê¤¤¡£$CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4923 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_ungetc () ¤Ï $C ¤òÊÖ¤¹¡£¤½¤¦¤Ç¤Ê¤±¤ì¤Ð @c
4924 EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4928 @c MERROR_CODING, @c MERROR_CHAR
4931 mconv_getc (), mconv_putc (), mconv_gets () */
/* Push character C back to CONVERTER.
   NOTE(review): the return statement is not visible in this listing;
   the documentation above says C is returned on success and EOF on
   error.  */
4934 mconv_ungetc (MConverter *converter, int c)
4936 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Character check; EOF is the value handed to the macro for the
   error case.  */
4938 M_CHECK_CHAR (c, EOF);
4940 converter->result = MCONVERSION_RESULT_SUCCESS;
/* The character is appended to the internal unread M-text only;
   nothing is written to the input source.  mconv_decode () consumes
   this M-text from its end.  */
4941 mtext_cat_char (internal->unread, c);
4948 @brief Write a character via a code converter.
4950 The mconv_putc () function writes character $C to the buffer area
4951 or the stream that is currently bound to code converter
4952 $CONVERTER. The encoder of $CONVERTER is used to encode the
4953 character. The number of bytes actually written is set to the @c
4954 nbytes member of $CONVERTER. The internal status of $CONVERTER
4955 is updated appropriately.
4958 If the operation was successful, mconv_putc () returns $C.
4959 If an error is detected, it returns @c EOF and assigns
4960 an error code to the external variable #merror_code. */
4963 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò·Ðͳ¤Ç1ʸ»ú½ñ¤¯
4965 ´Ø¿ô mconv_putc () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±
4966 ¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤Ëʸ»ú $C ¤ò½ñ¤½Ð¤¹¡£Ê¸»ú
4967 ¤Î¥¨¥ó¥³¡¼¥É¤Ë¤Ï $CONVERTER ¤Î¥¨¥ó¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¼ÂºÝ¤Ë½ñ¤½Ð
4968 ¤µ¤ì¤¿¥Ð¥¤¥È¿ô¤Ï¡¢$CONVERTER ¤Î ¥á¥ó¥Ð¡¼ @c nbytes ¤Ë¥»¥Ã¥È¤µ¤ì¤ë¡£
4969 $CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
4972 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_putc () ¤Ï $C ¤òÊÖ¤¹¡£¥¨¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç
4973 ¤Ï @c EOF ¤òÊÖ¤·¡¢³°ÉôÊÑ¿ô #merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤òÀßÄꤹ¤ë¡£ */
4977 @c MERROR_CODING, @c MERROR_IO, @c MERROR_CHAR
4980 mconv_getc (), mconv_ungetc (), mconv_gets () */
/* Write character C through CONVERTER.
   NOTE(review): the statements following the final condition are not
   visible in this listing; the documentation above says C is returned
   on success and EOF on error.  */
4983 mconv_putc (MConverter *converter, int c)
4985 MConverterStatus *internal = (MConverterStatus *) converter->internal_info;
/* Character check; EOF is the value handed to the macro for the
   error case.  */
4987 M_CHECK_CHAR (c, EOF);
/* Build a one-character M-text in the scratch area and encode it.  */
4988 mtext_reset (internal->work_mt);
4989 mtext_cat_char (internal->work_mt, c);
/* A negative result from mconv_encode_range () signals failure.  */
4990 if (mconv_encode_range (converter, internal->work_mt, 0, 1) < 0)
4998 @brief Read a line using a code converter.
5000 The mconv_gets () function reads one line from the buffer area or
5001 the stream that is currently bound to code converter $CONVERTER.
5002 The decoder of $CONVERTER is used for decoding. The decoded
5003 character sequence is appended at the end of M-text $MT. The
5004 final newline character in the original byte sequence is not
5005 appended. The internal status of $CONVERTER is updated
5009 If the operation was successful, mconv_gets () returns the
5010 modified $MT. If it encounters EOF without reading a single
5011 character, it returns $MT without changing it. If an error is
5012 detected, it returns @c NULL and assigns an error code to @c
5016 @brief ¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿¤ò»È¤Ã¤Æ1¹ÔÆɤà
5018 ´Ø¿ô mconv_gets () ¤Ï¡¢¥³¡¼¥É¥³¥ó¥Ð¡¼¥¿ $CONVERTER ¤Ë¸½ºß·ë¤ÓÉÕ¤±
5019 ¤é¤ì¤Æ¤¤¤ë¥Ð¥Ã¥Õ¥¡Îΰ褢¤ë¤¤¤Ï¥¹¥È¥ê¡¼¥à¤«¤é1¹Ô¤òÆɤ߹þ¤à¡£¥Ð¥¤¥È
5020 Îó¤Î¥Ç¥³¡¼¥É¤Ë¤Ï $CONVERTER ¤Î¥Ç¥³¡¼¥À¤¬ÍѤ¤¤é¤ì¤ë¡£¥Ç¥³¡¼¥É¤µ¤ì¤¿
5021 ʸ»úÎó¤Ï M-text $MT ¤ÎËöÈø¤ËÄɲ䵤ì¤ë¡£¸µ¤Î¥Ð¥¤¥ÈÎó¤Î½ªÃ¼²þ¹Ôʸ»ú
5022 ¤ÏÄɲ䵤ì¤Ê¤¤¡£$CONVERTER ¤ÎÆâÉô¾õÂÖ¤ÏɬÍפ˱þ¤¸¤Æ¹¹¿·¤µ¤ì¤ë¡£
5025 ½èÍý¤¬À®¸ù¤¹¤ì¤Ð¡¢mconv_gets () ¤ÏÊѹ¹¤µ¤ì¤¿ $MT ¤òÊÖ¤¹¡£¤â¤·1ʸ»ú
5026 ¤âÆɤޤº¤Ë EOF ¤ËÅö¤¿¤Ã¤¿¾ì¹ç¤Ï¡¢$MT ¤òÊѹ¹¤»¤º¤Ë¤½¤Î¤Þ¤ÞÊÖ¤¹¡£¥¨
5027 ¥é¡¼¤¬¸¡½Ð¤µ¤ì¤¿¾ì¹ç¤Ï @c NULL ¤òÊÖ¤·¡¢#merror_code ¤Ë¥¨¥é¡¼¥³¡¼¥É¤ò
5035 mconv_getc (), mconv_ungetc (), mconv_putc () */
5038 mconv_gets (MConverter *converter, MText *mt)
5042 M_CHECK_READONLY (mt, NULL);
5045 c = mconv_getc (converter);
5046 if (c == EOF || c == '\n')
5048 mtext_cat_char (mt, c);
5050 if (c == EOF && converter->result != MCONVERSION_RESULT_SUCCESS)
5051 /* mconv_getc () sets merror_code */