(char_encode_as_entity_reference): New function.
authortomo <tomo>
Sat, 10 Aug 2002 15:12:36 +0000 (15:12 +0000)
committertomo <tomo>
Sat, 10 Aug 2002 15:12:36 +0000 (15:12 +0000)
(char_encode_utf8): Use `char_encode_as_entity_reference'.

src/text-coding.c

index b9f3b20..f7cbe9e 100644 (file)
@@ -2281,199 +2281,6 @@ struct decoding_stream
   struct detection_state decst;
 };
 
-#ifdef UTF2000
-extern Lisp_Object Qcomposition;
-
-INLINE_HEADER void
-decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst);
-INLINE_HEADER void
-decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst)
-{
-  if ( str->er_counter > 0)
-    {
-      Dynarr_add_many (dst, str->er_buf, str->er_counter);
-      str->er_counter = 0;
-    }
-}
-
-void decode_add_er_char (struct decoding_stream *str, Emchar character,
-                        unsigned_char_dynarr* dst);
-void
-decode_add_er_char (struct decoding_stream *str, Emchar c,
-                   unsigned_char_dynarr* dst)
-{
-  if (str->er_counter == 0)
-    {
-      if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys)
-         && (c == '&') )
-       {
-         str->er_buf[0] = '&';
-         str->er_counter++;
-       }
-      else
-       DECODE_ADD_UCS_CHAR (c, dst);
-    }
-  else if (c == ';')
-    {
-      Lisp_Object string = make_string (str->er_buf,
-                                       str->er_counter);
-      Lisp_Object rest = Vcoded_charset_entity_reference_alist;
-      Lisp_Object cell;
-      Lisp_Object ret;
-      Lisp_Object pat;
-      Lisp_Object ccs;
-      int base;
-
-      while (!NILP (rest))
-       {                     
-         cell = Fcar (rest);
-         ccs = Fcar (cell);
-         if (NILP (ccs = Ffind_charset (ccs)))
-           continue;
-
-         cell = Fcdr (cell);
-         ret = Fcar (cell);
-         if (STRINGP (ret))
-           pat = ret;
-         else
-           continue;
-
-         cell = Fcdr (cell);
-         cell = Fcdr (cell);
-         ret = Fcar (cell);
-         if (EQ (ret, Qd))
-           {
-             pat = concat3 (build_string ("^&"),
-                            pat, build_string ("\\([0-9]+\\)$"));
-             base = 10;
-           }
-         else if (EQ (ret, Qx))
-           {
-             pat = concat3 (build_string ("^&"),
-                            pat, build_string ("\\([0-9a-f]+\\)$"));
-             base = 16;
-           }
-         else if (EQ (ret, QX))
-           {
-             pat = concat3 (build_string ("^&"),
-                            pat, build_string ("\\([0-9A-F]+\\)$"));
-             base = 16;
-           }
-         else
-           continue;
-
-         if (!NILP (Fstring_match (pat, string, Qnil, Qnil)))
-           {
-             int code
-               = XINT (Fstring_to_number
-                       (Fsubstring (string,
-                                    Fmatch_beginning (make_int (1)),
-                                    Fmatch_end (make_int (1))),
-                        make_int (base)));
-
-             DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst);
-             goto decoded;
-           }
-         rest = Fcdr (rest);
-       }
-      if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"),
-                               string, Qnil, Qnil)))
-       {
-         int code
-           = XINT (Fstring_to_number
-                   (Fsubstring (string,
-                                Fmatch_beginning (make_int (1)),
-                                Fmatch_end (make_int (1))),
-                    make_int (16)));
-
-         DECODE_ADD_UCS_CHAR (code, dst);
-       }
-      else
-       {
-         Dynarr_add_many (dst, str->er_buf, str->er_counter);
-         Dynarr_add (dst, ';');
-       }
-    decoded:
-      str->er_counter = 0;
-    }
-  else if ( (str->er_counter >= 16) || (c >= 0x7F) )
-    {
-      Dynarr_add_many (dst, str->er_buf, str->er_counter);
-      str->er_counter = 0;
-      DECODE_ADD_UCS_CHAR (c, dst);
-    }
-  else
-    str->er_buf[str->er_counter++] = c;
-}
-
-INLINE_HEADER void
-COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst);
-INLINE_HEADER void
-COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst)
-{
-  unsigned i;
-
-  for (i = 0; i < str->combined_char_count; i++)
-    decode_add_er_char (str, str->combined_chars[i], dst);
-  str->combined_char_count = 0;
-  str->combining_table = Qnil;
-}
-
-void COMPOSE_ADD_CHAR (struct decoding_stream *str, Emchar character,
-                      unsigned_char_dynarr* dst);
-void
-COMPOSE_ADD_CHAR (struct decoding_stream *str,
-                 Emchar character, unsigned_char_dynarr* dst)
-{
-  if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys))
-    decode_add_er_char (str, character, dst);
-  else if (!CONSP (str->combining_table))
-    {
-      Lisp_Object ret
-       = Fget_char_attribute (make_char (character), Qcomposition, Qnil);
-
-      if (NILP (ret))
-       decode_add_er_char (str, character, dst);
-      else
-       {
-         str->combined_chars[0] = character;
-         str->combined_char_count = 1;
-         str->combining_table = ret;
-       }
-    }
-  else
-    {
-      Lisp_Object ret = Fcdr (Fassq (make_char (character), str->combining_table));
-
-      if (CHARP (ret))
-       {
-         Emchar char2 = XCHARVAL (ret);
-         ret = Fget_char_attribute (make_char (character), Qcomposition, Qnil);
-         if (NILP (ret))
-           {
-             decode_add_er_char (str, character, dst);
-             str->combined_char_count = 0;
-             str->combining_table = Qnil;
-           }
-         else
-           {
-             str->combined_chars[0] = char2;
-             str->combined_char_count = 1;
-             str->combining_table = ret;
-           }
-       }
-      else
-       {
-         COMPOSE_FLUSH_CHARS (str, dst);
-         decode_add_er_char (str, character, dst);
-       }
-    }
-}
-#else /* not UTF2000 */
-#define COMPOSE_FLUSH_CHARS(str, dst)
-#define COMPOSE_ADD_CHAR(str, ch, dst) DECODE_ADD_UCS_CHAR (ch, dst)
-#endif /* UTF2000 */
-
 static ssize_t decoding_reader (Lstream *stream,
                                unsigned char *data, size_t size);
 static ssize_t decoding_writer (Lstream *stream,
@@ -3368,6 +3175,278 @@ text_encode_generic (Lstream *encoding, const Bufbyte *src,
 }
 
 \f
+#ifdef UTF2000
+/************************************************************************/
+/*                          entity reference                            */
+/************************************************************************/
+
+INLINE_HEADER void
+decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst);
+INLINE_HEADER void
+decode_flush_er_chars (struct decoding_stream *str, unsigned_char_dynarr* dst)
+{
+  if ( str->er_counter > 0)
+    {
+      Dynarr_add_many (dst, str->er_buf, str->er_counter);
+      str->er_counter = 0;
+    }
+}
+
+void decode_add_er_char (struct decoding_stream *str, Emchar character,
+                        unsigned_char_dynarr* dst);
+void
+decode_add_er_char (struct decoding_stream *str, Emchar c,
+                   unsigned_char_dynarr* dst)
+{
+  if (str->er_counter == 0)
+    {
+      if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys)
+         && (c == '&') )
+       {
+         str->er_buf[0] = '&';
+         str->er_counter++;
+       }
+      else
+       DECODE_ADD_UCS_CHAR (c, dst);
+    }
+  else if (c == ';')
+    {
+      Lisp_Object string = make_string (str->er_buf,
+                                       str->er_counter);
+      Lisp_Object rest = Vcoded_charset_entity_reference_alist;
+      Lisp_Object cell;
+      Lisp_Object ret;
+      Lisp_Object pat;
+      Lisp_Object ccs;
+      int base;
+
+      while (!NILP (rest))
+       {                     
+         cell = Fcar (rest);
+         ccs = Fcar (cell);
+         if (NILP (ccs = Ffind_charset (ccs)))
+           continue;
+
+         cell = Fcdr (cell);
+         ret = Fcar (cell);
+         if (STRINGP (ret))
+           pat = ret;
+         else
+           continue;
+
+         cell = Fcdr (cell);
+         cell = Fcdr (cell);
+         ret = Fcar (cell);
+         if (EQ (ret, Qd))
+           {
+             pat = concat3 (build_string ("^&"),
+                            pat, build_string ("\\([0-9]+\\)$"));
+             base = 10;
+           }
+         else if (EQ (ret, Qx))
+           {
+             pat = concat3 (build_string ("^&"),
+                            pat, build_string ("\\([0-9a-f]+\\)$"));
+             base = 16;
+           }
+         else if (EQ (ret, QX))
+           {
+             pat = concat3 (build_string ("^&"),
+                            pat, build_string ("\\([0-9A-F]+\\)$"));
+             base = 16;
+           }
+         else
+           continue;
+
+         if (!NILP (Fstring_match (pat, string, Qnil, Qnil)))
+           {
+             int code
+               = XINT (Fstring_to_number
+                       (Fsubstring (string,
+                                    Fmatch_beginning (make_int (1)),
+                                    Fmatch_end (make_int (1))),
+                        make_int (base)));
+
+             DECODE_ADD_UCS_CHAR (DECODE_CHAR (ccs, code), dst);
+             goto decoded;
+           }
+         rest = Fcdr (rest);
+       }
+      if (!NILP (Fstring_match (build_string ("^&MCS-\\([0-9A-F]+\\)$"),
+                               string, Qnil, Qnil)))
+       {
+         int code
+           = XINT (Fstring_to_number
+                   (Fsubstring (string,
+                                Fmatch_beginning (make_int (1)),
+                                Fmatch_end (make_int (1))),
+                    make_int (16)));
+
+         DECODE_ADD_UCS_CHAR (code, dst);
+       }
+      else
+       {
+         Dynarr_add_many (dst, str->er_buf, str->er_counter);
+         Dynarr_add (dst, ';');
+       }
+    decoded:
+      str->er_counter = 0;
+    }
+  else if ( (str->er_counter >= 16) || (c >= 0x7F) )
+    {
+      Dynarr_add_many (dst, str->er_buf, str->er_counter);
+      str->er_counter = 0;
+      DECODE_ADD_UCS_CHAR (c, dst);
+    }
+  else
+    str->er_buf[str->er_counter++] = c;
+}
+
+void char_encode_as_entity_reference (Emchar ch, char* buf);
+void
+char_encode_as_entity_reference (Emchar ch, char* buf)
+{
+  Lisp_Object rest = Vcoded_charset_entity_reference_alist;
+  Lisp_Object cell;
+  Lisp_Object ccs;
+  int format_columns, idx;
+  char format[18];
+
+  while (!NILP (rest))
+    {
+      cell = Fcar (rest);
+      ccs = Fcar (cell);
+      if (!NILP (ccs = Ffind_charset (ccs)))
+       {
+         int code_point = charset_code_point (ccs, ch);
+
+         if ( code_point >= 0 )
+           {
+             Lisp_Object ret;
+
+             cell = Fcdr (cell);
+             ret = Fcar (cell);
+             if (STRINGP (ret) && ((idx = XSTRING_LENGTH (ret)) <= 6))
+               {
+                 format[0] = '&';
+                 strncpy (&format[1], XSTRING_DATA (ret), idx);
+                 idx++;
+               }
+             else
+               continue;
+
+             cell = Fcdr (cell);
+             ret = Fcar (cell);
+             if (INTP (ret))
+               {
+                 format[idx++] = '%';
+                 format_columns = XINT (ret);
+                 if ( (2 <= format_columns) && (format_columns <= 8) )
+                   {
+                     format [idx++] = '0';
+                     format [idx++] = '0' + format_columns;
+                   }
+               }
+
+             cell = Fcdr (cell);
+             ret = Fcar (cell);
+             if (EQ (ret, Qd))
+               format [idx++] = 'd';
+             else if (EQ (ret, Qx))
+               format [idx++] = 'x';
+             else if (EQ (ret, QX))
+               format [idx++] = 'X';
+             else
+               continue;
+             format [idx++] = ';';
+             format [idx++] = 0;
+
+             sprintf (buf, format, code_point);
+             return;
+           }
+       }
+      rest = Fcdr (rest);
+    }
+  sprintf (buf, "&MCS-%08X;", ch);
+}
+
+\f
+/************************************************************************/
+/*                          character composition                       */
+/************************************************************************/
+extern Lisp_Object Qcomposition;
+
+INLINE_HEADER void
+COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst);
+INLINE_HEADER void
+COMPOSE_FLUSH_CHARS (struct decoding_stream *str, unsigned_char_dynarr* dst)
+{
+  unsigned i;
+
+  for (i = 0; i < str->combined_char_count; i++)
+    decode_add_er_char (str, str->combined_chars[i], dst);
+  str->combined_char_count = 0;
+  str->combining_table = Qnil;
+}
+
+void COMPOSE_ADD_CHAR (struct decoding_stream *str, Emchar character,
+                      unsigned_char_dynarr* dst);
+void
+COMPOSE_ADD_CHAR (struct decoding_stream *str,
+                 Emchar character, unsigned_char_dynarr* dst)
+{
+  if (CODING_SYSTEM_DISABLE_COMPOSITION (str->codesys))
+    decode_add_er_char (str, character, dst);
+  else if (!CONSP (str->combining_table))
+    {
+      Lisp_Object ret
+       = Fget_char_attribute (make_char (character), Qcomposition, Qnil);
+
+      if (NILP (ret))
+       decode_add_er_char (str, character, dst);
+      else
+       {
+         str->combined_chars[0] = character;
+         str->combined_char_count = 1;
+         str->combining_table = ret;
+       }
+    }
+  else
+    {
+      Lisp_Object ret
+       = Fcdr (Fassq (make_char (character), str->combining_table));
+
+      if (CHARP (ret))
+       {
+         Emchar char2 = XCHARVAL (ret);
+         ret = Fget_char_attribute (make_char (character), Qcomposition,
+                                    Qnil);
+         if (NILP (ret))
+           {
+             decode_add_er_char (str, character, dst);
+             str->combined_char_count = 0;
+             str->combining_table = Qnil;
+           }
+         else
+           {
+             str->combined_chars[0] = char2;
+             str->combined_char_count = 1;
+             str->combining_table = ret;
+           }
+       }
+      else
+       {
+         COMPOSE_FLUSH_CHARS (str, dst);
+         decode_add_er_char (str, character, dst);
+       }
+    }
+}
+#else /* not UTF2000 */
+#define COMPOSE_FLUSH_CHARS(str, dst)
+#define COMPOSE_ADD_CHAR(str, ch, dst) DECODE_ADD_UCS_CHAR (ch, dst)
+#endif /* UTF2000 */
+
+\f
 /************************************************************************/
 /*                          Shift-JIS methods                           */
 /************************************************************************/
@@ -3815,17 +3894,17 @@ decode_coding_big5 (Lstream *decoding, const Extbyte *src,
              decode_flush_er_chars (str, dst);
              cpos = c;
            }
-         else if ( c >= ' ' )
-           {
-             /* DECODE_ADD_BINARY_CHAR (c, dst); */
-             decode_add_er_char (str, c, dst);
-           }
-         else
+         else if ( c < ' ' )
            {
              decode_flush_er_chars (str, dst);
              DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
              DECODE_ADD_BINARY_CHAR (c, dst);
            }
+         else
+           {
+             /* DECODE_ADD_BINARY_CHAR (c, dst); */
+             decode_add_er_char (str, c, dst);
+           }
        }
     label_continue_loop:;
     }
@@ -4323,67 +4402,9 @@ char_encode_utf8 (struct encoding_stream *str, Emchar ch,
            code_point = XINT (ret);
          else if (CODING_SYSTEM_USE_ENTITY_REFERENCE (str->codesys))
            {
-             Lisp_Object rest = Vcoded_charset_entity_reference_alist;
-             Lisp_Object cell;
-             Lisp_Object ccs;
-             int format_columns, idx;
-             char buf[16], format[16];
+             char buf[18];
 
-             while (!NILP (rest))
-               {
-                 cell = Fcar (rest);
-                 ccs = Fcar (cell);
-                 if (!NILP (ccs = Ffind_charset (ccs)))
-                   {
-                     if ( (code_point
-                           = charset_code_point (ccs, ch)) >= 0 )
-                       {
-                         cell = Fcdr (cell);
-                         ret = Fcar (cell);
-                         if (STRINGP (ret)
-                             && ((idx =XSTRING_LENGTH (ret)) <= 6))
-                           {
-                             strncpy (format, XSTRING_DATA (ret), idx);
-                           }
-                         else
-                           continue;
-
-                         cell = Fcdr (cell);
-                         ret = Fcar (cell);
-                         if (INTP (ret))
-                           {
-                             format [idx++] = '%';
-                             format_columns = XINT (ret);
-                             if ( (2 <= format_columns)
-                                  && (format_columns <= 8) )
-                               {
-                                 format [idx++] = '0';
-                                 format [idx++] = '0' + format_columns;
-                               }
-                           }
-
-                         cell = Fcdr (cell);
-                         ret = Fcar (cell);
-                         if (EQ (ret, Qd))
-                           format [idx++] = 'd';
-                         else if (EQ (ret, Qx))
-                           format [idx++] = 'x';
-                         else if (EQ (ret, QX))
-                           format [idx++] = 'X';
-                         else
-                           continue;
-                         format [idx++] = 0;
-
-                         sprintf (buf, format, code_point);
-                         Dynarr_add (dst, '&');
-                         Dynarr_add_many (dst, buf, strlen (buf));
-                         Dynarr_add (dst, ';');
-                         return;
-                       }
-                   }
-                 rest = Fcdr (rest);
-               }
-             sprintf (buf, "&MCS-%08X;", ch);
+             char_encode_as_entity_reference (ch, buf);
              Dynarr_add_many (dst, buf, strlen (buf));
              return;
            }