XEmacs 21.4.12 "Portable Code".

[chise/xemacs-chise.git.1] / src / file-coding.c
diff --git a/src/file-coding.c b/src/file-coding.c

index aaca9ef..fe42ebd 100644 (file)
--- a/src/file-coding.c
+++ b/src/file-coding.c
@@ -46,17 +46,17 @@ Lisp_Object Vcoding_system_for_write;
  Lisp_Object Vfile_name_coding_system;
  
  /* Table of symbols identifying each coding category. */
-Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1];
+Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST];
  
  
  
  struct file_coding_dump {
    /* Coding system currently associated with each coding category. */
-  Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1];
+  Lisp_Object coding_category_system[CODING_CATEGORY_LAST];
  
    /* Table of all coding categories in decreasing order of priority.
       This describes a permutation of the possible coding categories. */
-  int coding_category_by_priority[CODING_CATEGORY_LAST + 1];
+  int coding_category_by_priority[CODING_CATEGORY_LAST];
  
  #ifdef MULE
    Lisp_Object ucs_to_mule_table[65536];
@@ -64,7 +64,7 @@ struct file_coding_dump {
  } *fcd;
  
  static const struct lrecord_description fcd_description_1[] = {
-  { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST + 1 },
+  { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST },
  #ifdef MULE
    { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, ucs_to_mule_table), countof (fcd->ucs_to_mule_table) },
  #endif
@@ -176,47 +176,47 @@ EXFUN (Fcopy_coding_system, 2);
  #ifdef MULE
  struct detection_state;
  static int detect_coding_sjis (struct detection_state *st,
-                              const Extbyte *src, size_t n);
+                              const Extbyte *src, Lstream_data_count n);
  static void decode_coding_sjis (Lstream *decoding, const Extbyte *src,
-                               unsigned_char_dynarr *dst, size_t n);
+                               unsigned_char_dynarr *dst, Lstream_data_count n);
  static void encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
-                               unsigned_char_dynarr *dst, size_t n);
+                               unsigned_char_dynarr *dst, Lstream_data_count n);
  static int detect_coding_big5 (struct detection_state *st,
-                              const Extbyte *src, size_t n);
+                              const Extbyte *src, Lstream_data_count n);
  static void decode_coding_big5 (Lstream *decoding, const Extbyte *src,
-                               unsigned_char_dynarr *dst, size_t n);
+                               unsigned_char_dynarr *dst, Lstream_data_count n);
  static void encode_coding_big5 (Lstream *encoding, const Bufbyte *src,
-                               unsigned_char_dynarr *dst, size_t n);
+                               unsigned_char_dynarr *dst, Lstream_data_count n);
  static int detect_coding_ucs4 (struct detection_state *st,
-                              const Extbyte *src, size_t n);
+                              const Extbyte *src, Lstream_data_count n);
  static void decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
-                               unsigned_char_dynarr *dst, size_t n);
+                               unsigned_char_dynarr *dst, Lstream_data_count n);
  static void encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
-                               unsigned_char_dynarr *dst, size_t n);
+                               unsigned_char_dynarr *dst, Lstream_data_count n);
  static int detect_coding_utf8 (struct detection_state *st,
-                              const Extbyte *src, size_t n);
+                              const Extbyte *src, Lstream_data_count n);
  static void decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
-                               unsigned_char_dynarr *dst, size_t n);
+                               unsigned_char_dynarr *dst, Lstream_data_count n);
  static void encode_coding_utf8 (Lstream *encoding, const Bufbyte *src,
-                               unsigned_char_dynarr *dst, size_t n);
+                               unsigned_char_dynarr *dst, Lstream_data_count n);
  static int postprocess_iso2022_mask (int mask);
  static void reset_iso2022 (Lisp_Object coding_system,
                            struct iso2022_decoder *iso);
  static int detect_coding_iso2022 (struct detection_state *st,
-                                 const Extbyte *src, size_t n);
+                                 const Extbyte *src, Lstream_data_count n);
  static void decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
-                                  unsigned_char_dynarr *dst, size_t n);
+                                  unsigned_char_dynarr *dst, Lstream_data_count n);
  static void encode_coding_iso2022 (Lstream *encoding, const Bufbyte *src,
-                                  unsigned_char_dynarr *dst, size_t n);
+                                  unsigned_char_dynarr *dst, Lstream_data_count n);
  #endif /* MULE */
  static void decode_coding_no_conversion (Lstream *decoding, const Extbyte *src,
-                                        unsigned_char_dynarr *dst, size_t n);
+                                        unsigned_char_dynarr *dst, Lstream_data_count n);
  static void encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src,
-                                        unsigned_char_dynarr *dst, size_t n);
+                                        unsigned_char_dynarr *dst, Lstream_data_count n);
  static void mule_decode (Lstream *decoding, const Extbyte *src,
-                        unsigned_char_dynarr *dst, size_t n);
+                        unsigned_char_dynarr *dst, Lstream_data_count n);
  static void mule_encode (Lstream *encoding, const Bufbyte *src,
-                        unsigned_char_dynarr *dst, size_t n);
+                        unsigned_char_dynarr *dst, Lstream_data_count n);
  
  typedef struct codesys_prop codesys_prop;
  struct codesys_prop
@@ -1433,7 +1433,7 @@ decode_coding_category (Lisp_Object symbol)
    int i;
  
    CHECK_SYMBOL (symbol);
-  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+  for (i = 0; i < CODING_CATEGORY_LAST; i++)
      if (EQ (coding_category_symbol[i], symbol))
        return i;
  
@@ -1449,7 +1449,7 @@ Return a list of all recognized coding categories.
    int i;
    Lisp_Object list = Qnil;
  
-  for (i = CODING_CATEGORY_LAST; i >= 0; i--)
+  for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--)
      list = Fcons (coding_category_symbol[i], list);
    return list;
  }
@@ -1463,13 +1463,13 @@ previously.
  */
         (list))
  {
-  int category_to_priority[CODING_CATEGORY_LAST + 1];
+  int category_to_priority[CODING_CATEGORY_LAST];
    int i, j;
    Lisp_Object rest;
  
    /* First generate a list that maps coding categories to priorities. */
  
-  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+  for (i = 0; i < CODING_CATEGORY_LAST; i++)
      category_to_priority[i] = -1;
  
    /* Highest priority comes from the specified list. */
@@ -1486,7 +1486,7 @@ previously.
    /* Now go through the existing categories by priority to retrieve
       the categories not yet specified and preserve their priority
       order. */
-  for (j = 0; j <= CODING_CATEGORY_LAST; j++)
+  for (j = 0; j < CODING_CATEGORY_LAST; j++)
      {
        int cat = fcd->coding_category_by_priority[j];
        if (category_to_priority[cat] < 0)
@@ -1496,7 +1496,7 @@ previously.
    /* Now we need to construct the inverse of the mapping we just
       constructed. */
  
-  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+  for (i = 0; i < CODING_CATEGORY_LAST; i++)
      fcd->coding_category_by_priority[category_to_priority[i]] = i;
  
    /* Phew!  That was confusing. */
@@ -1511,7 +1511,7 @@ Return a list of coding categories in descending order of priority.
    int i;
    Lisp_Object list = Qnil;
  
-  for (i = CODING_CATEGORY_LAST; i >= 0; i--)
+  for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--)
      list = Fcons (coding_category_symbol[fcd->coding_category_by_priority[i]],
                   list);
    return list;
@@ -1632,7 +1632,7 @@ mask_has_at_most_one_bit_p (int mask)
  
  static eol_type_t
  detect_eol_type (struct detection_state *st, const Extbyte *src,
-                size_t n)
+                Lstream_data_count n)
  {
    while (n--)
      {
@@ -1675,7 +1675,7 @@ detect_eol_type (struct detection_state *st, const Extbyte *src,
  
  static int
  detect_coding_type (struct detection_state *st, const Extbyte *src,
-                   size_t n, int just_do_eol)
+                   Lstream_data_count n, int just_do_eol)
  {
    if (st->eol_type == EOL_AUTODETECT)
      st->eol_type = detect_eol_type (st, src, n);
@@ -1761,7 +1761,7 @@ coding_system_from_mask (int mask)
  #endif
        /* Look through the coding categories by priority and find
          the first one that is allowed. */
-      for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+      for (i = 0; i < CODING_CATEGORY_LAST; i++)
         {
           cat = fcd->coding_category_by_priority[i];
           if ((mask & (1 << cat)) &&
@@ -1785,6 +1785,8 @@ coding_system_from_mask (int mask)
     that should be unnecessary with the explicit eol-type argument. */
  
  #define LENGTH(string_constant) (sizeof (string_constant) - 1)
+/* number of leading lines to check for a coding cookie */
+#define LINES_TO_CHECK 2
  
  void
  determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
@@ -1806,15 +1808,15 @@ determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
        Extbyte buf[4096];
        Lisp_Object coding_system = Qnil;
        Extbyte *p;
-      ssize_t nread = Lstream_read (stream, buf, sizeof (buf));
+      Lstream_data_count nread = Lstream_read (stream, buf, sizeof (buf));
        Extbyte *scan_end;
+      int lines_checked = 0;
  
        /* Look for initial "-*-"; mode line prefix */
        for (p = buf,
              scan_end = buf + nread - LENGTH ("-*-coding:?-*-");
            p <= scan_end
-            && *p != '\n'
-            && *p != '\r';
+            && lines_checked < LINES_TO_CHECK;
            p++)
         if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
           {
@@ -1823,8 +1825,7 @@ determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
             for (p = local_vars_beg,
                    scan_end = buf + nread - LENGTH ("-*-");
                  p <= scan_end
-                  && *p != '\n'
-                  && *p != '\r';
+                  && lines_checked < LINES_TO_CHECK;
                  p++)
               if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
                 {
@@ -1866,8 +1867,24 @@ determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
                       }
                   break;
                 }
+             /* #### file must use standard EOLs or we miss 2nd line */
+             /* #### not to mention this is broken for UTF-16 DOS files */
+             else if (*p == '\n' || *p == '\r')
+               {
+                 lines_checked++;
+                 /* skip past multibyte (DOS) newline */
+                 if (*p == '\r' && *(p+1) == '\n') p++;
+               }
             break;
           }
+       /* #### file must use standard EOLs or we miss 2nd line */
+       /* #### not to mention this is broken for UTF-16 DOS files */
+       else if (*p == '\n' || *p == '\r')
+         {
+           lines_checked++;
+           /* skip past multibyte (DOS) newline */
+           if (*p == '\r' && *(p+1) == '\n') p++;
+         }
  
        if (NILP (coding_system))
         do
@@ -1940,7 +1957,7 @@ type.  Optional arg BUFFER defaults to the current buffer.
    while (1)
      {
        Extbyte random_buffer[4096];
-      ssize_t nread = Lstream_read (istr, random_buffer, sizeof (random_buffer));
+      Lstream_data_count nread = Lstream_read (istr, random_buffer, sizeof (random_buffer));
  
        if (!nread)
         break;
@@ -1959,7 +1976,7 @@ type.  Optional arg BUFFER defaults to the current buffer.
  #ifdef MULE
        decst.mask = postprocess_iso2022_mask (decst.mask);
  #endif
-      for (i = CODING_CATEGORY_LAST; i >= 0; i--)
+      for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--)
         {
           int sys = fcd->coding_category_by_priority[i];
           if (decst.mask & (1 << sys))
@@ -2107,10 +2124,10 @@ struct decoding_stream
    struct detection_state decst;
  };
  
-static ssize_t decoding_reader (Lstream *stream,
-                               unsigned char *data, size_t size);
-static ssize_t decoding_writer (Lstream *stream,
-                               const unsigned char *data, size_t size);
+static Lstream_data_count decoding_reader (Lstream *stream,
+                               unsigned char *data, Lstream_data_count size);
+static Lstream_data_count decoding_writer (Lstream *stream,
+                               const unsigned char *data, Lstream_data_count size);
  static int decoding_rewinder   (Lstream *stream);
  static int decoding_seekable_p (Lstream *stream);
  static int decoding_flusher    (Lstream *stream);
@@ -2142,12 +2159,12 @@ decoding_marker (Lisp_Object stream)
  /* Read SIZE bytes of data and store it into DATA.  We are a decoding stream
     so we read data from the other end, decode it, and store it into DATA. */
  
-static ssize_t
-decoding_reader (Lstream *stream, unsigned char *data, size_t size)
+static Lstream_data_count
+decoding_reader (Lstream *stream, unsigned char *data, Lstream_data_count size)
  {
    struct decoding_stream *str = DECODING_STREAM_DATA (stream);
    unsigned char *orig_data = data;
-  ssize_t read_size;
+  Lstream_data_count read_size;
    int error_occurred = 0;
  
    /* We need to interface to mule_decode(), which expects to take some
@@ -2163,7 +2180,7 @@ decoding_reader (Lstream *stream, unsigned char *data, size_t size)
          most SIZE bytes, and delete the data from the runoff. */
        if (Dynarr_length (str->runoff) > 0)
         {
-         size_t chunk = min (size, (size_t) Dynarr_length (str->runoff));
+         Lstream_data_count chunk = min (size, (Lstream_data_count) Dynarr_length (str->runoff));
           memcpy (data, Dynarr_atp (str->runoff, 0), chunk);
           Dynarr_delete_many (str->runoff, 0, chunk);
           data += chunk;
@@ -2204,11 +2221,11 @@ decoding_reader (Lstream *stream, unsigned char *data, size_t size)
      return data - orig_data;
  }
  
-static ssize_t
-decoding_writer (Lstream *stream, const unsigned char *data, size_t size)
+static Lstream_data_count
+decoding_writer (Lstream *stream, const unsigned char *data, Lstream_data_count size)
  {
    struct decoding_stream *str = DECODING_STREAM_DATA (stream);
-  ssize_t retval;
+  Lstream_data_count retval;
  
    /* Decode all our data into the runoff, and then attempt to write
       it all out to the other end.  Remove whatever chunk we succeeded
@@ -2366,7 +2383,7 @@ make_decoding_output_stream (Lstream *stream, Lisp_Object codesys)
  
  static void
  mule_decode (Lstream *decoding, const Extbyte *src,
-            unsigned_char_dynarr *dst, size_t n)
+            unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
  
@@ -2488,7 +2505,7 @@ BUFFER defaults to the current buffer if unspecified.
        char tempbuf[1024]; /* some random amount */
        Bufpos newpos, even_newer_pos;
        Bufpos oldpos = lisp_buffer_stream_startpos (istr);
-      ssize_t size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
+      Lstream_data_count size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
  
        if (!size_in_bytes)
         break;
@@ -2573,9 +2590,9 @@ struct encoding_stream
  #endif /* MULE */
  };
  
-static ssize_t encoding_reader (Lstream *stream, unsigned char *data, size_t size);
-static ssize_t encoding_writer (Lstream *stream, const unsigned char *data,
-                               size_t size);
+static Lstream_data_count encoding_reader (Lstream *stream, unsigned char *data, Lstream_data_count size);
+static Lstream_data_count encoding_writer (Lstream *stream, const unsigned char *data,
+                               Lstream_data_count size);
  static int encoding_rewinder   (Lstream *stream);
  static int encoding_seekable_p (Lstream *stream);
  static int encoding_flusher    (Lstream *stream);
@@ -2607,12 +2624,12 @@ encoding_marker (Lisp_Object stream)
  /* Read SIZE bytes of data and store it into DATA.  We are a encoding stream
     so we read data from the other end, encode it, and store it into DATA. */
  
-static ssize_t
-encoding_reader (Lstream *stream, unsigned char *data, size_t size)
+static Lstream_data_count
+encoding_reader (Lstream *stream, unsigned char *data, Lstream_data_count size)
  {
    struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
    unsigned char *orig_data = data;
-  ssize_t read_size;
+  Lstream_data_count read_size;
    int error_occurred = 0;
  
    /* We need to interface to mule_encode(), which expects to take some
@@ -2669,11 +2686,11 @@ encoding_reader (Lstream *stream, unsigned char *data, size_t size)
      return data - orig_data;
  }
  
-static ssize_t
-encoding_writer (Lstream *stream, const unsigned char *data, size_t size)
+static Lstream_data_count
+encoding_writer (Lstream *stream, const unsigned char *data, Lstream_data_count size)
  {
    struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
-  ssize_t retval;
+  Lstream_data_count retval;
  
    /* Encode all our data into the runoff, and then attempt to write
       it all out to the other end.  Remove whatever chunk we succeeded
@@ -2813,7 +2830,7 @@ make_encoding_output_stream (Lstream *stream, Lisp_Object codesys)
  
  static void
  mule_encode (Lstream *encoding, const Bufbyte *src,
-            unsigned_char_dynarr *dst, size_t n)
+            unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
  
@@ -2899,7 +2916,7 @@ text.  BUFFER defaults to the current buffer if unspecified.
        char tempbuf[1024]; /* some random amount */
        Bufpos newpos, even_newer_pos;
        Bufpos oldpos = lisp_buffer_stream_startpos (istr);
-      ssize_t size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
+      Lstream_data_count size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
  
        if (!size_in_bytes)
         break;
@@ -2962,7 +2979,7 @@ text.  BUFFER defaults to the current buffer if unspecified.
    ((c) >= 0xA1 && (c) <= 0xDF)
  
  static int
-detect_coding_sjis (struct detection_state *st, const Extbyte *src, size_t n)
+detect_coding_sjis (struct detection_state *st, const Extbyte *src, Lstream_data_count n)
  {
    while (n--)
      {
@@ -2985,7 +3002,7 @@ detect_coding_sjis (struct detection_state *st, const Extbyte *src, size_t n)
  
  static void
  decode_coding_sjis (Lstream *decoding, const Extbyte *src,
-                   unsigned_char_dynarr *dst, size_t n)
+                   unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    unsigned int flags  = str->flags;
@@ -3041,7 +3058,7 @@ decode_coding_sjis (Lstream *decoding, const Extbyte *src,
  
  static void
  encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
-                   unsigned_char_dynarr *dst, size_t n)
+                   unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
    unsigned int flags  = str->flags;
@@ -3243,7 +3260,7 @@ Return the corresponding character code in SHIFT-JIS as a cons of two bytes.
  } while (0)
  
  static int
-detect_coding_big5 (struct detection_state *st, const Extbyte *src, size_t n)
+detect_coding_big5 (struct detection_state *st, const Extbyte *src, Lstream_data_count n)
  {
    while (n--)
      {
@@ -3267,7 +3284,7 @@ detect_coding_big5 (struct detection_state *st, const Extbyte *src, size_t n)
  
  static void
  decode_coding_big5 (Lstream *decoding, const Extbyte *src,
-                   unsigned_char_dynarr *dst, size_t n)
+                   unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    unsigned int flags  = str->flags;
@@ -3316,7 +3333,7 @@ decode_coding_big5 (Lstream *decoding, const Extbyte *src,
  
  static void
  encode_coding_big5 (Lstream *encoding, const Bufbyte *src,
-                   unsigned_char_dynarr *dst, size_t n)
+                   unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
@@ -3582,7 +3599,7 @@ encode_ucs4 (Lisp_Object charset,
  }
  
  static int
-detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, size_t n)
+detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, Lstream_data_count n)
  {
    while (n--)
      {
@@ -3607,7 +3624,7 @@ detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, size_t n)
  
  static void
  decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
-                   unsigned_char_dynarr *dst, size_t n)
+                   unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    unsigned int flags = str->flags;
@@ -3643,7 +3660,7 @@ decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
  
  static void
  encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
-                   unsigned_char_dynarr *dst, size_t n)
+                   unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
    unsigned int flags = str->flags;
@@ -3781,7 +3798,7 @@ encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
  /************************************************************************/
  
  static int
-detect_coding_utf8 (struct detection_state *st, const Extbyte *src, size_t n)
+detect_coding_utf8 (struct detection_state *st, const Extbyte *src, Lstream_data_count n)
  {
    while (n--)
      {
@@ -3816,7 +3833,7 @@ detect_coding_utf8 (struct detection_state *st, const Extbyte *src, size_t n)
  
  static void
  decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
-                   unsigned_char_dynarr *dst, size_t n)
+                   unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    unsigned int flags  = str->flags;
@@ -3930,7 +3947,7 @@ encode_utf8 (Lisp_Object charset,
  
  static void
  encode_coding_utf8 (Lstream *encoding, const Bufbyte *src,
-                   unsigned_char_dynarr *dst, size_t n)
+                   unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
    unsigned int flags  = str->flags;
@@ -4282,7 +4299,48 @@ fit_to_be_escape_quoted (unsigned char c)
  
     If CHECK_INVALID_CHARSETS is non-zero, check for designation
     or invocation of an invalid character set and treat that as
-   an unrecognized escape sequence. */
+   an unrecognized escape sequence.
+
+   ********************************************************************
+
+   #### Strategies for error annotation and coding orthogonalization
+
+   We really want to separate out a number of things.  Conceptually,
+   there is a nested syntax.
+
+   At the top level is the ISO 2022 extension syntax, including charset
+   designation and invocation, and certain auxiliary controls such as the
+   ISO 6429 direction specification.  These are octet-oriented, with the
+   single exception (AFAIK) of the "exit Unicode" sequence which uses the
+   UTF's natural width (1 byte for UTF-7 and UTF-8, 2 bytes for UCS-2 and
+   UTF-16, and 4 bytes for UCS-4 and UTF-32).  This will be treated as a
+   (deprecated) special case in Unicode processing.
+
+   The middle layer is ISO 2022 character interpretation.  This will depend
+   on the current state of the ISO 2022 registers, and assembles octets
+   into the character's internal representation.
+
+   The lowest level is translating system control conventions.  At present
+   this is restricted to newline translation, but one could imagine doing
+   tab conversion or line wrapping here.  "Escape from Unicode" processing
+   would be done at this level.
+
+   At each level the parser will verify the syntax.  In the case of a
+   syntax error or warning (such as a redundant escape sequence that affects
+   no characters), the parser will take some action, typically inserting the
+   erroneous octets directly into the output and creating an annotation
+   which can be used by higher level I/O to mark the affected region.
+
+   This should make it possible to do something sensible about separating
+   newline convention processing from character construction, and about
+   preventing ISO 2022 escape sequences from being recognized
+   inappropriately.
+
+   The basic strategy will be to have octet classification tables, and
+   switch processing according to the table entry.
+
+   It's possible that, by doing the processing with tables of functions or
+   the like, the parser can be used for both detection and translation. */
  
  static int
  parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
@@ -4648,7 +4706,7 @@ parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
  }
  
  static int
-detect_coding_iso2022 (struct detection_state *st, const Extbyte *src, size_t n)
+detect_coding_iso2022 (struct detection_state *st, const Extbyte *src, Lstream_data_count n)
  {
    int mask;
  
@@ -4839,7 +4897,7 @@ ensure_correct_direction (int direction, Lisp_Coding_System *codesys,
  
  static void
  decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
-                      unsigned_char_dynarr *dst, size_t n)
+                      unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    unsigned int flags  = str->flags;
@@ -5165,7 +5223,7 @@ ensure_shift_out (struct encoding_stream *str, unsigned_char_dynarr *dst)
  
  static void
  encode_coding_iso2022 (Lstream *encoding, const Bufbyte *src,
-                      unsigned_char_dynarr *dst, size_t n)
+                      unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    unsigned char charmask, c;
    unsigned char char_boundary;
@@ -5474,7 +5532,7 @@ encode_coding_iso2022 (Lstream *encoding, const Bufbyte *src,
     interpreted as being in any particular decoding. */
  static void
  decode_coding_no_conversion (Lstream *decoding, const Extbyte *src,
-                            unsigned_char_dynarr *dst, size_t n)
+                            unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
    unsigned int flags  = str->flags;
@@ -5498,7 +5556,7 @@ decode_coding_no_conversion (Lstream *decoding, const Extbyte *src,
  
  static void
  encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src,
-                            unsigned_char_dynarr *dst, size_t n)
+                            unsigned_char_dynarr *dst, Lstream_data_count n)
  {
    unsigned char c;
    struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
@@ -5697,10 +5755,10 @@ vars_of_file_coding (void)
    int i;
  
    fcd = xnew (struct file_coding_dump);
-  dumpstruct (&fcd, &fcd_description);
+  dump_add_root_struct_ptr (&fcd, &fcd_description);
  
    /* Initialize to something reasonable ... */
-  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+  for (i = 0; i < CODING_CATEGORY_LAST; i++)
      {
        fcd->coding_category_system[i] = Qnil;
        fcd->coding_category_by_priority[i] = i;
@@ -5764,7 +5822,7 @@ complex_vars_of_file_coding (void)
      make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
  
    the_codesys_prop_dynarr = Dynarr_new (codesys_prop);
-  dumpstruct (&the_codesys_prop_dynarr, &codesys_prop_dynarr_description);
+  dump_add_root_struct_ptr (&the_codesys_prop_dynarr, &codesys_prop_dynarr_description);
  
  #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \
  {                                              \