From: handa Date: Mon, 16 Feb 2009 02:06:50 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: XML-BEFORE-XEX~5 X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=a7a6dba5ea04631d400e2cbc32c51ca3ba27e681;p=m17n%2Fm17n-lib.git *** empty log message *** --- diff --git a/src/Makefile.am b/src/Makefile.am index 4c282a9..5449f15 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -49,7 +49,7 @@ libm17n_core_la_LDFLAGS = -export-dynamic ${VINFO} libm17n_la_SOURCES = \ charset.h charset.c \ coding.h coding.c \ - input.h input.c \ + input.h input.c input-xml.c \ language.h language.c \ mlocale.h locale.c \ m17n.h m17n.c diff --git a/src/coding.c b/src/coding.c index 9e3d29d..b991c83 100644 --- a/src/coding.c +++ b/src/coding.c @@ -702,7 +702,72 @@ encode_coding_charset (MText *mt, int from, int to, } -/* Staffs for coding-systems of type MCODING_TYPE_UTF (8). */ + +/* Staffs for coding-systems of type MCODING_TYPE_UTF. */ + +enum utf_bom + { + UTF_BOM_MAYBE, + UTF_BOM_NO, + UTF_BOM_YES, + UTF_BOM_MAX + }; + +enum utf_endian + { + UTF_BIG_ENDIAN, + UTF_LITTLE_ENDIAN, + UTF_ENDIAN_MAX + }; + +struct utf_status +{ + int surrogate; + enum utf_bom bom; + enum utf_endian endian; +}; + +static int +setup_coding_utf (MCodingSystem *coding) +{ + MCodingInfoUTF *info = (MCodingInfoUTF *) (coding->extra_info); + MCodingInfoUTF *spec; + + if (info->code_unit_bits == 8) + coding->ascii_compatible = 1; + else if (info->code_unit_bits == 16 + || info->code_unit_bits == 32) + { + if (info->bom < 0 || info->bom > 2 + || info->endian < 0 || info->endian > 1) + MERROR (MERROR_CODING, -1); + } + else + return -1; + + MSTRUCT_CALLOC (spec, MERROR_CODING); + *spec = *info; + coding->extra_spec = (void *) (spec); + return 0; +} + +static int +reset_coding_utf (MConverter *converter) +{ + MConverterStatus *internal = (MConverterStatus *) converter->internal_info; + MCodingSystem *coding = internal->coding; + struct utf_status *status = (struct utf_status *) &(converter->status); + + if (! coding->ready + && setup_coding_utf (coding) < 0) + return -1; + coding->ready = 1; + + status->surrogate = 0; + status->bom = ((MCodingInfoUTF *) (coding->extra_spec))->bom; + status->endian = ((MCodingInfoUTF *) (coding->extra_spec))->endian; + return 0; +} #define UTF8_CHARSET(p) \ (! ((p)[0] & 0x80) ? (mcharset__unicode) \ @@ -737,10 +802,33 @@ decode_coding_utf_8 (const unsigned char *source, int src_bytes, MText *mt, int nchars = 0; int last_nchars = 0; int at_most = converter->at_most > 0 ? converter->at_most : -1; + struct utf_status *status = (struct utf_status *) &(converter->status); int error = 0; int full = converter->lenient || (coding->charsets[0] == mcharset__m17n); MCharset *charset = NULL; + if (status->bom != UTF_BOM_NO) + { + int c; + + ONE_MORE_BASE_BYTE (c); + if (c != 0xEF) + REWIND_SRC_TO_BASE (); + else + { + ONE_MORE_BYTE (c); + if (c != 0xBB) + REWIND_SRC_TO_BASE (); + else + { + ONE_MORE_BYTE (c); + if (c != 0xBF) + REWIND_SRC_TO_BASE (); + } + } + status->bom = UTF_BOM_NO; + } + while (1) { int c, c1, bytes; @@ -864,73 +952,6 @@ encode_coding_utf_8 (MText *mt, int from, int to, return (converter->result == MCONVERSION_RESULT_INVALID_CHAR ? -1 : 0); } - -/* Staffs for coding-systems of type MCODING_TYPE_UTF (16 & 32). */ - -enum utf_bom - { - UTF_BOM_MAYBE, - UTF_BOM_NO, - UTF_BOM_YES, - UTF_BOM_MAX - }; - -enum utf_endian - { - UTF_BIG_ENDIAN, - UTF_LITTLE_ENDIAN, - UTF_ENDIAN_MAX - }; - -struct utf_status -{ - int surrogate; - enum utf_bom bom; - enum utf_endian endian; -}; - -static int -setup_coding_utf (MCodingSystem *coding) -{ - MCodingInfoUTF *info = (MCodingInfoUTF *) (coding->extra_info); - MCodingInfoUTF *spec; - - if (info->code_unit_bits == 8) - coding->ascii_compatible = 1; - else if (info->code_unit_bits == 16 - || info->code_unit_bits == 32) - { - if (info->bom < 0 || info->bom > 2 - || info->endian < 0 || info->endian > 1) - MERROR (MERROR_CODING, -1); - } - else - return -1; - - MSTRUCT_CALLOC (spec, MERROR_CODING); - *spec = *info; - coding->extra_spec = (void *) (spec); - return 0; -} - -static int -reset_coding_utf (MConverter *converter) -{ - MConverterStatus *internal = (MConverterStatus *) converter->internal_info; - MCodingSystem *coding = internal->coding; - struct utf_status *status = (struct utf_status *) &(converter->status); - - if (! coding->ready - && setup_coding_utf (coding) < 0) - return -1; - coding->ready = 1; - - status->surrogate = 0; - status->bom = ((MCodingInfoUTF *) (coding->extra_spec))->bom; - status->endian = ((MCodingInfoUTF *) (coding->extra_spec))->endian; - return 0; -} - static int decode_coding_utf_16 (const unsigned char *source, int src_bytes, MText *mt, MConverter *converter) @@ -1139,7 +1160,7 @@ encode_coding_utf_16 (MText *mt, int from, int to, SET_SRC (mt, format, from, to); - if (status->bom != UTF_BOM_NO) + if (status->bom == UTF_BOM_YES) { CHECK_DST (2); if (big_endian) @@ -1227,7 +1248,7 @@ encode_coding_utf_32 (MText *mt, int from, int to, SET_SRC (mt, format, from, to); - if (status->bom != UTF_BOM_NO) + if (status->bom == UTF_BOM_YES) { CHECK_DST (4); if (big_endian) @@ -2941,6 +2962,7 @@ mcoding__init (void) mplist_set (charsets, Msymbol, Mcharset_m17n); mplist_put (param, Mtype, Mutf); mplist_put (param, Mcode_unit, (void *) 8); + mplist_put (param, Mbom, Mmaybe); Mcoding_utf_8_full = mconv_define_coding ("utf-8-full", param, NULL, NULL, NULL, NULL); diff --git a/src/database.c b/src/database.c index b3a8e65..d1caecf 100644 --- a/src/database.c +++ b/src/database.c @@ -528,6 +528,7 @@ load_database (MSymbol *tags, void *extra_info) MERROR (MERROR_DB, NULL); } +#if 0 if (db_info->status != MDB_STATUS_UPDATED) { if (! xml_validate (db_info)) @@ -538,6 +539,7 @@ load_database (MSymbol *tags, void *extra_info) } db_info->status = MDB_STATUS_UPDATED; } +#endif MDEBUG_PRINT1 (" from %s\n", MTEXT_STR (path)); if (tags[0] == Mchar_table) @@ -551,8 +553,20 @@ load_database (MSymbol *tags, void *extra_info) else { int c; + unsigned char buf[4]; - while ((c = getc (fp)) != EOF && isspace (c)); + if (fgets (buf, 4, fp) == NULL) + c = EOF; + else + { + /* Skip BOM of UTF-8. */ + if ((c = buf[0]) != 0xEF) + fseek (fp, 0L, SEEK_SET); + else if (buf[1] != 0xBB) + fseek (fp, 0L, SEEK_SET); + else if (buf[2] != 0xBF) + fseek (fp, 0L, SEEK_SET); + } if (c == '<') { MDatabaseLoaderXML loader = find_xml_loader (tags); @@ -561,12 +575,10 @@ load_database (MSymbol *tags, void *extra_info) MERROR (MERROR_DB, NULL); fclose (fp); fp = NULL; - value = loader (tags, MTEXT_STR (path), - ! db_info->system_database, NULL); + value = loader (db_info, MTEXT_STR (path)); } else if (c != EOF) { - ungetc (c, fp); value = mplist__from_file (fp, NULL); } else @@ -605,6 +617,7 @@ static int update_dir_info (MDatabaseInfo *dir_info) { struct stat statbuf; + MText *mdb_file; if (! file_readble_p (dir_info->dirname, NULL, &statbuf)) { @@ -616,34 +629,26 @@ update_dir_info (MDatabaseInfo *dir_info) return 1; } - if (file_readble_p (dir_info->dirname, mdb_xml, &statbuf)) - { - if (dir_info->filename == mdb_xml - && dir_info->time >= statbuf.st_mtime) - return 0; - M17N_OBJECT_UNREF (dir_info->filename); - dir_info->filename = mdb_xml; - M17N_OBJECT_REF (mdb_xml); - dir_info->format = Mxml; - dir_info->mtime = statbuf.st_mtime; - dir_info->time = 0; - return 1; - } - if (file_readble_p (dir_info->dirname, mdb_dir, &statbuf)) - { - if (dir_info->filename == mdb_dir - && dir_info->time >= statbuf.st_mtime) - return 0; - M17N_OBJECT_UNREF (dir_info->filename); - dir_info->filename = mdb_dir; - M17N_OBJECT_REF (mdb_dir); - dir_info->format = Mplist; - dir_info->mtime = statbuf.st_mtime; - dir_info->time = 0; - return 1; - } + for (mdb_file = mdb_xml; mdb_file; + mdb_file = (mdb_file == mdb_dir ? NULL : mdb_dir)) + if (file_readble_p (dir_info->dirname, mdb_file, &statbuf)) + { + if (dir_info->filename == mdb_file + && dir_info->time >= statbuf.st_mtime) + return 0; + dir_info->status = MDB_STATUS_OUTDATED; + M17N_OBJECT_UNREF (dir_info->filename); + dir_info->filename = mdb_file; + M17N_OBJECT_REF (mdb_file); + dir_info->format = mdb_file == mdb_xml ? Mxml : Mplist; + dir_info->time = dir_info->mtime = statbuf.st_mtime; + return 1; + } + dir_info->status = MDB_STATUS_INVALID; if (! dir_info->filename) return 0; + /* The directory is readable but doesn't have mdb.xml nor + mdb.dir. */ dir_info->filename = NULL; dir_info->mtime = dir_info->time = 0; return 1; @@ -677,10 +682,10 @@ find_database (MSymbol tags[4]) p = MPLIST_PLIST (MPLIST_NEXT (p)); mdb = MPLIST_VAL (MPLIST_NEXT (p)); db_info = mdb->extra_info; - if (db_info->status != MDB_STATUS_DISABLED) + if (db_info->status == MDB_STATUS_OUTDATED) { register_databases_in_files (mdb->tag, db_info); - db_info->status = MDB_STATUS_DISABLED; + db_info->status = MDB_STATUS_UPDATED; return find_database (tags); } } @@ -782,7 +787,6 @@ register_database (MSymbol tags[4], if (mdb->extra_info) { db_info = mdb->extra_info; - M17N_OBJECT_UNREF (db_info->dirname); M17N_OBJECT_UNREF (db_info->filename); M17N_OBJECT_UNREF (db_info->validater); M17N_OBJECT_UNREF (db_info->properties); @@ -862,10 +866,13 @@ parse_mdb_xml_item (xmlNodePtr node) else if (STR_EQ (cur->name, "format")) format = msymbol (content); else if (STR_EQ (cur->name, "schema")) - schema = msymbol (content); - else if (STR_EQ (cur->name, "schema-filename")) - schema_file = mtext__from_data (content, strlen (content), - MTEXT_FORMAT_UTF_8, 1); + { + attr = xmlHasProp (cur, "type"); + schema = msymbol ((char *) attr->children->content); + content = cur->children->children->content; + schema_file = mtext__from_data (content, strlen (content), + MTEXT_FORMAT_UTF_8, 1); + } } if (format == Mnil) pl = mplist_add (pl, Mtext, filename); @@ -978,7 +985,7 @@ xml_validate (MDatabaseInfo *db_info) xmlDocPtr doc = NULL; MText *path = NULL; char *file; - int result = 0; + int result; if (db_info->schema == Mnil) return 1; @@ -991,62 +998,7 @@ xml_validate (MDatabaseInfo *db_info) | XML_PARSE_XINCLUDE | XML_PARSE_COMPACT)); if (! doc) MERROR (MERROR_DB, 0); - if (db_info->schema == Mdtd) - { - if (db_info->validater) - /* Not yet supported. */ - result = 0; - else - result = 1; - } - else if (! db_info->validater) - result = 0; - else - { - path = get_database_file (NULL, db_info->validater); - if (! path) - result = 0; - else - { - file = MTEXT_STR (path); - if (db_info->schema == Mxml_schema) - { - xmlSchemaParserCtxtPtr context = NULL; - xmlSchemaPtr schema = NULL; - xmlSchemaValidCtxtPtr valid_context = NULL; - - if ((context = xmlSchemaNewParserCtxt (file)) - && (schema = xmlSchemaParse (context)) - && (valid_context = xmlSchemaNewValidCtxt (schema)) - && xmlSchemaValidateDoc (valid_context, doc) == 0) - result = 1; - if (valid_context) - xmlSchemaFreeValidCtxt (valid_context); - if (schema) - xmlSchemaFree (schema); - if (context) - xmlSchemaFreeParserCtxt (context); - } - else if (db_info->schema == Mrelaxng) - { - xmlRelaxNGParserCtxtPtr context = NULL; - xmlRelaxNGPtr schema = NULL; - xmlRelaxNGValidCtxtPtr valid_context = NULL; - - if ((context = xmlRelaxNGNewParserCtxt (file)) - && (schema = xmlRelaxNGParse (context)) - && (valid_context = xmlRelaxNGNewValidCtxt (schema)) - && xmlRelaxNGValidateDoc (valid_context, doc) == 0) - result = 1; - if (valid_context) - xmlRelaxNGFreeValidCtxt (valid_context); - if (schema) - xmlRelaxNGFree (schema); - if (context) - xmlRelaxNGFreeParserCtxt (context); - } - } - } + result = mdatabase__validate (doc, db_info); xmlFreeDoc (doc); return result; } @@ -1248,6 +1200,9 @@ parse_header_sexp (char *filename, MSymbol tags[4], MDatabaseInfo *db_info) M17N_OBJECT_UNREF (plist); MERROR (MERROR_DB, 0); } + if (db_info->filename) + MERROR (MERROR_DB, 0); + db_info->filename = MTEXT_FOR_FILE (filename); M17N_OBJECT_UNREF (plist); return 1; } @@ -1259,7 +1214,7 @@ merge_info (MSymbol tags1[4], MDatabaseInfo *info1, int i; for (i = 0; i < 4; i++) - if (tags1[i] != Masterisk && tags1[i] != tags2[i]) + if (tags1[i] && tags1[i] != Masterisk && tags1[i] != tags2[i]) goto err; if (info2->format == Mnil) info2->format = info1->format; @@ -1274,7 +1229,7 @@ merge_info (MSymbol tags1[4], MDatabaseInfo *info1, info2->filename = info1->filename; M17N_OBJECT_REF (info2->filename); } - if (! info2->validater) + if (! info2->validater && info1->validater) { info2->validater = info1->validater; M17N_OBJECT_REF (info2->validater); @@ -1292,21 +1247,27 @@ merge_info (MSymbol tags1[4], MDatabaseInfo *info1, static void register_databases_in_files (MSymbol tags[4], MDatabaseInfo *db_info) { - glob_t globbuf; - int i; + int check[3]; + glob_t globbuf[3]; + MText *dirname[3]; + int i, j; MPlist *plist; - MText *dirname; + + check[0] = check[1] = check[2] = 0; if (ABSOLUTE_PATH_P (db_info->filename)) { - if (glob (MTEXT_STR (db_info->filename), GLOB_NOSORT, NULL, &globbuf)) - return; - dirname = NULL; + if (glob (MTEXT_STR (db_info->filename), GLOB_NOSORT, NULL, globbuf) == 0) + { + check[0] = 1; + dirname[0] = NULL; + } } else { MDatabaseInfo *dir_info; + j = 2; MPLIST_DO (plist, mdatabase__dir_list) { MText *path; @@ -1315,46 +1276,50 @@ register_databases_in_files (MSymbol tags[4], MDatabaseInfo *db_info) if (dir_info->status == MDB_STATUS_DISABLED) continue; path = GEN_PATH (dir_info->dirname, db_info->filename); - if (glob (MTEXT_STR (path), GLOB_NOSORT, NULL, &globbuf) == 0) - break; + if (glob (MTEXT_STR (path), GLOB_NOSORT, NULL, globbuf + j) == 0) + { + check[j] = 1; + dirname[j] = dir_info->dirname; + } + j--; } - if (MPLIST_TAIL_P (plist)) - return; - dirname = dir_info->dirname; } - for (i = 0; i < globbuf.gl_pathc; i++) - { - MDatabaseInfo this; - MSymbol tags2[4]; - - memset (&this, 0, sizeof (MDatabaseInfo)); - if (dirname) - { - this.dirname = dirname; - M17N_OBJECT_REF (dirname); - this.filename = MTEXT_FOR_FILE (globbuf.gl_pathv[i] - + mtext_nbytes (dirname)); - } - else - this.filename = MTEXT_FOR_FILE (globbuf.gl_pathv[i]); - if (db_info->format == Mxml) - { - xmlTextReaderPtr reader - = xmlReaderForFile (globbuf.gl_pathv[i], "utf-8", - XML_PARSE_NOBLANKS - | XML_PARSE_NOENT - | XML_PARSE_XINCLUDE); - if (reader - && parse_header_xml (reader, tags2, &this) - && merge_info (tags, db_info, tags2, &this)) - register_database (tags2, load_database, &this, MDB_TYPE_AUTO); - } - else if (parse_header_sexp (globbuf.gl_pathv[i], tags2, &this) - && merge_info (tags, db_info, tags2, &this)) - register_database (tags2, load_database, &this, MDB_TYPE_AUTO); - } - globfree (&globbuf); + for (j = 0; j < 3; j++) + if (check[j]) + { + for (i = 0; i < globbuf[j].gl_pathc; i++) + { + MDatabaseInfo this; + MSymbol tags2[4]; + + memset (&this, 0, sizeof (MDatabaseInfo)); + if (dirname[j]) + this.filename = MTEXT_FOR_FILE (globbuf[j].gl_pathv[i] + + mtext_nbytes (dirname[j])); + else + this.filename = MTEXT_FOR_FILE (globbuf[j].gl_pathv[i]); + if (db_info->format == Mxml) + { + xmlTextReaderPtr reader + = xmlReaderForFile (globbuf[j].gl_pathv[i], "utf-8", + XML_PARSE_NOBLANKS + | XML_PARSE_NOENT + | XML_PARSE_XINCLUDE); + if (reader + && parse_header_xml (reader, tags2, &this) + && merge_info (tags, db_info, tags2, &this)) + register_database (tags2, load_database, &this, + MDB_TYPE_AUTO); + } + else if (parse_header_sexp (globbuf[j].gl_pathv[i], tags2, &this) + && merge_info (tags, db_info, tags2, &this)) + register_database (tags2, load_database, &this, MDB_TYPE_AUTO); + M17N_OBJECT_UNREF (this.filename); + M17N_OBJECT_UNREF (this.validater); + } + globfree (globbuf + j); + } } static int @@ -1372,10 +1337,10 @@ expand_wildcard_database (MPlist *plist) mdb = MPLIST_VAL (plist); if (mdb->loader == load_database && (db_info = mdb->extra_info) - && db_info->status != MDB_STATUS_DISABLED) + && db_info->status != MDB_STATUS_UPDATED) { register_databases_in_files (mdb->tag, db_info); - db_info->status = MDB_STATUS_DISABLED; + db_info->status = MDB_STATUS_UPDATED; return 1; } return 0; @@ -1503,6 +1468,7 @@ mdatabase__update (void) { MPlist *plist, *p0, *p1, *p2, *p3; int rescan = 0; + int mdebug_flag = MDEBUG_DATABASE; /* Update elements of mdatabase__dir_list. */ MPLIST_DO (plist, mdatabase__dir_list) @@ -1554,14 +1520,19 @@ mdatabase__update (void) MText *path; if (dir_info->status == MDB_STATUS_DISABLED - || ! dir_info->filename) + || dir_info->status == MDB_STATUS_INVALID) continue; path = GEN_PATH (dir_info->dirname, dir_info->filename); + MDEBUG_PRINT1 (" [DB] Parsing <%s>", MTEXT_STR (path)); p0 = (dir_info->format == Mxml ? parse_mdb_xml (MTEXT_STR (path), ! dir_info->system_database) : parse_mdb_dir (MTEXT_STR (path))); if (! p0) - continue; + { + MDEBUG_PRINT (" fail\n"); + continue; + } + MDEBUG_PRINT ("\n"); MPLIST_DO (p1, p0) { MSymbol tags[4]; @@ -1616,8 +1587,7 @@ mdatabase__load_for_keys (MDatabase *mdb, MPlist *keys) if (! loader) MERROR (MERROR_DB, NULL); - plist = loader (mdb->tag, MTEXT_STR (path), - ! db_info->system_database, keys); + plist = loader (db_info, MTEXT_STR (path)); } else if (c != EOF) plist = mplist__from_file (fp, keys); @@ -1667,10 +1637,7 @@ mdatabase__find_file (char *filename) if (! path) filename = NULL; else - { - filename = strdup (MTEXT_STR (path)); - M17N_OBJECT_UNREF (path); - } + filename = strdup (MTEXT_STR (path)); M17N_OBJECT_UNREF (file); return filename; } @@ -1861,11 +1828,77 @@ mdatabase__register_xml_loader (MSymbol tags[4], MDatabaseLoaderXML loader) plist = MPLIST_NEXT (pl); } } - MPLIST_KEY (plist) = Mt; + mplist_add (plist, Mt, NULL); MPLIST_FUNC (plist) = (M17NFunc) loader; MPLIST_SET_VAL_FUNC_P (plist); } +int +mdatabase__validate (xmlDocPtr doc, MDatabaseInfo *db_info) +{ + int result = 0; + MText *path = NULL; + char *file; + + if (db_info->schema == Mdtd) + { + if (db_info->validater) + /* Not yet supported. */ + result = 0; + else + result = 1; + } + else if (db_info->validater) + { + path = get_database_file (NULL, db_info->validater); + if (! path) + result = 0; + else + { + file = MTEXT_STR (path); + if (db_info->schema == Mxml_schema) + { + xmlSchemaParserCtxtPtr context = NULL; + xmlSchemaPtr schema = NULL; + xmlSchemaValidCtxtPtr valid_context = NULL; + + if ((context = xmlSchemaNewParserCtxt (file)) + && (schema = xmlSchemaParse (context)) + && (valid_context = xmlSchemaNewValidCtxt (schema)) + && xmlSchemaValidateDoc (valid_context, doc) == 0) + result = 1; + if (valid_context) + xmlSchemaFreeValidCtxt (valid_context); + if (schema) + xmlSchemaFree (schema); + if (context) + xmlSchemaFreeParserCtxt (context); + } + else if (db_info->schema == Mrelaxng) + { + xmlRelaxNGParserCtxtPtr context = NULL; + xmlRelaxNGPtr schema = NULL; + xmlRelaxNGValidCtxtPtr valid_context = NULL; + + if ((context = xmlRelaxNGNewParserCtxt (file)) + && (schema = xmlRelaxNGParse (context)) + && (valid_context = xmlRelaxNGNewValidCtxt (schema)) + && xmlRelaxNGValidateDoc (valid_context, doc) == 0) + result = 1; + if (valid_context) + xmlRelaxNGFreeValidCtxt (valid_context); + if (schema) + xmlRelaxNGFree (schema); + if (context) + xmlRelaxNGFreeParserCtxt (context); + if (! result) + MERROR (MERROR_DB, result); + } + } + } + return result; +} + /*** @} */ #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */ @@ -1969,11 +2002,12 @@ mdatabase_find (MSymbol tag0, MSymbol tag1, MSymbol tag2, MSymbol tag3) MPlist * mdatabase_list (MSymbol tag0, MSymbol tag1, MSymbol tag2, MSymbol tag3) { - MPlist *plist = mplist (), *pl = plist; - MPlist *p, *p0, *p1, *p2, *p3; + MPlist *plist, *pl, *p, *p0, *p1, *p2, *p3; mdatabase__update (); + retry: + plist = mplist (), pl = plist; MPLIST_DO (p, mdatabase__list) { p0 = MPLIST_PLIST (p); @@ -1989,7 +2023,7 @@ mdatabase_list (MSymbol tag0, MSymbol tag1, MSymbol tag2, MSymbol tag3) if (expand_wildcard_database (p1)) { M17N_OBJECT_UNREF (plist); - return mdatabase_list (tag0, tag1, tag2, tag3); + goto retry; } continue; } @@ -2003,7 +2037,7 @@ mdatabase_list (MSymbol tag0, MSymbol tag1, MSymbol tag2, MSymbol tag3) if (expand_wildcard_database (p2)) { M17N_OBJECT_UNREF (plist); - return mdatabase_list (tag0, tag1, tag2, tag3); + goto retry; } continue; } @@ -2017,7 +2051,7 @@ mdatabase_list (MSymbol tag0, MSymbol tag1, MSymbol tag2, MSymbol tag3) if (expand_wildcard_database (p3)) { M17N_OBJECT_UNREF (plist); - return mdatabase_list (tag0, tag1, tag2, tag3); + goto retry; } continue; } diff --git a/src/database.h b/src/database.h index 55eb5d5..2589041 100644 --- a/src/database.h +++ b/src/database.h @@ -100,8 +100,7 @@ typedef struct MPlist *properties; } MDatabaseInfo; -typedef MPlist *(*MDatabaseLoaderXML) (MSymbol *, char *filename, int validate, - MPlist *keys); +typedef MPlist *(*MDatabaseLoaderXML) (MDatabaseInfo *, char *filename); extern MPlist *mdatabase__dir_list; @@ -125,4 +124,7 @@ extern MPlist *mdatabase__props (MDatabase *mdb); extern void *(*mdatabase__load_charset_func) (FILE *fp, MSymbol charset_name); +#include +extern int mdatabase__validate (xmlDocPtr doc, MDatabaseInfo *db_info); + #endif /* not _M17N_DATABASE_H_ */ diff --git a/src/input-xml.c b/src/input-xml.c new file mode 100644 index 0000000..201caa1 --- /dev/null +++ b/src/input-xml.c @@ -0,0 +1,1240 @@ +/* input-xml.c -- XML decoder for input method module. + Copyright (C) 2009 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H15PRO112 + + This file is part of the m17n library. + + The m17n library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License + as published by the Free Software Foundation; either version 2.1 of + the License, or (at your option) any later version. + + The m17n library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the m17n library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + 02111-1307, USA. */ + +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "m17n.h" +#include "m17n-misc.h" +#include "internal.h" +#include "database.h" + +static void decode_saction (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent); + +/* Sometimes isspace () returns non-zero values for chars between 255 + and EOF. */ +static int +Isspace (int ch) +{ + if (ch == 32 || ch == 8 || ch == 10 || ch == 13) + return 1; + else + return 0; +} + +static MSymbol +decode_predefined (xmlChar *ptr) +{ + char str[3]; + + str[0] = '@'; + str[2] = '\0'; + if (xmlStrEqual (ptr, (xmlChar *) "first")) + str [1] = '<'; + else if (xmlStrEqual (ptr, (xmlChar *) "current")) + str [1] = '='; + else if (xmlStrEqual (ptr, (xmlChar *) "last")) + str [1] = '>'; + else if (xmlStrEqual (ptr, (xmlChar *) "previous")) + str [1] = '-'; + else if (xmlStrEqual (ptr, (xmlChar *) "next")) + str [1] = '+'; + else if (xmlStrEqual (ptr, (xmlChar *) "previous_candidate_list")) + str [1] = '['; + else if (xmlStrEqual (ptr, (xmlChar *) "next_candidate_list")) + str [1] = ']'; + else + str [1] = *ptr; + return msymbol (str); +} + +static void +decode_marker (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr = xmlGetProp (cur, (xmlChar *) "position"); + + if (ptr) + mplist_add (parent, Msymbol, decode_predefined (ptr + 1)); + else + { + ptr = xmlGetProp (cur, (xmlChar *) "markerID"); + mplist_add (parent, Msymbol, msymbol ((char *) ptr)); + } + xmlFree (ptr); +} + +static void +decode_variable_reference (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + /* + xmlChar *id = xmlGetProp (cur, (xmlChar *) "id"); + xmlChar *type = xmlGetProp (cur, (xmlChar *) "type"); + + if (type) + { + xmlFree (type); + if (xmlStrEqual (id, (xmlChar *) "handled-keys")) + mplist_add (parent, Msymbol, msymbol ("@@")); + else if (xmlStrEqual (id, (xmlChar *) "predefined-surround-text-flag")) + mplist_add (parent, Msymbol, msymbol ("@-0")); + else + mplist_add (parent, Msymbol, msymbol ((char *) id)); + } + else + mplist_add (parent, Msymbol, msymbol ((char *) id)); + + xmlFree (id); + */ + + xmlChar *ptr = xmlGetProp (cur, (xmlChar *) "id"); + + if (xmlStrEqual (ptr, (xmlChar *) "handled-keys")) + mplist_add (parent, Msymbol, msymbol ("@@")); + else if (xmlStrEqual (ptr, (xmlChar *) "predefined-surround-text-flag")) + mplist_add (parent, Msymbol, msymbol ("@-0")); + else + mplist_add (parent, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); +} + +static void +decode_integer (xmlChar *ptr, MPlist *parent) +{ + while (Isspace (*ptr)) + ptr++; + + if (xmlStrlen (ptr) >= 3 + && (ptr[0] == '0' || ptr[0] == '#') + && ptr[1] == 'x') + { + int val; + + sscanf ((char *) ptr + 2, "%x", &val); + mplist_add (parent, Minteger, (void *) val); + } + else if (ptr[0] == '?') + { + int val, len = 4; + + val = xmlGetUTF8Char (ptr + 1, &len); + mplist_add (parent, Minteger, (void *) val); + } + else + mplist_add (parent, Minteger, (void *) atoi ((char *) ptr)); +} + +static void +decode_keyseq (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + + if (xmlStrEqual (cur->name, (xmlChar *) "command-reference")) + { + ptr = xmlGetProp (cur, (xmlChar *) "id"); + /* +8 to skip "command-" */ + mplist_add (parent, Msymbol, msymbol ((char *) ptr + 8)); + xmlFree (ptr); + } + + else if ((ptr = xmlGetProp (cur, (xmlChar *) "keys"))) + mplist_add (parent, Mtext, + mtext_from_data (ptr, xmlStrlen (ptr), MTEXT_FORMAT_UTF_8)); + + else + { + MPlist *plist = mplist (); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + { + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + if (xmlStrEqual (cur->name, (xmlChar *) "key-event")) + /* + { + char *p; + + sscanf ((char *) ptr, " %ms ", &p); + mplist_add (plist, Msymbol, msymbol (p)); + printf ("(%s)", p); + free (p); + } + */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + else /* character-code */ + decode_integer (ptr, plist); + xmlFree (ptr); + } + mplist_add (parent, Mplist, plist); + } +} + +static void +decode_plist (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + /* to be written */ +} + +static void +decode_expr (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + if (xmlStrEqual (cur->name, (xmlChar *) "expr")) + { + xmlChar *ptr = xmlGetProp (cur, (xmlChar *) "operator"); + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_expr (doc, cur, plist); + + mplist_add (parent, Mplist, plist); + } + else if (xmlStrEqual (cur->name, (xmlChar *) "int-val")) + { + xmlChar *ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + + decode_integer (ptr, parent); + xmlFree (ptr); + } + else if (xmlStrEqual (cur->name, (xmlChar *) "predefined-nth-previous-or-following-character")) + { + xmlChar *ptr = xmlGetProp (cur, (xmlChar *) "position"); + char str[8]; + + snprintf (str, 8, "@%+d", atoi ((char *) ptr)); + xmlFree (ptr); + mplist_add (parent, Msymbol, msymbol (str)); + } + else /* variable-reference */ + decode_variable_reference (doc, cur, parent); +} + +static void +decode_insert (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("insert")); + + if ((ptr = xmlGetProp (cur, (xmlChar *) "string"))) + mplist_add (plist, Mtext, + mtext_from_data (ptr, xmlStrlen (ptr), MTEXT_FORMAT_UTF_8)); + + else if ((ptr = xmlGetProp (cur, (xmlChar *) "character"))) + { + decode_integer (ptr, plist); + xmlFree (ptr); + } + + else if ((ptr = xmlGetProp (cur, (xmlChar *) "character-or-string"))) + { + xmlFree (ptr); + ptr = xmlGetProp (cur->xmlChildrenNode, (xmlChar *) "id"); + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + } + + else if ((cur = cur->xmlChildrenNode) + && xmlStrEqual (cur->name, (xmlChar *) "candidates")) + { + MPlist *pl = mplist (); + + mplist_add (plist, Mplist, pl); + M17N_OBJECT_UNREF (pl); + while (cur) + { + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + pl = mplist_add (pl, Mtext, + mtext_from_data (ptr, xmlStrlen (ptr), + MTEXT_FORMAT_UTF_8)); + cur = cur->next; + } + } + + else /* list-of-candidates */ + while (cur) + { + xmlChar *start; + MPlist *plist0 = mplist (), *plist1 = mplist (); + int ch, len = 4, skipping = 1; + + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + + while ((ch = xmlGetUTF8Char (ptr, &len))) + { + if (skipping && ! Isspace (ch)) + { + start = ptr; + skipping = 0; + } + else if (! skipping && Isspace (ch)) + { + *ptr = '\0'; + mplist_add (plist0, Mtext, + mtext_from_data (start, xmlStrlen (start), + MTEXT_FORMAT_UTF_8)); + skipping = 1; + } + ptr += len; + len = 4; + } + if (!skipping) + { + mplist_add (plist0, Mtext, + mtext_from_data (start, xmlStrlen (start), + MTEXT_FORMAT_UTF_8)); + } + + mplist_add (plist1, Mplist, plist0); + mplist_add (plist, Mplist, plist1); + cur = cur->next; + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_delete (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("delete")); + + if (xmlStrEqual (cur->name, (xmlChar *) "delete-to-marker")) + decode_marker (doc, cur, plist); + else if (xmlStrEqual (cur->name, + (xmlChar *) "delete-to-character-position")) + { + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + decode_integer (ptr, plist); + xmlFree (ptr); + } + else /* delete-n-characters */ + { + char str[8]; + + ptr = xmlGetProp (cur, (xmlChar *) "n"); + snprintf (str, 8, "@%+d", atoi ((char *) ptr)); + xmlFree (ptr); + mplist_add (plist, Msymbol, msymbol (str)); + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_select (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("select")); + + if ((ptr = xmlGetProp (cur, (xmlChar *) "selector"))) + mplist_add (plist, Msymbol, decode_predefined (ptr + 1)); + else + { + ptr = xmlGetProp (cur, (xmlChar *) "index"); + if (xmlStrEqual (ptr, (xmlChar *) "variable")) + { + xmlFree (ptr); + ptr = xmlGetProp (cur->xmlChildrenNode, (xmlChar *) "id"); + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + } + else + decode_integer (ptr, plist); + } + xmlFree (ptr); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_move (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("move")); + + if (xmlStrEqual (cur->name, (xmlChar *) "move-to-marker")) + decode_marker (doc, cur, plist); + + else /* move-to-character-position */ + { + xmlChar *ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + + decode_integer (ptr, plist); + xmlFree (ptr); + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_mark (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("mark")); + + decode_marker (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_pushback (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("pushback")); + + if (xmlStrEqual (cur->name, (xmlChar *) "pushback-n-events")) + { + xmlChar *ptr = xmlGetProp (cur, (xmlChar *) "n"); + + decode_integer (ptr, plist); + xmlFree (ptr); + } + + else /* pushback-keyseq */ + decode_keyseq (doc, cur->xmlChildrenNode, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_undo (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("undo")); + + if ((ptr = xmlGetProp (cur, (xmlChar *) "target-of-undo"))) + { + decode_integer (ptr, plist); + xmlFree (ptr); + } + else if (cur->xmlChildrenNode) + decode_variable_reference (doc, cur->xmlChildrenNode, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_call (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("call")); + + ptr = xmlGetProp (cur, (xmlChar *) "id"); + /* +7 to skip "module-" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 7)); + xmlFree (ptr); + + cur = cur->xmlChildrenNode; + ptr = xmlGetProp (cur, (xmlChar *) "id"); + /* +9 to skip "function-" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 9)); + xmlFree (ptr); + + for (cur = cur->next; cur; cur = cur->next) + { + ptr = xmlGetProp (cur, (xmlChar *) "type"); + if (xmlStrEqual (ptr, (xmlChar *) "string")) + { + xmlFree (ptr); + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + mplist_add (plist, Mtext, mtext_from_data (ptr, xmlStrlen (ptr), + MTEXT_FORMAT_UTF_8)); + } + else if (xmlStrEqual (ptr, (xmlChar *) "integer")) + { + xmlFree (ptr); + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + decode_integer (ptr, plist); + xmlFree (ptr); + } + else if (xmlStrEqual (ptr, (xmlChar *) "plist")) + { + xmlFree (ptr); + decode_plist (doc, cur->xmlChildrenNode, plist); + } + else /* symbol */ + { + xmlFree (ptr); + decode_variable_reference (doc, cur->xmlChildrenNode, plist); + } + } + mplist_add (parent, Mplist, plist); +} + +static void +decode_set (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ((char *) cur->name)); + + ptr = xmlGetProp (cur, (xmlChar *) "id"); + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + + decode_expr (doc, cur->xmlChildrenNode, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_if (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + xmlNodePtr cur0; + MPlist *plist = mplist (), *plist0 = mplist (); + + ptr = xmlGetProp (cur, (xmlChar *) "condition"); + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + + cur = cur->xmlChildrenNode; /* 1st arg */ + decode_expr (doc, cur, plist); + + cur = cur->next; /* 2nd arg */ + decode_expr (doc, cur, plist); + + cur = cur->next; /* then */ + plist0 = mplist (); + for (cur0 = cur->xmlChildrenNode; cur0; cur0 = cur0->next) + decode_saction (doc, cur0, plist0); + mplist_add (plist, Mplist, plist0); + + cur = cur->next; /* else */ + if (cur) + { + plist0 = mplist (); + for (cur0 = cur->xmlChildrenNode; cur0; cur0 = cur0->next) + decode_saction (doc, cur0, plist0); + mplist_add (plist, Mplist, plist0); + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_conditional (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("cond")); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + { + xmlNodePtr cur0 = cur->xmlChildrenNode; + MPlist *plist0 = mplist (); + + decode_expr (doc, cur0, plist0); + + while ((cur0 = cur0->next)) + decode_saction (doc, cur0, plist0); + + mplist_add (plist, Mplist, plist0); + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_action (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + if (xmlStrEqual (cur->name, (xmlChar *) "insert")) + decode_insert (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "delete-to-marker") + || xmlStrEqual (cur->name, + (xmlChar *) "delete-to-character-position") + || xmlStrEqual (cur->name, (xmlChar *) "delete-n-characters")) + decode_delete (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "select")) + decode_select (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "show-candidates")) + { + mplist_add (plist, Msymbol, msymbol ("show")); + mplist_add (parent, Mplist, plist); + } + + else if (xmlStrEqual (cur->name, (xmlChar *) "hide-candidates")) + { + mplist_add (plist, Msymbol, msymbol ("hide")); + mplist_add (parent, Mplist, plist); + } + + else if (xmlStrEqual (cur->name, (xmlChar *) "move-to-marker") + || xmlStrEqual (cur->name, (xmlChar *) "move-to-character-position")) + decode_move (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "mark-current-position")) + decode_mark (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "pushback-n-events") + || xmlStrEqual (cur->name, (xmlChar *) "pushback-keyseq")) + decode_pushback (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "pop")) + { + mplist_add (plist, Msymbol, msymbol ((char *) cur->name)); + mplist_add (parent, Mplist, plist); + } + + else if (xmlStrEqual (cur->name, (xmlChar *) "undo")) + decode_undo (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "commit")) + { + mplist_add (plist, Msymbol, msymbol ((char *) cur->name)); + mplist_add (parent, Mplist, plist); + } + + else if (xmlStrEqual (cur->name, (xmlChar *) "unhandle")) + { + mplist_add (plist, Msymbol, msymbol ((char *) cur->name)); + mplist_add (parent, Mplist, plist); + } + + else if (xmlStrEqual (cur->name, (xmlChar *) "call")) + decode_call (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "set") + || xmlStrEqual (cur->name, (xmlChar *) "add") + || xmlStrEqual (cur->name, (xmlChar *) "sub") + || xmlStrEqual (cur->name, (xmlChar *) "mul") + || xmlStrEqual (cur->name, (xmlChar *) "div")) + decode_set (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "if")) + decode_if (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "conditional")) + decode_conditional (doc, cur, parent); + + else if (xmlStrEqual (cur->name, (xmlChar *) "macro-reference")) + { + xmlChar *ptr = xmlGetProp (cur, (xmlChar *) "id"); + + /* +6 to skip "macro-" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 6)); + mplist_add (parent, Mplist, plist); + xmlFree (ptr); + } + + else + { + /* never comes here */ + } +} + +static void +decode_saction (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + if (xmlStrEqual (cur->name, (xmlChar *) "shift-to")) + { + xmlChar *ptr = xmlGetProp (cur, (xmlChar *) "id"); + MPlist *plist = mplist (); + + /* +6 to skip "state-" */ + mplist_add (plist, Msymbol, msymbol ("shift")); + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 6)); + mplist_add (parent, Mplist, plist); + xmlFree (ptr); + } + else + decode_action (doc, cur, parent); +} + +static void +decode_description (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + + if (xmlStrEqual (cur->name, (xmlChar *) "get-text")) + { + MPlist *plist = mplist (); + + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + mplist_add (plist, Msymbol, msymbol ("_")); + mplist_add (plist, Mtext, + mtext_from_data (ptr, xmlStrlen (ptr), MTEXT_FORMAT_UTF_8)); + mplist_add (parent, Mplist, plist); + } + else + { + ptr = xmlNodeListGetString (doc, cur, 1); + mplist_add (parent, Mtext, + mtext_from_data (ptr, xmlStrlen (ptr), MTEXT_FORMAT_UTF_8)); + } +} + +/***/ + +static void +decode_im_declaration (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + /* tags */ + mplist_add (plist, Msymbol, msymbol ("input-method")); + + /* language */ + cur = cur->xmlChildrenNode; + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + + /* name */ + cur = cur->next; + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + cur = cur->next; + + /* extra-id */ + if (cur && xmlStrEqual (cur->name, (xmlChar *) "extra-id")) + { + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + cur = cur->next; + } + + /* m17n-version */ + if (cur && xmlStrEqual (cur->name, (xmlChar *) "m17n-version")) + { + MPlist *plist0 = mplist (); + + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + mplist_add (plist0, Msymbol, msymbol ("version")); + mplist_add (plist0, Mtext, + mtext_from_data (ptr, xmlStrlen (ptr), MTEXT_FORMAT_UTF_8)); + mplist_add (plist, Mplist, plist0); + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_im_description (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("description")); + decode_description (doc, cur->xmlChildrenNode, plist); + mplist_add (parent, Mplist, plist); +} + +static void +decode_title (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("title")); + mplist_add (plist, Mtext, + mtext_from_data (ptr, xmlStrlen (ptr), MTEXT_FORMAT_UTF_8)); + mplist_add (parent, Mplist, plist); +} + +static void +decode_variable (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + ptr = xmlGetProp (cur, (xmlChar *) "id"); + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + + if ((cur = cur->xmlChildrenNode)) + { + /* description */ + if (xmlStrEqual (cur->name, (xmlChar *) "description")) + { + decode_description (doc, cur->xmlChildrenNode, plist); + cur = cur->next; + } + else + mplist_add (plist, Msymbol, Mnil); + + /* value */ + if (cur && xmlStrEqual (cur->name, (xmlChar *) "value")) + { + xmlChar *valuetype; + + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + valuetype = xmlGetProp (cur, (xmlChar *) "type"); + if (xmlStrEqual (valuetype, (xmlChar *) "string")) + mplist_add (plist, Mtext, + mtext_from_data (ptr, xmlStrlen (ptr), + MTEXT_FORMAT_UTF_8)); + else if (xmlStrEqual (valuetype, (xmlChar *) "symbol")) + { + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + } + else /* integer */ + { + decode_integer (ptr, plist); + xmlFree (ptr); + } + xmlFree (valuetype); + cur = cur->next; + } + + /* variable-value-candidate */ + if (cur) + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + { + if (xmlStrEqual (cur->name, (xmlChar *) "c-value")) + { + xmlChar *valuetype; + + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + valuetype = xmlGetProp (cur, (xmlChar *) "type"); + if (xmlStrEqual (valuetype, (xmlChar *) "string")) + mplist_add (plist, Mtext, + mtext_from_data (ptr, xmlStrlen (ptr), + MTEXT_FORMAT_UTF_8)); + else if (xmlStrEqual (valuetype, (xmlChar *) "symbol")) + { + mplist_add (plist, Msymbol, msymbol ((char *) ptr)); + xmlFree (ptr); + } + else /* integer */ + { + decode_integer (ptr, plist); + xmlFree (ptr); + } + xmlFree (valuetype); + } + else /* c-range */ + { + MPlist *range = mplist (); + + ptr = xmlGetProp (cur, (xmlChar *) "from"); + decode_integer (ptr, range); + xmlFree (ptr); + ptr = xmlGetProp (cur, (xmlChar *) "to"); + decode_integer (ptr, range); + xmlFree (ptr); + mplist_add (plist, Mplist, range); + } + } + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_variable_list (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("variable")); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_variable (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +void +decode_command (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + ptr = xmlGetProp (cur, (xmlChar *) "id"); + /* +8 to skip "command-" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 8)); + xmlFree (ptr); + + if ((cur = cur->xmlChildrenNode)) + { + /* description */ + if (xmlStrEqual (cur->name, (xmlChar *) "description")) + { + decode_description (doc, cur->xmlChildrenNode, plist); + cur = cur->next; + } + else + mplist_add (plist, Msymbol, Mnil); + + /* keyseq */ + for (; cur; cur = cur->next) + decode_keyseq (doc, cur, plist); + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_command_list (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("command")); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_command (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_module (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + ptr = xmlGetProp (cur, (xmlChar *) "id"); + /* +7 to skip "module-" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 7)); + xmlFree (ptr); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + { + ptr = xmlGetProp (cur, (xmlChar *) "id"); + /* +9 to skip "function" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 9)); + xmlFree (ptr); + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_module_list (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("module")); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_module (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_macro (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + ptr = xmlGetProp (cur, (xmlChar *) "id"); + /* +6 to skip "macro-" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 6)); + xmlFree (ptr); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_action (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_macro_list (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("macro")); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_macro (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_rule (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + cur = cur->xmlChildrenNode; + decode_keyseq (doc, cur, plist); + cur = cur->next; + + for (; cur; cur = cur->next) + decode_action (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_map (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + ptr = xmlGetProp (cur, (xmlChar *) "id"); + /* +4 to skip "map-" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 4)); + xmlFree (ptr); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_rule (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_map_list (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("map")); + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_map (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +static void +decode_branch (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + if (xmlStrEqual (cur->name, (xmlChar *) "state-hook")) + mplist_add (plist, Msymbol, Mt); + else if (xmlStrEqual (cur->name, (xmlChar *) "catch-all-branch")) + mplist_add (plist, Msymbol, Mnil); + else /* branch */ + { + ptr = xmlGetProp (cur, (xmlChar *) "branch-selecting-map"); + /* +4 to skip "map-" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 4)); + xmlFree (ptr); + } + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_saction (doc, cur, plist); + + if (mplist_length (plist)) + mplist_add (parent, Mplist, plist); +} + +static void +decode_state (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + xmlChar *ptr; + MPlist *plist = mplist (); + + ptr = xmlGetProp (cur, (xmlChar *) "id"); + /* +6 to skip "state-" */ + mplist_add (plist, Msymbol, msymbol ((char *) ptr + 6)); + xmlFree (ptr); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + { + if (xmlStrEqual (cur->name, (xmlChar *) "state-title-text")) + { + ptr = xmlNodeListGetString (doc, cur->xmlChildrenNode, 1); + mplist_add (plist, Mtext, mtext_from_data (ptr, xmlStrlen (ptr), + MTEXT_FORMAT_UTF_8)); + } + else + decode_branch (doc, cur, plist); + } + + mplist_add (parent, Mplist, plist); +} + +static void +decode_state_list (xmlDocPtr doc, xmlNodePtr cur, MPlist *parent) +{ + MPlist *plist = mplist (); + + mplist_add (plist, Msymbol, msymbol ("state")); + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + decode_state (doc, cur, plist); + + mplist_add (parent, Mplist, plist); +} + +static int +rewrite_include (xmlNodePtr cur) +{ + xmlChar *ptr, *suffix, *filename, *fullname, *newvalue; + int len; + + ptr = xmlGetProp (cur, (xmlChar *) "href"); + suffix = (xmlChar *) xmlStrstr (ptr, (xmlChar *) "#xmlns"); + + len = suffix - ptr; + filename = malloc (len + 1); + filename[0] = '\0'; + xmlStrncat (filename, ptr, len); + fullname = (xmlChar *) mdatabase__find_file ((char *) filename); + if (! fullname) + { + xmlFree (ptr); + free (filename); + return -1; + } + else + { + newvalue = xmlStrncatNew (fullname, suffix, -1); + xmlSetProp (cur, (xmlChar *) "href", newvalue); + xmlFree (ptr); + free (filename); + free (fullname); + xmlFree (newvalue); + return 0; + } +} + +static int +prepare_include (xmlNodePtr cur) +{ + xmlNodePtr cur0; + + for (cur = cur->xmlChildrenNode; cur; cur = cur->next) + if (xmlStrEqual (cur->name, (xmlChar *) "macro-list") + || xmlStrEqual (cur->name, (xmlChar *) "map-list") + || xmlStrEqual (cur->name, (xmlChar *) "state-list")) + for (cur0 = cur->xmlChildrenNode; cur0; cur0 = cur0->next) + if (xmlStrEqual (cur0->name, (xmlChar *) "include")) + if (rewrite_include (cur0) == -1) + return -1; + return 0; +} + +MPlist * +minput__load_xml (MDatabaseInfo *db_info, char *filename) +{ + xmlDocPtr doc; + xmlNodePtr cur; + MPlist *xml = mplist (); + + doc = xmlReadFile (filename, NULL, XML_PARSE_NOENT | XML_PARSE_NOBLANKS); + if (! doc) + return NULL; + + cur = xmlDocGetRootElement (doc); + if (! cur) + { + xmlFreeDoc (doc); + return NULL; + } + + if (xmlStrcmp (cur->name, (xmlChar *) "input-method")) + { + xmlFreeDoc (doc); + return NULL; + } + + if (prepare_include (cur) == -1) + { + xmlFreeDoc (doc); + return NULL; + } + + xmlXIncludeProcessFlags (doc, XML_PARSE_NOENT | XML_PARSE_NOBLANKS + | XML_PARSE_NOXINCNODE); + if (! mdatabase__validate (doc, db_info)) + { + xmlFreeDoc (doc); + MERROR (MERROR_IM, NULL); + } + + cur = xmlDocGetRootElement (doc)->xmlChildrenNode; + decode_im_declaration (doc, cur, xml); + cur = cur->next; + + if (cur && xmlStrEqual (cur->name, (xmlChar *) "description")) + { + decode_im_description (doc, cur, xml); + cur = cur->next; + } + + if (cur && xmlStrEqual (cur->name, (xmlChar *) "title")) + { + decode_title (doc, cur, xml); + cur = cur->next; + } + + if (cur && xmlStrEqual (cur->name, (xmlChar *) "variable-list")) + { + decode_variable_list (doc, cur, xml); + cur = cur->next; + } + + if (cur && xmlStrEqual (cur->name, (xmlChar *) "command-list")) + { + decode_command_list (doc, cur, xml); + cur = cur->next; + } + + if (cur && xmlStrEqual (cur->name, (xmlChar *) "module-list")) + { + decode_module_list (doc, cur, xml); + cur = cur->next; + } + + if (cur && xmlStrEqual (cur->name, (xmlChar *) "macro-list")) + { + decode_macro_list (doc, cur, xml); + cur = cur->next; + } + + if (cur && xmlStrEqual (cur->name, (xmlChar *) "map-list")) + { + decode_map_list (doc, cur, xml); + cur = cur->next; + } + + if (cur && xmlStrEqual (cur->name, (xmlChar *) "state-list")) + { + decode_state_list (doc, cur, xml); + cur = cur->next; + } + + xmlFreeDoc (doc); + return xml; +} diff --git a/src/input.c b/src/input.c index 631bb84..5123eac 100644 --- a/src/input.c +++ b/src/input.c @@ -1,5 +1,5 @@ /* input.c -- input method module. - Copyright (C) 2003, 2004, 2005, 2006 + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H15PRO112 @@ -270,7 +270,8 @@ static int update_global_info (void); static int update_custom_info (void); static MInputMethodInfo *get_im_info (MSymbol, MSymbol, MSymbol, MSymbol); -static MDatabaseLoaderXML load_xml_input_method; +extern MPlist minput__load_xml (MDatabaseInfo *, char *); +/*extern MDatabaseLoaderXML minput__load_xml;*/ void @@ -504,7 +505,7 @@ fully_initialize () tags[0] = Minput_method; tags[1] = tags[2] = tags[3] = Mnil; - mdatabase__register_xml_loader (tags, load_xml_input_method); + mdatabase__register_xml_loader (tags, minput__load_xml); } im_info_list = mplist (); @@ -1492,9 +1493,10 @@ update_custom_info (void) || (mtext_nbytes (custom_dir_info->filename) + strlen (CUSTOM_FILE) > PATH_MAX - 1)) return -1; - dirname = mtext_data (custom_dir_info->filename, NULL, &len, NULL, NULL); + dirname = mtext_data (custom_dir_info->dirname, NULL, &len, NULL, NULL); strcpy (custom_path, dirname); - custom_path[len++] = PATH_SEPARATOR; + if (custom_path[len - 1] != PATH_SEPARATOR) + custom_path[len++] = PATH_SEPARATOR; strcpy (custom_path + len, CUSTOM_FILE); im_custom_mdb = mdatabase_define (Minput_method, Mt, Mnil, Mconfig, NULL, custom_path); diff --git a/src/plist.c b/src/plist.c index be42e1d..38cab18 100644 --- a/src/plist.c +++ b/src/plist.c @@ -177,6 +177,31 @@ get_byte (MStream *st) #define UNGETC(c, st) (--((st)->p)) +static void +init_stream (MStream *st, FILE *fp, unsigned char *str, int n) +{ + st->eof = 0; + st->fp = fp; + if (fp) + { + int c; + + st->fp = fp; + st->p = st->pend = st->buffer; + if ((c = GETC (st)) != 0xEF) + st->p = st->buffer; + else if ((c = GETC (st)) != 0xBB) + st->p = st->buffer; + else if ((c = GETC (st)) != 0xBF) + st->p = st->buffer; + } + else + { + st->p = str; + st->pend = str + n; + } +} + /** Mapping table for reading a number. Hexadecimal chars (0..9,A..F,a..F) are mapped to the corresponding numbers. Apostrophe (code 39) is mapped to 254. All the other bytes are @@ -800,9 +825,7 @@ mplist__from_file (FILE *fp, MPlist *keys) MPlist *plist, *pl; MStream st; - st.fp = fp; - st.eof = 0; - st.p = st.pend = st.buffer; + init_stream (&st, fp, NULL, 0); MPLIST_NEW (plist); pl = plist; while ((pl = read_element (pl, &st, keys))); @@ -848,10 +871,7 @@ mplist__from_string (unsigned char *str, int n) MPlist *plist, *pl; MStream st; - st.fp = NULL; - st.eof = 0; - st.p = str; - st.pend = str + n; + init_stream (&st, NULL, str, n); MPLIST_NEW (plist); pl = plist; while ((pl = read_element (pl, &st, NULL)));