X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=chise.c;h=5d382c14ed37e8fab537556870485a95394a3642;hb=c334c5225035cef665c4e6c55d4167f065026d67;hp=6e8bb4650d0a09b15f87e907100d81ed45abcdb7;hpb=929758a18cec876b64c0d3d66f009ce7b5464965;p=chise%2Flibchise.git diff --git a/chise.c b/chise.c index 6e8bb46..5d382c1 100644 --- a/chise.c +++ b/chise.c @@ -2,10 +2,222 @@ # include "config.h" #endif +#ifndef HAVE_STRNLEN +/* original in mysql, strings/strnlen.c. +uint strnlen(register const char *s, register uint maxlen) +{ + const char *end= (const char *)memchr(s, '\0', maxlen); + return end ? (uint) (end - s) : maxlen; +} +*/ +static inline int +strnlen (register const char *s, register int maxlen) +{ + const char *end= (const char *)memchr(s, '\0', maxlen); + return end ? (int) (end - s) : maxlen; +} +#endif + #include "chise.h" #define xzero(lvalue) ((void) memset (&(lvalue), '\0', sizeof (lvalue))) +CHISE_Char_ID +chise_char_id_parse_c_string (unsigned char *str, int len) +{ + int i = 0; + + if ( (len >= 2) && (str[i++] == '?') ) + { + unsigned char c = str[i++]; + int counter; + CHISE_Char_ID cid; + + if (c == '\\') + { + if (len < 3) + return -1; + c = str[i++]; + if (c == '^') + { + if (len < 4) + return -1; + c = str[i++]; + if (c == '?') + return 0x7F; + else + return c & (0x80 | 0x1F); + } + } + if ( c < 0xC0 ) + { + cid = c; + counter = 0; + } + else if ( c < 0xE0 ) + { + cid = c & 0x1f; + counter = 1; + } + else if ( c < 0xF0 ) + { + cid = c & 0x0f; + counter = 2; + } + else if ( c < 0xF8 ) + { + cid = c & 0x07; + counter = 3; + } + else if ( c < 0xFC ) + { + cid = c & 0x03; + counter = 4; + } + else + { + cid = c & 0x01; + counter = 5; + } + + if (counter + 2 <= len) + { + int j; + + for (j = 0; j < counter; j++) + cid = (cid << 6) | (str[j + i] & 0x3F); + return cid; + } + } + return -1; +} + +int +chise_format_char_id (CHISE_Char_ID cid, unsigned char *dest, int len) +{ + int i = 0; + + dest[i++] = '?'; + if (cid == '\t') + { + dest[i++] = '\\'; + dest[i++] = 't'; + dest[i] = '\0'; + return i; + } + else if (cid == '\n') + { + dest[i++] = '\\'; + dest[i++] = 'n'; + dest[i] = '\0'; + return i; + } + else if (cid == '\r') + { + dest[i++] = '\\'; + dest[i++] = 'r'; + dest[i] = '\0'; + return i; + } + else if (cid == 0x1C) + { + dest[i++] = '\\'; + dest[i++] = '^'; + dest[i++] = '\\'; + dest[i++] = '\\'; + dest[i] = '\0'; + return i; + } + else if (cid <= 0x1F) + { + dest[i++] = '\\'; + dest[i++] = '^'; + dest[i++] = '@' + cid; + dest[i] = '\0'; + return i; + } + else if ( (cid == ' ') || (cid == '"') || + (cid == '#') || (cid == '\'') || + (cid == '(') || (cid == ')') || + (cid == ',') || (cid == '.') || + (cid == ';') || (cid == '?') || + (cid == '[') || (cid == '\\') || + (cid == ']') || (cid == '`') ) + { + dest[i++] = '\\'; + dest[i++] = cid; + dest[i] = '\0'; + return i; + } + else if (cid <= 0x7E) + { + dest[i++] = cid; + dest[i] = '\0'; + return i; + } + else if (cid == 0x7F) + { + dest[i++] = '\\'; + dest[i++] = '^'; + dest[i++] = '?'; + dest[i] = '\0'; + return i; + } + else if (cid <= 0x9F) + { + dest[i++] = '\\'; + dest[i++] = '^'; + dest[i++] = ((cid + '@') >> 6) | 0xC0; + dest[i++] = ((cid + '@') & 0x3F) | 0x80; + dest[i] = '\0'; + return i; + } + else if (cid <= 0x7FF) + { + dest[i++] = (cid >> 6) | 0xC0; + dest[i++] = (cid & 0x3F) | 0x80; + dest[i] = '\0'; + return i; + } + else if (cid <= 0xFFFF) + { + dest[i++] = (cid >> 12) | 0xE0; + dest[i++]= ((cid >> 6) & 0x3F) | 0x80; + dest[i++]= (cid & 0x3F) | 0x80; + dest[i] = '\0'; + return i; + } + else if (cid <= 0x1FFFFF) + { + dest[i++]= (cid >> 18) | 0xF0; + dest[i++]= ((cid >> 12) & 0x3F) | 0x80; + dest[i++]= ((cid >> 6) & 0x3F) | 0x80; + dest[i++]= (cid & 0x3F) | 0x80; + dest[i] = '\0'; + return i; + } + else if (cid <= 0x3FFFFFF) + { + dest[i++]= (cid >> 24) | 0xF8; + dest[i++]= ((cid >> 18) & 0x3F) | 0x80; + dest[i++]= ((cid >> 12) & 0x3F) | 0x80; + dest[i++]= ((cid >> 6) & 0x3F) | 0x80; + dest[i++]= (cid & 0x3F) | 0x80; + dest[i] = '\0'; + return i; + } + else + { + dest[i++]= (cid >> 30) | 0xFC; + dest[i++]= ((cid >> 24) & 0x3F) | 0x80; + dest[i++]= ((cid >> 18) & 0x3F) | 0x80; + dest[i++]= ((cid >> 12) & 0x3F) | 0x80; + dest[i++]= ((cid >> 6) & 0x3F) | 0x80; + dest[i++]= (cid & 0x3F) | 0x80; + dest[i] = '\0'; + return i; + } +} int chise_open_data_source (CHISE_DS *ds, CHISE_DS_Type type, char *location) @@ -25,7 +237,7 @@ chise_close_data_source (CHISE_DS *ds) int -chise_open_decoding_table (CHISE_Decoding_Table **db, +chise_open_decoding_table (CHISE_Decoding_Table *db, CHISE_DS *ds, const char *ccs, DBTYPE real_subtype, u_int32_t accessmask, int modemask) @@ -58,78 +270,28 @@ chise_dt_get_char (CHISE_Decoding_Table *db, int code_point) unsigned char *str = (unsigned char *)chise_value_data (&valdatum); int len = strnlen (str, chise_value_size (&valdatum)); - int i = 0; - - if ( (len >= 2) && (str[i++] == '?') ) - { - unsigned char c = str[i++]; - int counter; - CHISE_Char_ID cid; - - if (c == '\\') - { - if (len < 3) - return -1; - c = str[i++]; - if (c == '^') - { - if (len < 4) - return -1; - c = str[i++]; - if (c == '?') - return 0x7F; - else - return c & (0x80 | 0x1F); - } - } - if ( c < 0xC0 ) - { - cid = c; - counter = 0; - } - else if ( c < 0xE0 ) - { - cid = c & 0x1f; - counter = 1; - } - else if ( c < 0xF0 ) - { - cid = c & 0x0f; - counter = 2; - } - else if ( c < 0xF8 ) - { - cid = c & 0x07; - counter = 3; - } - else if ( c < 0xFC ) - { - cid = c & 0x03; - counter = 4; - } - else - { - cid = c & 0x01; - counter = 5; - } - if (counter + 2 <= len) - { - int j; - - for (j = 0; j < counter; j++) - cid = (cid << 6) | (str[j + i] & 0x3F); - return cid; - } - } + return chise_char_id_parse_c_string (str, len); } return -1; } +int +chise_dt_put_char (CHISE_Decoding_Table *db, + int code_point, CHISE_Char_ID cid) +{ + CHISE_Value valdatum; + char key_buf[16], val_buf[8]; + + sprintf(key_buf, "%d", code_point); + chise_format_char_id (cid, val_buf, 8); + return chise_put_attribute_table (db, key_buf, val_buf); +} + int -chise_open_feature_table (CHISE_Feature_Table **db, +chise_open_feature_table (CHISE_Feature_Table *db, CHISE_DS *ds, const char *feature, DBTYPE real_subtype, u_int32_t accessmask, int modemask) @@ -148,64 +310,46 @@ chise_close_feature_table (CHISE_Feature_Table *db) return -1; } -int chise_ft_get_value (CHISE_Feature_Table *db, - CHISE_Char_ID cid, CHISE_Value *valdatum) +int +chise_ft_get_value (CHISE_Feature_Table *db, + CHISE_Char_ID cid, CHISE_Value *valdatum) { unsigned char key_buf[8]; - key_buf[0] = '?'; - if (cid <= 0x7f) - { - key_buf[1] = cid; - key_buf[2] = '\0'; - } - else if (cid <= 0x7ff) - { - key_buf[1] = (cid >> 6) | 0xc0; - key_buf[2] = (cid & 0x3f) | 0x80; - key_buf[3] = '\0'; - } - else if (cid <= 0xffff) - { - key_buf[1] = (cid >> 12) | 0xe0; - key_buf[2]= ((cid >> 6) & 0x3f) | 0x80; - key_buf[3]= (cid & 0x3f) | 0x80; - key_buf[4] = '\0'; - } - else if (cid <= 0x1fffff) - { - key_buf[1]= (cid >> 18) | 0xf0; - key_buf[2]= ((cid >> 12) & 0x3f) | 0x80; - key_buf[3]= ((cid >> 6) & 0x3f) | 0x80; - key_buf[4]= (cid & 0x3f) | 0x80; - key_buf[5] = '\0'; - } - else if (cid <= 0x3ffffff) - { - key_buf[1]= (cid >> 24) | 0xf8; - key_buf[2]= ((cid >> 18) & 0x3f) | 0x80; - key_buf[3]= ((cid >> 12) & 0x3f) | 0x80; - key_buf[4]= ((cid >> 6) & 0x3f) | 0x80; - key_buf[5]= (cid & 0x3f) | 0x80; - key_buf[6] = '\0'; - } - else + chise_format_char_id (cid, key_buf, 8); + return chise_get_attribute_table (db, key_buf, valdatum); +} + +void +chise_ft_iterate (CHISE_Feature_Table *ft, + int (*func) (CHISE_Feature_Table *ft, + CHISE_Char_ID cid, CHISE_Value *valdatum)) +{ + DBT keydatum, valdatum; + DBC *dbcp; + int status; + + xzero (keydatum); + xzero (valdatum); + + status = ft->dbp->cursor (ft->dbp, NULL, &dbcp, 0); + for (status = dbcp->c_get (dbcp, &keydatum, &valdatum, DB_FIRST); + status == 0; + status = dbcp->c_get (dbcp, &keydatum, &valdatum, DB_NEXT)) { - key_buf[1]= (cid >> 30) | 0xfc; - key_buf[2]= ((cid >> 24) & 0x3f) | 0x80; - key_buf[3]= ((cid >> 18) & 0x3f) | 0x80; - key_buf[4]= ((cid >> 12) & 0x3f) | 0x80; - key_buf[5]= ((cid >> 6) & 0x3f) | 0x80; - key_buf[6]= (cid & 0x3f) | 0x80; - key_buf[7] = '\0'; + unsigned char *key_str = (unsigned char *)keydatum.data; + int key_len = strnlen (key_str, keydatum.size); + CHISE_Char_ID key = chise_char_id_parse_c_string (key_str, key_len); + int ret; + + if (ret = func (ft, key, &valdatum)) + break; } - return - chise_get_attribute_table (db, key_buf, valdatum); + dbcp->c_close (dbcp); } - int -chise_open_attribute_table (CHISE_Attribute_Table **db, +chise_open_attribute_table (CHISE_Attribute_Table *ft, const char *db_dir, const char *encoding, const char *feature, DBTYPE real_subtype, @@ -213,16 +357,17 @@ chise_open_attribute_table (CHISE_Attribute_Table **db, { DB* dbase; int status; - int len; + int len, flen, i; int size; - char *db_file_name; + char *db_file_name, *sp; status = db_create (&dbase, NULL, 0); if (status) return -1; len = strlen (db_dir); - size = len + strlen (encoding) + strlen (feature) + 4; + flen = strlen (feature); + size = len + strlen (encoding) + flen * 3 + 4; db_file_name = alloca (size); strcpy (db_file_name, db_dir); if (db_file_name[len - 1] != '/') @@ -232,7 +377,21 @@ chise_open_attribute_table (CHISE_Attribute_Table **db, } strcat (db_file_name, encoding); strcat (db_file_name, "/"); - strcat (db_file_name, feature); + /* strcat (db_file_name, feature); */ + sp = &db_file_name[strlen (db_file_name)]; + for (i = 0; i < flen; i++) + { + int c = feature[i]; + + if ( (c == '/') || (c == '%') ) + { + sprintf (sp, "%%%02X", c); + sp += 3; + } + else + *sp++ = c; + } + *sp = '\0'; status = dbase->open (dbase, db_file_name, NULL, real_subtype, accessmask, modemask); if (status) @@ -240,23 +399,23 @@ chise_open_attribute_table (CHISE_Attribute_Table **db, dbase->close (dbase, 0); return -1; } - *db = dbase; + ft->dbp = dbase; return 0; } int -chise_close_attribute_table (CHISE_Attribute_Table *db) +chise_close_attribute_table (CHISE_Attribute_Table *ft) { - if (db) + if (ft->dbp) { - db->sync (db, 0); - db->close (db, 0); + ft->dbp->sync (ft->dbp, 0); + ft->dbp->close (ft->dbp, 0); } return 0; } int -chise_get_attribute_table (CHISE_Attribute_Table *db, +chise_get_attribute_table (CHISE_Attribute_Table *ft, char *key, CHISE_Value *valdatum) { DBT keydatum; @@ -269,6 +428,27 @@ chise_get_attribute_table (CHISE_Attribute_Table *db, keydatum.data = key; keydatum.size = strlen (key); - status = db->get (db, NULL, &keydatum, valdatum, 0); + status = ft->dbp->get (ft->dbp, NULL, &keydatum, valdatum, 0); + return status; +} + +int +chise_put_attribute_table (CHISE_Attribute_Table *ft, + char *key, char *value) +{ + DBT keydatum, valdatum; + int status = 0; + + /* DB Version 2 requires DBT's to be zeroed before use. */ + xzero (keydatum); + xzero (valdatum); + + keydatum.data = key; + keydatum.size = strlen (key); + + valdatum.data = value; + valdatum.size = strlen (value); + + status = ft->dbp->put (ft->dbp, NULL, &keydatum, &valdatum, 0); return status; }