X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=src%2Fcharacter.h;h=de73430285be736bc96194478d5c7bd4381b2029;hb=1b7bcea372680c0e52493e54e3b4c2e16744101d;hp=9e35912cdc9aed02247f79df8245b606988e1654;hpb=2f5663ae6894ac71b0062131d2cc7b7352d34a60;p=m17n%2Fm17n-lib.git diff --git a/src/character.h b/src/character.h index 9e35912..de73430 100644 --- a/src/character.h +++ b/src/character.h @@ -1,5 +1,5 @@ /* character.h -- header file for the character module. - Copyright (C) 2003, 2004 + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H15PRO112 @@ -17,7 +17,7 @@ You should have received a copy of the GNU Lesser General Public License along with the m17n library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 02111-1307, USA. */ #ifndef _M17N_CHARACTER_H_ @@ -40,8 +40,19 @@ #define MAX_UTF8_CHAR_BYTES 6 #define MAX_UNICODE_CHAR_BYTES 4 +#define USHORT_SIZE (sizeof (unsigned short)) +#define UINT_SIZE (sizeof (unsigned int)) + +/* Return how many bytes one unit (char, short, or int) in FORMAT + occupies. */ + +#define UNIT_BYTES(format) \ + ((format) <= MTEXT_FORMAT_UTF_8 ? 1 \ + : (format) <= MTEXT_FORMAT_UTF_16BE ? USHORT_SIZE \ + : UINT_SIZE) + /* Return how many units (char, short, or int) C will occupy in - MText->data. */ + MText->data. If C is not in the supported range, return 0. */ #define CHAR_UNITS_ASCII(c) ((c) < 0x80) @@ -53,19 +64,13 @@ : (c) < 0x4000000 ? 5 \ : 6) -#define CHAR_UNITS_UTF16(c) \ - ((c) < 0x10000 ? 1 \ - : (c) < 0x110000 ? 2 \ - : 0) - +#define CHAR_UNITS_UTF16(c) ((c) < 0x110000 ? (2 - ((c) < 0x10000)) : 0) #define CHAR_UNITS_UTF32(c) 1 #define CHAR_UNITS(c, format) \ - ((c) < 0x80 ? 1 \ - : (format) == MTEXT_FORMAT_UTF8 ? CHAR_UNITS_UTF8 (c) \ - : (format) == MTEXT_FORMAT_UTF16 ? CHAR_UNITS_UTF16 (c) \ - : (format) == MTEXT_FORMAT_ASCII ? 0 \ + ((format) <= MTEXT_FORMAT_UTF_8 ? CHAR_UNITS_UTF8 (c) \ + : (format) <= MTEXT_FORMAT_UTF_16BE ? CHAR_UNITS_UTF16 (c) \ : CHAR_UNITS_UTF32 (c)) #define CHAR_BYTES CHAR_UNITS_UTF8 @@ -83,9 +88,9 @@ (2 - (*(unsigned short *) (p) < 0xD800 \ || *(unsigned short *) (p) >= 0xDC00)) -#define CHAR_UNITS_AT(c, format) \ - ((format) == MTEXT_FORMAT_UTF16 ? CHAR_UNITS_AT_UTF16 (c) \ - : (format) == MTEXT_FORMAT_UTF8 ? CHAR_UNITS_AT_UTF8 (c) \ +#define CHAR_UNITS_AT(mt, p) \ + ((mt)->format <= MTEXT_FORMAT_UTF_8 ? CHAR_UNITS_AT_UTF8 (p) \ + : (mt)->format <= MTEXT_FORMAT_UTF_16BE ? CHAR_UNITS_AT_UTF16 (p) \ : 1) #define CHAR_BYTES_AT CHAR_UNITS_AT_UTF8 @@ -103,8 +108,8 @@ (2 - ((unsigned short) (c) < 0xD800 || (unsigned short) (c) >= 0xDC00)) #define CHAR_UNITS_BY_HEAD(c, format) \ - ((format) == MTEXT_FORMAT_UTF16 ? CHAR_UNITS_BY_HEAD_UTF16 (c) \ - : (format) == MTEXT_FORMAT_UTF8 ? CHAR_UNITS_BY_HEAD_UTF8 (c) \ + ((format) <= MTEXT_FORMAT_UTF_8 ? CHAR_UNITS_BY_HEAD_UTF8 (c) \ + : (format) <= MTEXT_FORMAT_UTF_16BE ? CHAR_UNITS_BY_HEAD_UTF16 (c) \ : 1) #define CHAR_BYTES_BY_HEAD CHAR_UNITS_BY_HEAD_UTF8 @@ -141,33 +146,33 @@ #define STRING_CHAR STRING_CHAR_UTF8 -#define STRING_CHAR_ADVANCE_UTF8(p) \ - (!(*(p) & 0x80) ? *(p)++ \ - : !(*(p) & 0x20) ? (((*(p)++ & 0x1F) << 6) \ - | (*(p)++ & 0x3F)) \ - : !(*(p) & 0x10) ? (((*(p)++ & 0x0F) << 12) \ - | ((*(p)++ & 0x3F) << 6) \ - | (*(p)++ & 0x3F)) \ - : !(*(p) & 0x08) ? (((*(p)++ & 0x07) << 18) \ - | ((*(p)++ & 0x3F) << 12) \ - | ((*(p)++ & 0x3F) << 6) \ - | (*(p)++ & 0x3F)) \ - : !(*(p) & 0x04) ? (((*(p)++ & 0x03) << 24) \ - | ((*(p)++ & 0x3F) << 18) \ - | ((*(p)++ & 0x3F) << 12) \ - | ((*(p)++ & 0x3F) << 6) \ - | (*(p)++ & 0x3F)) \ - : (((*(p)++ & 0x01) << 30) \ - | ((*(p)++ & 0x3F) << 24) \ - | ((*(p)++ & 0x3F) << 18) \ - | ((*(p)++ & 0x3F) << 12) \ - | ((*(p)++ & 0x3F) << 6) \ - | (*(p)++ & 0x3F))) - -#define STRING_CHAR_ADVANCE_UTF16(p) \ - (((unsigned short) (p)[0] < 0xD800 || (unsigned short) (p)[0] >= 0xDC00) \ - ? *(p)++ \ - : (((*(p)++ - 0xD800) << 10) + (*(p)++ - 0xDC00) + 0x10000)) +#define STRING_CHAR_ADVANCE_UTF8(p) \ + (!(*(p) & 0x80) ? ((p)++, (p)[-1]) \ + : !(*(p) & 0x20) ? ((p) += 2, ((((p)[-2] & 0x1F) << 6) \ + | ((p)[-1] & 0x3F))) \ + : !(*(p) & 0x10) ? ((p) += 3, ((((p)[-3] & 0x0F) << 12) \ + | (((p)[-2] & 0x3F) << 6) \ + | ((p)[-1] & 0x3F))) \ + : !(*(p) & 0x08) ? ((p) += 4, ((((p)[-4] & 0x07) << 18) \ + | (((p)[-3] & 0x3F) << 12) \ + | (((p)[-2] & 0x3F) << 6) \ + | ((p)[-1] & 0x3F))) \ + : !(*(p) & 0x04) ? ((p) += 5, ((((p)[-5] & 0x03) << 24) \ + | (((p)[-4] & 0x3F) << 18) \ + | (((p)[-3] & 0x3F) << 12) \ + | (((p)[-2] & 0x3F) << 6) \ + | ((p)[-1] & 0x3F))) \ + : ((p) += 6, ((((p)[-6] & 0x01) << 30) \ + | (((p)[-5] & 0x3F) << 24) \ + | (((p)[-4] & 0x3F) << 18) \ + | (((p)[-3] & 0x3F) << 12) \ + | (((p)[-2] & 0x3F) << 6) \ + | ((p)[-1] & 0x3F)))) + +#define STRING_CHAR_ADVANCE_UTF16(p) \ + (((p)[0] < 0xD800 || (p)[0] >= 0xDC00) \ + ? ((p)++, (p)[-1]) \ + : ((p) += 2, ((((p)[-2] - 0xD800) << 10) + ((p)[-1] - 0xDC00) + 0x10000))) #define STRING_CHAR_ADVANCE STRING_CHAR_ADVANCE_UTF8 @@ -206,11 +211,11 @@ (((p)[0] - 0xD800) << 10) + ((p)[1] - 0xDC00) + 0x10000)) #define STRING_CHAR_AND_UNITS(p, units, format) \ - ((format) == MTEXT_FORMAT_UTF16 \ - ? STRING_CHAR_AND_UNITS_UTF16 (p, units) \ - : (format) == MTEXT_FORMAT_UTF8 \ + ((format) <= MTEXT_FORMAT_UTF_8 \ ? STRING_CHAR_AND_UNITS_UTF8 (p, units) \ - : ((units) = 1, (p)[0])) + : (format) <= MTEXT_FORMAT_UTF_16BE \ + ? STRING_CHAR_AND_UNITS_UTF16 (p, units) \ + : ((units) = 1, ((unsigned) (p))[0])) #define STRING_CHAR_AND_BYTES STRING_CHAR_AND_UNITS_UTF8 @@ -280,4 +285,6 @@ || ((c) >= 'a' && (c) <= 'z') \ || ((c) >= '0' && (c) <= '9')) +extern void mchar__define_prop (MSymbol key, MSymbol type, void *mdb); + #endif /* not _M17N_CHARACTER_H_ */