From c9386fb53ce2b5cb856e1835682213be1fc24baf Mon Sep 17 00:00:00 2001 From: ntakahas Date: Mon, 15 Aug 2005 04:53:23 +0000 Subject: [PATCH] New file. --- UNIDATA/PropList.txt | 1049 +++++++++++++++++++++++++++++++++++++++++ UNIDATA/SpecialCasing.txt | 260 ++++++++++ UNIDATA/WordBreakProperty.txt | 515 ++++++++++++++++++++ 3 files changed, 1824 insertions(+) create mode 100644 UNIDATA/PropList.txt create mode 100644 UNIDATA/SpecialCasing.txt create mode 100644 UNIDATA/WordBreakProperty.txt diff --git a/UNIDATA/PropList.txt b/UNIDATA/PropList.txt new file mode 100644 index 0000000..befc49e --- /dev/null +++ b/UNIDATA/PropList.txt @@ -0,0 +1,1049 @@ +# PropList-4.1.0.txt +# Date: 2005-03-26, 00:34:40 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2005 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see UCD.html + +# ================================================ + +0009..000D ; White_Space # Cc [5] .. +0020 ; White_Space # Zs SPACE +0085 ; White_Space # Cc +00A0 ; White_Space # Zs NO-BREAK SPACE +1680 ; White_Space # Zs OGHAM SPACE MARK +180E ; White_Space # Zs MONGOLIAN VOWEL SEPARATOR +2000..200A ; White_Space # Zs [11] EN QUAD..HAIR SPACE +2028 ; White_Space # Zl LINE SEPARATOR +2029 ; White_Space # Zp PARAGRAPH SEPARATOR +202F ; White_Space # Zs NARROW NO-BREAK SPACE +205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE +3000 ; White_Space # Zs IDEOGRAPHIC SPACE + +# Total code points: 26 + +# ================================================ + +200E..200F ; Bidi_Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; Bidi_Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE + +# Total code points: 7 + +# ================================================ + +200C..200D ; Join_Control # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER + +# Total code points: 2 + +# ================================================ + +002D ; Dash # Pd HYPHEN-MINUS +058A ; Dash # Pd ARMENIAN HYPHEN +1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR +2053 ; Dash # Po SWUNG DASH +207B ; Dash # Sm SUPERSCRIPT MINUS +208B ; Dash # Sm SUBSCRIPT MINUS +2212 ; Dash # Sm MINUS SIGN +2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN +301C ; Dash # Pd WAVE DASH +3030 ; Dash # Pd WAVY DASH +30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE58 ; Dash # Pd SMALL EM DASH +FE63 ; Dash # Pd SMALL HYPHEN-MINUS +FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS + +# Total code points: 22 + +# ================================================ + +002D ; Hyphen # Pd HYPHEN-MINUS +00AD ; Hyphen # Cf SOFT HYPHEN +058A ; Hyphen # Pd ARMENIAN HYPHEN +1806 ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2011 ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN +2E17 ; Hyphen # Pd DOUBLE OBLIQUE HYPHEN +30FB ; Hyphen # Po KATAKANA MIDDLE DOT +FE63 ; Hyphen # Pd SMALL HYPHEN-MINUS +FF0D ; Hyphen # Pd FULLWIDTH HYPHEN-MINUS +FF65 ; Hyphen # Po HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 11 + +# ================================================ + +0022 ; Quotation_Mark # Po QUOTATION MARK +0027 ; Quotation_Mark # Po APOSTROPHE +00AB ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2018 ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK +2019 ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK +201A ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Quotation_Mark # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2039 ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +300C ; Quotation_Mark # Ps LEFT CORNER BRACKET +300D ; Quotation_Mark # Pe RIGHT CORNER BRACKET +300E ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET +300F ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET +301D ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Quotation_Mark # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +FE41 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FF02 ; Quotation_Mark # Po FULLWIDTH QUOTATION MARK +FF07 ; Quotation_Mark # Po FULLWIDTH APOSTROPHE +FF62 ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET + +# Total code points: 29 + +# ================================================ + +0021 ; Terminal_Punctuation # Po EXCLAMATION MARK +002C ; Terminal_Punctuation # Po COMMA +002E ; Terminal_Punctuation # Po FULL STOP +003A..003B ; Terminal_Punctuation # Po [2] COLON..SEMICOLON +003F ; Terminal_Punctuation # Po QUESTION MARK +037E ; Terminal_Punctuation # Po GREEK QUESTION MARK +0387 ; Terminal_Punctuation # Po GREEK ANO TELEIA +0589 ; Terminal_Punctuation # Po ARMENIAN FULL STOP +05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ +060C ; Terminal_Punctuation # Po ARABIC COMMA +061B ; Terminal_Punctuation # Po ARABIC SEMICOLON +061F ; Terminal_Punctuation # Po ARABIC QUESTION MARK +06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP +0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION +070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS +0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD +0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN MARK RGYA GRAM SHAD +104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +166D..166E ; Terminal_Punctuation # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP +16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17DA ; Terminal_Punctuation # Po KHMER SIGN KOOMUUT +1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS +1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP +1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP +FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK +FF0C ; Terminal_Punctuation # Po FULLWIDTH COMMA +FF0E ; Terminal_Punctuation # Po FULLWIDTH FULL STOP +FF1A..FF1B ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1F ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK +FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA + +# Total code points: 86 + +# ================================================ + +005E ; Other_Math # Sk CIRCUMFLEX ACCENT +03D0..03D2 ; Other_Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL +03D5 ; Other_Math # L& GREEK PHI SYMBOL +03F0..03F1 ; Other_Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Other_Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +2016 ; Other_Math # Po DOUBLE VERTICAL LINE +2032..2034 ; Other_Math # Po [3] PRIME..TRIPLE PRIME +2040 ; Other_Math # Pc CHARACTER TIE +2061..2063 ; Other_Math # Cf [3] FUNCTION APPLICATION..INVISIBLE SEPARATOR +207D ; Other_Math # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Other_Math # Pe SUPERSCRIPT RIGHT PARENTHESIS +208D ; Other_Math # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Other_Math # Pe SUBSCRIPT RIGHT PARENTHESIS +20D0..20DC ; Other_Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Other_Math # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20E6 ; Other_Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY +2102 ; Other_Math # L& DOUBLE-STRUCK CAPITAL C +210A..2113 ; Other_Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Other_Math # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Other_Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Other_Math # L& DOUBLE-STRUCK CAPITAL Z +2128 ; Other_Math # L& BLACK-LETTER CAPITAL Z +2129 ; Other_Math # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Other_Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212F..2131 ; Other_Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Other_Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Other_Math # Lo [4] ALEF SYMBOL..DALET SYMBOL +213C..213F ; Other_Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Other_Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +23B7 ; Other_Math # So RADICAL SYMBOL BOTTOM +23D0 ; Other_Math # So VERTICAL LINE EXTENSION +27C5 ; Other_Math # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Other_Math # Pe RIGHT S-SHAPED BAG DELIMITER +27E6 ; Other_Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Other_Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Other_Math # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Other_Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Other_Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Other_Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +2983 ; Other_Math # Ps LEFT WHITE CURLY BRACKET +2984 ; Other_Math # Pe RIGHT WHITE CURLY BRACKET +2985 ; Other_Math # Ps LEFT WHITE PARENTHESIS +2986 ; Other_Math # Pe RIGHT WHITE PARENTHESIS +2987 ; Other_Math # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Other_Math # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Other_Math # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Other_Math # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Other_Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Other_Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Other_Math # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Other_Math # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Other_Math # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Other_Math # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Other_Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Other_Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Other_Math # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Other_Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET +29D8 ; Other_Math # Ps LEFT WIGGLY FENCE +29D9 ; Other_Math # Pe RIGHT WIGGLY FENCE +29DA ; Other_Math # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Other_Math # Pe RIGHT DOUBLE WIGGLY FENCE +29FC ; Other_Math # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Other_Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET +FE61 ; Other_Math # Po SMALL ASTERISK +FE63 ; Other_Math # Pd SMALL HYPHEN-MINUS +FE68 ; Other_Math # Po SMALL REVERSE SOLIDUS +FF3C ; Other_Math # Po FULLWIDTH REVERSE SOLIDUS +FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT +1D400..1D454 ; Other_Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Other_Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Other_Math # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Other_Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Other_Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Other_Math # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Other_Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Other_Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Other_Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Other_Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Other_Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Other_Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Other_Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Other_Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Other_Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Other_Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A3 ; Other_Math # L& [338] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL MONOSPACE SMALL Z +1D6A8..1D6C0 ; Other_Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Other_Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Other_Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Other_Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Other_Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Other_Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Other_Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Other_Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7C9 ; Other_Math # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL +1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE + +# Total code points: 1102 + +# ================================================ + +0030..0039 ; Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F +FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F +FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F + +# Total code points: 44 + +# ================================================ + +0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F + +# Total code points: 22 + +# ================================================ + +0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +05B0..05B9 ; Other_Alphabetic # Mn [10] HEBREW POINT SHEVA..HEBREW POINT HOLAM +05BB..05BD ; Other_Alphabetic # Mn [3] HEBREW POINT QUBUTS..HEBREW POINT METEG +05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE +05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN +0610..0615 ; Other_Alphabetic # Mn [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH +064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA +0659..065E ; Other_Alphabetic # Mn [6] ARABIC ZWARAKAY..ARABIC FATHA WITH TWO DOTS +0670 ; Other_Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Other_Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06E1..06E4 ; Other_Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Other_Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06ED ; Other_Alphabetic # Mn ARABIC SMALL LOW MEEM +0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA +07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN +0901..0902 ; Other_Alphabetic # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA +0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA +093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BE..09C0 ; Other_Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Other_Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09D7 ; Other_Alphabetic # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Other_Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +0A01..0A02 ; Other_Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Other_Alphabetic # Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; Other_Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4C ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU +0A70..0A71 ; Other_Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A81..0A82 ; Other_Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Other_Alphabetic # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; Other_Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Other_Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA +0B3F ; Other_Alphabetic # Mn ORIYA VOWEL SIGN I +0B40 ; Other_Alphabetic # Mc ORIYA VOWEL SIGN II +0B41..0B43 ; Other_Alphabetic # Mn [3] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC R +0B47..0B48 ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B56 ; Other_Alphabetic # Mn ORIYA AI LENGTH MARK +0B57 ; Other_Alphabetic # Mc ORIYA AU LENGTH MARK +0B82 ; Other_Alphabetic # Mn TAMIL SIGN ANUSVARA +0BBE..0BBF ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Other_Alphabetic # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK +0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; Other_Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C82..0C83 ; Other_Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; Other_Alphabetic # Mc KANNADA VOWEL SIGN AA +0CBF ; Other_Alphabetic # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Other_Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Other_Alphabetic # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU +0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D43 ; Other_Alphabetic # Mn [3] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC R +0D46..0D48 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D57 ; Other_Alphabetic # Mc MALAYALAM AU LENGTH MARK +0D82..0D83 ; Other_Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DCF..0DD1 ; Other_Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Other_Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Other_Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Other_Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Other_Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E31 ; Other_Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Other_Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E4D ; Other_Alphabetic # Mn THAI CHARACTER NIKHAHIT +0EB1 ; Other_Alphabetic # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EB9 ; Other_Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Other_Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA +0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD +0F80..0F81 ; Other_Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II +0F90..0F97 ; Other_Alphabetic # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +102C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AA +102D..1030 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN E +1032 ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AI +1036 ; Other_Alphabetic # Mn MYANMAR SIGN ANUSVARA +1038 ; Other_Alphabetic # Mc MYANMAR SIGN VISARGA +1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK +1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U +1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Other_Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B6 ; Other_Alphabetic # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Other_Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Other_Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Other_Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Other_Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Other_Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Other_Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +19B0..19C0 ; Other_Alphabetic # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY +19C8..19C9 ; Other_Alphabetic # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2 +1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1B ; Other_Alphabetic # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE +24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO +FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA + +# Total code points: 488 + +# ================================================ + +3006 ; Ideographic # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Ideographic # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FBB ; Ideographic # Lo [20924] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FBB +F900..FA2D ; Ideographic # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D +FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D + +# Total code points: 71181 + +# ================================================ + +005E ; Diacritic # Sk CIRCUMFLEX ACCENT +0060 ; Diacritic # Sk GRAVE ACCENT +00A8 ; Diacritic # Sk DIAERESIS +00AF ; Diacritic # Sk MACRON +00B4 ; Diacritic # Sk ACUTE ACCENT +00B7 ; Diacritic # Po MIDDLE DOT +00B8 ; Diacritic # Sk CEDILLA +02B0..02C1 ; Diacritic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Diacritic # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Diacritic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Diacritic # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Diacritic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02ED ; Diacritic # Sk [9] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER UNASPIRATED +02EE ; Diacritic # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Diacritic # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..034E ; Diacritic # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW +0350..0357 ; Diacritic # Mn [8] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING RIGHT HALF RING ABOVE +035D..0362 ; Diacritic # Mn [6] COMBINING DOUBLE BREVE..COMBINING DOUBLE RIGHTWARDS ARROW BELOW +0374..0375 ; Diacritic # Sk [2] GREEK NUMERAL SIGN..GREEK LOWER NUMERAL SIGN +037A ; Diacritic # Lm GREEK YPOGEGRAMMENI +0384..0385 ; Diacritic # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0483..0486 ; Diacritic # Mn [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA +0559 ; Diacritic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0591..05A1 ; Diacritic # Mn [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER +05A3..05B9 ; Diacritic # Mn [23] HEBREW ACCENT MUNAH..HEBREW POINT HOLAM +05BB..05BD ; Diacritic # Mn [3] HEBREW POINT QUBUTS..HEBREW POINT METEG +05BF ; Diacritic # Mn HEBREW POINT RAFE +05C1..05C2 ; Diacritic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4 ; Diacritic # Mn HEBREW MARK UPPER DOT +064B..0652 ; Diacritic # Mn [8] ARABIC FATHATAN..ARABIC SUKUN +0657..0658 ; Diacritic # Mn [2] ARABIC INVERTED DAMMA..ARABIC MARK NOON GHUNNA +06DF..06E0 ; Diacritic # Mn [2] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO +06E5..06E6 ; Diacritic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EA..06EC ; Diacritic # Mn [3] ARABIC EMPTY CENTRE LOW STOP..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE +0730..074A ; Diacritic # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN +093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA +094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA +0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +09BC ; Diacritic # Mn BENGALI SIGN NUKTA +09CD ; Diacritic # Mn BENGALI SIGN VIRAMA +0A3C ; Diacritic # Mn GURMUKHI SIGN NUKTA +0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA +0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA +0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA +0B3C ; Diacritic # Mn ORIYA SIGN NUKTA +0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA +0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA +0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA +0CBC ; Diacritic # Mn KANNADA SIGN NUKTA +0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA +0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA +0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA +0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT +0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN +0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK +0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Diacritic # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; Diacritic # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F82..0F84 ; Diacritic # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA +0F86..0F87 ; Diacritic # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN +1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW +1039 ; Diacritic # Mn MYANMAR SIGN VIRAMA +17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Diacritic # Mn KHMER SIGN ATTHACAN +1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1D2C..1D61 ; Diacritic # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1D62..1D6A ; Diacritic # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1FBD ; Diacritic # Sk GREEK KORONIS +1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA +302A..302F ; Diacritic # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE20..FE23 ; Diacritic # Mn [4] COMBINING LIGATURE LEFT HALF..COMBINING DOUBLE TILDE RIGHT HALF +FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT +FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE3 ; Diacritic # Sk FULLWIDTH MACRON +1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO + +# Total code points: 482 + +# ================================================ + +00B7 ; Extender # Po MIDDLE DOT +02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +0640 ; Extender # Lm ARABIC TATWEEL +0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK +0EC6 ; Extender # Lm LAO KO LA +1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK +3031..3035 ; Extender # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +309D..309E ; Extender # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Extender # Lm YI SYLLABLE WU +FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + +# Total code points: 20 + +# ================================================ + +02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI +037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI +1D2C..1D61 ; Other_Lowercase # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +2090..2094 ; Other_Lowercase # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z + +# Total code points: 157 + +# ================================================ + +2160..216F ; Other_Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +24B6..24CF ; Other_Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z + +# Total code points: 42 + +# ================================================ + +FDD0..FDEF ; Noncharacter_Code_Point # Cn [32] .. +FFFE..FFFF ; Noncharacter_Code_Point # Cn [2] .. +1FFFE..1FFFF ; Noncharacter_Code_Point # Cn [2] .. +2FFFE..2FFFF ; Noncharacter_Code_Point # Cn [2] .. +3FFFE..3FFFF ; Noncharacter_Code_Point # Cn [2] .. +4FFFE..4FFFF ; Noncharacter_Code_Point # Cn [2] .. +5FFFE..5FFFF ; Noncharacter_Code_Point # Cn [2] .. +6FFFE..6FFFF ; Noncharacter_Code_Point # Cn [2] .. +7FFFE..7FFFF ; Noncharacter_Code_Point # Cn [2] .. +8FFFE..8FFFF ; Noncharacter_Code_Point # Cn [2] .. +9FFFE..9FFFF ; Noncharacter_Code_Point # Cn [2] .. +AFFFE..AFFFF ; Noncharacter_Code_Point # Cn [2] .. +BFFFE..BFFFF ; Noncharacter_Code_Point # Cn [2] .. +CFFFE..CFFFF ; Noncharacter_Code_Point # Cn [2] .. +DFFFE..DFFFF ; Noncharacter_Code_Point # Cn [2] .. +EFFFE..EFFFF ; Noncharacter_Code_Point # Cn [2] .. +FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] .. +10FFFE..10FFFF; Noncharacter_Code_Point # Cn [2] .. + +# Total code points: 66 + +# ================================================ + +09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA +09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK +0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA +0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK +0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA +0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK +0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU +0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA +0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK +0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +200C..200D ; Other_Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM +1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 + +# Total code points: 21 + +# ================================================ + +034F ; Grapheme_Link # Mn COMBINING GRAPHEME JOINER +094D ; Grapheme_Link # Mn DEVANAGARI SIGN VIRAMA +09CD ; Grapheme_Link # Mn BENGALI SIGN VIRAMA +0A4D ; Grapheme_Link # Mn GURMUKHI SIGN VIRAMA +0ACD ; Grapheme_Link # Mn GUJARATI SIGN VIRAMA +0B4D ; Grapheme_Link # Mn ORIYA SIGN VIRAMA +0BCD ; Grapheme_Link # Mn TAMIL SIGN VIRAMA +0C4D ; Grapheme_Link # Mn TELUGU SIGN VIRAMA +0CCD ; Grapheme_Link # Mn KANNADA SIGN VIRAMA +0D4D ; Grapheme_Link # Mn MALAYALAM SIGN VIRAMA +0DCA ; Grapheme_Link # Mn SINHALA SIGN AL-LAKUNA +0E3A ; Grapheme_Link # Mn THAI CHARACTER PHINTHU +1039 ; Grapheme_Link # Mn MYANMAR SIGN VIRAMA +17D2 ; Grapheme_Link # Mn KHMER SIGN COENG +A806 ; Grapheme_Link # Mn SYLOTI NAGRI SIGN HASANTA +10A3F ; Grapheme_Link # Mn KHAROSHTHI VIRAMA + +# Total code points: 16 + +# ================================================ + +2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW +2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID + +# Total code points: 10 + +# ================================================ + +2FF2..2FF3 ; IDS_Trinary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW + +# Total code points: 2 + +# ================================================ + +2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE + +# Total code points: 329 + +# ================================================ + +3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FBB ; Unified_Ideograph # Lo [20924] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FBB +FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F +FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 +FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 +FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F +FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 +FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 +FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 +20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 + +# Total code points: 70229 + +# ================================================ + +034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER +115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +2064..2069 ; Other_Default_Ignorable_Code_Point # Cn [6] .. +3164 ; Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER +FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER +FFF0..FFF8 ; Other_Default_Ignorable_Code_Point # Cn [9] .. +E0000 ; Other_Default_Ignorable_Code_Point # Cn +E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] .. +E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] .. +E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] .. + +# Total code points: 3779 + +# ================================================ + +0340..0341 ; Deprecated # Mn [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK +17A3 ; Deprecated # Lo KHMER INDEPENDENT VOWEL QAQ +17D3 ; Deprecated # Mn KHMER SIGN BATHAMASAT +206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES + +# Total code points: 10 + +# ================================================ + +0069..006A ; Soft_Dotted # L& [2] LATIN SMALL LETTER I..LATIN SMALL LETTER J +012F ; Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK +0268 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE +029D ; Soft_Dotted # L& LATIN SMALL LETTER J WITH CROSSED-TAIL +02B2 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J +03F3 ; Soft_Dotted # L& GREEK LETTER YOT +0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE +1D62 ; Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER I +1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK +1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE +1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL +1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW +1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW +2071 ; Soft_Dotted # L& SUPERSCRIPT LATIN SMALL LETTER I +2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J +1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J +1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J +1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J +1D4BE..1D4BF ; Soft_Dotted # L& [2] MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J +1D4F2..1D4F3 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J +1D526..1D527 ; Soft_Dotted # L& [2] MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J +1D55A..1D55B ; Soft_Dotted # L& [2] MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J +1D58E..1D58F ; Soft_Dotted # L& [2] MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J +1D5C2..1D5C3 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J +1D5F6..1D5F7 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J +1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J +1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J +1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J + +# Total code points: 44 + +# ================================================ + +0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI +0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI + +# Total code points: 10 + +# ================================================ + +2118 ; Other_ID_Start # So SCRIPT CAPITAL P +212E ; Other_ID_Start # So ESTIMATED SYMBOL +309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + +# Total code points: 4 + +# ================================================ + +1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE + +# Total code points: 9 + +# ================================================ + +0021 ; STerm # Po EXCLAMATION MARK +002E ; STerm # Po FULL STOP +003F ; STerm # Po QUESTION MARK +055C ; STerm # Po ARMENIAN EXCLAMATION MARK +055E ; STerm # Po ARMENIAN QUESTION MARK +0589 ; STerm # Po ARMENIAN FULL STOP +061F ; STerm # Po ARABIC QUESTION MARK +06D4 ; STerm # Po ARABIC FULL STOP +0700..0702 ; STerm # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP +0964..0965 ; STerm # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +104A..104B ; STerm # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1362 ; STerm # Po ETHIOPIC FULL STOP +1367..1368 ; STerm # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +166E ; STerm # Po CANADIAN SYLLABICS FULL STOP +1803 ; STerm # Po MONGOLIAN FULL STOP +1809 ; STerm # Po MONGOLIAN MANCHU FULL STOP +1944..1945 ; STerm # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +203C..203D ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +3002 ; STerm # Po IDEOGRAPHIC FULL STOP +FE52 ; STerm # Po SMALL FULL STOP +FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK +FF0E ; STerm # Po FULLWIDTH FULL STOP +FF1F ; STerm # Po FULLWIDTH QUESTION MARK +FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP + +# Total code points: 36 + +# ================================================ + +180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 259 + +# ================================================ + +0009..000D ; Pattern_White_Space # Cc [5] .. +0020 ; Pattern_White_Space # Zs SPACE +0085 ; Pattern_White_Space # Cc +200E..200F ; Pattern_White_Space # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2028 ; Pattern_White_Space # Zl LINE SEPARATOR +2029 ; Pattern_White_Space # Zp PARAGRAPH SEPARATOR + +# Total code points: 11 + +# ================================================ + +0021..0023 ; Pattern_Syntax # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Pattern_Syntax # Sc DOLLAR SIGN +0025..0027 ; Pattern_Syntax # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Pattern_Syntax # Ps LEFT PARENTHESIS +0029 ; Pattern_Syntax # Pe RIGHT PARENTHESIS +002A ; Pattern_Syntax # Po ASTERISK +002B ; Pattern_Syntax # Sm PLUS SIGN +002C ; Pattern_Syntax # Po COMMA +002D ; Pattern_Syntax # Pd HYPHEN-MINUS +002E..002F ; Pattern_Syntax # Po [2] FULL STOP..SOLIDUS +003A..003B ; Pattern_Syntax # Po [2] COLON..SEMICOLON +003C..003E ; Pattern_Syntax # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Pattern_Syntax # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET +005C ; Pattern_Syntax # Po REVERSE SOLIDUS +005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET +005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT +0060 ; Pattern_Syntax # Sk GRAVE ACCENT +007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET +007C ; Pattern_Syntax # Sm VERTICAL LINE +007D ; Pattern_Syntax # Pe RIGHT CURLY BRACKET +007E ; Pattern_Syntax # Sm TILDE +00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN +00A6..00A7 ; Pattern_Syntax # So [2] BROKEN BAR..SECTION SIGN +00A9 ; Pattern_Syntax # So COPYRIGHT SIGN +00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Pattern_Syntax # Sm NOT SIGN +00AE ; Pattern_Syntax # So REGISTERED SIGN +00B0 ; Pattern_Syntax # So DEGREE SIGN +00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN +00B6 ; Pattern_Syntax # So PILCROW SIGN +00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK +00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN +00F7 ; Pattern_Syntax # Sm DIVISION SIGN +2010..2015 ; Pattern_Syntax # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Pattern_Syntax # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Pattern_Syntax # Pi LEFT SINGLE QUOTATION MARK +2019 ; Pattern_Syntax # Pf RIGHT SINGLE QUOTATION MARK +201A ; Pattern_Syntax # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Pattern_Syntax # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Pattern_Syntax # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Pattern_Syntax # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Pattern_Syntax # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Pattern_Syntax # Po [8] DAGGER..HYPHENATION POINT +2030..2038 ; Pattern_Syntax # Po [9] PER MILLE SIGN..CARET +2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE +2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Pattern_Syntax # Sm FRACTION SLASH +2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN +2053 ; Pattern_Syntax # Po SWUNG DASH +2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Pattern_Syntax # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Pattern_Syntax # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Pattern_Syntax # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Pattern_Syntax # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Pattern_Syntax # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Pattern_Syntax # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Pattern_Syntax # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Pattern_Syntax # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Pattern_Syntax # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Pattern_Syntax # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Pattern_Syntax # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Pattern_Syntax # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Pattern_Syntax # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Pattern_Syntax # So DOWNWARDS DOUBLE ARROW +21D4 ; Pattern_Syntax # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Pattern_Syntax # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Pattern_Syntax # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Pattern_Syntax # So [8] DIAMETER SIGN..WAVY LINE +2308..230B ; Pattern_Syntax # Sm [4] LEFT CEILING..RIGHT FLOOR +230C..231F ; Pattern_Syntax # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Pattern_Syntax # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Pattern_Syntax # So [7] FROWN..KEYBOARD +2329 ; Pattern_Syntax # Ps LEFT-POINTING ANGLE BRACKET +232A ; Pattern_Syntax # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Pattern_Syntax # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Pattern_Syntax # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Pattern_Syntax # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4 ; Pattern_Syntax # Ps TOP SQUARE BRACKET +23B5 ; Pattern_Syntax # Pe BOTTOM SQUARE BRACKET +23B6 ; Pattern_Syntax # Po BOTTOM SQUARE BRACKET OVER TOP SQUARE BRACKET +23B7..23DB ; Pattern_Syntax # So [37] RADICAL SYMBOL BOTTOM..FUSE +23DC..23FF ; Pattern_Syntax # Cn [36] .. +2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO +2427..243F ; Pattern_Syntax # Cn [25] .. +2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +244B..245F ; Pattern_Syntax # Cn [21] .. +2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Pattern_Syntax # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Pattern_Syntax # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN +2670..269C ; Pattern_Syntax # So [45] WEST SYRIAC CROSS..FLEUR-DE-LIS +269D..269F ; Pattern_Syntax # Cn [3] .. +26A0..26B1 ; Pattern_Syntax # So [18] WARNING SIGN..FUNERAL URN +26B2..2700 ; Pattern_Syntax # Cn [79] .. +2701..2704 ; Pattern_Syntax # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS +2705 ; Pattern_Syntax # Cn +2706..2709 ; Pattern_Syntax # So [4] TELEPHONE LOCATION SIGN..ENVELOPE +270A..270B ; Pattern_Syntax # Cn [2] .. +270C..2727 ; Pattern_Syntax # So [28] VICTORY HAND..WHITE FOUR POINTED STAR +2728 ; Pattern_Syntax # Cn +2729..274B ; Pattern_Syntax # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +274C ; Pattern_Syntax # Cn +274D ; Pattern_Syntax # So SHADOWED WHITE CIRCLE +274E ; Pattern_Syntax # Cn +274F..2752 ; Pattern_Syntax # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE +2753..2755 ; Pattern_Syntax # Cn [3] .. +2756 ; Pattern_Syntax # So BLACK DIAMOND MINUS WHITE X +2757 ; Pattern_Syntax # Cn +2758..275E ; Pattern_Syntax # So [7] LIGHT VERTICAL BAR..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +275F..2760 ; Pattern_Syntax # Cn [2] .. +2761..2767 ; Pattern_Syntax # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET +2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Pattern_Syntax # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Pattern_Syntax # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Pattern_Syntax # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Pattern_Syntax # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Pattern_Syntax # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Pattern_Syntax # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2794 ; Pattern_Syntax # So HEAVY WIDE-HEADED RIGHTWARDS ARROW +2795..2797 ; Pattern_Syntax # Cn [3] .. +2798..27AF ; Pattern_Syntax # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW +27B0 ; Pattern_Syntax # Cn +27B1..27BE ; Pattern_Syntax # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW +27BF ; Pattern_Syntax # Cn +27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27CF ; Pattern_Syntax # Cn [9] .. +27D0..27E5 ; Pattern_Syntax # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Pattern_Syntax # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Pattern_Syntax # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC..27EF ; Pattern_Syntax # Cn [4] .. +27F0..27FF ; Pattern_Syntax # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; Pattern_Syntax # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..2982 ; Pattern_Syntax # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Pattern_Syntax # Ps LEFT WHITE CURLY BRACKET +2984 ; Pattern_Syntax # Pe RIGHT WHITE CURLY BRACKET +2985 ; Pattern_Syntax # Ps LEFT WHITE PARENTHESIS +2986 ; Pattern_Syntax # Pe RIGHT WHITE PARENTHESIS +2987 ; Pattern_Syntax # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Pattern_Syntax # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Pattern_Syntax # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Pattern_Syntax # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Pattern_Syntax # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Pattern_Syntax # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Pattern_Syntax # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Pattern_Syntax # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Pattern_Syntax # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Pattern_Syntax # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Pattern_Syntax # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Pattern_Syntax # Ps LEFT WIGGLY FENCE +29D9 ; Pattern_Syntax # Pe RIGHT WIGGLY FENCE +29DA ; Pattern_Syntax # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Pattern_Syntax # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Pattern_Syntax # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Pattern_Syntax # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Pattern_Syntax # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Pattern_Syntax # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B13 ; Pattern_Syntax # So [20] NORTH EAST WHITE ARROW..SQUARE WITH BOTTOM HALF BLACK +2B14..2BFF ; Pattern_Syntax # Cn [236] .. +2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Pattern_Syntax # Po RAISED SQUARE +2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E1B ; Pattern_Syntax # Cn [4] .. +2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E7F ; Pattern_Syntax # Cn [98] .. +3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET +3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET +300A ; Pattern_Syntax # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Pattern_Syntax # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Pattern_Syntax # Ps LEFT CORNER BRACKET +300D ; Pattern_Syntax # Pe RIGHT CORNER BRACKET +300E ; Pattern_Syntax # Ps LEFT WHITE CORNER BRACKET +300F ; Pattern_Syntax # Pe RIGHT WHITE CORNER BRACKET +3010 ; Pattern_Syntax # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Pattern_Syntax # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Pattern_Syntax # So [2] POSTAL MARK..GETA MARK +3014 ; Pattern_Syntax # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Pattern_Syntax # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Pattern_Syntax # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Pattern_Syntax # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Pattern_Syntax # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Pattern_Syntax # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Pattern_Syntax # Ps LEFT WHITE SQUARE BRACKET +301B ; Pattern_Syntax # Pe RIGHT WHITE SQUARE BRACKET +301C ; Pattern_Syntax # Pd WAVE DASH +301D ; Pattern_Syntax # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Pattern_Syntax # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Pattern_Syntax # So POSTAL MARK FACE +3030 ; Pattern_Syntax # Pd WAVY DASH +FD3E ; Pattern_Syntax # Ps ORNATE LEFT PARENTHESIS +FD3F ; Pattern_Syntax # Pe ORNATE RIGHT PARENTHESIS +FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT + +# Total code points: 2760 diff --git a/UNIDATA/SpecialCasing.txt b/UNIDATA/SpecialCasing.txt new file mode 100644 index 0000000..11a5b1d --- /dev/null +++ b/UNIDATA/SpecialCasing.txt @@ -0,0 +1,260 @@ +# SpecialCasing-4.1.0.txt +# Date: 2005-03-26, 00:35:45 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2005 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see UCD.html +# +# Special Casing Properties +# +# This file is a supplement to the UnicodeData file. +# It contains additional information about the casing of Unicode characters. +# (For compatibility, the UnicodeData.txt file only contains case mappings for +# characters where they are 1-1, and does not have locale-specific mappings.) +# For more information, see the discussion of Case Mappings in the Unicode Standard. +# +# All code points not listed in this file that do not have a simple case mappings +# in UnicodeData.txt map to themselves. +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# ; ; ; <upper> ; (<condition_list> ;)? # <comment> +# +# <code>, <lower>, <title>, and <upper> provide character values in hex. If there is more +# than one character, they are separated by spaces. Other than as used to separate +# elements, spaces are to be ignored. +# +# The <condition_list> is optional. Where present, it consists of one or more locale IDs +# or contexts, separated by spaces. In these conditions: +# - A condition list overrides the normal behavior if all of the listed conditions are true. +# - The context is always the context of the characters in the original string, +# NOT in the resulting string. +# - Case distinctions in the condition list are not significant. +# - Conditions preceded by "Not_" represent the negation of the condition. +# +# A locale ID is defined by taking any language tag as defined by +# RFC 3066 (or its successor), and replacing '-' by '_'. +# +# A context for a character C is defined by Section 3.13 Default Case Operations, +# on p. 89-90 of The Unicode Standard, Version 4.0, as amended by Unicode 4.1.0, +# as specified in http://www.unicode.org/versions/Unicode4.1.0/ +# +# Parsers of this file must be prepared to deal with future additions to this format: +# * Additional contexts +# * Additional fields +# ================================================================================ + +# ================================================================================ +# Unconditional mappings +# ================================================================================ + +# The German es-zed is special--the normal mapping is to SS. +# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>)) + +00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S + +# Preserve canonical equivalence for I with dot. Turkic is handled below. + +0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE + +# Ligatures + +FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF +FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI +FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL +FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI +FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL +FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T +FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST + +0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN +FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW +FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH +FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI +FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW +FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH + +# No corresponding uppercase precomposed character + +0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON +1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW +1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS +1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE +1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING +1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI +1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI +1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI +1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI + +# IMPORTANT-when capitalizing iota-subscript (0345) +# It MUST be in normalized form--moved to the end of any sequence of combining marks. +# This is because logically it represents a following base character! +# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript> +# It should never be the first character in a word, so in titlecasing it can be left as is. + +# The following cases are already in the UnicodeData file, so are only commented here. + +# 0345; 0345; 0345; 0399; # COMBINING GREEK YPOGEGRAMMENI + +# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript) +# have special uppercases. +# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase! + +1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + +# Some characters with YPOGEGRAMMENI also have no corresponding titlecases + +1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + +1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + +# ================================================================================ +# Conditional mappings +# ================================================================================ + +# Special case for final form of sigma + +03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA + +# Note: the following cases for non-final are already in the UnicodeData file. + +# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA +# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA +# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA + +# Note: the following cases are not included, since they would case-fold in lowercasing + +# 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA +# 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA + +# ================================================================================ +# Locale-sensitive mappings +# ================================================================================ + +# Lithuanian + +# Lithuanian retains the dot in a lowercase i when followed by accents. + +# Remove DOT ABOVE after "i" with upper or titlecase + +0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE + +# Introduce an explicit dot above when lowercasing capital I's and J's +# whenever there are more accents above. +# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) + +0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I +004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J +012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK +00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE +0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE + +# ================================================================================ + +# Turkish and Azeri + +# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri +# The following rules handle those cases. + +0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE + +# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. +# This matches the behavior of the canonically equivalent I-dot_above + +0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE +0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE + +# When lowercasing, unless an I is before a dot_above, it turns into a dotless i. + +0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I +0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I + +# When uppercasing, i turns into a dotted capital I + +0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I +0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I + +# Note: the following case is already in the UnicodeData file. + +# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I diff --git a/UNIDATA/WordBreakProperty.txt b/UNIDATA/WordBreakProperty.txt new file mode 100644 index 0000000..e0c8f5c --- /dev/null +++ b/UNIDATA/WordBreakProperty.txt @@ -0,0 +1,515 @@ +# WordBreakProperty-4.1.0.txt +# Date: 2005-03-26, 00:36:01 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2005 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see UCD.html + +# ================================================ + +# Property: Word_Break + +# All code points not explicitly listed for Word_Break +# have the value Other. + +# ================================================ + +00AD ; Format # Cf SOFT HYPHEN +0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +06DD ; Format # Cf ARABIC END OF AYAH +070F ; Format # Cf SYRIAC ABBREVIATION MARK +17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +200B ; Format # Cf ZERO WIDTH SPACE +200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2063 ; Format # Cf [4] WORD JOINER..INVISIBLE SEPARATOR +206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE +FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0001 ; Format # Cf LANGUAGE TAG +E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG + +# Total code points: 136 + +# ================================================ + +3031..3035 ; Katakana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +309B..309C ; Katakana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; Katakana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; Katakana # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO +31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; Katakana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; Katakana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK + +# Total code points: 176 + +# ================================================ + +0041..005A ; ALetter # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; ALetter # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00A0 ; ALetter # Zs NO-BREAK SPACE +00AA ; ALetter # L& FEMININE ORDINAL INDICATOR +00B5 ; ALetter # L& MICRO SIGN +00BA ; ALetter # L& MASCULINE ORDINAL INDICATOR +00C0..00D6 ; ALetter # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; ALetter # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; ALetter # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; ALetter # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0241 ; ALetter # L& [126] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER GLOTTAL STOP +0250..02AF ; ALetter # L& [96] LATIN SMALL LETTER TURNED A..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE +037A ; ALetter # Lm GREEK YPOGEGRAMMENI +0386 ; ALetter # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; ALetter # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; ALetter # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03CE ; ALetter # L& [44] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER OMEGA WITH TONOS +03D0..03F5 ; ALetter # L& [38] GREEK BETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..04CE ; ALetter # L& [69] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EM WITH TAIL +04D0..04F9 ; ALetter # L& [42] CYRILLIC CAPITAL LETTER A WITH BREVE..CYRILLIC SMALL LETTER YERU WITH DIAERESIS +0500..050F ; ALetter # L& [16] CYRILLIC CAPITAL LETTER KOMI DE..CYRILLIC SMALL LETTER KOMI TJE +0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN +05D0..05EA ; ALetter # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05F0..05F2 ; ALetter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3 ; ALetter # Po HEBREW PUNCTUATION GERESH +0621..063A ; ALetter # Lo [26] ARABIC LETTER HAMZA..ARABIC LETTER GHAIN +0640 ; ALetter # Lm ARABIC TATWEEL +0641..064A ; ALetter # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066E..066F ; ALetter # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; ALetter # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; ALetter # Lo ARABIC LETTER AE +06E5..06E6 ; ALetter # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EE..06EF ; ALetter # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; ALetter # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; ALetter # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; ALetter # Lo SYRIAC LETTER ALAPH +0712..072F ; ALetter # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..076D ; ALetter # Lo [33] SYRIAC LETTER SOGDIAN ZHAIN..ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE +0780..07A5 ; ALetter # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU +07B1 ; ALetter # Lo THAANA LETTER NAA +0903 ; ALetter # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; ALetter # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; ALetter # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +0950 ; ALetter # Lo DEVANAGARI OM +0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +097D ; ALetter # Lo DEVANAGARI LETTER GLOTTAL STOP +0982..0983 ; ALetter # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; ALetter # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; ALetter # Lo BENGALI LETTER LA +09B6..09B9 ; ALetter # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; ALetter # Lo BENGALI SIGN AVAGRAHA +09BF..09C0 ; ALetter # Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II +09C7..09C8 ; ALetter # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; ALetter # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CE ; ALetter # Lo BENGALI LETTER KHANDA TA +09DC..09DD ; ALetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +0A03 ; ALetter # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; ALetter # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; ALetter # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; ALetter # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; ALetter # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3E..0A40 ; ALetter # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A59..0A5C ; ALetter # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; ALetter # Lo GURMUKHI LETTER FA +0A72..0A74 ; ALetter # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A83 ; ALetter # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; ALetter # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; ALetter # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; ALetter # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; ALetter # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; ALetter # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; ALetter # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; ALetter # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; ALetter # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC9 ; ALetter # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; ALetter # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AD0 ; ALetter # Lo GUJARATI OM +0AE0..0AE1 ; ALetter # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0B02..0B03 ; ALetter # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; ALetter # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; ALetter # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; ALetter # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; ALetter # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; ALetter # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; ALetter # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; ALetter # Lo ORIYA SIGN AVAGRAHA +0B40 ; ALetter # Mc ORIYA VOWEL SIGN II +0B47..0B48 ; ALetter # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; ALetter # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B5C..0B5D ; ALetter # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; ALetter # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B71 ; ALetter # Lo ORIYA LETTER WA +0B83 ; ALetter # Lo TAMIL SIGN VISARGA +0B85..0B8A ; ALetter # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; ALetter # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; ALetter # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; ALetter # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; ALetter # Lo TAMIL LETTER JA +0B9E..0B9F ; ALetter # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; ALetter # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; ALetter # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; ALetter # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBF ; ALetter # Mc TAMIL VOWEL SIGN I +0BC1..0BC2 ; ALetter # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; ALetter # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; ALetter # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0C01..0C03 ; ALetter # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C05..0C0C ; ALetter # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; ALetter # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; ALetter # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C33 ; ALetter # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA +0C35..0C39 ; ALetter # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA +0C41..0C44 ; ALetter # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C82..0C83 ; ALetter # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; ALetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; ALetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA +0CBE ; ALetter # Mc KANNADA VOWEL SIGN AA +0CC0..0CC1 ; ALetter # Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U +0CC3..0CC4 ; ALetter # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR +0CC7..0CC8 ; ALetter # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; ALetter # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CDE ; ALetter # Lo KANNADA LETTER FA +0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0D02..0D03 ; ALetter # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D05..0D0C ; ALetter # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; ALetter # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D28 ; ALetter # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA +0D2A..0D39 ; ALetter # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA +0D3F..0D40 ; ALetter # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II +0D46..0D48 ; ALetter # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; ALetter # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D60..0D61 ; ALetter # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL +0D82..0D83 ; ALetter # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; ALetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; ALetter # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; ALetter # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; ALetter # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; ALetter # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DD0..0DD1 ; ALetter # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDE ; ALetter # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA +0DF2..0DF3 ; ALetter # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0F00 ; ALetter # Lo TIBETAN SYLLABLE OM +0F40..0F47 ; ALetter # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6A ; ALetter # Lo [34] TIBETAN LETTER NYA..TIBETAN LETTER FIXED-FORM RA +0F7F ; ALetter # Mc TIBETAN SIGN RNAM BCAD +0F88..0F8B ; ALetter # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS +1000..1021 ; ALetter # Lo [34] MYANMAR LETTER KA..MYANMAR LETTER A +1023..1027 ; ALetter # Lo [5] MYANMAR LETTER I..MYANMAR LETTER E +1029..102A ; ALetter # Lo [2] MYANMAR LETTER O..MYANMAR LETTER AU +102C ; ALetter # Mc MYANMAR VOWEL SIGN AA +1031 ; ALetter # Mc MYANMAR VOWEL SIGN E +1038 ; ALetter # Mc MYANMAR SIGN VISARGA +1050..1055 ; ALetter # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; ALetter # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR +1100..1159 ; ALetter # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH +115F..11A2 ; ALetter # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA +11A8..11F9 ; ALetter # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH +1200..1248 ; ALetter # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA +124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; ALetter # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; ALetter # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; ALetter # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; ALetter # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; ALetter # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; ALetter # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; ALetter # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; ALetter # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F4 ; ALetter # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV +1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..1676 ; ALetter # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA +1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +1700..170C ; ALetter # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA +170E..1711 ; ALetter # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA +1720..1731 ; ALetter # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA +1740..1751 ; ALetter # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; ALetter # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; ALetter # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; ALetter # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B6 ; ALetter # Mc KHMER VOWEL SIGN AA +17BE..17C5 ; ALetter # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C7..17C8 ; ALetter # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17D7 ; ALetter # Lm KHMER SIGN LEK TOO +17DC ; ALetter # Lo KHMER SIGN AVAKRAHASANYA +1820..1842 ; ALetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; ALetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA +1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA +1900..191C ; ALetter # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA +1923..1926 ; ALetter # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; ALetter # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; ALetter # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; ALetter # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1950..196D ; ALetter # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; ALetter # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19A9 ; ALetter # Lo [42] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW XVA +19B0..19C0 ; ALetter # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY +19C1..19C7 ; ALetter # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B +19C8..19C9 ; ALetter # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2 +1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A19..1A1B ; ALetter # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE +1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D78 ; ALetter # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; ALetter # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; ALetter # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1E9B ; ALetter # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE +1EA0..1EF9 ; ALetter # L& [90] LATIN CAPITAL LETTER A WITH DOT BELOW..LATIN SMALL LETTER Y WITH TILDE +1F00..1F15 ; ALetter # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; ALetter # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; ALetter # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; ALetter # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; ALetter # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; ALetter # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; ALetter # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; ALetter # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; ALetter # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; ALetter # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; ALetter # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; ALetter # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; ALetter # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER I +207F ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER N +2090..2094 ; ALetter # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C +2107 ; ALetter # L& EULER CONSTANT +210A..2113 ; ALetter # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; ALetter # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; ALetter # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; ALetter # L& DOUBLE-STRUCK CAPITAL Z +2126 ; ALetter # L& OHM SIGN +2128 ; ALetter # L& BLACK-LETTER CAPITAL Z +212A..212D ; ALetter # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212F..2131 ; ALetter # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; ALetter # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; ALetter # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; ALetter # L& INFORMATION SOURCE +213C..213F ; ALetter # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; ALetter # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +2160..2183 ; ALetter # Nl [36] ROMAN NUMERAL ONE..ROMAN NUMERAL REVERSED ONE HUNDRED +24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE +2C80..2CE4 ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI +2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D80..2D96 ; ALetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; ALetter # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +3005 ; ALetter # Lm IDEOGRAPHIC ITERATION MARK +303B ; ALetter # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; ALetter # Lo MASU MARK +3105..312C ; ALetter # Lo [40] BOPOMOFO LETTER B..BOPOMOFO LETTER GN +3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31B7 ; ALetter # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H +A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E +A015 ; ALetter # Lm YI SYLLABLE WU +A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A800..A801 ; ALetter # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I +A803..A805 ; ALetter # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; ALetter # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; ALetter # Mc SYLOTI NAGRI VOWEL SIGN OO +AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +FA30..FA6A ; ALetter # Lo [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A +FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; ALetter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; ALetter # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; ALetter # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; ALetter # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; ALetter # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; ALetter # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; ALetter # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; ALetter # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; ALetter # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; ALetter # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; ALetter # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FE70..FE74 ; ALetter # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; ALetter # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF21..FF3A ; ALetter # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; ALetter # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FFA0..FFBE ; ALetter # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; ALetter # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; ALetter # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; ALetter # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; ALetter # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; ALetter # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; ALetter # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; ALetter # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; ALetter # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10300..1031E ; ALetter # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU +10330..10349 ; ALetter # Lo [26] GOTHIC LETTER AHSA..GOTHIC LETTER OTHAL +1034A ; ALetter # Nl GOTHIC LETTER NINE HUNDRED +10380..1039D ; ALetter # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; ALetter # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; ALetter # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; ALetter # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; ALetter # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; ALetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +10800..10805 ; ALetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; ALetter # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA +1083F ; ALetter # Lo CYPRIOT SYLLABLE ZO +10A00 ; ALetter # Lo KHAROSHTHI LETTER A +10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A33 ; ALetter # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA +1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; ALetter # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; ALetter # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; ALetter # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; ALetter # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; ALetter # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; ALetter # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; ALetter # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; ALetter # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; ALetter # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; ALetter # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; ALetter # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; ALetter # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; ALetter # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; ALetter # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; ALetter # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; ALetter # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; ALetter # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; ALetter # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; ALetter # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; ALetter # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; ALetter # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; ALetter # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; ALetter # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7C9 ; ALetter # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL + +# Total code points: 20161 + +# ================================================ + +0027 ; MidLetter # Po APOSTROPHE +003A ; MidLetter # Po COLON +00B7 ; MidLetter # Po MIDDLE DOT +05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM +2019 ; MidLetter # Pf RIGHT SINGLE QUOTATION MARK +2027 ; MidLetter # Po HYPHENATION POINT + +# Total code points: 6 + +# ================================================ + +002C ; MidNum # Po COMMA +002E ; MidNum # Po FULL STOP +003B ; MidNum # Po SEMICOLON +037E ; MidNum # Po GREEK QUESTION MARK +0589 ; MidNum # Po ARMENIAN FULL STOP +060D ; MidNum # Po ARABIC DATE SEPARATOR +2044 ; MidNum # Sm FRACTION SLASH +FE10 ; MidNum # Po PRESENTATION FORM FOR VERTICAL COMMA +FE13..FE14 ; MidNum # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION FORM FOR VERTICAL SEMICOLON + +# Total code points: 10 + +# ================================================ + +0030..0039 ; Numeric # Nd [10] DIGIT ZERO..DIGIT NINE +0660..0669 ; Numeric # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066B..066C ; Numeric # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR +06F0..06F9 ; Numeric # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +0966..096F ; Numeric # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +09E6..09EF ; Numeric # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +0A66..0A6F ; Numeric # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0AE6..0AEF ; Numeric # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0B66..0B6F ; Numeric # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0BE6..0BEF ; Numeric # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0C66..0C6F ; Numeric # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0CE6..0CEF ; Numeric # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0D66..0D6F ; Numeric # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0E50..0E59 ; Numeric # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0ED0..0ED9 ; Numeric # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0F20..0F29 ; Numeric # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +1040..1049 ; Numeric # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE + +# Total code points: 262 + +# ================================================ + +005F ; ExtendNumLet # Pc LOW LINE +203F..2040 ; ExtendNumLet # Pc [2] UNDERTIE..CHARACTER TIE +2054 ; ExtendNumLet # Pc INVERTED UNDERTIE +FE33..FE34 ; ExtendNumLet # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE4D..FE4F ; ExtendNumLet # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FF3F ; ExtendNumLet # Pc FULLWIDTH LOW LINE + +# Total code points: 10 -- 1.7.10.4