1 /* mtext-lbrk.c -- line break
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
6 This file is part of the m17n library.
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
24 /*** @addtogroup m17nInternal
33 #include "m17n-misc.h"
41 LBC_QU, /* quotation */
43 LBC_NS, /* no-start */
44 LBC_EX, /* exclamation/interrogation */
45 LBC_SY, /* Syntax (slash) */
46 LBC_IS, /* infix (numeric) separator */
50 LBC_AL, /* alphabetic */
51 LBC_ID, /* ideograph (atomic) */
52 LBC_IN, /* inseparable */
54 LBC_BA, /* break after */
55 LBC_BB, /* break before */
56 LBC_B2, /* break both */
57 LBC_ZW, /* ZW space */
58 LBC_CM, /* combining mark */
59 LBC_WJ, /* word joiner */
61 /* used for 4.1 pair table */
/* The enum values up to LBC_JT are used directly as row and column
   indices of lba_pair_table, so the member order must match the row
   order of that table.  GET_LBC also tests the five Hangul/Jamo
   classes below as one range (>= LBC_H2 && <= LBC_JT), so they must
   stay contiguous with LBC_H2 first and LBC_JT last.  */
62 LBC_H2, /* Hangul 2 Jamo Syllable */
63 LBC_H3, /* Hangul 3 Jamo Syllable */
64 LBC_JL, /* Jamo leading consonant */
65 LBC_JV, /* Jamo vowel */
66 LBC_JT, /* Jamo trailing consonant */
68 /* These are not handled in the pair tables. */
/* lba_pair_table has no row or column for the classes below; a value
   from this group has to be remapped (see GET_LBC) or handled
   explicitly (as mtext_line_break does for LBC_SA) before the table
   is indexed.  */
69 LBC_SA, /* south (east) asian */
71 LBC_PS, /* paragraph and line separators */
72 LBC_BK, /* hard break (newline) */
73 LBC_CR, /* carriage return */
74 LBC_LF, /* line feed */
75 LBC_NL, /* next line */
76 LBC_CB, /* contingent break opportunity */
77 LBC_SG, /* surrogate */
78 LBC_AI, /* ambiguous */
/* The action values are character codes so that lba_pair_table can be
   written as an array of strings, one character per cell.  */
/* '#' is the cell found in the CM (combining mark) column of most
   lba_pair_table rows.  */
87 LBA_COMBINING_INDIRECT = '#',
/* '@' occurs once in lba_pair_table: row OP, column CM.  */
88 LBA_COMBINING_PROHIBITED = '@',
93 /* The pair table of line break actions. */
/* Indexed as lba_pair_table[B][A]: B is the class of the character
   before the candidate break position (one string per row, named by
   the trailing row label) and A is the class of the character after
   it (one character per column).  Columns use the same class order as
   the rows; the three staggered header lines below give that order
   when read column by column (OP, CL, QU, GL, NS, EX, ...).

   Each cell is a character that is compared against enum
   LineBreakAction values.  '#' (LBA_COMBINING_INDIRECT) and '@'
   (LBA_COMBINING_PROHIBITED) occur only in the CM column.  */
94 static char *lba_pair_table[] =
95 /* OP GL SY PO ID BA ZW H2 JV
96 CL NS IS NU IN BB CM H3 JT
97 QU EX PR AL HY B2 WJ JL */
98 { "^^^^^^^^^^^^^^^^^^^@^^^^^^", /* OP */
99 "_^%%^^^^_%____%%__^#^_____", /* CL */
100 "^^%%%^^^%%%%%%%%%%^#^%%%%%", /* QU */
101 "%^%%%^^^%%%%%%%%%%^#^%%%%%", /* GL */
102 "_^%%%^^^______%%__^#^_____", /* NS */
103 "_^%%%^^^______%%__^#^_____", /* EX */
104 "_^%%%^^^__%___%%__^#^_____", /* SY */
105 "_^%%%^^^__%%__%%__^#^_____", /* IS */
106 "%^%%%^^^__%%%_%%__^#^%%%%%", /* PR */
107 "_^%%%^^^______%%__^#^_____", /* PO */
108 "_^%%%^^^_%%%_%%%__^#^_____", /* NU */
109 "_^%%%^^^__%%_%%%__^#^_____", /* AL */
110 "_^%%%^^^_%___%%%__^#^_____", /* ID */
111 "_^%%%^^^_____%%%__^#^_____", /* IN */
112 "_^%%%^^^__%___%%__^#^_____", /* HY */
113 "_^%%%^^^______%%__^#^_____", /* BA */
114 "%^%%%^^^%%%%%%%%%%^#^%%%%%", /* BB */
115 "_^%%%^^^______%%_^^#^_____", /* B2 */
116 "__________________^_______", /* ZW */
117 "_^%%%^^^__%%_%%%__^#^_____", /* CM */
118 "%^%%%^^^%%%%%%%%%%^#^%%%%%", /* WJ */
119 "_^%%%^^^_%___%%%__^#^___%%", /* H2 */
120 "_^%%%^^^_%___%%%__^#^____%", /* H3 */
121 "_^%%%^^^_%___%%%__^#^%%%%_", /* JL */
122 "_^%%%^^^_%___%%%__^#^___%%", /* JV */
123 "_^%%%^^^_%___%%%__^#^____%" /* JT */
/* Character table consulted by GET_LBC (through mchartable_lookup) to
   obtain the line break class of a character.  It is assigned in
   mtext_line_break from the property table of the "linebreak"
   character property.  */
126 static MCharTable *lbc_table;
128 /* Set LBC to enum LineBreakClass of the character at POS of MT
129 (length is LEN) while converting LBC_AI and LBC_XX to LBC_AL,
130 LBC_CB to LBC_B2, LBC_CR, LBC_LF, and LBC_NL to LBC_BK. If POS is
131 out of range, set LBC to LBC_BK. */
/* Usage notes:
   - The class is read from the file-scope lbc_table, so that table
     must have been set before this macro is expanded.
   - POS is evaluated more than once (in the range check and again in
     the mtext_ref_char call); pass an expression without side
     effects.
   - OPTION bits tested here: with MTEXT_LBO_AI_AS_ID, LBC_AI becomes
     LBC_ID instead of LBC_AL; unless MTEXT_LBO_KOREAN_SP is set, the
     classes in the range LBC_H2..LBC_JT are remapped as well.
   - NOTE(review): the branches below test LBC_NL, LBC_AI, the Hangul
     range, LBC_CB and LBC_XX; none of them tests LBC_CR or LBC_LF,
     and mtext_line_break still compares results against LBC_CR and
     LBC_LF after using this macro.  The statement above that CR and
     LF are converted to LBC_BK therefore looks stale -- confirm.  */
133 #define GET_LBC(LBC, MT, LEN, POS, OPTION) \
135 if ((POS) < 0 || (POS) >= (LEN)) \
139 int c = mtext_ref_char ((MT), (POS)); \
140 (LBC) = (enum LineBreakClass) mchartable_lookup (lbc_table, c); \
141 if ((LBC) == LBC_NL) \
143 else if ((LBC) == LBC_AI) \
144 (LBC) = ((OPTION) & MTEXT_LBO_AI_AS_ID) ? LBC_ID : LBC_AL; \
145 else if (! ((OPTION) & MTEXT_LBO_KOREAN_SP) \
146 && (LBC) >= LBC_H2 && (LBC) <= LBC_JT) \
148 else if ((LBC) == LBC_CB) \
150 else if ((LBC) == LBC_XX) \
157 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
162 /*** @addtogroup m17nMtext */
/* mtext_line_break: find a line break position in MT near POS.

   MT      text to examine; its length is taken with mtext_len.
   POS     position the search starts from.
   OPTION  bit mask; MTEXT_LBO_SP_CM is tested here, and the whole
           mask is passed on to GET_LBC.
   AFTER   output pointer; it receives break_before in the branch
           guarded by break_before == pos, and break_after at the end
           of the function.

   The first half looks for a break position at or before POS
   (break_before), the second half for one after POS (break_after).
   Returns break_before if it is positive, otherwise break_after.  */
167 mtext_line_break (MText *mt, int pos, int option, int *after)
169 int break_before, break_after;
170 int len = mtext_len (mt);
171 enum LineBreakClass lbc;
172 enum LineBreakClass Blbc, Albc; /* B(efore) and A(fter) lbcs. */
173 int Bpos, Apos; /* B(efore) and A(fter) positions. */
174 enum LineBreakAction action;
178 /* The end of text is an explicit break position. */
/* Obtain the key of the integer character property "linebreak" and
   store its property table in lbc_table, which GET_LBC reads.  */
186 MSymbol key = mchar_define_property ("linebreak", Minteger);
188 lbc_table = mchar_get_prop_table (key, NULL);
191 GET_LBC (lbc, mt, len, pos, option);
/* With MTEXT_LBO_SP_CM, peek at the class of the character at
   Apos + 1: if it is a combining mark, Albc becomes LBC_ID, otherwise
   LBC_SP.  */
196 if (option & MTEXT_LBO_SP_CM)
198 GET_LBC (Albc, mt, len, Apos + 1, option);
199 Albc = (Albc == LBC_CM) ? LBC_ID : LBC_SP;
/* Keep re-reading the class at Apos for as long as it is a space.  */
201 while (Albc == LBC_SP)
204 GET_LBC (Albc, mt, len, Apos, option);
207 if ((option & MTEXT_LBO_SP_CM) && (Albc == LBC_CM))
210 GET_LBC (Albc, mt, len, Apos, option);
214 Apos++, Albc = LBC_CM;
/* On a line feed, the class of the preceding character (Apos - 1) is
   fetched.  */
219 else if (Albc == LBC_LF)
221 GET_LBC (Albc, mt, len, Apos - 1, option);
/* LBC_SA has no entry in lba_pair_table: ask mtext__word_segment
   (which receives &Apos) and use LBC_BB when its result is positive,
   LBC_AL otherwise.  */
226 else if (Albc == LBC_SA)
227 Albc = mtext__word_segment (mt, Apos, &Apos, NULL) > 0 ? LBC_BB : LBC_AL;
229 /* After exiting from the following loop, if Apos is positive, it is
230 the previous (including POS) break position. */
236 /* Now Bpos == Apos. */
239 GET_LBC (Blbc, mt, len, Bpos, option);
240 } while (Blbc == LBC_SP);
242 if (Blbc == LBC_BK || Blbc == LBC_LF || Blbc == LBC_CR)
244 /* Explicit break. */
/* Nonzero when Bpos and Apos are not adjacent positions.  */
248 indirect = Bpos + 1 < Apos;
254 GET_LBC (Blbc, mt, len, Bpos, option);
255 } while (Blbc == LBC_CM);
256 if ((option & MTEXT_LBO_SP_CM) && (Blbc == LBC_SP))
258 else if (Blbc == LBC_SP || Blbc == LBC_ZW
259 || Blbc == LBC_BK || Blbc == LBC_LF || Blbc == LBC_CR)
267 mtext__word_segment (mt, Bpos, &next, NULL);
/* Row is the class before the candidate break, column the class
   after it.  */
273 action = lba_pair_table[Blbc][Albc];
274 if (action == LBA_DIRECT)
276 else if (action == LBA_INDIRECT)
281 else if (action == LBA_COMBINING_INDIRECT)
288 Apos = next, Albc = LBC_BB;
290 Apos = Bpos, Albc = Blbc;
293 if (break_before > 0)
297 if (break_before == pos)
300 *after = break_before;
305 /* Now find a break position after POS. */
313 GET_LBC (Blbc, mt, len, Bpos, option);
314 } while (Blbc == LBC_CM);
315 if (Blbc == LBC_SP || Blbc == LBC_ZW
316 || Blbc == LBC_BK || Blbc == LBC_LF || Blbc == LBC_CR)
318 if ((Blbc == LBC_SP) && (option & MTEXT_LBO_SP_CM))
327 mtext__word_segment (mt, Bpos, NULL, &Bpos);
330 else if (Blbc == LBC_SP)
332 if (option & MTEXT_LBO_SP_CM)
334 GET_LBC (Blbc, mt, len, Bpos + 1, option);
336 Blbc = LBC_ID, Bpos++;
340 while (Blbc == LBC_SP)
343 GET_LBC (Blbc, mt, len, Bpos, option);
349 /* After exiting from the following loop, if Apos is positive, it is
350 the next break position. */
356 /* Now Bpos == Apos. */
357 if (Blbc == LBC_LF || Blbc == LBC_BK || Blbc == LBC_CR)
362 GET_LBC (Blbc, mt, len, Bpos + 1, option);
371 GET_LBC (Albc, mt, len, Apos, option);
372 } while (Albc == LBC_SP);
378 /* Explicit break at the end of text. */
/* Nonzero when Bpos and Apos are not adjacent positions.  */
381 indirect = Bpos + 1 < Apos;
/* NOTE(review): here any nonzero result of mtext__word_segment selects
   LBC_BB, whereas the backward scan earlier in this function requires
   the result to be > 0.  If the function can return a negative value
   the two scans disagree -- confirm which test is intended.  */
384 Albc = mtext__word_segment (mt, Apos, NULL, &next) ? LBC_BB : LBC_AL;
386 action = lba_pair_table[Blbc][Albc];
387 if (action == LBA_DIRECT)
388 /* Direct break at Apos. */
390 else if (action == LBA_INDIRECT)
395 else if (action == LBA_COMBINING_INDIRECT)
399 if (option & MTEXT_LBO_SP_CM)
405 Bpos = next, Blbc = LBC_AL;
415 *after = break_after;
/* Prefer the break position found at or before POS when it is
   positive; otherwise report the one found after POS.  */
417 return (break_before > 0 ? break_before : break_after);