ICU-23038 [TODO(egg): patch into 0669e86c] Naïvely patch the tailored new monkeys · unicode-org/icu@1aa2ad1 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1aa2ad1

Browse files
committed
ICU-23038 [TODO(egg): patch into 0669e86] Naïvely patch the tailored new monkeys
1 parent c86ea2b commit 1aa2ad1

File tree

5 files changed

+31
-31
lines changed

5 files changed

+31
-31
lines changed

icu4c/source/test/testdata/break_rules/line_cj.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ AL = [:LineBreak = Alphabetic:];
2222
AP = [:LineBreak = Aksara_Prebase:];
2323
AS = [:LineBreak = Aksara_Start:];
2424
BA = [:LineBreak = Break_After:];
25-
HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
25+
HH = [:LineBreak = Unambiguous_Hyphen:];
2626
BB = [:LineBreak = Break_Before:];
2727
BK = [:LineBreak = Mandatory_Break:];
2828
B2 = [:LineBreak = Break_Both:];
@@ -183,7 +183,7 @@ LB11.3: WJ CM* [^CM];
183183
LB20a.2: GL CM* (HY | HH) CM* AL;
184184
LB12: GL CM* [^CM];
185185

186-
LB12a: [^SP BA HY] CM* GL;
186+
LB12a: [^SP BA HY HH] CM* GL;
187187

188188
# LB 13 Do not break before ‘]’ or ‘!’ or ‘/’, even after spaces.
189189
LB13.1: [^SP] CM* [CL CP EX SY];
@@ -203,18 +203,18 @@ LB19.1: QU CM* [^CM];
203203
LB20.1: . CM* ZWJ CB;
204204
LB20.2: . CM* ÷ CB;
205205

206-
LB20a.6: CB CM* ZWJ (HY | HH) CM* AL;
206+
LB20a.6: CB CM* ZWJ (HY | HH) CM* (AL | HL);
207207
LB20.3: CB CM* ZWJ [^CM];
208208
LB20.4: CB CM* ÷;
209209

210210
# LB 20a Do not break after a word-initial hyphen.
211-
LB20a.1: ^(HY | HH) CM* AL;
211+
LB20a.1: ^(HY | HH) CM* (AL | HL);
212212

213213
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
214214
# not picking up the continuing match after the BA from 21a.
215-
LB21a: HL CM* (HY | BAminuseaFWH) CM* [^CM CB HL];
215+
LB21a: HL CM* (HY | HH) CM* [^CM CB HL];
216216

217-
LB21.1: . CM* [BA HY NS];
217+
LB21.1: . CM* [BA HY HH NS];
218218
LB21.2: BB CM* [^CM CB];
219219

220220
LB21b: SY CM* HL;

icu4c/source/test/testdata/break_rules/line_loose.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ AL = [:LineBreak = Alphabetic:];
2929
AP = [:LineBreak = Aksara_Prebase:];
3030
AS = [:LineBreak = Aksara_Start:];
3131
BA = [:LineBreak = Break_After:];
32-
HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
32+
HH = [:LineBreak = Unambiguous_Hyphen:];
3333
BB = [:LineBreak = Break_Before:];
3434
BK = [:LineBreak = Mandatory_Break:];
3535
B2 = [:LineBreak = Break_Both:];
@@ -184,7 +184,7 @@ LB11.3: WJ CM* [^CM];
184184
LB20a.2: GL CM* (HY | HH) CM* AL;
185185
LB12: GL CM* [^CM];
186186

187-
LB12a: [^SP BA HY] CM* GL;
187+
LB12a: [^SP BA HY HH] CM* GL;
188188

189189
# LB 13 Do not break before ‘]’ or ‘!’ or ‘/’, even after spaces.
190190
LB13.1: [^SP] CM* [CL CP EX SY];
@@ -204,18 +204,18 @@ LB19.1: QU CM* [^CM];
204204
LB20.1: . CM* ZWJ CB;
205205
LB20.2: . CM* ÷ CB;
206206

207-
LB20a.6: CB CM* ZWJ (HY | HH) CM* AL;
207+
LB20a.6: CB CM* ZWJ (HY | HH) CM* (AL | HL);
208208
LB20.3: CB CM* ZWJ [^CM];
209209
LB20.4: CB CM* ÷;
210210

211211
# LB 20a Do not break after a word-initial hyphen.
212-
LB20a.1: ^(HY | HH) CM* AL;
212+
LB20a.1: ^(HY | HH) CM* (AL | HL);
213213

214214
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
215215
# not picking up the continuing match after the BA from 21a.
216-
LB21a: HL CM* (HY | BAminuseaFWH) CM* [^CM CB HL];
216+
LB21a: HL CM* (HY | HH) CM* [^CM CB HL];
217217

218-
LB21.1: . CM* [BA HY NS];
218+
LB21.1: . CM* [BA HY HH NS];
219219
LB21.2: BB CM* [^CM CB];
220220

221221
LB21b: SY CM* HL;

icu4c/source/test/testdata/break_rules/line_loose_cj.txt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ AP = [:LineBreak = Aksara_Prebase:];
4444
AS = [:LineBreak = Aksara_Start:];
4545
BAX = [\u2010 \u2013];
4646
BA = [[:LineBreak = Break_After:] - BAX];
47-
HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
47+
HH = [:LineBreak = Unambiguous_Hyphen:];
4848
BB = [:LineBreak = Break_Before:];
4949
BK = [:LineBreak = Mandatory_Break:];
5050
B2 = [:LineBreak = Break_Both:];
@@ -203,7 +203,7 @@ LB11.3: WJ CM* [^CM];
203203
LB20a.2: GL CM* (HY | HH) CM* AL;
204204
LB12: GL CM* [^CM];
205205

206-
LB12a: [^SP BA BAX HY] CM* GL;
206+
LB12a: [^SP BA BAX HY HH] CM* GL;
207207

208208
# LB 13 Do not break before ‘]’ or ‘!’ or ‘/’, even after spaces.
209209
LB13.1: [^SP] CM* [CL CP EX SY];
@@ -224,22 +224,22 @@ LB19.1: QU CM* [^CM];
224224
LB20.1: . CM* ZWJ CB;
225225
LB20.2: . CM* ÷ CB;
226226

227-
LB20a.6: CB CM* ZWJ (HY | HH) CM* AL;
227+
LB20a.6: CB CM* ZWJ (HY | HH) CM* (AL | HL);
228228
LB20.3: CB CM* ZWJ [^CM];
229229
LB20.4: CB CM* ÷;
230230

231231
# LB 20a Do not break after a word-initial hyphen.
232-
LB20a.1: ^(HY | HH) CM* AL;
232+
LB20a.1: ^(HY | HH) CM* (AL | HL);
233233

234234
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
235235
# not picking up the continuing match after the BA from 21a.
236236
# LB 21a Don't break after Hebrew + Hyphen
237237
# HL (HY | BA) x
238238

239-
LB21a: HL CM* (HY | BAminuseaFWH | BAX) CM* [^CM CB HL];
239+
LB21a: HL CM* (HY | HH) CM* [^CM CB HL];
240240

241-
LB21.1: [^ID] CM* [BA BAX HY NS];
242-
LB21.2: ID CM* [BA HY NS];
241+
LB21.1: [^ID] CM* [BA BAX HY HH NS];
242+
LB21.2: ID CM* [BA HY HH NS];
243243
LB21.3: BB CM* [^CM CB];
244244

245245
LB21b: SY CM* HL;

icu4c/source/test/testdata/break_rules/line_normal.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ AL = [:LineBreak = Alphabetic:];
3131
AP = [:LineBreak = Aksara_Prebase:];
3232
AS = [:LineBreak = Aksara_Start:];
3333
BA = [:LineBreak = Break_After:];
34-
HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
34+
HH = [:LineBreak = Unambiguous_Hyphen:];
3535
BB = [:LineBreak = Break_Before:];
3636
BK = [:LineBreak = Mandatory_Break:];
3737
B2 = [:LineBreak = Break_Both:];
@@ -185,7 +185,7 @@ LB11.3: WJ CM* [^CM];
185185
LB20a.2: GL CM* (HY | HH) CM* AL;
186186
LB12: GL CM* [^CM];
187187

188-
LB12a: [^SP BA HY] CM* GL;
188+
LB12a: [^SP BA HY HH] CM* GL;
189189

190190
# LB 13 Do not break before ‘]’ or ‘!’ or ‘/’, even after spaces.
191191
LB13.1: [^SP] CM* [CL CP EX SY];
@@ -205,18 +205,18 @@ LB19.1: QU CM* [^CM];
205205
LB20.1: . CM* ZWJ CB;
206206
LB20.2: . CM* ÷ CB;
207207

208-
LB20a.6: CB CM* ZWJ (HY | HH) CM* AL;
208+
LB20a.6: CB CM* ZWJ (HY | HH) CM* (AL | HL);
209209
LB20.3: CB CM* ZWJ [^CM];
210210
LB20.4: CB CM* ÷;
211211

212212
# LB 20a Do not break after a word-initial hyphen.
213-
LB20a.1: ^(HY | HH) CM* AL;
213+
LB20a.1: ^(HY | HH) CM* (AL | HL);
214214

215215
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
216216
# not picking up the continuing match after the BA from 21a.
217-
LB21a: HL CM* (HY | BAminuseaFWH) CM* [^CM CB HL];
217+
LB21a: HL CM* (HY | HH) CM* [^CM CB HL];
218218

219-
LB21.1: . CM* [BA HY NS];
219+
LB21.1: . CM* [BA HY HH NS];
220220
LB21.2: BB CM* [^CM CB];
221221

222222
LB21b: SY CM* HL;

icu4c/source/test/testdata/break_rules/line_normal_cj.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ AL = [:LineBreak = Alphabetic:];
3232
AP = [:LineBreak = Aksara_Prebase:];
3333
AS = [:LineBreak = Aksara_Start:];
3434
BA = [:LineBreak = Break_After:];
35-
HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
35+
HH = [:LineBreak = Unambiguous_Hyphen:];
3636
BB = [:LineBreak = Break_Before:];
3737
BK = [:LineBreak = Mandatory_Break:];
3838
B2 = [:LineBreak = Break_Both:];
@@ -189,7 +189,7 @@ LB11.3: WJ CM* [^CM];
189189
LB20a.2: GL CM* (HY | HH) CM* AL;
190190
LB12: GL CM* [^CM];
191191

192-
LB12a: [^SP BA HY] CM* GL;
192+
LB12a: [^SP BA HY HH] CM* GL;
193193

194194
# LB 13 Do not break before ‘]’ or ‘!’ or ‘/’, even after spaces.
195195
LB13.1: [^SP] CM* [CL CP EX SY];
@@ -209,19 +209,19 @@ LB19.1: QU CM* [^CM];
209209
LB20.1: . CM* ZWJ CB;
210210
LB20.2: . CM* ÷ CB;
211211

212-
LB20a.6: CB CM* ZWJ (HY | HH) CM* AL;
212+
LB20a.6: CB CM* ZWJ (HY | HH) CM* (AL | HL);
213213
LB20.3: CB CM* ZWJ [^CM];
214214
LB20.4: CB CM* ÷;
215215

216216
# LB 20a Do not break after a word-initial hyphen.
217-
LB20a.1: ^(HY | HH) CM* AL;
217+
LB20a.1: ^(HY | HH) CM* (AL | HL);
218218

219219
# Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
220220
# not picking up the continuing match after the BA from 21a.
221-
LB21a: HL CM* (HY | BAminuseaFWH) CM* [^CM CB HL];
221+
LB21a: HL CM* (HY | HH) CM* [^CM CB HL];
222222

223223
# DO allow breaks here before $NSXcm, so don't include it
224-
LB21.1: . CM* [BA HY NS];
224+
LB21.1: . CM* [BA HY HH NS];
225225
LB21.2: BB CM* [^CM CB];
226226

227227
LB21b: SY CM* HL;

0 commit comments

Comments
 (0)
0