Test all punctuation and whitespace codepoints that should be include… · web-platform-tests/wpt@49e1b5b · GitHub
[go: up one dir, main page]

Skip to content

Commit 49e1b5b

Browse files
Test all punctuation and whitespace codepoints that should be included in ::first-letter
1 parent b3ef617 commit 49e1b5b

File tree

2 files changed

+338
-39
lines changed

2 files changed

+338
-39
lines changed

css/css-pseudo/first-letter-punctuation-and-space-ref.html

Lines changed: 0 additions & 17 deletions
This file was deleted.
Lines changed: 338 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,341 @@
11
<!doctype html>
2-
<html>
3-
<head>
4-
<meta charset="utf-8">
5-
<title>CSS Test: ::first-letter formatting</title>
6-
<link rel="author" title="Johannes Odland" href="mailto:johannes.odland@gmail.com">
7-
<link rel="match" href="first-letter-punctuation-and-space-ref.html">
8-
<link rel="help" href="https://drafts.csswg.org/css-pseudo-4/#first-letter-pseudo">
9-
<meta name="assert" content="Test checks that punctuation, intervening space separators and letters still have proper ::first-letter styling.">
10-
<style>
11-
div {
12-
font-size: 36px;
2+
<meta charset="utf-8">
3+
<title>CSS Test: punctuation and intervening whitespace codepoints that are included in ::first-letter</title>
4+
<link rel="author" title="Johannes Odland" href="mailto:johannes.odland@gmail.com">
5+
<link rel="help" href="https://drafts.csswg.org/css-pseudo-4/#first-letter-pseudo">
6+
<link rel="stylesheet" type="text/css" href="/fonts/ahem.css"/>
7+
<script src="/resources/testharness.js"></script>
8+
<script src="/resources/testharnessreport.js"></script>
9+
<style>
10+
#target div {
11+
position: absolute;
12+
top: 0;
13+
left: 0;
14+
font-family: Ahem;
15+
font-size: 16px !important;
16+
opacity: 0;
17+
}
18+
19+
#target div::first-letter {
20+
color: green;
21+
font-size: 0;
22+
}
23+
</style>
24+
<script>
25+
/* Punctuation in (Pc), (Pd), (Ps), (Pe), (Pi), (Pf), (Po) */
26+
// Pc — Connector Punctuation. The last entry is U+FF3F FULLWIDTH LOW LINE; it is written as an
// escape so it cannot be confused with, or folded into, the leading ASCII '_'.
const Pc = ['_', '‿', '⁀', '⁔', '︳', '︴', '﹍', '﹎', '﹏', '\uFF3F'];
27+
// Pd — Dash Punctuation. The second-to-last entry is U+FF0D FULLWIDTH HYPHEN-MINUS; it is written
// as an escape so it cannot be confused with, or folded into, the leading ASCII '-'.
const Pd = ['-', '֊', '־', '᐀', '᠆', '‐', '‑', '‒', '–', '—', '―', '⸗', '⸚', '⸺', '⸻', '⹀', '⹝', '〜', '〰', '゠', '︱', '︲', '﹘', '﹣', '\uFF0D', '𐺭'];
28+
// Ps — Open Punctuation. Characters that have a canonically equivalent or width-folded twin in
// this list are written as escapes so each entry stays a distinct code point:
// U+2329 (vs. U+3008) and the fullwidth/halfwidth forms U+FF08, U+FF3B, U+FF5B, U+FF5F, U+FF62.
const Ps = ['(', '[', '{', '༺', '༼', '᚛', '‚', '„', '⁅', '⁽', '₍', '⌈', '⌊', '\u2329', '❨', '❪', '❬', '❮', '❰', '❲',
  '❴', '⟅', '⟦', '⟨', '⟪', '⟬', '⟮', '⦃', '⦅', '⦇', '⦉', '⦋', '⦑', '⦓', '⦕', '⦗', '⧼', '⦍', '⦏', '⧘', '⧚', '⸢', '⸤',
  '⸦', '⸨', '⹕', '⹗', '⹂', '⹙', '⹛', '〈', '《', '「', '『', '【', '〔', '〖', '〘', '〚', '〝', '﴿', '︗', '︵',
  '︷', '︹', '︻', '︽', '︿', '﹁', '﹃', '﹇', '﹙', '﹛', '﹝', '\uFF08', '\uFF3B', '\uFF5B', '\uFF5F', '\uFF62'];
32+
// Pe — Close Punctuation. Characters that have a canonically equivalent or width-folded twin in
// this list are written as escapes so each entry stays a distinct code point:
// U+232A (vs. U+3009) and the fullwidth/halfwidth forms U+FF09, U+FF3D, U+FF5D, U+FF60, U+FF63.
const Pe = [')', ']', '}', '༻', '༽', '᚜', '⁆', '⁾', '₎', '⌉', '⌋', '\u232A', '❩', '❫', '❭', '❯', '❱', '❳',
  '❵', '⟆', '⟧', '⟩', '⟫', '⟭', '⟯', '⦄', '⦆', '⦈', '⦊', '⦌', '⦒', '⦔', '⦖', '⦘', '⧽', '⦎', '⦐', '⧙', '⧛', '⸣', '⸥',
  '⸧', '⸩', '⹖', '⹘', '⹚', '⹜', '〉', '》', '」', '』', '】', '〕', '〗', '〙', '〛', '〞', '〟', '﴾', '︘', '︶',
  '︸', '︺', '︼', '︾', '﹀', '﹂', '﹄', '﹈', '﹚', '﹜', '﹞', '\uFF09', '\uFF3D', '\uFF5D', '\uFF60', '\uFF63'];
36+
// Pi — Initial Punctuation (opening quotation marks). Every character is in the BMP, so spreading
// the string yields exactly one array entry per character.
const Pi = [...'«‘‛“‟‹⸂⸄⸉⸌⸜⸠'];
37+
// Pf — Final Punctuation (closing quotation marks). Every character is in the BMP, so spreading
// the string yields exactly one array entry per character.
const Pf = [...'»’”›⸃⸅⸊⸍⸝⸡'];
38+
const Po = [
39+
// Basic Latin
40+
'!', '"', '#', '%', '&', '\'', '*', '\\',
41+
',', '.', '/', ':', ';', '?', '@',
42+
// Latin 1 Supplement
43+
'¡', '§', '¶', '·', '¿',
44+
// Greek And Coptic
45+
';', '·',
46+
// Armenian
47+
'՚', '՛', '՜', '՝', '՞', '՟', '։',
48+
// Hebrew
49+
'׀', '׃', '׆', '׳', '״',
50+
// Arabic
51+
'؉', '؊', '،', '؍', '؛', '؝', '؞', '؟', '٪', '٫', '٬', '٭', '۔',
52+
// Syriac
53+
'܀', '܁', '܂', '܃', '܄', '܅', '܆', '܇', '܈', '܉', '܊', '܋', '܌', '܍',
54+
// NKo
55+
'߷', '߸', '߹',
56+
// Samaritan
57+
'࠰', '࠱', '࠲', '࠳', '࠴', '࠵', '࠶', '࠷', '࠸', '࠹', '࠺', '࠻', '࠼', '࠽', '࠾',
58+
// Mandaic
59+
'࡞',
60+
// Devanagari
61+
'।', '॥', '॰',
62+
// Bengali
63+
'৽',
64+
// Gurmukhi
65+
'੶',
66+
// Gujarati
67+
'૰',
68+
// Telugu
69+
'౷',
70+
// Kannada
71+
'಄',
72+
// Sinhala,
73+
'෴',
74+
// Thai
75+
'๏', '๚', '๛',
76+
// Tibetan
77+
'༄', '༅', '༆', '༇', '࿓', '࿔', '༈', '༉', '༊', '་', '༌', '།', '༎', '༏', '༐', '༑', '༒', '༔', '྅', '࿐', '࿑', '࿒', '࿙', '࿚',
78+
// Myanmar
79+
'၊', '။', '၌', '၍', '၎', '၏',
80+
// Georgian
81+
'჻',
82+
// Ethiopic
83+
'፠', '፡', '።', '፣', '፤', '፥', '፦', '፧', '፨',
84+
// Unified Canadian Aboriginal Syllabics
85+
'᙮',
86+
// Runic
87+
'᛫', '᛬', '᛭',
88+
// Hanunoo
89+
'᜵', '᜶',
90+
// Khmer
91+
'។', '៕', '៖', '៘', '៙', '៚',
92+
// Mongolian
93+
'᠀', '᠁', '᠂', '᠃', '᠄', '᠅', '᠇', '᠈', '᠉', '᠊',
94+
// Limbu
95+
'᥄', '᥅',
96+
// Buginese
97+
'᨞', '᨟',
98+
// Tai Tham
99+
'᪠', '᪡', '᪢', '᪣', '᪤', '᪥', '᪦', '᪨', '᪩', '᪪', '᪫', '᪬', '᪭',
100+
// Balinese
101+
'᭚', '᭛', '᭜', '᭝', '᭞', '᭟', '᭠', '᭽', '᭾',
102+
// Batak
103+
'᯼', '᯽', '᯾', '᯿',
104+
// Lepcha
105+
'᰻', '᰼', '᰽', '᰾', '᰿',
106+
// Ol Chiki
107+
'᱾', '᱿',
108+
// Sundanese Supplement
109+
'᳀', '᳁', '᳂', '᳃', '᳄', '᳅', '᳆', '᳇',
110+
// Vedic Extensions
111+
'᳓',
112+
// General Punctuation
113+
'‖', '‗', '†', '‡', '•', '‣', '․', '‥', '…', '‧', '‰', '‱', '′', '″', '‴', '‵', '‶', '‷', '‸', '※', '‽', '‾',
114+
'⁁', '⁂', '⁃', '⁊', '⁋', '⁌', '⁍', '⁎', '⁏', '⁐', '⁑', '⁓', '⁕', '⁗', '‼', '⁇', '⁈', '⁉', '⁖', '⁘', '⁙', '⁚', '⁛',
115+
'⁜', '⁝', '⁞',
116+
// Coptic
117+
'⳹', '⳺', '⳻', '⳼', '⳾', '⳿',
118+
// Tifinagh
119+
'⵰',
120+
// Supplemental Punctuation
121+
'⸀', '⸁', '⸆', '⸇', '⸈', '⸋', '⸎', '⸏', '⸐', '⸑', '⸒', '⸓', '⸔', '⸕', '⸖', '⸘', '⸙', '⸛', '⸞', '⸟', '⸪', '⸫',
122+
'⸬', '⸭', '⸮', '⸰', '⸱', '⸳', '⸴', '⸿', '⹊', '⹋', '⹌', '⹍', '⹎', '⹏', '⹒', '⹓', '⹔', '⸲', '⸵', '⸶', '⸷', '⸸',
123+
'⸹', '⸼', '⸽', '⸾', '⹁', '⹃', '⹄', '⹅', '⹆', '⹇', '⹈', '⹉',
124+
// CJK Symbols And Punctuation
125+
'、', '。', '〃', '〽',
126+
// Katakana
127+
'・',
128+
// Lisu
129+
'꓾', '꓿',
130+
// Vai
131+
'꘍', '꘎', '꘏',
132+
// Cyrillic Extended B
133+
'꙳', '꙾',
134+
// Bamum
135+
'꛲', '꛳', '꛴', '꛵', '꛶', '꛷',
136+
// Phags Pa
137+
'꡴', '꡵', '꡶', '꡷',
138+
// Saurashtra
139+
'꣎', '꣏',
140+
// Devanagari Extended
141+
'꣸', '꣹', '꣺', '꣼',
142+
// Kayah Li
143+
'꤮', '꤯',
144+
// Rejang
145+
'꥟',
146+
// Javanese
147+
'꧁', '꧂', '꧃', '꧄', '꧅', '꧆', '꧇', '꧈', '꧉', '꧊', '꧋', '꧌', '꧍', '꧞', '꧟',
148+
// Cham
149+
'꩜', '꩝', '꩞', '꩟',
150+
// Tai Viet
151+
'꫞', '꫟',
152+
// Meetei Mayek Extensions, Meetei Mayek
153+
'꫰', '꫱', '꯫',
154+
// Vertical Forms
155+
'︐', '︑', '︒', '︓', '︔', '︕', '︖', '︙',
156+
// CJK Compatibility Forms
157+
'︰', '﹅', '﹆', '﹉', '﹊', '﹋', '﹌',
158+
// Small Form Variants
159+
'﹐', '﹑', '﹒', '﹔', '﹕', '﹖', '﹗', '﹟', '﹠', '﹡', '﹨', '﹪', '﹫',
160+
// Halfwidth And Fullwidth Forms
161+
'!', '"', '#', '%', '&', ''', '*', ',', '.', '/', ':', ';', '?', '@', '\', '。', '、', '・',
162+
// Aegean Numbers
163+
'𐄀', '𐄁', '𐄂',
164+
// Ugaritic
165+
'𐎟',
166+
// Old Persian
167+
'𐏐',
168+
// Caucasian Albanian
169+
'𐕯',
170+
// Imperial Aramaic
171+
'𐡗',
172+
// Phoenician
173+
'𐤟',
174+
// Lydian
175+
'𐤿',
176+
// Kharoshthi
177+
'𐩐', '𐩑', '𐩒', '𐩓', '𐩔', '𐩕', '𐩖', '𐩗', '𐩘',
178+
// Old South Arabian
179+
'𐩿',
180+
// Manichaean
181+
'𐫰', '𐫱', '𐫲', '𐫳', '𐫴', '𐫵', '𐫶',
182+
// Avestan
183+
'𐬹', '𐬺', '𐬻', '𐬼', '𐬽', '𐬾', '𐬿',
184+
// Psalter Pahlavi
185+
'𐮙', '𐮚', '𐮛', '𐮜',
186+
// Sogdian
187+
'𐽕', '𐽖', '𐽗', '𐽘', '𐽙',
188+
// Old Uyghur
189+
'𐾆', '𐾇', '𐾈', '𐾉',
190+
// Brahmi
191+
'𑁇', '𑁈', '𑁉', '𑁊', '𑁋', '𑁌', '𑁍',
192+
// Kaithi
193+
'𑂻', '𑂼', '𑂾', '𑂿', '𑃀', '𑃁',
194+
// Chakma
195+
'𑅀', '𑅁', '𑅂', '𑅃',
196+
// Mahajani
197+
'𑅴', '𑅵',
198+
// Sharada
199+
'𑇅', '𑇆', '𑇇', '𑇈', '𑇍', '𑇛', '𑇝', '𑇞', '𑇟',
200+
// Khojki
201+
'𑈸', '𑈹', '𑈺', '𑈻', '𑈼', '𑈽',
202+
// Multani
203+
'𑊩',
204+
// Newa
205+
'𑑋', '𑑌', '𑑍', '𑑎', '𑑏', '𑑚', '𑑛', '𑑝',
206+
// Tirhuta
207+
'𑓆',
208+
// Siddham
209+
'𑗁', '𑗂', '𑗃', '𑗄', '𑗅', '𑗆', '𑗇', '𑗈', '𑗉', '𑗊', '𑗋', '𑗌', '𑗍', '𑗎', '𑗏', '𑗐', '𑗑', '𑗒', '𑗓', '𑗔', '𑗕', '𑗖', '𑗗',
210+
// Modi
211+
'𑙁', '𑙂', '𑙃',
212+
// Mongolian Supplement
213+
'𑙠', '𑙡', '𑙢', '𑙣', '𑙤', '𑙥', '𑙦', '𑙧', '𑙨', '𑙩', '𑙪', '𑙫', '𑙬',
214+
// Takri
215+
'𑚹',
216+
// Ahom
217+
'𑜼', '𑜽', '𑜾',
218+
// Dogra
219+
'𑠻',
220+
// Dives Akuru
221+
'𑥄', '𑥅', '𑥆',
222+
// Nandinagari
223+
'𑧢',
224+
// Zanabazar Square
225+
'𑨿', '𑩀', '𑩅', '𑩆', '𑩁', '𑩂', '𑩃', '𑩄',
226+
// Soyombo
227+
'𑪚', '𑪛', '𑪜', '𑪞', '𑪟', '𑪠', '𑪡', '𑪢',
228+
// Devanagari Extended A
229+
'𑬀', '𑬁', '𑬂', '𑬃', '𑬄', '𑬅', '𑬆', '𑬇', '𑬈', '𑬉',
230+
// Bhaiksuki
231+
'𑱁', '𑱂', '𑱃', '𑱄', '𑱅',
232+
// Marchen
233+
'𑱰', '𑱱',
234+
// Makasar
235+
'𑻷', '𑻸',
236+
// Kawi
237+
'𑽃', '𑽄', '𑽅', '𑽆', '𑽇', '𑽈', '𑽉', '𑽊', '𑽋', '𑽌', '𑽍', '𑽎', '𑽏',
238+
// Tamil Supplement
239+
'𑿿',
240+
// Cuneiform Numbers And Punctuation
241+
'𒑰', '𒑱', '𒑲', '𒑳', '𒑴',
242+
// Cypro Minoan
243+
'𒿱', '𒿲',
244+
// Mro
245+
'𖩮', '𖩯',
246+
// Bassa Vah
247+
'𖫵',
248+
// Pahawh Hmong
249+
'𖬷', '𖬸', '𖬹', '𖬺', '𖬻', '𖭄',
250+
// Medefaidrin
251+
'𖺗', '𖺘', '𖺙', '𖺚',
252+
// Ideographic Symbols And Punctuation
253+
'𖿢',
254+
// Duployan
255+
'𛲟',
256+
// Sutton SignWriting
257+
'𝪇', '𝪈', '𝪉', '𝪊', '𝪋',
258+
// Adlam
259+
'𞥞', '𞥟'
260+
];
261+
262+
// Zs — Space Separator: all 17 code points, in code point order. They are written as escapes
// because the characters themselves are visually indistinguishable and are easily collapsed to
// U+0020 by editors and text pipelines.
const Zs = [
  '\u0020', '\u00A0', '\u1680',
  '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005',
  '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
  '\u202F', '\u205F', '\u3000',
];
263+
264+
// Word-separator characters are typographic character units whose primary purpose and general usage is to separate
265+
// words. In Unicode this includes (but is not exhaustively defined as) the space (U+0020),
266+
// the no-break space (U+00A0), the Ethiopic word space (U+1361), the Aegean word separators (U+10100,U+10101),
267+
// the Ugaritic word divider (U+1039F), and the Phoenician word separator (U+1091F).
268+
// Spelled as escapes so that U+0020 SPACE and U+00A0 NO-BREAK SPACE stay distinguishable.
const wordSeparators = ['\u0020', '\u00A0', '\u1361', '\u{10100}', '\u{10101}', '\u{1039F}', '\u{1091F}'];
269+
270+
/**
 * Formats the first code point of a string for use in a test name.
 *
 * @param {string} str - Non-empty string; only its first code point is used.
 * @returns {string} A backslash, then "u+", then the code point in lowercase hex, zero-padded
 *     to at least four digits.
 */
function formatCodepoint(str) {
  const firstCodePoint = str.codePointAt(0);
  const hexDigits = firstCodePoint.toString(16);
  return '\\u+' + hexDigits.padStart(4, '0');
}
273+
274+
// explicit_done: the harness must wait for an explicit done() call; runTest() makes that call
// after it has registered every subtest.
setup({explicit_done: true});
275+
276+
/**
 * Creates one <div> per test string inside #target, waits two animation frames, then registers
 * one testharness subtest per <div> and calls done().
 *
 * Every test string is the word "Test" decorated with punctuation and/or spaces that are
 * expected to become part of ::first-letter. The page's stylesheet gives ::first-letter a
 * font-size of 0 and renders the rest in 16px Ahem, so a passing <div> is exactly as wide as the
 * remaining "est": 3em = 48px.
 */
async function runTest() {
  const target = document.querySelector("#target");
  const tests = [];

  // Adds a <div> containing `testString` to #target and queues it for the width check.
  function setupTest(testString, name) {
    const el = document.createElement('div');
    // The test strings are plain text, so no markup parsing is wanted.
    el.textContent = testString;
    target.appendChild(el);

    tests.push({el, name});
  }

  // All punctuation—i.e., characters that belong to the Punctuation (P*) Unicode
  // general category [UAX44]—that precedes the first letter,
  for (const p of [Pc, Pd, Ps, Pe, Pi, Pf, Po].flat()) {
    setupTest(`${p}Test`, `Preceding punctuation ${p} (${formatCodepoint(p)}) should be included`);
  }

  const multiplePrecedingCharacters = Pc[0] + Pd[0] + Ps[0];
  setupTest(`${multiplePrecedingCharacters}Test`, `Multiple preceding punctuation characters ${multiplePrecedingCharacters} should be included`);

  // as well as any intervening typographic space—characters belonging to the Zs
  // Unicode general category [UAX44] other than U+3000 IDEOGRAPHIC SPACE.
  for (const s of Zs.filter(char => char.codePointAt(0) !== 0x3000)) {
    setupTest(`"${s}Test`, `Intervening typographic space ${(formatCodepoint(s))} before the first-letter should be included`);
  }

  const multipleInterveningSpaces = Zs[5] + Zs[6] + Zs[7];
  setupTest(`"${multipleInterveningSpaces}Test`, `Multiple intervening typographic spaces before the first-letter should be included`);

  // Any punctuation other than opening punctuation and dashes—i.e. characters that belong to the Punctuation (P*)
  // Unicode general category, excluding Open Punctuation (Ps) and Dash Punctuation (Pd)—that follows the first letter,
  for (const p of [Pc, Pe, Pi, Pf, Po].flat()) {
    setupTest(`T${p}est`, `Following punctuation ${p} (${formatCodepoint(p)}) should be included`);
  }

  // The punctuation must come after the "T" here; placing it before the letter would only
  // repeat the preceding-punctuation case above.
  const multipleFollowingCharacters = Pc[0] + Pe[0] + Pi[0];
  setupTest(`T${multipleFollowingCharacters}est`, `Multiple following punctuation characters ${multipleFollowingCharacters} should be included`);

  // as well as any intervening typographic space—characters belonging to the Zs Unicode general category [UAX44]
  // other than U+3000 IDEOGRAPHIC SPACE or a word separator.
  for (const s of Zs.filter(char => char.codePointAt(0) !== 0x3000 && !wordSeparators.includes(char))) {
    setupTest(`T${s}"est`, `Intervening typographic space ${(formatCodepoint(s))} after the first-letter should be included`);
  }

  setupTest(`T${multipleInterveningSpaces}"est`, `Multiple intervening typographic spaces after the first-letter should be included`);

  // Wait two animation frames before measuring the queued elements.
  await new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r)));

  for (const {el, name} of tests) {
    // First-letter should include the T and surrounding punctuation and whitespace.
    // As the ::first-letter has font-size 0, the remaining size of the 'est' letters should be 3em = 48px
    test(() => assert_equals(el.offsetWidth, 48, 'width'), name);
  }

  // Remove the measured elements and drop the references to them before finishing.
  target.innerHTML = '';
  tests.length = 0;
  done();
}
336+
</script>
337+
<body onload="document.fonts.ready.then(() => { runTest(); })">
338+
<div id="target">
339+
<div>Placeholder to ensure font has loaded</div>
340+
</div>
341+
</body>

0 commit comments

Comments
 (0)
0