Merge branch 'master' of github.com:golang/text · golang/text@237bd37 · GitHub
[go: up one dir, main page]

Skip to content

Commit 237bd37

Browse files
committed
Merge branch 'master' of github.com:golang/text
2 parents d1ff9ab + 566b44f commit 237bd37

32 files changed

+205
-220
lines changed

README.md

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
[![Go Reference](https://pkg.go.dev/badge/golang.org/x/text.svg)](https://pkg.go.dev/golang.org/x/text)
44

5-
This repository holds supplementary Go libraries for text processing, many involving Unicode.
5+
This repository holds supplementary Go packages for text processing,
6+
many involving Unicode.
67

78
## CLDR Versioning
89

@@ -11,13 +12,11 @@ by your Go compiler. The `x/text` repository supports multiple versions of
1112
Unicode and will match the version of Unicode to that of the Go compiler. At the
1213
moment this is supported for Go compilers from version 1.7.
1314

14-
## Download/Install
15+
## Contribute
1516

16-
The easiest way to install is to run `go get -u golang.org/x/text`. You can
17-
also manually git clone the repository to `$GOPATH/src/golang.org/x/text`.
17+
To submit changes to this repository, see http://go.dev/doc/contribute.
1818

19-
## Contribute
20-
To submit changes to this repository, see http://golang.org/doc/contribute.html.
19+
The git repository is https://go.googlesource.com/text.
2120

2221
To generate the tables in this repository (except for the encoding tables),
2322
run go generate from this directory. By default tables are generated for the
@@ -29,6 +28,7 @@ directory, which holds all files that are used as a source for generating the
2928
tables. This directory will also serve as a cache.
3029

3130
## Testing
31+
3232
Run
3333

3434
go test ./...
@@ -52,6 +52,7 @@ directory which holds all files that are used as a source for generating the
5252
tables. This directory will also serve as a cache.
5353

5454
## Versions
55+
5556
To update a Unicode version run
5657

5758
UNICODE_VERSION=x.x.x go generate
@@ -73,11 +74,8 @@ So updating to a different version may not work.
7374

7475
The files in DATA/{iana|icu|w3|whatwg} are currently not versioned.
7576

76-
## Report Issues / Send Patches
77-
78-
This repository uses Gerrit for code changes. To learn how to submit changes to
79-
this repository, see https://golang.org/doc/contribute.html.
77+
## Report Issues
8078

81-
The main issue tracker for the image repository is located at
82-
https://github.com/golang/go/issues. Prefix your issue with "x/text:" in the
79+
The main issue tracker for the text repository is located at
80+
https://go.dev/issues. Prefix your issue with "x/text:" in the
8381
subject line, so it is easy to find.

cases/context_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"testing"
1010
"unicode"
1111

12-
"golang.org/x/text/internal/testtext"
1312
"golang.org/x/text/language"
1413
"golang.org/x/text/transform"
1514
"golang.org/x/text/unicode/norm"
@@ -213,7 +212,8 @@ func TestCCC(t *testing.T) {
213212

214213
func TestWordBreaks(t *testing.T) {
215214
for _, tt := range breakTest {
216-
testtext.Run(t, tt, func(t *testing.T) {
215+
desc := norm.NFC.String(tt)
216+
t.Run(desc, func(t *testing.T) {
217217
parts := strings.Split(tt, "|")
218218
want := ""
219219
for _, s := range parts {

cases/icu_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"strings"
1212
"testing"
1313

14-
"golang.org/x/text/internal/testtext"
1514
"golang.org/x/text/language"
1615
"golang.org/x/text/unicode/norm"
1716
)
@@ -83,7 +82,7 @@ func TestICUConformance(t *testing.T) {
8382
if exclude(c, tag, s) {
8483
continue
8584
}
86-
testtext.Run(t, path.Join(c, tag, s), func(t *testing.T) {
85+
t.Run(path.Join(c, tag, s), func(t *testing.T) {
8786
want := doICU(tag, c, s)
8887
got := doGo(tag, c, s)
8988
if norm.NFC.String(got) != norm.NFC.String(want) {

cases/map_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ func TestAlloc(t *testing.T) {
205205
// func() Caser { return Title(language.Und) },
206206
// func() Caser { return Title(language.Und, HandleFinalSigma(false)) },
207207
} {
208-
testtext.Run(t, "", func(t *testing.T) {
208+
t.Run("", func(t *testing.T) {
209209
var c Caser
210210
v := testtext.AllocsPerRun(10, func() {
211211
c = f()
@@ -234,7 +234,7 @@ func testHandover(t *testing.T, c Caser, src string) {
234234

235235
// Test handover for each substring of the prefix.
236236
for i := 0; i < pSrc; i++ {
237-
testtext.Run(t, fmt.Sprint("interleave/", i), func(t *testing.T) {
237+
t.Run(fmt.Sprint("interleave/", i), func(t *testing.T) {
238238
dst := make([]byte, 4*len(src))
239239
c.Reset()
240240
nSpan, _ := c.Span([]byte(src[:i]), false)
@@ -299,7 +299,7 @@ func TestHandover(t *testing.T) {
299299
"'", "n bietje",
300300
}}
301301
for _, tc := range testCases {
302-
testtext.Run(t, tc.desc, func(t *testing.T) {
302+
t.Run(tc.desc, func(t *testing.T) {
303303
src := tc.first + tc.second
304304
want := tc.t.String(src)
305305
tc.t.Reset()
@@ -601,7 +601,7 @@ func init() {
601601

602602
func TestShortBuffersAndOverflow(t *testing.T) {
603603
for i, tt := range bufferTests {
604-
testtext.Run(t, tt.desc, func(t *testing.T) {
604+
t.Run(tt.desc, func(t *testing.T) {
605605
buf := make([]byte, tt.dstSize)
606606
got := []byte{}
607607
var nSrc, nDst int
@@ -827,7 +827,7 @@ func TestSpan(t *testing.T) {
827827
err: transform.ErrEndOfSpan,
828828
t: Title(language.Afrikaans),
829829
}} {
830-
testtext.Run(t, tt.desc, func(t *testing.T) {
830+
t.Run(tt.desc, func(t *testing.T) {
831831
for p := 0; p < len(tt.want); p += utf8.RuneLen([]rune(tt.src[p:])[0]) {
832832
tt.t.Reset()
833833
n, err := tt.t.Span([]byte(tt.src[:p]), false)
@@ -901,7 +901,7 @@ func BenchmarkCasers(b *testing.B) {
901901
{"title", bytes.ToTitle},
902902
{"upper", bytes.ToUpper},
903903
} {
904-
testtext.Bench(b, path.Join(s.name, "bytes", f.name), func(b *testing.B) {
904+
b.Run(path.Join(s.name, "bytes", f.name), func(b *testing.B) {
905905
b.SetBytes(int64(len(src)))
906906
for i := 0; i < b.N; i++ {
907907
f.fn(src)
@@ -921,7 +921,7 @@ func BenchmarkCasers(b *testing.B) {
921921
} {
922922
c := Caser{t.caser}
923923
dst := make([]byte, len(src))
924-
testtext.Bench(b, path.Join(s.name, t.name, "transform"), func(b *testing.B) {
924+
b.Run(path.Join(s.name, t.name, "transform"), func(b *testing.B) {
925925
b.SetBytes(int64(len(src)))
926926
for i := 0; i < b.N; i++ {
927927
c.Reset()
@@ -934,7 +934,7 @@ func BenchmarkCasers(b *testing.B) {
934934
continue
935935
}
936936
spanSrc := c.Bytes(src)
937-
testtext.Bench(b, path.Join(s.name, t.name, "span"), func(b *testing.B) {
937+
b.Run(path.Join(s.name, t.name, "span"), func(b *testing.B) {
938938
c.Reset()
939939
if n, _ := c.Span(spanSrc, true); n < len(spanSrc) {
940940
b.Fatalf("spanner is not recognizing text %q as done (at %d)", spanSrc, n)

collate/build/contract_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,6 @@ func TestPrintContractionTrieSet(t *testing.T) {
247247
print(&testdata, buf, "test")
248248
if contractTrieOutput != buf.String() {
249249
t.Errorf("output differs; found\n%s", buf.String())
250-
println(string(buf.Bytes()))
251250
}
252251
}
253252

encoding/japanese/all_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"golang.org/x/text/encoding/internal"
1515
"golang.org/x/text/encoding/internal/enctest"
1616
"golang.org/x/text/transform"
17+
"golang.org/x/text/unicode/norm"
1718
)
1819

1920
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
@@ -127,7 +128,7 @@ func TestNonRepertoire(t *testing.T) {
127128
}
128129
for _, tc := range testCases {
129130
dir, tr, wantErr := tc.init(tc.e)
130-
t.Run(fmt.Sprintf("%s/%v/%q", dir, tc.e, short(tc.src)), func(t *testing.T) {
131+
t.Run(fmt.Sprintf("%s/%v/%q", dir, tc.e, shortNFC(tc.src)), func(t *testing.T) {
131132
dst := make([]byte, 100000)
132133
src := []byte(tc.src)
133134
for i := 0; i <= len(tc.src); i++ {
@@ -148,7 +149,8 @@ func TestNonRepertoire(t *testing.T) {
148149
}
149150
}
150151

151-
func short(s string) string {
152+
func shortNFC(s string) string {
153+
s = norm.NFC.String(s)
152154
if len(s) <= 50 {
153155
return s
154156
}

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
module golang.org/x/text
22

3-
go 1.18
3+
go 1.23.0
44

55
require golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // tagx:ignore
66

77
require golang.org/x/mod v0.17.0 // indirect; tagx:ignore
88

9-
require golang.org/x/sync v0.8.0 // indirect
9+
require golang.org/x/sync v0.12.0 // indirect

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
22
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
3-
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
4-
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
3+
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
4+
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
55
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
66
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=

internal/export/idna/idna_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func doTest(t *testing.T, f func(string) (string, error), name, input, want, err
8181
in = strings.Replace(in, `\U`, "#", -1)
8282
name = fmt.Sprintf("%s/%s/%s", name, in, test)
8383

84-
testtext.Run(t, name, func(t *testing.T) {
84+
t.Run(name, func(t *testing.T) {
8585
got, err := f(input)
8686

8787
if err != nil {

internal/export/unicode/gen.go

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ import (
1313
"flag"
1414
"fmt"
1515
"log"
16+
"maps"
1617
"os"
1718
"regexp"
19+
"slices"
1820
"sort"
1921
"strings"
2022
"unicode"
@@ -90,13 +92,15 @@ func println(args ...interface{}) {
9092
var category = map[string]bool{
9193
// Nd Lu etc.
9294
// We use one-character names to identify merged categories
93-
"L": true, // Lu Ll Lt Lm Lo
94-
"P": true, // Pc Pd Ps Pe Pu Pf Po
95-
"M": true, // Mn Mc Me
96-
"N": true, // Nd Nl No
97-
"S": true, // Sm Sc Sk So
98-
"Z": true, // Zs Zl Zp
99-
"C": true, // Cc Cf Cs Co Cn
95+
"L": true, // Lu Ll Lt Lm Lo
96+
"LC": true, // Lu Ll Lt
97+
"P": true, // Pc Pd Ps Pe Pu Pf Po
98+
"M": true, // Mn Mc Me
99+
"N": true, // Nd Nl No
100+
"S": true, // Sm Sc Sk So
101+
"Z": true, // Zs Zl Zp
102+
"C": true, // Cc Cf Cs Co Cn
103+
"Cn": true, // unassigned
100104
}
101105

102106
// This contains only the properties we're interested in.
@@ -149,6 +153,9 @@ func categoryOp(code rune, class uint8) bool {
149153
}
150154

151155
func loadChars() {
156+
for code := range chars {
157+
chars[code].category = "Cn" // unassigned
158+
}
152159
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
153160
c := Char{codePoint: p.Rune(0)}
154161

@@ -201,6 +208,7 @@ func loadCasefold() {
201208
}
202209

203210
var categoryMapping = map[string]string{
211+
"LC": "Letter, cased: Ll | Lt | Lu",
204212
"Lu": "Letter, uppercase",
205213
"Ll": "Letter, lowercase",
206214
"Lt": "Letter, titlecase",
@@ -257,6 +265,7 @@ func printCategories() {
257265
printf("\t%q: %s,\n", k, k)
258266
}
259267
print("}\n\n")
268+
printCategoryAliases()
260269
}
261270

262271
decl := make(sort.StringSlice, len(list))
@@ -272,7 +281,7 @@ func printCategories() {
272281
varDecl := ""
273282
switch name {
274283
case "C":
275-
varDecl = "\tOther = _C; // Other/C is the set of Unicode control and special characters, category C.\n"
284+
varDecl = "\tOther = _C; // Other/C is the set of Unicode control, special, and unassigned code points, category C.\n"
276285
varDecl += "\tC = _C\n"
277286
case "L":
278287
varDecl = "\tLetter = _L; // Letter/L is the set of Unicode letters, category L.\n"
@@ -315,14 +324,14 @@ func printCategories() {
315324
}
316325
decl[ndecl] = varDecl
317326
ndecl++
327+
match := func(cat string) bool { return cat == name }
318328
if len(name) == 1 { // unified categories
319-
dumpRange(
320-
"_"+name,
321-
func(code rune) bool { return categoryOp(code, name[0]) })
322-
continue
329+
match = func(cat string) bool { return strings.HasPrefix(cat, name) }
323330
}
324-
dumpRange("_"+name,
325-
func(code rune) bool { return chars[code].category == name })
331+
if name == "LC" { // special unified category
332+
match = func(cat string) bool { return cat == "Ll" || cat == "Lt" || cat == "Lu" }
333+
}
334+
dumpRange("_"+name, func(code rune) bool { return match(chars[code].category) })
326335
}
327336
decl.Sort()
328337
println("// These variables have type *RangeTable.")
@@ -333,6 +342,35 @@ func printCategories() {
333342
print(")\n\n")
334343
}
335344

345+
func printCategoryAliases() {
346+
known := make(map[string]bool)
347+
for _, name := range allCategories() {
348+
known[name] = true
349+
}
350+
351+
table := make(map[string]string)
352+
ucd.Parse(gen.OpenUCDFile("PropertyValueAliases.txt"), func(p *ucd.Parser) {
353+
if p.String(0) != "gc" {
354+
return
355+
}
356+
name := p.String(1)
357+
if !known[name] {
358+
logger.Print("unknown category: ", name)
359+
}
360+
table[p.String(2)] = name
361+
if a := p.String(3); a != "" {
362+
table[a] = name
363+
}
364+
})
365+
366+
println("// CategoryAliases maps category aliases to standard category names.")
367+
println("var CategoryAliases = map[string]string{")
368+
for _, name := range slices.Sorted(maps.Keys(table)) {
369+
printf("\t%q: %q,\n", name, table[name])
370+
}
371+
print("}\n\n")
372+
}
373+
336374
type Op func(code rune) bool
337375

338376
func dumpRange(name string, inCategory Op) {

internal/number/format.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,9 +394,7 @@ func appendScientific(dst []byte, f *Formatter, n *Digits) (b []byte, postPre, p
394394
exp := n.Exp - int32(n.Comma)
395395
exponential := f.Symbol(SymExponential)
396396
if exponential == "E" {
397-
dst = append(dst, "\u202f"...) // NARROW NO-BREAK SPACE
398397
dst = append(dst, f.Symbol(SymSuperscriptingExponent)...)
399-
dst = append(dst, "\u202f"...) // NARROW NO-BREAK SPACE
400398
dst = f.AppendDigit(dst, 1)
401399
dst = f.AppendDigit(dst, 0)
402400
switch {

0 commit comments

Comments
 (0)
0