diff --git a/icu4j/main/translit/src/main/java/com/ibm/icu/text/BreakTransliterator.java b/icu4j/main/translit/src/main/java/com/ibm/icu/text/BreakTransliterator.java index 56d8d84603dd..823497bb4ebc 100644 --- a/icu4j/main/translit/src/main/java/com/ibm/icu/text/BreakTransliterator.java +++ b/icu4j/main/translit/src/main/java/com/ibm/icu/text/BreakTransliterator.java @@ -22,8 +22,6 @@ final class BreakTransliterator extends Transliterator { private BreakIterator bi; private String insertion; - private int[] boundaries = new int[50]; - private int boundaryCount = 0; public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) { super(ID, filter); @@ -52,8 +50,9 @@ public void setInsertion(String insertion) { public BreakIterator getBreakIterator() { // Defer initialization of BreakIterator because it is slow, // typically over 2000 ms. - if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH")); - return bi; + // Using a holder class for safe init without a volatile-read. + if (bi == null) bi = WordBreakIteratorHolder.BI; + return (BreakIterator) bi.clone(); } ///CLOVER:OFF @@ -74,10 +73,11 @@ public void setBreakIterator(BreakIterator bi) { | (1<0 cased (UCaseProps.LOWER etc.) ==0 uncased <0 case-ignorable int type; @@ -108,8 +107,6 @@ protected synchronized void handleTransliterate(Replaceable text, iter.setLimit(offsets.limit); iter.setContextLimits(offsets.contextStart, offsets.contextLimit); - result.setLength(0); - // Walk through original string // If there is a case change, modify corresponding position in replaceable int delta; diff --git a/icu4j/main/translit/src/main/java/com/ibm/icu/text/UppercaseTransliterator.java b/icu4j/main/translit/src/main/java/com/ibm/icu/text/UppercaseTransliterator.java index cbfae3229216..305c46819ae1 100644 --- a/icu4j/main/translit/src/main/java/com/ibm/icu/text/UppercaseTransliterator.java +++ b/icu4j/main/translit/src/main/java/com/ibm/icu/text/UppercaseTransliterator.java @@ -39,8 +39,6 @@ public Transliterator getInstance(String ID) { private final ULocale locale; private final UCaseProps csp; - private ReplaceableContextIterator iter; - private StringBuilder result; private int caseLocale; /** @@ -50,8 +48,6 @@ public UppercaseTransliterator(ULocale loc) { super(_ID, null); locale = loc; csp=UCaseProps.INSTANCE; - iter=new ReplaceableContextIterator(); - result = new StringBuilder(); caseLocale = UCaseProps.getCaseLocale(locale); } @@ -59,7 +55,7 @@ public UppercaseTransliterator(ULocale loc) { * Implements {@link Transliterator#handleTransliterate}. */ @Override - protected synchronized void handleTransliterate(Replaceable text, + protected void handleTransliterate(Replaceable text, Position offsets, boolean isIncremental) { if(csp==null) { return; @@ -69,8 +65,10 @@ protected synchronized void handleTransliterate(Replaceable text, return; } + ReplaceableContextIterator iter = new ReplaceableContextIterator(); + StringBuilder result = new StringBuilder(); + iter.setText(text); - result.setLength(0); int c, delta; // Walk through original string diff --git a/icu4j/perf-tests/README.txt b/icu4j/perf-tests/README.txt index d26f3fdafa54..dc143cc32448 100644 --- a/icu4j/perf-tests/README.txt +++ b/icu4j/perf-tests/README.txt @@ -43,6 +43,9 @@ COLLATION TESTS The collation tests run only on the command line with tabular output: perl collationperf.pl |& tee collation_output.txt +JMH +Some performance tests run using OpenJDK JMH. Example invocation: + mvn clean package exec:java -pl perf-tests -Pjmh_benchmark OTHER COMMAND LINE TESTS Additional tests are run from the command line, each producing an HTML diff --git a/icu4j/perf-tests/pom.xml b/icu4j/perf-tests/pom.xml index a4debcade133..d6883778caf8 100644 --- a/icu4j/perf-tests/pom.xml +++ b/icu4j/perf-tests/pom.xml @@ -43,6 +43,73 @@ commons-cli ${commons-cli.version} + + org.openjdk.jmh + jmh-core + ${jmh.version} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + provided + + + + jmh_benchmark + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.openjdk.jmh.generators.BenchmarkProcessor + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + build-jmh-classpath + + build-classpath + + + runtime + jmhClasspath + + + + + + org.codehaus.mojo + exec-maven-plugin + + org.openjdk.jmh.Main + + -f + 1 + -wi + 5 + -i + 10 + + + + java.class.path + ${project.build.outputDirectory}${path.separator}${jmhClasspath} + + + + + + + + + diff --git a/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/BreakTransliteratorPerf.java b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/BreakTransliteratorPerf.java new file mode 100644 index 000000000000..699eedb47621 --- /dev/null +++ b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/BreakTransliteratorPerf.java @@ -0,0 +1,30 @@ +// © 2025 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.dev.test.perf; + +import java.util.concurrent.TimeUnit; + +import com.ibm.icu.text.BreakTransliteratorAccess; +import com.ibm.icu.text.Transliterator; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; + +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class BreakTransliteratorPerf { + + static final Transliterator TITLE = BreakTransliteratorAccess.newInstance(); + + @Benchmark + public String testShort() { + return TITLE.transliterate("Cat"); + } + + @Benchmark + public String testSentence() { + return TITLE.transliterate("The Quick Brown Fox jumped over the Lazy Dog"); + } + +} diff --git a/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/CaseFoldTransliteratorPerfTest.java b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/CaseFoldTransliteratorPerfTest.java new file mode 100644 index 000000000000..d75fde1e58ac --- /dev/null +++ b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/CaseFoldTransliteratorPerfTest.java @@ -0,0 +1,29 @@ +// © 2025 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.dev.test.perf; + +import java.util.concurrent.TimeUnit; + +import com.ibm.icu.text.Transliterator; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; + +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class CaseFoldTransliteratorPerfTest { + + static final Transliterator CASE = Transliterator.getInstance("CaseFold"); + + @Benchmark + public String testShort() { + return CASE.transliterate("Cat"); + } + + @Benchmark + public String testSentence() { + return CASE.transliterate("The Quick Brown Fox Jumped Over The Lazy Dog"); + } + +} diff --git a/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/LowercaseTransliteratorPerf.java b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/LowercaseTransliteratorPerf.java new file mode 100644 index 000000000000..a0acab6e663c --- /dev/null +++ b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/LowercaseTransliteratorPerf.java @@ -0,0 +1,29 @@ +// © 2025 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.dev.test.perf; + +import java.util.concurrent.TimeUnit; + +import com.ibm.icu.text.Transliterator; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; + +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class LowercaseTransliteratorPerf { + + static final Transliterator LOWER = Transliterator.getInstance("Lower"); + + @Benchmark + public String testShort() { + return LOWER.transliterate("Cat"); + } + + @Benchmark + public String testSentence() { + return LOWER.transliterate("The Quick Brown Fox Jumped Over The Lazy Dog"); + } + +} diff --git a/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/TitlecaseTransliteratorPerf.java b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/TitlecaseTransliteratorPerf.java new file mode 100644 index 000000000000..071bc1e569e0 --- /dev/null +++ b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/TitlecaseTransliteratorPerf.java @@ -0,0 +1,29 @@ +// © 2025 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.dev.test.perf; + +import java.util.concurrent.TimeUnit; + +import com.ibm.icu.text.Transliterator; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; + +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class TitlecaseTransliteratorPerf { + + static final Transliterator TITLE = Transliterator.getInstance("Title"); + + @Benchmark + public String testShort() { + return TITLE.transliterate("CAT"); + } + + @Benchmark + public String testSentence() { + return TITLE.transliterate("the quick brown fox jumped over the lazy dog"); + } + +} diff --git a/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/UppercaseTransliteratorPerf.java b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/UppercaseTransliteratorPerf.java new file mode 100644 index 000000000000..b98d6e6b6bd2 --- /dev/null +++ b/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/UppercaseTransliteratorPerf.java @@ -0,0 +1,29 @@ +// © 2025 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.dev.test.perf; + +import java.util.concurrent.TimeUnit; + +import com.ibm.icu.text.Transliterator; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; + +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class UppercaseTransliteratorPerf { + + static final Transliterator UPPER = Transliterator.getInstance("Upper"); + + @Benchmark + public String testShort() { + return UPPER.transliterate("Cat"); + } + + @Benchmark + public String testSentence() { + return UPPER.transliterate("The Quick Brown Fox Jumped Over The Lazy Dog"); + } + +} diff --git a/icu4j/perf-tests/src/main/java/com/ibm/icu/text/BreakTransliteratorAccess.java b/icu4j/perf-tests/src/main/java/com/ibm/icu/text/BreakTransliteratorAccess.java new file mode 100644 index 000000000000..5afbe1e92bd1 --- /dev/null +++ b/icu4j/perf-tests/src/main/java/com/ibm/icu/text/BreakTransliteratorAccess.java @@ -0,0 +1,8 @@ +package com.ibm.icu.text; + +public class BreakTransliteratorAccess { + // Non-public access + public static Transliterator newInstance() { + return new BreakTransliterator("Any-Break", UnicodeSet.ALL_CODE_POINTS); + } +} diff --git a/icu4j/pom.xml b/icu4j/pom.xml index 10694b79de73..eb7a31e4a195 100644 --- a/icu4j/pom.xml +++ b/icu4j/pom.xml @@ -61,6 +61,7 @@ ${project.build.directory}/release_directory + 1.37 4.13.2 1.1.1 2.11.0