complete · CJavaScala/native2ascii@fec3e80 · GitHub
[go: up one dir, main page]

Skip to content

Commit fec3e80

Browse files
author
Peng.Li
committed
complete
0 parents  commit fec3e80

File tree

7 files changed

+121
-0
lines changed

7 files changed

+121
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.idea/
2+
target/

build.sbt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Project name.
name := "native2ascii"
2+
3+
// Project version.
version := "1.0"
4+
5+
// Scala compiler version used to build the project; `%%` below appends the
// matching Scala binary-version suffix to each artifact name.
scalaVersion := "2.11.7"
6+
7+
// ScalaTest and ScalaCheck.
// NOTE(review): no configuration scope is given, so both end up on the default
// (compile) classpath. If they are only used by tests, `% "test"` would be the
// usual scope -- confirm where the spec file lives before changing.
libraryDependencies ++= Seq(
8+
"org.scalatest" %% "scalatest" % "2.2.5",
9+
"org.scalacheck" %% "scalacheck" % "1.12.5"
10+
)

project/build.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Version of sbt used to build this project.
sbt.version = 0.13.8

project/plugins.sbt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// Only log messages at warning level or above for this build definition.
logLevel := Level.Warn

src/main/scala/centaur/AAA.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package centaur
2+
3+
/**
 * Scratch program: prints the pieces obtained by splitting a sample string
 * on a literal backslash followed by `u`.
 */
object AAA extends App {

  // The text being split: a backslash followed by "usdf".
  private val sample = "\\usdf"

  // Regex source matching a literal backslash followed by 'u'.
  private val escapeMarker = "\\\\u"

  println(sample.split(escapeMarker).toList)

}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package centaur
2+
3+
import java.nio.charset.Charset
4+
5+
/**
 * Converts text between its native (Unicode) form and an ASCII-only form in
 * which every non-ASCII UTF-16 code unit is written as a `\uXXXX` escape
 * (four lowercase hex digits).
 */
object Native2Ascii {

  import scala.util.matching.Regex

  /** One well-formed escape: a backslash, `u`, then exactly four hex digits. */
  private val UnicodeEscape: Regex = """\\u([0-9a-fA-F]{4})""".r

  /**
   * Escapes every non-ASCII character of `native` as `\uXXXX`.
   *
   * ASCII characters, including backslashes, are copied through unchanged, so
   * a literal `\uXXXX` that is already present in the input is not protected
   * and will be decoded by [[ascii2native]].
   *
   * @param native the text to escape
   * @return the ASCII-only representation of `native`
   */
  def native2ascii(native: CharSequence): CharSequence =
    native.toString.map(c => if (isAscii(c)) c.toString else toUnicode(c)).mkString

  /**
   * Decodes every well-formed `\uXXXX` escape in `ascii` back to the character
   * it denotes.
   *
   * Anything that is not a backslash, `u` and exactly four hex digits -- a
   * trailing `\u`, a `\u` followed by fewer than four characters, or by
   * non-hex characters -- is left untouched instead of being dropped or
   * raising an exception.
   *
   * @param ascii the escaped text
   * @return the text with all well-formed escapes decoded
   */
  def ascii2native(ascii: CharSequence): CharSequence =
    UnicodeEscape.replaceAllIn(ascii, m => {
      val decoded = Integer.parseInt(m.group(1), 16).toChar.toString
      // The replacement string is itself interpreted (`$` and `\` are special),
      // and an escape may decode to exactly those characters, so quote it.
      Regex.quoteReplacement(decoded)
    })

  /**
   * Tells whether `char` is in the US-ASCII range (code units 0 to 127).
   *
   * @param char the character to test
   * @return `true` if `char` is an ASCII character
   */
  def isAscii(char: Char): Boolean =
    char <= 0x7F

  /** Formats `char` as `\u` followed by its code unit as four lowercase hex digits. */
  private def toUnicode(char: Char): String =
    "\\u" + "%04x".format(char.toInt)

}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package centaur
2+
3+
import org.scalacheck.{Arbitrary, Gen}
4+
import org.scalatest.prop.PropertyChecks
5+
import org.scalatest.{FunSuite, Matchers}
6+
7+
/**
 * Example-based and property-based tests for the `native2ascii` /
 * `ascii2native` conversions in `Native2Ascii`.
 */
class Native2AsciiSpec extends FunSuite with Matchers with PropertyChecks {

  import Native2Ascii._

  /** Wrapper for a character produced by the ASCII generator. */
  case class AsciiChar(char: Char)

  /** Wrapper for a character produced by the non-ASCII generator. */
  case class NonAsciiChar(char: Char)

  // Characters with code 32 to 127 (inclusive).
  implicit val asciiCharArbitrary: Arbitrary[AsciiChar] = Arbitrary(
    Gen.choose(32, 127).map(_.toChar).map(AsciiChar)
  )

  // Arbitrary characters that `isAscii` rejects.
  implicit val nonAsciiCharArbitrary: Arbitrary[NonAsciiChar] = Arbitrary(
    Arbitrary.arbitrary[Char].filter(c => !isAscii(c)).map(NonAsciiChar)
  )

  // Long fixture shared by both directions. The punctuation in the native form
  // is full-width, matching the \uff1a / \uff0c / \uff5e escapes below.
  private val nativeSentence =
    "声明：此为贴吧助手插件的功能，不是百度贴吧原有功能～。设置称呼后，在帖子页面，您设置的称呼将会自动显示在自己的头像上。"

  private val escapedSentence =
    "\\u58f0\\u660e\\uff1a\\u6b64\\u4e3a\\u8d34\\u5427\\u52a9\\u624b\\u63d2\\u4ef6\\u7684\\u529f\\u80fd\\uff0c\\u4e0d\\u662f\\u767e\\u5ea6\\u8d34\\u5427\\u539f\\u6709\\u529f\\u80fd\\uff5e\\u3002\\u8bbe\\u7f6e\\u79f0\\u547c\\u540e\\uff0c\\u5728\\u5e16\\u5b50\\u9875\\u9762\\uff0c\\u60a8\\u8bbe\\u7f6e\\u7684\\u79f0\\u547c\\u5c06\\u4f1a\\u81ea\\u52a8\\u663e\\u793a\\u5728\\u81ea\\u5df1\\u7684\\u5934\\u50cf\\u4e0a\\u3002"

  test("native2ascii should convert ascii & non-ascii chars correctly") {
    native2ascii("").toString shouldBe ""
    native2ascii("路·").toString shouldBe "\\u8def\\u00b7"
    native2ascii("中abc").toString shouldBe "\\u4e2dabc"
    native2ascii(nativeSentence).toString shouldBe escapedSentence
  }

  test("ascii2native should convert escaped strings to original string correctly") {
    ascii2native("\\u4e2dabc").toString shouldBe "中abc"
    ascii2native("\\u8def\\u00b7").toString shouldBe "路·"
    ascii2native(escapedSentence).toString shouldBe nativeSentence
  }

  test("ascii string should remain unchanged when native2ascii") {
    forAll { chars: List[AsciiChar] =>
      val str = new String(chars.map(_.char).toArray)
      native2ascii(str) shouldEqual str
    }
  }

  test("ascii string without `\\u` should remain unchanged when ascii2native") {
    forAll { chars: List[AsciiChar] =>
      val str = new String(chars.map(_.char).toArray)
      whenever(!str.contains("\\u")) {
        ascii2native(str) shouldBe str
      }
    }
  }

  // Despite the test name, the generator yields every character that is not
  // ASCII, which includes the upper half of ISO-8859-1.
  test("Non-iso8859_1 string should be converted") {
    forAll { char: NonAsciiChar =>
      val str = char.char.toString
      native2ascii(str) shouldNot be(str)
    }
  }

  test("string should be converted forward and backward") {
    forAll { str: String =>
      // native2ascii copies backslashes through unescaped, so an input that
      // already contains a literal `\u` cannot be told apart from an escape on
      // the way back; such inputs are excluded from the round-trip property.
      whenever(!str.contains("\\u")) {
        ascii2native(native2ascii(str)) shouldBe str
      }
    }
  }

}

0 commit comments

Comments
 (0)
0