Merge branch 'master' of github.com:holdenk/spark-testing-base · randomf/spark-testing-base@f5d80dc · GitHub

Commit f5d80dc

Merge branch 'master' of github.com:holdenk/spark-testing-base
2 parents 6aac1c5 + 6ea1b36 commit f5d80dc

File tree: 9 files changed, +213 -7 lines changed
  • test/2.2/scala/com/holdenkarau/spark/testing

    README.md

Lines changed: 3 additions & 3 deletions

@@ -12,10 +12,10 @@ This is not my beautiful code.
 
 ## How?
 
-So you include com.holdenkarau.spark-testing-base [spark_version]_0.7.2 and extend one of the classes and write some simple tests instead. For example to include this in a project using Spark 2.2.0:
+So you include com.holdenkarau.spark-testing-base [spark_version]_0.7.4 and extend one of the classes and write some simple tests instead. For example to include this in a project using Spark 2.2.0:
 
 ```scala
-"com.holdenkarau" %% "spark-testing-base" % "2.2.0_0.7.2" % "test"
+"com.holdenkarau" %% "spark-testing-base" % "2.2.0_0.7.4" % "test"
 ```
 
 or
@@ -24,7 +24,7 @@ or
 <dependency>
 <groupId>com.holdenkarau</groupId>
 <artifactId>spark-testing-base_2.11</artifactId>
-<version>${spark.version}_0.7.2</version>
+<version>${spark.version}_0.7.4</version>
 <scope>test</scope>
 </dependency>
 ```
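
As a rough sketch of the "extend one of the classes" step described above, a suite can mix in the library's SharedSparkContext trait (the same trait used by the new test file later in this commit). The suite name and test body below are hypothetical, not part of this commit:

```scala
import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

// Hypothetical example suite: SharedSparkContext supplies a SparkContext (`sc`)
// shared by every test in the suite and cleaned up after the suite finishes.
class WordCountSketchTest extends FunSuite with SharedSparkContext {
  test("counts words") {
    val counts = sc.parallelize(Seq("a", "b", "a"))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
      .collectAsMap()
    assert(counts("a") === 2)
  }
}
```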

    RELEASE_NOTES.md

Lines changed: 3 additions & 3 deletions

@@ -4,17 +4,17 @@
 - Re-add Scala 2.10 support up to and including Spark 2.2.X series
 - Attempt to make it so that users doing SQL tests without Hive don't need the hive jars.
 - Don't reset the SparkSession provider when in reuse mode.
-- Add workaround for inaccessiable active context info in Spark 2.0
+- Add workaround for inaccessible active context info in Spark 2.0
 - Upgrade to Hadoop 2.8.1 for mini cluster
 - Change build env after travis changes
 # 0.7.2
-- Add expiremental support to for reusing a SparkContext/Session accross multiple suites. For Spark 2.0+ only.
+- Add experimental support to for reusing a SparkContext/Session across multiple suites. For Spark 2.0+ only.
 # 0.7.1
 - Upgrade mini cluster hadoop dependencies
 - Add support for Spark 2.2.0
 - YARNCluster now requires SPARK_HOME to be set so as to configure spark.yarn.jars (workaround for YARN bug from deprecated code in Spark 2.2).
 # 0.7
-- Add Python RDD comparisions
+- Add Python RDD comparisons
 - Switch to JDK8 for Spark 2.1.1+
 - Add back Kafka tests
 - Make it easier to disable Hive support when running tests
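
The 0.7.2 note above refers to the experimental context reuse that the new StructuredStreamingTests file further down in this commit enables by overriding reuseContextIfPossible. A minimal sketch of opting in; the suite name and test body are hypothetical, and which trait declares reuseContextIfPossible is an assumption based on the test file below:

```scala
import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

// Hypothetical example: overriding reuseContextIfPossible asks the provider to keep
// the SparkContext/Session alive across suites instead of recreating it (Spark 2.0+ only).
class ReusedContextSketchTest extends FunSuite with SharedSparkContext {
  override implicit def reuseContextIfPossible: Boolean = true

  test("context is shared and usable") {
    assert(sc.parallelize(1 to 3).count() === 3)
  }
}
```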

    build.sbt

Lines changed: 9 additions & 1 deletion

@@ -123,7 +123,15 @@ unmanagedSourceDirectories in Compile := {
 }
 
 unmanagedSourceDirectories in Test := {
-  if (sparkVersion.value >= "2.0.0") Seq(
+  if (sparkVersion.value >= "2.2.0") Seq(
+    (sourceDirectory in Test)(_ / "2.2/scala"),
+    (sourceDirectory in Test)(_ / "2.0/scala"),
+    (sourceDirectory in Test)(_ / "1.6/scala"), (sourceDirectory in Test)(_ / "1.6/java"),
+    (sourceDirectory in Test)(_ / "1.4/scala"),
+    (sourceDirectory in Test)(_ / "kafka/scala"),
+    (sourceDirectory in Test)(_ / "1.3/scala"), (sourceDirectory in Test)(_ / "1.3/java")
+  ).join.value
+  else if (sparkVersion.value >= "2.0.0") Seq(
     (sourceDirectory in Test)(_ / "2.0/scala"),
     (sourceDirectory in Test)(_ / "1.6/scala"), (sourceDirectory in Test)(_ / "1.6/java"),
     (sourceDirectory in Test)(_ / "1.4/scala"),

    python/sparktestingbase/sqltestcase.py

Lines changed: 1 addition & 0 deletions

@@ -66,6 +66,7 @@ def assertDataFrameEqual(self, expected, result, tol=0):
         try:
             expectedRDD = expected.rdd.cache()
             resultRDD = result.rdd.cache()
+            self.assertEqual(expectedRDD.count(), resultRDD.count())
 
             def zipWithIndex(rdd):
                 """Zip with index (idx, data)"""

    python/sparktestingbase/test/simple_sql_test.py

Lines changed: 18 additions & 0 deletions

@@ -18,13 +18,19 @@
 
 from datetime import datetime
 from pyspark.sql import Row
+from pyspark.sql.types import StructType
 from sparktestingbase.sqltestcase import SQLTestCase
 import unittest2
 
 
 class SimpleSQLTest(SQLTestCase):
     """A simple test."""
 
+    def test_empty_expected_equal(self):
+        allTypes = self.sc.parallelize([])
+        df = self.sqlCtx.createDataFrame(allTypes, StructType([]))
+        self.assertDataFrameEqual(df, df)
+
     def test_simple_expected_equal(self):
         allTypes = self.sc.parallelize([Row(
             i=1, s="string", d=1.0, l=1,
@@ -68,5 +74,17 @@ def test_dif_schemas_unequal(self):
         allTypes2 = self.sc.parallelize([Row(d="1.0")])
         self.assertDataFrameEqual(allTypes1.toDF(), allTypes2.toDF(), 0.0001)
 
+    @unittest2.expectedFailure
+    def test_empty_dataframe_unequal(self):
+        allTypes = self.sc.parallelize([Row(
+            i=1, s="string", d=1.001, l=1,
+            b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1),
+            time=datetime(2014, 8, 1, 14, 1, 5))])
+        empty = self.sc.parallelize([])
+        self.assertDataFrameEqual(
+            allTypes.toDF(),
+            self.sqlCtx.createDataFrame(empty, allTypes.toDF().schema), 0.1)
+
+
 if __name__ == "__main__":
     unittest2.main()

Lines changed: 72 additions & 0 deletions

@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.holdenkarau.spark.testing
+
+import org.apache.spark.{SparkConf, SparkContext, EvilSparkContext}
+import org.scalatest.{BeforeAndAfterAll, Suite}
+
+/**
+ * Shares an HDFS MiniCluster based `SparkContext` between all tests in a suite and
+ * closes it at the end. This requires that the env variable SPARK_HOME is set.
+ * Further more if this is used in Spark versions prior to 1.6.3,
+ * all Spark tests must run against the yarn mini cluster.
+ *
+ * (see https://issues.apache.org/jira/browse/SPARK-10812 for details).
+ */
+trait SharedMiniCluster extends BeforeAndAfterAll
+    with HDFSClusterLike
+    with YARNClusterLike
+    with SparkContextProvider {
+  self: Suite =>
+  @transient private var _sc: SparkContext = _
+
+  def sc: SparkContext = _sc
+
+  val master = "yarn-client"
+
+  override def beforeAll() {
+    // Try and do setup, and in-case we fail shutdown
+    try {
+      super.startHDFS()
+      super.startYARN()
+
+      // Stop the spark context if already running
+      EvilSparkContext.stopActiveSparkContext()
+      // Create the new context
+      val sparkConf = new SparkConf().setMaster(master).setAppName("test")
+      _sc = new SparkContext(sparkConf)
+      setup(_sc)
+    } catch {
+      case e: Throwable =>
+        super.shutdownYARN()
+        super.shutdownHDFS()
+        throw e
+    }
+    super.beforeAll()
+  }
+
+  override def afterAll() {
+    Option(sc).foreach(_.stop())
+    _sc = null
+
+    super.shutdownYARN()
+    super.shutdownHDFS()
+
+    super.afterAll()
+  }
+}
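
A hedged usage sketch for the SharedMiniCluster trait added above, assuming SPARK_HOME is set as its doc comment requires; the suite name and job are hypothetical:

```scala
import com.holdenkarau.spark.testing.SharedMiniCluster
import org.scalatest.FunSuite

// Hypothetical example: SharedMiniCluster starts HDFS and YARN mini clusters in
// beforeAll, exposes `sc` bound to the "yarn-client" master, and tears both down in afterAll.
class MiniClusterSketchTest extends FunSuite with SharedMiniCluster {
  test("runs a small job on the YARN mini cluster") {
    val counts = sc.parallelize(Seq("spark", "testing", "spark"))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
      .collectAsMap()
    assert(counts("spark") === 2)
  }
}
```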

Lines changed: 75 additions & 0 deletions

@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.holdenkarau.spark.testing
+
+import org.apache.spark.sql._
+import org.apache.spark.sql.execution.streaming._
+
+import org.scalatest.Suite
+
+import scala.reflect.ClassTag
+
+/**
+ * Early Experimental Structured Streaming Base.
+ */
+trait StructuredStreamingBase extends DataFrameSuiteBase
+    with StructuredStreamingBaseLike { self: Suite =>
+  /**
+   * Test a simple streams end state
+   */
+  def testSimpleStreamEndState[T: Encoder, R: Encoder](
+    spark: SparkSession,
+    input: Seq[Seq[T]],
+    expected: Seq[R],
+    mode: String,
+    queryFunction: Dataset[T] => Dataset[R]) = {
+    val result = runSimpleStreamEndState(spark, input, mode, queryFunction)
+    assert(result === expected)
+  }
+}
+
+trait StructuredStreamingBaseLike extends SparkContextProvider
+    with TestSuiteLike with Serializable {
+  var count = 0
+  /**
+   * Run a simple streams end state
+   */
+  private[holdenkarau] def runSimpleStreamEndState[T: Encoder, R: Encoder](
+    spark: SparkSession,
+    input: Seq[Seq[T]],
+    mode: String,
+    queryFunction: Dataset[T] => Dataset[R]) = {
+    import spark.implicits._
+    implicit val sqlContext = spark.sqlContext
+    val inputStream = MemoryStream[T]
+    val transformed = queryFunction(inputStream.toDS())
+    val queryName = s"${this.getClass.getSimpleName}TestSimpleStreamEndState${count}"
+    count = count + 1
+    val query = transformed.writeStream.
+      format("memory").
+      outputMode(mode).
+      queryName(queryName).
+      start()
+    input.foreach(batch => inputStream.addData(batch))
+    // Block until all processed
+    query.processAllAvailable()
+    val table = spark.table(queryName).as[R]
+    val resultRows = table.collect()
+    resultRows.toSeq
+  }
+}

    src/main/1.3/scala/com/holdenkarau/spark/testing/SharedMiniCluster.scala renamed to src/main/pre-2.0/scala/com/holdenkarau/spark/testing/SharedMiniCluster.scala

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@ trait SharedMiniCluster extends BeforeAndAfterAll
       super.startHDFS()
       super.startYARN()
 
+      // Create the new context
       val sparkConf = new SparkConf().setMaster(master).setAppName("test")
       _sc = new SparkContext(sparkConf)
       setup(_sc)

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+package com.holdenkarau.spark.testing
+
+import org.apache.spark.sql._
+
+import org.scalatest.FunSuite
+
+class StructuredStreamingTests
+    extends FunSuite with SharedSparkContext with StructuredStreamingBase {
+  // re-use the spark context
+  override implicit def reuseContextIfPossible: Boolean = true
+
+  test("add 3") {
+    import spark.implicits._
+    val input = List(List(1), List(2, 3))
+    val expected = List(4, 5, 6)
+    def compute(input: Dataset[Int]): Dataset[Int] = {
+      input.map(elem => elem + 3)
+    }
+    testSimpleStreamEndState(spark, input, expected, "append", compute)
+  }
+
+  test("stringify") {
+    import spark.implicits._
+    val input = List(List(1), List(2, 3))
+    val expected = List("1", "2", "3")
+    def compute(input: Dataset[Int]): Dataset[String] = {
+      input.map(elem => elem.toString)
+    }
+    testSimpleStreamEndState(spark, input, expected, "append", compute)
+  }
+}

0 commit comments