convexsetgithub
diff --git a/.gitignore b/.gitignore
Lines changed: 10 additions & 0 deletions
diff --git a/LOCO/run-LOCO-local.sh b/LOCO/run-LOCO-local.sh
Lines changed: 10 additions & 12 deletions
diff --git a/LOCO/src/main/scala/driver.scala b/LOCO/src/main/scala/driver.scala
Lines changed: 18 additions & 13 deletions
diff --git a/preprocessingUtils/run-preprocessing-local.sh b/preprocessingUtils/run-preprocessing-local.sh
Lines changed: 7 additions & 6 deletions
diff --git a/preprocessingUtils/src/main/scala/main.scala b/preprocessingUtils/src/main/scala/main.scala
Lines changed: 7 additions & 7 deletions
@@ -16,3 +16,13 @@ preprocessingUtils/.idea
 
 temp
 data/climate-serialized
+data/dogs_vs_cats-serialized
+data/dogs_vs_cats_small_test-colwise
+data/dogs_vs_cats_small_test-rowwiseLabeledPoint
+data/dogs_vs_cats_small_train-colwise
+data/dogs_vs_cats_small_train-rowwiseLabeledPoint
+dogs_vs_cats_small_train-responseTrain.txt
+dogs_vs_cats_small_train-nFeats.txt
+dogs_vs_cats_small_test-responseTest.txt
+climate-serialized.zip
+dogs_vs_cats-serialized.zip
@@ -4,21 +4,20 @@ $SPARK_HOME/bin/spark-submit \
 --class "LOCO.driver" \
 --master local[4] \
 --driver-memory 1G \
-target/scala-2.10/LOCO-assembly-0.1.5.jar \
+target/scala-2.10/LOCO-assembly-0.2.0.jar \
 --classification=false \
---optimizer=SDCA \
 --numIterations=5000 \
---dataFormat=text \
---textDataFormat=spaces \
---separateTrainTestFiles=true \
---trainingDatafile="../data/climate_train.txt" \
---testDatafile="../data/climate_test.txt" \
---center=true \
+--trainingDatafile="../data/climate-serialized/climate-train-colwise/" \
+--testDatafile="../data/climate-serialized/climate-test-colwise/" \
+--responsePathTrain="../data/climate-serialized/climate-responseTrain.txt" \
+--responsePathTest="../data/climate-serialized/climate-responseTest.txt" \
+--nFeats="../data/climate-serialized/climate-nFeats.txt" \
 --projection=SDCT \
---concatenate=true \
---CVKind=none \
---lambda=70 \
---nFeatsProj=260 \
+--concatenate=false \
+--CV=false \
+--lambda=75 \
+--nFeatsProj=389 \
 --nPartitions=4 \
---nExecutors=1
+--nExecutors=1 \
 "$@"
+# "$@" is the final continued line of the spark-submit command above, so any extra options passed to this script are appended to that command line.
@@ -50,18 +50,23 @@ object driver {
     // training input path
     val trainingDatafile =
-      options.getOrElse("trainingDatafile", "../data/climate-serialized/climate-train-colwise/")
+      options.getOrElse("trainingDatafile", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_train-colwise/")
+//      options.getOrElse("trainingDatafile", "../data/climate-serialized/climate-train-colwise/")
     // test input path
     val testDatafile =
-        options.getOrElse("testDatafile", "../data/climate-serialized/climate-test-colwise/")
+        options.getOrElse("testDatafile", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_test-colwise/")
+//        options.getOrElse("testDatafile", "../data/climate-serialized/climate-test-colwise/")
     // response vector - training
     val responsePathTrain =
-    options.getOrElse("responsePathTrain", "../data/climate-serialized/climate-responseTrain.txt")
+      options.getOrElse("responsePathTrain", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_train-responseTrain.txt")
+//      options.getOrElse("responsePathTrain", "../data/climate-serialized/climate-responseTrain.txt")
     // response vector - test
     val responsePathTest =
-      options.getOrElse("responsePathTest", "../data/climate-serialized/climate-responseTest.txt")
+      options.getOrElse("responsePathTest", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_test-responseTest.txt")
+//      options.getOrElse("responsePathTest", "../data/climate-serialized/climate-responseTest.txt")
     // number of features
-    val nFeatsPath = options.getOrElse("nFeats", "../data/climate-serialized/climate-nFeats.txt")
+    val nFeatsPath = options.getOrElse("nFeats", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_train-nFeats.txt")
+//      options.getOrElse("nFeats", "../data/climate-serialized/climate-nFeats.txt")
     // random seed
     val randomSeed = options.getOrElse("seed", "3").toInt
     // shall sparse data structures be used?
@@ -70,9 +75,9 @@ object driver {
     // 2) specify algorithm, loss function, and optimizer (if applicable)
 
     // specify whether classification or ridge regression shall be used
-    val classification = options.getOrElse("classification", "false").toBoolean
+    val classification = options.getOrElse("classification", "true").toBoolean
     // number of iterations used in SDCA
-    val numIterations = options.getOrElse("numIterations", "20000").toInt
+    val numIterations = options.getOrElse("numIterations", "5000").toInt
     // set duality gap as convergence criterion
     val stoppingDualityGap = options.getOrElse("stoppingDualityGap", "0.01").toDouble
     // specify whether duality gap as convergence criterion shall be used
@@ -83,23 +88,23 @@ object driver {
     // specify projection (sparse or SDCT)
     val projection = options.getOrElse("projection", "SDCT")
     // specify projection dimension
-    val nFeatsProj = options.getOrElse("nFeatsProj", "389").toInt
+    val nFeatsProj = options.getOrElse("nFeatsProj", "200").toInt
     // concatenate or add
     val concatenate = options.getOrElse("concatenate", "false").toBoolean
     // cross validation
     val CV = options.getOrElse("CV", "false").toBoolean
     // k for k-fold CV
-    val kfold = options.getOrElse("kfold", "2").toInt
+    val kfold = options.getOrElse("kfold", "5").toInt
     // regularization parameter sequence start used in CV
-    val lambdaSeqFrom = options.getOrElse("lambdaSeqFrom", "1").toDouble
+    val lambdaSeqFrom = options.getOrElse("lambdaSeqFrom", "0.1").toDouble
     // regularization parameter sequence end used in CV
-    val lambdaSeqTo = options.getOrElse("lambdaSeqTo", "10").toDouble
+    val lambdaSeqTo = options.getOrElse("lambdaSeqTo", "5").toDouble
     // regularization parameter sequence step size used in CV
-    val lambdaSeqBy = options.getOrElse("lambdaSeqBy", "1").toDouble
+    val lambdaSeqBy = options.getOrElse("lambdaSeqBy", ".1").toDouble
     // create lambda sequence
     val lambdaSeq = lambdaSeqFrom to lambdaSeqTo by lambdaSeqBy
-    // regularization parameter to be used if CVKind == "none"
+    // regularization parameter to be used if CV is false
-    val lambda = options.getOrElse("lambda", "95").toDouble
+    val lambda = options.getOrElse("lambda", "4.4").toDouble
 
     // print out inputs
     println("\nSpecify input and output options: ")
 
@@ -3,18 +3,20 @@
+# Launch the preprocessing job locally; the trailing "$@" appends any extra options passed to this script to the spark-submit command line.
 $SPARK_HOME/bin/spark-submit \
 --class "preprocessingUtils.main" \
 --master local[4] \
-target/scala-2.10/preprocess-assembly-0.1.jar \
+target/scala-2.10/preprocess-assembly-0.2.jar \
 --dataFormat=text \
+--sparse=false \
 --textDataFormat=spaces \
 --separateTrainTestFiles=false \
+--proportionTest=0.2 \
 --dataFile="../data/dogs_vs_cats_n5000.txt" \
 --centerFeatures=true \
 --scaleFeatures=true \
 --centerResponse=false \
 --scaleResponse=false \
---outputTrainFileName="../data/dogs_vs_cats_n5000_train_" \
---outputTestFileName="../data/dogs_vs_cats_n5000_test_" \
---outputClass=DataPoint \
---twoOutputClasses=true \
---secondOutputClass=LabeledPoint
+--outputTrainFileName="../data/dogs_vs_cats_small_train" \
+--outputTestFileName="../data/dogs_vs_cats_small_test" \
+--outputClass=LabeledPoint \
+--seed=1 \
 "$@"
@@ -36,9 +36,9 @@ object main {
     // "libsvm", "spaces" or "comma"
     val textDataFormat = options.getOrElse("textDataFormat", "spaces")
     // input path
-    val dataFile = options.getOrElse("dataFile", "../data/E2006")
+    val dataFile = options.getOrElse("dataFile", "../data/dogs_vs_cats_n5000.txt")
     // provide training and test set as separate files?
-    val separateTrainTestFiles = options.getOrElse("separateTrainTestFiles", "true").toBoolean
+    val separateTrainTestFiles = options.getOrElse("separateTrainTestFiles", "false").toBoolean
     // training input path
     val trainingDatafile =
       options.getOrElse("trainingDatafile", "../data/climate_train.txt")
@@ -53,21 +53,21 @@ object main {
     val timestamp = System.currentTimeMillis.toString
     // file name for training file output
     val outputTrainFileName =
-      options.getOrElse("outputTrainFileName", "output/outTrain" + timestamp)
+      options.getOrElse("outputTrainFileName", "output/dogs_vs_cats_small_train")
     // file name for test file output
-    val outputTestFileName = options.getOrElse("outputTestFileName", "output/outTest" + timestamp)
+    val outputTestFileName = options.getOrElse("outputTestFileName", "output/dogs_vs_cats_small_test")
     // specify class of output: DataPoint, LabeledPoint or DoubleArray
-    val outputClass = options.getOrElse("outputClass", "DataPoint")
+    val outputClass = options.getOrElse("outputClass", "LabeledPoint")
     // if two different output formats are desired, set to true
     val twoOutputClasses = options.getOrElse("twoOutputClasses", "false").toBoolean
     // specify second output format
     val secondOutputClass = options.getOrElse("secondOutputClass", "LabeledPoint")
     // center the features to have mean zero
-    val centerFeatures = options.getOrElse("centerFeatures", "false").toBoolean
+    val centerFeatures = options.getOrElse("centerFeatures", "true").toBoolean
     // center the response to have mean zero
     val centerResponse = options.getOrElse("centerResponse", "false").toBoolean
     // scale the features to have unit variance
-    val scaleFeatures = options.getOrElse("scaleFeatures", "false").toBoolean
+    val scaleFeatures = options.getOrElse("scaleFeatures", "true").toBoolean
     // scale the response to have unit variance
     val scaleResponse = options.getOrElse("scaleResponse", "false").toBoolean