updated scripts to run locally · convexsetgithub/loco-lib@9ca8cd4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9ca8cd4

Browse files
updated scripts to run locally
1 parent 465eeb0 commit 9ca8cd4

File tree

5 files changed

+52
-38
lines changed

5 files changed

+52
-38
lines changed

.gitignore

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,13 @@ preprocessingUtils/.idea
1616

1717
temp
1818
data/climate-serialized
19+
data/dogs_vs_cats-serialized
20+
data/dogs_vs_cats_small_test-colwise
21+
data/dogs_vs_cats_small_test-rowwiseLabeledPoint
22+
data/dogs_vs_cats_small_train-colwise
23+
data/dogs_vs_cats_small_train-rowwiseLabeledPoint
24+
dogs_vs_cats_small_train-responseTrain.txt
25+
dogs_vs_cats_small_train-nFeats.txt
26+
dogs_vs_cats_small_test-responseTest.txt
27+
climate-serialized.zip
28+
dogs_vs_cats-serialized.zip

LOCO/run-LOCO-local.sh

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,19 @@ $SPARK_HOME/bin/spark-submit \
44
--class "LOCO.driver" \
55
--master local[4] \
66
--driver-memory 1G \
7-
target/scala-2.10/LOCO-assembly-0.1.5.jar \
7+
target/scala-2.10/LOCO-assembly-0.2.0.jar \
88
--classification=false \
9-
--optimizer=SDCA \
109
--numIterations=5000 \
11-
--dataFormat=text \
12-
--textDataFormat=spaces \
13-
--separateTrainTestFiles=true \
14-
--trainingDatafile="../data/climate_train.txt" \
15-
--testDatafile="../data/climate_test.txt" \
16-
--center=true \
10+
--trainingDatafile="../data/climate-serialized/climate-train-colwise/" \
11+
--testDatafile="../data/climate-serialized/climate-test-colwise/" \
12+
--responsePathTrain="../data/climate-serialized/climate-responseTrain.txt" \
13+
--responsePathTest="../data/climate-serialized/climate-responseTest.txt" \
14+
--nFeats="../data/climate-serialized/climate-nFeats.txt" \
1715
--projection=SDCT \
18-
--concatenate=true \
19-
--CVKind=none \
20-
--lambda=70 \
21-
--nFeatsProj=260 \
16+
--concatenate=false \
17+
--CV=false \
18+
--lambda=75 \
19+
--nFeatsProj=389 \
2220
--nPartitions=4 \
2321
--nExecutors=1
2422
"$@"

LOCO/src/main/scala/driver.scala

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,23 @@ object driver {
5050
val nExecutors = options.getOrElse("nExecutors","1").toInt
5151
// training input path
5252
val trainingDatafile =
53-
options.getOrElse("trainingDatafile", "../data/climate-serialized/climate-train-colwise/")
53+
options.getOrElse("trainingDatafile", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_train-colwise/")
54+
// options.getOrElse("trainingDatafile", "../data/climate-serialized/climate-train-colwise/")
5455
// test input path
5556
val testDatafile =
56-
options.getOrElse("testDatafile", "../data/climate-serialized/climate-test-colwise/")
57+
options.getOrElse("testDatafile", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_test-colwise/")
58+
// options.getOrElse("testDatafile", "../data/climate-serialized/climate-test-colwise/")
5759
// response vector - training
5860
val responsePathTrain =
59-
options.getOrElse("responsePathTrain", "../data/climate-serialized/climate-responseTrain.txt")
61+
options.getOrElse("responsePathTrain", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_train-responseTrain.txt")
62+
// options.getOrElse("responsePathTrain", "../data/climate-serialized/climate-responseTrain.txt")
6063
// response vector - test
6164
val responsePathTest =
62-
options.getOrElse("responsePathTest", "../data/climate-serialized/climate-responseTest.txt")
65+
options.getOrElse("responsePathTest", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_test-responseTest.txt")
66+
// options.getOrElse("responsePathTest", "../data/climate-serialized/climate-responseTest.txt")
6367
// number of features
64-
val nFeatsPath = options.getOrElse("nFeats", "../data/climate-serialized/climate-nFeats.txt")
68+
val nFeatsPath = options.getOrElse("nFeats", "../data/dogs_vs_cats-serialized/dogs_vs_cats_small_train-nFeats.txt")
69+
// options.getOrElse("nFeats", "../data/climate-serialized/climate-nFeats.txt")
6570
// random seed
6671
val randomSeed = options.getOrElse("seed", "3").toInt
6772
// shall sparse data structures be used?
@@ -70,9 +75,9 @@ object driver {
7075
// 2) specify algorithm, loss function, and optimizer (if applicable)
7176

7277
// specify whether classification or ridge regression shall be used
73-
val classification = options.getOrElse("classification", "false").toBoolean
78+
val classification = options.getOrElse("classification", "true").toBoolean
7479
// number of iterations used in SDCA
75-
val numIterations = options.getOrElse("numIterations", "20000").toInt
80+
val numIterations = options.getOrElse("numIterations", "5000").toInt
7681
// set duality gap as convergence criterion
7782
val stoppingDualityGap = options.getOrElse("stoppingDualityGap", "0.01").toDouble
7883
// specify whether duality gap as convergence criterion shall be used
@@ -83,23 +88,23 @@ object driver {
8388
// specify projection (sparse or SDCT)
8489
val projection = options.getOrElse("projection", "SDCT")
8590
// specify projection dimension
86-
val nFeatsProj = options.getOrElse("nFeatsProj", "389").toInt
91+
val nFeatsProj = options.getOrElse("nFeatsProj", "200").toInt
8792
// concatenate or add
8893
val concatenate = options.getOrElse("concatenate", "false").toBoolean
8994
// cross validation
9095
val CV = options.getOrElse("CV", "false").toBoolean
9196
// k for k-fold CV
92-
val kfold = options.getOrElse("kfold", "2").toInt
97+
val kfold = options.getOrElse("kfold", "5").toInt
9398
// regularization parameter sequence start used in CV
94-
val lambdaSeqFrom = options.getOrElse("lambdaSeqFrom", "1").toDouble
99+
val lambdaSeqFrom = options.getOrElse("lambdaSeqFrom", "0.1").toDouble
95100
// regularization parameter sequence end used in CV
96-
val lambdaSeqTo = options.getOrElse("lambdaSeqTo", "10").toDouble
101+
val lambdaSeqTo = options.getOrElse("lambdaSeqTo", "5").toDouble
97102
// regularization parameter sequence step size used in CV
98-
val lambdaSeqBy = options.getOrElse("lambdaSeqBy", "1").toDouble
103+
val lambdaSeqBy = options.getOrElse("lambdaSeqBy", ".1").toDouble
99104
// create lambda sequence
100105
val lambdaSeq = lambdaSeqFrom to lambdaSeqTo by lambdaSeqBy
101106
// regularization parameter to be used if CVKind == "none"
102-
val lambda = options.getOrElse("lambda", "95").toDouble
107+
val lambda = options.getOrElse("lambda", "4.4").toDouble
103108

104109
// print out inputs
105110
println("\nSpecify input and output options: ")

preprocessingUtils/run-preprocessing-local.sh

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,19 @@
33
$SPARK_HOME/bin/spark-submit \
44
--class "preprocessingUtils.main" \
55
--master local[4] \
6-
target/scala-2.10/preprocess-assembly-0.1.jar \
6+
target/scala-2.10/preprocess-assembly-0.2.jar \
77
--dataFormat=text \
8+
--sparse=false \
89
--textDataFormat=spaces \
910
--separateTrainTestFiles=false \
11+
--proportionTest=0.2 \
1012
--dataFile="../data/dogs_vs_cats_n5000.txt" \
1113
--centerFeatures=true \
1214
--scaleFeatures=true \
1315
--centerResponse=false \
1416
--scaleResponse=false \
15-
--outputTrainFileName="../data/dogs_vs_cats_n5000_train_" \
16-
--outputTestFileName="../data/dogs_vs_cats_n5000_test_" \
17-
--outputClass=DataPoint \
18-
--twoOutputClasses=true \
19-
--secondOutputClass=LabeledPoint
17+
--outputTrainFileName="../data/dogs_vs_cats_small_train" \
18+
--outputTestFileName="../data/dogs_vs_cats_small_test" \
19+
--outputClass=LabeledPoint \
20+
--seed=1
2021
"$@"

preprocessingUtils/src/main/scala/main.scala

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ object main {
3636
// "libsvm", "spaces" or "comma"
3737
val textDataFormat = options.getOrElse("textDataFormat", "spaces")
3838
// input path
39-
val dataFile = options.getOrElse("dataFile", "../data/E2006")
39+
val dataFile = options.getOrElse("dataFile", "../data/dogs_vs_cats_n5000.txt")
4040
// provide training and test set as separate files?
41-
val separateTrainTestFiles = options.getOrElse("separateTrainTestFiles", "true").toBoolean
41+
val separateTrainTestFiles = options.getOrElse("separateTrainTestFiles", "false").toBoolean
4242
// training input path
4343
val trainingDatafile =
4444
options.getOrElse("trainingDatafile", "../data/climate_train.txt")
@@ -53,21 +53,21 @@ object main {
5353
val timestamp = System.currentTimeMillis.toString
5454
// file name for training file output
5555
val outputTrainFileName =
56-
options.getOrElse("outputTrainFileName", "output/outTrain" + timestamp)
56+
options.getOrElse("outputTrainFileName", "output/dogs_vs_cats_small_train")
5757
// file name for test file output
58-
val outputTestFileName = options.getOrElse("outputTestFileName", "output/outTest" + timestamp)
58+
val outputTestFileName = options.getOrElse("outputTestFileName", "output/dogs_vs_cats_small_test")
5959
// specify class of output: DataPoint, LabeledPoint or DoubleArray
60-
val outputClass = options.getOrElse("outputClass", "DataPoint")
60+
val outputClass = options.getOrElse("outputClass", "LabeledPoint")
6161
// if two different output formats are desired, set to true
6262
val twoOutputClasses = options.getOrElse("twoOutputClasses", "false").toBoolean
6363
// specify second output format
6464
val secondOutputClass = options.getOrElse("secondOutputClass", "LabeledPoint")
6565
// center the features to have mean zero
66-
val centerFeatures = options.getOrElse("centerFeatures", "false").toBoolean
66+
val centerFeatures = options.getOrElse("centerFeatures", "true").toBoolean
6767
// center the response to have mean zero
6868
val centerResponse = options.getOrElse("centerResponse", "false").toBoolean
6969
// scale the features to have unit variance
70-
val scaleFeatures = options.getOrElse("scaleFeatures", "false").toBoolean
70+
val scaleFeatures = options.getOrElse("scaleFeatures", "true").toBoolean
7171
// scale the response to have unit variance
7272
val scaleResponse = options.getOrElse("scaleResponse", "false").toBoolean
7373

0 commit comments

Comments
 (0)
0