package predict

import org.rogach.scallop._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.log4j.Logger
import org.apache.log4j.Level

import scala.math
import shared.predictions._

class Conf(arguments: Seq[String]) extends ScallopConf(arguments) {
  val train = opt[String](required = true)
  val test = opt[String](required = true)
  val separator = opt[String](default = Some("\t"))
  val num_measurements = opt[Int](default = Some(0))
  val json = opt[String]()
  verify()
}
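
// One plausible invocation of this app (the dataset paths below are
// illustrative assumptions, not files shipped with this listing; the default
// tab separator matches MovieLens ml-100k splits):
//
//   sbt "runMain predict.Baseline --train data/ml-100k/u1.base \
//     --test data/ml-100k/u1.test --num_measurements 3 --json answers.json"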
object Baseline extends App {

  // Remove these two lines when debugging Spark problems: they silence its logging.
  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  val spark = SparkSession.builder()
    .master("local[1]")
    .getOrCreate()
  spark.sparkContext.setLogLevel("ERROR")
println("")
println("******************************************************")
var conf = new Conf(args)
// For these questions, data is collected in a scala Array
// to not depend on Spark
println("Loading training data from: " + conf.train())
val train = load(spark, conf.train(), conf.separator()).collect()
println("Loading test data from: " + conf.test())
val test = load(spark, conf.test(), conf.separator()).collect()
val measurements = (1 to conf.num_measurements()).map(x => timingInMs(() => {
Thread.sleep(1000) // Do everything here from train and test
42 // Output answer as last value
}))
val timings = measurements.map(t => t._2) // Retrieve the timing measurements
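
  // A minimal sketch of what the timed closure above might compute instead of
  // the Thread.sleep placeholder, assuming `load` yields records of a
  // `Rating(user: Int, item: Int, rating: Double)` case class (an assumption
  // about shared.predictions, which is not shown here). It times the
  // global-average predictor and returns its MAE on the test set:
  //
  //   val maeMeasurements = (1 to conf.num_measurements()).map(_ => timingInMs(() => {
  //     val globalAvg = train.map(_.rating).sum / train.length
  //     test.map(r => (r.rating - globalAvg).abs).sum / test.length
  //   }))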
  // Save answers as JSON
  def printToFile(content: String,
                  location: String = "./answers.json") =
    Some(new java.io.PrintWriter(location)).foreach { f =>
      try {
        f.write(content)
      } finally {
        f.close()
      }
    }
  conf.json.toOption match {
    case None => ()
    case Some(jsonFile) => {
      val answers = ujson.Obj(
        "Meta" -> ujson.Obj(
          "1.Train" -> ujson.Str(conf.train()),
          "2.Test" -> ujson.Str(conf.test()),
          "3.Measurements" -> ujson.Num(conf.num_measurements())
        ),
        "B.1" -> ujson.Obj(
          "1.GlobalAvg" -> ujson.Num(0.0),     // Datatype of answer: Double
          "2.User1Avg" -> ujson.Num(0.0),      // Datatype of answer: Double
          "3.Item1Avg" -> ujson.Num(0.0),      // Datatype of answer: Double
          "4.Item1AvgDev" -> ujson.Num(0.0),   // Datatype of answer: Double
          "5.PredUser1Item1" -> ujson.Num(0.0) // Datatype of answer: Double
        ),
        "B.2" -> ujson.Obj(
          "1.GlobalAvgMAE" -> ujson.Num(0.0), // Datatype of answer: Double
          "2.UserAvgMAE" -> ujson.Num(0.0),   // Datatype of answer: Double
          "3.ItemAvgMAE" -> ujson.Num(0.0),   // Datatype of answer: Double
          "4.BaselineMAE" -> ujson.Num(0.0)   // Datatype of answer: Double
        ),
        "B.3" -> ujson.Obj(
          "1.GlobalAvg" -> ujson.Obj(
            "average (ms)" -> ujson.Num(mean(timings)), // Datatype of answer: Double
            "stddev (ms)" -> ujson.Num(std(timings))    // Datatype of answer: Double
          ),
          "2.UserAvg" -> ujson.Obj(
            "average (ms)" -> ujson.Num(mean(timings)), // Datatype of answer: Double
            "stddev (ms)" -> ujson.Num(std(timings))    // Datatype of answer: Double
          ),
          "3.ItemAvg" -> ujson.Obj(
            "average (ms)" -> ujson.Num(mean(timings)), // Datatype of answer: Double
            "stddev (ms)" -> ujson.Num(std(timings))    // Datatype of answer: Double
          ),
          "4.Baseline" -> ujson.Obj(
            "average (ms)" -> ujson.Num(mean(timings)), // Datatype of answer: Double
            "stddev (ms)" -> ujson.Num(std(timings))    // Datatype of answer: Double
          )
        )
      )
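
      // Note that all four B.3 entries above reuse the same placeholder
      // `timings`; once each predictor is implemented, a separate measurements
      // vector per predictor (each obtained via timingInMs, as sketched
      // earlier) would replace it. A hedged sketch of filling the B.1
      // placeholders, under the same Rating(user, item, rating) assumption:
      //
      //   val globalAvg = train.map(_.rating).sum / train.length
      //   val user1Ratings = train.filter(_.user == 1).map(_.rating)
      //   val user1Avg = if (user1Ratings.isEmpty) globalAvg
      //                  else user1Ratings.sum / user1Ratings.length
      //
      // Item 1's average follows the same pattern with `_.item == 1`; the
      // results would then replace the 0.0 values via ujson.Num(...).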
      val json = ujson.write(answers, 4)

      println(json)
      println("Saving answers in: " + jsonFile)
      printToFile(json, jsonFile) // ujson.write already returns a String
    }
  }

  println("")
  spark.close()
}