Commit b79b8529 authored by Erick Lavoie

Updated README

parent 2a82ef7f
@@ -46,12 +46,18 @@ Do include your own ratings in your final submission so we can check your answers
 ## Compute predictions
 ````
-> sbt "runMain predict.Predictor --train data/ml-100k/u1.base --test data/ml-100k/u1.test --json answers.json"
+> sbt "runMain stats.Analyzer --data data/ml-100k/u.data --json statistics.json"
 ````
+## Compute predictions
+````
+> sbt "runMain predict.Predictor --train data/ml-100k/u1.base --test data/ml-100k/u1.test --json predictions.json"
+````
 ## Compute recommendations
 ````
-> sbt 'runMain recommend.Recommender'
+> sbt "runMain recommend.Recommender --data data/ml-100k/u.data --personal data/personal.csv --json recommendations.json"
 ````
 ## Package for submission
data/personal.csv
id,title,
1,Toy Story (1995),
2,GoldenEye (1995),
3,Four Rooms (1995),
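
The third column is left empty on purpose: fill in your own ratings there before running the Recommender (the file continues through all 1682 movies). A filled-in row looks like this, where the 4 is only an example value:

1,Toy Story (1995),4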
src/main/scala/recommend/Recommender.scala
package recommend

import org.rogach.scallop._
import org.json4s.jackson.Serialization
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.log4j.Logger
import org.apache.log4j.Level

// Command-line options: --data and --personal are required, --json is optional.
class Conf(arguments: Seq[String]) extends ScallopConf(arguments) {
  val data = opt[String](required = true)
  val personal = opt[String](required = true)
  val json = opt[String]()
  verify()
}

case class Rating(user: Int, item: Int, rating: Double)
object Recommender extends App {
  println("Computing recommendations ...")

  // These two lines silence Spark's own logging; remove them if you need its output while debugging Spark.
  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  val spark = SparkSession.builder()
    .master("local[1]")
    .getOrCreate()
  spark.sparkContext.setLogLevel("ERROR")
println("")
println("******************************************************")
var conf = new Conf(args)
println("Loading data from: " + conf.data())
val dataFile = spark.sparkContext.textFile(conf.data())
val data = dataFile.map(l => {
val cols = l.split("\t").map(_.trim)
Rating(cols(0).toInt, cols(1).toInt, cols(2).toDouble)
})
assert(data.count == 100000, "Invalid data")
println("Loading personal data from: " + conf.personal())
val personalFile = spark.sparkContext.textFile(conf.personal())
// TODO: Extract ratings and movie titles
assert(personalFile.count == 1682, "Invalid personal data")
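
  // A possible sketch for the TODO above, not the official solution: it assumes
  // movie titles contain no commas and assigns your personal ratings to the
  // otherwise-unused user id 944 (ml-100k user ids run from 1 to 943); adapt as needed.
  val personalRows = personalFile
    .map(l => l.split(",", -1).map(_.trim))
    .filter(cols => cols(0).nonEmpty && cols(0).forall(_.isDigit)) // skip a header line, if any
  val titles = personalRows
    .map(cols => (cols(0).toInt, cols(1)))
    .collectAsMap()
  val personalRatings = personalRows
    .filter(cols => cols.length >= 3 && cols(2).nonEmpty) // keep only the movies you rated
    .map(cols => Rating(944, cols(0).toInt, cols(2).toDouble))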
  // Save answers as JSON
  def printToFile(content: String,
                  location: String = "./answers.json") =
    Some(new java.io.PrintWriter(location)).foreach { f =>
      try {
        f.write(content)
      } finally {
        f.close
      }
    }
  conf.json.toOption match {
    case None => ()
    case Some(jsonFile) => {
      var json = "";
      {
        // Limiting the scope of implicit formats with {}
        implicit val formats = org.json4s.DefaultFormats

        // Placeholder answer: five recommendations, each encoded as [user, title, rating].
        val answers: Map[String, Any] = Map(
          "4.1.1" -> List[Any](
            List(0, "Tron", 5.0),
            List(0, "Tron", 5.0),
            List(0, "Tron", 5.0),
            List(0, "Tron", 5.0),
            List(0, "Tron", 5.0)
          )
        )
        json = Serialization.writePretty(answers)
      }

      println(json)
      println("Saving answers in: " + jsonFile)
      printToFile(json, jsonFile)
    }
  }
println("")
spark.close()
}
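
For reference, the placeholder answers map above serializes to JSON of roughly this shape (exact whitespace depends on json4s' pretty printer; "4.1.1" would hold your five actual recommendations):

{
  "4.1.1" : [
    [ 0, "Tron", 5.0 ],
    [ 0, "Tron", 5.0 ],
    ...
  ]
}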