From b79b8529f1fac5923115aeefb228b7cfb927e1a0 Mon Sep 17 00:00:00 2001
From: Erick Lavoie <erick.lavoie@epfl.ch>
Date: Wed, 17 Feb 2021 09:49:12 +0100
Subject: [PATCH] Updated README

---
 README.md                                  | 10 ++-
 data/personal.csv                          |  1 -
 src/main/scala/recommend/Recommender.scala | 81 +++++++++++++++++++++-
 3 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 4f29493..e1965f4 100644
--- a/README.md
+++ b/README.md
@@ -46,12 +46,18 @@ Do include your own ratings in your final submission so we can check your answer
 ## Compute predictions
 
 ````
-> sbt "runMain predict.Predictor --train data/ml-100k/u1.base --test data/ml-100k/u1.test --json answers.json"
+> sbt "runMain stats.Analyzer --data data/ml-100k/u.data --json statistics.json"
+````
+
+## Compute predictions
+
+````
+> sbt "runMain predict.Predictor --train data/ml-100k/u1.base --test data/ml-100k/u1.test --json predictions.json"
 ````
 
 ## Compute recommendations
 ````
-> sbt 'runMain recommend.Recommender'
+> sbt "runMain recommend.Recommender --data data/ml-100k/u.data --personal data/personal.csv --json recommendations.json"
 ````
 
 ## Package for submission
diff --git a/data/personal.csv b/data/personal.csv
index 2beace6..091077d 100644
--- a/data/personal.csv
+++ b/data/personal.csv
@@ -1,4 +1,3 @@
-id,title,
 1,Toy Story (1995),
 2,GoldenEye (1995),
 3,Four Rooms (1995),
diff --git a/src/main/scala/recommend/Recommender.scala b/src/main/scala/recommend/Recommender.scala
index 13f5f6e..2746154 100644
--- a/src/main/scala/recommend/Recommender.scala
+++ b/src/main/scala/recommend/Recommender.scala
@@ -1,6 +1,83 @@
 package recommend
 
+import org.rogach.scallop._
+import org.json4s.jackson.Serialization
+import org.apache.spark.rdd.RDD
+
+import org.apache.spark.sql.SparkSession
+import org.apache.log4j.Logger
+import org.apache.log4j.Level
+
+class Conf(arguments: Seq[String]) extends ScallopConf(arguments) {
+  val data = opt[String](required = true)
+  val personal = opt[String](required = true)
+  val json = opt[String]()
+  verify()
+}
+
+case class Rating(user: Int, item: Int, rating: Double)
+
 object Recommender extends App {
-  println("Computing recommendations ...")
-  println("Done")
+  // Remove these lines if encountering/debugging Spark
+  Logger.getLogger("org").setLevel(Level.OFF)
+  Logger.getLogger("akka").setLevel(Level.OFF)
+  val spark = SparkSession.builder()
+    .master("local[1]")
+    .getOrCreate()
+  spark.sparkContext.setLogLevel("ERROR") 
+
+  println("")
+  println("******************************************************")
+
+  var conf = new Conf(args) 
+  println("Loading data from: " + conf.data()) 
+  val dataFile = spark.sparkContext.textFile(conf.data())
+  val data = dataFile.map(l => {
+      val cols = l.split("\t").map(_.trim)
+      Rating(cols(0).toInt, cols(1).toInt, cols(2).toDouble)
+  }) 
+  assert(data.count == 100000, "Invalid data")
+
+  println("Loading personal data from: " + conf.personal()) 
+  val personalFile = spark.sparkContext.textFile(conf.personal())
+  // TODO: Extract ratings and movie titles
+  assert(personalFile.count == 1682, "Invalid personal data")
+
+ 
+
+  // Save answers as JSON
+  def printToFile(content: String, 
+                  location: String = "./answers.json") =
+    Some(new java.io.PrintWriter(location)).foreach{
+      f => try{
+        f.write(content)
+      } finally{ f.close }
+  }
+  conf.json.toOption match {
+    case None => ; 
+    case Some(jsonFile) => {
+      var json = "";
+      {
+        // Limiting the scope of implicit formats with {}
+        implicit val formats = org.json4s.DefaultFormats
+        val answers: Map[String, Any] = Map(
+           "4.1.1" -> List[Any](
+             List(0,"Tron", 5.0),
+             List(0,"Tron", 5.0),
+             List(0,"Tron", 5.0),
+             List(0,"Tron", 5.0),
+             List(0,"Tron", 5.0)
+           )
+         )
+        json = Serialization.writePretty(answers)
+      }
+
+      println(json)
+      println("Saving answers in: " + jsonFile)
+      printToFile(json, jsonFile)
+    }
+  }
+
+  println("")
+  spark.close()
 }
-- 
GitLab