Compare revisions

9e18c6ea · 4bf48fc7 · 4bf48fc7 · 4bf48fc7 · 4bf48fc7 · 4bf48fc7
--- a/src/test/scala/RecommendationTest.scala
+++ b/src/test/scala/RecommendationTest.scala
-import org.scalatest.funsuite._
-final class Recommendation extends AnyFunSuite {
-  test("recommendation works") {
-    assert(true)
-  }
-}
--- a/src/test/scala/distributed/ApproximateTests.scala
+++ b/src/test/scala/distributed/ApproximateTests.scala
+package test.distributed
+
+import breeze.linalg._
+import breeze.numerics._
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.BeforeAndAfterAll
+import shared.predictions._
+import test.shared.helpers._
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.SparkContext
+
+/**
+ * Template test suite for the approximate (partitioned) kNN predictor.
+ *
+ * The training and test matrices are loaded once in `beforeAll`, together with
+ * a local Spark context. The assertions in the test body are placeholders that
+ * still have to be replaced with the values produced by the implementation.
+ */
+class ApproximateTests extends AnyFunSuite with BeforeAndAfterAll {
+
+   val separator = "\t"
+   val train2Path = "data/ml-100k/u2.base"
+   val test2Path = "data/ml-100k/u2.test"
+   // These fields stay `var`/null because they are only populated in `beforeAll`.
+   var train2 : CSCMatrix[Double] = null
+   var test2 : CSCMatrix[Double] = null
+   var sc : SparkContext = null
+
+   /**
+    * Loads both data sets and starts (or reuses) a local Spark session.
+    *
+    * Declared with an explicit parameter list and `Unit` result type instead of
+    * the deprecated procedure syntax.
+    */
+   override def beforeAll(): Unit = {
+     // 943 and 1682 are presumably the user and item counts of ml-100k -- confirm against `load`.
+     train2 = load(train2Path, separator, 943, 1682)
+     test2 = load(test2Path, separator, 943, 1682)
+
+     // getOrCreate() reuses an already running session if one exists.
+     val spark = SparkSession.builder().master("local[2]").getOrCreate()
+     spark.sparkContext.setLogLevel("ERROR")
+     sc = spark.sparkContext
+   }
+
+   // Provide tests to show how to call your code to do the following tasks.
+   // Ensure you use the same function calls to produce the JSON outputs in
+   // the corresponding application.
+   // Add assertions with the answer you expect from your code, up to the 4th
+   // decimal after the (floating) point, on data/ml-100k/u2.base (as loaded above).
+   //
+   // NOTE: every `within(1.0, 0.0, 0.0001)` below is a placeholder. With the
+   // helper from test.shared.helpers it evaluates to false (1.0 is not within
+   // 0.0001 of 0.0), so the test fails until real values are filled in.
+   test("Approximate kNN predictor with 10 partitions and replication of 2") {
+     // Arguments presumably are (number of users, number of partitions, replication),
+     // matching the test name -- confirm against `partitionUsers`.
+     val partitionedUsers : Seq[Set[Int]] = partitionUsers(
+       943,
+       10,
+       2
+     )
+
+     // Similarity between user 1 and itself
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Similarity between user 1 and 864
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Similarity between user 1 and 344
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Similarity between user 1 and 16
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Similarity between user 1 and 334
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Similarity between user 1 and 2
+     assert(within(1.0, 0.0, 0.0001))
+
+     // MAE on test
+     assert(within(1.0, 0.0, 0.0001))
+   }
+}
--- a/src/test/scala/distributed/ExactTests.scala
+++ b/src/test/scala/distributed/ExactTests.scala
+package test.distributed
+
+import breeze.linalg._
+import breeze.numerics._
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.BeforeAndAfterAll
+import shared.predictions._
+import test.shared.helpers._
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.SparkContext
+
+/**
+ * Template test suite for the exact distributed kNN predictor.
+ *
+ * The training and test matrices are loaded once in `beforeAll`, together with
+ * a local Spark context. The assertions in the test body are placeholders that
+ * still have to be replaced with the values produced by the implementation.
+ */
+class ExactTests extends AnyFunSuite with BeforeAndAfterAll {
+
+   val separator = "\t"
+   val train2Path = "data/ml-100k/u2.base"
+   val test2Path = "data/ml-100k/u2.test"
+   // These fields stay `var`/null because they are only populated in `beforeAll`.
+   var train2 : CSCMatrix[Double] = null
+   var test2 : CSCMatrix[Double] = null
+   var sc : SparkContext = null
+
+   /**
+    * Loads both data sets and starts (or reuses) a local Spark session.
+    *
+    * Declared with an explicit parameter list and `Unit` result type instead of
+    * the deprecated procedure syntax.
+    */
+   override def beforeAll(): Unit = {
+     // 943 and 1682 are presumably the user and item counts of ml-100k -- confirm against `load`.
+     train2 = load(train2Path, separator, 943, 1682)
+     test2 = load(test2Path, separator, 943, 1682)
+
+     // getOrCreate() reuses an already running session if one exists.
+     val spark = SparkSession.builder().master("local[2]").getOrCreate()
+     spark.sparkContext.setLogLevel("ERROR")
+     sc = spark.sparkContext
+   }
+
+   // Provide tests to show how to call your code to do the following tasks.
+   // Ensure you use the same function calls to produce the JSON outputs in
+   // the corresponding application.
+   // Add assertions with the answer you expect from your code, up to the 4th
+   // decimal after the (floating) point, on data/ml-100k/u2.base (as loaded above).
+   //
+   // NOTE: every `within(1.0, 0.0, 0.0001)` below is a placeholder. With the
+   // helper from test.shared.helpers it evaluates to false (1.0 is not within
+   // 0.0001 of 0.0), so the test fails until real values are filled in.
+   test("kNN predictor with k=10") {
+
+     // Similarity between user 1 and itself
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Similarity between user 1 and 864
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Similarity between user 1 and 886
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Prediction user 1 and item 1
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Prediction user 327 and item 2
+     assert(within(1.0, 0.0, 0.0001))
+
+     // MAE on test
+     assert(within(1.0, 0.0, 0.0001))
+   }
+}
--- a/src/test/scala/optimizing/OptimizingTests.scala
+++ b/src/test/scala/optimizing/OptimizingTests.scala
+package test.optimizing
+
+import breeze.linalg._
+import breeze.numerics._
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.BeforeAndAfterAll
+import shared.predictions._
+import test.shared.helpers._
+
+/**
+ * Template test suite for the optimized (single-machine) kNN predictor.
+ *
+ * The training and test matrices are loaded once in `beforeAll`. The
+ * assertions in the test body are placeholders that still have to be replaced
+ * with the values produced by the implementation.
+ */
+class OptimizingTests extends AnyFunSuite with BeforeAndAfterAll {
+
+   val separator = "\t"
+   val train2Path = "data/ml-100k/u2.base"
+   val test2Path = "data/ml-100k/u2.test"
+   // These fields stay `var`/null because they are only populated in `beforeAll`.
+   var train2 : CSCMatrix[Double] = null
+   var test2 : CSCMatrix[Double] = null
+
+   /**
+    * Loads both data sets before any test runs.
+    *
+    * Declared with an explicit parameter list and `Unit` result type instead of
+    * the deprecated procedure syntax.
+    */
+   override def beforeAll(): Unit = {
+       // For these questions, train and test are held as in-memory Breeze
+       // CSCMatrix values, so this suite does not depend on Spark.
+       // 943 and 1682 are presumably the user and item counts of ml-100k -- confirm against `load`.
+       train2 = load(train2Path, separator, 943, 1682)
+       test2 = load(test2Path, separator, 943, 1682)
+   }
+
+   // Provide tests to show how to call your code to do the following tasks.
+   // Ensure you use the same function calls to produce the JSON outputs in
+   // the corresponding application.
+   // Add assertions with the answer you expect from your code, up to the 4th
+   // decimal after the (floating) point, on data/ml-100k/u2.base (as loaded above).
+   //
+   // NOTE: every `within(1.0, 0.0, 0.0001)` below is a placeholder. With the
+   // helper from test.shared.helpers it evaluates to false (1.0 is not within
+   // 0.0001 of 0.0), so the test fails until real values are filled in.
+   test("kNN predictor with k=10") {
+
+     // Similarity between user 1 and itself
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Similarity between user 1 and 864
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Similarity between user 1 and 886
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Prediction user 1 and item 1
+     assert(within(1.0, 0.0, 0.0001))
+
+     // Prediction user 327 and item 2
+     assert(within(1.0, 0.0, 0.0001))
+
+     // MAE on test2
+     assert(within(1.0, 0.0, 0.0001))
+   }
+}
--- a/src/test/scala/shared/helpers.scala
+++ b/src/test/scala/shared/helpers.scala
+package test.shared
+
+package object helpers {
+
+  /**
+   * Tells whether `actual` lies inside the closed interval
+   * `[expected - interval, expected + interval]`.
+   *
+   * @param actual   value produced by the code under test
+   * @param expected reference value
+   * @param interval allowed absolute deviation on either side of `expected`
+   * @return `true` when `actual` is at most `interval` away from `expected`
+   *         (both bounds inclusive), `false` otherwise
+   */
+  def within(actual :Double, expected :Double, interval :Double) : Boolean = {
+    val lowerBound = expected - interval
+    val upperBound = expected + interval
+    lowerBound <= actual && actual <= upperBound
+  }
+}
--- a/test.sh
+++ b/test.sh
+#!/usr/bin/env bash
+# Runs the sbt test suite and appends all of its output to a log file inside
+# a fresh, timestamped directory under ./logs.
+#
+# If your default java install does not work, explicitly
+# provide the path to the JDK 1.8 installation. On OSX
+# with homebrew:
+# export JAVA_HOME=/usr/local/Cellar/openjdk@8/1.8.0+282; ./test.sh
+export JAVA_OPTS="-Xmx8G"
+# One directory per run, named after the start time and the host.
+RUN="./logs/test-$(date "+%Y-%m-%d-%H:%M:%S")-$(hostname)"
+mkdir -p "$RUN"
+LOGS="$RUN/log.txt"
+# Redirections are processed left to right: stdout must be pointed at the log
+# first, and only then can stderr be duplicated onto it. The reverse order
+# (2>&1 >>file) leaves stderr on the terminal and out of the log.
+sbt "testOnly test.AllTests" >>"$LOGS" 2>&1
--- a/timing.sh
+++ b/timing.sh
+#!/usr/bin/env bash
+# Runs the timing measurements (optimizing, distributed exact, distributed
+# approximate) through sbt. JSON results and a log file are written to a
+# fresh, timestamped directory under ./logs.
+#
+# If your default java install does not work, explicitly
+# provide the path to the JDK 1.8 installation. On OSX
+# with homebrew:
+# export JAVA_HOME=/usr/local/Cellar/openjdk@8/1.8.0+282; ./timing.sh
+export JAVA_OPTS="-Xmx8G"
+# One directory per run, named after the start time and the host.
+RUN="./logs/timing-$(date "+%Y-%m-%d-%H:%M:%S")-$(hostname)"
+mkdir -p "$RUN"
+LOGS="$RUN/log.txt"
+# config.sh is expected to define the data set paths used below
+# (ML100Ku2base, ML100Ku2test, ML1Mrbtrain, ML1Mrbtest).
+source ./config.sh
+# Redirections are processed left to right: stdout must be pointed at the log
+# first, and only then can stderr be duplicated onto it. The reverse order
+# (2>&1 >>file) leaves stderr on the terminal and out of the log.
+echo "------------------- OPTIMIZING    ---------------------" >> "$LOGS"
+sbt "runMain scaling.Optimizing --train $ML100Ku2base --test $ML100Ku2test --json $RUN/optimized-100k.json --users 943 --movies 1682 --num_measurements 3" >>"$LOGS" 2>&1
+echo "------------------- DISTRIBUTED EXACT ---------------------" >> "$LOGS"
+# W is the number of local Spark workers passed as local[W].
+for W in 1 2 4; do
+    sbt "runMain distributed.Exact --train $ML1Mrbtrain --test $ML1Mrbtest --separator :: --json $RUN/exact-1m-$W.json --k 300 --master local[$W] --users 6040 --movies 3952 --num_measurements 3" >>"$LOGS" 2>&1
+done
+echo "------------------- DISTRIBUTED APPROXIMATE ---------------------" >> "$LOGS"
+for W in 1 2 4; do
+    sbt "runMain distributed.Approximate --train $ML1Mrbtrain --test $ML1Mrbtest --separator :: --json $RUN/approximate-1m-$W.json --k 300 --master local[$W] --users 6040 --movies 3952 --num_measurements 3" >>"$LOGS" 2>&1
+done
No results found