Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • sacs/cs-449-sds-public/project/cs449-template-m2-2022
  • hlanfran/cs449-template-m2-2022
2 results
Show changes
import org.scalatest.funsuite._
/**
 * Placeholder suite: registers a single test whose only assertion is
 * `assert(true)`, so it passes unconditionally.
 */
final class Recommendation extends AnyFunSuite {

  test("recommendation works") {
    // Trivially true; stands in for a real recommendation check.
    assert(true)
  }

}
package test.distributed
import breeze.linalg._
import breeze.numerics._
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.BeforeAndAfterAll
import shared.predictions._
import test.shared.helpers._
import org.apache.spark.sql.SparkSession
import org.apache.spark.SparkContext
/**
 * Test-suite template for the approximate kNN predictor.
 *
 * `beforeAll` loads the ml-100k `u2` train/test splits and obtains a local
 * SparkContext. The assertions in the test body are placeholders to be
 * replaced with real expected values.
 */
class ApproximateTests extends AnyFunSuite with BeforeAndAfterAll {

  val separator = "\t"
  val train2Path = "data/ml-100k/u2.base"
  val test2Path = "data/ml-100k/u2.test"

  // Assigned in beforeAll(); they hold null until the suite has been set up.
  var train2 : CSCMatrix[Double] = null
  var test2 : CSCMatrix[Double] = null
  var sc : SparkContext = null

  /** Loads both rating matrices and obtains a SparkContext running on `local[2]`. */
  override def beforeAll(): Unit = {
    // 943 and 1682 are presumably the ml-100k user and movie counts
    // (the run script passes the same numbers as --users / --movies).
    train2 = load(train2Path, separator, 943, 1682)
    test2 = load(test2Path, separator, 943, 1682)

    val spark = SparkSession.builder().master("local[2]").getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
    sc = spark.sparkContext
  }

  // Provide tests to show how to call your code to do the following tasks.
  // Ensure you use the same function calls to produce the JSON outputs in
  // the corresponding application.
  // Add assertions with the answer you expect from your code, up to the 4th
  // decimal after the (floating) point, on data/ml-100k/u2.base (as loaded above).
  test("Approximate kNN predictor with 10 partitions and replication of 2") {
    // Arguments are presumably (number of users, partitions, replication),
    // matching the test name -- confirm against shared.predictions.
    // The result is not used by the placeholder assertions below.
    val partitionedUsers : Seq[Set[Int]] = partitionUsers(
      943,
      10,
      2
    )

    // NOTE: every assertion below is a placeholder. within(1.0, 0.0, 0.0001)
    // evaluates to false, so this test fails until the first argument is
    // replaced by a computed value and the second by its expected value.

    // Similarity between user 1 and itself
    assert(within(1.0, 0.0, 0.0001))

    // Similarity between user 1 and 864
    assert(within(1.0, 0.0, 0.0001))

    // Similarity between user 1 and 344
    assert(within(1.0, 0.0, 0.0001))

    // Similarity between user 1 and 16
    assert(within(1.0, 0.0, 0.0001))

    // Similarity between user 1 and 334
    assert(within(1.0, 0.0, 0.0001))

    // Similarity between user 1 and 2
    assert(within(1.0, 0.0, 0.0001))

    // MAE on test
    assert(within(1.0, 0.0, 0.0001))
  }
}
package test.distributed
import breeze.linalg._
import breeze.numerics._
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.BeforeAndAfterAll
import shared.predictions._
import test.shared.helpers._
import org.apache.spark.sql.SparkSession
import org.apache.spark.SparkContext
/**
 * Test-suite template for the exact (distributed) kNN predictor.
 *
 * `beforeAll` loads the ml-100k `u2` train/test splits and obtains a local
 * SparkContext. The assertions in the test body are placeholders to be
 * replaced with real expected values.
 */
class ExactTests extends AnyFunSuite with BeforeAndAfterAll {

  val separator = "\t"
  val train2Path = "data/ml-100k/u2.base"
  val test2Path = "data/ml-100k/u2.test"

  // Assigned in beforeAll(); they hold null until the suite has been set up.
  var train2 : CSCMatrix[Double] = null
  var test2 : CSCMatrix[Double] = null
  var sc : SparkContext = null

  /** Loads both rating matrices and obtains a SparkContext running on `local[2]`. */
  override def beforeAll(): Unit = {
    // 943 and 1682 are presumably the ml-100k user and movie counts
    // (the run script passes the same numbers as --users / --movies).
    train2 = load(train2Path, separator, 943, 1682)
    test2 = load(test2Path, separator, 943, 1682)

    val spark = SparkSession.builder().master("local[2]").getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
    sc = spark.sparkContext
  }

  // Provide tests to show how to call your code to do the following tasks.
  // Ensure you use the same function calls to produce the JSON outputs in
  // the corresponding application.
  // Add assertions with the answer you expect from your code, up to the 4th
  // decimal after the (floating) point, on data/ml-100k/u2.base (as loaded above).
  test("kNN predictor with k=10") {
    // NOTE: every assertion below is a placeholder. within(1.0, 0.0, 0.0001)
    // evaluates to false, so this test fails until the first argument is
    // replaced by a computed value and the second by its expected value.

    // Similarity between user 1 and itself
    assert(within(1.0, 0.0, 0.0001))

    // Similarity between user 1 and 864
    assert(within(1.0, 0.0, 0.0001))

    // Similarity between user 1 and 886
    assert(within(1.0, 0.0, 0.0001))

    // Prediction user 1 and item 1
    assert(within(1.0, 0.0, 0.0001))

    // Prediction user 327 and item 2
    assert(within(1.0, 0.0, 0.0001))

    // MAE on test
    assert(within(1.0, 0.0, 0.0001))
  }
}
package test.optimizing
import breeze.linalg._
import breeze.numerics._
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.BeforeAndAfterAll
import shared.predictions._
import test.shared.helpers._
/**
 * Test-suite template for the optimized (non-Spark) kNN predictor.
 *
 * `beforeAll` loads the ml-100k `u2` train/test splits. The assertions in the
 * test body are placeholders to be replaced with real expected values.
 */
class OptimizingTests extends AnyFunSuite with BeforeAndAfterAll {

  val separator = "\t"
  val train2Path = "data/ml-100k/u2.base"
  val test2Path = "data/ml-100k/u2.test"

  // Assigned in beforeAll(); they hold null until the suite has been set up.
  var train2 : CSCMatrix[Double] = null
  var test2 : CSCMatrix[Double] = null

  /** Loads both rating matrices before any test in the suite runs. */
  override def beforeAll(): Unit = {
    // For these questions, train and test are loaded without Spark
    // (here as breeze CSCMatrix values) so the suite does not depend on it.
    // 943 and 1682 are presumably the ml-100k user and movie counts
    // (the run script passes the same numbers as --users / --movies).
    train2 = load(train2Path, separator, 943, 1682)
    test2 = load(test2Path, separator, 943, 1682)
  }

  // Provide tests to show how to call your code to do the following tasks.
  // Ensure you use the same function calls to produce the JSON outputs in
  // the corresponding application.
  // Add assertions with the answer you expect from your code, up to the 4th
  // decimal after the (floating) point, on data/ml-100k/u2.base (as loaded above).
  test("kNN predictor with k=10") {
    // NOTE: every assertion below is a placeholder. within(1.0, 0.0, 0.0001)
    // evaluates to false, so this test fails until the first argument is
    // replaced by a computed value and the second by its expected value.

    // Similarity between user 1 and itself
    assert(within(1.0, 0.0, 0.0001))

    // Similarity between user 1 and 864
    assert(within(1.0, 0.0, 0.0001))

    // Similarity between user 1 and 886
    assert(within(1.0, 0.0, 0.0001))

    // Prediction user 1 and item 1
    assert(within(1.0, 0.0, 0.0001))

    // Prediction user 327 and item 2
    assert(within(1.0, 0.0, 0.0001))

    // MAE on test2
    assert(within(1.0, 0.0, 0.0001))
  }
}
package test.shared
package object helpers {

  /**
   * Tells whether `actual` lies in the closed range
   * `[expected - interval, expected + interval]`.
   *
   * Both bounds are inclusive. A negative `interval` yields an empty range,
   * and a NaN argument makes the comparisons fail, so both cases return false.
   *
   * @param actual   value under test
   * @param expected centre of the accepted range
   * @param interval maximum accepted distance from `expected`
   * @return true when `actual` is inside the range, false otherwise
   */
  def within(actual :Double, expected :Double, interval :Double) : Boolean = {
    val lowerBound = expected - interval
    val upperBound = expected + interval
    // The last expression is the result; no explicit `return` is needed.
    lowerBound <= actual && actual <= upperBound
  }
}
#!/usr/bin/env bash
# Runs the sbt test target and appends its output to a timestamped,
# per-host log file under ./logs.
#
# If your default java install does not work, explicitly
# provide the path to the JDK 1.8 installation. On OSX
# with homebrew:
# export JAVA_HOME=/usr/local/Cellar/openjdk@8/1.8.0+282; ./test.sh
export JAVA_OPTS="-Xmx8G";
RUN=./logs/test-$(date "+%Y-%m-%d-%H:%M:%S")-$(hostname)
mkdir -p "$RUN"
LOGS="$RUN/log.txt"
# Redirections apply left to right: stdout must be pointed at the log first,
# then stderr duplicated onto it, otherwise stderr never reaches the log.
# NOTE(review): test.AllTests is not among the suites visible here -- confirm it exists.
sbt "testOnly test.AllTests" >>"$LOGS" 2>&1
#!/usr/bin/env bash
# Runs the timing experiments (optimizing, distributed exact, distributed
# approximate) through sbt, writing JSON results and a log file into a
# timestamped, per-host directory under ./logs.
#
# If your default java install does not work, explicitly
# provide the path to the JDK 1.8 installation. On OSX
# with homebrew:
# export JAVA_HOME=/usr/local/Cellar/openjdk@8/1.8.0+282; ./run.sh
export JAVA_OPTS="-Xmx8G";
RUN=./logs/timing-$(date "+%Y-%m-%d-%H:%M:%S")-$(hostname)
mkdir -p "$RUN"
LOGS="$RUN/log.txt"
# Presumably defines the dataset path variables used below
# ($ML100Ku2base, $ML100Ku2test, $ML1Mrbtrain, $ML1Mrbtest) -- confirm in config.sh.
source ./config.sh

# In every sbt call below, stdout is pointed at the log first and stderr is
# then duplicated onto it (redirections apply left to right), so both streams
# are captured in the log.
echo "------------------- OPTIMIZING ---------------------" >> "$LOGS"
sbt "runMain scaling.Optimizing --train $ML100Ku2base --test $ML100Ku2test --json $RUN/optimized-100k.json --users 943 --movies 1682 --num_measurements 3" >>"$LOGS" 2>&1

echo "------------------- DISTRIBUTED EXACT ---------------------" >> "$LOGS"
# W is the number of local Spark worker threads (--master local[W]).
for W in 1 2 4; do
    sbt "runMain distributed.Exact --train $ML1Mrbtrain --test $ML1Mrbtest --separator :: --json $RUN/exact-1m-$W.json --k 300 --master local[$W] --users 6040 --movies 3952 --num_measurements 3" >>"$LOGS" 2>&1;
done

# NOTE(review): this banner reads "APPROXIMATE EXACT" although the loop runs
# distributed.Approximate; kept unchanged in case anything parses the log.
echo "------------------- APPROXIMATE EXACT ---------------------" >> "$LOGS"
for W in 1 2 4; do
    sbt "runMain distributed.Approximate --train $ML1Mrbtrain --test $ML1Mrbtest --separator :: --json $RUN/approximate-1m-$W.json --k 300 --master local[$W] --users 6040 --movies 3952 --num_measurements 3" >>"$LOGS" 2>&1;
done