Skip to content
Snippets Groups Projects
Commit 8f051ddb authored by Parshikov Tikhon's avatar Parshikov Tikhon
Browse files

First fixes approximate

parent f10298c3
No related branches found
No related tags found
No related merge requests found
...@@ -249,22 +249,25 @@ package object predictions ...@@ -249,22 +249,25 @@ package object predictions
} }
//5 //5
def distributed_knn_approximate(preprocessed_ratings : CSCMatrix[Double], k : Int, spark_context: SparkContext, nbPartitions : Int, replication : Int) : Array[Int] = { def distributed_knn_approximate(preprocessed_ratings : DenseMatrix[Double], k : Int, spark_context: SparkContext, nbPartitions : Int, replication : Int) : DenseMatrix[Double] = {
val new_ratings = new CSCMatrix[Double](ratings.rows, ratings.cols) val new_ratings = new CSCMatrix[Double](preprocessed_ratings.rows, preprocessed_ratings.cols)
//Seq[Set[Int]
users_partition = partitionUsers (preprocessed_ratings.rows, nbPartitions,replication) val users_partition = partitionUsers (preprocessed_ratings.rows, nbPartitions,replication)
val broadcast = sc.broadcast(preprocessed_ratings.toDense) val broadcast = sc.broadcast(preprocessed_ratings)
val approximate_topk = sc.parallelize(users_partition).map(partition_iterator => { val approximate_topk = sc.parallelize(users_partition).map(partition_iterator => {
val ratings = broadcast.value val ratings = broadcast.value
val partition = ratings(partition_iterator,::) val all_users = (0 until ratings.rows).toSeq
val exclude_users =all_users.diff(partition_iterator.toSeq)
val partition = ratings.delete(exclude_users,Axis._0)
val similarities = partition * partition.t val similarities = partition * partition.t
val partition_index = partition.zipWith(Array[Int](partition.rows)) //val partition_index = partition.zipWith(Array[Int](partition.rows))
//TODO how to integrate indexes in knn //TODO how to integrate indexes in knn
val topk = partition_index.map(x => knn(x,k,similarities)) val sorted_users =partition_iterator.toArray.sorted
val topk = partition_iterator.map(x => (x,knn(sorted_users.indexOf(x),k,similarities)))
topk
}).collect() }).collect()
//redo knn //redo knn
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment