import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
/**
 * Small Spark driver that builds a single-column DataFrame of country codes
 * and prints it to standard output.
 */
object crossjoin {

  /**
   * Program entry point: starts a local Spark session, shows the `country`
   * DataFrame and then shuts the session down.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Settings have to be supplied while the session is being built; a
    // SparkConf that is created after getOrCreate() and never handed to the
    // session has no effect on it.
    val spark: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("SparkByExamples.com")
      .config("spark.sql.crossJoin.enabled", "true")
      .getOrCreate()

    try {
      spark.sparkContext.setLogLevel("ERROR")

      // Brings the `toDF` conversion for local Scala collections into scope.
      import spark.implicits._

      val df1 = List("IN", "PK", "AU", "SL").toDF("country")
      df1.show()

      // Earlier attempt at building the combinations, left disabled:
      // df1.withColumn("combinations", collect_set("country").over(Window.orderBy()))
      //   .show(false)
    } finally {
      // Release the local Spark resources even if the job above fails.
      spark.stop()
    }
  }
}
Input:
+-------+
|country|
+-------+
| IN|
| PK|
| AU|
| SL|
+-------+
Output:
+--------+
| result|
+--------+
|AU vs SL|
|AU vs PK|
|AU vs IN|
|IN vs PK|
+--------+
The result should not contain duplicates. I think something like a cross join should be performed. I tried, but I am unable to solve it. I got this SQL query:
-- Pairs every country with the other countries and labels each pair "<c1> vs <c2>".
-- The WHERE clause is hard-coded to the sample values (IN, PK, AU, SL) so that
-- each pair is listed only once; it has to be revisited if the table contents change.
select concat(c1.country,' vs ',c2.country) as result from country c1
left join country c2 on c1.country!=c2.country
where c1.country!='PK' and c2.country!='IN' and (c1.country!='SL' or c2.country='PK')
order by result