It looks like I found the solution — posting it here in case anyone ever
encounters a similar challenge:

# Sample input: (key, consumerID, feature) rows across two grouping keys.
df = spark.createDataFrame(
    [
        ("a", 1, 0),
        ("a", 2, 42),
        ("a", 3, 10),
        ("b", 4, -1),
        ("b", 5, -2),
        ("b", 6, 12),
    ],
    ("key", "consumerID", "feature"),
)

df.show()

# Output schema of the grouped-map pandas UDF below: one row per unordered
# pair of consumers within a key. Field order must match the positional
# column order of the DataFrame the UDF returns.
# NOTE(review): the source columns are integers while the schema declares
# DoubleType — this relies on Spark's int->double coercion; confirm intended.
schema = StructType([
    StructField("ID_1", DoubleType()),
    StructField("ID_2", DoubleType()),
    StructField("feature1", DoubleType()),
    StructField("feature2", DoubleType()),
])


@pandas_udf(schema, PandasUDFType.GROUPED_MAP)
def get_all_combinations(df):
    """Return every unordered pair of consumers within one key group.

    Receives the pandas DataFrame for a single ``key`` group and emits one
    row per pair ``(i, j)`` with ``i < j``, carrying both consumer IDs and
    both feature values.
    """
    pairs = []
    n = len(df)
    # Iterate j from i+1 instead of filtering the full cross product with
    # `if i < j` — same pairs, half the iterations.
    for i in range(n):
        for j in range(i + 1, n):
            pairs.append([
                # .iloc is positional, so this works regardless of the
                # group's index labels (df.consumerID[i] is label-based).
                df.consumerID.iloc[i], df.consumerID.iloc[j],
                df.feature.iloc[i], df.feature.iloc[j],
            ])
    # Name the columns explicitly: an empty or single-row group would
    # otherwise yield a zero-column DataFrame that cannot be mapped onto
    # the four-field output schema.
    return pd.DataFrame(pairs, columns=["ID_1", "ID_2", "feature1", "feature2"])
  
display(df.groupBy('key').apply(get_all_combinations))



--
Sent from: http://apache-spark-user-list.1001560.n3.nabble.com/

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscr...@spark.apache.org

Reply via email to