It looks like I found the solution — posting it here in case anyone ever encounters a similar challenge.
df = spark.createDataFrame( [("a", 1, 0), ("a", 2, 42), ("a", 3, 10), ("b", 4, -1), ("b", 5, -2), ("b", 6, 12)], ("key", "consumerID", "feature") ) df.show() schema = StructType([ StructField("ID_1", DoubleType()), StructField("ID_2", DoubleType()), StructField("feature1", DoubleType()), StructField("feature2", DoubleType()), ]) @pandas_udf(schema, PandasUDFType.GROUPED_MAP) def get_all_combinations(df): p=[] for i in range(len(df)): for j in range(len(df)): if i<j: p.append([df.consumerID[i],df.consumerID[j],df.feature[i],df.feature[j]]) return pd.DataFrame(p) display(df.groupBy('key').apply(get_all_combinations)) -- Sent from: http://apache-spark-user-list.1001560.n3.nabble.com/ --------------------------------------------------------------------- To unsubscribe e-mail: user-unsubscr...@spark.apache.org