"""Minimal example of a vectorized (pandas) UDF in PySpark.

Builds a three-row DataFrame with integer columns ``A`` and ``B``, adds the
two columns element-wise through a pandas UDF, and prints the resulting
single-column DataFrame (``Sum``).
"""
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf
import pandas as pd

# Create SparkSession (reuses an already-active session if there is one).
spark = SparkSession.builder.getOrCreate()

# Example DataFrame: three rows, two integer columns.
data = [(1, 2), (3, 4), (5, 6)]
df = spark.createDataFrame(data, schema=["A", "B"])


# Define a Pandas UDF.
# The declared return type is "double" even though the sample columns hold
# integers. NOTE(review): the sums are presumably cast to double on the way
# back to Spark -- confirm on your Spark/Arrow version, or declare "long"
# if integer output is wanted.
@pandas_udf("double")
def vectorized_addition(a: pd.Series, b: pd.Series) -> pd.Series:
    """Return the element-wise sum of two columns.

    Args:
        a: Values of the first column, as a pandas Series.
        b: Values of the second column, as a pandas Series.

    Returns:
        A pandas Series holding ``a + b``.
    """
    return a + b


# Apply Pandas UDF: select only the computed column, named "Sum".
result = df.select(vectorized_addition(df["A"], df["B"]).alias("Sum"))
result.show()
Pandas UDF
Creator: Seonglae Cho
Created: 2025 Jan 26 0:50
Editor: Seonglae Cho
Edited: 2025 Jan 26 0:50
Refs: Pandas