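Pandas’ `apply` method can be very inefficient for row-wise operations (`axis=1`), because it calls a Python function once per row. When the computation can be expressed with NumPy array operations, a vectorized version produces the same result far faster, as the following benchmark shows: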
"""Benchmark row-wise ``DataFrame.apply`` against a vectorized NumPy call.

Both approaches compute the same derived feature from two random columns;
the script prints how long each one takes.
"""
import time

import numpy as np
import pandas as pd

# Number of rows used when the script is run directly.
N_ROWS = 1_000_000


def complex_function(x1, x2):
    """Return ``log(x1 + 1) * sqrt(x2)``.

    Works on scalars and, elementwise, on NumPy arrays, because it is built
    only from NumPy ufuncs and arithmetic operators.
    """
    return np.log(x1 + 1) * np.sqrt(x2)


def make_sample_data(n_rows=N_ROWS):
    """Build a DataFrame with ``n_rows`` rows of uniform random values.

    The frame has two columns, ``feature1`` and ``feature2``, each drawn
    from ``np.random.rand`` (values in ``[0, 1)``).
    """
    return pd.DataFrame({
        'feature1': np.random.rand(n_rows),
        'feature2': np.random.rand(n_rows),
    })


def time_apply(df):
    """Add ``new_feature_apply`` to ``df`` via row-wise apply; return seconds.

    ``df`` is modified in place. The lambda is invoked once per row, which
    is what makes this the slow variant.
    """
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump if the system clock is adjusted.
    started = time.perf_counter()
    df['new_feature_apply'] = df.apply(
        lambda row: complex_function(row['feature1'], row['feature2']),
        axis=1,
    )
    return time.perf_counter() - started


def time_vectorized(df):
    """Add ``new_feature_vectorized`` to ``df`` in one call; return seconds.

    ``df`` is modified in place. The whole columns are passed as NumPy
    arrays, so ``complex_function`` is evaluated once over all rows.
    """
    started = time.perf_counter()
    df['new_feature_vectorized'] = complex_function(
        df['feature1'].to_numpy(),
        df['feature2'].to_numpy(),
    )
    return time.perf_counter() - started


def run_benchmark(n_rows=N_ROWS):
    """Run both variants on fresh random data and print their durations.

    Returns:
        A ``(df, apply_duration, vectorized_duration)`` tuple: the DataFrame
        with both new feature columns added, and the two timings in seconds.
    """
    df = make_sample_data(n_rows)

    # Using pandas apply (slower)
    apply_duration = time_apply(df)

    # Using NumPy vectorization (faster)
    vectorized_duration = time_vectorized(df)

    print(f"Apply duration: {apply_duration:.2f} seconds")
    print(f"Vectorized duration: {vectorized_duration:.2f} seconds")
    return df, apply_duration, vectorized_duration


if __name__ == "__main__":
    # Bind the results at module level so that running this as a script (or
    # in a notebook cell) leaves the same names available as before.
    df, apply_duration, vectorized_duration = run_benchmark()

# Example output from the original run (timings are machine-dependent):
# Apply duration: 10.07 seconds
# Vectorized duration: 0.01 seconds