NonParamDML always returns constant effect for continuous treatment
I've been seeing NonParamDML from econml 0.16.0 always return a constant effect for my data (continuous treatment, binary outcome). A toy example:
# Reproduction script: fit NonParamDML on synthetic data with a continuous
# treatment and a binary outcome, then print the ATE for four unit-width
# treatment contrasts.
import numpy as np
# NOTE(review): matplotlib is imported but not used anywhere in this snippet.
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
# NOTE(review): NonParamDML is used below but never imported in this snippet;
# presumably `from econml.dml import NonParamDML` was run beforehand - confirm.
# Generate STRONGLY non-linear data
# Seed NumPy's global RNG so the synthetic draws below are repeatable.
np.random.seed(123)
n = 2000
# Covariates: n rows, 4 normally distributed columns.
X_test = np.random.normal(size=(n, 4))
# Continuous treatment drawn uniformly from [0, 6), kept as an (n, 1) column.
T_test = np.random.uniform(0, 6, size=(n, 1))
# Very strong non-linear effect - cubic with clear differences
true_effect = 0.5 * (T_test - 3)**3 + 2 * (T_test - 3) # Cubic centered at 3
# Logit of the outcome: row-sum of the covariates + treatment effect + Gaussian noise (sd 0.3).
Y_logit = X_test.sum(axis=1, keepdims=True) + true_effect + np.random.normal(0, 0.3, (n, 1))
# Logistic sigmoid maps the logit to a probability.
Y_prob = 1 / (1 + np.exp(-Y_logit))
# Binary outcome: one Bernoulli draw per row, reshaped back to an (n, 1) column.
Y_test = np.random.binomial(1, Y_prob.flatten()).reshape(-1, 1)
# Use your exact model specification
# No treatment_featurizer is passed here, so the treatment enters the estimator as-is.
est_test = NonParamDML(
model_y=RandomForestClassifier(n_estimators=200, max_depth=10),
model_t=RandomForestRegressor(n_estimators=200, max_depth=10),
model_final=RandomForestRegressor(n_estimators=200, max_depth=10),
random_state=42,
discrete_treatment=False,
discrete_outcome=True,
cv=3
)
# Y and T are flattened to 1-D before fitting; X stays 2-D.
est_test.fit(Y_test.ravel(), T_test.ravel(), X=X_test)
# Test on the same ranges
# Evaluate effects on the first 100 covariate rows only.
Xd_test = X_test[:100]
print("=== TESTING WITH SYNTHETIC NON-LINEAR DATA ===")
# Each contrast below moves the treatment by exactly one unit (T1 - T0 == 1).
# NOTE(review): per the maintainer reply in this thread, the fitted effect is
# linear in T when no treatment featurizer is used, so identical values for
# these four contrasts would be the expected outcome rather than a bug.
print(f"ATE (1→2): {est_test.ate(Xd_test, T0=1, T1=2)}")
print(f"ATE (2→3): {est_test.ate(Xd_test, T0=2, T1=3)}")
print(f"ATE (3→4): {est_test.ate(Xd_test, T0=3, T1=4)}")
print(f"ATE (4→5): {est_test.ate(Xd_test, T0=4, T1=5)}")
# These SHOULD be different if your setup is working
And the output:
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
=== TESTING WITH SYNTHETIC NON-LINEAR DATA ===
ATE (1→2): [0.23992668]
ATE (2→3): [0.23992668]
ATE (3→4): [0.23992668]
ATE (4→5): [0.23992668]
Is this behavior expected? The only example I can find in the EconML documentation uses a binary treatment; I can't find any example with a continuous treatment.
This is expected: NonParamDML fits a final model that is an arbitrary function of X, but the effect is still linear in T. If you want non-linearity in T you can use a treatment featurizer (with or without using NonParamDML) - see the "What if my treatments are continuous and don’t have a linear effect on the outcome?" section of https://www.pywhy.org/EconML/spec/estimation/dml.html#usage-faqs.
Hi @kbattocchi, I tried both LinearDML and NonParamDML with PolynomialFeatures as the treatment_featurizer, but they still output constant ATE values. Did I miss anything?
# Reproduction script (second attempt): same synthetic data as before, but
# NonParamDML is now given a PolynomialFeatures treatment featurizer; prints
# the ATE for four unit-width treatment contrasts.
import numpy as np
# NOTE(review): matplotlib is imported but not used anywhere in this snippet.
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from econml.dml import DML, SparseLinearDML, LinearDML, NonParamDML, CausalForestDML
# NOTE(review): the next import duplicates the sklearn.ensemble import two lines up.
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
# Generate STRONGLY non-linear data
# Seed NumPy's global RNG so the synthetic draws below are repeatable.
np.random.seed(123)
n = 2000
# Covariates: n rows, 4 normally distributed columns.
X_test = np.random.normal(size=(n, 4))
# Continuous treatment drawn uniformly from [0, 6), kept as an (n, 1) column.
T_test = np.random.uniform(0, 6, size=(n, 1))
# Very strong non-linear effect - cubic with clear differences
true_effect = 0.5 * (T_test - 3)**3 + 2 * (T_test - 3) # Cubic centered at 3
# Logit of the outcome: row-sum of the covariates + treatment effect + Gaussian noise (sd 0.3).
Y_logit = X_test.sum(axis=1, keepdims=True) + true_effect + np.random.normal(0, 0.3, (n, 1))
# Logistic sigmoid maps the logit to a probability.
Y_prob = 1 / (1 + np.exp(-Y_logit))
# Binary outcome: one Bernoulli draw per row, reshaped back to an (n, 1) column.
Y_test = np.random.binomial(1, Y_prob.flatten()).reshape(-1, 1)
from sklearn.preprocessing import PolynomialFeatures
# NOTE(review): per the scikit-learn documentation, interaction_only=True keeps
# only products of *distinct* input features and excludes powers of a single
# feature (x**2, x**3, ...). T_test has one column, so this featurizer should
# emit just T itself - no T**2 or T**3 - leaving the effect linear in T. That
# would explain the identical ATEs printed below. interaction_only=False is
# probably what was intended; verify via poly.fit_transform(T_test).shape.
poly = PolynomialFeatures(degree=3, interaction_only=True, include_bias=False)
# Use your exact model specification
est_test = NonParamDML(
model_y=RandomForestClassifier(n_estimators=200, max_depth=10),
model_t=RandomForestRegressor(n_estimators=200, max_depth=10),
model_final=RandomForestRegressor(n_estimators=200, max_depth=10),
random_state=42,
discrete_treatment=False,
discrete_outcome=True,
cv=2,
treatment_featurizer=poly
)
# NOTE(review): Y_test and T_test are passed as (n, 1) column vectors here
# (no .ravel()); presumably this is what triggers the repeated
# "A column-vector y was passed" warnings - confirm.
est_test.fit(Y_test, T_test, X=X_test)
# Test on the same ranges
# Evaluate effects on the first 100 covariate rows only.
Xd_test = X_test[:100]
print("=== TESTING WITH SYNTHETIC NON-LINEAR DATA ===")
# Each contrast below moves the treatment by exactly one unit (T1 - T0 == 1).
print(f"ATE (1→2): {est_test.ate(Xd_test, T0=1, T1=2)}")
print(f"ATE (2→3): {est_test.ate(Xd_test, T0=2, T1=3)}")
print(f"ATE (3→4): {est_test.ate(Xd_test, T0=3, T1=4)}")
print(f"ATE (4→5): {est_test.ate(Xd_test, T0=4, T1=5)}")
It outputs:
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
=== TESTING WITH SYNTHETIC NON-LINEAR DATA ===
ATE (1→2): [0.28580963]
ATE (2→3): [0.28580963]
ATE (3→4): [0.28580963]
ATE (4→5): [0.28580963]