simPop
simPop copied to clipboard
Missing values might not be treated well in simContinuous
Here is an executable example:
Data:
# Load simPop and the eusilc13puf data set that ships with it.
library(simPop)
data(eusilc13puf, package = "simPop")

# Keep only the columns used in this example (selected by position) and
# coerce age to numeric and pid to a factor.
df <- eusilc13puf[, c(1:6, 8:9, 14, 16, 46)]
df$age <- as.numeric(df$age)
df$pid <- as.factor(df$pid)

# Rename the selected columns; the order matches the positions chosen above.
vars <- c(
  "hhid", "hsize", "region", "age", "sex", "pid", "eco_stat", "citizenship",
  "pgrossIncome", "hgrossIncome", "weight"
)
colnames(df) <- vars

# Divide the weights by 100 so that the simulated population is smaller by
# that factor, which keeps the computation time short.
df$weight <- df$weight / 100

# Declare which columns hold the household id, household size, strata and
# sampling weight.
inp <- specifyInput(
  data = df,
  hhid = "hhid",
  hhsize = "hsize",
  strata = "region",
  weight = "weight"
)

# Create the population object with age, sex and region as the basic
# household variables.
simPop <- simStructure(
  data = inp,
  method = "distribution",
  basicHHvars = c("age", "sex", "region")
)

# Add the categorical variables eco_stat and citizenship using the
# "multinom" method.
simPop <- simCategorical(
  simPop,
  additional = c("eco_stat", "citizenship"),
  method = "multinom"
)
And here comes the error:
# Add pgrossIncome with a linear model on sex, household size, eco_stat,
# citizenship and age. This is the call that raises the error.
simPop <- simContinuous(
  simPop,
  method = "lm",
  additional = "pgrossIncome",
  regModel = ~ sex + hsize + eco_stat + citizenship + age,
  zeros = TRUE,
  log = FALSE,
  const = 1,
  alpha = NULL, # 0.05,
  residuals = TRUE
)
The reason is most likely related to missing values in the variable eco_stat, which cause a size mismatch at line 338 of simContinuous:
# Excerpt from simContinuous; the trailing comments give the sizes observed
# when running the example above.
# NOTE(review): model.matrix() presumably drops rows with missing predictor
# values (default na.action), while y and weights are taken from every row of
# dataS, so the lengths no longer agree -- confirm.
X <- model.matrix(estimationModel, data = dataS) # has about 10000 rows
y <- dataS[[name]] # has more than 13000 entries
weights <- dataS[[weight]] # has more than 13000 entries
Afterwards, mod <- logitreg(X, y, weights = weights) fails, because X has fewer rows than y and weights have entries.