2

I am using the data available here: http://www.ads.org.pl/pobieranie-zbioru-danych.php?id=91, a survey with 17497 observations of 1681 variables. I am interested in variable q32, the recalled net salary, which has many missing values. To avoid bias, I am trying to run a multiple imputation using the Amelia package in R.

I do the following:

library(foreign)
# Read the SPSS survey file straight into a data frame.
dataset <- read.spss(
  file = "C:\\(place the link to the dataset once you download it here)",
  to.data.frame = TRUE
)

# Interactive look at q32 (net salary of the respondent): first the
# missingness flags, then the raw values.
is.na(dataset$q32)
dataset$q32

## According to the survey specification (p. 20), negative codes already
## arrive as NA, but 99998 ("difficult to say") and 99999 ("no data") are
## still present in q32 and have to be blanked out by hand.
dataset$q32[dataset$q32 %in% c(99998, 99999)] <- NA
# NOTE(review): this blanks the value 99 in every column of the data set,
# not only in q32 -- confirm that 99 is a missing-value code everywhere.
dataset[dataset == 99] <- NA

## audit-check: drop salary records that look like erroneous outliers, see
## http://cran.r-project.org/doc/contrib/de_Jonge+van_der_Loo-Introduction_to_data_cleaning_with_R.pdf
dataset$q32[dataset$q32 %in% c(20000, 15000, 30000)] <- NA

#########multiple imputation: 
# Install Amelia only when it is missing, so that re-running the script does
# not download and reinstall the package every time; then attach it.
if (!requireNamespace("Amelia", quietly = TRUE)) {
  install.packages("Amelia")
}
library("Amelia")


##datacleaning: removing the unneeded columns: 

# pgssyear is handed to amelia() as the time-series variable further down,
# so store it as a plain numeric column.
dataset$pgssyear <- as.numeric(dataset$pgssyear)

# Keep columns 1-20 and 49-504 by position and append twelve further
# variables. The extra vectors are deliberately passed unnamed: the `ord`
# vector below refers to them as "dataset$wr18", "dataset$na26a", ...
# NOTE(review): this relies on cbind() naming unnamed arguments after the call
# expression -- confirm with names(MyData).
MyData <- cbind(
  dataset[, c(1:20, 49:504)],
  dataset$wr18,
  dataset$na26a,
  dataset$na26b,
  dataset$na26c,
  dataset$intsex,
  dataset$intcoh,
  dataset$intedu,
  dataset$intedu1,
  dataset$q164a,
  dataset$q164b_1,
  dataset$q164b_2,
  dataset$q164b_3
)

## Names of the MyData columns that are passed to amelia() through its `ords`
## argument, i.e. the variables Amelia is told to treat as ordinal.
## The last entries are spelled "dataset$<name>" because that is how the
## columns appended with cbind() are referred to in this script.
## NOTE(review): "dataset$intcoh" is appended to MyData but is not listed
## here -- confirm that leaving it out of the ordinal set is intended.

ord <- c("voiev49", "voiev16", "region8", "region6", "size", "hompop", "babies", "preteen", "teens", "adults", "q7d", "q8", "q9mon", "q10a", "q10b", "q10c1", "q10c2", "q10d", "q10reg", "q10mob", "q11aa", "q11ab", "q11ac", "q11ad", "q11ae", "q11af", "q11ag", "q11ah", "q11ai", "q11aj", "q11ak", "q11al", "q11am", "q11b1", "q11b2", "q11c", "q11fam", "q12a", "q12b", "q13isco", "q13isc9", "q13isc27", "q13e", "q13e2", "q13f", "q14a", "q14a1", "q14ed", "q14b", "q15isco", "q15isc9", "q15isc27", "q15e", "q16a", "q16a1", "q16ed", "q16b", "q17", "q18st", "q18a", "q18b", "q18b1", "q18c", "q18d", "q18e", "q18f", "q18g", "q18h", "q18st2", "q19ab", "q20", "q22isco", "q22isc9", "q22isc27", "q22kgn", "q22kgn25", "q22ekd", "q22ekd30", "q22e11", "q22e12", "q22f", "q23a", "q23b", "q23ab", "q25", "q29isco", "q29isc9", "q29isc27", "q29kgn", "q29kgn25", "q29ekd", "q29ekd30", "q29e12", "q29f", "q30a", "q30b", "q30ab", "q33", "q35", "q36", "q37", "q38", "q39a", "q40", "q41a", "q41a1", "q42st", "q42a", "q42b", "q42b1", "q42c", "q42d", "q42e", "q42f", "q42g", "q42h", "q43ab", "q44", "q46isco", "q46isc9", "q46isc27", "q46kgn", "q46kgn25", "q46ekd", "q46ekd30", "q46e11", "q46e12", "q46f", "q47a", "q47b", "q47ab", "q48a", "q48a1", "q48ed", "q48b", "q48c", "q49a", "q49b", "q50a", "q50b", "q50d", "q50e", "q50f", "q50g", "q51", "q52a", "q52b", "q52c", "q53a", "q53b", "q53c", "q54a", "q54b", "q54c", "q56a", "q56b", "q56c", "q56d", "q56e", "q56f", "q56g", "q56h", "q56i", "q56j", "q56k", "q56l", "q58", "q59", "q60a", "q60b", "q60c", "q60d", "q60e", "q60f", "q60g", "q60h", "q60i", "q60j", "q60k", "q60l", "q60m", "q60n", "q60o", "q60p", "q60r", "q61a", "q61b", "q61c", "q61d", "q61e", "q61f", "q61g", "q61h", "q61i", "q61j", "q61jx", "q61k", "q61l", "q61m", "q61n", "q61o", "q61p", "q61r", "q63", "q64_1", "q64_2", "q64_3", "q64_4", "q64_5", "q64obe", "q64pop", "q64thn", "q64wor", "q64hel", "q65", "q66", "q67", "q68", "q69", "q69n", "q70", "q70n", "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", 
"q79", "q80", "q81", "q82", "q83b", "q83ca", "q83cb", "q83cc", "q83cd", "q83ce", "q83cf", "q83cg", "q83ch", "q83ci", "q83cj", "q83ck", "q83cl", "q84b", "q84ca", "q84cb", "q84cc", "q84cd", "q84ce", "q84cf", "q84cg", "q84ch", "q84ci", "q84cj", "q84ck", "q84cl", "q85a", "q85b", "q85c", "q85d", "q86a", "q86b", "q86c", "q86d", "q86e", "q87a", "q87b", "q87c", "q87d", "q87e", "q87f", "q87g", "q87h", "q87i", "q88", "q89", "q91_a", "q91_b", "q91_c", "q91_d", "q91_e", "q91inc", "q91sec", "q91hrs", "q91pro", "q91mea", "q92a", "q92b", "q92c", "q92d", "q92e", "q92f", "q92g", "q92h", "q92j", "q92k", "q92l", "q92m", "q92n", "q93", "q94", "q95", "q96", "q97", "q98", "q99", "q100", "q101", "q102a", "q102b", "q102c", "q102d", "q102e", "q102f", "q102g", "q103", "q104", "q105", "q106", "q107", "q108a", "q108b", "q108c", "q108d", "q110", "q111", "q112", "q113_1", "q113_2", "q113_3", "q113_4", "q115", "q116", "q117xa","q117xb", "q117xc", "q117xd", "q117xe", "q117xf", "q117xg", "q117a", "q117b", "q117c", "q118a", "q118b", "q119a", "q119b", "q120a", "q120b", "q121a", "q121b", "q121c", "q121d", "q121e", "q121f", "q122xa", "q122xb", "q122a", "q122b", "q123a", "q123b", "q124za", "q124zb", "q217a", "q217b", "q218a", "q218b", "q219a", "q219b", "q220", "q221a", "q221b", "q125a", "q125b", "q126", "q127", "q128", "q129", "q130", "q131a", "q131a1", "q131ed", "q131b", "q131b1", "q131cd", "q131e", "q131f", "q131g", "q131h", "q131i", "q131l", "q131m", "q132a", "q132c", "q132da", "q132db", "q132dc", "dataset$wr18", "dataset$na26a", "dataset$na26b", "dataset$na26c", "dataset$intsex", "dataset$intedu", "dataset$intedu1", "dataset$q164a", "dataset$q164b_1", "dataset$q164b_2", "dataset$q164b_3")

## running the imputation:

# R raises "contrasts can be applied only to factors with 2 or more levels"
# when a factor with fewer than two levels enters a model matrix. Columns
# with fewer than two distinct observed values are therefore reported by name
# and kept out of the imputation model instead of failing anonymously.
# The time variable and the id columns are never dropped here.
# NOTE(review): if the error persists, inspect factor columns that may lose
# levels once incomplete rows are dropped inside amelia() -- confirm against
# the Amelia documentation.
id_columns <- c("ballot", "pgss_bal", "form", "issp", "supplem", "city100")
distinct_observed <- vapply(
  MyData,
  function(column) length(unique(column[!is.na(column)])),
  integer(1)
)
is_degenerate <- distinct_observed < 2L &
  !(names(MyData) %in% c("pgssyear", id_columns))
degenerate_columns <- names(MyData)[is_degenerate]

if (length(degenerate_columns) > 0) {
  message(
    "Excluding columns with fewer than two distinct observed values: ",
    paste(degenerate_columns, collapse = ", ")
  )
}

# Ten imputed data sets; pgssyear is the time index, the id columns are
# carried along but not used in the imputation model.
Counterfactual1 <- amelia(
  MyData[, !is_degenerate, drop = FALSE],
  m = 10,
  ts = "pgssyear",
  idvars = id_columns,
  ords = ord[!(ord %in% degenerate_columns)]
)

Then I receive the following error message: Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : contrasts can be applied only to factors with 2 or more levels. In addition: There were 27 warnings (use warnings() to see them)

How can I find out which variable causes the problem, and what the problem actually is? In some posts online I saw that this message appears in different procedures, often because of NAs in the data. However, the whole point of the imputation is to replace the NAs, so that should not be a problem. I also opened the file in SPSS: the data are displayed as numbers, not strings, so I am not sure what is wrong here.

Asiack
  • 47
  • 8

0 Answers0