I have some columns in a data frame which I need to log-transform.
I'm using the following code to do so:
# Names of the 13 columns that should be log-transformed.
cols = c("LotArea", "MasVnrArea", "LotFrontage", "BsmtFinSF2", "LowQualFinSF",
"BsmtHalfBath", "KitchenAbvGr", "OpenPorchSF",
"EnclosedPorch", "MiscVal", "PoolArea", "3SsnPorch", "ScreenPorch")
# Intent: overwrite each selected column with log base 2 of (value + 1).
# NOTE(review): `train` is a data.table (see the class attribute in the dput
# output below). Depending on the data.table version, `train[, <integer vector>]`
# may evaluate `j` as a plain expression and return the 13 integer positions
# themselves instead of the columns -- that would explain the
# "Supplied 13 items to be assigned to 1455 items" recycling warning.
# TODO confirm; if so, select with `with = FALSE`, or transform in place using
# `:=` together with `.SDcols = cols`.
# NOTE(review): the target columns are integer while log() returns double,
# which matches the "Coerced 'double' RHS to 'integer'" warning.
train[, which(names(train) %in% cols)] <- log(train[, which(names(train) %in% cols)] + 1, 2)
And I'm getting a lot of these warnings:
Supplied 13 items to be assigned to 1455 items of column 'LotFrontage' (recycled leaving remainder of 12 items)
And also:
Coerced 'double' RHS to 'integer' to match the column's type; may have truncated precision.
Either change the target column to 'double' first (by creating a new 'double' vector length 1455 (nrows of entire table) and assign that; i.e. 'replace' column),
or coerce RHS to 'integer' (e.g. 1L, NA_[real|integer]_, as.*, etc) to make your intent clear and for speed.
Or, set the column type correctly up front when you create the table and stick to it, please
I get the same warning(s) for each of the columns specified; all of them are
integer columns.
Am I doing something wrong?
To help reproduce, here's the output of dput(head(train)):
structure(list(Id = 1:6, MSSubClass = c(60L, 20L, 60L, 70L, 60L,
50L), MSZoning = structure(c(4L, 4L, 4L, 4L, 4L, 4L), .Label = c("C (all)",
"FV", "RH", "RL", "RM"), class = "factor"), LotFrontage = c(0L,
0L, 1L, 1L, 1L, 1L), LotArea = c(0L, 0L, 1L, 1L, 1L, 1L), Street = c(2L,
2L, 2L, 2L, 2L, 2L), Alley = c(0L, 0L, 0L, 0L, 0L, 0L), LotShape = structure(c(4L,
4L, 1L, 1L, 1L, 1L), .Label = c("IR1", "IR2", "IR3", "Reg"), class = "factor"),
LandContour = structure(c(4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Bnk",
"HLS", "Low", "Lvl"), class = "factor"), Utilities = c(4L,
4L, 4L, 4L, 4L, 4L), LotConfig = structure(c(5L, 3L, 5L,
1L, 3L, 5L), .Label = c("Corner", "CulDSac", "FR2", "FR3",
"Inside"), class = "factor"), LandSlope = c(2L, 2L, 2L, 2L,
2L, 2L), Neighborhood = structure(c(6L, 25L, 6L, 7L, 14L,
12L), .Label = c("Blmngtn", "Blueste", "BrDale", "BrkSide",
"ClearCr", "CollgCr", "Crawfor", "Edwards", "Gilbert", "IDOTRR",
"MeadowV", "Mitchel", "NAmes", "NoRidge", "NPkVill", "NridgHt",
"NWAmes", "OldTown", "Sawyer", "SawyerW", "Somerst", "StoneBr",
"SWISU", "Timber", "Veenker"), class = "factor"), Condition1 = structure(c(3L,
2L, 3L, 3L, 3L, 3L), .Label = c("Artery", "Feedr", "Norm",
"PosA", "PosN", "RRAe", "RRAn", "RRNe", "RRNn"), class = "factor"),
Condition2 = structure(c(3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Artery",
"Feedr", "Norm", "PosA", "PosN", "RRAe", "RRAn", "RRNn"), class = "factor"),
BldgType = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1Fam",
"2fmCon", "Duplex", "Twnhs", "TwnhsE"), class = "factor"),
HouseStyle = structure(c(6L, 3L, 6L, 6L, 6L, 1L), .Label = c("1.5Fin",
"1.5Unf", "1Story", "2.5Fin", "2.5Unf", "2Story", "SFoyer",
"SLvl"), class = "factor"), OverallQual = c(7L, 6L, 7L, 7L,
8L, 5L), OverallCond = c(5L, 8L, 5L, 5L, 5L, 5L), YearBuilt = c(2003L,
1976L, 2001L, 1915L, 2000L, 1993L), YearRemodAdd = c(2003L,
1976L, 2002L, 1970L, 2000L, 1995L), RoofStyle = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Flat", "Gable", "Gambrel",
"Hip", "Mansard", "Shed"), class = "factor"), RoofMatl = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("ClyTile", "CompShg", "Membran",
"Metal", "Roll", "Tar&Grv", "WdShake", "WdShngl"), class = "factor"),
Exterior1st = structure(c(13L, 9L, 13L, 14L, 13L, 13L), .Label = c("AsbShng",
"AsphShn", "BrkComm", "BrkFace", "CBlock", "CemntBd", "HdBoard",
"ImStucc", "MetalSd", "Plywood", "Stone", "Stucco", "VinylSd",
"Wd Sdng", "WdShing"), class = "factor"), Exterior2nd = structure(c(14L,
9L, 14L, 16L, 14L, 14L), .Label = c("AsbShng", "AsphShn",
"Brk Cmn", "BrkFace", "CBlock", "CmentBd", "HdBoard", "ImStucc",
"MetalSd", "Other", "Plywood", "Stone", "Stucco", "VinylSd",
"Wd Sdng", "Wd Shng"), class = "factor"), MasVnrType = structure(c(3L,
1L, 3L, 1L, 3L, 1L), .Label = c("0", "BrkCmn", "BrkFace",
"Stone"), class = "factor"), MasVnrArea = c(0L, 0L, 1L, 1L,
1L, 1L), ExterQual = c(3L, 2L, 3L, 2L, 3L, 2L), ExterCond = c(2L,
2L, 2L, 2L, 2L, 2L), Foundation = structure(c(3L, 2L, 3L,
1L, 3L, 6L), .Label = c("BrkTil", "CBlock", "PConc", "Slab",
"Stone", "Wood"), class = "factor"), BsmtQual = c(4L, 4L,
4L, 3L, 4L, 4L), BsmtCond = c(3L, 3L, 3L, 4L, 3L, 3L), BsmtExposure = c(1L,
4L, 2L, 1L, 3L, 1L), BsmtFinType1 = c(6L, 5L, 6L, 5L, 6L,
6L), BsmtFinSF1 = c(706L, 978L, 486L, 216L, 655L, 732L),
BsmtFinType2 = c(1L, 1L, 1L, 1L, 1L, 1L), BsmtFinSF2 = c(0L,
0L, 1L, 1L, 1L, 1L), BsmtUnfSF = c(150L, 284L, 434L, 540L,
490L, 64L), TotalBsmtSF = c(856L, 1262L, 920L, 756L, 1145L,
796L), Heating = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Floor",
"GasA", "GasW", "Grav", "OthW", "Wall"), class = "factor"),
HeatingQC = c(5L, 5L, 5L, 4L, 5L, 5L), CentralAir = c(1L,
1L, 1L, 1L, 1L, 1L), Electrical = c(4L, 4L, 4L, 4L, 4L, 4L
), `1stFlrSF` = c(856L, 1262L, 920L, 961L, 1145L, 796L),
`2ndFlrSF` = c(854L, 0L, 866L, 756L, 1053L, 566L), LowQualFinSF = c(0L,
0L, 1L, 1L, 1L, 1L), GrLivArea = c(1710L, 1262L, 1786L, 1717L,
2198L, 1362L), BsmtFullBath = c(1L, 0L, 1L, 1L, 1L, 1L),
BsmtHalfBath = c(0L, 0L, 1L, 1L, 1L, 1L), FullBath = c(2L,
2L, 2L, 1L, 2L, 1L), HalfBath = c(1L, 0L, 1L, 0L, 1L, 1L),
BedroomAbvGr = c(3L, 3L, 3L, 3L, 4L, 1L), KitchenAbvGr = c(0L,
0L, 1L, 1L, 1L, 1L), KitchenQual = c(4L, 3L, 4L, 4L, 4L,
3L), TotRmsAbvGrd = c(8L, 6L, 6L, 7L, 9L, 5L), Functional = structure(c(7L,
7L, 7L, 7L, 7L, 7L), .Label = c("Maj1", "Maj2", "Min1", "Min2",
"Mod", "Sev", "Typ"), class = "factor"), Fireplaces = c(0L,
1L, 1L, 1L, 1L, 0L), FireplaceQu = c(0L, 3L, 3L, 4L, 3L,
0L), GarageType = structure(c(3L, 3L, 3L, 7L, 3L, 3L), .Label = c("0",
"2Types", "Attchd", "Basment", "BuiltIn", "CarPort", "Detchd"
), class = "factor"), GarageYrBlt = c(2003L, 1976L, 2001L,
1998L, 2000L, 1993L), GarageFinish = c(2L, 2L, 2L, 1L, 2L,
1L), GarageCars = c(2L, 2L, 2L, 3L, 3L, 2L), GarageArea = c(548L,
460L, 608L, 642L, 836L, 480L), GarageQual = c(3L, 3L, 3L,
3L, 3L, 3L), GarageCond = c(3L, 3L, 3L, 3L, 3L, 3L), PavedDrive = c(2L,
2L, 2L, 2L, 2L, 2L), WoodDeckSF = c(0L, 298L, 0L, 0L, 192L,
40L), OpenPorchSF = c(0L, 0L, 1L, 1L, 1L, 1L), EnclosedPorch = c(0L,
0L, 1L, 1L, 1L, 1L), `3SsnPorch` = c(0L, 0L, 1L, 1L, 1L,
1L), ScreenPorch = c(0L, 0L, 1L, 1L, 1L, 1L), PoolArea = c(0L,
0L, 1L, 1L, 1L, 1L), PoolQC = c(0L, 0L, 0L, 0L, 0L, 0L),
Fence = c(0L, 0L, 0L, 0L, 0L, 1L), MiscFeature = c(0L, 0L,
0L, 0L, 0L, 1L), MiscVal = c(0L, 0L, 1L, 1L, 1L, 1L), MoSold = c(2L,
5L, 9L, 2L, 12L, 10L), YrSold = c(2008L, 2007L, 2008L, 2006L,
2008L, 2009L), SaleType = structure(c(9L, 9L, 9L, 9L, 9L,
9L), .Label = c("COD", "Con", "ConLD", "ConLI", "ConLw",
"CWD", "New", "Oth", "WD"), class = "factor"), SaleCondition = structure(c(5L,
5L, 5L, 1L, 5L, 5L), .Label = c("Abnorml", "AdjLand", "Alloca",
"Family", "Normal", "Partial"), class = "factor"), SalePrice = c(208500L,
181500L, 223500L, 140000L, 250000L, 143000L)), .Names = c("Id",
"MSSubClass", "MSZoning", "LotFrontage", "LotArea", "Street",
"Alley", "LotShape", "LandContour", "Utilities", "LotConfig",
"LandSlope", "Neighborhood", "Condition1", "Condition2", "BldgType",
"HouseStyle", "OverallQual", "OverallCond", "YearBuilt", "YearRemodAdd",
"RoofStyle", "RoofMatl", "Exterior1st", "Exterior2nd", "MasVnrType",
"MasVnrArea", "ExterQual", "ExterCond", "Foundation", "BsmtQual",
"BsmtCond", "BsmtExposure", "BsmtFinType1", "BsmtFinSF1", "BsmtFinType2",
"BsmtFinSF2", "BsmtUnfSF", "TotalBsmtSF", "Heating", "HeatingQC",
"CentralAir", "Electrical", "1stFlrSF", "2ndFlrSF", "LowQualFinSF",
"GrLivArea", "BsmtFullBath", "BsmtHalfBath", "FullBath", "HalfBath",
"BedroomAbvGr", "KitchenAbvGr", "KitchenQual", "TotRmsAbvGrd",
"Functional", "Fireplaces", "FireplaceQu", "GarageType", "GarageYrBlt",
"GarageFinish", "GarageCars", "GarageArea", "GarageQual", "GarageCond",
"PavedDrive", "WoodDeckSF", "OpenPorchSF", "EnclosedPorch", "3SsnPorch",
"ScreenPorch", "PoolArea", "PoolQC", "Fence", "MiscFeature",
"MiscVal", "MoSold", "YrSold", "SaleType", "SaleCondition", "SalePrice"
), class = c("data.table", "data.frame"), row.names = c(NA, -6L
), .internal.selfref = <pointer: 0x00000000026c0788>)
Thanks