I've a dataframe with state level info, and I need to be able to extract that info and store it in a better format.
Goal: For each key, the total weight should be 100. If not, add the state as 'Other' with remaining weight. Some states may not have any weight, in such cases, it should be replaced with 100.
And the output required is
structure(list(Key = c("ATS", "AHP", "ATR", "BWBR", "BAMT", "BMPR"),
STATE = c("TX-22.41% GA-19.70% DC-11.97% MA-10.52% ...", "SC-13.46%
TX-13.37% NC-10.31% IA-9.54% ...", "TX-22.26% AR-16.47% MO-12.43%
NE-11.45% ...", "NJ", "TN-86.25% NJ-13.75%", "MA")), row.names =
c(NA, 6L), class = "data.frame")
Wrote the following code and it works. But is there a better way of handling the same?
allStateInfo = NULL
for (n in 1:length(maindt$t)) {
thisRecord = maindt[n,]
print(thisRecord$STATE)
sVec = unlist(strsplit(x = thisRecord$STATE, split = "% ", fixed = T))
totalWeight = 0
# If there is any data in State Name
if (length(sVec) > 0) {
for (z in 1:length(sVec)) {
# Split with String by identifying the percentage with Ex: Tx-77.88%
result = regexpr("[.][0-9]", sVec[z], perl = TRUE)
for (i in 1:length(result)) {
if (!is.na(result[i])) {
if (result[i] > 0) {
# Identify with - digit
position = regexpr("[-][0-9]", sVec[z], perl = TRUE)
state = substr(sVec[z], 1, position - 1)
wgt = substr(sVec[z], position + 1, 1000000L)
}
else {
# Get the state alone
state = sVec[z]
wgt = "0"
}
} else {
state = sVec[z]
wgt = "0"
}
# IF the state is ...., replace it with other
state = gsub(pattern = '...', replacement = 'Other', x = state, fixed = T)
wgt = gsub(pattern = '%', replacement = '', x = wgt, fixed = T)
wgt = as.numeric(wgt) / 100
# If the weight is 0, then get the balance of remaining weight in the state
if (wgt > 0) {
totalWeight = totalWeight + wgt
} else {
wgt = 1 - totalWeight
totalWeight = totalWeight + wgt
}
pDf = data.frame(key = maind$key, tstateInd = z, final_state = state, weight = wgt)
allStateInfo = rbind(allPropDf, pDf)
}
}
} else {
state = 'Other'
wgt = 1
totalWeight = totalWeight + wgt
pDf = data.frame(key = maind$key, tstateInd = z, final_state = state, weight = wgt)
allStateInfo = rbind(allPropDf, pDf)
}
if (totalWeight < 1) {
wgt = 1 - totalWeight
state = 'Other'
z = length(sVec) + 1
pDf = data.frame(key = maind$key, tstateInd = z, final_state = state, weight = wgt)
allStateInfo = rbind(allPropDf, pDf)
}
}