I have written code in order to randomly add points to a numeric variable to increase the weighted mean score by 10% storing the new scores in variable S1.
This is done by calculating the total number of points that need to be added to increase the mean by 10%. Next step is to randomly select until the weighted sum of responses is equal to the target - but not adding points where the score is already 10 so as not to pass the maximum value on the scale. The final stage is to select whether the sum that is just above or just below the target is closest and select this sample to add points to.
The code works ok but doesn't look efficient. I am an R novice and have read that loops should be avoided as much as possible, but cannot work out an alternative. Is it possible to do what I am attempting, but more efficiently?
#Create random data
library(stats)
set.seed(21821)
ncust <- 1000
cust.df <- data.frame(cust.id=as.factor(c(1:ncust)))
wtvar <- rnorm(ncust, mean=1, sd=0.2)
V1 <- floor(rnorm(ncust, mean=7.5, sd=3))
V1[V1 > 10] <- 10
V1[V1 < 1] <- 1
cust.df$V1 <- V1
cust.df$wtvar <- wtvar
#Function to determine sample required
random.sample <- function(x) {
(pctadd <- (sum(cust.df$V1*cust.df$wtvar)*0.1)/sum(cust.df$V1[!cust.df$V1==10]*cust.df$wtvar[!cust.df$V1==10])) #percentage of resps (weighted) who need points added to make 10% increase
(numadd <- (sum(cust.df$V1*cust.df$wtvar)*0.1)) #sum of weights needed to make 10% increase
wgttot <- vector(mode="numeric", length=0)
idtot <- vector(mode="numeric", length=0)
id.ref <- cust.df$cust.id[!cust.df$V1==10]
repeat {
preidtot <- idtot
prewgttot <- wgttot
(t.id <- as.numeric(sample(id.ref, 1)))
(t.wgt <- cust.df$wtvar[cust.df$cust.id==t.id])
id.ref <- id.ref[!id.ref==t.id]
wgttot <- c(wgttot,t.wgt)
idtot <- c(idtot, t.id)
if (sum(wgttot) > numadd) break
}
prediff <- numadd - sum(prewgttot)
postdiff <- sum(wgttot) - numadd
if (prediff < postdiff) {
x <- preidtot
} else {
x <- idtot
}
return(x)
}
tempids <- random.sample()
#Apply sample rule
cust.df$S1 = ifelse(cust.df$cust.id %in% tempids, cust.df$V1 + 1, cust.df$V1)
#Check ~10% increase achieved
weighted.mean(cust.df$V1,cust.df$wtvar)
weighted.mean(cust.df$S1,cust.df$wtvar)