0

I'm trying to fit probability distributions in R using the EnvStats package, looping so that several columns are processed at once. The columns have different lengths, and the code fails with the error message: 'x' must be a numeric vector. It seems the data does not remain in numeric format, but I couldn't identify the cause of the error. Could anyone help? Many thanks.

The code follows:

# Four simulated samples of different lengths (50, 70, 35 and 80 values).
x = runif(n = 50, min = 1, max = 12)
y = runif(n = 70, min = 5, max = 15)
z = runif(n = 35, min = 1, max = 10)
m = runif(n = 80, min = 6, max = 18)

# Extend the three shorter vectors to the length of m; the added positions are NA.
length(x) = length(m)
length(y) = length(m)
length(z) = length(m)

# With equal lengths the four vectors can be combined as columns of one data frame.
df = data.frame(x=x,y=y,z=z,m=m)
df

library(EnvStats)

# nproc sets the number of rows of the result table; cont is the row index
# that the loop below writes to and increments.
nproc = 4
cont = 1
# Result table: one character column and two numeric columns.
# NOTE(review): `(nproc)` is just the value 4, so both numeric columns start
# filled with 4 -- presumably `numeric(nproc)` was intended; confirm.
dfr = data.frame(variavel = character(nproc), 
                 locationevd= (nproc), scaleevd= (nproc),
                 stringsAsFactors = F)


# i = 2

# For each column index 1..4: take that column, drop NAs, fit it with eevd()
# and store the column name and the first two fitted parameters in row `cont`
# of dfr.
for (i in 1:4) {
  
  print(i) 
  
  nome.var=colnames(df)
  
  # NOTE(review): this overwrites the data frame `df` with a single column, so
  # after the first iteration `df` is no longer the original data frame.
  df = df[,c(i)]
  # NOTE(review): na.omit() returns the vector with extra attributes attached;
  # presumably this is why eevd() reports "'x' must be a numeric vector" -- confirm.
  df = na.omit(df)
  
  # Record the name of the current column.
  variavela = nome.var[i]
  dfr$variavel[cont] = variavela
  
  # Fit the distribution; the trailing `;evd` only evaluates the fitted object.
  evd = eevd(df);evd
  # First and second entries of the fitted parameters are stored as
  # location and scale respectively.
  locationevd = evd$parameters[[1]]
  dfr$locationevd[cont] = locationevd
  scaleevd = evd$parameters[[2]]
  dfr$scaleevd[cont] = scaleevd

  # Advance to the next result row.
  cont = cont + 1
  
}

# Write the result table dfr to the file "Results.xls".
# NOTE(review): write_xlsx presumably produces the xlsx format, so an ".xlsx"
# extension may be what is wanted -- confirm.
writexl::write_xlsx(dfr, path = "Results.xls")
Marcel
  • 147
  • 4
  • Why not put them in a list rather than extending the shorter vectors to put them in a dataframe? – IRTFM Jan 07 '22 at 21:34

1 Answer

0

Two major changes to your code. First, use a list instead of a data frame, so that vectors of unequal length can be accommodated without padding them with NA:

# Simulated samples of unequal length, drawn in the same order as before.
x <- runif(50, min = 1, max = 12)
y <- runif(70, min = 5, max = 15)
z <- runif(35, min = 1, max = 10)
m <- runif(80, min = 6, max = 18)

# A named list holds vectors of different lengths without any NA padding.
vl <- list(x = x, y = y, z = z, m = m)
vl
# Install EnvStats on first use, then attach it. The original line was missing
# a closing parenthesis and did not parse. library() (unlike require()) stops
# with an error if the package still cannot be loaded.
if (!requireNamespace("EnvStats", quietly = TRUE)) {
  install.packages("EnvStats")
}
library(EnvStats)
   
# Number of rows in the result table (one per fitted variable).
nproc <- 4
# No separate `cont` counter is needed; the loop index is used for the rows.
# Preallocated result table: a name column plus two numeric parameter columns.
# numeric(nproc) gives a zero-filled vector of length nproc; the previous
# `(nproc)` was just the scalar 4 recycled down the column.
dfr <- data.frame(
  variavel = character(nproc),
  locationevd = numeric(nproc),
  scaleevd = numeric(nproc),
  stringsAsFactors = FALSE
)

Second, use a single loop index to address both the list elements and the result rows, instead of maintaining a separate "cont" counter:

# Fit each sample in `vl` with eevd() and record its name plus the first two
# fitted parameters in the matching row of `dfr`.
nome.var <- names(vl)  # loop-invariant, so looked up once before the loop
for (i in seq_along(vl)) {  # seq_along() iterates zero times if vl is empty
  evd <- eevd(vl[[i]])
  dfr$variavel[i] <- nome.var[i]
  # First and second fitted parameters are stored as location and scale.
  dfr$locationevd[i] <- evd$parameters[[1]]
  dfr$scaleevd[i] <- evd$parameters[[2]]
}

Which gets you the desired structure:

 dfr
  variavel locationevd scaleevd
1        x    5.469831 2.861025
2        y    7.931819 2.506236
3        z    3.519528 2.040744
4        m   10.591660 3.223352
IRTFM
  • 258,963
  • 21
  • 364
  • 487