0

I need to calculate the kurtosis and skewness, using the tapply function, for each consecutive, non-overlapping bin (window) of five observations in the given dataset.

# Example data: 20 timestamps one second apart (stored as text) and one
# numeric measurement per timestamp.
data <- data.frame(Time = c("09:01:01", "09:01:02", "09:01:03", "09:01:04", "09:01:05", "09:01:06", "09:01:07", "09:01:08",
                      "09:01:09", "09:01:10", "09:01:11", "09:01:12", "09:01:13", "09:01:14", "09:01:15", "09:01:16",
                      "09:01:17", "09:01:18", "09:01:19", "09:01:20"),
             variable = c(36, 1, 46, 37, 29, 38, 11, 56, 45, 28, 6, 9, 51, 27, 38, 43, 16, 19, 33, 44))

data

  Time        variable
  09:01:01     36 
  09:01:02     1
  09:01:03     46
  09:01:04     37 
  09:01:05     29
  09:01:06     38
  09:01:07     11
  09:01:08     56
  09:01:09     45
  09:01:10     28
  09:01:11     6
  09:01:12     9
  09:01:13     51
  09:01:14     27
  09:01:15     38
  09:01:16     43
  09:01:17     16
  09:01:18     19
  09:01:19     33
  09:01:20     44

So I used the following code to calculate the skewness and kurtosis:

# Apply kurtosis() and skewness() to consecutive, non-overlapping bins of five
# values. The index rep(seq(ceiling(length(x)/5)), each=5) yields the bin ids
# 1,1,1,1,1,2,2,...; head(..., length(x)) trims it to the length of x so a
# trailing partial bin is still labelled correctly.
# NOTE(review): kurtosis() and skewness() are not base R functions; the package
# that provides them must be attached with library() before this code runs.
x <- data$variable
a <- tapply(x, head(rep(seq(ceiling(length(x)/5)), each=5),length(x)), kurtosis)
b <- tapply(x, head(rep(seq(ceiling(length(x)/5)), each=5),length(x)), skewness)

Error in tapply(x, head(rep(seq(ceiling(length(x)/5)), each = 5), length(x)),  : object 'kurtosis' not found

The expected result for the mean should be as follows:

Time        variable
  09:01:01     29.8 
  09:01:06     35.6
  09:01:11     26.2
  09:01:16     31

The expected result for the skewness should be as follows:

Time        variable
  09:01:01     -1.55899
  09:01:06     -0.49703
  09:01:11     0.213318
  09:01:16     -0.21706

thanks in advance

jogo
  • 12,469
  • 11
  • 37
  • 42
Kumar
  • 169
  • 1
  • 16
  • Which package should the functions `kurtosis()` and `skewness()` come from? Did you forget the line `library("...")`? – jogo Sep 25 '18 at 07:32
  • 1
    Yes, I forgot to load the library. However, the results do not match the results obtained from Excel. – Kumar Sep 25 '18 at 11:14
  • 1
    I used library(e1071) with this call: "tapply(x, head(rep(seq(ceiling(length(x)/5)), each=5),length(x)), kurtosis, type = 2)", and I got the error "Error in FUN(X[[i]], ...) : unused argument (type = 2)". – Kumar Sep 25 '18 at 12:10
  • It also seems that `data$variable` is a character or a factor in your example! Please explore the result of `str(data)` ... or use the data frame definition inserted by another SO user. Please read https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example – jogo Sep 25 '18 at 12:55

1 Answer

0

So it works:

# Reproducible example: 20 one-second observations. stringsAsFactors = FALSE
# keeps Time as character (older R versions convert strings to factors by
# default, which breaks the `[`-based extraction of the first time stamp).
data <- data.frame(
  Time = c(
    "09:01:01", "09:01:02", "09:01:03", "09:01:04", "09:01:05",
    "09:01:06", "09:01:07", "09:01:08", "09:01:09", "09:01:10",
    "09:01:11", "09:01:12", "09:01:13", "09:01:14", "09:01:15",
    "09:01:16", "09:01:17", "09:01:18", "09:01:19", "09:01:20"
  ),
  variable = c(
    36, 1, 46, 37, 29, 38, 11, 56, 45, 28,
    6, 9, 51, 27, 38, 43, 16, 19, 33, 44
  ),
  stringsAsFactors = FALSE
)

# skewness() and kurtosis() are provided by the e1071 package.
library("e1071")

# Label consecutive, non-overlapping bins of 5 rows. ceiling() gives a trailing
# partial bin its own label; with integer division (%/%) the leftover rows
# would be recycled into group 1, because gl() repeats its pattern up to
# `length`.
data$group <- gl(n = ceiling(nrow(data) / 5), k = 5, length = nrow(data))

# One summary row per bin: first time stamp of the bin, mean, and skewness /
# kurtosis with the default estimator (s1, k1) and with type = 2 (s2, k2).
# The type = 2 skewness reproduces the expected values from the question.
data.frame(
  Time = tapply(data$Time, data$group, `[`, 1),
  m = tapply(data$variable, data$group, mean),
  s1 = tapply(data$variable, data$group, FUN = skewness),
  s2 = tapply(data$variable, data$group, FUN = skewness, type = 2),
  k1 = tapply(data$variable, data$group, FUN = kurtosis),
  k2 = tapply(data$variable, data$group, FUN = kurtosis, type = 2)
)
#       Time    m         s1         s2        k1           k2
# 1 09:01:01 29.8 -0.7483143 -1.5589882 -1.259323  2.879232018
# 2 09:01:06 35.6 -0.2385734 -0.4970280 -1.719941  0.000368498
# 3 09:01:11 26.2  0.1023926  0.2133180 -2.020074 -1.875465251
# 4 09:01:16 31.0 -0.1041885 -0.2170593 -2.179430 -2.871439621

or

# Same per-bin summary, evaluated inside `data` so that columns can be
# referenced by their bare names.
with(data, {
  # Apply `f` (plus any extra arguments) to `x` within each bin.
  per_bin <- function(x, f, ...) tapply(x, group, f, ...)

  data.frame(
    Time = per_bin(Time, `[`, 1),
    m = per_bin(variable, mean),
    s1 = per_bin(variable, skewness),
    s2 = per_bin(variable, skewness, type = 2),
    k1 = per_bin(variable, kurtosis),
    k2 = per_bin(variable, kurtosis, type = 2)
  )
})
jogo
  • 12,469
  • 11
  • 37
  • 42
  • The given solution is not working for calculating s2 and k2. In addition, the Time variable does not come out as shown in the solution. – Kumar Sep 25 '18 at 12:44
  • The answer works as it is and gives the desired skewness values in `$s2`, as shown. Please pay attention to factors, e.g. use `data.frame(..., stringsAsFactors=FALSE)`. You may have to coerce your data: `data$Time <- as.character(data$Time)` – jogo Sep 25 '18 at 12:47