I have a list that looks like this:
lapply(sample_list, head, 3)
$`2016-04-24 00:00:00.tcp`
ports freq
8 443 296
12 80 170
5 23 92
$`2016-04-24 00:00:00.udp`
ports freq
4 161 138
7 53 45
1 123 28
$`2016-04-24 01:00:00.tcp`
ports freq
13 443 342
20 80 215
10 25 60
$`2016-04-24 01:00:00.udp`
ports freq
4 161 85
8 53 42
12 902 27
I want to merge the data frames that come from the same protocol (i.e. the tcp data frames together and the udp data frames together), matching rows on ports and keeping one freq column per hour.
The final result would be a new list with 2 data frames, one for tcp and one for udp, such that:
lapply(final_list, head, 3)
$tcp
ports freq.00:00:00 freq.01:00:00
1 443 296 342
2 80 170 215
3 23 92 51
$udp
ports freq.00:00:00 freq.01:00:00
1 161 138 85
2 53 45 42
3 123 28 19
DATA
dput(sample_list)
structure(list(`2016-04-24 00:00:00.tcp` = structure(list(ports = c("443",
"80", "23", "21", "22", "25", "445", "110", "389", "135", "465",
"514", "91", "995", "84", "902"), freq = structure(c(296L, 170L,
92L, 18L, 16L, 15L, 14L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Dim = 16L)), .Names = c("ports",
"freq"), row.names = c(8L, 12L, 5L, 3L, 4L, 6L, 9L, 1L, 7L, 2L,
10L, 11L, 15L, 16L, 13L, 14L), class = "data.frame"), `2016-04-24 00:00:00.udp` = structure(list(
ports = c("161", "53", "123", "902", "137", "514", "138",
"623", "69", "88", "500"), freq = structure(c(138L, 45L,
28L, 26L, 24L, 24L, 6L, 6L, 5L, 4L, 1L), .Dim = 11L)), .Names = c("ports",
"freq"), row.names = c(4L, 7L, 1L, 11L, 2L, 6L, 3L, 8L, 9L, 10L,
5L), class = "data.frame"), `2016-04-24 01:00:00.tcp` = structure(list(
ports = c("443", "80", "25", "23", "88", "21", "161", "22",
"445", "135", "389", "993", "548", "110", "143", "502", "514",
"81", "995", "102", "111", "311", "444", "789", "902", "91"
), freq = structure(c(342L, 215L, 60L, 51L, 42L, 32L, 31L,
18L, 18L, 6L, 5L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Dim = 26L)), .Names = c("ports", "freq"
), row.names = c(13L, 20L, 10L, 9L, 22L, 7L, 6L, 8L, 15L, 4L,
12L, 25L, 18L, 2L, 5L, 16L, 17L, 21L, 26L, 1L, 3L, 11L, 14L,
19L, 23L, 24L), class = "data.frame"), `2016-04-24 01:00:00.udp` = structure(list(
ports = c("161", "53", "902", "514", "123", "137", "69",
"138", "389", "443", "88", "623"), freq = structure(c(85L,
42L, 27L, 24L, 19L, 15L, 15L, 4L, 2L, 2L, 2L, 1L), .Dim = 12L)), .Names = c("ports",
"freq"), row.names = c(4L, 8L, 12L, 7L, 1L, 2L, 10L, 3L, 5L,
6L, 11L, 9L), class = "data.frame")), .Names = c("2016-04-24 00:00:00.tcp",
"2016-04-24 00:00:00.udp", "2016-04-24 01:00:00.tcp", "2016-04-24 01:00:00.udp"
))
Bonus question: What is the structure of freq? I have never seen int [1:16(1d)] before.
str(sample_list$`2016-04-24 00:00:00.tcp`)
'data.frame': 16 obs. of 2 variables:
$ ports: chr "443" "80" "23" "21" ...
$ freq : int [1:16(1d)] 296 170 92 18 16 15 14 4 3 2 ...
The code I used to create the list (in this case called try1):
# Step 1: break every element of per_hour1 apart by its `protocol` column
# (presumably each element is one hour's data frame -- confirm against the
# code that builds per_hour1).
protocol_list <- lapply(
  per_hour1,
  function(hourly) split(hourly, hourly$protocol)
)

# Step 2: apply dest.ports() to each per-protocol piece inside each element.
Analytic_Protocol_List <- lapply(
  protocol_list,
  function(by_protocol) lapply(by_protocol, dest.ports)
)

# Step 3: flatten the two-level nesting by exactly one level, so the outer
# and inner names are joined with a dot (e.g. "2016-04-24 00:00:00.tcp"),
# then keep only the first component of every dest.ports() result.
try1 <- lapply(
  unlist(Analytic_Protocol_List, recursive = FALSE),
  function(result) result[[1]]
)
Note that solutions from similar questions do not work for this case. Maybe because of the structure?