0

I tried to reshape my data frame from wide to long format. At the moment the data frame looks like this:

structure(list(study_site = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 
5L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 1L, 2L, 3L, 1L, 4L, 1L, 
4L, 3L, 3L, 3L, 1L, 3L, 5L, 4L, 4L, 4L, 3L, 3L, 5L, 5L, 4L, 4L, 
4L, 1L, 4L, 3L, 5L, 5L, 5L, 1L, 3L, 5L, 3L, 3L, 3L, 5L, 5L, 3L, 
4L, 2L), .Label = c("N", "no_nest", "O", "S", "W"), class = "factor"), 
    coords.N = structure(c(54L, 54L, 40L, 40L, 40L, 40L, 39L, 
    67L, 67L, 55L, 55L, 64L, 64L, 64L, 78L, 81L, 47L, 80L, 83L, 
    60L, 46L, 46L, 76L, 88L, 88L, 88L, 84L, 84L, 30L, 58L, 58L, 
    58L, 25L, 25L, 19L, 19L, 42L, 42L, 42L, 29L, 45L, 90L, 91L, 
    91L, 91L, 91L, 89L, 89L, 87L, 87L, 87L, 56L, 56L, 61L, 35L, 
    36L), .Label = c("40.40463", "48.40168", "48.40178", "48.40215", 
    "48.40235", "48.40309", "48.40390", "48.40393", "48.40396", 
    "48.40405", "48.40410", "48.40411", "48.40415", "48.40416", 
    "48.40424", "48.40425", "48.40430", "48.40435", "48.40436 ", 
    "48.40438", "48.40443", "48.40450", "48.40451", "48.40454", 
    "48.40455", "48.40459", "48.40460", "48.40461", "48.40466", 
    "48.40466 ", "48.40467", "48.40469", "48.40471", "48.40477", 
    "48.40479 ", "48.40481", "48.40482", "48.40483", "48.40488 ", 
    "48.40491", "48.40493", "48.40504 ", "48.40508", "48.40513", 
    "48.40515", "48.40519 ", "48.40522 ", "48.40523", "48.40525", 
    "48.40526", "48.40529", "48.40532", "48.40537", "48.40537 ", 
    "48.40538 ", "48.40543 ", "48.40549", "48.40549 ", "48.40557", 
    "48.40557 ", "48.40558", "48.40565", "48.40571", "48.40575", 
    "48.40580", "48.40584", "48.40586 ", "48.40591", "48.40596", 
    "48.40598", "48.40599", "48.40611", "48.40612", "48.40617", 
    "48.40626", "48.40632 ", "48.40633", "48.40635 ", "48.40636", 
    "48.40637", "48.40638 ", "48.40639", "48.40639 ", "48.40641 ", 
    "48.40652", "48.40655", "48.40656 ", "48.40657 ", "48.40687 ", 
    "48.40690 ", "48.40703", "48.40718", "48.40719", "48.40726", 
    "48.40742", "48.40748", "NO_DATA"), class = "factor"), coords.E = structure(c(67L, 
    67L, 49L, 49L, 49L, 49L, 27L, 67L, 67L, 70L, 70L, 68L, 68L, 
    68L, 87L, 94L, 68L, 83L, 90L, 73L, 52L, 52L, 2L, 95L, 95L, 
    95L, 93L, 93L, 32L, 69L, 69L, 69L, 55L, 55L, 24L, 24L, 29L, 
    29L, 29L, 30L, 48L, 85L, 1L, 1L, 1L, 1L, 78L, 78L, 79L, 79L, 
    79L, 64L, 64L, 63L, 66L, 45L), .Label = c(" 015.82024", " 015.82164", 
    "015.80237", "015.80263", "015.80309", "015.80341", "015.80369", 
    "015.80388", "015.80394", "015.80399", "015.80406", "015.80435", 
    "015.80436", "015.80466", "015.80512", "015.80517", "015.80548", 
    "015.80551", "015.80572", "015.80583", "015.80609", "015.80636", 
    "015.80659", "015.80703", "015.80723", "015.80779", "015.80795", 
    "015.80803", "015.80821", "015.80843", "015.80871", "015.80875", 
    "015.80888", "015.80897", "015.80901", "015.80903", "015.80905", 
    "015.80906", "015.80908", "015.80909", "015.80921", "015.80923", 
    "015.80929", "015.80939", "015.80993", "015.81007", "015.81018", 
    "015.81087", "015.81113", "015.81132", "015.81151", "015.81180", 
    "015.81241", "015.81273", "015.81305", "015.81406", "015.81422", 
    "015.81522", "015.81526", "015.81543", "015.81546", "015.81564", 
    "015.81628", "015.81632", "015.81678", "015.81682", "015.81700", 
    "015.81703", "015.81735", "015.81739", "015.81770", "015.81783", 
    "015.81784", "015.81800", "015.81849", "015.81992", "015.82012", 
    "015.82029", "015.82039", "015.82083", "015.82099", "015.82126", 
    "015.82180", "015.82230", "015.82232", "015.82255", "015.82265", 
    "015.82290", "015.82303", "015.82304", "015.82346", "015.82362", 
    "015.82376", "015.82398", "015.82451", "015.82500", "015.82519", 
    "015.82555", "015.82579", "015.82634", "NO_DATA"), class = "factor"), 
    study_ID = c(120L, 120L, 1L, 1L, 1L, 1L, 9L, 39L, 39L, 109L, 
    109L, 110L, 110L, 110L, 45L, 58L, 121L, 96L, 97L, 40L, 43L, 
    43L, 47L, 57L, 57L, 57L, 114L, 114L, 67L, 71L, 71L, 71L, 
    83L, 83L, 4L, 4L, 10L, 10L, 10L, 106L, 108L, 46L, 115L, 115L, 
    115L, 115L, 116L, 116L, 117L, 117L, 117L, 70L, 70L, 119L, 
    95L, 3L), species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 2L), .Label = c("barn swallow", "no_nest"), class = "factor"), 
    first_visit = c(1L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 5L, 0L, 
    1L, 0L, 2L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 
    1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 3L, 0L, 0L, 0L, 4L, 
    1L, 8L, 0L, 0L, 1L, 2L, 1L, 5L, 0L, 0L, 1L, 0L, 1L, 1L, 0L
    ), second_visit = c(1L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 5L, 
    0L, 1L, 0L, 2L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 
    0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 4L, 0L, 0L, 
    4L, 1L, 0L, 8L, 0L, 1L, 2L, 1L, 0L, 5L, 0L, 1L, 0L, 0L, 1L, 
    0L), third_visit = c(0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 
    0L, 5L, 0L, 1L, 2L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 
    0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 1L, 0L, 2L, 0L, 0L, 6L, 
    1L, 4L, 1L, 0L, 0L, 8L, 1L, 2L, 1L, 0L, 0L, 5L, 1L, 1L, 0L, 
    0L, 0L), used_1st_visit = c(0L, 2L, 1L, 0L, 0L, 0L, 1L, 0L, 
    1L, 2L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 
    0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 3L, 0L, 
    0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
    1L, 0L, 0L), used_2nd_visit = c(0L, 2L, 1L, 0L, 0L, 0L, 1L, 
    0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 
    1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 
    4L, 0L, 0L, 0L, 1L, 0L, 5L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 
    0L, 0L, 0L, 0L), used_3rd_visit = c(0L, 0L, 1L, 0L, 0L, 0L, 
    1L, 0L, 1L, 0L, 4L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 
    1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 1L, 0L, 2L, 
    0L, 0L, 6L, 1L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 2L, 
    0L, 1L, 0L, 0L, 0L), nest_condition = structure(c(3L, 5L, 
    5L, 2L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 2L, 5L, 3L, 2L, 
    4L, 5L, 5L, 5L, 5L, 3L, 2L, 5L, 5L, 2L, 2L, 5L, 1L, 5L, 5L, 
    5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 2L, 3L, 5L, 5L, 5L, 2L, 5L, 
    3L, 5L, 5L, 5L, 2L, 5L, 3L, 5L, 4L), .Label = c(" ready ", 
    "damaged", "in_progress", "no_nest", "ready"), class = "factor"), 
    nesting_site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L, 1L, 
    1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L), .Label = c("inside", "no_nest", "outside"), class = "factor"), 
    distance = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 3L, 4L, 3L, 6L, 4L, 4L, 2L, 2L, 4L, 2L, 
    2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 2L, 4L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    4L, 6L), .Label = c("1", "2", "3", "4", "no_data", "no_nest"
    ), class = "factor"), material = structure(c(5L, 5L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 5L, 5L, 1L, 1L, 2L), .Label = c("fine", "fine plaster", 
    "medium fine plaster", "no_data", "rough", "rough plaster", 
    "smooth plaster", "under construction", "wood"), class = "factor"), 
    housetype = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 
    3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 
    3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 5L, 5L, 5L, 
    4L, 4L, 5L, 3L, 3L, 3L, 3L, 5L, 5L, 3L, 3L, 3L, 4L, 4L, 5L, 
    5L, 4L), .Label = c("auto repair shop", "barn ", "hall", 
    "residence", "stable"), class = "factor"), usage_house = structure(c(5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L), .Label = c("auto_repair", 
    "barn", "inhabited", "under construction", "used"), class = "factor"), 
    age = c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L)), row.names = c(1L, 
2L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 21L, 22L, 23L, 24L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 
35L, 36L, 37L, 38L, 39L, 40L, 41L, 89L, 90L, 91L, 92L, 93L, 94L, 
95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 
106L, 107L, 108L, 109L, 111L), class = "data.frame")

used_1st..2nd... means that the birds have used this certain number of nests at the first, second,... control.

I would like to have that each row in my data frame to always represent a used/ unused nest as well as no_nest:

     ID species `1st_visit` `2nd_visit` `3rd_visit` used_1st_visit used_2nd_visit used_3rd_visit
  <dbl> <chr>         <dbl>       <dbl>       <dbl>          <dbl>          <dbl>          <dbl>
1   120 barn s~           1           1           0              0              0              0
2   120 barn s~           1           1           0              1              1              0
3   120 barn s~           1           1           0              1              1              0
4    39 barn s~           1           1           1              1              1              1
5     8 barn s~           1           1           1              1              0              0
6     8 barn s~           1           1           1              0              0              0

Unfortunately I have no idea how to concatenate the columns to get the final data frame. Does anybody has an idea?

Valabe
  • 71
  • 1
  • 1
  • 4

1 Answers1

1

I'm not completely sure what you are asking for, but this is what I understood: In the long data frame...

  • if all visits (coulmns used_first_visit, used_sec_visit etc.) are 0, combine them to one row, marking it 0
  • if any visits are not 0, keep as many rows as there are non-zero visits and mark them with 1

This is my dplyr-solution (it's not very pretty, but it works):

# create data
dat <- data.frame("visits" = c("first", "first", "second", "second", "third", "third"), "study_id" = rep(120, 6), "used_first_visit" = c(0, 2, 0, 2, 0, 2), "used_sec_visit" = c(0, 2, 0, 2, 0, 2), "used_thrd_visit" = rep(0, 6), "nest_cond" = c("damaged", "ready", "damaged", "ready", "damaged", "ready"))

# make long data frame and filter values
dat_long <- dat %>%
  pivot_longer(c(3:5),names_to = "whatever", values_to = "used")  %>% # make long data frame
  select(-c(whatever)) %>% # get rid of name column
  group_by(visits, nest_cond) %>% # group data
  mutate(used = ifelse(all(used == 0) & row_number() == 1, 10, used)) %>% # if the whole group is 0, mark one row for later filtering
  filter(used > 0 ) %>% # filter
  mutate(used = ifelse(used == 10, 0, 1)) # change to correct numbers

Let me know if this is not what you are looking for!

Em Laskey
  • 508
  • 4
  • 15
  • For future questions: The propability of getting an answer to your question is much higher if you provide a bit of your data, eg. with ````dput()````. Pictures are not very useful and a proper mini-data set just makes things a lot easier! – Em Laskey Aug 10 '20 at 14:27
  • thanks for the advice. I had no idea how to do this. I hope no its a bit better. – Valabe Aug 12 '20 at 06:40
  • Thanks for the effort, but it's still not possible to directly enter the data in R. Try ````dput(your_data)````and copy the resulting output directly into your question. See: https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example – Em Laskey Aug 12 '20 at 07:06
  • Also I'm not sure anymore whether my solution is what you are looking for - is my code working? If you're happy with it, you can also accept the answer by checking the tick next to it. – Em Laskey Aug 12 '20 at 07:07
  • Yes, your code is working, thanks. I am trying to apply it on my entire data frame right now. – Valabe Aug 12 '20 at 07:31