2

In my last question it was pointed out that a smaller data set would be easier to read and understand as part of the reproducible example. Before asking again, I tried to shorten the data via dput(head(data)), but I get the same output as with dput(data) or dput(data[1:6, ]), or even dput(data)[1:6, ] (in this last case I also get the first 6 rows of the data after the whole dput).

Is there a simple way to do it? I didn't find anything among the dput options, and there must be a way to avoid deleting by hand what I do not want to show.

Here is the whole dput data:

>dput(data)
structure(list(GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 71L, 72L, 76L, 77L, 78L, 
83L, 87L, 88L, 89L, 93L, 96L, 97L, 101L, 103L, 104L, 105L, 106L, 
109L, 111L, 113L, 114L, 116L), .Label = c("GO:0000746", "GO:0000910", 
"GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399", "GO:0006412", 
"GO:0006457", "GO:0006464", "GO:0006468", "GO:0006486", "GO:0006520", 
"GO:0006725", "GO:0006766", "GO:0006810", "GO:0006811", "GO:0006839", 
"GO:0006897", "GO:0006950", "GO:0006970", "GO:0006974", "GO:0006979", 
"GO:0006986", "GO:0006997", "GO:0007005", "GO:0007010", "GO:0007029", 
"GO:0007031", "GO:0007033", "GO:0007034", "GO:0007049", "GO:0007059", 
"GO:0007114", "GO:0007124", "GO:0007126", "GO:0007165", "GO:0009408", 
"GO:0009409", "GO:0015031", "GO:0016044", "GO:0016050", "GO:0016070", 
"GO:0016071", "GO:0016072", "GO:0016192", "GO:0016567", "GO:0016568", 
"GO:0016570", "GO:0019725", "GO:0030435", "GO:0031505", "GO:0032196", 
"GO:0032989", "GO:0042221", "GO:0042254", "GO:0042594", "GO:0043543", 
"GO:0044255", "GO:0044257", "GO:0044262", "GO:0045333", "GO:0046483", 
"GO:0048193", "GO:0051169", "GO:0051186", "GO:0051276", "GO:0070271", 
"GO:0000278", "GO:0000902", "GO:0002181", "GO:0005975", "GO:0006325", 
"GO:0006353", "GO:0006360", "GO:0006366", "GO:0006383", "GO:0006397", 
"GO:0006401", "GO:0006414", "GO:0006418", "GO:0006470", "GO:0006605", 
"GO:0006629", "GO:0006865", "GO:0006869", "GO:0006873", "GO:0006887", 
"GO:0006914", "GO:0008033", "GO:0008213", "GO:0008643", "GO:0009311", 
"GO:0009451", "GO:0015931", "GO:0016197", "GO:0023052", "GO:0031399", 
"GO:0032543", "GO:0042255", "GO:0042273", "GO:0042274", "GO:0043144", 
"GO:0043934", "GO:0045454", "GO:0051052", "GO:0051321", "GO:0051603", 
"GO:0051604", "GO:0051726", "GO:0055086", "GO:0070647", "GO:0000054", 
"GO:0001403", "GO:0006352", "GO:0006354", "GO:0006364", "GO:0006413", 
"GO:0006417", "GO:0006497", "GO:0008380", "GO:0009072", "GO:0051049", 
"GO:0061025", "GO:0071554"), class = "factor"), GOdesc = structure(c(16L, 
17L, 23L, 19L, 58L, 62L, 59L, 37L, 39L, 40L, 38L, 3L, 4L, 67L, 
60L, 27L, 30L, 20L, 51L, 48L, 46L, 49L, 52L, 33L, 29L, 18L, 21L, 
34L, 64L, 63L, 2L, 14L, 1L, 43L, 28L, 56L, 47L, 45L, 41L, 9L, 
65L, 54L, 31L, 55L, 66L, 42L, 12L, 26L, 7L, 57L, 22L, 61L, 6L, 
44L, 53L, 50L, 35L, 8L, 10L, 5L, 11L, 25L, 24L, 32L, 15L, 13L, 
36L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), .Label = c("cell budding", "cell cycle", 
"cellular amino acid and metabolic process", "cellular aromatic compound metabolic process", 
"cellular carbohydrate metabolic process", "cellular component morphogenesis", 
"cellular homeostasis", "cellular lipid metabolic process", "cellular membrane organization", 
"cellular protein catabolic process", "cellular respiration", 
"chromatin modification", "chromosome organization and biogenesis", 
"chromosome segregation", "cofactor metabolic process", "conjugation", 
"cytokinesis", "cytoskeleton organization and biogenesis", "DNA metabolic process", 
"endocytosis", "ER organization and biogenesis", "fungal-type cell wall organization", 
"generation of precursor metabolites and energy", "golgi vesicle transport", 
"heterocycle metabolic process", "histone modification", "ion transport", 
"meiosis", "mitchondrion organization", "mitochondrial transport", 
"mRNA metabolic process", "nuclear transport", "nucleus organization", 
"peroxisome organization", "protein acylation", "protein complex biogenesis", 
"protein folding", "protein glycosylation", "protein modification process", 
"protein phosphorylation", "protein transport", "protein ubiquitination", 
"pseudohyphal growth", "response to chemical stimulus", "response to cold", 
"response to DNA damage stimulus", "response to heat", "response to osmotic stress", 
"response to oxidative stress", "response to starvation", "response to stress", 
"response to unfolded protein", "ribosome biogenesis", "RNA metabolic process", 
"rRNA metabolic process", "signal transduction", "sporulation resulting in formation of a cellular spore", 
"transcription", "translation", "transport", "transposition", 
"tRNA metabolic process", "vacuolar transport", "vacuole organizations", 
"vesicle organization", "vesicle-mediated transport", "vitamin metabolic process"
), class = "factor"), GSA_p33_SC = c(NA, -1, NA, NA, NA, NA, 
NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA, NA, 
-1, -1, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA, NA, NA, 
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA), GSA_p38_SC = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, NA, NA, -1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA), GSA_p38_X33 = c(NA, 
1, NA, NA, NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 1, 
1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, -1, NA, NA, 1, NA, NA), GSA_p52_SC = c(NA, NA, NA, NA, 
NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, 
-1, -1, NA, NA, NA), GSA_p52_X33 = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 
NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, -1, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA, 
NA, NA, NA), GSA_p64_SC = c(NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA, 1, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
1, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, -1, NA, -1, -1, 
NA, NA, NA, -1, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, -1, 1, 
-1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA
), GSA_p64_X33 = c(1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 1, NA, NA, 
NA, NA, NA, NA, -1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, -1, -1), GSA_SC_X33 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, 
1, NA, NA, 1, -1, NA, -1, NA, NA, NA, -1, 1, NA, NA, NA, NA, 
NA, -1, NA, NA, NA, NA, NA, NA)), .Names = c("GOterm", "GOdesc", 
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
-89L), class = "data.frame")

A shortened version could look like:

structure(list(GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L),
.Label = c("GO:0000746", "GO:0000910", "GO:0006091", "GO:0006259",
 "GO:0006351", "GO:0006399"), class = "factor"),
 GOdesc = structure(c(16L,17L, 23L, 19L, 58L, 62L),
.Label = c("cell budding", "cell cycle", 
    "cellular amino acid and metabolic process", "cellular aromatic compound
 metabolic process", "cellular carbohydrate metabolic process", "cellular
component morphogenesis"), class = "factor"),
GSA_p33_SC = c(NA, -1, NA, NA, NA, NA),
GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA), 
GSA_p38_SC = c(NA, NA, NA, NA, NA, NA), 
GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), 
GSA_p52_SC = c(NA, NA, NA, NA, NA, NA), 
GSA_p52_X33 = c(NA, NA, NA, NA, NA, NA),
GSA_p64_SC = c(NA, NA, NA, NA, NA, NA),
GSA_p64_X33 = c(1, NA, NA, NA, NA, NA),
GSA_SC_X33 = c(NA, NA, NA, NA, NA, NA)),
.Names = c("GOterm", "GOdesc", 
    "GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
    "GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
    -6L), class = "data.frame"))
llrs
  • 3,308
  • 35
  • 68

2 Answers

3

All of that extra funk is from your factor levels. If you know your problem will still be reproducible after dropping these levels, then you can consider (wait for it) droplevels:

> dput(droplevels(head(data)))
structure(list(GOterm = structure(1:6, .Label = c("GO:0000746", 
"GO:0000910", "GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399"
), class = "factor"), GOdesc = structure(c(1L, 2L, 4L, 3L, 5L, 
6L), .Label = c("conjugation", "cytokinesis", "DNA metabolic process", 
"generation of precursor metabolites and energy", "transcription", 
"tRNA metabolic process"), class = "factor"), GSA_p33_SC = c(NA, 
-1, NA, NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA), 
    GSA_p38_SC = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), GSA_p52_SC = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p52_X33 = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_SC = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_X33 = c(1, 
    NA, NA, NA, NA, NA), GSA_SC_X33 = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_)), .Names = c("GOterm", "GOdesc", 
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
6L), class = "data.frame") 

This is more easily demonstrated in the following example:

x <- factor("A", levels = LETTERS)
x
# [1] A
# Levels: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
dput(x)
# structure(1L, .Label = c("A", "B", "C", "D", "E", "F", "G", "H", 
# "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", 
# "V", "W", "X", "Y", "Z"), class = "factor")
dput(droplevels(x))
# structure(1L, .Label = "A", class = "factor")
A5C1D2H2I1M1N2O1R2T1
  • 190,393
  • 28
  • 405
  • 485
  • So the length of the `dput` is due to the levels? I mean, this works for me, but is there a more general solution? – llrs Nov 04 '13 at 12:17
  • @Llopis, see my edit, but in summary, yes, the length of your `dput` was so big because of the `levels` in your `factor` variables. An alternative is to create a small example that doesn't necessarily use your original data, but still reproduces the problem you're trying to solve. Working on such sample data also sometimes helps you solve your problem on your own, because you have to think about where the problem might lie when creating your sample data. – A5C1D2H2I1M1N2O1R2T1 Nov 04 '13 at 12:24
0

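Another way to shorten it up would be to convert the columns to character before dput. The data can then be read back in with as.data.frame, and the factor columns are rebuilt as factors (with only the levels that are actually present) when stringsAsFactors = TRUE, which was the default before R 4.0.0. Note that the numeric columns are also read back as character (or factor) and have to be converted back to numeric afterwards.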

First subset

> data2 <- data[sample(nrow(data), 4), ]

Then dput as characters

> d <- dput(lapply(data2, as.character))
structure(list(GOterm = c("GO:0000746", "GO:0070647", "GO:0006914", 
"GO:0007010"), GOdesc = c("conjugation", NA, NA, "cytoskeleton organization and biogenesis"
), GSA_p33_SC = c(NA_character_, NA_character_, NA_character_, 
NA_character_), GSA_p33_X33 = c(NA, NA, "1", "1"), GSA_p38_SC = c(NA_character_, 
NA_character_, NA_character_, NA_character_), GSA_p38_X33 = c(NA_character_, 
NA_character_, NA_character_, NA_character_), GSA_p52_SC = c(NA, 
"-1", NA, NA), GSA_p52_X33 = c(NA, NA, NA, "1"), GSA_p64_SC = c(NA, 
NA, NA, "1"), GSA_p64_X33 = c("1", NA, NA, NA), GSA_SC_X33 = c(NA, 
NA, NA, "1")), .Names = c("GOterm", "GOdesc", "GSA_p33_SC", "GSA_p33_X33", 
"GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", "GSA_p52_X33", "GSA_p64_SC", 
"GSA_p64_X33", "GSA_SC_X33"))

And read back in

> as.data.frame(d)
Rich Scriven
  • 97,041
  • 11
  • 181
  • 245