0

I have a data set like this:

# install.packages("dplyr")  # run once if dplyr is not installed yet
library(dplyr)

# Fix the RNG seed so the random `sales` values are the same on every run.
set.seed(42)

# Example data: 13 periods, a random `sales` value per period, and one 0/1
# indicator column per period. The P columns are deliberately listed in
# alphabetical order (P1, P10, P11, ..., P9) to reproduce the ordering problem.
df <- tibble(
  period = c(
    201501, 201502, 201503, 201504, 201505, 201506, 201507,
    201508, 201509, 201510, 201511, 201512, 201513
  ),
  sales = sample(1:100, 13),
  P1 = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  P10 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0),
  P11 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),
  P12 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0),
  P13 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
  P2 = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  P3 = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  P4 = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  P5 = c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
  P6 = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0),
  P7 = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0),
  P8 = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0),
  P9 = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0)
)
print(df)

So I have this:

# A tibble: 13 x 15
period sales    P1   P10   P11   P12   P13    P2    P3    P4    P5    P6
<dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
  1 201501    92     1     0     0     0     0     0     0     0     0     0
2 201502    60     0     0     0     0     0     1     0     0     0     0
3 201503    31     0     0     0     0     0     0     1     0     0     0
4 201504    74     0     0     0     0     0     0     0     1     0     0
5 201505    82     0     0     0     0     0     0     0     0     1     0
6 201506    86     0     0     0     0     0     0     0     0     0     1
7 201507    19     0     0     0     0     0     0     0     0     0     0
8 201508    32     0     0     0     0     0     0     0     0     0     0
9 201509    99     0     0     0     0     0     0     0     0     0     0
10 201510    47     0     1     0     0     0     0     0     0     0     0
11 201511    21     0     0     1     0     0     0     0     0     0     0
12 201512    77     0     0     0     1     0     0     0     0     0     0
13 201513    25     0     0     0     0     1     0     0     0     0     0
# ... with 3 more variables: P7 <dbl>, P8 <dbl>, P9 <dbl>

Is there an automated way to get the same tibble, but with the P+number columns in numeric order: P1, P2, P3, P4, etc.?

cutifire
  • 5
  • 4
  • 1
    Does this answer your question? [dply: order columns alphabetically in R](https://stackoverflow.com/questions/29873293/dply-order-columns-alphabetically-in-r) – englealuze May 28 '20 at 07:57

2 Answers2

2

We can use mixedsort/mixedorder from the gtools package:

library(dplyr)

# Keep `period` and `sales` first, then add the remaining columns in natural
# ("mixed") order so that P2 sorts before P10. `period` and `sales` also occur
# in the sorted names, but a column that is already selected keeps its first
# position. all_of() marks the sorted names as an external character vector.
df %>%
  select(period, sales, all_of(gtools::mixedsort(names(.))))


#  period sales    P1    P2    P3    P4    P5    P6    P7    P8    P9   P10   P11   P12   P13
#    <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 201501    34     1     0     0     0     0     0     0     0     0     0     0     0     0
# 2 201502    22     0     1     0     0     0     0     0     0     0     0     0     0     0
# 3 201503    17     0     0     1     0     0     0     0     0     0     0     0     0     0
# 4 201504    91     0     0     0     1     0     0     0     0     0     0     0     0     0
# 5 201505    27     0     0     0     0     1     0     0     0     0     0     0     0     0
# 6 201506    58     0     0     0     0     0     1     0     0     0     0     0     0     0
# 7 201507    57     0     0     0     0     0     0     1     0     0     0     0     0     0
# 8 201508     2     0     0     0     0     0     0     0     1     0     0     0     0     0
# 9 201509    24     0     0     0     0     0     0     0     0     1     0     0     0     0
#10 201510    92     0     0     0     0     0     0     0     0     0     1     0     0     0
#11 201511    21     0     0     0     0     0     0     0     0     0     0     1     0     0
#12 201512    59     0     0     0     0     0     0     0     0     0     0     0     1     0
#13 201513     4     0     0     0     0     0     0     0     0     0     0     0     0     1
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
1

Here is a base R solution, which relies on you having two additional columns at the beginning:

# Take the names of every column after the first two, strip all non-digit
# characters, and rank the columns by the number that is left.
i1 <- order(as.numeric(gsub('\\D+', '', names(df)[-(1:2)])))
# Columns 1 and 2 stay in place; the ranks are shifted by 2 because they were
# computed on the names with the first two columns removed.
df <- df[c(1:2, i1 + 2)]

Inspecting the names now, we get:

# Print the column names of `df` to check the resulting column order.
names(df)
#[1] "period" "sales"  "P1"     "P2"     "P3"     "P4"     "P5"     "P6"     "P7"     "P8"     "P9"     "P10"    "P11"    "P12"    "P13"  
Sotos
  • 51,121
  • 6
  • 32
  • 66