0

I have this dataset:

1 "MX000017004195504TMAX  310  I  310  I  310  I  320  I  330  I  320  I  320  I  330  I  330  I  330  I  330~
 2 "MX000017004195504TMIN  150  I  150  I  160  I  150  I  160  I  160  I  160  I  160  I  160  I  170  I  170~
 3 "MX000017004195504PRCP    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0~
 4 "MX000017004195505TMAX  310  I  310  I  310  I  300  I  300  I  300  I  310  I  310  I  310  I  300  I  300~
 5 "MX000017004195505TMIN  200  I  160  I  160  I  150  I  150  I  150  I  160  I  160  I  170  I  170  I  170~
 6 "MX000017004195505PRCP    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0~
 7 "MX000017004195506TMAX  300  I  290  I  280  I  270  I  280  I  260  I  230  I  270  I  280  I  280  I  270~
 8 "MX000017004195506TMIN  160  I  160  I  150  I  140  I  160  I  150  I  150  I  150  I  160  I  150  I  160~
 9 "MX000017004195506PRCP  103  I  435  I  422  I    0  I  243  I   17  I    0  I    0  I    0  I   27  I    0~
10 "MX000017004195507TMAX  270  I  270  I  260  I  260  I  240  I  240  I  190  I  210  I  250  I  250  I  260~
# ... with 1,704 more rows

I need to make every string or digit after the "I" letter in a column.

I tried:

# To separate age to 2 columns 
weather <- weather %>%
     separate(weather$V1, into= c("Date", "values"), sep = "I")

But it did not work.

Martin Gal
  • 16,640
  • 5
  • 21
  • 39
Moh
  • 1
  • 1

3 Answers3

0

You could try this:

library(dplyr)
library(tidyr)
library(stringr)

weather %>% 
  transmute(Date = str_extract(V1, "^MX[\\d|\\w]+"),
            values = str_split(str_remove(V1, "^MX[\\d|\\w]+"), "I")) %>% 
  unnest(values) %>% 
  mutate(values = str_trim(values))

which returns

# A tibble: 110 x 2
   Date                  values
   <chr>                 <chr> 
 1 MX000017004195504TMAX 310   
 2 MX000017004195504TMAX 310   
 3 MX000017004195504TMAX 310   
 4 MX000017004195504TMAX 320   
 5 MX000017004195504TMAX 330   
 6 MX000017004195504TMAX 320   
 7 MX000017004195504TMAX 320   
 8 MX000017004195504TMAX 330   
 9 MX000017004195504TMAX 330   
10 MX000017004195504TMAX 330   
# ... with 100 more rows

based on

Data

structure(list(V1 = c("MX000017004195504TMAX  310  I  310  I  310  I  320  I  330  I  320  I  320  I  330  I  330  I  330  I  330", 
"MX000017004195504TMIN  150  I  150  I  160  I  150  I  160  I  160  I  160  I  160  I  160  I  170  I  170", 
"MX000017004195504PRCP    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0", 
"MX000017004195505TMAX  310  I  310  I  310  I  300  I  300  I  300  I  310  I  310  I  310  I  300  I  300", 
"MX000017004195505TMIN  200  I  160  I  160  I  150  I  150  I  150  I  160  I  160  I  170  I  170  I  170", 
"MX000017004195505PRCP    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0  I    0", 
"MX000017004195506TMAX  300  I  290  I  280  I  270  I  280  I  260  I  230  I  270  I  280  I  280  I  270", 
"MX000017004195506TMIN  160  I  160  I  150  I  140  I  160  I  150  I  150  I  150  I  160  I  150  I  160", 
"MX000017004195506PRCP  103  I  435  I  422  I    0  I  243  I   17  I    0  I    0  I    0  I   27  I    0", 
"MX000017004195507TMAX  270  I  270  I  260  I  260  I  240  I  240  I  190  I  210  I  250  I  250  I  260"
)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"
))
Martin Gal
  • 16,640
  • 5
  • 21
  • 39
  • Hi, thanks its a good try, But notice that you ignored the other columns, I want to deal with I letter as seperator . – Moh Jul 28 '21 at 06:30
  • Orginal Data is from Text File: – Moh Jul 28 '21 at 06:39
  • I actually used all rows you showed in your question and I used "I" as seperator in `str_split()`. – Martin Gal Jul 28 '21 at 07:41
  • Thats Good, But tray copy those rows in text file then Deal with them as needed. ... Thank alot. – Moh Jul 28 '21 at 10:25
  • I don't know what you are asking for. I used the data you showed in your question. Copying those into a text file doesn't make any sense for me, especially in context of your question. You could edit your question and show your actual data there. Please take a look at [How to make a great reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example). – Martin Gal Jul 28 '21 at 10:49
  • Thanks My dear i found The Soulution, It locks Like This: 'code' # Tidying the weather data weather <- read.delim("D:/Projects/Tidy_data/weather.txt",header = F, sep = "", quote = "\"'" ,stringsAsFactors = FALSE , skipNul = FALSE) weather <- as.tibble(weather) weather # Making List of Column with "I" To Be Removed By Sequence se = seq(3,63,2) se # Loop for Deleting "I" Columns from se sequence library(stringr) library(dplyr) for(i in se) { g <- glue::glue("V{i}") weather[[g]] <- NULL print(g) } weather 'code' – Moh Jul 28 '21 at 21:53
  • You can acutally answer your own question. – Martin Gal Jul 28 '21 at 22:06
  • when i asket, I wasent have any replay, But still make Tries To find what I need, Thats All, Thank You for all Effort my Mate. – Moh Jul 28 '21 at 22:17
0
`### Tidying the weather data
weather <- read.delim("D:/Projects/Tidy_data/weather.txt",header = F, sep = "", quote = "\"'" ,stringsAsFactors = FALSE , skipNul = FALSE)
weather <- as.tibble(weather)
weather


### Making List of Column of "I" To Be Removed By Sequence
se = seq(3,63,2)
se


### Loop for Deleting "I" Columns from se sequence
library(stringr)
library(dplyr)

for(i in se) {
  g <-  glue::glue("V{i}")
  weather[[g]] <- NULL
  print(g)
  }
weather `
Moh
  • 1
  • 1
0
Result is:

> weather
# A tibble: 1,714 x 32
   V1       V2    V4    V6    V8    V10   V12   V14   V16   V18   V20   V22   V24   V26   V28   V30   V32   V34   V36  
   <chr>    <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
 1 MX00001~ 310   310   310   320   330   320   320   330   330   330   330   320   310   310   320   320   320   310  
 2 MX00001~ 150   150   160   150   160   160   160   160   160   170   170   160   160   160   170   170   160   160  
 3 MX00001~ 0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0    
 4 MX00001~ 310   310   310   300   300   300   310   310   310   300   300   310   300   310   320   320   330   330  
 5 MX00001~ 200   160   160   150   150   150   160   160   170   170   170   170   160   170   170   180   190   180  
 6 MX00001~ 0     0     0     0     0     0     0     0     0     0     0     12    24    0     0     0     0     0    
 7 MX00001~ 300   290   280   270   280   260   230   270   280   280   270   290   290   270   290   290   290   300  
 8 MX00001~ 160   160   150   140   160   150   150   150   160   150   160   180   170   140   160   150   160   170  
 9 MX00001~ 103   435   422   0     243   17    0     0     0     27    0     15    292   0     28    76    0     0    
10 MX00001~ 270   270   260   260   240   240   190   210   250   250   260   240   250   250   270   240   230   240  
# ... with 1,704 more rows, and 13 more variables: V38 <chr>, V40 <chr>, V42 <chr>, V44 <chr>, V46 <chr>, V48 <chr>,
#   V50 <chr>, V52 <chr>, V54 <chr>, V56 <chr>, V58 <chr>, V60 <chr>, V62 <int>
Moh
  • 1
  • 1