0

I have the following data:

# Example data: three x columns and two y columns with 100 random rows each.
# No seed is set, so the values differ from run to run.
x1 <- sample(1:10, 100, replace = TRUE)
x2 <- sample(1:3, 100, replace = TRUE)
x3 <- sample(50:100, 100, replace = TRUE)
y1 <- sample(50:100, 100, replace = TRUE)
y2 <- sample(50:100, 100, replace = TRUE)

mydf <- data.frame(x1, x2, x3, y1, y2)
head(mydf)
  x1 x2 x3  y1 y2
1  2  2 96 100 73
2  5  2 77  93 52
3 10  1 86  54 80
4  3  2 98  59 94
5  2  2 85  94 85
6  9  2 56  79 99

I want to compute correlations and produce the following output:

        x1                      x2                  x3
y1  r.value; p.value    r.value; p.value    r.value; p.value

y2  r.value; p.value    r.value; p.value    r.value; p.value

The r value needs to be rounded to 2 digits and the p-value to 3 digits.

How can this be done? Thanks for your help.

I tried the following:

library(Hmisc)
# Correlate every pair of columns of mydf (Pearson). Printing the result shows
# the correlation matrix, the sample size n, and the matrix of p-values P.
res = rcorr(as.matrix(mydf), type="pearson")
res

      x1    x2    x3    y1    y2
x1  1.00 -0.01 -0.16 -0.28 -0.21
x2 -0.01  1.00 -0.20 -0.10 -0.13
x3 -0.16 -0.20  1.00  0.14 -0.09
y1 -0.28 -0.10  0.14  1.00  0.12
y2 -0.21 -0.13 -0.09  0.12  1.00

n= 100 

P
   x1     x2     x3     y1     y2    
x1        0.9520 0.1089 0.0047 0.0364
x2 0.9520        0.0444 0.3463 0.1887
x3 0.1089 0.0444        0.1727 0.3948
y1 0.0047 0.3463 0.1727        0.2482
y2 0.0364 0.1887 0.3948 0.2482       

# NOTE: the two slices do not line up. res[[1]][,1:3] takes all rows of the
# first three columns of r, while res[[3]][1:2,] takes the first two rows of
# all columns of P, so paste0() pairs values that belong to different cells.
matrix(paste0(round(res[[1]][,1:3],2),';',round(res[[3]][1:2,],4)),ncol=3)
     [,1]           [,2]           [,3]          
[1,] "1;NA"         "-0.01;0.0444" "-0.16;NA"    
[2,] "-0.01;0.952"  "1;0.0047"     "-0.2;0.952"  
[3,] "-0.16;0.952"  "-0.2;0.3463"  "1;0.952"     
[4,] "-0.28;NA"     "-0.1;0.0364"  "0.14;NA"     
[5,] "-0.21;0.1089" "-0.13;0.1887" "-0.09;0.1089"

But the combination is not correct.

rnso
  • 23,686
  • 25
  • 112
  • 234
  • Do you need a string output for each cell (based on the `;`)? – akrun Dec 11 '14 at 08:36
  • Yes. That is the problem. I need to show both values together for each combination. – rnso Dec 11 '14 at 08:37
  • 1
    What is your specific programming question? Please show us the code you've tried and why it didn't meet your needs. Sharing your attempts helps everyone. It demonstrates that you've taken the time to try to help yourself, and it saves us from reiterating obvious answers, and it helps you get a more specific and relevant answer. – Henrik Dec 11 '14 at 08:39
  • Added my attempt in the question above. Thanks for pointing out. – rnso Dec 11 '14 at 08:44

2 Answers2

2

You can also do the following, which doesn't require specifying the positions of the rows/columns you need:

# Pair each correlation (2 digits) with its p-value (3 digits) as "r;p",
# keeping the shape and the row/column names of the correlation matrix.
matrix(
  paste(round(res$r, 2), round(res$P, 3), sep = ";"),
  nrow = nrow(res$r),
  dimnames = dimnames(res$r)
)

update : I added a dimnames parameter so the dimnames are "transmitted" to the result matrix.

For example, with the random sampling I had, you'll get :

   x1            x2            x3            y1            y2           
x1 "1;NA"        "-0.2;0.052"  "0.02;0.833"  "-0.04;0.674" "0.02;0.819" 
x2 "-0.2;0.052"  "1;NA"        "-0.13;0.202" "-0.01;0.896" "0.05;0.653" 
x3 "0.02;0.833"  "-0.13;0.202" "1;NA"        "-0.05;0.636" "-0.13;0.185"
y1 "-0.04;0.674" "-0.01;0.896" "-0.05;0.636" "1;NA"        "-0.02;0.858"
y2 "0.02;0.819"  "0.05;0.653"  "-0.13;0.185" "-0.02;0.858" "1;NA"
Cath
  • 23,906
  • 5
  • 52
  • 86
1

Try

# Rows 4:5 of the rcorr() result are y1/y2 and columns 1:3 are x1..x3.
# r is rounded to 2 digits and p to 3 digits, as requested in the question.
r2 <- matrix("", ncol=3, nrow=2,
         dimnames=list( paste0('y',1:2), paste0('x',1:3)))
r2[] <- paste(round(res$r[4:5,1:3],2), round(res$P[4:5,1:3],3), sep="; ")

Update

You could create a function like below

 # Build a character matrix of "r; p" strings correlating two groups of columns.
 #
 # Columns are sorted by name and split into two groups where the column name
 # with its first run of digits removed changes (e.g. x1, x2, ... vs y1, y2, ...).
 # The group that sorts first forms the columns of the result, the other group
 # forms the rows.
 #
 # df:       data frame passed to rcorr() via as.matrix(); its column names
 #           must form exactly two prefix groups.
 # r_digits: digits used when rounding the correlation (default 2).
 # p_digits: digits used when rounding the p-value (default 4).
 # Returns a character matrix with one "r; p" string per (row, column) pair.
 # Requires rcorr() from Hmisc to be attached.
 f1 <- function(df, r_digits = 2, p_digits = 4){
   df1 <- df[order(colnames(df))]
   prefix <- sub('\\d+', '', colnames(df1))
   # Index of the last column of the first group: where the prefix changes.
   split_at <- which(prefix[-1] != prefix[-length(prefix)])
   if (length(split_at) != 1L) {
     # Without this guard the matrix() call below fails with a cryptic message.
     stop("column names must form exactly two prefix groups (e.g. x1.., y1..)",
          call. = FALSE)
   }
   col_idx <- seq_len(split_at)
   row_idx <- (split_at + 1):ncol(df1)
   r2 <- matrix("", nrow=length(row_idx), ncol=length(col_idx),
           dimnames=list(colnames(df1)[row_idx], colnames(df1)[col_idx]))
   r1 <- rcorr(as.matrix(df1), type='pearson')
   r2[] <- paste(round(r1$r[row_idx, col_idx], r_digits),
                 round(r1$P[row_idx, col_idx], p_digits), sep="; ")
   r2
  }


  f1(mydf) #using your dataset (`set.seed` is different)
  #        x1              x2             x3            
  #y1 "0.07; 0.4773"  "0.02; 0.84"   "0.21; 0.0385"
  #y2 "-0.08; 0.4363" "0.08; 0.4146" "0.02; 0.8599"

  Testing with unordered dataset

  f1(mydf1)
  #          x1              x2             x3              x4             
  #y1 "-0.08; 0.4086" "0.17; 0.0945" "-0.25; 0.0112" "-0.16; 0.1025"
  #y2 "0.07; 0.5174"  "-0.1; 0.3054" "0.03; 0.7478"  "-0.06; 0.5776"

Update2

If you want a function that takes numeric index arguments:

# Build a character matrix of "r; p" strings for two chosen sets of columns.
#
# df:       data frame passed to rcorr() via as.matrix().
# v1:       columns that form the columns of the result, given as positive
#           positions or as column names.
# v2:       columns that form the rows of the result, same form as v1.
# r_digits: digits used when rounding the correlation (default 2).
# p_digits: digits used when rounding the p-value (default 4).
# Returns a length(v2) x length(v1) character matrix named after the columns.
# Requires rcorr() from Hmisc to be attached.
f2 <- function(df, v1, v2, r_digits = 2, p_digits = 4){
    # Accept column names as well as numeric positions.
    if (is.character(v1)) v1 <- match(v1, colnames(df))
    if (is.character(v2)) v2 <- match(v2, colnames(df))
    # Unknown names and out-of-range positions would otherwise surface as NA
    # dimnames or a subscript error inside the subsetting below.
    if (!all(c(v1, v2) %in% seq_len(ncol(df)))) {
        stop("v1 and v2 must refer to existing columns of df", call. = FALSE)
    }
    r2 <- matrix("", nrow=length(v2), ncol=length(v1),
          dimnames=list(colnames(df)[v2], colnames(df)[v1]))
    r1 <- rcorr(as.matrix(df), type='pearson')
    r2[] <- paste(round(r1$r[v2,v1], r_digits), round(r1$P[v2,v1], p_digits),
                  sep="; ")
    r2
}

f2(mydf, 1:3, 4:5)

f2(mydf, c(1,3), c(2,4,5))

data

 # Reproducible example data: four x columns and two y columns, 100 rows each.
 set.seed(29)
 x1 <- sample(1:10, 100, replace = TRUE)
 x2 <- sample(1:3, 100, replace = TRUE)
 x3 <- sample(50:100, 100, replace = TRUE)
 x4 <- sample(40:80, 100, replace = TRUE)
 y1 <- sample(50:100, 100, replace = TRUE)
 y2 <- sample(50:100, 100, replace = TRUE)

 mydfN <- data.frame(x1, x2, x3, x4, y1, y2)

 # Shuffle the column order to get the unordered data frame used with f1().
 set.seed(25)
 mydf1 <- mydfN[sample(colnames(mydfN))]
akrun
  • 874,273
  • 37
  • 540
  • 662
  • @rnso No problem. You can change the `numeric index` to a bit more general `res$r[paste0('x',1:3), paste0('y', 1:2)]` – akrun Dec 11 '14 at 09:01
  • Can we create a function f(mydf, 1:3, 4:5) and get output with column names put properly? (The names may be any and not just x1,y1 etc). – rnso Dec 11 '14 at 10:23
  • @rnso It depends upon the position of the `x` and `y` variables. Suppose if the `mydf` has columns not in the same order, first it needs to be ordered. – akrun Dec 11 '14 at 10:25
  • @rnso Okay, then I will try – akrun Dec 11 '14 at 10:40
  • The function is to get dataframe and 2 sets of column numbers, eg: f1(mydf, 1:4, 5:6) ; or f1(mydf, c(1,3,4), c(2,5,6)) – rnso Dec 11 '14 at 11:36
  • @rnso But that also needs some assumptions that you had already seen the dataset. My function automatically gets you the format based on the inherent groupings (at least by the pattern you showed). – akrun Dec 11 '14 at 11:39
  • It is perfect. I tried it on real data and it works very well. Thanks. – rnso Dec 11 '14 at 12:29