0

I try to create a Pearson Correlation Table between a series of returns (ret) on a given day. The number of Items I want to compare differs every day. It varies between 50 to 200 items.

On a given day that I want to analyze, (here on September 1 I want to see the correlation over the past 3 days), I have 1 table RetTable that contains all the returns per Item like this:

Id  RetDate     Ret
1   2017-08-31  -0.062720
1   2017-08-30  0.032650
1   2017-08-29  -0.086360
2   2017-08-31  -0.033100
2   2017-08-30  0.032900
2   2017-08-29  -0.032400
3   2017-08-31  -0.017900
3   2017-08-30  -0.018300
3   2017-08-29  -0.015200

From that table I created a another Table (MeanTable) with the MEAN for each item like this:

Id  MeanRet 
1   -0.038810
2   -0.010866
3   -0.017133

The next step is to subtract the average return from each return which gives me that table DiffRetAvgTable:

ID  RetDate DiffRetAvg
1   2017-08-31  -0.023910
1   2017-08-30  0.071460
1   2017-08-29  -0.047550
2   2017-08-31  -0.022234
2   2017-08-30  0.043766
2   2017-08-29  -0.021534
3   2017-08-31  -0.000767
3   2017-08-30  -0.001167
3   2017-08-29  0.001933

The next step I believe would be to multiply each DiffRetAvg results by its correspondent for each items. That's where I'm stuck. If I had only 2 or 3 items I could use simple arithmetic but I don't know how many items I have from one day to another and they are too many anyway.

The Correlation Table in pivot would look like this:

ID 1  2  3
1   1  ?  ?
2   ?  1  ?
3   ?  ?  1

(An item correlation with itself will always be 1)

Here is the code I have so far

DECLARE @MeanTable AS TABLE (Id INT, AvgRet NUMERIC(10,6))
DECLARE @SquareTable AS TABLE (Id INT, SqRoot NUMERIC(10,6))
DECLARE @RetTable AS TABLE (Id INT,RetDate DATE, Ret NUMERIC(10,6))
DECLARE @DiffRetAvgTable AS TABLE (Id INT, RetDate DATE, DiffRetAVG NUMERIC(10,6))
DECLARE @PairMultiTable AS TABLE (ID1 int, ID2 int,Exp2 NUMERIC(10,6))
DECLARE @PairMultiTable2 AS TABLE (ID1 int, ID2 int,SumPairMulti NUMERIC(10,6))

INSERT INTO @RetTable VALUES
(1,'2017-08-31' ,-0.06272),
(1,'2017-08-30' ,   0.03265),
(1,'2017-08-29' ,   -0.08636),
(2,'2017-08-31' ,   -0.0331),
(2,'2017-08-30' ,   0.0329),
(2,'2017-08-29' ,   -0.0324),
(3,'2017-08-31' ,   -0.0179),
(3,'2017-08-30' ,   -0.0183),
(3,'2017-08-29' ,   -0.0152)

--Step 1 : Get all the average by Items
INSERT INTO @MeanTable SELECT ID, avg(Ret) AS avgRet FROM @RetTable GROUP BY ID

--Step 2 : Substract the average from every occurence in each Item and call it DiffRetAvg
INSERT INTO @DiffRetAvgTable SELECT a.ID,RetDate, ret-AvgRet AS DiffRetAvg FROM @RetTable a INNER JOIN @MeanTable b on a.Id = b.Id

-- Step 3 Multiply each DiffRetAvg for each occurence by its correspondant for another occurence and call it SumPairMulti  
INSERT INTO @PairMultiTable2
SELECT  ID1 ,  ID2,  sum(Multi) AS SumPairMulti FROM (
SELECT a.ID AS ID1, b.ID AS ID2, cast(a.ID AS NVARCHAR(3)) + '_' + cast(b.id AS NVARCHAR(3)) AS Pair_ID, a.Retdate , a.diffRetAvg*b.DiffRetAVG AS Multi FROM @DiffRetAvgTable a
INNER JOIN @DiffRetAvgTable b on a.RetDate =b.RetDate) c
GROUP BY ID1, ID2

-- Step 4 Calculate the square of each DiffRetAvg for each occurence
INSERT INTO @PairMultiTable
SELECT  ID1 ,  ID2,  sum(Multi) AS Exp2 FROM (
SELECT a.ID AS ID1, b.ID AS ID2,  a.Retdate , a.diffRetAvg*b.DiffRetAVG AS Multi FROM @DiffRetAvgTable a
INNER JOIN @DiffRetAvgTable b ON a.RetDate =b.RetDate) c
WHERE ID1=ID2
GROUP BY ID1, ID2

--Step 5 Calculate the correlation between each Item with this formula : SumPairMulti/sqrt(ID1_Exp2*ID2_EXP2)

SELECT c.ID1, c.ID2,  d.SumPairMulti/SQRTProd AS Correlation FROM (
SELECT a.ID1, b.ID2, sqrt( a.Exp2*b.Exp2 ) AS SQRTProd FROM @PairMultiTable a
CROSS JOIN @PairMultiTable b ) c
INNER JOIN @PairMultiTable2 d ON d.ID1 = c.ID1 and d.id2 = c.id2

Which gives me this Table:

ID1 ID2 Correlation
1   1   1
2   1   0.980387747360583
3   1   -0.71935145479392
1   2   0.980387747360583
2   2   1
3   2   -0.578754772354275
1   3   -0.71935145479392
2   3   -0.578754772354275
3   3   1`

Now I would like to have an output like this:

ID    1     2     3
1    1    0.98  -0.72
2    0.98   1   -0.58
3    -0.72 -0.58   1
Rich
  • 271
  • 1
  • 13
  • `The next step I believe would be ..` mean you dont know what is the next step or you dont know how execute the step? – Juan Carlos Oropeza Nov 01 '17 at 19:39
  • if is `PIVOT` what you want, check [**here**](https://technet.microsoft.com/es-es/library/ms177410(v=sql.105).aspx) – Juan Carlos Oropeza Nov 01 '17 at 19:49
  • In your pivot rows are item id and columns are days? – Juan Carlos Oropeza Nov 01 '17 at 19:50
  • The next step is to multiply ID1 X ID2 for each date, then ID1 X ID3, then ID2 X ID3. When that is done the next step is to sum all the ID1-2 together, all the ID1-3, all the ID2-3. Then the Correlation table could be created with this formula: SUM(1-2) / SQRT of (sum ID1 square + sum of ID2 square) Than gives the 1-2 correlation .9804. I need this for all the item-item combination – Rich Nov 01 '17 at 20:13
  • The returns Table has returns per day. A typical correlation table would have 90 days of returns for each ID – Rich Nov 01 '17 at 20:17

1 Answers1

0
DECLARE @MeanTable AS TABLE (Id INT, AvgRet NUMERIC(10,6))
DECLARE @SquareTable AS TABLE (Id INT, SqRoot NUMERIC(10,6))
DECLARE @RetTable AS TABLE (Id INT,RetDate DATE, Ret NUMERIC(10,6))
DECLARE @DiffRetAvgTable AS TABLE (Id INT, RetDate DATE, DiffRetAVG NUMERIC(10,6))
DECLARE @PairMultiTable AS TABLE (ID1 int, ID2 int,Exp2 NUMERIC(10,6))
DECLARE @PairMultiTable2 AS TABLE (ID1 int, ID2 int,SumPairMulti NUMERIC(10,6))
DECLARE @CorrTable AS TABLE (ID1 INT, ID2 INT,  Correlation NUMERIC(10,6))

INSERT INTO @RetTable VALUES
(1,'2017-08-31' ,-0.06272),
(1,'2017-08-30' ,   0.03265),
(1,'2017-08-29' ,   -0.08636),
(2,'2017-08-31' ,   -0.0331),
(2,'2017-08-30' ,   0.0329),
(2,'2017-08-29' ,   -0.0324),
(3,'2017-08-31' ,   -0.0179),
(3,'2017-08-30' ,   -0.0183),
(3,'2017-08-29' ,   -0.0152)

--Step 1 : Get all the average by Items
INSERT INTO @MeanTable SELECT ID, avg(Ret) AS avgRet FROM @RetTable GROUP BY ID

--Step 2 : Substract the average from every occurence in each Item and call it DiffRetAvg
INSERT INTO @DiffRetAvgTable SELECT a.ID,RetDate, ret-AvgRet AS DiffRetAvg FROM @RetTable a INNER JOIN @MeanTable b on a.Id = b.Id

-- Step 3 Multiply each DiffRetAvg for each occurence by its correspondant for another occurence and call it SumPairMulti  
INSERT INTO @PairMultiTable2
SELECT  ID1 ,  ID2,  sum(Multi) AS SumPairMulti FROM (
SELECT a.ID AS ID1, b.ID AS ID2, cast(a.ID AS NVARCHAR(3)) + '_' + cast(b.id AS NVARCHAR(3)) AS Pair_ID, a.Retdate , a.diffRetAvg*b.DiffRetAVG AS Multi FROM @DiffRetAvgTable a
INNER JOIN @DiffRetAvgTable b on a.RetDate =b.RetDate) c
GROUP BY ID1, ID2

-- Step 4 Calculate the square of each DiffRetAvg for each occurence
INSERT INTO @PairMultiTable
SELECT  ID1 ,  ID2,  sum(Multi) AS Exp2 FROM (
SELECT a.ID AS ID1, b.ID AS ID2,  a.Retdate , a.diffRetAvg*b.DiffRetAVG AS Multi FROM @DiffRetAvgTable a
INNER JOIN @DiffRetAvgTable b ON a.RetDate =b.RetDate) c
WHERE ID1=ID2
GROUP BY ID1, ID2

--Step 5 Calculate the correlation between each Item with this formula : SumPairMulti/sqrt(ID1_Exp2*ID2_EXP2)
DELETE FROM CorrTable
INSERT INTO CorrTable
SELECT c.ID1, c.ID2,  d.SumPairMulti/SQRTProd AS Correlation FROM (
SELECT a.ID1, b.ID2, sqrt( a.Exp2*b.Exp2 ) AS SQRTProd FROM @PairMultiTable a
CROSS JOIN @PairMultiTable b ) c
INNER JOIN @PairMultiTable2 d ON d.ID1 = c.ID1 and d.id2 = c.id2

--Step 6 Create Pivot Table using : 

SQL Server 2005 Pivot on Unknown Number of Columns

DECLARE @cols AS NVARCHAR(MAX),
@query  AS NVARCHAR(MAX)
select @cols = STUFF((SELECT distinct ',' + QUOTENAME(ID2) 
            from CorrTable
    FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)') 
,1,1,'')
set @query = 'SELECT ID1, ' + @cols + ' from 
        (
        select ID1, ID2, Correlation
        from CorrTable
    ) x
    pivot 
    (
        min(Correlation)
        for ID2 in (' + @cols + ')
    ) p '
execute(@query)
Rich
  • 271
  • 1
  • 13