here is the version using this matlib.py by Ernesto P. Adorio. From him you need
With these following code find coeff of linear regression
from matlib import transpose, mattmat, vec2colmat, mat2vec, matdim, matprint
from qr import qr
def readdat():
f = open('dat','r')
x, y = [], []
f.next()
for line in f:
val = line.split()
y.append(float(val[1]))
x.append([float(p) for p in val[2:]])
return x, y
def bsub(r, z):
""" solves "R b = z", where r is triangular"""
m, n = matdim(r)
p, q = matdim(z)
b = [[0] * n]
pp, qq = matdim(b)
for j in range(n-1, -1, -1):
zz = z[0][j] - sum(r[j][k]*b[0][k] for k in range(j+1, n))
b[0][j] = zz / r[j][j]
return b
def linreg(y, x):
# prepend x with 1
for xx in x:
xx.insert(0, 1.0)
# QR decomposition
q, r = qr(x)
# z = Q^T y
z = mattmat(q, vec2colmat(y))
# back substitute to find b in R b = z
b = bsub(r, transpose(z))
b = b[0]
return b
def tester():
# read test data
x, y = readdat()
# calculate coeff
b = linreg(y, x)
for i,coef in enumerate(b):
print 'coef b%d: %f' % (i, coef)
if __name__ == "__main__":
tester()
Took test data from here: Multiple Regression in Data Mining, which looks like
Case Y X1 X2 X3 X4 X5 X6
1 43 51 30 39 61 92 45
2 63 64 51 54 63 73 47
3 71 70 68 69 76 86 48
4 61 63 45 47 54 84 35
5 81 78 56 66 71 83 47
6 43 55 49 44 54 49 34
7 58 67 42 56 66 68 35
8 71 75 50 55 70 66 41
9 72 82 72 67 71 83 31
10 67 61 45 47 62 80 41
11 64 53 53 58 58 67 34
12 67 60 47 39 59 74 41
13 69 62 57 42 55 63 25
14 68 83 83 45 59 77 35
15 77 77 54 72 79 77 46
16 81 90 50 72 60 54 36
17 74 85 64 69 79 79 63
18 65 60 65 75 55 80 60
19 65 70 46 57 75 85 46
20 50 58 68 54 64 78 52
with sample output (NOTE: this is not my output, the example's!!)
Multiple R-squared 0.656
Residual SS 738.900
Std. Dev. Estimate 7.539
Coefficient StdError t-statistic p-value
Constant 13.182 16.746 0.787 0.445
X1 0.583 0.232 2.513 0.026
X2 -0.044 0.167 -0.263 0.797
X3 0.329 0.219 1.501 0.157
X4 -0.057 0.317 -0.180 0.860
X5 0.112 0.196 0.570 0.578
X6 -0.197 0.247 -0.798 0.439
The code above printed this. Need more flipping textbook to do the stdev etc. but got the number i expected for coeffs.
python linreg.py
coef b0: 13.182283
coef b1: 0.583462
coef b2: -0.043824
coef b3: 0.328782
coef b4: -0.057067
coef b5: 0.111868
coef b6: -0.197083