5

I'm trying to generate a density plot for a list of x,y points, each one of them associated to a given density value. See this image to see what I'm after.

I tried applying the code written by Joe Kington in this answer but it returns the numpy.linalg.linalg.LinAlgError: singular matrix error.

This is a MWE of my code (basically the same code by Joe, only the data arrays are changed):

import numpy as np
import matplotlib.pyplot as plt
import scipy.interpolate

x = np.array([0.005, 0.018, 0.008, 0.015, 0.016, 0.0135, 0.0155, 0.0155, 0.0105, 0.005, 0.0125, 0.0185, 0.0095, 0.003, 0.019, 0.0175, 0.0165, 0.011, 0.007, 0.0195, 0.017, 0.011, 0.0125, 0.0165, 0.0045, 0.0145, 0.02, 0.0185, 0.001, 0.015, 0.0105, 0.016, 0.0185, 0.0035, 0.0025, 0.0015, 0.0055, 0.0185, 0.005, 0.0135, 0.0175, 0.0095, 0.0095, 0.0115, 0.0025, 0.0105, 0.0015, 0.0045, 0.011, 0.009, 0.0045, 0.013, 0.016, 0.009, 0.018, 0.0145, 0.013, 0.0105, 0.019, 0.0145, 0.0145, 0.016, 0.014, 0.01, 0.018, 0.0075, 0.0195, 0.019, 0.012, 0.0035, 0.015, 0.0095, 0.0005, 0.0045, 0.011, 0.005, 0.019, 0.0015, 0.01, 0.0055, 0.0005, 0.018, 0.0155, 0.0065, 0.016, 0.002, 0.015, 0.0045, 0.0075, 0.0035, 0.0145, 0.018, 0.0145, 0.009, 0.0125, 0.005, 0.002, 0.0125, 0.013, 0.002, 0.007, 0.0125, 0.006, 0.015, 0.009, 0.0115, 0.0095, 0.016, 0.0045, 0.0035, 0.004, 0.0195, 0.0085, 0.0115, 0.011, 0.0175, 0.0115, 0.0085, 0.0185, 0.009, 0.007, 0.006, 0.0195, 0.018, 0.0115, 0.004, 0.017, 0.001, 0.0065, 0.018, 0.0175, 0.0115, 0.013, 0.0095, 0.0175, 0.0175, 0.012, 0.006, 0.015, 0.0075, 0.0015, 0.0085, 0.011, 0.0025, 0.001, 0.0105, 0.02, 0.0005, 0.0035, 0.0185, 0.0085, 0.011, 0.0005, 0.0055, 0.018, 0.019, 0.004, 0.01, 0.002, 0.0005, 0.0085, 0.0095, 0.0175, 0.0035, 0.0125, 0.0085, 0.0175, 0.011, 0.011, 0.0075, 0.0185, 0.0115, 0.0085, 0.005, 0.002, 0.003, 0.0095, 0.007, 0.011, 0.001, 0.0135, 0.003, 0.0125, 0.007, 0.0055, 0.0075, 0.019, 0.0055, 0.001, 0.0055, 0.003, 0.0085, 0.017, 0.01, 0.0065, 0.008, 0.013, 0.005, 0.0115, 0.005, 0.0055, 0.019, 0.001, 0.0095, 0.011, 0.008, 0.0165, 0.0195, 0.0025, 0.02, 0.0045, 0.0175, 0.018, 0.012, 0.0055, 0.008, 0.0025, 0.0155, 0.0055, 0.003, 0.0055, 0.0065, 0.011, 0.013, 0.0075, 0.0045, 0.005, 0.004, 0.0155, 0.0075, 0.0075, 0.0065, 0.0105, 0.0185, 0.0045, 0.0175, 0.0055, 0.0045, 0.0145, 0.015, 0.02, 0.007, 0.019, 0.019, 0.0075, 0.004, 0.009, 0.016, 0.0125, 0.0135, 0.012, 0.0145, 0.017, 0.006, 0.0085, 0.015, 0.0105, 0.0005, 0.003, 0.011, 0.0035, 0.0065, 0.011, 0.017, 0.003, 0.0145, 0.011, 0.0025, 0.0175, 0.011, 0.014, 0.01, 0.004, 0.0015, 0.0075, 0.0095, 0.009, 0.0195, 0.0025, 0.0135, 0.015, 0.006, 0.016, 0.016, 0.0105, 0.0065, 0.011, 0.019, 0.0145, 0.0065, 0.0185, 0.019, 0.0075, 0.0095, 0.0145, 0.013, 0.0175, 0.0085, 0.002, 0.005, 0.013, 0.0045, 0.0175, 0.01, 0.019, 0.004, 0.018, 0.0135, 0.0105, 0.0095, 0.0015, 0.0145, 0.019, 0.02, 0.012, 0.0145, 0.018, 0.0195, 0.001, 0.013, 0.0145, 0.0015, 0.0025, 0.0085, 0.0075, 0.0035, 0.019, 0.0125, 0.017, 0.0145, 0.003, 0.011, 0.0135, 0.003, 0.0085, 0.0195, 0.011, 0.015])
y = np.array([7.1, 9.35, 6.7, 9.9, 7.85, 7.4, 8.3, 8.8, 9.55, 8.55, 9.45, 8.7, 8.45, 8.0, 7.6, 8.45, 7.25, 7.95, 7.6, 7.75, 8.4, 7.55, 7.5, 7.1, 8.7, 7.0, 8.55, 9.45, 6.95, 9.6, 9.4, 7.65, 7.7, 9.15, 8.15, 9.95, 8.1, 8.6, 9.2, 8.8, 7.8, 7.85, 9.2, 6.9, 8.1, 9.8, 8.05, 7.45, 7.05, 7.3, 9.25, 8.8, 7.7, 9.25, 9.0, 6.7, 6.9, 9.15, 7.9, 7.35, 7.8, 7.35, 8.0, 7.0, 9.75, 8.95, 7.75, 9.25, 8.95, 7.6, 7.2, 7.6, 9.35, 7.3, 7.0, 6.65, 7.0, 9.9, 6.85, 9.8, 7.6, 9.35, 7.7, 8.2, 9.55, 7.0, 8.05, 9.95, 7.2, 9.7, 9.65, 8.85, 7.2, 9.9, 7.35, 6.9, 7.65, 9.8, 8.6, 8.75, 8.8, 7.1, 7.05, 7.8, 8.9, 8.15, 10.05, 9.95, 6.85, 7.1, 8.6, 9.45, 7.4, 7.45, 6.6, 9.9, 7.7, 9.95, 9.9, 8.1, 10.0, 7.3, 7.85, 9.95, 8.6, 8.55, 9.7, 8.6, 8.8, 9.6, 8.45, 6.65, 7.05, 8.5, 8.85, 9.55, 9.75, 8.95, 6.9, 6.8, 7.15, 9.95, 9.05, 8.1, 9.4, 8.05, 7.55, 8.85, 9.9, 9.65, 6.65, 8.6, 7.15, 8.95, 8.45, 8.8, 9.3, 9.4, 8.3, 9.85, 7.45, 6.85, 7.25, 7.55, 7.35, 9.5, 9.85, 9.75, 7.75, 7.55, 6.65, 6.6, 7.35, 8.25, 7.5, 8.2, 9.0, 7.95, 8.3, 8.15, 7.7, 7.45, 7.75, 7.8, 8.7, 9.9, 8.2, 7.1, 8.9, 8.85, 6.8, 7.2, 7.1, 7.65, 8.7, 6.9, 9.4, 7.25, 9.8, 8.4, 7.6, 8.5, 7.95, 6.7, 8.45, 9.2, 8.8, 7.85, 7.95, 8.7, 7.55, 9.6, 8.85, 8.9, 8.1, 8.25, 8.1, 8.3, 8.9, 7.1, 9.8, 8.25, 8.75, 6.85, 8.9, 6.95, 9.0, 8.35, 9.0, 7.15, 8.9, 8.2, 9.15, 6.65, 9.35, 8.85, 6.85, 7.8, 8.4, 7.75, 7.55, 7.85, 7.6, 8.2, 7.15, 8.55, 7.8, 8.8, 9.75, 9.0, 9.65, 7.15, 7.3, 7.1, 9.7, 7.75, 8.85, 9.75, 7.75, 7.1, 9.8, 9.95, 7.0, 9.0, 6.65, 7.55, 6.7, 7.65, 9.7, 7.15, 8.6, 8.55, 7.0, 9.4, 7.25, 9.0, 9.45, 8.2, 7.9, 8.95, 8.05, 8.9, 7.7, 7.35, 7.55, 9.75, 8.8, 7.35, 8.2, 8.7, 8.7, 8.2, 7.6, 8.4, 7.15, 8.8, 7.25, 7.4, 7.65, 9.2, 7.3, 7.05, 8.45, 7.0, 8.55, 8.2, 8.45, 7.4, 9.15, 8.45, 7.15, 8.75, 7.05, 7.5, 9.45, 8.85, 7.15, 7.85, 8.9, 8.8, 9.2, 8.1, 9.95, 7.55, 7.4, 9.65, 6.85, 8.85, 8.9, 7.0, 7.2, 8.6, 7.4, 8.55, 7.45, 8.15, 9.45, 6.85])
z = np.array([3021.0279029149683, 4975.3037400799076, 2166.9841077494534, 6289.9297927621046, 1769.826392967929, 1718.8244103972752, 1762.4826301548458, 2892.0488281847693, 6271.3213266755065, 2065.6752057097788, 3376.244940630062, 2082.5656535205321, 1823.5812514071088, 1973.1591378311682, 1797.3251073485019, 2612.0911561842113, 2249.1162757223706, 2398.027412668795, 2502.482089998005, 1819.1869918508887, 2377.09819196745, 1781.4988210953811, 1706.0247161815421, 1909.0435719934635, 1662.0553564384486, 2132.6588030625549, 2136.2280746624447, 5130.5751044254675, 1793.6247353368949, 6337.4932727294181, 4462.7694292877422, 2110.5308215132864, 1867.4707084026049, 2088.1839351230669, 2461.8645333625827, 5419.1889489642499, 2129.4105626134383, 2005.6244970468119, 3827.1395925866591, 2513.2456828786935, 2164.255723310996, 2206.3593513204733, 2790.194999425913, 1877.3040520904212, 1879.8192626127952, 5842.4922672912217, 2439.7109266628595, 1825.4377748685583, 1992.6098863537443, 1903.4690337855423, 4266.0357702742913, 1910.2633981988065, 1881.8280410083426, 3366.6103402944168, 2647.1141831688333, 1875.7829962178498, 1918.1350622011096, 2053.737955354547, 1828.9518511755123, 1884.073961574501, 1865.9550644370991, 1763.9694644756794, 1926.8518278729125, 2097.1403545248913, 6198.7268871463293, 1708.009992247851, 1818.0568126526525, 4452.1016078799939, 2047.2960066111493, 1963.8657274866737, 2047.4948739008223, 1768.3096547617561, 2447.5028998716234, 2482.7108962691, 2128.0018288469919, 1806.7301882321021, 1772.3246603000719, 5803.738518186321, 1798.572994122713, 6342.6198323644467, 2719.0407047465369, 5872.4448971310285, 1718.0287344031356, 1975.8553803398997, 4089.0418254578526, 2827.6461855175298, 2047.5731199798217, 6182.5306850708821, 1799.2216814306257, 5821.8657988768718, 5261.9920805401234, 1728.5326365076648, 2498.4478787599023, 5711.0176941257841, 2132.2157852666737, 2305.8342275164105, 1742.9260379707273, 4495.0605229412349, 2307.025813218379, 2267.0830186364001, 1850.0099188084214, 1788.4569797768027, 1799.9473765786229, 1701.3152693662535, 2714.7002916944525, 1832.2386812881207, 6033.4178712040912, 5795.5855254086073, 2354.2479143787036, 3136.1334489510668, 2053.8469973983347, 6322.4332297897745, 1981.3654435442081, 1819.4461046157055, 2257.6059180738293, 4709.7418781823208, 1961.9279449958558, 6243.2423074873122, 6175.1119528389308, 2177.0613286219436, 6187.6795632100693, 1830.8592800553179, 1811.1712359480662, 6106.7472822509098, 1952.8809811764972, 1870.965173064684, 6228.8248244690431, 2699.164454284873, 2295.7322910170833, 3807.5109993951892, 2188.8297091094282, 1813.4642741233388, 2413.5448089794727, 1989.4306848559088, 2201.1048218029914, 6295.4190187566846, 6243.2423074873122, 2733.0328883407497, 2158.2110618263923, 2050.959769289871, 2543.0427216636222, 6233.4918957305254, 1919.4414073039704, 1703.7336685448629, 3858.7713493494748, 1943.9159186025965, 2079.3275013804068, 2319.7081124813781, 5994.9649670064709, 6251.8593579480848, 2139.6291622278786, 2162.581672501492, 2841.7605426723185, 2048.5187718112393, 2193.2036553224912, 2251.9333157773744, 2629.2190178228229, 3690.6316021925595, 2637.030920619778, 5842.4922672912217, 1884.2560494399575, 1843.1577358091208, 2163.1556153631163, 2651.9755344260984, 2209.7526452687962, 3108.0919418402241, 4387.2789976920412, 4698.6052131550832, 1947.2382624692616, 1721.7122378504125, 1883.567174876209, 1791.5387081525541, 1808.3840823374492, 2027.1259150370804, 3008.0670290601133, 2783.8860603503094, 3548.6751272484839, 2119.2582438340964, 1842.8306356209096, 1811.8758696043149, 1993.8806420387857, 2230.114543957538, 2524.6101073467694, 2369.3233933185866, 2317.7688836929988, 6134.6748393553416, 1885.4706325047302, 3154.9375544439058, 2314.5555039102414, 1766.5362521033173, 2819.8071270945788, 2689.7822575815635, 2204.2481365143462, 1971.6766514902765, 2272.7389701563638, 1756.1110070448294, 2800.2434013912875, 2093.2498937672021, 6187.6795632100693, 1841.0113159699797, 1860.7680087210833, 1982.8860785887384, 2801.9132242057758, 2744.0105956647167, 2025.3324054638258, 2722.3349116785962, 2684.0320952587235, 1877.8708415793587, 1997.7146695460535, 2104.1892082880063, 2725.8442629042142, 3625.9437800615337, 2145.9331536509681, 1964.5714745836017, 1941.6269985047763, 1840.9640642894606, 1858.1655072711599, 2122.3339549617058, 2076.5760478221541, 2185.2013499877644, 6148.6869234687683, 1996.7536642289936, 2231.2474284717237, 2098.8536029973343, 1742.4203420876274, 2306.2812497850091, 2721.5057732393784, 1875.4024744482288, 3064.375291034848, 1829.6452820628654, 1794.8335411785936, 1915.9536359891322, 2502.9808788537543, 1768.7422023935612, 6233.4309130206575, 2399.5441046522633, 1771.295681640004, 2095.2163400865065, 2170.1556654381593, 2121.6059394283293, 2207.0814350651663, 1809.6000363096575, 1908.4235529291209, 1856.0817854726031, 1862.2542145669672, 1778.8206888476191, 1754.4167167356909, 2747.6536379759777, 4392.6756654129649, 3565.8650546652166, 5476.6856895960746, 2786.4985484893873, 1915.735638108521, 2041.4366888494503, 6187.6795632100693, 2341.2486995710688, 2103.793796652817, 5865.8139190498505, 1699.9828833421673, 1993.3785877622747, 5964.8913804791482, 6156.2099074387725, 2413.3395548571748, 2099.4689601305859, 2915.1159717066835, 1782.5651767836252, 1850.0197312477569, 2104.4166049482315, 6206.2249413381178, 2990.8556200784769, 2235.4837197118086, 1952.9282245926095, 2414.4380556536712, 3224.5742704799181, 1712.3537287234201, 3377.4874074163745, 3562.3822787444183, 1674.4781633929065, 1770.3781243649291, 2512.7270580207683, 2519.4737433197356, 2600.7028461763307, 1687.6149740077039, 1827.2126312245898, 1765.946329287706, 5842.4922672912217, 2133.4553630401206, 1987.9591394650151, 1818.3518579455942, 1999.0936725599697, 1988.5131267757788, 2059.2467809359555, 1741.9951505552397, 2018.1693071570285, 2441.0117858276699, 2031.5820629850468, 2021.9955740863736, 2121.6430818723975, 1789.2775504127987, 5638.9892789948635, 2326.4686662613717, 2959.3842095472223, 1984.1449776696347, 1774.9347978340554, 2327.371938051715, 1881.5330953605135, 2139.1151745789821, 1669.0467180215926, 2525.0241948857788, 1906.6525847893236, 2557.5130308053958, 1900.4062816966011, 2095.8573933740408, 1773.6179090742717, 6266.4557619192337, 1935.0743271817748, 2135.1532490883042, 1738.9633786795393, 2115.9327681390764, 2580.6955754655746, 2720.535179681423, 1860.1178987262649, 5333.5674851558724, 1778.2603949578265, 2081.653719365047, 6048.5456782611936, 1904.0520140723243, 2135.8697908711961, 2297.7416719290022, 2290.7393924215753, 3308.5077581082523, 1689.2883297381291, 1754.1733576638849, 2134.7146778225488, 1856.944696747086, 1820.2830994284482, 6183.9447504226655, 2285.18744670974])

# Set up a regular grid of interpolation points
xi, yi = np.linspace(x.min(), x.max(), 100), np.linspace(y.min(), y.max(), 100)
xi, yi = np.meshgrid(xi, yi)
# Interpolate
rbf = scipy.interpolate.Rbf(x, y, z, function='linear')
zi = rbf(xi, yi)

plt.imshow(zi, vmin=z.min(), vmax=z.max(), origin='lower',
           extent=[x.min(), x.max(), y.min(), y.max()])
plt.scatter(x, y, c=z)
plt.colorbar()
plt.show()

(sorry for the messy x,y,z arrays but that's how my original code gets them from another portion of the code)

Is there a way to by-pass that error given the format of my data? This answer states that using numpy.linalg.lstsq could serve as a workaround but I haven't been able to make it work.

Community
  • 1
  • 1
Gabriel
  • 40,504
  • 73
  • 230
  • 404
  • Please give the full error message, including the line number which will help us figure out where the bad matrix is being referenced. – James Thiele Jan 31 '14 at 22:19
  • Is it possible that you have two points with the same or very close (x,y) values but different z values? That might cause this problem. – hunse Jan 31 '14 at 22:24
  • Yes @hunse, I have several points with equal values. I just realized this. @James, the full error can be seen if you run the `MWE` above. – Gabriel Feb 01 '14 at 02:52

2 Answers2

2

The problem is that you have multiple points with the same (x,y) value and different z values (see for example points 1 and 81). An easy solution, but one that's a bit of a hack, is simply to add noise to the (x,y) points.

x = x + np.random.normal(scale=1e-8, size=x.shape)
y = y + np.random.normal(scale=1e-8, size=y.shape)

If you do this, you'll fix the problem about the non-invertible matrix.

Another problem I noticed is that the final surface ends up looking really stretched, because the RBF function assumes that both the x and y coordinates are scaled similarly, but in your case they're not. Rescaling them both to be between zero and one fixes this. You can still use their original scales for plotting.

rescale = lambda x: (x - x.min()) / (x.max() - x.min())
xs = rescale(x)
ys = rescale(y)

I would recommend rescaling before adding the noise, so that the noise on both axes is scaled the same.

Finally, you should add aspect='auto' to your imshow call so that the resulting plot is a reasonable shape. With those changes, I think your code should do what you want.

hunse
  • 3,175
  • 20
  • 25
  • This definitely works but I have to wonder how it affects my interpretation of the density displayed. In any case, the answer is spot on so I'll mark it as accepted. Thank you very much hunse! – Gabriel Feb 01 '14 at 02:54
2

As @hunse correctly pointed out, you are encountering a LinAlgError because the fitting problem you are trying to solve is overdetermined - in essence, because you have multiple z-values for some x,y pairs it is impossible to find an exact solution that will pass through all x,y,z points. This is why the call to linalg.solve fails.

Although you can't find an exact solution, you can still solve this in the least-squares sense by replacing linalg.solve with linalg.lstsq, as Muhammad Alkarouri's answer alluded to.

Here's an example of a patched version of scipy.interpolate.Rbf that solves for overdetermined cases:

class LSQ_Rbf(scipy.interpolate.Rbf):

    def __init__(self, *args, **kwargs):
        self.xi = asarray([asarray(a, dtype=float_).flatten()
                           for a in args[:-1]])
        self.N = self.xi.shape[-1]
        self.di = asarray(args[-1]).flatten()

        if not all([x.size == self.di.size for x in self.xi]):
            raise ValueError("All arrays must be equal length.")

        self.norm = kwargs.pop('norm', self._euclidean_norm)
        r = self._call_norm(self.xi, self.xi)
        self.epsilon = kwargs.pop('epsilon', None)
        if self.epsilon is None:
            self.epsilon = r.mean()
        self.smooth = kwargs.pop('smooth', 0.0)

        self.function = kwargs.pop('function', 'multiquadric')

        # attach anything left in kwargs to self
        #  for use by any user-callable function or
        #  to save on the object returned.
        for item, value in kwargs.items():
            setattr(self, item, value)

        self.A = self._init_function(r) - eye(self.N)*self.smooth
        # use linalg.lstsq rather than linalg.solve to deal with 
        # overdetermined cases
        self.nodes = np.linalg.lstsq(self.A, self.di)[0]

I've just copy-pasted the __init__ function from the original class and modified the last line.

Here's what the output looks like (I've also rescaled the input x,y values as per hunse's suggestion):

enter image description here

hunse's answer will probably give quite similar results to mine provided the scale of the noise you add is chosen appropriately, but explicitly finding the least-squares solution makes a lot more sense in my mind, and will almost certainly be more numerically stable.

Community
  • 1
  • 1
ali_m
  • 71,714
  • 23
  • 223
  • 298
  • This is a fantastic answer, thank you so much ali_m! I've found out that `scipy.interpolate.Rbf` becomes amazingly slow of the arrays get too big (a couple of thousand elements) so I've moved to another form of presenting my data. This is a great contribution nonetheless, you should perhaps consider pitching it to the Scipy folks? Not sure how that project is managed. – Gabriel Feb 02 '14 at 01:47
  • 1
    To be honest I think it's pretty rare for people to do spline fitting on overdetermined data - none of scipy's other multivariate interpolation functions seem to support this. For systems of equations that do have an exact solution, `lstsq` is a fair bit slower than `solve` and I suspect it's probably less precise. I might consider a pull request adding `lstsq=False` as a kwarg to `Rbf`. – ali_m Feb 02 '14 at 02:01