Trying to generate a lot of data with NumPy, without success. To be clear, it's not the library's fault; I just don't have enough memory on my machine (~32 GB of RAM), since keeping every step of every trajectory in a list adds up to far more than that.
Is there a better way to do this, such as writing the data to disk as it is generated rather than holding it all in memory (a rough sketch of what I mean follows the code below)?
Here is the working code:
import numpy as np
import tensorflow as tf

def main():
    # Load MNIST and flatten each 28x28 image into a 784-element vector
    (images, labels), (_, _) = tf.keras.datasets.mnist.load_data(path="mnist.npz")
    rescaled_images = np.reshape(images, (images.shape[0], images.shape[1] * images.shape[2]))

    # Keep only the images of the digit 0
    indices = np.where(labels == 0)
    zero_images = np.take(rescaled_images, indices, axis=0)[0]
    x0 = np.array(zero_images).astype(np.float32)

    length = zero_images.shape[1]  # 784 pixels per image
    ones = np.ones(length)

    steps = 1000000  # <--- problem even with one step
    mu = -2.0
    sigma = 1.5
    dt = 0.01

    data = []
    for image in x0:
        diff_imgs = [image]
        x = image  # each trajectory starts from its own image
        for i in range(steps):
            # Euler-Maruyama step: constant drift plus Gaussian noise scaled by sqrt(dt)
            x = x + mu * ones * dt + sigma * ones * np.sqrt(dt) * np.random.randn(length)
            diff_imgs.append(x)
        data.append(diff_imgs)
    data = np.array(data, dtype=np.float32)

if __name__ == '__main__':
    main()
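
One direction I have been considering, to make the "save on disk" idea concrete: pre-allocate the output as a memory-mapped .npy file and write each trajectory into it as it is computed, so only one trajectory lives in RAM at a time. This is only a rough sketch under my own assumptions; the file name, the helper function name, and the array layout are placeholders, and the resulting file would itself be enormous for a million steps, so I would probably still need to store fewer snapshots.

import numpy as np
from numpy.lib.format import open_memmap

def generate_to_disk(x0, steps, mu, sigma, dt, path="trajectories.npy"):
    # Pre-allocate the whole result on disk as a memory-mapped .npy file
    # (path and layout are placeholders); only one trajectory is held in RAM at a time.
    n_images, length = x0.shape
    out = open_memmap(path, mode="w+", dtype=np.float32,
                      shape=(n_images, steps + 1, length))
    for i, image in enumerate(x0):
        x = image.astype(np.float32)
        out[i, 0] = x
        for t in range(steps):
            # Same Euler-Maruyama update as above, written straight to disk
            x = x + mu * dt + sigma * np.sqrt(dt) * np.random.randn(length)
            out[i, t + 1] = x
        out.flush()  # make sure this trajectory is on disk before moving on
    return out

Would something along these lines be a reasonable approach, or is there a more standard way (e.g. HDF5 via h5py) to stream this kind of data to disk?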