0

Suppose, my data is as follows (we have 90041 rows of data here):

 2.268    7.042 5.781 5.399 5.373 5.423  -9.118   5.488   5.166   4.852  7.470  6.452  6.069     0 0 0 1 0 1 1 3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.101    5.781 5.399 5.373 5.423 5.247   5.488   5.166   4.852   5.164  6.452  6.069  6.197     0 1 1 3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.222    5.399 5.373 5.423 5.247 5.485   5.166   4.852   5.164   4.943  6.069  6.197  6.434     0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.416    5.373 5.423 5.247 5.485 6.675   4.852   5.164   4.943   8.103  6.197  6.434  8.264     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 3.028    5.423 5.247 5.485 6.675 6.372   5.164   4.943   8.103  -9.152  6.434  8.264  9.047     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-1.235    5.247 5.485 6.675 6.372 5.669   4.943   8.103  -9.152  -8.536  8.264  9.047 11.954     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-0.953    5.485 6.675 6.372 5.669 5.304   8.103  -9.152  -8.536   5.433  9.047 11.954  6.703     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.233    6.675 6.372 5.669 5.304 5.461  -9.152  -8.536   5.433   4.924 11.954  6.703  6.407     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.313    6.372 5.669 5.304 5.461 5.265  -8.536   5.433   4.924   5.007  6.703  6.407  6.088     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 2.314    5.669 5.304 5.461 5.265 5.379   5.433   4.924   5.007   5.057  6.407  6.088  6.410     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
... ... ...
... ... ...

Now, let us look into the following source code:

import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import sys, random
import time

import tensorflow as tf
from   tensorflow import keras
from   tensorflow.keras.models import Sequential
from   tensorflow.keras.layers import Dense
from   tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np

def load_data_func(fname: str, yyy_index: int, **selection):
    i = 0
    file = open(fname)
    if "top_n_lines" in selection:
        lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
    elif "random_n_lines" in selection:
        tmp_lines = file.readlines()
        lines = random.sample(tmp_lines, int(selection["random_n_lines"]))
    else:
        lines = file.readlines()

    data_x, data_y = [], []
    for l in lines:
        row = l.strip().split()  # return a list of words from the line.
        x = [float(ix) for ix in row[yyy_index+1:]]  # convert 3rd to 20th word into a vector of float numbers.
        y = float(row[yyy_index])  # select the 7th column.
        data_x.append(x)  # append the vector into 'data_x'
        data_y.append(y)  # append the vector into 'data_y'
    # END for l in lines

    num_rows = len(data_x)

    print("row size = ", len(data_x[0]))

    given_fraction = selection.get("validation_part", 1.0)
    if given_fraction > 0.9999:
        valid_x, valid_y = data_x, data_y
    else:
        n = int(num_rows * given_fraction)
        data_x, data_y = data_x[n:], data_y[n:]
        valid_x, valid_y = data_x[:n], data_y[:n]
    # END of if-else block

    print("size of x = ", len(data_x))
    print("size of y = ", len(data_y))

    tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
    ty = tf.convert_to_tensor(data_y, dtype=tf.float32)

    vx = tf.convert_to_tensor(valid_x, dtype=tf.float32)
    vy = tf.convert_to_tensor(valid_y, dtype=tf.float32)

    return tx, ty, vx, vy
# END of the function

When I call it like the following:

train_x, train_y, validate_x, validate_y = \
        load_data_func(
            fname="data_file.dat",
            yyy_index=6,
            random_n_lines=90000,
            validation_part=0.2
        )

print("row count", len(train_x))
print("col count", len(train_x[0]))

I get the following error:

my_user@my_remote_server:~/my_project_dir$ python3 load_data_test.py
row size =  40
size of x =  72000
size of y =  72000
Traceback (most recent call last):
  File "load_data_test.py", line 74, in <module>
    validation_part=0.2
  File "load_data_test.py", line 58, in load_data_func
    tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py", line 206, in wrapper
    return target(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1431, in convert_to_tensor_v2_with_dispatch
    value, dtype=dtype, dtype_hint=dtype_hint, name=name)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1441, in convert_to_tensor_v2
    as_ref=False)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/profiler/trace.py", line 163, in wrapped
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1566, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 339, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 265, in constant
    allow_broadcast=True)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 276, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 301, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Can't convert non-rectangular Python sequence to Tensor.
my_user@my_remote_server:~/my_project_dir$

Interestingly, the following calls do not show any error:

train_x, train_y, validate_x, validate_y = \
        load_data_func(
            fname="data_file.dat",
            yyy_index=6,
            top_n_lines=90000, #<============
            validation_part=0.2
        )


train_x, train_y, validate_x, validate_y = \
    load_data_k(
        fname="data_file.dat",
        yyy_index=6,
        random_n_lines=60000,  #<=============
        validation_part=0.2
    )

What am I doing wrong?

user366312
  • 16,949
  • 65
  • 235
  • 452
  • Does this answer your question? [ValueError: Can't convert non-rectangular Python sequence to Tensor](https://stackoverflow.com/questions/56304986/valueerror-cant-convert-non-rectangular-python-sequence-to-tensor) – Innat Jan 28 '22 at 13:56
  • 1
    @M.Innat, I edited the question. The linked question is not the same as my question. – user366312 Jan 28 '22 at 13:58

1 Answers1

2

Have you checked the last row of your datas ? If it does not have the same number of lines than the other rows, the variable data_x is no longer rectangular, wich raises a value error as explained in the link from M.Innat.
It would also explain why you don't have the error using the top n lines & 6000 randoms lines (your code work as long as it doens't include the last layer)

Antoine
  • 51
  • 2