After some painful debugging I can confirm the sequence that the slow one takes, in the DataFrame ctor :
elif isinstance(data, (list, types.GeneratorType)):
if isinstance(data, types.GeneratorType):
data = list(data)
if len(data) > 0:
if is_list_like(data[0]) and getattr(data[0], 'ndim', 1) == 1:
if is_named_tuple(data[0]) and columns is None:
columns = data[0]._fields
arrays, columns = _to_arrays(data, columns, dtype=dtype)
Here it tests the type of the passed data, as it's list-like it then tries to test each element for it's type, it's not expecting a list containing a np array so then it comes here:
def _to_arrays(data, columns, coerce_float=False, dtype=None):
"""
Return list of arrays, columns
"""
if isinstance(data, DataFrame):
if columns is not None:
arrays = [data._ixs(i, axis=1).values
for i, col in enumerate(data.columns) if col in columns]
else:
columns = data.columns
arrays = [data._ixs(i, axis=1).values for i in range(len(columns))]
return arrays, columns
if not len(data):
if isinstance(data, np.ndarray):
columns = data.dtype.names
if columns is not None:
return [[]] * len(columns), columns
return [], [] # columns if columns is not None else []
if isinstance(data[0], (list, tuple)):
return _list_to_arrays(data, columns, coerce_float=coerce_float,
dtype=dtype)
then here:
def _list_to_arrays(data, columns, coerce_float=False, dtype=None):
if len(data) > 0 and isinstance(data[0], tuple):
content = list(lib.to_object_array_tuples(data).T)
else:
# list of lists
content = list(lib.to_object_array(data).T)
return _convert_object_array(content, columns, dtype=dtype,
coerce_float=coerce_float)
and finally here:
def _convert_object_array(content, columns, coerce_float=False, dtype=None):
if columns is None:
columns = _default_index(len(content))
else:
if len(columns) != len(content): # pragma: no cover
# caller's responsibility to check for this...
raise AssertionError('%d columns passed, passed data had %s '
'columns' % (len(columns), len(content)))
# provide soft conversion of object dtypes
def convert(arr):
if dtype != object and dtype != np.object:
arr = lib.maybe_convert_objects(arr, try_float=coerce_float)
arr = _possibly_cast_to_datetime(arr, dtype)
return arr
arrays = [convert(arr) for arr in content]
return arrays, columns
You can see that there is no optimisation in the construction it performs and it essentially just iterates through every element, converts it (which will copy it) and returns a list of arrays.
For the other path, as the np array shape and dtypes are more pandas friendly it can take a view on the data or copy if required but it already knows enough to optimise the construction