I have this dataframe.
I am trying to follow this example.
The target value I want to predict is zg500. The other feature I want to use is tas.
I want to create the feature columns in order to combine the latitudes and longitudes:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column

df = pd.read_csv('./df.csv')

# drop the unnamed index column if it exists
# df.drop(['Unnamed: 0'], axis=1, inplace=True)

df.dropna(inplace=True)
# A utility method to create a tf.data dataset from a Pandas DataFrame
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    dataframe = dataframe.copy()
    labels = dataframe.pop('zg500')
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    return ds
batch_size = 16
train_ds = df_to_dataset(df, batch_size=batch_size)
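To confirm the input pipeline yields what I expect, I pull one batch and print the feature keys and values (just an inspection step; the column names match the ones used below):

for feature_batch, label_batch in train_ds.take(1):
    print('feature keys:', list(feature_batch.keys()))
    print('a batch of tas:', feature_batch['tas'])
    print('a batch of labels:', label_batch)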
feature_columns = []

# plain numeric feature
tas = feature_column.numeric_column("tas")

# bucketize latitude and longitude so they can be crossed
latitude = feature_column.numeric_column("lats")
longitude = feature_column.numeric_column("lons")
bucketized_lat = feature_column.bucketized_column(latitude, boundaries=[0, 20, 40, 70])
bucketized_lon = feature_column.bucketized_column(longitude, boundaries=[-45, -20, 0, 20, 60])

feature_columns.append(tas)
feature_columns.append(bucketized_lat)
feature_columns.append(bucketized_lon)

# cross the bucketized coordinates and one-hot encode the result
lat_lon = feature_column.crossed_column([bucketized_lat, bucketized_lon], hash_bucket_size=1000)
lat_lon = feature_column.indicator_column(lat_lon)
feature_columns.append(lat_lon)

feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
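To see what the combined columns actually feed into the model, I can apply the feature layer to a single example batch (a sanity check only; example_batch is a name I introduce here):

example_batch = next(iter(train_ds))[0]
# DenseFeatures turns the dict of raw columns into one dense tensor per example
print(feature_layer(example_batch).numpy())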
Create the model:
model = tf.keras.Sequential([
    feature_layer,
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mse')

history = model.fit(train_ds, epochs=2)
Right now, I am getting a nan loss:
10918/10918 [==============================] - 10s 861us/step - loss: nan
Epoch 2/2
10918/10918 [==============================] - 10s 857us/step - loss: nan
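I am not sure whether the data or the model setup is the cause. The first thing I plan to check is the range and finiteness of the columns involved, along these lines (a quick sanity check, not a fix; cols just lists the columns used above):

cols = ['tas', 'lats', 'lons', 'zg500']
print(df[cols].describe())
# anything non-finite left after dropna would show up here
print(np.isfinite(df[cols].to_numpy()).all())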
Also, I was wondering why using the df dataframe directly instead of train_ds:

history = model.fit(df.iloc[:, [0, 2, 3]].values,
                    df.iloc[:, 1].values,
                    epochs=2)
produces:
ValueError: ('We expected a dictionary here. Instead we got: ', <tf.Tensor 'IteratorGetNext:0' shape=(32, 3) dtype=float32>)
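My reading of the error is that the DenseFeatures layer expects a dictionary mapping column names to tensors rather than a plain array, so presumably the call would have to pass named columns, something like the sketch below (I have not verified that fit accepts a dict of NumPy arrays with this model):

features = {
    'tas': df['tas'].values,
    'lats': df['lats'].values,
    'lons': df['lons'].values,
}
history = model.fit(features, df['zg500'].values, epochs=2)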