0

My Flask API, which serves a Keras model, is too slow: it handles only 180 requests in 10 seconds. The API consists of three .py files.

server_side.py:

def predict():
    """Render ``prediction.html`` for the movie ids posted in the ``comment`` field.

    The ``comment`` form field is parsed as comma-separated integers, turned
    into model inputs by ``prepare_inputs``, scored with ``make_prediction``
    and the resulting rows are rendered as two HTML tables (the submitted
    movies and the recommended ones).

    Non-POST requests are rejected with 405 and ids that are not integers
    with 400.
    """
    # Imported locally so this function does not rely on the file's import block.
    from flask import abort

    if request.method != "POST":
        # ``context`` only exists for POST; rendering without it used to
        # raise UnboundLocalError.
        abort(405)

    comment = request.form["comment"]
    try:
        movies = [int(x) for x in comment.split(",")]
    except ValueError:
        # Client error, not a server fault.
        abort(400)

    movies, g_input, t_input, movie_copy = prepare_inputs(movies)

    preds = make_prediction(movies, g_input, t_input, MODEL)

    # Highest-scoring indices first; keeps 10 plus one per submitted movie.
    most_similar = preds[0].argsort()[-(10 + len(movie_copy)):][::-1]
    watched_movies, rec_movies = result(movie_copy, most_similar, DF)

    context = {
        "table1": [watched_movies.to_html(classes="data")],
        "title1": watched_movies.columns.values,
        "table2": [rec_movies.to_html(classes="data")],
        "title2": rec_movies.columns.values,
    }

    return render_template("prediction.html", context=context)


if __name__ == "__main__":
    # MODEL is already bound at module level at this point, so neither a
    # ``global`` declaration nor ``MODEL = MODEL`` has any effect here.
    # threaded=False makes the development server handle one request at a time.
    app.run(threaded=False)

prediction_helper.py: has two functions, one that runs the prediction and another that prepares the inputs to the required length and shape.

def make_prediction(movies, g_input, t_input, model):
    """Run ``model.predict`` on a single sample built from the three inputs.

    Each of ``movies``, ``g_input`` and ``t_input`` is wrapped in a
    one-element outer array, and the three arrays are passed to
    ``model.predict`` as a list in that order. Whatever ``predict`` returns
    is returned unchanged.
    """
    single_sample_batch = [
        np.array([sequence]) for sequence in (movies, g_input, t_input)
    ]
    return model.predict(single_sample_batch)


def _invert_first(mapping):
    """Build a value -> key table, keeping the first key seen for each value."""
    inverse = {}
    for key, value in mapping.items():
        inverse.setdefault(value, key)
    return inverse


def _encode(tokens, inverse):
    """Translate each token to its key; raise ValueError for an unknown token."""
    try:
        return [inverse[token] for token in tokens]
    except KeyError as err:
        # The list.index() lookup used previously reported a missing token
        # with ValueError; keep that exception type.
        raise ValueError(f"{err.args[0]!r} is not in list") from err


def prepare_inputs(movie_input):
    """Build the padded movie, genre and tag inputs for a list of movie ids.

    Genre and tag strings of the rows in ``DF`` whose ``movieId`` is in
    ``movie_input`` are joined, split on single spaces, and mapped back to
    the keys of ``GENRES_MAP`` / ``TAG_MAP``. Duplicates are dropped. The
    tags "em", "cs" and "se" are skipped.

    Args:
        movie_input: list of movie ids. It is padded in place with zeros up
            to ``INPUT_LENGTH["movie_len"]``.

    Returns:
        A tuple ``(movie_input, genres, tags, movies_copy)``: the padded
        movie list, the genre keys padded to ``genre_len``, the tag keys
        truncated or padded to ``tag_len``, and an unpadded copy of the
        original ids.

    Raises:
        ValueError: if a genre or tag token is not a value of its map.
    """
    movies_copy = movie_input[:]

    # One row mask serves both the genre and the tag column.
    selected = DF["movieId"].isin(movie_input)
    genres = " ".join(DF.genres[selected].values).split(" ")
    tags = " ".join(DF.tag[selected].values).split(" ")

    # One inverse table per map replaces rebuilding and scanning the
    # key/value lists for every single token.
    genres = list(set(_encode(genres, _invert_first(GENRES_MAP))))
    genres = pad(genres, INPUT_LENGTH["genre_len"])

    tags = [token for token in tags if token not in ("em", "cs", "se")]
    tags = list(set(_encode(tags, _invert_first(TAG_MAP))))
    # Truncate to the configured width rather than a hard-coded 100.
    tag_len = INPUT_LENGTH["tag_len"]
    if len(tags) > tag_len:
        tags = tags[:tag_len]
    else:
        tags = pad(tags, tag_len)

    movie_input = pad(movie_input, INPUT_LENGTH["movie_len"])

    return movie_input, genres, tags, movies_copy

data_loader.py: has two functions, pad, which pads a list to the required length, and result, which extracts rows based on the prediction indices.

import pickle
import pandas as pd
from keras.models import load_model

# Everything below is loaded once, at import time.
MODEL = load_model("final_train1.h5")

DF = pd.read_csv("new_df.csv")
# Missing tags become empty strings so they can be joined as text.
DF["tag"] = DF["tag"].fillna("")

# ``with`` closes each pickle file as soon as it has been read instead of
# leaving the handle open.
with open("genres_map.pickle", "rb") as PICKLE_IN:
    GENRES_MAP = pickle.load(PICKLE_IN)

with open("tag_map.pickle", "rb") as PICKLE_IN:
    TAG_MAP = pickle.load(PICKLE_IN)

# Widths the movie, genre and tag lists are padded to.
INPUT_LENGTH = {"movie_len": 2698, "genre_len": 24, "tag_len": 100}

def pad(lst, width):
    """Append zeros to ``lst`` in place until it holds ``width`` items.

    A list that already has ``width`` or more items is left untouched.
    The same list object is returned.
    """
    shortfall = width - len(lst)
    lst.extend(0 for _ in range(shortfall))
    return lst


def result(movie_copy, most_similar, DF):
    """Look up the watched and the recommended rows of ``DF`` by ``movieId``.

    Args:
        movie_copy: movie ids to select as the watched movies.
        most_similar: labels to select as the recommended movies.
        DF: DataFrame with a ``movieId`` column. It is not modified.

    Returns:
        A tuple ``(watched_movies, rec_movies)`` of DataFrames indexed by
        ``movieId``, with rows in the order of the given labels.
    """
    # Index once and reuse it for both lookups instead of re-indexing the
    # whole frame for each one.
    by_movie_id = DF.set_index("movieId")

    rec_movies = by_movie_id.loc[most_similar]
    watched_movies = by_movie_id.loc[movie_copy]

    return watched_movies, rec_movies
ashishjohn
  • 69
  • 7

2 Answers2

1

Flask documentation says the development server is not for production. Use a production WSGI server such as Gunicorn along with Nginx.

davidism
  • 121,510
  • 29
  • 395
  • 339
1

You shouldn't use Flask's built-in server: it is designed for development only, not for production deployment. Instead, use a production WSGI server such as Waitress, uWSGI, or Gunicorn, optionally behind a reverse proxy such as Nginx. You can read about the options for deploying Flask in production here: https://flask.palletsprojects.com/en/1.1.x/deploying/

Nur Faizin
  • 171
  • 6