Request to TFServing fails with std::bad_alloc


I have a problem. I want to make a prediction with TFServing, but unfortunately, as soon as I call the TFServing API, the Docker container crashes with the following error:

2022-10-05 08:22:19.091237: I tensorflow_serving/model_servers/] Exporting HTTP/REST API at:localhost:8601 ...
terminate called after throwing an instance of 'std::bad_alloc'
  what():  std::bad_alloc

I am running TFServing inside a Docker container, and the call comes from a Flask server. What is causing this? The VM has 16 GB of RAM.

from flask import current_app, flash, jsonify, make_response, redirect, request, url_for
from keras_preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from dotenv import load_dotenv
from loguru import logger
from pathlib import Path
from flask import Flask
import tensorflow as tf
import numpy as np
import requests
import string
import pickle5 as pickle
import nltk
import re
import os

# Flask application instance; presumably the handlers below are registered on
# it via @app.route decorators lost in the paste — TODO confirm.
app = Flask(__name__)

def index():
    """Demo endpoint: classify a fixed sample sentence and return the result."""
    sample = "This is a text"
    return get_prediction_probability(sample)

def text_wragling(text):
    """Normalize *text* and turn it into a padded token-id sequence.

    Lowercases, strips URLs, punctuation and stopwords, tokenizes with the
    pickled Keras tokenizer, and pads to the NLP__MAXLEN environment value.
    """
    cleaned = remove_stopwords(remove_punct(remove_URL(text.lower())))
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
    sequences = tokenizer.texts_to_sequences([cleaned])
    # Pad/truncate to the fixed input length the served model expects.
    return pad_sequences(sequences, maxlen=int(os.getenv('NLP__MAXLEN')))

def remove_URL(text):
    """Return *text* with http(s):// and www. URLs removed.

    Fix: the original pattern was ``www.\.S+`` — ``\.S`` matched a literal
    dot followed by a literal ``S``, so bare ``www.``-style URLs were never
    stripped. ``www\.\S+`` matches ``www.`` followed by non-whitespace.
    """
    url = re.compile(r"https?://\S+|www\.\S+")
    return url.sub(r"", text)

def remove_punct(text):
    """Return *text* with every ASCII punctuation character deleted."""
    # Mapping each punctuation char to None deletes it in one C-level pass.
    return text.translate(str.maketrans("", "", string.punctuation))

def remove_stopwords(text):
    """Lowercase the words of *text* and drop English stopwords."""
    from nltk.corpus import stopwords

    stop_set = set(stopwords.words("english"))
    kept = []
    for token in text.split():
        token = token.lower()
        if token not in stop_set:
            kept.append(token)
    return " ".join(kept)

def get_prediction_probability(txt):
    x = text_wragling(txt)"Txt wragling")
    data = {
        "instances": [
    }"Get prediction from model")
    response ="http://localhost:8601/v1/models/nlp_model/labels/production:predict", json=data)
    probability = (np.asarray(response.json()['predictions']).max(axis=1))
    pred = np.asarray(response.json()['predictions']).argmax(axis=1)
    with open('labelenconder.pickle', 'rb') as handle:
        le = pickle.load(handle)
    pred = le.classes_[pred]
    prediction = pred[0]
    return {
        "prediction": prediction,
        "probability": probability[0]

if __name__ == '__main__':
    # NOTE(review): the pasted snippet was truncated here; presumably the
    # Flask development server was started — confirm host/port against the
    # original project.
    app.run()

FROM tensorflow/serving


version: '3'

services:
  tfserving:
    container_name: tfserving
    build: ..
    ports:
      - "8601:8601"
    volumes:
      - ./model.config:/models/model.config
      - ../model:/models/model
    environment:
      - TENSORFLOW_MODEL_BASE_PATH=/models/model/
    entrypoint: [ "bash", "-c", "tensorflow_model_server --rest_api_port=8601  --allow_version_labels_for_unavailable_models --model_config_file=/models/model.config"]


model_config_list {
  config {
    name: 'nlp_model'
    base_path: '/models/model/'
    model_platform: 'tensorflow'
    model_version_policy {
      specific {
        versions: 1
        versions: 2
      }
    }
    version_labels {
      key: 'production'
      value: 1
    }
    version_labels {
      key: 'beta'
      value: 2
    }
  }
}


This is an error that has already been reported and fixed in TensorFlow Serving 2.11 (not yet released).
You can use the nightly release from Docker Hub.

You can find this issue here #2048.

Answered By – Test

This Answer collected from stackoverflow, is licensed under cc by-sa 2.5 , cc by-sa 3.0 and cc by-sa 4.0

Leave a Reply

(*) Required, Your email will not be published