incompatible shapes error using datagen.flow_from_directory() in a functional API keras model


I am a super n00b attempting to learn TF and keras. I would like to create a model using the Functional API and fed by ImageDataGenerator() and flow_from_directory(). I am limited to using spyder (5.1.5) and python 3.7, keras 2.8.0, tensorflow 2.8.0.

I have organized sample patches into labelled folders to support flow_from_directory(). There are 7 classes and each patch is a small .png image, size is supposed to be 128 x 128 x 3.

However, when I attempt to call I receive a ValueError:

Traceback (most recent call last):

  File ~\.spyder-py3\MtP_treeCounts\ in <module>
    history =,

  File ~\Anaconda3\envs\tf28\lib\site-packages\keras\utils\ in error_handler
    raise e.with_traceback(filtered_tb) from None

  File ~\Anaconda3\envs\tf28\lib\site-packages\tensorflow\python\framework\ in autograph_handler
    raise e.ag_error_metadata.to_exception(e)

ValueError: in user code:

    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\engine\", line 1021, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\engine\", line 1010, in step_function  **
        outputs =, args=(data,))
    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\engine\", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\engine\", line 860, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\engine\", line 918, in compute_loss
        return self.compiled_loss(
    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\engine\", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\", line 141, in __call__
        losses = call_fn(y_true, y_pred)
    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\", line 1789, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "C:\Users\jlovitt\Anaconda3\envs\tf28\lib\site-packages\keras\", line 5083, in categorical_crossentropy

    ValueError: Shapes (None, None) and (None, 128, 128, 1) are incompatible

I don’t think my generator is generating anything. I assume the issue is linked to my model being fed something like [50,7] (where batch size is 50 and 7 is the number of classes) instead of [50,128,128,3] which would be 50 individual patches pulled randomly from across the class labelled folders. So it’s not actually training anything.

Here is the code:

# set up
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras.layers import Input, Conv2D,Conv1D, UpSampling2D, concatenate,Dense, Flatten, Dropout,BatchNormalization, MaxPooling2D
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import backend as K

del model
#build generator & train set

datagen = ImageDataGenerator(
    dtype = np.float32,
    data_format = "channels_last",

image_height = 128
image_width = 128
batch_size = 50

ds_train = datagen.flow_from_directory(
    batch_size = batch_size,
    class_mode = 'categorical',
    seed =42,
#set params

# STEP_SIZE_TRAIN = round(int(ds_train.n//ds_train.batch_size),-1)

# STEP_SIZE_VALID = round(int(ds_validation.n//ds_validation.batch_size),-1)

lr = 0.001
#define model

def U_model():
    in1 = Input(shape=(256,256,3))

    conv1 = Conv2D(32,(3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(in1)
    conv1 = Dropout(0.1)(conv1)
    conv1 = Conv2D(32,(3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(conv1)
    pool1 = MaxPooling2D((2,2))(conv1)

    conv2 = Conv2D(64,(3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(pool1)
    conv2 = Dropout(0.1)(conv2)
    conv2 = Conv2D(64,(3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(conv2)
    pool2 = MaxPooling2D((2,2))(conv2)

    conv3 = Conv2D(128,(3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(pool2)
    conv3 = Dropout(0.1)(conv3)
    conv3 = Conv2D(128,(3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(conv3)
    pool3 = MaxPooling2D((2,2))(conv3)
    conv4 = Conv2D(128, 3, activation='relu', kernel_initializer='he_normal', padding='same')(pool3)
    conv4 = Dropout(0.1)(conv4)
    conv4 = Conv2D(128, 3, activation='relu', kernel_initializer='he_normal', padding='same')(conv4)
    up1 = concatenate([UpSampling2D((2,2))(conv4),conv3],axis=-1)
    conv5 = Conv2D(64,(3,3), activation='relu', kernel_initializer='he_normal', padding='same')(up1)
    conv5 = Dropout(0.1)(conv5)
    conv5 = Conv2D(64,(3,3), activation='relu', kernel_initializer='he_normal', padding='same')(conv5)
    up2 = concatenate([UpSampling2D((2,2))(conv5), conv2], axis=-1)
    conv6 = Conv2D(64, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(up2)
    conv6 = Dropout(0.1)(conv6)
    conv6 = Conv2D(64, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(conv6)

    up3 = concatenate([UpSampling2D((2,2))(conv6), conv1], axis=-1)
    conv7 = Conv2D(32, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(up3)
    conv7 = Dropout(0.1)(conv7)
    conv7 = Conv2D(32, (3,3), activation='relu', kernel_initializer='he_normal', padding='same')(conv7)
    out1 = keras.layers.Dense(7)(conv7)
    #defining inputs and outputs of model
    model = Model(inputs=[in1], outputs=[out1])

    model.compile(loss="categorical_crossentropy", optimizer =keras.optimizers.SGD(learning_rate=lr,momentum=0.9),metrics=[tf.keras.metrics.MeanSquaredError(),tf.keras.metrics.MeanAbsoluteError()])
    return model

model = U_model()
#train model

history =,


As it turns out I solved the issue with the following:

changed optimizer to Adam in compiler,
added a flatten() layer prior to my final dense(7) output

Answered By – IPutGooglyEyesOnIt

This Answer collected from stackoverflow, is licensed under cc by-sa 2.5 , cc by-sa 3.0 and cc by-sa 4.0

Leave a Reply

(*) Required, Your email will not be published