Data cardinality ValueError

from sklearn.model_selection import train_test_split #to split data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
import pennylane as qml

def binary(x, y):
    keep = (y == 3) | (y == 6)
    x, y = x[keep], y[keep]
    y = (y == 3)
    return x,y

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train[..., np.newaxis]/255.0, x_test[..., np.newaxis]/255.0
x_train, y_train = binary(x_train, y_train)
x_test, y_test = binary(x_test, y_test)
x_train = tf.image.resize(x_train, (4,4)).numpy()
x_test = tf.image.resize(x_test, (4,4)).numpy()
print(x_train.shape)

y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)
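# y_train and y_test are now one-hot encoded with shape (num_samples, 2)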


def block(arr, rows, cols):
    h, w = arr.shape
    assert h % rows == 0, f"{h} rows not divisible by {rows}"
    assert w % cols == 0, f"{w} cols not divisible by {cols}"
    return (arr.reshape(h//rows, rows, -1, cols)
               .swapaxes(1,2)
               .reshape(-1, rows, cols))
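# e.g. block(np.arange(16).reshape(4, 4), 2, 2) returns an array of shape (4, 2, 2):
# the four 2x2 patches of the 4x4 image, in row-major block order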

size = 4*4
min_size = 2
minc = min_size**2
parts = int(size/minc)
print("parts ", parts)

x_train = [block(x_train[i,:,:,0],min_size,min_size) for i in range(len(x_train))]
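# x_train is now a Python list with one entry per image; each entry has shape (parts, 2, 2)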

n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev)
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

n_qubits1 = 4
dev = qml.device("default.qubit", wires=4)

@qml.qnode(dev, interface="tf")
def qnode1(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits1))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits1))
    return [qml.expval(qml.PauliZ(wires=0))]

n_layers = 1
weight_shapes1 = {"weights": (n_layers, 2,2)}
weight_shapes = {"weights": (n_layers, 2)}

qlayer_1 = qml.qnn.KerasLayer(qnode, weight_shapes, output_dim=2)
qlayer_2 = qml.qnn.KerasLayer(qnode, weight_shapes, output_dim=2)
clayer_2 = tf.keras.layers.Dense(2, activation="softmax")

kernel_model_list = []
kernel_model_list_input = []
kernel_model_list_output = []

for i in range(parts):
    
    model_input = tf.keras.Input(shape=(2,2))
    
    qlayer_m = qml.qnn.KerasLayer(qnode1, weight_shapes1, output_dim=1)(model_input)
    
    model = tf.keras.Model(inputs=model_input, outputs=qlayer_m)

 
    kernel_model_list.append(model)
    kernel_model_list_input.append(model.input)
    kernel_model_list_output.append(model.output)

print("input", len(kernel_model_list_input))


z_input = tf.concat(kernel_model_list_output, axis=1)

x_1, x_2 = tf.split(z_input, 2)

x_1 = qlayer_1(x_1)

x_2 = qlayer_2(x_2)

x = tf.concat([x_1, x_2], axis=1)

z = clayer_2(z_input)

model = tf.keras.Model(inputs=kernel_model_list_input, outputs=z)


model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer=tf.keras.optimizers.Adam(),
    metrics=[tf.keras.metrics.categorical_accuracy]
)
fitting = model.fit([*x_train], np.array(y_train), epochs=6, batch_size=4,  verbose=2)

I resize x_train to 4x4 and then divide each image into 2x2 patches, i.e. 4 parts in total. Then I try to process them with a hybrid qlayer/clayer model. It throws an error during fitting: ValueError: Data cardinality is ambiguous.
I used [*x_train] to pass the multiple parts of each image during fitting, but it did not work.
Can you please help?

Hey @Amandeep! It looks like x here isn’t defined. Can you please supply your entire code? It might also be helpful to supply a minimal example that reproduces the error you’re getting.

Hi @isaacdevlugt, those are just empty lists that get created. I shared the complete code.

Can you explain what it is that you’re trying to accomplish with your code?

@isaacdevlugt First I resize the MNIST images to 4x4. Then I construct 2x2 patches, which gives 4 parts per MNIST image. The idea is to process all parts independently with a qnode (using 4 qubits). After that, the outputs of the patches are concatenated and split into two parts, so that they can be given to two different qnodes.
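To make the shapes explicit, this is roughly the flow I have in mind per image (just a sketch of the intended shapes, following the code above):

```
# image:                      (4, 4)
# 2x2 patches:                4 arrays of shape (2, 2)
# qnode1 on each patch:       (2, 2) -> 1 expectation value  (4 qubits)
# concatenate patch outputs:  (4,)
# split into two halves:      2 arrays of shape (2,)
# qlayer_1 / qlayer_2:        (2,) -> (2,) each               (2 qubits)
# concatenate:                (4,)
# Dense(2, softmax):          2-class prediction
```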

Are you able to reduce your code down to something simpler that also reproduces the error you’re getting? It’s a little hard for me to decipher exactly what’s going on. Reducing your code down might also help you find the error as well!

@isaacdevlugt Sorry to say, this is the minimal code. The error only appears when we fit the model:

```
fitting = model.fit([*x_train], np.array(y_train), epochs=6, batch_size=4)
```

specifically in the [*x_train] part.

Hey @Amandeep! I’m honestly having a hard time figuring out how your code works. But I can tell you that the data cardinality error essentially boils down to the fact that, after all of the processing you do on x_train, the number of input samples no longer matches the number of target samples (the y values you’re comparing your model’s output to).

I think this Stack Overflow thread sums it up nicely: python - ValueError: Data cardinality is ambiguous: x sizes: 12000 y sizes: 640 Make sure all arrays contain the same number of samples - Stack Overflow
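Just to illustrate with something completely unrelated to your model, here is a minimal sketch (made-up array sizes, nothing quantum) that raises the same error because x and y contain different numbers of samples:

```
import numpy as np
import tensorflow as tf

x = np.random.rand(12, 3)  # 12 samples
y = np.random.rand(6, 1)   # only 6 targets

toy_model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(3,))])
toy_model.compile(loss="mse", optimizer="adam")

# Raises something like:
# ValueError: Data cardinality is ambiguous: x sizes: 12, y sizes: 6
toy_model.fit(x, y)
```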

I distilled your code down a bit for you by using dummy data instead of the full mnist dataset:

import numpy as np
import tensorflow as tf
import pennylane as qml


def binary(x, y):
    keep = (y == 3) | (y == 6)
    x, y = x[keep], y[keep]
    y = y == 3
    return x, y


(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

print(x_train.shape, type(x_train), y_train.shape)

num_samples = 10

x_train = tf.cast(np.random.randint(0, 1, size=(num_samples, 28, 28)), dtype=tf.float32)
y_train = tf.cast(np.random.randint(0, 9, size=num_samples), dtype=tf.float32)

x_test = tf.cast(np.random.randint(0, 1, size=(num_samples, 28, 28)), dtype=tf.float32)
y_test = tf.cast(np.random.randint(0, 9, size=num_samples), dtype=tf.float32)

print(x_train.shape, type(x_train), y_train.shape)

x_train, x_test = x_train[..., np.newaxis] / 255.0, x_test[..., np.newaxis] / 255.0
x_train, y_train = binary(x_train, y_train)
x_test, y_test = binary(x_test, y_test)
x_train = tf.image.resize(x_train, (4, 4)).numpy().squeeze()
x_test = tf.image.resize(x_test, (4, 4)).numpy().squeeze()
print(x_train.shape)

y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)

def block(arr, rows, cols):
    h, w = arr.shape
    assert h % rows == 0, f"{h} rows not divisible by {rows}"
    assert w % cols == 0, f"{w} cols not divisible by {cols}"
    return arr.reshape(h // rows, rows, -1, cols).swapaxes(1, 2).reshape(-1, rows, cols)


size = 4 * 4
min_size = 2
minc = min_size**2
parts = int(size / minc)
print("parts ", parts)

x_train = [block(x_train[i, :, :], min_size, min_size) for i in range(len(x_train))]

print(len(x_train), x_train[0].shape)

n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)


@qml.qnode(dev)
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]


n_qubits1 = 4
dev = qml.device("default.qubit", wires=4)


@qml.qnode(dev, interface="tf")
def qnode1(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits1))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits1))
    return [qml.expval(qml.PauliZ(wires=0))]

n_layers = 1
weight_shapes1 = {"weights": (n_layers, 2, 2)}
weight_shapes = {"weights": (n_layers, 2)}

qlayer_1 = qml.qnn.KerasLayer(qnode, weight_shapes, output_dim=2)
qlayer_2 = qml.qnn.KerasLayer(qnode, weight_shapes, output_dim=2)
clayer_2 = tf.keras.layers.Dense(2, activation="softmax")

kernel_model_list = []
kernel_model_list_input = []
kernel_model_list_output = []

for i in range(parts):
    model_input = tf.keras.Input(shape=(2, 2))

    qlayer_m = qml.qnn.KerasLayer(qnode1, weight_shapes1, output_dim=1)(model_input)

    model = tf.keras.Model(inputs=model_input, outputs=qlayer_m)

    kernel_model_list.append(model)
    kernel_model_list_input.append(model.input)
    kernel_model_list_output.append(model.output)

z_input = tf.concat(kernel_model_list_output, axis=1)
z = clayer_2(z_input)

model = tf.keras.Model(inputs=kernel_model_list_input, outputs=z)

model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer=tf.keras.optimizers.Adam(),
    metrics=[tf.keras.metrics.categorical_accuracy],
)

print("x_train:", len(x_train))
print("x_train[0]:", x_train[0].shape)
print("y_train:", len(y_train))
print("y_train[0]:", y_train[0])

fitting = model.fit(x_train, y_train, epochs=1, batch_size=4, verbose=2)

Let me know if this helps!

@isaacdevlugt It did not work. It still shows that the data cardinality is ambiguous. The same code works fine in TF/Cirq, though: there x_train is a TFQ tensor, and when I use [*x_train] during fit it works fine.

Yep! I know my attached code doesn’t work, but it’s more stripped down compared to your original code and still reproduces the error :smile:. Does this help you narrow down where your data processing goes awry?

@isaacdevlugt Sorry to say it did not help. The error is still a shape error.

@isaacdevlugt

Figure 4 in the paper can help in understanding the problem…

The idea is to divide the image into four parts, run the quantum model on all parts independently, and jointly predict the output.

Hey @Amandeep,

I suggest that you take your model apart and write down with pen and paper what the input and output dimensions should be for each layer, then try to match that with what your code is showing you. It’s a relatively complex model, so it will help to go through it layer by layer!
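For example, each of your four tf.keras.Input layers expects batches of shape (batch, 2, 2), so model.fit wants a list with one array per input (each containing all num_samples samples), not a list with one entry per image. If I’m reading your preprocessing correctly, restructuring the patched data would look something like this (untested sketch, assuming x_train is the list of per-image (parts, 2, 2) arrays from your code):

```
import numpy as np

# Stack the per-image patch arrays: shape (num_samples, parts, 2, 2)
x_stacked = np.stack(x_train)

# One array per model input, each of shape (num_samples, 2, 2)
x_parts = [x_stacked[:, i] for i in range(parts)]

# Now every input has the same number of samples as y_train
fitting = model.fit(x_parts, np.array(y_train), epochs=6, batch_size=4, verbose=2)
```

That way the data cardinality matches: each of the four inputs sees num_samples samples, and so does y_train.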