DataLoader Class picking up parent folder

I am trying to train a convolutional neural network on an image classification task. For some reason my dataloader class is picking up the parent folder, and I suspect this is causing issues when I attempt to train the model, because training fails with the error “RuntimeError: weight tensor should be defined either for all 64 classes or no classes but got weight tensor of shape: [99]”.

Here’s my code:

import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision
from torchvision import datasets, models, transforms
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image


# Root directory of the dataset; label folders are looked up directly below it.
data_path = "/kaggle/input/facial-age/face_age"

class CustomImageFolder(Dataset):
    """Dataset of images laid out as ``root_dir/<label>/<image file>``.

    Every immediate subdirectory of ``root_dir`` is treated as one label and
    every regular file directly inside it as one sample of that label. The
    directory name is stored as the label string and converted with ``int()``
    when a sample is fetched, so label directories must have integer names
    (e.g. ``"001"``).

    Attributes are filled in by ``__init__``:
        root_dir: the directory that was scanned.
        transform: optional callable applied to each loaded image.
        images: file paths, ordered by label directory and then by file name.
        labels: label directory name (a string) for each entry of ``images``.
    """

    def __init__(self, root_dir, transform=None):
        """Scan ``root_dir`` and record the path and label of every image.

        Args:
            root_dir: Directory whose subdirectories are named after labels.
            transform: Optional callable applied to the RGB PIL image in
                ``__getitem__``.

        Raises:
            OSError: If ``root_dir`` cannot be listed.
        """
        super().__init__()
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []

        # Iterate over all subdirectories
        for label in sorted(os.listdir(root_dir)):
            # Skip a directory literally named 'face_age' -- presumably a
            # nested copy of the dataset root; its name is not an integer
            # label, so int(label) in __getitem__ would fail on it.
            if label == 'face_age':
                continue
            label_path = os.path.join(root_dir, label)
            if not os.path.isdir(label_path):
                continue
            # os.listdir returns entries in arbitrary order; sorting keeps
            # sample indices (and therefore index-based splits) reproducible.
            for img_file in sorted(os.listdir(label_path)):
                img_path = os.path.join(label_path, img_file)
                if os.path.isfile(img_path):
                    self.images.append(img_path)
                    self.labels.append(label)

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened from disk, converted to RGB and passed through
        ``self.transform`` when one was given. The label is the sample's
        directory name converted to ``int``.

        Raises:
            IndexError: If ``idx`` is out of range.
            ValueError: If the label directory name is not an integer.
        """
        img_path = self.images[idx]
        # Image.open keeps the underlying file open; convert() returns an
        # independent in-memory image, so the file can be closed right away.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, int(label)

# Define the transform: each loaded PIL image is converted to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Create the custom dataset
dataset = CustomImageFolder(root_dir=data_path, transform=transform)

# Check the first few items (at most five, so a dataset with fewer samples
# does not raise IndexError).
for i in range(min(5, len(dataset))):
    image, label = dataset[i]
    print(f'Label: {label}, Image Shape: {image.shape}')


# 80/20 train/test split; the test set takes the remainder so the two sizes
# always add up to len(dataset).
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders (only the training loader shuffles)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


# Rebuild the dataset with torchvision's ImageFolder (this rebinds `dataset`).
# ImageFolder makes one class per immediate subdirectory of `root`, so every
# directory found under data_path appears in the mapping printed below.
dataset = datasets.ImageFolder(root=data_path)
class_map = dataset.class_to_idx
print(class_map)
print(len(class_map))

The last cell is outputting the parent folder along with all the folders inside it. This makes no sense.

Here’s the dataset used: https://www.kaggle.com/datasets/frabbisw/facial-age

Leave a Comment