Training my own dataset
Hi, what data format and folder layout do I need to set up in order to train on my own dataset?
Thx
+1
the same question
Here's the solution that worked for me.
I created a CSV file containing the path of each image and its label, and wrote a custom class for data loading.
import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from skimage import io
from skimage.transform import resize
class SignatureDataset(Dataset):
    """Image-classification dataset driven by a CSV annotation file.

    Column 0 of the CSV holds each image path relative to ``root_dir`` and
    column 1 holds its integer class label.  Each sample is returned as an
    ``(image, label)`` pair whose image is a float32 tensor laid out
    channels-first as ``(3, height, width)``.
    """

    def __init__(self, csv_file, root_dir, transform=None, image_size=(256, 256)):
        """Load the annotation table.

        Args:
            csv_file: Path of the CSV file listing image paths and labels.
            root_dir: Directory that the image paths are relative to.
            transform: Optional callable applied to the resized
                ``(3, height, width)`` NumPy array; it must return a NumPy
                array because the result is passed to ``torch.from_numpy``.
            image_size: ``(height, width)`` that every image is resized to.
        """
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.image_size = tuple(image_size)

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.annotations)

    @staticmethod
    def _to_channels_first(image):
        """Convert an (H, W) or (H, W, C) array to a 3-channel (3, H, W) array."""
        if image.ndim == 2:
            image = image[..., None]
        if image.shape[-1] in (1, 2):
            # Grayscale (optionally with alpha): replicate the luminance
            # channel so the output always has three channels.
            image = image[..., :1].repeat(3, axis=-1)
        # Drop an alpha channel if one is present.
        image = image[..., :3]
        return image.transpose(2, 0, 1)

    def __getitem__(self, index):
        """Return ``(image_tensor, label_tensor)`` for the sample at ``index``."""
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        # skimage loads images channels-last.  Resize only the two spatial
        # axes: asking resize() for (3, 256, 256) directly would interpolate
        # across the channel axis and scramble the picture instead of moving
        # the channels to the front.
        image = resize(image, self.image_size)
        image = self._to_channels_first(image)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        if self.transform:
            image = self.transform(image)
        # copy() yields a contiguous array that torch can wrap safely.
        return (torch.from_numpy(image.copy()).float(), y_label)
I changed bit_hyperrule.py accordingly. Here are the changes:
# Image size registered for each known dataset name; 'customDataset' is the
# entry added for the custom data.
known_dataset_sizes = {
    'cifar10': (32, 32),
    'cifar100': (32, 32),
    'oxford_iiit_pet': (224, 224),
    'oxford_flowers102': (224, 224),
    'imagenet2012': (224, 224),
    'customDataset': (256, 256),
}
If needed, you can change the resolution in the get_resolution function (in bit_hyperrule.py).
I changed /bit_pytorch/train.py file to load the custom data.
elif args.dataset == "imagenet2012":
train_set = tv.datasets.ImageFolder(pjoin(args.datadir, "train"), train_tx)
valid_set = tv.datasets.ImageFolder(pjoin(args.datadir, "val"), val_tx)
elif args.dataset == "customDataset":
dataset = customDataset.SignatureDataset(csv_file="customDataLoader.csv", root_dir="path to custom dataset", transform = None)
train_set, valid_set = torch.utils.data.random_split(dataset, [10841,2711]) #[training, validation]
Keep customDataLoader.csv as well as customDataset.py in the root folder (with bit_hyperrule.py).
Run the code using command: python -m bit_pytorch.train --name custom_classifier --model BiT-M-R50x1 --logdir /tmp/bit_logs --dataset customDataset.
I changed the default values (for batch_size, etc.) directly in the code.
Hope that helps!
References: To create customDataset.py, you can refer to https://www.youtube.com/watch?v=ZoZHd0Zm3RY
Here's the solution that worked for me.
I created a csv file containing path for image and its label. I wrote a custom class for data loading.
import os import pandas as pd import torch from torch.utils.data import Dataset from skimage import io from skimage.transform import resize class SignatureDataset(Dataset): def __init__(self,csv_file, root_dir, transform = None): self.annotations = pd.read_csv(csv_file) self.root_dir = root_dir self.transform = transform def __len__(self): return len(self.annotations) def __getitem__(self,index): img_path = os.path.join(self.root_dir, self.annotations.iloc[index,0]) image = io.imread(img_path) image = resize(image,(3,256,256)) y_label = torch.tensor(int(self.annotations.iloc[index,1])) if self.transform: image = self.transform(image) return (torch.from_numpy(image.copy()).float(),y_label)I changed bit_hyperrule.py accordingly. Here are the changes:
known_dataset_sizes = { 'cifar10': (32, 32), 'cifar100': (32, 32), 'oxford_iiit_pet': (224, 224), 'oxford_flowers102': (224, 224), 'imagenet2012': (224, 224), 'customDataset' : (256,256), }You can change the resolution from the get_resolution function (from bit.hyperrule.py) if needed.
I changed /bit_pytorch/train.py file to load the custom data.
elif args.dataset == "imagenet2012": train_set = tv.datasets.ImageFolder(pjoin(args.datadir, "train"), train_tx) valid_set = tv.datasets.ImageFolder(pjoin(args.datadir, "val"), val_tx) elif args.dataset == "customDataset": dataset = customDataset.SignatureDataset(csv_file="customDataLoader.csv", root_dir="path to custom dataset", transform = None) train_set, valid_set = torch.utils.data.random_split(dataset, [10841,2711]) #[training, validation]Keep customDataLoader.csv as well as customDataset.py in the root folder (with bit_hyperrule.py).
Run the code using command: python -m bit_pytorch.train --name custom_classifier --model BiT-M-R50x1 --logdir /tmp/bit_logs --dataset customDataset.
I had changed the default values (for batch_size, etc.) from the code itself.
Hope that helps. !
References: To create customDataset.py, you can refer to https://www.youtube.com/watch?v=ZoZHd0Zm3RY
I tried your solution, but it raises: AttributeError: 'MyDataset' object has no attribute 'classes'.

Here's the solution that worked for me. I created a csv file containing path for image and its label. I wrote a custom class for data loading.
import os import pandas as pd import torch from torch.utils.data import Dataset from skimage import io from skimage.transform import resize class SignatureDataset(Dataset): def __init__(self,csv_file, root_dir, transform = None): self.annotations = pd.read_csv(csv_file) self.root_dir = root_dir self.transform = transform def __len__(self): return len(self.annotations) def __getitem__(self,index): img_path = os.path.join(self.root_dir, self.annotations.iloc[index,0]) image = io.imread(img_path) image = resize(image,(3,256,256)) y_label = torch.tensor(int(self.annotations.iloc[index,1])) if self.transform: image = self.transform(image) return (torch.from_numpy(image.copy()).float(),y_label)I changed bit_hyperrule.py accordingly. Here are the changes:
known_dataset_sizes = { 'cifar10': (32, 32), 'cifar100': (32, 32), 'oxford_iiit_pet': (224, 224), 'oxford_flowers102': (224, 224), 'imagenet2012': (224, 224), 'customDataset' : (256,256), }You can change the resolution from the get_resolution function (from bit.hyperrule.py) if needed. I changed /bit_pytorch/train.py file to load the custom data.
elif args.dataset == "imagenet2012": train_set = tv.datasets.ImageFolder(pjoin(args.datadir, "train"), train_tx) valid_set = tv.datasets.ImageFolder(pjoin(args.datadir, "val"), val_tx) elif args.dataset == "customDataset": dataset = customDataset.SignatureDataset(csv_file="customDataLoader.csv", root_dir="path to custom dataset", transform = None) train_set, valid_set = torch.utils.data.random_split(dataset, [10841,2711]) #[training, validation]Keep customDataLoader.csv as well as customDataset.py in the root folder (with bit_hyperrule.py). Run the code using command: python -m bit_pytorch.train --name custom_classifier --model BiT-M-R50x1 --logdir /tmp/bit_logs --dataset customDataset. I had changed the default values (for batch_size, etc.) from the code itself. Hope that helps. ! References: To create customDataset.py, you can refer to https://www.youtube.com/watch?v=ZoZHd0Zm3RY
I have tried yours,but it raise:AttributeError: 'MyDataset' object has no attribute 'classes'.
When I tried the mentioned solution, it worked. Can you try changing line no. 205 to:
# head_size must be the number of classes.  len(valid_set[1]) is the length of
# a single (image, label) sample, i.e. always 2, so it is only right for
# two-class problems.
# NOTE(review): assumes valid_set is a Subset produced by random_split over a
# dataset exposing `annotations` with integer labels 0..K-1 in column 1 - confirm.
num_classes = int(valid_set.dataset.annotations.iloc[:, 1].max()) + 1
model = models.KNOWN_MODELS[args.model](head_size=num_classes, zero_head=True)
Here's the solution that worked for me. I created a csv file containing path for image and its label. I wrote a custom class for data loading.
import os import pandas as pd import torch from torch.utils.data import Dataset from skimage import io from skimage.transform import resize class SignatureDataset(Dataset): def __init__(self,csv_file, root_dir, transform = None): self.annotations = pd.read_csv(csv_file) self.root_dir = root_dir self.transform = transform def __len__(self): return len(self.annotations) def __getitem__(self,index): img_path = os.path.join(self.root_dir, self.annotations.iloc[index,0]) image = io.imread(img_path) image = resize(image,(3,256,256)) y_label = torch.tensor(int(self.annotations.iloc[index,1])) if self.transform: image = self.transform(image) return (torch.from_numpy(image.copy()).float(),y_label)I changed bit_hyperrule.py accordingly. Here are the changes:
known_dataset_sizes = { 'cifar10': (32, 32), 'cifar100': (32, 32), 'oxford_iiit_pet': (224, 224), 'oxford_flowers102': (224, 224), 'imagenet2012': (224, 224), 'customDataset' : (256,256), }You can change the resolution from the get_resolution function (from bit.hyperrule.py) if needed. I changed /bit_pytorch/train.py file to load the custom data.
elif args.dataset == "imagenet2012": train_set = tv.datasets.ImageFolder(pjoin(args.datadir, "train"), train_tx) valid_set = tv.datasets.ImageFolder(pjoin(args.datadir, "val"), val_tx) elif args.dataset == "customDataset": dataset = customDataset.SignatureDataset(csv_file="customDataLoader.csv", root_dir="path to custom dataset", transform = None) train_set, valid_set = torch.utils.data.random_split(dataset, [10841,2711]) #[training, validation]Keep customDataLoader.csv as well as customDataset.py in the root folder (with bit_hyperrule.py). Run the code using command: python -m bit_pytorch.train --name custom_classifier --model BiT-M-R50x1 --logdir /tmp/bit_logs --dataset customDataset. I had changed the default values (for batch_size, etc.) from the code itself. Hope that helps. ! References: To create customDataset.py, you can refer to https://www.youtube.com/watch?v=ZoZHd0Zm3RY
I have tried yours,but it raise:AttributeError: 'MyDataset' object has no attribute 'classes'.
When I tried the mentioned solution, it worked. Can you try changing line no. 205 to:
# head_size must be the number of classes.  len(valid_set[1]) is the length of
# a single (image, label) sample, i.e. always 2, so it is only right for
# two-class problems.
# NOTE(review): assumes valid_set is a Subset produced by random_split over a
# dataset exposing `annotations` with integer labels 0..K-1 in column 1 - confirm.
num_classes = int(valid_set.dataset.annotations.iloc[:, 1].max()) + 1
model = models.KNOWN_MODELS[args.model](head_size=num_classes, zero_head=True)
OK,Thanks