diff --git a/DashAI/back/initial_components.py b/DashAI/back/initial_components.py index 592735c1b..49ea258f1 100644 --- a/DashAI/back/initial_components.py +++ b/DashAI/back/initial_components.py @@ -131,6 +131,10 @@ from DashAI.back.metrics.translation.bleu import Bleu from DashAI.back.metrics.translation.chrf import Chrf from DashAI.back.metrics.translation.ter import Ter +from DashAI.back.models.cnn_image_classifier import CNNImageClassifier +from DashAI.back.models.efficientnet_b0_image_classifier import ( + EfficientNetB0ImageClassifier, +) # Models from DashAI.back.models.hugging_face.albert_transformer import AlbertTransformer @@ -203,7 +207,10 @@ XlmRobertaTransformer, ) from DashAI.back.models.hugging_face.xlnet_transformer import XlnetTransformer +from DashAI.back.models.lenet5_image_classifier import LeNet5ImageClassifier from DashAI.back.models.mlp_image_classifier import MLPImageClassifier +from DashAI.back.models.resnet18_image_classifier import ResNet18ImageClassifier +from DashAI.back.models.resnet50_image_classifier import ResNet50ImageClassifier from DashAI.back.models.scikit_learn.adaboost_classifier import AdaBoostClassifier from DashAI.back.models.scikit_learn.adaboost_regression import AdaBoostRegression from DashAI.back.models.scikit_learn.bagging_classifier import BaggingClassifier @@ -380,6 +387,11 @@ def get_initial_components(): XlmRobertaTransformer, XlnetTransformer, MLPImageClassifier, + CNNImageClassifier, + LeNet5ImageClassifier, + ResNet18ImageClassifier, + ResNet50ImageClassifier, + EfficientNetB0ImageClassifier, # Dataloaders ARFFDataLoader, CSVDataLoader, diff --git a/DashAI/back/models/base_model.py b/DashAI/back/models/base_model.py index 38877388e..a7a1d4808 100644 --- a/DashAI/back/models/base_model.py +++ b/DashAI/back/models/base_model.py @@ -1,5 +1,7 @@ """Base Model abstract class.""" +import logging +import math from abc import ABCMeta, abstractmethod from typing import TYPE_CHECKING, Any, Dict, Final, final @@ -12,6 
class TorchvisionImageClassifierSchema(BaseSchema):
    """Shared training parameters for torchvision-based image classifiers."""

    # NOTE: every field below is declared through ``schema_field(...)`` with a
    # validator (``int_field`` / ``float_field`` / ``bool_field`` /
    # ``enum_field``), a ``placeholder`` value, and a trilingual (en/es/pt)
    # ``description`` and ``alias``. The placeholders mirror the keyword
    # defaults of ``TorchvisionImageClassifier.__init__``.

    # Number of passes over the training data; constrained with ``ge=1``.
    epochs: schema_field(
        int_field(ge=1),
        placeholder=10,
        description=MultilingualString(
            en=(
                "The number of epochs to train the model. An epoch is a full "
                "iteration over the training data."
            ),
            es=(
                "El número de épocas para entrenar el modelo. Una época es una "
                "iteración completa sobre los datos de entrenamiento."
            ),
            pt=(
                "O número de épocas para treinar o modelo. Uma época é uma "
                "iteração completa sobre os dados de treinamento."
            ),
        ),
        alias=MultilingualString(en="Epochs", es="Épocas", pt="Épocas"),
    )  # type: ignore

    # Step size handed to the Adam optimizer; constrained with ``gt=0.0``.
    learning_rate: schema_field(
        float_field(gt=0.0),
        placeholder=0.001,
        description=MultilingualString(
            en="Learning rate for the Adam optimizer.",
            es="Tasa de aprendizaje para el optimizador Adam.",
            pt="Taxa de aprendizado para o otimizador Adam.",
        ),
        alias=MultilingualString(
            en="Learning rate",
            es="Tasa de aprendizaje",
            pt="Taxa de aprendizado",
        ),
    )  # type: ignore

    # Mini-batch size used by the data loaders; constrained with ``ge=1``.
    batch_size: schema_field(
        int_field(ge=1),
        placeholder=32,
        description=MultilingualString(
            en=(
                "Number of images processed together in each training step. "
                "Larger values speed up training but require more memory."
            ),
            es=(
                "Número de imágenes procesadas juntas en cada paso de "
                "entrenamiento. Valores más grandes aceleran el entrenamiento "
                "pero requieren más memoria."
            ),
            pt=(
                "Número de imagens processadas juntas em cada etapa de "
                "treinamento. Valores maiores aceleram o treinamento "
                "mas requerem mais memória."
            ),
        ),
        alias=MultilingualString(
            en="Batch size", es="Tamaño de lote", pt="Tamanho do lote"
        ),
    )  # type: ignore

    # Side length (pixels) images are resized to; constrained with ``ge=32``.
    image_size: schema_field(
        int_field(ge=32),
        placeholder=224,
        description=MultilingualString(
            en=(
                "Images are resized to this value (in pixels) for both width "
                "and height. Use 224 for ImageNet-pretrained models."
            ),
            es=(
                "Las imágenes se redimensionan a este valor (en píxeles) tanto "
                "en ancho como en alto. Use 224 para modelos preentrenados "
                "en ImageNet."
            ),
            pt=(
                "As imagens são redimensionadas para este valor (em pixels) tanto "
                "em largura quanto em altura. Use 224 para modelos pré-treinados "
                "no ImageNet."
            ),
        ),
        alias=MultilingualString(
            en="Image size", es="Tamaño de imagen", pt="Tamanho da imagem"
        ),
    )  # type: ignore

    # Dropout probability; constrained to the half-open range [0.0, 1.0).
    dropout_rate: schema_field(
        float_field(ge=0.0, lt=1.0),
        placeholder=0.0,
        description=MultilingualString(
            en=(
                "Dropout rate applied before the output layer. "
                "Values between 0.2 and 0.5 help prevent overfitting."
            ),
            es=(
                "Tasa de dropout aplicada antes de la capa de salida. "
                "Valores entre 0.2 y 0.5 ayudan a prevenir el sobreajuste."
            ),
            pt=(
                "Taxa de dropout aplicada antes da camada de saída. "
                "Valores entre 0.2 e 0.5 ajudam a prevenir o sobreajuste."
            ),
        ),
        alias=MultilingualString(
            en="Dropout rate", es="Tasa de dropout", pt="Taxa de dropout"
        ),
    )  # type: ignore

    # ``weight_decay`` argument of the Adam optimizer; constrained with ``ge=0.0``.
    weight_decay: schema_field(
        float_field(ge=0.0),
        placeholder=0.0,
        description=MultilingualString(
            en=(
                "L2 regularization coefficient for the Adam optimizer. "
                "Typical values: 1e-4 to 1e-2."
            ),
            es=(
                "Coeficiente de regularización L2 para el optimizador Adam. "
                "Valores típicos: 1e-4 a 1e-2."
            ),
            pt=(
                "Coeficiente de regularização L2 para o otimizador Adam. "
                "Valores típicos: 1e-4 a 1e-2."
            ),
        ),
        alias=MultilingualString(
            en="Weight decay", es="Decaimiento de pesos", pt="Decaimento de pesos"
        ),
    )  # type: ignore

    # Forwarded to ``_build_backbone(num_classes, pretrained)`` during training.
    pretrained: schema_field(
        bool_field(),
        placeholder=True,
        description=MultilingualString(
            en=(
                "If True, loads weights pre-trained on ImageNet. "
                "Recommended when your dataset is small or similar to natural images."
            ),
            es=(
                "Si es True, carga pesos preentrenados en ImageNet. "
                "Recomendado cuando el dataset es pequeño o similar "
                "a imágenes naturales."
            ),
            pt=(
                "Se True, carrega pesos pré-treinados no ImageNet. "
                "Recomendado quando o conjunto de dados é pequeno ou similar "
                "a imagens naturais."
            ),
        ),
        alias=MultilingualString(en="Pretrained", es="Preentrenado", pt="Pré-treinado"),
    )  # type: ignore

    # When set, training disables gradients everywhere except the module
    # returned by the classifier's ``_classifier_head()``.
    freeze_backbone: schema_field(
        bool_field(),
        placeholder=False,
        description=MultilingualString(
            en=(
                "If True, freezes the convolutional backbone and only trains "
                "the classifier head. Useful for very small datasets."
            ),
            es=(
                "Si es True, congela el backbone convolucional y solo entrena "
                "el clasificador final. Útil para datasets muy pequeños."
            ),
            pt=(
                "Se True, congela o backbone convolucional e treina apenas "
                "o classificador final. Útil para conjuntos de dados muito pequenos."
            ),
        ),
        alias=MultilingualString(
            en="Freeze backbone",
            es="Congelar backbone",
            pt="Congelar backbone",
        ),
    )  # type: ignore

    # One of ``DEVICE_ENUM``; the classifier maps it to a torch device through
    # ``DEVICE_TO_IDX`` (falling back to CPU for non-GPU entries).
    device: schema_field(
        enum_field(enum=DEVICE_ENUM),
        placeholder=DEVICE_PLACEHOLDER,
        description=MultilingualString(
            en="Hardware device used for training and inference (CPU/GPU).",
            es="Dispositivo de hardware para entrenamiento e inferencia (CPU/GPU).",
            pt="Dispositivo de hardware usado para treinamento e inferência (CPU/GPU).",
        ),
        alias=MultilingualString(en="Device", es="Dispositivo", pt="Dispositivo"),
    )  # type: ignore
class TorchvisionImageClassifier(BaseModel, abc.ABC):
    """Abstract base for torchvision image classifiers.

    Subclasses must implement:
    - ``_build_backbone(num_classes, pretrained)`` — return the adapted model.
    - ``_classifier_head()`` — return the head module unfrozen when
      ``freeze_backbone=True``.

    The base class supplies the shared training loop, inference, label
    encoding and checkpoint save/load logic.
    """

    SCHEMA = TorchvisionImageClassifierSchema
    COMPATIBLE_COMPONENTS = ["ImageClassificationTask"]

    @abc.abstractmethod
    def _build_backbone(self, num_classes: int, pretrained: bool):
        """Build and return the adapted torchvision model."""

    @abc.abstractmethod
    def _classifier_head(self):
        """Return the classifier head module (kept trainable when freezing)."""

    @staticmethod
    def _collate_fn_with_labels(batch):
        """Stack ``(image, label_idx)`` pairs into an image batch and a label tensor."""
        import torch

        images = torch.stack([item[0] for item in batch])
        labels = torch.tensor([item[1] for item in batch], dtype=torch.long)
        return images, labels

    @staticmethod
    def _collate_fn_no_labels(batch):
        """Stack a list of image tensors into a single batch tensor."""
        import torch

        return torch.stack(batch)

    def __init__(
        self,
        epochs=10,
        learning_rate=0.001,
        batch_size=32,
        image_size=224,
        dropout_rate=0.0,
        weight_decay=0.0,
        pretrained=True,
        freeze_backbone=False,
        device=DEVICE_PLACEHOLDER,
        **kwargs,
    ):
        """Store the hyper-parameters and resolve the torch device.

        Parameters
        ----------
        epochs : int
            Number of passes over the training data.
        learning_rate : float
            Learning rate for the Adam optimizer.
        batch_size : int
            Number of images per batch for training and inference.
        image_size : int
            Images are resized to ``image_size x image_size`` pixels.
        dropout_rate : float
            Stored on the instance for subclasses to use when building the
            model.
        weight_decay : float
            ``weight_decay`` argument for the Adam optimizer.
        pretrained : bool
            Forwarded to ``_build_backbone`` when training starts.
        freeze_backbone : bool
            If True, only the module returned by ``_classifier_head`` is
            trained.
        device : str
            Key of ``DEVICE_TO_IDX``; entries with a non-negative index select
            ``cuda:<index>``, anything else selects the CPU.
        **kwargs
            Accepted and ignored.
        """
        import torch

        self.epochs = epochs
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.image_size = image_size
        self.dropout_rate = dropout_rate
        self.weight_decay = weight_decay
        self.pretrained = pretrained
        self.freeze_backbone = freeze_backbone
        # Keep the raw option so checkpoints can round-trip the user's choice.
        self._device_name = device
        self.device = torch.device(
            f"cuda:{DEVICE_TO_IDX.get(device)}"
            if DEVICE_TO_IDX.get(device, -1) >= 0
            else "cpu"
        )
        self.model = None
        self.optimizer = None
        self.num_classes = None
        self.idx_to_label = {}
        self.label_to_idx = {}

    def _freeze_backbone_params(self):
        """Disable gradients everywhere except in the classifier head."""
        for p in self.model.parameters():
            p.requires_grad = False
        for p in self._classifier_head().parameters():
            p.requires_grad = True

    def _make_optimizer(self):
        """Create the Adam optimizer over the currently trainable parameters.

        Shared by ``train`` and ``load`` so both build the optimizer over the
        same parameter set; otherwise a saved optimizer state cannot be
        restored.
        """
        import torch.optim as optim

        return optim.Adam(
            (p for p in self.model.parameters() if p.requires_grad),
            lr=self.learning_rate,
            weight_decay=self.weight_decay,
        )

    def prepare_output(self, dataset, is_fit=False):
        """Encode string labels to integer indices matching the model's class order.

        Parameters
        ----------
        dataset : DashAIDataset
            Single-column dataset holding the labels (the first column is used).
        is_fit : bool, optional
            Unused by this implementation.

        Returns
        -------
        DashAIDataset
            The input dataset unchanged when no label mapping has been learned
            yet, otherwise a new dataset with the encoded column.
        """
        import pyarrow as pa

        from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset

        if not self.label_to_idx:
            return dataset

        col_name = dataset.column_names[0]
        # NOTE(review): labels unseen during training pass through unchanged,
        # so a split containing such labels can yield a mixed-type column.
        encoded = [self.label_to_idx.get(lbl, lbl) for lbl in dataset[col_name]]
        return DashAIDataset(pa.table({col_name: encoded}))

    def train(self, x_train, y_train, x_validation=None, y_validation=None):
        """Fine-tune the backbone on the provided image dataset.

        Parameters
        ----------
        x_train : DashAIDataset
            Input dataset containing images.
        y_train : DashAIDataset
            Target dataset containing string labels.
        x_validation : DashAIDataset, optional
            Validation input features. Defaults to None.
        y_validation : DashAIDataset, optional
            Validation target labels. Defaults to None.

        Returns
        -------
        TorchvisionImageClassifier
            The trained model instance.
        """
        import torch
        import torch.nn as nn
        import torch.utils.data

        from DashAI.back.core.enums.metrics import LevelEnum, SplitEnum

        image_dataset = _make_image_dataset(
            x_train, y_dataset=y_train, image_size=self.image_size
        )
        # The label mapping must be stored before metrics are computed because
        # ``prepare_output`` relies on it.
        self.num_classes = image_dataset.num_classes()
        self.idx_to_label = image_dataset.idx_to_label
        self.label_to_idx = image_dataset.label_to_idx

        train_loader = torch.utils.data.DataLoader(
            image_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            collate_fn=self._collate_fn_with_labels,
        )

        self.model = self._build_backbone(self.num_classes, self.pretrained).to(
            self.device
        )

        if self.freeze_backbone:
            self._freeze_backbone_params()

        criterion = nn.CrossEntropyLoss()
        self.optimizer = self._make_optimizer()

        for epoch in range(self.epochs):
            self.model.train()
            for images, labels in train_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()
                loss = criterion(self.model(images), labels)
                loss.backward()
                self.optimizer.step()

            # Switch to eval mode before the per-epoch metric computation.
            self.model.eval()
            self.calculate_metrics(
                split=SplitEnum.TRAIN,
                level=LevelEnum.EPOCH,
                x_data=x_train,
                y_data=y_train,
                log_index=epoch + 1,
            )
            if x_validation is not None:
                self.calculate_metrics(
                    split=SplitEnum.VALIDATION,
                    level=LevelEnum.EPOCH,
                    x_data=x_validation,
                    y_data=y_validation,
                    log_index=epoch + 1,
                )

        return self

    def predict(self, x):
        """Return per-class probability matrix for each image.

        Parameters
        ----------
        x : DashAIDataset
            Input dataset containing images.

        Returns
        -------
        np.ndarray
            Array of shape (n_samples, n_classes) with softmax probabilities.
            An empty input yields an array of shape (0, n_classes).
        """
        import numpy as np
        import torch
        import torch.utils.data

        image_dataset = _make_image_dataset(
            x, y_dataset=None, image_size=self.image_size
        )
        loader = torch.utils.data.DataLoader(
            image_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            collate_fn=self._collate_fn_no_labels,
        )

        self.model.eval()
        all_probs = []
        with torch.no_grad():
            for images in loader:
                logits = self.model(images.to(self.device))
                all_probs.append(torch.softmax(logits, dim=1).cpu().numpy())

        if not all_probs:
            # ``np.concatenate`` rejects an empty sequence; keep the 2-D contract.
            return np.empty((0, self.num_classes or 0), dtype=np.float32)
        return np.concatenate(all_probs, axis=0)

    def save(self, filename: str) -> None:
        """Save the model checkpoint to disk.

        The checkpoint stores the model and optimizer state dicts, every
        hyper-parameter, and the label mappings.

        Parameters
        ----------
        filename : str
            Path where the checkpoint will be saved.
        """
        import torch

        torch.save(
            {
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.optimizer.state_dict(),
                "epochs": self.epochs,
                "learning_rate": self.learning_rate,
                "batch_size": self.batch_size,
                "image_size": self.image_size,
                "dropout_rate": self.dropout_rate,
                "weight_decay": self.weight_decay,
                "pretrained": self.pretrained,
                "freeze_backbone": self.freeze_backbone,
                "device_name": self._device_name,
                "num_classes": self.num_classes,
                "idx_to_label": self.idx_to_label,
                "label_to_idx": self.label_to_idx,
            },
            filename,
        )

    @classmethod
    def load(cls, filename: str):
        """Load a model checkpoint from disk.

        Parameters
        ----------
        filename : str
            Path to the checkpoint file.

        Returns
        -------
        TorchvisionImageClassifier
            Instance with loaded weights, placed on the configured device
            (or on the CPU when the saved CUDA device is not available).
        """
        import torch

        ckpt = torch.load(filename, map_location=torch.device("cpu"))
        instance = cls(
            epochs=ckpt["epochs"],
            learning_rate=ckpt["learning_rate"],
            batch_size=ckpt.get("batch_size", 32),
            image_size=ckpt.get("image_size", 224),
            dropout_rate=ckpt.get("dropout_rate", 0.0),
            weight_decay=ckpt.get("weight_decay", 0.0),
            # Weights come from the checkpoint, so never fetch ImageNet weights.
            pretrained=False,
            freeze_backbone=ckpt.get("freeze_backbone", False),
            device=ckpt.get("device_name", DEVICE_PLACEHOLDER),
        )
        instance.num_classes = ckpt["num_classes"]
        instance.idx_to_label = ckpt.get("idx_to_label", {})
        instance.label_to_idx = ckpt.get("label_to_idx", {})
        instance.model = instance._build_backbone(
            instance.num_classes, pretrained=False
        )
        instance.model.load_state_dict(ckpt["model_state_dict"])

        # ``predict`` sends inputs to ``instance.device``, so the weights must
        # live there too. Fall back to CPU if the checkpoint was written on a
        # GPU machine and no GPU is available here.
        if instance.device.type == "cuda" and not torch.cuda.is_available():
            instance.device = torch.device("cpu")
        instance.model.to(instance.device)

        # Re-apply freezing BEFORE creating the optimizer: the saved optimizer
        # state only covers the parameters that were trainable, and
        # ``load_state_dict`` rejects a parameter group of a different size.
        if instance.freeze_backbone:
            instance._freeze_backbone_params()

        instance.optimizer = instance._make_optimizer()
        instance.optimizer.load_state_dict(ckpt["optimizer_state_dict"])
        return instance
train the model. An epoch is a full " + "iteration over the training data." + ), + es=( + "El número de épocas para entrenar el modelo. Una época es una " + "iteración completa sobre los datos de entrenamiento." + ), + pt=( + "O número de épocas para treinar o modelo. Uma época é uma " + "iteração completa sobre os dados de treinamento." + ), + ), + alias=MultilingualString(en="Epochs", es="Épocas", pt="Épocas"), + ) # type: ignore + + learning_rate: schema_field( + float_field(gt=0.0), + placeholder=0.001, + description=MultilingualString( + en="Learning rate for the Adam optimizer.", + es="Tasa de aprendizaje para el optimizador Adam.", + pt="Taxa de aprendizado para o otimizador Adam.", + ), + alias=MultilingualString( + en="Learning rate", + es="Tasa de aprendizaje", + pt="Taxa de aprendizado", + ), + ) # type: ignore + + batch_size: schema_field( + int_field(ge=1), + placeholder=32, + description=MultilingualString( + en=( + "Number of images processed together in each training step. " + "Larger values speed up training but require more memory." + ), + es=( + "Número de imágenes procesadas juntas en cada paso de " + "entrenamiento. Valores más grandes aceleran el entrenamiento " + "pero requieren más memoria." + ), + pt=( + "Número de imagens processadas juntas em cada etapa de " + "treinamento. Valores maiores aceleram o treinamento " + "mas requerem mais memória." + ), + ), + alias=MultilingualString( + en="Batch size", es="Tamaño de lote", pt="Tamanho do lote" + ), + ) # type: ignore + + image_size: schema_field( + int_field(ge=8), + placeholder=64, + description=MultilingualString( + en=( + "Images are resized to this value (in pixels) for both width " + "and height before training. Must be at least 2^num_conv_blocks." + ), + es=( + "Las imágenes se redimensionan a este valor (en píxeles) tanto " + "en ancho como en alto. Debe ser al menos 2^num_conv_blocks." 
+ ), + pt=( + "As imagens são redimensionadas para este valor (em pixels) tanto " + "em largura quanto em altura. Deve ser pelo menos 2^num_conv_blocks." + ), + ), + alias=MultilingualString( + en="Image size", es="Tamaño de imagen", pt="Tamanho da imagem" + ), + ) # type: ignore + + num_conv_blocks: schema_field( + int_field(ge=1, le=5), + placeholder=3, + description=MultilingualString( + en=( + "Number of convolutional blocks. Each block applies a " + "convolution, ReLU activation, and max-pooling that halves " + "the spatial dimensions." + ), + es=( + "Número de bloques convolucionales. Cada bloque aplica una " + "convolución, activación ReLU y max-pooling que reduce a la " + "mitad las dimensiones espaciales." + ), + pt=( + "Número de blocos convolucionais. Cada bloco aplica uma " + "convolução, ativação ReLU e max-pooling que reduz à metade " + "as dimensões espaciais." + ), + ), + alias=MultilingualString( + en="Number of conv blocks", + es="Número de bloques conv", + pt="Número de blocos conv", + ), + ) # type: ignore + + initial_filters: schema_field( + int_field(ge=8), + placeholder=32, + description=MultilingualString( + en=( + "Number of filters in the first convolutional block. " + "Each subsequent block doubles this number." + ), + es=( + "Número de filtros en el primer bloque convolucional. " + "Cada bloque siguiente duplica este número." + ), + pt=( + "Número de filtros no primeiro bloco convolucional. " + "Cada bloco subsequente dobra este número." + ), + ), + alias=MultilingualString( + en="Initial filters", es="Filtros iniciales", pt="Filtros iniciais" + ), + ) # type: ignore + + dropout_rate: schema_field( + float_field(ge=0.0, lt=1.0), + placeholder=0.0, + description=MultilingualString( + en=( + "Fraction of neurons randomly deactivated before the output " + "layer. Values between 0.2 and 0.5 help prevent overfitting. " + "Use 0.0 to disable." + ), + es=( + "Fracción de neuronas desactivadas aleatoriamente antes de la " + "capa de salida. 
def _make_image_dataset(x_dataset, y_dataset=None, image_size=64):
    """Wrap image (and optional label) datasets in a torch ``Dataset``.

    Parameters
    ----------
    x_dataset
        Dataset whose first feature column holds the images; each cell must
        expose ``to_pil()``.
    y_dataset, optional
        Dataset whose first feature column holds the labels. When omitted the
        resulting dataset yields image tensors only.
    image_size : int
        Images are resized to ``image_size x image_size`` before being
        converted to tensors.

    Returns
    -------
    torch.utils.data.Dataset
        Dataset exposing ``label_to_idx``, ``idx_to_label``, ``tensor_shape``
        (shape of the first transformed image) and ``num_classes()``.
    """
    import torch.utils.data
    from torchvision import transforms

    class _ImageDataset(torch.utils.data.Dataset):
        def __init__(self, x_ds, y_ds, img_size):
            self.x_dataset = x_ds
            self.y_dataset = y_ds
            resize = transforms.Resize((img_size, img_size))
            self.transforms = transforms.Compose([resize, transforms.ToTensor()])

            # The first feature column of each dataset is the one that is used.
            self.image_col_name = list(x_ds.features.keys())[0]
            if y_ds is not None:
                self.label_col_name = list(y_ds.features.keys())[0]
            else:
                self.label_col_name = None

            self.label_to_idx = {}
            self.idx_to_label = {}
            if self.label_col_name:
                # Sorted unique labels give a deterministic class order.
                classes = sorted(set(self.y_dataset[self.label_col_name]))
                self.label_to_idx = {
                    name: position for position, name in enumerate(classes)
                }
                self.idx_to_label = {
                    position: name for name, position in self.label_to_idx.items()
                }

            # Probe the first image so callers can read the channel count.
            first_image = self.x_dataset[0][self.image_col_name].to_pil()
            self.tensor_shape = self.transforms(first_image).shape

        def num_classes(self):
            """Return the number of distinct labels (0 without labels)."""
            return 0 if self.label_col_name is None else len(self.label_to_idx)

        def __len__(self):
            return len(self.x_dataset)

        def __getitem__(self, idx):
            pil_image = self.x_dataset[idx][self.image_col_name].to_pil()
            tensor = self.transforms(pil_image)
            if self.label_col_name is None:
                return tensor
            raw_label = self.y_dataset[idx][self.label_col_name]
            return tensor, self.label_to_idx[raw_label]

    return _ImageDataset(x_dataset, y_dataset, image_size)
initial_filters, + dropout_rate, + ) + + +class CNNImageClassifier(BaseModel): + """CNN-based image classifier. + + A convolutional neural network with configurable depth and width that + learns spatial features hierarchically via conv→ReLU→pool blocks, + followed by a dropout-regularized linear output layer. + """ + + SCHEMA = CNNImageClassifierSchema + COMPATIBLE_COMPONENTS = ["ImageClassificationTask"] + DISPLAY_NAME: str = MultilingualString( + en="CNN Image Classifier", + es="Clasificador de Imágenes CNN", + pt="Classificador de Imagens CNN", + ) + DESCRIPTION: str = MultilingualString( + en=( + "A Convolutional Neural Network (CNN) image classifier that learns " + "spatial features through configurable conv→ReLU→pool blocks, " + "with filters doubling at each stage." + ), + es=( + "Un clasificador de imágenes basado en Red Neuronal Convolucional " + "(CNN) que aprende características espaciales mediante bloques " + "conv→ReLU→pool configurables, duplicando los filtros en cada etapa." + ), + pt=( + "Um classificador de imagens baseado em Rede Neural Convolucional " + "(CNN) que aprende características espaciais por meio de blocos " + "conv→ReLU→pool configuráveis, dobrando os filtros em cada etapa." 
+ ), + ) + COLOR: str = "#1565C0" + ICON: str = "Layers" + + @staticmethod + def _collate_fn_with_labels(batch): + import torch + + images = torch.stack([item[0] for item in batch]) + labels = torch.tensor([item[1] for item in batch], dtype=torch.long) + return images, labels + + @staticmethod + def _collate_fn_no_labels(batch): + import torch + + return torch.stack(batch) + + def __init__( + self, + epochs=10, + learning_rate=0.001, + batch_size=32, + image_size=64, + num_conv_blocks=3, + initial_filters=32, + dropout_rate=0.0, + weight_decay=0.0, + device=DEVICE_PLACEHOLDER, + **kwargs, + ): + import torch + + self.epochs = epochs + self.learning_rate = learning_rate + self.batch_size = batch_size + self.image_size = image_size + self.num_conv_blocks = num_conv_blocks + self.initial_filters = initial_filters + self.dropout_rate = dropout_rate + self.weight_decay = weight_decay + self._device_name = device + self.device = torch.device( + f"cuda:{DEVICE_TO_IDX.get(device)}" + if DEVICE_TO_IDX.get(device, -1) >= 0 + else "cpu" + ) + self.model = None + self.optimizer = None + self.input_channels = None + self.num_classes = None + self.idx_to_label = {} + self.label_to_idx = {} + + def _validate_architecture(self): + min_size = 2**self.num_conv_blocks + if self.image_size < min_size: + raise ValueError( + f"image_size ({self.image_size}) must be at least " + f"2^num_conv_blocks = {min_size} " + f"for {self.num_conv_blocks} convolutional block(s)." 
+ ) + + def prepare_output(self, dataset, is_fit=False): + """Encode string labels to integer indices matching the model's class order.""" + import pyarrow as pa + + from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset + + if not self.label_to_idx: + return dataset + + col_name = dataset.column_names[0] + encoded = [self.label_to_idx.get(lbl, lbl) for lbl in dataset[col_name]] + return DashAIDataset(pa.table({col_name: encoded})) + + def train(self, x_train, y_train, x_validation=None, y_validation=None): + """Train the CNN on the provided image dataset. + + Parameters + ---------- + x_train : DashAIDataset + Input dataset containing images. + y_train : DashAIDataset + Target dataset containing labels. + x_validation : DashAIDataset, optional + Validation input features. Defaults to None. + y_validation : DashAIDataset, optional + Validation target labels. Defaults to None. + + Returns + ------- + CNNImageClassifier + The trained model instance. + """ + import torch + import torch.nn as nn + import torch.optim as optim + import torch.utils.data + + from DashAI.back.core.enums.metrics import LevelEnum, SplitEnum + + self._validate_architecture() + + image_dataset = _make_image_dataset( + x_train, y_dataset=y_train, image_size=self.image_size + ) + self.input_channels = image_dataset.tensor_shape[0] + self.num_classes = image_dataset.num_classes() + self.idx_to_label = image_dataset.idx_to_label + self.label_to_idx = image_dataset.label_to_idx + + train_loader = torch.utils.data.DataLoader( + image_dataset, + batch_size=self.batch_size, + shuffle=True, + collate_fn=self._collate_fn_with_labels, + ) + + self.model = _build_cnn_model( + self.input_channels, + self.image_size, + self.num_classes, + self.num_conv_blocks, + self.initial_filters, + self.dropout_rate, + ).to(self.device) + + criterion = nn.CrossEntropyLoss() + self.optimizer = optim.Adam( + self.model.parameters(), + lr=self.learning_rate, + weight_decay=self.weight_decay, + ) + + for 
epoch in range(self.epochs): + self.model.train() + for images, labels in train_loader: + images, labels = images.to(self.device), labels.to(self.device) + self.optimizer.zero_grad() + loss = criterion(self.model(images), labels) + loss.backward() + self.optimizer.step() + + self.model.eval() + self.calculate_metrics( + split=SplitEnum.TRAIN, + level=LevelEnum.EPOCH, + x_data=x_train, + y_data=y_train, + log_index=epoch + 1, + ) + if x_validation is not None: + self.calculate_metrics( + split=SplitEnum.VALIDATION, + level=LevelEnum.EPOCH, + x_data=x_validation, + y_data=y_validation, + log_index=epoch + 1, + ) + + return self + + def predict(self, x): + """Return per-class probability matrix for each image. + + Parameters + ---------- + x : DashAIDataset + Input dataset containing images. + + Returns + ------- + np.ndarray + Array of shape (n_samples, n_classes) with softmax probabilities. + """ + import numpy as np + import torch + import torch.utils.data + + image_dataset = _make_image_dataset( + x, y_dataset=None, image_size=self.image_size + ) + loader = torch.utils.data.DataLoader( + image_dataset, + batch_size=self.batch_size, + shuffle=False, + collate_fn=self._collate_fn_no_labels, + ) + + self.model.eval() + all_probs = [] + with torch.no_grad(): + for images in loader: + logits = self.model(images.to(self.device)) + all_probs.append(torch.softmax(logits, dim=1).cpu().numpy()) + + return np.concatenate(all_probs, axis=0) + + def save(self, filename: str) -> None: + """Save the model checkpoint to disk. + + Parameters + ---------- + filename : str + Path where the checkpoint will be saved. 
@classmethod
def load(cls, filename: str):
    """Restore a classifier from a checkpoint file.

    Parameters
    ----------
    filename : str
        Path to the checkpoint file.

    Returns
    -------
    CNNImageClassifier
        Instance with restored weights and optimizer state. The network is
        placed on the instance's device so it can be used for prediction
        right away.
    """
    import torch
    import torch.optim as optim

    # Deserialize onto CPU first so reading the file does not depend on the
    # device the checkpoint was written from.
    ckpt = torch.load(filename, map_location=torch.device("cpu"))
    instance = cls(
        epochs=ckpt["epochs"],
        learning_rate=ckpt["learning_rate"],
        batch_size=ckpt.get("batch_size", 32),
        image_size=ckpt.get("image_size", 64),
        num_conv_blocks=ckpt.get("num_conv_blocks", 3),
        initial_filters=ckpt.get("initial_filters", 32),
        dropout_rate=ckpt.get("dropout_rate", 0.0),
        weight_decay=ckpt.get("weight_decay", 0.0),
        device=ckpt.get("device_name", DEVICE_PLACEHOLDER),
    )
    instance.input_channels = ckpt["input_channels"]
    instance.num_classes = ckpt["num_classes"]
    instance.idx_to_label = ckpt.get("idx_to_label", {})
    instance.label_to_idx = ckpt.get("label_to_idx", {})
    instance.model = _build_cnn_model(
        instance.input_channels,
        instance.image_size,
        instance.num_classes,
        instance.num_conv_blocks,
        instance.initial_filters,
        instance.dropout_rate,
    )
    instance.model.load_state_dict(ckpt["model_state_dict"])

    # predict() sends its inputs to ``self.device``; the rebuilt network must
    # live there too, otherwise inference fails with a device mismatch.
    target = torch.device(instance.device)
    if target.type == "cuda" and not torch.cuda.is_available():
        # Checkpoint recorded a GPU device but none is usable here: keep the
        # model loadable by falling back to CPU.
        target = torch.device("cpu")
        instance.device = target
    instance.model.to(target)

    # Build the optimizer after the move so it references the parameters on
    # their final device; hyper-parameters such as the learning rate come
    # back with the saved optimizer state.
    instance.optimizer = optim.Adam(
        instance.model.parameters(),
        weight_decay=instance.weight_decay,
    )
    instance.optimizer.load_state_dict(ckpt["optimizer_state_dict"])
    return instance
class LeNet5ImageClassifierSchema(BaseSchema):
    """Configuration parameters for the LeNet-5 Image Classifier.

    Each field pairs a validated type with a placeholder value and with a
    description and alias given in English, Spanish and Portuguese.
    """

    # Number of training epochs; integer >= 1, placeholder 10.
    epochs: schema_field(
        int_field(ge=1),
        placeholder=10,
        description=MultilingualString(
            en=(
                "The number of epochs to train the model. An epoch is a full "
                "iteration over the training data."
            ),
            es=(
                "El número de épocas para entrenar el modelo. Una época es una "
                "iteración completa sobre los datos de entrenamiento."
            ),
            pt=(
                "O número de épocas para treinar o modelo. Uma época é uma "
                "iteração completa sobre os dados de treinamento."
            ),
        ),
        alias=MultilingualString(en="Epochs", es="Épocas", pt="Épocas"),
    )  # type: ignore

    # Adam learning rate; float strictly greater than 0, placeholder 0.001.
    learning_rate: schema_field(
        float_field(gt=0.0),
        placeholder=0.001,
        description=MultilingualString(
            en="Learning rate for the Adam optimizer.",
            es="Tasa de aprendizaje para el optimizador Adam.",
            pt="Taxa de aprendizado para o otimizador Adam.",
        ),
        alias=MultilingualString(
            en="Learning rate",
            es="Tasa de aprendizaje",
            pt="Taxa de aprendizado",
        ),
    )  # type: ignore

    # Images per training step; integer >= 1, placeholder 32.
    batch_size: schema_field(
        int_field(ge=1),
        placeholder=32,
        description=MultilingualString(
            en=(
                "Number of images processed together in each training step. "
                "Larger values speed up training but require more memory."
            ),
            es=(
                "Número de imágenes procesadas juntas en cada paso de "
                "entrenamiento. Valores más grandes aceleran el entrenamiento "
                "pero requieren más memoria."
            ),
            pt=(
                "Número de imagens processadas juntas em cada etapa de "
                "treinamento. Valores maiores aceleram o treinamento "
                "mas requerem mais memória."
            ),
        ),
        alias=MultilingualString(
            en="Batch size", es="Tamaño de lote", pt="Tamanho do lote"
        ),
    )  # type: ignore

    # Side length images are resized to; integer >= 16, placeholder 32.
    image_size: schema_field(
        int_field(ge=16),
        placeholder=32,
        description=MultilingualString(
            en=(
                "Images are resized to this value (in pixels) for both width "
                "and height. The original LeNet-5 uses 32×32."
            ),
            es=(
                "Las imágenes se redimensionan a este valor (en píxeles) tanto "
                "en ancho como en alto. El LeNet-5 original usa 32×32."
            ),
            pt=(
                "As imagens são redimensionadas para este valor (em pixels) tanto "
                "em largura quanto em altura. O LeNet-5 original usa 32×32."
            ),
        ),
        alias=MultilingualString(
            en="Image size", es="Tamaño de imagen", pt="Tamanho da imagem"
        ),
    )  # type: ignore

    # Dropout probability; float in [0.0, 1.0), placeholder 0.0.
    dropout_rate: schema_field(
        float_field(ge=0.0, lt=1.0),
        placeholder=0.0,
        description=MultilingualString(
            en=(
                "Dropout rate applied between fully-connected layers. "
                "Values between 0.2 and 0.5 help prevent overfitting. "
                "Use 0.0 to reproduce the original LeNet-5."
            ),
            es=(
                "Tasa de dropout entre las capas completamente conectadas. "
                "Valores entre 0.2 y 0.5 ayudan a prevenir el sobreajuste. "
                "Use 0.0 para reproducir el LeNet-5 original."
            ),
            pt=(
                "Taxa de dropout aplicada entre as camadas completamente conectadas. "
                "Valores entre 0.2 e 0.5 ajudam a prevenir o sobreajuste. "
                "Use 0.0 para reproduzir o LeNet-5 original."
            ),
        ),
        alias=MultilingualString(
            en="Dropout rate", es="Tasa de dropout", pt="Taxa de dropout"
        ),
    )  # type: ignore

    # Adam weight decay (L2 coefficient); float >= 0.0, placeholder 0.0.
    weight_decay: schema_field(
        float_field(ge=0.0),
        placeholder=0.0,
        description=MultilingualString(
            en=(
                "L2 regularization coefficient for the Adam optimizer. "
                "Typical values: 1e-4 to 1e-2."
            ),
            es=(
                "Coeficiente de regularización L2 para el optimizador Adam. "
                "Valores típicos: 1e-4 a 1e-2."
            ),
            pt=(
                "Coeficiente de regularização L2 para o otimizador Adam. "
                "Valores típicos: 1e-4 a 1e-2."
            ),
        ),
        alias=MultilingualString(
            en="Weight decay", es="Decaimiento de pesos", pt="Decaimento de pesos"
        ),
    )  # type: ignore

    # Compute device; one of DEVICE_ENUM, placeholder DEVICE_PLACEHOLDER.
    device: schema_field(
        enum_field(enum=DEVICE_ENUM),
        placeholder=DEVICE_PLACEHOLDER,
        description=MultilingualString(
            en="Hardware device used for training and inference (CPU/GPU).",
            es="Dispositivo de hardware para entrenamiento e inferencia (CPU/GPU).",
            pt="Dispositivo de hardware usado para treinamento e inferência (CPU/GPU).",
        ),
        alias=MultilingualString(en="Device", es="Dispositivo", pt="Dispositivo"),
    )  # type: ignore
def _build_lenet5_model(input_channels, input_size, num_classes, dropout_rate):
    """Build a LeNet-5 network for square input images.

    Parameters
    ----------
    input_channels : int
        Number of channels of the input images.
    input_size : int
        Height and width of the (square) input images, in pixels. Must be at
        least 16 so the second pooling stage still produces an output.
    num_classes : int
        Number of output logits.
    dropout_rate : float
        Dropout probability applied after each hidden fully-connected layer.

    Returns
    -------
    torch.nn.Module
        Network with a ``conv_layers`` feature extractor and a ``classifier``
        head; ``forward`` returns raw logits of shape (batch, num_classes).

    Raises
    ------
    ValueError
        If ``input_size`` is smaller than 16.
    """
    import torch
    import torch.nn as nn

    # Two (5x5 conv -> 2x2 pool) stages reduce the side length to
    # ((s - 4) // 2 - 4) // 2, which is 0 for s < 16. Fail early with a clear
    # message instead of an opaque error from inside torch.
    min_input_size = 16
    if input_size < min_input_size:
        raise ValueError(
            f"LeNet-5 requires input_size >= {min_input_size}, "
            f"got {input_size}."
        )

    class _LeNet5(nn.Module):
        def __init__(self, in_ch, in_sz, n_cls, drop_r):
            super().__init__()
            self.conv_layers = nn.Sequential(
                nn.Conv2d(in_ch, 6, kernel_size=5),
                nn.Tanh(),
                nn.AvgPool2d(kernel_size=2, stride=2),
                nn.Conv2d(6, 16, kernel_size=5),
                nn.Tanh(),
                nn.AvgPool2d(kernel_size=2, stride=2),
            )

            # Run one dummy image through the extractor to learn the
            # flattened feature size; no gradients are needed for that.
            with torch.no_grad():
                dummy = torch.zeros(1, in_ch, in_sz, in_sz)
                flat_dim = self.conv_layers(dummy).view(1, -1).shape[1]

            self.classifier = nn.Sequential(
                nn.Linear(flat_dim, 120),
                nn.Tanh(),
                nn.Dropout(drop_r),
                nn.Linear(120, 84),
                nn.Tanh(),
                nn.Dropout(drop_r),
                nn.Linear(84, n_cls),
            )

        def forward(self, x):
            x = self.conv_layers(x)
            # Flatten everything except the batch dimension.
            return self.classifier(x.view(x.size(0), -1))

    return _LeNet5(input_channels, input_size, num_classes, dropout_rate)
def prepare_output(self, dataset, is_fit=False):
    """Encode labels as the integer class indices learned during training.

    Parameters
    ----------
    dataset : DashAIDataset
        Dataset holding the target labels. Only its first column is read.
    is_fit : bool, optional
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    DashAIDataset
        ``dataset`` itself when ``label_to_idx`` is empty (no mapping has
        been built yet); otherwise a new one-column dataset, under the same
        column name, holding the encoded labels.
    """
    # Check the mapping first so the pass-through path does not pay for the
    # pyarrow / DashAIDataset imports below.
    if not self.label_to_idx:
        return dataset

    import pyarrow as pa

    from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset

    col_name = dataset.column_names[0]
    lookup = self.label_to_idx
    # Labels missing from the mapping are passed through unchanged.
    encoded = [lookup.get(lbl, lbl) for lbl in dataset[col_name]]
    return DashAIDataset(pa.table({col_name: encoded}))
+ x_validation : DashAIDataset, optional + Validation input features. Defaults to None. + y_validation : DashAIDataset, optional + Validation target labels. Defaults to None. + + Returns + ------- + LeNet5ImageClassifier + The trained model instance. + """ + import torch + import torch.nn as nn + import torch.optim as optim + import torch.utils.data + + from DashAI.back.core.enums.metrics import LevelEnum, SplitEnum + + image_dataset = _make_image_dataset( + x_train, y_dataset=y_train, image_size=self.image_size + ) + self.input_channels = image_dataset.tensor_shape[0] + self.num_classes = image_dataset.num_classes() + self.idx_to_label = image_dataset.idx_to_label + self.label_to_idx = image_dataset.label_to_idx + + train_loader = torch.utils.data.DataLoader( + image_dataset, + batch_size=self.batch_size, + shuffle=True, + collate_fn=self._collate_fn_with_labels, + ) + + self.model = _build_lenet5_model( + self.input_channels, + self.image_size, + self.num_classes, + self.dropout_rate, + ).to(self.device) + + criterion = nn.CrossEntropyLoss() + self.optimizer = optim.Adam( + self.model.parameters(), + lr=self.learning_rate, + weight_decay=self.weight_decay, + ) + + for epoch in range(self.epochs): + self.model.train() + for images, labels in train_loader: + images, labels = images.to(self.device), labels.to(self.device) + self.optimizer.zero_grad() + loss = criterion(self.model(images), labels) + loss.backward() + self.optimizer.step() + + self.model.eval() + self.calculate_metrics( + split=SplitEnum.TRAIN, + level=LevelEnum.EPOCH, + x_data=x_train, + y_data=y_train, + log_index=epoch + 1, + ) + if x_validation is not None: + self.calculate_metrics( + split=SplitEnum.VALIDATION, + level=LevelEnum.EPOCH, + x_data=x_validation, + y_data=y_validation, + log_index=epoch + 1, + ) + + return self + + def predict(self, x): + """Return per-class probability matrix for each image. + + Parameters + ---------- + x : DashAIDataset + Input dataset containing images. 
@classmethod
def load(cls, filename: str):
    """Restore a classifier from a checkpoint file.

    Parameters
    ----------
    filename : str
        Path to the checkpoint file.

    Returns
    -------
    LeNet5ImageClassifier
        Instance with restored weights and optimizer state. The network is
        placed on the instance's device so it can be used for prediction
        right away.
    """
    import torch
    import torch.optim as optim

    # Deserialize onto CPU first so reading the file does not depend on the
    # device the checkpoint was written from.
    ckpt = torch.load(filename, map_location=torch.device("cpu"))
    instance = cls(
        epochs=ckpt["epochs"],
        learning_rate=ckpt["learning_rate"],
        batch_size=ckpt.get("batch_size", 32),
        image_size=ckpt.get("image_size", 32),
        dropout_rate=ckpt.get("dropout_rate", 0.0),
        weight_decay=ckpt.get("weight_decay", 0.0),
        device=ckpt.get("device_name", DEVICE_PLACEHOLDER),
    )
    instance.input_channels = ckpt["input_channels"]
    instance.num_classes = ckpt["num_classes"]
    instance.idx_to_label = ckpt.get("idx_to_label", {})
    instance.label_to_idx = ckpt.get("label_to_idx", {})
    instance.model = _build_lenet5_model(
        instance.input_channels,
        instance.image_size,
        instance.num_classes,
        instance.dropout_rate,
    )
    instance.model.load_state_dict(ckpt["model_state_dict"])

    # predict() sends its inputs to ``self.device``; the rebuilt network must
    # live there too, otherwise inference fails with a device mismatch.
    target = torch.device(instance.device)
    if target.type == "cuda" and not torch.cuda.is_available():
        # Checkpoint recorded a GPU device but none is usable here: keep the
        # model loadable by falling back to CPU.
        target = torch.device("cpu")
        instance.device = target
    instance.model.to(target)

    # Build the optimizer after the move so it references the parameters on
    # their final device; hyper-parameters such as the learning rate come
    # back with the saved optimizer state.
    instance.optimizer = optim.Adam(
        instance.model.parameters(),
        weight_decay=instance.weight_decay,
    )
    instance.optimizer.load_state_dict(ckpt["optimizer_state_dict"])
    return instance
Una época es una " "iteración completa sobre los datos de entrenamiento." ), + pt=( + "O número de épocas para treinar o modelo. Uma época é uma " + "iteração completa sobre os dados de treinamento." + ), de=( "Die Anzahl der Epochen zum Trainieren des Modells. Eine Epoche ist " "eine vollständige Iteration über die Trainingsdaten." ), ), - alias=MultilingualString(en="Epochs", es="Épocas", de="Epochen"), + alias=MultilingualString(en="Epochs", es="Épocas", pt="Épocas", de="Epochen"), ) # type: ignore learning_rate: schema_field( @@ -46,10 +48,14 @@ class MLPImageClassifierSchema(BaseSchema): description=MultilingualString( en="Learning rate for the Adam optimizer.", es="Tasa de aprendizaje para el optimizador Adam.", + pt="Taxa de aprendizado para o otimizador Adam.", de="Lernrate für den Adam-Optimierer.", ), alias=MultilingualString( - en="Learning rate", es="Tasa de aprendizaje", de="Lernrate" + en="Learning rate", + es="Tasa de aprendizaje", + pt="Taxa de aprendizado", + de="Lernrate", ), ) # type: ignore @@ -65,6 +71,10 @@ class MLPImageClassifierSchema(BaseSchema): "Las capas ocultas y sus dimensiones. Especifique el número " "de unidades de cada capa separadas por comas." ), + pt=( + "As camadas ocultas e suas dimensões. Especifique o número " + "de unidades de cada camada separadas por vírgulas." + ), de=( "Die verdeckten Schichten und ihre Dimensionen. Geben Sie die Anzahl " "der Einheiten jeder Schicht durch Kommas getrennt an." @@ -73,80 +83,202 @@ class MLPImageClassifierSchema(BaseSchema): alias=MultilingualString( en="Hidden layer dimensions", es="Dimensiones de capas ocultas", + pt="Dimensões das camadas ocultas", de="Dimensionen der verdeckten Schichten", ), ) # type: ignore + batch_size: schema_field( + int_field(ge=1), + placeholder=32, + description=MultilingualString( + en=( + "Number of images processed together in each training step. " + "Larger values speed up training but require more memory." 
+ ), + es=( + "Número de imágenes procesadas juntas en cada paso de entrenamiento. " + "Valores más grandes aceleran el entrenamiento " + "pero requieren más memoria." + ), + pt=( + "Número de imagens processadas juntas em cada etapa de treinamento. " + "Valores maiores aceleram o treinamento " + "mas requerem mais memória." + ), + ), + alias=MultilingualString( + en="Batch size", es="Tamaño de lote", pt="Tamanho do lote" + ), + ) # type: ignore -class _ImageDataset(torch.utils.data.Dataset): - """Torch Dataset wrapper for DashAI image datasets.""" - - def __init__(self, x_dataset, y_dataset=None): - self.x_dataset = x_dataset - self.y_dataset = y_dataset - self.transforms = transforms.Compose( - [ - transforms.Resize((30, 30)), - transforms.ToTensor(), - ] - ) - - self.image_col_name = list(x_dataset.features.keys())[0] - self.label_col_name = ( - list(y_dataset.features.keys())[0] if y_dataset is not None else None - ) - - self.label_to_idx = {} - self.idx_to_label = {} - if self.label_col_name: - unique_labels = sorted(set(self.y_dataset[self.label_col_name])) - self.label_to_idx = {label: idx for idx, label in enumerate(unique_labels)} - self.idx_to_label = {idx: label for label, idx in self.label_to_idx.items()} - - self.tensor_shape = self.transforms( - self.x_dataset[0][self.image_col_name].to_pil() - ).shape - - def num_classes(self): - if self.label_col_name is None: - return 0 - return len(self.label_to_idx) - - def __len__(self): - return len(self.x_dataset) - - def __getitem__(self, idx): - image = self.transforms(self.x_dataset[idx][self.image_col_name].to_pil()) - if self.label_col_name is None: - return image - label_str = self.y_dataset[idx][self.label_col_name] - label_idx = self.label_to_idx[label_str] - return image, label_idx - - -class _MLP(nn.Module): - """Multi-Layer Perceptron for image classification.""" - - def __init__(self, input_dim, output_dim, hidden_dims): - super().__init__() - self.hidden_layers = nn.ModuleList() - previous_dim = 
input_dim + image_size: schema_field( + int_field(ge=8), + placeholder=64, + description=MultilingualString( + en=( + "Images are resized to this value (in pixels) for both width " + "and height before training. Larger sizes preserve more detail " + "but increase training time." + ), + es=( + "Las imágenes se redimensionan a este valor (en píxeles) " + "tanto en ancho como en alto antes del entrenamiento. " + "Tamaños más grandes preservan más detalle " + "pero aumentan el tiempo de entrenamiento." + ), + pt=( + "As imagens são redimensionadas para este valor (em pixels) " + "tanto em largura quanto em altura antes do treinamento. " + "Tamanhos maiores preservam mais detalhes " + "mas aumentam o tempo de treinamento." + ), + ), + alias=MultilingualString( + en="Image size", es="Tamaño de imagen", pt="Tamanho da imagem" + ), + ) # type: ignore - for hidden_dim in hidden_dims: - self.hidden_layers.append(nn.Linear(previous_dim, hidden_dim)) - previous_dim = hidden_dim + dropout_rate: schema_field( + float_field(ge=0.0, lt=1.0), + placeholder=0.0, + description=MultilingualString( + en=( + "Fraction of neurons randomly deactivated during each training step. " + "Values between 0.2 and 0.5 help prevent overfitting. " + "Use 0.0 to disable." + ), + es=( + "Fracción de neuronas desactivadas aleatoriamente en cada paso de " + "entrenamiento. Valores entre 0.2 y 0.5 ayudan a prevenir " + "el sobreajuste. Use 0.0 para desactivarlo." + ), + pt=( + "Fração de neurônios desativados aleatoriamente em cada etapa de " + "treinamento. Valores entre 0.2 e 0.5 ajudam a prevenir " + "o sobreajuste. Use 0.0 para desativar." 
+ ), + ), + alias=MultilingualString( + en="Dropout rate", es="Tasa de dropout", pt="Taxa de dropout" + ), + ) # type: ignore - self.output_layer = nn.Linear(previous_dim, output_dim) - self.relu = nn.ReLU() + weight_decay: schema_field( + float_field(ge=0.0), + placeholder=0.0, + description=MultilingualString( + en=( + "L2 regularization coefficient for the Adam optimizer. Penalizes large " + "weights to improve generalization. Typical values: 1e-4 to 1e-2." + ), + es=( + "Coeficiente de regularización L2 para el optimizador Adam. Penaliza " + "pesos grandes para mejorar la generalización. " + "Valores típicos: 1e-4 a 1e-2." + ), + pt=( + "Coeficiente de regularização L2 para o otimizador Adam. Penaliza " + "pesos grandes para melhorar a generalização. " + "Valores típicos: 1e-4 a 1e-2." + ), + ), + alias=MultilingualString( + en="Weight decay", es="Decaimiento de pesos", pt="Decaimento de pesos" + ), + ) # type: ignore - def forward(self, x: torch.Tensor): - batch_size = x.shape[0] - x = x.view(batch_size, -1) + device: schema_field( + enum_field(enum=DEVICE_ENUM), + placeholder=DEVICE_PLACEHOLDER, + description=MultilingualString( + en="Hardware device used for training and inference (CPU/GPU).", + es="Dispositivo de hardware para entrenamiento e inferencia (CPU/GPU).", + pt="Dispositivo de hardware usado para treinamento e inferência (CPU/GPU).", + ), + alias=MultilingualString(en="Device", es="Dispositivo", pt="Dispositivo"), + ) # type: ignore - for layer in self.hidden_layers: - x = self.relu(layer(x)) - return self.output_layer(x) +def _make_image_dataset(x_dataset, y_dataset=None, image_size=64): + import torch.utils.data + from torchvision import transforms + + class _ImageDataset(torch.utils.data.Dataset): + def __init__(self, x_ds, y_ds, img_size): + self.x_dataset = x_ds + self.y_dataset = y_ds + self.transforms = transforms.Compose( + [ + transforms.Resize((img_size, img_size)), + transforms.ToTensor(), + ] + ) + + self.image_col_name = 
list(x_ds.features.keys())[0] + self.label_col_name = ( + list(y_ds.features.keys())[0] if y_ds is not None else None + ) + + self.label_to_idx = {} + self.idx_to_label = {} + if self.label_col_name: + unique_labels = sorted(set(self.y_dataset[self.label_col_name])) + self.label_to_idx = { + label: idx for idx, label in enumerate(unique_labels) + } + self.idx_to_label = { + idx: label for label, idx in self.label_to_idx.items() + } + + self.tensor_shape = self.transforms( + self.x_dataset[0][self.image_col_name].to_pil() + ).shape + + def num_classes(self): + if self.label_col_name is None: + return 0 + return len(self.label_to_idx) + + def __len__(self): + return len(self.x_dataset) + + def __getitem__(self, idx): + image = self.transforms(self.x_dataset[idx][self.image_col_name].to_pil()) + if self.label_col_name is None: + return image + label_str = self.y_dataset[idx][self.label_col_name] + return image, self.label_to_idx[label_str] + + return _ImageDataset(x_dataset, y_dataset, image_size) + + +def _build_mlp_model(input_dim, output_dim, hidden_dims, dropout_rate=0.0): + import torch.nn as nn + + class _MLP(nn.Module): + def __init__(self, in_dim, out_dim, h_dims, drop_r): + super().__init__() + self.hidden_layers = nn.ModuleList() + self.dropout_layers = nn.ModuleList() + prev_dim = in_dim + for h_dim in h_dims: + self.hidden_layers.append(nn.Linear(prev_dim, h_dim)) + self.dropout_layers.append(nn.Dropout(drop_r)) + prev_dim = h_dim + self.output_layer = nn.Linear(prev_dim, out_dim) + self.relu = nn.ReLU() + + def forward(self, x): + batch_size = x.shape[0] + x = x.view(batch_size, -1) + for layer, dropout in zip( + self.hidden_layers, self.dropout_layers, strict=True + ): + x = dropout(self.relu(layer(x))) + return self.output_layer(x) + + return _MLP(input_dim, output_dim, hidden_dims, dropout_rate) class MLPImageClassifier(BaseModel): @@ -161,6 +293,7 @@ class MLPImageClassifier(BaseModel): DISPLAY_NAME: str = MultilingualString( en="MLP Image 
Classifier", es="Clasificador de Imágenes MLP", + pt="Classificador de Imagens MLP", de="MLP-Bildklassifikator", ) DESCRIPTION: str = MultilingualString( @@ -174,6 +307,11 @@ class MLPImageClassifier(BaseModel): "que aplana los píxeles de la imagen y los pasa por capas ocultas " "completamente conectadas con activación ReLU para clasificación." ), + pt=( + "Um classificador de imagens baseado em Perceptron Multicamada (MLP) " + "que achata os pixels da imagem e os passa por camadas ocultas " + "completamente conectadas com ativação ReLU para classificação." + ), de=( "Ein Bildklassifikator auf Basis eines Mehrschichtigen Perzeptrons (MLP), " "der Bildpixel abflacht und durch konfigurierbare vollständig verbundene " @@ -185,23 +323,47 @@ class MLPImageClassifier(BaseModel): @staticmethod def _collate_fn_with_labels(batch): - """Custom collate function for batches with (image, label) tuples.""" + import torch + images = torch.stack([item[0] for item in batch]) labels = torch.tensor([item[1] for item in batch], dtype=torch.long) return images, labels @staticmethod def _collate_fn_no_labels(batch): - """Custom collate function for batches with only images.""" + import torch + return torch.stack(batch) - def __init__(self, epochs=10, learning_rate=0.001, hidden_dims=None, **kwargs): + def __init__( + self, + epochs=10, + learning_rate=0.001, + hidden_dims=None, + batch_size=32, + image_size=64, + dropout_rate=0.0, + weight_decay=0.0, + device=DEVICE_PLACEHOLDER, + **kwargs, + ): + import torch + if hidden_dims is None: hidden_dims = [128, 64] self.epochs = epochs self.learning_rate = learning_rate self.hidden_dims = hidden_dims - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.batch_size = batch_size + self.image_size = image_size + self.dropout_rate = dropout_rate + self.weight_decay = weight_decay + self._device_name = device + self.device = torch.device( + f"cuda:{DEVICE_TO_IDX.get(device)}" + if DEVICE_TO_IDX.get(device, -1) >= 0 + 
else "cpu" + ) self.model = None self.optimizer = None self.input_dim = None @@ -234,16 +396,25 @@ def train(self, x_train, y_train, x_validation=None, y_validation=None): y_train : DashAIDataset Target dataset containing labels. x_validation : DashAIDataset, optional - Validation input features (unused). Defaults to None. + Validation input features. Defaults to None. y_validation : DashAIDataset, optional - Validation target labels (unused). Defaults to None. + Validation target labels. Defaults to None. Returns ------- MLPImageClassifier The trained model instance. """ - image_dataset = _ImageDataset(x_train, y_dataset=y_train) + import torch + import torch.nn as nn + import torch.optim as optim + import torch.utils.data + + from DashAI.back.core.enums.metrics import LevelEnum, SplitEnum + + image_dataset = _make_image_dataset( + x_train, y_dataset=y_train, image_size=self.image_size + ) self.input_dim = ( image_dataset.tensor_shape[0] @@ -251,25 +422,29 @@ def train(self, x_train, y_train, x_validation=None, y_validation=None): * image_dataset.tensor_shape[2] ) self.output_dim = image_dataset.num_classes() - self.idx_to_label = image_dataset.idx_to_label self.label_to_idx = image_dataset.label_to_idx train_loader = torch.utils.data.DataLoader( image_dataset, - batch_size=32, + batch_size=self.batch_size, shuffle=True, collate_fn=self._collate_fn_with_labels, ) - self.model = _MLP(self.input_dim, self.output_dim, self.hidden_dims).to( - self.device - ) + self.model = _build_mlp_model( + self.input_dim, self.output_dim, self.hidden_dims, self.dropout_rate + ).to(self.device) + criterion = nn.CrossEntropyLoss() - self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) + self.optimizer = optim.Adam( + self.model.parameters(), + lr=self.learning_rate, + weight_decay=self.weight_decay, + ) - self.model.train() - for _ in range(self.epochs): + for epoch in range(self.epochs): + self.model.train() for images, labels in train_loader: images, labels = 
images.to(self.device), labels.to(self.device) self.optimizer.zero_grad() @@ -278,6 +453,23 @@ def train(self, x_train, y_train, x_validation=None, y_validation=None): loss.backward() self.optimizer.step() + self.model.eval() + self.calculate_metrics( + split=SplitEnum.TRAIN, + level=LevelEnum.EPOCH, + x_data=x_train, + y_data=y_train, + log_index=epoch + 1, + ) + if x_validation is not None: + self.calculate_metrics( + split=SplitEnum.VALIDATION, + level=LevelEnum.EPOCH, + x_data=x_validation, + y_data=y_validation, + log_index=epoch + 1, + ) + return self def predict(self, x): @@ -290,19 +482,23 @@ def predict(self, x): Returns ------- - list of lists - List of predicted probabilities for each class for each image. + np.ndarray + Array of shape (n_samples, n_classes) with softmax probabilities. """ - image_dataset = _ImageDataset(x, y_dataset=None) + import numpy as np + import torch + import torch.utils.data + + image_dataset = _make_image_dataset( + x, y_dataset=None, image_size=self.image_size + ) test_loader = torch.utils.data.DataLoader( image_dataset, - batch_size=32, + batch_size=self.batch_size, shuffle=False, collate_fn=self._collate_fn_no_labels, ) - import numpy as np - self.model.eval() all_probs = [] with torch.no_grad(): @@ -326,12 +522,19 @@ def save(self, filename: str) -> None: filename : str Path where the checkpoint will be saved. """ + import torch + checkpoint = { "model_state_dict": self.model.state_dict(), "optimizer_state_dict": self.optimizer.state_dict(), "epochs": self.epochs, "learning_rate": self.learning_rate, "hidden_dims": self.hidden_dims, + "batch_size": self.batch_size, + "image_size": self.image_size, + "dropout_rate": self.dropout_rate, + "weight_decay": self.weight_decay, + "device_name": self._device_name, "input_dim": self.input_dim, "output_dim": self.output_dim, "idx_to_label": self.idx_to_label, @@ -353,19 +556,33 @@ def load(cls, filename: str): MLPImageClassifier Instance with loaded weights. 
""" + import torch + import torch.optim as optim + checkpoint = torch.load(filename, map_location=torch.device("cpu")) instance = cls( epochs=checkpoint["epochs"], learning_rate=checkpoint["learning_rate"], hidden_dims=checkpoint["hidden_dims"], + batch_size=checkpoint.get("batch_size", 32), + image_size=checkpoint.get("image_size", 64), + dropout_rate=checkpoint.get("dropout_rate", 0.0), + weight_decay=checkpoint.get("weight_decay", 0.0), + device=checkpoint.get("device_name", DEVICE_PLACEHOLDER), ) instance.input_dim = checkpoint["input_dim"] instance.output_dim = checkpoint["output_dim"] - instance.model = _MLP( - instance.input_dim, instance.output_dim, instance.hidden_dims + instance.model = _build_mlp_model( + instance.input_dim, + instance.output_dim, + instance.hidden_dims, + instance.dropout_rate, ) instance.model.load_state_dict(checkpoint["model_state_dict"]) - instance.optimizer = optim.Adam(instance.model.parameters()) + instance.optimizer = optim.Adam( + instance.model.parameters(), + weight_decay=instance.weight_decay, + ) instance.optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) instance.idx_to_label = checkpoint.get("idx_to_label", {}) instance.label_to_idx = checkpoint.get("label_to_idx", {}) diff --git a/DashAI/back/models/resnet18_image_classifier.py b/DashAI/back/models/resnet18_image_classifier.py new file mode 100644 index 000000000..5c196262a --- /dev/null +++ b/DashAI/back/models/resnet18_image_classifier.py @@ -0,0 +1,59 @@ +"""ResNet-18 image classifier for DashAI.""" + +from DashAI.back.core.utils import MultilingualString +from DashAI.back.models.base_torchvision_image_classifier import ( + TorchvisionImageClassifier, + TorchvisionImageClassifierSchema, +) + + +class ResNet18ImageClassifier(TorchvisionImageClassifier): + """ResNet-18 image classifier (He et al., 2015). + + 18-layer residual network with skip connections that solve the vanishing + gradient problem. 
The final fully-connected layer is replaced to match the + number of target classes. Supports ImageNet pre-trained weights. + """ + + SCHEMA = TorchvisionImageClassifierSchema + COMPATIBLE_COMPONENTS = ["ImageClassificationTask"] + DISPLAY_NAME: str = MultilingualString( + en="ResNet-18", + es="ResNet-18", + pt="ResNet-18", + ) + DESCRIPTION: str = MultilingualString( + en=( + "ResNet-18 (He et al., 2015). An 18-layer residual network with " + "skip connections that enable training very deep networks. " + "The most-cited CNN in academic literature." + ), + es=( + "ResNet-18 (He et al., 2015). Red residual de 18 capas con " + "conexiones de salto que permiten entrenar redes muy profundas. " + "La CNN más citada en la literatura académica." + ), + pt=( + "ResNet-18 (He et al., 2015). Rede residual de 18 camadas com " + "conexões de salto que permitem treinar redes muito profundas. " + "A CNN mais citada na literatura acadêmica." + ), + ) + COLOR: str = "#2E7D32" + ICON: str = "AccountTree" + + def _build_backbone(self, num_classes: int, pretrained: bool): + import torch.nn as nn + from torchvision.models import ResNet18_Weights, resnet18 + + weights = ResNet18_Weights.DEFAULT if pretrained else None + model = resnet18(weights=weights) + in_features = model.fc.in_features + model.fc = nn.Sequential( + nn.Dropout(self.dropout_rate), + nn.Linear(in_features, num_classes), + ) + return model + + def _classifier_head(self): + return self.model.fc diff --git a/DashAI/back/models/resnet50_image_classifier.py b/DashAI/back/models/resnet50_image_classifier.py new file mode 100644 index 000000000..33017cf52 --- /dev/null +++ b/DashAI/back/models/resnet50_image_classifier.py @@ -0,0 +1,60 @@ +"""ResNet-50 image classifier for DashAI.""" + +from DashAI.back.core.utils import MultilingualString +from DashAI.back.models.base_torchvision_image_classifier import ( + TorchvisionImageClassifier, + TorchvisionImageClassifierSchema, +) + + +class 
ResNet50ImageClassifier(TorchvisionImageClassifier): + """ResNet-50 image classifier (He et al., 2015). + + 50-layer residual network using bottleneck blocks. Deeper and more + accurate than ResNet-18, and the most-cited CNN variant in the academic + literature. The final FC layer is replaced to match the target classes. + Supports ImageNet pre-trained weights. + """ + + SCHEMA = TorchvisionImageClassifierSchema + COMPATIBLE_COMPONENTS = ["ImageClassificationTask"] + DISPLAY_NAME: str = MultilingualString( + en="ResNet-50", + es="ResNet-50", + pt="ResNet-50", + ) + DESCRIPTION: str = MultilingualString( + en=( + "ResNet-50 (He et al., 2015). A 50-layer residual network with " + "bottleneck blocks and skip connections. The most-cited CNN variant " + "in academic papers; supports ImageNet pre-trained weights." + ), + es=( + "ResNet-50 (He et al., 2015). Red residual de 50 capas con bloques " + "bottleneck y conexiones de salto. La variante CNN más citada en " + "papers académicos; soporta pesos preentrenados en ImageNet." + ), + pt=( + "ResNet-50 (He et al., 2015). Rede residual de 50 camadas com blocos " + "bottleneck e conexões de salto. A variante CNN mais citada em " + "artigos acadêmicos; suporta pesos pré-treinados no ImageNet." 
+ ), + ) + COLOR: str = "#1B5E20" + ICON: str = "AccountTree" + + def _build_backbone(self, num_classes: int, pretrained: bool): + import torch.nn as nn + from torchvision.models import ResNet50_Weights, resnet50 + + weights = ResNet50_Weights.DEFAULT if pretrained else None + model = resnet50(weights=weights) + in_features = model.fc.in_features + model.fc = nn.Sequential( + nn.Dropout(self.dropout_rate), + nn.Linear(in_features, num_classes), + ) + return model + + def _classifier_head(self): + return self.model.fc diff --git a/DashAI/back/tasks/image_classification_task.py b/DashAI/back/tasks/image_classification_task.py index 26b24bb02..57e407787 100644 --- a/DashAI/back/tasks/image_classification_task.py +++ b/DashAI/back/tasks/image_classification_task.py @@ -33,6 +33,12 @@ class ImageClassificationTask(ClassificationTask): "las imágenes, lo que permite una clasificación precisa de nuevas " "instancias." ), + pt=( + "A classificação de imagens no aprendizado de máquina envolve a " + "previsão de rótulos categóricos para dados de imagem. Os modelos " + "são treinados para aprender padrões visuais e características nas " + "imagens, permitindo uma classificação precisa de novas instâncias." + ), de=( "Bildklassifikation im maschinellen Lernen umfasst die Vorhersage " "kategorialer Zielgrößen für Bilddaten. Modelle werden trainiert, um " @@ -43,6 +49,7 @@ class ImageClassificationTask(ClassificationTask): DISPLAY_NAME: str = MultilingualString( en="Image Classification", es="Clasificación de Imágenes", + pt="Classificação de Imagens", de="Bildklassifikation", ) SCORING_PROFILES = {