Description
🐛 Describe the bug
I've been building a CLIP model. This is my architecture:
```python
from transformers import DistilBertModel
import torch
import torch.nn as nn


class TextEncoderHead(nn.Module):
    def __init__(self, model):
        super(TextEncoderHead, self).__init__()
        self.model = model
        self.seq1 = nn.Sequential(
            nn.Linear(768, 512),
            nn.LayerNorm(512)
        )

    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        outputs = outputs.last_hidden_state.mean(dim=1)
        outputs = self.seq1(outputs)
        return outputs.contiguous()


class ImageEncoderHead(nn.Module):
    def __init__(self, model):
        super(ImageEncoderHead, self).__init__()
        self.model = model
        self.seq1 = nn.Sequential(
            nn.Linear(768, 512),
            nn.LayerNorm(512)
        )

    def forward(self, pixel_values):
        outputs = self.model(pixel_values=pixel_values)
        outputs = outputs.last_hidden_state.mean(dim=1)
        outputs = self.seq1(outputs)
        return outputs.contiguous()


class CLIPChemistryModel(nn.Module):
    def __init__(self, text_encoder, image_encoder):
        super(CLIPChemistryModel, self).__init__()
        self.text_encoder = text_encoder
        self.image_encoder = image_encoder

    def forward(self, image, input_ids, attention_mask):
        # calculate the embeddings
        ie = self.image_encoder(image)
        te = self.text_encoder(input_ids, attention_mask)
        return ie, te
```
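For context, the heads wrap pretrained Hugging Face backbones roughly like this (the checkpoint names here are illustrative; any backbone whose `last_hidden_state` has hidden size 768 fits the heads above):

```python
from transformers import DistilBertModel, ViTModel

# Illustrative instantiation: the checkpoint names are placeholders,
# any backbones with a 768-dim last_hidden_state fit the heads above.
text_backbone = DistilBertModel.from_pretrained("distilbert-base-uncased")
image_backbone = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")

text_encoder = TextEncoderHead(text_backbone)
image_encoder = ImageEncoderHead(image_backbone)
model = CLIPChemistryModel(text_encoder, image_encoder)
```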
This is my trainer and loss function:
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm


def trainer_fn(model, dataloader_train, dataloader_val, epochs, loss_fn, optimizer, device):
    total_loss_train = []
    total_loss_val = []
    model.to(device)

    for epoch in tqdm(range(epochs), desc="Training..."):
        # MODEL TRAINING
        model.train()
        running_loss = 0
        counter = 0
        for batch in dataloader_train:
            image, input_ids, attention_mask = batch
            image = image.to(device)
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            # Check whether the input tensors are contiguous
            print(f"image is contiguous: {image.is_contiguous()}")
            print(f"input_ids is contiguous: {input_ids.is_contiguous()}")
            print(f"attention_mask is contiguous: {attention_mask.is_contiguous()}")
            # Forward pass
            image_embeddings, text_embeddings = model(image, input_ids, attention_mask)
            # Check whether the embedding tensors are contiguous
            print(f"image_embeddings is contiguous: {image_embeddings.is_contiguous()}")
            print(f"text_embeddings is contiguous: {text_embeddings.is_contiguous()}")
            # Calculate the loss
            loss = loss_fn(image_embeddings, text_embeddings)
            print(loss)
            # Backward pass
            loss.backward()
            # Optimize the weights
            optimizer.step()
            # Zero the gradients
            optimizer.zero_grad()
            running_loss += loss.item()
            counter += 1
            print(counter)
        total_loss_train.append(running_loss / counter)

        # MODEL EVALUATION
        model.eval()
        running_vloss = 0
        vcounter = 0
        with torch.no_grad():
            for batch in dataloader_val:
                image, input_ids, attention_mask = batch
                image = image.to(device)
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)
                # forward pass
                image_embeddings, text_embeddings = model(image, input_ids, attention_mask)
                print(f"image_embeddings is contiguous: {image_embeddings.is_contiguous()}")
                print(f"text_embeddings is contiguous: {text_embeddings.is_contiguous()}")
                # calculate the loss
                loss = loss_fn(image_embeddings=image_embeddings, text_embeddings=text_embeddings)
                running_vloss += loss.item()
                vcounter += 1
        total_loss_val.append(running_vloss / vcounter)
        # PRINT THE LOSS
        print(f"Epoch {epoch+1} - Train Loss: {total_loss_train[-1]} - Validation Loss: {total_loss_val[-1]}")


def contrastive_loss(image_embeddings, text_embeddings, temperature=1.0):
    """
    Compute contrastive loss between image and text embeddings.
    """
    temperature = torch.tensor(temperature, device=image_embeddings.device).float()
    image_embeddings = image_embeddings.contiguous().float()
    text_embeddings = text_embeddings.contiguous().float()
    batch_size = image_embeddings.shape[0]

    image_embeddings = F.normalize(image_embeddings, p=2, dim=1)
    text_embeddings = F.normalize(text_embeddings, p=2, dim=1)

    logits = torch.einsum('nc,mc->nm', [image_embeddings, text_embeddings])
    logits = logits * torch.exp(temperature)

    labels = torch.arange(batch_size, device=image_embeddings.device)
    loss_i2t = F.cross_entropy(logits, labels)
    loss_t2i = F.cross_entropy(logits.t(), labels)
    loss = (loss_i2t + loss_t2i) / 2
    return loss
```
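For completeness, this is roughly how I invoke the trainer (the dummy data shapes, optimizer, and hyperparameters below are illustrative, not my real pipeline):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

# Illustrative invocation of trainer_fn: dummy data, optimizer, and
# hyperparameters stand in for the real pipeline.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

dummy = TensorDataset(
    torch.randn(8, 3, 224, 224),           # pixel_values
    torch.randint(0, 30522, (8, 16)),      # input_ids
    torch.ones(8, 16, dtype=torch.long),   # attention_mask
)
dataloader_train = DataLoader(dummy, batch_size=4)
dataloader_val = DataLoader(dummy, batch_size=4)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

trainer_fn(
    model=model,
    dataloader_train=dataloader_train,
    dataloader_val=dataloader_val,
    epochs=1,
    loss_fn=contrastive_loss,
    optimizer=optimizer,
    device=device,
)
```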
When I run this on MPS, it fails with the following error:
```
Traceback (most recent call last):
  File "/Users/sebastianalejandrosarastizambonino/Documents/projects/CLIP_Pytorch/src/trainer.py", line 74, in <module>
    main()
  File "/Users/sebastianalejandrosarastizambonino/Documents/projects/CLIP_Pytorch/src/trainer.py", line 61, in main
    trainer_fn(
  File "/Users/sebastianalejandrosarastizambonino/Documents/projects/CLIP_Pytorch/src/utils.py", line 37, in trainer_fn
    loss.backward()
  File "/opt/anaconda3/envs/clip/lib/python3.11/site-packages/torch/_tensor.py", line 581, in backward
    torch.autograd.backward(
  File "/opt/anaconda3/envs/clip/lib/python3.11/site-packages/torch/autograd/__init__.py", line 347, in backward
    _engine_run_backward(
  File "/opt/anaconda3/envs/clip/lib/python3.11/site-packages/torch/autograd/graph.py", line 825, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
```
If I switch to CPU, it works fine. Does anyone know why this happens?
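In case it helps with isolating the issue, here is a self-contained sketch of the same path without the Hugging Face backbones (mean-pool -> Linear/LayerNorm -> normalize -> einsum -> symmetric cross-entropy -> backward). The shapes are illustrative, and I have not verified whether this minimal version also fails on MPS:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# Mirrors the failing path without the HF models: mean-pool -> Linear/LayerNorm
# -> normalize -> einsum -> symmetric cross-entropy -> backward.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

head = nn.Sequential(nn.Linear(768, 512), nn.LayerNorm(512)).to(device)

hidden = torch.randn(4, 16, 768, device=device, requires_grad=True)  # stand-in for last_hidden_state
pooled = hidden.mean(dim=1)
emb_img = F.normalize(head(pooled), p=2, dim=1)
emb_txt = F.normalize(torch.randn(4, 512, device=device), p=2, dim=1)

logits = torch.einsum('nc,mc->nm', [emb_img, emb_txt])
labels = torch.arange(4, device=device)
loss = (F.cross_entropy(logits, labels) + F.cross_entropy(logits.t(), labels)) / 2
loss.backward()
print("backward finished on", device)
```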
Versions
torch 2.5.1
macOS 15.1