Spaces:
Runtime error
Runtime error
| import torch | |
| import torch.nn as nn | |
| from . import register_connector | |
| from .base import Connector | |
class MoFMLP(nn.Module):
    """Project two vision feature streams and interleave them token-wise.

    Two independent ``Linear -> GELU -> Linear`` stacks are held as
    ``self.clip`` and ``self.dinov2``. Both map
    ``config.vision_hidden_size`` to ``config.hidden_size``.
    """

    def __init__(self, config):
        """Build the two projection stacks.

        Args:
            config: Object exposing ``vision_hidden_size`` and
                ``hidden_size`` attributes.
        """
        super().__init__()
        # The clip stack is created before the dinov2 stack so that a seeded
        # RNG initialises the parameters in the same order as before.
        self.clip = self._make_projector(
            config.vision_hidden_size, config.hidden_size
        )
        self.dinov2 = self._make_projector(
            config.vision_hidden_size, config.hidden_size
        )

    @staticmethod
    def _make_projector(in_features, out_features):
        """Return a ``Linear -> GELU -> Linear`` stack as ``nn.Sequential``."""
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.GELU(),
            nn.Linear(out_features, out_features),
        )

    def forward(self, x):
        """Project both feature tensors and interleave them along dim 1.

        Args:
            x: Indexable pair. ``x[0]`` is fed to ``self.clip`` and ``x[1]``
                to ``self.dinov2``. Presumably each is
                ``(batch, tokens, vision_hidden_size)`` -- TODO confirm
                against the caller.

        Returns:
            A tensor of shape ``(batch, tokens_0 + tokens_1, hidden_size)``
            on the device and with the dtype of ``x[0]``. Even positions
            along dim 1 hold the ``clip`` projection, odd positions hold the
            ``dinov2`` projection.
        """
        clip_tokens = self.clip(x[0])
        dino_tokens = self.dinov2(x[1])

        batch = clip_tokens.size(0)
        merged_len = clip_tokens.size(1) + dino_tokens.size(1)
        width = clip_tokens.size(-1)

        # Allocate directly on the target device/dtype instead of creating a
        # default-device buffer and copying it over on every forward pass.
        merged = torch.empty(
            batch, merged_len, width, device=x[0].device, dtype=x[0].dtype
        )
        merged[:, 0::2] = clip_tokens
        merged[:, 1::2] = dino_tokens
        return merged
class MoFMLPConnector(Connector):
    """Connector whose ``_connector`` attribute is a ``MoFMLP`` instance."""

    # NOTE(review): ``register_connector`` is imported in this file but is not
    # applied to this class in the visible source -- confirm whether a
    # registration decorator is expected here.

    def __init__(self, config):
        """Initialise the base connector, then attach the MoFMLP projector.

        Args:
            config: Passed unchanged to ``MoFMLP``.
        """
        super().__init__()
        projector = MoFMLP(config)
        self._connector = projector