InstaDeepAI
/

segment_borzoi

@@ -7,7 +7,9 @@ from einops import rearrange
 from torch import einsum
 from transformers import PretrainedConfig, PreTrainedModel
-from genomics_research.segmentnt.layers.torch.segmentation_head import TorchUNetHead
 FEATURES = [
     "protein_coding_gene",
@@ -91,7 +93,7 @@ class SegmentBorzoi(PreTrainedModel):
         # Correct transformer
         for layer in self.transformer:
-            layer[0].fn[1] = BorzoiAttentionLayer(
                 config.embed_dim,
                 heads=config.num_attention_heads,
                 dim_key=config.attention_dim_key,
@@ -105,7 +107,7 @@ class SegmentBorzoi(PreTrainedModel):
         self.separable1.conv_layer[1].bias = None
         self.separable0.conv_layer[1].bias = None
-    def forward(self, x):
         # Stem
         x = x.transpose(1, 2)
         x = self.stem(x)
@@ -199,14 +201,14 @@ def relative_shift(x: torch.Tensor) -> torch.Tensor:
     to_pad = torch.zeros_like(x[..., :1])
     x = torch.cat((to_pad, x), dim=-1)
     _, h, t1, t2 = x.shape
-    x = x.reshape(-1, h, t2, t1)
     x = x[:, :, 1:, :]
-    x = x.reshape(-1, h, t1, t2 - 1)
     return x[..., : ((t2 + 1) // 2)]
 class BorzoiAttentionLayer(nn.Module):
-    def __init__(
         self,
         dim,
         *,
@@ -216,7 +218,7 @@ class BorzoiAttentionLayer(nn.Module):
         dim_value=64,
         dropout=0.0,
         pos_dropout=0.0,
-    ):
         super().__init__()
         self.scale = dim_key**-0.5
         self.heads = heads
@@ -232,22 +234,29 @@ class BorzoiAttentionLayer(nn.Module):
         self.num_rel_pos_features = num_rel_pos_features
         self.to_rel_k = nn.Linear(num_rel_pos_features, dim_key * heads, bias=False)
-        self.rel_content_bias = nn.Parameter(torch.randn(1, heads, 1, dim_key))
-        self.rel_pos_bias = nn.Parameter(torch.randn(1, heads, 1, dim_key))
         # dropouts
         self.pos_dropout = nn.Dropout(pos_dropout)
         self.attn_dropout = nn.Dropout(dropout)
-    def forward(self, x):
         n, h = x.shape[-2], self.heads
         q = self.to_q(x)
         k = self.to_k(x)
         v = self.to_v(x)
-        q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), (q, k, v))
         q = q * self.scale

 from torch import einsum
 from transformers import PretrainedConfig, PreTrainedModel
+from genomics_research.segmentnt.porting_to_pytorch.layers.segmentation_head import (
+    TorchUNetHead,
+)
 FEATURES = [
     "protein_coding_gene",
         # Correct transformer
         for layer in self.transformer:
+            layer[0].fn[1] = BorzoiAttentionLayer(  # type: ignore
                 config.embed_dim,
                 heads=config.num_attention_heads,
                 dim_key=config.attention_dim_key,
         self.separable1.conv_layer[1].bias = None
         self.separable0.conv_layer[1].bias = None
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         # Stem
         x = x.transpose(1, 2)
         x = self.stem(x)
     to_pad = torch.zeros_like(x[..., :1])
     x = torch.cat((to_pad, x), dim=-1)
     _, h, t1, t2 = x.shape
+    x = x.reshape(-1, h, t2, t1)  # noqa: FKA100
     x = x[:, :, 1:, :]
+    x = x.reshape(-1, h, t1, t2 - 1)  # noqa: FKA100
     return x[..., : ((t2 + 1) // 2)]
 class BorzoiAttentionLayer(nn.Module):
+    def __init__(  # type: ignore
         self,
         dim,
         *,
         dim_value=64,
         dropout=0.0,
         pos_dropout=0.0,
+    ) -> None:
         super().__init__()
         self.scale = dim_key**-0.5
         self.heads = heads
         self.num_rel_pos_features = num_rel_pos_features
         self.to_rel_k = nn.Linear(num_rel_pos_features, dim_key * heads, bias=False)
+        self.rel_content_bias = nn.Parameter(
+            torch.randn(1, heads, 1, dim_key)  # noqa: FKA100
+        )
+        self.rel_pos_bias = nn.Parameter(
+            torch.randn(1, heads, 1, dim_key)  # noqa: FKA100
+        )
         # dropouts
         self.pos_dropout = nn.Dropout(pos_dropout)
         self.attn_dropout = nn.Dropout(dropout)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         n, h = x.shape[-2], self.heads
         q = self.to_q(x)
         k = self.to_k(x)
         v = self.to_v(x)
+        q, k, v = map(  # noqa
+            lambda t: rearrange(t, "b n (h d) -> b h n d", h=h),  # type: ignore
+            (q, k, v),
+        )
         q = q * self.scale