fix again

Files changed (7) hide show

__init__.py +1 -0
config.json +13 -10
create_model.py +6 -3
modeling/__pycache__/modeling_resnet.cpython-39.pyc +0 -0
modeling/modeling_resnet.py +518 -0
preprocessor_config.json +0 -18
pytorch_model.bin +2 -2

__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

config.json CHANGED Viewed

@@ -1,22 +1,25 @@
 {
-  "_name_or_path": "/home/fxmarty/hf_internship/tiny-testing-remote-code",
   "architectures": [
     "ResNetCustomForImageClassification"
   ],
   "depths": [
-    3,
-    4,
-    6,
-    3
   ],
   "downsample_in_first_stage": false,
   "embedding_size": 64,
   "hidden_act": "relu",
   "hidden_sizes": [
-    6,
-    12,
-    24,
-    48
   ],
   "id2label": {
     "0": "tench, Tinca tinca",
@@ -2021,7 +2024,7 @@
     "zebra": 340,
     "zucchini, courgette": 939
   },
-  "layer_type": "bottleneck",
   "model_type": "resnet",
   "num_channels": 3,
   "out_features": null,

 {
+  "_name_or_path": "microsoft/resnet-18",
   "architectures": [
     "ResNetCustomForImageClassification"
   ],
+  "auto_map": {
+    "AutoModelForImageClassification": "modeling_resnet.ResNetCustomForImageClassification"
+  },
   "depths": [
+    2,
+    2,
+    2,
+    2
   ],
   "downsample_in_first_stage": false,
   "embedding_size": 64,
   "hidden_act": "relu",
   "hidden_sizes": [
+    64,
+    128,
+    256,
+    512
   ],
   "id2label": {
     "0": "tench, Tinca tinca",
     "zebra": 340,
     "zucchini, courgette": 939
   },
+  "layer_type": "basic",
   "model_type": "resnet",
   "num_channels": 3,
   "out_features": null,

create_model.py CHANGED Viewed

@@ -1,8 +1,11 @@
 from transformers import AutoConfig
-from modeling import ResNetCustomForImageClassification
-cfg = AutoConfig.from_pretrained("/home/fxmarty/hf_internship/tiny-testing-remote-code")
 model = ResNetCustomForImageClassification(cfg)
-model.save_pretrained("/home/fxmarty/hf_internship/tiny-testing-remote-code")

 from transformers import AutoConfig
+from modeling.modeling_resnet import ResNetCustomForImageClassification
+# Only the configuration is fetched from "microsoft/resnet-18" here.
+cfg = AutoConfig.from_pretrained("microsoft/resnet-18")
+# Register the custom class under AutoModelForImageClassification (presumably what produces the `auto_map`
+# entry in the saved config.json -- TODO confirm against the transformers custom-model docs).
+ResNetCustomForImageClassification.register_for_auto_class("AutoModelForImageClassification")
+# The model is built from the config alone; no pretrained checkpoint is loaded by this script.
 model = ResNetCustomForImageClassification(cfg)
+# NOTE(review): hard-coded, machine-specific output directory -- adjust before running elsewhere.
+model.save_pretrained("/home/fxmarty/hf_internship/tiny-testing-remote-code")

modeling/__pycache__/modeling_resnet.cpython-39.pyc ADDED Viewed

Binary file (16.1 kB). View file

modeling/modeling_resnet.py ADDED Viewed

	@@ -0,0 +1,518 @@

+# coding=utf-8
+# Copyright 2022 Microsoft Research, Inc. and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" PyTorch ResNet model."""
+from typing import Optional
+import torch
+import torch.utils.checkpoint
+from torch import Tensor, nn
+from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+from transformers.activations import ACT2FN
+from transformers.modeling_outputs import (
+    BackboneOutput,
+    BaseModelOutputWithNoAttention,
+    BaseModelOutputWithPoolingAndNoAttention,
+    ImageClassifierOutputWithNoAttention,
+)
+from transformers.modeling_utils import BackboneMixin, PreTrainedModel
+from transformers.utils import (
+    add_code_sample_docstrings,
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    logging,
+    replace_return_docstrings,
+)
+from transformers import ResNetConfig
+logger = logging.get_logger(__name__)
+# General docstring
+_CONFIG_FOR_DOC = "ResNetConfig"
+_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
+# Base docstring
+_CHECKPOINT_FOR_DOC = "microsoft/resnet-50"
+_EXPECTED_OUTPUT_SHAPE = [1, 2048, 7, 7]
+# Image classification docstring
+_IMAGE_CLASS_CHECKPOINT = "microsoft/resnet-50"
+_IMAGE_CLASS_EXPECTED_OUTPUT = "tiger cat"
+RESNET_PRETRAINED_MODEL_ARCHIVE_LIST = [
+    "microsoft/resnet-50",
+    # See all resnet models at https://huggingface.co/models?filter=resnet
+]
+class ResNetConvLayer(nn.Module):
+    def __init__(
+        self, in_channels: int, out_channels: int, kernel_size: int = 3, stride: int = 1, activation: str = "relu"
+    ):
+        super().__init__()
+        self.convolution = nn.Conv2d(
+            in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2, bias=False
+        )
+        self.normalization = nn.BatchNorm2d(out_channels)
+        self.activation = ACT2FN[activation] if activation is not None else nn.Identity()
+    def forward(self, input: Tensor) -> Tensor:
+        hidden_state = self.convolution(input)
+        hidden_state = self.normalization(hidden_state)
+        hidden_state = self.activation(hidden_state)
+        return hidden_state
+class ResNetEmbeddings(nn.Module):
+    """
+    ResNet Embeddings (stem) composed of a single aggressive convolution.
+    """
+    def __init__(self, config: ResNetConfig):
+        super().__init__()
+        self.embedder = ResNetConvLayer(
+            config.num_channels, config.embedding_size, kernel_size=7, stride=2, activation=config.hidden_act
+        )
+        self.pooler = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.num_channels = config.num_channels
+    def forward(self, pixel_values: Tensor) -> Tensor:
+        num_channels = pixel_values.shape[1]
+        if num_channels != self.num_channels:
+            raise ValueError(
+                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
+            )
+        embedding = self.embedder(pixel_values)
+        embedding = self.pooler(embedding)
+        return embedding
+class ResNetShortCut(nn.Module):
+    """
+    ResNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
+    downsample the input using `stride=2`.
+    """
+    def __init__(self, in_channels: int, out_channels: int, stride: int = 2):
+        super().__init__()
+        self.convolution = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
+        self.normalization = nn.BatchNorm2d(out_channels)
+    def forward(self, input: Tensor) -> Tensor:
+        hidden_state = self.convolution(input)
+        hidden_state = self.normalization(hidden_state)
+        return hidden_state
+class ResNetBasicLayer(nn.Module):
+    """
+    A classic ResNet's residual layer composed by two `3x3` convolutions.
+    """
+    def __init__(self, in_channels: int, out_channels: int, stride: int = 1, activation: str = "relu"):
+        super().__init__()
+        should_apply_shortcut = in_channels != out_channels or stride != 1
+        self.shortcut = (
+            ResNetShortCut(in_channels, out_channels, stride=stride) if should_apply_shortcut else nn.Identity()
+        )
+        self.layer = nn.Sequential(
+            ResNetConvLayer(in_channels, out_channels, stride=stride),
+            ResNetConvLayer(out_channels, out_channels, activation=None),
+        )
+        self.activation = ACT2FN[activation]
+    def forward(self, hidden_state):
+        residual = hidden_state
+        hidden_state = self.layer(hidden_state)
+        residual = self.shortcut(residual)
+        hidden_state += residual
+        hidden_state = self.activation(hidden_state)
+        return hidden_state
+class ResNetBottleNeckLayer(nn.Module):
+    """
+    A classic ResNet's bottleneck layer composed by three `3x3` convolutions.
+    The first `1x1` convolution reduces the input by a factor of `reduction` in order to make the second `3x3`
+    convolution faster. The last `1x1` convolution remaps the reduced features to `out_channels`.
+    """
+    def __init__(
+        self, in_channels: int, out_channels: int, stride: int = 1, activation: str = "relu", reduction: int = 4
+    ):
+        super().__init__()
+        should_apply_shortcut = in_channels != out_channels or stride != 1
+        reduces_channels = out_channels // reduction
+        self.shortcut = (
+            ResNetShortCut(in_channels, out_channels, stride=stride) if should_apply_shortcut else nn.Identity()
+        )
+        self.layer = nn.Sequential(
+            ResNetConvLayer(in_channels, reduces_channels, kernel_size=1),
+            ResNetConvLayer(reduces_channels, reduces_channels, stride=stride),
+            ResNetConvLayer(reduces_channels, out_channels, kernel_size=1, activation=None),
+        )
+        self.activation = ACT2FN[activation]
+    def forward(self, hidden_state):
+        residual = hidden_state
+        hidden_state = self.layer(hidden_state)
+        residual = self.shortcut(residual)
+        hidden_state += residual
+        hidden_state = self.activation(hidden_state)
+        return hidden_state
+class ResNetStage(nn.Module):
+    """
+    A ResNet stage composed by stacked layers.
+    The layer class is `ResNetBottleNeckLayer` when `config.layer_type == "bottleneck"` and `ResNetBasicLayer`
+    otherwise. Only the first layer receives `stride` and the `in_channels -> out_channels` change; the remaining
+    `depth - 1` layers map `out_channels -> out_channels` with their default stride.
+    """
+    def __init__(
+        self,
+        config: ResNetConfig,
+        in_channels: int,
+        out_channels: int,
+        stride: int = 2,
+        depth: int = 2,
+    ):
+        super().__init__()
+        layer = ResNetBottleNeckLayer if config.layer_type == "bottleneck" else ResNetBasicLayer
+        self.layers = nn.Sequential(
+            # downsampling is done in the first layer with stride of 2
+            layer(in_channels, out_channels, stride=stride, activation=config.hidden_act),
+            *[layer(out_channels, out_channels, activation=config.hidden_act) for _ in range(depth - 1)],
+        )
+    def forward(self, input: Tensor) -> Tensor:
+        # Layers are applied one by one rather than through `self.layers(input)` so that the two extra
+        # statements below run after every layer.
+        hidden_state = input
+        for layer in self.layers:
+            hidden_state = layer(hidden_state)
+            # NOTE(review): adding 1 after every layer and printing on every call are not part of a standard
+            # ResNet stage. Presumably these are deliberate markers that this custom remote code is the one being
+            # executed (test fixture) -- confirm before removing or relying on the numerical output.
+            hidden_state = hidden_state + 1
+            print("having fun in my custom code")
+        return hidden_state
+class ResNetEncoder(nn.Module):
+    def __init__(self, config: ResNetConfig):
+        super().__init__()
+        self.stages = nn.ModuleList([])
+        # based on `downsample_in_first_stage` the first layer of the first stage may or may not downsample the input
+        self.stages.append(
+            ResNetStage(
+                config,
+                config.embedding_size,
+                config.hidden_sizes[0],
+                stride=2 if config.downsample_in_first_stage else 1,
+                depth=config.depths[0],
+            )
+        )
+        in_out_channels = zip(config.hidden_sizes, config.hidden_sizes[1:])
+        for (in_channels, out_channels), depth in zip(in_out_channels, config.depths[1:]):
+            self.stages.append(ResNetStage(config, in_channels, out_channels, depth=depth))
+    def forward(
+        self, hidden_state: Tensor, output_hidden_states: bool = False, return_dict: bool = True
+    ) -> BaseModelOutputWithNoAttention:
+        hidden_states = () if output_hidden_states else None
+        for stage_module in self.stages:
+            if output_hidden_states:
+                hidden_states = hidden_states + (hidden_state,)
+            hidden_state = stage_module(hidden_state)
+        if output_hidden_states:
+            hidden_states = hidden_states + (hidden_state,)
+        if not return_dict:
+            return tuple(v for v in [hidden_state, hidden_states] if v is not None)
+        return BaseModelOutputWithNoAttention(
+            last_hidden_state=hidden_state,
+            hidden_states=hidden_states,
+        )
+class ResNetPreTrainedModel(PreTrainedModel):
+    """
+    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
+    models.
+    """
+    config_class = ResNetConfig
+    base_model_prefix = "resnet"
+    main_input_name = "pixel_values"
+    supports_gradient_checkpointing = True
+    def _init_weights(self, module):
+        # Conv2d weights: Kaiming-normal (fan_out, relu). BatchNorm2d / GroupNorm: weight 1, bias 0.
+        # Any other module type is left untouched by this method.
+        if isinstance(module, nn.Conv2d):
+            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
+        elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
+            nn.init.constant_(module.weight, 1)
+            nn.init.constant_(module.bias, 0)
+    def _set_gradient_checkpointing(self, module, value=False):
+        # NOTE(review): this only stores the flag on `ResNetEncoder`; the `ResNetEncoder.forward` defined in this
+        # file never reads `gradient_checkpointing` -- confirm whether checkpointing is expected to have an effect.
+        if isinstance(module, ResNetEncoder):
+            module.gradient_checkpointing = value
+RESNET_START_DOCSTRING = r"""
+    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
+    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
+    behavior.
+    Parameters:
+        config ([`ResNetConfig`]): Model configuration class with all the parameters of the model.
+            Initializing with a config file does not load the weights associated with the model, only the
+            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
+"""
+RESNET_INPUTS_DOCSTRING = r"""
+    Args:
+        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
+            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+            [`AutoImageProcessor.__call__`] for details.
+        output_hidden_states (`bool`, *optional*):
+            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
+            more detail.
+        return_dict (`bool`, *optional*):
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+"""
+@add_start_docstrings(
+    "The bare ResNet model outputting raw features without any specific head on top.",
+    RESNET_START_DOCSTRING,
+)
+class ResNetModel(ResNetPreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.config = config
+        # stem -> stages -> global average pooling to a 1x1 spatial map
+        self.embedder = ResNetEmbeddings(config)
+        self.encoder = ResNetEncoder(config)
+        self.pooler = nn.AdaptiveAvgPool2d((1, 1))
+        # Initialize weights and apply final processing
+        self.post_init()
+    @add_start_docstrings_to_model_forward(RESNET_INPUTS_DOCSTRING)
+    @add_code_sample_docstrings(
+        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_DOC,
+        output_type=BaseModelOutputWithPoolingAndNoAttention,
+        config_class=_CONFIG_FOR_DOC,
+        modality="vision",
+        expected_output=_EXPECTED_OUTPUT_SHAPE,
+    )
+    def forward(
+        self, pixel_values: Tensor, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None
+    ) -> BaseModelOutputWithPoolingAndNoAttention:
+        # `None` for either flag means "use the value from the config"
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        embedding_output = self.embedder(pixel_values)
+        encoder_outputs = self.encoder(
+            embedding_output, output_hidden_states=output_hidden_states, return_dict=return_dict
+        )
+        # index 0 is the last hidden state for both the tuple and the output-object form of the encoder result
+        last_hidden_state = encoder_outputs[0]
+        pooled_output = self.pooler(last_hidden_state)
+        if not return_dict:
+            # tuple layout: (last_hidden_state, pooled_output[, hidden_states])
+            return (last_hidden_state, pooled_output) + encoder_outputs[1:]
+        return BaseModelOutputWithPoolingAndNoAttention(
+            last_hidden_state=last_hidden_state,
+            pooler_output=pooled_output,
+            hidden_states=encoder_outputs.hidden_states,
+        )
+@add_start_docstrings(
+    """
+    ResNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
+    ImageNet.
+    """,
+    RESNET_START_DOCSTRING,
+)
+class ResNetCustomForImageClassification(ResNetPreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.resnet = ResNetModel(config)
+        # classification head
+        # the pooled output is flattened first; with `num_labels <= 0` the head passes the flattened features through
+        self.classifier = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(config.hidden_sizes[-1], config.num_labels) if config.num_labels > 0 else nn.Identity(),
+        )
+        # initialize weights and apply final processing
+        self.post_init()
+    @add_start_docstrings_to_model_forward(RESNET_INPUTS_DOCSTRING)
+    @add_code_sample_docstrings(
+        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
+        checkpoint=_IMAGE_CLASS_CHECKPOINT,
+        output_type=ImageClassifierOutputWithNoAttention,
+        config_class=_CONFIG_FOR_DOC,
+        expected_output=_IMAGE_CLASS_EXPECTED_OUTPUT,
+    )
+    def forward(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> ImageClassifierOutputWithNoAttention:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
+            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        outputs = self.resnet(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)
+        # in tuple form the pooled output sits at index 1, right after the last hidden state
+        pooled_output = outputs.pooler_output if return_dict else outputs[1]
+        logits = self.classifier(pooled_output)
+        loss = None
+        if labels is not None:
+            # When unset, the problem type is inferred from `num_labels` and the label dtype, and the result is
+            # written back to `self.config`, so it persists for later calls on this model.
+            if self.config.problem_type is None:
+                if self.num_labels == 1:
+                    self.config.problem_type = "regression"
+                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
+                    self.config.problem_type = "single_label_classification"
+                else:
+                    self.config.problem_type = "multi_label_classification"
+            # regression -> MSE, single-label -> cross-entropy, multi-label -> BCE with logits;
+            # any other `problem_type` value leaves `loss` as None
+            if self.config.problem_type == "regression":
+                loss_fct = MSELoss()
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
+            elif self.config.problem_type == "single_label_classification":
+                loss_fct = CrossEntropyLoss()
+                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+            elif self.config.problem_type == "multi_label_classification":
+                loss_fct = BCEWithLogitsLoss()
+                loss = loss_fct(logits, labels)
+        if not return_dict:
+            # tuple layout: ([loss,] logits[, hidden_states]); the first two entries of `outputs` are dropped
+            output = (logits,) + outputs[2:]
+            return (loss,) + output if loss is not None else output
+        return ImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states)
+@add_start_docstrings(
+    """
+    ResNet backbone, to be used with frameworks like DETR and MaskFormer.
+    """,
+    RESNET_START_DOCSTRING,
+)
+class ResNetBackbone(ResNetPreTrainedModel, BackboneMixin):
+    def __init__(self, config):
+        super().__init__(config)
+        self.stage_names = config.stage_names
+        self.embedder = ResNetEmbeddings(config)
+        self.encoder = ResNetEncoder(config)
+        # without an explicit `config.out_features`, only the last stage is exposed
+        self.out_features = config.out_features if config.out_features is not None else [self.stage_names[-1]]
+        # channel count per feature name: "stem" uses the embedding size, `stage_names[1:]` are paired with
+        # `config.hidden_sizes` in order (assumes `stage_names[0]` is the stem entry -- TODO confirm in the config)
+        out_feature_channels = {}
+        out_feature_channels["stem"] = config.embedding_size
+        for idx, stage in enumerate(self.stage_names[1:]):
+            out_feature_channels[stage] = config.hidden_sizes[idx]
+        self.out_feature_channels = out_feature_channels
+        # initialize weights and apply final processing
+        self.post_init()
+    @property
+    def channels(self):
+        # channel counts in the same order as `self.out_features`
+        return [self.out_feature_channels[name] for name in self.out_features]
+    @add_start_docstrings_to_model_forward(RESNET_INPUTS_DOCSTRING)
+    @replace_return_docstrings(output_type=BackboneOutput, config_class=_CONFIG_FOR_DOC)
+    def forward(
+        self, pixel_values: Tensor, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None
+    ) -> BackboneOutput:
+        """
+        Returns:
+        Examples:
+        ```python
+        >>> from transformers import AutoImageProcessor, AutoBackbone
+        >>> import torch
+        >>> from PIL import Image
+        >>> import requests
+        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        >>> image = Image.open(requests.get(url, stream=True).raw)
+        >>> processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
+        >>> model = AutoBackbone.from_pretrained(
+        ...     "microsoft/resnet-50", out_features=["stage1", "stage2", "stage3", "stage4"]
+        ... )
+        >>> inputs = processor(image, return_tensors="pt")
+        >>> outputs = model(**inputs)
+        >>> feature_maps = outputs.feature_maps
+        >>> list(feature_maps[-1].shape)
+        [1, 2048, 7, 7]
+        ```"""
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        embedding_output = self.embedder(pixel_values)
+        # The encoder always collects hidden states because the feature maps are picked from them; the caller's
+        # `output_hidden_states` only decides whether they are also returned.
+        outputs = self.encoder(embedding_output, output_hidden_states=True, return_dict=True)
+        hidden_states = outputs.hidden_states
+        # `hidden_states[0]` is the embedding output and each later entry is the output of one stage, so the
+        # position in `stage_names` is used directly as the index
+        feature_maps = ()
+        for idx, stage in enumerate(self.stage_names):
+            if stage in self.out_features:
+                feature_maps += (hidden_states[idx],)
+        if not return_dict:
+            # tuple layout: (feature_maps[, hidden_states])
+            output = (feature_maps,)
+            if output_hidden_states:
+                output += (outputs.hidden_states,)
+            return output
+        return BackboneOutput(
+            feature_maps=feature_maps,
+            hidden_states=outputs.hidden_states if output_hidden_states else None,
+            attentions=None,
+        )

preprocessor_config.json DELETED Viewed

@@ -1,18 +0,0 @@
-{
-  "crop_pct": 0.875,
-  "do_normalize": true,
-  "do_resize": true,
-  "feature_extractor_type": "ConvNextFeatureExtractor",
-  "image_mean": [
-    0.485,
-    0.456,
-    0.406
-  ],
-  "image_std": [
-    0.229,
-    0.224,
-    0.225
-  ],
-  "resample": 3,
-  "size": 224
-}

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b41ec5a4bea6eee004ec1213ed11685f6d61f457f8e4ce91190dabb9b8edf680
-size 401037

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f478b667de57399a36a48edda1a0c261b8370677f3b500f9dd740afc4967e15
+size 46837749