Enable analyzing nested input- and output-dicts (#212)

snimu · web-flow · commit c879e2a393d4 · 2023-02-05T11:49:42.000-08:00
* enable analyzing nested input- and output dicts * enable analyzing nested input- and output dicts * skip tests that require torch v1.8 or above when an older version is installed * add test for highly nested dicts, fix error found by it - `elem_bytes` in `LayerInfo.calculate_size(...)` didn't work for nested dicts * `LayerInfo.calculate_size.extract_tensor` now works with `dict` properly - adapted highly_nested_dict_model.out accordingly * simplified `test_highly_nested_dict_model` * `LayerInfo.calculate_size.extract_tensor` now works properly for objects with `tensor`-attribute - Found error in new testcase that comes with this commit * Add docstring to test to explain what exactly it tests * test all edge-cases of `LayerInfo.calculate_size.extract_tensor` * use `dim=0` in `F.softmax` explicitly (implicit use deprecated) * replace custom `torchversion_at_least` with `packaging.version.parse` * modify `EdgecaseInputOutputModel` to increase test-coverage missing: - not hasattr(inputs, "__getitem__") - last return * use torch_nested-package to simplify `LayerInfo.calculate_size` - torch_nested has 99.something% test-coverage - Makes test-coverage for this package much easier - Increases readability & extensibility * Move back from using torch-nested. Fix and use `nested_list_size` instead - Fixes issue#141 - Increases test-coverage - Produces more plausible output for some cases * Fix problem with accessing of dicts Fix [issue#214](#215) * Install compressai in workflows
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -56,6 +56,8 @@ jobs:
         python -m pip install --upgrade pip
         python -m pip install mypy pytest pytest-cov
         pip install torch==${{ matrix.pytorch-version }} torchvision
+        pip install transformers
+        pip install compressai
     - name: mypy
       if: ${{ matrix.pytorch-version == '1.13' }}
       run: |
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -8,3 +8,5 @@ pylint
 pytest
 pytest-cov
 pre-commit
+transformers
+compressai
diff --git a/tests/fixtures/models.py b/tests/fixtures/models.py
@@ -3,7 +3,7 @@
 
 import math
 from collections import namedtuple
-from typing import Any, cast
+from typing import Any, Sequence, cast
 
 import torch
 from torch import nn
@@ -323,6 +323,64 @@ def forward(
         return x
 
 
+class ObjectWithTensors:
+    """A class with a 'tensors'-attribute."""
+
+    def __init__(self, tensors: torch.Tensor | Sequence[Any]) -> None:
+        self.tensors = tensors
+
+
+class HighlyNestedDictModel(nn.Module):
+    """Model that returns a highly nested dict."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.lin1 = nn.Linear(10, 10)
+        self.lin2 = nn.Linear(10, 10)
+
+    def forward(
+        self, x: torch.Tensor
+    ) -> dict[str, tuple[dict[str, list[ObjectWithTensors]]]]:
+        x = self.lin1(x)
+        x = self.lin2(x)
+        x = F.softmax(x, dim=0)
+        return {"foo": ({"bar": [ObjectWithTensors(x)]},)}
+
+
+class IntWithGetitem(int):
+    """An int with a __getitem__ method."""
+
+    def __init__(self, tensor: torch.Tensor) -> None:
+        super().__init__()
+        self.tensor = tensor
+
+    def __int__(self) -> IntWithGetitem:
+        return self
+
+    def __getitem__(self, val: int) -> torch.Tensor:
+        return self.tensor * val
+
+
+class EdgecaseInputOutputModel(nn.Module):
+    """
+    For testing LayerInfo.calculate_size.extract_tensor:
+
+    case hasattr(inputs, "__getitem__") but not
+    isinstance(inputs, (list, tuple, dict)).
+
+    case not inputs.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.linear = nn.Linear(3, 1)
+
+    def forward(self, input_list: dict[str, torch.Tensor]) -> dict[str, IntWithGetitem]:
+        x = input_list["foo"] if input_list else torch.ones(3)
+        x = self.linear(x)
+        return {"foo": IntWithGetitem(x)}
+
+
 class NamedTuple(nn.Module):
     """Model that takes in a NamedTuple as input."""
 
diff --git a/tests/test_output/bert.out b/tests/test_output/bert.out
@@ -0,0 +1,38 @@
+====================================================================================================
+Layer (type:depth-idx)                             Output Shape              Param #
+====================================================================================================
+BertModel                                          [2, 768]                  --
+├─BertEmbeddings: 1-1                              [2, 512, 768]             --
+│    └─Embedding: 2-1                              [2, 512, 768]             23,440,896
+│    └─Embedding: 2-2                              [2, 512, 768]             1,536
+│    └─Embedding: 2-3                              [1, 512, 768]             393,216
+│    └─LayerNorm: 2-4                              [2, 512, 768]             1,536
+│    └─Dropout: 2-5                                [2, 512, 768]             --
+├─BertEncoder: 1-2                                 [2, 512, 768]             --
+│    └─ModuleList: 2-6                             --                        --
+│    │    └─BertLayer: 3-1                         [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-2                         [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-3                         [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-4                         [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-5                         [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-6                         [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-7                         [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-8                         [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-9                         [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-10                        [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-11                        [2, 512, 768]             7,087,872
+│    │    └─BertLayer: 3-12                        [2, 512, 768]             7,087,872
+├─BertPooler: 1-3                                  [2, 768]                  --
+│    └─Linear: 2-7                                 [2, 768]                  590,592
+│    └─Tanh: 2-8                                   [2, 768]                  --
+====================================================================================================
+Total params: 109,482,240
+Trainable params: 109,482,240
+Non-trainable params: 0
+Total mult-adds (M): 218.57
+====================================================================================================
+Input size (MB): 0.01
+Forward/backward pass size (MB): 852.50
+Params size (MB): 437.93
+Estimated Total Size (MB): 1290.45
+====================================================================================================
diff --git a/tests/test_output/compressai.out b/tests/test_output/compressai.out
@@ -0,0 +1,45 @@
+===============================================================================================
+Layer (type:depth-idx)                        Output Shape              Param #
+===============================================================================================
+FactorizedPrior                               [1, 192, 16, 16]          --
+├─Sequential: 1-1                             [1, 192, 16, 16]          --
+│    └─Conv2d: 2-1                            [1, 128, 128, 128]        9,728
+│    └─GDN: 2-2                               [1, 128, 128, 128]        16,512
+│    │    └─NonNegativeParametrizer: 3-1      [128]                     --
+│    │    └─NonNegativeParametrizer: 3-2      [128, 128]                --
+│    └─Conv2d: 2-3                            [1, 128, 64, 64]          409,728
+│    └─GDN: 2-4                               [1, 128, 64, 64]          16,512
+│    │    └─NonNegativeParametrizer: 3-3      [128]                     --
+│    │    └─NonNegativeParametrizer: 3-4      [128, 128]                --
+│    └─Conv2d: 2-5                            [1, 128, 32, 32]          409,728
+│    └─GDN: 2-6                               [1, 128, 32, 32]          16,512
+│    │    └─NonNegativeParametrizer: 3-5      [128]                     --
+│    │    └─NonNegativeParametrizer: 3-6      [128, 128]                --
+│    └─Conv2d: 2-7                            [1, 192, 16, 16]          614,592
+├─EntropyBottleneck: 1-2                      [1, 192, 16, 16]          11,712
+│    └─LowerBound: 2-8                        [192, 1, 256]             --
+├─Sequential: 1-3                             [1, 3, 256, 256]          --
+│    └─ConvTranspose2d: 2-9                   [1, 128, 32, 32]          614,528
+│    └─GDN: 2-10                              [1, 128, 32, 32]          16,512
+│    │    └─NonNegativeParametrizer: 3-7      [128]                     --
+│    │    └─NonNegativeParametrizer: 3-8      [128, 128]                --
+│    └─ConvTranspose2d: 2-11                  [1, 128, 64, 64]          409,728
+│    └─GDN: 2-12                              [1, 128, 64, 64]          16,512
+│    │    └─NonNegativeParametrizer: 3-9      [128]                     --
+│    │    └─NonNegativeParametrizer: 3-10     [128, 128]                --
+│    └─ConvTranspose2d: 2-13                  [1, 128, 128, 128]        409,728
+│    └─GDN: 2-14                              [1, 128, 128, 128]        16,512
+│    │    └─NonNegativeParametrizer: 3-11     [128]                     --
+│    │    └─NonNegativeParametrizer: 3-12     [128, 128]                --
+│    └─ConvTranspose2d: 2-15                  [1, 3, 256, 256]          9,603
+===============================================================================================
+Total params: 2,998,147
+Trainable params: 2,998,147
+Non-trainable params: 0
+Total mult-adds (G): 12.06
+===============================================================================================
+Input size (MB): 0.79
+Forward/backward pass size (MB): 46.01
+Params size (MB): 11.55
+Estimated Total Size (MB): 58.34
+===============================================================================================
diff --git a/tests/test_output/edgecase_input_output_model.out b/tests/test_output/edgecase_input_output_model.out
@@ -0,0 +1,16 @@
+==========================================================================================
+Layer (type:depth-idx)                   Output Shape              Param #
+==========================================================================================
+EdgecaseInputOutputModel                 --                        --
+├─Linear: 1-1                            [1]                       4
+==========================================================================================
+Total params: 4
+Trainable params: 4
+Non-trainable params: 0
+Total mult-adds (M): 0.00
+==========================================================================================
+Input size (MB): 0.00
+Forward/backward pass size (MB): 0.00
+Params size (MB): 0.00
+Estimated Total Size (MB): 0.00
+==========================================================================================
diff --git a/tests/test_output/flan_t5_small.out b/tests/test_output/flan_t5_small.out
@@ -0,0 +1,46 @@
+==============================================================================================================
+Layer (type:depth-idx)                                       Output Shape              Param #
+==============================================================================================================
+T5ForConditionalGeneration                                   [2, 100, 512]             --
+├─T5Stack: 1-1                                               [2, 100, 512]             35,332,800
+├─T5Stack: 1-2                                               --                        (recursive)
+│    └─Embedding: 2-1                                        [2, 100, 512]             16,449,536
+├─T5Stack: 1-3                                               --                        (recursive)
+│    └─Dropout: 2-2                                          [2, 100, 512]             --
+│    └─ModuleList: 2-3                                       --                        --
+│    │    └─T5Block: 3-1                                     [2, 100, 512]             2,360,512
+│    │    └─T5Block: 3-2                                     [2, 100, 512]             2,360,320
+│    │    └─T5Block: 3-3                                     [2, 100, 512]             2,360,320
+│    │    └─T5Block: 3-4                                     [2, 100, 512]             2,360,320
+│    │    └─T5Block: 3-5                                     [2, 100, 512]             2,360,320
+│    │    └─T5Block: 3-6                                     [2, 100, 512]             2,360,320
+│    │    └─T5Block: 3-7                                     [2, 100, 512]             2,360,320
+│    │    └─T5Block: 3-8                                     [2, 100, 512]             2,360,320
+│    └─T5LayerNorm: 2-4                                      [2, 100, 512]             512
+│    └─Dropout: 2-5                                          [2, 100, 512]             --
+├─T5Stack: 1-4                                               [2, 6, 100, 64]           16,449,536
+│    └─Embedding: 2-6                                        [2, 100, 512]             (recursive)
+│    └─Dropout: 2-7                                          [2, 100, 512]             --
+│    └─ModuleList: 2-8                                       --                        --
+│    │    └─T5Block: 3-9                                     [2, 100, 512]             3,147,456
+│    │    └─T5Block: 3-10                                    [2, 100, 512]             3,147,264
+│    │    └─T5Block: 3-11                                    [2, 100, 512]             3,147,264
+│    │    └─T5Block: 3-12                                    [2, 100, 512]             3,147,264
+│    │    └─T5Block: 3-13                                    [2, 100, 512]             3,147,264
+│    │    └─T5Block: 3-14                                    [2, 100, 512]             3,147,264
+│    │    └─T5Block: 3-15                                    [2, 100, 512]             3,147,264
+│    │    └─T5Block: 3-16                                    [2, 100, 512]             3,147,264
+│    └─T5LayerNorm: 2-9                                      [2, 100, 512]             512
+│    └─Dropout: 2-10                                         [2, 100, 512]             --
+├─Linear: 1-5                                                [2, 100, 32128]           16,449,536
+==============================================================================================================
+Total params: 128,743,488
+Trainable params: 128,743,488
+Non-trainable params: 0
+Total mult-adds (M): 186.86
+==============================================================================================================
+Input size (MB): 0.00
+Forward/backward pass size (MB): 217.84
+Params size (MB): 307.84
+Estimated Total Size (MB): 525.69
+==============================================================================================================
diff --git a/tests/test_output/highly_nested_dict_model.out b/tests/test_output/highly_nested_dict_model.out
@@ -0,0 +1,17 @@
+==========================================================================================
+Layer (type:depth-idx)                   Output Shape              Param #
+==========================================================================================
+HighlyNestedDictModel                    [10]                      --
+├─Linear: 1-1                            [10]                      110
+├─Linear: 1-2                            [10]                      110
+==========================================================================================
+Total params: 220
+Trainable params: 220
+Non-trainable params: 0
+Total mult-adds (M): 0.00
+==========================================================================================
+Input size (MB): 0.00
+Forward/backward pass size (MB): 0.00
+Params size (MB): 0.00
+Estimated Total Size (MB): 0.00
+==========================================================================================
diff --git a/tests/torchinfo_test.py b/tests/torchinfo_test.py
@@ -13,8 +13,10 @@
     ConvLayerB,
     CustomParameter,
     DictParameter,
+    EdgecaseInputOutputModel,
     EmptyModule,
     FakePrunedLayerModel,
+    HighlyNestedDictModel,
     InsideModel,
     LinearModel,
     LSTMNet,
@@ -344,6 +346,26 @@ def test_module_dict() -> None:
     )
 
 
+def test_highly_nested_dict_model() -> None:
+    """
+    Test the following if-clauses
+    from LayerInfo.calculate_size.extract_tensor: 1, 2, 4, 5
+    (starts counting from 1)
+    """
+    model = HighlyNestedDictModel()
+    summary(model, input_data=torch.ones(10))
+
+
+def test_edgecase_input_output_model() -> None:
+    """
+    Test the following if-clauses
+    from LayerInfo.calculate_size.extract_tensor: 3
+    (starts counting from 1) as well as the final return.
+    """
+    model = EdgecaseInputOutputModel()
+    summary(model, input_data=[{}])
+
+
 def test_model_with_args() -> None:
     summary(RecursiveNet(), input_size=(1, 64, 28, 28), args1="args1", args2="args2")
 
diff --git a/tests/torchinfo_xl_test.py b/tests/torchinfo_xl_test.py
@@ -1,6 +1,13 @@
 import pytest
 import torch
 import torchvision  # type: ignore[import]
+from compressai.zoo import image_models  # type: ignore[import]
+from packaging import version
+from transformers import (  # type: ignore[import]
+    AutoModelForSeq2SeqLM,
+    BertConfig,
+    BertModel,
+)
 
 from tests.fixtures.genotype import GenotypeNetwork  # type: ignore[attr-defined]
 from tests.fixtures.tmva_net import TMVANet  # type: ignore[attr-defined]
@@ -143,3 +150,40 @@ def test_google() -> None:
     # Check googlenet in training mode since InceptionAux layers are used in
     # forward-prop in train mode but not in eval mode.
     summary(google_net, (1, 3, 112, 112), depth=7, mode="train")
+
+
+@pytest.mark.skipif(
+    version.parse(torch.__version__) < version.parse("1.8"),
+    reason="FlanT5Small only works for PyTorch v1.8 and above",
+)
+def test_flan_t5_small() -> None:
+    model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+    inputs = {
+        "input_ids": torch.zeros(2, 100).long(),
+        "attention_mask": torch.zeros(2, 100).long(),
+        "labels": torch.zeros(2, 100).long(),
+    }
+    summary(model, input_data=inputs)
+
+
+@pytest.mark.skipif(
+    version.parse(torch.__version__) < version.parse("1.8"),
+    reason="BertModel only works for PyTorch v1.8 and above",
+)
+def test_bert() -> None:
+    model = BertModel(BertConfig())
+    summary(
+        model,
+        input_size=[(2, 512), (2, 512), (2, 512)],
+        dtypes=[torch.int, torch.int, torch.int],
+        device="cpu",
+    )
+
+
+@pytest.mark.skipif(
+    version.parse(torch.__version__) < version.parse("1.8"),
+    reason="compressai only works for PyTorch v1.8 and above",
+)
+def test_compressai() -> None:
+    model = image_models["bmshj2018-factorized"](quality=4, pretrained=True)
+    summary(model, (1, 3, 256, 256))
diff --git a/torchinfo/layer_info.py b/torchinfo/layer_info.py
diff --git a/torchinfo/model_statistics.py b/torchinfo/model_statistics.py