Add "Trainable" column (#128)

bsridatta · TylerYep · web-flow · commit 7def795b75ed · 2022-05-15T15:27:02.000-07:00
* add is_trainable column

* test: testcase for is_trainable column

* add a model that has fully, partially, and non-trainable modules
* update tests that require all columns to display the is_trainable column as well

* docs: update README.md

* fix type ignores and nits

* Rename is_trainable to trainable

* Calculate trainable in pre_hook

* Fix readme

Co-authored-by: Tyler Yep <tyler.yep@robinhood.com>
diff --git a/README.md b/README.md
@@ -115,7 +115,8 @@ Summarize the given PyTorch model. Summarized information includes:
     2) input/output shapes,
     3) kernel shape,
     4) # of parameters,
-    5) # of operations (Mult-Adds)
+    5) # of operations (Mult-Adds),
+    6) whether layer is trainable
 
 NOTE: If neither input_data or input_size are provided, no forward pass through the
 network is performed, and the provided model information is limited to layer names.
@@ -166,6 +167,7 @@ Args:
                 "num_params",
                 "kernel_size",
                 "mult_adds",
+                "trainable",
             )
             Default: ("output_size", "num_params")
             If input_data / input_size are not provided, only "num_params" is used.
diff --git a/tests/fixtures/models.py b/tests/fixtures/models.py
@@ -502,6 +502,32 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.w * x + self.b
 
 
+class MixedTrainable(nn.Module):
+    """Model with fully, partial and non trainable modules."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.fully_trainable = nn.Conv1d(1, 1, 1)
+
+        self.partially_trainable = nn.Conv1d(1, 1, 1, bias=True)
+        assert self.partially_trainable.bias is not None
+        self.partially_trainable.bias.requires_grad = False
+
+        self.non_trainable = nn.Conv1d(1, 1, 1, 1, bias=True)
+        self.non_trainable.weight.requires_grad = False
+        assert self.non_trainable.bias is not None
+        self.non_trainable.bias.requires_grad = False
+
+        self.dropout = nn.Dropout()
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.fully_trainable(x)
+        x = self.partially_trainable(x)
+        x = self.non_trainable(x)
+        x = self.dropout(x)
+        return x
+
+
 class ReuseLinear(nn.Module):
     """Model that uses a reference to the same Linear layer over and over."""
 
diff --git a/tests/test_output/parameter_list.out b/tests/test_output/parameter_list.out
@@ -1,18 +1,18 @@
-===================================================================================================================
-Layer (type:depth-idx)                   Kernel Shape    Input Shape     Output Shape    Param #         Mult-Adds
-===================================================================================================================
-ParameterListModel                       --              --              --              --              --
-├─ParameterList: 1-1                     --              --              --              30,000          --
-│    └─0                                 [100, 100]                                      ├─10,000
-│    └─1                                 [100, 200]                                      └─20,000
-===================================================================================================================
+================================================================================================================================================================
+Layer (type:depth-idx)                   Kernel Shape         Input Shape          Output Shape         Param #              Mult-Adds            Trainable
+================================================================================================================================================================
+ParameterListModel                       --                   --                   --                   --                   --                   True
+├─ParameterList: 1-1                     --                   --                   --                   30,000               --                   True
+│    └─0                                 [100, 100]                                                     ├─10,000
+│    └─1                                 [100, 200]                                                     └─20,000
+================================================================================================================================================================
 Total params: 30,000
 Trainable params: 30,000
 Non-trainable params: 0
 Total mult-adds (M): 0.00
-===================================================================================================================
+================================================================================================================================================================
 Input size (MB): 0.04
 Forward/backward pass size (MB): 0.00
 Params size (MB): 0.12
 Estimated Total Size (MB): 0.16
-===================================================================================================================
+================================================================================================================================================================
diff --git a/tests/test_output/single_input_all_cols.out b/tests/test_output/single_input_all_cols.out
@@ -1,20 +1,20 @@
-============================================================================================================================================
-Layer (type:depth-idx)                   Kernel Shape         Input Shape          Output Shape         Param #              Mult-Adds
-============================================================================================================================================
-SingleInputNet                           --                   --                   --                   --                   --
-├─Conv2d: 1-1                            [5, 5]               [7, 1, 28, 28]       [7, 10, 24, 24]      260                  1,048,320
-├─Conv2d: 1-2                            [5, 5]               [7, 10, 12, 12]      [7, 20, 8, 8]        5,020                2,248,960
-├─Dropout2d: 1-3                         --                   [7, 20, 8, 8]        [7, 20, 8, 8]        --                   --
-├─Linear: 1-4                            --                   [7, 320]             [7, 50]              16,050               112,350
-├─Linear: 1-5                            --                   [7, 50]              [7, 10]              510                  3,570
-============================================================================================================================================
+================================================================================================================================================================
+Layer (type:depth-idx)                   Kernel Shape         Input Shape          Output Shape         Param #              Mult-Adds            Trainable
+================================================================================================================================================================
+SingleInputNet                           --                   --                   --                   --                   --                   True
+├─Conv2d: 1-1                            [5, 5]               [7, 1, 28, 28]       [7, 10, 24, 24]      260                  1,048,320            True
+├─Conv2d: 1-2                            [5, 5]               [7, 10, 12, 12]      [7, 20, 8, 8]        5,020                2,248,960            True
+├─Dropout2d: 1-3                         --                   [7, 20, 8, 8]        [7, 20, 8, 8]        --                   --                   --
+├─Linear: 1-4                            --                   [7, 320]             [7, 50]              16,050               112,350              True
+├─Linear: 1-5                            --                   [7, 50]              [7, 10]              510                  3,570                True
+================================================================================================================================================================
 Total params: 21,840
 Trainable params: 21,840
 Non-trainable params: 0
 Total mult-adds (M): 3.41
-============================================================================================================================================
+================================================================================================================================================================
 Input size (MB): 0.02
 Forward/backward pass size (MB): 0.40
 Params size (MB): 0.09
 Estimated Total Size (MB): 0.51
-============================================================================================================================================
+================================================================================================================================================================
diff --git a/tests/test_output/trainable_column.out b/tests/test_output/trainable_column.out
@@ -0,0 +1,19 @@
+============================================================================================================================================
+Layer (type:depth-idx)                   Kernel Shape              Input Shape               Output Shape              Trainable
+============================================================================================================================================
+MixedTrainable                           --                        --                        --                        Partial
+├─Conv1d: 1-1                            [1]                       [1, 1, 1]                 [1, 1, 1]                 True
+├─Conv1d: 1-2                            [1]                       [1, 1, 1]                 [1, 1, 1]                 Partial
+├─Conv1d: 1-3                            [1]                       [1, 1, 1]                 [1, 1, 1]                 False
+├─Dropout: 1-4                           --                        [1, 1, 1]                 [1, 1, 1]                 --
+============================================================================================================================================
+Total params: 6
+Trainable params: 3
+Non-trainable params: 3
+Total mult-adds (M): 0.00
+============================================================================================================================================
+Input size (MB): 0.00
+Forward/backward pass size (MB): 0.00
+Params size (MB): 0.00
+Estimated Total Size (MB): 0.00
+============================================================================================================================================
diff --git a/tests/torchinfo_test.py b/tests/torchinfo_test.py
@@ -14,6 +14,7 @@
     FakePrunedLayerModel,
     LinearModel,
     LSTMNet,
+    MixedTrainable,
     MixedTrainableParameters,
     ModuleDictModel,
     MultipleInputNetDifferentDtypes,
@@ -111,13 +112,12 @@ def test_multiple_input_types() -> None:
 
 def test_single_input_all_cols() -> None:
     model = SingleInputNet()
-    col_names = ("kernel_size", "input_size", "output_size", "num_params", "mult_adds")
     input_shape = (7, 1, 28, 28)
     summary(
         model,
         input_data=torch.randn(*input_shape),
         depth=1,
-        col_names=col_names,
+        col_names=list(ColumnSettings),
         col_width=20,
     )
 
@@ -194,7 +194,7 @@ def test_parameter_list() -> None:
         input_size=(100, 100),
         verbose=2,
         col_names=list(ColumnSettings),
-        col_width=15,
+        col_width=20,
     )
 
 
@@ -462,3 +462,11 @@ def test_pruned_adversary() -> None:
     results = summary(second_model, input_size=(1,))
 
     assert results.total_params == 32  # should be 64
+
+
+def test_trainable_column() -> None:
+    summary(
+        MixedTrainable(),
+        input_size=(1, 1, 1),
+        col_names=("kernel_size", "input_size", "output_size", "trainable"),
+    )
diff --git a/torchinfo/enums.py b/torchinfo/enums.py
@@ -29,6 +29,7 @@ class ColumnSettings(str, Enum):
     OUTPUT_SIZE = "output_size"
     NUM_PARAMS = "num_params"
     MULT_ADDS = "mult_adds"
+    TRAINABLE = "trainable"
 
 
 @unique
diff --git a/torchinfo/formatting.py b/torchinfo/formatting.py
@@ -12,6 +12,7 @@
     ColumnSettings.OUTPUT_SIZE: "Output Shape",
     ColumnSettings.NUM_PARAMS: "Param #",
     ColumnSettings.MULT_ADDS: "Mult-Adds",
+    ColumnSettings.TRAINABLE: "Trainable",
 }
 
 
@@ -113,6 +114,7 @@ def layer_info_to_row(
             ColumnSettings.MULT_ADDS: layer_info.macs_to_str(
                 reached_max_depth, children_layers
             ),
+            ColumnSettings.TRAINABLE: self.str_(layer_info.trainable),
         }
         start_str = self.get_start_str(layer_info.depth)
         layer_name = layer_info.get_layer_name(self.show_var_name, self.show_depth)
diff --git a/torchinfo/layer_info.py b/torchinfo/layer_info.py
@@ -59,6 +59,7 @@ def __init__(
         self.param_bytes = 0
         self.output_bytes = 0
         self.macs = 0
+        self.trainable = self.is_trainable(module)
 
     def __repr__(self) -> str:
         return f"{self.class_name}: {self.depth}"
@@ -159,6 +160,24 @@ def get_kernel_size(module: nn.Module) -> int | list[int] | None:
             return kernel_size
         return None
 
+    @staticmethod
+    def is_trainable(module: nn.Module) -> str:
+        """
+        Checks if the module is trainable. Returns:
+            "True", if all the parameters are trainable (`requires_grad=True`)
+            "False" if none of the parameters are trainable.
+            "Partial" if some weights are trainable, but not all.
+            "--" if the module has no parameters, like Dropout.
+        """
+        module_requires_grad = [param.requires_grad for param in module.parameters()]
+        if not module_requires_grad:
+            return "--"
+        if all(module_requires_grad):
+            return "True"
+        if any(module_requires_grad):
+            return "Partial"
+        return "False"
+
     def get_layer_name(self, show_var_name: bool, show_depth: bool) -> str:
         layer_name = self.class_name
         if show_var_name and self.var_name:
diff --git a/torchinfo/torchinfo.py b/torchinfo/torchinfo.py
@@ -70,7 +70,8 @@ def summary(
         2) input/output shapes,
         3) kernel shape,
         4) # of parameters,
-        5) # of operations (Mult-Adds)
+        5) # of operations (Mult-Adds),
+        6) whether layer is trainable
 
     NOTE: If neither input_data or input_size are provided, no forward pass through the
     network is performed, and the provided model information is limited to layer names.
@@ -121,6 +122,7 @@ class name as the key. If the forward pass is an expensive operation,
                     "num_params",
                     "kernel_size",
                     "mult_adds",
+                    "trainable",
                 )
                 Default: ("output_size", "num_params")
                 If input_data / input_size are not provided, only "num_params" is used.

Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,7 @@`
`12`	`12`	`ColumnSettings.OUTPUT_SIZE: "Output Shape",`
`13`	`13`	`ColumnSettings.NUM_PARAMS: "Param #",`
`14`	`14`	`ColumnSettings.MULT_ADDS: "Mult-Adds",`
	`15`	`+ ColumnSettings.TRAINABLE: "Trainable",`
`15`	`16`	`}`
`16`	`17`
`17`	`18`
`@@ -113,6 +114,7 @@ def layer_info_to_row(`
`113`	`114`	`ColumnSettings.MULT_ADDS: layer_info.macs_to_str(`
`114`	`115`	`reached_max_depth, children_layers`
`115`	`116`	`),`
	`117`	`+ ColumnSettings.TRAINABLE: self.str_(layer_info.trainable),`
`116`	`118`	`}`
`117`	`119`	`start_str = self.get_start_str(layer_info.depth)`
`118`	`120`	`layer_name = layer_info.get_layer_name(self.show_var_name, self.show_depth)`