diff --git a/torch/_decomp/__init__.py b/torch/_decomp/__init__.py
index 37b50a2efddf62..83012f475a5b8d 100644
--- a/torch/_decomp/__init__.py
+++ b/torch/_decomp/__init__.py
@@ -87,8 +87,7 @@ def _add_op_to_registry(registry, op, fn):
         overloads.append(op)
     else:
         assert isinstance(op, OpOverloadPacket)
-        for ol in op.overloads():
-            overloads.append(getattr(op, ol))
+        overloads.extend(getattr(op, ol) for ol in op.overloads())
 
     for op_overload in overloads:
         if op_overload in registry:
diff --git a/torch/_dynamo/bytecode_transformation.py b/torch/_dynamo/bytecode_transformation.py
index 16de6ef0ce3e81..eb9610891f2905 100644
--- a/torch/_dynamo/bytecode_transformation.py
+++ b/torch/_dynamo/bytecode_transformation.py
@@ -1602,10 +1602,7 @@ def template():
                     new_insts.append(inst)
             insts = new_insts
 
-        returns = []
-        for inst in insts:
-            if inst.opname == "RETURN_VALUE":
-                returns.append(inst)
+        returns = [inst for inst in insts if inst.opname == "RETURN_VALUE"]
 
         if len(returns) == 1 and returns[0] is insts[-1]:
             # only 1 return at the end - just pop it
diff --git a/torch/_dynamo/graph_region_tracker.py b/torch/_dynamo/graph_region_tracker.py
index 9875e448c995b2..585cf450bdc4fb 100644
--- a/torch/_dynamo/graph_region_tracker.py
+++ b/torch/_dynamo/graph_region_tracker.py
@@ -40,10 +40,8 @@ def _extract_tensor_metadata_for_node_hash(
 ) -> tuple[Callable[[T], T], tuple[Any, ...]]:
     from torch._inductor.codecache import _ident, extract_tensor_metadata_for_cache_key
 
-    out = []
     metadata = extract_tensor_metadata_for_cache_key(x)
-    for field in fields(metadata):
-        out.append(getattr(metadata, field.name))
+    out = [getattr(metadata, field.name) for field in fields(metadata)]
 
     return (_ident, tuple(out))
 
diff --git a/torch/_dynamo/polyfills/__init__.py b/torch/_dynamo/polyfills/__init__.py
index 3ef4d94a1385c7..7765bb4378a935 100644
--- a/torch/_dynamo/polyfills/__init__.py
+++ b/torch/_dynamo/polyfills/__init__.py
@@ -188,9 +188,7 @@ def foreach_map_fn(*args):
     if not at_least_one_list:
         return op(*args[1:])
 
-    out = []
-    for unpacked in zip(*new_args):
-        out.append(op(*unpacked))
+    out = [op(*unpacked) for unpacked in zip(*new_args)]
 
     return out
 
diff --git a/torch/_higher_order_ops/scan.py b/torch/_higher_order_ops/scan.py
index 7eaad027638516..d35c952408e61b 100644
--- a/torch/_higher_order_ops/scan.py
+++ b/torch/_higher_order_ops/scan.py
@@ -135,9 +135,7 @@ def add(x: torch.Tensor, y: torch.Tensor):
     dim = utils.canonicalize_dim(ndim, dim)
 
     # Move scan dim to 0 and always perform scan on dim 0
-    leaves_xs = []
-    for elem in leaves_xs_orig:
-        leaves_xs.append(torch.movedim(elem, dim, 0))
+    leaves_xs = [torch.movedim(elem, dim, 0) for elem in leaves_xs_orig]
 
     out = combine_fn(
         pytree.tree_unflatten(leaves_init, spec_init),
diff --git a/torch/_higher_order_ops/triton_kernel_wrap.py b/torch/_higher_order_ops/triton_kernel_wrap.py
index 185f2f5a13091b..bff661a52b9722 100644
--- a/torch/_higher_order_ops/triton_kernel_wrap.py
+++ b/torch/_higher_order_ops/triton_kernel_wrap.py
@@ -1422,10 +1422,11 @@ def call_triton_kernel(
             return self.call_triton_kernel(new_var, args, kwargs, tx)
 
         if isinstance(variable.kernel, Autotuner):
-            special_param_names = []
-            for name in SPECIAL_CONFIG_NAMES:
-                if name in variable.kernel.fn.arg_names:
-                    special_param_names.append(name)
+            special_param_names = [
+                name
+                for name in SPECIAL_CONFIG_NAMES
+                if name in variable.kernel.fn.arg_names
+            ]
 
             if special_param_names:
                 # If the Triton kernel has SPECIAL_CONFIG_NAMES in parameters, those should
diff --git a/torch/_inductor/codegen/triton_utils.py b/torch/_inductor/codegen/triton_utils.py
index 193080d360c0d4..dae378d1f7a37a 100644
--- a/torch/_inductor/codegen/triton_utils.py
+++ b/torch/_inductor/codegen/triton_utils.py
@@ -97,10 +97,7 @@ def signature_of(arg: KernelArgType, *, size_dtype: Optional[str]) -> str:
 
 
 def non_constexpr_signature(signature):
-    new_signature = []
-    for arg in signature:
-        if not isinstance(arg, ConstexprArg):
-            new_signature.append(arg)
+    new_signature = [arg for arg in signature if not isinstance(arg, ConstexprArg)]
     return new_signature
 
 
diff --git a/torch/_inductor/codegen/wrapper.py b/torch/_inductor/codegen/wrapper.py
index 0822ddd191373c..2f38a56eb2825d 100644
--- a/torch/_inductor/codegen/wrapper.py
+++ b/torch/_inductor/codegen/wrapper.py
@@ -1741,10 +1741,12 @@ def add_arg(idx, arg, is_constexpr=False, equals_1=False, equals_none=False):
         # Distinguish between different functions using function id
         cache_key: list[Any] = [id(kernel.fn)]
         if len(configs) > 0:
-            for arg in kwargs.values():
-                # We need to key on non tensor arg only in autotune mode
-                if not isinstance(arg, (ir.Buffer, ir.ReinterpretView)):
-                    cache_key.append(arg)
+            # We need to key on non tensor arg only in autotune mode
+            cache_key.extend(
+                arg
+                for arg in kwargs.values()
+                if not isinstance(arg, (ir.Buffer, ir.ReinterpretView))
+            )
         cache_key.append(str(triton_meta))
         cache_key = tuple(cache_key)
 
diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py
index 34231f0a7ed6a6..f5530754c93e7e 100644
--- a/torch/_inductor/ir.py
+++ b/torch/_inductor/ir.py
@@ -5732,8 +5732,9 @@ def get_kernel_and_metadata(self):  # type: ignore[no-untyped-def]
             restore_value_args.extend(kernel.restore_value)
 
         if hasattr(kernel, "reset_idx"):
-            for i in kernel.reset_idx:
-                reset_to_zero_args.append(kernel.fn.arg_names[i])
+            reset_to_zero_args.extend(
+                kernel.fn.arg_names[i] for i in kernel.reset_idx
+            )
         else:
             assert hasattr(kernel, "reset_to_zero")
             reset_to_zero_args.extend(kernel.reset_to_zero)
diff --git a/torch/_inductor/kernel/flex_attention.py b/torch/_inductor/kernel/flex_attention.py
index b8d6065bc12014..ceeef5c1ad7560 100644
--- a/torch/_inductor/kernel/flex_attention.py
+++ b/torch/_inductor/kernel/flex_attention.py
@@ -897,7 +897,6 @@ def lower_cpu(
             "torch.compile on current platform is not supported for CPU."
         )
 
-    fake_buffers: list[Buffer] = []  # noqa: F821
     placeholder_inps = [
         create_placeholder(name, dtype, query.get_device())
         for name, dtype in [
@@ -937,9 +936,7 @@ def lower_cpu(
         + mask_graph_placeholder_inps
        + list(mask_mod_other_buffers)
     )
-    for item in buffer_list:
-        if isinstance(item, TensorBox):
-            fake_buffers.append(item.data.data)  # type: ignore[attr-defined]
+    fake_buffers: list[Buffer] = [item.data.data for item in buffer_list if isinstance(item, TensorBox)]  # type: ignore[attr-defined]
 
     (
         query,
diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py
index 01e95756fc2eee..9c2f7442d7e455 100644
--- a/torch/_inductor/select_algorithm.py
+++ b/torch/_inductor/select_algorithm.py
@@ -630,8 +630,10 @@ def modification(
         ), f"Expected the subgraph to be a ComputedBuffer or a List[ComputedBuffer], got {type(subgraph)}"
         # Handle scatter stores
         if isinstance(subgraph, list):
-            for scatter_graph in subgraph:
-                scatters.append(self._handle_scatter_graph(scatter_graph))
+            scatters.extend(
+                self._handle_scatter_graph(scatter_graph)
+                for scatter_graph in subgraph
+            )
         elif isinstance(subgraph.data, ir.InputBuffer):
             out = subgraph.data.make_loader()(())
         else:
diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py
index bca692af9ad8c1..b92eec689d9331 100644
--- a/torch/_inductor/utils.py
+++ b/torch/_inductor/utils.py
@@ -606,10 +606,11 @@ def get_kernel_metadata(node_schedule, wrapper):
     # print the aot_autograd graph fragment
     if single_graph is not None:
         detailed_metadata.append(f"{wrapper.comment} Graph fragment:")
-        for n in inductor_nodes:
-            # TODO(future): maybe refactor torch/fx/graph.py to make it easy to
-            # generate python code for graph fragments
-            detailed_metadata.append(f"{wrapper.comment} {n.format_node()}")
+        # TODO(future): maybe refactor torch/fx/graph.py to make it easy to
+        # generate python code for graph fragments
+        detailed_metadata.extend(
+            f"{wrapper.comment} {n.format_node()}" for n in inductor_nodes
+        )
 
     return metadata, "\n".join(detailed_metadata)
 
diff --git a/torch/ao/quantization/backend_config/_common_operator_config_utils.py b/torch/ao/quantization/backend_config/_common_operator_config_utils.py
index 60f2fe86b12e41..eeb65bf338818e 100644
--- a/torch/ao/quantization/backend_config/_common_operator_config_utils.py
+++ b/torch/ao/quantization/backend_config/_common_operator_config_utils.py
@@ -714,14 +714,14 @@ def _get_bn_configs(dtype_configs: list[DTypeConfig]) -> list[BackendPatternConf
         )
 
     # fused bn configs
-    for fused_bn in bn_to_fused_bn.values():
-        bn_configs.append(
-            BackendPatternConfig(fused_bn)
-            .set_observation_type(
-                ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT
-            )  # noqa: E131
-            .set_dtype_configs(dtype_configs)
-        )
+    bn_configs.extend(
+        BackendPatternConfig(fused_bn)
+        .set_observation_type(
+            ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT
+        )  # noqa: E131
+        .set_dtype_configs(dtype_configs)
+        for fused_bn in bn_to_fused_bn.values()
+    )
     return bn_configs
 
 
diff --git a/torch/distributed/algorithms/model_averaging/utils.py b/torch/distributed/algorithms/model_averaging/utils.py
index 0438043a6e7401..7efdfe5ec16cfa 100644
--- a/torch/distributed/algorithms/model_averaging/utils.py
+++ b/torch/distributed/algorithms/model_averaging/utils.py
@@ -70,9 +70,11 @@ def get_params_to_average(
                 filtered_params.append(param_data)
         elif isinstance(param, dict):
             # optimizer.param_groups input
-            for param_data in param["params"]:
-                if param_data.grad is not None:
-                    filtered_params.append(param_data)
+            filtered_params.extend(
+                param_data
+                for param_data in param["params"]
+                if param_data.grad is not None
+            )
         else:
             raise NotImplementedError(
                 f"Parameter input of type {type(param)} is not supported"
diff --git a/torch/distributed/tensor/_ops/_einsum_strategy.py b/torch/distributed/tensor/_ops/_einsum_strategy.py
index 0db79ed2f7002c..4d258487b26787 100644
--- a/torch/distributed/tensor/_ops/_einsum_strategy.py
+++ b/torch/distributed/tensor/_ops/_einsum_strategy.py
@@ -150,8 +150,7 @@ def gen_einsum_strategies(
         # linearity strategy
         if linearity:
             linearity_placement_list: list[Placement] = [Partial()]
-            for input_dim in input_dims:
-                linearity_placement_list.append(Partial())
+            linearity_placement_list.extend(Partial() for input_dim in input_dims)
             mesh_dim_strategies.append(linearity_placement_list)
 
         all_mesh_dim_strategies.append(mesh_dim_strategies)
diff --git a/torch/jit/supported_ops.py b/torch/jit/supported_ops.py
index 791a11a9b3aa7d..3c3c85dd72556b 100644
--- a/torch/jit/supported_ops.py
+++ b/torch/jit/supported_ops.py
@@ -72,9 +72,11 @@ def is_tensor_method(schema):
     for elem in dir(torch.Tensor):
         if not _hidden(elem):
             schemas = torch._C._jit_get_schemas_for_operator("aten::" + elem)
-            for schema in schemas:
-                if is_tensor_method(schema):
-                    methods.append(_emit_schema("Tensor", elem, schema, arg_start=1))
+            methods.extend(
+                _emit_schema("Tensor", elem, schema, arg_start=1)
+                for schema in schemas
+                if is_tensor_method(schema)
+            )
 
     return "Supported Tensor Methods", methods
 
@@ -115,10 +117,12 @@ def _get_nn_functional_ops():
             builtin = _find_builtin(getattr(mod, elem))
             if builtin is not None:
                 schemas = torch._C._jit_get_schemas_for_operator(builtin)
-                for schema in schemas:
-                    # remove _tan but not __and__
-                    if not _hidden(elem):
-                        functions.append(_emit_schema(name, elem, schema))
+                # remove _tan but not __and__
+                functions.extend(
+                    _emit_schema(name, elem, schema)
+                    for schema in schemas
+                    if not _hidden(elem)
+                )
 
     return "Supported PyTorch Functions", functions
 
@@ -164,8 +168,9 @@ def _get_torchscript_builtins():
         builtin = _find_builtin(fn)
         if builtin is not None:
             schemas = torch._C._jit_get_schemas_for_operator(builtin)
-            for schema in schemas:
-                functions.append(_emit_schema(mod.__name__, fn.__name__, schema))
+            functions.extend(
+                _emit_schema(mod.__name__, fn.__name__, schema) for schema in schemas
+            )
 
     return "TorchScript Builtin Functions", functions
 
@@ -271,8 +276,7 @@ def _get_global_builtins():
         if fn in op_renames:
             op_name = op_renames[fn]
         schemas = torch._C._jit_get_schemas_for_operator(op_name)
-        for s in schemas:
-            schematized_ops.append(_emit_schema(None, fn, s, padding=0))
+        schematized_ops.extend(_emit_schema(None, fn, s, padding=0) for s in schemas)
         if len(schemas) > 0:
             schematized_ops.append("")
         else:
diff --git a/torch/nn/parallel/comm.py b/torch/nn/parallel/comm.py
index 42b3dbd908d644..0b948f1e4cd469 100644
--- a/torch/nn/parallel/comm.py
+++ b/torch/nn/parallel/comm.py
@@ -156,11 +156,10 @@ def reduce_add_coalesced(inputs, destination=None, buffer_size=10485760):
             _flatten_dense_tensors(chunk) for chunk in chunks
         ]  # (num_gpus,)
         flat_result = reduce_add(flat_tensors, destination)
-        for t in _unflatten_dense_tensors(flat_result, chunks[0]):
-            # The unflattened tensors do not share storage, and we don't expose
-            # base flat tensor anyways, so give them different version counters.
-            # See NOTE [ Version Counter in comm.*_coalesced ]
-            output.append(t.data)
+        # The unflattened tensors do not share storage, and we don't expose
+        # base flat tensor anyways, so give them different version counters.
+        # See NOTE [ Version Counter in comm.*_coalesced ]
+        output.extend(t.data for t in _unflatten_dense_tensors(flat_result, chunks[0]))
 
     return tuple(_reorder_tensors_as(output, ref_order))
 
diff --git a/torch/onnx/_internal/fx/fx_onnx_interpreter.py b/torch/onnx/_internal/fx/fx_onnx_interpreter.py
index 1a1cbc9ae922f8..60323e9949ad00 100644
--- a/torch/onnx/_internal/fx/fx_onnx_interpreter.py
+++ b/torch/onnx/_internal/fx/fx_onnx_interpreter.py
@@ -185,11 +185,10 @@ def _retrieve_or_adapt_input_to_graph_set(
             onnxscript_graph_building.TorchScriptTensor
             | None
             | tuple[onnxscript_graph_building.TorchScriptTensor, ...]
-        ] = []
-        for tensor in onnx_tensor:
-            sequence_elements.append(
-                fx_name_to_onnxscript_value[tensor.name] if tensor is not None else None  # type: ignore[index, union-attr]
-            )
+        ] = [
+            fx_name_to_onnxscript_value[tensor.name] if tensor is not None else None  # type: ignore[index, union-attr]
+            for tensor in onnx_tensor
+        ]
         return sequence_elements
     if isinstance(onnx_tensor, torch.dtype):
         onnx_tensor = int(  # type: ignore[call-overload]
diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py
index 6dc5bd4d5c6194..773ac2bd91a3f4 100644
--- a/torch/testing/_internal/distributed/distributed_test.py
+++ b/torch/testing/_internal/distributed/distributed_test.py
@@ -7380,17 +7380,14 @@ def forward(self, x, rank):
                     )
                 )
 
-            throw_on_early_term_tests = []
-            for test_input in models_to_test:
-                throw_on_early_term_tests.append(
-                    DDPUnevenTestInput(
-                        name=test_input.name,
-                        model=test_input.model,
-                        inp=test_input.inp,
-                        sync_interval=test_input.sync_interval,
-                        throw_on_early_termination=True,
-                    )
-                )
+            throw_on_early_term_tests = [
+                DDPUnevenTestInput(
+                    name=test_input.name,
+                    model=test_input.model,
+                    inp=test_input.inp,
+                    sync_interval=test_input.sync_interval,
+                    throw_on_early_termination=True,
+                ) for test_input in models_to_test]
 
             models_to_test.extend(models_with_sync)
             models_to_test.extend(throw_on_early_term_tests)
diff --git a/torch/testing/_internal/distributed/rpc/dist_autograd_test.py b/torch/testing/_internal/distributed/rpc/dist_autograd_test.py
index e113b87ccebda8..0a833aa29f6a90 100644
--- a/torch/testing/_internal/distributed/rpc/dist_autograd_test.py
+++ b/torch/testing/_internal/distributed/rpc/dist_autograd_test.py
@@ -2765,9 +2765,7 @@ def test_gradients_synchronizations(self):
 
             dist_autograd.backward(context_id, [x.sum()])
 
-            futs = []
-            for remote_layer in remote_layers:
-                futs.append(remote_layer.rpc_async().gradients(context_id))
+            futs = [remote_layer.rpc_async().gradients(context_id) for remote_layer in remote_layers]
 
             for i in range(len(futs)):
                 local_gradients = [p.grad for p in local_layers[i].parameters()]
diff --git a/torchgen/dest/register_dispatch_key.py b/torchgen/dest/register_dispatch_key.py
index 015537df12e05a..d5e1009219a8a8 100644
--- a/torchgen/dest/register_dispatch_key.py
+++ b/torchgen/dest/register_dispatch_key.py
@@ -955,10 +955,10 @@ def generate_defn(cpp_sig: CppSignature) -> str:
         # Go over each output, and check if there is a proxy created for it.
        # If so, copy it over to the original output.
         if k is SchemaKind.out or k is SchemaKind.inplace:
-            for i in range(len(f.func.returns)):
-                sig_body.append(
-                    f"if (op.proxy_outputs_[{i}].has_value()) op.outputs_[{i}].get().copy_(*op.proxy_outputs_[{i}]);"
-                )
+            sig_body.extend(
+                f"if (op.proxy_outputs_[{i}].has_value()) op.outputs_[{i}].get().copy_(*op.proxy_outputs_[{i}]);"
+                for i in range(len(f.func.returns))
+            )
 
         # Destructively return the final tensors
         # TODO: Do this in translate instead
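
Every hunk above applies the same mechanical rewrite: an explicit append loop becomes a list comprehension when it builds a fresh list, or list.extend() over a generator expression when it adds to an existing list. A minimal sketch of the two forms, using illustrative names that do not appear in this patch:

    # Building a new list: the comprehension replaces the append loop.
    squares = [n * n for n in range(10) if n % 2 == 0]

    # Growing an existing list: extend() consumes a generator expression,
    # avoiding the per-iteration lookup of the bound .append method.
    results = [0, 1]
    results.extend(n * n for n in range(10) if n % 2 == 0)

Each hunk keeps behavior the same and only condenses the list construction into a single expression.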