diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 9632ca9a6..3e3ff95dc 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -43,7 +43,13 @@ jobs: working-directory: ./python run: | poetry build - poetry install + poetry install --with dev + - name: Code Style + working-directory: ./python + run: | + poetry run python -m black --check graphframes + poetry run python -m flake8 graphframes + poetry run python -m isort --check graphframes - name: Test working-directory: ./python run: | diff --git a/python/graphframes/__init__.py b/python/graphframes/__init__.py index 03f1e4943..bded262bc 100644 --- a/python/graphframes/__init__.py +++ b/python/graphframes/__init__.py @@ -1,4 +1,3 @@ - from .graphframe import GraphFrame -__all__ = ['GraphFrame'] +__all__ = ["GraphFrame"] diff --git a/python/graphframes/console.py b/python/graphframes/console.py index d2b38d28b..dd9c4cd31 100644 --- a/python/graphframes/console.py +++ b/python/graphframes/console.py @@ -1,4 +1,5 @@ import click + from graphframes.tutorials import download diff --git a/python/graphframes/examples/__init__.py b/python/graphframes/examples/__init__.py index 8b92ef01f..2003b0191 100644 --- a/python/graphframes/examples/__init__.py +++ b/python/graphframes/examples/__init__.py @@ -1,5 +1,4 @@ - from .belief_propagation import BeliefPropagation from .graphs import Graphs -__all__ = ['BeliefPropagation', 'Graphs'] +__all__ = ["BeliefPropagation", "Graphs"] diff --git a/python/graphframes/examples/belief_propagation.py b/python/graphframes/examples/belief_propagation.py index c013450d7..b1434e231 100644 --- a/python/graphframes/examples/belief_propagation.py +++ b/python/graphframes/examples/belief_propagation.py @@ -18,14 +18,17 @@ import math from typing import Union +from pyspark.sql import SparkSession +from pyspark.sql import functions as sqlfunctions +from pyspark.sql import types + # Import subpackage examples here explicitly so that # this module can be run directly with spark-submit. import graphframes.examples from graphframes import GraphFrame from graphframes.lib import AggregateMessages as AM -from pyspark.sql import SparkSession, functions as sqlfunctions, types -__all__ = ['BeliefPropagation'] +__all__ = ["BeliefPropagation"] class BeliefPropagation: @@ -61,7 +64,7 @@ class BeliefPropagation: * Coloring the graph by assigning a color to each vertex such that no neighboring vertices share the same color. * In each step of BP, update all vertices of a single color. Alternate colors. - """ + """ # noqa: W605 @classmethod def runBPwithGraphFrames(cls, g: GraphFrame, numIter: int) -> GraphFrame: @@ -71,12 +74,12 @@ def runBPwithGraphFrames(cls, g: GraphFrame, numIter: int) -> GraphFrame: """ # choose colors for vertices for BP scheduling colorG = cls._colorGraph(g) - numColors = colorG.vertices.select('color').distinct().count() + numColors = colorG.vertices.select("color").distinct().count() # TODO: handle vertices without any edges # initialize vertex beliefs at 0.0 - gx = GraphFrame(colorG.vertices.withColumn('belief', sqlfunctions.lit(0.0)), colorG.edges) + gx = GraphFrame(colorG.vertices.withColumn("belief", sqlfunctions.lit(0.0)), colorG.edges) # run BP for numIter iterations for iter_ in range(numIter): @@ -85,37 +88,40 @@ def runBPwithGraphFrames(cls, g: GraphFrame, numIter: int) -> GraphFrame: # Send messages to vertices of the current color. # We may send to source or destination since edges are treated as undirected. 
msgForSrc = sqlfunctions.when( - AM.src['color'] == color, - AM.edge['b'] * AM.dst['belief']) + AM.src["color"] == color, AM.edge["b"] * AM.dst["belief"] + ) msgForDst = sqlfunctions.when( - AM.dst['color'] == color, - AM.edge['b'] * AM.src['belief']) + AM.dst["color"] == color, AM.edge["b"] * AM.src["belief"] + ) # numerically stable sigmoid logistic = sqlfunctions.udf(cls._sigmoid, returnType=types.DoubleType()) aggregates = gx.aggregateMessages( sqlfunctions.sum(AM.msg).alias("aggMess"), sendToSrc=msgForSrc, - sendToDst=msgForDst) + sendToDst=msgForDst, + ) v = gx.vertices # receive messages and update beliefs for vertices of the current color newBeliefCol = sqlfunctions.when( - (v['color'] == color) & (aggregates['aggMess'].isNotNull()), - logistic(aggregates['aggMess'] + v['a']) - ).otherwise(v['belief']) # keep old beliefs for other colors - newVertices = (v - .join(aggregates, on=(v['id'] == aggregates['id']), how='left_outer') - .drop(aggregates['id']) # drop duplicate ID column (from outer join) - .withColumn('newBelief', newBeliefCol) # compute new beliefs - .drop('aggMess') # drop messages - .drop('belief') # drop old beliefs - .withColumnRenamed('newBelief', 'belief') + (v["color"] == color) & (aggregates["aggMess"].isNotNull()), + logistic(aggregates["aggMess"] + v["a"]), + ).otherwise( + v["belief"] + ) # keep old beliefs for other colors + newVertices = ( + v.join(aggregates, on=(v["id"] == aggregates["id"]), how="left_outer") + .drop(aggregates["id"]) # drop duplicate ID column (from outer join) + .withColumn("newBelief", newBeliefCol) # compute new beliefs + .drop("aggMess") # drop messages + .drop("belief") # drop old beliefs + .withColumnRenamed("newBelief", "belief") ) # cache new vertices using workaround for SPARK-1334 cachedNewVertices = AM.getCachedDataFrame(newVertices) gx = GraphFrame(cachedNewVertices, gx.edges) # Drop the "color" column from vertices - return GraphFrame(gx.vertices.drop('color'), gx.edges) + return GraphFrame(gx.vertices.drop("color"), gx.edges) @staticmethod def _colorGraph(g: GraphFrame) -> GraphFrame: @@ -132,7 +138,7 @@ def _colorGraph(g: GraphFrame) -> GraphFrame: """ colorUDF = sqlfunctions.udf(lambda i, j: (i + j) % 2, returnType=types.IntegerType()) - v = g.vertices.withColumn('color', colorUDF(sqlfunctions.col('i'), sqlfunctions.col('j'))) + v = g.vertices.withColumn("color", colorUDF(sqlfunctions.col("i"), sqlfunctions.col("j"))) return GraphFrame(v, g.edges) @staticmethod @@ -164,12 +170,12 @@ def main() -> None: results = BeliefPropagation.runBPwithGraphFrames(g, numIter) # display beliefs - beliefs = results.vertices.select('id', 'belief') + beliefs = results.vertices.select("id", "belief") print("Done with BP. 
Final beliefs after {} iterations:".format(numIter)) beliefs.show() spark.stop() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/python/graphframes/examples/graphs.py b/python/graphframes/examples/graphs.py index 8db04aecc..0a3af2028 100644 --- a/python/graphframes/examples/graphs.py +++ b/python/graphframes/examples/graphs.py @@ -17,11 +17,12 @@ import itertools -from pyspark.sql import functions as sqlfunctions, SparkSession +from pyspark.sql import SparkSession +from pyspark.sql import functions as sqlfunctions from graphframes import GraphFrame -__all__ = ['Graphs'] +__all__ = ["Graphs"] class Graphs: @@ -37,24 +38,30 @@ def __init__(self, spark: SparkSession) -> None: def friends(self) -> GraphFrame: """A GraphFrame of friends in a (fake) social network.""" # Vertex DataFrame - v = self._spark.createDataFrame([ - ("a", "Alice", 34), - ("b", "Bob", 36), - ("c", "Charlie", 30), - ("d", "David", 29), - ("e", "Esther", 32), - ("f", "Fanny", 36) - ], ["id", "name", "age"]) + v = self._spark.createDataFrame( + [ + ("a", "Alice", 34), + ("b", "Bob", 36), + ("c", "Charlie", 30), + ("d", "David", 29), + ("e", "Esther", 32), + ("f", "Fanny", 36), + ], + ["id", "name", "age"], + ) # Edge DataFrame - e = self._spark.createDataFrame([ - ("a", "b", "friend"), - ("b", "c", "follow"), - ("c", "b", "follow"), - ("f", "c", "follow"), - ("e", "f", "follow"), - ("e", "d", "friend"), - ("d", "a", "friend") - ], ["src", "dst", "relationship"]) + e = self._spark.createDataFrame( + [ + ("a", "b", "friend"), + ("b", "c", "follow"), + ("c", "b", "follow"), + ("f", "c", "follow"), + ("e", "f", "follow"), + ("e", "d", "friend"), + ("d", "a", "friend"), + ], + ["src", "dst", "relationship"], + ) # Create a GraphFrame return GraphFrame(v, e) @@ -83,41 +90,44 @@ def gridIsingModel(self, n: int, vStd: float = 1.0, eStd: float = 1.0) -> GraphF and "b". Edges are directed, but they should be treated as undirected in any algorithms run on this model. Vertex IDs are of the form "i,j". E.g., vertex "1,3" is in the second row and fourth column of the grid. 
- """ + """ # noqa: W605 # check param n if n < 1: raise ValueError( - "Grid graph must have size >= 1, but was given invalid value n = {}" - .format(n)) + "Grid graph must have size >= 1, but was given invalid value n = {}".format(n) + ) # create coodinates grid coordinates = self._spark.createDataFrame( - itertools.product(range(n), range(n)), - schema=('i', 'j')) + itertools.product(range(n), range(n)), schema=("i", "j") + ) # create SQL expression for converting coordinates (i,j) to a string ID "i,j" # avoid Cartesian join due to SPARK-15425: use generator since n should be small - toIDudf = sqlfunctions.udf(lambda i, j: '{},{}'.format(i,j)) + toIDudf = sqlfunctions.udf(lambda i, j: "{},{}".format(i, j)) # create the vertex DataFrame # create SQL expression for converting coordinates (i,j) to a string ID "i,j" - vIDcol = toIDudf(sqlfunctions.col('i'), sqlfunctions.col('j')) + vIDcol = toIDudf(sqlfunctions.col("i"), sqlfunctions.col("j")) # add random parameters generated from a normal distribution seed = 12345 - vertices = (coordinates.withColumn('id', vIDcol) - .withColumn('a', sqlfunctions.randn(seed) * vStd)) + vertices = coordinates.withColumn("id", vIDcol).withColumn( + "a", sqlfunctions.randn(seed) * vStd + ) # create the edge DataFrame # create SQL expression for converting coordinates (i,j+1) and (i+1,j) to string IDs - rightIDcol = toIDudf(sqlfunctions.col('i'), sqlfunctions.col('j') + 1) - downIDcol = toIDudf(sqlfunctions.col('i') + 1, sqlfunctions.col('j')) - horizontalEdges = (coordinates.filter(sqlfunctions.col('j') != n - 1) - .select(vIDcol.alias('src'), rightIDcol.alias('dst'))) - verticalEdges = (coordinates.filter(sqlfunctions.col('i') != n - 1) - .select(vIDcol.alias('src'), downIDcol.alias('dst'))) + rightIDcol = toIDudf(sqlfunctions.col("i"), sqlfunctions.col("j") + 1) + downIDcol = toIDudf(sqlfunctions.col("i") + 1, sqlfunctions.col("j")) + horizontalEdges = coordinates.filter(sqlfunctions.col("j") != n - 1).select( + vIDcol.alias("src"), rightIDcol.alias("dst") + ) + verticalEdges = coordinates.filter(sqlfunctions.col("i") != n - 1).select( + vIDcol.alias("src"), downIDcol.alias("dst") + ) allEdges = horizontalEdges.unionAll(verticalEdges) # add random parameters from a normal distribution - edges = allEdges.withColumn('b', sqlfunctions.randn(seed + 1) * eStd) + edges = allEdges.withColumn("b", sqlfunctions.randn(seed + 1) * eStd) # create the GraphFrame g = GraphFrame(vertices, edges) diff --git a/python/graphframes/graphframe.py b/python/graphframes/graphframe.py index 5381ec8b5..1d177e1a2 100644 --- a/python/graphframes/graphframe.py +++ b/python/graphframes/graphframe.py @@ -16,18 +16,19 @@ # import sys -from typing import Any, Union, Optional +from typing import Any, Optional, Union -if sys.version > '3': +if sys.version > "3": basestring = str -from graphframes.lib import Pregel from pyspark import SparkContext from pyspark.sql import Column, DataFrame, SparkSession from pyspark.storagelevel import StorageLevel +from graphframes.lib import Pregel + -def _from_java_gf(jgf: Any, spark: SparkSession) -> 'GraphFrame': +def _from_java_gf(jgf: Any, spark: SparkSession) -> "GraphFrame": """ (internal) creates a python GraphFrame wrapper from a java GraphFrame. 
@@ -37,10 +38,15 @@ def _from_java_gf(jgf: Any, spark: SparkSession) -> 'GraphFrame': pe = DataFrame(jgf.edges(), spark) return GraphFrame(pv, pe) + def _java_api(jsc: SparkContext) -> Any: javaClassName = "org.graphframes.GraphFramePythonAPI" - return jsc._jvm.Thread.currentThread().getContextClassLoader().loadClass(javaClassName) \ - .newInstance() + return ( + jsc._jvm.Thread.currentThread() + .getContextClassLoader() + .loadClass(javaClassName) + .newInstance() + ) class GraphFrame: @@ -76,16 +82,22 @@ def __init__(self, v: DataFrame, e: DataFrame) -> None: # Check that provided DataFrames contain required columns if self.ID not in v.columns: raise ValueError( - "Vertex ID column {} missing from vertex DataFrame, which has columns: {}" - .format(self.ID, ",".join(v.columns))) + "Vertex ID column {} missing from vertex DataFrame, which has columns: {}".format( + self.ID, ",".join(v.columns) + ) + ) if self.SRC not in e.columns: raise ValueError( - "Source vertex ID column {} missing from edge DataFrame, which has columns: {}" - .format(self.SRC, ",".join(e.columns))) + "Source vertex ID column {} missing from edge DataFrame, which has columns: {}".format( # noqa: E501 + self.SRC, ",".join(e.columns) + ) + ) if self.DST not in e.columns: raise ValueError( - "Destination vertex ID column {} missing from edge DataFrame, which has columns: {}" - .format(self.DST, ",".join(e.columns))) + "Destination vertex ID column {} missing from edge DataFrame, which has columns: {}".format( # noqa: E501 + self.DST, ",".join(e.columns) + ) + ) self._jvm_graph = self._jvm_gf_api.createGraph(v._jdf, e._jdf) @@ -109,8 +121,8 @@ def edges(self) -> DataFrame: def __repr__(self): return self._jvm_graph.toString() - def cache(self) -> 'GraphFrame': - """ Persist the dataframe representation of vertices and edges of the graph with the default + def cache(self) -> "GraphFrame": + """Persist the dataframe representation of vertices and edges of the graph with the default storage level. """ self._jvm_graph.cache() @@ -124,7 +136,7 @@ def persist(self, storageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) -> "Gra self._jvm_graph.persist(javaStorageLevel) return self - def unpersist(self, blocking: bool = False) -> 'GraphFrame': + def unpersist(self, blocking: bool = False) -> "GraphFrame": """Mark the dataframe representation of vertices and edges of the graph as non-persistent, and remove all blocks for it from memory and disk. """ @@ -209,12 +221,12 @@ def find(self, pattern: str) -> DataFrame: jdf = self._jvm_graph.find(pattern) return DataFrame(jdf, self._spark) - def filterVertices(self, condition: Union[str, Column]) -> 'GraphFrame': + def filterVertices(self, condition: Union[str, Column]) -> "GraphFrame": """ Filters the vertices based on expression, remove edges containing any dropped vertices. - + :param condition: String or Column describing the condition expression for filtering. - :return: GraphFrame with filtered vertices and edges. + :return: GraphFrame with filtered vertices and edges. """ if isinstance(condition, basestring): @@ -225,12 +237,12 @@ def filterVertices(self, condition: Union[str, Column]) -> 'GraphFrame': raise TypeError("condition should be string or Column") return _from_java_gf(jdf, self._spark) - def filterEdges(self, condition: Union[str, Column]) -> 'GraphFrame': + def filterEdges(self, condition: Union[str, Column]) -> "GraphFrame": """ Filters the edges based on expression, keep all vertices. 
- + :param condition: String or Column describing the condition expression for filtering. - :return: GraphFrame with filtered edges. + :return: GraphFrame with filtered edges. """ if isinstance(condition, basestring): jdf = self._jvm_graph.filterEdges(condition) @@ -240,18 +252,18 @@ def filterEdges(self, condition: Union[str, Column]) -> 'GraphFrame': raise TypeError("condition should be string or Column") return _from_java_gf(jdf, self._spark) - def dropIsolatedVertices(self) -> 'GraphFrame': + def dropIsolatedVertices(self) -> "GraphFrame": """ Drops isolated vertices, vertices are not contained in any edges. - :return: GraphFrame with filtered vertices. + :return: GraphFrame with filtered vertices. """ jdf = self._jvm_graph.dropIsolatedVertices() return _from_java_gf(jdf, self._spark) - def bfs(self, fromExpr: str, toExpr: str, - edgeFilter: Optional[str] = None, - maxPathLength: int = 10) -> DataFrame: + def bfs( + self, fromExpr: str, toExpr: str, edgeFilter: Optional[str] = None, maxPathLength: int = 10 + ) -> DataFrame: """ Breadth-first search (BFS). @@ -259,18 +271,20 @@ def bfs(self, fromExpr: str, toExpr: str, :return: DataFrame with one Row for each shortest path between matching vertices. """ - builder = self._jvm_graph.bfs()\ - .fromExpr(fromExpr)\ - .toExpr(toExpr)\ - .maxPathLength(maxPathLength) + builder = ( + self._jvm_graph.bfs().fromExpr(fromExpr).toExpr(toExpr).maxPathLength(maxPathLength) + ) if edgeFilter is not None: builder.edgeFilter(edgeFilter) jdf = builder.run() return DataFrame(jdf, self._spark) - def aggregateMessages(self, aggCol: Union[Column, str], - sendToSrc: Union[Column, str, None] = None, - sendToDst: Union[Column, str, None] = None) -> DataFrame: + def aggregateMessages( + self, + aggCol: Union[Column, str], + sendToSrc: Union[Column, str, None] = None, + sendToDst: Union[Column, str, None] = None, + ) -> DataFrame: """ Aggregates messages from the neighbours. @@ -314,9 +328,12 @@ def aggregateMessages(self, aggCol: Union[Column, str], # Standard algorithms - def connectedComponents(self, algorithm: str = 'graphframes', - checkpointInterval: int = 2, - broadcastThreshold: int = 1000000) -> DataFrame: + def connectedComponents( + self, + algorithm: str = "graphframes", + checkpointInterval: int = 2, + broadcastThreshold: int = 1000000, + ) -> DataFrame: """ Computes the connected components of the graph. @@ -330,11 +347,13 @@ def connectedComponents(self, algorithm: str = 'graphframes', :return: DataFrame with new vertices column "component" """ - jdf = self._jvm_graph.connectedComponents() \ - .setAlgorithm(algorithm) \ - .setCheckpointInterval(checkpointInterval) \ - .setBroadcastThreshold(broadcastThreshold) \ + jdf = ( + self._jvm_graph.connectedComponents() + .setAlgorithm(algorithm) + .setCheckpointInterval(checkpointInterval) + .setBroadcastThreshold(broadcastThreshold) .run() + ) return DataFrame(jdf, self._spark) def labelPropagation(self, maxIter: int) -> DataFrame: @@ -349,10 +368,13 @@ def labelPropagation(self, maxIter: int) -> DataFrame: jdf = self._jvm_graph.labelPropagation().maxIter(maxIter).run() return DataFrame(jdf, self._spark) - def pageRank(self, resetProbability: float = 0.15, - sourceId: Optional[Any] = None, - maxIter: Optional[int] = None, - tol: Optional[float] = None) -> 'GraphFrame': + def pageRank( + self, + resetProbability: float = 0.15, + sourceId: Optional[Any] = None, + maxIter: Optional[int] = None, + tol: Optional[float] = None, + ) -> "GraphFrame": """ Runs the PageRank algorithm on the graph. 
Note: Exactly one of fixed_num_iter or tolerance must be set. @@ -379,9 +401,12 @@ def pageRank(self, resetProbability: float = 0.15, jgf = builder.run() return _from_java_gf(jgf, self._spark) - def parallelPersonalizedPageRank(self, resetProbability: float = 0.15, - sourceIds: Optional[list[Any]] = None, - maxIter: Optional[int] = None) -> 'GraphFrame': + def parallelPersonalizedPageRank( + self, + resetProbability: float = 0.15, + sourceIds: Optional[list[Any]] = None, + maxIter: Optional[int] = None, + ) -> "GraphFrame": """ Run the personalized PageRank algorithm on the graph, from the provided list of sources in parallel for a fixed number of iterations. @@ -393,7 +418,9 @@ def parallelPersonalizedPageRank(self, resetProbability: float = 0.15, :param maxIter: the fixed number of iterations this algorithm runs :return: GraphFrame with new vertices column "pageranks" and new edges column "weight" """ - assert sourceIds is not None and len(sourceIds) > 0, "Source vertices Ids sourceIds must be provided" + assert ( + sourceIds is not None and len(sourceIds) > 0 + ), "Source vertices Ids sourceIds must be provided" assert maxIter is not None, "Max number of iterations maxIter must be provided" sourceIds = self._sc._jvm.PythonUtils.toArray(sourceIds) builder = self._jvm_graph.parallelPersonalizedPageRank() @@ -427,10 +454,17 @@ def stronglyConnectedComponents(self, maxIter: int) -> DataFrame: jdf = self._jvm_graph.stronglyConnectedComponents().maxIter(maxIter).run() return DataFrame(jdf, self._spark) - def svdPlusPlus(self, rank: int = 10, maxIter: int = 2, - minValue: float = 0.0, maxValue: float = 5.0, - gamma1: float = 0.007, gamma2: float = 0.007, - gamma6: float = 0.005, gamma7: float = 0.015) -> tuple[DataFrame, float]: + def svdPlusPlus( + self, + rank: int = 10, + maxIter: int = 2, + minValue: float = 0.0, + maxValue: float = 5.0, + gamma1: float = 0.007, + gamma2: float = 0.007, + gamma6: float = 0.005, + gamma7: float = 0.015, + ) -> tuple[DataFrame, float]: """ Runs the SVD++ algorithm. 
@@ -461,13 +495,16 @@ def triangleCount(self) -> DataFrame: def _test(): import doctest + import graphframe + globs = graphframe.__dict__.copy() - globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - globs['spark'] = SparkSession(globs['sc']).builder.getOrCreate() + globs["sc"] = SparkContext("local[4]", "PythonTest", batchSize=2) + globs["spark"] = SparkSession(globs["sc"]).builder.getOrCreate() (failure_count, test_count) = doctest.testmod( - globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) - globs['sc'].stop() + globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE + ) + globs["sc"].stop() if failure_count: exit(-1) diff --git a/python/graphframes/lib/__init__.py b/python/graphframes/lib/__init__.py index 325e74543..076dd5232 100644 --- a/python/graphframes/lib/__init__.py +++ b/python/graphframes/lib/__init__.py @@ -1,5 +1,4 @@ - from .aggregate_messages import AggregateMessages from .pregel import Pregel -__all__ = ['AggregateMessages', 'Pregel'] +__all__ = ["AggregateMessages", "Pregel"] diff --git a/python/graphframes/lib/aggregate_messages.py b/python/graphframes/lib/aggregate_messages.py index c0867dcd0..bb454b008 100644 --- a/python/graphframes/lib/aggregate_messages.py +++ b/python/graphframes/lib/aggregate_messages.py @@ -18,13 +18,18 @@ from typing import Any from pyspark import SparkContext -from pyspark.sql import DataFrame, functions as sqlfunctions, SparkSession, Column +from pyspark.sql import Column, DataFrame, SparkSession +from pyspark.sql import functions as sqlfunctions def _java_api(jsc: SparkContext) -> Any: javaClassName = "org.graphframes.GraphFramePythonAPI" - return jsc._jvm.Thread.currentThread().getContextClassLoader().loadClass(javaClassName) \ - .newInstance() + return ( + jsc._jvm.Thread.currentThread() + .getContextClassLoader() + .loadClass(javaClassName) + .newInstance() + ) class _ClassProperty: diff --git a/python/graphframes/lib/pregel.py b/python/graphframes/lib/pregel.py index 72077c25c..0d9c5c25f 100644 --- a/python/graphframes/lib/pregel.py +++ b/python/graphframes/lib/pregel.py @@ -16,13 +16,17 @@ # import sys -from typing import Any -if sys.version > '3': +from typing import TYPE_CHECKING, Any + +if sys.version > "3": basestring = str +from pyspark.ml.wrapper import JavaWrapper from pyspark.sql import DataFrame, SparkSession from pyspark.sql.functions import col -from pyspark.ml.wrapper import JavaWrapper + +if TYPE_CHECKING: + from graphframes import GraphFrame class Pregel(JavaWrapper): @@ -75,11 +79,11 @@ class Pregel(JavaWrapper): ... .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree")) \ ... .aggMsgs(sum(Pregel.msg())) \ ... .run() - """ + """ # noqa: E501 def __init__(self, graph: "GraphFrame") -> None: super(Pregel, self).__init__() - from graphframes import GraphFrame + self.graph = graph self._java_obj = self._new_java_obj("org.graphframes.lib.Pregel", graph._jvm_graph) @@ -102,7 +106,9 @@ def setCheckpointInterval(self, value: int) -> "Pregel": self._java_obj.setCheckpointInterval(int(value)) return self - def withVertexColumn(self, colName: str, initialExpr: Any, updateAfterAggMsgsExpr: Any) -> "Pregel": + def withVertexColumn( + self, colName: str, initialExpr: Any, updateAfterAggMsgsExpr: Any + ) -> "Pregel": """ Defines an additional vertex column at the start of run and how to update it in each iteration. 
@@ -116,7 +122,7 @@ def withVertexColumn(self, colName: str, initialExpr: Any, updateAfterAggMsgsExp You can reference all original vertex columns, additional vertex columns, and the aggregated message column using :func:`msg`. If the vertex received no messages, the message column would be null. - """ + """ # noqa: E501 self._java_obj.withVertexColumn(colName, initialExpr._jc, updateAfterAggMsgsExpr._jc) return self @@ -133,7 +139,7 @@ def sendMsgToSrc(self, msgExpr: Any) -> "Pregel": and `edge`, respectively. You can reference them using :func:`src`, :func:`dst`, and :func:`edge`. Null messages are not included in message aggregation. - """ + """ # noqa: E501 self._java_obj.sendMsgToSrc(msgExpr._jc) return self @@ -150,7 +156,7 @@ def sendMsgToDst(self, msgExpr: Any) -> "Pregel": and `edge`, respectively. You can reference them using :func:`src`, :func:`dst`, and :func:`edge`. Null messages are not included in message aggregation. - """ + """ # noqa: E501 self._java_obj.sendMsgToDst(msgExpr._jc) return self @@ -161,7 +167,7 @@ def aggMsgs(self, aggExpr: Any) -> "Pregel": :param aggExpr: the message aggregation expression, such as `sum(Pregel.msg())`. You can reference the message column by :func:`msg` and the vertex ID by `col("id")`, while the latter is usually not used. - """ + """ # noqa: E501 self._java_obj.aggMsgs(aggExpr._jc) return self @@ -170,7 +176,7 @@ def run(self) -> DataFrame: Runs the defined Pregel algorithm. :return: the result vertex DataFrame from the final iteration including both original and additional columns. - """ + """ # noqa: E501 return DataFrame(self._java_obj.run(), SparkSession.getActiveSession()) @staticmethod @@ -179,7 +185,7 @@ def msg() -> Any: References the message column in aggregating messages and updating additional vertex columns. See :func:`aggMsgs` and :func:`withVertexColumn` - """ + """ # noqa: E501 return col("_pregel_msg_") @staticmethod diff --git a/python/graphframes/tests.py b/python/graphframes/tests.py index 9a7ad1371..dee8405e8 100644 --- a/python/graphframes/tests.py +++ b/python/graphframes/tests.py @@ -15,60 +15,63 @@ # limitations under the License. # +import re +import shutil import sys import tempfile -import shutil -import re if sys.version_info[:2] <= (2, 6): try: import unittest2 as unittest except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.stderr.write("Please install unittest2 to test with Python 2.6 or earlier") sys.exit(1) else: import unittest from pyspark import SparkContext -from pyspark.sql import functions as sqlfunctions, SparkSession +from pyspark.sql import SparkSession +from pyspark.sql import functions as sqlfunctions -from .graphframe import GraphFrame, Pregel, _java_api, _from_java_gf +from .examples import BeliefPropagation, Graphs +from .graphframe import GraphFrame, Pregel, _from_java_gf, _java_api from .lib import AggregateMessages as AM -from .examples import Graphs, BeliefPropagation -class GraphFrameTestUtils(object): +class GraphFrameTestUtils(object): @classmethod def parse_spark_version(cls, version_str): - """ take an input version string - return version items in a dictionary + """take an input version string + return version items in a dictionary """ - _sc_ver_patt = r'(\d+)\.(\d+)(\.(\d+)(-(.+))?)?' + _sc_ver_patt = r"(\d+)\.(\d+)(\.(\d+)(-(.+))?)?" 
m = re.match(_sc_ver_patt, version_str) if not m: - raise TypeError("version {} should be in <major>.<minor>..".format(version_str)) + raise TypeError( + "version {} should be in <major>.<minor>..".format(version_str) + ) version_info = {} try: - version_info['major'] = int(m.group(1)) - except: + version_info["major"] = int(m.group(1)) + except: # noqa: E722 raise TypeError("invalid minor version") try: - version_info['minor'] = int(m.group(2)) - except: + version_info["minor"] = int(m.group(2)) + except: # noqa: E722 raise TypeError("invalid major version") try: - version_info['maintenance'] = int(m.group(4)) - except: - version_info['maintenance'] = 0 + version_info["maintenance"] = int(m.group(4)) + except: # noqa: E722 + version_info["maintenance"] = 0 try: - version_info['special'] = m.group(6) - except: + version_info["special"] = m.group(6) + except: # noqa: E722 pass return version_info @classmethod def createSparkContext(cls): - cls.sc = sc = SparkContext('local[4]', "GraphFramesTests") + cls.sc = sc = SparkContext("local[4]", "GraphFramesTests") cls.checkpointDir = tempfile.mkdtemp() cls.sc.setCheckpointDir(cls.checkpointDir) cls.spark_version = cls.parse_spark_version(sc.version) @@ -81,10 +84,10 @@ def stopSparkContext(cls): @classmethod def spark_at_least_of_version(cls, version_str): - assert hasattr(cls, 'spark_version') + assert hasattr(cls, "spark_version") required_version = cls.parse_spark_version(version_str) spark_version = cls.spark_version - for _name in ['major', 'minor', 'maintenance']: + for _name in ["major", "minor", "maintenance"]: sc_ver = spark_version[_name] req_ver = required_version[_name] if sc_ver != req_ver: @@ -92,19 +95,24 @@ def spark_at_least_of_version(cls, version_str): # All major.minor.maintenance equal return True + def setUpModule(): GraphFrameTestUtils.createSparkContext() + def tearDownModule(): GraphFrameTestUtils.stopSparkContext() class GraphFrameTestCase(unittest.TestCase): - @classmethod def setUpClass(cls): # Small tests run much faster with spark.sql.shuffle.partitions = 4 - cls.spark = SparkSession(GraphFrameTestUtils.sc).builder.config('spark.sql.shuffle.partitions', 4).getOrCreate() + cls.spark = ( + SparkSession(GraphFrameTestUtils.sc) + .builder.config("spark.sql.shuffle.partitions", 4) + .getOrCreate() + ) @classmethod def tearDownClass(cls): @@ -136,14 +144,20 @@ def test_construction(self): assert sorted(vertexIDs) == [1, 2, 3] edgeActions = map(lambda x: x[0], g.edges.select("action").collect()) assert sorted(edgeActions) == ["follow", "hate", "love"] - tripletsFirst = list(map(lambda x: (x[0][1], x[1][1], x[2][2]), - g.triplets.sort("src.id").select("src", "dst", "edge").take(1))) + tripletsFirst = list( + map( + lambda x: (x[0][1], x[1][1], x[2][2]), + g.triplets.sort("src.id").select("src", "dst", "edge").take(1), + ) + ) assert tripletsFirst == [("A", "B", "love")], tripletsFirst # Try with invalid vertices and edges DataFrames v_invalid = self.spark.createDataFrame( - [(1, "A"), (2, "B"), (3, "C")], ["invalid_colname_1", "invalid_colname_2"]) + [(1, "A"), (2, "B"), (3, "C")], ["invalid_colname_1", "invalid_colname_2"] + ) e_invalid = self.spark.createDataFrame( - [(1, 2), (2, 3), (3, 1)], ["invalid_colname_3", "invalid_colname_4"]) + [(1, 2), (2, 3), (3, 1)], ["invalid_colname_3", "invalid_colname_4"] + ) with self.assertRaises(ValueError): GraphFrame(v_invalid, e_invalid) @@ -223,14 +237,12 @@ def setUp(self): super(PregelTest, self).setUp() def test_page_rank(self): - from pyspark.sql.functions import coalesce, col, lit, sum, when - edges =
self.spark.createDataFrame([[0, 1], - [1, 2], - [2, 4], - [2, 0], - [3, 4], # 3 has no in-links - [4, 0], - [4, 2]], ["src", "dst"]) + from pyspark.sql.functions import coalesce, lit, sum + + edges = self.spark.createDataFrame( + [[0, 1], [1, 2], [2, 4], [2, 0], [3, 4], [4, 0], [4, 2]], # 3 has no in-links + ["src", "dst"], + ) edges.cache() vertices = self.spark.createDataFrame([[0], [1], [2], [3], [4]], ["id"]) numVertices = vertices.count() @@ -238,19 +250,22 @@ def test_page_rank(self): vertices.cache() graph = GraphFrame(vertices, edges) alpha = 0.15 - ranks = graph.pregel \ - .setMaxIter(5) \ - .withVertexColumn("rank", lit(1.0 / numVertices), - coalesce(Pregel.msg(), - lit(0.0)) * lit(1.0 - alpha) + lit(alpha / numVertices)) \ - .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree")) \ - .aggMsgs(sum(Pregel.msg())) \ + ranks = ( + graph.pregel.setMaxIter(5) + .withVertexColumn( + "rank", + lit(1.0 / numVertices), + coalesce(Pregel.msg(), lit(0.0)) * lit(1.0 - alpha) + lit(alpha / numVertices), + ) + .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree")) + .aggMsgs(sum(Pregel.msg())) .run() + ) resultRows = ranks.sort(ranks.id).collect() result = map(lambda x: x.rank, resultRows) expected = [0.245, 0.224, 0.303, 0.03, 0.197] for a, b in zip(result, expected): - self.assertAlmostEqual(a, b, delta = 1e-3) + self.assertAlmostEqual(a, b, delta=1e-3) class GraphFrameLibTest(GraphFrameTestCase): @@ -258,11 +273,11 @@ def setUp(self): super(GraphFrameLibTest, self).setUp() self.japi = _java_api(self.spark._sc) - def _hasCols(self, graph, vcols = [], ecols = []): + def _hasCols(self, graph, vcols=[], ecols=[]): map(lambda c: self.assertIn(c, graph.vertices.columns), vcols) map(lambda c: self.assertIn(c, graph.edges.columns), ecols) - def _df_hasCols(self, vertices, vcols = []): + def _df_hasCols(self, vertices, vcols=[]): map(lambda c: self.assertIn(c, vertices.columns), vcols) def _graph(self, name, *args): @@ -272,7 +287,7 @@ def _graph(self, name, *args): :param name: the name of the example graph :param args: all the required arguments, without the initial spark session :return: - """ + """ # noqa: E501 examples = self.japi.examples() jgraph = getattr(examples, name)(*args) return _from_java_gf(jgraph, self.spark) @@ -281,30 +296,27 @@ def test_aggregate_messages(self): g = self._graph("friends") # For each user, sum the ages of the adjacent users, # plus 1 for the src's sum if the edge is "friend". - sendToSrc = ( - AM.dst['age'] + - sqlfunctions.when( - AM.edge['relationship'] == 'friend', - sqlfunctions.lit(1) - ).otherwise(0)) - sendToDst = AM.src['age'] + sendToSrc = AM.dst["age"] + sqlfunctions.when( + AM.edge["relationship"] == "friend", sqlfunctions.lit(1) + ).otherwise(0) + sendToDst = AM.src["age"] agg = g.aggregateMessages( - sqlfunctions.sum(AM.msg).alias('summedAges'), - sendToSrc=sendToSrc, - sendToDst=sendToDst) + sqlfunctions.sum(AM.msg).alias("summedAges"), sendToSrc=sendToSrc, sendToDst=sendToDst + ) # Run the aggregation again providing SQL expressions as String instead. agg2 = g.aggregateMessages( "sum(MSG) AS `summedAges`", - sendToSrc="(dst['age'] + CASE WHEN (edge['relationship'] = 'friend') THEN 1 ELSE 0 END)", - sendToDst="src['age']") + sendToSrc="(dst['age'] + CASE WHEN (edge['relationship'] = 'friend') THEN 1 ELSE 0 END)", # noqa: E501 + sendToDst="src['age']", + ) # Convert agg and agg2 to a mapping from id to the aggregated message. 
- aggMap = {id_: s for id_, s in agg.select('id', 'summedAges').collect()} - agg2Map = {id_: s for id_, s in agg2.select('id', 'summedAges').collect()} + aggMap = {id_: s for id_, s in agg.select("id", "summedAges").collect()} + agg2Map = {id_: s for id_, s in agg2.select("id", "summedAges").collect()} # Compute the truth via brute force. - user2age = {id_: age for id_, age in g.vertices.select('id', 'age').collect()} + user2age = {id_: age for id_, age in g.vertices.select("id", "age").collect()} trueAgg = {} for src, dst, rel in g.edges.select("src", "dst", "relationship").collect(): - trueAgg[src] = trueAgg.get(src, 0) + user2age[dst] + (1 if rel == 'friend' else 0) + trueAgg[src] = trueAgg.get(src, 0) + user2age[dst] + (1 if rel == "friend" else 0) trueAgg[dst] = trueAgg.get(dst, 0) + user2age[src] # Compare if the agg mappings match the brute force mapping self.assertEqual(aggMap, trueAgg) @@ -312,22 +324,19 @@ def test_aggregate_messages(self): # Check that TypeError is raises with messages of wrong type with self.assertRaises(TypeError): g.aggregateMessages( - "sum(MSG) AS `summedAges`", - sendToSrc=object(), - sendToDst="src['age']") + "sum(MSG) AS `summedAges`", sendToSrc=object(), sendToDst="src['age']" + ) with self.assertRaises(TypeError): g.aggregateMessages( - "sum(MSG) AS `summedAges`", - sendToSrc=dst['age'], - sendToDst=object()) + "sum(MSG) AS `summedAges`", sendToSrc=dst["age"], sendToDst=object() + ) def test_connected_components(self): - v = self.spark.createDataFrame([ - (0, "a", "b")], ["id", "vattr", "gender"]) + v = self.spark.createDataFrame([(0, "a", "b")], ["id", "vattr", "gender"]) e = self.spark.createDataFrame([(0, 0, 1)], ["src", "dst", "test"]).filter("src > 10") g = GraphFrame(v, e) comps = g.connectedComponents() - self._df_hasCols(comps, vcols=['id', 'component', 'vattr', 'gender']) + self._df_hasCols(comps, vcols=["id", "component", "vattr", "gender"]) self.assertEqual(comps.count(), 1) def test_connected_components2(self): @@ -335,7 +344,7 @@ def test_connected_components2(self): e = self.spark.createDataFrame([(0, 1, "a01", "b01")], ["src", "dst", "A", "B"]) g = GraphFrame(v, e) comps = g.connectedComponents() - self._df_hasCols(comps, vcols=['id', 'component', 'A', 'B']) + self._df_hasCols(comps, vcols=["id", "component", "A", "B"]) self.assertEqual(comps.count(), 2) def test_connected_components_friends(self): @@ -367,7 +376,7 @@ def test_page_rank(self): resetProb = 0.15 errorTol = 1.0e-5 pr = g.pageRank(resetProb, tol=errorTol) - self._hasCols(pr, vcols=['id', 'pagerank'], ecols=['src', 'dst', 'weight']) + self._hasCols(pr, vcols=["id", "pagerank"], ecols=["src", "dst", "weight"]) def test_parallel_personalized_page_rank(self): n = 100 @@ -376,7 +385,7 @@ def test_parallel_personalized_page_rank(self): maxIter = 15 sourceIds = [1, 2, 3, 4] pr = g.parallelPersonalizedPageRank(resetProb, sourceIds=sourceIds, maxIter=maxIter) - self._hasCols(pr, vcols=['id', 'pageranks'], ecols=['src', 'dst', 'weight']) + self._hasCols(pr, vcols=["id", "pageranks"], ecols=["src", "dst", "weight"]) def test_shortest_paths(self): edges = [(1, 2), (1, 5), (2, 3), (2, 5), (3, 4), (4, 5), (4, 6)] @@ -391,7 +400,7 @@ def test_shortest_paths(self): def test_svd_plus_plus(self): g = self._graph("ALSSyntheticData") (v2, cost) = g.svdPlusPlus() - self._df_hasCols(v2, vcols=['id', 'column1', 'column2', 'column3', 'column4']) + self._df_hasCols(v2, vcols=["id", "column1", "column2", "column3", "column4"]) def test_strongly_connected_components(self): # Simple island test @@ 
-408,25 +417,26 @@ def test_triangle_counts(self): g = GraphFrame(vertices, edges) c = g.triangleCount() for row in c.select("id", "count").collect(): - self.assertEqual(row.asDict()['count'], 1) - + self.assertEqual(row.asDict()["count"], 1) + def test_mutithreaded_sparksession_usage(self): # Test that we can use the GraphFrame API from multiple threads localVertices = [(1, "A"), (2, "B"), (3, "C")] localEdges = [(1, 2, "love"), (2, 1, "hate"), (2, 3, "follow")] v = self.spark.createDataFrame(localVertices, ["id", "name"]) e = self.spark.createDataFrame(localEdges, ["src", "dst", "action"]) - - + exc = None + def run_graphframe() -> None: try: GraphFrame(v, e) except Exception as _e: nonlocal exc exc = _e - + import threading + thread = threading.Thread(target=run_graphframe) thread.start() thread.join() @@ -445,11 +455,12 @@ def test_belief_propagation(self): numIter = 5 results = BeliefPropagation.runBPwithGraphFrames(g, numIter) # check beliefs are valid - for row in results.vertices.select('belief').collect(): - belief = row['belief'] + for row in results.vertices.select("belief").collect(): + belief = row["belief"] self.assertTrue( 0 <= belief <= 1, - msg="Expected belief to be probability in [0,1], but found {}".format(belief)) + msg="Expected belief to be probability in [0,1], but found {}".format(belief), + ) def test_graph_friends(self): # construct graph @@ -462,7 +473,7 @@ def test_graph_grid_ising_model(self): n = 3 g = Graphs(self.spark).gridIsingModel(n) # check that all the vertices exist - ids = [v['id'] for v in g.vertices.collect()] + ids = [v["id"] for v in g.vertices.collect()] for i in range(n): for j in range(n): - self.assertIn('{},{}'.format(i, j), ids) + self.assertIn("{},{}".format(i, j), ids) diff --git a/python/graphframes/tutorials/motif.py b/python/graphframes/tutorials/motif.py index 59691946a..5fae8b87b 100644 --- a/python/graphframes/tutorials/motif.py +++ b/python/graphframes/tutorials/motif.py @@ -1,9 +1,11 @@ -"""Demonstrate GraphFrames network motif finding capabilities. Code from the Network Motif Finding Tutorial.""" +"""Demonstrate GraphFrames network motif finding capabilities. Code from the Network Motif Finding Tutorial.""" # noqa: E501 # # Interactive Usage: pyspark --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 # -# Batch Usage: spark-submit --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 python/graphframes/tutorials/motif.py +# Batch Usage: +# spark-submit \ +# --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 python/graphframes/tutorials/motif.py # import click @@ -22,7 +24,7 @@ # -# Load the nodes and edges from disk, repartition, checkpoint [plan got long for some reason] and cache. +# Load the nodes and edges from disk, repartition, checkpoint [plan got long for some reason] and cache. 
# noqa: E501 # # We created these in stackexchange.py from Stack Exchange data dump XML files diff --git a/python/graphframes/tutorials/stackexchange.py b/python/graphframes/tutorials/stackexchange.py index 5dab1eafe..32507455f 100644 --- a/python/graphframes/tutorials/stackexchange.py +++ b/python/graphframes/tutorials/stackexchange.py @@ -3,7 +3,9 @@ # # Interactive Usage: pyspark --packages com.databricks:spark-xml_2.12:0.18.0 # -# Batch Usage: spark-submit --packages com.databricks:spark-xml_2.12:0.18.0 python/graphframes/tutorials/stackexchange.py +# Batch Usage: +# spark-submit \ +# --packages com.databricks:spark-xml_2.12:0.18.0 python/graphframes/tutorials/stackexchange.py # from __future__ import annotations @@ -362,7 +364,8 @@ def add_missing_columns(df: DataFrame, all_cols: list[tuple[str, T.StructField]] # * [Post]--Links-->[Post] # # Remember: 'src', 'dst' and 'relationship' are standard edge fields in GraphFrames -# Remember: we must produce src/dst based on lowercase 'id' UUID, not 'Id' which is Stack Overflow's integer. +# Remember: we must produce src/dst based on lowercase 'id' UUID, +# not 'Id' which is Stack Overflow's integer. # # @@ -410,7 +413,7 @@ def add_missing_columns(df: DataFrame, all_cols: list[tuple[str, T.StructField]] ) click.echo(f"Total Asks edges: {user_asks_edges_df.count():,}") click.echo( - f"Percentage of asked questions linked to users: {user_asks_edges_df.count() / questions_df.count():.2%}\n" + f"Percentage of asked questions linked to users: {user_asks_edges_df.count() / questions_df.count():.2%}\n" # noqa: E501 ) @@ -435,7 +438,7 @@ def add_missing_columns(df: DataFrame, all_cols: list[tuple[str, T.StructField]] ) click.echo(f"Total User Answers edges: {user_answers_edges_df.count():,}") click.echo( - f"Percentage of answers linked to users: {user_answers_edges_df.count() / answers_df.count():.2%}\n" + f"Percentage of answers linked to users: {user_answers_edges_df.count() / answers_df.count():.2%}\n" # noqa: E501 ) diff --git a/python/graphframes/tutorials/utils.py b/python/graphframes/tutorials/utils.py index 46db14d96..c57ead15a 100644 --- a/python/graphframes/tutorials/utils.py +++ b/python/graphframes/tutorials/utils.py @@ -18,7 +18,7 @@ def three_edge_count(paths: DataFrame) -> DataFrame: ------- DataFrame A DataFrame of the counts of the different types of 3-node graphlets in the graph. - """ + """ # noqa: E501 graphlet_type_df = paths.select( F.col("a.Type").alias("A_Type"), F.col("e1.relationship").alias("E_relationship"), @@ -113,7 +113,7 @@ def add_type_degree(g: GraphFrame) -> DataFrame: ------- DataFrame - I am broke, next line is wrong A GraphFrame with a map[type:degree] 'type_degree' field added to the vertices - """ + """ # noqa: E501 type_degree: DataFrame = ( g.edges.select(F.col("src").alias("id"), "relationship") .filter(F.col("id").isNotNull()) diff --git a/python/poetry.lock b/python/poetry.lock index fa319d849..cca2c1efd 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -1,35 +1,34 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "black" -version = "25.1.0" +version = "23.12.1" description = "The uncompromising code formatter." 
optional = false -python-versions = ">=3.9" -groups = ["dev"] +python-versions = ">=3.8" files = [ - {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, - {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, - {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"}, - {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"}, - {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"}, - {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"}, - {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"}, - {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"}, - {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"}, - {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"}, - {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"}, - {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"}, - {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"}, - {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"}, - {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"}, - {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"}, - {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"}, - {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"}, - {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"}, - {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"}, - {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"}, - {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"}, + {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, + {file = 
"black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, + {file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"}, + {file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"}, + {file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"}, + {file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"}, + {file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"}, + {file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"}, + {file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"}, + {file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"}, + {file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"}, + {file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"}, + {file = "black-23.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1fa88a0f74e50e4487477bc0bb900c6781dbddfdfa32691e780bf854c3b4a47f"}, + {file = "black-23.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a4d6a9668e45ad99d2f8ec70d5c8c04ef4f32f648ef39048d010b0689832ec6d"}, + {file = "black-23.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18fb2ae6c4bb63eebe5be6bd869ba2f14fd0259bda7d18a46b764d8fb86298a"}, + {file = "black-23.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:c04b6d9d20e9c13f43eee8ea87d44156b8505ca8a3c878773f68b4e4812a421e"}, + {file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"}, + {file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"}, + {file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"}, + {file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"}, + {file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"}, + {file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"}, ] [package.dependencies] @@ -43,7 +42,7 @@ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.10)"] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -53,8 +52,6 @@ version = "1.1.0" description = "Python bindings for the Brotli compression library" optional = false python-versions = "*" -groups = ["tutorials"] 
-markers = "platform_python_implementation == \"CPython\"" files = [ {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1140c64812cb9b06c922e77f1c26a75ec5e3f0fb2bf92cc8c58720dec276752"}, {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8fd5270e906eef71d4a8d19b7c6a43760c6abcfcc10c9101d14eb2357418de9"}, @@ -189,8 +186,6 @@ version = "1.1.0.0" description = "Python CFFI bindings to the Brotli library" optional = false python-versions = ">=3.7" -groups = ["tutorials"] -markers = "platform_python_implementation == \"PyPy\"" files = [ {file = "brotlicffi-1.1.0.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9b7ae6bd1a3f0df532b6d67ff674099a96d22bc0948955cb338488c31bfb8851"}, {file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19ffc919fa4fc6ace69286e0a23b3789b4219058313cf9b45625016bf7ff996b"}, @@ -230,7 +225,6 @@ version = "2025.1.31" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" -groups = ["tutorials"] files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, @@ -242,8 +236,6 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." optional = false python-versions = ">=3.8" -groups = ["tutorials"] -markers = "platform_python_implementation == \"PyPy\"" files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -323,7 +315,6 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" -groups = ["tutorials"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -425,7 +416,6 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" -groups = ["dev", "tutorials"] files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -440,8 +430,6 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["dev", "tutorials"] -markers = "platform_system == \"Windows\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -453,7 +441,6 @@ version = "7.1.2" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" -groups = ["dev"] files = [ {file = "flake8-7.1.2-py2.py3-none-any.whl", hash = "sha256:1cbc62e65536f65e6d754dfe6f1bada7f5cf392d6f5db3c2b85892466c3e7c1a"}, {file = "flake8-7.1.2.tar.gz", hash = "sha256:c586ffd0b41540951ae41af572e6790dbd49fc12b3aa2541685d253d9bd504bd"}, @@ -470,7 +457,6 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" -groups = ["tutorials"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -485,7 +471,6 @@ version = "1.0.1" description = "deflate64 compression/decompression library" optional = false python-versions = ">=3.9" -groups = ["tutorials"] files = [ {file = "inflate64-1.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5122a188995e47a735ab969edc9129d42bbd97b993df5a3f0819b87205ce81b4"}, {file = "inflate64-1.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:975ed694c680e46a5c0bb872380a9c9da271a91f9c0646561c58e8f3714347d4"}, @@ -537,14 +522,13 @@ test = ["pytest"] [[package]] name = "isort" -version = "6.0.0" +version = "6.0.1" description = "A Python utility / library to sort Python imports." optional = false python-versions = ">=3.9.0" -groups = ["dev"] files = [ - {file = "isort-6.0.0-py3-none-any.whl", hash = "sha256:567954102bb47bb12e0fae62606570faacddd441e45683968c8d1734fb1af892"}, - {file = "isort-6.0.0.tar.gz", hash = "sha256:75d9d8a1438a9432a7d7b54f2d3b45cad9a4a0fdba43617d9873379704a8bdf1"}, + {file = "isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615"}, + {file = "isort-6.0.1.tar.gz", hash = "sha256:1cb5df28dfbc742e490c5e41bad6da41b805b0a8be7bc93cd0fb2a8a890ac450"}, ] [package.extras] @@ -557,7 +541,6 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" -groups = ["dev"] files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -569,7 +552,6 @@ version = "0.2.3" description = "multi volume file wrapper library" optional = false python-versions = ">=3.6" -groups = ["tutorials"] files = [ {file = "multivolumefile-0.2.3-py3-none-any.whl", hash = "sha256:237f4353b60af1703087cf7725755a1f6fcaeeea48421e1896940cd1c920d678"}, {file = "multivolumefile-0.2.3.tar.gz", hash = "sha256:a0648d0aafbc96e59198d5c17e9acad7eb531abea51035d08ce8060dcad709d6"}, @@ -586,7 +568,6 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false python-versions = ">=3.5" -groups = ["dev"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -598,7 +579,6 @@ version = "1.3.7" description = "nose extends unittest to make testing easier" optional = false python-versions = "*" -groups = ["main"] files = [ {file = "nose-1.3.7-py2-none-any.whl", hash = "sha256:dadcddc0aefbf99eea214e0f1232b94f2fa9bd98fa8353711dacb112bfcbbb2a"}, {file = "nose-1.3.7-py3-none-any.whl", hash = "sha256:9ff7c6cc443f8c51994b34a667bbcf45afd6d945be7477b52e97516fd17c53ac"}, @@ -611,7 +591,6 @@ version = "2.0.2" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" -groups = ["main"] files = [ {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, @@ -666,7 +645,6 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -678,7 +656,6 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -690,7 +667,6 @@ version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, @@ -707,8 +683,6 @@ version = "7.0.0" description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." 
optional = false python-versions = ">=3.6" -groups = ["tutorials"] -markers = "sys_platform != \"cygwin\"" files = [ {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, @@ -732,7 +706,6 @@ version = "0.10.9.7" description = "Enables Python programs to dynamically access arbitrary Java objects" optional = false python-versions = "*" -groups = ["main"] files = [ {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash = "sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"}, {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"}, @@ -744,7 +717,6 @@ version = "0.22.0" description = "Pure python 7-zip library" optional = false python-versions = ">=3.8" -groups = ["tutorials"] files = [ {file = "py7zr-0.22.0-py3-none-any.whl", hash = "sha256:993b951b313500697d71113da2681386589b7b74f12e48ba13cc12beca79d078"}, {file = "py7zr-0.22.0.tar.gz", hash = "sha256:c6c7aea5913535184003b73938490f9a4d8418598e533f9ca991d3b8e45a139e"}, @@ -775,7 +747,6 @@ version = "1.0.3" description = "bcj filter library" optional = false python-versions = ">=3.9" -groups = ["tutorials"] files = [ {file = "pybcj-1.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0bd8afeacf9173af091a08783aa9111500f5619ce0ae486bffb5ee4d08a331b4"}, {file = "pybcj-1.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc81d3c941485e7d3c2812834ca005849fe91a624977ed5227658cf952d19696"}, @@ -830,7 +801,6 @@ version = "2.12.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"}, {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"}, @@ -842,8 +812,6 @@ version = "2.22" description = "C parser in Python" optional = false python-versions = ">=3.8" -groups = ["tutorials"] -markers = "platform_python_implementation == \"PyPy\"" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -855,7 +823,6 @@ version = "3.21.0" description = "Cryptographic library for Python" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -groups = ["tutorials"] files = [ {file = "pycryptodomex-3.21.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:dbeb84a399373df84a69e0919c1d733b89e049752426041deeb30d68e9867822"}, {file = "pycryptodomex-3.21.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:a192fb46c95489beba9c3f002ed7d93979423d1b2a53eab8771dbb1339eb3ddd"}, @@ -897,7 +864,6 @@ version = "3.2.0" description = "passive checker of Python programs" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"}, {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, @@ -909,7 +875,6 @@ version = "1.1.1" description = "PPMd compression/decompression library" optional = false 
python-versions = ">=3.9" -groups = ["tutorials"] files = [ {file = "pyppmd-1.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:406b184132c69e3f60ea9621b69eaa0c5494e83f82c307b3acce7b86a4f8f888"}, {file = "pyppmd-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2cf003bb184adf306e1ac1828107307927737dde63474715ba16462e266cbef"}, @@ -976,13 +941,12 @@ test = ["coverage[toml] (>=5.2)", "hypothesis", "pytest (>=6.0)", "pytest-benchm [[package]] name = "pyspark" -version = "3.5.4" +version = "3.5.5" description = "Apache Spark Python API" optional = false python-versions = ">=3.8" -groups = ["main"] files = [ - {file = "pyspark-3.5.4.tar.gz", hash = "sha256:1c2926d63020902163f58222466adf6f8016f6c43c1f319b8e7a71dbaa05fc51"}, + {file = "pyspark-3.5.5.tar.gz", hash = "sha256:6effc9ce98edf231f4d683fd14f7270629bf8458c628d6a2620ded4bb34f3cb9"}, ] [package.dependencies] @@ -1001,7 +965,6 @@ version = "0.16.2" description = "Python bindings to Zstandard (zstd) compression library." optional = false python-versions = ">=3.5" -groups = ["tutorials"] files = [ {file = "pyzstd-0.16.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:637376c8f8cbd0afe1cab613f8c75fd502bd1016bf79d10760a2d5a00905fe62"}, {file = "pyzstd-0.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3e7a7118cbcfa90ca2ddbf9890c7cb582052a9a8cf2b7e2c1bbaf544bee0f16a"}, @@ -1094,7 +1057,6 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" -groups = ["tutorials"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -1116,7 +1078,6 @@ version = "1.7.0" description = "module to create simple ASCII tables" optional = false python-versions = "*" -groups = ["tutorials"] files = [ {file = "texttable-1.7.0-py2.py3-none-any.whl", hash = "sha256:72227d592c82b3d7f672731ae73e4d1f88cd8e2ef5b075a7a7f01a23a3743917"}, {file = "texttable-1.7.0.tar.gz", hash = "sha256:2d2068fb55115807d3ac77a4ca68fa48803e84ebb0ee2340f858107a36522638"}, @@ -1128,8 +1089,6 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -1171,8 +1130,6 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version < \"3.11\"" files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, @@ -1184,19 +1141,18 @@ version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" -groups = ["tutorials"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, ] [package.extras] -brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] [metadata] -lock-version = "2.1" +lock-version = "2.0" python-versions = ">=3.9 <3.13" -content-hash = "33ae7f96a3999d6822af7778f9b7878355d811534a4b5fec14d51ec29aa8dce2" +content-hash = "8ed74b87abe8e7d5dcb10549a8d1ce35cce4d2db642f902d8021c08cf4b17345" diff --git a/python/pyproject.toml b/python/pyproject.toml index 819d2bbdd..8cb109bfe 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -30,7 +30,7 @@ pyspark = "^3.4" numpy = ">= 1.7" [tool.poetry.group.dev.dependencies] -black = "^25.1.0" +black = "^23.12.1" flake8 = "^7.1.1" isort = "^6.0.0" @@ -50,9 +50,7 @@ build-backend = "poetry.core.masonry.api" line-length = 100 target-version = ["py39"] include = ["graphframes"] - -[tool.flake8] -max-line-length = 100 +required-version = "23.12.1" [tool.isort] profile = "black" diff --git a/python/tox.ini b/python/tox.ini new file mode 100644 index 000000000..ab0a6a1f2 --- /dev/null +++ b/python/tox.ini @@ -0,0 +1,8 @@ +[flake8] +ignore = + E203, + E402, + F811, + W503, + W504, +max-line-length = 100
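Note on the lint configuration changes above: stock flake8 does not read settings from pyproject.toml, so the removed [tool.flake8] table is not applied by flake8 itself; tox.ini (like setup.cfg or .flake8) is a file flake8 does pick up, which is presumably why the settings moved there. For reference only, a minimal annotated sketch of the same [flake8] section, spelling out what each suppressed code means, could look like this (the comments are additions, not part of the committed file):

[flake8]
# Codes suppressed project-wide:
#   E203 - whitespace before ':' (commonly ignored alongside black's slice formatting)
#   E402 - module-level import not at top of file
#   F811 - redefinition of an unused name
#   W503 - line break before a binary operator
#   W504 - line break after a binary operator
ignore = E203, E402, F811, W503, W504
max-line-length = 100

The required-version = "23.12.1" pin under [tool.black] makes black refuse to run under any other release, so all contributors format the code with the same black version that the ^23.12.1 dev dependency installs.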