diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 9632ca9a6..3e3ff95dc 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -43,7 +43,13 @@ jobs: working-directory: ./python run: | poetry build - poetry install + poetry install --with dev + - name: Code Style + working-directory: ./python + run: | + poetry run python -m black --check graphframes + poetry run python -m flake8 graphframes + poetry run python -m isort --check graphframes - name: Test working-directory: ./python run: | diff --git a/python/graphframes/__init__.py b/python/graphframes/__init__.py index 03f1e4943..bded262bc 100644 --- a/python/graphframes/__init__.py +++ b/python/graphframes/__init__.py @@ -1,4 +1,3 @@ - from .graphframe import GraphFrame -__all__ = ['GraphFrame'] +__all__ = ["GraphFrame"] diff --git a/python/graphframes/console.py b/python/graphframes/console.py index d2b38d28b..dd9c4cd31 100644 --- a/python/graphframes/console.py +++ b/python/graphframes/console.py @@ -1,4 +1,5 @@ import click + from graphframes.tutorials import download diff --git a/python/graphframes/examples/__init__.py b/python/graphframes/examples/__init__.py index 8b92ef01f..2003b0191 100644 --- a/python/graphframes/examples/__init__.py +++ b/python/graphframes/examples/__init__.py @@ -1,5 +1,4 @@ - from .belief_propagation import BeliefPropagation from .graphs import Graphs -__all__ = ['BeliefPropagation', 'Graphs'] +__all__ = ["BeliefPropagation", "Graphs"] diff --git a/python/graphframes/examples/belief_propagation.py b/python/graphframes/examples/belief_propagation.py index c013450d7..b1434e231 100644 --- a/python/graphframes/examples/belief_propagation.py +++ b/python/graphframes/examples/belief_propagation.py @@ -18,14 +18,17 @@ import math from typing import Union +from pyspark.sql import SparkSession +from pyspark.sql import functions as sqlfunctions +from pyspark.sql import types + # Import subpackage examples here explicitly so that # this module can be run directly with spark-submit. import graphframes.examples from graphframes import GraphFrame from graphframes.lib import AggregateMessages as AM -from pyspark.sql import SparkSession, functions as sqlfunctions, types -__all__ = ['BeliefPropagation'] +__all__ = ["BeliefPropagation"] class BeliefPropagation: @@ -61,7 +64,7 @@ class BeliefPropagation: * Coloring the graph by assigning a color to each vertex such that no neighboring vertices share the same color. * In each step of BP, update all vertices of a single color. Alternate colors. - """ + """ # noqa: W605 @classmethod def runBPwithGraphFrames(cls, g: GraphFrame, numIter: int) -> GraphFrame: @@ -71,12 +74,12 @@ def runBPwithGraphFrames(cls, g: GraphFrame, numIter: int) -> GraphFrame: """ # choose colors for vertices for BP scheduling colorG = cls._colorGraph(g) - numColors = colorG.vertices.select('color').distinct().count() + numColors = colorG.vertices.select("color").distinct().count() # TODO: handle vertices without any edges # initialize vertex beliefs at 0.0 - gx = GraphFrame(colorG.vertices.withColumn('belief', sqlfunctions.lit(0.0)), colorG.edges) + gx = GraphFrame(colorG.vertices.withColumn("belief", sqlfunctions.lit(0.0)), colorG.edges) # run BP for numIter iterations for iter_ in range(numIter): @@ -85,37 +88,40 @@ def runBPwithGraphFrames(cls, g: GraphFrame, numIter: int) -> GraphFrame: # Send messages to vertices of the current color. # We may send to source or destination since edges are treated as undirected. 
msgForSrc = sqlfunctions.when( - AM.src['color'] == color, - AM.edge['b'] * AM.dst['belief']) + AM.src["color"] == color, AM.edge["b"] * AM.dst["belief"] + ) msgForDst = sqlfunctions.when( - AM.dst['color'] == color, - AM.edge['b'] * AM.src['belief']) + AM.dst["color"] == color, AM.edge["b"] * AM.src["belief"] + ) # numerically stable sigmoid logistic = sqlfunctions.udf(cls._sigmoid, returnType=types.DoubleType()) aggregates = gx.aggregateMessages( sqlfunctions.sum(AM.msg).alias("aggMess"), sendToSrc=msgForSrc, - sendToDst=msgForDst) + sendToDst=msgForDst, + ) v = gx.vertices # receive messages and update beliefs for vertices of the current color newBeliefCol = sqlfunctions.when( - (v['color'] == color) & (aggregates['aggMess'].isNotNull()), - logistic(aggregates['aggMess'] + v['a']) - ).otherwise(v['belief']) # keep old beliefs for other colors - newVertices = (v - .join(aggregates, on=(v['id'] == aggregates['id']), how='left_outer') - .drop(aggregates['id']) # drop duplicate ID column (from outer join) - .withColumn('newBelief', newBeliefCol) # compute new beliefs - .drop('aggMess') # drop messages - .drop('belief') # drop old beliefs - .withColumnRenamed('newBelief', 'belief') + (v["color"] == color) & (aggregates["aggMess"].isNotNull()), + logistic(aggregates["aggMess"] + v["a"]), + ).otherwise( + v["belief"] + ) # keep old beliefs for other colors + newVertices = ( + v.join(aggregates, on=(v["id"] == aggregates["id"]), how="left_outer") + .drop(aggregates["id"]) # drop duplicate ID column (from outer join) + .withColumn("newBelief", newBeliefCol) # compute new beliefs + .drop("aggMess") # drop messages + .drop("belief") # drop old beliefs + .withColumnRenamed("newBelief", "belief") ) # cache new vertices using workaround for SPARK-1334 cachedNewVertices = AM.getCachedDataFrame(newVertices) gx = GraphFrame(cachedNewVertices, gx.edges) # Drop the "color" column from vertices - return GraphFrame(gx.vertices.drop('color'), gx.edges) + return GraphFrame(gx.vertices.drop("color"), gx.edges) @staticmethod def _colorGraph(g: GraphFrame) -> GraphFrame: @@ -132,7 +138,7 @@ def _colorGraph(g: GraphFrame) -> GraphFrame: """ colorUDF = sqlfunctions.udf(lambda i, j: (i + j) % 2, returnType=types.IntegerType()) - v = g.vertices.withColumn('color', colorUDF(sqlfunctions.col('i'), sqlfunctions.col('j'))) + v = g.vertices.withColumn("color", colorUDF(sqlfunctions.col("i"), sqlfunctions.col("j"))) return GraphFrame(v, g.edges) @staticmethod @@ -164,12 +170,12 @@ def main() -> None: results = BeliefPropagation.runBPwithGraphFrames(g, numIter) # display beliefs - beliefs = results.vertices.select('id', 'belief') + beliefs = results.vertices.select("id", "belief") print("Done with BP. 
Final beliefs after {} iterations:".format(numIter)) beliefs.show() spark.stop() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/python/graphframes/examples/graphs.py b/python/graphframes/examples/graphs.py index 8db04aecc..0a3af2028 100644 --- a/python/graphframes/examples/graphs.py +++ b/python/graphframes/examples/graphs.py @@ -17,11 +17,12 @@ import itertools -from pyspark.sql import functions as sqlfunctions, SparkSession +from pyspark.sql import SparkSession +from pyspark.sql import functions as sqlfunctions from graphframes import GraphFrame -__all__ = ['Graphs'] +__all__ = ["Graphs"] class Graphs: @@ -37,24 +38,30 @@ def __init__(self, spark: SparkSession) -> None: def friends(self) -> GraphFrame: """A GraphFrame of friends in a (fake) social network.""" # Vertex DataFrame - v = self._spark.createDataFrame([ - ("a", "Alice", 34), - ("b", "Bob", 36), - ("c", "Charlie", 30), - ("d", "David", 29), - ("e", "Esther", 32), - ("f", "Fanny", 36) - ], ["id", "name", "age"]) + v = self._spark.createDataFrame( + [ + ("a", "Alice", 34), + ("b", "Bob", 36), + ("c", "Charlie", 30), + ("d", "David", 29), + ("e", "Esther", 32), + ("f", "Fanny", 36), + ], + ["id", "name", "age"], + ) # Edge DataFrame - e = self._spark.createDataFrame([ - ("a", "b", "friend"), - ("b", "c", "follow"), - ("c", "b", "follow"), - ("f", "c", "follow"), - ("e", "f", "follow"), - ("e", "d", "friend"), - ("d", "a", "friend") - ], ["src", "dst", "relationship"]) + e = self._spark.createDataFrame( + [ + ("a", "b", "friend"), + ("b", "c", "follow"), + ("c", "b", "follow"), + ("f", "c", "follow"), + ("e", "f", "follow"), + ("e", "d", "friend"), + ("d", "a", "friend"), + ], + ["src", "dst", "relationship"], + ) # Create a GraphFrame return GraphFrame(v, e) @@ -83,41 +90,44 @@ def gridIsingModel(self, n: int, vStd: float = 1.0, eStd: float = 1.0) -> GraphF and "b". Edges are directed, but they should be treated as undirected in any algorithms run on this model. Vertex IDs are of the form "i,j". E.g., vertex "1,3" is in the second row and fourth column of the grid. 
- """ + """ # noqa: W605 # check param n if n < 1: raise ValueError( - "Grid graph must have size >= 1, but was given invalid value n = {}" - .format(n)) + "Grid graph must have size >= 1, but was given invalid value n = {}".format(n) + ) # create coodinates grid coordinates = self._spark.createDataFrame( - itertools.product(range(n), range(n)), - schema=('i', 'j')) + itertools.product(range(n), range(n)), schema=("i", "j") + ) # create SQL expression for converting coordinates (i,j) to a string ID "i,j" # avoid Cartesian join due to SPARK-15425: use generator since n should be small - toIDudf = sqlfunctions.udf(lambda i, j: '{},{}'.format(i,j)) + toIDudf = sqlfunctions.udf(lambda i, j: "{},{}".format(i, j)) # create the vertex DataFrame # create SQL expression for converting coordinates (i,j) to a string ID "i,j" - vIDcol = toIDudf(sqlfunctions.col('i'), sqlfunctions.col('j')) + vIDcol = toIDudf(sqlfunctions.col("i"), sqlfunctions.col("j")) # add random parameters generated from a normal distribution seed = 12345 - vertices = (coordinates.withColumn('id', vIDcol) - .withColumn('a', sqlfunctions.randn(seed) * vStd)) + vertices = coordinates.withColumn("id", vIDcol).withColumn( + "a", sqlfunctions.randn(seed) * vStd + ) # create the edge DataFrame # create SQL expression for converting coordinates (i,j+1) and (i+1,j) to string IDs - rightIDcol = toIDudf(sqlfunctions.col('i'), sqlfunctions.col('j') + 1) - downIDcol = toIDudf(sqlfunctions.col('i') + 1, sqlfunctions.col('j')) - horizontalEdges = (coordinates.filter(sqlfunctions.col('j') != n - 1) - .select(vIDcol.alias('src'), rightIDcol.alias('dst'))) - verticalEdges = (coordinates.filter(sqlfunctions.col('i') != n - 1) - .select(vIDcol.alias('src'), downIDcol.alias('dst'))) + rightIDcol = toIDudf(sqlfunctions.col("i"), sqlfunctions.col("j") + 1) + downIDcol = toIDudf(sqlfunctions.col("i") + 1, sqlfunctions.col("j")) + horizontalEdges = coordinates.filter(sqlfunctions.col("j") != n - 1).select( + vIDcol.alias("src"), rightIDcol.alias("dst") + ) + verticalEdges = coordinates.filter(sqlfunctions.col("i") != n - 1).select( + vIDcol.alias("src"), downIDcol.alias("dst") + ) allEdges = horizontalEdges.unionAll(verticalEdges) # add random parameters from a normal distribution - edges = allEdges.withColumn('b', sqlfunctions.randn(seed + 1) * eStd) + edges = allEdges.withColumn("b", sqlfunctions.randn(seed + 1) * eStd) # create the GraphFrame g = GraphFrame(vertices, edges) diff --git a/python/graphframes/graphframe.py b/python/graphframes/graphframe.py index 5381ec8b5..1d177e1a2 100644 --- a/python/graphframes/graphframe.py +++ b/python/graphframes/graphframe.py @@ -16,18 +16,19 @@ # import sys -from typing import Any, Union, Optional +from typing import Any, Optional, Union -if sys.version > '3': +if sys.version > "3": basestring = str -from graphframes.lib import Pregel from pyspark import SparkContext from pyspark.sql import Column, DataFrame, SparkSession from pyspark.storagelevel import StorageLevel +from graphframes.lib import Pregel + -def _from_java_gf(jgf: Any, spark: SparkSession) -> 'GraphFrame': +def _from_java_gf(jgf: Any, spark: SparkSession) -> "GraphFrame": """ (internal) creates a python GraphFrame wrapper from a java GraphFrame. 
@@ -37,10 +38,15 @@ def _from_java_gf(jgf: Any, spark: SparkSession) -> 'GraphFrame': pe = DataFrame(jgf.edges(), spark) return GraphFrame(pv, pe) + def _java_api(jsc: SparkContext) -> Any: javaClassName = "org.graphframes.GraphFramePythonAPI" - return jsc._jvm.Thread.currentThread().getContextClassLoader().loadClass(javaClassName) \ - .newInstance() + return ( + jsc._jvm.Thread.currentThread() + .getContextClassLoader() + .loadClass(javaClassName) + .newInstance() + ) class GraphFrame: @@ -76,16 +82,22 @@ def __init__(self, v: DataFrame, e: DataFrame) -> None: # Check that provided DataFrames contain required columns if self.ID not in v.columns: raise ValueError( - "Vertex ID column {} missing from vertex DataFrame, which has columns: {}" - .format(self.ID, ",".join(v.columns))) + "Vertex ID column {} missing from vertex DataFrame, which has columns: {}".format( + self.ID, ",".join(v.columns) + ) + ) if self.SRC not in e.columns: raise ValueError( - "Source vertex ID column {} missing from edge DataFrame, which has columns: {}" - .format(self.SRC, ",".join(e.columns))) + "Source vertex ID column {} missing from edge DataFrame, which has columns: {}".format( # noqa: E501 + self.SRC, ",".join(e.columns) + ) + ) if self.DST not in e.columns: raise ValueError( - "Destination vertex ID column {} missing from edge DataFrame, which has columns: {}" - .format(self.DST, ",".join(e.columns))) + "Destination vertex ID column {} missing from edge DataFrame, which has columns: {}".format( # noqa: E501 + self.DST, ",".join(e.columns) + ) + ) self._jvm_graph = self._jvm_gf_api.createGraph(v._jdf, e._jdf) @@ -109,8 +121,8 @@ def edges(self) -> DataFrame: def __repr__(self): return self._jvm_graph.toString() - def cache(self) -> 'GraphFrame': - """ Persist the dataframe representation of vertices and edges of the graph with the default + def cache(self) -> "GraphFrame": + """Persist the dataframe representation of vertices and edges of the graph with the default storage level. """ self._jvm_graph.cache() @@ -124,7 +136,7 @@ def persist(self, storageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) -> "Gra self._jvm_graph.persist(javaStorageLevel) return self - def unpersist(self, blocking: bool = False) -> 'GraphFrame': + def unpersist(self, blocking: bool = False) -> "GraphFrame": """Mark the dataframe representation of vertices and edges of the graph as non-persistent, and remove all blocks for it from memory and disk. """ @@ -209,12 +221,12 @@ def find(self, pattern: str) -> DataFrame: jdf = self._jvm_graph.find(pattern) return DataFrame(jdf, self._spark) - def filterVertices(self, condition: Union[str, Column]) -> 'GraphFrame': + def filterVertices(self, condition: Union[str, Column]) -> "GraphFrame": """ Filters the vertices based on expression, remove edges containing any dropped vertices. - + :param condition: String or Column describing the condition expression for filtering. - :return: GraphFrame with filtered vertices and edges. + :return: GraphFrame with filtered vertices and edges. """ if isinstance(condition, basestring): @@ -225,12 +237,12 @@ def filterVertices(self, condition: Union[str, Column]) -> 'GraphFrame': raise TypeError("condition should be string or Column") return _from_java_gf(jdf, self._spark) - def filterEdges(self, condition: Union[str, Column]) -> 'GraphFrame': + def filterEdges(self, condition: Union[str, Column]) -> "GraphFrame": """ Filters the edges based on expression, keep all vertices. 
- + :param condition: String or Column describing the condition expression for filtering. - :return: GraphFrame with filtered edges. + :return: GraphFrame with filtered edges. """ if isinstance(condition, basestring): jdf = self._jvm_graph.filterEdges(condition) @@ -240,18 +252,18 @@ def filterEdges(self, condition: Union[str, Column]) -> 'GraphFrame': raise TypeError("condition should be string or Column") return _from_java_gf(jdf, self._spark) - def dropIsolatedVertices(self) -> 'GraphFrame': + def dropIsolatedVertices(self) -> "GraphFrame": """ Drops isolated vertices, vertices are not contained in any edges. - :return: GraphFrame with filtered vertices. + :return: GraphFrame with filtered vertices. """ jdf = self._jvm_graph.dropIsolatedVertices() return _from_java_gf(jdf, self._spark) - def bfs(self, fromExpr: str, toExpr: str, - edgeFilter: Optional[str] = None, - maxPathLength: int = 10) -> DataFrame: + def bfs( + self, fromExpr: str, toExpr: str, edgeFilter: Optional[str] = None, maxPathLength: int = 10 + ) -> DataFrame: """ Breadth-first search (BFS). @@ -259,18 +271,20 @@ def bfs(self, fromExpr: str, toExpr: str, :return: DataFrame with one Row for each shortest path between matching vertices. """ - builder = self._jvm_graph.bfs()\ - .fromExpr(fromExpr)\ - .toExpr(toExpr)\ - .maxPathLength(maxPathLength) + builder = ( + self._jvm_graph.bfs().fromExpr(fromExpr).toExpr(toExpr).maxPathLength(maxPathLength) + ) if edgeFilter is not None: builder.edgeFilter(edgeFilter) jdf = builder.run() return DataFrame(jdf, self._spark) - def aggregateMessages(self, aggCol: Union[Column, str], - sendToSrc: Union[Column, str, None] = None, - sendToDst: Union[Column, str, None] = None) -> DataFrame: + def aggregateMessages( + self, + aggCol: Union[Column, str], + sendToSrc: Union[Column, str, None] = None, + sendToDst: Union[Column, str, None] = None, + ) -> DataFrame: """ Aggregates messages from the neighbours. @@ -314,9 +328,12 @@ def aggregateMessages(self, aggCol: Union[Column, str], # Standard algorithms - def connectedComponents(self, algorithm: str = 'graphframes', - checkpointInterval: int = 2, - broadcastThreshold: int = 1000000) -> DataFrame: + def connectedComponents( + self, + algorithm: str = "graphframes", + checkpointInterval: int = 2, + broadcastThreshold: int = 1000000, + ) -> DataFrame: """ Computes the connected components of the graph. @@ -330,11 +347,13 @@ def connectedComponents(self, algorithm: str = 'graphframes', :return: DataFrame with new vertices column "component" """ - jdf = self._jvm_graph.connectedComponents() \ - .setAlgorithm(algorithm) \ - .setCheckpointInterval(checkpointInterval) \ - .setBroadcastThreshold(broadcastThreshold) \ + jdf = ( + self._jvm_graph.connectedComponents() + .setAlgorithm(algorithm) + .setCheckpointInterval(checkpointInterval) + .setBroadcastThreshold(broadcastThreshold) .run() + ) return DataFrame(jdf, self._spark) def labelPropagation(self, maxIter: int) -> DataFrame: @@ -349,10 +368,13 @@ def labelPropagation(self, maxIter: int) -> DataFrame: jdf = self._jvm_graph.labelPropagation().maxIter(maxIter).run() return DataFrame(jdf, self._spark) - def pageRank(self, resetProbability: float = 0.15, - sourceId: Optional[Any] = None, - maxIter: Optional[int] = None, - tol: Optional[float] = None) -> 'GraphFrame': + def pageRank( + self, + resetProbability: float = 0.15, + sourceId: Optional[Any] = None, + maxIter: Optional[int] = None, + tol: Optional[float] = None, + ) -> "GraphFrame": """ Runs the PageRank algorithm on the graph. 
Note: Exactly one of fixed_num_iter or tolerance must be set. @@ -379,9 +401,12 @@ def pageRank(self, resetProbability: float = 0.15, jgf = builder.run() return _from_java_gf(jgf, self._spark) - def parallelPersonalizedPageRank(self, resetProbability: float = 0.15, - sourceIds: Optional[list[Any]] = None, - maxIter: Optional[int] = None) -> 'GraphFrame': + def parallelPersonalizedPageRank( + self, + resetProbability: float = 0.15, + sourceIds: Optional[list[Any]] = None, + maxIter: Optional[int] = None, + ) -> "GraphFrame": """ Run the personalized PageRank algorithm on the graph, from the provided list of sources in parallel for a fixed number of iterations. @@ -393,7 +418,9 @@ def parallelPersonalizedPageRank(self, resetProbability: float = 0.15, :param maxIter: the fixed number of iterations this algorithm runs :return: GraphFrame with new vertices column "pageranks" and new edges column "weight" """ - assert sourceIds is not None and len(sourceIds) > 0, "Source vertices Ids sourceIds must be provided" + assert ( + sourceIds is not None and len(sourceIds) > 0 + ), "Source vertices Ids sourceIds must be provided" assert maxIter is not None, "Max number of iterations maxIter must be provided" sourceIds = self._sc._jvm.PythonUtils.toArray(sourceIds) builder = self._jvm_graph.parallelPersonalizedPageRank() @@ -427,10 +454,17 @@ def stronglyConnectedComponents(self, maxIter: int) -> DataFrame: jdf = self._jvm_graph.stronglyConnectedComponents().maxIter(maxIter).run() return DataFrame(jdf, self._spark) - def svdPlusPlus(self, rank: int = 10, maxIter: int = 2, - minValue: float = 0.0, maxValue: float = 5.0, - gamma1: float = 0.007, gamma2: float = 0.007, - gamma6: float = 0.005, gamma7: float = 0.015) -> tuple[DataFrame, float]: + def svdPlusPlus( + self, + rank: int = 10, + maxIter: int = 2, + minValue: float = 0.0, + maxValue: float = 5.0, + gamma1: float = 0.007, + gamma2: float = 0.007, + gamma6: float = 0.005, + gamma7: float = 0.015, + ) -> tuple[DataFrame, float]: """ Runs the SVD++ algorithm. 
@@ -461,13 +495,16 @@ def triangleCount(self) -> DataFrame: def _test(): import doctest + import graphframe + globs = graphframe.__dict__.copy() - globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - globs['spark'] = SparkSession(globs['sc']).builder.getOrCreate() + globs["sc"] = SparkContext("local[4]", "PythonTest", batchSize=2) + globs["spark"] = SparkSession(globs["sc"]).builder.getOrCreate() (failure_count, test_count) = doctest.testmod( - globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) - globs['sc'].stop() + globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE + ) + globs["sc"].stop() if failure_count: exit(-1) diff --git a/python/graphframes/lib/__init__.py b/python/graphframes/lib/__init__.py index 325e74543..076dd5232 100644 --- a/python/graphframes/lib/__init__.py +++ b/python/graphframes/lib/__init__.py @@ -1,5 +1,4 @@ - from .aggregate_messages import AggregateMessages from .pregel import Pregel -__all__ = ['AggregateMessages', 'Pregel'] +__all__ = ["AggregateMessages", "Pregel"] diff --git a/python/graphframes/lib/aggregate_messages.py b/python/graphframes/lib/aggregate_messages.py index c0867dcd0..bb454b008 100644 --- a/python/graphframes/lib/aggregate_messages.py +++ b/python/graphframes/lib/aggregate_messages.py @@ -18,13 +18,18 @@ from typing import Any from pyspark import SparkContext -from pyspark.sql import DataFrame, functions as sqlfunctions, SparkSession, Column +from pyspark.sql import Column, DataFrame, SparkSession +from pyspark.sql import functions as sqlfunctions def _java_api(jsc: SparkContext) -> Any: javaClassName = "org.graphframes.GraphFramePythonAPI" - return jsc._jvm.Thread.currentThread().getContextClassLoader().loadClass(javaClassName) \ - .newInstance() + return ( + jsc._jvm.Thread.currentThread() + .getContextClassLoader() + .loadClass(javaClassName) + .newInstance() + ) class _ClassProperty: diff --git a/python/graphframes/lib/pregel.py b/python/graphframes/lib/pregel.py index 72077c25c..0d9c5c25f 100644 --- a/python/graphframes/lib/pregel.py +++ b/python/graphframes/lib/pregel.py @@ -16,13 +16,17 @@ # import sys -from typing import Any -if sys.version > '3': +from typing import TYPE_CHECKING, Any + +if sys.version > "3": basestring = str +from pyspark.ml.wrapper import JavaWrapper from pyspark.sql import DataFrame, SparkSession from pyspark.sql.functions import col -from pyspark.ml.wrapper import JavaWrapper + +if TYPE_CHECKING: + from graphframes import GraphFrame class Pregel(JavaWrapper): @@ -75,11 +79,11 @@ class Pregel(JavaWrapper): ... .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree")) \ ... .aggMsgs(sum(Pregel.msg())) \ ... .run() - """ + """ # noqa: E501 def __init__(self, graph: "GraphFrame") -> None: super(Pregel, self).__init__() - from graphframes import GraphFrame + self.graph = graph self._java_obj = self._new_java_obj("org.graphframes.lib.Pregel", graph._jvm_graph) @@ -102,7 +106,9 @@ def setCheckpointInterval(self, value: int) -> "Pregel": self._java_obj.setCheckpointInterval(int(value)) return self - def withVertexColumn(self, colName: str, initialExpr: Any, updateAfterAggMsgsExpr: Any) -> "Pregel": + def withVertexColumn( + self, colName: str, initialExpr: Any, updateAfterAggMsgsExpr: Any + ) -> "Pregel": """ Defines an additional vertex column at the start of run and how to update it in each iteration. 
@@ -116,7 +122,7 @@ def withVertexColumn(self, colName: str, initialExpr: Any, updateAfterAggMsgsExp You can reference all original vertex columns, additional vertex columns, and the aggregated message column using :func:`msg`. If the vertex received no messages, the message column would be null. - """ + """ # noqa: E501 self._java_obj.withVertexColumn(colName, initialExpr._jc, updateAfterAggMsgsExpr._jc) return self @@ -133,7 +139,7 @@ def sendMsgToSrc(self, msgExpr: Any) -> "Pregel": and `edge`, respectively. You can reference them using :func:`src`, :func:`dst`, and :func:`edge`. Null messages are not included in message aggregation. - """ + """ # noqa: E501 self._java_obj.sendMsgToSrc(msgExpr._jc) return self @@ -150,7 +156,7 @@ def sendMsgToDst(self, msgExpr: Any) -> "Pregel": and `edge`, respectively. You can reference them using :func:`src`, :func:`dst`, and :func:`edge`. Null messages are not included in message aggregation. - """ + """ # noqa: E501 self._java_obj.sendMsgToDst(msgExpr._jc) return self @@ -161,7 +167,7 @@ def aggMsgs(self, aggExpr: Any) -> "Pregel": :param aggExpr: the message aggregation expression, such as `sum(Pregel.msg())`. You can reference the message column by :func:`msg` and the vertex ID by `col("id")`, while the latter is usually not used. - """ + """ # noqa: E501 self._java_obj.aggMsgs(aggExpr._jc) return self @@ -170,7 +176,7 @@ def run(self) -> DataFrame: Runs the defined Pregel algorithm. :return: the result vertex DataFrame from the final iteration including both original and additional columns. - """ + """ # noqa: E501 return DataFrame(self._java_obj.run(), SparkSession.getActiveSession()) @staticmethod @@ -179,7 +185,7 @@ def msg() -> Any: References the message column in aggregating messages and updating additional vertex columns. See :func:`aggMsgs` and :func:`withVertexColumn` - """ + """ # noqa: E501 return col("_pregel_msg_") @staticmethod diff --git a/python/graphframes/tests.py b/python/graphframes/tests.py index 9a7ad1371..dee8405e8 100644 --- a/python/graphframes/tests.py +++ b/python/graphframes/tests.py @@ -15,60 +15,63 @@ # limitations under the License. # +import re +import shutil import sys import tempfile -import shutil -import re if sys.version_info[:2] <= (2, 6): try: import unittest2 as unittest except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.stderr.write("Please install unittest2 to test with Python 2.6 or earlier") sys.exit(1) else: import unittest from pyspark import SparkContext -from pyspark.sql import functions as sqlfunctions, SparkSession +from pyspark.sql import SparkSession +from pyspark.sql import functions as sqlfunctions -from .graphframe import GraphFrame, Pregel, _java_api, _from_java_gf +from .examples import BeliefPropagation, Graphs +from .graphframe import GraphFrame, Pregel, _from_java_gf, _java_api from .lib import AggregateMessages as AM -from .examples import Graphs, BeliefPropagation -class GraphFrameTestUtils(object): +class GraphFrameTestUtils(object): @classmethod def parse_spark_version(cls, version_str): - """ take an input version string - return version items in a dictionary + """take an input version string + return version items in a dictionary """ - _sc_ver_patt = r'(\d+)\.(\d+)(\.(\d+)(-(.+))?)?' + _sc_ver_patt = r"(\d+)\.(\d+)(\.(\d+)(-(.+))?)?" 
m = re.match(_sc_ver_patt, version_str) if not m: - raise TypeError("version {} should be in <major>.<minor>..".format(version_str)) + raise TypeError( + "version {} should be in <major>.<minor>..".format(version_str) + ) version_info = {} try: - version_info['major'] = int(m.group(1)) - except: + version_info["major"] = int(m.group(1)) + except: # noqa: E722 raise TypeError("invalid minor version") try: - version_info['minor'] = int(m.group(2)) - except: + version_info["minor"] = int(m.group(2)) + except: # noqa: E722 raise TypeError("invalid major version") try: - version_info['maintenance'] = int(m.group(4)) - except: - version_info['maintenance'] = 0 + version_info["maintenance"] = int(m.group(4)) + except: # noqa: E722 + version_info["maintenance"] = 0 try: - version_info['special'] = m.group(6) - except: + version_info["special"] = m.group(6) + except: # noqa: E722 pass return version_info @classmethod def createSparkContext(cls): - cls.sc = sc = SparkContext('local[4]', "GraphFramesTests") + cls.sc = sc = SparkContext("local[4]", "GraphFramesTests") cls.checkpointDir = tempfile.mkdtemp() cls.sc.setCheckpointDir(cls.checkpointDir) cls.spark_version = cls.parse_spark_version(sc.version) @@ -81,10 +84,10 @@ def stopSparkContext(cls): @classmethod def spark_at_least_of_version(cls, version_str): - assert hasattr(cls, 'spark_version') + assert hasattr(cls, "spark_version") required_version = cls.parse_spark_version(version_str) spark_version = cls.spark_version - for _name in ['major', 'minor', 'maintenance']: + for _name in ["major", "minor", "maintenance"]: sc_ver = spark_version[_name] req_ver = required_version[_name] if sc_ver != req_ver: @@ -92,19 +95,24 @@ def spark_at_least_of_version(cls, version_str): # All major.minor.maintenance equal return True + def setUpModule(): GraphFrameTestUtils.createSparkContext() + def tearDownModule(): GraphFrameTestUtils.stopSparkContext() class GraphFrameTestCase(unittest.TestCase): - @classmethod def setUpClass(cls): # Small tests run much faster with spark.sql.shuffle.partitions = 4 - cls.spark = SparkSession(GraphFrameTestUtils.sc).builder.config('spark.sql.shuffle.partitions', 4).getOrCreate() + cls.spark = ( + SparkSession(GraphFrameTestUtils.sc) + .builder.config("spark.sql.shuffle.partitions", 4) + .getOrCreate() + ) @classmethod def tearDownClass(cls): @@ -136,14 +144,20 @@ def test_construction(self): assert sorted(vertexIDs) == [1, 2, 3] edgeActions = map(lambda x: x[0], g.edges.select("action").collect()) assert sorted(edgeActions) == ["follow", "hate", "love"] - tripletsFirst = list(map(lambda x: (x[0][1], x[1][1], x[2][2]), - g.triplets.sort("src.id").select("src", "dst", "edge").take(1))) + tripletsFirst = list( + map( + lambda x: (x[0][1], x[1][1], x[2][2]), + g.triplets.sort("src.id").select("src", "dst", "edge").take(1), + ) + ) assert tripletsFirst == [("A", "B", "love")], tripletsFirst # Try with invalid vertices and edges DataFrames v_invalid = self.spark.createDataFrame( - [(1, "A"), (2, "B"), (3, "C")], ["invalid_colname_1", "invalid_colname_2"]) + [(1, "A"), (2, "B"), (3, "C")], ["invalid_colname_1", "invalid_colname_2"] + ) e_invalid = self.spark.createDataFrame( - [(1, 2), (2, 3), (3, 1)], ["invalid_colname_3", "invalid_colname_4"]) + [(1, 2), (2, 3), (3, 1)], ["invalid_colname_3", "invalid_colname_4"] + ) with self.assertRaises(ValueError): GraphFrame(v_invalid, e_invalid) @@ -223,14 +237,12 @@ def setUp(self): super(PregelTest, self).setUp() def test_page_rank(self): - from pyspark.sql.functions import coalesce, col, lit, sum, when - edges =
self.spark.createDataFrame([[0, 1], - [1, 2], - [2, 4], - [2, 0], - [3, 4], # 3 has no in-links - [4, 0], - [4, 2]], ["src", "dst"]) + from pyspark.sql.functions import coalesce, lit, sum + + edges = self.spark.createDataFrame( + [[0, 1], [1, 2], [2, 4], [2, 0], [3, 4], [4, 0], [4, 2]], # 3 has no in-links + ["src", "dst"], + ) edges.cache() vertices = self.spark.createDataFrame([[0], [1], [2], [3], [4]], ["id"]) numVertices = vertices.count() @@ -238,19 +250,22 @@ def test_page_rank(self): vertices.cache() graph = GraphFrame(vertices, edges) alpha = 0.15 - ranks = graph.pregel \ - .setMaxIter(5) \ - .withVertexColumn("rank", lit(1.0 / numVertices), - coalesce(Pregel.msg(), - lit(0.0)) * lit(1.0 - alpha) + lit(alpha / numVertices)) \ - .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree")) \ - .aggMsgs(sum(Pregel.msg())) \ + ranks = ( + graph.pregel.setMaxIter(5) + .withVertexColumn( + "rank", + lit(1.0 / numVertices), + coalesce(Pregel.msg(), lit(0.0)) * lit(1.0 - alpha) + lit(alpha / numVertices), + ) + .sendMsgToDst(Pregel.src("rank") / Pregel.src("outDegree")) + .aggMsgs(sum(Pregel.msg())) .run() + ) resultRows = ranks.sort(ranks.id).collect() result = map(lambda x: x.rank, resultRows) expected = [0.245, 0.224, 0.303, 0.03, 0.197] for a, b in zip(result, expected): - self.assertAlmostEqual(a, b, delta = 1e-3) + self.assertAlmostEqual(a, b, delta=1e-3) class GraphFrameLibTest(GraphFrameTestCase): @@ -258,11 +273,11 @@ def setUp(self): super(GraphFrameLibTest, self).setUp() self.japi = _java_api(self.spark._sc) - def _hasCols(self, graph, vcols = [], ecols = []): + def _hasCols(self, graph, vcols=[], ecols=[]): map(lambda c: self.assertIn(c, graph.vertices.columns), vcols) map(lambda c: self.assertIn(c, graph.edges.columns), ecols) - def _df_hasCols(self, vertices, vcols = []): + def _df_hasCols(self, vertices, vcols=[]): map(lambda c: self.assertIn(c, vertices.columns), vcols) def _graph(self, name, *args): @@ -272,7 +287,7 @@ def _graph(self, name, *args): :param name: the name of the example graph :param args: all the required arguments, without the initial spark session :return: - """ + """ # noqa: E501 examples = self.japi.examples() jgraph = getattr(examples, name)(*args) return _from_java_gf(jgraph, self.spark) @@ -281,30 +296,27 @@ def test_aggregate_messages(self): g = self._graph("friends") # For each user, sum the ages of the adjacent users, # plus 1 for the src's sum if the edge is "friend". - sendToSrc = ( - AM.dst['age'] + - sqlfunctions.when( - AM.edge['relationship'] == 'friend', - sqlfunctions.lit(1) - ).otherwise(0)) - sendToDst = AM.src['age'] + sendToSrc = AM.dst["age"] + sqlfunctions.when( + AM.edge["relationship"] == "friend", sqlfunctions.lit(1) + ).otherwise(0) + sendToDst = AM.src["age"] agg = g.aggregateMessages( - sqlfunctions.sum(AM.msg).alias('summedAges'), - sendToSrc=sendToSrc, - sendToDst=sendToDst) + sqlfunctions.sum(AM.msg).alias("summedAges"), sendToSrc=sendToSrc, sendToDst=sendToDst + ) # Run the aggregation again providing SQL expressions as String instead. agg2 = g.aggregateMessages( "sum(MSG) AS `summedAges`", - sendToSrc="(dst['age'] + CASE WHEN (edge['relationship'] = 'friend') THEN 1 ELSE 0 END)", - sendToDst="src['age']") + sendToSrc="(dst['age'] + CASE WHEN (edge['relationship'] = 'friend') THEN 1 ELSE 0 END)", # noqa: E501 + sendToDst="src['age']", + ) # Convert agg and agg2 to a mapping from id to the aggregated message. 
- aggMap = {id_: s for id_, s in agg.select('id', 'summedAges').collect()} - agg2Map = {id_: s for id_, s in agg2.select('id', 'summedAges').collect()} + aggMap = {id_: s for id_, s in agg.select("id", "summedAges").collect()} + agg2Map = {id_: s for id_, s in agg2.select("id", "summedAges").collect()} # Compute the truth via brute force. - user2age = {id_: age for id_, age in g.vertices.select('id', 'age').collect()} + user2age = {id_: age for id_, age in g.vertices.select("id", "age").collect()} trueAgg = {} for src, dst, rel in g.edges.select("src", "dst", "relationship").collect(): - trueAgg[src] = trueAgg.get(src, 0) + user2age[dst] + (1 if rel == 'friend' else 0) + trueAgg[src] = trueAgg.get(src, 0) + user2age[dst] + (1 if rel == "friend" else 0) trueAgg[dst] = trueAgg.get(dst, 0) + user2age[src] # Compare if the agg mappings match the brute force mapping self.assertEqual(aggMap, trueAgg) @@ -312,22 +324,19 @@ def test_aggregate_messages(self): # Check that TypeError is raises with messages of wrong type with self.assertRaises(TypeError): g.aggregateMessages( - "sum(MSG) AS `summedAges`", - sendToSrc=object(), - sendToDst="src['age']") + "sum(MSG) AS `summedAges`", sendToSrc=object(), sendToDst="src['age']" + ) with self.assertRaises(TypeError): g.aggregateMessages( - "sum(MSG) AS `summedAges`", - sendToSrc=dst['age'], - sendToDst=object()) + "sum(MSG) AS `summedAges`", sendToSrc=dst["age"], sendToDst=object() + ) def test_connected_components(self): - v = self.spark.createDataFrame([ - (0, "a", "b")], ["id", "vattr", "gender"]) + v = self.spark.createDataFrame([(0, "a", "b")], ["id", "vattr", "gender"]) e = self.spark.createDataFrame([(0, 0, 1)], ["src", "dst", "test"]).filter("src > 10") g = GraphFrame(v, e) comps = g.connectedComponents() - self._df_hasCols(comps, vcols=['id', 'component', 'vattr', 'gender']) + self._df_hasCols(comps, vcols=["id", "component", "vattr", "gender"]) self.assertEqual(comps.count(), 1) def test_connected_components2(self): @@ -335,7 +344,7 @@ def test_connected_components2(self): e = self.spark.createDataFrame([(0, 1, "a01", "b01")], ["src", "dst", "A", "B"]) g = GraphFrame(v, e) comps = g.connectedComponents() - self._df_hasCols(comps, vcols=['id', 'component', 'A', 'B']) + self._df_hasCols(comps, vcols=["id", "component", "A", "B"]) self.assertEqual(comps.count(), 2) def test_connected_components_friends(self): @@ -367,7 +376,7 @@ def test_page_rank(self): resetProb = 0.15 errorTol = 1.0e-5 pr = g.pageRank(resetProb, tol=errorTol) - self._hasCols(pr, vcols=['id', 'pagerank'], ecols=['src', 'dst', 'weight']) + self._hasCols(pr, vcols=["id", "pagerank"], ecols=["src", "dst", "weight"]) def test_parallel_personalized_page_rank(self): n = 100 @@ -376,7 +385,7 @@ def test_parallel_personalized_page_rank(self): maxIter = 15 sourceIds = [1, 2, 3, 4] pr = g.parallelPersonalizedPageRank(resetProb, sourceIds=sourceIds, maxIter=maxIter) - self._hasCols(pr, vcols=['id', 'pageranks'], ecols=['src', 'dst', 'weight']) + self._hasCols(pr, vcols=["id", "pageranks"], ecols=["src", "dst", "weight"]) def test_shortest_paths(self): edges = [(1, 2), (1, 5), (2, 3), (2, 5), (3, 4), (4, 5), (4, 6)] @@ -391,7 +400,7 @@ def test_shortest_paths(self): def test_svd_plus_plus(self): g = self._graph("ALSSyntheticData") (v2, cost) = g.svdPlusPlus() - self._df_hasCols(v2, vcols=['id', 'column1', 'column2', 'column3', 'column4']) + self._df_hasCols(v2, vcols=["id", "column1", "column2", "column3", "column4"]) def test_strongly_connected_components(self): # Simple island test @@ 
-408,25 +417,26 @@ def test_triangle_counts(self): g = GraphFrame(vertices, edges) c = g.triangleCount() for row in c.select("id", "count").collect(): - self.assertEqual(row.asDict()['count'], 1) - + self.assertEqual(row.asDict()["count"], 1) + def test_mutithreaded_sparksession_usage(self): # Test that we can use the GraphFrame API from multiple threads localVertices = [(1, "A"), (2, "B"), (3, "C")] localEdges = [(1, 2, "love"), (2, 1, "hate"), (2, 3, "follow")] v = self.spark.createDataFrame(localVertices, ["id", "name"]) e = self.spark.createDataFrame(localEdges, ["src", "dst", "action"]) - - + exc = None + def run_graphframe() -> None: try: GraphFrame(v, e) except Exception as _e: nonlocal exc exc = _e - + import threading + thread = threading.Thread(target=run_graphframe) thread.start() thread.join() @@ -445,11 +455,12 @@ def test_belief_propagation(self): numIter = 5 results = BeliefPropagation.runBPwithGraphFrames(g, numIter) # check beliefs are valid - for row in results.vertices.select('belief').collect(): - belief = row['belief'] + for row in results.vertices.select("belief").collect(): + belief = row["belief"] self.assertTrue( 0 <= belief <= 1, - msg="Expected belief to be probability in [0,1], but found {}".format(belief)) + msg="Expected belief to be probability in [0,1], but found {}".format(belief), + ) def test_graph_friends(self): # construct graph @@ -462,7 +473,7 @@ def test_graph_grid_ising_model(self): n = 3 g = Graphs(self.spark).gridIsingModel(n) # check that all the vertices exist - ids = [v['id'] for v in g.vertices.collect()] + ids = [v["id"] for v in g.vertices.collect()] for i in range(n): for j in range(n): - self.assertIn('{},{}'.format(i, j), ids) + self.assertIn("{},{}".format(i, j), ids) diff --git a/python/graphframes/tutorials/motif.py b/python/graphframes/tutorials/motif.py index 59691946a..5fae8b87b 100644 --- a/python/graphframes/tutorials/motif.py +++ b/python/graphframes/tutorials/motif.py @@ -1,9 +1,11 @@ -"""Demonstrate GraphFrames network motif finding capabilities. Code from the Network Motif Finding Tutorial.""" +"""Demonstrate GraphFrames network motif finding capabilities. Code from the Network Motif Finding Tutorial.""" # noqa: E501 # # Interactive Usage: pyspark --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 # -# Batch Usage: spark-submit --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 python/graphframes/tutorials/motif.py +# Batch Usage: +# spark-submit \ +# --packages graphframes:graphframes:0.8.4-spark3.5-s_2.12 python/graphframes/tutorials/motif.py # import click @@ -22,7 +24,7 @@ # -# Load the nodes and edges from disk, repartition, checkpoint [plan got long for some reason] and cache. +# Load the nodes and edges from disk, repartition, checkpoint [plan got long for some reason] and cache. 
# noqa: E501 # # We created these in stackexchange.py from Stack Exchange data dump XML files diff --git a/python/graphframes/tutorials/stackexchange.py b/python/graphframes/tutorials/stackexchange.py index 5dab1eafe..32507455f 100644 --- a/python/graphframes/tutorials/stackexchange.py +++ b/python/graphframes/tutorials/stackexchange.py @@ -3,7 +3,9 @@ # # Interactive Usage: pyspark --packages com.databricks:spark-xml_2.12:0.18.0 # -# Batch Usage: spark-submit --packages com.databricks:spark-xml_2.12:0.18.0 python/graphframes/tutorials/stackexchange.py +# Batch Usage: +# spark-submit \ +# --packages com.databricks:spark-xml_2.12:0.18.0 python/graphframes/tutorials/stackexchange.py # from __future__ import annotations @@ -362,7 +364,8 @@ def add_missing_columns(df: DataFrame, all_cols: list[tuple[str, T.StructField]] # * [Post]--Links-->[Post] # # Remember: 'src', 'dst' and 'relationship' are standard edge fields in GraphFrames -# Remember: we must produce src/dst based on lowercase 'id' UUID, not 'Id' which is Stack Overflow's integer. +# Remember: we must produce src/dst based on lowercase 'id' UUID, +# not 'Id' which is Stack Overflow's integer. # # @@ -410,7 +413,7 @@ def add_missing_columns(df: DataFrame, all_cols: list[tuple[str, T.StructField]] ) click.echo(f"Total Asks edges: {user_asks_edges_df.count():,}") click.echo( - f"Percentage of asked questions linked to users: {user_asks_edges_df.count() / questions_df.count():.2%}\n" + f"Percentage of asked questions linked to users: {user_asks_edges_df.count() / questions_df.count():.2%}\n" # noqa: E501 ) @@ -435,7 +438,7 @@ def add_missing_columns(df: DataFrame, all_cols: list[tuple[str, T.StructField]] ) click.echo(f"Total User Answers edges: {user_answers_edges_df.count():,}") click.echo( - f"Percentage of answers linked to users: {user_answers_edges_df.count() / answers_df.count():.2%}\n" + f"Percentage of answers linked to users: {user_answers_edges_df.count() / answers_df.count():.2%}\n" # noqa: E501 ) diff --git a/python/graphframes/tutorials/utils.py b/python/graphframes/tutorials/utils.py index 46db14d96..c57ead15a 100644 --- a/python/graphframes/tutorials/utils.py +++ b/python/graphframes/tutorials/utils.py @@ -18,7 +18,7 @@ def three_edge_count(paths: DataFrame) -> DataFrame: ------- DataFrame A DataFrame of the counts of the different types of 3-node graphlets in the graph. - """ + """ # noqa: E501 graphlet_type_df = paths.select( F.col("a.Type").alias("A_Type"), F.col("e1.relationship").alias("E_relationship"), @@ -113,7 +113,7 @@ def add_type_degree(g: GraphFrame) -> DataFrame: ------- DataFrame - I am broke, next line is wrong A GraphFrame with a map[type:degree] 'type_degree' field added to the vertices - """ + """ # noqa: E501 type_degree: DataFrame = ( g.edges.select(F.col("src").alias("id"), "relationship") .filter(F.col("id").isNotNull()) diff --git a/python/poetry.lock b/python/poetry.lock index fa319d849..cca2c1efd 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -1,35 +1,34 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "black" -version = "25.1.0" +version = "23.12.1" description = "The uncompromising code formatter." 
optional = false -python-versions = ">=3.9" -groups = ["dev"] +python-versions = ">=3.8" files = [ - {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, - {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, - {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"}, - {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"}, - {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"}, - {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"}, - {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"}, - {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"}, - {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"}, - {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"}, - {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"}, - {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"}, - {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"}, - {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"}, - {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"}, - {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"}, - {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"}, - {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"}, - {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"}, - {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"}, - {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"}, - {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"}, + {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, + {file = 
"black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, + {file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"}, + {file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"}, + {file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"}, + {file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"}, + {file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"}, + {file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"}, + {file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"}, + {file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"}, + {file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"}, + {file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"}, + {file = "black-23.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1fa88a0f74e50e4487477bc0bb900c6781dbddfdfa32691e780bf854c3b4a47f"}, + {file = "black-23.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a4d6a9668e45ad99d2f8ec70d5c8c04ef4f32f648ef39048d010b0689832ec6d"}, + {file = "black-23.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18fb2ae6c4bb63eebe5be6bd869ba2f14fd0259bda7d18a46b764d8fb86298a"}, + {file = "black-23.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:c04b6d9d20e9c13f43eee8ea87d44156b8505ca8a3c878773f68b4e4812a421e"}, + {file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"}, + {file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"}, + {file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"}, + {file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"}, + {file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"}, + {file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"}, ] [package.dependencies] @@ -43,7 +42,7 @@ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.10)"] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -53,8 +52,6 @@ version = "1.1.0" description = "Python bindings for the Brotli compression library" optional = false python-versions = "*" -groups = ["tutorials"] 
-markers = "platform_python_implementation == \"CPython\"" files = [ {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1140c64812cb9b06c922e77f1c26a75ec5e3f0fb2bf92cc8c58720dec276752"}, {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8fd5270e906eef71d4a8d19b7c6a43760c6abcfcc10c9101d14eb2357418de9"}, @@ -189,8 +186,6 @@ version = "1.1.0.0" description = "Python CFFI bindings to the Brotli library" optional = false python-versions = ">=3.7" -groups = ["tutorials"] -markers = "platform_python_implementation == \"PyPy\"" files = [ {file = "brotlicffi-1.1.0.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9b7ae6bd1a3f0df532b6d67ff674099a96d22bc0948955cb338488c31bfb8851"}, {file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19ffc919fa4fc6ace69286e0a23b3789b4219058313cf9b45625016bf7ff996b"}, @@ -230,7 +225,6 @@ version = "2025.1.31" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" -groups = ["tutorials"] files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, @@ -242,8 +236,6 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." optional = false python-versions = ">=3.8" -groups = ["tutorials"] -markers = "platform_python_implementation == \"PyPy\"" files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -323,7 +315,6 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" -groups = ["tutorials"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -425,7 +416,6 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" -groups = ["dev", "tutorials"] files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -440,8 +430,6 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["dev", "tutorials"] -markers = "platform_system == \"Windows\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -453,7 +441,6 @@ version = "7.1.2" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" -groups = ["dev"] files = [ {file = "flake8-7.1.2-py2.py3-none-any.whl", hash = "sha256:1cbc62e65536f65e6d754dfe6f1bada7f5cf392d6f5db3c2b85892466c3e7c1a"}, {file = "flake8-7.1.2.tar.gz", hash = "sha256:c586ffd0b41540951ae41af572e6790dbd49fc12b3aa2541685d253d9bd504bd"}, @@ -470,7 +457,6 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" -groups = ["tutorials"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -485,7 +471,6 @@ version = "1.0.1" description = "deflate64 compression/decompression library" optional = false python-versions = ">=3.9" -groups = ["tutorials"] files = [ {file = "inflate64-1.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5122a188995e47a735ab969edc9129d42bbd97b993df5a3f0819b87205ce81b4"}, {file = "inflate64-1.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:975ed694c680e46a5c0bb872380a9c9da271a91f9c0646561c58e8f3714347d4"}, @@ -537,14 +522,13 @@ test = ["pytest"] [[package]] name = "isort" -version = "6.0.0" +version = "6.0.1" description = "A Python utility / library to sort Python imports." optional = false python-versions = ">=3.9.0" -groups = ["dev"] files = [ - {file = "isort-6.0.0-py3-none-any.whl", hash = "sha256:567954102bb47bb12e0fae62606570faacddd441e45683968c8d1734fb1af892"}, - {file = "isort-6.0.0.tar.gz", hash = "sha256:75d9d8a1438a9432a7d7b54f2d3b45cad9a4a0fdba43617d9873379704a8bdf1"}, + {file = "isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615"}, + {file = "isort-6.0.1.tar.gz", hash = "sha256:1cb5df28dfbc742e490c5e41bad6da41b805b0a8be7bc93cd0fb2a8a890ac450"}, ] [package.extras] @@ -557,7 +541,6 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" -groups = ["dev"] files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -569,7 +552,6 @@ version = "0.2.3" description = "multi volume file wrapper library" optional = false python-versions = ">=3.6" -groups = ["tutorials"] files = [ {file = "multivolumefile-0.2.3-py3-none-any.whl", hash = "sha256:237f4353b60af1703087cf7725755a1f6fcaeeea48421e1896940cd1c920d678"}, {file = "multivolumefile-0.2.3.tar.gz", hash = "sha256:a0648d0aafbc96e59198d5c17e9acad7eb531abea51035d08ce8060dcad709d6"}, @@ -586,7 +568,6 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false python-versions = ">=3.5" -groups = ["dev"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -598,7 +579,6 @@ version = "1.3.7" description = "nose extends unittest to make testing easier" optional = false python-versions = "*" -groups = ["main"] files = [ {file = "nose-1.3.7-py2-none-any.whl", hash = "sha256:dadcddc0aefbf99eea214e0f1232b94f2fa9bd98fa8353711dacb112bfcbbb2a"}, {file = "nose-1.3.7-py3-none-any.whl", hash = "sha256:9ff7c6cc443f8c51994b34a667bbcf45afd6d945be7477b52e97516fd17c53ac"}, @@ -611,7 +591,6 @@ version = "2.0.2" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" -groups = ["main"] files = [ {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, @@ -666,7 +645,6 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -678,7 +656,6 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -690,7 +667,6 @@ version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, @@ -707,8 +683,6 @@ version = "7.0.0" description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." 
optional = false python-versions = ">=3.6" -groups = ["tutorials"] -markers = "sys_platform != \"cygwin\"" files = [ {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, @@ -732,7 +706,6 @@ version = "0.10.9.7" description = "Enables Python programs to dynamically access arbitrary Java objects" optional = false python-versions = "*" -groups = ["main"] files = [ {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash = "sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"}, {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"}, @@ -744,7 +717,6 @@ version = "0.22.0" description = "Pure python 7-zip library" optional = false python-versions = ">=3.8" -groups = ["tutorials"] files = [ {file = "py7zr-0.22.0-py3-none-any.whl", hash = "sha256:993b951b313500697d71113da2681386589b7b74f12e48ba13cc12beca79d078"}, {file = "py7zr-0.22.0.tar.gz", hash = "sha256:c6c7aea5913535184003b73938490f9a4d8418598e533f9ca991d3b8e45a139e"}, @@ -775,7 +747,6 @@ version = "1.0.3" description = "bcj filter library" optional = false python-versions = ">=3.9" -groups = ["tutorials"] files = [ {file = "pybcj-1.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0bd8afeacf9173af091a08783aa9111500f5619ce0ae486bffb5ee4d08a331b4"}, {file = "pybcj-1.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc81d3c941485e7d3c2812834ca005849fe91a624977ed5227658cf952d19696"}, @@ -830,7 +801,6 @@ version = "2.12.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"}, {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"}, @@ -842,8 +812,6 @@ version = "2.22" description = "C parser in Python" optional = false python-versions = ">=3.8" -groups = ["tutorials"] -markers = "platform_python_implementation == \"PyPy\"" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -855,7 +823,6 @@ version = "3.21.0" description = "Cryptographic library for Python" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -groups = ["tutorials"] files = [ {file = "pycryptodomex-3.21.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:dbeb84a399373df84a69e0919c1d733b89e049752426041deeb30d68e9867822"}, {file = "pycryptodomex-3.21.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:a192fb46c95489beba9c3f002ed7d93979423d1b2a53eab8771dbb1339eb3ddd"}, @@ -897,7 +864,6 @@ version = "3.2.0" description = "passive checker of Python programs" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"}, {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, @@ -909,7 +875,6 @@ version = "1.1.1" description = "PPMd compression/decompression library" optional = false 
python-versions = ">=3.9" -groups = ["tutorials"] files = [ {file = "pyppmd-1.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:406b184132c69e3f60ea9621b69eaa0c5494e83f82c307b3acce7b86a4f8f888"}, {file = "pyppmd-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2cf003bb184adf306e1ac1828107307927737dde63474715ba16462e266cbef"}, @@ -976,13 +941,12 @@ test = ["coverage[toml] (>=5.2)", "hypothesis", "pytest (>=6.0)", "pytest-benchm [[package]] name = "pyspark" -version = "3.5.4" +version = "3.5.5" description = "Apache Spark Python API" optional = false python-versions = ">=3.8" -groups = ["main"] files = [ - {file = "pyspark-3.5.4.tar.gz", hash = "sha256:1c2926d63020902163f58222466adf6f8016f6c43c1f319b8e7a71dbaa05fc51"}, + {file = "pyspark-3.5.5.tar.gz", hash = "sha256:6effc9ce98edf231f4d683fd14f7270629bf8458c628d6a2620ded4bb34f3cb9"}, ] [package.dependencies] @@ -1001,7 +965,6 @@ version = "0.16.2" description = "Python bindings to Zstandard (zstd) compression library." optional = false python-versions = ">=3.5" -groups = ["tutorials"] files = [ {file = "pyzstd-0.16.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:637376c8f8cbd0afe1cab613f8c75fd502bd1016bf79d10760a2d5a00905fe62"}, {file = "pyzstd-0.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3e7a7118cbcfa90ca2ddbf9890c7cb582052a9a8cf2b7e2c1bbaf544bee0f16a"}, @@ -1094,7 +1057,6 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" -groups = ["tutorials"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -1116,7 +1078,6 @@ version = "1.7.0" description = "module to create simple ASCII tables" optional = false python-versions = "*" -groups = ["tutorials"] files = [ {file = "texttable-1.7.0-py2.py3-none-any.whl", hash = "sha256:72227d592c82b3d7f672731ae73e4d1f88cd8e2ef5b075a7a7f01a23a3743917"}, {file = "texttable-1.7.0.tar.gz", hash = "sha256:2d2068fb55115807d3ac77a4ca68fa48803e84ebb0ee2340f858107a36522638"}, @@ -1128,8 +1089,6 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -1171,8 +1130,6 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version < \"3.11\"" files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, @@ -1184,19 +1141,18 @@ version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" -groups = ["tutorials"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, ] [package.extras] -brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] [metadata] -lock-version = "2.1" +lock-version = "2.0" python-versions = ">=3.9 <3.13" -content-hash = "33ae7f96a3999d6822af7778f9b7878355d811534a4b5fec14d51ec29aa8dce2" +content-hash = "8ed74b87abe8e7d5dcb10549a8d1ce35cce4d2db642f902d8021c08cf4b17345" diff --git a/python/pyproject.toml b/python/pyproject.toml index 819d2bbdd..8cb109bfe 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -30,7 +30,7 @@ pyspark = "^3.4" numpy = ">= 1.7" [tool.poetry.group.dev.dependencies] -black = "^25.1.0" +black = "^23.12.1" flake8 = "^7.1.1" isort = "^6.0.0" @@ -50,9 +50,7 @@ build-backend = "poetry.core.masonry.api" line-length = 100 target-version = ["py39"] include = ["graphframes"] - -[tool.flake8] -max-line-length = 100 +required-version = "23.12.1" [tool.isort] profile = "black" diff --git a/python/tox.ini b/python/tox.ini new file mode 100644 index 000000000..ab0a6a1f2 --- /dev/null +++ b/python/tox.ini @@ -0,0 +1,8 @@ +[flake8] +ignore = + E203, + E402, + F811, + W503, + W504, +max-line-length = 100
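Note on the lint configuration changes above: stock flake8 does not read settings from pyproject.toml, so the removed [tool.flake8] table is not applied by flake8 itself; tox.ini (like setup.cfg or .flake8) is a file flake8 does pick up, which is presumably why the settings moved there. For reference only, a minimal annotated sketch of the same [flake8] section, spelling out what each suppressed code means, could look like this (the comments are additions, not part of the committed file):

[flake8]
# Codes suppressed project-wide:
#   E203 - whitespace before ':' (commonly ignored alongside black's slice formatting)
#   E402 - module-level import not at top of file
#   F811 - redefinition of an unused name
#   W503 - line break before a binary operator
#   W504 - line break after a binary operator
ignore = E203, E402, F811, W503, W504
max-line-length = 100

The required-version = "23.12.1" pin under [tool.black] makes black refuse to run under any other release, so all contributors format the code with the same black version that the ^23.12.1 dev dependency installs.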