8000 feat: refactoring of python && docs && sync pyspark with core by SemyonSinchenko · Pull Request #711 · graphframes/graphframes · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,6 @@ connect/project
# Auto-generated doc
/docs/src/02-quick-start/01-installation.md
/docs/src/05-blog/feed.xml

# Local spark distro
spark-*
1 change: 1 addition & 0 deletions .scalafix.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ rules = [
OrganizeImports
ExplicitResultTypes
]
OrganizeImports.targetDialect = Scala3
4 changes: 2 additions & 2 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ optIn = {
danglingParentheses.preset = false
docstrings.style = Asterisk
maxColumn = 98
runner.dialect = scala213
version = 3.8.5
runner.dialect = Scala213Source3
version = 3.8.5
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.storage.StorageLevel
import org.graphframes.GraphFrame
import org.graphframes.examples.LDBCUtils
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.annotations.*
import org.openjdk.jmh.infra.Blackhole

import java.io.File
Expand Down
2 changes: 1 addition & 1 deletion buf.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
version: v2
modules:
- path: graphframes-connect/src/main/protobuf
- path: connect/src/main/protobuf
87 changes: 68 additions & 19 deletions connect/src/main/protobuf/graphframes.proto
Original file line number Diff line number Diff line change
Expand Up @@ -5,40 +5,63 @@ package org.graphframes.connect.proto;
option java_multiple_files = true;
option java_package = "org.graphframes.connect.proto";
option java_generate_equals_and_hash = true;
option optimize_for=SPEED;
option optimize_for = SPEED;


// GraphFramesAPI represents the core message type for GraphFrames operations
// containing graph data and the specific graph algorithm to be executed
message GraphFramesAPI {
// Serialized vertex DataFrame containing node information
bytes vertices = 1;
// Serialized edge DataFrame containing relationship information
bytes edges = 2;
// Specifies which graph algorithm operation to perform
oneof method {
AggregateMessages aggregate_messages = 3;
BFS bfs = 4;
ConnectedComponents connected_components = 5;
DropIsolatedVertices drop_isolated_vertices = 6;
FilterEdges filter_edges = 7;
FilterVertices filter_vertices = 8;
Find find = 9;
LabelPropagation label_propagation = 10;
PageRank page_rank = 11;
ParallelPersonalizedPageRank parallel_personalized_page_rank = 12;
PowerIterationClustering power_iteration_clustering = 13;
Pregel pregel = 14;
ShortestPaths shortest_paths = 15;
StronglyConnectedComponents strongly_connected_components = 16;
SVDPlusPlus svd_plus_plus = 17;
TriangleCount triangle_count = 18;
Triplets triplets = 19;
DetectingCycles detecting_cycles = 7;
FilterEdges filter_edges = 8;
FilterVertices filter_vertices = 9;
Find find = 10;
LabelPropagation label_propagation = 11;
PageRank page_rank = 12;
ParallelPersonalizedPageRank parallel_personalized_page_rank = 13;
PowerIterationClustering power_iteration_clustering = 14;
Pregel pregel = 15;
ShortestPaths shortest_paths = 16;
StronglyConnectedComponents strongly_connected_components = 17;
SVDPlusPlus svd_plus_plus = 18;
TriangleCount triangle_count = 19;
Triplets triplets = 20;
}
}

// Mapping follows PySpark Storage Levels!
// (not Scala-Spark Storage Levels)
message StorageLevel {
oneof storage_level {
bool disk_only = 1;
bool disk_only_2 = 2;
bool disk_only_3 = 3;
bool memory_and_disk = 4;
bool memory_and_disk_2 = 5;
bool memory_and_disk_deser = 6;
bool memory_only = 7;
bool memory_only_2 = 8;
}
}

// String expression or serialized column
message ColumnOrExpression {
oneof col_or_expr {
bytes col = 1;
string expr = 2;
}
}

// Connect supports only string or long-like IDs
message StringOrLongID {
oneof id {
int64 long_id = 1;
Expand All @@ -47,9 +70,10 @@ message StringOrLongID {
}

message AggregateMessages {
ColumnOrExpression agg_col = 1;
optional ColumnOrExpression send_to_src = 2;
optional ColumnOrExpression send_to_dst = 3;
repeated ColumnOrExpression agg_col = 1;
repeated ColumnOrExpression send_to_src = 2;
repeated ColumnOrExpression send_to_dst = 3;
optional StorageLevel storage_level = 4;
}

message BFS {
Expand All @@ -64,6 +88,15 @@ message ConnectedComponents {
int32 checkpoint_interval = 2;
int32 broadcast_threshold = 3;
bool use_labels_as_components = 4;
bool use_local_checkpoints = 5;
int32 max_iter = 6;
optional StorageLevel storage_level = 7;
}

message DetectingCycles {
bool use_local_checkpoints = 1;
int32 checkpoint_interval = 2;
optional StorageLevel CBC4 storage_level = 3;
}

message DropIsolatedVertices {}
Expand All @@ -81,7 +114,11 @@ message Find {
}

message LabelPropagation {
int32 max_iter = 1;
string algorithm = 1;
int32 max_iter = 2;
bool use_local_checkpoints = 3;
int32 checkpoint_interval = 4;
optional StorageLevel storage_level = 5;
}

message PageRank {
Expand Down Expand Up @@ -113,10 +150,20 @@ message Pregel {
ColumnOrExpression additional_col_initial = 7;
ColumnOrExpression additional_col_upd = 8;
optional bool early_stopping = 9;
bool use_local_checkpoints = 10;
optional StorageLevel storage_level = 11;
optional bool stop_if_all_non_active = 12;
optional ColumnOrExpression initial_active_expr = 13;
optional ColumnOrExpression update_active_expr = 14;
optional bool skip_messages_from_non_active = 15;
}

message ShortestPaths {
repeated StringOrLongID landmarks = 1;
string algorithm = 2;
bool use_local_checkpoints = 3;
int32 checkpoint_interval = 4;
optional StorageLevel storage_level = 5;
}

message StronglyConnectedComponents {
Expand All @@ -134,6 +181,8 @@ message SVDPlusPlus {
double gamma7 = 8;
}

message TriangleCount {}
message TriangleCount {
optional StorageLevel storage_level = 1;
}

message Triplets {}
Loading
0