From 668f32a01cb01187383ca14cbaf930ba281f95e0 Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Tue, 11 Jul 2023 16:03:39 -0400 Subject: [PATCH] embed commit url update openai-flows --- .gitignore | 5 + Cargo.toml | 22 ++++ README-zh.md | 130 +++++++++++++++++++++ README.md | 118 +++++++++++++++++++ src/github-pr-summary.rs | 246 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 521 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 README-zh.md create mode 100644 README.md create mode 100644 src/github-pr-summary.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..aa07357 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/target +.env +Cargo.lock +diff.json +details.md \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..e290b83 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "github-pr-summary" +version = "0.1.0" +edition = "2021" + +[lib] +path = "src/github-pr-summary.rs" +crate-type = ["cdylib"] + +[dependencies] +dotenv = "0.15.0" +github-flows = "0.5" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0.93" +tokio_wasi = { version = "1.25.1", features = ["macros", "rt"] } +anyhow = "1" +flowsnet-platform-sdk = "0.1" +lazy_static = "1.4.0" +regex = "1.7.1" +openai-flows = "0.8.2" +words-count = "0.1.4" +log = "0.4" diff --git a/README-zh.md b/README-zh.md new file mode 100644 index 0000000..70b1635 --- /dev/null +++ b/README-zh.md @@ -0,0 +1,130 @@ +
+ +[English](README.md) + +
+ +#

ChatGPT/4 加成的 GitHub PR 代码检查机器人

+ +

+ + flows.network Discord + + + flows.network Twitter + + + Create a flow + +

+ +[部署此函数到 flows.network](#deploy-your-own-code-review-bot-in-3-simple-steps),你将获得一个 GitHub 🤖 来检查代码和总结拉取请求。它可以帮助忙碌的开源贡献者更快地理解并对 PR 采取行动!下面是一些示例! + +* [[Rust] 支持 WasmEdge Rust SDK 中的主机函数](https://github.com/WasmEdge/WasmEdge/pull/2394#issuecomment-1497819842) +* [[bash] 支持 WasmEdge 安装程序中的 ARM 体系结构](https://github.com/WasmEdge/WasmEdge/pull/1084#issuecomment-1497830324) +* [[C++] 为 WasmEdge 添加 eBPF 插件](https://github.com/WasmEdge/WasmEdge/pull/2314#issuecomment-1497861516) +* [[Haskell] 优化 WasmEdge Component Model 工具的 CLI 实用程序](https://github.com/second-state/witc/pull/73#issuecomment-1507539260) + +> 还没被惊艳到吗?[请见此处 bot 指出的“潜在问题1”](https://github.com/second-state/wasmedge-quickjs/pull/82#issuecomment-1498299630),它识别出了 Rust 算法的低效实现。🤯 + +这个机器人会**总结 PR 中提交的信息**。或者,可以使用[这个机器人](https://github.com/flows-network/github-pr-review)来检查PR中更改的文件。 + + +## 如何工作 + +当在指定的 GitHub repo 中创建新的 PR 时,此 flow 函数(或🤖)将被触发。 flow 函数会收集 PR 中的内容,并请求 ChatGPT/4 进行检查和总结。结果会作为评论发布回 PR。flow 函数是用Rust编写的,并在[WasmEdge 运行时](https://github.com/wasmedge)上在托管的[flows.network](https://flows.network/)中运行。 + +* 每次将新的提交推送到此 PR 时,都会自动更新代码检查评论。 +* 当有人在 PR 的评论中说出一个魔法*触发词*时,可以触发新的代码检查。默认的触发词是"flows summarize"。 + +## 简单3步部署自己的代码检查机器人 + +1. 从模板创建一个机器人 +2. 添加你的 OpenAI API密钥 +3. 
配置机器人以检查指定 GitHub repo 上的PR + + +### 0 前期准备 + +需要使用自己的 [OpenAI API 密钥](https://openai.com/blog/openai-api)。如果还没有注册,请[在此处注册](https://platform.openai.com/signup)。 + +还需要使用 GitHub 帐户登录 [flows.network](https://flows.network/)。这是免费的。 + +### 1 从模板创建机器人 + +### [单击此处](https://flows.network/flow/createByTemplate/Summarize-Pull-Request) + +请检查 `trigger_phrase` 变量。这是你在 PR 评论中手动召唤检查机器人的魔法词。 + +单击 **Create and Build** 按钮。 + +### 2 添加你的 OpenAI API 密钥 + +现在你将设置 OpenAI integration。单击**连接**,输入您的密钥并为其命名。 + +[image](https://user-images.githubusercontent.com/45785633/222973214-ecd052dc-72c2-4711-90ec-db1ec9d5f24e.png) + +完成后关闭选项卡并返回 flow.network 页面。 点击**继续**。 + +>让我们看一个示例。你想要部署机器人从而总结 `WasmEdge/wasmedge_hyper_demo` repo 中的 PR。这里 `github_owner = WasmEdge`,`github_repo = wasmedge_hyper_demo`。 + +单击“创建和部署(Create and deploy)”按钮。 + +## 3 配置机器人以访问 GitHub + +接下来,你需要告诉机器人它需要监控哪个 GitHub repo 以查看即将到来的 PR 进行检查。 + +* `github_owner`: *你想要为 repo 部署 🤖 的* GitHub org +* `github_repo` : *你想部署 🤖 的* GitHub repo + +> 让我们看一个例子。您想要部署机器人来检查`WasmEdge/wasmedge_hyper_demo` repo 中的PR代码。这里 `github_owner = WasmEdge` 且 `github_repo = wasmedge_hyper_demo`。 + +点击 **Connect** 或 **+ Add new authentication** 按钮,以使函数可以访问 GitHub repo 并部署🤖️。你将被重定向到一个新页面,在此页面须授予 [flows.network](https://flows.network/) 对该 repo 的权限。 + +[image](https://github.com/flows-network/github-pr-summary/assets/45785633/6cefff19-9eeb-4533-a20b-03c6a9c89473) + +完成后请关闭标签页并返回 flow.network 页面。点击 **Deploy**. + +这就好了!你现在处在 flow 详细信息页面,正在等待 flow 函数构建。一旦 flow +状态变为 `运行中`,机器人就准备好进行代码检查了!每个新PR、每个新提交以及PR评论中的魔法词(即`trigger_phrase`),都会召唤机器人。 + +[image](https://user-images.githubusercontent.com/45785633/229329247-16273aec-f89b-4375-bf2b-4ffce5e35a33.png) + + + +## 常见问题解答 + +### 自定义机器人 + +机器人的源代码可在你从模板克隆的 GitHub repo 中找到。请根据自己的需求任意更改源代码(例如,模型、上下文长度、API 密钥和提示)。如果需要帮助,请在 [Discord 中询问](https://discord.gg/ccZn9ZMfFf)! 
+ + +### 使用 GPT4 + +默认情况下,该机器人使用 GPT3.5 进行代码审核。如果你的 OpenAI API 密钥可以访问 GPT4,则可以在克隆的源代码 repo 中打开 `src/github-pr-summary.rs` 文件,并在源代码中将 `GPT35Turbo16K` 更改为 `GPT4`。将更改提交并推送回 GitHub。 +flows.network 平台将自动检测并从你更新后的源代码重建机器人。 + + +### 在多个 repo 上使用机器人 + +你可以手动[创建一个新的 flow](https://flows.network/flow/new),并导入机器人的源代码 repo(即你从模板克隆的 repo)。然后,可以使用 flow config 来指定 `github_owner` 和 `github_repo`,以指向你需要在其上部署机器人的目标 repo。部署并授权访问该目标 repo。 + +可以把这个机器人安装在你想要部署此机器人的所有目标 repo 上。 + +>你可以将单个 flow 函数 repo 部署为多个机器人的源代码。当更新 repo 中的源代码并将其推送到 GitHub 时,会更改所有机器人的行为。 + + +### 定制自己的魔法词 + +进入机器人正在运行的 flow 函数的 "Settings" 选项卡,你可以更新 `trigger_phrase` 配置。该配置的值是让用户触发机器人的魔法词,可以从 PR 评论触发检查。 + + +## 鸣谢 + +此 flow 函数最初由 [Jay Chen](https://github.com/jaykchen) 创建,[jinser](https://github.com/jetjinser) 为优化来自 GitHub 的事件触发器做出了重大贡献。 + + + +

+GPT Nitro for Github PR - A ChatGPT-based reviewer for your GitHub pull requests | Product Hunt +

diff --git a/README.md b/README.md new file mode 100644 index 0000000..d897c21 --- /dev/null +++ b/README.md @@ -0,0 +1,118 @@ +
+ + [中文文档](README-zh.md) + +
+ +#

ChatGPT/4 code reviewer for GitHub PR

+ +

+ + flows.network Discord + + + flows.network Twitter + + + Create a flow + +

+ +[Deploy this function on flows.network](#deploy-your-own-code-review-bot-in-3-simple-steps), and you will get a GitHub 🤖 to review and summarize Pull Requests. It helps busy open source contributors understand and make decisions on PRs faster! A few examples below! + +* [[Rust] Improve support for host functions in the WasmEdge Rust SDK](https://github.com/WasmEdge/WasmEdge/pull/2394#issuecomment-1497819842) +* [[bash] Support ARM architecture in the WasmEdge installer](https://github.com/WasmEdge/WasmEdge/pull/1084#issuecomment-1497830324) +* [[C++] Add an eBPF plugin for WasmEdge](https://github.com/WasmEdge/WasmEdge/pull/2314#issuecomment-1497861516) +* [[Haskell] Improve the CLI utility for WasmEdge Component Model tooling](https://github.com/second-state/witc/pull/73#issuecomment-1507539260) + +> Still not convinced? [See "potential problems 1" in this review](https://github.com/second-state/wasmedge-quickjs/pull/82#issuecomment-1498299630), it identified an inefficient Rust implementation of an algorithm. 🤯 + +This bot **summarizes commits in the PR**. Alternatively, you can use [this bot](https://github.com/flows-network/github-pr-review) to review changed files in the PR. + +## How it works + +This flow function (or 🤖) will be triggered when a new PR is raised in the designated GitHub repo. The flow function collects the content in the PR, and asks ChatGPT/4 to review and summarize it. The result is then posted back to the PR as a comment. The flow functions are written in Rust and run in hosted [WasmEdge Runtimes](https://github.com/wasmedge) on [flows.network](https://flows.network/). + +* The code review comment is updated automatically every time a new commit is pushed to this PR. +* A new code review could be triggered when someone says a magic *trigger phrase* in the PR's comments section. The default trigger phrase is "flows summarize". + +## Deploy your own code review bot in 3 simple steps + +1. Create a bot from a template +2. 
Add your OpenAI API key +3. Configure the bot to review PRs on a specified GitHub repo + +### 0 Prerequisites + +You will need to bring your own [OpenAI API key](https://openai.com/blog/openai-api). If you do not already have one, [sign up here](https://platform.openai.com/signup). + +You will also need to sign into [flows.network](https://flows.network/) from your GitHub account. It is free. + +### 1 Create a bot from a template + +[**Just click here**](https://flows.network/flow/createByTemplate/Summarize-Pull-Request) + +Review the `trigger_phrase` variable. It is the magic words you type in a PR comment to manually summon the review bot. + +Click on the **Create and Build** button. + +### 2 Add your OpenAI API key + +You will now set up OpenAI integration. Click on **Connect**, enter your key and give it a name. + +[image](https://user-images.githubusercontent.com/45785633/222973214-ecd052dc-72c2-4711-90ec-db1ec9d5f24e.png) + +Close the tab and go back to the flow.network page once you are done. Click on **Continue**. + +### 3 Configure the bot to access GitHub + +Next, you will tell the bot which GitHub repo it needs to monitor for upcoming PRs to review. + +* `github_owner`: GitHub org for the repo *you want to deploy the 🤖 on*. +* `github_repo` : GitHub repo *you want to deploy the 🤖 on*. + +> Let's see an example. You would like to deploy the bot to review code in PRs on `WasmEdge/wasmedge_hyper_demo` repo. Here `github_owner = WasmEdge` and `github_repo = wasmedge_hyper_demo`. + +Click on the **Connect** or **+ Add new authentication** button to give the function access to the GitHub repo to deploy the 🤖. You'll be redirected to a new page where you must grant [flows.network](https://flows.network/) permission to the repo. + +[image](https://github.com/flows-network/github-pr-summary/assets/45785633/6cefff19-9eeb-4533-a20b-03c6a9c89473) + +Close the tab and go back to the flow.network page once you are done. Click on **Deploy**. + +### Wait for the magic! 
+ +This is it! You are now on the flow details page waiting for the flow function to build. As soon as the flow's status becomes `running`, the bot is ready to give code reviews! The bot is summoned by every new PR, every new commit, as well as magic words (i.e., `trigger_phrase`) in PR comments. + +[image](https://user-images.githubusercontent.com/45785633/229329247-16273aec-f89b-4375-bf2b-4ffce5e35a33.png) + +## FAQ + +### Customize the bot + +The bot's source code is available in the GitHub repo you cloned from the template. Feel free to make changes to the source code (e.g., model, context length, API key and prompts) to fit your own needs. If you need help, [ask in Discord](https://discord.gg/ccZn9ZMfFf)! + +### Use GPT4 + +By default, the bot uses GPT3.5 for code review. If your OpenAI API key has access to GPT4, you can open the `src/github-pr-summary.rs` file +in your cloned source code repo, and change `GPT35Turbo16K` to `GPT4` in the source code. Commit and push the change back to GitHub. +The flows.network platform will automatically detect and rebuild the bot from your updated source code. + +### Use the bot on multiple repos + +You can [manually create a new flow](https://flows.network/flow/new) and import the source code repo for the bot (i.e., the repo you cloned from the template). Then, you can use the flow config to specify the `github_owner` and `github_repo` to point to the target repo you need to deploy the bot on. Deploy and authorize access to that target repo. + +You can repeat this for all target repos you would like to deploy this bot on. + +> You could have a single flow function repo deployed as the source code for multiple bots. When you update the source code in the repo, and push it to GitHub, it will change the behavior of all the bots. + +### Change the magic phrase + +Go to the "Settings" tab of the running flow function for the bot, where you can update the `trigger_phrase` config. 
The value of this config is the magic phrase the user will say to trigger a review from a PR comment. + +## Credits + +This flow function was originally created by [Jay Chen](https://github.com/jaykchen), and [jinser](https://github.com/jetjinser) made significant contributions to optimize the event triggers from GitHub. + +

+GPT Nitro for Github PR - A ChatGPT-based reviewer for your GitHub pull requests | Product Hunt +

diff --git a/src/github-pr-summary.rs b/src/github-pr-summary.rs new file mode 100644 index 0000000..ee10e8f --- /dev/null +++ b/src/github-pr-summary.rs @@ -0,0 +1,246 @@ +use dotenv::dotenv; +use flowsnet_platform_sdk::logger; +use github_flows::{ + get_octo, listen_to_event, + octocrab::models::events::payload::{IssueCommentEventAction, PullRequestEventAction}, + octocrab::models::CommentId, + EventPayload, GithubLogin, +}; +use openai_flows::{ + chat::{ChatModel, ChatOptions}, + OpenAIFlows, +}; +use std::env; + +// The soft character limit of the input context size +// the max token size or word count for GPT4 is 8192 +// the max token size or word count for GPT35Turbo16K is 16384 +static CHAR_SOFT_LIMIT: usize = 30000; +static MODEL: ChatModel = ChatModel::GPT35Turbo16K; +// static MODEL : ChatModel = ChatModel::GPT4; + +#[no_mangle] +#[tokio::main(flavor = "current_thread")] +pub async fn run() -> anyhow::Result<()> { + dotenv().ok(); + logger::init(); + log::debug!("Running github-pr-summary/main"); + + let owner = env::var("github_owner").unwrap_or("juntao".to_string()); + let repo = env::var("github_repo").unwrap_or("test".to_string()); + let trigger_phrase = env::var("trigger_phrase").unwrap_or("flows summarize".to_string()); + + let events = vec!["pull_request", "issue_comment"]; + listen_to_event(&GithubLogin::Default, &owner, &repo, events, |payload| { + handler(&owner, &repo, &trigger_phrase, payload) + }) + .await; + + Ok(()) +} + +async fn handler(owner: &str, repo: &str, trigger_phrase: &str, payload: EventPayload) { + let mut new_commit: bool = false; + let (title, pull_number, _contributor) = match payload { + EventPayload::PullRequestEvent(e) => { + if e.action == PullRequestEventAction::Opened { + log::debug!("Received payload: PR Opened"); + } else if e.action == PullRequestEventAction::Synchronize { + new_commit = true; + log::debug!("Received payload: PR Synced"); + } else { + log::debug!("Not an Opened or Synchronize event for PR"); + 
return; + } + let p = e.pull_request; + ( + p.title.unwrap_or("".to_string()), + p.number, + p.user.unwrap().login, + ) + } + EventPayload::IssueCommentEvent(e) => { + if e.action == IssueCommentEventAction::Deleted { + log::debug!("Deleted issue comment"); + return; + } + log::debug!("Other event for issue comment"); + + let body = e.comment.body.unwrap_or_default(); + + // if e.comment.performed_via_github_app.is_some() { + // return; + // } + // TODO: Makeshift but operational + if body.starts_with("Hello, I am a [code review bot]") { + log::info!("Ignore comment via bot"); + return; + }; + + if !body.to_lowercase().contains(&trigger_phrase.to_lowercase()) { + log::info!("Ignore the comment without the magic words"); + return; + } + + (e.issue.title, e.issue.number, e.issue.user.login) + } + _ => return, + }; + + let octo = get_octo(&GithubLogin::Default); + let issues = octo.issues(owner, repo); + let mut comment_id: CommentId = 0u64.into(); + if new_commit { + // Find the first "Hello, I am a [code review bot]" comment to update + match issues.list_comments(pull_number).send().await { + Ok(comments) => { + for c in comments.items { + if c.body + .unwrap_or_default() + .starts_with("Hello, I am a [code review bot]") + { + comment_id = c.id; + break; + } + } + } + Err(error) => { + log::error!("Error getting comments: {}", error); + return; + } + } + } else { + // PR OPEN or Trigger phrase: create a new comment + match issues.create_comment(pull_number, "Hello, I am a [code review bot](https://github.com/flows-network/github-pr-summary/) on [flows.network](https://flows.network/).\n\nIt could take a few minutes for me to analyze this PR. Relax, grab a cup of coffee and check back later. 
Thanks!").await { + Ok(comment) => { + comment_id = comment.id; + } + Err(error) => { + log::error!("Error posting comment: {}", error); + return; + } + } + } + if comment_id == 0u64.into() { + return; + } + + let pulls = octo.pulls(owner, repo); + let patch_as_text = pulls.get_patch(pull_number).await.unwrap(); + let mut current_commit = String::new(); + let mut commits: Vec = Vec::new(); + for line in patch_as_text.lines() { + if line.starts_with("From ") { + // Detected a new commit + if !current_commit.is_empty() { + // Store the previous commit + commits.push(current_commit.clone()); + } + // Start a new commit + current_commit.clear(); + } + // Append the line to the current commit if the current commit is less than CHAR_SOFT_LIMIT + if current_commit.len() < CHAR_SOFT_LIMIT { + current_commit.push_str(line); + current_commit.push('\n'); + } + } + if !current_commit.is_empty() { + // Store the last commit + commits.push(current_commit.clone()); + } + + if commits.is_empty() { + log::error!("Cannot parse any commit from the patch file"); + return; + } + + let chat_id = format!("PR#{pull_number}"); + let system = &format!("You are an experienced software developer. You will act as a reviewer for a GitHub Pull Request titled \"{}\".", title); + let mut openai = OpenAIFlows::new(); + openai.set_retry_times(3); + + let mut reviews: Vec = Vec::new(); + let mut reviews_md_str: Vec = Vec::new(); + let mut reviews_text = String::new(); + for (_i, commit) in commits.iter().enumerate() { + let commit_hash = &commit[5..45]; + log::debug!("Sending patch to OpenAI: {}", commit_hash); + let co = ChatOptions { + model: MODEL, + restart: true, + system_prompt: Some(system), + max_tokens: Some(200), + ..Default::default() + }; + let question = "The following is a GitHub patch. Please summarize the key changes and identify potential problems. 
Start with the most important findings.\n\n".to_string() + truncate(commit, CHAR_SOFT_LIMIT); + match openai.chat_completion(&chat_id, &question, &co).await { + Ok(r) => { + if reviews_text.len() < CHAR_SOFT_LIMIT { + reviews_text.push_str("------\n"); + reviews_text.push_str(&r.choice); + reviews_text.push_str("\n"); + } + let mut review = String::new(); + // review.push_str(&format!("### [Commit {commit_hash}](https://github.com/WasmEdge/WasmEdge/pull/{pull_number}/commits/{commit_hash})\n")); + review.push_str(&r.choice); + review.push_str("\n\n"); + reviews.push(review.clone()); + let formatted_review = format!( + r#"
+ Commit {} + {}
"#, + owner, repo, pull_number, commit_hash, commit_hash, review + ); + reviews_md_str.push(formatted_review); + log::debug!("Received OpenAI resp for patch: {}", commit_hash); + } + Err(e) => { + log::error!("OpenAI returned an error for commit {commit_hash}: {}", e); + } + } + } + + let mut resp = String::new(); + resp.push_str("Hello, I am a [code review bot](https://github.com/flows-network/github-pr-summary/) on [flows.network](https://flows.network/). Here are my reviews of code commits in this PR.\n\n------\n\n"); + if reviews.len() > 1 { + log::debug!("Sending all reviews to OpenAI for summarization"); + let co = ChatOptions { + model: MODEL, + restart: true, + system_prompt: Some(system), + max_tokens: Some(4000), + ..Default::default() + }; + let question = "Here is a set of summaries for software source code patches. Each summary starts with a ------ line. Please write an overall summary considering all the individual summary. Please present the potential issues and errors first, following by the most important findings, in your summary.\n\n".to_string() + &reviews_text; + match openai.chat_completion(&chat_id, &question, &co).await { + Ok(r) => { + resp.push_str(&r.choice); + resp.push_str("\n\n## Details\n\n"); + log::debug!("Received the overall summary"); + } + Err(e) => { + log::error!("OpenAI returned an error for the overall summary: {}", e); + } + } + } + for (_i, review) in reviews_md_str.iter().enumerate() { + resp.push_str(review); + } + + // Send the entire response to GitHub PR + // issues.create_comment(pull_number, resp).await.unwrap(); + match issues.update_comment(comment_id, resp).await { + Err(error) => { + log::error!("Error posting resp: {}", error); + } + _ => {} + } +} + +fn truncate(s: &str, max_chars: usize) -> &str { + match s.char_indices().nth(max_chars) { + None => s, + Some((idx, _)) => &s[..idx], + } +}