Browse Source

Add PR auto-assignment.

Eric Huss 2 năm trước cách đây
mục cha
commit
7597fb3b82

+ 2 - 0
Cargo.lock

@@ -2049,6 +2049,7 @@ dependencies = [
  "glob",
  "hex",
  "hyper",
+ "ignore",
  "itertools",
  "lazy_static",
  "native-tls",
@@ -2057,6 +2058,7 @@ dependencies = [
  "openssl",
  "parser",
  "postgres-native-tls",
+ "rand",
  "regex",
  "reqwest",
  "route-recognizer",

+ 2 - 0
Cargo.toml

@@ -40,6 +40,8 @@ cynic = { version = "0.14" }
 itertools = "0.10.2"
 tower = { version = "0.4.13", features = ["util", "limit", "buffer", "load-shed"] }
 github-graphql = { path = "github-graphql" }
+rand = "0.8.5"
+ignore = "0.4.18"
 
 [dependencies.serde]
 version = "1"

+ 2 - 1
src/actions.rs

@@ -109,8 +109,9 @@ impl<'a> Action for Step<'a> {
             for repo in repos {
                 let repository = Repository {
                     full_name: format!("{}/{}", repo.0, repo.1),
-                    // This is unused for query.
+                    // These are unused for query.
                     default_branch: "master".to_string(),
+                    fork: false,
                 };
 
                 for QueryMap { name, kind, query } in queries {

+ 19 - 2
src/config.rs

@@ -75,8 +75,20 @@ pub(crate) struct PingTeamConfig {
 
 #[derive(PartialEq, Eq, Debug, serde::Deserialize)]
 pub(crate) struct AssignConfig {
+    /// If `true`, then posts a warning comment if the PR is opened against a
+    /// different branch than the default (usually master or main).
     #[serde(default)]
-    _empty: (),
+    pub(crate) non_default_branch: bool,
+    /// A URL to include in the welcome message.
+    pub(crate) contributing_url: Option<String>,
+    /// Ad-hoc groups that can be referred to in `owners`.
+    #[serde(default)]
+    pub(crate) groups: HashMap<String, Vec<String>>,
+    /// Users to assign when a new PR is opened.
+    /// The key is a gitignore-style path, and the value is a list of
+    /// usernames, team names, or ad-hoc groups.
+    #[serde(default)]
+    pub(crate) owners: HashMap<String, Vec<String>>,
 }
 
 #[derive(PartialEq, Eq, Debug, serde::Deserialize)]
@@ -354,7 +366,12 @@ mod tests {
                 relabel: Some(RelabelConfig {
                     allow_unauthenticated: vec!["C-*".into()],
                 }),
-                assign: Some(AssignConfig { _empty: () }),
+                assign: Some(AssignConfig {
+                    non_default_branch: false,
+                    contributing_url: None,
+                    groups: HashMap::new(),
+                    owners: HashMap::new(),
+                }),
                 note: Some(NoteConfig { _empty: () }),
                 ping: Some(PingConfig { teams: ping_teams }),
                 nominate: Some(NominateConfig {

+ 48 - 3
src/github.rs

@@ -611,7 +611,9 @@ impl Issue {
     }
 
     pub fn contain_assignee(&self, user: &str) -> bool {
-        self.assignees.iter().any(|a| a.login == user)
+        self.assignees
+            .iter()
+            .any(|a| a.login.to_lowercase() == user.to_lowercase())
     }
 
     pub async fn remove_assignees(
@@ -637,7 +639,7 @@ impl Issue {
                 .assignees
                 .iter()
                 .map(|u| u.login.as_str())
-                .filter(|&u| u != user)
+                .filter(|&u| u.to_lowercase() != user.to_lowercase())
                 .collect::<Vec<_>>(),
         };
 
@@ -677,7 +679,10 @@ impl Issue {
             .map_err(AssignmentError::Http)?;
         // Invalid assignees are silently ignored. We can just check if the user is now
         // contained in the assignees list.
-        let success = result.assignees.iter().any(|u| u.login.as_str() == user);
+        let success = result
+            .assignees
+            .iter()
+            .any(|u| u.login.as_str().to_lowercase() == user.to_lowercase());
 
         if success {
             Ok(())
@@ -942,10 +947,17 @@ pub struct IssueSearchResult {
     pub items: Vec<Issue>,
 }
 
+#[derive(Debug, serde::Deserialize)]
+struct CommitSearchResult {
+    total_count: u32,
+}
+
 #[derive(Clone, Debug, serde::Deserialize)]
 pub struct Repository {
     pub full_name: String,
     pub default_branch: String,
+    #[serde(default)]
+    pub fork: bool,
 }
 
 #[derive(Copy, Clone)]
@@ -1505,6 +1517,39 @@ impl GithubClient {
             }
         }
     }
+
+    /// Returns whether or not the given GitHub login has made any commits to
+    /// the given repo.
+    pub async fn is_new_contributor(&self, repo: &Repository, author: &str) -> bool {
+        if repo.fork {
+            // Forks always return 0 results.
+            return false;
+        }
+        let url = format!(
+            "{}/search/commits?q=repo:{}+author:{}",
+            Repository::GITHUB_API_URL,
+            repo.full_name,
+            author,
+        );
+        let req = self.get(&url);
+        match self.json::<CommitSearchResult>(req).await {
+            Ok(res) => res.total_count == 0,
+            Err(e) => {
+                // 422 is returned for unknown user
+                if e.downcast_ref::<reqwest::Error>().map_or(false, |e| {
+                    e.status() == Some(StatusCode::UNPROCESSABLE_ENTITY)
+                }) {
+                    true
+                } else {
+                    log::warn!(
+                        "failed to search for user commits in {} for author {author}: {e}",
+                        repo.full_name
+                    );
+                    false
+                }
+            }
+        }
+    }
 }
 
 #[derive(Debug, serde::Deserialize)]

+ 2 - 1
src/handlers.rs

@@ -46,7 +46,7 @@ mod shortcut;
 pub async fn handle(ctx: &Context, event: &Event) -> Vec<HandlerError> {
     let config = config::get(&ctx.github, event.repo()).await;
     if let Err(e) = &config {
-        log::warn!("failed to load configuration: {e}");
+        log::warn!("configuration error {}: {e}", event.repo().full_name);
     }
     let mut errors = Vec::new();
 
@@ -155,6 +155,7 @@ macro_rules! issue_handlers {
 // This is for events that happen only on issues (e.g. label changes).
 // Each module in the list must contain the functions `parse_input` and `handle_input`.
 issue_handlers! {
+    assign,
     autolabel,
     major_change,
     mentions,

+ 569 - 36
src/handlers/assign.rs

@@ -1,34 +1,399 @@
-//! Permit assignment of any user to issues, without requiring "write" access to the repository.
+//! Handles PR and issue assignment.
 //!
-//! We need to fake-assign ourselves and add a 'claimed by' section to the top-level comment.
+//! This supports several ways for setting issue/PR assignment:
 //!
-//! Such assigned issues should also be placed in a queue to ensure that the user remains
-//! active; the assigned user will be asked for a status report every 2 weeks (XXX: timing).
+//! * `@rustbot assign @gh-user`: Assigns to the given user.
+//! * `@rustbot claim`: Assigns to the comment author.
+//! * `@rustbot release-assignment`: Removes the commenter's assignment.
+//! * `r? @user`: Assigns to the given user (PRs only).
 //!
-//! If we're intending to ask for a status report but no comments from the assigned user have
-//! been given for the past 2 weeks, the bot will de-assign the user. They can once more claim
-//! the issue if necessary.
+//! This is capable of assigning to any user, even if they do not have write
+//! access to the repo. It does this by fake-assigning the bot and adding a
+//! "claimed by" section to the top-level comment.
 //!
-//! Assign users with `@rustbot assign @gh-user` or `@rustbot claim` (self-claim).
+//! Configuration is done with the `[assign]` table.
+//!
+//! This also supports auto-assignment of new PRs. Based on rules in the
+//! `assign.owners` config, it will auto-select an assignee based on the files
+//! the PR modifies.
 
 use crate::{
     config::AssignConfig,
-    github::{self, Event, Selection},
-    handlers::Context,
+    github::{self, Event, Issue, IssuesAction, Selection},
+    handlers::{Context, GithubClient, IssuesEvent},
     interactions::EditIssueBody,
 };
-use anyhow::Context as _;
+use anyhow::{bail, Context as _};
 use parser::command::assign::AssignCommand;
+use parser::command::{Command, Input};
+use rand::seq::IteratorRandom;
+use rust_team_data::v1::Teams;
+use std::collections::{HashMap, HashSet};
+use std::fmt;
 use tracing as log;
 
+#[cfg(test)]
+mod tests_candidates;
+#[cfg(test)]
+mod tests_from_diff;
+
+const NEW_USER_WELCOME_MESSAGE: &str = "Thanks for the pull request, and welcome! \
+The Rust team is excited to review your changes, and you should hear from {who} soon.";
+
+const CONTRIBUTION_MESSAGE: &str = "\
+Please see [the contribution instructions]({contributing_url}) for more information.";
+
+const WELCOME_WITH_REVIEWER: &str = "@{assignee} (or someone else)";
+
+const WELCOME_WITHOUT_REVIEWER: &str = "@Mark-Simulacrum (NB. this repo may be misconfigured)";
+
+const RETURNING_USER_WELCOME_MESSAGE: &str = "r? @{assignee}
+
+(rustbot has picked a reviewer for you, use r? to override)";
+
+const RETURNING_USER_WELCOME_MESSAGE_NO_REVIEWER: &str =
+    "@{author}: no appropriate reviewer found, use r? to override";
+
+const NON_DEFAULT_BRANCH: &str =
+    "Pull requests are usually filed against the {default} branch for this repo, \
+     but this one is against {target}. \
+     Please double check that you specified the right target!";
+
+const SUBMODULE_WARNING_MSG: &str = "These commits modify **submodules**.";
+
 #[derive(Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 struct AssignData {
     user: Option<String>,
 }
 
+/// Input for auto-assignment when a PR is created.
+pub(super) struct AssignInput {
+    diff: String,
+}
+
+/// Prepares the input when a new PR is opened.
+pub(super) async fn parse_input(
+    ctx: &Context,
+    event: &IssuesEvent,
+    config: Option<&AssignConfig>,
+) -> Result<Option<AssignInput>, String> {
+    let config = match config {
+        Some(config) => config,
+        None => return Ok(None),
+    };
+    if config.owners.is_empty()
+        || !matches!(event.action, IssuesAction::Opened)
+        || !event.issue.is_pr()
+    {
+        return Ok(None);
+    }
+    let diff = match event.issue.diff(&ctx.github).await {
+        Ok(None) => return Ok(None),
+        Err(e) => {
+            log::error!("failed to fetch diff: {:?}", e);
+            return Ok(None);
+        }
+        Ok(Some(diff)) => diff,
+    };
+    Ok(Some(AssignInput { diff }))
+}
+
+/// Handles the work of setting an assignment for a new PR and posting a
+/// welcome message.
+pub(super) async fn handle_input(
+    ctx: &Context,
+    config: &AssignConfig,
+    event: &IssuesEvent,
+    input: AssignInput,
+) -> anyhow::Result<()> {
+    // Don't auto-assign or welcome if the user manually set the assignee when opening.
+    if event.issue.assignees.is_empty() {
+        let (assignee, from_comment) = determine_assignee(ctx, event, config, &input).await?;
+        if assignee.as_deref() == Some("ghost") {
+            // "ghost" is GitHub's placeholder account for deleted accounts.
+            // It is used here as a convenient way to prevent assignment. This
+            // is typically used for rollups or experiments where you don't
+            // want any assignments or noise.
+            return Ok(());
+        }
+        let welcome = if ctx
+            .github
+            .is_new_contributor(&event.repository, &event.issue.user.login)
+            .await
+        {
+            let who_text = match &assignee {
+                Some(assignee) => WELCOME_WITH_REVIEWER.replace("{assignee}", assignee),
+                None => WELCOME_WITHOUT_REVIEWER.to_string(),
+            };
+            let mut welcome = NEW_USER_WELCOME_MESSAGE.replace("{who}", &who_text);
+            if let Some(contrib) = &config.contributing_url {
+                welcome.push_str("\n\n");
+                welcome.push_str(&CONTRIBUTION_MESSAGE.replace("{contributing_url}", contrib));
+            }
+            Some(welcome)
+        } else if !from_comment {
+            let welcome = match &assignee {
+                Some(assignee) => RETURNING_USER_WELCOME_MESSAGE.replace("{assignee}", assignee),
+                None => RETURNING_USER_WELCOME_MESSAGE_NO_REVIEWER
+                    .replace("{author}", &event.issue.user.login),
+            };
+            Some(welcome)
+        } else {
+            // No welcome is posted if they are not new and they used `r?` in the opening body.
+            None
+        };
+        if let Some(assignee) = assignee {
+            set_assignee(&event.issue, &ctx.github, &assignee).await;
+        }
+
+        if let Some(welcome) = welcome {
+            if let Err(e) = event.issue.post_comment(&ctx.github, &welcome).await {
+                log::warn!(
+                    "failed to post welcome comment to {}: {e}",
+                    event.issue.global_id()
+                );
+            }
+        }
+    }
+
+    // Compute some warning messages to post to new PRs.
+    let mut warnings = Vec::new();
+    if config.non_default_branch {
+        warnings.extend(non_default_branch(event));
+    }
+    warnings.extend(modifies_submodule(&input.diff));
+    if !warnings.is_empty() {
+        let warnings: Vec<_> = warnings
+            .iter()
+            .map(|warning| format!("* {warning}"))
+            .collect();
+        let warning = format!(":warning: **Warning** :warning:\n\n{}", warnings.join("\n"));
+        event.issue.post_comment(&ctx.github, &warning).await?;
+    };
+    Ok(())
+}
+
+/// Finds the `r?` command in the PR body.
+///
+/// Returns the name after the `r?` command, or None if not found.
+fn find_assign_command(ctx: &Context, event: &IssuesEvent) -> Option<String> {
+    let mut input = Input::new(&event.issue.body, vec![&ctx.username]);
+    input.find_map(|command| match command {
+        Command::Assign(Ok(AssignCommand::ReviewName { name })) => Some(name),
+        _ => None,
+    })
+}
+
+/// Returns a message if the PR is opened against the non-default branch.
+fn non_default_branch(event: &IssuesEvent) -> Option<String> {
+    let target_branch = &event.issue.base.as_ref().unwrap().git_ref;
+    let default_branch = &event.repository.default_branch;
+    if target_branch == default_branch {
+        return None;
+    }
+    Some(
+        NON_DEFAULT_BRANCH
+            .replace("{default}", default_branch)
+            .replace("{target}", target_branch),
+    )
+}
+
+/// Returns a message if the PR modifies a git submodule.
+fn modifies_submodule(diff: &str) -> Option<String> {
+    let re = regex::Regex::new(r"\+Subproject\scommit\s").unwrap();
+    if re.is_match(diff) {
+        Some(SUBMODULE_WARNING_MSG.to_string())
+    } else {
+        None
+    }
+}
+
+/// Sets the assignee of a PR, alerting any errors.
+async fn set_assignee(issue: &Issue, github: &GithubClient, username: &str) {
+    // Don't re-assign if already assigned, e.g. on comment edit
+    if issue.contain_assignee(&username) {
+        log::trace!(
+            "ignoring assign PR {} to {}, already assigned",
+            issue.global_id(),
+            username,
+        );
+        return;
+    }
+    if let Err(err) = issue.set_assignee(github, &username).await {
+        log::warn!(
+            "failed to set assignee of PR {} to {}: {:?}",
+            issue.global_id(),
+            username,
+            err
+        );
+        if let Err(e) = issue
+            .post_comment(
+                github,
+                &format!(
+                    "Failed to set assignee to `{username}`: {err}\n\
+                     \n\
+                     > **Note**: Only org members, users with write \
+                       permissions, or people who have commented on the PR may \
+                       be assigned."
+                ),
+            )
+            .await
+        {
+            log::warn!("failed to post error comment: {e}");
+        }
+    }
+}
+
+/// Determines who to assign the PR to based on either an `r?` command, or
+/// based on which files were modified.
+///
+/// Returns `(assignee, from_comment)` where `assignee` is who to assign to
+/// (or None if no assignee could be found). `from_comment` is a boolean
+/// indicating if the assignee came from an `r?` command (it is false if
+/// determined from the diff).
+async fn determine_assignee(
+    ctx: &Context,
+    event: &IssuesEvent,
+    config: &AssignConfig,
+    input: &AssignInput,
+) -> anyhow::Result<(Option<String>, bool)> {
+    let teams = crate::team_data::teams(&ctx.github).await?;
+    if let Some(name) = find_assign_command(ctx, event) {
+        // User included `r?` in the opening PR body.
+        match find_reviewer_from_names(&teams, config, &event.issue, &[name]) {
+            Ok(assignee) => return Ok((Some(assignee), true)),
+            Err(e) => {
+                event
+                    .issue
+                    .post_comment(&ctx.github, &e.to_string())
+                    .await?;
+                // Fall through below for normal diff detection.
+            }
+        }
+    }
+    // Errors fall-through to try fallback group.
+    match find_reviewers_from_diff(config, &input.diff) {
+        Ok(candidates) if !candidates.is_empty() => {
+            match find_reviewer_from_names(&teams, config, &event.issue, &candidates) {
+                Ok(assignee) => return Ok((Some(assignee), false)),
+                Err(FindReviewerError::TeamNotFound(team)) => log::warn!(
+                    "team {team} not found via diff from PR {}, \
+                    is there maybe a misconfigured group?",
+                    event.issue.global_id()
+                ),
+                Err(FindReviewerError::NoReviewer(names)) => log::trace!(
+                    "no reviewer could be determined for PR {} with candidate name {names:?}",
+                    event.issue.global_id()
+                ),
+            }
+        }
+        // If no owners matched the diff, fall-through.
+        Ok(_) => {}
+        Err(e) => {
+            log::warn!(
+                "failed to find candidate reviewer from diff due to error: {e}\n\
+                 Is the triagebot.toml misconfigured?"
+            );
+        }
+    }
+
+    if let Some(fallback) = config.groups.get("fallback") {
+        match find_reviewer_from_names(&teams, config, &event.issue, fallback) {
+            Ok(assignee) => return Ok((Some(assignee), false)),
+            Err(e) => {
+                log::trace!(
+                    "failed to select from fallback group for PR {}: {e}",
+                    event.issue.global_id()
+                );
+            }
+        }
+    }
+    Ok((None, false))
+}
+
+/// Returns a list of candidate reviewers to use based on which files were changed.
+///
+/// May return an error if the owners map is misconfigured.
+///
+/// Beware this may return an empty list if nothing matches.
+fn find_reviewers_from_diff(config: &AssignConfig, diff: &str) -> anyhow::Result<Vec<String>> {
+    // Map of `owners` path to the number of changes found in that path.
+    // This weights the reviewer choice towards places where the most edits are done.
+    let mut counts: HashMap<&str, u32> = HashMap::new();
+    // List of the longest `owners` patterns that match the current path. This
+    // prefers choosing reviewers from deeply nested paths over those defined
+    // for top-level paths, under the assumption that they are more
+    // specialized.
+    //
+    // This is a list to handle the situation if multiple paths of the same
+    // length match.
+    let mut longest_owner_patterns = Vec::new();
+    // Iterate over the diff, finding the start of each file. After each file
+    // is found, it counts the number of modified lines in that file, and
+    // tracks those in the `counts` map.
+    for line in diff.split('\n') {
+        if line.starts_with("diff --git ") {
+            // Start of a new file.
+            longest_owner_patterns.clear();
+            let path = line[line.find(" b/").unwrap()..]
+                .strip_prefix(" b/")
+                .unwrap();
+            // Find the longest `owners` entries that match this path.
+            let mut longest = HashMap::new();
+            for owner_pattern in config.owners.keys() {
+                let ignore = ignore::gitignore::GitignoreBuilder::new("/")
+                    .add_line(None, owner_pattern)
+                    .with_context(|| format!("owner file pattern `{owner_pattern}` is not valid"))?
+                    .build()?;
+                if ignore.matched_path_or_any_parents(path, false).is_ignore() {
+                    let owner_len = owner_pattern.split('/').count();
+                    longest.insert(owner_pattern, owner_len);
+                }
+            }
+            let max_count = longest.values().copied().max().unwrap_or(0);
+            longest_owner_patterns.extend(
+                longest
+                    .iter()
+                    .filter(|(_, count)| **count == max_count)
+                    .map(|x| *x.0),
+            );
+            // Give some weight to these patterns to start. This helps with
+            // files modified without any lines changed.
+            for owner_pattern in &longest_owner_patterns {
+                *counts.entry(owner_pattern).or_default() += 1;
+            }
+            continue;
+        }
+        // Check for a modified line.
+        if (!line.starts_with("+++") && line.starts_with('+'))
+            || (!line.starts_with("---") && line.starts_with('-'))
+        {
+            for owner_path in &longest_owner_patterns {
+                *counts.entry(owner_path).or_default() += 1;
+            }
+        }
+    }
+    // Use the `owners` entry with the most number of modifications.
+    let max_count = counts.values().copied().max().unwrap_or(0);
+    let max_paths = counts
+        .iter()
+        .filter(|(_, count)| **count == max_count)
+        .map(|x| x.0);
+    let mut potential: Vec<_> = max_paths
+        .flat_map(|owner_path| &config.owners[*owner_path])
+        .map(|owner| owner.to_string())
+        .collect();
+    // Dedupe. This isn't strictly necessary, as `find_reviewer_from_names` will deduplicate.
+    // However, this helps with testing.
+    potential.sort();
+    potential.dedup();
+    Ok(potential)
+}
+
+/// Handles a command posted in a comment.
 pub(super) async fn handle_command(
     ctx: &Context,
-    _config: &AssignConfig,
+    config: &AssignConfig,
     event: &Event,
     cmd: AssignCommand,
 ) -> anyhow::Result<()> {
@@ -39,11 +404,18 @@ pub(super) async fn handle_command(
         true
     };
 
+    // Don't handle commands in comments from the bot. Some of the comments it
+    // posts contain commands to instruct the user, not things that the bot
+    // should respond to.
+    if event.comment_from() == Some(ctx.username.as_str()) {
+        return Ok(());
+    }
+
     let issue = event.issue().unwrap();
     if issue.is_pr() {
-        let username = match &cmd {
+        let username = match cmd {
             AssignCommand::Own => event.user().login.clone(),
-            AssignCommand::User { username } => username.clone(),
+            AssignCommand::User { username } => username,
             AssignCommand::Release => {
                 log::trace!(
                     "ignoring release on PR {:?}, must always have assignee",
@@ -51,25 +423,35 @@ pub(super) async fn handle_command(
                 );
                 return Ok(());
             }
-            AssignCommand::ReviewName { .. } => todo!(),
+            AssignCommand::ReviewName { name } => {
+                if config.owners.is_empty() {
+                    // To avoid conflicts with the highfive bot while transitioning,
+                    // r? is ignored if `owners` is not configured in triagebot.toml.
+                    return Ok(());
+                }
+                if matches!(
+                    event,
+                    Event::Issue(IssuesEvent {
+                        action: IssuesAction::Opened,
+                        ..
+                    })
+                ) {
+                    // Don't handle r? comments on new PRs. Those will be
+                    // handled by the new PR trigger (which also handles the
+                    // welcome message).
+                    return Ok(());
+                }
+                let teams = crate::team_data::teams(&ctx.github).await?;
+                match find_reviewer_from_names(&teams, config, issue, &[name]) {
+                    Ok(assignee) => assignee,
+                    Err(e) => {
+                        issue.post_comment(&ctx.github, &e.to_string()).await?;
+                        return Ok(());
+                    }
+                }
+            }
         };
-        // Don't re-assign if already assigned, e.g. on comment edit
-        if issue.contain_assignee(&username) {
-            log::trace!(
-                "ignoring assign PR {} to {}, already assigned",
-                issue.global_id(),
-                username,
-            );
-            return Ok(());
-        }
-        if let Err(err) = issue.set_assignee(&ctx.github, &username).await {
-            log::warn!(
-                "failed to set assignee of PR {} to {}: {:?}",
-                issue.global_id(),
-                username,
-                err
-            );
-        }
+        set_assignee(issue, &ctx.github, &username).await;
         return Ok(());
     }
 
@@ -79,7 +461,7 @@ pub(super) async fn handle_command(
         AssignCommand::Own => event.user().login.clone(),
         AssignCommand::User { username } => {
             if !is_team_member && username != event.user().login {
-                anyhow::bail!("Only Rust team members can assign other users");
+                bail!("Only Rust team members can assign other users");
             }
             username.clone()
         }
@@ -94,7 +476,7 @@ pub(super) async fn handle_command(
                         .await?;
                     return Ok(());
                 } else {
-                    anyhow::bail!("Cannot release another user's assignment");
+                    bail!("Cannot release another user's assignment");
                 }
             } else {
                 let current = &event.user().login;
@@ -106,11 +488,11 @@ pub(super) async fn handle_command(
                         .await?;
                     return Ok(());
                 } else {
-                    anyhow::bail!("Cannot release unassigned issue");
+                    bail!("Cannot release unassigned issue");
                 }
             };
         }
-        AssignCommand::ReviewName { .. } => todo!(),
+        AssignCommand::ReviewName { .. } => bail!("r? is only allowed on PRs."),
     };
     // Don't re-assign if aleady assigned, e.g. on comment edit
     if issue.contain_assignee(&to_assign) {
@@ -146,3 +528,154 @@ pub(super) async fn handle_command(
 
     Ok(())
 }
+
+#[derive(Debug)]
+enum FindReviewerError {
+    /// User specified something like `r? foo/bar` where that team name could
+    /// not be found.
+    TeamNotFound(String),
+    /// No reviewer could be found. The field is the list of candidate names
+    /// that were used to seed the selection. One example where this happens
+    /// is if the given name was for a team where the PR author is the only
+    /// member.
+    NoReviewer(Vec<String>),
+}
+
+impl std::error::Error for FindReviewerError {}
+
+impl fmt::Display for FindReviewerError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        match self {
+            FindReviewerError::TeamNotFound(team) => write!(f, "Team or group `{team}` not found.\n\
+                \n\
+                rust-lang team names can be found at https://github.com/rust-lang/team/tree/master/teams.\n\
+                Reviewer group names can be found in `triagebot.toml` in this repo."),
+            FindReviewerError::NoReviewer(names) => write!(
+                f,
+                "Could not determine reviewer from `{}`.",
+                names.join(",")
+            ),
+        }
+    }
+}
+
+/// Finds a reviewer to assign to a PR.
+///
+/// The `names` is a list of candidate reviewers `r?`, such as `compiler` or
+/// `@octocat`, or names from the owners map. It can contain GitHub usernames,
+/// auto-assign groups, or rust-lang team names. It must have at least one
+/// entry.
+fn find_reviewer_from_names(
+    teams: &Teams,
+    config: &AssignConfig,
+    issue: &Issue,
+    names: &[String],
+) -> Result<String, FindReviewerError> {
+    let candidates = candidate_reviewers_from_names(teams, config, issue, names)?;
+    // This uses a relatively primitive random choice algorithm.
+    // GitHub's CODEOWNERS supports much more sophisticated options, such as:
+    //
+    // - Round robin: Chooses reviewers based on who's received the least
+    //   recent review request, focusing on alternating between all members of
+    //   the team regardless of the number of outstanding reviews they
+    //   currently have.
+    // - Load balance: Chooses reviewers based on each member's total number
+    //   of recent review requests and considers the number of outstanding
+    //   reviews for each member. The load balance algorithm tries to ensure
+    //   that each team member reviews an equal number of pull requests in any
+    //   30 day period.
+    //
+    // Additionally, with CODEOWNERS, users marked as "Busy" in the GitHub UI
+    // will not be selected for reviewer. There are several other options for
+    // configuring CODEOWNERS as well.
+    //
+    // These are all ideas for improving the selection here. However, I'm not
+    // sure they are really worth the effort.
+    match candidates.into_iter().choose(&mut rand::thread_rng()) {
+        Some(candidate) => Ok(candidate.to_string()),
+        None => Err(FindReviewerError::NoReviewer(
+            names.iter().map(|n| n.to_string()).collect(),
+        )),
+    }
+}
+
+/// Returns a list of candidate usernames to choose as a reviewer.
+fn candidate_reviewers_from_names<'a>(
+    teams: &'a Teams,
+    config: &'a AssignConfig,
+    issue: &Issue,
+    names: &'a [String],
+) -> Result<HashSet<&'a str>, FindReviewerError> {
+    // Set of candidate usernames to choose from. This uses a set to
+    // deduplicate entries so that someone in multiple teams isn't
+    // over-weighted.
+    let mut candidates: HashSet<&str> = HashSet::new();
+    // Keep track of groups seen to avoid cycles and avoid expanding the same
+    // team multiple times.
+    let mut seen = HashSet::new();
+    // This is a queue of potential groups or usernames to expand. The loop
+    // below will pop from this and then append the expanded results of teams.
+    // Usernames will be added to `candidates`.
+    let mut group_expansion: Vec<&str> = names.iter().map(|n| n.as_str()).collect();
+    let repo = issue.repository();
+    let org_prefix = format!("{}/", repo.organization);
+    // Don't allow groups or teams to include the current author or assignee.
+    let filter = |name: &&str| -> bool {
+        let name_lower = name.to_lowercase();
+        name_lower != issue.user.login.to_lowercase()
+            && !issue
+                .assignees
+                .iter()
+                .any(|assignee| name_lower == assignee.login.to_lowercase())
+    };
+
+    // Loop over groups to recursively expand them.
+    while let Some(group_or_user) = group_expansion.pop() {
+        let group_or_user = group_or_user.strip_prefix('@').unwrap_or(group_or_user);
+
+        // Try ad-hoc groups first.
+        // Allow `rust-lang/compiler` to match `compiler`.
+        let maybe_group = group_or_user
+            .strip_prefix(&org_prefix)
+            .unwrap_or(group_or_user);
+        if let Some(group_members) = config.groups.get(maybe_group) {
+            // If a group has already been expanded, don't expand it again.
+            if seen.insert(maybe_group) {
+                group_expansion.extend(
+                    group_members
+                        .iter()
+                        .map(|member| member.as_str())
+                        .filter(filter),
+                );
+            }
+            continue;
+        }
+
+        // Check for a team name.
+        // Allow either a direct team name like `rustdoc` or a GitHub-style
+        // team name of `rust-lang/rustdoc` (though this does not check if
+        // that is a real GitHub team name).
+        //
+        // This ignores subteam relationships (it only uses direct members).
+        let maybe_team = group_or_user
+            .strip_prefix("rust-lang/")
+            .unwrap_or(group_or_user);
+        if let Some(team) = teams.teams.get(maybe_team) {
+            candidates.extend(
+                team.members
+                    .iter()
+                    .map(|member| member.github.as_str())
+                    .filter(filter),
+            );
+            continue;
+        }
+
+        if group_or_user.contains('/') {
+            return Err(FindReviewerError::TeamNotFound(group_or_user.to_string()));
+        }
+
+        // Assume it is a user.
+        candidates.insert(group_or_user);
+    }
+    Ok(candidates)
+}

+ 236 - 0
src/handlers/assign/tests_candidates.rs

@@ -0,0 +1,236 @@
+//! Tests for `candidate_reviewers_from_names`
+
+use super::*;
+
+/// Basic test function for testing `candidate_reviewers_from_names`.
+fn test_from_names(
+    teams: Option<toml::Value>,
+    config: toml::Value,
+    issue: serde_json::Value,
+    names: &[&str],
+    expected: &[&str],
+) {
+    let (teams, config, issue) = convert_simplified(teams, config, issue);
+    let names: Vec<_> = names.iter().map(|n| n.to_string()).collect();
+    let candidates = candidate_reviewers_from_names(&teams, &config, &issue, &names).unwrap();
+    let mut candidates: Vec<_> = candidates.into_iter().collect();
+    candidates.sort();
+    let expected: Vec<_> = expected.iter().map(|x| *x).collect();
+    assert_eq!(candidates, expected);
+}
+
+/// Convert the simplified input in preparation for `candidate_reviewers_from_names`.
+fn convert_simplified(
+    teams: Option<toml::Value>,
+    config: toml::Value,
+    issue: serde_json::Value,
+) -> (Teams, AssignConfig, Issue) {
+    // Convert the simplified team config to a real team config.
+    // This uses serde_json since it is easier to manipulate than toml.
+    let teams: serde_json::Value = match teams {
+        Some(teams) => teams.try_into().unwrap(),
+        None => serde_json::json!({}),
+    };
+    let mut teams_config = serde_json::json!({});
+    for (team_name, members) in teams.as_object().unwrap() {
+        let members: Vec<_> = members.as_array().unwrap().iter().map(|member| {
+            serde_json::json!({"name": member, "github": member, "github_id": 1, "is_lead": false})
+        }).collect();
+        teams_config[team_name] = serde_json::json!({
+            "name": team_name,
+            "kind": "team",
+            "members": serde_json::Value::Array(members),
+            "alumni": [],
+            "discord": [],
+        });
+    }
+    let teams = serde_json::value::from_value(teams_config).unwrap();
+    let config = config.try_into().unwrap();
+    let issue = serde_json::value::from_value(issue).unwrap();
+    (teams, config, issue)
+}
+
+fn generic_issue(author: &str, repo: &str) -> serde_json::Value {
+    serde_json::json!({
+        "number": 1234,
+        "created_at": "2022-06-26T21:31:31Z",
+        "updated_at": "2022-06-26T21:31:31Z",
+        "title": "Example PR",
+        "body": "PR body",
+        "html_url": "https://github.com/rust-lang/rust/pull/1234",
+        "user": {
+            "login": author,
+            "id": 583231,
+        },
+        "labels": [],
+        "assignees": [],
+        "comments_url": format!("https://api.github.com/repos/{repo}/pull/1234/comments"),
+    })
+}
+
+#[test]
+fn circular_groups() {
+    // A cycle in the groups map.
+    let config = toml::toml!(
+        [groups]
+        compiler = ["other"]
+        other = ["compiler"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    test_from_names(None, config, issue, &["compiler"], &[]);
+}
+
+#[test]
+fn nested_groups() {
+    // Test choosing a reviewer from group with nested groups.
+    let config = toml::toml!(
+        [groups]
+        a = ["@pnkfelix"]
+        b = ["@nrc"]
+        c = ["a", "b"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    test_from_names(None, config, issue, &["c"], &["nrc", "pnkfelix"]);
+}
+
+#[test]
+fn candidate_filtered_author_only_candidate() {
+    // When the author is the only candidate.
+    let config = toml::toml!(
+        [groups]
+        compiler = ["nikomatsakis"]
+    );
+    let issue = generic_issue("nikomatsakis", "rust-lang/rust");
+    test_from_names(None, config, issue, &["compiler"], &[]);
+}
+
+#[test]
+fn candidate_filtered_author() {
+    // Filter out the author from the candidates.
+    let config = toml::toml!(
+        [groups]
+        compiler = ["user1", "user2", "user3", "group2"]
+        group2 = ["user2", "user4"]
+    );
+    let issue = generic_issue("user2", "rust-lang/rust");
+    test_from_names(
+        None,
+        config,
+        issue,
+        &["compiler"],
+        &["user1", "user3", "user4"],
+    );
+}
+
+#[test]
+fn candidate_filtered_assignee() {
+    // Filter out an existing assignee from the candidates.
+    let config = toml::toml!(
+        [groups]
+        compiler = ["user1", "user2", "user3", "user4"]
+    );
+    let mut issue = generic_issue("user2", "rust-lang/rust");
+    issue["assignees"] = serde_json::json!([
+        {"login": "user1", "id": 1},
+        {"login": "user3", "id": 3},
+    ]);
+    test_from_names(None, config, issue, &["compiler"], &["user4"]);
+}
+
+#[test]
+fn groups_teams_users() {
+    // Assortment of groups, teams, and users all selected at once.
+    let teams = toml::toml!(
+        team1 = ["t-user1"]
+        team2 = ["t-user2"]
+    );
+    let config = toml::toml!(
+        [groups]
+        group1 = ["user1", "rust-lang/team2"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    test_from_names(
+        Some(teams),
+        config,
+        issue,
+        &["team1", "group1", "user3"],
+        &["t-user1", "t-user2", "user1", "user3"],
+    );
+}
+
+#[test]
+fn group_team_user_precedence() {
+    // How it handles ambiguity when names overlap.
+    let teams = toml::toml!(compiler = ["t-user1"]);
+    let config = toml::toml!(
+        [groups]
+        compiler = ["user2"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["compiler"],
+        &["user2"],
+    );
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["rust-lang/compiler"],
+        &["user2"],
+    );
+}
+
+#[test]
+fn what_do_slashes_mean() {
+    // How slashed names are handled.
+    let teams = toml::toml!(compiler = ["t-user1"]);
+    let config = toml::toml!(
+        [groups]
+        compiler = ["user2"]
+        "foo/bar" = ["foo-user"]
+    );
+    let issue = generic_issue("octocat", "rust-lang-nursery/rust");
+    // Random slash names should work from groups.
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["foo/bar"],
+        &["foo-user"],
+    );
+    // Since this is rust-lang-nursery, it uses the rust-lang team, not the group.
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["rust-lang/compiler"],
+        &["t-user1"],
+    );
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["rust-lang-nursery/compiler"],
+        &["user2"],
+    );
+}
+
+#[test]
+fn invalid_org_doesnt_match() {
+    let teams = toml::toml!(compiler = ["t-user1"]);
+    let config = toml::toml!(
+        [groups]
+        compiler = ["user2"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    let (teams, config, issue) = convert_simplified(Some(teams), config, issue);
+    let names = vec!["github/compiler".to_string()];
+    match candidate_reviewers_from_names(&teams, &config, &issue, &names) {
+        Ok(x) => panic!("expected err, got {x:?}"),
+        Err(FindReviewerError::TeamNotFound(_)) => {}
+        Err(e) => panic!("unexpected error {e:?}"),
+    }
+}

+ 148 - 0
src/handlers/assign/tests_from_diff.rs

@@ -0,0 +1,148 @@
+//! Tests for `find_reviewers_from_diff`
+
+use super::*;
+use crate::config::AssignConfig;
+use std::fmt::Write;
+
+fn test_from_diff(diff: &str, config: toml::Value, expected: &[&str]) {
+    let aconfig: AssignConfig = config.try_into().unwrap();
+    assert_eq!(
+        find_reviewers_from_diff(&aconfig, diff).unwrap(),
+        expected.iter().map(|x| x.to_string()).collect::<Vec<_>>()
+    );
+}
+
+/// Generates a fake diff that touches the given files.
+///
+/// `paths` should be a slice of `(path, added, removed)` tuples where `added`
+/// is the number of lines added, and `removed` is the number of lines
+/// removed.
+fn make_fake_diff(paths: &[(&str, u32, u32)]) -> String {
+    // This isn't a properly structured diff, but it has approximately enough
+    // information for what is needed for testing.
+    paths
+        .iter()
+        .map(|(path, added, removed)| {
+            let mut diff = format!(
+                "diff --git a/{path} b/{path}\n\
+                 index 1677422122e..1108c1f4d4c 100644\n\
+                 --- a/{path}\n\
+                 +++ b/{path}\n\
+                 @@ -0,0 +1 @@\n"
+            );
+            for n in 0..*added {
+                writeln!(diff, "+Added line {n}").unwrap();
+            }
+            for n in 0..*removed {
+                writeln!(diff, "-Removed line {n}").unwrap();
+            }
+            diff.push('\n');
+            diff
+        })
+        .collect()
+}
+
+#[test]
+fn no_matching_owners() {
+    // When none of the owners match the diff.
+    let config = toml::toml!(
+        [owners]
+        "/compiler" = ["compiler"]
+        "/library" = ["libs"]
+    );
+    let diff = make_fake_diff(&[("foo/bar.rs", 5, 0)]);
+    test_from_diff(&diff, config, &[]);
+}
+
+#[test]
+fn from_diff_submodule() {
+    let config = toml::toml!(
+        [owners]
+        "/src" = ["user1", "user2"]
+    );
+    let diff = "\
+        diff --git a/src/jemalloc b/src/jemalloc\n\
+        index 2dba541..b001609 160000\n\
+        --- a/src/jemalloc\n\
+        +++ b/src/jemalloc\n\
+        @@ -1 +1 @@\n\
+        -Subproject commit 2dba541881fb8e35246d653bbe2e7c7088777a4a\n\
+        +Subproject commit b001609960ca33047e5cbc5a231c1e24b6041d4b\n\
+    ";
+    test_from_diff(diff, config, &["user1", "user2"]);
+}
+
+#[test]
+fn prefixed_dirs() {
+    // Test dirs with multiple overlapping prefixes.
+    let config = toml::toml!(
+        [owners]
+        "/compiler" = ["compiler"]
+        "/compiler/rustc_llvm" = ["llvm"]
+        "/compiler/rustc_parse" = ["parser"]
+        "/compiler/rustc_parse/src/parse/lexer" = ["lexer"]
+    );
+    // Base compiler rule should catch anything in compiler/
+    let diff = make_fake_diff(&[("compiler/foo", 1, 1)]);
+    test_from_diff(&diff, config.clone(), &["compiler"]);
+
+    // Anything in rustc_llvm should go to llvm.
+    let diff = make_fake_diff(&[("compiler/rustc_llvm/foo", 1, 1)]);
+    test_from_diff(&diff, config.clone(), &["llvm"]);
+
+    // 1 change in rustc_llvm, multiple changes in other directories, the
+    // other directories win because they have more changes.
+    let diff = make_fake_diff(&[
+        ("compiler/rustc_llvm/foo", 1, 1),
+        ("compiler/rustc_traits/src/foo.rs", 0, 1),
+        ("compiler/rustc_macros//src/foo.rs", 2, 3),
+    ]);
+    test_from_diff(&diff, config.clone(), &["compiler"]);
+
+    // Change in a deeply nested directory should win over its parent.
+    let diff = make_fake_diff(&[("compiler/rustc_parse/src/parse/lexer/foo.rs", 1, 1)]);
+    test_from_diff(&diff, config.clone(), &["lexer"]);
+
+    // Most changes in one component should win over the base compiler.
+    let diff = make_fake_diff(&[
+        ("compiler/rustc_parse/src/foo.rs", 5, 10),
+        ("compiler/rustc_llvm/src/foo.rs", 1, 1),
+    ]);
+    test_from_diff(&diff, config.clone(), &["parser"]);
+}
+
+#[test]
+fn deleted_file() {
+    // Test dirs matching for a deleted file.
+    let config = toml::toml!(
+        [owners]
+        "/compiler" = ["compiler"]
+        "/compiler/rustc_parse" = ["parser"]
+    );
+    let diff = make_fake_diff(&[("compiler/rustc_parse/src/foo.rs", 0, 10)]);
+    test_from_diff(&diff, config, &["parser"]);
+}
+
+#[test]
+fn empty_file_still_counts() {
+    let config = toml::toml!(
+        [owners]
+        "/compiler" = ["compiler"]
+        "/compiler/rustc_parse" = ["parser"]
+    );
+    let diff = "diff --git a/compiler/rustc_parse/src/foo.rs b/compiler/rustc_parse/src/foo.rs\n\
+                new file mode 100644\n\
+                index 0000000..e69de29\n";
+    test_from_diff(&diff, config, &["parser"]);
+}
+
+#[test]
+fn basic_gitignore_pattern() {
+    let config = toml::toml!(
+        [owners]
+        "*.js" = ["javascript-reviewers"]
+        "/compiler/rustc_parse" = ["parser"]
+    );
+    let diff = make_fake_diff(&[("src/librustdoc/html/static/js/settings.js", 10, 1)]);
+    test_from_diff(&diff, config, &["javascript-reviewers"]);
+}