Эх сурвалжийг харах

Merge pull request #1656 from ehuss/highfive

Support highfive functionality
Mark Rousskov 2 жил өмнө
parent
commit
a3d0606b64

+ 7 - 4
Cargo.lock

@@ -1186,6 +1186,7 @@ version = "0.1.0"
 dependencies = [
  "log",
  "pulldown-cmark",
+ "regex",
 ]
 
 [[package]]
@@ -1426,9 +1427,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.5.5"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
+checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -1446,9 +1447,9 @@ dependencies = [
 
 [[package]]
 name = "regex-syntax"
-version = "0.6.25"
+version = "0.6.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
 
 [[package]]
 name = "remove_dir_all"
@@ -2048,6 +2049,7 @@ dependencies = [
  "glob",
  "hex",
  "hyper",
+ "ignore",
  "itertools",
  "lazy_static",
  "native-tls",
@@ -2056,6 +2058,7 @@ dependencies = [
  "openssl",
  "parser",
  "postgres-native-tls",
+ "rand",
  "regex",
  "reqwest",
  "route-recognizer",

+ 2 - 0
Cargo.toml

@@ -40,6 +40,8 @@ cynic = { version = "0.14" }
 itertools = "0.10.2"
 tower = { version = "0.4.13", features = ["util", "limit", "buffer", "load-shed"] }
 github-graphql = { path = "github-graphql" }
+rand = "0.8.5"
+ignore = "0.4.18"
 
 [dependencies.serde]
 version = "1"

+ 1 - 0
parser/Cargo.toml

@@ -7,3 +7,4 @@ edition = "2021"
 [dependencies]
 pulldown-cmark = "0.7.0"
 log = "0.4"
+regex = "1.6.0"

+ 111 - 41
parser/src/command.rs

@@ -1,6 +1,7 @@
 use crate::error::Error;
 use crate::ignore_block::IgnoreBlocks;
-use crate::token::{Token, Tokenizer};
+use crate::token::Tokenizer;
+use regex::Regex;
 
 pub mod assign;
 pub mod close;
@@ -13,10 +14,6 @@ pub mod relabel;
 pub mod second;
 pub mod shortcut;
 
-pub fn find_command_start(input: &str, bot: &str) -> Option<usize> {
-    input.to_ascii_lowercase().find(&format!("@{}", bot))
-}
-
 #[derive(Debug, PartialEq)]
 pub enum Command<'a> {
     Relabel(Result<relabel::RelabelCommand, Error<'a>>),
@@ -36,9 +33,9 @@ pub struct Input<'a> {
     all: &'a str,
     parsed: usize,
     ignore: IgnoreBlocks,
-
-    // A list of possible bot names.
-    bot: Vec<&'a str>,
+    /// A pattern for finding the start of a command based on the name of the
+    /// configured bots.
+    bot_re: Regex,
 }
 
 fn parse_single_command<'a, T, F, M>(
@@ -63,25 +60,22 @@ where
 
 impl<'a> Input<'a> {
     pub fn new(input: &'a str, bot: Vec<&'a str>) -> Input<'a> {
+        let bots: Vec<_> = bot.iter().map(|bot| format!(r"(?:@{bot}\b)")).collect();
+        let bot_re = Regex::new(&format!(
+            r#"(?i)(?P<review>\br\?)|{bots}"#,
+            bots = bots.join("|")
+        ))
+        .unwrap();
         Input {
             all: input,
             parsed: 0,
             ignore: IgnoreBlocks::new(input),
-            bot,
+            bot_re,
         }
     }
 
     fn parse_command(&mut self) -> Option<Command<'a>> {
-        let mut tok = Tokenizer::new(&self.all[self.parsed..]);
-        let name_length = if let Ok(Some(Token::Word(bot_name))) = tok.next_token() {
-            assert!(self
-                .bot
-                .iter()
-                .any(|name| bot_name.eq_ignore_ascii_case(&format!("@{}", name))));
-            bot_name.len()
-        } else {
-            panic!("no bot name?")
-        };
+        let tok = Tokenizer::new(&self.all[self.parsed..]);
         log::info!("identified potential command");
 
         let mut success = vec![];
@@ -147,24 +141,28 @@ impl<'a> Input<'a> {
             );
         }
 
-        if self
-            .ignore
-            .overlaps_ignore((self.parsed)..(self.parsed + tok.position()))
-            .is_some()
-        {
-            log::info!("command overlaps ignored block; ignore: {:?}", self.ignore);
-            return None;
-        }
-
         let (mut tok, c) = success.pop()?;
         // if we errored out while parsing the command do not move the input forwards
-        self.parsed += if c.is_ok() {
-            tok.position()
-        } else {
-            name_length
-        };
+        if c.is_ok() {
+            self.parsed += tok.position();
+        }
         Some(c)
     }
+
+    /// Parses command for `r?`
+    fn parse_review(&mut self) -> Option<Command<'a>> {
+        let tok = Tokenizer::new(&self.all[self.parsed..]);
+        match parse_single_command(assign::AssignCommand::parse_review, Command::Assign, &tok) {
+            Some((mut tok, command)) => {
+                self.parsed += tok.position();
+                Some(command)
+            }
+            None => {
+                log::warn!("expected r? parser to return something: {:?}", self.all);
+                None
+            }
+        }
+    }
 }
 
 impl<'a> Iterator for Input<'a> {
@@ -172,16 +170,26 @@ impl<'a> Iterator for Input<'a> {
 
     fn next(&mut self) -> Option<Command<'a>> {
         loop {
-            let start = self
-                .bot
-                .iter()
-                .filter_map(|name| find_command_start(&self.all[self.parsed..], name))
-                .min()?;
-            self.parsed += start;
-            if let Some(command) = self.parse_command() {
+            let caps = self.bot_re.captures(&self.all[self.parsed..])?;
+            let m = caps.get(0).unwrap();
+            if self
+                .ignore
+                .overlaps_ignore((self.parsed + m.start())..(self.parsed + m.end()))
+                .is_some()
+            {
+                log::info!("command overlaps ignored block; ignore: {:?}", self.ignore);
+                self.parsed += m.end();
+                continue;
+            }
+
+            self.parsed += m.end();
+            if caps.name("review").is_some() {
+                if let Some(command) = self.parse_review() {
+                    return Some(command);
+                }
+            } else if let Some(command) = self.parse_command() {
                 return Some(command);
             }
-            self.parsed += self.bot.len() + 1;
         }
     }
 }
@@ -230,6 +238,20 @@ fn code_2() {
     assert!(input.next().is_none());
 }
 
+#[test]
+fn resumes_after_code() {
+    // Handles a command after an ignored block.
+    let input = "```
+@bot modify labels: +bug.
+```
+
+@bot claim
+    ";
+    let mut input = Input::new(input, vec!["bot"]);
+    assert!(matches!(input.next(), Some(Command::Assign(Ok(_)))));
+    assert_eq!(input.next(), None);
+}
+
 #[test]
 fn edit_1() {
     let input_old = "@bot modify labels: +bug.";
@@ -277,3 +299,51 @@ fn multiname() {
     assert!(input.next().unwrap().is_ok());
     assert!(input.next().is_none());
 }
+
+#[test]
+fn review_commands() {
+    for (input, name) in [
+        ("r? @octocat", "octocat"),
+        ("r? octocat", "octocat"),
+        ("R? @octocat", "octocat"),
+        ("can I r? someone?", "someone"),
+        ("Please r? @octocat can you review?", "octocat"),
+        ("r? rust-lang/compiler", "rust-lang/compiler"),
+        ("r? @D--a--s-h", "D--a--s-h"),
+    ] {
+        let mut input = Input::new(input, vec!["bot"]);
+        assert_eq!(
+            input.next(),
+            Some(Command::Assign(Ok(assign::AssignCommand::ReviewName {
+                name: name.to_string()
+            })))
+        );
+        assert_eq!(input.next(), None);
+    }
+}
+
+#[test]
+fn review_errors() {
+    use std::error::Error;
+    for input in ["r?", "r? @", "r? @ user", "r?:user", "r?! @foo", "r?\nline"] {
+        let mut input = Input::new(input, vec!["bot"]);
+        let err = match input.next() {
+            Some(Command::Assign(Err(err))) => err,
+            c => panic!("unexpected {:?}", c),
+        };
+        assert_eq!(
+            err.source().unwrap().downcast_ref(),
+            Some(&assign::ParseError::NoUser)
+        );
+        assert_eq!(input.next(), None);
+    }
+}
+
+#[test]
+fn review_ignored() {
+    // Checks for things that shouldn't be detected.
+    for input in ["r", "reviewer? abc", "r foo"] {
+        let mut input = Input::new(input, vec!["bot"]);
+        assert_eq!(input.next(), None);
+    }
+}

+ 94 - 32
parser/src/command/assign.rs

@@ -17,6 +17,7 @@ pub enum AssignCommand {
     Own,
     Release,
     User { username: String },
+    ReviewName { name: String },
 }
 
 #[derive(PartialEq, Eq, Debug)]
@@ -76,43 +77,104 @@ impl AssignCommand {
             return Ok(None);
         }
     }
+
+    /// Parses the input for `r?` command.
+    pub fn parse_review<'a>(input: &mut Tokenizer<'a>) -> Result<Option<Self>, Error<'a>> {
+        match input.next_token() {
+            Ok(Some(Token::Word(name))) => {
+                let name = name.strip_prefix('@').unwrap_or(name).to_string();
+                if name.is_empty() {
+                    return Err(input.error(ParseError::NoUser));
+                }
+                Ok(Some(AssignCommand::ReviewName { name }))
+            }
+            _ => Err(input.error(ParseError::NoUser)),
+        }
+    }
 }
 
 #[cfg(test)]
-fn parse<'a>(input: &'a str) -> Result<Option<AssignCommand>, Error<'a>> {
-    let mut toks = Tokenizer::new(input);
-    Ok(AssignCommand::parse(&mut toks)?)
-}
+mod tests {
+    use super::*;
 
-#[test]
-fn test_1() {
-    assert_eq!(parse("claim."), Ok(Some(AssignCommand::Own)),);
-}
+    fn parse<'a>(input: &'a str) -> Result<Option<AssignCommand>, Error<'a>> {
+        let mut toks = Tokenizer::new(input);
+        Ok(AssignCommand::parse(&mut toks)?)
+    }
 
-#[test]
-fn test_2() {
-    assert_eq!(parse("claim"), Ok(Some(AssignCommand::Own)),);
-}
+    #[test]
+    fn test_1() {
+        assert_eq!(parse("claim."), Ok(Some(AssignCommand::Own)),);
+    }
 
-#[test]
-fn test_3() {
-    assert_eq!(
-        parse("assign @user"),
-        Ok(Some(AssignCommand::User {
-            username: "user".to_owned()
-        })),
-    );
-}
+    #[test]
+    fn test_2() {
+        assert_eq!(parse("claim"), Ok(Some(AssignCommand::Own)),);
+    }
 
-#[test]
-fn test_4() {
-    use std::error::Error;
-    assert_eq!(
-        parse("assign @")
-            .unwrap_err()
-            .source()
-            .unwrap()
-            .downcast_ref(),
-        Some(&ParseError::MentionUser),
-    );
+    #[test]
+    fn test_3() {
+        assert_eq!(
+            parse("assign @user"),
+            Ok(Some(AssignCommand::User {
+                username: "user".to_owned()
+            })),
+        );
+    }
+
+    #[test]
+    fn test_4() {
+        use std::error::Error;
+        assert_eq!(
+            parse("assign @")
+                .unwrap_err()
+                .source()
+                .unwrap()
+                .downcast_ref(),
+            Some(&ParseError::MentionUser),
+        );
+    }
+
+    fn parse_review<'a>(input: &'a str) -> Result<Option<AssignCommand>, Error<'a>> {
+        let mut toks = Tokenizer::new(input);
+        Ok(AssignCommand::parse_review(&mut toks)?)
+    }
+
+    #[test]
+    fn review_names() {
+        for (input, name) in [
+            ("octocat", "octocat"),
+            ("@octocat", "octocat"),
+            ("rust-lang/compiler", "rust-lang/compiler"),
+            ("@rust-lang/cargo", "rust-lang/cargo"),
+            ("abc xyz", "abc"),
+            ("@user?", "user"),
+            ("@user.", "user"),
+            ("@user!", "user"),
+        ] {
+            assert_eq!(
+                parse_review(input),
+                Ok(Some(AssignCommand::ReviewName {
+                    name: name.to_string()
+                })),
+                "failed on {input}"
+            );
+        }
+    }
+
+    #[test]
+    fn review_names_errs() {
+        use std::error::Error;
+        for input in ["", "@", "@ user"] {
+            assert_eq!(
+                parse_review(input)
+                    .unwrap_err()
+                    .source()
+                    .unwrap()
+                    .downcast_ref(),
+                Some(&ParseError::NoUser),
+                "failed on {input}"
+            )
+        }
+    }
 }

+ 2 - 1
src/actions.rs

@@ -109,8 +109,9 @@ impl<'a> Action for Step<'a> {
             for repo in repos {
                 let repository = Repository {
                     full_name: format!("{}/{}", repo.0, repo.1),
-                    // This is unused for query.
+                    // These are unused for query.
                     default_branch: "master".to_string(),
+                    fork: false,
                 };
 
                 for QueryMap { name, kind, query } in queries {

+ 21 - 2
src/config.rs

@@ -76,8 +76,20 @@ pub(crate) struct PingTeamConfig {
 
 #[derive(PartialEq, Eq, Debug, serde::Deserialize)]
 pub(crate) struct AssignConfig {
+    /// If `true`, then posts a warning comment if the PR is opened against a
+    /// different branch than the default (usually master or main).
     #[serde(default)]
-    _empty: (),
+    pub(crate) warn_non_default_branch: bool,
+    /// A URL to include in the welcome message.
+    pub(crate) contributing_url: Option<String>,
+    /// Ad-hoc groups that can be referred to in `owners`.
+    #[serde(default)]
+    pub(crate) adhoc_groups: HashMap<String, Vec<String>>,
+    /// Users to assign when a new PR is opened.
+    /// The key is a gitignore-style path, and the value is a list of
+    /// usernames, team names, or ad-hoc groups.
+    #[serde(default)]
+    pub(crate) owners: HashMap<String, Vec<String>>,
 }
 
 #[derive(PartialEq, Eq, Debug, serde::Deserialize)]
@@ -149,6 +161,8 @@ pub(crate) struct AutolabelLabelConfig {
     pub(crate) exclude_labels: Vec<String>,
     #[serde(default)]
     pub(crate) trigger_files: Vec<String>,
+    #[serde(default)]
+    pub(crate) new_pr: bool,
 }
 
 #[derive(PartialEq, Eq, Debug, serde::Deserialize)]
@@ -359,7 +373,12 @@ mod tests {
                 relabel: Some(RelabelConfig {
                     allow_unauthenticated: vec!["C-*".into()],
                 }),
-                assign: Some(AssignConfig { _empty: () }),
+                assign: Some(AssignConfig {
+                    warn_non_default_branch: false,
+                    contributing_url: None,
+                    adhoc_groups: HashMap::new(),
+                    owners: HashMap::new(),
+                }),
                 note: Some(NoteConfig { _empty: () }),
                 ping: Some(PingConfig { teams: ping_teams }),
                 nominate: Some(NominateConfig {

+ 48 - 3
src/github.rs

@@ -297,6 +297,15 @@ pub struct Issue {
     /// The head commit for a PR (the branch from the source repo).
     #[serde(default)]
     pub head: Option<CommitBase>,
+    /// Whether it is open or closed.
+    pub state: IssueState,
+}
+
+#[derive(Debug, serde::Deserialize, Eq, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum IssueState {
+    Open,
+    Closed,
 }
 
 /// Contains only the parts of `Issue` that are needed for turning the issue title into a Zulip
@@ -467,6 +476,10 @@ impl Issue {
         self.pull_request.is_some()
     }
 
+    pub fn is_open(&self) -> bool {
+        self.state == IssueState::Open
+    }
+
     pub async fn get_comment(&self, client: &GithubClient, id: usize) -> anyhow::Result<Comment> {
         let comment_url = format!("{}/issues/comments/{}", self.repository().url(), id);
         let comment = client.json(client.get(&comment_url)).await?;
@@ -611,7 +624,9 @@ impl Issue {
     }
 
     pub fn contain_assignee(&self, user: &str) -> bool {
-        self.assignees.iter().any(|a| a.login == user)
+        self.assignees
+            .iter()
+            .any(|a| a.login.to_lowercase() == user.to_lowercase())
     }
 
     pub async fn remove_assignees(
@@ -637,7 +652,7 @@ impl Issue {
                 .assignees
                 .iter()
                 .map(|u| u.login.as_str())
-                .filter(|&u| u != user)
+                .filter(|&u| u.to_lowercase() != user.to_lowercase())
                 .collect::<Vec<_>>(),
         };
 
@@ -677,7 +692,10 @@ impl Issue {
             .map_err(AssignmentError::Http)?;
         // Invalid assignees are silently ignored. We can just check if the user is now
         // contained in the assignees list.
-        let success = result.assignees.iter().any(|u| u.login.as_str() == user);
+        let success = result
+            .assignees
+            .iter()
+            .any(|u| u.login.as_str().to_lowercase() == user.to_lowercase());
 
         if success {
             Ok(())
@@ -973,6 +991,8 @@ pub struct IssueSearchResult {
 pub struct Repository {
     pub full_name: String,
     pub default_branch: String,
+    #[serde(default)]
+    pub fork: bool,
 }
 
 #[derive(Copy, Clone)]
@@ -1532,6 +1552,31 @@ impl GithubClient {
             }
         }
     }
+
+    /// Returns whether or not the given GitHub login has made any commits to
+    /// the given repo.
+    pub async fn is_new_contributor(&self, repo: &Repository, author: &str) -> bool {
+        let url = format!(
+            "{}/repos/{}/commits?author={}",
+            Repository::GITHUB_API_URL,
+            repo.full_name,
+            author,
+        );
+        let req = self.get(&url);
+        match self.json::<Vec<GithubCommit>>(req).await {
+            // Note: This only returns results for the default branch.
+            // That should be fine in most cases since I think it is rare for
+            // new users to make their first commit to a different branch.
+            Ok(res) => res.is_empty(),
+            Err(e) => {
+                log::warn!(
+                    "failed to search for user commits in {} for author {author}: {e}",
+                    repo.full_name
+                );
+                false
+            }
+        }
+    }
 }
 
 #[derive(Debug, serde::Deserialize)]

+ 2 - 1
src/handlers.rs

@@ -47,7 +47,7 @@ mod shortcut;
 pub async fn handle(ctx: &Context, event: &Event) -> Vec<HandlerError> {
     let config = config::get(&ctx.github, event.repo()).await;
     if let Err(e) = &config {
-        log::warn!("failed to load configuration: {e}");
+        log::warn!("configuration error {}: {e}", event.repo().full_name);
     }
     let mut errors = Vec::new();
 
@@ -156,6 +156,7 @@ macro_rules! issue_handlers {
 // This is for events that happen only on issues (e.g. label changes).
 // Each module in the list must contain the functions `parse_input` and `handle_input`.
 issue_handlers! {
+    assign,
     autolabel,
     major_change,
     mentions,

+ 576 - 34
src/handlers/assign.rs

@@ -1,34 +1,400 @@
-//! Permit assignment of any user to issues, without requiring "write" access to the repository.
+//! Handles PR and issue assignment.
 //!
-//! We need to fake-assign ourselves and add a 'claimed by' section to the top-level comment.
+//! This supports several ways for setting issue/PR assignment:
 //!
-//! Such assigned issues should also be placed in a queue to ensure that the user remains
-//! active; the assigned user will be asked for a status report every 2 weeks (XXX: timing).
+//! * `@rustbot assign @gh-user`: Assigns to the given user.
+//! * `@rustbot claim`: Assigns to the comment author.
+//! * `@rustbot release-assignment`: Removes the commenter's assignment.
+//! * `r? @user`: Assigns to the given user (PRs only).
 //!
-//! If we're intending to ask for a status report but no comments from the assigned user have
-//! been given for the past 2 weeks, the bot will de-assign the user. They can once more claim
-//! the issue if necessary.
+//! This is capable of assigning to any user, even if they do not have write
+//! access to the repo. It does this by fake-assigning the bot and adding a
+//! "claimed by" section to the top-level comment.
 //!
-//! Assign users with `@rustbot assign @gh-user` or `@rustbot claim` (self-claim).
+//! Configuration is done with the `[assign]` table.
+//!
+//! This also supports auto-assignment of new PRs. Based on rules in the
+//! `assign.owners` config, it will auto-select an assignee based on the files
+//! the PR modifies.
 
 use crate::{
     config::AssignConfig,
-    github::{self, Event, Selection},
-    handlers::Context,
+    github::{self, Event, Issue, IssuesAction, Selection},
+    handlers::{Context, GithubClient, IssuesEvent},
     interactions::EditIssueBody,
 };
-use anyhow::Context as _;
+use anyhow::{bail, Context as _};
 use parser::command::assign::AssignCommand;
+use parser::command::{Command, Input};
+use rand::seq::IteratorRandom;
+use rust_team_data::v1::Teams;
+use std::collections::{HashMap, HashSet};
+use std::fmt;
 use tracing as log;
 
+#[cfg(test)]
+mod tests {
+    mod tests_candidates;
+    mod tests_from_diff;
+}
+
+const NEW_USER_WELCOME_MESSAGE: &str = "Thanks for the pull request, and welcome! \
+The Rust team is excited to review your changes, and you should hear from {who} soon.";
+
+const CONTRIBUTION_MESSAGE: &str = "\
+Please see [the contribution instructions]({contributing_url}) for more information.";
+
+const WELCOME_WITH_REVIEWER: &str = "@{assignee} (or someone else)";
+
+const WELCOME_WITHOUT_REVIEWER: &str = "@Mark-Simulacrum (NB. this repo may be misconfigured)";
+
+const RETURNING_USER_WELCOME_MESSAGE: &str = "r? @{assignee}
+
+(rustbot has picked a reviewer for you, use r? to override)";
+
+const RETURNING_USER_WELCOME_MESSAGE_NO_REVIEWER: &str =
+    "@{author}: no appropriate reviewer found, use r? to override";
+
+const NON_DEFAULT_BRANCH: &str =
+    "Pull requests are usually filed against the {default} branch for this repo, \
+     but this one is against {target}. \
+     Please double check that you specified the right target!";
+
+const SUBMODULE_WARNING_MSG: &str = "These commits modify **submodules**.";
+
 #[derive(Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 struct AssignData {
     user: Option<String>,
 }
 
+/// Input for auto-assignment when a PR is created.
+pub(super) struct AssignInput {
+    git_diff: String,
+}
+
+/// Prepares the input when a new PR is opened.
+pub(super) async fn parse_input(
+    ctx: &Context,
+    event: &IssuesEvent,
+    config: Option<&AssignConfig>,
+) -> Result<Option<AssignInput>, String> {
+    let config = match config {
+        Some(config) => config,
+        None => return Ok(None),
+    };
+    if config.owners.is_empty()
+        || !matches!(event.action, IssuesAction::Opened)
+        || !event.issue.is_pr()
+    {
+        return Ok(None);
+    }
+    let git_diff = match event.issue.diff(&ctx.github).await {
+        Ok(None) => return Ok(None),
+        Err(e) => {
+            log::error!("failed to fetch diff: {:?}", e);
+            return Ok(None);
+        }
+        Ok(Some(diff)) => diff,
+    };
+    Ok(Some(AssignInput { git_diff }))
+}
+
+/// Handles the work of setting an assignment for a new PR and posting a
+/// welcome message.
+pub(super) async fn handle_input(
+    ctx: &Context,
+    config: &AssignConfig,
+    event: &IssuesEvent,
+    input: AssignInput,
+) -> anyhow::Result<()> {
+    // Don't auto-assign or welcome if the user manually set the assignee when opening.
+    if event.issue.assignees.is_empty() {
+        let (assignee, from_comment) = determine_assignee(ctx, event, config, &input).await?;
+        if assignee.as_deref() == Some("ghost") {
+            // "ghost" is GitHub's placeholder account for deleted accounts.
+            // It is used here as a convenient way to prevent assignment. This
+            // is typically used for rollups or experiments where you don't
+            // want any assignments or noise.
+            return Ok(());
+        }
+        let welcome = if ctx
+            .github
+            .is_new_contributor(&event.repository, &event.issue.user.login)
+            .await
+        {
+            let who_text = match &assignee {
+                Some(assignee) => WELCOME_WITH_REVIEWER.replace("{assignee}", assignee),
+                None => WELCOME_WITHOUT_REVIEWER.to_string(),
+            };
+            let mut welcome = NEW_USER_WELCOME_MESSAGE.replace("{who}", &who_text);
+            if let Some(contrib) = &config.contributing_url {
+                welcome.push_str("\n\n");
+                welcome.push_str(&CONTRIBUTION_MESSAGE.replace("{contributing_url}", contrib));
+            }
+            Some(welcome)
+        } else if !from_comment {
+            let welcome = match &assignee {
+                Some(assignee) => RETURNING_USER_WELCOME_MESSAGE.replace("{assignee}", assignee),
+                None => RETURNING_USER_WELCOME_MESSAGE_NO_REVIEWER
+                    .replace("{author}", &event.issue.user.login),
+            };
+            Some(welcome)
+        } else {
+            // No welcome is posted if they are not new and they used `r?` in the opening body.
+            None
+        };
+        if let Some(assignee) = assignee {
+            set_assignee(&event.issue, &ctx.github, &assignee).await;
+        }
+
+        if let Some(welcome) = welcome {
+            if let Err(e) = event.issue.post_comment(&ctx.github, &welcome).await {
+                log::warn!(
+                    "failed to post welcome comment to {}: {e}",
+                    event.issue.global_id()
+                );
+            }
+        }
+    }
+
+    // Compute some warning messages to post to new PRs.
+    let mut warnings = Vec::new();
+    if config.warn_non_default_branch {
+        warnings.extend(non_default_branch(event));
+    }
+    warnings.extend(modifies_submodule(&input.git_diff));
+    if !warnings.is_empty() {
+        let warnings: Vec<_> = warnings
+            .iter()
+            .map(|warning| format!("* {warning}"))
+            .collect();
+        let warning = format!(":warning: **Warning** :warning:\n\n{}", warnings.join("\n"));
+        event.issue.post_comment(&ctx.github, &warning).await?;
+    };
+    Ok(())
+}
+
+/// Finds the `r?` command in the PR body.
+///
+/// Returns the name after the `r?` command, or None if not found.
+fn find_assign_command(ctx: &Context, event: &IssuesEvent) -> Option<String> {
+    let mut input = Input::new(&event.issue.body, vec![&ctx.username]);
+    input.find_map(|command| match command {
+        Command::Assign(Ok(AssignCommand::ReviewName { name })) => Some(name),
+        _ => None,
+    })
+}
+
+/// Returns a message if the PR is opened against the non-default branch.
+fn non_default_branch(event: &IssuesEvent) -> Option<String> {
+    let target_branch = &event.issue.base.as_ref().unwrap().git_ref;
+    let default_branch = &event.repository.default_branch;
+    if target_branch == default_branch {
+        return None;
+    }
+    Some(
+        NON_DEFAULT_BRANCH
+            .replace("{default}", default_branch)
+            .replace("{target}", target_branch),
+    )
+}
+
+/// Returns a message if the PR modifies a git submodule.
+fn modifies_submodule(diff: &str) -> Option<String> {
+    let re = regex::Regex::new(r"\+Subproject\scommit\s").unwrap();
+    if re.is_match(diff) {
+        Some(SUBMODULE_WARNING_MSG.to_string())
+    } else {
+        None
+    }
+}
+
+/// Sets the assignee of a PR, alerting any errors.
+async fn set_assignee(issue: &Issue, github: &GithubClient, username: &str) {
+    // Don't re-assign if already assigned, e.g. on comment edit
+    if issue.contain_assignee(&username) {
+        log::trace!(
+            "ignoring assign PR {} to {}, already assigned",
+            issue.global_id(),
+            username,
+        );
+        return;
+    }
+    if let Err(err) = issue.set_assignee(github, &username).await {
+        log::warn!(
+            "failed to set assignee of PR {} to {}: {:?}",
+            issue.global_id(),
+            username,
+            err
+        );
+        if let Err(e) = issue
+            .post_comment(
+                github,
+                &format!(
+                    "Failed to set assignee to `{username}`: {err}\n\
+                     \n\
+                     > **Note**: Only org members, users with write \
+                       permissions, or people who have commented on the PR may \
+                       be assigned."
+                ),
+            )
+            .await
+        {
+            log::warn!("failed to post error comment: {e}");
+        }
+    }
+}
+
+/// Determines who to assign the PR to based on either an `r?` command, or
+/// based on which files were modified.
+///
+/// Returns `(assignee, from_comment)` where `assignee` is who to assign to
+/// (or None if no assignee could be found). `from_comment` is a boolean
+/// indicating if the assignee came from an `r?` command (it is false if
+/// determined from the diff).
+async fn determine_assignee(
+    ctx: &Context,
+    event: &IssuesEvent,
+    config: &AssignConfig,
+    input: &AssignInput,
+) -> anyhow::Result<(Option<String>, bool)> {
+    let teams = crate::team_data::teams(&ctx.github).await?;
+    if let Some(name) = find_assign_command(ctx, event) {
+        // User included `r?` in the opening PR body.
+        match find_reviewer_from_names(&teams, config, &event.issue, &[name]) {
+            Ok(assignee) => return Ok((Some(assignee), true)),
+            Err(e) => {
+                event
+                    .issue
+                    .post_comment(&ctx.github, &e.to_string())
+                    .await?;
+                // Fall through below for normal diff detection.
+            }
+        }
+    }
+    // Errors fall-through to try fallback group.
+    match find_reviewers_from_diff(config, &input.git_diff) {
+        Ok(candidates) if !candidates.is_empty() => {
+            match find_reviewer_from_names(&teams, config, &event.issue, &candidates) {
+                Ok(assignee) => return Ok((Some(assignee), false)),
+                Err(FindReviewerError::TeamNotFound(team)) => log::warn!(
+                    "team {team} not found via diff from PR {}, \
+                    is there maybe a misconfigured group?",
+                    event.issue.global_id()
+                ),
+                Err(FindReviewerError::NoReviewer(names)) => log::trace!(
+                    "no reviewer could be determined for PR {} with candidate name {names:?}",
+                    event.issue.global_id()
+                ),
+            }
+        }
+        // If no owners matched the diff, fall-through.
+        Ok(_) => {}
+        Err(e) => {
+            log::warn!(
+                "failed to find candidate reviewer from diff due to error: {e}\n\
+                 Is the triagebot.toml misconfigured?"
+            );
+        }
+    }
+
+    if let Some(fallback) = config.adhoc_groups.get("fallback") {
+        match find_reviewer_from_names(&teams, config, &event.issue, fallback) {
+            Ok(assignee) => return Ok((Some(assignee), false)),
+            Err(e) => {
+                log::trace!(
+                    "failed to select from fallback group for PR {}: {e}",
+                    event.issue.global_id()
+                );
+            }
+        }
+    }
+    Ok((None, false))
+}
+
+/// Returns a list of candidate reviewers to use based on which files were changed.
+///
+/// May return an error if the owners map is misconfigured.
+///
+/// Beware this may return an empty list if nothing matches.
+fn find_reviewers_from_diff(config: &AssignConfig, diff: &str) -> anyhow::Result<Vec<String>> {
+    // Map of `owners` path to the number of changes found in that path.
+    // This weights the reviewer choice towards places where the most edits are done.
+    let mut counts: HashMap<&str, u32> = HashMap::new();
+    // List of the longest `owners` patterns that match the current path. This
+    // prefers choosing reviewers from deeply nested paths over those defined
+    // for top-level paths, under the assumption that they are more
+    // specialized.
+    //
+    // This is a list to handle the situation if multiple paths of the same
+    // length match.
+    let mut longest_owner_patterns = Vec::new();
+    // Iterate over the diff, finding the start of each file. After each file
+    // is found, it counts the number of modified lines in that file, and
+    // tracks those in the `counts` map.
+    for line in diff.split('\n') {
+        if line.starts_with("diff --git ") {
+            // Start of a new file.
+            longest_owner_patterns.clear();
+            let path = line[line.find(" b/").unwrap()..]
+                .strip_prefix(" b/")
+                .unwrap();
+            // Find the longest `owners` entries that match this path.
+            let mut longest = HashMap::new();
+            for owner_pattern in config.owners.keys() {
+                let ignore = ignore::gitignore::GitignoreBuilder::new("/")
+                    .add_line(None, owner_pattern)
+                    .with_context(|| format!("owner file pattern `{owner_pattern}` is not valid"))?
+                    .build()?;
+                if ignore.matched_path_or_any_parents(path, false).is_ignore() {
+                    let owner_len = owner_pattern.split('/').count();
+                    longest.insert(owner_pattern, owner_len);
+                }
+            }
+            let max_count = longest.values().copied().max().unwrap_or(0);
+            longest_owner_patterns.extend(
+                longest
+                    .iter()
+                    .filter(|(_, count)| **count == max_count)
+                    .map(|x| *x.0),
+            );
+            // Give some weight to these patterns to start. This helps with
+            // files modified without any lines changed.
+            for owner_pattern in &longest_owner_patterns {
+                *counts.entry(owner_pattern).or_default() += 1;
+            }
+            continue;
+        }
+        // Check for a modified line.
+        if (!line.starts_with("+++") && line.starts_with('+'))
+            || (!line.starts_with("---") && line.starts_with('-'))
+        {
+            for owner_path in &longest_owner_patterns {
+                *counts.entry(owner_path).or_default() += 1;
+            }
+        }
+    }
+    // Use the `owners` entry with the most number of modifications.
+    let max_count = counts.values().copied().max().unwrap_or(0);
+    let max_paths = counts
+        .iter()
+        .filter(|(_, count)| **count == max_count)
+        .map(|(path, _)| path);
+    let mut potential: Vec<_> = max_paths
+        .flat_map(|owner_path| &config.owners[*owner_path])
+        .map(|owner| owner.to_string())
+        .collect();
+    // Dedupe. This isn't strictly necessary, as `find_reviewer_from_names` will deduplicate.
+    // However, this helps with testing.
+    potential.sort();
+    potential.dedup();
+    Ok(potential)
+}
+
+/// Handles a command posted in a comment.
 pub(super) async fn handle_command(
     ctx: &Context,
-    _config: &AssignConfig,
+    config: &AssignConfig,
     event: &Event,
     cmd: AssignCommand,
 ) -> anyhow::Result<()> {
@@ -39,11 +405,24 @@ pub(super) async fn handle_command(
         true
     };
 
+    // Don't handle commands in comments from the bot. Some of the comments it
+    // posts contain commands to instruct the user, not things that the bot
+    // should respond to.
+    if event.comment_from() == Some(ctx.username.as_str()) {
+        return Ok(());
+    }
+
     let issue = event.issue().unwrap();
     if issue.is_pr() {
-        let username = match &cmd {
+        if !issue.is_open() {
+            issue
+                .post_comment(&ctx.github, "Assignment is not allowed on a closed PR.")
+                .await?;
+            return Ok(());
+        }
+        let username = match cmd {
             AssignCommand::Own => event.user().login.clone(),
-            AssignCommand::User { username } => username.clone(),
+            AssignCommand::User { username } => username,
             AssignCommand::Release => {
                 log::trace!(
                     "ignoring release on PR {:?}, must always have assignee",
@@ -51,24 +430,35 @@ pub(super) async fn handle_command(
                 );
                 return Ok(());
             }
+            AssignCommand::ReviewName { name } => {
+                if config.owners.is_empty() {
+                    // To avoid conflicts with the highfive bot while transitioning,
+                    // r? is ignored if `owners` is not configured in triagebot.toml.
+                    return Ok(());
+                }
+                if matches!(
+                    event,
+                    Event::Issue(IssuesEvent {
+                        action: IssuesAction::Opened,
+                        ..
+                    })
+                ) {
+                    // Don't handle r? comments on new PRs. Those will be
+                    // handled by the new PR trigger (which also handles the
+                    // welcome message).
+                    return Ok(());
+                }
+                let teams = crate::team_data::teams(&ctx.github).await?;
+                match find_reviewer_from_names(&teams, config, issue, &[name]) {
+                    Ok(assignee) => assignee,
+                    Err(e) => {
+                        issue.post_comment(&ctx.github, &e.to_string()).await?;
+                        return Ok(());
+                    }
+                }
+            }
         };
-        // Don't re-assign if already assigned, e.g. on comment edit
-        if issue.contain_assignee(&username) {
-            log::trace!(
-                "ignoring assign PR {} to {}, already assigned",
-                issue.global_id(),
-                username,
-            );
-            return Ok(());
-        }
-        if let Err(err) = issue.set_assignee(&ctx.github, &username).await {
-            log::warn!(
-                "failed to set assignee of PR {} to {}: {:?}",
-                issue.global_id(),
-                username,
-                err
-            );
-        }
+        set_assignee(issue, &ctx.github, &username).await;
         return Ok(());
     }
 
@@ -78,7 +468,7 @@ pub(super) async fn handle_command(
         AssignCommand::Own => event.user().login.clone(),
         AssignCommand::User { username } => {
             if !is_team_member && username != event.user().login {
-                anyhow::bail!("Only Rust team members can assign other users");
+                bail!("Only Rust team members can assign other users");
             }
             username.clone()
         }
@@ -93,7 +483,7 @@ pub(super) async fn handle_command(
                         .await?;
                     return Ok(());
                 } else {
-                    anyhow::bail!("Cannot release another user's assignment");
+                    bail!("Cannot release another user's assignment");
                 }
             } else {
                 let current = &event.user().login;
@@ -105,10 +495,11 @@ pub(super) async fn handle_command(
                         .await?;
                     return Ok(());
                 } else {
-                    anyhow::bail!("Cannot release unassigned issue");
+                    bail!("Cannot release unassigned issue");
                 }
             };
         }
+        AssignCommand::ReviewName { .. } => bail!("r? is only allowed on PRs."),
     };
     // Don't re-assign if aleady assigned, e.g. on comment edit
     if issue.contain_assignee(&to_assign) {
@@ -144,3 +535,154 @@ pub(super) async fn handle_command(
 
     Ok(())
 }
+
+#[derive(Debug)]
+enum FindReviewerError {
+    /// User specified something like `r? foo/bar` where that team name could
+    /// not be found.
+    TeamNotFound(String),
+    /// No reviewer could be found. The field is the list of candidate names
+    /// that were used to seed the selection. One example where this happens
+    /// is if the given name was for a team where the PR author is the only
+    /// member.
+    NoReviewer(Vec<String>),
+}
+
+impl std::error::Error for FindReviewerError {}
+
+impl fmt::Display for FindReviewerError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        match self {
+            FindReviewerError::TeamNotFound(team) => write!(f, "Team or group `{team}` not found.\n\
+                \n\
+                rust-lang team names can be found at https://github.com/rust-lang/team/tree/master/teams.\n\
+                Reviewer group names can be found in `triagebot.toml` in this repo."),
+            FindReviewerError::NoReviewer(names) => write!(
+                f,
+                "Could not determine reviewer from `{}`.",
+                names.join(",")
+            ),
+        }
+    }
+}
+
+/// Finds a reviewer to assign to a PR.
+///
+/// The `names` is a list of candidate reviewers `r?`, such as `compiler` or
+/// `@octocat`, or names from the owners map. It can contain GitHub usernames,
+/// auto-assign groups, or rust-lang team names. It must have at least one
+/// entry.
+fn find_reviewer_from_names(
+    teams: &Teams,
+    config: &AssignConfig,
+    issue: &Issue,
+    names: &[String],
+) -> Result<String, FindReviewerError> {
+    let candidates = candidate_reviewers_from_names(teams, config, issue, names)?;
+    // This uses a relatively primitive random choice algorithm.
+    // GitHub's CODEOWNERS supports much more sophisticated options, such as:
+    //
+    // - Round robin: Chooses reviewers based on who's received the least
+    //   recent review request, focusing on alternating between all members of
+    //   the team regardless of the number of outstanding reviews they
+    //   currently have.
+    // - Load balance: Chooses reviewers based on each member's total number
+    //   of recent review requests and considers the number of outstanding
+    //   reviews for each member. The load balance algorithm tries to ensure
+    //   that each team member reviews an equal number of pull requests in any
+    //   30 day period.
+    //
+    // Additionally, with CODEOWNERS, users marked as "Busy" in the GitHub UI
+    // will not be selected for reviewer. There are several other options for
+    // configuring CODEOWNERS as well.
+    //
+    // These are all ideas for improving the selection here. However, I'm not
+    // sure they are really worth the effort.
+    match candidates.into_iter().choose(&mut rand::thread_rng()) {
+        Some(candidate) => Ok(candidate.to_string()),
+        None => Err(FindReviewerError::NoReviewer(
+            names.iter().map(|n| n.to_string()).collect(),
+        )),
+    }
+}
+
+/// Returns a list of candidate usernames to choose as a reviewer.
+fn candidate_reviewers_from_names<'a>(
+    teams: &'a Teams,
+    config: &'a AssignConfig,
+    issue: &Issue,
+    names: &'a [String],
+) -> Result<HashSet<&'a str>, FindReviewerError> {
+    // Set of candidate usernames to choose from. This uses a set to
+    // deduplicate entries so that someone in multiple teams isn't
+    // over-weighted.
+    let mut candidates: HashSet<&str> = HashSet::new();
+    // Keep track of groups seen to avoid cycles and avoid expanding the same
+    // team multiple times.
+    let mut seen = HashSet::new();
+    // This is a queue of potential groups or usernames to expand. The loop
+    // below will pop from this and then append the expanded results of teams.
+    // Usernames will be added to `candidates`.
+    let mut group_expansion: Vec<&str> = names.iter().map(|n| n.as_str()).collect();
+    let repo = issue.repository();
+    let org_prefix = format!("{}/", repo.organization);
+    // Don't allow groups or teams to include the current author or assignee.
+    let filter = |name: &&str| -> bool {
+        let name_lower = name.to_lowercase();
+        name_lower != issue.user.login.to_lowercase()
+            && !issue
+                .assignees
+                .iter()
+                .any(|assignee| name_lower == assignee.login.to_lowercase())
+    };
+
+    // Loop over groups to recursively expand them.
+    while let Some(group_or_user) = group_expansion.pop() {
+        let group_or_user = group_or_user.strip_prefix('@').unwrap_or(group_or_user);
+
+        // Try ad-hoc groups first.
+        // Allow `rust-lang/compiler` to match `compiler`.
+        let maybe_group = group_or_user
+            .strip_prefix(&org_prefix)
+            .unwrap_or(group_or_user);
+        if let Some(group_members) = config.adhoc_groups.get(maybe_group) {
+            // If a group has already been expanded, don't expand it again.
+            if seen.insert(maybe_group) {
+                group_expansion.extend(
+                    group_members
+                        .iter()
+                        .map(|member| member.as_str())
+                        .filter(filter),
+                );
+            }
+            continue;
+        }
+
+        // Check for a team name.
+        // Allow either a direct team name like `rustdoc` or a GitHub-style
+        // team name of `rust-lang/rustdoc` (though this does not check if
+        // that is a real GitHub team name).
+        //
+        // This ignores subteam relationships (it only uses direct members).
+        let maybe_team = group_or_user
+            .strip_prefix("rust-lang/")
+            .unwrap_or(group_or_user);
+        if let Some(team) = teams.teams.get(maybe_team) {
+            candidates.extend(
+                team.members
+                    .iter()
+                    .map(|member| member.github.as_str())
+                    .filter(filter),
+            );
+            continue;
+        }
+
+        if group_or_user.contains('/') {
+            return Err(FindReviewerError::TeamNotFound(group_or_user.to_string()));
+        }
+
+        // Assume it is a user.
+        candidates.insert(group_or_user);
+    }
+    Ok(candidates)
+}

+ 237 - 0
src/handlers/assign/tests/tests_candidates.rs

@@ -0,0 +1,237 @@
+//! Tests for `candidate_reviewers_from_names`
+
+use super::super::*;
+
+/// Basic test function for testing `candidate_reviewers_from_names`.
+fn test_from_names(
+    teams: Option<toml::Value>,
+    config: toml::Value,
+    issue: serde_json::Value,
+    names: &[&str],
+    expected: &[&str],
+) {
+    let (teams, config, issue) = convert_simplified(teams, config, issue);
+    let names: Vec<_> = names.iter().map(|n| n.to_string()).collect();
+    let candidates = candidate_reviewers_from_names(&teams, &config, &issue, &names).unwrap();
+    let mut candidates: Vec<_> = candidates.into_iter().collect();
+    candidates.sort();
+    let expected: Vec<_> = expected.iter().map(|x| *x).collect();
+    assert_eq!(candidates, expected);
+}
+
+/// Convert the simplified input in preparation for `candidate_reviewers_from_names`.
+fn convert_simplified(
+    teams: Option<toml::Value>,
+    config: toml::Value,
+    issue: serde_json::Value,
+) -> (Teams, AssignConfig, Issue) {
+    // Convert the simplified team config to a real team config.
+    // This uses serde_json since it is easier to manipulate than toml.
+    let teams: serde_json::Value = match teams {
+        Some(teams) => teams.try_into().unwrap(),
+        None => serde_json::json!({}),
+    };
+    let mut teams_config = serde_json::json!({});
+    for (team_name, members) in teams.as_object().unwrap() {
+        let members: Vec<_> = members.as_array().unwrap().iter().map(|member| {
+            serde_json::json!({"name": member, "github": member, "github_id": 1, "is_lead": false})
+        }).collect();
+        teams_config[team_name] = serde_json::json!({
+            "name": team_name,
+            "kind": "team",
+            "members": serde_json::Value::Array(members),
+            "alumni": [],
+            "discord": [],
+        });
+    }
+    let teams = serde_json::value::from_value(teams_config).unwrap();
+    let config = config.try_into().unwrap();
+    let issue = serde_json::value::from_value(issue).unwrap();
+    (teams, config, issue)
+}
+
+fn generic_issue(author: &str, repo: &str) -> serde_json::Value {
+    serde_json::json!({
+        "number": 1234,
+        "created_at": "2022-06-26T21:31:31Z",
+        "updated_at": "2022-06-26T21:31:31Z",
+        "title": "Example PR",
+        "body": "PR body",
+        "html_url": "https://github.com/rust-lang/rust/pull/1234",
+        "user": {
+            "login": author,
+            "id": 583231,
+        },
+        "labels": [],
+        "assignees": [],
+        "comments_url": format!("https://api.github.com/repos/{repo}/pull/1234/comments"),
+        "state": "open",
+    })
+}
+
+#[test]
+fn circular_groups() {
+    // A cycle in the groups map.
+    let config = toml::toml!(
+        [adhoc_groups]
+        compiler = ["other"]
+        other = ["compiler"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    test_from_names(None, config, issue, &["compiler"], &[]);
+}
+
+#[test]
+fn nested_groups() {
+    // Test choosing a reviewer from group with nested groups.
+    let config = toml::toml!(
+        [adhoc_groups]
+        a = ["@pnkfelix"]
+        b = ["@nrc"]
+        c = ["a", "b"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    test_from_names(None, config, issue, &["c"], &["nrc", "pnkfelix"]);
+}
+
+#[test]
+fn candidate_filtered_author_only_candidate() {
+    // When the author is the only candidate.
+    let config = toml::toml!(
+        [adhoc_groups]
+        compiler = ["nikomatsakis"]
+    );
+    let issue = generic_issue("nikomatsakis", "rust-lang/rust");
+    test_from_names(None, config, issue, &["compiler"], &[]);
+}
+
+#[test]
+fn candidate_filtered_author() {
+    // Filter out the author from the candidates.
+    let config = toml::toml!(
+        [adhoc_groups]
+        compiler = ["user1", "user2", "user3", "group2"]
+        group2 = ["user2", "user4"]
+    );
+    let issue = generic_issue("user2", "rust-lang/rust");
+    test_from_names(
+        None,
+        config,
+        issue,
+        &["compiler"],
+        &["user1", "user3", "user4"],
+    );
+}
+
+#[test]
+fn candidate_filtered_assignee() {
+    // Filter out an existing assignee from the candidates.
+    let config = toml::toml!(
+        [adhoc_groups]
+        compiler = ["user1", "user2", "user3", "user4"]
+    );
+    let mut issue = generic_issue("user2", "rust-lang/rust");
+    issue["assignees"] = serde_json::json!([
+        {"login": "user1", "id": 1},
+        {"login": "user3", "id": 3},
+    ]);
+    test_from_names(None, config, issue, &["compiler"], &["user4"]);
+}
+
+#[test]
+fn groups_teams_users() {
+    // Assortment of groups, teams, and users all selected at once.
+    let teams = toml::toml!(
+        team1 = ["t-user1"]
+        team2 = ["t-user2"]
+    );
+    let config = toml::toml!(
+        [adhoc_groups]
+        group1 = ["user1", "rust-lang/team2"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    test_from_names(
+        Some(teams),
+        config,
+        issue,
+        &["team1", "group1", "user3"],
+        &["t-user1", "t-user2", "user1", "user3"],
+    );
+}
+
+#[test]
+fn group_team_user_precedence() {
+    // How it handles ambiguity when names overlap.
+    let teams = toml::toml!(compiler = ["t-user1"]);
+    let config = toml::toml!(
+        [adhoc_groups]
+        compiler = ["user2"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["compiler"],
+        &["user2"],
+    );
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["rust-lang/compiler"],
+        &["user2"],
+    );
+}
+
+#[test]
+fn what_do_slashes_mean() {
+    // How slashed names are handled.
+    let teams = toml::toml!(compiler = ["t-user1"]);
+    let config = toml::toml!(
+        [adhoc_groups]
+        compiler = ["user2"]
+        "foo/bar" = ["foo-user"]
+    );
+    let issue = generic_issue("octocat", "rust-lang-nursery/rust");
+    // Random slash names should work from groups.
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["foo/bar"],
+        &["foo-user"],
+    );
+    // Since this is rust-lang-nursery, it uses the rust-lang team, not the group.
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["rust-lang/compiler"],
+        &["t-user1"],
+    );
+    test_from_names(
+        Some(teams.clone()),
+        config.clone(),
+        issue.clone(),
+        &["rust-lang-nursery/compiler"],
+        &["user2"],
+    );
+}
+
+#[test]
+fn invalid_org_doesnt_match() {
+    let teams = toml::toml!(compiler = ["t-user1"]);
+    let config = toml::toml!(
+        [adhoc_groups]
+        compiler = ["user2"]
+    );
+    let issue = generic_issue("octocat", "rust-lang/rust");
+    let (teams, config, issue) = convert_simplified(Some(teams), config, issue);
+    let names = vec!["github/compiler".to_string()];
+    match candidate_reviewers_from_names(&teams, &config, &issue, &names) {
+        Ok(x) => panic!("expected err, got {x:?}"),
+        Err(FindReviewerError::TeamNotFound(_)) => {}
+        Err(e) => panic!("unexpected error {e:?}"),
+    }
+}

+ 148 - 0
src/handlers/assign/tests/tests_from_diff.rs

@@ -0,0 +1,148 @@
+//! Tests for `find_reviewers_from_diff`
+
+use super::super::*;
+use crate::config::AssignConfig;
+use std::fmt::Write;
+
+fn test_from_diff(diff: &str, config: toml::Value, expected: &[&str]) {
+    let aconfig: AssignConfig = config.try_into().unwrap();
+    assert_eq!(
+        find_reviewers_from_diff(&aconfig, diff).unwrap(),
+        expected.iter().map(|x| x.to_string()).collect::<Vec<_>>()
+    );
+}
+
+/// Generates a fake diff that touches the given files.
+///
+/// `paths` should be a slice of `(path, added, removed)` tuples where `added`
+/// is the number of lines added, and `removed` is the number of lines
+/// removed.
+fn make_fake_diff(paths: &[(&str, u32, u32)]) -> String {
+    // This isn't a properly structured diff, but it has approximately enough
+    // information for what is needed for testing.
+    paths
+        .iter()
+        .map(|(path, added, removed)| {
+            let mut diff = format!(
+                "diff --git a/{path} b/{path}\n\
+                 index 1677422122e..1108c1f4d4c 100644\n\
+                 --- a/{path}\n\
+                 +++ b/{path}\n\
+                 @@ -0,0 +1 @@\n"
+            );
+            for n in 0..*added {
+                writeln!(diff, "+Added line {n}").unwrap();
+            }
+            for n in 0..*removed {
+                writeln!(diff, "-Removed line {n}").unwrap();
+            }
+            diff.push('\n');
+            diff
+        })
+        .collect()
+}
+
+#[test]
+fn no_matching_owners() {
+    // When none of the owners match the diff.
+    let config = toml::toml!(
+        [owners]
+        "/compiler" = ["compiler"]
+        "/library" = ["libs"]
+    );
+    let diff = make_fake_diff(&[("foo/bar.rs", 5, 0)]);
+    test_from_diff(&diff, config, &[]);
+}
+
+#[test]
+fn from_diff_submodule() {
+    let config = toml::toml!(
+        [owners]
+        "/src" = ["user1", "user2"]
+    );
+    let diff = "\
+        diff --git a/src/jemalloc b/src/jemalloc\n\
+        index 2dba541..b001609 160000\n\
+        --- a/src/jemalloc\n\
+        +++ b/src/jemalloc\n\
+        @@ -1 +1 @@\n\
+        -Subproject commit 2dba541881fb8e35246d653bbe2e7c7088777a4a\n\
+        +Subproject commit b001609960ca33047e5cbc5a231c1e24b6041d4b\n\
+    ";
+    test_from_diff(diff, config, &["user1", "user2"]);
+}
+
+#[test]
+fn prefixed_dirs() {
+    // Test dirs with multiple overlapping prefixes.
+    let config = toml::toml!(
+        [owners]
+        "/compiler" = ["compiler"]
+        "/compiler/rustc_llvm" = ["llvm"]
+        "/compiler/rustc_parse" = ["parser"]
+        "/compiler/rustc_parse/src/parse/lexer" = ["lexer"]
+    );
+    // Base compiler rule should catch anything in compiler/
+    let diff = make_fake_diff(&[("compiler/foo", 1, 1)]);
+    test_from_diff(&diff, config.clone(), &["compiler"]);
+
+    // Anything in rustc_llvm should go to llvm.
+    let diff = make_fake_diff(&[("compiler/rustc_llvm/foo", 1, 1)]);
+    test_from_diff(&diff, config.clone(), &["llvm"]);
+
+    // 1 change in rustc_llvm, multiple changes in other directories, the
+    // other directories win because they have more changes.
+    let diff = make_fake_diff(&[
+        ("compiler/rustc_llvm/foo", 1, 1),
+        ("compiler/rustc_traits/src/foo.rs", 0, 1),
+        ("compiler/rustc_macros//src/foo.rs", 2, 3),
+    ]);
+    test_from_diff(&diff, config.clone(), &["compiler"]);
+
+    // Change in a deeply nested directory should win over its parent.
+    let diff = make_fake_diff(&[("compiler/rustc_parse/src/parse/lexer/foo.rs", 1, 1)]);
+    test_from_diff(&diff, config.clone(), &["lexer"]);
+
+    // Most changes in one component should win over the base compiler.
+    let diff = make_fake_diff(&[
+        ("compiler/rustc_parse/src/foo.rs", 5, 10),
+        ("compiler/rustc_llvm/src/foo.rs", 1, 1),
+    ]);
+    test_from_diff(&diff, config.clone(), &["parser"]);
+}
+
+#[test]
+fn deleted_file() {
+    // Test dirs matching for a deleted file.
+    let config = toml::toml!(
+        [owners]
+        "/compiler" = ["compiler"]
+        "/compiler/rustc_parse" = ["parser"]
+    );
+    let diff = make_fake_diff(&[("compiler/rustc_parse/src/foo.rs", 0, 10)]);
+    test_from_diff(&diff, config, &["parser"]);
+}
+
+#[test]
+fn empty_file_still_counts() {
+    let config = toml::toml!(
+        [owners]
+        "/compiler" = ["compiler"]
+        "/compiler/rustc_parse" = ["parser"]
+    );
+    let diff = "diff --git a/compiler/rustc_parse/src/foo.rs b/compiler/rustc_parse/src/foo.rs\n\
+                new file mode 100644\n\
+                index 0000000..e69de29\n";
+    test_from_diff(&diff, config, &["parser"]);
+}
+
+#[test]
+fn basic_gitignore_pattern() {
+    let config = toml::toml!(
+        [owners]
+        "*.js" = ["javascript-reviewers"]
+        "/compiler/rustc_parse" = ["parser"]
+    );
+    let diff = make_fake_diff(&[("src/librustdoc/html/static/js/settings.js", 10, 1)]);
+    test_from_diff(&diff, config, &["javascript-reviewers"]);
+}

+ 81 - 76
src/handlers/autolabel.rs

@@ -16,104 +16,109 @@ pub(super) async fn parse_input(
     event: &IssuesEvent,
     config: Option<&AutolabelConfig>,
 ) -> Result<Option<AutolabelInput>, String> {
+    let config = match config {
+        Some(config) => config,
+        None => return Ok(None),
+    };
     // On opening a new PR or sync'ing the branch, look at the diff and try to
     // add any appropriate labels.
     //
     // FIXME: This will re-apply labels after a push that the user had tried to
     // remove. Not much can be done about that currently; the before/after on
     // synchronize may be straddling a rebase, which will break diff generation.
-    if let Some(config) = config {
-        if event.action == IssuesAction::Opened || event.action == IssuesAction::Synchronize {
-            if let Some(diff) = event
-                .issue
-                .diff(&ctx.github)
-                .await
-                .map_err(|e| {
-                    log::error!("failed to fetch diff: {:?}", e);
-                })
-                .unwrap_or_default()
-            {
-                let files = files_changed(&diff);
-                let mut autolabels = Vec::new();
-                'outer: for (label, cfg) in config.labels.iter() {
-                    if cfg
-                        .trigger_files
+    if event.action == IssuesAction::Opened || event.action == IssuesAction::Synchronize {
+        if let Some(diff) = event
+            .issue
+            .diff(&ctx.github)
+            .await
+            .map_err(|e| {
+                log::error!("failed to fetch diff: {:?}", e);
+            })
+            .unwrap_or_default()
+        {
+            let files = files_changed(&diff);
+            let mut autolabels = Vec::new();
+            'outer: for (label, cfg) in config.labels.iter() {
+                if cfg
+                    .trigger_files
+                    .iter()
+                    .any(|f| files.iter().any(|diff_file| diff_file.starts_with(f)))
+                {
+                    let exclude_patterns: Vec<glob::Pattern> = cfg
+                        .exclude_labels
                         .iter()
-                        .any(|f| files.iter().any(|diff_file| diff_file.starts_with(f)))
-                    {
-                        let exclude_patterns: Vec<glob::Pattern> = cfg
-                            .exclude_labels
-                            .iter()
-                            .filter_map(|label| match glob::Pattern::new(label) {
-                                Ok(exclude_glob) => Some(exclude_glob),
-                                Err(error) => {
-                                    log::error!("Invalid glob pattern: {}", error);
-                                    None
-                                }
-                            })
-                            .collect();
-                        for label in event.issue.labels() {
-                            for pat in &exclude_patterns {
-                                if pat.matches(&label.name) {
-                                    // If we hit an excluded label, ignore this autolabel and check the next
-                                    continue 'outer;
-                                }
+                        .filter_map(|label| match glob::Pattern::new(label) {
+                            Ok(exclude_glob) => Some(exclude_glob),
+                            Err(error) => {
+                                log::error!("Invalid glob pattern: {}", error);
+                                None
+                            }
+                        })
+                        .collect();
+                    for label in event.issue.labels() {
+                        for pat in &exclude_patterns {
+                            if pat.matches(&label.name) {
+                                // If we hit an excluded label, ignore this autolabel and check the next
+                                continue 'outer;
                             }
                         }
-
-                        autolabels.push(Label {
-                            name: label.to_owned(),
-                        });
                     }
+
+                    autolabels.push(Label {
+                        name: label.to_owned(),
+                    });
                 }
-                if !autolabels.is_empty() {
-                    return Ok(Some(AutolabelInput {
-                        add: autolabels,
-                        remove: vec![],
-                    }));
+                if cfg.new_pr && event.action == IssuesAction::Opened {
+                    autolabels.push(Label {
+                        name: label.to_owned(),
+                    });
                 }
             }
+            if !autolabels.is_empty() {
+                return Ok(Some(AutolabelInput {
+                    add: autolabels,
+                    remove: vec![],
+                }));
+            }
         }
     }
 
     if event.action == IssuesAction::Labeled {
-        if let Some(config) = config {
-            let mut autolabels = Vec::new();
-            let applied_label = &event.label.as_ref().expect("label").name;
+        let mut autolabels = Vec::new();
+        let applied_label = &event.label.as_ref().expect("label").name;
 
-            'outer: for (label, config) in config.get_by_trigger(applied_label) {
-                let exclude_patterns: Vec<glob::Pattern> = config
-                    .exclude_labels
-                    .iter()
-                    .filter_map(|label| match glob::Pattern::new(label) {
-                        Ok(exclude_glob) => Some(exclude_glob),
-                        Err(error) => {
-                            log::error!("Invalid glob pattern: {}", error);
-                            None
-                        }
-                    })
-                    .collect();
+        'outer: for (label, config) in config.get_by_trigger(applied_label) {
+            let exclude_patterns: Vec<glob::Pattern> = config
+                .exclude_labels
+                .iter()
+                .filter_map(|label| match glob::Pattern::new(label) {
+                    Ok(exclude_glob) => Some(exclude_glob),
+                    Err(error) => {
+                        log::error!("Invalid glob pattern: {}", error);
+                        None
+                    }
+                })
+                .collect();
 
-                for label in event.issue.labels() {
-                    for pat in &exclude_patterns {
-                        if pat.matches(&label.name) {
-                            // If we hit an excluded label, ignore this autolabel and check the next
-                            continue 'outer;
-                        }
+            for label in event.issue.labels() {
+                for pat in &exclude_patterns {
+                    if pat.matches(&label.name) {
+                        // If we hit an excluded label, ignore this autolabel and check the next
+                        continue 'outer;
                     }
                 }
-
-                // If we reach here, no excluded labels were found, so we should apply the autolabel.
-                autolabels.push(Label {
-                    name: label.to_owned(),
-                });
-            }
-            if !autolabels.is_empty() {
-                return Ok(Some(AutolabelInput {
-                    add: autolabels,
-                    remove: vec![],
-                }));
             }
+
+            // If we reach here, no excluded labels were found, so we should apply the autolabel.
+            autolabels.push(Label {
+                name: label.to_owned(),
+            });
+        }
+        if !autolabels.is_empty() {
+            return Ok(Some(AutolabelInput {
+                add: autolabels,
+                remove: vec![],
+            }));
         }
     }
     Ok(None)