浏览代码

Merge pull request #1658 from mcass19/feature/scheduled-jobs

[FEATURE] Scheduled Jobs
Mark Rousskov 2 年之前
父节点
当前提交
fad3d1f780
共有 9 个文件被更改,包括 377 次插入6 次删除
  1. 47 4
      Cargo.lock
  2. 4 2
      Cargo.toml
  3. 57 0
      src/db.rs
  4. 137 0
      src/db/jobs.rs
  5. 1 0
      src/handlers.rs
  6. 21 0
      src/handlers/jobs.rs
  7. 51 0
      src/jobs.rs
  8. 1 0
      src/lib.rs
  9. 58 0
      src/main.rs

+ 47 - 4
Cargo.lock

@@ -279,6 +279,17 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "cron"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ff76b51e4c068c52bfd2866e1567bee7c567ae8f24ada09fd4307019e25eab7"
+dependencies = [
+ "chrono",
+ "nom",
+ "once_cell",
+]
+
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.8"
@@ -991,6 +1002,12 @@ version = "0.3.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
 
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
 [[package]]
 name = "miniz_oxide"
 version = "0.4.4"
@@ -1042,6 +1059,16 @@ dependencies = [
  "tempfile",
 ]
 
+[[package]]
+name = "nom"
+version = "7.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
 [[package]]
 name = "ntapi"
 version = "0.3.7"
@@ -1294,6 +1321,17 @@ version = "0.3.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
 
+[[package]]
+name = "postgres-derive"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0c2c18e40b92144b05e6f3ae9d1ee931f0d1afa9410ac8b97486c6eaaf91201"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "postgres-native-tls"
 version = "0.5.0"
@@ -1309,9 +1347,9 @@ dependencies = [
 
 [[package]]
 name = "postgres-protocol"
-version = "0.6.3"
+version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "79ec03bce71f18b4a27c4c64c6ba2ddf74686d69b91d8714fb32ead3adaed713"
+checksum = "878c6cbf956e03af9aa8204b407b9cbf47c072164800aa918c516cd4b056c50c"
 dependencies = [
  "base64",
  "byteorder",
@@ -1327,16 +1365,18 @@ dependencies = [
 
 [[package]]
 name = "postgres-types"
-version = "0.2.2"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04619f94ba0cc80999f4fc7073607cb825bc739a883cb6d20900fc5e009d6b0d"
+checksum = "73d946ec7d256b04dfadc4e6a3292324e6f417124750fc5c0950f981b703a0f1"
 dependencies = [
  "bytes",
  "chrono",
  "fallible-iterator",
+ "postgres-derive",
  "postgres-protocol",
  "serde",
  "serde_json",
+ "uuid",
 ]
 
 [[package]]
@@ -2042,6 +2082,7 @@ dependencies = [
  "async-trait",
  "chrono",
  "comrak",
+ "cron",
  "cynic",
  "dotenv",
  "futures",
@@ -2058,6 +2099,7 @@ dependencies = [
  "openssl",
  "parser",
  "postgres-native-tls",
+ "postgres-types",
  "rand",
  "regex",
  "reqwest",
@@ -2238,6 +2280,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
 dependencies = [
  "getrandom",
+ "serde",
 ]
 
 [[package]]

+ 4 - 2
Cargo.toml

@@ -23,13 +23,13 @@ hyper = { version = "0.14.4", features = ["server", "stream"]}
 tokio = { version = "1.7.1", features = ["macros", "time", "rt"] }
 futures = { version = "0.3", default-features = false, features = ["std"] }
 async-trait = "0.1.31"
-uuid = { version = "0.8", features = ["v4"] }
+uuid = { version = "0.8", features = ["v4", "serde"] }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 url = "2.1.0"
 once_cell = "1"
 chrono = { version = "0.4", features = ["serde"] }
-tokio-postgres = { version = "0.7.2", features = ["with-chrono-0_4", "with-serde_json-1"] }
+tokio-postgres = { version = "0.7.2", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-0_8"] }
 postgres-native-tls = "0.5.0"
 native-tls = "0.2"
 serde_path_to_error = "0.1.2"
@@ -42,6 +42,8 @@ tower = { version = "0.4.13", features = ["util", "limit", "buffer", "load-shed"
 github-graphql = { path = "github-graphql" }
 rand = "0.8.5"
 ignore = "0.4.18"
+postgres-types = { version = "0.2.4", features = ["derive"] }
+cron = { version = "0.12.0" }
 
 [dependencies.serde]
 version = "1"

+ 57 - 0
src/db.rs

@@ -1,4 +1,7 @@
+use crate::db::jobs::*;
+use crate::handlers::jobs::handle_job;
 use anyhow::Context as _;
+use chrono::Utc;
 use native_tls::{Certificate, TlsConnector};
 use postgres_native_tls::MakeTlsConnector;
 use std::sync::{Arc, Mutex};
@@ -6,6 +9,7 @@ use tokio::sync::{OwnedSemaphorePermit, Semaphore};
 use tokio_postgres::Client as DbClient;
 
 pub mod issue_data;
+pub mod jobs;
 pub mod notifications;
 pub mod rustc_commits;
 
@@ -179,6 +183,43 @@ pub async fn run_migrations(client: &DbClient) -> anyhow::Result<()> {
     Ok(())
 }
 
+pub async fn schedule_jobs(db: &DbClient, jobs: Vec<JobSchedule>) -> anyhow::Result<()> {
+    for job in jobs {
+        let mut upcoming = job.schedule.upcoming(Utc).take(1);
+
+        if let Some(scheduled_at) = upcoming.next() {
+            if let Err(_) = get_job_by_name_and_scheduled_at(&db, &job.name, &scheduled_at).await {
+                // mean there's no job already in the db with that name and scheduled_at
+                insert_job(&db, &job.name, &scheduled_at, &job.metadata).await?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+pub async fn run_scheduled_jobs(db: &DbClient) -> anyhow::Result<()> {
+    let jobs = get_jobs_to_execute(&db).await.unwrap();
+    tracing::trace!("jobs to execute: {:#?}", jobs);
+
+    for job in jobs.iter() {
+        update_job_executed_at(&db, &job.id).await?;
+
+        match handle_job(&job.name, &job.metadata).await {
+            Ok(_) => {
+                tracing::trace!("job successfully executed (id={})", job.id);
+                delete_job(&db, &job.id).await?;
+            }
+            Err(e) => {
+                tracing::trace!("job failed on execution (id={:?}, error={:?})", job.id, e);
+                update_job_error_message(&db, &job.id, &e.to_string()).await?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
 static MIGRATIONS: &[&str] = &[
     "
 CREATE TABLE notifications (
@@ -215,5 +256,21 @@ CREATE TABLE issue_data (
     data JSONB,
     PRIMARY KEY (repo, issue_number, key)
 );
+",
+    "
+CREATE TABLE jobs (
+    id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
+    name TEXT NOT NULL,
+    scheduled_at TIMESTAMP WITH TIME ZONE NOT NULL,
+    metadata JSONB,
+    executed_at TIMESTAMP WITH TIME ZONE,
+    error_message TEXT
+);
+",
+    "
+CREATE UNIQUE INDEX jobs_name_scheduled_at_unique_index 
+    ON jobs (
+        name, scheduled_at
+    );
 ",
 ];

+ 137 - 0
src/db/jobs.rs

@@ -0,0 +1,137 @@
+//! The `jobs` table provides a way to have scheduled jobs
+use anyhow::{Context as _, Result};
+use chrono::{DateTime, Utc};
+use cron::Schedule;
+use serde::{Deserialize, Serialize};
+use tokio_postgres::Client as DbClient;
+use uuid::Uuid;
+
+pub struct JobSchedule {
+    pub name: String,
+    pub schedule: Schedule,
+    pub metadata: serde_json::Value,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct Job {
+    pub id: Uuid,
+    pub name: String,
+    pub scheduled_at: DateTime<Utc>,
+    pub metadata: serde_json::Value,
+    pub executed_at: Option<DateTime<Utc>>,
+    pub error_message: Option<String>,
+}
+
+pub async fn insert_job(
+    db: &DbClient,
+    name: &String,
+    scheduled_at: &DateTime<Utc>,
+    metadata: &serde_json::Value,
+) -> Result<()> {
+    tracing::trace!("insert_job(name={})", name);
+
+    db.execute(
+        "INSERT INTO jobs (name, scheduled_at, metadata) VALUES ($1, $2, $3) 
+            ON CONFLICT (name, scheduled_at) DO UPDATE SET metadata = EXCLUDED.metadata",
+        &[&name, &scheduled_at, &metadata],
+    )
+    .await
+    .context("Inserting job")?;
+
+    Ok(())
+}
+
+pub async fn delete_job(db: &DbClient, id: &Uuid) -> Result<()> {
+    tracing::trace!("delete_job(id={})", id);
+
+    db.execute("DELETE FROM jobs WHERE id = $1", &[&id])
+        .await
+        .context("Deleting job")?;
+
+    Ok(())
+}
+
+pub async fn update_job_error_message(db: &DbClient, id: &Uuid, message: &String) -> Result<()> {
+    tracing::trace!("update_job_error_message(id={})", id);
+
+    db.execute(
+        "UPDATE jobs SET error_message = $2 WHERE id = $1",
+        &[&id, &message],
+    )
+    .await
+    .context("Updating job error message")?;
+
+    Ok(())
+}
+
+pub async fn update_job_executed_at(db: &DbClient, id: &Uuid) -> Result<()> {
+    tracing::trace!("update_job_executed_at(id={})", id);
+
+    db.execute("UPDATE jobs SET executed_at = now() WHERE id = $1", &[&id])
+        .await
+        .context("Updating job executed at")?;
+
+    Ok(())
+}
+
+pub async fn get_job_by_name_and_scheduled_at(
+    db: &DbClient,
+    name: &String,
+    scheduled_at: &DateTime<Utc>,
+) -> Result<Job> {
+    tracing::trace!(
+        "get_job_by_name_and_scheduled_at(name={}, scheduled_at={})",
+        name,
+        scheduled_at
+    );
+
+    let job = db
+        .query_one(
+            "SELECT * FROM jobs WHERE name = $1 AND scheduled_at = $2",
+            &[&name, &scheduled_at],
+        )
+        .await
+        .context("Select job by name and scheduled at")?;
+
+    deserialize_job(&job)
+}
+
+// Selects all jobs with:
+//  - scheduled_at in the past
+//  - error_message is null or executed_at is at least 60 minutes ago (intended to make repeat executions rare enough)
+pub async fn get_jobs_to_execute(db: &DbClient) -> Result<Vec<Job>> {
+    let jobs = db
+        .query(
+            "
+        SELECT * FROM jobs WHERE scheduled_at <= now() AND (error_message IS NULL OR executed_at <= now() - INTERVAL '60 minutes')",
+            &[],
+        )
+        .await
+        .context("Getting jobs data")?;
+
+    let mut data = Vec::with_capacity(jobs.len());
+    for job in jobs {
+        let serialized_job = deserialize_job(&job);
+        data.push(serialized_job.unwrap());
+    }
+
+    Ok(data)
+}
+
+fn deserialize_job(row: &tokio_postgres::row::Row) -> Result<Job> {
+    let id: Uuid = row.try_get(0)?;
+    let name: String = row.try_get(1)?;
+    let scheduled_at: DateTime<Utc> = row.try_get(2)?;
+    let metadata: serde_json::Value = row.try_get(3)?;
+    let executed_at: Option<DateTime<Utc>> = row.try_get(4)?;
+    let error_message: Option<String> = row.try_get(5)?;
+
+    Ok(Job {
+        id,
+        name,
+        scheduled_at,
+        metadata,
+        executed_at,
+        error_message,
+    })
+}

+ 1 - 0
src/handlers.rs

@@ -28,6 +28,7 @@ mod autolabel;
 mod close;
 mod github_releases;
 mod glacier;
+pub mod jobs;
 mod major_change;
 mod mentions;
 mod milestone_prs;

+ 21 - 0
src/handlers/jobs.rs

@@ -0,0 +1,21 @@
+// Function to match the scheduled job function with its corresponding handler.
+// In case you want to add a new one, just add a new clause to the match with
+// the job name and the corresponding function.
+
+// Further info could be find in src/jobs.rs
+
+pub async fn handle_job(name: &String, metadata: &serde_json::Value) -> anyhow::Result<()> {
+    match name {
+        _ => default(&name, &metadata),
+    }
+}
+
+fn default(name: &String, metadata: &serde_json::Value) -> anyhow::Result<()> {
+    tracing::trace!(
+        "handle_job fell into default case: (name={:?}, metadata={:?})",
+        name,
+        metadata
+    );
+
+    Ok(())
+}

+ 51 - 0
src/jobs.rs

@@ -0,0 +1,51 @@
+//! SCHEDULED JOBS
+//!
+//! The metadata is a serde_json::Value
+//! Please refer to https://docs.rs/serde_json/latest/serde_json/value/fn.from_value.html
+//! on how to interpret it as an instance of type T, implementing Serialize/Deserialize.
+//!
+//! The schedule is a cron::Schedule
+//! Please refer to https://docs.rs/cron/latest/cron/struct.Schedule.html for further info
+//!
+//! For example, if we want to sends a Zulip message every Friday at 11:30am ET into #t-release
+//! with a @T-release meeting! content, we should create some JobSchedule like:
+//!
+//!    #[derive(Serialize, Deserialize)]
+//!    struct ZulipMetadata {
+//!      pub message: String
+//!    }
+//!
+//!    let metadata = serde_json::value::to_value(ZulipMetadata {
+//!      message: "@T-release meeting!".to_string()
+//!    }).unwrap();
+//!
+//!    let schedule = Schedule::from_str("0 30 11 * * FRI *").unwrap();
+//!    
+//!    let new_job = JobSchedule {
+//!      name: "send_zulip_message".to_owned(),
+//!      schedule: schedule,
+//!      metadata: metadata
+//!    }
+//!
+//! and include it in the below vector in jobs():
+//!
+//!   jobs.push(new_job);
+//!
+//! ... fianlly, add the corresponding "send_zulip_message" handler in src/handlers/jobs.rs
+
+use crate::db::jobs::JobSchedule;
+
+// How often new cron-based jobs will be placed in the queue.
+// This is the minimum period *between* a single cron task's executions.
+pub const JOB_SCHEDULING_CADENCE_IN_SECS: u64 = 1800;
+
+// How often the database is inspected for jobs which need to execute.
+// This is the granularity at which events will occur.
+pub const JOB_PROCESSING_CADENCE_IN_SECS: u64 = 60;
+
+pub fn jobs() -> Vec<JobSchedule> {
+    // Add to this vector any new cron task you want (as explained above)
+    let jobs: Vec<JobSchedule> = Vec::new();
+
+    jobs
+}

+ 1 - 0
src/lib.rs

@@ -20,6 +20,7 @@ pub mod github;
 pub mod handlers;
 pub mod http_client;
 pub mod interactions;
+pub mod jobs;
 pub mod notification_listing;
 pub mod payload;
 pub mod rfcbot;

+ 58 - 0
src/main.rs

@@ -7,9 +7,11 @@ use hyper::{header, Body, Request, Response, Server, StatusCode};
 use reqwest::Client;
 use route_recognizer::Router;
 use std::{env, net::SocketAddr, sync::Arc};
+use tokio::{task, time};
 use tower::{Service, ServiceExt};
 use tracing as log;
 use tracing::Instrument;
+use triagebot::jobs::{jobs, JOB_PROCESSING_CADENCE_IN_SECS, JOB_SCHEDULING_CADENCE_IN_SECS};
 use triagebot::{db, github, handlers::Context, notification_listing, payload, EventName};
 
 async fn handle_agenda_request(req: String) -> anyhow::Result<String> {
@@ -237,6 +239,62 @@ async fn run_server(addr: SocketAddr) -> anyhow::Result<()> {
         .await
         .context("database migrations")?;
 
+    // spawning a background task that will schedule the jobs
+    // every JOB_SCHEDULING_CADENCE_IN_SECS
+    task::spawn(async move {
+        loop {
+            let res = task::spawn(async move {
+                let pool = db::ClientPool::new();
+                let mut interval =
+                    time::interval(time::Duration::from_secs(JOB_SCHEDULING_CADENCE_IN_SECS));
+
+                loop {
+                    interval.tick().await;
+                    db::schedule_jobs(&*pool.get().await, jobs())
+                        .await
+                        .context("database schedule jobs")
+                        .unwrap();
+                }
+            });
+
+            match res.await {
+                Err(err) if err.is_panic() => {
+                    /* handle panic in above task, re-launching */
+                    tracing::trace!("schedule_jobs task died (error={})", err);
+                }
+                _ => unreachable!(),
+            }
+        }
+    });
+
+    // spawning a background task that will run the scheduled jobs
+    // every JOB_PROCESSING_CADENCE_IN_SECS
+    task::spawn(async move {
+        loop {
+            let res = task::spawn(async move {
+                let pool = db::ClientPool::new();
+                let mut interval =
+                    time::interval(time::Duration::from_secs(JOB_PROCESSING_CADENCE_IN_SECS));
+
+                loop {
+                    interval.tick().await;
+                    db::run_scheduled_jobs(&*pool.get().await)
+                        .await
+                        .context("run database scheduled jobs")
+                        .unwrap();
+                }
+            });
+
+            match res.await {
+                Err(err) if err.is_panic() => {
+                    /* handle panic in above task, re-launching */
+                    tracing::trace!("run_scheduled_jobs task died (error={})", err);
+                }
+                _ => unreachable!(),
+            }
+        }
+    });
+
     let client = Client::new();
     let gh = github::GithubClient::new_with_default_token(client.clone());
     let oc = octocrab::OctocrabBuilder::new()