Parcourir la source

Use cron crate in separate loop for job scheduling

Mauricio Cassola il y a 2 ans
Parent
commit
a7379ab2e4
8 fichiers modifiés avec 185 ajouts et 52 suppressions
  1. 28 0
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 16 2
      src/db.rs
  4. 54 20
      src/db/jobs.rs
  5. 1 23
      src/handlers/jobs.rs
  6. 49 0
      src/jobs.rs
  7. 1 0
      src/lib.rs
  8. 35 7
      src/main.rs

+ 28 - 0
Cargo.lock

@@ -279,6 +279,17 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "cron"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ff76b51e4c068c52bfd2866e1567bee7c567ae8f24ada09fd4307019e25eab7"
+dependencies = [
+ "chrono",
+ "nom",
+ "once_cell",
+]
+
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.8"
@@ -991,6 +1002,12 @@ version = "0.3.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
 
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
 [[package]]
 name = "miniz_oxide"
 version = "0.4.4"
@@ -1042,6 +1059,16 @@ dependencies = [
  "tempfile",
 ]
 
+[[package]]
+name = "nom"
+version = "7.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
 [[package]]
 name = "ntapi"
 version = "0.3.7"
@@ -2055,6 +2082,7 @@ dependencies = [
  "async-trait",
  "chrono",
  "comrak",
+ "cron",
  "cynic",
  "dotenv",
  "futures",

+ 1 - 0
Cargo.toml

@@ -43,6 +43,7 @@ github-graphql = { path = "github-graphql" }
 rand = "0.8.5"
 ignore = "0.4.18"
 postgres-types = { version = "0.2.4", features = ["derive"] }
+cron = { version = "0.12.0" }
 
 [dependencies.serde]
 version = "1"

+ 16 - 2
src/db.rs

@@ -1,6 +1,7 @@
 use crate::db::jobs::*;
 use crate::handlers::jobs::handle_job;
 use anyhow::Context as _;
+use chrono::Utc;
 use native_tls::{Certificate, TlsConnector};
 use postgres_native_tls::MakeTlsConnector;
 use std::sync::{Arc, Mutex};
@@ -182,6 +183,21 @@ pub async fn run_migrations(client: &DbClient) -> anyhow::Result<()> {
     Ok(())
 }
 
+pub async fn schedule_jobs(db: &DbClient, jobs: Vec<JobSchedule>) -> anyhow::Result<()> {
+    for job in jobs {
+        let mut upcoming = job.schedule.upcoming(Utc).take(1);
+
+        if let Some(scheduled_at) = upcoming.next() {
+            if let Err(_) = get_job_by_name_and_scheduled_at(&db, &job.name, &scheduled_at).await {
+                // means there's no job already in the db with that name and scheduled_at
+                insert_job(&db, &job.name, &scheduled_at, &job.metadata).await?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
 pub async fn run_scheduled_jobs(db: &DbClient) -> anyhow::Result<()> {
     let jobs = get_jobs_to_execute(&db).await.unwrap();
     tracing::trace!("jobs to execute: {:#?}", jobs);
@@ -192,12 +208,10 @@ pub async fn run_scheduled_jobs(db: &DbClient) -> anyhow::Result<()> {
         match handle_job(&job.name, &job.metadata).await {
             Ok(_) => {
                 tracing::trace!("job succesfully executed (id={})", job.id);
-
                 delete_job(&db, &job.id).await?;
             }
             Err(e) => {
                 tracing::trace!("job failed on execution (id={:?}, error={:?})", job.id, e);
-
                 update_job_error_message(&db, &job.id, &e.to_string()).await?;
             }
         }

+ 54 - 20
src/db/jobs.rs

@@ -1,30 +1,37 @@
 //! The `jobs` table provides a way to have scheduled jobs
 use anyhow::{Context as _, Result};
-use chrono::{DateTime, FixedOffset};
+use chrono::{DateTime, Utc};
+use cron::Schedule;
 use serde::{Deserialize, Serialize};
 use tokio_postgres::Client as DbClient;
 use uuid::Uuid;
 
+pub struct JobSchedule {
+    pub name: String,
+    pub schedule: Schedule,
+    pub metadata: serde_json::Value,
+}
+
 #[derive(Serialize, Deserialize, Debug)]
 pub struct Job {
     pub id: Uuid,
     pub name: String,
-    pub scheduled_at: DateTime<FixedOffset>,
+    pub scheduled_at: DateTime<Utc>,
     pub metadata: serde_json::Value,
-    pub executed_at: Option<DateTime<FixedOffset>>,
+    pub executed_at: Option<DateTime<Utc>>,
     pub error_message: Option<String>,
 }
 
 pub async fn insert_job(
     db: &DbClient,
     name: &String,
-    scheduled_at: &DateTime<FixedOffset>,
+    scheduled_at: &DateTime<Utc>,
     metadata: &serde_json::Value,
 ) -> Result<()> {
     tracing::trace!("insert_job(name={})", name);
 
     db.execute(
-        "INSERT INTO jobs (name, scheduled_at, metadata) VALUES ($1, $2, $3, $4, $5) 
+        "INSERT INTO jobs (name, scheduled_at, metadata) VALUES ($1, $2, $3) 
             ON CONFLICT (name, scheduled_at) DO UPDATE SET metadata = EXCLUDED.metadata",
         &[&name, &scheduled_at, &metadata],
     )
@@ -67,6 +74,28 @@ pub async fn update_job_executed_at(db: &DbClient, id: &Uuid) -> Result<()> {
     Ok(())
 }
 
+pub async fn get_job_by_name_and_scheduled_at(
+    db: &DbClient,
+    name: &String,
+    scheduled_at: &DateTime<Utc>,
+) -> Result<Job> {
+    tracing::trace!(
+        "get_job_by_name_and_scheduled_at(name={}, scheduled_at={})",
+        name,
+        scheduled_at
+    );
+
+    let job = db
+        .query_one(
+            "SELECT * FROM jobs WHERE name = $1 AND scheduled_at = $2",
+            &[&name, &scheduled_at],
+        )
+        .await
+        .context("Select job by name and scheduled at")?;
+
+    serialize_job(&job)
+}
+
 // Selects all jobs with:
 //  - scheduled_at in the past
 //  - error_message is null or executed_at is at least 60 minutes ago (intended to make repeat executions rare enough)
@@ -82,22 +111,27 @@ pub async fn get_jobs_to_execute(db: &DbClient) -> Result<Vec<Job>> {
 
     let mut data = Vec::with_capacity(jobs.len());
     for job in jobs {
-        let id: Uuid = job.get(0);
-        let name: String = job.get(1);
-        let scheduled_at: DateTime<FixedOffset> = job.get(2);
-        let metadata: serde_json::Value = job.get(5);
-        let executed_at: Option<DateTime<FixedOffset>> = job.get(6);
-        let error_message: Option<String> = job.get(7);
-
-        data.push(Job {
-            id,
-            name,
-            scheduled_at,
-            metadata,
-            executed_at,
-            error_message,
-        });
+        let serialized_job = serialize_job(&job);
+        data.push(serialized_job.unwrap());
     }
 
     Ok(data)
 }
+
+fn serialize_job(row: &tokio_postgres::row::Row) -> Result<Job> {
+    let id: Uuid = row.try_get(0)?;
+    let name: String = row.try_get(1)?;
+    let scheduled_at: DateTime<Utc> = row.try_get(2)?;
+    let metadata: serde_json::Value = row.try_get(3)?;
+    let executed_at: Option<DateTime<Utc>> = row.try_get(4)?;
+    let error_message: Option<String> = row.try_get(5)?;
+
+    Ok(Job {
+        id,
+        name,
+        scheduled_at,
+        metadata,
+        executed_at,
+        error_message,
+    })
+}

+ 1 - 23
src/handlers/jobs.rs

@@ -2,29 +2,7 @@
 // In case you want to add a new one, just add a new clause to the match with
 // the job name and the corresponding function.
 
-// The metadata is a serde_json::Value
-// Please refer to https://docs.rs/serde_json/latest/serde_json/value/fn.from_value.html
-// on how to interpret it as an instance of type T, implementing Serialize/Deserialize.
-
-// For example, if we want to sends a Zulip message every Friday at 11:30am ET into #t-release
-// with a @T-release meeting! content, we should create some Job like:
-//
-//    #[derive(Serialize, Deserialize)]
-//    struct ZulipMetadata {
-//      pub message: String
-//    }
-//
-//    let metadata = serde_json::value::to_value(ZulipMetadata {
-//      message: "@T-release meeting!".to_string()
-//     }).unwrap();
-//
-//    Job {
-//      name: "send_zulip_message",
-//      scheduled_at: "2022-09-30T11:30:00+10:00",
-//      metadata: metadata
-//    }
-//
-// ... and add the corresponding "send_zulip_message" handler.
+// Further info can be found in src/jobs.rs
 
 pub async fn handle_job(name: &String, metadata: &serde_json::Value) -> anyhow::Result<()> {
     match name {

+ 49 - 0
src/jobs.rs

@@ -0,0 +1,49 @@
+//! SCHEDULED JOBS
+//!
+//! The metadata is a serde_json::Value
+//! Please refer to https://docs.rs/serde_json/latest/serde_json/value/fn.from_value.html
+//! on how to interpret it as an instance of type T, implementing Serialize/Deserialize.
+//!
+//! The schedule is a cron::Schedule
+//! Please refer to https://docs.rs/cron/latest/cron/struct.Schedule.html for further info
+//!
+//! For example, if we want to send a Zulip message every Friday at 11:30am ET into #t-release
+//! with a @T-release meeting! content, we should create some JobSchedule like:
+//!
+//!    #[derive(Serialize, Deserialize)]
+//!    struct ZulipMetadata {
+//!      pub message: String
+//!    }
+//!
+//!    let metadata = serde_json::value::to_value(ZulipMetadata {
+//!      message: "@T-release meeting!".to_string()
+//!    }).unwrap();
+//!
+//!    let schedule = Schedule::from_str("0 30 11 * * FRI *").unwrap();
+//!    
+//!    let new_job = JobSchedule {
+//!      name: "send_zulip_message".to_owned(),
+//!      schedule: schedule,
+//!      metadata: metadata
+//!    }
+//!
+//! and include it in the below vector in jobs():
+//!
+//!   jobs.push(new_job);
+//!
+//! ... finally, add the corresponding "send_zulip_message" handler in src/handlers/jobs.rs
+
+use crate::db::jobs::JobSchedule;
+
+// Cadence in seconds with which the jobs will be scheduled
+pub const JOB_SCHEDULING_CADENCE_IN_SECS: u64 = 1800;
+
+// Cadence in seconds with which the jobs will be processed
+pub const JOB_PROCESSING_CADENCE_IN_SECS: u64 = 60;
+
+pub fn jobs() -> Vec<JobSchedule> {
+    // Add to this vector any new cron task you want (as explained above)
+    let jobs: Vec<JobSchedule> = Vec::new();
+
+    jobs
+}

+ 1 - 0
src/lib.rs

@@ -20,6 +20,7 @@ pub mod github;
 pub mod handlers;
 pub mod http_client;
 pub mod interactions;
+pub mod jobs;
 pub mod notification_listing;
 pub mod payload;
 pub mod rfcbot;

+ 35 - 7
src/main.rs

@@ -6,15 +6,14 @@ use futures::StreamExt;
 use hyper::{header, Body, Request, Response, Server, StatusCode};
 use reqwest::Client;
 use route_recognizer::Router;
-use std::{env, net::SocketAddr, sync::Arc, time::Duration};
-use tokio::{task, time::sleep};
+use std::{env, net::SocketAddr, sync::Arc};
+use tokio::{task, time};
 use tower::{Service, ServiceExt};
 use tracing as log;
 use tracing::Instrument;
+use triagebot::jobs::{jobs, JOB_PROCESSING_CADENCE_IN_SECS, JOB_SCHEDULING_CADENCE_IN_SECS};
 use triagebot::{db, github, handlers::Context, notification_listing, payload, EventName};
 
-const JOB_PROCESSING_CADENCE_IN_SECS: u64 = 60;
-
 async fn handle_agenda_request(req: String) -> anyhow::Result<String> {
     if req == "/agenda/lang/triage" {
         return triagebot::agenda::lang().call().await;
@@ -240,20 +239,49 @@ async fn run_server(addr: SocketAddr) -> anyhow::Result<()> {
         .await
         .context("database migrations")?;
 
+    // spawning a background task that will schedule the jobs
+    // every JOB_SCHEDULING_CADENCE_IN_SECS
+    task::spawn(async move {
+        loop {
+            let res = task::spawn(async move {
+                let pool = db::ClientPool::new();
+                let mut interval =
+                    time::interval(time::Duration::from_secs(JOB_SCHEDULING_CADENCE_IN_SECS));
+
+                loop {
+                    interval.tick().await;
+                    db::schedule_jobs(&*pool.get().await, jobs())
+                        .await
+                        .context("database schedule jobs")
+                        .unwrap();
+                }
+            });
+
+            match res.await {
+                Err(err) if err.is_panic() => {
+                    /* handle panic in above task, re-launching */
+                    tracing::trace!("schedule_jobs task died (error={})", err);
+                }
+                _ => unreachable!(),
+            }
+        }
+    });
+
     // spawning a background task that will run the scheduled jobs
     // every JOB_PROCESSING_CADENCE_IN_SECS
     task::spawn(async move {
         loop {
             let res = task::spawn(async move {
                 let pool = db::ClientPool::new();
+                let mut interval =
+                    time::interval(time::Duration::from_secs(JOB_PROCESSING_CADENCE_IN_SECS));
 
                 loop {
+                    interval.tick().await;
                     db::run_scheduled_jobs(&*pool.get().await)
                         .await
                         .context("run database scheduled jobs")
                         .unwrap();
-
-                    sleep(Duration::from_secs(JOB_PROCESSING_CADENCE_IN_SECS)).await;
                 }
             });
 
@@ -262,7 +290,7 @@ async fn run_server(addr: SocketAddr) -> anyhow::Result<()> {
                     /* handle panic in above task, re-launching */
                     tracing::trace!("run_scheduled_jobs task died (error={})", err);
                 }
-                _ => unreachable!()
+                _ => unreachable!(),
             }
         }
     });