Skip to content

Commit

Permalink
adding some updates to how illumina fastqs are matched
Browse files Browse the repository at this point in the history
  • Loading branch information
nrminor committed Sep 18, 2024
1 parent ca44353 commit 38717e3
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 21 deletions.
32 changes: 16 additions & 16 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ pub struct Cli {
#[derive(Subcommand)]
pub enum Commands {
#[clap(
about = "Generate an input samplesheet for `nf-core/viralrecon`.",
aliases = &["vr", "virrec", "vrc"]
)]
Viralrecon {
about = "Generate an input samplesheet for `nf-core/scrnaseq`.",
aliases = &["sc", "scr"]
)]
Scrnaseq {
/// Input directory to traverse for FASTQ files.
#[arg(short, long, required = false)]
input_dir: PathBuf,
Expand All @@ -54,22 +54,19 @@ pub enum Commands {
#[arg(short, long, required = false, default_value = ".fastq.gz")]
fastq_ext: String,

/// The sequencing platform where FASTQs came from
#[arg(short, long, required = true)]
platform: SeqPlatform,
/// the number of cells expected
#[arg(short, long, required = true, default_value_t = 10000)]
expected_cells: i64,

/// Output file prefix (the part before the `_samplesheet.csv`)
#[arg(short, long, required = false, default_value = None)]
output_prefix: Option<String>,
// /// Check a pre-existing samplesheet
// #[arg(short, long, required = false, default_value = "samplesheet.csv")]
// check: Option<String>,
},
#[clap(
about = "Generate an input samplesheet for `nf-core/scrnaseq`.",
aliases = &["sc", "scr"]
)]
Scrnaseq {
about = "Generate an input samplesheet for `nf-core/viralrecon`.",
aliases = &["vr", "virrec", "vrc"]
)]
Viralrecon {
/// Input directory to traverse for FASTQ files.
#[arg(short, long, required = false)]
input_dir: PathBuf,
Expand All @@ -78,12 +75,15 @@ pub enum Commands {
#[arg(short, long, required = false, default_value = ".fastq.gz")]
fastq_ext: String,

/// the number of cells expected
/// The sequencing platform where FASTQs came from
#[arg(short, long, required = true)]
expected_cells: i64,
platform: SeqPlatform,

/// Output file prefix (the part before the `_samplesheet.csv`)
#[arg(short, long, required = false, default_value = None)]
output_prefix: Option<String>,
// /// Check a pre-existing samplesheet
// #[arg(short, long, required = false, default_value = "samplesheet.csv")]
// check: Option<String>,
},
}
5 changes: 5 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ pub mod scrnaseq;
pub mod utils;
pub mod viralrecon;

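/// Parses the command-line arguments and dispatches on the selected subcommand.
///
/// # Errors
///
/// This function will return an error if the selected subcommand fails.
/// NOTE(review): the match arms are not visible in this hunk; presumably the
/// error is whatever the subcommand's samplesheet generation returns (confirm).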
fn main() -> Result<()> {
let cli = Cli::parse();
match &cli.command {
Expand Down
32 changes: 27 additions & 5 deletions src/scrnaseq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,15 @@ use crate::utils::write_lines;
pub use crate::viralrecon::find_files;
use color_eyre::eyre::Result;

fn retrieve_samples(file_paths: &[Rc<Path>]) -> HashSet<Rc<str>> {
let illumina_pattern = Regex::new(r"_L\d{3}_R\d_\d{3}\.fastq\.gz$").unwrap();
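/// Collects the unique sample IDs for a set of FASTQ paths by deleting the text
/// matched by the Illumina read pattern from a string derived from each path.
///
/// # Errors
///
/// This function will return an error if the Illumina pattern fails to compile
/// as a regular expression.
///
/// # Panics
///
/// NOTE(review): the path-to-string conversion is not fully visible in this
/// hunk; presumably panics if a path has no usable UTF-8 file name (confirm).
//
// NOTE(review): the pattern `.*_R[12].*\.fastq\.gz$` begins with `.*`, so a
// match starts at the beginning of the string and `replace_all(.., "")` removes
// the whole name rather than only the read suffix. Every matching file would
// then collapse to an empty sample ID; confirm whether that is intended.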
fn retrieve_samples(file_paths: &[Rc<Path>]) -> Result<HashSet<Rc<str>>> {
let illumina_pattern = Regex::new(r".*_R[12].*\.fastq\.gz$")?;

file_paths
let hits = file_paths
.iter()
.map(|path| {
Rc::from(
Expand All @@ -20,7 +25,9 @@ fn retrieve_samples(file_paths: &[Rc<Path>]) -> HashSet<Rc<str>> {
)
})
.map(|x| Rc::from(illumina_pattern.replace_all(&x, "").to_string()))
.collect()
.collect();

Ok(hits)
}

fn check_sample_ids(sample_ids: &HashSet<Rc<str>>) {
Expand All @@ -31,6 +38,15 @@ fn check_sample_ids(sample_ids: &HashSet<Rc<str>>) {
}
}

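/// Builds the samplesheet row for one sample: the sample ID, two FASTQ entries,
/// and the cell-count string, joined with commas.
///
/// # Panics
///
/// NOTE(review): most of the body is not visible in this hunk; presumably
/// panics if a FASTQ path cannot be converted to a string (confirm, or drop
/// this section if nothing in the body can panic).
///
/// # Errors
///
/// NOTE(review): presumably returns an error if the FASTQ files for
/// `sample_id` cannot be found among `fastq_paths` (confirm against the body).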
fn collect_per_sample(
sample_id: &Rc<str>,
fastq_paths: &[Rc<Path>],
Expand Down Expand Up @@ -61,6 +77,7 @@ fn collect_per_sample(
Ok([sample_id.as_ref(), fastq1, fastq2, &cell_str].join(","))
}

/// Assembles the samplesheet lines for the given sample IDs and FASTQ paths
/// into a `Vec<String>`.
fn concat_lines(
sample_ids: &HashSet<Rc<str>>,
fastq_paths: &[Rc<Path>],
Expand All @@ -72,6 +89,11 @@ fn concat_lines(
.collect::<Vec<String>>()
}

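/// Generates a samplesheet from the FASTQ files with extension `fastq_ext`
/// found in `input_dir`.
///
/// # Errors
///
/// This function will return an error if the FASTQ files cannot be searched
/// for or the sample IDs cannot be retrieved. NOTE(review): later steps are not
/// visible in this hunk and presumably add further failure modes, such as
/// writing the samplesheet (confirm).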
pub fn give_a_sheet(
input_dir: &Path,
fastq_ext: &str,
Expand All @@ -80,7 +102,7 @@ pub fn give_a_sheet(
) -> Result<()> {
// find the FASTQ files and separate out the unique sample IDs
let fastq_paths = find_files(input_dir, fastq_ext)?;
let sample_ids: HashSet<Rc<str>> = retrieve_samples(&fastq_paths);
let sample_ids = retrieve_samples(&fastq_paths)?;

// check the sample IDs for any that are too long
check_sample_ids(&sample_ids);
Expand Down
5 changes: 5 additions & 0 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ pub trait RetrieveSampleIds {
fn retrieve_samples(&self, file_paths: &[Rc<Path>]) -> Result<HashSet<Rc<str>>>;
}

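/// Writes `header` and `lines` to a samplesheet file, named
/// `<prefix>_samplesheet.csv` when an output prefix is provided.
///
/// # Errors
///
/// NOTE(review): the write itself is not visible in this hunk; presumably
/// returns an error if the output file cannot be created or written (confirm).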
pub fn write_lines(lines: &[String], header: &str, output_prefix: &Option<String>) -> Result<()> {
let out_name = match output_prefix {
Some(prefix) => format!("{}_samplesheet.csv", prefix),
Expand Down
15 changes: 15 additions & 0 deletions src/viralrecon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ use std::{collections::HashSet, ffi::OsStr, path::Path, rc::Rc};

use crate::utils::RetrieveSampleIds;

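/// Searches for files matching the pattern `<search_dir>/*<fastq_suffix>` and
/// returns their paths.
///
/// # Panics
///
/// NOTE(review): the body is not visible in this hunk; confirm whether anything
/// in it can panic, and drop this section if not.
///
/// # Errors
///
/// NOTE(review): presumably returns an error if the search pattern is invalid
/// or a matched path cannot be read (confirm against the body).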
pub fn find_files(search_dir: &Path, fastq_suffix: &str) -> Result<Vec<Rc<Path>>> {
// define the full pattern
let pattern = format!("{}/*{}", &search_dir.display(), &fastq_suffix);
Expand Down Expand Up @@ -112,6 +121,7 @@ impl CollectByPlatform for SeqPlatform {
}
}

/// Assembles the samplesheet lines for the given sample IDs and FASTQ paths
/// into a `Vec<String>`.
pub fn concat_lines(
sample_ids: &HashSet<Rc<str>>,
fastq_paths: &[Rc<Path>],
Expand All @@ -123,6 +133,11 @@ pub fn concat_lines(
.collect::<Vec<String>>()
}

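/// Generates a samplesheet from the FASTQ files with extension `fastq_ext`
/// found in `input_dir`.
///
/// # Errors
///
/// NOTE(review): the body is not visible in this hunk; presumably returns an
/// error if the FASTQ files cannot be found or the samplesheet cannot be
/// written (confirm).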
pub fn give_a_sheet(
input_dir: &Path,
fastq_ext: &str,
Expand Down

0 comments on commit 38717e3

Please sign in to comment.