Getting Started

Welcome to SingleRust! This guide will help you take your first steps with SingleRust, from setup through a basic single-cell RNA-seq analysis workflow.

🦀

Step 1: Rust Environment Setup

Before you get started, check if Rust has already been installed via the rustup command. Just use rustup show to see what toolchain has been installed.

If Rust is not installed, visit rustup.rs and follow the installation instructions for your platform.

📦

Step 2: Add SingleRust to Your Project

Add SingleRust as a dependency in your Cargo.toml file:

[dependencies]
single_rust = "0.5.6"
âš¡

Step 3: Basic Analysis Workflow

Here's a typical single-cell analysis workflow using SingleRust. This example demonstrates loading data, filtering, normalization, HVG computation, and PCA.

Load Data into Memory

use single_rust::io::read_h5ad_memory;

// Load h5ad file into memory
let adata = read_h5ad_memory("path/to/data.h5ad")?;

println!("Loaded {} cells and {} genes", adata.n_obs(), adata.n_vars());

Quality Control Metrics

use single_rust::memory::statistics::qc::qc_metrics;

// Calculate QC metrics (includes mitochondrial percentage)
qc_metrics(&adata)?;

// QC metrics are now stored in adata.obs() and adata.var()
// Check columns like: n_genes_by_counts, total_counts, pct_counts_mito

Filter Cells and Genes

use single_rust::memory::processing::filtering::{mark_filter_cells, mark_filter_genes};

// Filter cells based on QC thresholds
let cell_mask = mark_filter_cells::<u32, f64>(
    &adata,
    Some(200),      // min_genes: at least 200 genes expressed
    Some(5000),     // max_genes: at most 5000 genes (remove doublets)
    Some(1000.0),   // min_counts: at least 1000 total counts
    None,           // max_counts: no upper limit
    None,           // min_fraction
    None,           // max_fraction
)?;

// Filter genes expressed in at least 3 cells
let gene_mask = mark_filter_genes::<u32, f64>(
    &adata,
    Some(3),        // min_cells: gene must be in at least 3 cells
    None,           // max_cells
    None,           // min_counts
    None,           // max_counts
    None,           // min_fraction
    None,           // max_fraction
)?;

// Apply filters to create subset (implementation depends on your needs)
println!("Keeping {} cells and {} genes after filtering",
         cell_mask.iter().filter(|&&x| x).count(),
         gene_mask.iter().filter(|&&x| x).count());

Normalization and Log Transformation

use single_rust::memory::processing::{normalize_expression, log1p_expression};
use single_utilities::types::Direction;

// Normalize to 10,000 counts per cell
normalize_expression(&adata.x(), 10_000, &Direction::ROW, None)?;

// Apply log1p transformation
log1p_expression(&adata.x(), None)?;

Identify Highly Variable Genes

use single_rust::memory::processing::compute_highly_variable_genes;
use single_rust::shared::HVGParams;

// Find highly variable genes with default parameters (Seurat method)
compute_highly_variable_genes(&adata, None)?;

// Or with custom parameters
let hvg_params = HVGParams {
    n_top_genes: Some(2000),     // Select top 2000 HVGs
    min_mean: 0.0125,
    max_mean: 3.0,
    min_dispersion: 0.5,
    ..Default::default()
};
compute_highly_variable_genes(&adata, Some(hvg_params))?;

// Results are stored in adata.var() with column "highly_variable"

Compute PCA on Highly Variable Genes

use single_rust::memory::processing::dimred::pca::run_pca_sparse_masked;
use single_rust::memory::processing::dimred::FeatureSelectionMethod;
use single_algebra::dimred::pca::SVDMethod;

// Get HVG mask from the var dataframe
let var_df = adata.var().get_data();
let hvg_col = var_df.column("highly_variable")?.bool()?;
let hvg_mask: Vec<bool> = hvg_col.into_iter()
    .map(|x| x.unwrap_or(false))
    .collect();

// Create feature selection method
let feature_selection = FeatureSelectionMethod::HighlyVariableSelection(hvg_mask);

// Run PCA with 50 components
let pca_result = run_pca_sparse_masked::<f64>(
    &adata.x(),
    Some(feature_selection),
    Some(true),                      // Center the data
    Some(false),                     // No verbose output
    Some(50),                        // 50 components
    Some(1.0),                       // Regularization alpha
    Some(42),                        // Random seed for reproducibility
    Some(SVDMethod::Randomized),     // Fast randomized SVD
)?;

// Access PCA results
let embeddings = pca_result.transformed;  // Cell embeddings (n_cells × 50)
let variance_explained = pca_result.explained_variance_ratio;
let cumulative_variance = pca_result.cumulative_explained_variance_ratio;

println!("PCA computed: {} cells × {} components", 
         embeddings.nrows(), embeddings.ncols());
println!("Total variance explained by 50 PCs: {.2}%", 
         cumulative_variance[[49]] * 100.0);
🎯

Complete Example

Putting it all together in a single workflow:

use single_rust::io::read_h5ad_memory;
use single_rust::memory::statistics::qc::qc_metrics;
use single_rust::memory::processing::filtering::{mark_filter_cells, mark_filter_genes};
use single_rust::memory::processing::{normalize_expression, log1p_expression, compute_highly_variable_genes};
use single_rust::memory::processing::dimred::pca::run_pca_sparse_masked;
use single_rust::memory::processing::dimred::FeatureSelectionMethod;
use single_rust::shared::HVGParams;
use single_utilities::types::Direction;
use single_algebra::dimred::pca::SVDMethod;

fn main() -> anyhow::Result<()> {
    // 1. Load data
    let adata = read_h5ad_memory("data.h5ad")?;
    println!("Loaded: {} cells × {} genes", adata.n_obs(), adata.n_vars());
    
    // 2. Quality control
    qc_metrics(&adata)?;
    
    // 3. Filter
    let cell_mask = mark_filter_cells::<u32, f64>(
        &adata, Some(200), Some(5000), Some(1000.0), None, None, None
    )?;
    let gene_mask = mark_filter_genes::<u32, f64>(
        &adata, Some(3), None, None, None, None, None
    )?;
    
    // 4. Normalize and transform
    normalize_expression(&adata.x(), 10_000, &Direction::ROW, None)?;
    log1p_expression(&adata.x(), None)?;
    
    // 5. Find HVGs
    compute_highly_variable_genes(&adata, Some(HVGParams {
        n_top_genes: Some(2000),
        ..Default::default()
    })?)?;
    
    // 6. Run PCA
    let var_df = adata.var().get_data();
    let hvg_mask: Vec<bool> = var_df.column("highly_variable")?.bool()?
        .into_iter().map(|x| x.unwrap_or(false)).collect();
    
    let pca_result = run_pca_sparse_masked::<f64>(
        &adata.x(),
        Some(FeatureSelectionMethod::HighlyVariableSelection(hvg_mask)),
        Some(true), Some(false), Some(50), Some(1.0), Some(42),
        Some(SVDMethod::Randomized)
    )?;
    
    println!("Analysis complete! PCA variance explained: {.2}%",
             pca_result.cumulative_explained_variance_ratio[[49]] * 100.0);
    
    Ok(())
}
🚀

Next Steps

Now that you've completed a basic workflow, you can explore:

  • â–¸ Differential Expression: Use rank_gene_groups to find marker genes between cell populations
  • â–¸ Advanced Filtering: Apply mitochondrial percentage thresholds and other custom filters
  • â–¸ Batch Effects: Handle multiple samples with batch-aware HVG detection
  • â–¸ Custom Analyses: Build your own pipelines using SingleRust's modular components

Check out the API Documentation for comprehensive reference.